Example #1
def test_random_wrapper(show_plots):
    with NumpySeed(100):
        tile_shape = (28, 35)
        n_examples = 10
        dset = GridEmnistObjectDetectionDataset(min_chars=25,
                                                max_chars=25,
                                                n_patch_examples=100,
                                                n_examples=n_examples,
                                                draw_shape_grid=(5, 5),
                                                image_shape=(4 * 14, 5 * 14),
                                                draw_offset=(0, 0),
                                                spacing=(-5, -5),
                                                characters=list(range(10)),
                                                colours="white",
                                                postprocessing="random",
                                                tile_shape=tile_shape,
                                                n_samples_per_image=4)

        assert dset.x[0].shape == tile_shape + (3, )
        assert dset.image_shape == tile_shape
        assert len(dset.x) == n_examples * 4
        assert dset.n_examples == n_examples * 4

    if show_plots:
        dset.visualize()
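
Every example in this listing wraps its work in a NumpySeed(...) context so that dataset generation is reproducible. Below is a minimal sketch of such a context manager, assuming it only needs to save, seed, and restore NumPy's global RNG state; the actual dps implementation may differ in its details.

import numpy as np


class NumpySeed:
    """Seed NumPy's global RNG inside a `with` block and restore the previous state on exit."""

    def __init__(self, seed):
        self.seed = seed
        self._state = None

    def __enter__(self):
        self._state = np.random.get_state()  # remember the caller's RNG state
        np.random.seed(self.seed)            # make everything inside the block reproducible
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        np.random.set_state(self._state)     # restore the old state, even if an exception occurred
        return False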
Example #2
def test_emnist_dataset():
    with NumpySeed(100):

        kwargs = dict(shape=(14, 14),
                      include_blank=True,
                      one_hot=False,
                      balance=True,
                      classes=[1, 2, 3],
                      n_examples=100)

        dataset = EmnistDataset(**kwargs)
        assert set(dataset.y.flatten()) == set([0, 1, 2, 3])
        assert dataset.x.shape == (100, 14, 14)
        assert dataset.y.shape == (100, 1)
        assert dataset.x.min() == 0.0
        assert 0.0 <= dataset.x.min() <= 10.0
        assert 200.0 <= dataset.x.max() <= 255.0

        kwargs['one_hot'] = True
        dataset = EmnistDataset(**kwargs)
        assert dataset.x.shape == (100, 14, 14)
        assert dataset.y.shape == (100, 4)
        assert (dataset.y.sum(1) == 1).all()
        assert ((dataset.y == 0) | (dataset.y == 1)).all()
        assert 0.0 <= dataset.x.min() <= 10.0
        assert 200.0 <= dataset.x.max() <= 255.0
Example #3
def _test_gradient(device):

    if device == "gpu" and not visible_gpu():
        pytest.xfail("no gpu is visible")

    with NumpySeed(100):
        with tf.device('/{}:0'.format(device)):
            sprites, scales, offsets, backgrounds = get_data(random_alpha=True,
                                                             squash=0.99)

            sprites_tf = constant_op.constant(sprites)
            scales_tf = constant_op.constant(scales)
            offsets_tf = constant_op.constant(offsets)
            backgrounds_tf = constant_op.constant(backgrounds)

            images = render_sprites.render_sprites(sprites_tf, scales_tf,
                                                   offsets_tf, backgrounds_tf)

            sess = get_session()
            with sess.as_default():
                with tf.device(device):
                    err = gradient_checker.compute_gradient_error(
                        [sprites_tf, scales_tf, offsets_tf, backgrounds_tf], [
                            sprites.shape, scales.shape, offsets.shape,
                            backgrounds.shape
                        ],
                        images,
                        backgrounds.shape,
                        [sprites, scales, offsets, backgrounds],
                        delta=0.002)

            print("Jacobian error: {}".format(err))
            threshold = 2e-4
            assert err < threshold, "Jacobian error ({}) exceeded threshold ({})".format(
                err, threshold)
Example #4
def test_omniglot_dataset():
    with NumpySeed(100):

        classes = [
            'Cyrillic,17', 'Mkhedruli_(Georgian),5', 'Bengali,23',
            'Mongolian,19', 'Malayalam,3', 'Ge_ez,15', 'Glagolitic,33',
            'Tagalog,11', 'Gujarati,23', 'Old_Church_Slavonic_(Cyrillic),7'
        ]  # Chosen randomly from set of all omniglot characters.
        indices = [1, 3, 5, 7, 9]
        kwargs = dict(shape=(14, 14),
                      include_blank=True,
                      one_hot=False,
                      indices=indices,
                      classes=classes)

        n_classes = len(classes) + 1
        n_examples = len(indices) * n_classes

        dataset = OmniglotDataset(**kwargs)
        assert set(dataset.y.flatten()) == set(range(n_classes))
        assert dataset.x.shape == (n_examples, 14, 14)
        assert dataset.y.shape == (n_examples, 1)
        assert dataset.x.min() == 0.0
        assert 0.0 <= dataset.x.min() <= 10.0
        assert 200.0 <= dataset.x.max() <= 255.0

        kwargs['one_hot'] = True

        dataset = OmniglotDataset(**kwargs)
        assert dataset.x.shape == (n_examples, 14, 14)
        assert dataset.y.shape == (n_examples, n_classes)
        assert (dataset.y.sum(1) == 1).all()
        assert ((dataset.y == 0) | (dataset.y == 1)).all()
        assert 0.0 <= dataset.x.min() <= 10.0
        assert 200.0 <= dataset.x.max() <= 255.0
Example #5
    def __init__(self):
        train_seed, val_seed, test_seed = 0, 1, 2
        perm_seed = 4

        n_episodes = count_episodes(cfg.atari_game, cfg.after_warp)

        with NumpySeed(perm_seed):
            perm = np.random.permutation(n_episodes)

        n_val_episodes = max(1, int(cfg.val_fraction*n_episodes))
        train_end = n_val_episodes
        val_end = n_episodes - n_val_episodes

        if cfg.do_train:
            train_episode_range = (None, train_end)
        else:
            train_episode_range = (None, 5)

        val_episode_range = (train_end, val_end)
        test_episode_range = (val_end, None)

        train_episode_indices = perm[slice(*train_episode_range)]
        val_episode_indices = perm[slice(*val_episode_range)]
        test_episode_indices = perm[slice(*test_episode_range)]

        self.dataset = dict(
            train=AtariLongVideoVideoDataset(seed=train_seed, episode_indices=train_episode_indices,),
            val=AtariLongVideoVideoDataset(seed=val_seed, episode_indices=val_episode_indices,),
            test=AtariLongVideoVideoDataset(seed=test_seed, episode_indices=test_episode_indices,),
        )
Example #6
def test_omniglot(build_function, test_config):
    with NumpySeed(83849):
        classes = ["Greek,18", "Greek,19"]
        n_classes = len(classes)

        config = OMNIGLOT_CONFIG.copy(
            patience=10000,
            build_function=build_function,
            classes=classes,
            n_controller_units=100,
            threshold=0.2,
            stopping_criteria="01_loss,min",
            train_indices=list(range(15)),
            val_indices=list(range(15, 20)),
            test_indices=list(range(15, 20)),
        )
        config.update(test_config)

        checkpoint_dir = make_checkpoint_dir(config, 'test_omni')
        output_size = n_classes + 1

        name_params = 'classes include_blank shape n_controller_units'

        g, sess = get_graph_and_session()

        with ExitStack() as stack:
            stack.enter_context(g.as_default())
            stack.enter_context(sess)
            stack.enter_context(sess.as_default())
            stack.enter_context(config)

            f = build_function()
            f.set_pretraining_params(config, name_params, checkpoint_dir)
            x_ph = tf.placeholder(tf.float32, (None, ) + cfg.shape)

            inference = f(x_ph, output_size, False)

            test_dataset = OmniglotDataset(indices=cfg.test_indices,
                                           one_hot=True)
            _eval_model(test_dataset, inference, x_ph)

        g, sess = get_graph_and_session()

        with ExitStack() as stack:
            stack.enter_context(g.as_default())
            stack.enter_context(sess)
            stack.enter_context(sess.as_default())
            stack.enter_context(config)

            f = build_function()
            f.set_pretraining_params(config, name_params, checkpoint_dir)
            x_ph = tf.placeholder(tf.float32, (None, ) + cfg.shape)
            inference = f(x_ph, output_size, False)

            assert f.was_loaded is True

            test_dataset = OmniglotDataset(indices=cfg.test_indices,
                                           one_hot=True)
            _eval_model(test_dataset, inference, x_ph)
Example #7
def test_emnist_pretrained(build_function, test_config):
    with NumpySeed(83849):
        n_classes = 10
        classes = EmnistDataset.sample_classes(n_classes)

        config = EMNIST_CONFIG.copy(build_function=build_function,
                                    classes=classes,
                                    threshold=0.1,
                                    stopping_criteria="01_loss,min",
                                    n_controller_units=100,
                                    n_train=10000)
        config.update(test_config)

        checkpoint_dir = make_checkpoint_dir(config, 'test_emnist')
        output_size = n_classes + 1

        name_params = 'classes include_blank shape n_controller_units'

        g, sess = get_graph_and_session()
        with ExitStack() as stack:
            stack.enter_context(g.as_default())
            stack.enter_context(sess)
            stack.enter_context(sess.as_default())
            stack.enter_context(config)

            f = build_function()
            f.set_pretraining_params(config, name_params, checkpoint_dir)
            x_ph = tf.placeholder(tf.float32, (None, ) + cfg.shape)
            inference = f(x_ph, output_size, False)

            assert f.was_loaded is False

            with config:
                test_dataset = EmnistDataset(n_examples=cfg.n_val,
                                             one_hot=True)
                _eval_model(test_dataset, inference, x_ph)

        g, sess = get_graph_and_session()
        with ExitStack() as stack:
            stack.enter_context(g.as_default())
            stack.enter_context(sess)
            stack.enter_context(sess.as_default())
            stack.enter_context(config)

            f = build_function()
            f.set_pretraining_params(config, name_params, checkpoint_dir)
            x_ph = tf.placeholder(tf.float32, (None, ) + cfg.shape)
            inference = f(x_ph, output_size, False)

            assert f.was_loaded is True

            with config:
                test_dataset = EmnistDataset(n_examples=cfg.n_val,
                                             one_hot=True)
                _eval_model(test_dataset, inference, x_ph)
Example #8
def test_determinism(dataset, test_config):
    build_function = build_mlp  # Can't use build_lenet here as it is slightly non-deterministic for reasons unknown.
    with NumpySeed(83849):
        n_classes = 10

        info = determinism_info[dataset]

        classes = info['sample_classes'](n_classes)
        config = info['config'].copy(
            build_function=build_function,
            classes=classes,
            n_controller_units=100,
            threshold=0.2,
            stopping_criteria="01_loss,min",
            seed=334324923,
            display_step=100,
            eval_step=100,
            max_steps=1001,
            tee=False,
            n_train=500,
        )
        config.update(test_config)

        name_params = 'classes include_blank shape n_controller_units'
        output_size = n_classes + 1

        n_repeats = 5

        output = defaultdict(int)

        dir_names = []
        try:
            for i in range(n_repeats):
                checkpoint_dir = make_checkpoint_dir(
                    config, 'test_{}_{}'.format(dataset, i))
                dir_names.append(checkpoint_dir)
                _train_classifier(build_function, config, name_params,
                                  output_size, checkpoint_dir)
                o = _get_deterministic_output(checkpoint_dir)
                output[o] += 1

            if len(output) != 1:
                for o in sorted(output):
                    print("\n" + "*" * 80)
                    print("The following occurred {} times:\n".format(
                        output[o]))
                    print(o)
                raise Exception("Results were not deterministic.")
        finally:
            for dn in dir_names:
                try:
                    shutil.rmtree(dn)
                except FileNotFoundError:
                    pass
Example #9
File: base.py Project: alcinos/dps
    def __init__(self, **kwargs):
        start = time.time()
        print("Trying to find dataset in cache...")

        directory = kwargs.get(
            "data_dir",
            os.path.join(cfg.data_dir, "cached_datasets",
                         self.__class__.__name__))
        os.makedirs(directory, exist_ok=True)

        params = self.param_values()
        param_hash = get_param_hash(params)
        print(self.__class__.__name__)
        print("Params:")
        pprint.pprint(params)
        print("Param hash: {}".format(param_hash))

        self.directory = os.path.join(directory, str(param_hash))
        cfg_filename = os.path.join(self.directory, "config.txt")

        if not os.path.exists(cfg_filename):

            # Start fresh
            try:
                shutil.rmtree(self.directory)
            except FileNotFoundError:
                pass

            print("Directory for dataset not found, creating...")
            os.makedirs(self.directory, exist_ok=False)

            try:
                with NumpySeed(self.seed):
                    self._make()

                print("Done creating dataset.")
            except BaseException:
                try:
                    shutil.rmtree(self.directory)
                except FileNotFoundError:
                    pass
                raise

            with open(cfg_filename, 'w') as f:
                f.write(pprint.pformat(params))
        else:
            print("Found.")

        print("Took {} seconds.".format(time.time() - start))
        print("Features for dataset: ")
        pprint.pprint(self.features)
        print()
Example #10
def test_emnist_load_or_train(build_function, test_config):
    with NumpySeed(83849):
        n_classes = 10
        classes = EmnistDataset.sample_classes(n_classes)

        config = EMNIST_CONFIG.copy(build_function=build_function,
                                    classes=classes,
                                    threshold=0.1,
                                    stopping_criteria="01_loss,min",
                                    n_controller_units=100,
                                    n_train=10000)
        config.update(test_config)

        checkpoint_dir = make_checkpoint_dir(config, 'test_emnist')
        output_size = n_classes + 1

        g, sess = get_graph_and_session()
        with ExitStack() as stack:
            stack.enter_context(g.as_default())
            stack.enter_context(sess)
            stack.enter_context(sess.as_default())
            stack.enter_context(config)

            f = build_function()
            x_ph = tf.placeholder(tf.float32, (None, ) + cfg.shape)
            inference = f(x_ph, output_size, False)

            loaded = load_or_train(config, f.scope,
                                   os.path.join(checkpoint_dir, 'model'))
            assert not loaded

            test_dataset = EmnistDataset(n_examples=cfg.n_val, one_hot=True)
            _eval_model(test_dataset, inference, x_ph)

        g, sess = get_graph_and_session()
        with ExitStack() as stack:
            stack.enter_context(g.as_default())
            stack.enter_context(sess)
            stack.enter_context(sess.as_default())
            stack.enter_context(config)

            f = build_function()
            x_ph = tf.placeholder(tf.float32, (None, ) + cfg.shape)
            inference = f(x_ph, output_size, False)

            loaded = load_or_train(config, f.scope,
                                   os.path.join(checkpoint_dir, 'model'))
            assert loaded

            test_dataset = EmnistDataset(n_examples=cfg.n_val, one_hot=True)
            _eval_model(test_dataset, inference, x_ph)
Example #11
def test_cache_dataset():
    with NumpySeed(100):

        kwargs = dict(shape=(14, 14),
                      include_blank=True,
                      one_hot=False,
                      balance=True,
                      classes=[1, 2, 3],
                      n_examples=100)

        cache_dir = "/tmp/dps_test/cached_datasets"

        shutil.rmtree(cache_dir, ignore_errors=True)

        with remove(cache_dir):
            dataset = EmnistDataset(**kwargs, use_dataset_cache=cache_dir)
            assert not dataset.loaded

            assert set(dataset.y.flatten()) == set([0, 1, 2, 3])
            assert dataset.x.shape == (100, 14, 14)
            assert dataset.y.shape == (100, 1)
            assert dataset.x.min() == 0.0
            assert 0.0 <= dataset.x.min() <= 10.0
            assert 200.0 <= dataset.x.max() <= 255.0

            dataset2 = EmnistDataset(**kwargs, use_dataset_cache=cache_dir)
            assert dataset2.loaded

            assert set(dataset2.y.flatten()) == set([0, 1, 2, 3])
            assert dataset2.x.shape == (100, 14, 14)
            assert dataset2.y.shape == (100, 1)
            assert dataset2.x.min() == 0.0
            assert 0.0 <= dataset2.x.min() <= 10.0
            assert 200.0 <= dataset2.x.max() <= 255.0

            assert (dataset.x == dataset2.x).all()
            assert (dataset.y == dataset2.y).all()

            dataset3 = EmnistDataset(**kwargs, use_dataset_cache=False)

            assert set(dataset3.y.flatten()) == set([0, 1, 2, 3])
            assert dataset3.x.shape == (100, 14, 14)
            assert dataset3.y.shape == (100, 1)
            assert dataset3.x.min() == 0.0
            assert 0.0 <= dataset3.x.min() <= 10.0
            assert 200.0 <= dataset3.x.max() <= 255.0

            assert (dataset3.x != dataset2.x).any()
            assert (dataset3.y != dataset2.y).any()
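
The cache test above relies on a remove(cache_dir) context manager to clean up the on-disk cache afterwards. A minimal sketch of such a helper is shown below, assuming all it needs to do is delete the path when the block exits, whether or not an exception was raised; the dps helper may behave differently.

import shutil
from contextlib import contextmanager


@contextmanager
def remove(path):
    """Delete `path` and everything under it once the block exits."""
    try:
        yield path
    finally:
        shutil.rmtree(path, ignore_errors=True)  # clean up even if the block failed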
Example #12
    def __init__(self):
        train_seed, val_seed, test_seed = 0, 1, 2
        perm_seed = 4

        n_episodes = count_episodes(cfg.atari_game, cfg.after_warp)

        with NumpySeed(perm_seed):
            perm = np.random.permutation(n_episodes)

        n_val_episodes = max(1, int(cfg.val_fraction*n_episodes))
        train_end = n_episodes - 2*n_val_episodes
        val_end = n_episodes - n_val_episodes

        if cfg.do_train:
            train_episode_range = (None, train_end)
        else:
            train_episode_range = (None, 1)
        val_episode_range = (train_end, val_end)
        test_episode_range = (val_end, None)

        train_episode_indices = perm[slice(*train_episode_range)]
        val_episode_indices = perm[slice(*val_episode_range)]
        test_episode_indices = perm[slice(*test_episode_range)]

        get_annotations = not cfg.do_train

        train = AtariVideoDataset(
            max_examples=int(cfg.n_train), seed=train_seed,
            episode_indices=train_episode_indices, get_annotations=False,
            sample_density=cfg.train_sample_density)

        val = AtariVideoDataset(
            max_examples=int(cfg.n_val), seed=val_seed,
            episode_indices=val_episode_indices, get_annotations=get_annotations,
            sample_density=cfg.val_sample_density)

        test = AtariVideoDataset(
            max_examples=int(cfg.n_val), seed=test_seed,
            episode_indices=test_episode_indices, get_annotations=get_annotations,
            sample_density=cfg.val_sample_density)

        self.datasets = dict(train=train, val=val, test=test)
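
To make the episode split above concrete, here is a small worked illustration with hypothetical numbers: 100 episodes and val_fraction=0.1 give 10 validation and 10 test episodes, leaving 80 for training.

import numpy as np

n_episodes = 100                                         # hypothetical episode count
val_fraction = 0.1
n_val_episodes = max(1, int(val_fraction * n_episodes))  # -> 10
train_end = n_episodes - 2 * n_val_episodes              # -> 80
val_end = n_episodes - n_val_episodes                    # -> 90

perm = np.random.permutation(n_episodes)
train_idx = perm[slice(None, train_end)]                 # 80 episode indices
val_idx = perm[slice(train_end, val_end)]                # 10 episode indices
test_idx = perm[slice(val_end, None)]                    # 10 episode indices
assert len(train_idx) + len(val_idx) + len(test_idx) == n_episodes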
Example #13
def f(game, modes, K, N, in_colour, seed):
    print("Running clustering...")
    with NumpySeed(seed):
        dset = StaticAtariDataset(game=game, after_warp=not in_colour)

        X = dset.x

        if N:
            X = X[:N, ...]
        else:
            N = X.shape[0]

        if not in_colour:
            X = X[..., 0]
        image_shape = X.shape[1:]
        X = X.reshape(N, -1)

        if modes:
            km = KModes(n_clusters=K, init='Huang', n_init=1, verbose=1)
            km.fit(X)

            centroids = km.cluster_centroids_
            centroids = centroids.reshape(K, *image_shape)
            discrete_centroids = centroids
            centroids = centroids / 255.

            labels = km.labels_
        else:
            result = k_means(X / 255., K)
            centroids = result[0]
            # k_means returns (centroids, labels, inertia); grab the labels so they
            # are defined on this branch as well.
            labels = result[1]
            discrete_centroids = np.uint8(np.floor(centroids * 255))

        centroids = np.maximum(centroids, 1e-6)
        centroids = np.minimum(centroids, 1 - 1e-6)
        centroids = centroids.reshape(K, *image_shape)

        labels = np.array(labels)
        X = X.reshape(N, *image_shape)
        print("Done.")
        return centroids, discrete_centroids, labels, X
Example #14
def run(device, show_plots, process_data=None, **get_data_kwargs):
    with NumpySeed(100):
        data = get_data(**get_data_kwargs)

        if process_data is None:
            process_data = lambda *x: x

        sprites, scales, offsets, backgrounds = process_data(*data)

        with tf.device('/{}:0'.format(device)):
            images = render_sprites.render_sprites(sprites, scales, offsets,
                                                   backgrounds)
            sess = get_session()
            result = sess.run(images)

        result = np.clip(result, 1e-6, 1 - 1e-6)

    if show_plots:
        import matplotlib.pyplot as plt
        fig, (ax1, ax2) = plt.subplots(1, 2)
        ax1.imshow(result[0])
        ax2.imshow(result[1])
        plt.show()
Example #15
def test_visual_arithmetic_dataset():
    with NumpySeed(100):
        n_examples = 100

        kwargs = dict(reductions="sum",
                      min_digits=2,
                      max_digits=3,
                      digits=list(range(5)),
                      patch_shape=(14, 14),
                      image_shape=(50, 50),
                      largest_digit=1000,
                      one_hot=False,
                      n_patch_examples=100,
                      n_examples=n_examples)

        dataset = VisualArithmeticDataset(**kwargs)
        assert dataset.x.shape == (n_examples, 50, 50)
        assert dataset.y.shape == (n_examples, 1)
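        # With reductions="sum", at most 3 digits drawn from 0-4, the largest possible label is 3 * 4 = 12, hence range(13).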
        assert set(dataset.y.flatten()).issubset(set(range(13)))

        _kwargs = kwargs.copy()
        _kwargs.update(image_shape=(100, 100), draw_shape=(50, 50))
        dataset = VisualArithmeticDataset(**_kwargs)
        assert dataset.x.shape == (n_examples, 100, 100)
        assert dataset.y.shape == (n_examples, 1)
        assert set(dataset.y.flatten()).issubset(set(range(13)))
        assert (dataset.x[:, 50:, :] == 0).all()
        assert (dataset.x[:, :, 50:] == 0).all()

        _kwargs = kwargs.copy()
        _kwargs.update(image_shape=(100, 100),
                       draw_shape=(50, 50),
                       draw_offset=(50, 50))
        dataset = VisualArithmeticDataset(**_kwargs)
        assert dataset.x.shape == (n_examples, 100, 100)
        assert dataset.y.shape == (n_examples, 1)
        assert set(dataset.y.flatten()).issubset(set(range(13)))
        assert (dataset.x[:, :50, :] == 0).all()
        assert (dataset.x[:, :, :50] == 0).all()

        _kwargs = kwargs.copy()
        _kwargs.update(largest_digit=5)
        dataset = VisualArithmeticDataset(**_kwargs)
        assert dataset.x.shape == (n_examples, 50, 50)
        assert dataset.y.shape == (n_examples, 1)
        assert set(dataset.y.flatten()).issubset(set(range(6)))

        _kwargs = kwargs.copy()
        _kwargs.update(one_hot=True, largest_digit=5)
        dataset = VisualArithmeticDataset(**_kwargs)
        assert dataset.x.shape == (n_examples, 50, 50)
        assert dataset.y.shape == (n_examples, 7)
        assert ((dataset.y == 0) | (dataset.y == 1)).all()

        _kwargs = kwargs.copy()
        _kwargs.update(reductions=sum)
        dataset = VisualArithmeticDataset(**_kwargs)
        assert dataset.x.shape == (n_examples, 50, 50)
        assert dataset.y.shape == (n_examples, 1)
        assert set(dataset.y.flatten()).issubset(set(range(13)))

        _kwargs = kwargs.copy()
        _kwargs.update(reductions="A:sum")
        dataset = VisualArithmeticDataset(**_kwargs)
        assert dataset.x.shape == (n_examples, 50, 50)
        assert dataset.y.shape == (n_examples, 1)
        assert set(dataset.y.flatten()).issubset(set(range(13)))

        _kwargs = kwargs.copy()
        _kwargs.update(reductions="A:sum,M:prod")
        dataset = VisualArithmeticDataset(**_kwargs)
        assert dataset.x.shape == (n_examples, 50, 50)
        assert dataset.y.shape == (n_examples, 1)
        assert set(dataset.y.flatten()).issubset(set(range(126)))
        assert not set(dataset.y.flatten()).issubset(set(range(20)))

        _kwargs = kwargs.copy()
        _kwargs.update(reductions="min")
        dataset = VisualArithmeticDataset(**_kwargs)
        assert dataset.x.shape == (n_examples, 50, 50)
        assert dataset.y.shape == (n_examples, 1)
        assert set(dataset.y.flatten()).issubset(set(range(10)))
Example #16
File: base.py Project: alcinos/dps
    def __init__(self, shuffle=True, **kwargs):
        start = time.time()
        print("Trying to find dataset in cache...")

        directory = kwargs.get(
            "data_dir",
            os.path.join(cfg.data_dir, "cached_datasets",
                         self.__class__.__name__))
        os.makedirs(directory, exist_ok=True)

        params = self.param_values()
        param_hash = get_param_hash(params)
        print(self.__class__.__name__)
        print("Params:")
        pprint.pprint(params)
        print("Param hash: {}".format(param_hash))

        self.filename = os.path.join(directory, str(param_hash))
        cfg_filename = self.filename + ".cfg"

        no_cache = os.getenv("DPS_NO_CACHE")
        if no_cache:
            print(
                "Skipping dataset cache as DPS_NO_CACHE is set (value is {}).".
                format(no_cache))

        # We require cfg_filename to exist as it marks that dataset creation completed successfully.
        if no_cache or not os.path.exists(
                self.filename) or not os.path.exists(cfg_filename):

            if kwargs.get("no_make", False):
                raise Exception(
                    "`no_make` is True, but dataset was not found in cache.")

            # Start fresh
            try:
                os.remove(self.filename)
            except FileNotFoundError:
                pass
            try:
                os.remove(cfg_filename)
            except FileNotFoundError:
                pass

            print("File for dataset not found, creating...")

            run_kwargs = kwargs.get('run_kwargs', None)
            if run_kwargs is not None:
                # Create the dataset in parallel and write it to the cache.
                make_dataset_in_parallel(run_kwargs, self.__class__, params)
            else:
                self._writer = tf.python_io.TFRecordWriter(self.filename)
                try:
                    with NumpySeed(self.seed):
                        self._make()
                    self._writer.close()
                    print("Done creating dataset.")
                except BaseException:
                    self._writer.close()

                    try:
                        os.remove(self.filename)
                    except FileNotFoundError:
                        pass
                    try:
                        os.remove(cfg_filename)
                    except FileNotFoundError:
                        pass

                    raise

            with open(cfg_filename, 'w') as f:
                f.write(pprint.pformat(params))
        else:
            print("Found.")

        print("Took {} seconds.".format(time.time() - start))
        print("Features for dataset: ")
        pprint.pprint(self.features)
        print()
Example #17
def run_stage(mpi_context, env, stage_idx, exp_dir):
    config, seed = mpi_context.start_stage()

    with ExitStack() as stack:
        stack.enter_context(config)
        stack.enter_context(NumpySeed(seed))

        # Accept config for new stage
        print("\n" + "-" * 10 + " Stage set-up " + "-" * 10)

        print(cfg.to_string())

        # Configure and create session and graph for stage.
        session_config = tf.ConfigProto()
        session_config.intra_op_parallelism_threads = cfg.get(
            'intra_op_parallelism_threads', 0)
        session_config.inter_op_parallelism_threads = cfg.get(
            'inter_op_parallelism_threads', 0)

        # if cfg.use_gpu:
        #     per_process_gpu_memory_fraction = getattr(cfg, 'per_process_gpu_memory_fraction', None)
        #     if per_process_gpu_memory_fraction:
        #         session_config.gpu_options.per_process_gpu_memory_fraction = \
        #             per_process_gpu_memory_fraction

        #     gpu_allow_growth = getattr(cfg, 'gpu_allow_growth', None)
        #     if gpu_allow_growth:
        #         session_config.gpu_options.allow_growth = gpu_allow_growth

        # if cfg.use_gpu:
        #     print("Using GPU if available.")
        #     print("Using {}% of GPU memory.".format(
        #         100 * session_config.gpu_options.per_process_gpu_memory_fraction))
        #     print("Allowing growth of GPU memory: {}".format(session_config.gpu_options.allow_growth))

        graph = tf.Graph()
        sess = tf.Session(graph=graph, config=session_config)

        # This HAS to come after the creation of the session, otherwise
        # it allocates all GPU memory if using the GPU.
        print("\nAvailable devices:")
        from tensorflow.python.client import device_lib
        print(device_lib.list_local_devices())

        # if not cfg.use_gpu:
        #     print("Not using GPU.")
        #     stack.enter_context(graph.device("/cpu:0"))

        stack.enter_context(graph.device("/cpu:0"))

        stack.enter_context(graph.as_default())
        stack.enter_context(sess)
        stack.enter_context(sess.as_default())

        tf_seed = gen_seed()
        print(
            "Setting tensorflow seed to generated seed: {}\n".format(tf_seed))
        tf.set_random_seed(tf_seed)

        # Set limit on CPU RAM for the stage
        cpu_ram_limit_mb = cfg.get("cpu_ram_limit_mb", None)
        if cpu_ram_limit_mb is not None:
            stack.enter_context(memory_limit(cfg.cpu_ram_limit_mb))

        print("Building env...\n")

        # Maybe build env
        if stage_idx == 0 or not cfg.preserve_env:
            if env is not None:
                env.close()
            env = cfg.build_env()

        if hasattr(env, "print_memory_footprint"):
            env.print_memory_footprint()

        print("\nDone building env.\n")
        print("Building updater...\n")

        updater = cfg.get_updater(env, mpi_context=mpi_context)
        updater.stage_idx = stage_idx
        updater.exp_dir = exp_dir

        updater.build_graph()
        print("\nDone building updater.\n")

        # walk_variable_scopes(max_depth=3)

        tf.train.get_or_create_global_step()
        sess.run(uninitialized_variables_initializer())
        sess.run(tf.assert_variables_initialized())

        updater.worker_code()

        stage_idx += 1

    return env
Example #18
                        images,
                        backgrounds.shape,
                        [sprites, scales, offsets, backgrounds],
                        delta=0.002)

            print("Jacobian error: {}".format(err))
            threshold = 2e-4
            assert err < threshold, "Jacobian error ({}) exceeded threshold ({})".format(
                err, threshold)


if __name__ == "__main__":

    from contextlib import ExitStack

    with NumpySeed(100000):
        sprites, scales, offsets, backgrounds = _get_data()

        device = 'gpu'

        print("Running...")

        session_config = tf.ConfigProto()
        session_config.log_device_placement = 1
        session_config.gpu_options.per_process_gpu_memory_fraction = 0.1
        session_config.gpu_options.allow_growth = True

        graph = tf.Graph()
        sess = tf.Session(graph=graph, config=session_config)

        with ExitStack() as stack:
Example #19
File: base.py Project: lqiang2003cn/dps
def build_search(path,
                 name,
                 distributions,
                 config,
                 n_repeats,
                 n_param_settings=None,
                 _zip=True,
                 add_date=0,
                 do_local_test=True,
                 readme=""):
    """ Create a job implementing a hyper-parameter search.

    Parameters
    ----------
    path: str
        Path to the directory where the search archive will be saved.
    name: str
        Name for the search.
    distributions: dict (str -> (list or distribution))
        Distributions to sample from. Can also be a list of samples.
    config: Config instance
        The base configuration.
    n_repeats: int
        Number of different random seeds to run each sample with.
    n_param_settings: int
        Number of parameter settings to sample. If not supplied, all
        possibilities are generated.
    _zip: bool
        Whether to zip the created search directory.
    add_date: bool
        Whether to add the date/time to the name of the experiment directory.
    do_local_test: bool
        If True, run a short test using one of the sampled
        configs on the local machine to catch any dumb errors
        before starting the real experiment.
    readme: str
        String specifying the context/purpose of the search.

    """
    if config.get('seed', None) is None:
        config.seed = gen_seed()

    with NumpySeed(config.seed):
        es = ExperimentStore(path, prefix="build_search")

        count = 0
        base_name = name
        has_built = False
        while not has_built:
            try:
                exp_dir = es.new_experiment(name,
                                            config.seed,
                                            add_date=add_date,
                                            force_fresh=1)
                has_built = True
            except FileExistsError:
                name = "{}_{}".format(base_name, count)
                count += 1

        if readme:
            with open(exp_dir.path_for('README.md'), 'w') as f:
                f.write(readme)

        print(config)
        exp_dir.record_environment(config=config)

        print("Building parameter search at {}.".format(exp_dir.path))

        job = Job(exp_dir.path)

        new_configs = sample_configs(distributions, n_repeats,
                                     n_param_settings)

        with open(exp_dir.path_for("sampled_configs.txt"), "w") as f:
            f.write("\n".join("idx={}: {}".format(c["idx"], pformat(c))
                              for c in new_configs))

        print("{} configs were sampled for parameter search.".format(
            len(new_configs)))

        if do_local_test:
            print("\nStarting local test " + ("=" * 80))
            test_config = new_configs[0].copy()
            test_config.update(max_steps=1000, render_hook=None)
            _RunTrainingLoop(config)(test_config)
            print("Done local test " + ("=" * 80) + "\n")

        job.map(_RunTrainingLoop(config.copy()), new_configs)

        job.save_object('metadata', 'distributions', distributions)
        job.save_object('metadata', 'config', config)

        print(job.summary())

        if _zip:
            path = job.zip(delete=True)
        else:
            path = exp_dir.path

        print("Zipped {} as {}.".format(exp_dir.path, path))

        return path, len(new_configs)
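
A hypothetical invocation of build_search, included only to show the shape of the arguments described in the docstring; the distribution values, path, and name below are assumptions for illustration, not taken from dps.

distributions = dict(
    lr=[1e-4, 1e-3, 1e-2],            # a list of samples, as the docstring allows
    n_controller_units=[64, 128],
)

archive_path, n_sampled = build_search(
    path="/tmp/dps_searches",         # where the search archive is written
    name="lr_sweep",
    distributions=distributions,
    config=EMNIST_CONFIG.copy(),      # any Config instance serves as the base configuration
    n_repeats=3,                      # run each sampled setting with 3 random seeds
    n_param_settings=6,               # sample 6 settings instead of enumerating all of them
    do_local_test=False,
    readme="Hypothetical sweep over learning rate and controller size.",
)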
Example #20
File: train.py Project: alcinos/dps
    def run(self, start_time):
        """ Run the training loop.

        Parameters
        ----------
        start_time: int
            Start time (in seconds since epoch) for measuring elapsed time for
            purposes of interrupting the training loop.

        """
        if start_time is None:
            start_time = time.time()
        self.start_time = start_time

        self.timestamp("Entering TrainingLoop.run")

        prepare_func = cfg.get("prepare_func", None)
        if callable(prepare_func):
            prepare_func()  # Modify the config in arbitrary ways before training
        else:
            try:
                prepare_funcs = list(prepare_func)
            except (TypeError, ValueError):
                pass
            else:
                for f in prepare_funcs:
                    if callable(f):
                        f()

        self.curriculum = cfg.curriculum + []

        if cfg.seed is None or cfg.seed < 0:
            cfg.seed = gen_seed()

        # Create a directory to store the results of the training session.
        self.experiment_store = ExperimentStore(os.path.join(cfg.local_experiments_dir, cfg.env_name))
        exp_dir = self.experiment_store.new_experiment(
            self.exp_name, cfg.seed, add_date=1, force_fresh=1, update_latest=False)
        self.exp_dir = exp_dir
        cfg.path = exp_dir.path

        breaker = "-" * 40
        header = "{}\nREADME.md - {}\n{}\n\n\n".format(breaker, os.path.basename(exp_dir.path), breaker)
        readme = header + (cfg.readme if cfg.readme else "") + "\n\n"

        with open(exp_dir.path_for('README.md'), 'w') as f:
            f.write(readme)

        self.data = _TrainingLoopData(exp_dir)
        self.data.setup()

        frozen_data = None

        with ExitStack() as stack:
            if cfg.pdb:
                stack.enter_context(pdb_postmortem())
                print("`pdb` is turned on, so forcing setting robust=False")
                cfg.robust = False

            stack.enter_context(redirect_stream('stdout', self.data.path_for('stdout'), tee=cfg.tee))
            stack.enter_context(redirect_stream('stderr', self.data.path_for('stderr'), tee=cfg.tee))

            print("\n\n" + "=" * 80)
            self.timestamp("Starting training run (name={})".format(self.exp_name))

            print("\nDirectory for this training run is {}.".format(exp_dir.path))

            stack.enter_context(NumpySeed(cfg.seed))
            print("\nSet numpy random seed to {}.\n".format(cfg.seed))

            limiter = time_limit(
                self.time_remaining, verbose=True,
                timeout_callback=lambda limiter: print("Training run exceeded its time limit."))

            self.mpi_context = MPI_MasterContext(cfg.get('n_procs', 1), exp_dir)

            try:
                with limiter:
                    self._run()

            finally:
                self.data.summarize()

                self.timestamp("Done training run (name={})".format(self.exp_name))
                print("=" * 80)
                print("\n\n")

                frozen_data = self.data.freeze()

        self.timestamp("Leaving TrainingLoop.run")

        return frozen_data
Example #21
def make_dataset_in_parallel(run_kwargs, dataset_cls, param_values=None):
    """ Uses dps.hyper.parallel_session.ParallelSession to create a dataset in parallel. """

    # Get run_kwargs from command line
    sig = inspect.signature(ParallelSession.__init__)
    default_run_kwargs = sig.bind_partial()
    default_run_kwargs.apply_defaults()
    cl_run_kwargs = clify.command_line(default_run_kwargs.arguments).parse()
    run_kwargs.update(cl_run_kwargs)

    param_values = param_values or dataset_cls._capture_param_values()
    param_values = Config(param_values)
    seed = param_values["seed"]
    if seed is None or seed < 0:
        seed = gen_seed()

    n_examples = param_values["n_examples"]
    n_examples_per_shard = run_kwargs["n_examples_per_shard"]

    experiment_store = ExperimentStore(
        cfg.parallel_experiments_build_dir, prefix="build_{}".format(dataset_cls.__name__))

    count = 0
    name = "attempt=0"
    has_built = False
    while not has_built:
        try:
            exp_dir = experiment_store.new_experiment(name, seed, add_date=True, force_fresh=True)
            has_built = True
        except FileExistsError:
            count += 1
            name = "attempt_{}".format(count)

    print("Building dataset.")

    job = Job(exp_dir.path)
    n_examples_remaining = n_examples

    with NumpySeed(seed):
        inputs = []
        idx = 0
        while n_examples_remaining:
            seed = gen_seed()
            cur_n_examples = min(n_examples_remaining, n_examples_per_shard)
            n_examples_remaining -= cur_n_examples

            inputs.append((idx, seed, cur_n_examples))
            idx += 1

        job.map(_BuildDataset(dataset_cls, param_values), inputs)
        job.save_object('metadata', 'param_values', param_values)

    print(job.summary())
    archive_path = job.zip(delete=True)
    print("Zipped {} as {}.".format(exp_dir.path, archive_path))

    run_kwargs = run_kwargs.copy()

    del run_kwargs['n_examples_per_shard']

    run_kwargs.update(
        archive_path=archive_path, name=name, kind="parallel",
        parallel_exe=cfg.parallel_exe)
    parallel_session = submit_job(**run_kwargs)

    with cd(os.path.join(parallel_session.job_path, 'experiments')):
        dataset_files = []
        for dir_path, dirs, files in os.walk('.'):
            if not dir_path.startswith("./exp__seed="):
                continue

            df = [f for f in files if not f.endswith('.cfg')]
            assert len(df) == 1
            dataset_files.append(os.path.join(dir_path, df[0]))

        cached_filename = os.path.join(cfg.data_dir, "cached_datasets", dataset_cls.__name__, str(get_param_hash(param_values)))

        command = "cat " + " ".join(dataset_files) + " > " + cached_filename
        print("Running command: \n" + command)
        subprocess.run(command, shell=True, check=True)
        print("Done.")

        with open(cached_filename + ".cfg", 'w') as f:
            f.write(pprint.pformat(param_values))

    return parallel_session
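
For reference, a hypothetical call to make_dataset_in_parallel: the only run_kwargs key the function itself consumes is n_examples_per_shard, any remaining keys are forwarded to the parallel-session machinery, and EmnistDataset stands in here for whichever cached dataset class is being built.

parallel_session = make_dataset_in_parallel(
    run_kwargs=dict(n_examples_per_shard=1000),  # shard size; other keys would be passed through
    dataset_cls=EmnistDataset,                   # assumed: any dataset class whose params include `seed` and `n_examples`
)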