def test_random_wrapper(show_plots):
    with NumpySeed(100):
        tile_shape = (28, 35)
        n_examples = 10

        dset = GridEmnistObjectDetectionDataset(
            min_chars=25, max_chars=25, n_patch_examples=100, n_examples=n_examples,
            draw_shape_grid=(5, 5), image_shape=(4 * 14, 5 * 14), draw_offset=(0, 0),
            spacing=(-5, -5), characters=list(range(10)), colours="white",
            postprocessing="random", tile_shape=tile_shape, n_samples_per_image=4)

        assert dset.x[0].shape == tile_shape + (3,)
        assert dset.image_shape == tile_shape
        assert len(dset.x) == n_examples * 4
        assert dset.n_examples == n_examples * 4

        if show_plots:
            dset.visualize()

def test_emnist_dataset():
    with NumpySeed(100):
        kwargs = dict(
            shape=(14, 14), include_blank=True, one_hot=False,
            balance=True, classes=[1, 2, 3], n_examples=100)

        dataset = EmnistDataset(**kwargs)
        assert set(dataset.y.flatten()) == set([0, 1, 2, 3])
        assert dataset.x.shape == (100, 14, 14)
        assert dataset.y.shape == (100, 1)
        assert dataset.x.min() == 0.0
        assert 0.0 <= dataset.x.min() <= 10.0
        assert 200.0 <= dataset.x.max() <= 255.0

        kwargs['one_hot'] = True

        dataset = EmnistDataset(**kwargs)
        assert dataset.x.shape == (100, 14, 14)
        assert dataset.y.shape == (100, 4)
        assert (dataset.y.sum(1) == 1).all()
        assert ((dataset.y == 0) | (dataset.y == 1)).all()
        assert 0.0 <= dataset.x.min() <= 10.0
        assert 200.0 <= dataset.x.max() <= 255.0

def _test_gradient(device):
    if device == "gpu" and not visible_gpu():
        pytest.xfail("no gpu is visible")

    with NumpySeed(100):
        with tf.device('/{}:0'.format(device)):
            sprites, scales, offsets, backgrounds = get_data(random_alpha=True, squash=0.99)

            sprites_tf = constant_op.constant(sprites)
            scales_tf = constant_op.constant(scales)
            offsets_tf = constant_op.constant(offsets)
            backgrounds_tf = constant_op.constant(backgrounds)

            images = render_sprites.render_sprites(sprites_tf, scales_tf, offsets_tf, backgrounds_tf)

            sess = get_session()
            with sess.as_default():
                with tf.device(device):
                    err = gradient_checker.compute_gradient_error(
                        [sprites_tf, scales_tf, offsets_tf, backgrounds_tf],
                        [sprites.shape, scales.shape, offsets.shape, backgrounds.shape],
                        images,
                        backgrounds.shape,
                        [sprites, scales, offsets, backgrounds],
                        delta=0.002)

                    print("Jacobian error: {}".format(err))
                    threshold = 2e-4
                    assert err < threshold, "Jacobian error ({}) exceeded threshold ({})".format(err, threshold)

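# A minimal invocation sketch for the helper above. It assumes pytest is already
# imported at module level (it is used inside `_test_gradient`); the device list
# and the wrapper name are illustrative assumptions, not taken from the source.
@pytest.mark.parametrize("device", ["cpu", "gpu"])
def test_gradient(device):
    _test_gradient(device)
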
def test_omniglot_dataset():
    with NumpySeed(100):
        # Chosen randomly from set of all omniglot characters.
        classes = [
            'Cyrillic,17', 'Mkhedruli_(Georgian),5', 'Bengali,23', 'Mongolian,19',
            'Malayalam,3', 'Ge_ez,15', 'Glagolitic,33', 'Tagalog,11', 'Gujarati,23',
            'Old_Church_Slavonic_(Cyrillic),7'
        ]
        indices = [1, 3, 5, 7, 9]

        kwargs = dict(
            shape=(14, 14), include_blank=True, one_hot=False,
            indices=indices, classes=classes)

        n_classes = len(classes) + 1
        n_examples = len(indices) * n_classes

        dataset = OmniglotDataset(**kwargs)
        assert set(dataset.y.flatten()) == set(range(n_classes))
        assert dataset.x.shape == (n_examples, 14, 14)
        assert dataset.y.shape == (n_examples, 1)
        assert dataset.x.min() == 0.0
        assert 0.0 <= dataset.x.min() <= 10.0
        assert 200.0 <= dataset.x.max() <= 255.0

        kwargs['one_hot'] = True

        dataset = OmniglotDataset(**kwargs)
        assert dataset.x.shape == (n_examples, 14, 14)
        assert dataset.y.shape == (n_examples, n_classes)
        assert (dataset.y.sum(1) == 1).all()
        assert ((dataset.y == 0) | (dataset.y == 1)).all()
        assert 0.0 <= dataset.x.min() <= 10.0
        assert 200.0 <= dataset.x.max() <= 255.0

def __init__(self):
    train_seed, val_seed, test_seed = 0, 1, 2
    perm_seed = 4

    n_episodes = count_episodes(cfg.atari_game, cfg.after_warp)

    with NumpySeed(perm_seed):
        perm = np.random.permutation(n_episodes)

    n_val_episodes = max(1, int(cfg.val_fraction * n_episodes))
    train_end = n_val_episodes
    val_end = n_episodes - n_val_episodes

    if cfg.do_train:
        train_episode_range = (None, train_end)
    else:
        train_episode_range = (None, 5)
    val_episode_range = (train_end, val_end)
    test_episode_range = (val_end, None)

    train_episode_indices = perm[slice(*train_episode_range)]
    val_episode_indices = perm[slice(*val_episode_range)]
    test_episode_indices = perm[slice(*test_episode_range)]

    self.dataset = dict(
        train=AtariLongVideoVideoDataset(seed=train_seed, episode_indices=train_episode_indices),
        val=AtariLongVideoVideoDataset(seed=val_seed, episode_indices=val_episode_indices),
        test=AtariLongVideoVideoDataset(seed=test_seed, episode_indices=test_episode_indices),
    )

def test_omniglot(build_function, test_config):
    with NumpySeed(83849):
        classes = ["Greek,18", "Greek,19"]
        n_classes = len(classes)

        config = OMNIGLOT_CONFIG.copy(
            patience=10000,
            build_function=build_function,
            classes=classes,
            n_controller_units=100,
            threshold=0.2,
            stopping_criteria="01_loss,min",
            train_indices=list(range(15)),
            val_indices=list(range(15, 20)),
            test_indices=list(range(15, 20)),
        )
        config.update(test_config)

        checkpoint_dir = make_checkpoint_dir(config, 'test_omni')
        output_size = n_classes + 1
        name_params = 'classes include_blank shape n_controller_units'

        g, sess = get_graph_and_session()
        with ExitStack() as stack:
            stack.enter_context(g.as_default())
            stack.enter_context(sess)
            stack.enter_context(sess.as_default())
            stack.enter_context(config)

            f = build_function()
            f.set_pretraining_params(config, name_params, checkpoint_dir)
            x_ph = tf.placeholder(tf.float32, (None,) + cfg.shape)
            inference = f(x_ph, output_size, False)

            test_dataset = OmniglotDataset(indices=cfg.test_indices, one_hot=True)
            _eval_model(test_dataset, inference, x_ph)

        g, sess = get_graph_and_session()
        with ExitStack() as stack:
            stack.enter_context(g.as_default())
            stack.enter_context(sess)
            stack.enter_context(sess.as_default())
            stack.enter_context(config)

            f = build_function()
            f.set_pretraining_params(config, name_params, checkpoint_dir)
            x_ph = tf.placeholder(tf.float32, (None,) + cfg.shape)
            inference = f(x_ph, output_size, False)

            assert f.was_loaded is True

            test_dataset = OmniglotDataset(indices=cfg.test_indices, one_hot=True)
            _eval_model(test_dataset, inference, x_ph)

def test_emnist_pretrained(build_function, test_config):
    with NumpySeed(83849):
        n_classes = 10
        classes = EmnistDataset.sample_classes(n_classes)

        config = EMNIST_CONFIG.copy(
            build_function=build_function,
            classes=classes,
            threshold=0.1,
            stopping_criteria="01_loss,min",
            n_controller_units=100,
            n_train=10000)
        config.update(test_config)

        checkpoint_dir = make_checkpoint_dir(config, 'test_emnist')
        output_size = n_classes + 1
        name_params = 'classes include_blank shape n_controller_units'

        g, sess = get_graph_and_session()
        with ExitStack() as stack:
            stack.enter_context(g.as_default())
            stack.enter_context(sess)
            stack.enter_context(sess.as_default())
            stack.enter_context(config)

            f = build_function()
            f.set_pretraining_params(config, name_params, checkpoint_dir)
            x_ph = tf.placeholder(tf.float32, (None,) + cfg.shape)
            inference = f(x_ph, output_size, False)

            assert f.was_loaded is False

            with config:
                test_dataset = EmnistDataset(n_examples=cfg.n_val, one_hot=True)
                _eval_model(test_dataset, inference, x_ph)

        g, sess = get_graph_and_session()
        with ExitStack() as stack:
            stack.enter_context(g.as_default())
            stack.enter_context(sess)
            stack.enter_context(sess.as_default())
            stack.enter_context(config)

            f = build_function()
            f.set_pretraining_params(config, name_params, checkpoint_dir)
            x_ph = tf.placeholder(tf.float32, (None,) + cfg.shape)
            inference = f(x_ph, output_size, False)

            assert f.was_loaded is True

            with config:
                test_dataset = EmnistDataset(n_examples=cfg.n_val, one_hot=True)
                _eval_model(test_dataset, inference, x_ph)

def test_determinism(dataset, test_config):
    # Can't use build_lenet here as it is slightly non-deterministic for reasons unknown.
    build_function = build_mlp

    with NumpySeed(83849):
        n_classes = 10
        info = determinism_info[dataset]
        classes = info['sample_classes'](n_classes)

        config = info['config'].copy(
            build_function=build_function,
            classes=classes,
            n_controller_units=100,
            threshold=0.2,
            stopping_criteria="01_loss,min",
            seed=334324923,
            display_step=100,
            eval_step=100,
            max_steps=1001,
            tee=False,
            n_train=500,
        )
        config.update(test_config)

        name_params = 'classes include_blank shape n_controller_units'
        output_size = n_classes + 1

        n_repeats = 5
        output = defaultdict(int)
        dir_names = []
        try:
            for i in range(n_repeats):
                checkpoint_dir = make_checkpoint_dir(config, 'test_{}_{}'.format(dataset, i))
                dir_names.append(checkpoint_dir)

                _train_classifier(build_function, config, name_params, output_size, checkpoint_dir)
                o = _get_deterministic_output(checkpoint_dir)
                output[o] += 1

            if len(output) != 1:
                for o in sorted(output):
                    print("\n" + "*" * 80)
                    print("The following occurred {} times:\n".format(output[o]))
                    print(o)
                raise Exception("Results were not deterministic.")
        finally:
            for dn in dir_names:
                try:
                    shutil.rmtree(dn)
                except FileNotFoundError:
                    pass

def __init__(self, **kwargs):
    start = time.time()
    print("Trying to find dataset in cache...")

    directory = kwargs.get(
        "data_dir",
        os.path.join(cfg.data_dir, "cached_datasets", self.__class__.__name__))
    os.makedirs(directory, exist_ok=True)

    params = self.param_values()
    param_hash = get_param_hash(params)
    print(self.__class__.__name__)
    print("Params:")
    pprint.pprint(params)
    print("Param hash: {}".format(param_hash))

    self.directory = os.path.join(directory, str(param_hash))
    cfg_filename = os.path.join(self.directory, "config.txt")

    if not os.path.exists(cfg_filename):
        # Start fresh
        try:
            shutil.rmtree(self.directory)
        except FileNotFoundError:
            pass

        print("Directory for dataset not found, creating...")
        os.makedirs(self.directory, exist_ok=False)

        try:
            with NumpySeed(self.seed):
                self._make()
            print("Done creating dataset.")
        except BaseException:
            try:
                shutil.rmtree(self.directory)
            except FileNotFoundError:
                pass
            raise

        with open(cfg_filename, 'w') as f:
            f.write(pprint.pformat(params))
    else:
        print("Found.")

    print("Took {} seconds.".format(time.time() - start))
    print("Features for dataset: ")
    pprint.pprint(self.features)
    print()

def test_emnist_load_or_train(build_function, test_config):
    with NumpySeed(83849):
        n_classes = 10
        classes = EmnistDataset.sample_classes(n_classes)

        config = EMNIST_CONFIG.copy(
            build_function=build_function,
            classes=classes,
            threshold=0.1,
            stopping_criteria="01_loss,min",
            n_controller_units=100,
            n_train=10000)
        config.update(test_config)

        checkpoint_dir = make_checkpoint_dir(config, 'test_emnist')
        output_size = n_classes + 1

        g, sess = get_graph_and_session()
        with ExitStack() as stack:
            stack.enter_context(g.as_default())
            stack.enter_context(sess)
            stack.enter_context(sess.as_default())
            stack.enter_context(config)

            f = build_function()
            x_ph = tf.placeholder(tf.float32, (None,) + cfg.shape)
            inference = f(x_ph, output_size, False)

            loaded = load_or_train(config, f.scope, os.path.join(checkpoint_dir, 'model'))
            assert not loaded

            test_dataset = EmnistDataset(n_examples=cfg.n_val, one_hot=True)
            _eval_model(test_dataset, inference, x_ph)

        g, sess = get_graph_and_session()
        with ExitStack() as stack:
            stack.enter_context(g.as_default())
            stack.enter_context(sess)
            stack.enter_context(sess.as_default())
            stack.enter_context(config)

            f = build_function()
            x_ph = tf.placeholder(tf.float32, (None,) + cfg.shape)
            inference = f(x_ph, output_size, False)

            loaded = load_or_train(config, f.scope, os.path.join(checkpoint_dir, 'model'))
            assert loaded

            test_dataset = EmnistDataset(n_examples=cfg.n_val, one_hot=True)
            _eval_model(test_dataset, inference, x_ph)

def test_cache_dataset():
    with NumpySeed(100):
        kwargs = dict(
            shape=(14, 14), include_blank=True, one_hot=False,
            balance=True, classes=[1, 2, 3], n_examples=100)

        cache_dir = "/tmp/dps_test/cached_datasets"
        shutil.rmtree(cache_dir, ignore_errors=True)

        with remove(cache_dir):
            dataset = EmnistDataset(**kwargs, use_dataset_cache=cache_dir)
            assert not dataset.loaded
            assert set(dataset.y.flatten()) == set([0, 1, 2, 3])
            assert dataset.x.shape == (100, 14, 14)
            assert dataset.y.shape == (100, 1)
            assert dataset.x.min() == 0.0
            assert 0.0 <= dataset.x.min() <= 10.0
            assert 200.0 <= dataset.x.max() <= 255.0

            dataset2 = EmnistDataset(**kwargs, use_dataset_cache=cache_dir)
            assert dataset2.loaded
            assert set(dataset2.y.flatten()) == set([0, 1, 2, 3])
            assert dataset2.x.shape == (100, 14, 14)
            assert dataset2.y.shape == (100, 1)
            assert dataset2.x.min() == 0.0
            assert 0.0 <= dataset2.x.min() <= 10.0
            assert 200.0 <= dataset2.x.max() <= 255.0

            assert (dataset.x == dataset2.x).all()
            assert (dataset.y == dataset2.y).all()

            dataset3 = EmnistDataset(**kwargs, use_dataset_cache=False)
            assert set(dataset3.y.flatten()) == set([0, 1, 2, 3])
            assert dataset3.x.shape == (100, 14, 14)
            assert dataset3.y.shape == (100, 1)
            assert dataset3.x.min() == 0.0
            assert 0.0 <= dataset3.x.min() <= 10.0
            assert 200.0 <= dataset3.x.max() <= 255.0

            assert (dataset3.x != dataset2.x).any()
            assert (dataset3.y != dataset2.y).any()

def __init__(self):
    train_seed, val_seed, test_seed = 0, 1, 2
    perm_seed = 4

    n_episodes = count_episodes(cfg.atari_game, cfg.after_warp)

    with NumpySeed(perm_seed):
        perm = np.random.permutation(n_episodes)

    n_val_episodes = max(1, int(cfg.val_fraction * n_episodes))
    train_end = n_episodes - 2 * n_val_episodes
    val_end = n_episodes - n_val_episodes

    if cfg.do_train:
        train_episode_range = (None, train_end)
    else:
        train_episode_range = (None, 1)
    val_episode_range = (train_end, val_end)
    test_episode_range = (val_end, None)

    train_episode_indices = perm[slice(*train_episode_range)]
    val_episode_indices = perm[slice(*val_episode_range)]
    test_episode_indices = perm[slice(*test_episode_range)]

    get_annotations = not cfg.do_train

    train = AtariVideoDataset(
        max_examples=int(cfg.n_train), seed=train_seed,
        episode_indices=train_episode_indices, get_annotations=False,
        sample_density=cfg.train_sample_density)

    val = AtariVideoDataset(
        max_examples=int(cfg.n_val), seed=val_seed,
        episode_indices=val_episode_indices, get_annotations=get_annotations,
        sample_density=cfg.val_sample_density)

    test = AtariVideoDataset(
        max_examples=int(cfg.n_val), seed=test_seed,
        episode_indices=test_episode_indices, get_annotations=get_annotations,
        sample_density=cfg.val_sample_density)

    self.datasets = dict(train=train, val=val, test=test)

def f(game, modes, K, N, in_colour, seed):
    print("Running clustering...")

    with NumpySeed(seed):
        dset = StaticAtariDataset(game=game, after_warp=not in_colour)
        X = dset.x

        if N:
            X = X[:N, ...]
        else:
            N = X.shape[0]

        if not in_colour:
            X = X[..., 0]
        image_shape = X.shape[1:]
        X = X.reshape(N, -1)

        if modes:
            km = KModes(n_clusters=K, init='Huang', n_init=1, verbose=1)
            km.fit(X)

            centroids = km.cluster_centroids_
            centroids = centroids.reshape(K, *image_shape)
            discrete_centroids = centroids
            centroids = centroids / 255.

            labels = km.labels_
        else:
            # k_means returns (centroids, labels, inertia).
            result = k_means(X / 255., K)
            centroids = result[0]
            labels = result[1]

            discrete_centroids = np.uint8(np.floor(centroids * 255))

            centroids = np.maximum(centroids, 1e-6)
            centroids = np.minimum(centroids, 1 - 1e-6)
            centroids = centroids.reshape(K, *image_shape)

            labels = np.array(labels)

        X = X.reshape(N, *image_shape)

        print("Done.")

        return centroids, discrete_centroids, labels, X

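# A minimal sketch of how the returned centroids might be inspected visually.
# This helper is an illustrative assumption (not part of the source) and assumes
# matplotlib is installed; greyscale centroids have shape (H, W), colour ones (H, W, 3).
def show_centroids(centroids, n_cols=5):
    import math
    import matplotlib.pyplot as plt

    n_rows = int(math.ceil(len(centroids) / n_cols))
    fig, axes = plt.subplots(n_rows, n_cols, squeeze=False)
    for ax in axes.flatten():
        ax.set_axis_off()
    for ax, c in zip(axes.flatten(), centroids):
        ax.imshow(c, cmap='gray' if c.ndim == 2 else None)
    plt.show()
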
def run(device, show_plots, process_data=None, **get_data_kwargs):
    with NumpySeed(100):
        data = get_data(**get_data_kwargs)

        if process_data is None:
            process_data = lambda *x: x

        sprites, scales, offsets, backgrounds = process_data(*data)

        with tf.device('/{}:0'.format(device)):
            images = render_sprites.render_sprites(sprites, scales, offsets, backgrounds)
            sess = get_session()
            result = sess.run(images)

        result = np.clip(result, 1e-6, 1 - 1e-6)

        if show_plots:
            import matplotlib.pyplot as plt
            fig, (ax1, ax2) = plt.subplots(1, 2)
            ax1.imshow(result[0])
            ax2.imshow(result[1])
            plt.show()

def test_visual_arithmetic_dataset():
    with NumpySeed(100):
        n_examples = 100

        kwargs = dict(
            reductions="sum", min_digits=2, max_digits=3, digits=list(range(5)),
            patch_shape=(14, 14), image_shape=(50, 50), largest_digit=1000,
            one_hot=False, n_patch_examples=100, n_examples=n_examples)

        dataset = VisualArithmeticDataset(**kwargs)
        assert dataset.x.shape == (n_examples, 50, 50)
        assert dataset.y.shape == (n_examples, 1)
        assert set(dataset.y.flatten()).issubset(set(range(13)))

        _kwargs = kwargs.copy()
        _kwargs.update(image_shape=(100, 100), draw_shape=(50, 50))

        dataset = VisualArithmeticDataset(**_kwargs)
        assert dataset.x.shape == (n_examples, 100, 100)
        assert dataset.y.shape == (n_examples, 1)
        assert set(dataset.y.flatten()).issubset(set(range(13)))
        assert (dataset.x[:, 50:, :] == 0).all()
        assert (dataset.x[:, :, 50:] == 0).all()

        _kwargs = kwargs.copy()
        _kwargs.update(image_shape=(100, 100), draw_shape=(50, 50), draw_offset=(50, 50))

        dataset = VisualArithmeticDataset(**_kwargs)
        assert dataset.x.shape == (n_examples, 100, 100)
        assert dataset.y.shape == (n_examples, 1)
        assert set(dataset.y.flatten()).issubset(set(range(13)))
        assert (dataset.x[:, :50, :] == 0).all()
        assert (dataset.x[:, :, :50] == 0).all()

        _kwargs = kwargs.copy()
        _kwargs.update(largest_digit=5)

        dataset = VisualArithmeticDataset(**_kwargs)
        assert dataset.x.shape == (n_examples, 50, 50)
        assert dataset.y.shape == (n_examples, 1)
        assert set(dataset.y.flatten()).issubset(set(range(6)))

        _kwargs = kwargs.copy()
        _kwargs.update(one_hot=True, largest_digit=5)

        dataset = VisualArithmeticDataset(**_kwargs)
        assert dataset.x.shape == (n_examples, 50, 50)
        assert dataset.y.shape == (n_examples, 7)
        assert ((dataset.y == 0) | (dataset.y == 1)).all()

        _kwargs = kwargs.copy()
        _kwargs.update(reductions=sum)

        dataset = VisualArithmeticDataset(**_kwargs)
        assert dataset.x.shape == (n_examples, 50, 50)
        assert dataset.y.shape == (n_examples, 1)
        assert set(dataset.y.flatten()).issubset(set(range(13)))

        _kwargs = kwargs.copy()
        _kwargs.update(reductions="A:sum")

        dataset = VisualArithmeticDataset(**_kwargs)
        assert dataset.x.shape == (n_examples, 50, 50)
        assert dataset.y.shape == (n_examples, 1)
        assert set(dataset.y.flatten()).issubset(set(range(13)))

        _kwargs = kwargs.copy()
        _kwargs.update(reductions="A:sum,M:prod")

        dataset = VisualArithmeticDataset(**_kwargs)
        assert dataset.x.shape == (n_examples, 50, 50)
        assert dataset.y.shape == (n_examples, 1)
        assert set(dataset.y.flatten()).issubset(set(range(126)))
        assert not set(dataset.y.flatten()).issubset(set(range(20)))

        _kwargs = kwargs.copy()
        _kwargs.update(reductions="min")

        dataset = VisualArithmeticDataset(**_kwargs)
        assert dataset.x.shape == (n_examples, 50, 50)
        assert dataset.y.shape == (n_examples, 1)
        assert set(dataset.y.flatten()).issubset(set(range(10)))

def __init__(self, shuffle=True, **kwargs):
    start = time.time()
    print("Trying to find dataset in cache...")

    directory = kwargs.get(
        "data_dir",
        os.path.join(cfg.data_dir, "cached_datasets", self.__class__.__name__))
    os.makedirs(directory, exist_ok=True)

    params = self.param_values()
    param_hash = get_param_hash(params)
    print(self.__class__.__name__)
    print("Params:")
    pprint.pprint(params)
    print("Param hash: {}".format(param_hash))

    self.filename = os.path.join(directory, str(param_hash))
    cfg_filename = self.filename + ".cfg"

    no_cache = os.getenv("DPS_NO_CACHE")
    if no_cache:
        print("Skipping dataset cache as DPS_NO_CACHE is set (value is {}).".format(no_cache))

    # We require cfg_filename to exist as it marks that dataset creation completed successfully.
    if no_cache or not os.path.exists(self.filename) or not os.path.exists(cfg_filename):
        if kwargs.get("no_make", False):
            raise Exception("`no_make` is True, but dataset was not found in cache.")

        # Start fresh
        try:
            os.remove(self.filename)
        except FileNotFoundError:
            pass
        try:
            os.remove(cfg_filename)
        except FileNotFoundError:
            pass

        print("File for dataset not found, creating...")

        run_kwargs = kwargs.get('run_kwargs', None)
        if run_kwargs is not None:
            # Create the dataset in parallel and write it to the cache.
            make_dataset_in_parallel(run_kwargs, self.__class__, params)
        else:
            self._writer = tf.python_io.TFRecordWriter(self.filename)
            try:
                with NumpySeed(self.seed):
                    self._make()
                self._writer.close()
                print("Done creating dataset.")
            except BaseException:
                self._writer.close()
                try:
                    os.remove(self.filename)
                except FileNotFoundError:
                    pass
                try:
                    os.remove(cfg_filename)
                except FileNotFoundError:
                    pass
                raise

        with open(cfg_filename, 'w') as f:
            f.write(pprint.pformat(params))
    else:
        print("Found.")

    print("Took {} seconds.".format(time.time() - start))
    print("Features for dataset: ")
    pprint.pprint(self.features)
    print()

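# A minimal sketch of reading one of these cached TFRecord files back, using the same
# TF1-style APIs as the writer above. The helper name is an assumption, and it further
# assumes `features` is a tf.parse_single_example-compatible feature spec, which may
# differ from this class's own feature objects.
def _read_cached_tfrecords(filename, features, batch_size=32):
    dataset = tf.data.TFRecordDataset(filename)
    dataset = dataset.map(lambda record: tf.parse_single_example(record, features))
    dataset = dataset.batch(batch_size)
    return dataset
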
def run_stage(mpi_context, env, stage_idx, exp_dir):
    config, seed = mpi_context.start_stage()

    with ExitStack() as stack:
        stack.enter_context(config)
        stack.enter_context(NumpySeed(seed))

        # Accept config for new stage
        print("\n" + "-" * 10 + " Stage set-up " + "-" * 10)
        print(cfg.to_string())

        # Configure and create session and graph for stage.
        session_config = tf.ConfigProto()
        session_config.intra_op_parallelism_threads = cfg.get('intra_op_parallelism_threads', 0)
        session_config.inter_op_parallelism_threads = cfg.get('inter_op_parallelism_threads', 0)

        # if cfg.use_gpu:
        #     per_process_gpu_memory_fraction = getattr(cfg, 'per_process_gpu_memory_fraction', None)
        #     if per_process_gpu_memory_fraction:
        #         session_config.gpu_options.per_process_gpu_memory_fraction = \
        #             per_process_gpu_memory_fraction
        #     gpu_allow_growth = getattr(cfg, 'gpu_allow_growth', None)
        #     if gpu_allow_growth:
        #         session_config.gpu_options.allow_growth = gpu_allow_growth

        # if cfg.use_gpu:
        #     print("Using GPU if available.")
        #     print("Using {}% of GPU memory.".format(
        #         100 * session_config.gpu_options.per_process_gpu_memory_fraction))
        #     print("Allowing growth of GPU memory: {}".format(session_config.gpu_options.allow_growth))

        graph = tf.Graph()
        sess = tf.Session(graph=graph, config=session_config)

        # This HAS to come after the creation of the session, otherwise
        # it allocates all GPU memory if using the GPU.
        print("\nAvailable devices:")
        from tensorflow.python.client import device_lib
        print(device_lib.list_local_devices())

        # if not cfg.use_gpu:
        #     print("Not using GPU.")
        #     stack.enter_context(graph.device("/cpu:0"))
        stack.enter_context(graph.device("/cpu:0"))

        stack.enter_context(graph.as_default())
        stack.enter_context(sess)
        stack.enter_context(sess.as_default())

        tf_seed = gen_seed()
        print("Setting tensorflow seed to generated seed: {}\n".format(tf_seed))
        tf.set_random_seed(tf_seed)

        # Set limit on CPU RAM for the stage
        cpu_ram_limit_mb = cfg.get("cpu_ram_limit_mb", None)
        if cpu_ram_limit_mb is not None:
            stack.enter_context(memory_limit(cfg.cpu_ram_limit_mb))

        print("Building env...\n")

        # Maybe build env
        if stage_idx == 0 or not cfg.preserve_env:
            if env is not None:
                env.close()
            env = cfg.build_env()

        if hasattr(env, "print_memory_footprint"):
            env.print_memory_footprint()

        print("\nDone building env.\n")
        print("Building updater...\n")

        updater = cfg.get_updater(env, mpi_context=mpi_context)
        updater.stage_idx = stage_idx
        updater.exp_dir = exp_dir

        updater.build_graph()
        print("\nDone building updater.\n")

        # walk_variable_scopes(max_depth=3)

        tf.train.get_or_create_global_step()
        sess.run(uninitialized_variables_initializer())
        sess.run(tf.assert_variables_initialized())

        updater.worker_code()

        stage_idx += 1

    return env

if __name__ == "__main__":
    from contextlib import ExitStack

    with NumpySeed(100000):
        sprites, scales, offsets, backgrounds = _get_data()

        device = 'gpu'

        print("Running...")

        session_config = tf.ConfigProto()
        session_config.log_device_placement = 1
        session_config.gpu_options.per_process_gpu_memory_fraction = 0.1
        session_config.gpu_options.allow_growth = True
        graph = tf.Graph()
        sess = tf.Session(graph=graph, config=session_config)

        with ExitStack() as stack:
def build_search(
        path, name, distributions, config, n_repeats, n_param_settings=None,
        _zip=True, add_date=0, do_local_test=True, readme=""):
    """ Create a job implementing a hyper-parameter search.

    Parameters
    ----------
    path: str
        Path to the directory where the search archive will be saved.
    name: str
        Name for the search.
    distributions: dict (str -> (list or distribution))
        Distributions to sample from. Can also be a list of samples.
    config: Config instance
        The base configuration.
    n_repeats: int
        Number of different random seeds to run each sample with.
    n_param_settings: int
        Number of parameter settings to sample. If not supplied, all
        possibilities are generated.
    _zip: bool
        Whether to zip the created search directory.
    add_date: bool
        Whether to add the date to the name of the experiment directory.
    do_local_test: bool
        If True, run a short test using one of the sampled configs on the
        local machine to catch any dumb errors before starting the real
        experiment.
    readme: str
        String specifying the context/purpose of the search.

    """
    if config.get('seed', None) is None:
        config.seed = gen_seed()

    with NumpySeed(config.seed):
        es = ExperimentStore(path, prefix="build_search")

        count = 0
        base_name = name
        has_built = False
        while not has_built:
            try:
                exp_dir = es.new_experiment(name, config.seed, add_date=add_date, force_fresh=1)
                has_built = True
            except FileExistsError:
                name = "{}_{}".format(base_name, count)
                count += 1

        if readme:
            with open(exp_dir.path_for('README.md'), 'w') as f:
                f.write(readme)

        print(config)
        exp_dir.record_environment(config=config)

        print("Building parameter search at {}.".format(exp_dir.path))

        job = Job(exp_dir.path)
        new_configs = sample_configs(distributions, n_repeats, n_param_settings)

        with open(exp_dir.path_for("sampled_configs.txt"), "w") as f:
            f.write("\n".join("idx={}: {}".format(c["idx"], pformat(c)) for c in new_configs))

        print("{} configs were sampled for parameter search.".format(len(new_configs)))

        if do_local_test:
            print("\nStarting local test " + ("=" * 80))
            test_config = new_configs[0].copy()
            test_config.update(max_steps=1000, render_hook=None)
            _RunTrainingLoop(config)(test_config)
            print("Done local test " + ("=" * 80) + "\n")

        job.map(_RunTrainingLoop(config.copy()), new_configs)

        job.save_object('metadata', 'distributions', distributions)
        job.save_object('metadata', 'config', config)

        print(job.summary())

        if _zip:
            path = job.zip(delete=True)
        else:
            path = exp_dir.path

        print("Zipped {} as {}.".format(exp_dir.path, path))

        return path, len(new_configs)

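# Illustrative call sketch for build_search. The paths, experiment name, and
# distribution values below are assumptions for demonstration, not taken from
# the source; `base_config` stands in for an existing Config instance.
#
#     archive_path, n_configs = build_search(
#         "/tmp/searches", "lr_sweep",
#         distributions=dict(lr=[1e-4, 1e-3, 1e-2]),
#         config=base_config, n_repeats=3,
#         do_local_test=True, readme="Sweep over learning rate.")
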
def run(self, start_time):
    """ Run the training loop.

    Parameters
    ----------
    start_time: int
        Start time (in seconds since epoch) for measuring elapsed time for
        purposes of interrupting the training loop.

    """
    if start_time is None:
        start_time = time.time()
    self.start_time = start_time

    self.timestamp("Entering TrainingLoop.run")

    prepare_func = cfg.get("prepare_func", None)
    if callable(prepare_func):
        prepare_func()  # Modify the config in arbitrary ways before training
    else:
        try:
            prepare_funcs = list(prepare_func)
        except (TypeError, ValueError):
            pass
        else:
            for f in prepare_funcs:
                if callable(f):
                    f()

    self.curriculum = cfg.curriculum + []

    if cfg.seed is None or cfg.seed < 0:
        cfg.seed = gen_seed()

    # Create a directory to store the results of the training session.
    self.experiment_store = ExperimentStore(os.path.join(cfg.local_experiments_dir, cfg.env_name))
    exp_dir = self.experiment_store.new_experiment(
        self.exp_name, cfg.seed, add_date=1, force_fresh=1, update_latest=False)
    self.exp_dir = exp_dir
    cfg.path = exp_dir.path

    breaker = "-" * 40
    header = "{}\nREADME.md - {}\n{}\n\n\n".format(breaker, os.path.basename(exp_dir.path), breaker)
    readme = header + (cfg.readme if cfg.readme else "") + "\n\n"
    with open(exp_dir.path_for('README.md'), 'w') as f:
        f.write(readme)

    self.data = _TrainingLoopData(exp_dir)
    self.data.setup()

    frozen_data = None

    with ExitStack() as stack:
        if cfg.pdb:
            stack.enter_context(pdb_postmortem())
            print("`pdb` is turned on, so forcing setting robust=False")
            cfg.robust = False

        stack.enter_context(redirect_stream('stdout', self.data.path_for('stdout'), tee=cfg.tee))
        stack.enter_context(redirect_stream('stderr', self.data.path_for('stderr'), tee=cfg.tee))

        print("\n\n" + "=" * 80)
        self.timestamp("Starting training run (name={})".format(self.exp_name))

        print("\nDirectory for this training run is {}.".format(exp_dir.path))

        stack.enter_context(NumpySeed(cfg.seed))
        print("\nSet numpy random seed to {}.\n".format(cfg.seed))

        limiter = time_limit(
            self.time_remaining, verbose=True,
            timeout_callback=lambda limiter: print("Training run exceeded its time limit."))

        self.mpi_context = MPI_MasterContext(cfg.get('n_procs', 1), exp_dir)

        try:
            with limiter:
                self._run()
        finally:
            self.data.summarize()

            self.timestamp("Done training run (name={})".format(self.exp_name))
            print("=" * 80)
            print("\n\n")

            frozen_data = self.data.freeze()

    self.timestamp("Leaving TrainingLoop.run")

    return frozen_data

def make_dataset_in_parallel(run_kwargs, dataset_cls, param_values=None):
    """ Uses dps.hyper.parallel_session.ParallelSession to create a dataset in parallel. """

    # Get run_kwargs from command line
    sig = inspect.signature(ParallelSession.__init__)
    default_run_kwargs = sig.bind_partial()
    default_run_kwargs.apply_defaults()
    cl_run_kwargs = clify.command_line(default_run_kwargs.arguments).parse()
    run_kwargs.update(cl_run_kwargs)

    param_values = param_values or dataset_cls._capture_param_values()
    param_values = Config(param_values)
    seed = param_values["seed"]
    if seed is None or seed < 0:
        seed = gen_seed()

    n_examples = param_values["n_examples"]
    n_examples_per_shard = run_kwargs["n_examples_per_shard"]

    experiment_store = ExperimentStore(
        cfg.parallel_experiments_build_dir, prefix="build_{}".format(dataset_cls.__name__))

    count = 0
    name = "attempt=0"
    has_built = False
    while not has_built:
        try:
            exp_dir = experiment_store.new_experiment(name, seed, add_date=True, force_fresh=True)
            has_built = True
        except FileExistsError:
            count += 1
            name = "attempt_{}".format(count)

    print("Building dataset.")

    job = Job(exp_dir.path)
    n_examples_remaining = n_examples

    with NumpySeed(seed):
        inputs = []
        idx = 0
        while n_examples_remaining:
            seed = gen_seed()
            cur_n_examples = min(n_examples_remaining, n_examples_per_shard)
            n_examples_remaining -= cur_n_examples

            inputs.append((idx, seed, cur_n_examples))
            idx += 1

        job.map(_BuildDataset(dataset_cls, param_values), inputs)
        job.save_object('metadata', 'param_values', param_values)

    print(job.summary())
    archive_path = job.zip(delete=True)
    print("Zipped {} as {}.".format(exp_dir.path, archive_path))

    run_kwargs = run_kwargs.copy()
    del run_kwargs['n_examples_per_shard']
    run_kwargs.update(
        archive_path=archive_path, name=name, kind="parallel",
        parallel_exe=cfg.parallel_exe)
    parallel_session = submit_job(**run_kwargs)

    with cd(os.path.join(parallel_session.job_path, 'experiments')):
        dataset_files = []
        for dir_path, dirs, files in os.walk('.'):
            if not dir_path.startswith("./exp__seed="):
                continue

            df = [f for f in files if not f.endswith('.cfg')]
            assert len(df) == 1
            dataset_files.append(os.path.join(dir_path, df[0]))

        cached_filename = os.path.join(
            cfg.data_dir, "cached_datasets", dataset_cls.__name__, str(get_param_hash(param_values)))

        command = "cat " + " ".join(dataset_files) + " > " + cached_filename
        print("Running command: \n" + command)
        subprocess.run(command, shell=True, check=True)
        print("Done.")

        with open(cached_filename + ".cfg", 'w') as f:
            f.write(pprint.pformat(param_values))

    return parallel_session