def testResults(self):
  with temporary_directory() as logdir:
    results_obj = results_lib.Results(logdir)
    self.assertEqual(results_obj.read_this_shard(), [])
    results_obj.append(
        {'foo': 1.5, 'bar': 2.5, 'baz': 0})
    results_obj.append(
        {'foo': 5.5, 'bar': -1, 'baz': 2})
    self.assertEqual(
        results_obj.read_this_shard(),
        [{'foo': 1.5, 'bar': 2.5, 'baz': 0},
         {'foo': 5.5, 'bar': -1, 'baz': 2}])
def testShardedResults(self):
  with temporary_directory() as logdir:
    n = 4  # Number of shards.
    results_objs = [
        results_lib.Results(logdir, shard_id=i) for i in xrange(n)]
    for i, robj in enumerate(results_objs):
      robj.append({'foo': i, 'bar': 1 + i * 2})
    results_list, _ = results_objs[0].read_all()

    # Check results. Order does not matter here.
    self.assertEqual(
        set(freeze(r) for r in results_list),
        set(freeze({'foo': i, 'bar': 1 + i * 2}) for i in xrange(n)))
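
# The two tests above depend on helpers that are not part of this excerpt.
# Below is a minimal sketch of what they could look like; these exact
# definitions are assumptions for illustration, not necessarily the
# project's own versions.
import contextlib
import shutil
import tempfile


@contextlib.contextmanager
def temporary_directory(suffix='', prefix='tmp', base_path=None):
  """Yields a fresh temp directory path and removes it when the block exits."""
  path = tempfile.mkdtemp(suffix=suffix, prefix=prefix, dir=base_path)
  try:
    yield path
  finally:
    shutil.rmtree(path)


def freeze(result_dict):
  """Makes a result dict hashable so lists of results can be compared as sets."""
  return frozenset(result_dict.items())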
def get_results_for_experiment(
    models_dir, task_name, model_type='pg', max_npe='5M', desc='v0',
    name_prefix='bf_rl_paper', extra_desc=''):
  """Get and process results for a given experiment.

  An experiment is a set of runs with the same hyperparameters and environment.
  It is uniquely specified by a (task_name, model_type, max_npe) triple, as
  well as an optional description.

  We assume that each experiment has a folder with the same name as the job
  that ran the experiment. The folder name is computed as
  "%name_prefix%.%desc%.%model_type%-%max_npe%_%task_name%".

  Args:
    models_dir: Parent directory containing experiment folders.
    task_name: String name of the task (the coding env). See code_tasks.py or
        run_eval_tasks.py.
    model_type: Name of the algorithm, such as 'pg', 'topk', 'ga', 'rand'.
    max_npe: String SI unit representation of the maximum NPE threshold for
        the experiment. For example, "5M" means 5 million.
    desc: Description.
    name_prefix: Prefix of job names. Normally leave this as default.
    extra_desc: Optional extra description at the end of the job name.

  Returns:
    ProcessedResults namedtuple instance, containing
    metrics: Raw dicts read from disk.
    processed: Stats computed by `process_results`.

  Raises:
    ValueError: If max_npe in the metrics does not match NPE in the experiment
        folder name.
  """
  folder = name_prefix + '.{0}.{1}-{2}_{3}'.format(
      desc, model_type, max_npe, task_name)
  if extra_desc:
    folder += '.' + extra_desc

  results = results_lib.Results(os.path.join(models_dir, folder))
  metrics, _ = results.read_all()
  processed = process_results(metrics)
  if (not np.isclose(processed['max_npe'], misc.si_to_int(max_npe))
      and processed['repetitions']):
    raise ValueError(
        'Invalid experiment. Max-NPE setting does not match expected max-NPE '
        'in experiment name.')
  return ProcessedResults(metrics=metrics, processed=processed)
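
# Illustration of the naming scheme documented above (all values made up):
#   get_results_for_experiment('/tmp/models', 'reverse', model_type='topk',
#                               max_npe='20M')
# would read results from the folder
#   /tmp/models/bf_rl_paper.v0.topk-20M_reverse
# and raise ValueError if the stored max_npe stats disagree with 20 million.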
def run_training(config=None, tuner=None, logdir=None,
                 trial_name=None,  # pylint: disable=unused-argument
                 is_chief=True):
  """Do all training runs.

  This is the top level training function for genetic algorithm and random
  search models. Run this from the main function.

  Args:
    config: config_lib.Config instance containing global config (agent and
        environment hparams). If None, config will be parsed from FLAGS.config.
    tuner: (unused) A tuner instance. Leave as None if not tuning.
    logdir: Parent directory where all data from all runs will be written. If
        None, FLAGS.logdir will be used.
    trial_name: (unused) If tuning, set this to a unique string that identifies
        this trial. If `tuner` is not None, this also must be set.
    is_chief: True if this worker is the chief.

  Returns:
    List of results dicts which were written to disk. Each training run gets a
    results dict. Results dict contains metrics, i.e. (name, value) pairs which
    give information about the training run.

  Raises:
    ValueError: If FLAGS.num_workers does not divide FLAGS.num_repetitions, or
        if results dicts read from disk contain invalid data.
  """
  if not config:
    # If custom config is not given, get it from flags.
    config = defaults.default_config_with_updates(FLAGS.config)
  if not logdir:
    logdir = FLAGS.logdir

  if FLAGS.num_repetitions % FLAGS.num_workers != 0:
    raise ValueError('Number of workers must divide number of repetitions')
  num_local_reps = FLAGS.num_repetitions // FLAGS.num_workers
  logging.info('Running %d reps globally.', FLAGS.num_repetitions)
  logging.info('This worker will run %d local reps.', num_local_reps)

  if FLAGS.max_npe:
    max_generations = FLAGS.max_npe // config.batch_size
    logging.info('Max samples per rep: %d', FLAGS.max_npe)
    logging.info('Max generations per rep: %d', max_generations)
  else:
    max_generations = sys.maxint
    logging.info('Running unlimited generations.')

  assert FLAGS.num_workers > 0
  logging.info('Starting experiment. Directory: "%s"', logdir)
  results = results_lib.Results(logdir, FLAGS.task_id)
  local_results_list = results.read_this_shard()
  if local_results_list:
    if local_results_list[0]['max_npe'] != FLAGS.max_npe:
      raise ValueError(
          'Cannot resume training. Max-NPE changed. Was %s, now %s'
          % (local_results_list[0]['max_npe'], FLAGS.max_npe))
    if (local_results_list[0]['max_global_repetitions']
        != FLAGS.num_repetitions):
      raise ValueError(
          'Cannot resume training. Number of repetitions changed. Was %s, '
          'now %s'
          % (local_results_list[0]['max_global_repetitions'],
             FLAGS.num_repetitions))
  start_rep = len(local_results_list)

  for rep in xrange(start_rep, num_local_reps):
    global_rep = num_local_reps * FLAGS.task_id + rep
    logging.info(
        'Starting repetition: Rep = %d. (global rep = %d)', rep, global_rep)

    # Data saved for each rep, like checkpoints, goes into a separate folder.
    run_dir = os.path.join(logdir, 'run_%d' % global_rep)
    if not tf.gfile.IsDirectory(run_dir):
      tf.gfile.MakeDirs(run_dir)
    checkpoint_writer = CheckpointWriter(
        run_dir, population_size=config.batch_size)

    data_manager = data.DataManager(config, run_number=global_rep)
    task_eval_fn = ga_lib.make_task_eval_fn(data_manager.rl_task)

    if config.agent.algorithm == 'rand':
      logging.info('Running random search.')
      assert FLAGS.max_npe
      result = run_random_search(
          FLAGS.max_npe, run_dir, task_eval_fn, config.timestep_limit)
    else:
      assert config.agent.algorithm == 'ga'
      logging.info('Running genetic algorithm.')
      pop = ga_lib.make_population(
          ga_lib.random_individual(config.timestep_limit),
          n=config.batch_size)
      hof = utils.MaxUniquePriorityQueue(2)  # Hall of fame.
      result = ga_lib.ga_loop(
          pop,
          cxpb=config.agent.crossover_rate,
          mutpb=config.agent.mutation_rate,
          task_eval_fn=task_eval_fn,
          ngen=max_generations,
          halloffame=hof,
          checkpoint_writer=checkpoint_writer)

    logging.info('Finished rep. Num gens: %d', result.generations)

    results_dict = {
        'max_npe': FLAGS.max_npe,
        'batch_size': config.batch_size,
        'max_batches': FLAGS.max_npe // config.batch_size,
        'npe': result.num_programs,
        'max_global_repetitions': FLAGS.num_repetitions,
        'max_local_repetitions': num_local_reps,
        'code_solution': result.best_code if result.solution_found else '',
        'best_reward': result.reward,
        'num_batches': result.generations,
        'found_solution': result.solution_found,
        'task': data_manager.task_name,
        'global_rep': global_rep}
    logging.info('results_dict: %s', results_dict)
    results.append(results_dict)

  if is_chief:
    logging.info(
        'Worker is chief. Waiting for all workers to finish so that results '
        'can be reported to the tuner.')
    global_results_list, shard_stats = results.read_all(
        num_shards=FLAGS.num_workers)
    while not all(s.finished for s in shard_stats):
      logging.info(
          'Still waiting on these workers: %s',
          ', '.join(
              ['%d (%d reps left)'
               % (i, s.max_local_reps - s.num_local_reps_completed)
               for i, s in enumerate(shard_stats) if not s.finished]))
      sleep(60)
      global_results_list, shard_stats = results.read_all(
          num_shards=FLAGS.num_workers)

    logging.info(
        '%d results obtained. Chief worker is exiting the experiment.',
        len(global_results_list))

    return global_results_list
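
# Worked example of the sharding and NPE arithmetic used above (made-up
# settings): with FLAGS.num_repetitions=8 and FLAGS.num_workers=4, each worker
# runs num_local_reps = 8 // 4 = 2 reps, and the worker with task_id=2 runs
# global reps 2 * 2 + 0 = 4 and 2 * 2 + 1 = 5. With FLAGS.max_npe=20000000 and
# config.batch_size=100, each rep runs at most 200000 generations.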
def run_training(config=None, tuner=None, logdir=None, trial_name=None,
                 is_chief=True):
  """Do all training runs.

  This is the top level training function for policy gradient based models.
  Run this from the main function.

  Args:
    config: config_lib.Config instance containing global config (agent and
        environment hparams). If None, config will be parsed from FLAGS.config.
    tuner: A tuner instance. Leave as None if not tuning.
    logdir: Parent directory where all data from all runs will be written. If
        None, FLAGS.logdir will be used.
    trial_name: If tuning, set this to a unique string that identifies this
        trial. If `tuner` is not None, this also must be set.
    is_chief: True if this worker is the chief.

  Returns:
    List of results dicts which were written to disk. Each training run gets a
    results dict. Results dict contains metrics, i.e. (name, value) pairs which
    give information about the training run.

  Raises:
    ValueError: If results dicts read from disk contain invalid data.
  """
  if not config:
    # If custom config is not given, get it from flags.
    config = defaults.default_config_with_updates(FLAGS.config)
  if not logdir:
    logdir = FLAGS.logdir

  if not tf.gfile.Exists(logdir):
    tf.gfile.MakeDirs(logdir)
  assert FLAGS.num_repetitions > 0
  results = results_lib.Results(logdir)
  results_list, _ = results.read_all()
  tf.logging.info('Starting experiment. Directory: "%s"', logdir)

  if results_list:
    if results_list[0]['max_npe'] != FLAGS.max_npe:
      raise ValueError(
          'Cannot resume training. Max-NPE changed. Was %s, now %s'
          % (results_list[0]['max_npe'], FLAGS.max_npe))
    if results_list[0]['max_global_repetitions'] != FLAGS.num_repetitions:
      raise ValueError(
          'Cannot resume training. Number of repetitions changed. Was %s, '
          'now %s'
          % (results_list[0]['max_global_repetitions'],
             FLAGS.num_repetitions))

  while len(results_list) < FLAGS.num_repetitions:
    run_number = len(results_list)
    rep_container_name = trial_name if trial_name else 'container'
    if FLAGS.num_repetitions > 1:
      rep_dir = os.path.join(logdir, 'run_%d' % run_number)
      rep_container_name = rep_container_name + '_run_' + str(run_number)
    else:
      rep_dir = logdir
    tf.logging.info(
        'Starting repetition %d (%d out of %d)', run_number, run_number + 1,
        FLAGS.num_repetitions)

    # Train will write result to disk.
    with tf.container(rep_container_name):
      trainer = train(config, is_chief, tuner, rep_dir, run_number, results)
    tf.logging.info('Done training.')

    if is_chief:
      # Destroy current container immediately (clears current graph).
      tf.logging.info('Clearing shared variables.')
      tf.Session.reset(FLAGS.master, containers=[rep_container_name])
      tf.logging.info('Shared variables cleared.')

      # Delete replay buffer on disk.
      assert trainer
      trainer.delete_replay_buffer()
    else:
      # Give chief worker time to clean up.
      sleep_sec = 30.0
      tf.logging.info('Sleeping for %s sec.', sleep_sec)
      time.sleep(sleep_sec)
    tf.reset_default_graph()
    tf.logging.info('Default graph reset.')

    # Expecting that train wrote new result to disk before returning.
    results_list, _ = results.read_all()

  return results_list
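
# Resume behavior sketch (made-up numbers): with FLAGS.num_repetitions=5 and
# three results already on disk, read_all() returns 3 dicts, so the loop above
# starts at run_number=3 and runs repetitions 3 and 4 in run_3 and run_4
# before returning all 5 results.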