Example No. 1
def main(_):
    seed = common.set_random_seed(FLAGS.random_seed)
    gin_file = common.get_gin_file()
    gin.parse_config_files_and_bindings(gin_file, FLAGS.gin_param)
    algorithm_ctor = gin.query_parameter(
        'TrainerConfig.algorithm_ctor').scoped_configurable_fn
    env = create_environment(nonparallel=True, seed=seed)
    env.reset()
    common.set_global_env(env)
    config = policy_trainer.TrainerConfig(root_dir="")
    data_transformer = create_data_transformer(config.data_transformer_ctor,
                                               env.observation_spec())
    config.data_transformer = data_transformer
    observation_spec = data_transformer.transformed_observation_spec
    common.set_transformed_observation_spec(observation_spec)
    algorithm = algorithm_ctor(
        observation_spec=observation_spec,
        action_spec=env.action_spec(),
        config=config)
    try:
        policy_trainer.play(
            FLAGS.root_dir,
            env,
            algorithm,
            checkpoint_step=FLAGS.checkpoint_step or "latest",
            epsilon_greedy=FLAGS.epsilon_greedy,
            num_episodes=FLAGS.num_episodes,
            max_episode_length=FLAGS.max_episode_length,
            sleep_time_per_step=FLAGS.sleep_time_per_step,
            record_file=FLAGS.record_file,
            ignored_parameter_prefixes=FLAGS.ignored_parameter_prefixes.split(
                ",") if FLAGS.ignored_parameter_prefixes else [])
    finally:
        env.close()
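The `FLAGS` object referenced above is not defined in this snippet. A minimal sketch of the absl flag declarations the script appears to rely on follows; the flag names are taken from the code, but the types, defaults, and help strings are assumptions, not ALF's actual declarations.

# Hypothetical flag definitions matching the names used in main() above.
# Types and defaults are assumptions for illustration only.
from absl import app, flags

flags.DEFINE_string('root_dir', None, 'Root directory of the trained model.')
flags.DEFINE_multi_string('gin_file', None, 'Path(s) to the gin config file(s).')
flags.DEFINE_multi_string('gin_param', None, 'Gin binding overrides.')
flags.DEFINE_integer('random_seed', None, 'Random seed to use.')
flags.DEFINE_integer('checkpoint_step', None,
                     'Checkpoint step to load; falls back to "latest".')
flags.DEFINE_float('epsilon_greedy', 0.1, 'Epsilon for epsilon-greedy play.')
flags.DEFINE_integer('num_episodes', 10, 'Number of episodes to play.')
flags.DEFINE_integer('max_episode_length', 0, 'Episode length cap; 0 = no cap.')
flags.DEFINE_float('sleep_time_per_step', 0.01,
                   'Sleep time between steps, for watching the rendering.')
flags.DEFINE_string('record_file', None, 'If set, record a video to this file.')
flags.DEFINE_string('ignored_parameter_prefixes', '',
                    'Comma-separated parameter prefixes to ignore when '
                    'restoring the checkpoint.')
FLAGS = flags.FLAGS

if __name__ == '__main__':
    app.run(main)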
Example No. 2
    def run(self):
        """Run trainings with all possible parameter combinations in configured space
        """

        # Parse the gin configuration here so that all jobs share the same
        # copy of the base configuration (the gin file may change between runs).
        gin_file = common.get_gin_file()
        gin.parse_config_files_and_bindings(gin_file, FLAGS.gin_param)

        param_keys = self._conf.param_keys
        param_values = self._conf.param_values
        max_worker_num = self._conf.max_worker_num

        process_pool = multiprocessing.Pool(processes=max_worker_num,
                                            maxtasksperchild=1)
        device_queue = self._init_device_queue(max_worker_num)

        task_count = 0
        for values in itertools.product(*param_values):
            parameters = dict(zip(param_keys, values))
            for repeat in range(self._conf.repeats):
                root_dir = "%s/%s" % (FLAGS.root_dir,
                                      self._generate_run_name(
                                          parameters, task_count, repeat))
                process_pool.apply_async(
                    func=self._worker,
                    args=[root_dir, parameters, device_queue],
                    error_callback=lambda e: logging.error(e))
            task_count += 1

        process_pool.close()
        process_pool.join()
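The `_init_device_queue` helper called above is not shown. A plausible standalone sketch follows, assuming a round-robin assignment of GPU ids to workers; the `gpu_ids` parameter and the assignment policy are guesses, not ALF's actual implementation. A `Manager().Queue()` is used because a bare `multiprocessing.Queue` cannot be passed to pool workers through `apply_async`.

# Hypothetical sketch of the _init_device_queue helper used above.
import multiprocessing

def init_device_queue(max_worker_num, gpu_ids=(0,)):
    """Fill a shared queue with one device id per prospective worker."""
    device_queue = multiprocessing.Manager().Queue()
    for i in range(max_worker_num):
        # Spread workers across the available devices round-robin.
        device_queue.put(gpu_ids[i % len(gpu_ids)])
    return device_queue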
Example No. 3
    def _worker(self, root_dir, parameters):
        logging.set_verbosity(logging.INFO)
        gin_file = common.get_gin_file()
        if FLAGS.gin_file:
            common.copy_gin_configs(root_dir, gin_file)
        gin.parse_config_files_and_bindings(gin_file, FLAGS.gin_param)
        with gin.unlock_config():
            gin.parse_config(['%s=%s' % (k, v) for k, v in parameters.items()])
        train_eval(root_dir)
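For illustration, the `parameters` dict received by `_worker` maps gin configurable names to values, and the list comprehension above renders each pair into a gin binding string. A tiny standalone example (the configurable names are made up):

# Hypothetical grid-search point; the names are illustrative only.
parameters = {
    'ActorCriticLoss.entropy_regularization': 0.01,
    'create_environment.num_parallel_environments': 8,
}
bindings = ['%s=%s' % (k, v) for k, v in parameters.items()]
# bindings == ['ActorCriticLoss.entropy_regularization=0.01',
#              'create_environment.num_parallel_environments=8']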
Example No. 4
def main(_):
    gin_file = common.get_gin_file()
    gin.parse_config_files_and_bindings(gin_file, FLAGS.gin_param)
    algorithm_ctor = gin.query_parameter('TrainerConfig.algorithm_ctor')
    env = create_environment(num_parallel_environments=1)
    algorithm = algorithm_ctor(env)
    policy_trainer.play(FLAGS.root_dir,
                        env,
                        algorithm,
                        checkpoint_name=FLAGS.checkpoint_name,
                        greedy_predict=FLAGS.greedy_predict,
                        random_seed=FLAGS.random_seed,
                        num_episodes=FLAGS.num_episodes,
                        sleep_time_per_step=FLAGS.sleep_time_per_step,
                        record_file=FLAGS.record_file)
Example No. 5
def main(_):
    logging.set_verbosity(logging.INFO)

    gin_file = common.get_gin_file()

    if FLAGS.gin_file and not FLAGS.play:
        common.copy_gin_configs(FLAGS.root_dir, gin_file)

    gin.parse_config_files_and_bindings(gin_file, FLAGS.gin_param)
    if FLAGS.play:
        with gin.unlock_config():
            gin.bind_parameter(
                '__main__.play.algorithm_ctor',
                gin.query_parameter('TrainerConfig.algorithm_ctor'))
        play(FLAGS.root_dir)
    else:
        train_eval(FLAGS.root_dir)
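The interesting step above is the query/bind round-trip: the algorithm constructor configured for `TrainerConfig` is read back with `gin.query_parameter` and re-bound to the `play` function. A minimal, self-contained demonstration of that pattern, using a made-up configurable:

# Minimal demonstration of the gin query/re-bind pattern used above.
# The configurable and its parameter are hypothetical.
import gin

@gin.configurable
def make_model(hidden_size=32):
    return {'hidden_size': hidden_size}

gin.parse_config(['make_model.hidden_size = 128'])

# Read a bound value back out of the config ...
value = gin.query_parameter('make_model.hidden_size')

# ... and re-bind it (possibly to another configurable) under unlock_config.
with gin.unlock_config():
    gin.bind_parameter('make_model.hidden_size', value * 2)

assert make_model()['hidden_size'] == 256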
Example No. 6
File: play.py Project: runjerry/alf
def main(_):
    seed = common.set_random_seed(FLAGS.random_seed,
                                  not FLAGS.use_tf_functions)
    gin_file = common.get_gin_file()
    gin.parse_config_files_and_bindings(gin_file, FLAGS.gin_param)
    algorithm_ctor = gin.query_parameter(
        'TrainerConfig.algorithm_ctor').scoped_configurable_fn
    env = create_environment(nonparallel=True, seed=seed)
    env.reset()
    common.set_global_env(env)
    algorithm = algorithm_ctor(observation_spec=env.observation_spec(),
                               action_spec=env.action_spec())
    policy_trainer.play(FLAGS.root_dir,
                        env,
                        algorithm,
                        checkpoint_name=FLAGS.checkpoint_name,
                        epsilon_greedy=FLAGS.epsilon_greedy,
                        num_episodes=FLAGS.num_episodes,
                        sleep_time_per_step=FLAGS.sleep_time_per_step,
                        record_file=FLAGS.record_file,
                        use_tf_functions=FLAGS.use_tf_functions)
    env.pyenv.close()
Example No. 7
def main(_):
    gin_file = common.get_gin_file()
    FLAGS.alsologtostderr = True
    gin.parse_config_files_and_bindings(gin_file, FLAGS.gin_param)
    train_eval(FLAGS.ml_type, FLAGS.root_dir)
Example No. 8
def main(_):
    gin_file = common.get_gin_file()
    gin.parse_config_files_and_bindings(gin_file, FLAGS.gin_param)
    train_eval(FLAGS.root_dir)
Example No. 9
    def _train(self):
        for env in self._envs:
            env.reset()
        if self._eval_env:
            self._eval_env.reset()

        begin_iter_num = int(self._trainer_progress._iter_num)
        iter_num = begin_iter_num

        checkpoint_interval = math.ceil(
            (self._num_iterations or self._num_env_steps) /
            self._num_checkpoints)

        if self._num_iterations:
            time_to_checkpoint = self._trainer_progress._iter_num + checkpoint_interval
        else:
            time_to_checkpoint = self._trainer_progress._env_steps + checkpoint_interval

        while True:
            t0 = time.time()
            with record_time("time/train_iter"):
                train_steps = self._algorithm.train_iter()
            t = time.time() - t0
            logging.log_every_n_seconds(
                logging.INFO,
                '%s -> %s: %s time=%.3f throughput=%0.2f' %
                (common.get_gin_file(), [
                    os.path.basename(self._root_dir.strip('/'))
                ], iter_num, t, int(train_steps) / t),
                n_seconds=1)

            if self._evaluate and (iter_num + 1) % self._eval_interval == 0:
                self._eval()
            if iter_num == begin_iter_num:
                self._summarize_training_setting()

            # check termination
            env_steps_metric = self._algorithm.get_step_metrics()[1]
            total_time_steps = env_steps_metric.result()
            iter_num += 1

            self._trainer_progress.update(iter_num, total_time_steps)

            if ((self._num_iterations and iter_num >= self._num_iterations)
                    or (self._num_env_steps
                        and total_time_steps >= self._num_env_steps)):
                # Evaluate before exiting so that the eval curve shown in TB
                # will align with the final iter/env_step.
                if self._evaluate:
                    self._eval()
                break

            if ((self._num_iterations and iter_num >= time_to_checkpoint)
                    or (self._num_env_steps
                        and total_time_steps >= time_to_checkpoint)):
                self._save_checkpoint()
                time_to_checkpoint += checkpoint_interval
            elif self._checkpoint_requested:
                logging.info("Saving checkpoint upon request...")
                self._save_checkpoint()
                self._checkpoint_requested = False

            if self._debug_requested:
                self._debug_requested = False
                import pdb
                pdb.set_trace()
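As a concrete check of the checkpoint arithmetic in `_train`: `checkpoint_interval` divides the total budget (iterations or env steps, whichever is configured) evenly among the requested number of checkpoints. With assumed values:

# Worked example of the checkpoint-interval arithmetic (assumed values).
import math

num_iterations = 1000   # hypothetical training budget
num_checkpoints = 3     # hypothetical number of checkpoints requested

checkpoint_interval = math.ceil(num_iterations / num_checkpoints)
assert checkpoint_interval == 334
# Checkpoints become due at iterations 334 and 668; the run terminates at
# iteration 1000 before the third threshold (1002) is reached.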