def test_meta_evaluator():
    set_seed(100)
    tasks = SetTaskSampler(PointEnv, wrapper=set_length)
    max_episode_length = 200
    with tempfile.TemporaryDirectory() as log_dir_name:
        trainer = Trainer(
            SnapshotConfig(snapshot_dir=log_dir_name,
                           snapshot_mode='last',
                           snapshot_gap=1))
        env = PointEnv(max_episode_length=max_episode_length)
        algo = OptimalActionInference(env=env,
                                      max_episode_length=max_episode_length)
        trainer.setup(algo, env)
        meta_eval = MetaEvaluator(test_task_sampler=tasks, n_test_tasks=10)
        log_file = tempfile.NamedTemporaryFile()
        csv_output = CsvOutput(log_file.name)
        logger.add_output(csv_output)
        meta_eval.evaluate(algo)
        logger.log(tabular)
        meta_eval.evaluate(algo)
        logger.log(tabular)
        logger.dump_output_type(CsvOutput)
        logger.remove_output_type(CsvOutput)
        with open(log_file.name, 'r') as file:
            rows = list(csv.DictReader(file))
        assert len(rows) == 2
        assert float(
            rows[0]['MetaTest/__unnamed_task__/TerminationRate']) < 1.0
        assert float(rows[0]['MetaTest/__unnamed_task__/Iteration']) == 0
        assert (float(rows[0]['MetaTest/__unnamed_task__/MaxReturn']) >=
                float(rows[0]['MetaTest/__unnamed_task__/AverageReturn']))
        assert (float(rows[0]['MetaTest/__unnamed_task__/AverageReturn']) >=
                float(rows[0]['MetaTest/__unnamed_task__/MinReturn']))
        assert float(rows[1]['MetaTest/__unnamed_task__/Iteration']) == 1
def test_one_folder(self, meta_train_dir, itrs):
    snapshot_config = SnapshotConfig(snapshot_dir=meta_train_dir,
                                     snapshot_mode='all',
                                     snapshot_gap=1)
    runner = LocalRunner(snapshot_config=snapshot_config)
    meta_sampler = AllSetTaskSampler(self.meta_task_cls)
    runner.restore(meta_train_dir)
    meta_evaluator = MetaEvaluator(
        runner,
        test_task_sampler=meta_sampler,
        max_path_length=self.max_path_length,
        n_test_tasks=meta_sampler.n_tasks,
        n_exploration_traj=self.adapt_rollout_per_task,
        prefix='')
    for itr in itrs:
        log_filename = os.path.join(meta_train_dir,
                                    'meta-test-itr_{}.csv'.format(itr))
        logger.add_output(CsvOutput(log_filename))
        logger.log('Writing into {}'.format(log_filename))
        runner.restore(meta_train_dir, from_epoch=itr)
        meta_evaluator.evaluate(runner._algo, self.test_rollout_per_task)
        tabular.record('Iteration', runner._stats.total_epoch)
        tabular.record('TotalEnvSteps', runner._stats.total_env_steps)
        logger.log(tabular)
        logger.dump_output_type(CsvOutput)
        logger.remove_output_type(CsvOutput)
def test_meta_evaluator_with_tf():
    set_seed(100)
    tasks = SetTaskSampler(PointEnv, wrapper=set_length)
    max_episode_length = 200
    env = PointEnv()
    n_eps = 3
    with tempfile.TemporaryDirectory() as log_dir_name:
        ctxt = SnapshotConfig(snapshot_dir=log_dir_name,
                              snapshot_mode='none',
                              snapshot_gap=1)
        with TFTrainer(ctxt) as trainer:
            meta_eval = MetaEvaluator(test_task_sampler=tasks,
                                      n_test_tasks=10,
                                      n_exploration_eps=n_eps)
            policy = GaussianMLPPolicy(env.spec)
            algo = MockAlgo(env, policy, max_episode_length, n_eps, meta_eval)
            trainer.setup(algo, env)
            log_file = tempfile.NamedTemporaryFile()
            csv_output = CsvOutput(log_file.name)
            logger.add_output(csv_output)
            meta_eval.evaluate(algo)
            algo_pickle = cloudpickle.dumps(algo)
        tf.compat.v1.reset_default_graph()
        with TFTrainer(ctxt) as trainer:
            algo2 = cloudpickle.loads(algo_pickle)
            trainer.setup(algo2, env)
            trainer.train(10, 0)
def alg_train(ctxt=None):
    get_args(parser)
    args = parser.parse_args()
    args.prefix = use_prefix
    set_seed(args.seed)
    env = GymEnv(args.env_name)
    if args.env_norm:
        env = normalize(env)
    trainer = Trainer(ctxt)
    logger.remove_all()
    logger.add_output(StdLogger(args.log_interval))
    if not args.no_wb:
        wb_logger = WbOutput(args.log_interval, base_args)
        logger.add_output(wb_logger)
    algo = get_algo(env, trainer, args)
    if args.cuda:
        set_gpu_mode(True)
        algo.to()
    else:
        set_gpu_mode(False)
    trainer.train(n_epochs=args.n_epochs, batch_size=args.batch_size)
def test_meta_evaluator_with_tf():
    set_seed(100)
    tasks = SetTaskSampler(lambda: GarageEnv(PointEnv()))
    max_path_length = 200
    env = GarageEnv(PointEnv())
    n_traj = 3
    with tempfile.TemporaryDirectory() as log_dir_name:
        ctxt = SnapshotConfig(snapshot_dir=log_dir_name,
                              snapshot_mode='none',
                              snapshot_gap=1)
        with LocalTFRunner(ctxt) as runner:
            meta_eval = MetaEvaluator(test_task_sampler=tasks,
                                      max_path_length=max_path_length,
                                      n_test_tasks=10,
                                      n_exploration_traj=n_traj)
            policy = GaussianMLPPolicy(env.spec)
            algo = MockAlgo(env, policy, max_path_length, n_traj, meta_eval)
            runner.setup(algo, env)
            log_file = tempfile.NamedTemporaryFile()
            csv_output = CsvOutput(log_file.name)
            logger.add_output(csv_output)
            meta_eval.evaluate(algo)
            algo_pickle = cloudpickle.dumps(algo)
        with tf.Graph().as_default():
            with LocalTFRunner(ctxt) as runner:
                algo2 = cloudpickle.loads(algo_pickle)
                runner.setup(algo2, env)
                runner.train(10, 0)
def test_pickle_meta_evaluator():
    set_seed(100)
    tasks = SetTaskSampler(lambda: GarageEnv(PointEnv()))
    max_path_length = 200
    env = GarageEnv(PointEnv())
    n_traj = 3
    with tempfile.TemporaryDirectory() as log_dir_name:
        runner = LocalRunner(
            SnapshotConfig(snapshot_dir=log_dir_name,
                           snapshot_mode='last',
                           snapshot_gap=1))
        meta_eval = MetaEvaluator(test_task_sampler=tasks,
                                  max_path_length=max_path_length,
                                  n_test_tasks=10,
                                  n_exploration_traj=n_traj)
        policy = RandomPolicy(env.spec.action_space)
        algo = MockAlgo(env, policy, max_path_length, n_traj, meta_eval)
        runner.setup(algo, env)
        log_file = tempfile.NamedTemporaryFile()
        csv_output = CsvOutput(log_file.name)
        logger.add_output(csv_output)
        meta_eval.evaluate(algo)
        meta_eval_pickle = cloudpickle.dumps(meta_eval)
        meta_eval2 = cloudpickle.loads(meta_eval_pickle)
        meta_eval2.evaluate(algo)
def setup_method(self):
    self.graph = tf.Graph()
    self.sess = tf.Session(graph=self.graph)
    self.sess.__enter__()
    logger.add_output(NullOutput())
    deterministic.set_seed(1)
    # initialize global singleton_pool for each test case
    from garage.sampler import singleton_pool
    singleton_pool.initialize(1)
def setup_class(cls):
    cls.reset_tf()
    cls.log_dir = tempfile.TemporaryDirectory()
    cls.prev_log_dir = snapshotter.snapshot_dir
    cls.prev_mode = snapshotter.snapshot_mode
    snapshotter.snapshot_dir = cls.log_dir.name
    snapshotter.snapshot_mode = 'all'
    logger.add_output(NullOutput())
def test_log_multitask_performance_task_id():
    lengths = np.array([10, 5, 1, 1])
    batch = TrajectoryBatch(
        EnvSpec(akro.Box(np.array([0., 0., 0.]), np.array([1., 1., 1.])),
                akro.Box(np.array([-1., -1.]), np.array([0., 0.]))),
        observations=np.ones((sum(lengths), 3), dtype=np.float32),
        last_observations=np.ones((len(lengths), 3), dtype=np.float32),
        actions=np.zeros((sum(lengths), 2), dtype=np.float32),
        rewards=np.array([
            0.34026529, 0.58263177, 0.84307509, 0.97651095, 0.81723901,
            0.22631398, 0.03421301, 0.97515046, 0.64311832, 0.65068933,
            0.17657714, 0.04783857, 0.73904013, 0.41364329, 0.52235551,
            0.24203526, 0.43328910
        ]),
        terminals=np.array(
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1], dtype=bool),
        env_infos={
            'success':
            np.array([0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1],
                     dtype=bool),
            'task_id':
            np.array([1] * 10 + [3] * 5 + [1] + [4])
        },
        agent_infos={},
        lengths=lengths)
    log_file = tempfile.NamedTemporaryFile()
    csv_output = dowel.CsvOutput(log_file.name)
    logger.add_output(csv_output)
    log_multitask_performance(7, batch, 0.8, {
        1: 'env1',
        3: 'env2',
        4: 'env3',
        5: 'env4'
    })
    logger.log(tabular)
    logger.dump_output_type(dowel.CsvOutput)
    with open(log_file.name, 'r') as file:
        rows = list(csv.DictReader(file))
    res = {k: float(r) for (k, r) in rows[0].items()}
    assert res['env1/Iteration'] == 7
    assert res['env2/Iteration'] == 7
    assert res['env3/Iteration'] == 7
    assert res['env4/Iteration'] == 7
    assert res['env1/NumTrajs'] == 2
    assert res['env2/NumTrajs'] == 1
    assert res['env3/NumTrajs'] == 1
    assert res['env4/NumTrajs'] == 0
    assert math.isclose(res['env1/SuccessRate'], 0.5)
    assert math.isclose(res['env2/SuccessRate'], 1.0)
    assert math.isclose(res['env3/SuccessRate'], 1.0)
    assert math.isnan(res['env4/SuccessRate'])
    assert math.isnan(res['env4/AverageReturn'])
def setup_method(self): """Setup the Session and default Graph.""" self.graph = tf.Graph() for c in self.graph.collections: self.graph.clear_collection(c) self.graph_manager = self.graph.as_default() self.graph_manager.__enter__() self.sess = tf.compat.v1.Session(graph=self.graph) self.sess_manager = self.sess.as_default() self.sess_manager.__enter__() self.sess.__enter__() logger.add_output(NullOutput()) deterministic.set_seed(1)
def setup_method(self):
    self.graph = tf.Graph()
    for c in self.graph.collections:
        self.graph.clear_collection(c)
    self.graph_manager = self.graph.as_default()
    self.graph_manager.__enter__()
    self.sess = tf.compat.v1.Session(graph=self.graph)
    self.sess_manager = self.sess.as_default()
    self.sess_manager.__enter__()
    self.sess.__enter__()
    logger.add_output(NullOutput())
    deterministic.set_seed(1)
    # initialize global singleton_pool for each test case
    from metarl.sampler import singleton_pool
    singleton_pool.initialize(1)
def test_log_performance():
    lengths = np.array([10, 5, 1, 1])
    batch = EpisodeBatch(
        EnvSpec(akro.Box(np.array([0., 0., 0.]), np.array([1., 1., 1.])),
                akro.Box(np.array([-1., -1.]), np.array([0., 0.]))),
        observations=np.ones((sum(lengths), 3), dtype=np.float32),
        last_observations=np.ones((len(lengths), 3), dtype=np.float32),
        actions=np.zeros((sum(lengths), 2), dtype=np.float32),
        rewards=np.array([
            0.34026529, 0.58263177, 0.84307509, 0.97651095, 0.81723901,
            0.22631398, 0.03421301, 0.97515046, 0.64311832, 0.65068933,
            0.17657714, 0.04783857, 0.73904013, 0.41364329, 0.52235551,
            0.24203526, 0.43328910
        ]),
        step_types=np.array(
            [StepType.FIRST] + [StepType.MID] * (lengths[0] - 2) +
            [StepType.TERMINAL] + [StepType.FIRST] + [StepType.MID] *
            (lengths[1] - 2) + [StepType.TERMINAL] + [StepType.FIRST] +
            [StepType.FIRST],
            dtype=StepType),
        env_infos={
            'success':
            np.array([0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1],
                     dtype=bool)
        },
        agent_infos={},
        lengths=lengths)
    log_file = tempfile.NamedTemporaryFile()
    csv_output = dowel.CsvOutput(log_file.name)
    logger.add_output(csv_output)
    log_performance(7, batch, 0.8, prefix='test_log_performance')
    logger.log(tabular)
    logger.dump_output_type(dowel.CsvOutput)
    with open(log_file.name, 'r') as file:
        rows = list(csv.DictReader(file))
    res = {k: float(r) for (k, r) in rows[0].items()}
    assert res['test_log_performance/Iteration'] == 7
    assert res['test_log_performance/NumEpisodes'] == 4
    assert math.isclose(res['test_log_performance/SuccessRate'], 0.75)
    assert math.isclose(res['test_log_performance/TerminationRate'], 0.5)
    assert math.isclose(res['test_log_performance/AverageDiscountedReturn'],
                        1.1131040640673113)
    assert math.isclose(res['test_log_performance/AverageReturn'],
                        2.1659965525)
    assert math.isclose(res['test_log_performance/StdReturn'],
                        2.354067152038576)
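# A minimal sketch of how log_performance is consumed by a training loop.
# Hedged: `eps` and `self._discount` are assumptions for illustration; the
# real call site depends on the algorithm. log_performance records the
# tabular statistics asserted in the test above and returns the undiscounted
# returns of the batch.
def _train_once_sketch(self, itr, eps):
    undiscounted_returns = log_performance(itr,
                                           eps,
                                           discount=self._discount,
                                           prefix='Evaluation')
    return np.mean(undiscounted_returns)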
def eval(self,
         policy,
         n_episodes=20,
         greedy=True,
         load_from_file=False,
         save_replay=False):
    if load_from_file:
        logger.add_output(dowel.StdOutput())
    logger.log('Evaluating policy, {} episodes, greedy = {} ...'.format(
        n_episodes, greedy))
    n_won = 0
    episode_rewards = []
    pbar = ProgBarCounter(n_episodes)
    for e in range(n_episodes):
        obs = self.reset()
        policy.reset([True])
        info = {'battle_won': False}
        terminated = False
        episode_rewards.append(0)
        while not terminated:
            obs = np.array([obs])  # add [.] for vec_env
            avail_actions = np.array([self.get_avail_actions()])
            actions, agent_infos = policy.get_actions(obs,
                                                      avail_actions,
                                                      greedy=greedy)
            obs, reward, terminated, info = self.step(actions[0])
            if not self.centralized:
                terminated = all(terminated)
            episode_rewards[-1] += np.mean(reward)
        pbar.inc(1)
        if save_replay:
            self.save_replay()
        # In case SC2 restarts during eval, info may lack the 'battle_won'
        # key (KeyError), so guard the lookup.
        if isinstance(info, dict):
            if 'battle_won' in info:
                n_won += 1 if info['battle_won'] else 0
    pbar.stop()
    policy.reset([True])
    win_rate = n_won / n_episodes
    avg_return = np.mean(episode_rewards)
    logger.log('EvalWinRate: {}'.format(win_rate))
    logger.log('EvalAvgReturn: {}'.format(avg_return))
    if not load_from_file:
        tabular.record('EvalWinRate', win_rate)
        tabular.record('EvalAvgReturn', avg_return)
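# Hedged usage sketch for eval() above. The enclosing class and constructor
# (SC2Env, map_name) are hypothetical names for illustration; the policy is
# assumed to have been restored by the caller from a snapshot.
env = SC2Env(map_name='8m')  # hypothetical env wrapper exposing eval()
env.eval(policy,
         n_episodes=20,
         greedy=True,
         load_from_file=True,
         save_replay=False)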
def _make_context(self, *args, **kwargs):
    """Make a context from the template information and variant args.

    Currently, all arguments should be keyword arguments.

    Args:
        args (list): Should be empty.
        kwargs (dict): Keyword arguments for the wrapped function. Will be
            logged to `variant.json`

    Returns:
        ExperimentContext: The created experiment context.

    Raises:
        ValueError: If args is not empty.

    """
    if args:
        raise ValueError('garage.experiment currently only supports '
                         'keyword arguments')
    name = self.name
    if name is None:
        name = self.function.__name__
    if self.name_parameters:
        name = self._augment_name(name, kwargs)
    log_dir = self.log_dir
    if log_dir is None:
        log_dir = ('{data}/local/{prefix}/{name}'.format(
            data=os.path.join(os.getcwd(), 'data'),
            prefix=self.prefix,
            name=name))
    log_dir = _make_sequential_log_dir(log_dir)
    tabular_log_file = os.path.join(log_dir, 'progress.csv')
    text_log_file = os.path.join(log_dir, 'debug.log')
    variant_log_file = os.path.join(log_dir, 'variant.json')
    metadata_log_file = os.path.join(log_dir, 'metadata.json')
    dump_json(variant_log_file, kwargs)
    git_root_path, metadata = get_metadata()
    dump_json(metadata_log_file, metadata)
    if git_root_path and self.archive_launch_repo:
        make_launcher_archive(git_root_path=git_root_path, log_dir=log_dir)
    logger.add_output(dowel.TextOutput(text_log_file))
    logger.add_output(dowel.CsvOutput(tabular_log_file))
    logger.add_output(
        dowel.TensorBoardOutput(log_dir, x_axis='TotalEnvSteps'))
    logger.add_output(dowel.StdOutput())
    logger.push_prefix('[{}] '.format(name))
    logger.log('Logging to {}'.format(log_dir))
    return ExperimentContext(snapshot_dir=log_dir,
                             snapshot_mode=self.snapshot_mode,
                             snapshot_gap=self.snapshot_gap)
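# A minimal usage sketch: _make_context above is invoked internally when a
# function decorated with garage's wrap_experiment is called. The experiment
# body below is a placeholder, and the import path assumes a garage version
# that exports wrap_experiment from the top-level package.
from garage import wrap_experiment
from garage.experiment.deterministic import set_seed


@wrap_experiment(snapshot_mode='last')
def my_experiment(ctxt=None, seed=1):
    """`ctxt` receives the ExperimentContext built by _make_context."""
    set_seed(seed)
    # ... construct env, algo, and trainer, then call trainer.train() ...


my_experiment(seed=1)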
def read_cmd(cls, env_cls):
    logger.add_output(StdOutput())
    parser = argparse.ArgumentParser()
    parser.add_argument('folder', nargs='+')
    # Adaptation parameters
    parser.add_argument('--adapt-rollouts', nargs='?', default=10, type=int)
    parser.add_argument('--test-rollouts', nargs='?', default=10, type=int)
    parser.add_argument('--max-path-length', nargs='?', default=100, type=int)
    # Number of workers
    parser.add_argument('--parallel', nargs='?', default=0, type=int)
    # Skip iterations that already have a meta-testing result.
    parser.add_argument('--skip-exist', action='store_true', default=True)
    # Merge all meta-testing results into meta-test.csv
    parser.add_argument('--merge', action='store_true', default=True)
    # Skip some iterations.
    # e.g. stride=3 samples 1 iteration out of every 3 iterations.
    parser.add_argument('--stride', default=1, type=int)
    args = parser.parse_args()
    meta_train_dirs = args.folder
    workers = args.parallel
    adapt_rollout_per_task = args.adapt_rollouts
    test_rollout_per_task = args.test_rollouts
    max_path_length = args.max_path_length
    skip_existing = args.skip_exist
    to_merge = args.merge
    stride = args.stride
    helper = cls(meta_task_cls=env_cls,
                 max_path_length=max_path_length,
                 adapt_rollout_per_task=adapt_rollout_per_task,
                 test_rollout_per_task=test_rollout_per_task)
    helper.test_many_folders(folders=meta_train_dirs,
                             workers=workers,
                             skip_existing=skip_existing,
                             to_merge=to_merge,
                             stride=stride)
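# Hedged invocation sketch for read_cmd above. The helper class and env class
# names (MetaTestHelper, HalfCheetahDirEnv) are assumptions for illustration.
# A typical command line might look like:
#   python meta_test.py data/local/maml-experiment --parallel 4 --stride 2
if __name__ == '__main__':
    MetaTestHelper.read_cmd(HalfCheetahDirEnv)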
def __enter__(self):
    tabular_log_file = os.path.join(self.log_dir, 'progress.csv')
    text_log_file = os.path.join(self.log_dir, 'debug.log')
    logger.add_output(dowel.TextOutput(text_log_file))
    logger.add_output(dowel.CsvOutput(tabular_log_file))
    logger.add_output(dowel.TensorBoardOutput(self.log_dir))
    logger.add_output(dowel.StdOutput())
    logger.push_prefix('[%s] ' % self.exp_name)
    return self
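# Hedged usage sketch for the __enter__ hook above. The enclosing class name
# (ExperimentLogContext) is hypothetical; it is assumed to hold log_dir and
# exp_name and to define a matching __exit__ that pops the prefix and removes
# the dowel outputs.
with ExperimentLogContext(log_dir='data/local/exp', exp_name='exp') as ctx:
    logger.log('All dowel outputs are now wired to data/local/exp')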
def _make_context(cls, options, **kwargs):
    """Make a context from the template information and variant args.

    Currently, all arguments should be keyword arguments.

    Args:
        options (dict): Options to `wrap_experiment` itself. See the
            function documentation for details.
        kwargs (dict): Keyword arguments for the wrapped function. Will be
            logged to `variant.json`

    Returns:
        ExperimentContext: The created experiment context.

    """
    name = options['name']
    if name is None:
        name = options['function'].__name__
    name = cls._augment_name(options, name, kwargs)
    log_dir = options['log_dir']
    if log_dir is None:
        log_dir = ('{data}/local/{prefix}/{name}'.format(
            data=os.path.join(os.getcwd(), 'data'),
            prefix=options['prefix'],
            name=name))
    if options['use_existing_dir']:
        os.makedirs(log_dir, exist_ok=True)
    else:
        log_dir = _make_sequential_log_dir(log_dir)
    tabular_log_file = os.path.join(log_dir, 'progress.csv')
    text_log_file = os.path.join(log_dir, 'debug.log')
    variant_log_file = os.path.join(log_dir, 'variant.json')
    metadata_log_file = os.path.join(log_dir, 'metadata.json')
    dump_json(variant_log_file, kwargs)
    git_root_path, metadata = get_metadata()
    dump_json(metadata_log_file, metadata)
    if git_root_path and options['archive_launch_repo']:
        make_launcher_archive(git_root_path=git_root_path, log_dir=log_dir)
    logger.add_output(dowel.TextOutput(text_log_file))
    logger.add_output(dowel.CsvOutput(tabular_log_file))
    logger.add_output(
        dowel.TensorBoardOutput(log_dir, x_axis='TotalEnvSteps'))
    logger.add_output(dowel.StdOutput())
    logger.push_prefix('[{}] '.format(name))
    logger.log('Logging to {}'.format(log_dir))
    return ExperimentContext(snapshot_dir=log_dir,
                             snapshot_mode=options['snapshot_mode'],
                             snapshot_gap=options['snapshot_gap'])
def restore_training(log_dir, exp_name, args, env_saved=True, env=None):
    tabular_log_file = os.path.join(
        log_dir, 'progress_restored.{}.{}.csv'.format(
            str(time.time())[:10], socket.gethostname()))
    text_log_file = os.path.join(
        log_dir, 'debug_restored.{}.{}.log'.format(
            str(time.time())[:10], socket.gethostname()))
    logger.add_output(dowel.TextOutput(text_log_file))
    logger.add_output(dowel.CsvOutput(tabular_log_file))
    logger.add_output(dowel.TensorBoardOutput(log_dir))
    logger.add_output(dowel.StdOutput())
    logger.push_prefix('[%s] ' % exp_name)
    ctxt = ExperimentContext(snapshot_dir=log_dir,
                             snapshot_mode='last',
                             snapshot_gap=1)
    runner = LocalRunnerWrapper(ctxt,
                                eval=args.eval_during_training,
                                n_eval_episodes=args.n_eval_episodes,
                                eval_greedy=args.eval_greedy,
                                eval_epoch_freq=args.eval_epoch_freq,
                                save_env=env_saved)
    saved = runner._snapshotter.load(log_dir, 'last')
    runner._setup_args = saved['setup_args']
    runner._train_args = saved['train_args']
    runner._stats = saved['stats']
    set_seed(runner._setup_args.seed)
    algo = saved['algo']
    # Compatibility patch
    if not hasattr(algo, '_clip_grad_norm'):
        setattr(algo, '_clip_grad_norm', args.clip_grad_norm)
    if env_saved:
        env = saved['env']
    runner.setup(env=env,
                 algo=algo,
                 sampler_cls=runner._setup_args.sampler_cls,
                 sampler_args=runner._setup_args.sampler_args)
    runner._train_args.start_epoch = runner._stats.total_epoch + 1
    runner._train_args.n_epochs = (runner._train_args.start_epoch +
                                   args.n_epochs)
    print('\nRestored checkpoint from epoch #{}...'.format(
        runner._train_args.start_epoch))
    print('To be trained for additional {} epochs...'.format(args.n_epochs))
    print('Will be finished at epoch #{}...\n'.format(
        runner._train_args.n_epochs))
    return runner._algo.train(runner)
def ppo_cmb(env, seed, log_dir):
    """Create a test of ContinuousMLPBaseline on PPO.

    Args:
        env (gym_env): Environment of the task.
        seed (int): Random seed for the trial.
        log_dir (str): Log dir path.

    Returns:
        str: Path to the csv file of training results.

    """
    deterministic.set_seed(seed)
    config = tf.compat.v1.ConfigProto(allow_soft_placement=True,
                                      intra_op_parallelism_threads=num_proc,
                                      inter_op_parallelism_threads=num_proc)
    sess = tf.compat.v1.Session(config=config)
    with LocalTFRunner(snapshot_config, sess=sess,
                       max_cpus=num_proc) as runner:
        env = TfEnv(normalize(env))

        policy = GaussianLSTMPolicy(
            env_spec=env.spec,
            hidden_dim=policy_params['policy_hidden_sizes'],
            hidden_nonlinearity=policy_params['hidden_nonlinearity'],
        )

        baseline = ContinuousMLPBaseline(
            env_spec=env.spec,
            regressor_args=baseline_params['regressor_args'],
        )

        algo = PPO(env_spec=env.spec,
                   policy=policy,
                   baseline=baseline,
                   max_path_length=algo_params['max_path_length'],
                   discount=algo_params['discount'],
                   gae_lambda=algo_params['gae_lambda'],
                   lr_clip_range=algo_params['lr_clip_range'],
                   entropy_method=algo_params['entropy_method'],
                   policy_ent_coeff=algo_params['policy_ent_coeff'],
                   optimizer_args=algo_params['optimizer_args'],
                   center_adv=algo_params['center_adv'],
                   stop_entropy_gradient=True)

        # Set up logger since we are not using run_experiment
        tabular_log_file = osp.join(log_dir, 'progress.csv')
        dowel_logger.add_output(dowel.StdOutput())
        dowel_logger.add_output(dowel.CsvOutput(tabular_log_file))
        dowel_logger.add_output(dowel.TensorBoardOutput(log_dir))

        runner.setup(algo,
                     env,
                     sampler_args=dict(n_envs=algo_params['n_envs']))
        runner.train(n_epochs=algo_params['n_epochs'],
                     batch_size=algo_params['n_rollout_steps'])

        dowel_logger.remove_all()

        return tabular_log_file
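# Hedged driver sketch for the benchmark helpers in this file: each run_*
# function takes (env, seed, log_dir) and returns the path of the csv it
# wrote. The env id, seed list, and directory layout below are assumptions
# for illustration only.
import gym

for trial, seed in enumerate([1, 22, 333]):
    env = gym.make('HalfCheetah-v2')
    csv_path = ppo_cmb(env,
                       seed,
                       log_dir='data/ppo_cmb/trial_{}'.format(trial))
    print('Wrote results to {}'.format(csv_path))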
def _make_context(self, *args, **kwargs):
    """Make a context from the template information and variant args.

    Currently, all arguments should be keyword arguments.

    Args:
        args (list): Should be empty.
        kwargs (dict): Keyword arguments for the wrapped function. Will be
            logged to `variant.json`

    Returns:
        ExperimentContext: The created experiment context.

    Raises:
        ValueError: If args is not empty.

    """
    if args:
        raise ValueError('metarl.experiment currently only supports '
                         'keyword arguments')
    log_dir = self.log_dir
    if log_dir is None:
        name = self.name
        if name is None:
            name = self.function.__name__
            self.name = self.function.__name__
        log_dir = ('{data}/local/{prefix}/{name}/{time}'.format(
            data=osp.join(os.getcwd(), 'data'),
            prefix=self.prefix,
            name=name,
            time=timestamp))
    log_dir = _make_sequential_log_dir(log_dir)
    tabular_log_file = os.path.join(log_dir, 'progress.csv')
    text_log_file = os.path.join(log_dir, 'debug.log')
    variant_log_file = os.path.join(log_dir, 'variant.json')
    dump_json(variant_log_file, kwargs)
    logger.add_output(dowel.TextOutput(text_log_file))
    logger.add_output(dowel.CsvOutput(tabular_log_file))
    logger.add_output(
        dowel.TensorBoardOutput(log_dir, x_axis='TotalEnvSteps'))
    logger.add_output(dowel.StdOutput())
    logger.push_prefix('[%s] ' % self.name)
    return ExperimentContext(snapshot_dir=log_dir,
                             snapshot_mode=self.snapshot_mode,
                             snapshot_gap=self.snapshot_gap)
def run_metarl(env, seed, log_dir):
    """Create a metarl model and start training.

    Replace the ddpg with the algorithm you want to run.

    :param env: Environment of the task.
    :param seed: Random seed for the trial.
    :param log_dir: Log dir path.
    :return: Path to the output csv file.
    """
    deterministic.set_seed(seed)

    with LocalTFRunner(snapshot_config) as runner:
        env = TfEnv(normalize(env))
        # Set up params for ddpg
        action_noise = OUStrategy(env.spec, sigma=params['sigma'])

        policy = ContinuousMLPPolicy(
            env_spec=env.spec,
            hidden_sizes=params['policy_hidden_sizes'],
            hidden_nonlinearity=tf.nn.relu,
            output_nonlinearity=tf.nn.tanh)

        qf = ContinuousMLPQFunction(env_spec=env.spec,
                                    hidden_sizes=params['qf_hidden_sizes'],
                                    hidden_nonlinearity=tf.nn.relu)

        replay_buffer = SimpleReplayBuffer(
            env_spec=env.spec,
            size_in_transitions=params['replay_buffer_size'],
            time_horizon=params['n_rollout_steps'])

        ddpg = DDPG(env_spec=env.spec,
                    policy=policy,
                    qf=qf,
                    replay_buffer=replay_buffer,
                    steps_per_epoch=params['steps_per_epoch'],
                    policy_lr=params['policy_lr'],
                    qf_lr=params['qf_lr'],
                    target_update_tau=params['tau'],
                    n_train_steps=params['n_train_steps'],
                    discount=params['discount'],
                    min_buffer_size=int(1e4),
                    exploration_strategy=action_noise,
                    policy_optimizer=tf.train.AdamOptimizer,
                    qf_optimizer=tf.train.AdamOptimizer)

        # Set up logger since we are not using run_experiment
        tabular_log_file = osp.join(log_dir, 'progress.csv')
        tensorboard_log_dir = osp.join(log_dir)
        dowel_logger.add_output(dowel.StdOutput())
        dowel_logger.add_output(dowel.CsvOutput(tabular_log_file))
        dowel_logger.add_output(dowel.TensorBoardOutput(tensorboard_log_dir))

        runner.setup(ddpg, env)
        runner.train(n_epochs=params['n_epochs'],
                     batch_size=params['n_rollout_steps'])

        dowel_logger.remove_all()

        return tabular_log_file
def run_garage_pytorch(env, seed, log_dir): """Create garage PyTorch VPG model and training. Args: env (dict): Environment of the task. seed (int): Random positive integer for the trial. log_dir (str): Log dir path. Returns: str: Path to output csv file """ env = TfEnv(normalize(env)) deterministic.set_seed(seed) runner = LocalRunner(snapshot_config) policy = PyTorch_GMP(env.spec, hidden_sizes=hyper_parameters['hidden_sizes'], hidden_nonlinearity=torch.tanh, output_nonlinearity=None) value_function = GaussianMLPValueFunction(env_spec=env.spec, hidden_sizes=(32, 32), hidden_nonlinearity=torch.tanh, output_nonlinearity=None) policy_optimizer = OptimizerWrapper((torch.optim.Adam, dict(lr=2.5e-4)), policy, max_optimization_epochs=10, minibatch_size=64) vf_optimizer = OptimizerWrapper((torch.optim.Adam, dict(lr=2.5e-4)), value_function, max_optimization_epochs=10, minibatch_size=64) algo = PyTorch_VPG(env_spec=env.spec, policy=policy, value_function=value_function, policy_optimizer=policy_optimizer, vf_optimizer=vf_optimizer, max_path_length=hyper_parameters['max_path_length'], discount=hyper_parameters['discount'], center_adv=hyper_parameters['center_adv']) # Set up logger since we are not using run_experiment tabular_log_file = osp.join(log_dir, 'progress.csv') dowel_logger.add_output(dowel.StdOutput()) dowel_logger.add_output(dowel.CsvOutput(tabular_log_file)) dowel_logger.add_output(dowel.TensorBoardOutput(log_dir)) runner.setup(algo, env) runner.train(n_epochs=hyper_parameters['n_epochs'], batch_size=hyper_parameters['batch_size']) dowel_logger.remove_all() return tabular_log_file
def test_meta_evaluator_n_traj():
    set_seed(100)
    tasks = SetTaskSampler(PointEnv)
    max_path_length = 200
    env = MetaRLEnv(PointEnv())
    n_traj = 3
    with tempfile.TemporaryDirectory() as log_dir_name:
        runner = LocalRunner(
            SnapshotConfig(snapshot_dir=log_dir_name,
                           snapshot_mode='last',
                           snapshot_gap=1))
        algo = MockAlgo(env, max_path_length, n_traj)
        runner.setup(algo, env)
        meta_eval = MetaEvaluator(runner,
                                  test_task_sampler=tasks,
                                  max_path_length=max_path_length,
                                  n_test_tasks=10,
                                  n_exploration_traj=n_traj)
        log_file = tempfile.NamedTemporaryFile()
        csv_output = CsvOutput(log_file.name)
        logger.add_output(csv_output)
        meta_eval.evaluate(algo)
def run_metarl(env, seed, log_dir):
    """Create a metarl model and start training.

    Replace the sac with the algorithm you want to run.

    :param env: Environment of the task.
    :param seed: Random seed for the trial.
    :param log_dir: Log dir path.
    :return: Path to the output csv file.
    """
    deterministic.set_seed(seed)

    runner = LocalRunner(snapshot_config)

    # Set up params for sac
    policy = TanhGaussianMLPPolicy2(
        env_spec=env.spec,
        hidden_sizes=params['policy_hidden_sizes'],
        hidden_nonlinearity=nn.ReLU,
        output_nonlinearity=None)

    qf1 = ContinuousMLPQFunction(env_spec=env.spec,
                                 hidden_sizes=params['qf_hidden_sizes'],
                                 hidden_nonlinearity=F.relu)

    qf2 = ContinuousMLPQFunction(env_spec=env.spec,
                                 hidden_sizes=params['qf_hidden_sizes'],
                                 hidden_nonlinearity=F.relu)

    replay_buffer = SACReplayBuffer(env_spec=env.spec,
                                    max_size=params['replay_buffer_size'])

    sampler_args = {
        'agent': policy,
        'max_path_length': 1000,
    }

    sac = SAC(env_spec=env.spec,
              policy=policy,
              qf1=qf1,
              qf2=qf2,
              gradient_steps_per_itr=params['gradient_steps_per_itr'],
              replay_buffer=replay_buffer,
              buffer_batch_size=params['buffer_batch_size'])

    # Set up logger since we are not using run_experiment
    tabular_log_file = osp.join(log_dir, 'progress.csv')
    tensorboard_log_dir = osp.join(log_dir)
    dowel_logger.add_output(dowel.StdOutput())
    dowel_logger.add_output(dowel.CsvOutput(tabular_log_file))
    dowel_logger.add_output(dowel.TensorBoardOutput(tensorboard_log_dir))

    runner.setup(algo=sac,
                 env=env,
                 sampler_cls=SimpleSampler,
                 sampler_args=sampler_args)
    runner.train(n_epochs=params['n_epochs'],
                 batch_size=params['gradient_steps_per_itr'])

    dowel_logger.remove_all()

    return tabular_log_file
def run_garage(env, seed, log_dir):
    """Create a garage model and start training.

    Replace the trpo with the algorithm you want to run.

    :param env: Environment of the task.
    :param seed: Random seed for the trial.
    :param log_dir: Log dir path.
    :return: Path to the output csv file.
    """
    deterministic.set_seed(seed)

    with LocalRunner() as runner:
        env = TfEnv(normalize(env))

        policy = GaussianMLPPolicy(
            env_spec=env.spec,
            hidden_sizes=(32, 32),
            hidden_nonlinearity=tf.nn.tanh,
            output_nonlinearity=None,
        )

        baseline = GaussianMLPBaseline(
            env_spec=env.spec,
            regressor_args=dict(
                hidden_sizes=(32, 32),
                use_trust_region=True,
            ),
        )

        algo = TRPO(
            env_spec=env.spec,
            policy=policy,
            baseline=baseline,
            max_path_length=100,
            discount=0.99,
            gae_lambda=0.98,
            max_kl_step=0.01,
            policy_ent_coeff=0.0,
            plot=False,
        )

        # Set up logger since we are not using run_experiment
        tabular_log_file = osp.join(log_dir, 'progress.csv')
        dowel_logger.add_output(dowel.CsvOutput(tabular_log_file))
        dowel_logger.add_output(dowel.StdOutput())
        dowel_logger.add_output(dowel.TensorBoardOutput(log_dir))

        runner.setup(algo, env)
        runner.train(n_epochs=976, batch_size=1024)

        dowel_logger.remove_all()

        return tabular_log_file
def run_garage(env, seed, log_dir): """Create garage model and training. Replace the ppo with the algorithm you want to run. Args: env (gym.Env): Environment of the task. seed (int): Random seed for the trial. log_dir (str): Log dir path. Returns: str: Path to output csv file """ deterministic.set_seed(seed) config = tf.compat.v1.ConfigProto(allow_soft_placement=True, intra_op_parallelism_threads=12, inter_op_parallelism_threads=12) sess = tf.compat.v1.Session(config=config) with LocalTFRunner(snapshot_config, sess=sess, max_cpus=12) as runner: env = TfEnv(normalize(env)) policy = CategoricalGRUPolicy( env_spec=env.spec, hidden_dim=32, hidden_nonlinearity=tf.nn.tanh, ) baseline = LinearFeatureBaseline(env_spec=env.spec) algo = PPO( env_spec=env.spec, policy=policy, baseline=baseline, max_path_length=100, discount=0.99, gae_lambda=0.95, lr_clip_range=0.2, policy_ent_coeff=0.0, optimizer_args=dict( batch_size=32, max_epochs=10, tf_optimizer_args=dict(learning_rate=1e-3), ), ) # Set up logger since we are not using run_experiment tabular_log_file = osp.join(log_dir, 'progress.csv') dowel_logger.add_output(dowel.StdOutput()) dowel_logger.add_output(dowel.CsvOutput(tabular_log_file)) dowel_logger.add_output(dowel.TensorBoardOutput(log_dir)) runner.setup(algo, env, sampler_args=dict(n_envs=12)) runner.train(n_epochs=488, batch_size=2048) dowel_logger.remove_all() return tabular_log_file
def run_garage_tf(env, seed, log_dir):
    """Create garage TensorFlow PPO model and training.

    Args:
        env (gym.Env): Environment of the task.
        seed (int): Random positive integer for the trial.
        log_dir (str): Log dir path.

    Returns:
        str: Path to output csv file

    """
    deterministic.set_seed(seed)

    with LocalTFRunner(snapshot_config) as runner:
        env = TfEnv(normalize(env))

        policy = TF_GMP(
            env_spec=env.spec,
            hidden_sizes=hyper_parameters['hidden_sizes'],
            hidden_nonlinearity=tf.nn.tanh,
            output_nonlinearity=None,
        )

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        algo = TF_PPO(env_spec=env.spec,
                      policy=policy,
                      baseline=baseline,
                      max_path_length=hyper_parameters['max_path_length'],
                      discount=hyper_parameters['discount'],
                      gae_lambda=hyper_parameters['gae_lambda'],
                      center_adv=hyper_parameters['center_adv'],
                      lr_clip_range=hyper_parameters['lr_clip_range'],
                      optimizer_args=dict(
                          batch_size=None,
                          max_epochs=1,
                          tf_optimizer_args=dict(
                              learning_rate=hyper_parameters['learning_rate']),
                          verbose=True))  # yapf: disable

        # Set up logger since we are not using run_experiment
        tabular_log_file = osp.join(log_dir, 'progress.csv')
        dowel_logger.add_output(dowel.StdOutput())
        dowel_logger.add_output(dowel.CsvOutput(tabular_log_file))
        dowel_logger.add_output(dowel.TensorBoardOutput(log_dir))

        runner.setup(algo, env)
        runner.train(n_epochs=hyper_parameters['n_epochs'],
                     batch_size=hyper_parameters['batch_size'])

        dowel_logger.remove_all()

        return tabular_log_file
def run_metarl(env, seed, log_dir): """Create metarl PyTorch MAML model and training. Args: env (MetaRLEnv): Environment of the task. seed (int): Random positive integer for the trial. log_dir (str): Log dir path. Returns: str: Path to output csv file """ deterministic.set_seed(seed) policy = GaussianMLPPolicy( env_spec=env.spec, hidden_sizes=hyper_parameters['hidden_sizes'], hidden_nonlinearity=torch.tanh, output_nonlinearity=None, ) baseline = LinearFeatureBaseline(env_spec=env.spec) algo = MAMLTRPO(env=env, policy=policy, baseline=baseline, max_path_length=hyper_parameters['max_path_length'], discount=hyper_parameters['discount'], gae_lambda=hyper_parameters['gae_lambda'], meta_batch_size=hyper_parameters['meta_batch_size'], inner_lr=hyper_parameters['inner_lr'], max_kl_step=hyper_parameters['max_kl'], num_grad_updates=hyper_parameters['num_grad_update']) # Set up logger since we are not using run_experiment tabular_log_file = osp.join(log_dir, 'progress.csv') dowel_logger.add_output(dowel.StdOutput()) dowel_logger.add_output(dowel.CsvOutput(tabular_log_file)) dowel_logger.add_output(dowel.TensorBoardOutput(log_dir)) snapshot_config = SnapshotConfig(snapshot_dir=log_dir, snapshot_mode='all', snapshot_gap=1) runner = LocalRunner(snapshot_config=snapshot_config) runner.setup(algo, env, sampler_args=dict(n_envs=5)) runner.train(n_epochs=hyper_parameters['n_epochs'], batch_size=(hyper_parameters['fast_batch_size'] * hyper_parameters['max_path_length'])) dowel_logger.remove_all() return tabular_log_file
def run_garage_pytorch(env, seed, log_dir): """Create garage PyTorch PPO model and training. Args: env (dict): Environment of the task. seed (int): Random positive integer for the trial. log_dir (str): Log dir path. Returns: str: Path to output csv file """ env = TfEnv(normalize(env)) deterministic.set_seed(seed) runner = LocalRunner(snapshot_config) policy = PyTorch_GMP(env.spec, hidden_sizes=(32, 32), hidden_nonlinearity=torch.tanh, output_nonlinearity=None) value_functions = LinearFeatureBaseline(env_spec=env.spec) algo = PyTorch_PPO(env_spec=env.spec, policy=policy, value_function=value_functions, optimizer=torch.optim.Adam, policy_lr=3e-4, max_path_length=hyper_parameters['max_path_length'], discount=0.99, gae_lambda=0.95, center_adv=True, lr_clip_range=0.2, minibatch_size=128, max_optimization_epochs=10) # Set up logger since we are not using run_experiment tabular_log_file = osp.join(log_dir, 'progress.csv') dowel_logger.add_output(dowel.StdOutput()) dowel_logger.add_output(dowel.CsvOutput(tabular_log_file)) dowel_logger.add_output(dowel.TensorBoardOutput(log_dir)) runner.setup(algo, env) runner.train(n_epochs=hyper_parameters['n_epochs'], batch_size=hyper_parameters['batch_size']) dowel_logger.remove_all() return tabular_log_file
def run_garage_pytorch(env, seed, log_dir): """Create garage PyTorch PPO model and training. Args: env (dict): Environment of the task. seed (int): Random positive integer for the trial. log_dir (str): Log dir path. Returns: str: Path to output csv file """ env = TfEnv(normalize(env)) deterministic.set_seed(seed) runner = LocalRunner(snapshot_config) policy = PyTorch_GMP(env.spec, hidden_sizes=hyper_parameters['hidden_sizes'], hidden_nonlinearity=torch.tanh, output_nonlinearity=None) baseline = LinearFeatureBaseline(env_spec=env.spec) algo = PyTorch_TRPO( env_spec=env.spec, policy=policy, baseline=baseline, max_path_length=hyper_parameters['max_path_length'], discount=hyper_parameters['discount'], gae_lambda=hyper_parameters['gae_lambda'], max_kl=hyper_parameters['max_kl'], ) # Set up logger since we are not using run_experiment tabular_log_file = osp.join(log_dir, 'progress.csv') dowel_logger.add_output(dowel.StdOutput()) dowel_logger.add_output(dowel.CsvOutput(tabular_log_file)) dowel_logger.add_output(dowel.TensorBoardOutput(log_dir)) runner.setup(algo, env) runner.train(n_epochs=hyper_parameters['n_epochs'], batch_size=hyper_parameters['batch_size']) dowel_logger.remove_all() return tabular_log_file