def start_exp(config_path):
    logger.add_output(dowel.StdOutput())
    deterministic.set_seed(21)
    ex.observers.append(FileStorageObserver("runs"))
    ex.add_config(config_path)
    ex.run()
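# For reference, a minimal sketch of the module-level objects start_exp
# assumes. The experiment name and the body of the main function are
# hypothetical; only the sacred/dowel wiring is taken from the code above.
import dowel
from dowel import logger
from sacred import Experiment
from sacred.observers import FileStorageObserver
from garage.experiment import deterministic

ex = Experiment('rl_experiment')  # hypothetical name


@ex.main
def _ex_main(_config):
    # Training entry point invoked by ex.run(); intentionally left empty here.
    pass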
def run_experiment(argv):
    now = datetime.datetime.now(dateutil.tz.tzlocal())

    # Avoid name clashes when running distributed jobs.
    rand_id = str(uuid.uuid4())[:5]
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S_%f_%Z')
    default_exp_name = 'experiment_%s_%s' % (timestamp, rand_id)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--n_parallel',
        type=int,
        default=1,
        help=('Number of parallel workers to perform rollouts. '
              "0 => don't start any workers"))
    parser.add_argument('--exp_name',
                        type=str,
                        default=default_exp_name,
                        help='Name of the experiment.')
    parser.add_argument('--log_dir',
                        type=str,
                        default=None,
                        help='Path to save the log and iteration snapshot.')
    parser.add_argument('--snapshot_mode',
                        type=str,
                        default='all',
                        help='Mode to save the snapshot. Can be either "all" '
                             '(all iterations will be saved), "last" (only '
                             'the last iteration will be saved), "gap" (every '
                             '`snapshot_gap` iterations are saved), or "none" '
                             '(do not save snapshots).')
    parser.add_argument('--snapshot_gap',
                        type=int,
                        default=1,
                        help='Gap between snapshot iterations.')
    parser.add_argument('--tabular_log_file',
                        type=str,
                        default='progress.csv',
                        help='Name of the tabular log file (in csv).')
    parser.add_argument('--text_log_file',
                        type=str,
                        default='debug.log',
                        help='Name of the text log file (in pure text).')
    parser.add_argument('--tensorboard_step_key',
                        type=str,
                        default=None,
                        help='Name of the step key in tensorboard_summary.')
    parser.add_argument('--params_log_file',
                        type=str,
                        default='params.json',
                        help='Name of the parameter log file (in json).')
    parser.add_argument('--variant_log_file',
                        type=str,
                        default='variant.json',
                        help='Name of the variant log file (in json).')
    parser.add_argument(
        '--resume_from_dir',
        type=str,
        default=None,
        help='Directory of the pickle file to resume experiment from.')
    parser.add_argument('--resume_epoch',
                        type=str,
                        default=None,
                        help='Index of iteration to restore from. '
                             'Can be "first", "last" or a number. '
                             'Not applicable when snapshot_mode="last".')
    parser.add_argument('--plot',
                        type=ast.literal_eval,
                        default=False,
                        help='Whether to plot the iteration results.')
    parser.add_argument(
        '--log_tabular_only',
        type=ast.literal_eval,
        default=False,
        help='Print only the tabular log information (in a horizontal format).')
    parser.add_argument('--seed', type=int, help='Random seed for numpy.')
    parser.add_argument('--args_data',
                        type=str,
                        help='Pickled data for objects.')
    parser.add_argument('--variant_data',
                        type=str,
                        help='Pickled data for variant configuration.')
    parser.add_argument('--use_cloudpickle',
                        type=ast.literal_eval,
                        default=False)

    args = parser.parse_args(argv[1:])

    if args.seed is not None:
        deterministic.set_seed(args.seed)

    # SIGINT is blocked for all processes created in parallel_sampler to avoid
    # the creation of sleeping and zombie processes.
    #
    # If the user interrupts run_experiment, there's a chance some processes
    # won't die due to a deadlock condition where one of the children in the
    # parallel sampler exits without releasing a lock after it catches SIGINT.
    #
    # Later the parent tries to acquire the same lock to proceed with its
    # cleanup, but it remains sleeping, waiting for the lock to be released.
    # In the meantime, all the processes in the parallel sampler remain in the
    # zombie state since the parent cannot proceed with their cleanup.
    with mask_signals([signal.SIGINT]):
        if args.n_parallel > 0:
            parallel_sampler.initialize(n_parallel=args.n_parallel)
            if args.seed is not None:
                parallel_sampler.set_seed(args.seed)

        if not args.plot:
            garage.plotter.Plotter.disable()
            garage.tf.plotter.Plotter.disable()

        if args.log_dir is None:
            if args.resume_from_dir is None:
                log_dir = osp.join(osp.join(os.getcwd(), 'data'),
                                   args.exp_name)
            else:
                log_dir = args.resume_from_dir
        else:
            log_dir = args.log_dir

        tabular_log_file = osp.join(log_dir, args.tabular_log_file)
        text_log_file = osp.join(log_dir, args.text_log_file)
        params_log_file = osp.join(log_dir, args.params_log_file)

        if args.variant_data is not None:
            variant_data = pickle.loads(base64.b64decode(args.variant_data))
            variant_log_file = osp.join(log_dir, args.variant_log_file)
            dump_variant(variant_log_file, variant_data)
        else:
            variant_data = None

        if not args.use_cloudpickle:
            log_parameters(params_log_file, args)

        logger.add_output(dowel.TextOutput(text_log_file))
        logger.add_output(dowel.CsvOutput(tabular_log_file))
        logger.add_output(dowel.TensorBoardOutput(log_dir))
        logger.add_output(dowel.StdOutput())

        prev_snapshot_dir = snapshotter.snapshot_dir
        prev_mode = snapshotter.snapshot_mode
        snapshotter.snapshot_dir = log_dir
        snapshotter.snapshot_mode = args.snapshot_mode
        snapshotter.snapshot_gap = args.snapshot_gap

        logger.push_prefix('[%s] ' % args.exp_name)

        if args.resume_from_dir is not None:
            with LocalRunner() as runner:
                runner.restore(args.resume_from_dir,
                               from_epoch=args.resume_epoch)
                runner.resume()
        else:
            # Decode the serialized experiment call passed on the
            # command line.
            if args.use_cloudpickle:
                import cloudpickle
                method_call = cloudpickle.loads(
                    base64.b64decode(args.args_data))
                try:
                    method_call(variant_data)
                except BaseException:
                    children = garage.plotter.Plotter.get_plotters()
                    children += garage.tf.plotter.Plotter.get_plotters()
                    if args.n_parallel > 0:
                        children += [parallel_sampler]
                    child_proc_shutdown(children)
                    raise
            else:
                data = pickle.loads(base64.b64decode(args.args_data))
                maybe_iter = concretize(data)
                if is_iterable(maybe_iter):
                    for _ in maybe_iter:
                        pass

        snapshotter.snapshot_mode = prev_mode
        snapshotter.snapshot_dir = prev_snapshot_dir
        logger.remove_all()
        logger.pop_prefix()
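# mask_signals is used above but defined elsewhere; this is one plausible
# implementation, assuming a POSIX platform (signal.pthread_sigmask is not
# available on Windows).
import contextlib
import signal


@contextlib.contextmanager
def mask_signals(signals):
    """Block the given signals for the duration of the with-block."""
    old_mask = signal.pthread_sigmask(signal.SIG_BLOCK, set(signals))
    try:
        yield
    finally:
        # Restore the signal mask that was in effect before entering.
        signal.pthread_sigmask(signal.SIG_SETMASK, old_mask)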
def run_metarl(env, envs, tasks, seed, log_dir):
    """Create metarl TensorFlow PPO model and training.

    Args:
        env (dict): Environment of the task.
        envs (list): Training environments passed to the runner.
        tasks (TaskSampler): Sampler for the training tasks.
        seed (int): Random positive integer for the trial.
        log_dir (str): Log dir path.

    Returns:
        str: Path to output csv file.

    """
    deterministic.set_seed(seed)
    snapshot_config = SnapshotConfig(snapshot_dir=log_dir,
                                     snapshot_mode='gap',
                                     snapshot_gap=10)
    with LocalTFRunner(snapshot_config) as runner:
        policy = GaussianGRUPolicy(
            hidden_dims=hyper_parameters['hidden_sizes'],
            env_spec=env.spec,
            state_include_action=False)

        baseline = MetaRLLinearFeatureBaseline(env_spec=env.spec)

        inner_algo = RL2PPO(
            env_spec=env.spec,
            policy=policy,
            baseline=baseline,
            max_path_length=hyper_parameters['max_path_length'] *
            hyper_parameters['rollout_per_task'],
            discount=hyper_parameters['discount'],
            gae_lambda=hyper_parameters['gae_lambda'],
            lr_clip_range=hyper_parameters['lr_clip_range'],
            optimizer_args=dict(
                max_epochs=hyper_parameters['optimizer_max_epochs'],
                tf_optimizer_args=dict(
                    learning_rate=hyper_parameters['optimizer_lr'], ),
            ))

        # Need to pass task names if meta_batch_size < number of tasks.
        task_names = list(ML45_ENVS['train'].keys())
        algo = RL2(policy=policy,
                   inner_algo=inner_algo,
                   max_path_length=hyper_parameters['max_path_length'],
                   meta_batch_size=hyper_parameters['meta_batch_size'],
                   task_sampler=tasks,
                   task_names=None if hyper_parameters['meta_batch_size'] >=
                   len(task_names) else task_names)

        # Set up logger since we are not using run_experiment
        tabular_log_file = osp.join(log_dir, 'progress.csv')
        text_log_file = osp.join(log_dir, 'debug.log')
        dowel_logger.add_output(dowel.TextOutput(text_log_file))
        dowel_logger.add_output(dowel.CsvOutput(tabular_log_file))
        dowel_logger.add_output(dowel.StdOutput())
        dowel_logger.add_output(dowel.TensorBoardOutput(log_dir))

        runner.setup(
            algo,
            envs,
            sampler_cls=hyper_parameters['sampler_cls'],
            n_workers=hyper_parameters['meta_batch_size'],
            worker_class=RL2Worker,
            sampler_args=dict(
                use_all_workers=hyper_parameters['use_all_workers']),
            worker_args=dict(
                n_paths_per_trial=hyper_parameters['rollout_per_task']))

        # Meta evaluator on the held-out test tasks.
        env_obs_dim = [
            env().observation_space.shape[0]
            for (_, env) in ML45_ENVS['test'].items()
        ]
        max_obs_dim = max(env_obs_dim)
        ML_test_envs = [
            TaskIdWrapper(RL2Env(
                env(*ML45_ARGS['test'][task]['args'],
                    **ML45_ARGS['test'][task]['kwargs']), max_obs_dim),
                          task_id=task_id,
                          task_name=task)
            for (task_id, (task, env)) in enumerate(ML45_ENVS['test'].items())
        ]
        test_tasks = task_sampler.EnvPoolSampler(ML_test_envs)
        test_tasks.grow_pool(hyper_parameters['n_test_tasks'])

        test_task_names = list(ML45_ENVS['test'].keys())

        runner.setup_meta_evaluator(
            test_task_sampler=test_tasks,
            n_exploration_traj=hyper_parameters['rollout_per_task'],
            n_test_rollouts=hyper_parameters['test_rollout_per_task'],
            n_test_tasks=hyper_parameters['n_test_tasks'],
            n_workers=hyper_parameters['n_test_tasks'],
            test_task_names=None if hyper_parameters['n_test_tasks'] >=
            len(test_task_names) else test_task_names)

        runner.train(n_epochs=hyper_parameters['n_itr'],
                     batch_size=hyper_parameters['meta_batch_size'] *
                     hyper_parameters['rollout_per_task'] *
                     hyper_parameters['max_path_length'])

        dowel_logger.remove_all()

        return tabular_log_file
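# Each run_* helper in this file wires up the dowel logger by hand because it
# bypasses run_experiment. A minimal, self-contained sketch of that logging
# lifecycle (the file name and loop are illustrative only):
import dowel
from dowel import logger, tabular


def _dowel_lifecycle_demo():
    logger.add_output(dowel.StdOutput())
    logger.add_output(dowel.CsvOutput('progress.csv'))
    for epoch in range(3):
        tabular.record('Epoch', epoch)
        logger.log(tabular)  # queue the tabular data
        logger.dump_all()  # flush every registered output
    logger.remove_all()  # detach outputs so reruns don't double-log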
def main(args):
    import dowel
    from dowel import logger, tabular

    training.utility.set_up_logging()

    stages = {'500k': 'model.ckpt-2502500'}
    # stages = {'1000k': 'model.ckpt-5005000'}
    num_traj = 10
    # stages = {'100k': 'model.ckpt-500500', '500k': 'model.ckpt-2502500'}
    # stages = {'1M': 'model.ckpt-5005000'}
    # stages = {'final': 'model.ckpt-2652650'}
    # stages = {'100k': 'model.ckpt-500500', '500k': 'model.ckpt-2502500',
    #           '1M': 'model.ckpt-5005000'}
    # stages = {'100k': 'model.ckpt-600500', '500k': 'model.ckpt-3002500',
    #           'final': 'model.ckpt-3182650'}

    # methods = ['weighted_100']
    # methods = ['aug7']
    methods = ['baseline3']
    # rival_method = 'baseline3'
    rival_method = 'aug7'
    rival_runs = 5
    base_dir = 'benchmark'
    envs = ['finger_spin']
    # envs = ['cartpole_swingup']
    # envs = ['finger_spin', 'cartpole_swingup', 'cheetah_run', 'cup_catch']
    # envs = ['finger_spin', 'cartpole_swingup', 'reacher_easy', 'cheetah_run']
    # envs = ['cartpole_swingup', 'cheetah_run', 'walker_walk', 'cup_catch']
    # envs = ['finger_spin', 'cartpole_swingup', 'reacher_easy', 'cheetah_run',
    #         'walker_walk', 'cup_catch']

    if not check_finish(base_dir, stages, methods, envs, args.num_runs):
        exit()

    for pref, chkpt in stages.items():
        print(pref, 'begin')
        logger.add_output(dowel.StdOutput())
        logger.add_output(dowel.CsvOutput('benchmark_{}.csv'.format(pref)))
        for env in envs:
            tabular.record('Env', env)
            for method in methods:
                for id in range(rival_runs):
                    means, stds, all_scores = [], [], []
                    with args.params.unlocked:
                        args.params.chkpt = chkpt
                        args.params.tasks = [env]
                        args.params.planner_horizon = 12
                        args.params.eval_ratio = 1 / num_traj
                        # args.params.r_loss = 'contra'
                        # args.params.aug = 'rad'
                        args.params.planner = 'dual2'
                        args.params.rival = '{}/{}/00{}'.format(
                            env, rival_method, id + 1)
                    experiment = training.Experiment(
                        os.path.join(base_dir, env, method),
                        process_fn=functools.partial(process, args=args),
                        num_runs=args.num_runs,
                        ping_every=args.ping_every,
                        resume_runs=args.resume_runs,
                        planner=args.params.planner,
                        task_str=env)
                    for i, run in enumerate(experiment):
                        scores = []
                        # Use a separate counter so the run index `i` is not
                        # shadowed by the trajectory index.
                        for t, unused_score in enumerate(run):
                            print('unused', unused_score)
                            scores.append(unused_score)
                            if t == num_traj - 1:
                                break
                        means.append(np.mean(scores))
                        stds.append(np.std(scores))
                        all_scores.append(scores)
                        print(means)
                        # if args.params.planner != 'cem':
                        #     exit()
                        if args.params.planner == 'cem_eval':
                            np.save(
                                os.path.join(
                                    args.logdir, env, method,
                                    '00{}/scores_{}_cem.npy'.format(i, pref)),
                                np.array(all_scores))
                    mean, std = np.mean(means), np.std(means)
                    print('{} {}+/-{}'.format(method, int(mean), int(std)))
                    if mean > 0:
                        tabular.record(method,
                                       '{}+/-{}'.format(int(mean), int(std)))
                        np.save(
                            os.path.join(args.logdir, env, method,
                                         'scores_{}.npy'.format(pref)),
                            np.array(all_scores))
        logger.log(tabular)
        logger.dump_all()
        logger.remove_all()
def main(args):
    if args.output_folder is not None:
        if not os.path.exists(args.output_folder):
            raise ValueError(
                "The folder with the training files does not exist")

        policy_filename = os.path.join(args.output_folder, 'policy.th')
        dynamics_filename = os.path.join(args.output_folder, 'dynamics.th')
        config_filename = os.path.join(args.output_folder, 'config.json')
        # eval_filename = os.path.join(args.output_folder, 'eval.npz')
        text_log_file = os.path.join(args.output_folder, 'test_log.txt')
        tabular_log_file = os.path.join(args.output_folder, 'test_result.csv')
        output_test_folder = (args.output_folder + "test"
                              if args.output_folder[-1] == '/' else
                              args.output_folder + "/test")
        if os.path.exists(output_test_folder):
            shutil.rmtree(output_test_folder)
        os.makedirs(output_test_folder)

    # Set up logger
    logger.add_output(dowel.StdOutput())
    logger.add_output(dowel.TextOutput(text_log_file))
    logger.add_output(dowel.CsvOutput(tabular_log_file))
    logger.add_output(
        dowel.TensorBoardOutput(output_test_folder, x_axis='Batch'))
    logger.log('Logging to {}'.format(output_test_folder))

    with open(config_filename, 'r') as f:
        config = json.load(f)

    # Prefer the seed stored with the training config; fall back to the CLI.
    seed = config["seed"] if "seed" in config else args.seed
    if seed is not None:
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        random.seed(seed)

    # Metaworld
    if config['env-name'].startswith('Metaworld'):
        env_name = config['env-name'].replace("Metaworld-", "")
        metaworld = __import__('metaworld')
        class_ = getattr(metaworld, env_name)
        metaworld_benchmark = class_()
        for name, env_cls in metaworld_benchmark.train_classes.items():
            env = env_cls()
        env.close()
        benchmark = metaworld_benchmark
    # Other gym envs
    else:
        env_name = config['env-name']
        env = gym.make(config['env-name'], **config.get('env-kwargs', {}))
        env.close()
        benchmark = None

    # Policy
    policy = get_policy_for_env(env,
                                hidden_sizes=config['hidden-sizes'],
                                nonlinearity=config['nonlinearity'])
    with open(policy_filename, 'rb') as f:
        state_dict = torch.load(f, map_location=torch.device(args.device))
        policy.load_state_dict(state_dict)
    policy.share_memory()

    # Dynamics
    dynamics = get_dynamics_for_env(env,
                                    config['use_vime'],
                                    config['use_inv_vime'],
                                    args.device,
                                    config,
                                    benchmark=benchmark)
    inverse_dynamics = config['use_inv_vime']
    use_dynamics = config["use_vime"] or config["use_inv_vime"]
    if use_dynamics:
        with open(dynamics_filename, 'rb') as f:
            state_dict = torch.load(f,
                                    map_location=torch.device(args.device))
            dynamics.load_state_dict(state_dict)
        dynamics.share_memory()

    # Eta
    if config['adapt_eta']:
        eta_value = torch.Tensor([config["adapted-eta"]])
    else:
        eta_value = torch.Tensor([config["eta"]])
    eta_value = torch.log(eta_value / (1 - eta_value))
    eta = EtaParameter(eta_value, adapt_eta=config['adapt_eta'])
    eta.share_memory()

    # Baseline
    baseline = LinearFeatureBaseline(get_input_size(env))

    # Normalization statistics shared across sampler workers
    normalize_spaces = (config["normalize-spaces"]
                        if "normalize-spaces" in config else True)
    act_prev_mean = mp.Manager().list()
    obs_prev_mean = mp.Manager().list()
    if normalize_spaces:
        obs_prev_mean.append({
            "mean": torch.Tensor(config["obs_mean"]),
            "std": torch.Tensor(config["obs_std"])
        })
        act_prev_mean.append({
            "mean": torch.Tensor(config["act_mean"]),
            "std": torch.Tensor(config["act_std"])
        })

    epochs_counter = mp.Value('i', 100)

    # Sampler
    sampler = MultiTaskSampler(
        config['env-name'],
        env_kwargs=config.get('env-kwargs', {}),
        batch_size=config['fast-batch-size'],  # TODO
        policy=policy,
        baseline=baseline,
        dynamics=dynamics,
        inverse_dynamics=inverse_dynamics,
        env=env,
        seed=args.seed,
        num_workers=args.num_workers,
        epochs_counter=epochs_counter,
        act_prev_mean=act_prev_mean,
        obs_prev_mean=obs_prev_mean,
        # rew_prev_mean=rew_prev_mean,
        eta=eta,
        benchmark=benchmark,
        normalize_spaces=normalize_spaces)

    logs = {'tasks': []}
    train_returns, valid_returns = [], []
    for batch in trange(args.num_batches):
        tasks = sampler.sample_test_tasks(
            num_tasks=config['meta-batch-size'])
        train_episodes, valid_episodes = sampler.sample(
            tasks,
            num_steps=args.num_steps,
            fast_lr=config['fast-lr'],
            gamma=config['gamma'],
            gae_lambda=config['gae-lambda'],
            device=args.device)

        logs['tasks'].extend(tasks)
        train_returns.append(get_returns(train_episodes[0]))
        valid_returns.append(get_returns(valid_episodes))
        logs['train_returns'] = np.concatenate(train_returns, axis=0)
        logs['valid_returns'] = np.concatenate(valid_returns, axis=0)

        tabular.record("Batch", batch)
        log_returns(train_episodes,
                    valid_episodes,
                    batch,
                    log_dynamics=use_dynamics,
                    benchmark=benchmark,
                    env=env,
                    env_name=env_name,
                    is_testing=True)
        log_trajectories(config['env-name'], output_test_folder,
                         train_episodes, valid_episodes, batch)
        logger.log(tabular)
        logger.dump_all()

        # with open(eval_filename + "_" + str(batch), 'wb') as f:
        #     np.savez(f, **logs)

    logger.remove_all()
def run_garage_tf(env, seed, log_dir):
    """Create garage TensorFlow PPO model and training.

    Args:
        env (dict): Environment of the task.
        seed (int): Random positive integer for the trial.
        log_dir (str): Log dir path.

    Returns:
        str: Path to output csv file.

    """
    deterministic.set_seed(seed)

    with LocalTFRunner(snapshot_config) as runner:
        env = TfEnv(normalize(env))

        policy = TF_GMP(
            env_spec=env.spec,
            hidden_sizes=(32, 32),
            hidden_nonlinearity=tf.nn.tanh,
            output_nonlinearity=None,
        )

        baseline = TF_GMB(
            env_spec=env.spec,
            regressor_args=dict(
                hidden_sizes=(32, 32),
                use_trust_region=False,
                optimizer=FirstOrderOptimizer,
                optimizer_args=dict(
                    batch_size=32,
                    max_epochs=10,
                    tf_optimizer_args=dict(learning_rate=3e-4),
                ),
            ),
        )

        algo = TF_PPO(env_spec=env.spec,
                      policy=policy,
                      baseline=baseline,
                      max_path_length=hyper_parameters['max_path_length'],
                      discount=0.99,
                      gae_lambda=0.95,
                      center_adv=True,
                      lr_clip_range=0.2,
                      optimizer_args=dict(
                          batch_size=32,
                          max_epochs=10,
                          tf_optimizer_args=dict(learning_rate=3e-4),
                          verbose=True))

        # Set up logger since we are not using run_experiment
        tabular_log_file = osp.join(log_dir, 'progress.csv')
        dowel_logger.add_output(dowel.StdOutput())
        dowel_logger.add_output(dowel.CsvOutput(tabular_log_file))
        dowel_logger.add_output(dowel.TensorBoardOutput(log_dir))

        runner.setup(algo, env)
        runner.train(n_epochs=hyper_parameters['n_epochs'],
                     batch_size=hyper_parameters['batch_size'])

        dowel_logger.remove_all()

        return tabular_log_file
def run_garage(env, seed, log_dir):
    '''Create garage model and training.

    Replace the td3 with the algorithm you want to run.

    :param env: Environment of the task.
    :param seed: Random seed for the trial.
    :param log_dir: Log dir path.
    :return:
    '''
    deterministic.set_seed(seed)

    with LocalTFRunner(snapshot_config) as runner:
        env = TfEnv(normalize(env))

        # Set up params for TD3
        exploration_noise = GaussianStrategy(env.spec,
                                             max_sigma=params['sigma'],
                                             min_sigma=params['sigma'])

        policy = ContinuousMLPPolicy(
            env_spec=env.spec,
            hidden_sizes=params['policy_hidden_sizes'],
            hidden_nonlinearity=tf.nn.relu,
            output_nonlinearity=tf.nn.tanh)

        qf = ContinuousMLPQFunction(name='ContinuousMLPQFunction',
                                    env_spec=env.spec,
                                    hidden_sizes=params['qf_hidden_sizes'],
                                    action_merge_layer=0,
                                    hidden_nonlinearity=tf.nn.relu)

        qf2 = ContinuousMLPQFunction(name='ContinuousMLPQFunction2',
                                     env_spec=env.spec,
                                     hidden_sizes=params['qf_hidden_sizes'],
                                     action_merge_layer=0,
                                     hidden_nonlinearity=tf.nn.relu)

        replay_buffer = SimpleReplayBuffer(
            env_spec=env.spec,
            size_in_transitions=params['replay_buffer_size'],
            time_horizon=params['n_rollout_steps'])

        td3 = TD3(env.spec,
                  policy=policy,
                  qf=qf,
                  qf2=qf2,
                  replay_buffer=replay_buffer,
                  steps_per_epoch=params['steps_per_epoch'],
                  policy_lr=params['policy_lr'],
                  qf_lr=params['qf_lr'],
                  target_update_tau=params['tau'],
                  n_train_steps=params['n_train_steps'],
                  discount=params['discount'],
                  smooth_return=params['smooth_return'],
                  min_buffer_size=params['min_buffer_size'],
                  buffer_batch_size=params['buffer_batch_size'],
                  exploration_strategy=exploration_noise,
                  policy_optimizer=tf.compat.v1.train.AdamOptimizer,
                  qf_optimizer=tf.compat.v1.train.AdamOptimizer)

        # Set up logger since we are not using run_experiment
        tabular_log_file = osp.join(log_dir, 'progress.csv')
        dowel_logger.add_output(dowel.StdOutput())
        dowel_logger.add_output(dowel.CsvOutput(tabular_log_file))
        dowel_logger.add_output(dowel.TensorBoardOutput(log_dir))

        runner.setup(td3, env)
        runner.train(n_epochs=params['n_epochs'],
                     batch_size=params['n_rollout_steps'])

        dowel_logger.remove_all()

        return tabular_log_file
def run_garage(env, seed, log_dir):
    """Create garage model and training.

    Replace the ddpg with the algorithm you want to run.

    Args:
        env (gym.Env): Environment of the task.
        seed (int): Random seed for the trial.
        log_dir (str): Log dir path.

    Returns:
        str: The log file path.

    """
    deterministic.set_seed(seed)

    with LocalTFRunner(snapshot_config) as runner:
        env = TfEnv(normalize(env))

        # Set up params for ddpg
        action_noise = OUStrategy(env.spec, sigma=params['sigma'])

        policy = ContinuousMLPPolicy(
            env_spec=env.spec,
            hidden_sizes=params['policy_hidden_sizes'],
            hidden_nonlinearity=tf.nn.relu,
            output_nonlinearity=tf.nn.tanh)

        qf = ContinuousMLPQFunction(env_spec=env.spec,
                                    hidden_sizes=params['qf_hidden_sizes'],
                                    hidden_nonlinearity=tf.nn.relu)

        replay_buffer = SimpleReplayBuffer(
            env_spec=env.spec,
            size_in_transitions=params['replay_buffer_size'],
            time_horizon=params['n_rollout_steps'])

        algo = DDPG(env_spec=env.spec,
                    policy=policy,
                    qf=qf,
                    replay_buffer=replay_buffer,
                    steps_per_epoch=params['steps_per_epoch'],
                    policy_lr=params['policy_lr'],
                    qf_lr=params['qf_lr'],
                    target_update_tau=params['tau'],
                    n_train_steps=params['n_train_steps'],
                    discount=params['discount'],
                    min_buffer_size=int(1e4),
                    exploration_strategy=action_noise,
                    policy_optimizer=tf.compat.v1.train.AdamOptimizer,
                    qf_optimizer=tf.compat.v1.train.AdamOptimizer)

        # Set up logger since we are not using run_experiment
        tabular_log_file = osp.join(log_dir, 'progress.csv')
        tensorboard_log_dir = osp.join(log_dir)
        dowel_logger.add_output(dowel.StdOutput())
        dowel_logger.add_output(dowel.CsvOutput(tabular_log_file))
        dowel_logger.add_output(dowel.TensorBoardOutput(tensorboard_log_dir))

        runner.setup(algo, env)
        runner.train(n_epochs=params['n_epochs'],
                     batch_size=params['n_rollout_steps'])

        dowel_logger.remove_all()

        return tabular_log_file
def run_metarl(env, seed, log_dir):
    """Create metarl TensorFlow PPO model and training.

    Args:
        env (dict): Environment of the task.
        seed (int): Random positive integer for the trial.
        log_dir (str): Log dir path.

    Returns:
        str: Path to output csv file.

    """
    deterministic.set_seed(seed)
    snapshot_config = SnapshotConfig(snapshot_dir=log_dir,
                                     snapshot_mode='gap',
                                     snapshot_gap=10)
    with LocalTFRunner(snapshot_config) as runner:
        env, task_samplers = _prepare_meta_env(env)

        policy = GaussianGRUPolicy(
            hidden_dims=hyper_parameters['hidden_sizes'],
            env_spec=env.spec,
            state_include_action=False)

        baseline = MetaRLLinearFeatureBaseline(env_spec=env.spec)

        inner_algo = RL2PPO(
            env_spec=env.spec,
            policy=policy,
            baseline=baseline,
            max_path_length=hyper_parameters['max_path_length'] *
            hyper_parameters['rollout_per_task'],
            discount=hyper_parameters['discount'],
            gae_lambda=hyper_parameters['gae_lambda'],
            lr_clip_range=hyper_parameters['lr_clip_range'],
            optimizer_args=dict(
                max_epochs=hyper_parameters['optimizer_max_epochs'],
                tf_optimizer_args=dict(
                    learning_rate=hyper_parameters['optimizer_lr'], ),
            ))

        algo = RL2(policy=policy,
                   inner_algo=inner_algo,
                   max_path_length=hyper_parameters['max_path_length'],
                   meta_batch_size=hyper_parameters['meta_batch_size'],
                   task_sampler=task_samplers)

        # Set up logger since we are not using run_experiment
        tabular_log_file = osp.join(log_dir, 'progress.csv')
        text_log_file = osp.join(log_dir, 'debug.log')
        dowel_logger.add_output(dowel.TextOutput(text_log_file))
        dowel_logger.add_output(dowel.CsvOutput(tabular_log_file))
        dowel_logger.add_output(dowel.StdOutput())
        dowel_logger.add_output(dowel.TensorBoardOutput(log_dir))

        runner.setup(
            algo,
            task_samplers.sample(hyper_parameters['meta_batch_size']),
            sampler_cls=hyper_parameters['sampler_cls'],
            n_workers=hyper_parameters['meta_batch_size'],
            worker_class=RL2Worker,
            sampler_args=dict(
                use_all_workers=hyper_parameters['use_all_workers']),
            worker_args=dict(
                n_paths_per_trial=hyper_parameters['rollout_per_task']))

        runner.setup_meta_evaluator(
            test_task_sampler=task_samplers,
            n_exploration_traj=hyper_parameters['rollout_per_task'],
            n_test_rollouts=hyper_parameters['test_rollout_per_task'],
            n_test_tasks=hyper_parameters['n_test_tasks'],
            n_workers=hyper_parameters['n_test_tasks'])

        runner.train(n_epochs=hyper_parameters['n_itr'],
                     batch_size=hyper_parameters['meta_batch_size'] *
                     hyper_parameters['rollout_per_task'] *
                     hyper_parameters['max_path_length'])

        dowel_logger.remove_all()

        return tabular_log_file
def run_garage(env, seed, log_dir):
    '''Create garage model and training.

    Replace the ddpg with the algorithm you want to run.

    :param env: Environment of the task.
    :param seed: Random seed for the trial.
    :param log_dir: Log dir path.
    :return:
    '''
    deterministic.set_seed(seed)
    env.reset()

    with LocalTFRunner(snapshot_config) as runner:
        env = TfEnv(normalize(env))

        action_noise = OUStrategy(env.spec, sigma=params['sigma'])

        policy = ContinuousMLPPolicy(
            env_spec=env.spec,
            hidden_sizes=params['policy_hidden_sizes'],
            hidden_nonlinearity=tf.nn.relu,
            output_nonlinearity=tf.nn.tanh,
            input_include_goal=True,
        )

        qf = ContinuousMLPQFunction(
            env_spec=env.spec,
            hidden_sizes=params['qf_hidden_sizes'],
            hidden_nonlinearity=tf.nn.relu,
            input_include_goal=True,
        )

        replay_buffer = HerReplayBuffer(
            env_spec=env.spec,
            size_in_transitions=params['replay_buffer_size'],
            time_horizon=params['n_rollout_steps'],
            replay_k=0.4,
            reward_fun=env.compute_reward,
        )

        algo = DDPG(
            env_spec=env.spec,
            policy=policy,
            qf=qf,
            replay_buffer=replay_buffer,
            policy_lr=params['policy_lr'],
            qf_lr=params['qf_lr'],
            target_update_tau=params['tau'],
            n_train_steps=params['n_train_steps'],
            discount=params['discount'],
            exploration_strategy=action_noise,
            policy_optimizer=tf.train.AdamOptimizer,
            qf_optimizer=tf.train.AdamOptimizer,
            buffer_batch_size=256,
            input_include_goal=True,
        )

        # Set up logger since we are not using run_experiment
        tabular_log_file = osp.join(log_dir, 'progress.csv')
        logger.add_output(dowel.StdOutput())
        logger.add_output(dowel.CsvOutput(tabular_log_file))
        logger.add_output(dowel.TensorBoardOutput(log_dir))

        runner.setup(algo, env)
        runner.train(n_epochs=params['n_epochs'],
                     n_epoch_cycles=params['n_epoch_cycles'],
                     batch_size=params['n_rollout_steps'])

        logger.remove_all()

        return tabular_log_file
def standard_eval(env,
                  policy,
                  n_episodes=20,
                  greedy=True,
                  load_from_file=False,
                  render=False,
                  recorder=None,
                  max_steps=10000):
    if recorder is not None:
        render = False  # force off

    if load_from_file:
        logger.add_output(dowel.StdOutput())
    logger.log('Evaluating policy, {} episodes, greedy = {} ...'.format(
        n_episodes, greedy))

    episode_rewards = []
    pbar = ProgBarCounter(n_episodes)
    for e in range(n_episodes):
        obs = env.reset()
        policy.reset([True])
        terminated = False
        t = 0
        episode_rewards.append(0)
        while not terminated:
            if render:
                env.render()
                # time.sleep(0.05)
            if recorder is not None:
                recorder.capture_frame()
            if not env.centralized:
                # obs.shape = (n_agents, n_envs, obs_dim)
                obs = torch.Tensor(obs).unsqueeze(1)  # add n_envs dim
                avail_actions = torch.Tensor(
                    env.get_avail_actions()).unsqueeze(1)
                actions, agent_infos = policy.get_actions(obs,
                                                          avail_actions,
                                                          greedy=greedy)
                if len(actions.shape) == 3:  # n-d action
                    actions = actions[:, 0, :]
                elif len(actions.shape) == 2:  # 1-d action
                    actions = actions[:, 0]
                obs, reward, terminated, info = env.step(
                    actions)  # n_env = 1
                terminated = all(terminated)
            else:
                # obs.shape = (n_envs, n_agents * obs_dim)
                obs = np.array([obs])
                avail_actions = np.array([env.get_avail_actions()])
                actions, agent_infos = policy.get_actions(obs,
                                                          avail_actions,
                                                          greedy=greedy)
                obs, reward, terminated, info = env.step(
                    actions[0])  # n_env = 1
            t += 1
            if t > max_steps:
                terminated = True
            episode_rewards[-1] += np.mean(reward)
        pbar.inc(1)
    pbar.stop()
    policy.reset([True])

    avg_return = np.mean(episode_rewards)
    logger.log('EvalAvgReturn: {}'.format(avg_return))
    if not load_from_file:
        tabular.record('EvalAvgReturn', avg_return)
def eval(self,
         policy,
         n_episodes=20,
         greedy=True,
         load_from_file=False,
         max_steps=60):
    import dowel
    from dowel import logger, tabular
    from garage.misc.prog_bar_counter import ProgBarCounter

    if load_from_file:
        logger.add_output(dowel.StdOutput())
    logger.log('Evaluating policy, {} episodes, greedy = {} ...'.format(
        n_episodes, greedy))

    episode_rewards = []
    success = 0
    pbar = ProgBarCounter(n_episodes)
    for e in range(n_episodes):
        obs = self.reset()
        policy.reset([True])
        terminated = False
        t = 0
        episode_rewards.append(0)
        while not terminated:
            if not self.centralized:
                # obs.shape = (n_agents, n_envs, obs_dim)
                obs = torch.Tensor(obs).unsqueeze(1)  # add n_envs dim
                avail_actions = torch.Tensor(
                    self.get_avail_actions()).unsqueeze(1)
                actions, agent_infos = policy.get_actions(obs,
                                                          avail_actions,
                                                          greedy=greedy)
                if len(actions.shape) == 3:  # n-d action
                    actions = actions[:, 0, :]
                elif len(actions.shape) == 2:  # 1-d action
                    actions = actions[:, 0]
                obs, reward, terminated, info = self.step(
                    actions)  # n_env = 1
                terminated = all(terminated)
            else:
                # obs.shape = (n_envs, n_agents * obs_dim)
                obs = np.array([obs])
                avail_actions = np.array([self.get_avail_actions()])
                actions, agent_infos = policy.get_actions(obs,
                                                          avail_actions,
                                                          greedy=greedy)
                obs, reward, terminated, info = self.step(
                    actions[0])  # n_env = 1
            t += 1
            if t >= max_steps:
                terminated = True
            episode_rewards[-1] += np.mean(reward)
        # episode end
        success += self.stat['success']
        pbar.inc(1)
    pbar.stop()
    policy.reset([True])

    avg_return = np.mean(episode_rewards)
    success = success / n_episodes
    logger.log('EvalAvgReturn: {}'.format(avg_return))
    logger.log('EvalSuccessRate: {}'.format(success))
    if not load_from_file:
        tabular.record('EvalAvgReturn', avg_return)
        tabular.record('EvalSuccessRate', success)
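# Hypothetical usage of the evaluators above, e.g. after loading a trained
# policy from a snapshot; the environment constructor and loading helper are
# assumptions, only the eval signature comes from the code above.
# env = SomeMultiAgentEnv(...)          # assumed constructor
# policy = load_policy('snapshot.pkl')  # assumed helper
# env.eval(policy, n_episodes=20, greedy=True, load_from_file=True,
#          max_steps=60)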
def run_experiment(argv):
    """Run experiment."""
    now = datetime.datetime.now(dateutil.tz.tzlocal())

    # Avoid name clashes when running distributed jobs.
    rand_id = str(uuid.uuid4())[:5]
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S_%f_%Z')
    default_exp_name = 'experiment_%s_%s' % (timestamp, rand_id)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--n_parallel',
        type=int,
        default=1,
        help=('Number of parallel workers to perform rollouts. '
              "0 => don't start any workers"))
    parser.add_argument('--exp_name',
                        type=str,
                        default=default_exp_name,
                        help='Name of the experiment.')
    parser.add_argument('--log_dir',
                        type=str,
                        default=None,
                        help='Path to save the log and iteration snapshot.')
    parser.add_argument('--snapshot_mode',
                        type=str,
                        default='last',
                        help='Mode to save the snapshot. Can be either "all" '
                             '(all iterations will be saved), "last" (only '
                             'the last iteration will be saved), "gap" (every '
                             '`snapshot_gap` iterations are saved), or "none" '
                             '(do not save snapshots).')
    parser.add_argument('--snapshot_gap',
                        type=int,
                        default=1,
                        help='Gap between snapshot iterations.')
    parser.add_argument(
        '--resume_from_dir',
        type=str,
        default=None,
        help='Directory of the pickle file to resume experiment from.')
    parser.add_argument('--resume_from_epoch',
                        type=str,
                        default=None,
                        help='Index of iteration to restore from. '
                             'Can be "first", "last" or a number. '
                             'Not applicable when snapshot_mode="last".')
    parser.add_argument('--tabular_log_file',
                        type=str,
                        default='progress.csv',
                        help='Name of the tabular log file (in csv).')
    parser.add_argument('--text_log_file',
                        type=str,
                        default='debug.log',
                        help='Name of the text log file (in pure text).')
    parser.add_argument('--tensorboard_step_key',
                        type=str,
                        default=None,
                        help='Name of the step key in tensorboard_summary.')
    parser.add_argument('--params_log_file',
                        type=str,
                        default='params.json',
                        help='Name of the parameter log file (in json).')
    parser.add_argument('--variant_log_file',
                        type=str,
                        default='variant.json',
                        help='Name of the variant log file (in json).')
    parser.add_argument('--plot',
                        type=ast.literal_eval,
                        default=False,
                        help='Whether to plot the iteration results.')
    parser.add_argument(
        '--log_tabular_only',
        type=ast.literal_eval,
        default=False,
        help='Print only the tabular log information (in a horizontal format).')
    parser.add_argument('--seed',
                        type=int,
                        default=None,
                        help='Random seed for numpy.')
    parser.add_argument('--args_data',
                        type=str,
                        help='Pickled data for objects.')
    parser.add_argument('--variant_data',
                        type=str,
                        help='Pickled data for variant configuration.')

    args = parser.parse_args(argv[1:])

    if args.seed is not None:
        deterministic.set_seed(args.seed)

    if args.n_parallel > 0:
        parallel_sampler.initialize(n_parallel=args.n_parallel)
        if args.seed is not None:
            parallel_sampler.set_seed(args.seed)

    if not args.plot:
        garage.plotter.Plotter.disable()
        garage.tf.plotter.Plotter.disable()

    if args.log_dir is None:
        log_dir = os.path.join(os.path.join(os.getcwd(), 'data'),
                               args.exp_name)
    else:
        log_dir = args.log_dir

    tabular_log_file = os.path.join(log_dir, args.tabular_log_file)
    text_log_file = os.path.join(log_dir, args.text_log_file)
    params_log_file = os.path.join(log_dir, args.params_log_file)

    if args.variant_data is not None:
        variant_data = pickle.loads(base64.b64decode(args.variant_data))
        variant_log_file = os.path.join(log_dir, args.variant_log_file)
        dump_variant(variant_log_file, variant_data)
    else:
        variant_data = None

    log_parameters(params_log_file, args)

    logger.add_output(dowel.TextOutput(text_log_file))
    logger.add_output(dowel.CsvOutput(tabular_log_file))
    logger.add_output(dowel.TensorBoardOutput(log_dir))
    logger.add_output(dowel.StdOutput())
    logger.push_prefix('[%s] ' % args.exp_name)

    snapshot_config = SnapshotConfig(snapshot_dir=log_dir,
                                     snapshot_mode=args.snapshot_mode,
                                     snapshot_gap=args.snapshot_gap)

    method_call = cloudpickle.loads(base64.b64decode(args.args_data))
    try:
        method_call(snapshot_config, variant_data, args.resume_from_dir,
                    args.resume_from_epoch)
    except BaseException:
        children = garage.plotter.Plotter.get_plotters()
        children += garage.tf.plotter.Plotter.get_plotters()
        if args.n_parallel > 0:
            children += [parallel_sampler]
        child_proc_shutdown(children)
        raise

    logger.remove_all()
    logger.pop_prefix()
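# A sketch of how a launcher could produce --args_data for the
# run_experiment above, assuming the same cloudpickle + base64 convention the
# decoding side uses; the wrapped callable's signature must match the
# method_call(snapshot_config, variant_data, ...) invocation.
import base64
import cloudpickle


def encode_args_data(method_call):
    """Serialize a callable into the string run_experiment expects."""
    return base64.b64encode(cloudpickle.dumps(method_call)).decode('ascii')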
def run_metarl(env, test_env, seed, log_dir):
    """Create metarl model and training."""
    deterministic.set_seed(seed)
    snapshot_config = SnapshotConfig(snapshot_dir=log_dir,
                                     snapshot_mode='gap',
                                     snapshot_gap=10)
    runner = LocalRunner(snapshot_config)

    obs_dim = int(np.prod(env[0]().observation_space.shape))
    action_dim = int(np.prod(env[0]().action_space.shape))
    reward_dim = 1

    # Instantiate networks.
    encoder_in_dim = obs_dim + action_dim + reward_dim
    encoder_out_dim = params['latent_size'] * 2
    net_size = params['net_size']

    context_encoder = MLPEncoder(input_dim=encoder_in_dim,
                                 output_dim=encoder_out_dim,
                                 hidden_sizes=[200, 200, 200])

    space_a = akro.Box(low=-1,
                       high=1,
                       shape=(obs_dim + params['latent_size'], ),
                       dtype=np.float32)
    space_b = akro.Box(low=-1, high=1, shape=(action_dim, ), dtype=np.float32)
    augmented_env = EnvSpec(space_a, space_b)

    qf1 = ContinuousMLPQFunction(env_spec=augmented_env,
                                 hidden_sizes=[net_size, net_size, net_size])
    qf2 = ContinuousMLPQFunction(env_spec=augmented_env,
                                 hidden_sizes=[net_size, net_size, net_size])

    obs_space = akro.Box(low=-1, high=1, shape=(obs_dim, ), dtype=np.float32)
    action_space = akro.Box(low=-1,
                            high=1,
                            shape=(params['latent_size'], ),
                            dtype=np.float32)
    vf_env = EnvSpec(obs_space, action_space)

    vf = ContinuousMLPQFunction(env_spec=vf_env,
                                hidden_sizes=[net_size, net_size, net_size])

    policy = TanhGaussianMLPPolicy2(
        env_spec=augmented_env,
        hidden_sizes=[net_size, net_size, net_size])

    context_conditioned_policy = ContextConditionedPolicy(
        latent_dim=params['latent_size'],
        context_encoder=context_encoder,
        policy=policy,
        use_ib=params['use_information_bottleneck'],
        use_next_obs=params['use_next_obs_in_context'],
    )

    train_task_names = ML10.get_train_tasks()._task_names
    test_task_names = ML10.get_test_tasks()._task_names

    pearlsac = PEARLSAC(
        env=env,
        test_env=test_env,
        policy=context_conditioned_policy,
        qf1=qf1,
        qf2=qf2,
        vf=vf,
        num_train_tasks=params['num_train_tasks'],
        num_test_tasks=params['num_test_tasks'],
        latent_dim=params['latent_size'],
        meta_batch_size=params['meta_batch_size'],
        num_steps_per_epoch=params['num_steps_per_epoch'],
        num_initial_steps=params['num_initial_steps'],
        num_tasks_sample=params['num_tasks_sample'],
        num_steps_prior=params['num_steps_prior'],
        num_extra_rl_steps_posterior=params['num_extra_rl_steps_posterior'],
        num_evals=params['num_evals'],
        num_steps_per_eval=params['num_steps_per_eval'],
        batch_size=params['batch_size'],
        embedding_batch_size=params['embedding_batch_size'],
        embedding_mini_batch_size=params['embedding_mini_batch_size'],
        max_path_length=params['max_path_length'],
        reward_scale=params['reward_scale'],
        train_task_names=train_task_names,
        test_task_names=test_task_names,
    )

    tu.set_gpu_mode(params['use_gpu'], gpu_id=0)
    if params['use_gpu']:
        pearlsac.to()

    tabular_log_file = osp.join(log_dir, 'progress.csv')
    tensorboard_log_dir = osp.join(log_dir)
    dowel_logger.add_output(dowel.StdOutput())
    dowel_logger.add_output(dowel.CsvOutput(tabular_log_file))
    dowel_logger.add_output(dowel.TensorBoardOutput(tensorboard_log_dir))

    runner.setup(algo=pearlsac,
                 env=env,
                 sampler_cls=PEARLSampler,
                 sampler_args=dict(max_path_length=params['max_path_length']))
    runner.train(n_epochs=params['num_epochs'],
                 batch_size=params['batch_size'])

    dowel_logger.remove_all()

    return tabular_log_file
def run_garage(env, seed, log_dir):
    '''Create garage model and training.

    Replace the ppo with the algorithm you want to run.

    :param env: Environment of the task.
    :param seed: Random seed for the trial.
    :param log_dir: Log dir path.
    :return:
    '''
    deterministic.set_seed(seed)

    with LocalRunner() as runner:
        env = TfEnv(normalize(env))

        policy = GaussianMLPPolicy(
            env_spec=env.spec,
            hidden_sizes=(64, 64),
            hidden_nonlinearity=tf.nn.tanh,
            output_nonlinearity=None,
        )

        baseline = GaussianMLPBaseline(
            env_spec=env.spec,
            regressor_args=dict(
                hidden_sizes=(64, 64),
                use_trust_region=False,
                optimizer=FirstOrderOptimizer,
                optimizer_args=dict(
                    batch_size=32,
                    max_epochs=10,
                    tf_optimizer_args=dict(learning_rate=1e-3),
                ),
            ),
        )

        algo = PPO(
            env_spec=env.spec,
            policy=policy,
            baseline=baseline,
            max_path_length=100,
            discount=0.99,
            gae_lambda=0.95,
            lr_clip_range=0.2,
            policy_ent_coeff=0.0,
            optimizer_args=dict(
                batch_size=32,
                max_epochs=10,
                tf_optimizer_args=dict(learning_rate=1e-3),
            ),
        )

        # Set up logger since we are not using run_experiment
        tabular_log_file = osp.join(log_dir, 'progress.csv')
        dowel_logger.add_output(dowel.StdOutput())
        dowel_logger.add_output(dowel.CsvOutput(tabular_log_file))
        dowel_logger.add_output(dowel.TensorBoardOutput(log_dir))

        runner.setup(algo, env)
        runner.train(n_epochs=488, batch_size=2048)

        dowel_logger.remove_all()

        return tabular_log_file
def run_garage(env, seed, log_dir):
    '''Create garage model and training.

    Replace the ppo with the algorithm you want to run.

    :param env: Environment of the task.
    :param seed: Random seed for the trial.
    :param log_dir: Log dir path.
    :return:
    '''
    deterministic.set_seed(seed)

    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=12,
                            inter_op_parallelism_threads=12)
    sess = tf.Session(config=config)
    with LocalTFRunner(snapshot_config, sess=sess, max_cpus=12) as runner:
        env = TfEnv(normalize(env))

        policy = CategoricalCNNPolicy(
            env_spec=env.spec,
            conv_filters=params['conv_filters'],
            conv_filter_sizes=params['conv_filter_sizes'],
            conv_strides=params['conv_strides'],
            conv_pad=params['conv_pad'],
            hidden_sizes=params['hidden_sizes'])

        baseline = GaussianCNNBaseline(
            env_spec=env.spec,
            regressor_args=dict(
                num_filters=params['conv_filters'],
                filter_dims=params['conv_filter_sizes'],
                strides=params['conv_strides'],
                padding=params['conv_pad'],
                hidden_sizes=params['hidden_sizes'],
                use_trust_region=params['use_trust_region']))

        algo = PPO(
            env_spec=env.spec,
            policy=policy,
            baseline=baseline,
            max_path_length=100,
            discount=0.99,
            gae_lambda=0.95,
            lr_clip_range=0.2,
            policy_ent_coeff=0.0,
            optimizer_args=dict(
                batch_size=32,
                max_epochs=10,
                tf_optimizer_args=dict(learning_rate=1e-3),
            ),
            flatten_input=False,
        )

        # Set up logger since we are not using run_experiment
        tabular_log_file = osp.join(log_dir, 'progress.csv')
        dowel_logger.add_output(dowel.StdOutput())
        dowel_logger.add_output(dowel.CsvOutput(tabular_log_file))
        dowel_logger.add_output(dowel.TensorBoardOutput(log_dir))

        runner.setup(algo, env)
        runner.train(n_epochs=params['n_epochs'],
                     batch_size=params['batch_size'])

        dowel_logger.remove_all()

        return tabular_log_file
def run_garage_pytorch(env, seed, log_dir):
    """Create garage PyTorch PPO model and training.

    Args:
        env (dict): Environment of the task.
        seed (int): Random positive integer for the trial.
        log_dir (str): Log dir path.

    Returns:
        str: Path to output csv file.

    """
    env = TfEnv(normalize(env))

    deterministic.set_seed(seed)

    runner = LocalRunner(snapshot_config)

    policy = PyTorch_GMP(env.spec,
                         hidden_sizes=(32, 32),
                         hidden_nonlinearity=torch.tanh,
                         output_nonlinearity=None)

    value_function = GaussianMLPValueFunction(env_spec=env.spec,
                                              hidden_sizes=(32, 32),
                                              hidden_nonlinearity=torch.tanh,
                                              output_nonlinearity=None)

    policy_optimizer = OptimizerWrapper((torch.optim.Adam, dict(lr=2.5e-4)),
                                        policy,
                                        max_optimization_epochs=10,
                                        minibatch_size=64)
    vf_optimizer = OptimizerWrapper((torch.optim.Adam, dict(lr=2.5e-4)),
                                    value_function,
                                    max_optimization_epochs=10,
                                    minibatch_size=64)

    algo = PyTorch_PPO(env_spec=env.spec,
                       policy=policy,
                       value_function=value_function,
                       policy_optimizer=policy_optimizer,
                       vf_optimizer=vf_optimizer,
                       max_path_length=hyper_parameters['max_path_length'],
                       discount=0.99,
                       gae_lambda=0.95,
                       center_adv=True,
                       lr_clip_range=0.2)

    # Set up logger since we are not using run_experiment
    tabular_log_file = osp.join(log_dir, 'progress.csv')
    dowel_logger.add_output(dowel.StdOutput())
    dowel_logger.add_output(dowel.CsvOutput(tabular_log_file))
    dowel_logger.add_output(dowel.TensorBoardOutput(log_dir))

    runner.setup(algo, env)
    runner.train(n_epochs=hyper_parameters['n_epochs'],
                 batch_size=hyper_parameters['batch_size'])

    dowel_logger.remove_all()

    return tabular_log_file
def run_metarl(env, seed, log_dir):
    """Create metarl model and training.

    Replace the ddpg with the algorithm you want to run.

    Args:
        env (gym.Env): Environment of the task.
        seed (int): Random seed for the trial.
        log_dir (str): Log dir path.

    Returns:
        str: Log file path.

    """
    deterministic.set_seed(seed)

    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=12,
                            inter_op_parallelism_threads=12)
    sess = tf.Session(config=config)
    with LocalTFRunner(snapshot_config, sess=sess, max_cpus=12) as runner:
        env = TfEnv(normalize(env))

        # Set up params for ddpg
        action_noise = OUStrategy(env.spec, sigma=params['sigma'])

        policy = ContinuousMLPPolicy(
            env_spec=env.spec,
            name='ContinuousMLPPolicy',
            hidden_sizes=params['policy_hidden_sizes'],
            hidden_nonlinearity=tf.nn.relu,
            output_nonlinearity=tf.nn.tanh)

        qf = ContinuousMLPQFunction(env_spec=env.spec,
                                    hidden_sizes=params['qf_hidden_sizes'],
                                    hidden_nonlinearity=tf.nn.relu,
                                    name='ContinuousMLPQFunction')

        replay_buffer = SimpleReplayBuffer(
            env_spec=env.spec,
            size_in_transitions=params['replay_buffer_size'],
            time_horizon=params['n_rollout_steps'])

        ddpg = DDPG(env_spec=env.spec,
                    policy=policy,
                    qf=qf,
                    replay_buffer=replay_buffer,
                    steps_per_epoch=params['steps_per_epoch'],
                    policy_lr=params['policy_lr'],
                    qf_lr=params['qf_lr'],
                    target_update_tau=params['tau'],
                    n_train_steps=params['n_train_steps'],
                    discount=params['discount'],
                    min_buffer_size=int(1e4),
                    exploration_strategy=action_noise,
                    policy_optimizer=tf.train.AdamOptimizer,
                    qf_optimizer=tf.train.AdamOptimizer)

        # Set up logger since we are not using run_experiment
        tabular_log_file = osp.join(log_dir, 'progress.csv')
        dowel_logger.add_output(dowel.StdOutput())
        dowel_logger.add_output(dowel.CsvOutput(tabular_log_file))
        dowel_logger.add_output(dowel.TensorBoardOutput(log_dir))

        runner.setup(ddpg, env, sampler_args=dict(n_envs=12))
        runner.train(n_epochs=params['n_epochs'],
                     batch_size=params['n_rollout_steps'])

        dowel_logger.remove_all()

        return tabular_log_file
    help='glob for tags to save')
parser.add_argument('--outdir',
                    default='events',
                    help='output directory to store values')
parser.add_argument('--force',
                    type=boolean,
                    default=False,
                    help='overwrite existing files')
parser.add_argument('--workers',
                    type=int,
                    default=10,
                    help='number of worker threads')
args_, remaining = parser.parse_known_args()
args_.outdir = os.path.expanduser(args_.outdir)
remaining.insert(0, sys.argv[0])

tags = ['*general/objectives/reward']
logger.add_output(dowel.StdOutput())
logger.add_output(dowel.CsvOutput('loss.csv'))

base_dir = 'benchmark'
phases = ['train', 'test']
envs = ['cartpole_swingup']
# methods = ['aug2', 'aug3']
# methods = ['baseline3', 'resample_traj4', 'resample_traj6', 'aug2', 'aug3',
#            'aug4', 'aug5']
methods = ['aug6']
# envs = ['finger_spin', 'cartpole_swingup', 'reacher_easy', 'cheetah_run',
#         'walker_walk', 'cup_catch']

for tag in tags:
    for env in envs:
        for meth in methods:
            tabular.record('Method', meth)
            for phase in phases: