def run_experiment(**kwargs):
    exp_dir = os.path.join(os.getcwd(), 'data', EXP_NAME, kwargs.get('exp_name', ''))
    print("\n---------- experiment with dir {} ---------------------------".format(exp_dir))
    logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'], snapshot_mode='last')

    # Dump the experiment parameters at the top level and once per worker
    # sub-directory, so each worker can be configured independently.
    with open(os.path.join(exp_dir, 'params.json'), 'w') as f:
        json.dump(kwargs, f, indent=2, sort_keys=True, cls=ClassEncoder)
    for subdir in ('Data', 'Model', 'Policy'):
        os.makedirs(os.path.join(exp_dir, subdir), exist_ok=True)
        with open(os.path.join(exp_dir, subdir, 'params.json'), 'w') as f:
            json.dump(kwargs, f, indent=2, sort_keys=True, cls=ClassEncoder)

    run_base(exp_dir, **kwargs)
def run_experiment(**kwargs):
    exp_dir = os.path.join(os.getcwd(), 'data', EXP_NAME, kwargs.get('exp_name', 'tmp'))
    print("\n---------- experiment with dir {} ---------------------------".format(exp_dir))
    logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'], snapshot_mode='last')
    with open(os.path.join(exp_dir, 'params.json'), 'w') as f:
        json.dump(kwargs, f, indent=2, sort_keys=True, cls=ClassEncoder)

    def _dump_params(subdir):
        # Create the worker sub-directory and drop a copy of the params there.
        os.makedirs(os.path.join(exp_dir, subdir), exist_ok=True)
        with open(os.path.join(exp_dir, subdir, 'params.json'), 'w') as f:
            json.dump(kwargs, f, indent=2, sort_keys=True, cls=ClassEncoder)

    if 'num_data_workers' in kwargs:
        # Multi-worker layout: Data-0, Data-1, ..., Model-0, ..., Policy-0, ...
        for prefix, key in (('Data', 'num_data_workers'),
                            ('Model', 'num_model_workers'),
                            ('Policy', 'num_policy_workers')):
            for idx in range(kwargs[key]):
                _dump_params('{}-{}'.format(prefix, idx))
    else:
        # Single-worker layout: one directory per pipeline component.
        for subdir in ('Data', 'Model', 'Policy'):
            _dump_params(subdir)

    run_base(exp_dir, **kwargs)
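# Both run_experiment variants above serialize the raw kwargs, which typically
# contain classes (env, baseline) and callables (activation functions) that the
# stock JSON encoder rejects. ClassEncoder is defined elsewhere in the repo; the
# following is only a minimal sketch of what such an encoder has to do, not the
# repo's actual implementation:
import json

class ClassEncoder(json.JSONEncoder):
    def default(self, o):
        if isinstance(o, type):
            # e.g. kwargs['env'] / kwargs['baseline'] are classes, not instances
            return {'$class': o.__module__ + '.' + o.__name__}
        if callable(o):
            # e.g. hidden_nonlinearity=tf.tanh
            return {'$function': getattr(o, '__name__', repr(o))}
        return json.JSONEncoder.default(self, o)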
def __init__(
        self,
        name,
        exp_dir,
        n_itr,
        stop_cond,
        verbose=True,
):
    self.name = name
    logger.configure(dir=os.path.join(exp_dir, name), format_strs=['csv', 'stdout', 'log'])
    self.n_itr = n_itr
    # stop_cond is an event-like flag shared across workers; the main loop in
    # __call__ runs until it is set.
    self.stop_cond = stop_cond
    self.verbose = verbose
    self.step_counter, self.synch_counter = 0, 0
    self.sess = None
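# The stop_cond passed in above behaves like a multiprocessing.Event (it is
# polled with is_set() in __call__ below). A hypothetical construction of a
# worker, assuming a WorkerData subclass exists elsewhere in the repo:
from multiprocessing import Event

stop_cond = Event()  # shared by all workers; any worker can signal shutdown
data_worker = WorkerData(name='Data', exp_dir=exp_dir, n_itr=500, stop_cond=stop_cond)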
def run_experiment(**kwargs):
    # NOTE: the original concatenated EXP_NAME and exp_name without a path
    # separator; os.path.join keeps the two components distinct.
    exp_dir = os.path.join(os.getcwd(), 'data', EXP_NAME, kwargs.get('exp_name', ''))
    logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'], snapshot_mode='last')
    with open(os.path.join(exp_dir, 'params.json'), 'w') as f:
        json.dump(kwargs, f, indent=2, sort_keys=True, cls=ClassEncoder)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get('gpu_frac', 0.95)
    sess = tf.Session(config=config)

    with sess.as_default() as sess:
        # Instantiate classes
        set_seed(kwargs['seed'])
        baseline = kwargs['baseline']()
        env = normalize(kwargs['env']())  # Wrappers?

        policy = GaussianMLPPolicy(
            name="meta-policy",
            obs_dim=np.prod(env.observation_space.shape),
            action_dim=np.prod(env.action_space.shape),
            hidden_sizes=kwargs['policy_hidden_sizes'],
            learn_std=kwargs['policy_learn_std'],
            hidden_nonlinearity=kwargs['policy_hidden_nonlinearity'],
            output_nonlinearity=kwargs['policy_output_nonlinearity'],
        )

        dynamics_model = MLPDynamicsEnsemble(
            'dynamics-ensemble',
            env=env,
            num_models=kwargs['num_models'],
            # NOTE: the misspelled keys ('dyanmics_...', 'rolling_average_persitency')
            # are kept as-is to match the config dicts used elsewhere in the repo.
            hidden_nonlinearity=kwargs['dyanmics_hidden_nonlinearity'],
            hidden_sizes=kwargs['dynamics_hidden_sizes'],
            output_nonlinearity=kwargs['dyanmics_output_nonlinearity'],
            learning_rate=kwargs['dynamics_learning_rate'],
            batch_size=kwargs['dynamics_batch_size'],
            buffer_size=kwargs['dynamics_buffer_size'],
            rolling_average_persitency=kwargs['rolling_average_persitency'],
        )

        env_sampler = Sampler(
            env=env,
            policy=policy,
            num_rollouts=kwargs['num_rollouts'],
            max_path_length=kwargs['max_path_length'],
            n_parallel=kwargs['n_parallel'],
        )

        model_sampler = METRPOSampler(
            env=env,
            policy=policy,
            dynamics_model=dynamics_model,
            num_rollouts=kwargs['imagined_num_rollouts'],
            max_path_length=kwargs['max_path_length'],
            deterministic=kwargs['deterministic'],
        )

        dynamics_sample_processor = ModelSampleProcessor(
            baseline=baseline,
            discount=kwargs['discount'],
            gae_lambda=kwargs['gae_lambda'],
            normalize_adv=kwargs['normalize_adv'],
            positive_adv=kwargs['positive_adv'],
        )

        model_sample_processor = SampleProcessor(
            baseline=baseline,
            discount=kwargs['discount'],
            gae_lambda=kwargs['gae_lambda'],
            normalize_adv=kwargs['normalize_adv'],
            positive_adv=kwargs['positive_adv'],
        )

        algo = TRPO(
            policy=policy,
            step_size=kwargs['step_size'],
        )

        trainer = Trainer(
            algo=algo,
            policy=policy,
            env=env,
            model_sampler=model_sampler,
            env_sampler=env_sampler,
            model_sample_processor=model_sample_processor,
            dynamics_sample_processor=dynamics_sample_processor,
            dynamics_model=dynamics_model,
            n_itr=kwargs['n_itr'],
            dynamics_model_max_epochs=kwargs['dynamics_max_epochs'],
            log_real_performance=kwargs['log_real_performance'],
            steps_per_iter=kwargs['steps_per_iter'],
            sample_from_buffer=kwargs['sample_from_buffer'],
            sess=sess,
        )
        trainer.train()
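# The launcher above reads a fixed set of keys from kwargs. A sketch of a config
# with placeholder values, collected from the lookups above; the class names
# (HalfCheetahEnv, LinearFeatureBaseline) and all numeric values are
# illustrative assumptions, not the repo's defaults:
config_kwargs = dict(
    seed=1,
    env=HalfCheetahEnv,
    baseline=LinearFeatureBaseline,
    # policy
    policy_hidden_sizes=(64, 64),
    policy_learn_std=True,
    policy_hidden_nonlinearity=tf.tanh,
    policy_output_nonlinearity=None,
    # dynamics ensemble (misspelled keys kept to match the launcher)
    num_models=5,
    dyanmics_hidden_nonlinearity='relu',
    dynamics_hidden_sizes=(512, 512),
    dyanmics_output_nonlinearity=None,
    dynamics_learning_rate=1e-3,
    dynamics_batch_size=256,
    dynamics_buffer_size=25000,
    rolling_average_persitency=0.99,
    # sampling
    num_rollouts=5,
    imagined_num_rollouts=20,
    max_path_length=200,
    n_parallel=1,
    deterministic=False,
    # advantage estimation
    discount=0.99,
    gae_lambda=1.0,
    normalize_adv=True,
    positive_adv=False,
    # TRPO / training loop
    step_size=0.01,
    n_itr=100,
    dynamics_max_epochs=50,
    log_real_performance=True,
    steps_per_iter=1,
    sample_from_buffer=False,
)
run_experiment(exp_name='metrpo-test', **config_kwargs)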
def run_experiment(**kwargs):
    exp_dir = os.path.join(os.getcwd(), 'data', EXP_NAME)
    logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'], snapshot_mode='last')
    with open(os.path.join(exp_dir, 'params.json'), 'w') as f:
        json.dump(kwargs, f, indent=2, sort_keys=True, cls=ClassEncoder)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get('gpu_frac', 0.95)
    sess = tf.Session(config=config)

    with sess.as_default() as sess:
        # Instantiate classes
        set_seed(kwargs['seed'])
        baseline = kwargs['baseline']()
        env = normalize(kwargs['env']())

        policy = GaussianMLPPolicy(
            name="policy",
            obs_dim=np.prod(env.observation_space.shape),
            action_dim=np.prod(env.action_space.shape),
            hidden_sizes=kwargs['hidden_sizes'],
            learn_std=kwargs['learn_std'],
            hidden_nonlinearity=kwargs['hidden_nonlinearity'],
            output_nonlinearity=kwargs['output_nonlinearity'],
            init_std=kwargs['init_std'],
            squashed=kwargs['squashed'],
        )

        # Load policy here (see the sketch after this function)

        sampler = Sampler(
            env=env,
            policy=policy,
            num_rollouts=kwargs['num_rollouts'],
            max_path_length=kwargs['max_path_length'],
            n_parallel=kwargs['n_parallel'],
        )

        sample_processor = SingleSampleProcessor(
            baseline=baseline,
            discount=kwargs['discount'],
            gae_lambda=kwargs['gae_lambda'],
            normalize_adv=kwargs['normalize_adv'],
            positive_adv=kwargs['positive_adv'],
        )

        algo = PPO(
            policy=policy,
            learning_rate=kwargs['learning_rate'],
            clip_eps=kwargs['clip_eps'],
            max_epochs=kwargs['num_ppo_steps'],
            entropy_bonus=kwargs['entropy_bonus'],
        )

        trainer = Trainer(
            algo=algo,
            policy=policy,
            env=env,
            sampler=sampler,
            sample_processor=sample_processor,
            n_itr=kwargs['n_itr'],
            sess=sess,
        )
        trainer.train()
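# The "# Load policy here" placeholder above presumably restores pre-trained
# weights before sampling. One possibility, assuming the logger's 'last'
# snapshot mode wrote a joblib pickle containing the policy (both the path and
# the 'policy' key are assumptions, not confirmed by this file):
import joblib

saved = joblib.load(os.path.join(load_dir, 'params.pkl'))  # load_dir is hypothetical
policy = saved['policy']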
def __call__(
        self,
        exp_dir,
        policy_pickle,
        env_pickle,
        baseline_pickle,
        dynamics_model_pickle,
        feed_dict,
        queue_prev,
        queue,
        queue_next,
        remote,
        start_itr,
        n_itr,
        stop_cond,
        need_query,
        auto_push,
        config,
):
    time_start = time.time()
    self.name = current_process().name
    logger.configure(dir=exp_dir + '/' + self.name,
                     format_strs=['csv', 'stdout', 'log'],
                     snapshot_mode=self.snapshot_mode,
                     snapshot_gap=self.snapshot_gap)
    self.n_itr = n_itr
    self.queue_prev = queue_prev
    self.queue = queue
    self.queue_next = queue_next
    self.stop_cond = stop_cond

    # FIXME: specify CPU/GPU usage here
    import tensorflow as tf

    def _init_vars():
        sess = tf.get_default_session()
        sess.run(tf.initializers.global_variables())

    with tf.Session(config=config).as_default():
        self.construct_from_feed_dict(
            policy_pickle,
            env_pickle,
            baseline_pickle,
            dynamics_model_pickle,
            feed_dict,
        )
        _init_vars()

        # Warm up: wait for the trainer's handshake before entering the loop.
        self.itr_counter = start_itr
        if self.verbose:
            print('{} waiting for starting msg from trainer...'.format(self.name))
        assert remote.recv() == 'prepare start'
        self.prepare_start()
        remote.send('loop ready')
        logger.dumpkvs()
        logger.log("\n============== {} is ready =============".format(self.name))

        assert remote.recv() == 'start loop'
        total_push, total_synch, total_step = 0, 0, 0
        while not self.stop_cond.is_set():
            if self.verbose:
                logger.log("\n------------------------- {} starting new loop ------------------".format(self.name))

            if need_query:
                # Poll: ask the upstream worker to push its latest result.
                time_poll = time.time()
                queue_prev.put('push')
                time_poll = time.time() - time_poll
                logger.logkv('{}-TimePoll'.format(self.name), time_poll)

            do_push, do_synch, do_step = self.process_queue()

            # step
            if do_step:
                self.itr_counter += 1
                self.step()
                if auto_push:
                    do_push += 1
                    self.push()
                # Assuming auto-push is enabled for every worker that steps
                assert do_push == 1
                assert do_step == 1

            total_push += do_push
            total_synch += do_synch
            total_step += do_step
            logger.logkv(self.name + '-TimeSoFar', time.time() - time_start)
            logger.logkv(self.name + '-TotalPush', total_push)
            logger.logkv(self.name + '-TotalSynch', total_synch)
            logger.logkv(self.name + '-TotalStep', total_step)
            if total_synch > 0:
                logger.logkv(self.name + '-StepPerSynch', total_step / total_synch)
            logger.dumpkvs()
            logger.log("\n========================== {} {}, total {} ===================".format(
                self.name,
                (do_push, do_synch, do_step),
                (total_push, total_synch, total_step),
            ))
            self.set_stop_cond()

        remote.send('loop done')
        logger.log("\n================== {} closed ===================".format(self.name))
        remote.send('worker closed')
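# For context, __call__ above is the entry point of a worker process. A sketch
# of trainer-side wiring that would satisfy its handshake protocol, assuming a
# ring of three workers (Data -> Model -> Policy) passing messages through
# queues; `workers`, `pickles`, `feed_dicts`, `need_query` and `auto_push` are
# placeholder names, not the repo's actual trainer:
from multiprocessing import Event, Pipe, Process, Queue

n_workers = 3
queues = [Queue() for _ in range(n_workers)]
remotes, worker_remotes = zip(*[Pipe() for _ in range(n_workers)])
stop_cond = Event()

ps = [
    Process(
        target=workers[i],  # Worker.__call__ runs in the child process
        args=(exp_dir, *pickles, feed_dicts[i],
              queues[i - 1], queues[i], queues[(i + 1) % n_workers],
              worker_remotes[i], 0, n_itr, stop_cond,
              need_query[i], auto_push[i], config),
    )
    for i in range(n_workers)
]
for p in ps:
    p.start()

# Handshake mirroring the asserts and sends in __call__:
for r in remotes:
    r.send('prepare start')
assert all(r.recv() == 'loop ready' for r in remotes)
for r in remotes:
    r.send('start loop')
for r in remotes:
    assert r.recv() == 'loop done'
for r in remotes:
    assert r.recv() == 'worker closed'
for p in ps:
    p.join()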