import json
import os

# NOTE: the project-specific names used below (EXP_NAME, logger, normalize,
# ClassEncoder, the dynamics-model / controller / sampler / trainer classes)
# are assumed to be imported from the surrounding repo; only the
# standard-library imports are added here.


def run_experiment(**config):
    # Set up the experiment directory and logging, and persist the config.
    exp_dir = os.path.join(os.getcwd(), 'data', EXP_NAME, config.get('exp_name', ''))
    logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'], snapshot_mode='last')
    with open(os.path.join(exp_dir, 'params.json'), 'w') as f:
        json.dump(config, f, indent=2, sort_keys=True, cls=ClassEncoder)

    env = normalize(config['env'](reset_every_episode=True, task=config['task']))

    # Feed-forward (MLP) dynamics model.
    dynamics_model = MLPDynamicsModel(
        name="dyn_model",
        env=env,
        learning_rate=config['learning_rate'],
        hidden_sizes=config['hidden_sizes'],
        valid_split_ratio=config['valid_split_ratio'],
        rolling_average_persitency=config['rolling_average_persitency'],  # (sic) keyword spelled this way by the model class
        hidden_nonlinearity=config['hidden_nonlinearity'],
        batch_size=config['batch_size'],
    )

    # MPC controller that plans through the learned dynamics model.
    policy = MPCController(
        name="policy",
        env=env,
        dynamics_model=dynamics_model,
        discount=config['discount'],
        n_candidates=config['n_candidates'],
        horizon=config['horizon'],
        use_cem=config['use_cem'],
        num_cem_iters=config['num_cem_iters'],
    )

    sampler = Sampler(
        env=env,
        policy=policy,
        num_rollouts=config['num_rollouts'],
        max_path_length=config['max_path_length'],
        n_parallel=config['n_parallel'],
    )

    sample_processor = ModelSampleProcessor(recurrent=False)

    algo = Trainer(
        env=env,
        policy=policy,
        dynamics_model=dynamics_model,
        sampler=sampler,
        dynamics_sample_processor=sample_processor,
        n_itr=config['n_itr'],
        initial_random_samples=config['initial_random_samples'],
        dynamics_model_max_epochs=config['dynamic_model_epochs'],
    )
    algo.train()
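# ---------------------------------------------------------------------------
# Illustrative sketch of how this script might be driven. Every value below,
# including the env class name HalfCheetahEnv and all hyperparameters, is a
# hypothetical placeholder for illustration, not a setting taken from the
# original experiments.
if __name__ == "__main__":
    example_config = {
        'exp_name': 'mlp_dyn_test',
        'env': HalfCheetahEnv,        # assumed env class; substitute your own
        'task': None,
        'learning_rate': 1e-3,
        'hidden_sizes': (512, 512),
        'valid_split_ratio': 0.1,
        'rolling_average_persitency': 0.99,
        'hidden_nonlinearity': 'relu',
        'batch_size': 500,
        'discount': 1.0,
        'n_candidates': 1000,
        'horizon': 10,
        'use_cem': False,
        'num_cem_iters': 5,
        'num_rollouts': 5,
        'max_path_length': 1000,
        'n_parallel': 1,
        'n_itr': 50,
        'initial_random_samples': True,
        'dynamic_model_epochs': 50,
    }
    # This variant collects keyword arguments, so the dict is unpacked here.
    run_experiment(**example_config)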
def run_experiment(config):
    # Set up the experiment directory and logging, and persist the config.
    exp_dir = os.path.join(os.getcwd(), 'data', EXP_NAME, config.get('exp_name', ''))
    logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'], snapshot_mode='last')
    with open(os.path.join(exp_dir, 'params.json'), 'w') as f:
        json.dump(config, f, indent=2, sort_keys=True, cls=ClassEncoder)

    # Simulator ports: one for the policy's env, the rest for parallel sampling.
    port_policy = 21001
    ports = [19999, 20001, 22001, 23001, 24001]

    _env = normalize(config['env'](reset_every_episode=True, task=config['task'], port=port_policy))

    # Recurrent (RNN) dynamics model.
    dynamics_model = RNNDynamicsModel(
        name="dyn_model",
        env=_env,
        hidden_sizes=config['hidden_sizes'],
        learning_rate=config['learning_rate'],
        backprop_steps=config['backprop_steps'],
        cell_type=config['cell_type'],
        batch_size=config['batch_size'],
    )

    # MPC controller that plans through the recurrent dynamics model.
    policy = RNNMPCController(
        name="policy",
        env=_env,
        dynamics_model=dynamics_model,
        discount=config['discount'],
        n_candidates=config['n_candidates'],
        horizon=config['horizon'],
        use_cem=config['use_cem'],
        num_cem_iters=config['num_cem_iters'],
    )

    sampler = Sampler(
        env=_env,
        policy=policy,
        num_rollouts=config['num_rollouts'],
        max_path_length=config['max_path_length'],
        n_parallel=config['n_parallel'],
        ports=ports,
    )

    # recurrent=True keeps trajectories intact for the recurrent model.
    sample_processor = ModelSampleProcessor(recurrent=True)

    algo = Trainer(
        env=_env,
        policy=policy,
        dynamics_model=dynamics_model,
        sampler=sampler,
        sample_processor=sample_processor,
        n_itr=config['n_itr'],
        initial_random_samples=config['initial_random_samples'],
        dynamics_model_max_epochs=config['dynamic_model_epochs'],
    )
    algo.train()
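# ---------------------------------------------------------------------------
# Illustrative sketch only: shows the config keys specific to the recurrent
# variant (cell_type, backprop_steps). All values are hypothetical
# placeholders; the shared keys mirror the MLP example above. Note that this
# variant takes the config dict positionally rather than as keyword arguments.
if __name__ == "__main__":
    example_config = {
        'exp_name': 'rnn_dyn_test',
        'env': HalfCheetahEnv,       # assumed env class; substitute your own
        'task': None,
        'hidden_sizes': (256,),
        'learning_rate': 1e-3,
        'backprop_steps': 100,       # truncation window for backprop through time
        'cell_type': 'lstm',         # assumed option; check the model class for supported cells
        'batch_size': 10,
        'discount': 1.0,
        'n_candidates': 1000,
        'horizon': 10,
        'use_cem': False,
        'num_cem_iters': 5,
        'num_rollouts': 5,
        'max_path_length': 1000,
        'n_parallel': 5,
        'n_itr': 50,
        'initial_random_samples': True,
        'dynamic_model_epochs': 50,
    }
    run_experiment(example_config)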
def run_experiment(config):
    # Set up the experiment directory and logging, and persist the config.
    exp_dir = os.path.join(os.getcwd(), 'data', EXP_NAME, config.get('exp_name', ''))
    logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'], snapshot_mode='last')
    with open(os.path.join(exp_dir, 'params.json'), 'w') as f:
        json.dump(config, f, indent=2, sort_keys=True, cls=ClassEncoder)

    # Simulator ports: one for the policy's env, the rest for parallel sampling.
    port_policy = 21001
    ports = [19999, 20001, 22001, 23001, 24001]
    # ports = [25001, 26001]
    # env = normalize(config['env'](reset_every_episode=True, task=config['task'], port=19999))
    # _envs = [normalize(config['env'](reset_every_episode=True, port=pt)) for pt in ports]

    # Env used by the policy, dynamics model, etc.
    _env = normalize(config['env'](reset_every_episode=True, task=config['task'], port=port_policy))

    # Meta-learned MLP dynamics model: learning_rate is the outer-loop step
    # size, inner_learning_rate the adaptation (inner-loop) step size.
    dynamics_model = MetaMLPDynamicsModel(
        name="dyn_model",
        env=_env,
        meta_batch_size=config['meta_batch_size'],
        inner_learning_rate=config['inner_learning_rate'],
        learning_rate=config['learning_rate'],
        hidden_sizes=config['hidden_sizes_model'],
        valid_split_ratio=config['valid_split_ratio'],
        rolling_average_persitency=config['rolling_average_persitency'],  # (sic) keyword spelled this way by the model class
        hidden_nonlinearity=config['hidden_nonlinearity_model'],
        batch_size=config['adapt_batch_size'],
    )

    # MPC controller that plans through the meta-learned dynamics model.
    policy = MPCController(
        name="policy",
        env=_env,
        dynamics_model=dynamics_model,
        discount=config['discount'],
        n_candidates=config['n_candidates'],
        horizon=config['horizon'],
        use_cem=config['use_cem'],
        num_cem_iters=config['num_cem_iters'],
    )

    sampler = Sampler(
        env=_env,
        policy=policy,
        n_parallel=config['n_parallel'],
        max_path_length=config['max_path_length'],
        num_rollouts=config['num_rollouts'],
        adapt_batch_size=config['adapt_batch_size'],  # comment this out to disable adaptation during rollouts
        ports=ports,
    )

    sample_processor = ModelSampleProcessor(recurrent=True)

    algo = Trainer(
        env=_env,
        policy=policy,
        dynamics_model=dynamics_model,
        sampler=sampler,
        sample_processor=sample_processor,
        n_itr=config['n_itr'],
        initial_random_samples=config['initial_random_samples'],
        dynamics_model_max_epochs=config['dynamic_model_epochs'],
    )
    algo.train()
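# ---------------------------------------------------------------------------
# Illustrative sketch only: highlights the meta-learning keys (meta_batch_size,
# inner_learning_rate, adapt_batch_size, and the *_model variants of the
# network keys). All values are hypothetical placeholders, not settings from
# the original experiments.
if __name__ == "__main__":
    example_config = {
        'exp_name': 'meta_mlp_dyn_test',
        'env': HalfCheetahEnv,            # assumed env class; substitute your own
        'task': None,
        'meta_batch_size': 10,            # tasks per meta-update
        'inner_learning_rate': 0.01,      # adaptation (inner-loop) step size
        'learning_rate': 1e-3,            # meta (outer-loop) step size
        'hidden_sizes_model': (512, 512),
        'valid_split_ratio': 0.1,
        'rolling_average_persitency': 0.99,
        'hidden_nonlinearity_model': 'relu',
        'adapt_batch_size': 16,           # recent transitions used to adapt online
        'discount': 1.0,
        'n_candidates': 1000,
        'horizon': 10,
        'use_cem': False,
        'num_cem_iters': 5,
        'n_parallel': 5,
        'max_path_length': 1000,
        'num_rollouts': 5,
        'n_itr': 50,
        'initial_random_samples': True,
        'dynamic_model_epochs': 50,
    }
    run_experiment(example_config)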