def __init__(self, params):

    #######################
    ## AGENT PARAMS
    #######################

    agent_params = {
        'n_layers': params['n_layers'],
        'size': params['size'],
        'learning_rate': params['learning_rate'],
        'max_replay_buffer_size': params['max_replay_buffer_size'],
    }

    self.params = params
    self.params['agent_class'] = BCAgent  ## HW1: you will modify this
    self.params['agent_params'] = agent_params

    ################
    ## RL TRAINER
    ################

    self.rl_trainer = RL_Trainer(self.params)  ## HW1: you will modify this

    #######################
    ## LOAD EXPERT POLICY
    #######################

    print('Loading expert policy from...', self.params['expert_policy_file'])
    self.loaded_expert_policy = LoadedGaussianPolicy(self.params['expert_policy_file'])
    print('Done restoring expert policy...')
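# A minimal usage sketch. Assumptions: this __init__ belongs to a trainer
# class (called BC_Trainer here, hypothetical name), and the call signature
# is mirrored from the EMP_Trainer call in main() below; the actual
# run_training_loop may differ. The params dict lists only the keys this
# constructor reads; a real run needs the full key set from main()'s parser.
params = {
    'n_layers': 2,
    'size': 64,
    'learning_rate': 5e-3,
    'max_replay_buffer_size': 1000000,
    'expert_policy_file': 'cs285/policies/experts/Walker2d.pkl',
    'n_iter': 1,
}
trainer = BC_Trainer(params)  # hypothetical class name
trainer.run_training_loop(params['n_iter'], trainer.loaded_expert_policy)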
def __init__(self, params):

    #######################
    ## AGENT PARAMS
    #######################

    agent_params = {
        'n_layers': params['n_layers'],
        'size': params['size'],
        'learning_rate': params['learning_rate'],
        'max_replay_buffer_size': params['max_replay_buffer_size'],
        'siren': params['siren'],
        'train_separate_params': params['train_separate_params'],
        'supervision_mode': params['supervision_mode'],
        'offset_learning_rate': params['offset_learning_rate'],
        'epsilon_s': params['epsilon_s'],
        'auto_cast': params['auto_cast'],
        'gradient_loss_scale': params['gradient_loss_scale'],
        'additional_activation': params['additional_activation'],
        'omega': params['omega'],
    }

    self.params = params
    self.params['agent_class'] = BCAgent  ## HW1: you will modify this
    self.params['agent_params'] = agent_params

    ################
    ## RL TRAINER
    ################

    self.rl_trainer = RL_Trainer(self.params)  ## HW1: you will modify this

    #######################
    ## LOAD EXPERT POLICY
    #######################

    print('Loading expert policy from...', self.params['expert_policy_file'])
    self.loaded_expert_policy = LoadedGaussianPolicy(self.params['expert_policy_file'])
    print('Done restoring expert policy...')
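# The variant above reads several keys (siren, omega, etc.) that main()'s
# parser below does not define. A sketch of plausible argparse additions;
# every default and help string here is an assumption, not from the source.
parser.add_argument('--siren', action='store_true')  # use a SIREN (sinusoidal) policy network
parser.add_argument('--train_separate_params', action='store_true')
parser.add_argument('--supervision_mode', type=str, default='action')
parser.add_argument('--offset_learning_rate', type=float, default=5e-3)
parser.add_argument('--epsilon_s', type=float, default=0.0)
parser.add_argument('--auto_cast', action='store_true')  # mixed-precision toggle (assumed)
parser.add_argument('--gradient_loss_scale', type=float, default=1.0)
parser.add_argument('--additional_activation', type=str, default=None)
parser.add_argument('--omega', type=float, default=30.0)  # SIREN frequency; 30 is the SIREN paper's default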
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--emp_epoch', type=int, default=6)
    parser.add_argument('--eta', type=int, default=40)
    parser.add_argument('--bins', type=int, default=20)
    parser.add_argument('--load_dict', action='store_true')  # was type=bool, which is a footgun: bool('False') is True
    parser.add_argument('--load_path', type=str)
    parser.add_argument('--batch_size', type=int, default=100)
    parser.add_argument('--vertices', type=int, default=6)
    parser.add_argument('--obs_space', type=int, default=17)
    parser.add_argument('--expert_policy_file', '-epf', type=str, required=True)  # relative to where you're running this script from
    parser.add_argument('--expert_data', '-ed', type=str, required=True)  # relative to where you're running this script from
    parser.add_argument('--env_name', '-env', type=str,
                        help='choices: Ant-v2, Humanoid-v2, Walker2d-v2, HalfCheetah-v2, Hopper-v2',
                        required=True)
    parser.add_argument('--exp_name', '-exp', type=str, required=True)
    parser.add_argument('--do_dagger', action='store_true')
    parser.add_argument('--ep_len', type=int)
    parser.add_argument('--num_agent_train_steps_per_iter', type=int, default=1000)  # number of gradient steps for training policy (per iter in n_iter)
    parser.add_argument('--n_iter', '-n', type=int, default=1)
    parser.add_argument('--eval_batch_size', type=int, default=1000)  # eval data collected (in the env) for logging metrics
    parser.add_argument('--train_batch_size', type=int, default=100)  # number of sampled data points to be used per gradient/train step
    parser.add_argument('--n_layers', type=int, default=2)  # depth of policy to be learned
    parser.add_argument('--size', type=int, default=64)  # width of each layer of policy to be learned
    parser.add_argument('--learning_rate', '-lr', type=float, default=5e-3)  # LR for supervised learning
    parser.add_argument('--video_log_freq', type=int, default=5)
    parser.add_argument('--scalar_log_freq', type=int, default=1)
    parser.add_argument('--no_gpu', '-ngpu', action='store_true')
    parser.add_argument('--which_gpu', type=int, default=0)
    parser.add_argument('--max_replay_buffer_size', type=int, default=1000000)
    parser.add_argument('--save_params', action='store_true')
    parser.add_argument('--seed', type=int, default=2)
    args = parser.parse_args()

    # convert args to dictionary
    params = vars(args)

    #############
    ## EXPERT POLICY
    #############

    print('Loading expert policy from...', params['expert_policy_file'])
    expert_policy = LoadedGaussianPolicy(params['expert_policy_file'])
    print('Done restoring expert policy...')

    ###################
    ### INIT PATH
    ###################

    params['filestem'] = (f"cs285/data/emp_dagger_buckets_{params['env_name']}"
                          f"_{params['bins']}_epoch{params['emp_epoch']}"
                          f"_eta{int(params['eta'])}")

    ###################
    ### RUN TRAINING
    ###################

    trainer = EMP_Trainer(params)
    trainer.run_training_loop(params['n_iter'], expert_policy)
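# Example invocation (the script name and expert-data path are hypothetical;
# the flags match the parser above, and the expert-policy path appears in the
# rollout script below):
#
#   python cs285/scripts/run_emp_dagger.py \
#       --expert_policy_file cs285/policies/experts/Walker2d.pkl \
#       --expert_data cs285/expert_data/expert_data_Walker2d-v2.pkl \
#       --env_name Walker2d-v2 --exp_name emp_walker --do_dagger --n_iter 10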
        obs, rew, done, info = unwrapped_env.orig_step_(action_cont)
        return (obs, rew, done, info)

    # change the action space: K discrete bins per action dimension
    # (old-gym [min, max] pairs form; on gym >= 0.9.6 use spaces.MultiDiscrete([K] * naction))
    env.action_space = spaces.MultiDiscrete([[0, K - 1] for _ in range(naction)])
    unwrapped_env.step = discretizing_step
    unwrapped_env.reset = discretizing_reset
    return env


# imports needed by the script below (LoadedGaussianPolicy import path assumed)
import gym
from gym import spaces
from cs285.policies.loaded_gaussian_policy import LoadedGaussianPolicy

# Make the gym environment and roll out the expert in it
expert_policy = LoadedGaussianPolicy('cs285/policies/experts/Walker2d.pkl')
env = gym.make("Walker2d-v2")  # must match the expert; "Hopper-v2" has different obs/action dims
obs = env.reset()
print("obs shape", obs.shape)
print("low", env.action_space.low)
print("high", env.action_space.high)

count = 0
for _ in range(10000):
    action = expert_policy.get_action(obs)
    print("action", action)
    obs, reward, done, info = env.step(action)  # step with the expert action
    # print("reward", reward)
    if done:
        print("Done")
        obs = env.reset()  # reset so the rollout can continue
env.close()  # close only after the rollout finishes
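# A self-contained sketch of what the full discretizing wrapper above might
# look like. Only orig_step_, discretizing_step, discretizing_reset, K, and
# naction come from the fragment; the function name make_discrete_env, the
# orig_reset_ attribute, and the linspace binning are assumptions.
import gym
import numpy as np
from gym import spaces


def make_discrete_env(env, K):
    unwrapped_env = env.unwrapped
    naction = env.action_space.shape[0]
    # K evenly spaced continuous values per action dimension, shape (K, naction)
    bins = np.linspace(env.action_space.low, env.action_space.high, K)

    # keep handles to the original dynamics before monkey-patching
    unwrapped_env.orig_step_ = unwrapped_env.step
    unwrapped_env.orig_reset_ = unwrapped_env.reset

    def discretizing_reset():
        return unwrapped_env.orig_reset_()

    def discretizing_step(action):
        # map each discrete index in [0, K-1] back to its continuous bin value
        action_cont = bins[np.asarray(action), np.arange(naction)]
        obs, rew, done, info = unwrapped_env.orig_step_(action_cont)
        return (obs, rew, done, info)

    # K discrete bins per action dimension (modern-gym nvec form)
    env.action_space = spaces.MultiDiscrete([K] * naction)
    unwrapped_env.step = discretizing_step
    unwrapped_env.reset = discretizing_reset
    return env


# usage: wrap an environment, then sample/step with discrete actions
# env = make_discrete_env(gym.make("Walker2d-v2"), K=20)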