import os
import pickle
import pprint
import time as timer
import multiprocessing as mp

# Assumed imports, following mjrl's package layout; adjust to this project's
# actual module paths if they differ.
from mjrl.utils.gym_env import GymEnv
from mjrl.baselines.mlp_baseline import MLPBaseline
from mjrl.policies.gaussian_mlp import MLP
from mjrl.algos.npg_cg import NPG
from mjrl.utils.train_agent import train_agent

# Remember the launch directory; worker processes chdir back here so that
# relative paths in the job config resolve consistently.
cwd = os.getcwd()


def single_process(job):
    # Allow this worker process to parallelize things internally.
    curr_proc = mp.current_process()
    curr_proc.daemon = False

    # Create a directory for the job results.
    os.chdir(cwd)
    dirpath = os.path.join(job['save_dir'], job['job_name'])
    os.makedirs(dirpath, exist_ok=True)

    # Start job.
    job_start_time = timer.time()
    print('Started New Job : ', job['job_name'], '=======================')
    print('Job specifications : \n', job)

    # Make env
    e = GymEnv(job['env_name'])

    # Make baseline
    baseline = MLPBaseline(e.spec)

    # Save job details, augmented with metadata read off the environment.
    job['horizon'] = e.horizon
    job['ctrl_timestep'] = e.env.env.dt
    job['sim_timestep'] = e.env.env.model.opt.timestep
    # job['sim_skip'] = e.env.env.skip
    with open(os.path.join(dirpath, 'job_data.txt'), 'w') as job_data_file:
        pprint.pprint(job, stream=job_data_file)

    # Make policy (???vik: sizes are hard coded)
    if 'init_policy' in job:
        policy = MLP(e.spec, init_log_std=job['init_std'],
                     hidden_sizes=(32, 32), seed=job['seed'])
        with open(job['init_policy'], 'rb') as f:
            loaded_policy = pickle.load(f)
        loaded_params = loaded_policy.get_param_values()
        print('log std values in loaded policy = ')
        print(loaded_params[-policy.m:])
        # NOTE: if the log std is too small (say < -2.0; it is problem
        # dependent and intuition should be used), we need to bump it up
        # so that the policy explores:
        # loaded_params[-policy.m:] += 1.0
        policy.set_param_values(loaded_params)
        del job['init_policy']
    else:
        policy = MLP(e.spec, init_log_std=job['init_std'],
                     hidden_sizes=(32, 32), seed=job['seed'])

    # Agent
    agent = NPG(e, policy, baseline, seed=job['seed'],
                normalized_step_size=job['normalized_step_size'],
                save_logs=job['save_logs'],
                FIM_invert_args=job['FIM_invert_args'])

    # Train agent
    train_agent(
        job_name=dirpath,
        agent=agent,
        seed=job['seed'],
        niter=job['niter'],
        gamma=job['gamma'],
        gae_lambda=job['gae_lambda'],
        num_cpu=job['num_cpu'],
        sample_mode=job['sample_mode'],
        num_traj=job['num_traj'],
        evaluation_rollouts=job['evaluation_rollouts'],
        save_freq=job['save_freq'],
        plot_keys={'stoc_pol_mean', 'stoc_pol_std'},
    )

    total_job_time = timer.time() - job_start_time
    print('Job', job['job_name'],
          'took %f seconds ==============' % total_job_time)
    return total_job_time
# Variant of single_process that reads an explicit 'output_dir', takes
# configurable 'hidden_sizes', and supports sample-count based training
# via 'num_samples'. It shadows the definition above when both are loaded.
def single_process(job):
    # Allow this worker process to parallelize things internally.
    curr_proc = mp.current_process()
    curr_proc.daemon = False

    # Create a directory for the job results.
    job_dir = job['output_dir']
    if not os.path.isdir(job_dir):
        os.mkdir(job_dir)

    # Start job.
    job_start_time = timer.time()
    print('Started New Job : ', job['job_name'], '=======================')
    print('Job specifications : \n', job)

    # Make env
    env_name = job['env_name']
    # adept_envs.global_config.set_config(env_name, {
    #     'robot_params': job['robot'],
    #     **job.get('env_params', {}),
    # })
    e = GymEnv(env_name)

    # Make baseline
    baseline = MLPBaseline(e.spec)

    # Save job details, augmented with metadata read off the environment.
    job['horizon'] = e.horizon
    job['ctrl_timestep'] = e.env.env.dt
    job['sim_timestep'] = e.env.env.model.opt.timestep
    # job['sim_skip'] = e.env.env.skip
    with open(os.path.join(job_dir, 'job_data.txt'), 'w') as job_data_file:
        pprint.pprint(job, stream=job_data_file)

    # Make policy, optionally warm-started from a pickled policy.
    if 'init_policy' in job:
        policy = MLP(e.spec, init_log_std=job['init_std'],
                     hidden_sizes=(32, 32), seed=job['seed'])
        with open(job['init_policy'], 'rb') as f:
            loaded_policy = pickle.load(f)
        loaded_params = loaded_policy.get_param_values()
        print("log std values in loaded policy = ")
        print(loaded_params[-policy.m:])
        # NOTE: if the log std is too small (say < -2.0; it is problem
        # dependent and intuition should be used), we need to bump it up
        # so that the policy explores.
        loaded_params[-policy.m:] += job['init_std']
        policy.set_param_values(loaded_params)
        del job['init_policy']
    else:
        policy = MLP(
            e.spec,
            init_log_std=job['init_std'],
            hidden_sizes=job['hidden_sizes'],  # hidden_sizes=(32, 32)
            seed=job['seed'])

    # Agent
    agent = NPG(
        e, policy, baseline,
        seed=job['seed'],
        normalized_step_size=job['normalized_step_size'],
        save_logs=job['save_logs'],
        FIM_invert_args=job['FIM_invert_args'])

    # Train agent
    train_agent(
        job_name=job['job_name'],
        agent=agent,
        # save_dir=job_dir,
        seed=job['seed'],
        niter=job['niter'],
        gamma=job['gamma'],
        gae_lambda=job['gae_lambda'],
        num_cpu=job['num_cpu'],
        sample_mode=job['sample_mode'],
        num_traj=job.get('num_traj'),
        num_samples=job.get('num_samples'),
        evaluation_rollouts=job['evaluation_rollouts'],
        save_freq=job['save_freq'],
        plot_keys={'stoc_pol_mean', 'stoc_pol_std'},
    )

    total_job_time = timer.time() - job_start_time
    print('Job', job['job_name'],
          'took %f seconds ==============' % total_job_time)
    return total_job_time
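# Usage sketch: a minimal, hypothetical job config exercising the variant
# above. Every value is illustrative only (not from the source); the key
# names mirror exactly what single_process reads from the job dict, and the
# env id / hyperparameters are placeholders to be replaced per experiment.
if __name__ == '__main__':
    example_job = {
        'job_name': 'npg_example',
        'output_dir': 'results',            # hypothetical output location
        'env_name': 'Hopper-v2',            # hypothetical Gym env id
        'seed': 123,
        'init_std': 0.5,                    # initial log-std of the Gaussian policy
        'hidden_sizes': (32, 32),
        'normalized_step_size': 0.1,
        'save_logs': True,
        'FIM_invert_args': {'iters': 10, 'damping': 1e-4},
        'niter': 100,
        'gamma': 0.995,
        'gae_lambda': 0.97,
        'num_cpu': 4,
        'sample_mode': 'trajectories',      # or 'samples' with 'num_samples' set
        'num_traj': 50,
        'evaluation_rollouts': 5,
        'save_freq': 25,
    }
    single_process(example_job)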