def parallel_train_pipeline(config, methods, env, eval_qnet, bhv_qnet, seedvec, max_name_length):
    """Run ``train_pipeline`` once and gather three error metrics per method.

    Args:
        config: Forwarded unchanged to ``train_pipeline``.
        methods: Sequence of method-name strings; entry ``i`` labels
            ``results[i]`` returned by ``train_pipeline``.
        env: Forwarded unchanged to ``train_pipeline``.
        eval_qnet: Forwarded unchanged to ``train_pipeline``.
        bhv_qnet: Forwarded unchanged to ``train_pipeline``.
        seedvec: Forwarded unchanged to ``train_pipeline``.
        max_name_length: Width each method name is left-justified to before
            it is handed to ``error_info``.

    Returns:
        Tuple ``(mse, ind_mse, mse_w)`` of 1-D float arrays, each of length
        ``len(methods)``, holding the first, second and third value returned
        by ``error_info`` for every method.
        NOTE(review): the exact meaning of the three metrics is defined by
        ``error_info`` -- confirm there.
    """
    method_count = len(methods)
    mse = np.zeros(method_count)
    ind_mse = np.zeros(method_count)
    mse_w = np.zeros(method_count)

    results, target = train_pipeline(env, config, eval_qnet, bhv_qnet, seedvec)

    for idx, method_name in enumerate(methods):
        # The padded name is the third argument of error_info
        # (presumably used for aligned log output -- confirm).
        mse[idx], ind_mse[idx], mse_w[idx] = error_info(
            results[idx], target, method_name.ljust(max_name_length)
        )

    return mse, ind_mse, mse_w
# Experiment script: load a DDPG agent for the continuous cart-pole
# environment, run train_pipeline over a set of method variants, print the
# per-method errors and save them under results/.
import os  # used only to create the output directory before saving

env = gym.make("ContinuousCartPole-v0")
# NOTE: this rebinds the name `config` to its `contcartpole_config` attribute;
# whatever `config` referred to before is no longer reachable by that name.
config = config.contcartpole_config
agent = load_ddpg_agent(config, filename=args.env_name + '_ddpg_agent.pth.tar')
# Presumably torch modules being switched to inference mode -- confirm.
agent.actor.eval()
agent.critic.eval()

# One random seed per sampled trajectory.
seedvec = np.random.randint(0, config.MAX_SEED, config.sample_num_traj)

factual_types = ['hard', 2.5, 2.0, 1.5, 1.0, 0.5]
# Method labels: the baseline, then one 'mse_pi_*' and one 'repbm_*' entry
# per factual type. The order must match the order of `results` below.
methods = (
    ['Baseline']
    + ['mse_pi_{}'.format(ft) for ft in factual_types]
    + ['repbm_{}'.format(ft) for ft in factual_types]
)
num_method = len(methods)
# Length of the longest label; used to pad every label to the same width.
max_name_length = max(len(method) for method in methods)

mse = []
ind_mse = []
results, target = train_pipeline(env, config, agent, factual_types, seedvec)
for i_method, method in enumerate(methods):
    mse_1, mse_2 = error_info(results[i_method], target, method.ljust(max_name_length))
    mse.append(mse_1)
    ind_mse.append(mse_2)

print(mse)
print(ind_mse)

# np.save does not create missing parent directories; make sure the target
# directory exists so the results of the run above are not lost to a
# FileNotFoundError at the very last step.
out_path = 'results/result_{}_{}'.format(args.env_name, args.pid)
os.makedirs(os.path.dirname(out_path), exist_ok=True)
np.save(out_path, [mse, ind_mse])