Example 1
def parallel_train_pipeline(config, methods, env, eval_qnet, bhv_qnet, seedvec, max_name_length):
    """Run the training pipeline once and collect per-method MSE statistics.

    Args:
        config: experiment configuration forwarded to ``train_pipeline``.
        methods: list of method names; one result row is expected per method.
        env: environment instance used by the pipeline rollouts.
        eval_qnet: evaluation (target) Q-network.
        bhv_qnet: behavior Q-network.
        seedvec: vector of random seeds for the rollouts.
        max_name_length: width to left-justify method names for aligned logging
            inside ``error_info``.

    Returns:
        Tuple ``(mse, ind_mse, mse_w)`` of three ``np.ndarray`` objects, each of
        shape ``(len(methods),)``, holding the three error values reported by
        ``error_info`` for each method.
    """
    num_method = len(methods)
    # Use num_method consistently (original mixed num_method and len(methods)).
    mse = np.zeros(num_method)
    ind_mse = np.zeros(num_method)
    mse_w = np.zeros(num_method)

    results, target = train_pipeline(env, config, eval_qnet, bhv_qnet, seedvec)

    # enumerate is the idiomatic replacement for range(len(...)) indexing.
    for i_method, method in enumerate(methods):
        mse_1, mse_2, mse_3 = error_info(results[i_method], target,
                                         method.ljust(max_name_length))
        mse[i_method] = mse_1
        ind_mse[i_method] = mse_2
        mse_w[i_method] = mse_3

    # `return` is a statement, not a call: no parentheses around the tuple.
    return mse, ind_mse, mse_w
Example 2
        # NOTE(review): this branch belongs to an env-selection if/elif whose
        # start is above this excerpt — presumably keyed on args.env_name.
        env = gym.make("ContinuousCartPole-v0")
        config = config.contcartpole_config

    # Load a pre-trained DDPG agent checkpoint named after the environment,
    # and put both networks in eval mode (disables dropout/batch-norm updates).
    agent = load_ddpg_agent(config,
                            filename=args.env_name + '_ddpg_agent.pth.tar')
    agent.actor.eval()
    agent.critic.eval()
    # One random seed per trajectory to be sampled by the pipeline.
    seedvec = np.random.randint(0, config.MAX_SEED, config.sample_num_traj)

    # 'hard' plus a ladder of soft-threshold values; each factual type yields
    # one mse_pi_* and one repbm_* method variant, preceded by the baseline.
    factual_types = ['hard', 2.5, 2.0, 1.5, 1.0, 0.5]
    methods = ['Baseline'] + ['mse_pi_{}'.format(ft) for ft in factual_types] +\
              ['repbm_{}'.format(ft) for ft in factual_types]

    num_method = len(methods)
    # Longest method name, used to left-justify names for aligned log output.
    max_name_length = len(max(methods, key=len))

    mse = []
    ind_mse = []

    # Run the full training/evaluation pipeline once; results is indexed per
    # method in the same order as `methods` — TODO confirm against pipeline.
    results, target = train_pipeline(env, config, agent, factual_types,
                                     seedvec)
    for i_method in range(num_method):
        # error_info returns two error measures here (aggregate and
        # individual MSE, by the variable names) — verify against its def.
        mse_1, mse_2 = error_info(results[i_method], target,
                                  methods[i_method].ljust(max_name_length))
        mse.append(mse_1)
        ind_mse.append(mse_2)
    print(mse)
    print(ind_mse)
    # Persist both error lists keyed by environment name and process id.
    np.save('results/result_{}_{}'.format(args.env_name, args.pid),
            [mse, ind_mse])