Ejemplo n.º 1
0
            f = open(job_name_ewc_seed_lambda+'/trained_ewc_baseline.pickle', 'wb')
            pickle.dump(baseline_ewc, f)
            f.close()
            f = open(job_name_ewc_seed_lambda+'/trained_ewc_alphas.pickle', 'wb')
            pickle.dump(agent_ewc.theta, f)
            f.close()
            f = open(job_name_ewc_seed_lambda+'/trained_ewc_grads.pickle', 'wb')
            pickle.dump(agent_ewc.grad, f)
            f.close()
            f = open(job_name_ewc_seed_lambda+'/trained_ewc_hess.pickle', 'wb')
            pickle.dump(agent_ewc.hess, f)
            f.close()
            f = open(job_name_ewc_seed_lambda+'/env_factors.pickle', 'wb')
            pickle.dump(size_factors_list, f)
            f.close()

            make_multitask_train_plots(loggers=agent_ewc.logger, keys=['stoc_pol_mean'], save_loc=job_name_ewc_seed_lambda+'/logs/')

            mean_test_perf = agent_ewc.test_tasks(test_rollouts=10,
                                num_cpu=num_cpu)
            result = np.mean(list(mean_test_perf.values()))
            print(result)
            make_multitask_test_plots(mean_test_perf, save_loc=job_name_ewc_seed_lambda+'/')

            result_file = open(job_name_ewc_seed_lambda + '/results.txt', 'w')
            result_file.write(str(mean_test_perf))
            result_file.close()

      SEED += 10

Ejemplo n.º 2
0
    # Save the multi-task baseline, the agent's theta / grad / hess attributes
    # and the task order as separate pickle files under job_name_lpgftw_seed.
    # Each file is opened in a `with` block so the handle is closed even if
    # pickle.dump raises part-way through.
    with open(job_name_lpgftw_seed + '/trained_mtl_baseline.pickle', 'wb') as f:
        pickle.dump(baseline_mtl, f)
    # NOTE: the file is named "alphas" but its payload is agent_mtl.theta.
    with open(job_name_lpgftw_seed + '/trained_mtl_alphas.pickle', 'wb') as f:
        pickle.dump(agent_mtl.theta, f)
    with open(job_name_lpgftw_seed + '/trained_mtl_grads.pickle', 'wb') as f:
        pickle.dump(agent_mtl.grad, f)
    with open(job_name_lpgftw_seed + '/trained_mtl_hess.pickle', 'wb') as f:
        pickle.dump(agent_mtl.hess, f)
    with open(job_name_lpgftw_seed + '/task_order.pickle', 'wb') as f:
        pickle.dump(task_order, f)

    # Plot the 'stoc_pol_mean' training curves from the agent's loggers.
    make_multitask_train_plots(loggers=agent_mtl.logger,
                               keys=['stoc_pol_mean'],
                               save_loc=job_name_lpgftw_seed + '/logs/')

    # Evaluate with 10 test rollouts; test_tasks returns a mapping whose
    # values are averaged into a single summary number.
    mean_test_perf = agent_mtl.test_tasks(test_rollouts=10, num_cpu=num_cpu)
    result = np.mean(list(mean_test_perf.values()))
    print(result)
    make_multitask_test_plots(mean_test_perf,
                              save_loc=job_name_lpgftw_seed + '/')
    # Store the full test-performance mapping (its str() form) as text.
    with open(job_name_lpgftw_seed + '/results.txt', 'w') as result_file:
        result_file.write(str(mean_test_perf))

    # Advance the seed by 10 (presumably for the next repetition of an
    # enclosing loop that is not visible here).
    SEED += 10
Ejemplo n.º 3
0
                evaluation_rollouts=5,
                task_id=task_id)
    # Register this task with the multi-task agent via add_approximate_cost
    # (N=10, single CPU).
    agent_mtl.add_approximate_cost(N=10, task_id=task_id, num_cpu=1)

    # Train the single-task agent for this task. The settings are collected
    # in one mapping and forwarded to train_agent as keyword arguments.
    stl_train_settings = dict(
        job_name=job_name_stl,
        agent=agent_stl[task_id],
        seed=SEED,
        niter=50,
        gamma=0.995,
        gae_lambda=0.97,
        num_cpu=num_cpu,
        sample_mode='trajectories',
        num_traj=10,
        save_freq=5,
        evaluation_rollouts=5,
        task_id=task_id,
    )
    train_agent(**stl_train_settings)

    # Keep this task's logger so the single-task curves can be plotted later.
    loggers_stl[task_id] = agent_stl[task_id].logger

    # Report the time elapsed since `ts` (set outside the visible fragment).
    elapsed_since_ts = timer.time() - ts
    print("time taken for linear policy training = %f" % elapsed_since_ts)

# Plot the 'stoc_pol_mean' training curves, first for the multi-task agent
# and then for the collected single-task loggers, each under its own
# '<job name>/logs/' directory.
for train_loggers, job_root in ((agent_mtl.logger, job_name_mtl),
                                (loggers_stl, job_name_stl)):
    make_multitask_train_plots(loggers=train_loggers,
                               keys=['stoc_pol_mean'],
                               save_loc=job_root + '/logs/')

# Evaluate the multi-task agent with 10 test rollouts, print the mean over
# the values of the returned mapping, and plot the test results.
mean_test_perf = agent_mtl.test_tasks(test_rollouts=10, num_cpu=num_cpu)
test_scores = list(mean_test_perf.values())
result = np.mean(test_scores)
print(result)
make_multitask_test_plots(mean_test_perf, save_loc=job_name_mtl + '/')