Ejemplo n.º 1
0
                evaluation_rollouts=5,
                task_id=task_id)
    # Per-task step of an enclosing loop whose header is outside this view.
    # First register the task with the multi-task agent via
    # add_approximate_cost.
    # NOTE(review): num_cpu is hard-coded to 1 here, while the train_agent
    # call below forwards the num_cpu variable -- confirm this is intended.
    agent_mtl.add_approximate_cost(N=10, task_id=task_id, num_cpu=1)

    # Train the per-task agent agent_stl[task_id] for 50 iterations, sampling
    # 10 trajectories per iteration, with save_freq=5 and 5 evaluation
    # rollouts (exact semantics of these arguments live in train_agent).
    train_agent(job_name=job_name_stl,
                agent=agent_stl[task_id],
                seed=SEED,
                niter=50,
                gamma=0.995,
                gae_lambda=0.97,
                num_cpu=num_cpu,
                sample_mode='trajectories',
                num_traj=10,
                save_freq=5,
                evaluation_rollouts=5,
                task_id=task_id)
    # Keep this task's logger so it can be plotted after the loop.
    loggers_stl[task_id] = agent_stl[task_id].logger
    # ts is assigned outside this view; presumably the timestamp taken
    # before training started -- TODO confirm.
    print("time taken for linear policy training = %f" % (timer.time() - ts))

# Plot the 'stoc_pol_mean' logger key for the multi-task agent and for the
# collected per-task loggers, each with save_loc set to its job's logs/
# directory.
make_multitask_train_plots(loggers=agent_mtl.logger,
                           keys=['stoc_pol_mean'],
                           save_loc=job_name_mtl + '/logs/')
make_multitask_train_plots(loggers=loggers_stl,
                           keys=['stoc_pol_mean'],
                           save_loc=job_name_stl + '/logs/')

# Evaluate the multi-task agent with 10 test rollouts. The return value is
# used as a mapping: its values are averaged into a single score that is
# printed, and the full mapping is passed on to the test-plot helper.
mean_test_perf = agent_mtl.test_tasks(test_rollouts=10, num_cpu=num_cpu)
result = np.mean(list(mean_test_perf.values()))
print(result)
make_multitask_test_plots(mean_test_perf, save_loc=job_name_mtl + '/')
Ejemplo n.º 2
0
                    evaluation_rollouts=0,
                    task_id=task_id)
        # Per-task step of an inner loop whose header is outside this view.
        # Call add_approximate_cost on this task's agent, then collect the
        # agent's grad, hess and logger attributes keyed by task_id.
        # NOTE(review): presumably add_approximate_cost is what populates
        # .grad and .hess -- confirm against the agent implementation.
        agent_stl[task_id].add_approximate_cost(N=10, num_cpu=num_cpu)
        grads_stl[task_id] = agent_stl[task_id].grad
        hess_stl[task_id] = agent_stl[task_id].hess
        loggers_stl[task_id] = agent_stl[task_id].logger
        # ts is assigned outside this view; presumably the timestamp taken
        # before training started -- TODO confirm.
        print("time taken for linear policy training = %f" %
              (timer.time() - ts))

    # Persist this run's artifacts as pickle files under job_name_stl_seed:
    # the policies, baselines, collected grads/hess, and the task order.
    # Each file is opened in a `with` block so the handle is closed (and the
    # data flushed) even if pickle.dump raises part-way through.
    with open(job_name_stl_seed + '/trained_stl_policy.pickle', 'wb') as f:
        pickle.dump(policy_stl, f)
    with open(job_name_stl_seed + '/trained_stl_baseline.pickle', 'wb') as f:
        pickle.dump(baseline_stl, f)
    with open(job_name_stl_seed + '/trained_stl_grads.pickle', 'wb') as f:
        pickle.dump(grads_stl, f)
    with open(job_name_stl_seed + '/trained_stl_hess.pickle', 'wb') as f:
        pickle.dump(hess_stl, f)
    with open(job_name_stl_seed + '/task_order.pickle', 'wb') as f:
        pickle.dump(task_order, f)

    # Plot the 'stoc_pol_mean' logger key for the collected per-task loggers,
    # with save_loc set to this run's logs/ directory.
    make_multitask_train_plots(loggers=loggers_stl,
                               keys=['stoc_pol_mean'],
                               save_loc=job_name_stl_seed + '/logs/')

    # Advance the seed by 10; presumably the enclosing loop (header not
    # visible here) repeats the experiment with the new seed.
    SEED += 10
Ejemplo n.º 3
0
    # Persist this run's artifacts as pickle files under job_name_ewc_seed:
    # the baseline, the agent's theta, grad and hess, and the task order.
    # Each file is opened in a `with` block so the handle is closed (and the
    # data flushed) even if pickle.dump raises part-way through.
    # NOTE(review): the file names use an 'mtl' prefix inside the EWC job
    # directory, and 'trained_mtl_alphas.pickle' stores agent_ewc.theta.
    # They are kept unchanged because downstream loaders may rely on these
    # exact paths -- confirm before renaming.
    with open(job_name_ewc_seed + '/trained_mtl_baseline.pickle', 'wb') as f:
        pickle.dump(baseline_ewc, f)
    with open(job_name_ewc_seed + '/trained_mtl_alphas.pickle', 'wb') as f:
        pickle.dump(agent_ewc.theta, f)
    with open(job_name_ewc_seed + '/trained_mtl_grads.pickle', 'wb') as f:
        pickle.dump(agent_ewc.grad, f)
    with open(job_name_ewc_seed + '/trained_mtl_hess.pickle', 'wb') as f:
        pickle.dump(agent_ewc.hess, f)
    with open(job_name_ewc_seed + '/task_order.pickle', 'wb') as f:
        pickle.dump(task_order, f)

    # Plot the 'stoc_pol_mean' logger key from the EWC agent's logger, with
    # save_loc set to this run's logs/ directory.
    make_multitask_train_plots(loggers=agent_ewc.logger,
                               keys=['stoc_pol_mean'],
                               save_loc=job_name_ewc_seed + '/logs/')

    # Evaluate the agent with 10 test rollouts. The return value is used as
    # a mapping: its values are averaged into a single score that is printed,
    # and the full mapping is passed on to the test-plot helper.
    mean_test_perf = agent_ewc.test_tasks(test_rollouts=10, num_cpu=num_cpu)
    result = np.mean(list(mean_test_perf.values()))
    print(result)
    make_multitask_test_plots(mean_test_perf, save_loc=job_name_ewc_seed + '/')

    # Write the str() form of the test-performance result to results.txt in
    # this run's directory. The `with` block closes the file even if the
    # write raises.
    with open(job_name_ewc_seed + '/results.txt', 'w') as result_file:
        result_file.write(str(mean_test_perf))

    # Advance the seed by 10; presumably the enclosing loop (header not
    # visible here) repeats the experiment with the new seed.
    SEED += 10