# Save the trained EWC baseline, policy parameters, and Fisher-information terms for this seed/lambda run
with open(job_name_ewc_seed_lambda + '/trained_ewc_baseline.pickle', 'wb') as f:
    pickle.dump(baseline_ewc, f)
with open(job_name_ewc_seed_lambda + '/trained_ewc_alphas.pickle', 'wb') as f:
    pickle.dump(agent_ewc.theta, f)
with open(job_name_ewc_seed_lambda + '/trained_ewc_grads.pickle', 'wb') as f:
    pickle.dump(agent_ewc.grad, f)
with open(job_name_ewc_seed_lambda + '/trained_ewc_hess.pickle', 'wb') as f:
    pickle.dump(agent_ewc.hess, f)
with open(job_name_ewc_seed_lambda + '/env_factors.pickle', 'wb') as f:
    pickle.dump(size_factors_list, f)

# Plot training curves, then evaluate the trained EWC agent on all tasks
make_multitask_train_plots(loggers=agent_ewc.logger, keys=['stoc_pol_mean'],
                           save_loc=job_name_ewc_seed_lambda + '/logs/')
mean_test_perf = agent_ewc.test_tasks(test_rollouts=10, num_cpu=num_cpu)
result = np.mean(list(mean_test_perf.values()))
print(result)
make_multitask_test_plots(mean_test_perf, save_loc=job_name_ewc_seed_lambda + '/')

# Record per-task test performance and advance the seed for the next run
with open(job_name_ewc_seed_lambda + '/results.txt', 'w') as result_file:
    result_file.write(str(mean_test_perf))
SEED += 10
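# Sketch (not part of the original script): a hypothetical helper for reloading
# the EWC artifacts saved above in a later analysis session. It assumes the same
# directory layout and file names; `run_dir` stands in for a concrete
# job_name_ewc_seed_lambda value.
import pickle

def load_ewc_run(run_dir):
    """Reload the pickled baseline, parameters, gradients, Hessians, and env factors."""
    artifacts = {}
    for name in ('trained_ewc_baseline', 'trained_ewc_alphas',
                 'trained_ewc_grads', 'trained_ewc_hess', 'env_factors'):
        with open(run_dir + '/' + name + '.pickle', 'rb') as f:
            artifacts[name] = pickle.load(f)
    return artifacts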
# Save the trained LPG-FTW baseline, shared policy factors, and Hessian terms for this seed
with open(job_name_lpgftw_seed + '/trained_mtl_baseline.pickle', 'wb') as f:
    pickle.dump(baseline_mtl, f)
with open(job_name_lpgftw_seed + '/trained_mtl_alphas.pickle', 'wb') as f:
    pickle.dump(agent_mtl.theta, f)
with open(job_name_lpgftw_seed + '/trained_mtl_grads.pickle', 'wb') as f:
    pickle.dump(agent_mtl.grad, f)
with open(job_name_lpgftw_seed + '/trained_mtl_hess.pickle', 'wb') as f:
    pickle.dump(agent_mtl.hess, f)
with open(job_name_lpgftw_seed + '/task_order.pickle', 'wb') as f:
    pickle.dump(task_order, f)

# Plot training curves, then evaluate the lifelong learner on all observed tasks
make_multitask_train_plots(loggers=agent_mtl.logger, keys=['stoc_pol_mean'],
                           save_loc=job_name_lpgftw_seed + '/logs/')
mean_test_perf = agent_mtl.test_tasks(test_rollouts=10, num_cpu=num_cpu)
result = np.mean(list(mean_test_perf.values()))
print(result)
make_multitask_test_plots(mean_test_perf, save_loc=job_name_lpgftw_seed + '/')

# Record per-task test performance and advance the seed for the next run
with open(job_name_lpgftw_seed + '/results.txt', 'w') as result_file:
    result_file.write(str(mean_test_perf))
SEED += 10
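# Sketch (hypothetical, not in the original script): aggregating the results.txt
# files written above across seeds. This assumes each file parses back as a plain
# Python dict literal mapping task id -> mean return (str() of a dict of plain
# floats); `run_dirs` is an assumed list of per-seed output directories.
import ast
import numpy as np

def summarize_runs(run_dirs):
    """Return mean and std of average test performance across runs."""
    per_run_means = []
    for run_dir in run_dirs:
        with open(run_dir + '/results.txt') as f:
            perf = ast.literal_eval(f.read())  # task id -> mean return
        per_run_means.append(np.mean(list(perf.values())))
    return np.mean(per_run_means), np.std(per_run_means)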
# Train the lifelong (LPG-FTW) learner on the current task, then consolidate its
# approximate cost before moving on. (The head of this train_agent call was cut
# off in the excerpt; the arguments are reconstructed to mirror the parallel STL
# call below.)
train_agent(job_name=job_name_mtl, agent=agent_mtl, seed=SEED, niter=50,
            gamma=0.995, gae_lambda=0.97, num_cpu=num_cpu,
            sample_mode='trajectories', num_traj=10, save_freq=5,
            evaluation_rollouts=5, task_id=task_id)
agent_mtl.add_approximate_cost(N=10, task_id=task_id, num_cpu=1)

# Train a single-task (STL) agent on the same task for comparison
train_agent(job_name=job_name_stl, agent=agent_stl[task_id], seed=SEED, niter=50,
            gamma=0.995, gae_lambda=0.97, num_cpu=num_cpu,
            sample_mode='trajectories', num_traj=10, save_freq=5,
            evaluation_rollouts=5, task_id=task_id)
loggers_stl[task_id] = agent_stl[task_id].logger
print("time taken for linear policy training = %f" % (timer.time() - ts))

# Plot training curves for both learners, then evaluate the lifelong learner
make_multitask_train_plots(loggers=agent_mtl.logger, keys=['stoc_pol_mean'],
                           save_loc=job_name_mtl + '/logs/')
make_multitask_train_plots(loggers=loggers_stl, keys=['stoc_pol_mean'],
                           save_loc=job_name_stl + '/logs/')
mean_test_perf = agent_mtl.test_tasks(test_rollouts=10, num_cpu=num_cpu)
result = np.mean(list(mean_test_perf.values()))
print(result)
make_multitask_test_plots(mean_test_perf, save_loc=job_name_mtl + '/')
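# Sketch (assumption, not in the original excerpt): evaluating the single-task
# agents the same way as agent_mtl above, so lifelong and STL test performance
# can be compared side by side. This assumes agent_stl and loggers_stl are dicts
# keyed by task id and that each STL agent exposes the same test_tasks interface
# as agent_mtl.
mean_test_perf_stl = {}
for task_id in loggers_stl:
    perf = agent_stl[task_id].test_tasks(test_rollouts=10, num_cpu=num_cpu)
    mean_test_perf_stl.update(perf)
print(np.mean(list(mean_test_perf_stl.values())))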