# Multi-task run setup: shuffle the task order, then build one baseline per
# task, a single policy, and the agent that is trained on every task.
baseline_mtl = {}  # task_id -> baseline object for that task
# Random permutation of the indices 0..num_tasks-1.
task_order = np.random.permutation(num_tasks)
for task_id in range(num_tasks):
    # Slot task_id receives the environment selected by the permutation, so
    # iterating e in index order visits the tasks in shuffled order.
    e[task_id] = e_unshuffled[task_order[task_id]]
    # Baseline built from this environment's spec.
    # NOTE(review): presumably a neural-network value-function baseline;
    # MLPBaseline is defined outside this excerpt -- confirm.
    baseline_mtl[task_id] = MLPBaseline(e[task_id].spec, reg_coef=1e-3,
                                        batch_size=64, epochs=2,
                                        learn_rate=1e-3, use_gpu=True)
# One policy object, constructed from the first environment's spec only.
# NOTE(review): assumes every task shares that spec's dimensions -- confirm.
policy_mtl = LinearPolicyLPGFTW(e[0].spec, k=1, max_k=5, seed=SEED)
# The agent receives all environments, the policy and the per-task baselines.
agent_mtl = NPGFTW(e, policy_mtl, baseline_mtl, normalized_step_size=1,
                   seed=SEED, save_logs=True, new_col_mode='max_k')
for task_id in range(num_tasks):
    # Timestamp taken before training; how it is consumed is not visible here.
    ts = timer.time()
    train_agent(job_name=job_name_lpgftw_seed, agent=agent_mtl, seed=SEED,
                niter=50, gamma=0.995, gae_lambda=0.97, num_cpu=num_cpu,
                sample_mode='trajectories', num_traj=10,
'policy_0.pickle', 'rb') policy_mtl = pickle.load(f) f.close() f = open( job_name_lpgftw_seed + '/iterations/task_{}/'.format(t) + 'baseline_0.pickle', 'rb') baseline_mtl[t] = pickle.load(f) f.close() if isinstance(policy_mtl.model.theta, list): policy_mtl.model.theta = torch.autograd.Variable(torch.zeros(0)) agent_mtl = NPGFTW(e, policy_mtl, baseline_mtl, normalized_step_size=0.1, seed=SEED, save_logs=False, new_col_mode='performance') mean_test_perf = agent_mtl.test_tasks(test_rollouts=10, num_cpu=num_cpu, task_ids=np.array([t])) forward_transfer_results = { **forward_transfer_results, **mean_test_perf } result_file = open(job_name_lpgftw_seed + '/start_results.txt', 'w') result_file.write(str(forward_transfer_results))
# NOTE(review): task_id on the next line is bound by code above this excerpt
# (presumably a per-task loop) -- confirm its nesting when merging.
agent_stl[task_id].hess = hess_stl[task_id]

# Dimensions taken from the first single-task policy.
k = 5
n = policy_stl[0].n
m = policy_stl[0].m
d = (n + 1) * m  # length of one flattened parameter vector (see reshape below)

# Zero-initialised accumulators.
# NOTE(review): names suggest a PG-ELLA style factorisation Theta ~ L @ S;
# the update rule is outside this excerpt -- confirm.
A = np.zeros((d * k, d * k))
b = np.zeros((d * k, 1))
S = np.zeros((k, num_tasks))
L = np.zeros((d, k))
Theta = np.zeros((d, num_tasks))  # column task_id holds that task's flat theta

# The multi-task policy starts with all k columns (k == max_k) and reuses the
# single-task baselines.
policy_mtl = LinearPolicyLPGFTW(e[0].spec, k=k, max_k=k, seed=SEED)
agent_mtl = NPGFTW(e, policy_mtl, baseline_stl, normalized_step_size=1,
                   seed=SEED, save_logs=True, new_col_mode='max_k')
lasso_solver = Lasso(alpha=1e-5, fit_intercept=False)
forward_transfer_results = {}

for task_id in range(num_tasks):
    theta = policy_stl[task_id].trainable_params
    # Append theta[1] as an extra column of theta[0].
    # NOTE(review): assumes theta[0] is a 2-D weight and theta[1] a 1-D bias.
    theta = torch.cat((theta[0], torch.unsqueeze(theta[1], 1)), 1)
    # order of reshape same as for hessian in npg_cg.py
    theta = theta.reshape((-1, 1)).data.numpy()
    # These statements sat after the inline comment on the collapsed line and
    # were therefore dead; they are live again here.
    agent_mtl.theta[task_id] = theta
    Theta[:, task_id] = theta.squeeze()
    # Negated Hessian stored on the single-task agent.
    D = -agent_stl[task_id].hess