Beispiel #1
0
      # Seed PyTorch's global RNG with the shared SEED constant.
      torch.manual_seed(SEED)

      # Output directory name for this seed index: <job_name_ewc>/seed_<i>.
      job_name_ewc_seed = job_name_ewc + f'/seed_{i}'

      # Draw a random permutation of the task indices and re-key the
      # environments so that position k in the new ordering holds the
      # environment originally stored at task_order[k].
      task_order = np.random.permutation(num_tasks)
      e = {
            task_id: e_unshuffled[source_id]
            for task_id, source_id in enumerate(task_order)
      }

      for ewc_lambda in lambda_range:
            # Everything below is constructed anew for each ewc_lambda value.
            # One MLPBaseline per task, each built from that task's env spec.
            baseline_ewc = {
                  task_id: MLPBaseline(
                        e[task_id].spec,
                        reg_coef=1e-3,
                        batch_size=64,
                        epochs=2,
                        learn_rate=1e-3,
                        use_gpu=True,
                  )
                  for task_id in range(num_tasks)
            }

            # A single policy, built from the spec of the first task in the
            # shuffled ordering, is handed to the agent together with all
            # environments and baselines.
            policy_ewc = LinearPolicy(e[0].spec, seed=SEED)
            agent_ewc = NPGEWC(
                  e,
                  policy_ewc,
                  baseline_ewc,
                  ewc_lambda=ewc_lambda,
                  scaled_lambda=False,
                  normalized_step_size=0.1,
                  seed=SEED,
                  save_logs=True,
            )

            # Alternative agent, left disabled:
            # agent = BatchREINFORCE(e, policy, baseline, learn_rate=0.0001, seed=SEED, save_logs=True)

            # Job name for this (seed, lambda) pair: <seed dir>/lambda<value>.
            job_name_ewc_seed_lambda = job_name_ewc_seed + f'/lambda{ewc_lambda}'
            for task_id in range(num_tasks):
                  ts = timer.time()
                  train_agent(job_name=job_name_ewc_seed_lambda,
                              agent=agent_ewc,
                              seed=SEED,
                              niter=50,
                              gamma=0.995,  
                              gae_lambda=0.97,
                              num_cpu=num_cpu,
                              sample_mode='trajectories',
                              num_traj=50,
Beispiel #2
0
        # Directory name for seed index i: <job_name_ewc>/seed_<i>.
        job_name_ewc_seed = job_name_ewc + '/seed_{}'.format(i)

        # Load the pickled policy and the baseline for task t from
        # <seed dir>/iterations/task_<t>/. The `with` blocks guarantee the
        # file handles are closed even if unpickling raises.
        # NOTE(review): pickle.load can execute arbitrary code embedded in
        # the file; only point this at pickles produced by trusted runs.
        # NOTE(review): the "_0" suffix presumably denotes the snapshot
        # taken at iteration 0 — confirm against the training code.
        with open(
                job_name_ewc_seed + '/iterations/task_{}/'.format(t) +
                'policy_0.pickle', 'rb') as f:
            policy_mtl = pickle.load(f)
        with open(
                job_name_ewc_seed + '/iterations/task_{}/'.format(t) +
                'baseline_0.pickle', 'rb') as f:
            baseline_mtl[t] = pickle.load(f)

        # Build an NPGEWC agent from the environments plus the policy and
        # baselines currently held in policy_mtl / baseline_mtl.
        agent_mtl = NPGEWC(
            e, policy_mtl, baseline_mtl,
            ewc_lambda=1e-4,
            scaled_lambda=False,
            normalized_step_size=0.01,
            seed=SEED,
            save_logs=True,
        )

        # Run the agent's test routine with 10 rollouts, restricted to
        # task t via a one-element task_ids array.
        mean_test_perf = agent_mtl.test_tasks(
            test_rollouts=10, num_cpu=num_cpu, task_ids=np.array([t]))

        # Rebind forward_transfer_results to a fresh dict holding the old
        # entries plus this task's results (new values win on key clashes).
        # The previous dict object is left unmodified.
        forward_transfer_results = dict(forward_transfer_results)
        forward_transfer_results.update(mean_test_perf)

    # Write the str() representation of the accumulated results dict to
    # start_results.txt inside the directory named by job_name_ewc_seed
    # (whatever value it was last assigned above).
    # NOTE(review): no close() for result_file is visible in this excerpt —
    # confirm it is closed later, or switch to a `with` block so the data is
    # flushed deterministically.
    result_file = open(job_name_ewc_seed + '/start_results.txt', 'w')
    result_file.write(str(forward_transfer_results))