Example #1
def measure_change_through_time(path, env_name, policy, rep_params):
    env = make_env(env_name, 1, rep_params['seed'], max_path_length=rep_params['max_path_length'])
    global metrics
    metrics = ['CCA']

    # Collect one fixed sanity-check episode with the given policy;
    # every checkpoint below is compared on this same episode.
    sanity_task = env.sample_tasks(1)

    with torch.no_grad():
        env.set_task(sanity_task[0])
        env.seed(rep_params['seed'])
        env.reset()
        env_task = Runner(env)
        sanity_ep = env_task.run(policy, episodes=1)

    init_change_m = defaultdict(list)
    init_change_v = defaultdict(list)
    adapt_change_m = defaultdict(list)
    adapt_change_v = defaultdict(list)
    checkpoints = path + '/model_checkpoints/'
    i = 0

    # Collect the model checkpoint files (ignoring baseline checkpoints) and
    # index them by iteration number so they can be visited in training order.
    file_list = os.listdir(checkpoints)
    file_list = [file for file in file_list if 'baseline' not in file]
    models_list = {}
    for file in file_list:
        n_file = file.split('_')[-1]
        n_file = n_file.split('.')[0]
        n_file = int(n_file)
        models_list[n_file] = f'model_{n_file}.pt'

    prev_policy = policy
    for key in sorted(models_list.keys()):
        model_chckpnt = models_list[key]
        if i > 40:  # cap the number of checkpoints inspected
            break
        i += 1

        print(f'Loading {model_chckpnt} ...')
        chckpnt_policy = DiagNormalPolicy(9, 4)
        chckpnt_policy.load_state_dict(torch.load(os.path.join(checkpoints, model_chckpnt)))
        chckpnt_policy = MAML(chckpnt_policy, lr=rep_params['inner_lr'])

        # Compare this checkpoint's layer-6 representation to the reference policy
        # and to the previous checkpoint, on the fixed sanity episode.
        mean, variance = episode_mean_var(sanity_ep, policy, chckpnt_policy, layer=6)
        a_mean, a_variance = episode_mean_var(sanity_ep, prev_policy, chckpnt_policy, layer=6)
        init_change_m['CCA'] += [mean['CCA']]
        init_change_v['CCA'] += [variance['CCA']]
        adapt_change_m['CCA'] += [a_mean['CCA']]
        adapt_change_v['CCA'] += [a_variance['CCA']]

        prev_policy = chckpnt_policy

    for metric in metrics:
        plot_sim_across_steps(init_change_m[metric], init_change_v[metric], metric=metric,
                              title='Similarity between init and adapted (in %)')

    for metric in metrics:
        difference = [1 - x for x in adapt_change_m[metric]]
        plot_sim_across_steps(difference, adapt_change_v[metric], metric=metric,
                              title='Representation difference after each step (in %)')
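
The numbers collected above come from episode_mean_var, which scores a chosen layer's activations with CCA. For reference only, mean CCA similarity between two activation matrices can be computed from orthonormal bases of their column spaces; the helper below is a generic NumPy sketch (hypothetical name and shapes), not the project's episode_mean_var implementation.

import numpy as np

def mean_cca_similarity(acts_a, acts_b):
    # acts_a, acts_b: (n_samples, n_features) activations of the same layer
    # under two different policies, collected on the same episode.
    acts_a = acts_a - acts_a.mean(axis=0)
    acts_b = acts_b - acts_b.mean(axis=0)
    # Orthonormal bases of the two activation subspaces (assumes full column rank)
    basis_a, _, _ = np.linalg.svd(acts_a, full_matrices=False)
    basis_b, _, _ = np.linalg.svd(acts_b, full_matrices=False)
    # Canonical correlations are the singular values of basis_a^T basis_b
    rho = np.linalg.svd(basis_a.T @ basis_b, compute_uv=False)
    return float(rho.mean())

A value near 1 means the two policies' layer activations span nearly the same subspace on the sanity episode.

Example #2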
    def run(self, env, device):

        set_device(device)
        baseline = ch.models.robotics.LinearValue(env.state_size,
                                                  env.action_size)

        policy = DiagNormalPolicyANIL(env.state_size, env.action_size,
                                      params['fc_neurons'])
        policy = MAML(policy, lr=self.params['inner_lr'])
        body = policy.body
        head = policy.head

        all_parameters = list(body.parameters()) + list(head.parameters())
        meta_optimizer = torch.optim.Adam(all_parameters,
                                          lr=self.params['outer_lr'])

        self.log_model(policy.body,
                       device,
                       input_shape=(1, env.state_size),
                       name='body')
        self.log_model(policy.head,
                       device,
                       input_shape=(env.action_size, params['fc_neurons']),
                       name='head')

        t = trange(self.params['num_iterations'])
        try:
            for iteration in t:
                meta_optimizer.zero_grad()

                iter_reward = 0.0
                iter_loss = 0.0

                task_list = env.sample_tasks(self.params['meta_batch_size'])

                for task_i in trange(len(task_list),
                                     leave=False,
                                     desc='Task',
                                     position=0):
                    task = task_list[task_i]

                    learner = policy.clone()
                    env.set_task(task)
                    env.reset()
                    task = Runner(env, extra_info=extra_info)

                    # Fast adapt
                    loss, task_rew, task_suc = fast_adapt_ppo(task,
                                                              learner,
                                                              baseline,
                                                              self.params,
                                                              anil=True)

                    # print(f'Task {task_i}: Loss: {loss.item()} | Rew: {task_rew}')
                    iter_reward += task_rew
                    iter_loss += loss

                # Log
                average_return = iter_reward / self.params['meta_batch_size']
                av_loss = iter_loss / self.params['meta_batch_size']
                metrics = {
                    'average_return': average_return,
                    'loss': av_loss.item()
                }

                t.set_postfix(metrics)
                self.log_metrics(metrics)

                # Meta-optimize: back-propagate the averaged task loss and step the meta-optimizer
                av_loss.backward()
                meta_optimizer.step()

                if iteration % self.params['save_every'] == 0:
                    self.save_model_checkpoint(policy.body,
                                               'body_' + str(iteration + 1))
                    self.save_model_checkpoint(policy.head,
                                               'head_' + str(iteration + 1))
                    self.save_model_checkpoint(
                        baseline, 'baseline_' + str(iteration + 1))

        # Allow training to be interrupted safely by hand (Ctrl+C)
        except KeyboardInterrupt:
            print(
                '\nManually stopped training! Start evaluation & saving...\n')
            self.logger['manually_stopped'] = True
            self.params['num_iterations'] = iteration

        self.save_model(policy.body, name='body')
        self.save_model(policy.head, name='head')
        self.save_model(baseline, name='baseline')

        self.logger['elapsed_time'] = str(round(t.format_dict['elapsed'],
                                                2)) + ' sec'
        # Evaluate on new test tasks
        self.logger['test_reward'] = evaluate_ppo(env_name, policy, baseline,
                                                  eval_params)
        self.log_metrics({'test_reward': self.logger['test_reward']})
        self.save_logs_to_file()
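
The ANIL policy above is wrapped in MAML so that each task adapts a differentiable clone; the per-task PPO losses are averaged and back-propagated once per iteration before the Adam meta-step.

Example #3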
    

dataset = GaussianCenters(possible_loc=loc[:, :2],
                          n_clusters=n_clusters,
                          arrival_rate=arrival_rate,
                          cluster_variance=cluster_variance)

test_gains_maml = np.zeros((len(num_of_beams), ntest, dataset.n_clusters * dataset.arrival_rate))
test_gains_scratch = np.zeros((len(num_of_beams), ntest, dataset.n_clusters * dataset.arrival_rate))
test_gains_dft = np.zeros((len(num_of_beams), ntest, dataset.n_clusters * dataset.arrival_rate))

for i, N in enumerate(num_of_beams):
    print(str(N) + '-beams Codebook')

    # Model:
    # ------
    model = AnalogBeamformer(n_antenna=num_antenna, n_beam=N)
    maml = MAML(model, lr=fast_lr, first_order=True)
    # Training:
    # ---------
    optimizer = optim.Adam(model.parameters(), lr=meta_lr, betas=(0.9, 0.999), amsgrad=False)
    loss_fn = bf_gain_loss

    for iteration in range(nepoch):
        optimizer.zero_grad()
        meta_train_error = 0.0
        meta_valid_error = 0.0
        for task in range(batch_size):
            dataset.change_cluster()
            # Compute meta-training loss
            learner = maml.clone()
            batch_idc = dataset.sample()
            batch = (h_concat_scaled[batch_idc, :], egc_gain_scaled[batch_idc])
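
The snippet above is cut off right after the task batch is assembled. For reference, a first-order MAML iteration of this kind typically finishes by adapting the clone on the batch, back-propagating the post-adaptation loss, averaging the accumulated gradients over the tasks, and stepping the optimizer. The loop below is a self-contained toy sketch with random regression data and arbitrary sizes, not the original beamforming code.

import torch
from learn2learn.algorithms import MAML

net = torch.nn.Linear(64, 1)                             # stand-in for AnalogBeamformer
maml = MAML(net, lr=0.01, first_order=True)
opt = torch.optim.Adam(maml.parameters(), lr=1e-3)
meta_batch_size = 4

for iteration in range(10):
    opt.zero_grad()
    for task in range(meta_batch_size):
        x, y = torch.randn(16, 64), torch.randn(16, 1)   # toy task data
        learner = maml.clone()
        learner.adapt(((learner(x) - y) ** 2).mean())    # inner-loop update on the clone
        eval_loss = ((learner(x) - y) ** 2).mean()       # post-adaptation loss
        eval_loss.backward()                             # accumulates meta-gradients on maml
    for p in maml.parameters():                          # average over the task batch
        p.grad.data.mul_(1.0 / meta_batch_size)
    opt.step()

Example #4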
    def run(self, train_tasks, valid_tasks, test_tasks, input_shape, device):

        # Create model
        if dataset == "omni":
            features = ConvBase(output_size=64, hidden=32, channels=1, max_pool=False)
        else:
            features = ConvBase(output_size=64, channels=3, max_pool=True)
        features = torch.nn.Sequential(features, Lambda(lambda x: x.view(-1, fc_neurons)))
        features.to(device)

        head = torch.nn.Linear(fc_neurons, self.params['ways'])
        head = MAML(head, lr=self.params['inner_lr'])
        head.to(device)

        # Setup optimization
        all_parameters = list(features.parameters()) + list(head.parameters())
        optimizer = torch.optim.Adam(all_parameters, lr=self.params['outer_lr'])
        loss = torch.nn.CrossEntropyLoss(reduction='mean')

        self.log_model(features, device, input_shape=input_shape, name='features')  # Input shape is specific to dataset
        head_input_shape = (self.params['ways'], fc_neurons)
        self.log_model(head, device, input_shape=head_input_shape, name='head')  # Input shape is specific to dataset

        t = trange(self.params['num_iterations'])
        try:
            for iteration in t:
                optimizer.zero_grad()
                meta_train_loss = 0.0
                meta_train_accuracy = 0.0
                meta_valid_loss = 0.0
                meta_valid_accuracy = 0.0
                for task in range(self.params['meta_batch_size']):
                    # Compute meta-training loss
                    learner = head.clone()
                    batch = train_tasks.sample()
                    eval_loss, eval_acc = fast_adapt(batch, learner, loss,
                                                     self.params['adapt_steps'],
                                                     self.params['shots'], self.params['ways'],
                                                     device, features=features)
                    eval_loss.backward()
                    meta_train_loss += eval_loss.item()
                    meta_train_accuracy += eval_acc.item()

                    # Compute meta-validation loss
                    learner = head.clone()
                    batch = valid_tasks.sample()
                    eval_loss, eval_acc = fast_adapt(batch, learner, loss,
                                                     self.params['adapt_steps'],
                                                     self.params['shots'], self.params['ways'],
                                                     device, features=features)
                    meta_valid_loss += eval_loss.item()
                    meta_valid_accuracy += eval_acc.item()

                meta_train_loss = meta_train_loss / self.params['meta_batch_size']
                meta_valid_loss = meta_valid_loss / self.params['meta_batch_size']
                meta_train_accuracy = meta_train_accuracy / self.params['meta_batch_size']
                meta_valid_accuracy = meta_valid_accuracy / self.params['meta_batch_size']

                metrics = {'train_loss': meta_train_loss,
                           'train_acc': meta_train_accuracy,
                           'valid_loss': meta_valid_loss,
                           'valid_acc': meta_valid_accuracy}
                t.set_postfix(metrics)
                self.log_metrics(metrics)

                # Average the accumulated gradients and optimize
                for p in all_parameters:
                    p.grad.data.mul_(1.0 / self.params['meta_batch_size'])
                optimizer.step()

                if iteration % self.params['save_every'] == 0:
                    self.save_model_checkpoint(features, 'features_' + str(iteration + 1))
                    self.save_model_checkpoint(head, 'head_' + str(iteration + 1))

        # Allow training to be interrupted safely by hand (Ctrl+C)
        except KeyboardInterrupt:
            print('\nManually stopped training! Start evaluation & saving...\n')
            self.logger['manually_stopped'] = True
            self.params['num_iterations'] = iteration

        self.save_model(features, name='features')
        self.save_model(head, name='head')

        self.logger['elapsed_time'] = str(round(t.format_dict['elapsed'], 2)) + ' sec'
        # Meta-testing on unseen tasks
        self.logger['test_acc'] = evaluate(self.params, test_tasks, head, loss, device, features=features)
        self.log_metrics({'test_acc': self.logger['test_acc']})
        self.save_logs_to_file()
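
In the snippet above only the linear head is wrapped in MAML and cloned per task; the convolutional feature extractor is shared across tasks and updated only by the outer-loop Adam step, which is the ANIL recipe.

Example #5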
    def run(self, env, device):

        set_device(device)
        baseline = ch.models.robotics.LinearValue(env.state_size,
                                                  env.action_size)

        policy = DiagNormalPolicyANIL(env.state_size, env.action_size,
                                      params['fc_neurons'])
        policy = MAML(policy, lr=self.params['inner_lr'])

        self.log_model(policy.body,
                       device,
                       input_shape=(1, env.state_size),
                       name='body')
        self.log_model(policy.head,
                       device,
                       input_shape=(env.action_size, params['fc_neurons']),
                       name='head')

        t = trange(self.params['num_iterations'])
        try:
            for iteration in t:

                iter_loss = 0.0
                iter_reward = 0.0
                iter_replays = []
                iter_policies = []

                task_list = env.sample_tasks(self.params['meta_batch_size'])

                for task_i in trange(len(task_list),
                                     leave=False,
                                     desc='Task',
                                     position=0):
                    task = task_list[task_i]

                    learner = deepcopy(policy)
                    env.set_task(task)
                    env.reset()
                    task = Runner(env, extra_info=extra_info)

                    # Fast adapt
                    learner, eval_loss, task_replay, task_rew, task_suc = fast_adapt_trpo(
                        task,
                        learner,
                        baseline,
                        self.params,
                        anil=True,
                        first_order=True)

                    iter_reward += task_rew
                    iter_loss += eval_loss.item()
                    iter_replays.append(task_replay)
                    iter_policies.append(learner)

                # Log
                average_return = iter_reward / self.params['meta_batch_size']
                average_loss = iter_loss / self.params['meta_batch_size']
                metrics = {
                    'average_return': average_return,
                    'loss': average_loss
                }

                t.set_postfix(metrics)
                self.log_metrics(metrics)

                # Meta-optimize
                meta_optimize_trpo(self.params,
                                   policy,
                                   baseline,
                                   iter_replays,
                                   iter_policies,
                                   anil=True)

                if iteration % self.params['save_every'] == 0:
                    self.save_model_checkpoint(policy.body,
                                               'body_' + str(iteration + 1))
                    self.save_model_checkpoint(policy.head,
                                               'head_' + str(iteration + 1))
                    self.save_model_checkpoint(
                        baseline, 'baseline_' + str(iteration + 1))

        # Allow training to be interrupted safely by hand (Ctrl+C)
        except KeyboardInterrupt:
            print(
                '\nManually stopped training! Start evaluation & saving...\n')
            self.logger['manually_stopped'] = True
            self.params['num_iterations'] = iteration

        self.save_model(policy.body, name="body")
        self.save_model(policy.head, name="head")
        self.save_model(baseline, name="baseline")

        self.logger['elapsed_time'] = str(round(t.format_dict['elapsed'],
                                                2)) + ' sec'
        # Evaluate on new test tasks
        self.logger['test_reward'] = evaluate_trpo(env_name, policy, baseline,
                                                   eval_params)
        self.log_metrics({'test_reward': self.logger['test_reward']})
        self.save_logs_to_file()
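
Unlike the PPO variant, this TRPO variant deep-copies the policy per task, keeps each task's replay and adapted learner, and performs the meta-update inside meta_optimize_trpo rather than by calling backward on an averaged loss.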
Example #6
def run():
    try:
        with open(path + '/logger.json', 'r') as f:
            params = json.load(f)['config']
    except FileNotFoundError:
        print('WARNING CONFIG NOT FOUND. Using default parameters')
        params = dict()
        params['inner_lr'] = 0.1
        params['ppo_epochs'] = 3
        params['ppo_clip_ratio'] = 0.1
        params['tau'] = 1.0
        params['gamma'] = 0.99
        params['seed'] = 42

    eval_params['seed'] = params['seed']
    cl_params['seed'] = params['seed']
    rep_params['seed'] = params['seed']
    algo = params['algo']
    env_name = params['dataset']

    anil = 'anil' in algo

    if 'maml' in algo or 'anil' in algo:
        ml_algo = params['algo'].split('_')[0]
        rl_algo = params['algo'].split('_')[1]
    elif algo in ('ppo', 'random'):
        ml_algo = ''
        rl_algo = 'ppo'
    else:
        ml_algo = ''
        rl_algo = params['algo'].split('_')[1]

    cl_params['algo'] = rl_algo
    rep_params['algo'] = rl_algo
    cl_params['anil'] = anil
    rep_params['anil'] = anil
    if 'ML' in env_name:
        state_size = 9
        action_size = 4
        rep_params['extra_info'], cl_params['extra_info'] = True, True
    else:
        state_size = 2
        action_size = 2
        rep_params['extra_info'], cl_params['extra_info'] = False, False

    if checkpoint is None:
        baseline_path = path + '/baseline.pt'
        if ml_algo == 'anil':
            head_path = path + '/head.pt'
            body_path = path + '/body.pt'
        else:
            policy_path = path + '/model.pt'
    else:
        baseline_path = path + f'/model_checkpoints/model_baseline_{checkpoint}.pt'
        if ml_algo == 'maml':
            policy_path = path + f'/model_checkpoints/model_{checkpoint}.pt'
        else:
            head_path = path + f'/model_checkpoints/model_head_{checkpoint}.pt'
            body_path = path + f'/model_checkpoints/model_body_{checkpoint}.pt'

    device = torch.device('cpu')
    random.seed(params['seed'])
    np.random.seed(params['seed'])
    torch.manual_seed(params['seed'])

    baseline = ch.models.robotics.LinearValue(state_size, action_size)
    baseline.load_state_dict(torch.load(baseline_path))
    baseline.to(device)

    if ml_algo == 'anil':
        policy = DiagNormalPolicyANIL(state_size, action_size,
                                      params['fc_neurons'])
        policy.head.load_state_dict(torch.load(head_path))
        policy.body.load_state_dict(torch.load(body_path))
    else:
        policy = DiagNormalPolicy(state_size, action_size)
        policy.load_state_dict(torch.load(policy_path))

    policy = MAML(policy, lr=eval_params['inner_lr'])
    policy.to(device)

    print(f'Testing {ml_algo}-{rl_algo} on {env_name}')
    if EVALUATE:
        t_test = 'train' if test_on_train else 'test'
        test_rewards, av_test_rew, av_test_suc, res_per_task = evaluate(
            rl_algo,
            env_name,
            policy,
            baseline,
            eval_params,
            anil=anil,
            render=render,
            test_on_train=test_on_train,
            each3=each3)
        print(f'Average meta-testing reward: {av_test_rew}')
        print(f'Average meta-testing success rate: {av_test_suc * 100}%')

        if save_res:
            with open(f"{params['algo']}_{t_test}_{params['seed']}.json",
                      'w') as f:
                f.write(json.dumps(res_per_task))
        # with open(f"maml_trpo_test_{i}.json") as f:
        #     res_per_task = json.loads(f.read())

        for key, val in res_per_task.items():
            print(f'{key}: \n\tRewards: {val[::2]}\n\tSuccess: {val[1::2]}\n')

        bar_plot_ml10(res_per_task,
                      f"{params['algo']}_{t_test}_{params['seed']}.png")

    if RUN_CL:
        print('Running Continual Learning experiment...')
        run_cl_rl_exp(path,
                      env_name,
                      policy,
                      baseline,
                      cl_params,
                      workers,
                      test_on_train=test_on_train)
    if RUN_RC:
        print('Running Rep Change experiment...')
        run_rep_rl_exp(path, env_name, policy, baseline, rep_params)
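
The driver above restores a trained baseline and policy (separate head/body checkpoints for ANIL, a single model otherwise), re-seeds everything from the saved config, and then optionally runs meta-test evaluation, the continual-learning experiment, and the representation-change experiment.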
Example #7
    def run(self, train_tasks, valid_tasks, test_tasks, model, input_shape, device):

        model.to(device)
        maml = MAML(model, lr=self.params['inner_lr'], first_order=False)
        opt = torch.optim.Adam(maml.parameters(), self.params['outer_lr'])
        loss = torch.nn.CrossEntropyLoss(reduction='mean')

        self.log_model(maml, device, input_shape=input_shape)  # Input shape is specific to dataset

        t = trange(self.params['num_iterations'])
        try:

            for iteration in t:
                # Clear gradients accumulated during the previous meta-iteration
                opt.zero_grad()
                # Initialize iteration's metrics
                meta_train_loss = 0.0
                meta_train_accuracy = 0.0
                meta_valid_loss = 0.0
                meta_valid_accuracy = 0.0
                # Inner (Adaptation) loop
                for task in range(self.params['meta_batch_size']):
                    # Compute meta-training loss
                    learner = maml.clone()
                    batch = train_tasks.sample()
                    eval_loss, eval_acc = fast_adapt(batch, learner, loss,
                                                     self.params['adapt_steps'],
                                                     self.params['shots'], self.params['ways'],
                                                     device)

                    # Back-propagate the post-adaptation (evaluation) loss; gradients accumulate on the meta-parameters
                    eval_loss.backward()
                    meta_train_loss += eval_loss.item()
                    meta_train_accuracy += eval_acc.item()

                    # Compute meta-validation loss
                    learner = maml.clone()
                    batch = valid_tasks.sample()
                    eval_loss, eval_acc = fast_adapt(batch, learner, loss,
                                                     self.params['adapt_steps'],
                                                     self.params['shots'], self.params['ways'],
                                                     device)
                    meta_valid_loss += eval_loss.item()
                    meta_valid_accuracy += eval_acc.item()

                meta_train_loss = meta_train_loss / self.params['meta_batch_size']
                meta_valid_loss = meta_valid_loss / self.params['meta_batch_size']
                meta_train_accuracy = meta_train_accuracy / self.params['meta_batch_size']
                meta_valid_accuracy = meta_valid_accuracy / self.params['meta_batch_size']

                metrics = {'train_loss': meta_train_loss,
                           'train_acc': meta_train_accuracy,
                           'valid_loss': meta_valid_loss,
                           'valid_acc': meta_valid_accuracy}
                t.set_postfix(metrics)
                self.log_metrics(metrics)

                # Average the accumulated gradients and optimize
                for p in maml.parameters():
                    p.grad.data.mul_(1.0 / self.params['meta_batch_size'])
                opt.step()

                if iteration % self.params['save_every'] == 0:
                    self.save_model_checkpoint(model, str(iteration))

        # Allow training to be interrupted safely by hand (Ctrl+C)
        except KeyboardInterrupt:
            print('\nManually stopped training! Start evaluation & saving...\n')
            self.logger['manually_stopped'] = True
            self.params['num_iterations'] = iteration

        self.save_model(model)

        self.logger['elapsed_time'] = str(round(t.format_dict['elapsed'], 2)) + ' sec'
        # Meta-testing on unseen tasks
        self.logger['test_acc'] = evaluate(self.params, test_tasks, maml, loss, device)
        self.log_metrics({'test_acc': self.logger['test_acc']})
        self.save_logs_to_file()
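
For context, a fast_adapt helper with the signature used above typically splits the sampled batch into adaptation (support) and evaluation (query) halves, adapts the cloned learner for a few steps, and returns the post-adaptation loss and accuracy. The function below is a self-contained sketch that assumes the task sampler returns 2 * shots samples per class and interleaves them, as in the learn2learn example scripts; it is not necessarily this project's exact implementation.

import torch

def fast_adapt_sketch(batch, learner, loss, adapt_steps, shots, ways, device):
    data, labels = batch
    data, labels = data.to(device), labels.to(device)
    # Interleaved split into adaptation (support) and evaluation (query) sets
    adapt_mask = torch.zeros(data.size(0), dtype=torch.bool)
    adapt_mask[torch.arange(shots * ways) * 2] = True
    eval_mask = ~adapt_mask
    # Inner-loop adaptation on the support set
    for _ in range(adapt_steps):
        learner.adapt(loss(learner(data[adapt_mask]), labels[adapt_mask]))
    # Evaluate the adapted learner on the query set
    predictions = learner(data[eval_mask])
    eval_loss = loss(predictions, labels[eval_mask])
    eval_acc = (predictions.argmax(dim=1) == labels[eval_mask]).float().mean()
    return eval_loss, eval_acc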