def measure_change_through_time(path, env_name, policy, rep_params):
    """Track how a policy's representation drifts across training checkpoints.

    Rolls out one fixed sanity episode with the final `policy`, then for each
    saved checkpoint under `path`/model_checkpoints compares (via CCA) the
    checkpoint's representation against both the final policy (init change)
    and the previous checkpoint (adaptation change), and plots both curves.

    Args:
        path: experiment directory containing a `model_checkpoints/` folder.
        env_name: environment identifier passed to `make_env`.
        policy: the fully-trained policy used as the comparison anchor.
        rep_params: dict with at least 'seed', 'max_path_length', 'inner_lr'.
    """
    env = make_env(env_name, 1, rep_params['seed'], max_path_length=rep_params['max_path_length'])

    # NOTE(review): `metrics` is deliberately published as a module global in
    # the original; kept as-is in case other code reads it — confirm before
    # making it local.
    global metrics
    metrics = ['CCA']

    # One fixed episode, sampled once, so every checkpoint is probed on
    # identical inputs.
    sanity_task = env.sample_tasks(1)
    with torch.no_grad():
        env.set_task(sanity_task[0])
        env.seed(rep_params['seed'])
        env.reset()
        env_task = Runner(env)
        sanity_ep = env_task.run(policy, episodes=1)

    init_change_m = defaultdict(list)
    init_change_v = defaultdict(list)
    adapt_change_m = defaultdict(list)
    adapt_change_v = defaultdict(list)

    checkpoints = os.path.join(path, 'model_checkpoints')

    # Map checkpoint index -> actual filename (fix: keep the real directory
    # entry instead of re-building 'model_{n}.pt', which silently broke for
    # any other naming scheme). Baseline files are excluded.
    models_list = {}
    for file in os.listdir(checkpoints):
        if 'baseline' in file:
            continue
        n_file = int(file.split('_')[-1].split('.')[0])
        models_list[n_file] = file

    prev_policy = policy
    for i, key in enumerate(sorted(models_list.keys())):
        if i > 40:  # cap at 41 checkpoints, matching the original manual counter
            break
        model_chckpnt = models_list[key]
        print(f'Loading {model_chckpnt} ...')

        # Hard-coded Meta-World ML1/ML10 sizes (state=9, action=4);
        # TODO(review): derive from the env instead of hard-coding.
        chckpnt_policy = DiagNormalPolicy(9, 4)
        chckpnt_policy.load_state_dict(torch.load(os.path.join(checkpoints, model_chckpnt)))
        chckpnt_policy = MAML(chckpnt_policy, lr=rep_params['inner_lr'])

        # Similarity vs the final policy, and vs the previous checkpoint.
        mean, variance = episode_mean_var(sanity_ep, policy, chckpnt_policy, layer=6)
        a_mean, a_variance = episode_mean_var(sanity_ep, prev_policy, chckpnt_policy, layer=6)

        init_change_m['CCA'].append(mean['CCA'])
        init_change_v['CCA'].append(variance['CCA'])
        adapt_change_m['CCA'].append(a_mean['CCA'])
        adapt_change_v['CCA'].append(a_variance['CCA'])
        prev_policy = chckpnt_policy

    for metric in metrics:
        plot_sim_across_steps(init_change_m[metric], init_change_v[metric], metric=metric,
                              title='Similarity between init and adapted (in %)')
    for metric in metrics:
        # Similarity -> difference: 1 - similarity per step.
        difference = [1 - x for x in adapt_change_m[metric]]
        plot_sim_across_steps(difference, adapt_change_v[metric], metric=metric,
                              title='Representation difference after each step (in %)')
def run(self, env, device):
    """Meta-train an ANIL policy with PPO on `env`.

    Each outer iteration samples a meta-batch of tasks, fast-adapts a clone of
    the policy per task, averages the per-task PPO losses, back-propagates the
    averaged loss through body+head, and steps one Adam update. Checkpoints
    body/head/baseline every `save_every` iterations; Ctrl-C interrupts
    safely, then the final models are saved and meta-tested.

    NOTE(review): `params['fc_neurons']` and `extra_info` are read from module
    globals while learning rates come from `self.params` — confirm this mix is
    intentional.
    """
    set_device(device)
    # Linear value function used as the PPO advantage baseline.
    baseline = ch.models.robotics.LinearValue(env.state_size, env.action_size)
    policy = DiagNormalPolicyANIL(env.state_size, env.action_size, params['fc_neurons'])
    policy = MAML(policy, lr=self.params['inner_lr'])
    body = policy.body
    head = policy.head
    # One outer-loop optimizer over both body and head parameters.
    all_parameters = list(body.parameters()) + list(head.parameters())
    meta_optimizer = torch.optim.Adam(all_parameters, lr=self.params['outer_lr'])
    self.log_model(policy.body, device, input_shape=(1, env.state_size), name='body')
    self.log_model(policy.head, device, input_shape=(env.action_size, params['fc_neurons']), name='head')
    t = trange(self.params['num_iterations'])
    try:
        for iteration in t:
            meta_optimizer.zero_grad()
            iter_reward = 0.0
            iter_loss = 0.0
            task_list = env.sample_tasks(self.params['meta_batch_size'])
            for task_i in trange(len(task_list), leave=False, desc='Task', position=0):
                task = task_list[task_i]
                learner = policy.clone()  # task-specific copy; inner updates stay differentiable
                env.set_task(task)
                env.reset()
                task = Runner(env, extra_info=extra_info)  # rebinds `task` from task config to episode runner
                # Fast adapt
                loss, task_rew, task_suc = fast_adapt_ppo(task, learner, baseline, self.params, anil=True)
                # print(f'Task {task_i}: Loss: {loss.item()} | Rew: {task_rew}')
                iter_reward += task_rew
                iter_loss += loss  # tensor sum keeps the graph: averaged loss is back-propagated below
            # Log
            average_return = iter_reward / self.params['meta_batch_size']
            av_loss = iter_loss / self.params['meta_batch_size']
            metrics = {
                'average_return': average_return,
                'loss': av_loss.item()
            }
            t.set_postfix(metrics)
            self.log_metrics(metrics)
            # Meta-optimize: Back-propagate through the accumulated gradients and optimize
            av_loss.backward()
            meta_optimizer.step()
            if iteration % self.params['save_every'] == 0:
                self.save_model_checkpoint(policy.body, 'body_' + str(iteration + 1))
                self.save_model_checkpoint(policy.head, 'head_' + str(iteration + 1))
                self.save_model_checkpoint(baseline, 'baseline_' + str(iteration + 1))
    # Support safely manually interrupt training
    except KeyboardInterrupt:
        print('\nManually stopped training! Start evaluation & saving...\n')
        self.logger['manually_stopped'] = True
        self.params['num_iterations'] = iteration
    # Final save + meta-test run regardless of interruption.
    self.save_model(policy.body, name='body')
    self.save_model(policy.head, name='head')
    self.save_model(baseline, name='baseline')
    self.logger['elapsed_time'] = str(round(t.format_dict['elapsed'], 2)) + ' sec'
    # Evaluate on new test tasks
    self.logger['test_reward'] = evaluate_ppo(env_name, policy, baseline, eval_params)
    self.log_metrics({'test_reward': self.logger['test_reward']})
    self.save_logs_to_file()
# Meta-learning analog beamforming codebooks over Gaussian user clusters.
# NOTE(review): this fragment ends mid-loop in the visible source — the
# meta-training step that consumes `batch` is not shown here.
dataset = GaussianCenters(possible_loc=loc[:,:2], n_clusters=n_clusters, arrival_rate = arrival_rate, cluster_variance = cluster_variance)
# Test-gain buffers: (num codebook sizes, test rounds, users per round).
test_gains_maml = np.zeros((len(num_of_beams),ntest,dataset.n_clusters*dataset.arrival_rate))
test_gains_scratch = np.zeros((len(num_of_beams),ntest,dataset.n_clusters*dataset.arrival_rate))
test_gains_dft = np.zeros((len(num_of_beams),ntest,dataset.n_clusters*dataset.arrival_rate))
for i,N in enumerate(num_of_beams):
    print(str(N) + '-beams Codebook')
    # Model:
    # ------
    model = AnalogBeamformer(n_antenna = num_antenna, n_beam = N)
    # first_order=True: cheaper FOMAML, no second derivatives through adaptation
    maml = MAML(model, lr=fast_lr, first_order=True)
    # Training:
    # ---------
    optimizer = optim.Adam(model.parameters(),lr=meta_lr, betas=(0.9,0.999), amsgrad=False)
    loss_fn = bf_gain_loss
    for iteration in range(nepoch):
        optimizer.zero_grad()
        meta_train_error = 0.0
        meta_valid_error = 0.0
        for task in range(batch_size):
            dataset.change_cluster()  # each task corresponds to a freshly drawn user cluster
            # Compute meta-training loss
            learner = maml.clone()
            batch_idc = dataset.sample()
            # assumes h_concat_scaled rows are per-user channels and
            # egc_gain_scaled the matching target gains — TODO confirm
            batch = (h_concat_scaled[batch_idc,:],egc_gain_scaled[batch_idc])
def run(self, train_tasks, valid_tasks, test_tasks, input_shape, device):
    """Meta-train an ANIL learner for few-shot classification.

    The conv `features` body is shared across tasks; only the linear `head`
    is cloned and fast-adapted per task (ANIL). Per-task evaluation losses
    back-propagate into both body and head; accumulated gradients are averaged
    over the meta-batch before a single Adam step. Ctrl-C interrupts safely.

    NOTE(review): `dataset` and `fc_neurons` are read from module globals —
    confirm they are defined before this method runs.
    """
    # Create model
    if dataset == "omni":
        # Omniglot: single-channel inputs, no max-pooling.
        features = ConvBase(output_size=64, hidden=32, channels=1, max_pool=False)
    else:
        features = ConvBase(output_size=64, channels=3, max_pool=True)
    # Flatten conv output to the width expected by the linear head.
    features = torch.nn.Sequential(features, Lambda(lambda x: x.view(-1, fc_neurons)))
    features.to(device)
    head = torch.nn.Linear(fc_neurons, self.params['ways'])
    head = MAML(head, lr=self.params['inner_lr'])  # only the head is fast-adapted (ANIL)
    head.to(device)
    # Setup optimization
    all_parameters = list(features.parameters()) + list(head.parameters())
    optimizer = torch.optim.Adam(all_parameters, lr=self.params['outer_lr'])
    loss = torch.nn.CrossEntropyLoss(reduction='mean')
    self.log_model(features, device, input_shape=input_shape, name='features')  # Input shape is specific to dataset
    head_input_shape = (self.params['ways'], fc_neurons)
    self.log_model(head, device, input_shape=head_input_shape, name='head')  # Input shape is specific to dataset
    t = trange(self.params['num_iterations'])
    try:
        for iteration in t:
            optimizer.zero_grad()
            meta_train_loss = 0.0
            meta_train_accuracy = 0.0
            meta_valid_loss = 0.0
            meta_valid_accuracy = 0.0
            for task in range(self.params['meta_batch_size']):
                # Compute meta-training loss
                learner = head.clone()
                batch = train_tasks.sample()
                eval_loss, eval_acc = fast_adapt(batch, learner, loss, self.params['adapt_steps'], self.params['shots'], self.params['ways'], device, features=features)
                eval_loss.backward()  # accumulate per-task gradients; averaged below before the step
                meta_train_loss += eval_loss.item()
                meta_train_accuracy += eval_acc.item()
                # Compute meta-validation loss (monitoring only — no backward)
                learner = head.clone()
                batch = valid_tasks.sample()
                eval_loss, eval_acc = fast_adapt(batch, learner, loss, self.params['adapt_steps'], self.params['shots'], self.params['ways'], device, features=features)
                meta_valid_loss += eval_loss.item()
                meta_valid_accuracy += eval_acc.item()
            meta_train_loss = meta_train_loss / self.params['meta_batch_size']
            meta_valid_loss = meta_valid_loss / self.params['meta_batch_size']
            meta_train_accuracy = meta_train_accuracy / self.params['meta_batch_size']
            meta_valid_accuracy = meta_valid_accuracy / self.params['meta_batch_size']
            metrics = {'train_loss': meta_train_loss, 'train_acc': meta_train_accuracy, 'valid_loss': meta_valid_loss, 'valid_acc': meta_valid_accuracy}
            t.set_postfix(metrics)
            self.log_metrics(metrics)
            # Average the accumulated gradients and optimize
            for p in all_parameters:
                p.grad.data.mul_(1.0 / self.params['meta_batch_size'])
            optimizer.step()
            if iteration % self.params['save_every'] == 0:
                self.save_model_checkpoint(features, 'features_' + str(iteration + 1))
                self.save_model_checkpoint(head, 'head_' + str(iteration + 1))
    # Support safely manually interrupt training
    except KeyboardInterrupt:
        print('\nManually stopped training! Start evaluation & saving...\n')
        self.logger['manually_stopped'] = True
        self.params['num_iterations'] = iteration
    # Final save + meta-test run regardless of interruption.
    self.save_model(features, name='features')
    self.save_model(head, name='head')
    self.logger['elapsed_time'] = str(round(t.format_dict['elapsed'], 2)) + ' sec'
    # Meta-testing on unseen tasks
    self.logger['test_acc'] = evaluate(self.params, test_tasks, head, loss, device, features=features)
    self.log_metrics({'test_acc': self.logger['test_acc']})
    self.save_logs_to_file()
def run(self, env, device):
    """Meta-train an ANIL policy with TRPO on `env`.

    Each outer iteration fast-adapts a per-task copy of the policy, collects
    the adapted learners and their replays, and hands them to
    `meta_optimize_trpo` for the trust-region outer step (no explicit
    optimizer here, unlike the PPO variant). Ctrl-C interrupts safely.

    NOTE(review): `params['fc_neurons']` and `extra_info` come from module
    globals while learning rates come from `self.params` — confirm intended.
    """
    set_device(device)
    # Linear value function used as the advantage baseline.
    baseline = ch.models.robotics.LinearValue(env.state_size, env.action_size)
    policy = DiagNormalPolicyANIL(env.state_size, env.action_size, params['fc_neurons'])
    policy = MAML(policy, lr=self.params['inner_lr'])
    self.log_model(policy.body, device, input_shape=(1, env.state_size), name='body')
    self.log_model(policy.head, device, input_shape=(env.action_size, params['fc_neurons']), name='head')
    t = trange(self.params['num_iterations'])
    try:
        for iteration in t:
            iter_loss = 0.0
            iter_reward = 0.0
            iter_replays = []
            iter_policies = []
            task_list = env.sample_tasks(self.params['meta_batch_size'])
            for task_i in trange(len(task_list), leave=False, desc='Task', position=0):
                task = task_list[task_i]
                # NOTE(review): deepcopy here vs policy.clone() in the PPO
                # variant — confirm the detached copy is intentional for TRPO.
                learner = deepcopy(policy)
                env.set_task(task)
                env.reset()
                task = Runner(env, extra_info=extra_info)  # rebinds `task` to the episode runner
                # Fast adapt
                learner, eval_loss, task_replay, task_rew, task_suc = fast_adapt_trpo(
                    task, learner, baseline, self.params, anil=True, first_order=True)
                iter_reward += task_rew
                iter_loss += eval_loss.item()
                iter_replays.append(task_replay)
                iter_policies.append(learner)
            # Log
            average_return = iter_reward / self.params['meta_batch_size']
            average_loss = iter_loss / self.params['meta_batch_size']
            metrics = {
                'average_return': average_return,
                'loss': average_loss
            }
            t.set_postfix(metrics)
            self.log_metrics(metrics)
            # Meta-optimize: TRPO outer step over the stored replays/adapted policies
            meta_optimize_trpo(self.params, policy, baseline, iter_replays, iter_policies, anil=True)
            if iteration % self.params['save_every'] == 0:
                self.save_model_checkpoint(policy.body, 'body_' + str(iteration + 1))
                self.save_model_checkpoint(policy.head, 'head_' + str(iteration + 1))
                self.save_model_checkpoint(baseline, 'baseline_' + str(iteration + 1))
    # Support safely manually interrupt training
    except KeyboardInterrupt:
        print('\nManually stopped training! Start evaluation & saving...\n')
        self.logger['manually_stopped'] = True
        self.params['num_iterations'] = iteration
    # Final save + meta-test run regardless of interruption.
    self.save_model(policy.body, name="body")
    self.save_model(policy.head, name="head")
    self.save_model(baseline, name="baseline")
    self.logger['elapsed_time'] = str(round(t.format_dict['elapsed'], 2)) + ' sec'
    # Evaluate on new test tasks
    self.logger['test_reward'] = evaluate_trpo(env_name, policy, baseline, eval_params)
    self.log_metrics({'test_reward': self.logger['test_reward']})
    self.save_logs_to_file()
def run():
    """Load a trained policy (and baseline) from `path` and run the requested
    evaluation, continual-learning, and/or representation-change experiments.

    Relies on module-level state: `path`, `checkpoint`, `eval_params`,
    `cl_params`, `rep_params`, the `EVALUATE`/`RUN_CL`/`RUN_RC` flags,
    `render`, `workers`, `test_on_train`, `save_res`, `each3`.
    """
    # Restore the training configuration saved next to the model.
    try:
        with open(path + '/logger.json', 'r') as f:
            params = json.load(f)['config']
    except FileNotFoundError:
        print('WARNING CONFIG NOT FOUND. Using default parameters')
        # NOTE(review): these defaults omit 'algo' and 'dataset', which are
        # read unconditionally below — the fallback would raise KeyError.
        params = dict()
        params['inner_lr'] = 0.1
        params['ppo_epochs'] = 3
        params['ppo_clip_ratio'] = 0.1
        params['tau'] = 1.0
        params['gamma'] = 0.99
        params['seed'] = 42

    # Propagate the training seed to every experiment configuration.
    eval_params['seed'] = params['seed']
    cl_params['seed'] = params['seed']
    rep_params['seed'] = params['seed']

    algo = params['algo']
    env_name = params['dataset']
    anil = 'anil' in algo  # fix: simplified from `True if ... else False`

    # Split e.g. 'maml_trpo' into meta-learning and RL components.
    if 'maml' in algo or 'anil' in algo:
        ml_algo = algo.split('_')[0]
        rl_algo = algo.split('_')[1]
    elif 'ppo' == algo or 'random' == algo:
        ml_algo = ''
        rl_algo = 'ppo'
    else:
        ml_algo = ''
        rl_algo = algo.split('_')[1]

    cl_params['algo'] = rl_algo
    rep_params['algo'] = rl_algo
    cl_params['anil'] = anil
    rep_params['anil'] = anil

    # Meta-World (ML*) envs use a 9-dim state / 4-dim action space here.
    if 'ML' in env_name:
        state_size = 9
        action_size = 4
        rep_params['extra_info'], cl_params['extra_info'] = True, True
    else:
        state_size = 2
        action_size = 2
        rep_params['extra_info'], cl_params['extra_info'] = False, False

    # Resolve model file paths.
    # FIX: both branches now select head/body files iff the algorithm is ANIL,
    # matching the loading logic below. The original checkpoint branch tested
    # `ml_algo == 'maml'`, which left `policy_path` undefined (NameError) for
    # plain-RL algorithms and mis-selected files for ANIL vs MAML checkpoints.
    if checkpoint is None:
        baseline_path = path + '/baseline.pt'
        if ml_algo == 'anil':
            head_path = path + '/head.pt'
            body_path = path + '/body.pt'
        else:
            policy_path = path + '/model.pt'
    else:
        baseline_path = path + f'/model_checkpoints/model_baseline_{checkpoint}.pt'
        if ml_algo == 'anil':
            head_path = path + f'/model_checkpoints/model_head_{checkpoint}.pt'
            body_path = path + f'/model_checkpoints/model_body_{checkpoint}.pt'
        else:
            policy_path = path + f'/model_checkpoints/model_{checkpoint}.pt'

    device = torch.device('cpu')
    # Seed every RNG source for reproducible evaluation.
    random.seed(params['seed'])
    np.random.seed(params['seed'])
    torch.manual_seed(params['seed'])

    baseline = ch.models.robotics.LinearValue(state_size, action_size)
    baseline.load_state_dict(torch.load(baseline_path))
    baseline.to(device)

    if ml_algo == 'anil':
        policy = DiagNormalPolicyANIL(state_size, action_size, params['fc_neurons'])
        policy.head.load_state_dict(torch.load(head_path))
        policy.body.load_state_dict(torch.load(body_path))
    else:
        policy = DiagNormalPolicy(state_size, action_size)
        policy.load_state_dict(torch.load(policy_path))

    policy = MAML(policy, lr=eval_params['inner_lr'])
    policy.to(device)

    print(f'Testing {ml_algo}-{rl_algo} on {env_name}')
    if EVALUATE:
        t_test = 'train' if test_on_train else 'test'
        test_rewards, av_test_rew, av_test_suc, res_per_task = evaluate(
            rl_algo, env_name, policy, baseline, eval_params, anil=anil, render=render,
            test_on_train=test_on_train, each3=each3)
        print(f'Average meta-testing reward: {av_test_rew}')
        print(f'Average meta-testing success rate: {av_test_suc * 100}%')
        if save_res:
            with open(f"{params['algo']}_{t_test}_{params['seed']}.json", 'w') as f:
                f.write(json.dumps(res_per_task))
        for key, val in res_per_task.items():
            # Even indices hold rewards, odd indices success flags.
            print(f'{key}: \n\tRewards: {val[::2]}\n\tSuccess: {val[1::2]}\n')
        bar_plot_ml10(res_per_task, f"{params['algo']}_{t_test}_{params['seed']}.png")
    if RUN_CL:
        print('Running Continual Learning experiment...')
        run_cl_rl_exp(path, env_name, policy, baseline, cl_params, workers, test_on_train=test_on_train)
    if RUN_RC:
        print('Running Rep Change experiment...')
        run_rep_rl_exp(path, env_name, policy, baseline, rep_params)
def run(self, train_tasks, valid_tasks, test_tasks, model, input_shape, device):
    """Meta-train `model` with second-order MAML on few-shot classification.

    Every outer iteration adapts a fresh clone of the model on each of
    `meta_batch_size` training tasks, back-propagates the post-adaptation
    losses, averages the accumulated gradients, and takes one Adam step.
    Validation tasks are measured for monitoring only and contribute no
    gradients. Ctrl-C interrupts training safely; the model is then saved
    and meta-tested as usual.
    """
    model.to(device)
    meta_learner = MAML(model, lr=self.params['inner_lr'], first_order=False)
    meta_opt = torch.optim.Adam(meta_learner.parameters(), self.params['outer_lr'])
    criterion = torch.nn.CrossEntropyLoss(reduction='mean')
    self.log_model(meta_learner, device, input_shape=input_shape)  # Input shape is specific to dataset

    n_tasks = self.params['meta_batch_size']
    progress = trange(self.params['num_iterations'])
    try:
        for iteration in progress:
            # Fresh gradients for this outer step.
            meta_opt.zero_grad()
            train_loss = 0.0
            train_acc = 0.0
            valid_loss = 0.0
            valid_acc = 0.0

            # Inner (adaptation) loop over the meta-batch.
            for _ in range(n_tasks):
                # Meta-training task: adapt a clone, backprop its evaluation loss.
                task_learner = meta_learner.clone()
                task_loss, task_acc = fast_adapt(train_tasks.sample(), task_learner, criterion,
                                                 self.params['adapt_steps'], self.params['shots'],
                                                 self.params['ways'], device)
                task_loss.backward()
                train_loss += task_loss.item()
                train_acc += task_acc.item()

                # Meta-validation task: measured only, no gradient contribution.
                val_learner = meta_learner.clone()
                val_loss, val_acc = fast_adapt(valid_tasks.sample(), val_learner, criterion,
                                               self.params['adapt_steps'], self.params['shots'],
                                               self.params['ways'], device)
                valid_loss += val_loss.item()
                valid_acc += val_acc.item()

            metrics = {'train_loss': train_loss / n_tasks,
                       'train_acc': train_acc / n_tasks,
                       'valid_loss': valid_loss / n_tasks,
                       'valid_acc': valid_acc / n_tasks}
            progress.set_postfix(metrics)
            self.log_metrics(metrics)

            # Average the accumulated task gradients, then take the outer step.
            for p in meta_learner.parameters():
                p.grad.data.mul_(1.0 / n_tasks)
            meta_opt.step()

            if iteration % self.params['save_every'] == 0:
                self.save_model_checkpoint(model, str(iteration))
    except KeyboardInterrupt:
        # Support safely manually interrupting training.
        print('\nManually stopped training! Start evaluation & saving...\n')
        self.logger['manually_stopped'] = True
        self.params['num_iterations'] = iteration

    self.save_model(model)
    self.logger['elapsed_time'] = str(round(progress.format_dict['elapsed'], 2)) + ' sec'
    # Meta-testing on unseen tasks
    self.logger['test_acc'] = evaluate(self.params, test_tasks, meta_learner, criterion, device)
    self.log_metrics({'test_acc': self.logger['test_acc']})
    self.save_logs_to_file()