def run_learning(task_data, meta_model, prm, verbose=1):

    # -------------------------------------------------------------------------------------------
    #  Setting-up
    # -------------------------------------------------------------------------------------------

    # Loss criterion
    loss_criterion = get_loss_criterion(prm.loss_type)

    # Create a model for the new task:
    task_model = get_model(prm)

    # Load the initial point from the meta-parameters:
    task_model.load_state_dict(meta_model.state_dict())

    # The data sets of the new task:
    train_loader = task_data['train']
    test_loader = task_data['test']
    n_train_samples = len(train_loader.dataset)
    n_batches = len(train_loader)

    # Get the task optimizer:
    # In meta-testing, use SGD with step-size alpha.
    task_optimizer = SGD(task_model.parameters(), lr=prm.alpha)

    # -------------------------------------------------------------------------------------------
    #  Learning function
    # -------------------------------------------------------------------------------------------
    def run_meta_test_learning(task_model, train_loader):

        task_model.train()
        train_loader_iter = iter(train_loader)

        # Gradient steps (training) loop:
        for i_grad_step in range(prm.n_meta_test_grad_steps):
            # Get the next batch (cycling over the loader if needed):
            batch_data = data_gen.get_next_batch_cyclic(train_loader_iter, train_loader)
            inputs, targets = data_gen.get_batch_vars(batch_data, prm)

            # Calculate the empirical loss:
            outputs = task_model(inputs)
            task_objective = loss_criterion(outputs, targets)

            # Take a gradient step with the task weights:
            grad_step(task_objective, task_optimizer)
        # end gradient-step loop

        return task_model

    # -------------------------------------------------------------------------------------------
    #  Test evaluation function
    # -------------------------------------------------------------------------------------------
    def run_test(model, test_loader):
        model.eval()
        test_loss = 0
        n_correct = 0
        for batch_data in test_loader:
            inputs, targets = data_gen.get_batch_vars(batch_data, prm, is_test=True)
            outputs = model(inputs)
            test_loss += loss_criterion(outputs, targets)  # sum the per-batch mean losses
            n_correct += count_correct(outputs, targets)

        n_test_samples = len(test_loader.dataset)
        n_test_batches = len(test_loader)
        test_loss = test_loss.item() / n_test_batches
        test_acc = n_correct / n_test_samples
        print('\nTest set: Average loss: {:.4}, Accuracy: {:.3} ({}/{})\n'.format(
            test_loss, test_acc, n_correct, n_test_samples))
        return test_acc

    # -----------------------------------------------------------------------------------------------------------#
    #  Update Log file
    if verbose == 1:
        write_to_log('Total number of steps: {}'.format(n_batches * prm.num_epochs), prm)

    # -------------------------------------------------------------------------------------------
    #  Run epochs
    # -------------------------------------------------------------------------------------------
    start_time = timeit.default_timer()

    # Training loop:
    task_model = run_meta_test_learning(task_model, train_loader)

    # Test:
    test_acc = run_test(task_model, test_loader)

    stop_time = timeit.default_timer()
    cmn.write_final_result(test_acc, stop_time - start_time, prm, verbose=verbose)

    test_err = 1 - test_acc
    return test_err, task_model
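
# -----------------------------------------------------------------------------------------------------------#
# Illustrative sketch (not part of the original code): the adaptation loop above relies on
# data_gen.get_next_batch_cyclic to keep supplying batches when prm.n_meta_test_grad_steps
# exceeds the number of batches in the loader. Its implementation is not shown in this section;
# a minimal sketch of the assumed behavior (restart from the first batch when the iterator is
# exhausted) could look like this:
# -----------------------------------------------------------------------------------------------------------#
def get_next_batch_cyclic(data_iterator, data_loader):
    # Return the next batch; wrap around when the iterator is exhausted.
    try:
        return next(data_iterator)
    except StopIteration:
        # Note: a real implementation would also hand the fresh iterator back
        # to the caller so that subsequent calls keep cycling through the data.
        return next(iter(data_loader))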
        '---- Infinite train tasks - New training tasks '
        'are drawn from the task distribution in each iteration...', prm)

    # Meta-training to learn the meta-model (theta parameters):
    meta_model = meta_train_MAML_infinite_tasks.run_meta_learning(prm, task_generator)

    # Save the learned meta-model:
    save_model_state(meta_model, save_path)
    write_to_log('Trained meta-model saved in ' + save_path, prm)

elif prm.mode == 'LoadMetaModel':

    # Load a previously trained meta-model (prior).
    # First, create the model:
    meta_model = get_model(prm)
    # Then, load the saved weights:
    load_model_state(meta_model, prm.load_model_path)
    write_to_log('Pre-trained meta-model loaded from ' + prm.load_model_path, prm)

else:
    raise ValueError('Invalid mode')

# -------------------------------------------------------------------------------------------
# Generate the data sets of the test tasks:
# -------------------------------------------------------------------------------------------
n_test_tasks = prm.n_test_tasks
limit_train_samples_in_test_tasks = prm.limit_train_samples_in_test_tasks
if limit_train_samples_in_test_tasks == 0:
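
# -----------------------------------------------------------------------------------------------------------#
# Illustrative sketch (not part of the original code): save_model_state and load_model_state are the
# repository's checkpoint helpers used above; their bodies are not shown in this section. Assuming they
# are thin wrappers around PyTorch state-dict serialization, they could look roughly like this:
# -----------------------------------------------------------------------------------------------------------#
import torch

def save_model_state(model, save_path):
    # Persist only the parameter tensors (state dict), not the full model object:
    torch.save(model.state_dict(), save_path)

def load_model_state(model, load_path):
    # Load the saved parameters into an already-constructed model of the same architecture:
    model.load_state_dict(torch.load(load_path))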
def run_meta_learning(train_data_loaders, prm):

    # -------------------------------------------------------------------------------------------
    #  Setting-up
    # -------------------------------------------------------------------------------------------

    # Unpack parameters:
    optim_func, optim_args, lr_schedule = \
        prm.optim_func, prm.optim_args, prm.lr_schedule

    # Loss criterion
    loss_criterion = get_loss_criterion(prm.loss_type)

    n_tasks = len(train_data_loaders)

    # Create a 'dummy' model to hold the set of parameters of the shared initial point (theta):
    model = get_model(prm)
    model.train()

    # Create an optimizer for the meta-parameters (theta):
    meta_params = list(model.parameters())
    meta_optimizer = optim_func(meta_params, **optim_args)

    # Number of sample-batches in each task:
    n_batch_list = [len(data_loader['train']) for data_loader in train_data_loaders]
    n_batches_per_task = np.max(n_batch_list)
    # Note: if some tasks have less data than others, their batches may be sampled more than once per epoch.

    # -------------------------------------------------------------------------------------------
    #  Training epoch function
    # -------------------------------------------------------------------------------------------
    def run_train_epoch(i_epoch):

        # For each task, prepare an iterator to generate training batches:
        train_iterators = [iter(train_data_loaders[ii]['train']) for ii in range(n_tasks)]

        # The task order to take batches from:
        task_order = []
        task_ids_list = list(range(n_tasks))
        for i_batch in range(n_batches_per_task):
            random.shuffle(task_ids_list)
            task_order += task_ids_list

        # Each meta-batch includes several tasks.
        # We take a gradient step with theta after each meta-batch.
        meta_batch_starts = list(range(0, len(task_order), prm.meta_batch_size))
        n_meta_batches = len(meta_batch_starts)

        # ----------- meta-batches loop (batches of tasks) -----------------------------------#
        for i_meta_batch in range(n_meta_batches):

            meta_batch_start = meta_batch_starts[i_meta_batch]
            task_ids_in_meta_batch = task_order[meta_batch_start:(meta_batch_start + prm.meta_batch_size)]
            n_tasks_in_batch = len(task_ids_in_meta_batch)  # may be smaller than prm.meta_batch_size in the last meta-batch
            # Note: it is OK if some task appears several times in the meta-batch.

            mb_data_loaders = [train_data_loaders[task_id] for task_id in task_ids_in_meta_batch]
            mb_iterators = [train_iterators[task_id] for task_id in task_ids_in_meta_batch]

            # Get the objective based on the tasks in the meta-batch:
            total_objective, info = meta_step(prm, model, mb_data_loaders, mb_iterators, loss_criterion)

            # Take a gradient step with the meta-parameters (theta) based on the validation data:
            grad_step(total_objective, meta_optimizer, lr_schedule, prm.lr, i_epoch)

            # Print status:
            log_interval = 200
            if i_meta_batch % log_interval == 0:
                batch_acc = info['correct_count'] / info['sample_count']
                print(cmn.status_string(i_epoch, num_epochs, i_meta_batch, n_meta_batches,
                                        batch_acc, get_value(total_objective)))
        # end meta-batches loop
    # end run_train_epoch()

    # -----------------------------------------------------------------------------------------------------------#
    #  Main script
    # -----------------------------------------------------------------------------------------------------------#

    # Update Log file
    write_to_log(cmn.get_model_string(model), prm)
    write_to_log('---- Meta-Training set: {0} tasks'.format(len(train_data_loaders)), prm)

    # -------------------------------------------------------------------------------------------
    #  Run epochs
    # -------------------------------------------------------------------------------------------
    start_time = timeit.default_timer()

    num_epochs = int(np.ceil(prm.n_meta_train_iterations / np.ceil(n_tasks / prm.meta_batch_size)))

    # Training loop:
    for i_epoch in range(num_epochs):
        run_train_epoch(i_epoch)

    stop_time = timeit.default_timer()

    # Update Log file:
    cmn.write_final_result(0.0, stop_time - start_time, prm)

    # Return the learned meta-parameters:
    return model
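
# -----------------------------------------------------------------------------------------------------------#
# Illustrative sketch (not part of the original code): the heavy lifting happens inside meta_step, which is
# defined elsewhere in the repository. For orientation only, below is a self-contained toy version of the
# MAML-style objective such a step is assumed to compute, using a hypothetical linear model and random data:
# adapt the shared parameters (theta) on each task's training batch with a few SGD steps, evaluate the
# adapted parameters on a validation batch, and backpropagate the averaged validation loss through the
# inner updates (second-order gradients).
# -----------------------------------------------------------------------------------------------------------#
import torch
import torch.nn.functional as F

# Shared initial point (theta) for a toy 5-feature, 2-class linear model:
theta_w = torch.zeros(2, 5, requires_grad=True)
theta_b = torch.zeros(2, requires_grad=True)
meta_opt = torch.optim.SGD([theta_w, theta_b], lr=1e-2)
alpha = 0.4  # inner-loop step size (toy value)

def inner_adapt(x_tr, y_tr, n_steps=1):
    # Take n_steps of SGD starting from theta, keeping the graph so that the
    # meta-gradient can flow back through the inner updates.
    w, b = theta_w, theta_b
    for _ in range(n_steps):
        loss = F.cross_entropy(F.linear(x_tr, w, b), y_tr)
        gw, gb = torch.autograd.grad(loss, (w, b), create_graph=True)
        w, b = w - alpha * gw, b - alpha * gb
    return w, b

# One meta-iteration over a meta-batch of 4 toy tasks with random data:
meta_opt.zero_grad()
total_objective = 0.0
for _ in range(4):
    x_tr, y_tr = torch.randn(10, 5), torch.randint(0, 2, (10,))
    x_val, y_val = torch.randn(10, 5), torch.randint(0, 2, (10,))
    w, b = inner_adapt(x_tr, y_tr)
    # The validation loss of the adapted parameters drives the update of theta:
    total_objective = total_objective + F.cross_entropy(F.linear(x_val, w, b), y_val)
(total_objective / 4).backward()
meta_opt.step()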
avg_param_vec = None
for i_task in range(n_train_tasks):
    print('Learning train-task {} out of {}'.format(i_task + 1, n_train_tasks))
    data_loader = train_data_loaders[i_task]

    # Train a standard (deterministic) model on the current task:
    test_err, curr_model = learn_single_standard.run_learning(data_loader, prm, verbose=0)

    # Accumulate a running average of the flattened parameter vectors:
    if i_task == 0:
        avg_param_vec = parameters_to_vector(curr_model.parameters()) * (1 / n_train_tasks)
    else:
        avg_param_vec += parameters_to_vector(curr_model.parameters()) * (1 / n_train_tasks)

# Write the averaged parameters into a fresh deterministic model:
avg_model = deterministic_models.get_model(prm)
vector_to_parameters(avg_param_vec, avg_model.parameters())

# Create the prior model:
prior_model = stochastic_models.get_model(prm)

# Collect the stochastic layers of the prior and the corresponding deterministic layers of the averaged model:
prior_layers_list = [layer for layer in prior_model.modules()
                     if isinstance(layer, StochasticLayer)]
avg_model_layers_list = [layer for layer in avg_model.modules()
                         if isinstance(layer, (torch.nn.Conv2d, torch.nn.Linear))]
assert len(avg_model_layers_list) == len(prior_layers_list), \
    'Mismatch between the number of deterministic and stochastic layers'
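
# -----------------------------------------------------------------------------------------------------------#
# Illustrative sketch (not part of the original code): the averaging above uses the flatten-average-unflatten
# pattern provided by torch.nn.utils.parameters_to_vector / vector_to_parameters. A minimal standalone
# illustration with two toy models (unrelated to the repository's architectures):
# -----------------------------------------------------------------------------------------------------------#
import torch
from torch import nn
from torch.nn.utils import parameters_to_vector, vector_to_parameters

# Two independently trained copies of the same architecture (toy example):
net_a, net_b = nn.Linear(4, 3), nn.Linear(4, 3)

# Average their parameters in flattened form, then write the result back
# into a fresh model with the same architecture:
avg_vec = 0.5 * parameters_to_vector(net_a.parameters()) \
        + 0.5 * parameters_to_vector(net_b.parameters())
avg_net = nn.Linear(4, 3)
vector_to_parameters(avg_vec, avg_net.parameters())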
def run_learning(data_loader, prm, verbose=1, initial_model=None):

    # Unpack parameters:
    optim_func, optim_args, lr_schedule = \
        prm.optim_func, prm.optim_args, prm.lr_schedule

    # Loss criterion
    loss_criterion = get_loss_criterion(prm.loss_type)

    # The data sets:
    train_loader = data_loader['train']
    test_loader = data_loader['test']
    n_batches = len(train_loader)

    # Create the model:
    if hasattr(prm, 'func_model') and prm.func_model:
        import Models.deterministic_models as func_models
        model = func_models.get_model(prm)
    else:
        model = get_model(prm)

    # Load initial weights:
    if initial_model:
        model.load_state_dict(initial_model.state_dict())

    # Gather the modules list:
    modules_list = list(model.named_children())
    if hasattr(model, 'net'):
        # Extract the modules from the 'net' field:
        modules_list += list(model.net.named_children())
        modules_list = [m for m in modules_list if m[0] != 'net']

    # Determine which parameters are optimized and which are frozen:
    if hasattr(prm, 'freeze_list'):
        freeze_list = prm.freeze_list
        optimized_modules = [named_module[1] for named_module in modules_list
                             if named_module[0] not in freeze_list]
        optimized_params = sum([list(mo.parameters()) for mo in optimized_modules], [])
    elif hasattr(prm, 'not_freeze_list'):
        not_freeze_list = prm.not_freeze_list
        optimized_modules = [named_module[1] for named_module in modules_list
                             if named_module[0] in not_freeze_list]
        optimized_params = sum([list(mo.parameters()) for mo in optimized_modules], [])
    else:
        optimized_params = model.parameters()

    # Get the optimizer:
    optimizer = optim_func(optimized_params, **optim_args)

    # -------------------------------------------------------------------------------------------
    #  Training epoch function
    # -------------------------------------------------------------------------------------------
    def run_train_epoch(i_epoch):
        log_interval = 500

        model.train()
        for batch_idx, batch_data in enumerate(train_loader):

            # Get the batch:
            inputs, targets = data_gen.get_batch_vars(batch_data, prm)

            # Calculate the loss:
            outputs = model(inputs)
            loss = loss_criterion(outputs, targets)

            # Take a gradient step:
            grad_step(loss, optimizer, lr_schedule, prm.lr, i_epoch)

            # Print status:
            if batch_idx % log_interval == 0:
                batch_acc = correct_rate(outputs, targets)
                print(cmn.status_string(i_epoch, prm.num_epochs, batch_idx, n_batches,
                                        batch_acc, get_value(loss)))

    # -----------------------------------------------------------------------------------------------------------#
    #  Update Log file
    # -----------------------------------------------------------------------------------------------------------#
    update_file = (verbose != 0)
    cmn.write_to_log(cmn.get_model_string(model), prm, update_file=update_file)
    cmn.write_to_log('Total number of steps: {}'.format(n_batches * prm.num_epochs), prm, update_file=update_file)
    cmn.write_to_log('Number of training samples: {}'.format(data_loader['n_train_samples']), prm, update_file=update_file)

    # -------------------------------------------------------------------------------------------
    #  Run epochs
    # -------------------------------------------------------------------------------------------
    start_time = timeit.default_timer()

    # Training loop:
    for i_epoch in range(prm.num_epochs):
        run_train_epoch(i_epoch)

    # Test:
    test_acc = run_test(model, test_loader, loss_criterion, prm)

    stop_time = timeit.default_timer()
    cmn.write_final_result(test_acc, stop_time - start_time, prm, verbose=verbose, result_name='Standard')

    test_err = 1 - test_acc
    return test_err, model
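
# -----------------------------------------------------------------------------------------------------------#
# Hypothetical usage example (not part of the original code): the attribute names follow the hasattr checks
# above, while 'conv1'/'conv2', meta_model and task_data_loader are placeholders for objects created
# elsewhere. Setting prm.freeze_list keeps the listed modules at the transferred (initial_model) weights
# and fine-tunes only the remaining modules.
# -----------------------------------------------------------------------------------------------------------#
prm.freeze_list = ['conv1', 'conv2']   # module names as reported by named_children()
test_err, adapted_model = run_learning(task_data_loader, prm,
                                       verbose=1, initial_model=meta_model)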
def run_meta_learning(prm, task_generator):

    # -------------------------------------------------------------------------------------------
    #  Setting-up
    # -------------------------------------------------------------------------------------------

    # Unpack parameters:
    optim_func, optim_args, lr_schedule = \
        prm.optim_func, prm.optim_args, prm.lr_schedule

    n_iterations = prm.n_meta_train_iterations

    # Loss criterion
    loss_criterion = get_loss_criterion(prm.loss_type)

    # Create a 'dummy' model to hold the set of parameters of the shared initial point (theta):
    model = get_model(prm)
    model.train()

    # Create an optimizer for the meta-parameters (theta):
    meta_params = list(model.parameters())
    meta_optimizer = optim_func(meta_params, **optim_args)

    meta_batch_size = prm.meta_batch_size

    # -------------------------------------------------------------------------------------------
    #  Meta-iteration function
    # -------------------------------------------------------------------------------------------
    def run_meta_iteration(i_iter):
        # In each meta-iteration we draw a meta-batch of several tasks,
        # then take a gradient step with theta.

        # Generate the data sets of the training tasks for the meta-batch:
        mb_data_loaders = task_generator.create_meta_batch(prm, meta_batch_size, meta_split='meta_train')

        # For each task, prepare an iterator to generate training batches:
        mb_iterators = [iter(mb_data_loaders[ii]['train']) for ii in range(meta_batch_size)]

        # Get the objective based on the tasks in the meta-batch:
        total_objective, info = meta_step(prm, model, mb_data_loaders, mb_iterators, loss_criterion)

        # Take a gradient step with the meta-parameters (theta) based on the validation data:
        grad_step(total_objective, meta_optimizer, lr_schedule, prm.lr, i_iter)

        # Print status:
        log_interval = 5
        if i_iter % log_interval == 0:
            batch_acc = info['correct_count'] / info['sample_count']
            print(cmn.status_string(i_iter, n_iterations, 1, 1, batch_acc, total_objective.item()))
    # end run_meta_iteration()

    # -----------------------------------------------------------------------------------------------------------#
    #  Main script
    # -----------------------------------------------------------------------------------------------------------#

    # Update Log file
    write_to_log(cmn.get_model_string(model), prm)
    write_to_log('---- Meta-Training with infinite tasks...', prm)

    # -------------------------------------------------------------------------------------------
    #  Run meta-iterations
    # -------------------------------------------------------------------------------------------
    start_time = timeit.default_timer()

    # Training loop:
    for i_iter in range(n_iterations):
        run_meta_iteration(i_iter)

    stop_time = timeit.default_timer()

    # Update Log file:
    cmn.write_final_result(0.0, stop_time - start_time, prm)

    # Return the learned meta-parameters:
    return model
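
# -----------------------------------------------------------------------------------------------------------#
# Illustrative sketch (not part of the original code): the task_generator used above is assumed to expose
# create_meta_batch(prm, meta_batch_size, meta_split) and to return, per task, a dict with at least a
# 'train' data loader (the code above indexes mb_data_loaders[ii]['train']). The real generator lives
# elsewhere in the repository; a toy stand-in for such an "infinite task" source (sinusoid regression,
# purely for illustration) might look like this:
# -----------------------------------------------------------------------------------------------------------#
import math
import random
import torch
from torch.utils.data import DataLoader, TensorDataset

class ToySineTaskGenerator:
    # Every call draws fresh sinusoid-regression tasks, so the supply of
    # training tasks is effectively infinite.

    def create_meta_batch(self, prm, meta_batch_size, meta_split='meta_train'):
        data_loaders = []
        for _ in range(meta_batch_size):
            amp, phase = random.uniform(0.1, 5.0), random.uniform(0.0, math.pi)
            x = torch.rand(20, 1) * 10 - 5          # inputs in [-5, 5]
            y = amp * torch.sin(x + phase)          # task-specific targets
            ds = TensorDataset(x, y)
            data_loaders.append({'train': DataLoader(ds, batch_size=5, shuffle=True),
                                 'test': DataLoader(ds, batch_size=5)})
        return data_loaders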