def run_eval_expected(model, loader, prm):
    ''' Estimates the expectation of the loss by Monte-Carlo averaging'''
    n_samples = len(loader.dataset)
    loss_criterion = get_loss_func(prm)
    model.eval()
    avg_loss = 0.0
    n_correct = 0
    n_MC = prm.n_MC_eval  # number of Monte-Carlo runs for the expected-loss estimation
    for batch_data in loader:
        inputs, targets = data_gen.get_batch_vars(batch_data, prm)
        batch_size = inputs.shape[0]
        # Monte-Carlo runs:
        for i_MC in range(n_MC):
            outputs = model(inputs)
            avg_loss += loss_criterion(outputs, targets).item()  # sum the loss contributed by the batch
            n_correct += count_correct(outputs, targets)
    avg_loss /= (n_MC * n_samples)
    acc = n_correct / (n_MC * n_samples)
    info = {'acc': acc, 'n_correct': n_correct,
            'n_samples': n_samples, 'avg_loss': avg_loss}
    return info
def run_eval_majority_vote(model, loader, prm, n_votes=5):
    ''' Estimates the loss of the majority vote over several draws from the network's distribution'''
    loss_criterion = get_loss_func(prm)
    n_samples = len(loader.dataset)
    n_test_batches = len(loader)
    model.eval()
    avg_loss = 0
    n_correct = 0
    for batch_data in loader:
        inputs, targets = data_gen.get_batch_vars(batch_data, prm)
        batch_size = inputs.shape[0]
        info = data_gen.get_info(prm)
        n_labels = info['n_classes']
        votes = torch.zeros((batch_size, n_labels), device=prm.device)
        loss_from_batch = 0.0
        for i_vote in range(n_votes):
            outputs = model(inputs)
            loss_from_batch += loss_criterion(outputs, targets).item()
            pred = outputs.data.max(1, keepdim=True)[1]  # index of the max output (predicted class)
            for i_sample in range(batch_size):
                pred_val = pred[i_sample].cpu().numpy()[0]
                votes[i_sample, pred_val] += 1
        avg_loss += loss_from_batch / n_votes  # sum the loss contributed by the batch
        majority_pred = votes.max(1, keepdim=True)[1]  # class with the most votes, per sample
        n_correct += majority_pred.eq(targets.data.view_as(majority_pred)).cpu().sum()
    avg_loss /= n_samples
    acc = n_correct / n_samples
    info = {'acc': acc, 'n_correct': n_correct,
            'n_samples': n_samples, 'avg_loss': avg_loss}
    return info
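# --- Illustrative sketch (not part of the original module) -------------------------------------
# A standalone, runnable illustration of the vote accumulation in run_eval_majority_vote above:
# each stochastic forward pass casts one vote per sample for its argmax class, and the final
# prediction is the class with the most votes. The random `demo_outputs` tensor is a hypothetical
# stand-in for model(inputs); all sizes are illustrative only.
import torch

demo_batch_size, demo_n_labels, demo_n_votes = 3, 4, 5
demo_votes = torch.zeros((demo_batch_size, demo_n_labels))
for _ in range(demo_n_votes):
    demo_outputs = torch.randn(demo_batch_size, demo_n_labels)   # stand-in for a stochastic forward pass
    demo_pred = demo_outputs.argmax(dim=1, keepdim=True)         # index of the max output per sample
    demo_votes.scatter_add_(1, demo_pred, torch.ones_like(demo_pred, dtype=demo_votes.dtype))
demo_majority_pred = demo_votes.argmax(dim=1, keepdim=True)      # class with the most votes
print(demo_votes)
print(demo_majority_pred)
# ------------------------------------------------------------------------------------------------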
def run_eval_max_posterior(model, loader, prm):
    ''' Estimates the loss using the mean (maximum-posterior) network parameters'''
    n_samples = len(loader.dataset)
    loss_criterion = get_loss_func(prm)
    model.eval()
    avg_loss = 0
    n_correct = 0
    for batch_data in loader:
        # Extract the batch data:
        inputs, targets = data_gen.get_batch_vars(batch_data, prm)
        batch_size = inputs.shape[0]
        old_eps_std = model.set_eps_std(0.0)  # test with the max-posterior (mean) parameters
        outputs = model(inputs)
        model.set_eps_std(old_eps_std)  # return the model to its normal (stochastic) behaviour
        avg_loss += loss_criterion(outputs, targets).item()  # sum the loss contributed by the batch
        n_correct += count_correct(outputs, targets)
    avg_loss /= n_samples
    acc = n_correct / n_samples
    info = {'acc': acc, 'n_correct': n_correct,
            'n_samples': n_samples, 'avg_loss': avg_loss}
    return info
def run_eval_avg_vote(model, loader, prm, n_votes=5):
    ''' Estimates the loss of the averaged vote over several draws from the network's distribution'''
    loss_criterion = get_loss_func(prm)
    n_samples = len(loader.dataset)
    n_test_batches = len(loader)
    model.eval()
    avg_loss = 0
    n_correct = 0
    for batch_data in loader:
        inputs, targets = data_gen.get_batch_vars(batch_data, prm)
        batch_size = inputs.shape[0]  # use the actual batch size (the last batch may be smaller)
        info = data_gen.get_info(prm)
        n_labels = info['n_classes']
        votes = torch.zeros((batch_size, n_labels), device=prm.device)
        loss_from_batch = 0.0
        for i_vote in range(n_votes):
            outputs = model(inputs)
            loss_from_batch += loss_criterion(outputs, targets).item()
            votes += outputs.data  # accumulate the (soft) votes
        majority_pred = votes.max(1, keepdim=True)[1]  # class with the highest accumulated score
        n_correct += majority_pred.eq(targets.data.view_as(majority_pred)).cpu().sum()
        avg_loss += loss_from_batch / n_votes  # sum the loss contributed by the batch
    avg_loss /= n_samples
    acc = n_correct / n_samples
    info = {'acc': acc, 'n_correct': n_correct,
            'n_samples': n_samples, 'avg_loss': avg_loss}
    return info
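# --- Illustrative sketch (not part of the original module) -------------------------------------
# The training code below calls run_eval_Bayes(model, loader, prm), whose definition is not
# included here. The sketch below is an assumption about how such a dispatcher could select
# among the four evaluators above via prm.test_type (the field name appears in the logging code
# below; the option strings used here are hypothetical).
def run_eval_bayes_sketch(model, loader, prm):
    if prm.test_type == 'MaxPosterior':
        info = run_eval_max_posterior(model, loader, prm)
    elif prm.test_type == 'MajorityVote':
        info = run_eval_majority_vote(model, loader, prm, n_votes=5)
    elif prm.test_type == 'AvgVote':
        info = run_eval_avg_vote(model, loader, prm, n_votes=5)
    else:
        # Default: Monte-Carlo estimate of the expected loss
        info = run_eval_expected(model, loader, prm)
    return info['acc'], info['avg_loss']
# ------------------------------------------------------------------------------------------------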
def run_learning(data_loader, prm, prior_model=None, init_from_prior=True, verbose=1):

    # -------------------------------------------------------------------------------------------
    #  Setting-up
    # -------------------------------------------------------------------------------------------

    # Unpack parameters:
    optim_func, optim_args, lr_schedule = \
        prm.optim_func, prm.optim_args, prm.lr_schedule

    # Loss criterion
    loss_criterion = get_loss_func(prm)

    train_loader = data_loader['train']
    test_loader = data_loader['test']
    n_batches = len(train_loader)
    n_train_samples = data_loader['n_train_samples']

    figure_flag = hasattr(prm, 'log_figure') and prm.log_figure

    # Get the model:
    if prior_model and init_from_prior:
        # Initialize the posterior from the prior model:
        post_model = deepcopy(prior_model).to(prm.device)
    else:
        post_model = get_model(prm)

    # post_model.set_eps_std(0.0)  # DEBUG: turn off randomness

    # Get optimizer:
    optimizer = optim_func(post_model.parameters(), **optim_args)

    # -------------------------------------------------------------------------------------------
    #  Training epoch function
    # -------------------------------------------------------------------------------------------
    def run_train_epoch(i_epoch, log_mat):
        post_model.train()

        for batch_idx, batch_data in enumerate(train_loader):
            # Get the batch data:
            inputs, targets = data_gen.get_batch_vars(batch_data, prm)
            batch_size = inputs.shape[0]

            # Monte-Carlo iterations:
            avg_empiric_loss = torch.zeros(1, device=prm.device)
            n_MC = prm.n_MC
            for i_MC in range(n_MC):
                # Calculate the empirical loss:
                outputs = post_model(inputs)
                avg_empiric_loss_curr = (1 / batch_size) * loss_criterion(outputs, targets)
                avg_empiric_loss += (1 / n_MC) * avg_empiric_loss_curr

            # Complexity/prior term:
            if prior_model:
                complexity_term = get_task_complexity(
                    prm, prior_model, post_model, n_train_samples, avg_empiric_loss)
            else:
                complexity_term = torch.zeros(1, device=prm.device)

            # Total objective:
            objective = avg_empiric_loss + complexity_term

            # Take a gradient step:
            grad_step(objective, optimizer, lr_schedule, prm.lr, i_epoch)

            # Print status:
            log_interval = 1000
            if batch_idx % log_interval == 0:
                batch_acc = correct_rate(outputs, targets)
                print(cmn.status_string(i_epoch, prm.num_epochs, batch_idx, n_batches,
                                        batch_acc, objective.item()) +
                      ' Loss: {:.4}\t Comp.: {:.4}'.format(
                          avg_empiric_loss.item(), complexity_term.item()))
        # End batch loop

        # Save results for the epochs-figure:
        if figure_flag and (i_epoch % prm.log_figure['interval_epochs'] == 0):
            save_result_for_figure(post_model, prior_model, data_loader, prm, log_mat, i_epoch)
    # End run_train_epoch()

    # -------------------------------------------------------------------------------------------
    #  Main script
    # -------------------------------------------------------------------------------------------

    # Update the log file:
    if verbose:
        write_to_log(cmn.get_model_string(post_model), prm)
        write_to_log('Number of weights: {}'.format(post_model.weights_count), prm)
        write_to_log('Total number of steps: {}'.format(n_batches * prm.num_epochs), prm)
        write_to_log('Number of training samples: {}'.format(data_loader['n_train_samples']), prm)

    start_time = timeit.default_timer()

    if figure_flag:
        n_logs = 1 + ((prm.num_epochs - 1) // prm.log_figure['interval_epochs'])
        log_mat = np.zeros((len(prm.log_figure['val_types']), n_logs))
    else:
        log_mat = None

    # Run the training epochs:
    for i_epoch in range(prm.num_epochs):
        run_train_epoch(i_epoch, log_mat)

    # Evaluate the final performance on the train set:
    train_acc, train_loss = run_eval_Bayes(post_model, train_loader, prm)

    # Test:
    test_acc, test_loss = run_eval_Bayes(post_model, test_loader, prm)
    test_err = 1 - test_acc

    # Log results:
    if verbose:
        write_to_log('>Train-err. : {:.4}%\t Train-loss: {:.4}'.format(
            100 * (1 - train_acc), train_loss), prm)
        write_to_log('>Test-err. {:1.3}%, Test-loss: {:.4}'.format(
            100 * test_err, test_loss), prm)

    stop_time = timeit.default_timer()
    if verbose:
        cmn.write_final_result(test_acc, stop_time - start_time, prm)

    if figure_flag:
        plot_log(log_mat, prm)

    return post_model, test_err, test_loss, log_mat
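# --- Illustrative sketch (not part of the original module) -------------------------------------
# A standalone, runnable illustration of the Monte-Carlo averaging of the empirical loss used in
# run_train_epoch above. Dropout is a hypothetical stand-in for the weight sampling of the repo's
# stochastic (Bayesian) model, and F.cross_entropy(...) plays the role of
# (1 / batch_size) * loss_criterion(...) under a sum-reduced criterion.
import torch
import torch.nn as nn
import torch.nn.functional as F

demo_model = nn.Sequential(nn.Linear(8, 16), nn.ReLU(), nn.Dropout(p=0.5), nn.Linear(16, 3))
demo_model.train()
demo_inputs, demo_targets = torch.randn(32, 8), torch.randint(0, 3, (32,))

demo_n_MC = 3
demo_avg_empiric_loss = 0.0
for _ in range(demo_n_MC):
    demo_outputs = demo_model(demo_inputs)       # a fresh stochastic forward pass
    demo_avg_empiric_loss = demo_avg_empiric_loss + F.cross_entropy(demo_outputs, demo_targets) / demo_n_MC

# In run_train_epoch the objective is avg_empiric_loss + complexity_term; here we just backprop:
demo_avg_empiric_loss.backward()
# ------------------------------------------------------------------------------------------------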
def run_meta_learning(data_loaders, prm):
    # -------------------------------------------------------------------------------------------
    #  Setting-up
    # -------------------------------------------------------------------------------------------
    # Unpack parameters:
    optim_func, optim_args, lr_schedule = \
        prm.optim_func, prm.optim_args, prm.lr_schedule

    # Loss criterion
    loss_criterion = get_loss_func(prm)

    n_train_tasks = len(data_loaders)

    # assert prm.meta_batch_size <= n_train_tasks

    # Create a posterior model for each task:
    posteriors_models = [get_model(prm) for _ in range(n_train_tasks)]

    # Create a 'dummy' model to generate the set of parameters of the shared prior:
    prior_model = get_model(prm)

    # Gather all tasks' posterior parameters:
    all_post_param = sum([list(posterior_model.parameters())
                          for posterior_model in posteriors_models], [])

    # Create an optimizer for all parameters (posteriors + prior):
    prior_params = list(prior_model.parameters())
    all_params = all_post_param + prior_params
    all_optimizer = optim_func(all_params, **optim_args)

    # Number of sample-batches in each task:
    n_batch_list = [len(data_loader['train']) for data_loader in data_loaders]
    n_batches_per_task = np.max(n_batch_list)

    # -------------------------------------------------------------------------------------------
    #  Training epoch function
    # -------------------------------------------------------------------------------------------
    def run_train_epoch(i_epoch, i_step=0):

        # For each task, prepare an iterator to generate training batches:
        train_iterators = [iter(data_loaders[ii]['train']) for ii in range(n_train_tasks)]

        # The task order to take batches from:
        # The meta-batches are balanced, i.e., each task appears roughly the same number of times.
        # Note: if some tasks have less data than other tasks, they may be sampled more than once in an epoch.
        task_order = []
        task_ids_list = list(range(n_train_tasks))
        for i_batch in range(n_batches_per_task):
            random.shuffle(task_ids_list)
            task_order += task_ids_list
        # Note: this method ensures each training sample in each task is drawn in each epoch.
        # If all the tasks have the same number of samples, then each sample is drawn exactly once per epoch.

        # ----------- meta-batches loop (batches of tasks) -----------------------------------#
        # Each meta-batch includes several tasks.
        # We take a gradient step with theta after each meta-batch.
        meta_batch_starts = list(range(0, len(task_order), prm.meta_batch_size))
        n_meta_batches = len(meta_batch_starts)

        for i_meta_batch in range(n_meta_batches):

            meta_batch_start = meta_batch_starts[i_meta_batch]
            task_ids_in_meta_batch = task_order[meta_batch_start: (meta_batch_start + prm.meta_batch_size)]
            # The last meta-batch may contain fewer than prm.meta_batch_size tasks.
            # Note: it is OK if some tasks appear several times in the meta-batch.

            mb_data_loaders = [data_loaders[task_id] for task_id in task_ids_in_meta_batch]
            mb_iterators = [train_iterators[task_id] for task_id in task_ids_in_meta_batch]
            mb_posteriors_models = [posteriors_models[task_id] for task_id in task_ids_in_meta_batch]

            # prior_weight_steps = 10000
            # prior_weight = 1 - math.exp(-i_step / prior_weight_steps)
            # prior_weight = min(i_step / prior_weight_steps, 1.0)
            i_step += 1

            # Get the objective based on the tasks in the meta-batch:
            total_objective, info = get_objective(prior_model, prm, mb_data_loaders,
                                                  mb_iterators, mb_posteriors_models,
                                                  loss_criterion, n_train_tasks)

            # Take a gradient step with the shared prior and all the tasks' posteriors:
            grad_step(total_objective, all_optimizer, lr_schedule, prm.lr, i_epoch)

            # Print status:
            log_interval = 200
            if i_meta_batch % log_interval == 0:
                batch_acc = info['correct_count'] / info['sample_count']
                print(cmn.status_string(i_epoch, prm.n_meta_train_epochs, i_meta_batch,
                                        n_meta_batches, batch_acc, total_objective.item()) +
                      ' Empiric-Loss: {:.4}\t Task-Comp. {:.4}\t Meta-Comp.: {:.4}\t'.format(
                          info['avg_empirical_loss'], info['avg_intra_task_comp'], info['meta_comp']))
        # end meta-batches loop
        return i_step
    # end run_epoch()

    # -------------------------------------------------------------------------------------------
    #  Test evaluation function -
    #  Evaluate the mean loss on samples from the test sets of the training tasks
    # -------------------------------------------------------------------------------------------
    def run_test():
        test_acc_avg = 0.0
        n_tests = 0
        for i_task in range(n_train_tasks):
            model = posteriors_models[i_task]
            test_loader = data_loaders[i_task]['test']
            if len(test_loader) > 0:
                test_acc, test_loss = run_eval_Bayes(model, test_loader, prm)
                n_tests += 1
                test_acc_avg += test_acc
                n_test_samples = len(test_loader.dataset)
                write_to_log('Train Task {}, Test set: {} - Average loss: {:.4}, Accuracy: {:.3} (of {} samples)\n'.format(
                    i_task, prm.test_type, test_loss, test_acc, n_test_samples), prm)
            else:
                print('Train Task {}, Test set: {} - No test data'.format(i_task, prm.test_type))

        if n_tests > 0:
            test_acc_avg /= n_tests
        return test_acc_avg

    # -----------------------------------------------------------------------------------------------------------#
    #  Main script
    # -----------------------------------------------------------------------------------------------------------#

    # Update the log file:
    write_to_log(cmn.get_model_string(prior_model), prm)
    write_to_log('---- Meta-Training set: {0} tasks'.format(len(data_loaders)), prm)

    # -------------------------------------------------------------------------------------------
    #  Run epochs
    # -------------------------------------------------------------------------------------------
    start_time = timeit.default_timer()

    i_step = 0
    # Training loop:
    for i_epoch in range(prm.n_meta_train_epochs):
        i_step = run_train_epoch(i_epoch, i_step)

    stop_time = timeit.default_timer()

    # Test:
    test_acc_avg = run_test()

    # Update the log file:
    cmn.write_final_result(test_acc_avg, stop_time - start_time, prm, result_name=prm.test_type)

    # Return the learned prior:
    return prior_model
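# --- Illustrative sketch (not part of the original module) -------------------------------------
# A standalone, runnable illustration of the balanced task ordering built in run_train_epoch
# above: the task ids are reshuffled and appended once per "round", so over an epoch every task
# appears the same number of times in the stream of meta-batches. All numbers are illustrative.
import random

demo_n_train_tasks, demo_n_batches_per_task, demo_meta_batch_size = 5, 3, 4

demo_task_order = []
demo_task_ids_list = list(range(demo_n_train_tasks))
for _ in range(demo_n_batches_per_task):
    random.shuffle(demo_task_ids_list)
    demo_task_order += demo_task_ids_list

demo_meta_batches = [demo_task_order[i: i + demo_meta_batch_size]
                     for i in range(0, len(demo_task_order), demo_meta_batch_size)]
print(demo_meta_batches)   # e.g. [[2, 0, 4, 1], [3, 1, 0, 4], ...] - the last meta-batch may be smaller
# ------------------------------------------------------------------------------------------------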
def run_meta_iteration(i_iter, prior_model, task_generator, prm):
    # In each meta-iteration we draw a meta-batch of several tasks,
    # then we take a gradient step with the prior.

    # Unpack parameters:
    optim_func, optim_args, lr_schedule = \
        prm.optim_func, prm.optim_args, prm.lr_schedule

    # Loss criterion
    loss_criterion = get_loss_func(prm)

    meta_batch_size = prm.meta_batch_size
    n_inner_steps = prm.n_inner_steps
    n_meta_iterations = prm.n_meta_train_epochs

    # Generate the data sets of the training tasks for the meta-batch:
    mb_data_loaders = task_generator.create_meta_batch(prm, meta_batch_size, meta_split='meta_train')

    # For each task, prepare an iterator to generate training batches:
    mb_iterators = [iter(mb_data_loaders[ii]['train']) for ii in range(meta_batch_size)]

    # The posterior models are re-adjusted to the new tasks in each meta-batch.
    # Create a posterior model for each task:
    posteriors_models = [get_model(prm) for _ in range(meta_batch_size)]
    init_from_prior = True
    if init_from_prior:
        for post_model in posteriors_models:
            post_model.load_state_dict(prior_model.state_dict())

    # Gather all tasks' posterior parameters:
    all_post_param = sum([list(posterior_model.parameters())
                          for posterior_model in posteriors_models], [])

    # Create an optimizer for all parameters (posteriors + prior):
    prior_params = list(prior_model.parameters())
    all_params = all_post_param + prior_params
    all_optimizer = optim_func(all_params, **optim_args)
    # all_optimizer = optim_func(prior_params, **optim_args)  # DEBUG

    test_acc_avg = 0.0

    for i_inner_step in range(n_inner_steps):

        # Get the objective based on the tasks in the meta-batch:
        total_objective, info = get_objective(prior_model, prm, mb_data_loaders,
                                              mb_iterators, posteriors_models,
                                              loss_criterion, prm.n_train_tasks)

        # Take a gradient step with the meta-parameters (theta) based on the validation data:
        grad_step(total_objective, all_optimizer, lr_schedule, prm.lr, i_iter)

        # Print status:
        log_interval = 20
        if i_inner_step % log_interval == 0:
            batch_acc = info['correct_count'] / info['sample_count']
            print(cmn.status_string(i_iter, n_meta_iterations, i_inner_step, n_inner_steps,
                                    batch_acc, total_objective.item()) +
                  ' Empiric-Loss: {:.4}\t Task-Comp. {:.4}\t'.format(
                      info['avg_empirical_loss'], info['avg_intra_task_comp']))
    # End of inner steps

    # Print status on the test sets of the meta-batch:
    log_interval_eval = 10
    if (i_iter % log_interval_eval == 0) and (i_iter > 0):
        test_acc_avg = run_test(mb_data_loaders, posteriors_models, loss_criterion, prm)
        print('Meta-iter: {} \t Meta-Batch Test Acc: {:1.3}\t'.format(i_iter, test_acc_avg))

    return prior_model, posteriors_models, test_acc_avg
def run_meta_learning(train_data_loaders, prm):
    # -------------------------------------------------------------------------------------------
    #  Setting-up
    # -------------------------------------------------------------------------------------------
    # Unpack parameters:
    optim_func, optim_args, lr_schedule = \
        prm.optim_func, prm.optim_args, prm.lr_schedule

    # Loss criterion
    loss_criterion = get_loss_func(prm)

    n_tasks = len(train_data_loaders)

    # Create a 'dummy' model to generate the set of parameters of the shared initial point (theta):
    model = get_model(prm)
    model.train()

    # Create an optimizer for the meta-parameters (theta):
    meta_params = list(model.parameters())
    meta_optimizer = optim_func(meta_params, **optim_args)

    # Number of sample-batches in each task:
    n_batch_list = [len(data_loader['train']) for data_loader in train_data_loaders]
    n_batches_per_task = np.max(n_batch_list)
    # Note: if some tasks have less data than other tasks, they may be sampled more than once in an epoch.

    # -------------------------------------------------------------------------------------------
    #  Training epoch function
    # -------------------------------------------------------------------------------------------
    def run_train_epoch(i_epoch):

        # For each task, prepare an iterator to generate training batches:
        train_iterators = [iter(train_data_loaders[ii]['train']) for ii in range(n_tasks)]

        # The task order to take batches from:
        task_order = []
        task_ids_list = list(range(n_tasks))
        for i_batch in range(n_batches_per_task):
            random.shuffle(task_ids_list)
            task_order += task_ids_list

        # Each meta-batch includes several tasks.
        # We take a gradient step with theta after each meta-batch.
        meta_batch_starts = list(range(0, len(task_order), prm.meta_batch_size))
        n_meta_batches = len(meta_batch_starts)

        # ----------- meta-batches loop (batches of tasks) -----------------------------------#
        for i_meta_batch in range(n_meta_batches):

            meta_batch_start = meta_batch_starts[i_meta_batch]
            task_ids_in_meta_batch = task_order[meta_batch_start:(meta_batch_start + prm.meta_batch_size)]
            n_tasks_in_batch = len(task_ids_in_meta_batch)  # may be less than prm.meta_batch_size at the last one
            # Note: it is OK if some tasks appear several times in the meta-batch.

            mb_data_loaders = [train_data_loaders[task_id] for task_id in task_ids_in_meta_batch]
            mb_iterators = [train_iterators[task_id] for task_id in task_ids_in_meta_batch]

            # Get the objective based on the tasks in the meta-batch:
            total_objective, info = meta_step(prm, model, mb_data_loaders, mb_iterators, loss_criterion)

            # Take a gradient step with the meta-parameters (theta) based on the validation data:
            grad_step(total_objective, meta_optimizer, lr_schedule, prm.lr, i_epoch)

            # Print status:
            log_interval = 200
            if i_meta_batch % log_interval == 0:
                batch_acc = info['correct_count'] / info['sample_count']
                print(cmn.status_string(i_epoch, num_epochs, i_meta_batch, n_meta_batches,
                                        batch_acc, total_objective.item()))
        # end meta-batches loop
    # end run_epoch()

    # -----------------------------------------------------------------------------------------------------------#
    #  Main script
    # -----------------------------------------------------------------------------------------------------------#

    # Update the log file:
    write_to_log(cmn.get_model_string(model), prm)
    write_to_log('---- Meta-Training set: {0} tasks'.format(len(train_data_loaders)), prm)

    # -------------------------------------------------------------------------------------------
    #  Run epochs
    # -------------------------------------------------------------------------------------------
    start_time = timeit.default_timer()

    # The number of epochs needed to cover the requested number of meta-iterations:
    num_epochs = int(np.ceil(prm.n_meta_train_iterations /
                             np.ceil(n_tasks / prm.meta_batch_size)))

    # Training loop:
    for i_epoch in range(num_epochs):
        run_train_epoch(i_epoch)

    stop_time = timeit.default_timer()

    # Update the log file:
    cmn.write_final_result(0.0, stop_time - start_time, prm)

    # Return the learned meta-parameters:
    return model
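# --- Illustrative sketch (not part of the original module) -------------------------------------
# A worked example of the num_epochs arithmetic above: one pass over the tasks takes
# ceil(n_tasks / meta_batch_size) meta-iterations, so covering n_meta_train_iterations requires
# the number of epochs computed below. All numbers are illustrative.
import numpy as np

demo_n_tasks, demo_meta_batch_size, demo_n_meta_train_iterations = 10, 4, 100
demo_iters_per_pass = int(np.ceil(demo_n_tasks / demo_meta_batch_size))                 # 3
demo_num_epochs = int(np.ceil(demo_n_meta_train_iterations / demo_iters_per_pass))      # 34
print(demo_iters_per_pass, demo_num_epochs)
# ------------------------------------------------------------------------------------------------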
def run_learning(task_data, prior_model, prm, init_from_prior=True, verbose=1):

    # -------------------------------------------------------------------------------------------
    #  Setting-up
    # -------------------------------------------------------------------------------------------
    # Unpack parameters:
    optim_func, optim_args, lr_schedule = \
        prm.optim_func, prm.optim_args, prm.lr_schedule

    # Loss criterion
    loss_criterion = get_loss_func(prm)

    # Create the posterior model for the new task:
    post_model = get_model(prm)

    if init_from_prior:
        post_model.load_state_dict(prior_model.state_dict())

        # Alternative initialization (kept for reference):
        # prior_model_dict = prior_model.state_dict()
        # post_model_dict = post_model.state_dict()
        # # filter out unnecessary keys:
        # prior_model_dict = {k: v for k, v in prior_model_dict.items() if '_log_var' in k or '_mu' in k}
        # # overwrite entries in the existing state dict:
        # post_model_dict.update(prior_model_dict)
        # # load the new state dict:
        # post_model.load_state_dict(post_model_dict)
        # add_noise_to_model(post_model, prm.kappa_factor)

    # The data sets of the new task:
    train_loader = task_data['train']
    test_loader = task_data['test']
    n_train_samples = len(train_loader.dataset)
    n_batches = len(train_loader)

    # Get optimizer:
    optimizer = optim_func(post_model.parameters(), **optim_args)

    # -------------------------------------------------------------------------------------------
    #  Training epoch function
    # -------------------------------------------------------------------------------------------
    def run_train_epoch(i_epoch):
        log_interval = 500

        post_model.train()

        for batch_idx, batch_data in enumerate(train_loader):
            # Get the batch data:
            inputs, targets = data_gen.get_batch_vars(batch_data, prm)
            batch_size = inputs.shape[0]

            correct_count = 0
            sample_count = 0

            # Monte-Carlo iterations:
            n_MC = prm.n_MC
            avg_empiric_loss = 0
            complexity_term = 0
            for i_MC in range(n_MC):
                # Calculate the empirical loss:
                outputs = post_model(inputs)
                avg_empiric_loss_curr = (1 / batch_size) * loss_criterion(outputs, targets)
                # complexity_curr = get_task_complexity(prm, prior_model, post_model,
                #                                       n_train_samples, avg_empiric_loss_curr)
                avg_empiric_loss += (1 / n_MC) * avg_empiric_loss_curr
                # complexity_term += (1 / n_MC) * complexity_curr

                correct_count += count_correct(outputs, targets)
                sample_count += inputs.size(0)
            # end Monte-Carlo loop

            complexity_term = get_task_complexity(prm, prior_model, post_model,
                                                  n_train_samples, avg_empiric_loss)

            # Approximated total objective (for the current batch):
            if prm.complexity_type == 'Variational_Bayes':
                # Note that avg_empiric_loss is estimated by an average over the batch samples,
                # but its weight in the objective should reflect the total number of samples in the task.
                total_objective = avg_empiric_loss * n_train_samples + complexity_term
            else:
                total_objective = avg_empiric_loss + complexity_term

            # Take a gradient step with the posterior:
            grad_step(total_objective, optimizer, lr_schedule, prm.lr, i_epoch)

            # Print status:
            if batch_idx % log_interval == 0:
                batch_acc = correct_count / sample_count
                print(cmn.status_string(i_epoch, prm.n_meta_test_epochs, batch_idx, n_batches,
                                        batch_acc, total_objective.item()) +
                      ' Empiric Loss: {:.4}\t Intra-Comp. {:.4}'.format(
                          avg_empiric_loss.item(), complexity_term.item()))
        # end batch loop
    # end run_train_epoch()

    # -----------------------------------------------------------------------------------------------------------#

    # Update the log file:
    if verbose == 1:
        write_to_log('Total number of steps: {}'.format(n_batches * prm.n_meta_test_epochs), prm)

    # -------------------------------------------------------------------------------------------
    #  Run epochs
    # -------------------------------------------------------------------------------------------
    start_time = timeit.default_timer()

    # Training loop:
    for i_epoch in range(prm.n_meta_test_epochs):
        run_train_epoch(i_epoch)

    # Test:
    test_acc, test_loss = run_eval_Bayes(post_model, test_loader, prm)

    stop_time = timeit.default_timer()
    cmn.write_final_result(test_acc, stop_time - start_time, prm,
                           result_name=prm.test_type, verbose=verbose)

    test_err = 1 - test_acc
    return test_err, post_model
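# --- Illustrative sketch (not part of the original modules) ------------------------------------
# A minimal end-to-end sketch of the intended flow, under the assumption that run_meta_learning
# refers to the PAC-Bayes meta-training routine above (which returns the learned prior) and
# run_learning refers to the meta-test adaptation routine above (the two live in separate modules
# in the repo). The data structures are expected to come from the repo's data generators and are
# passed in here so the sketch stays self-contained.
def meta_train_then_adapt(train_tasks_data_loaders, new_task_data, prm):
    # Meta-train a shared prior over the training tasks:
    prior_model = run_meta_learning(train_tasks_data_loaders, prm)
    # Adapt a posterior to the new task, starting from the learned prior:
    test_err, post_model = run_learning(new_task_data, prior_model, prm, init_from_prior=True)
    return test_err, post_model
# ------------------------------------------------------------------------------------------------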
def run_learning(data_loader, prm, verbose=1, initial_model=None):

    # Unpack parameters:
    optim_func, optim_args, lr_schedule = \
        prm.optim_func, prm.optim_args, prm.lr_schedule

    # Loss criterion
    loss_criterion = get_loss_func(prm)

    # The data sets:
    train_loader = data_loader['train']
    test_loader = data_loader['test']
    n_batches = len(train_loader)

    # Create the model:
    if hasattr(prm, 'func_model') and prm.func_model:
        import Models.deterministic_models as func_models
        model = func_models.get_model(prm)
    else:
        model = get_model(prm)

    # Load initial weights:
    if initial_model:
        model.load_state_dict(initial_model.state_dict())

    # Gather the modules list:
    modules_list = list(model.named_children())
    if hasattr(model, 'net'):
        # Extract the modules from the 'net' field:
        modules_list += list(model.net.named_children())
        modules_list = [m for m in modules_list if m[0] != 'net']

    # Determine which parameters are optimized and which are frozen:
    if hasattr(prm, 'freeze_list'):
        freeze_list = prm.freeze_list
        optimized_modules = [named_module[1] for named_module in modules_list
                             if named_module[0] not in freeze_list]
        optimized_params = sum([list(mo.parameters()) for mo in optimized_modules], [])
    elif hasattr(prm, 'not_freeze_list'):
        not_freeze_list = prm.not_freeze_list
        optimized_modules = [named_module[1] for named_module in modules_list
                             if named_module[0] in not_freeze_list]
        optimized_params = sum([list(mo.parameters()) for mo in optimized_modules], [])
    else:
        optimized_params = model.parameters()

    # Get optimizer:
    optimizer = optim_func(optimized_params, **optim_args)

    # -------------------------------------------------------------------------------------------
    #  Training epoch function
    # -------------------------------------------------------------------------------------------
    def run_train_epoch(i_epoch):
        log_interval = 500

        model.train()
        for batch_idx, batch_data in enumerate(train_loader):
            # Get the batch data:
            inputs, targets = data_gen.get_batch_vars(batch_data, prm)
            batch_size = inputs.shape[0]

            # Calculate the loss:
            outputs = model(inputs)
            loss = (1 / batch_size) * loss_criterion(outputs, targets)

            # Take a gradient step:
            grad_step(loss, optimizer, lr_schedule, prm.lr, i_epoch)

            # Print status:
            if batch_idx % log_interval == 0:
                batch_acc = correct_rate(outputs, targets)
                print(cmn.status_string(i_epoch, prm.num_epochs, batch_idx, n_batches,
                                        batch_acc, loss.item()))

    # -----------------------------------------------------------------------------------------------------------#
    #  Update the log file
    # -----------------------------------------------------------------------------------------------------------#
    update_file = (verbose != 0)
    write_to_log(cmn.get_model_string(model), prm, update_file=update_file)
    write_to_log('Number of weights: {}'.format(model.weights_count), prm, update_file=update_file)
    write_to_log('Total number of steps: {}'.format(n_batches * prm.num_epochs), prm, update_file=update_file)
    write_to_log('Number of training samples: {}'.format(data_loader['n_train_samples']),
                 prm, update_file=update_file)

    # -------------------------------------------------------------------------------------------
    #  Run epochs
    # -------------------------------------------------------------------------------------------
    start_time = timeit.default_timer()

    # Training loop:
    for i_epoch in range(prm.num_epochs):
        run_train_epoch(i_epoch)

    # Test:
    test_acc = run_test(model, test_loader, loss_criterion, prm)

    stop_time = timeit.default_timer()
    cmn.write_final_result(test_acc, stop_time - start_time, prm,
                           verbose=verbose, result_name='Standard')

    test_err = 1 - test_acc
    return test_err, model
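# --- Illustrative sketch (not part of the original module) -------------------------------------
# A standalone, runnable illustration of the freeze_list selection above, on a toy model
# (the module names 'features' / 'classifier' are purely illustrative).
import torch.nn as nn

demo_toy_model = nn.Sequential()
demo_toy_model.add_module('features', nn.Linear(8, 16))
demo_toy_model.add_module('classifier', nn.Linear(16, 3))

demo_freeze_list = ['features']
demo_modules_list = list(demo_toy_model.named_children())
demo_optimized_modules = [module for name, module in demo_modules_list if name not in demo_freeze_list]
demo_optimized_params = sum([list(m.parameters()) for m in demo_optimized_modules], [])

# Only the classifier's weight and bias remain in the optimized parameters:
print([p.shape for p in demo_optimized_params])   # [torch.Size([3, 16]), torch.Size([3])]
# ------------------------------------------------------------------------------------------------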
def run_meta_learning(prm, task_generator):
    # -------------------------------------------------------------------------------------------
    #  Setting-up
    # -------------------------------------------------------------------------------------------
    # Unpack parameters:
    optim_func, optim_args, lr_schedule = \
        prm.optim_func, prm.optim_args, prm.lr_schedule

    n_iterations = prm.n_meta_train_iterations

    # Loss criterion
    loss_criterion = get_loss_func(prm)

    # Create a 'dummy' model to generate the set of parameters of the shared initial point (theta):
    model = get_model(prm)
    model.train()

    # Create an optimizer for the meta-parameters (theta):
    meta_params = list(model.parameters())
    meta_optimizer = optim_func(meta_params, **optim_args)

    meta_batch_size = prm.meta_batch_size

    # -------------------------------------------------------------------------------------------
    #  Meta-iteration function
    # -------------------------------------------------------------------------------------------
    def run_meta_iteration(i_iter):
        # In each meta-iteration we draw a meta-batch of several tasks,
        # then we take a gradient step with theta.

        # Generate the data sets of the training tasks for the meta-batch:
        mb_data_loaders = task_generator.create_meta_batch(prm, meta_batch_size, meta_split='meta_train')

        # For each task, prepare an iterator to generate training batches:
        mb_iterators = [iter(mb_data_loaders[ii]['train']) for ii in range(meta_batch_size)]

        # Get the objective based on the tasks in the meta-batch:
        total_objective, info = meta_step(prm, model, mb_data_loaders, mb_iterators, loss_criterion)

        # Take a gradient step with the meta-parameters (theta) based on the validation data:
        grad_step(total_objective, meta_optimizer, lr_schedule, prm.lr, i_iter)

        # Print status:
        log_interval = 5
        if i_iter % log_interval == 0:
            batch_acc = info['correct_count'] / info['sample_count']
            print(cmn.status_string(i_iter, n_iterations, 1, 1, batch_acc, total_objective.item()))
    # end run_meta_iteration()

    # -----------------------------------------------------------------------------------------------------------#
    #  Main script
    # -----------------------------------------------------------------------------------------------------------#

    # Update the log file:
    write_to_log(cmn.get_model_string(model), prm)
    write_to_log('---- Meta-Training with infinite tasks...', prm)

    # -------------------------------------------------------------------------------------------
    #  Run meta-iterations
    # -------------------------------------------------------------------------------------------
    start_time = timeit.default_timer()

    # Training loop:
    for i_iter in range(n_iterations):
        run_meta_iteration(i_iter)

    stop_time = timeit.default_timer()

    # Update the log file:
    cmn.write_final_result(0.0, stop_time - start_time, prm)

    # Return the learned meta-parameters:
    return model