Beispiel #1
0
    def run_test():
        """Evaluate each task's posterior on its test split and return the
        mean test accuracy over tasks that have test data (0.0 if none do)."""
        accuracies = []
        for task_idx in range(n_train_tasks):
            task_model = posteriors_models[task_idx]
            loader = data_loaders[task_idx]['test']
            if len(loader) == 0:
                print('Train Task {}, Test set: {} - No test data'.format(
                    task_idx, prm.test_type))
                continue
            acc, loss = run_test_Bayes(task_model, loader,
                                       loss_criterion, prm)
            accuracies.append(acc)
            write_to_log(
                'Train Task {}, Test set: {} -  Average loss: {:.4}, Accuracy: {:.3} (of {} samples)\n'
                .format(task_idx, prm.test_type, loss, acc,
                        len(loader.dataset)), prm)
        # Average only over tasks that were actually evaluated.
        return sum(accuracies) / len(accuracies) if accuracies else 0.0
Beispiel #2
0
def save_result_for_figure(post_model, prior_model, data_loader, prm, log_mat,
                           i_epoch):
    '''Record one column of results for the epochs-figure.

    For each entry of prm.log_figure['val_types'] computes the requested
    value (epoch index, train/test loss, a generalization bound, or a
    prior/posterior divergence) and stores it at
    log_mat[i_val_type, log_counter].

    Args:
        post_model: posterior Bayesian model to evaluate.
        prior_model: prior model used by bound/divergence computations.
        data_loader: dict with 'train' and 'test' loaders.
        prm: experiment parameters; prm.log_figure configures what is logged.
        log_mat: 2-D array written in place (rows = val_types, cols = log steps).
        i_epoch: current epoch index.

    Raises:
        ValueError: for an unrecognized val_type tag.
    '''
    from Utils.complexity_terms import get_net_densities_divergence
    prm_eval = deepcopy(prm)  # evaluation parameters; do not mutate prm
    prm_eval.loss_type = prm.log_figure['loss_type_eval']
    val_types = prm.log_figure['val_types']

    # evaluation
    _, train_loss = run_eval_Bayes(post_model, data_loader['train'], prm_eval)
    _, test_loss = run_eval_Bayes(post_model, data_loader['test'], prm_eval)

    # Column index in log_mat; invariant over val_types, so computed once.
    log_counter = i_epoch // prm.log_figure['interval_epochs']

    for i_val_type, val_type in enumerate(val_types):
        if val_type[0] == 'i_epoch':
            val = i_epoch
        elif val_type[0] == 'train_loss':
            val = train_loss
        elif val_type[0] == 'test_loss':
            val = test_loss
        elif val_type[0] == 'Bound':
            # val_type = ('Bound', complexity_type, divergence_type)
            prm_eval.complexity_type = val_type[1]
            prm_eval.divergence_type = val_type[2]
            val = eval_bound(post_model, prior_model, data_loader, prm_eval,
                             train_loss)
            write_to_log(str(val_type) + ' = ' + str(val), prm)
        elif val_type[0] == 'Divergence':
            prm_eval.divergence_type = val_type[1]
            val = get_net_densities_divergence(prior_model, post_model,
                                               prm_eval)
        else:
            # The check is on the val_type tag, so name it in the error:
            raise ValueError('Invalid val_type: ' + str(val_type[0]))

        log_mat[i_val_type, log_counter] = val
def run_meta_learning(task_generator, prm):
    """Meta-train a shared prior over a stream of generated tasks.

    Runs prm.n_meta_train_epochs meta-iterations via run_meta_iteration()
    and returns the learned prior model. The reported accuracy is the last
    meta-batch's test accuracy, not a final meta-test evaluation.
    """
    # ----- Setting-up ------------------------------------------------------
    # Unpack parameters (attribute reads validate that prm provides them):
    optim_func = prm.optim_func
    optim_args = prm.optim_args
    lr_schedule = prm.lr_schedule

    # Loss criterion
    loss_criterion = get_loss_criterion(prm.loss_type)

    # 'Dummy' model that defines the parameter set of the shared prior:
    prior_model = get_model(prm)

    meta_batch_size = prm.meta_batch_size
    n_meta_iterations = prm.n_meta_train_epochs
    n_inner_steps = prm.n_inner_steps

    # ----- Main script -----------------------------------------------------
    # Update log file:
    write_to_log(cmn.get_model_string(prior_model), prm)
    write_to_log('---- Meta-Training with infinite tasks...', prm)

    start_time = timeit.default_timer()

    # Training loop:
    test_acc_avg = 0.0
    for i_iter in range(n_meta_iterations):
        prior_model, posteriors_models, test_acc_avg = run_meta_iteration(
            i_iter, prior_model, task_generator, prm)

    # test_acc_avg is the last checked test accuracy within a meta-training
    # batch, not the final evaluation on the meta-test tasks.
    stop_time = timeit.default_timer()

    # Log final result:
    cmn.write_final_result(test_acc_avg,
                           stop_time - start_time,
                           prm,
                           result_name=prm.test_type)

    # Return the learned prior:
    return prior_model
Beispiel #4
0
    def run_train_epoch(i_epoch):
        """One meta-training epoch: iterate over balanced meta-batches of
        tasks and take a joint gradient step (prior + all posteriors) per
        meta-batch."""

        # Fresh batch iterator per task for this epoch:
        train_iterators = [iter(data_loaders[t]['train'])
                           for t in range(n_train_tasks)]

        # Balanced task order: each task appears once per shuffle round, so
        # every task is visited ~n_batches_per_task times per epoch. Tasks
        # with fewer batches than others may be re-sampled within the epoch.
        task_order = []
        ids = list(range(n_train_tasks))
        for _ in range(n_batches_per_task):
            random.shuffle(ids)
            task_order += ids

        # Partition the order into meta-batches (the last may be smaller):
        meta_batch_starts = list(range(0, len(task_order), prm.meta_batch_size))
        n_meta_batches = len(meta_batch_starts)

        for i_meta_batch, start in enumerate(meta_batch_starts):
            batch_task_ids = task_order[start: start + prm.meta_batch_size]
            # A task may legitimately appear more than once in a meta-batch.

            mb_data_loaders = [data_loaders[tid] for tid in batch_task_ids]
            mb_iterators = [train_iterators[tid] for tid in batch_task_ids]
            mb_posteriors_models = [posteriors_models[tid] for tid in batch_task_ids]

            # Objective over the tasks in this meta-batch:
            total_objective, info = get_objective(prior_model, prm, mb_data_loaders,
                                                  mb_iterators, mb_posteriors_models, loss_criterion, n_train_tasks)

            # Joint gradient step on the shared prior and all posteriors:
            grad_step(total_objective, all_optimizer, lr_schedule, prm.lr, i_epoch)

            # Periodic status logging:
            log_interval = 200
            if i_meta_batch % log_interval == 0:
                batch_acc = info['correct_count'] / info['sample_count']
                write_to_log(cmn.status_string(i_epoch,  prm.n_meta_train_epochs, i_meta_batch, n_meta_batches, batch_acc, get_value(total_objective)) +
                        ' Empiric-Loss: {:.4}\t Task-Comp. {:.4}\t Meta-Comp.: {:.4}, w_kld : {:.4}, b_kld : {:.4}'.
                        format(info['avg_empirical_loss'], info['avg_intra_task_comp'], info['meta_comp'], info['w_kld'], info['b_kld']), prm)
def run_meta_learning(train_data_loaders, prm):
    """Meta-train shared initial weights (theta) over a fixed set of tasks.

    Each meta-batch draws batches from several tasks; meta_step() computes
    the meta-objective and one gradient step is taken on the shared
    meta-parameters per meta-batch.

    Args:
        train_data_loaders: list with one dict per task, holding a 'train' loader.
        prm: experiment parameters (optim_func/optim_args, lr_schedule,
            loss_type, meta_batch_size, n_meta_train_iterations, lr, ...).

    Returns:
        The model holding the learned meta-parameters.
    """

    # -------------------------------------------------------------------------------------------
    #  Setting-up
    # -------------------------------------------------------------------------------------------
    # Unpack parameters:
    optim_func, optim_args, lr_schedule =\
        prm.optim_func, prm.optim_args, prm.lr_schedule

    # Loss criterion
    loss_criterion = get_loss_criterion(prm.loss_type)

    n_tasks = len(train_data_loaders)

    # Create a 'dummy' model to generate the set of parameters of the shared initial point (theta):
    model = get_model(prm)
    model.train()

    # Create optimizer for meta-params (theta)
    meta_params = list(model.parameters())

    meta_optimizer = optim_func(meta_params, **optim_args)

    # number of sample-batches in each task:
    n_batch_list = [len(data_loader['train']) for data_loader in train_data_loaders]

    n_batches_per_task = np.max(n_batch_list)
    # note: if some tasks have less data that other tasks - it may be sampled more than once in an epoch

    # -------------------------------------------------------------------------------------------
    #  Training epoch  function
    # -------------------------------------------------------------------------------------------
    def run_train_epoch(i_epoch):
        # One balanced pass over all tasks; one grad step on theta per meta-batch.

        # For each task, prepare an iterator to generate training batches:
        train_iterators = [iter(train_data_loaders[ii]['train']) for ii in range(n_tasks)]

        # The task order to take batches from:
        # (each task appears once per shuffle round, so the order is balanced)
        task_order = []
        task_ids_list = list(range(n_tasks))
        for i_batch in range(n_batches_per_task):
            random.shuffle(task_ids_list)
            task_order += task_ids_list

        # each meta-batch includes several tasks
        # we take a grad step with theta after each meta-batch
        meta_batch_starts = list(range(0, len(task_order), prm.meta_batch_size))
        n_meta_batches = len(meta_batch_starts)

        # ----------- meta-batches loop (batches of tasks) -----------------------------------#
        for i_meta_batch in range(n_meta_batches):

            meta_batch_start = meta_batch_starts[i_meta_batch]
            task_ids_in_meta_batch = task_order[meta_batch_start: (meta_batch_start + prm.meta_batch_size)]
            n_tasks_in_batch = len(task_ids_in_meta_batch)  # it may be less than  prm.meta_batch_size at the last one
            # note: it is OK if some task appear several times in the meta-batch

            mb_data_loaders = [train_data_loaders[task_id] for task_id in task_ids_in_meta_batch]
            mb_iterators = [train_iterators[task_id] for task_id in task_ids_in_meta_batch]

            # Get objective based on tasks in meta-batch:
            total_objective, info = meta_step(prm, model, mb_data_loaders, mb_iterators, loss_criterion)

            # Take gradient step with the meta-parameters (theta) based on validation data:
            grad_step(total_objective, meta_optimizer, lr_schedule, prm.lr, i_epoch)

            # Print status:
            # NOTE: num_epochs is assigned later in the enclosing scope; this
            # works because closures bind late - run_train_epoch is only
            # called after num_epochs exists. Do not reorder.
            log_interval = 200
            if i_meta_batch % log_interval == 0:
                batch_acc = info['correct_count'] / info['sample_count']
                print(cmn.status_string(i_epoch, num_epochs, i_meta_batch, n_meta_batches, batch_acc, get_value(total_objective)))
        # end  meta-batches loop

    # end run_epoch()

    # -----------------------------------------------------------------------------------------------------------#
    # Main script
    # -----------------------------------------------------------------------------------------------------------#

    # Update Log file
    write_to_log(cmn.get_model_string(model), prm)
    write_to_log('---- Meta-Training set: {0} tasks'.format(len(train_data_loaders)), prm)

    # -------------------------------------------------------------------------------------------
    #  Run epochs
    # -------------------------------------------------------------------------------------------
    start_time = timeit.default_timer()

    # Enough epochs so that roughly prm.n_meta_train_iterations meta-batch
    # steps run in total (each epoch performs ~ceil(n_tasks / meta_batch_size)):
    num_epochs = int(np.ceil(prm.n_meta_train_iterations / np.ceil(n_tasks / prm.meta_batch_size)))

    # Training loop:
    for i_epoch in range(num_epochs):
        run_train_epoch(i_epoch)

    stop_time = timeit.default_timer()

    # Update Log file:
    cmn.write_final_result(0.0, stop_time - start_time, prm)

    # Return learned meta-parameters:
    return model
Beispiel #6
0
def run_meta_learning(data_loaders, prm):
    """Meta-train a shared Bayesian prior jointly with per-task posteriors.

    Creates one posterior model per training task plus a shared prior model,
    optionally initializes all of them from a pretrained checkpoint, and
    optimizes all parameters (posteriors + prior) jointly with a single
    optimizer. After every epoch the prior is checkpointed to prm.result_dir
    and the mean test accuracy over the training tasks is printed.

    Args:
        data_loaders: list with one dict per task, holding 'train' and 'test'
            loaders.
        prm: experiment parameters (optim_func/optim_args, lr_schedule,
            loss_type, meta_batch_size, n_meta_train_epochs, from_pretrain,
            data_source, result_dir, test_type, ...).

    Returns:
        The learned prior model.

    Raises:
        ValueError: if prm.from_pretrain is set but no pretrained checkpoint
            is known for prm.data_source.
    """
    # -------------------------------------------------------------------------------------------
    #  Setting-up
    # -------------------------------------------------------------------------------------------
    # Unpack parameters:
    optim_func, optim_args, lr_schedule =\
        prm.optim_func, prm.optim_args, prm.lr_schedule

    # Loss criterion
    loss_criterion = get_loss_criterion(prm.loss_type)

    n_train_tasks = len(data_loaders)

    # Create posterior models for each task:
    posteriors_models = [get_model(prm) for _ in range(n_train_tasks)]
    # Create a 'dummy' model to generate the set of parameters of the shared prior:
    prior_model = get_model(prm)
    if prm.from_pretrain:
        if prm.data_source == "CIFAR100":
            pretrain_path = "pretrained_cifar100/epoch-46-acc0.478.pth"
        elif prm.data_source == 'Caltech256':
            pretrain_path = "pretrained_caltech256/epoch-7-acc0.303.pth"
        else:
            # Fail early with a clear error instead of letting a None path
            # reach load_model_state (which would crash obscurely).
            raise ValueError('No pretrained checkpoint known for data_source: '
                             + str(prm.data_source))
        for e in posteriors_models:
            load_model_state(e, pretrain_path, pop_softmax = True )

        load_model_state(prior_model, pretrain_path, pop_softmax = True )
        write_to_log("load pretrained from" + pretrain_path, prm)

    # Gather all tasks posterior params:
    all_post_param = sum([list(posterior_model.parameters()) for posterior_model in posteriors_models], [])

    # Create optimizer for all parameters (posteriors + prior)
    prior_params = list(prior_model.parameters())
    all_params = all_post_param + prior_params
    all_optimizer = optim_func(all_params, **optim_args)

    # number of sample-batches in each task:
    n_batch_list = [len(data_loader['train']) for data_loader in data_loaders]

    n_batches_per_task = np.max(n_batch_list)

    # -------------------------------------------------------------------------------------------
    #  Training epoch  function
    # -------------------------------------------------------------------------------------------
    def run_train_epoch(i_epoch):
        # One epoch: a balanced pass over all tasks, one joint gradient step
        # (prior + posteriors) per meta-batch.

        # For each task, prepare an iterator to generate training batches:
        train_iterators = [iter(data_loaders[ii]['train']) for ii in range(n_train_tasks)]

        # The task order to take batches from:
        # The meta-batch will be balanced - i.e, each task will appear roughly the same number of times
        # note: if some tasks have less data that other tasks - it may be sampled more than once in an epoch
        task_order = []
        task_ids_list = list(range(n_train_tasks))
        for i_batch in range(n_batches_per_task):
            random.shuffle(task_ids_list)
            task_order += task_ids_list
        # Note: this method ensures each training sample in each task is drawn in each epoch.
        # If all the tasks have the same number of sample, then each sample is drawn exactly once in an epoch.

        # ----------- meta-batches loop (batches of tasks) -----------------------------------#
        # each meta-batch includes several tasks
        # we take a grad step with theta after each meta-batch
        meta_batch_starts = list(range(0, len(task_order), prm.meta_batch_size))
        n_meta_batches = len(meta_batch_starts)

        for i_meta_batch in range(n_meta_batches):

            meta_batch_start = meta_batch_starts[i_meta_batch]
            task_ids_in_meta_batch = task_order[meta_batch_start: (meta_batch_start + prm.meta_batch_size)]
            # meta-batch size may be less than  prm.meta_batch_size at the last one
            # note: it is OK if some tasks appear several times in the meta-batch

            mb_data_loaders = [data_loaders[task_id] for task_id in task_ids_in_meta_batch]
            mb_iterators = [train_iterators[task_id] for task_id in task_ids_in_meta_batch]
            mb_posteriors_models = [posteriors_models[task_id] for task_id in task_ids_in_meta_batch]

            # Get objective based on tasks in meta-batch:
            total_objective, info = get_objective(prior_model, prm, mb_data_loaders,
                                                  mb_iterators, mb_posteriors_models, loss_criterion, n_train_tasks)

            # Take gradient step with the shared prior and all tasks' posteriors:
            grad_step(total_objective, all_optimizer, lr_schedule, prm.lr, i_epoch)

            # Print training status of current batch:
            log_interval = 200
            if i_meta_batch % log_interval == 0:
                batch_acc = info['correct_count'] / info['sample_count']
                write_to_log(cmn.status_string(i_epoch,  prm.n_meta_train_epochs, i_meta_batch, n_meta_batches, batch_acc, get_value(total_objective)) +
                        ' Empiric-Loss: {:.4}\t Task-Comp. {:.4}\t Meta-Comp.: {:.4}, w_kld : {:.4}, b_kld : {:.4}'.
                        format(info['avg_empirical_loss'], info['avg_intra_task_comp'], info['meta_comp'], info['w_kld'], info['b_kld']), prm)
        # end  meta-batches loop

    # end run_epoch()

    # -------------------------------------------------------------------------------------------
    #  Test evaluation function -
    # Evaluate the mean loss on samples from the test sets of the training tasks
    # --------------------------------------------------------------------------------------------
    def run_test():
        # Mean test accuracy of the per-task posteriors over the tasks that
        # have test data (0.0 if none do).
        test_acc_avg = 0.0
        n_tests = 0
        for i_task in range(n_train_tasks):
            model = posteriors_models[i_task]
            test_loader = data_loaders[i_task]['test']
            if len(test_loader) > 0:
                test_acc, test_loss = run_test_Bayes(model, test_loader, loss_criterion, prm, verbose=0)
                n_tests += 1
                test_acc_avg += test_acc
            else:
                print('Train Task {}, Test set: {} - No test data'.format(i_task, prm.test_type))

        if n_tests > 0:
            test_acc_avg /= n_tests
        return test_acc_avg

    # -----------------------------------------------------------------------------------------------------------#
    # Main script
    # -----------------------------------------------------------------------------------------------------------#

    # Update Log file
    write_to_log(cmn.get_model_string(prior_model), prm)
    write_to_log('---- Meta-Training set: {0} tasks'.format(len(data_loaders)), prm)

    # -------------------------------------------------------------------------------------------
    #  Run epochs
    # -------------------------------------------------------------------------------------------
    start_time = timeit.default_timer()

    # Training loop:
    for i_epoch in range(prm.n_meta_train_epochs):
        save_path = os.path.join(prm.result_dir, 'Epoch_{}_model.pth'.format(i_epoch))
        run_train_epoch(i_epoch)
        # Checkpoint the prior and report mean test accuracy after every epoch:
        save_model_state(prior_model, save_path)
        test_acc_avg = run_test()
        print("Epoch {}: test_acc is {}".format(i_epoch, test_acc_avg))

    stop_time = timeit.default_timer()

    # Test:
    test_acc_avg = run_test()

    # Update Log file:
    cmn.write_final_result(test_acc_avg, stop_time - start_time, prm, result_name=prm.test_type)

    # Return learned prior:
    return prior_model
Beispiel #7
0
    def run_train_epoch(i_epoch):
        """One training epoch of the posterior model.

        For each batch, averages the PAC-Bayes objective (empirical loss +
        complexity vs. the prior) over prm.n_MC stochastic forward passes
        and takes one gradient step on the posterior.

        Returns:
            dict with 'task_comp' and 'total_loss' averaged over the
            epoch's batches (unchanged zeros if the loader is empty).
        """
        log_interval = 500

        post_model.train()

        train_info = {}
        train_info["task_comp"] = 0.0
        train_info["total_loss"] = 0.0

        cnt = 0
        for batch_idx, batch_data in enumerate(train_loader):
            cnt += 1

            correct_count = 0
            sample_count = 0

            # Monte-Carlo iterations:
            n_MC = prm.n_MC
            task_empirical_loss = 0
            task_complexity = 0
            for i_MC in range(n_MC):
                # get batch:
                inputs, targets = data_gen.get_batch_vars(batch_data, prm)

                # Calculate empirical loss:
                outputs = post_model(inputs)
                curr_empirical_loss = loss_criterion(outputs, targets)

                # hyper_kl = 0 when testing
                curr_empirical_loss, curr_complexity, task_info = get_bayes_task_objective(
                    prm,
                    prior_model,
                    post_model,
                    n_train_samples,
                    curr_empirical_loss,
                    noised_prior=False)

                task_empirical_loss += (1 / n_MC) * curr_empirical_loss
                task_complexity += (1 / n_MC) * curr_complexity

                correct_count += count_correct(outputs, targets)
                sample_count += inputs.size(0)

            # Total objective:
            total_objective = task_empirical_loss + prm.task_complex_w * task_complexity

            # .item() extracts the Python scalar; the legacy `.data[0]`
            # indexing fails on 0-dim tensors in PyTorch >= 0.4 and is used
            # nowhere else in this file.
            train_info["task_comp"] += task_complexity.item()
            train_info["total_loss"] += total_objective.item()

            # Take gradient step with the posterior:
            grad_step(total_objective, optimizer, lr_schedule, prm.lr, i_epoch)

            # Print status:
            if batch_idx % log_interval == 0:
                batch_acc = correct_count / sample_count
                write_to_log(
                    cmn.status_string(i_epoch, prm.n_meta_test_epochs,
                                      batch_idx, n_batches, batch_acc,
                                      total_objective.item()) +
                    ' Empiric Loss: {:.4}\t Intra-Comp. {:.4}, w_kld {:.4}, b_kld {:.4}'
                    .format(task_empirical_loss.item(),
                            task_complexity.item(), task_info["w_kld"],
                            task_info["b_kld"]), prm)

        # Average over batches; guard against an empty loader (cnt == 0
        # previously raised ZeroDivisionError).
        if cnt > 0:
            train_info["task_comp"] /= cnt
            train_info["total_loss"] /= cnt
        return train_info
def run_learning(task_data, prior_model, prm, init_from_prior=True, verbose=1):
    """Adapt a Bayesian posterior model to a single (meta-test) task.

    Creates a posterior model (optionally initialized from the given prior),
    trains it on the task's training set for prm.n_meta_test_epochs epochs
    with a PAC-Bayes style objective (MC-averaged empirical loss + complexity
    term w.r.t. the prior), then evaluates on the task's test set.

    Args:
        task_data: dict with 'train' and 'test' data loaders for the task.
        prior_model: prior model the complexity term is measured against.
        prm: experiment parameters.
        init_from_prior: if True, start the posterior from the prior's weights.
        verbose: if 1, write progress info to the log.

    Returns:
        (test_err, post_model): test error (1 - accuracy) and the trained
        posterior model.
    """
    # -------------------------------------------------------------------------------------------
    #  Setting-up
    # -------------------------------------------------------------------------------------------
    # Unpack parameters:
    optim_func, optim_args, lr_schedule =\
        prm.optim_func, prm.optim_args, prm.lr_schedule

    # Loss criterion
    loss_criterion = get_loss_func(prm)

    # Create posterior model for the new task:
    post_model = get_model(prm)

    if init_from_prior:
        post_model.load_state_dict(prior_model.state_dict())

    # The data-sets of the new task:
    train_loader = task_data['train']
    test_loader = task_data['test']
    n_train_samples = len(train_loader.dataset)
    n_batches = len(train_loader)

    #  Get optimizer:
    optimizer = optim_func(post_model.parameters(), **optim_args)

    # -------------------------------------------------------------------------------------------
    #  Training epoch  function
    # -------------------------------------------------------------------------------------------

    def run_train_epoch(i_epoch):
        # One pass over the training loader; one gradient step per batch on
        # the Monte-Carlo-averaged objective.
        log_interval = 500

        post_model.train()

        for batch_idx, batch_data in enumerate(train_loader):

            # get batch data:
            inputs, targets = data_gen.get_batch_vars(batch_data, prm)
            batch_size = inputs.shape[0]

            correct_count = 0
            sample_count = 0

            # Monte-Carlo iterations: average the per-sample empirical loss
            # over n_MC stochastic forward passes of the Bayesian network.
            n_MC = prm.n_MC
            avg_empiric_loss = 0
            complexity_term = 0

            for i_MC in range(n_MC):

                # Calculate empirical loss:
                outputs = post_model(inputs)
                avg_empiric_loss_curr = (1 / batch_size) * loss_criterion(
                    outputs, targets)

                avg_empiric_loss += (1 / n_MC) * avg_empiric_loss_curr

                correct_count += count_correct(outputs, targets)
                sample_count += inputs.size(0)
            # end monte-carlo loop

            # Complexity term vs. the prior, computed once on the MC-averaged loss:
            complexity_term = get_task_complexity(prm, prior_model, post_model,
                                                  n_train_samples,
                                                  avg_empiric_loss)

            # Approximated total objective (for current batch):
            if prm.complexity_type == 'Variational_Bayes':
                # note that avg_empiric_loss_per_task is estimated by an average over batch samples,
                #  but its weight in the objective should be considered by how many samples there are total in the task
                total_objective = avg_empiric_loss * (
                    n_train_samples) + complexity_term
            else:
                total_objective = avg_empiric_loss + complexity_term

            # Take gradient step with the posterior:
            grad_step(total_objective, optimizer, lr_schedule, prm.lr, i_epoch)

            # Print status:
            if batch_idx % log_interval == 0:
                batch_acc = correct_count / sample_count
                print(
                    cmn.status_string(i_epoch, prm.n_meta_test_epochs,
                                      batch_idx, n_batches, batch_acc,
                                      total_objective.item()) +
                    ' Empiric Loss: {:.4}\t Intra-Comp. {:.4}'.format(
                        avg_empiric_loss.item(), complexity_term.item()))
        # end batch loop

    # end run_train_epoch()

    # -----------------------------------------------------------------------------------------------------------#
    # Update Log file
    if verbose == 1:
        write_to_log(
            'Total number of steps: {}'.format(n_batches *
                                               prm.n_meta_test_epochs), prm)

    # -------------------------------------------------------------------------------------------
    #  Run epochs
    # -------------------------------------------------------------------------------------------
    start_time = timeit.default_timer()

    # Training loop:
    for i_epoch in range(prm.n_meta_test_epochs):
        run_train_epoch(i_epoch)

    # Test:
    test_acc, test_loss = run_eval_Bayes(post_model, test_loader, prm)

    stop_time = timeit.default_timer()
    cmn.write_final_result(test_acc,
                           stop_time - start_time,
                           prm,
                           result_name=prm.test_type,
                           verbose=verbose)

    test_err = 1 - test_acc
    return test_err, post_model
Beispiel #9
0
def run_learning(data_loader,
                 prm,
                 prior_model=None,
                 init_from_prior=True,
                 verbose=1):
    """Train a Bayesian posterior model on a single task's data.

    If a prior model is given, the per-batch objective is the MC-averaged
    empirical loss plus a complexity term w.r.t. that prior; otherwise only
    the empirical loss is minimized. Optionally records per-epoch values for
    a results figure (configured by prm.log_figure).

    Args:
        data_loader: dict with 'train' and 'test' loaders and 'n_train_samples'.
        prm: experiment parameters (num_epochs, n_MC, optimizer settings, ...).
        prior_model: optional prior Bayesian model.
        init_from_prior: if True (and a prior is given), start the posterior
            from a deep copy of the prior.
        verbose: if truthy, write progress and final results to the log.

    Returns:
        (post_model, test_err, test_loss, log_mat)
    """

    # -------------------------------------------------------------------------------------------
    #  Setting-up
    # -------------------------------------------------------------------------------------------

    # Unpack parameters:
    optim_func, optim_args, lr_schedule = \
        prm.optim_func, prm.optim_args, prm.lr_schedule

    # Loss criterion
    loss_criterion = get_loss_func(prm)

    train_loader = data_loader['train']
    test_loader = data_loader['test']
    n_batches = len(train_loader)
    n_train_samples = data_loader['n_train_samples']

    # Whether to collect per-epoch values for the epochs-figure:
    figure_flag = hasattr(prm, 'log_figure') and prm.log_figure

    # get model:
    if prior_model and init_from_prior:
        # init from prior model:
        post_model = deepcopy(prior_model).to(prm.device)
    else:
        post_model = get_model(prm)

    # post_model.set_eps_std(0.0) # DEBUG: turn off randomness

    #  Get optimizer:
    optimizer = optim_func(post_model.parameters(), **optim_args)

    # -------------------------------------------------------------------------------------------
    #  Training epoch  function
    # -------------------------------------------------------------------------------------------

    def run_train_epoch(i_epoch, log_mat):
        # One pass over the training loader; one gradient step per batch.

        post_model.train()

        for batch_idx, batch_data in enumerate(train_loader):

            # get batch data:
            inputs, targets = data_gen.get_batch_vars(batch_data, prm)

            batch_size = inputs.shape[0]

            # Monte-Carlo iterations: average the per-sample empirical loss
            # over n_MC stochastic forward passes.
            avg_empiric_loss = torch.zeros(1, device=prm.device)
            n_MC = prm.n_MC

            for i_MC in range(n_MC):

                # calculate objective:
                outputs = post_model(inputs)
                avg_empiric_loss_curr = (1 / batch_size) * loss_criterion(
                    outputs, targets)
                avg_empiric_loss += (1 / n_MC) * avg_empiric_loss_curr

            # complexity/prior term:
            if prior_model:
                complexity_term = get_task_complexity(prm, prior_model,
                                                      post_model,
                                                      n_train_samples,
                                                      avg_empiric_loss)
            else:
                complexity_term = torch.zeros(1, device=prm.device)

            # Total objective:
            objective = avg_empiric_loss + complexity_term

            # Take gradient step:
            grad_step(objective, optimizer, lr_schedule, prm.lr, i_epoch)

            # Print status:
            log_interval = 1000
            if batch_idx % log_interval == 0:
                # NOTE(review): `outputs` here is from the LAST Monte-Carlo
                # pass only, so batch_acc reflects a single stochastic
                # forward pass (and would be undefined if n_MC == 0).
                batch_acc = correct_rate(outputs, targets)
                print(
                    cmn.status_string(i_epoch, prm.num_epochs, batch_idx,
                                      n_batches, batch_acc, objective.item()) +
                    ' Loss: {:.4}\t Comp.: {:.4}'.format(
                        avg_empiric_loss.item(), complexity_term.item()))

        # End batch loop

        # save results for epochs-figure:
        if figure_flag and (i_epoch % prm.log_figure['interval_epochs'] == 0):
            save_result_for_figure(post_model, prior_model, data_loader, prm,
                                   log_mat, i_epoch)

    # End run_train_epoch()
    # -------------------------------------------------------------------------------------------
    #  Main Script
    # -------------------------------------------------------------------------------------------

    #  Update Log file
    if verbose:
        write_to_log(cmn.get_model_string(post_model), prm)
        write_to_log('Number of weights: {}'.format(post_model.weights_count),
                     prm)
        write_to_log(
            'Total number of steps: {}'.format(n_batches * prm.num_epochs),
            prm)
        write_to_log(
            'Number of training samples: {}'.format(
                data_loader['n_train_samples']), prm)

    start_time = timeit.default_timer()

    if figure_flag:
        # One log column per logged epoch (epochs 0, interval, 2*interval, ...):
        n_logs = 1 + (
            (prm.num_epochs - 1) // prm.log_figure['interval_epochs'])
        log_mat = np.zeros((len(prm.log_figure['val_types']), n_logs))

    else:
        log_mat = None

    # Run training epochs:
    for i_epoch in range(prm.num_epochs):
        run_train_epoch(i_epoch, log_mat)

    # evaluate final perfomance on train-set
    train_acc, train_loss = run_eval_Bayes(post_model, train_loader, prm)

    # Test:
    test_acc, test_loss = run_eval_Bayes(post_model, test_loader, prm)
    test_err = 1 - test_acc

    # Log results
    if verbose:
        write_to_log(
            '>Train-err. : {:.4}%\t Train-loss: {:.4}'.format(
                100 * (1 - train_acc), train_loss), prm)
        write_to_log(
            '>Test-err. {:1.3}%, Test-loss:  {:.4}'.format(
                100 * (test_err), test_loss), prm)

    stop_time = timeit.default_timer()
    if verbose:
        cmn.write_final_result(test_acc, stop_time - start_time, prm)

    if figure_flag:
        plot_log(log_mat, prm)

    return post_model, test_err, test_loss, log_mat
Beispiel #10
0
def run_meta_learning(data_loaders, prm):
    """Meta-train a shared Bayesian prior over a fixed set of training tasks.

    A 'prior' model holds the shared parameters; one posterior model is kept
    per training task. Each epoch draws balanced meta-batches of tasks, takes
    gradient steps on every task's posterior, accumulates gradients into the
    prior's optimizer, updates the prior, and then re-syncs all posteriors to
    the updated prior.

    Args:
        data_loaders: list (one entry per task) of dicts with 'train'/'test'
            DataLoaders.
        prm: run-parameters object (optim_func, optim_args, lr_schedule,
            loss_type, meta_batch_size, n_meta_train_epochs, test_type, ...).

    Returns:
        The learned prior model.
    """

    # -------------------------------------------------------------------------------------------
    #  Setting-up
    # -------------------------------------------------------------------------------------------
    # Unpack parameters:
    optim_func, optim_args, lr_schedule =\
        prm.optim_func, prm.optim_args, prm.lr_schedule

    # Loss criterion
    loss_criterion = get_loss_criterion(prm.loss_type)

    n_train_tasks = len(data_loaders)

    # Create a 'dummy' model to generate the set of parameters of the shared prior:
    prior_model = get_model(prm)

    # Create posterior models for each task (initialized with the prior's weights):
    posteriors_models = [
        transfer_weights(prior_model, get_model(prm))
        for _ in range(n_train_tasks)
    ]

    # Gather all tasks posterior params:
    # all_post_param = sum([list(posterior_model.parameters()) for posterior_model in posteriors_models], [])

    # Create optimizer for all parameters (posteriors + prior)
    prior_params = filter(lambda p: p.requires_grad, prior_model.parameters())
    # NOTE: optim_args is passed positionally (not **optim_args) — presumably
    # this optimizer's constructor takes the args dict directly; verify.
    prior_optimizer = optim_func(prior_params, optim_args)
    # NOTE(review): 'object' shadows the Python builtin here — presumably a
    # project module providing grad_init/feval; confirm the import at file top.
    # Initializes the prior's gradients using the first task's training data.
    object.grad_init(prm, prior_model, loss_criterion,
                     iter(data_loaders[0]['train']), data_loaders[0]['train'],
                     prior_optimizer)
    # all_params = all_post_param + prior_params
    # One optimizer per task posterior (same optimizer type/args as the prior):
    all_posterior_optimizers = [
        optim_func(
            filter(lambda p: p.requires_grad,
                   posteriors_models[i].parameters()), optim_args)
        for i in range(n_train_tasks)
    ]

    # number of sample-batches in each task:
    n_batch_list = [len(data_loader['train']) for data_loader in data_loaders]

    n_batches_per_task = np.max(n_batch_list)

    # 'L' optimizer hyper-parameter (e.g. EntropySGD inner-loop length); kept
    # for the commented-out per-epoch schedule below.
    L = prm.optim_args['L']

    # -------------------------------------------------------------------------------------------
    #  Training epoch  function
    # -------------------------------------------------------------------------------------------
    def run_train_epoch(i_epoch):
        """Run one meta-training epoch over all tasks, in meta-batches."""

        # optim_args['L'] = L-5*i_epoch

        # For each task, prepare an iterator to generate training batches:
        train_iterators = [
            iter(data_loaders[ii]['train']) for ii in range(n_train_tasks)
        ]

        # The task order to take batches from:
        # The meta-batch will be balanced - i.e, each task will appear roughly the same number of times
        # note: if some tasks have less data that other tasks - it may be sampled more than once in an epoch
        task_order = []
        task_ids_list = list(range(n_train_tasks))
        for i_batch in range(n_batches_per_task):
            random.shuffle(task_ids_list)
            task_order += task_ids_list
        # Note: this method ensures each training sample in each task is drawn in each epoch.
        # If all the tasks have the same number of sample, then each sample is drawn exactly once in an epoch.

        # ----------- meta-batches loop (batches of tasks) -----------------------------------#
        # each meta-batch includes several tasks
        # we take a grad step with theta after each meta-batch
        # meta_batch_starts = list(range(0, len(task_order), n_train_tasks))
        meta_batch_starts = list(range(0, len(task_order),
                                       prm.meta_batch_size))
        n_meta_batches = len(meta_batch_starts)

        for i_meta_batch in range(n_meta_batches):

            meta_batch_start = meta_batch_starts[i_meta_batch]
            task_ids_in_meta_batch = task_order[meta_batch_start:(
                meta_batch_start + prm.meta_batch_size)]
            # meta-batch size may be less than  prm.meta_batch_size at the last one
            # note: it is OK if some tasks appear several times in the meta-batch

            # Select the data-loaders / batch-iterators / posterior models of
            # the tasks in this meta-batch:
            mb_data_loaders = [
                data_loaders[task_id] for task_id in task_ids_in_meta_batch
            ]
            mb_iterators = [
                train_iterators[task_id] for task_id in task_ids_in_meta_batch
            ]
            mb_posteriors_models = [
                posteriors_models[task_id]
                for task_id in task_ids_in_meta_batch
            ]

            #task_loss_list, info = get_objective(prior_model, prm, mb_data_loaders, object.feval,
            #                                      mb_iterators, mb_posteriors_models, loss_criterion, n_train_tasks, task_ids_in_meta_batch)

            # Take gradient step with the shared prior and all tasks' posteriors:
            # for i_task in range(n_train_tasks):
            # prior_optimizer.zero_grad()
            #for i_task in range(n_train_tasks):
            #    if isinstance(task_loss_list[i_task], int):
            #        continue
            #    grad_step(task_loss_list[i_task], posteriors_models[i_task], loss_criterion, all_posterior_optimizers[i_task], prm,
            #              train_iterators[i_task], data_loaders[i_task]['train'], lr_schedule, prm.lr, i_epoch)

            #task_loss_list, info = get_objective(prior_model, prm, mb_data_loaders, object.feval,
            #                                      mb_iterators, mb_posteriors_models, loss_criterion, n_train_tasks, task_ids_in_meta_batch)

            # Take gradient step with the shared prior and all tasks' posteriors:
            # for i_task in range(n_train_tasks):
            # prior_optimizer.zero_grad()
            #for i_task in range(n_train_tasks):
            #    if isinstance(task_loss_list[i_task], int):
            #        continue
            #    grad_step(task_loss_list[i_task], posteriors_models[i_task], loss_criterion, all_posterior_optimizers[i_task], prm,
            #              train_iterators[i_task], data_loaders[i_task]['train'], lr_schedule, prm.lr, i_epoch)

            # Get objective based on tasks in meta-batch:
            task_loss_list, info = get_objective(prior_model, prm,
                                                 mb_data_loaders, object.feval,
                                                 mb_iterators,
                                                 mb_posteriors_models,
                                                 loss_criterion, n_train_tasks,
                                                 task_ids_in_meta_batch)

            # Take gradient step with the shared prior and all tasks' posteriors:
            # for i_task in range(n_train_tasks):
            prior_optimizer.zero_grad()
            # NOTE: iterates ALL tasks, not just those in the meta-batch; tasks
            # outside the meta-batch are skipped via the int-loss check below.
            for i_task in range(n_train_tasks):
                if isinstance(task_loss_list[i_task], int):
                    continue
                grad_step(task_loss_list[i_task], posteriors_models[i_task],
                          loss_criterion, all_posterior_optimizers[i_task],
                          prm, train_iterators[i_task],
                          data_loaders[i_task]['train'], lr_schedule, prm.lr,
                          i_epoch)
                # if i_meta_batch==n_meta_batches-1:
                # if i_epoch == prm.n_meta_train_epochs-1:
                # Accumulate this task's gradient into the prior's optimizer:
                prior_get_grad(prior_optimizer,
                               all_posterior_optimizers[i_task])

            # task_loss_list, info = get_objective(prior_model, prm, mb_data_loaders, object.feval,
            #                                       mb_iterators, mb_posteriors_models, loss_criterion, n_train_tasks, task_ids_in_meta_batch)

            # prior_grad_step(prior_optimizer, prm.meta_batch_size, prm,prm.prior_lr_schedule, prm.prior_lr, i_epoch)
            # Update the shared prior from the accumulated task gradients,
            # then re-sync every posterior to the new prior:
            prior_updates(prior_optimizer, n_train_tasks, prm)
            for post_model in posteriors_models:
                post_model.load_state_dict(prior_model.state_dict())

            # Print status:
            log_interval = 10
            if (i_meta_batch) % log_interval == 0:
                batch_acc = info['correct_count'] / info['sample_count']
                print(
                    cmn.status_string(i_epoch, prm.n_meta_train_epochs,
                                      i_meta_batch, n_meta_batches, batch_acc)
                    +
                    ' Empiric-Loss: {:.4f}'.format(info['avg_empirical_loss']))

        # for i in range(20):
        #     prior_grad_step(prior_optimizer, prm.meta_batch_size, prm, lr_schedule, prm.prior_lr, i_epoch)
        # end  meta-batches loop

    # end run_epoch()

    # -------------------------------------------------------------------------------------------
    #  Test evaluation function -
    # Evaluate the mean loss on samples from the test sets of the training tasks
    # --------------------------------------------------------------------------------------------
    def run_test():
        """Return mean test accuracy of the per-task posteriors over all tasks
        that have test data (0.0 if none do)."""
        test_acc_avg = 0.0
        n_tests = 0
        for i_task in range(n_train_tasks):
            model = posteriors_models[i_task]
            test_loader = data_loaders[i_task]['test']
            if len(test_loader) > 0:
                test_acc, test_loss = run_test_Bayes(model, test_loader,
                                                     loss_criterion, prm)
                n_tests += 1
                test_acc_avg += test_acc

                n_test_samples = len(test_loader.dataset)

                write_to_log(
                    'Train Task {}, Test set: {} -  Average loss: {:.4}, Accuracy: {:.3} (of {} samples)\n'
                    .format(i_task, prm.test_type, test_loss, test_acc,
                            n_test_samples), prm)
            else:
                print('Train Task {}, Test set: {} - No test data'.format(
                    i_task, prm.test_type))

        if n_tests > 0:
            test_acc_avg /= n_tests
        return test_acc_avg

    # -----------------------------------------------------------------------------------------------------------#
    # Main script
    # -----------------------------------------------------------------------------------------------------------#

    # Update Log file

    write_to_log(cmn.get_model_string(prior_model), prm)
    write_to_log('---- Meta-Training set: {0} tasks'.format(len(data_loaders)),
                 prm)

    # -------------------------------------------------------------------------------------------
    #  Run epochs
    # -------------------------------------------------------------------------------------------
    start_time = timeit.default_timer()

    # Training loop:
    for i_epoch in range(prm.n_meta_train_epochs):
        # if (i_epoch+1) % 50 == 0:
        #     prm.lr = prm.lr/2
        # for post_model in posteriors_models:
        #     post_model.load_state_dict(prior_model.state_dict())
        run_train_epoch(i_epoch)
        # for post_model in posteriors_models:
        #     post_model.load_state_dict(prior_model.state_dict())

    # prior_update(prior_optimizer,prm.meta_batch_size,prm)

    stop_time = timeit.default_timer()

    # Test:
    test_acc_avg = run_test()

    # Update Log file:
    cmn.write_final_result(test_acc_avg,
                           stop_time - start_time,
                           prm,
                           result_name=prm.test_type)

    # Return learned prior:
    return prior_model
Beispiel #11
0
def run_learning(task_data, meta_model, prm, verbose=1):
    """Meta-test learning: adapt a copy of the meta-model to a new task.

    Initializes a task model from the meta-learned parameters, takes a fixed
    number of SGD steps on the task's training data, then evaluates accuracy
    on the task's test set.

    Args:
        task_data: dict with 'train' and 'test' DataLoaders for the new task.
        meta_model: model holding the meta-learned initial parameters.
        prm: run-parameters object (alpha, n_meta_test_grad_steps, loss_type,
            num_epochs, ...).
        verbose: if 1, write progress to the log file.

    Returns:
        (test_err, task_model): test error (1 - accuracy) and the adapted model.
    """

    # -------------------------------------------------------------------------------------------
    #  Setting-up
    # -------------------------------------------------------------------------------------------

    # Loss criterion
    loss_criterion = get_loss_criterion(prm.loss_type)

    # Create model for task:
    task_model = get_model(prm)

    #  Load initial point from meta-parameters:
    task_model.load_state_dict(meta_model.state_dict())

    # The data-sets of the new task:
    train_loader = task_data['train']
    test_loader = task_data['test']
    n_train_samples = len(train_loader.dataset)
    n_batches = len(train_loader)

    #  Get task optimizer:
    # In meta-testing, use plain SGD with step-size alpha
    task_optimizer = SGD(task_model.parameters(), lr=prm.alpha)

    # -------------------------------------------------------------------------------------------
    #  Learning  function
    # -------------------------------------------------------------------------------------------

    def run_meta_test_learning(task_model, train_loader):
        """Take prm.n_meta_test_grad_steps SGD steps on the task's train data."""

        task_model.train()
        train_loader_iter = iter(train_loader)

        # Gradient steps (training) loop
        for i_grad_step in range(prm.n_meta_test_grad_steps):
            # get batch (cycles through the loader if it is exhausted):
            batch_data = data_gen.get_next_batch_cyclic(
                train_loader_iter, train_loader)
            inputs, targets = data_gen.get_batch_vars(batch_data, prm)

            # Calculate empirical loss:
            outputs = task_model(inputs)
            task_objective = loss_criterion(outputs, targets)

            # Take gradient step with the task weights:
            grad_step(task_objective, task_optimizer)

        # end gradient step loop

        return task_model

    # -------------------------------------------------------------------------------------------
    #  Test evaluation function
    # --------------------------------------------------------------------------------------------
    def run_test(model, test_loader):
        """Return accuracy of `model` on `test_loader`; prints loss/accuracy."""
        model.eval()
        test_loss = 0
        n_correct = 0
        for batch_data in test_loader:
            inputs, targets = data_gen.get_batch_vars(batch_data,
                                                      prm,
                                                      is_test=True)
            outputs = model(inputs)
            test_loss += loss_criterion(outputs,
                                        targets)  # sum the mean loss in batch
            n_correct += count_correct(outputs, targets)

        n_test_samples = len(test_loader.dataset)
        n_test_batches = len(test_loader)
        # FIX: extract the scalar with .item() — indexing a 0-dim loss tensor
        # via .data[0] fails on PyTorch >= 0.4 (other blocks in this file
        # already use .item()).
        test_loss = test_loss.item() / n_test_batches
        test_acc = n_correct / n_test_samples
        print('\nTest set: Average loss: {:.4}, Accuracy: {:.3} ( {}/{})\n'.
              format(test_loss, test_acc, n_correct, n_test_samples))
        return test_acc

    # -----------------------------------------------------------------------------------------------------------#
    # Update Log file
    if verbose == 1:
        write_to_log(
            'Total number of steps: {}'.format(n_batches * prm.num_epochs),
            prm)

    # -------------------------------------------------------------------------------------------
    #  Run epochs
    # -------------------------------------------------------------------------------------------
    start_time = timeit.default_timer()

    # Training loop:
    task_model = run_meta_test_learning(task_model, train_loader)

    # Test:
    test_acc = run_test(task_model, test_loader)

    stop_time = timeit.default_timer()
    cmn.write_final_result(test_acc,
                           stop_time - start_time,
                           prm,
                           verbose=verbose)

    test_err = 1 - test_acc
    return test_err, task_model
def run_meta_learning(prm, task_generator):
    """Meta-train shared initial parameters (theta) over generated tasks.

    Each meta-iteration draws a fresh meta-batch of tasks from
    `task_generator`, computes a meta-objective via `meta_step`, and takes a
    gradient step on theta.

    Args:
        prm: run-parameters object (optim_func, optim_args, lr_schedule,
            loss_type, meta_batch_size, n_meta_train_iterations, ...).
        task_generator: object with create_meta_batch(prm, size, meta_split)
            producing per-task data-loader dicts.

    Returns:
        The model holding the learned meta-parameters.
    """

    # -------------------------------------------------------------------------------------------
    #  Setting-up
    # -------------------------------------------------------------------------------------------
    # Unpack parameters:
    optim_func, optim_args, lr_schedule =\
        prm.optim_func, prm.optim_args, prm.lr_schedule

    n_iterations = prm.n_meta_train_iterations

    # Loss criterion
    loss_criterion = get_loss_criterion(prm.loss_type)

    # Create a 'dummy' model to generate the set of parameters of the shared initial point (theta):
    model = get_model(prm)
    model.train()

    # Create optimizer for meta-params (theta)
    meta_params = list(model.parameters())

    meta_optimizer = optim_func(meta_params, **optim_args)

    meta_batch_size = prm.meta_batch_size

    # -------------------------------------------------------------------------------------------
    #  Training epoch  function
    # -------------------------------------------------------------------------------------------
    def run_meta_iteration(i_iter):
        """Run one meta-iteration: sample a meta-batch of tasks and step theta."""
        # In each meta-iteration we draw a meta-batch of several tasks
        # Then we take a grad step with theta.

        # Generate the data sets of the training-tasks for meta-batch:
        mb_data_loaders = task_generator.create_meta_batch(
            prm, meta_batch_size, meta_split='meta_train')

        # For each task, prepare an iterator to generate training batches:
        mb_iterators = [
            iter(mb_data_loaders[ii]['train']) for ii in range(meta_batch_size)
        ]

        # Get objective based on tasks in meta-batch:
        total_objective, info = meta_step(prm, model, mb_data_loaders,
                                          mb_iterators, loss_criterion)

        # Take gradient step with the meta-parameters (theta) based on validation data:
        grad_step(total_objective, meta_optimizer, lr_schedule, prm.lr, i_iter)

        # Print status:
        log_interval = 5
        if (i_iter) % log_interval == 0:
            batch_acc = info['correct_count'] / info['sample_count']
            # FIX: extract the scalar with .item() — indexing a 0-dim loss
            # tensor via .data[0] fails on PyTorch >= 0.4.
            print(
                cmn.status_string(i_iter, n_iterations, 1, 1, batch_acc,
                                  total_objective.item()))

    # end run_meta_iteration()

    # -----------------------------------------------------------------------------------------------------------#
    # Main script
    # -----------------------------------------------------------------------------------------------------------#

    # Update Log file
    write_to_log(cmn.get_model_string(model), prm)
    write_to_log('---- Meta-Training with infinite tasks...', prm)

    # -------------------------------------------------------------------------------------------
    #  Run epochs
    # -------------------------------------------------------------------------------------------
    start_time = timeit.default_timer()

    # Training loop:
    for i_iter in range(n_iterations):
        run_meta_iteration(i_iter)

    stop_time = timeit.default_timer()

    # Update Log file (no final accuracy is computed in this variant):
    cmn.write_final_result(0.0, stop_time - start_time, prm)

    # Return learned meta-parameters:
    return model
Beispiel #13
0
def run_learning(data_loader,
                 prm,
                 prior_model=None,
                 init_from_prior=True,
                 verbose=1):
    """Train a Bayesian posterior model on a single task.

    Optionally initializes from (and regularizes toward) a given prior model;
    without a prior, minimizes the plain Monte-Carlo empirical loss.

    Args:
        data_loader: dict with 'train'/'test' DataLoaders and
            'n_train_samples'.
        prm: run-parameters object (optim_func, optim_args, lr_schedule,
            loss_type, n_MC, num_epochs, test_type, ...).
        prior_model: optional prior; when given, adds a complexity term to the
            objective via get_bayes_task_objective.
        init_from_prior: if True (and a prior is given), start the posterior
            from a deep copy of the prior.
        verbose: if 0, suppress log-file updates.

    Returns:
        (test_err, post_model): test error (1 - accuracy) and trained model.
    """

    # -------------------------------------------------------------------------------------------
    #  Setting-up
    # -------------------------------------------------------------------------------------------

    # Unpack parameters:
    optim_func, optim_args, lr_schedule = \
        prm.optim_func, prm.optim_args, prm.lr_schedule

    # Loss criterion
    loss_criterion = get_loss_criterion(prm.loss_type)

    train_loader = data_loader['train']
    test_loader = data_loader['test']
    n_batches = len(train_loader)
    n_train_samples = data_loader['n_train_samples']

    # get model:
    if prior_model and init_from_prior:
        # init from prior model:
        post_model = deepcopy(prior_model)
    else:
        post_model = get_model(prm)

    # post_model.set_eps_std(0.0) # DEBUG: turn off randomness

    #  Get optimizer:
    optimizer = optim_func(post_model.parameters(), **optim_args)

    # -------------------------------------------------------------------------------------------
    #  Training epoch  function
    # -------------------------------------------------------------------------------------------

    def run_train_epoch(i_epoch):
        """Run one training epoch over all batches of the train loader."""

        # # Adjust randomness (eps_std)
        # if hasattr(prm, 'use_randomness_schedeule') and prm.use_randomness_schedeule:
        #     if i_epoch > prm.randomness_full_epoch:
        #         eps_std = 1.0
        #     elif i_epoch > prm.randomness_init_epoch:
        #         eps_std = (i_epoch - prm.randomness_init_epoch) / (prm.randomness_full_epoch - prm.randomness_init_epoch)
        #     else:
        #         eps_std = 0.0  #  turn off randomness
        #     post_model.set_eps_std(eps_std)

        # post_model.set_eps_std(0.00) # debug

        complexity_term = 0

        post_model.train()

        for batch_idx, batch_data in enumerate(train_loader):

            # Monte-Carlo iterations: average the loss over n_MC stochastic
            # forward passes of the Bayesian network.
            empirical_loss = 0
            n_MC = prm.n_MC
            for i_MC in range(n_MC):
                # get batch:
                inputs, targets = data_gen.get_batch_vars(batch_data, prm)

                # calculate objective:
                outputs = post_model(inputs)
                empirical_loss_c = loss_criterion(outputs, targets)
                empirical_loss += (1 / n_MC) * empirical_loss_c

            #  complexity/prior term:
            if prior_model:
                empirical_loss, complexity_term = get_bayes_task_objective(
                    prm, prior_model, post_model, n_train_samples,
                    empirical_loss)
            else:
                complexity_term = 0.0

            # Total objective:
            objective = empirical_loss + complexity_term

            # Take gradient step:
            grad_step(objective, optimizer, lr_schedule, prm.lr, i_epoch)

            # Print status:
            log_interval = 500
            if batch_idx % log_interval == 0:
                batch_acc = correct_rate(outputs, targets)
                # FIX: use get_value() (already used below for the loss terms)
                # instead of .data[0] — indexing a 0-dim loss tensor fails on
                # PyTorch >= 0.4.
                print(
                    cmn.status_string(i_epoch, prm.num_epochs, batch_idx,
                                      n_batches, batch_acc,
                                      get_value(objective))
                    + ' Loss: {:.4}\t Comp.: {:.4}'.format(
                        get_value(empirical_loss), get_value(complexity_term)))

    # -------------------------------------------------------------------------------------------
    #  Main Script
    # -------------------------------------------------------------------------------------------

    #  Update Log file
    update_file = not verbose == 0
    cmn.write_to_log(cmn.get_model_string(post_model),
                     prm,
                     update_file=update_file)
    cmn.write_to_log('Total number of steps: {}'.format(n_batches *
                                                        prm.num_epochs),
                     prm,
                     update_file=update_file)
    cmn.write_to_log('Number of training samples: {}'.format(
        data_loader['n_train_samples']),
                     prm,
                     update_file=update_file)

    start_time = timeit.default_timer()

    # Run training epochs:
    for i_epoch in range(prm.num_epochs):
        run_train_epoch(i_epoch)

    # Test:
    test_acc, test_loss = run_test_Bayes(post_model, test_loader,
                                         loss_criterion, prm)

    stop_time = timeit.default_timer()
    cmn.write_final_result(test_acc,
                           stop_time - start_time,
                           prm,
                           result_name=prm.test_type)

    test_err = 1 - test_acc
    return test_err, post_model
Beispiel #14
0
def run_learning(task_data, prior_model, prm, init_from_prior=True, verbose=1):
    """Meta-test: train a posterior for a new task, regularized by a prior.

    Trains for prm.n_meta_test_epochs epochs; each batch objective is the
    Monte-Carlo empirical loss plus an intra-task complexity term from
    get_bayes_task_objective. Per-batch statistics are logged periodically and
    pickled to prm.result_dir at the end.

    Args:
        task_data: dict with 'train' and 'test' DataLoaders for the new task.
        prior_model: prior model used for initialization and the complexity
            term.
        prm: run-parameters object (optim_func, optim_args, lr_schedule,
            loss_type, n_MC, n_meta_test_epochs, result_dir, test_type, ...).
        init_from_prior: if True, start the posterior from the prior's
            weights.
        verbose: if 1, write progress to the log file.

    Returns:
        (test_err, test_loss, test_bound, post_model): test error, test loss,
        the last epoch's final-batch objective value, and the trained model.
    """

    # -------------------------------------------------------------------------------------------
    #  Setting-up
    # -------------------------------------------------------------------------------------------
    # Unpack parameters:
    optim_func, optim_args, lr_schedule =\
        prm.optim_func, prm.optim_args, prm.lr_schedule

    # Loss criterion
    loss_criterion = get_loss_criterion(prm.loss_type)

    # Create posterior model for the new task:
    post_model = get_model(prm)

    if init_from_prior:
        post_model.load_state_dict(prior_model.state_dict())

        # prior_model_dict = prior_model.state_dict()
        # post_model_dict = post_model.state_dict()
        #
        # # filter out unnecessary keys:
        # prior_model_dict = {k: v for k, v in prior_model_dict.items() if '_log_var' in k or '_mu' in k}
        # # overwrite entries in the existing state dict:
        # post_model_dict.update(prior_model_dict)
        #
        # # #  load the new state dict
        # post_model.load_state_dict(post_model_dict)

        # add_noise_to_model(post_model, prm.kappa_factor)

    # The data-sets of the new task:
    train_loader = task_data['train']
    test_loader = task_data['test']
    n_train_samples = len(train_loader.dataset)
    n_batches = len(train_loader)

    #  Get optimizer:
    optimizer = optim_func(post_model.parameters(), **optim_args)

    # -------------------------------------------------------------------------------------------
    #  Training epoch  function
    # -------------------------------------------------------------------------------------------

    def run_train_epoch(i_epoch):
        """Train over all batches of the epoch; return the final batch's
        objective value (None if the train loader is empty)."""
        log_interval = 500

        post_model.train()

        last_objective = None  # objective of the most recently processed batch

        for batch_idx, batch_data in enumerate(train_loader):

            correct_count = 0
            sample_count = 0

            # Monte-Carlo iterations: average loss/complexity over n_MC
            # stochastic forward passes.
            n_MC = prm.n_MC
            task_empirical_loss = 0
            task_complexity = 0
            for i_MC in range(n_MC):
                # get batch:
                inputs, targets = data_gen.get_batch_vars(batch_data, prm)

                # Calculate empirical loss:
                outputs = post_model(inputs)
                curr_empirical_loss = loss_criterion(outputs, targets)

                curr_empirical_loss, curr_complexity = get_bayes_task_objective(
                    prm,
                    prior_model,
                    post_model,
                    n_train_samples,
                    curr_empirical_loss,
                    noised_prior=False)

                task_empirical_loss += (1 / n_MC) * curr_empirical_loss
                task_complexity += (1 / n_MC) * curr_complexity

                correct_count += count_correct(outputs, targets)
                sample_count += inputs.size(0)

            # Total objective:
            total_objective = task_empirical_loss + task_complexity

            # Take gradient step with the posterior:
            grad_step(total_objective, optimizer, lr_schedule, prm.lr, i_epoch)

            last_objective = total_objective.item()

            # Print status:
            if batch_idx % log_interval == 0:
                batch_acc = correct_count / sample_count
                print(
                    cmn.status_string(i_epoch, prm.n_meta_test_epochs,
                                      batch_idx, n_batches, batch_acc,
                                      total_objective.item()) +
                    ' Empiric Loss: {:.4}\t Intra-Comp. {:.4}'.format(
                        task_empirical_loss.item(), task_complexity.item()))

                data_objective.append(total_objective.item())
                data_accuracy.append(batch_acc)
                data_emp_loss.append(task_empirical_loss.item())
                data_task_comp.append(task_complexity.item())

        # FIX: return AFTER the batch loop. The original 'return' was indented
        # inside the loop, so each epoch trained on only the first batch.
        return last_objective

    # -----------------------------------------------------------------------------------------------------------#
    # Update Log file
    if verbose == 1:
        write_to_log(
            'Total number of steps: {}'.format(n_batches *
                                               prm.n_meta_test_epochs), prm)

    # -------------------------------------------------------------------------------------------
    #  Run epochs
    # -------------------------------------------------------------------------------------------
    start_time = timeit.default_timer()

    # Per-batch statistics collected by run_train_epoch (closure variables):
    data_objective = []
    data_accuracy = []
    data_emp_loss = []
    data_task_comp = []
    # Training loop:
    for i_epoch in range(prm.n_meta_test_epochs):
        test_bound = run_train_epoch(i_epoch)

    # Persist the collected statistics for later analysis:
    with open(
            os.path.join(prm.result_dir, 'run_test_data_prior_bound_data.pkl'),
            'wb') as f:
        pickle.dump(
            {
                'data_objective': data_objective,
                "data_accuracy": data_accuracy,
                'data_emp_loss': data_emp_loss,
                'data_task_comp': data_task_comp
            }, f)

    # Test:
    test_acc, test_loss = run_test_Bayes(post_model, test_loader,
                                         loss_criterion, prm)

    stop_time = timeit.default_timer()
    cmn.write_final_result(test_acc,
                           stop_time - start_time,
                           prm,
                           result_name=prm.test_type,
                           verbose=verbose)

    test_err = 1 - test_acc
    return test_err, test_loss, test_bound, post_model
Beispiel #15
0
def run_learning(task_data, prior_model, prm, init_from_prior=True, verbose=1):
    """Learn a Bayesian posterior for a single new task, starting from a prior.

    Args:
        task_data: task data container. NOTE(review): below, ``train_loader``
            is set to the whole ``task_data`` object yet indexed as
            ``train_loader['train']``, and ``n_batches = len(train_loader)``
            measures the container itself — confirm the expected structure.
        prior_model: trained stochastic prior network.
        prm: hyper-parameter object (optimizer, lr schedule, epoch counts, ...).
        init_from_prior: if True, copy the prior's state dict into the posterior.
        verbose: 1 writes progress to the log file.

    Returns:
        (test_err, post_model): test error (1 - accuracy) and the learned
        posterior model.
    """

    # prm.optim_func, prm.optim_args = optim.EntropySGD, {'llr':0.01, 'lr':0.1, 'momentum':0.9, 'damp':0, 'weight_decay':1e-3, 'nesterov':True,
    #                  'L':20, 'eps':1e-3, 'g0':1e-4, 'g1':1e-3}

    # -------------------------------------------------------------------------------------------
    #  Setting-up
    # -------------------------------------------------------------------------------------------
    # Unpack parameters:
    # prm.optim_args['llr'] = 0.1
    # prm.optim_args['L'] = 20
    # # prm.optim_args['weight_decay'] = 1e-3
    # # prm.optim_args['g1'] = 0
    # prm.optim_args['g0'] = 1e-4
    # Note: uses the *test-time* lr schedule (lr_schedule_test):
    optim_func, optim_args, lr_schedule =\
        prm.optim_func, prm.optim_args, prm.lr_schedule_test

    # prm.optim_func, prm.optim_args = optim.Adam, {'lr': prm.lr}  # 'weight_decay': 1e-4

    # lr_schedule = {'decay_factor': 0.1, 'decay_epochs': [15, 20]}

    # Loss criterion
    loss_criterion = get_loss_criterion(prm.loss_type)

    # Create posterior model for the new task:
    post_model = get_model(prm)

    if init_from_prior:
        # Initialize the posterior at the prior's parameters:
        post_model.load_state_dict(prior_model.state_dict())

        # prior_model_dict = prior_model.state_dict()
        # post_model_dict = post_model.state_dict()
        #
        # # filter out unnecessary keys:
        # prior_model_dict = {k: v for k, v in prior_model_dict.items() if '_log_var' in k or '_mu' in k}
        # # overwrite entries in the existing state dict:
        # post_model_dict.update(prior_model_dict)
        #
        # # #  load the new state dict
        # post_model.load_state_dict(post_model_dict)

        # add_noise_to_model(post_model, prm.kappa_factor)

    # The data-sets of the new task:
    train_loader = task_data
    test_loader = task_data['test']
    # n_train_samples = len(train_loader['train'].dataset)
    n_batches = len(train_loader)

    #  Get optimizer:
    # NOTE(review): optim_args is passed positionally (not **optim_args) —
    # this matches an optimizer whose constructor takes a config dict
    # (e.g. the EntropySGD variant commented above); verify against prm.optim_func.
    optimizer = optim_func(
        filter(lambda p: p.requires_grad, post_model.parameters()), optim_args)

    # optimizer = optim_func(filter(lambda p: p.requires_grad, post_model.parameters()), optim_args['lr'])

    # -------------------------------------------------------------------------------------------
    #  Training epoch  function
    # -------------------------------------------------------------------------------------------

    def run_train_epoch(i_epoch):
        """Run one training epoch over the task's training set."""
        # log_interval = 500

        post_model.train()

        train_iterators = iter(train_loader['train'])

        for batch_idx, batch_data in enumerate(train_loader['train']):

            # NOTE(review): `object` resolves to the builtin unless shadowed at
            # module level — `object.feval` looks suspicious; verify.
            task_loss, info = get_objective(prior_model, prm, [train_loader],
                                            object.feval, [train_iterators],
                                            [post_model], loss_criterion, 1,
                                            [0])

            grad_step(task_loss[0], post_model, loss_criterion, optimizer, prm,
                      train_iterators, train_loader['train'], lr_schedule,
                      prm.optim_args['lr'], i_epoch)

            # for log_var in post_model.parameters():
            #     if log_var.requires_grad is False:
            #         log_var.data = log_var.data - (i_epoch + 1) * math.log(1 + prm.gamma1)

            # Print status:
            log_interval = 10
            if (batch_idx) % log_interval == 0:
                batch_acc = info['correct_count'] / info['sample_count']
                # NOTE(review): prints prm.n_meta_train_epochs although the
                # loop below runs prm.n_meta_test_epochs epochs — confirm.
                print(
                    cmn.status_string(i_epoch, prm.n_meta_train_epochs,
                                      batch_idx, n_batches, batch_acc) +
                    ' Empiric-Loss: {:.4f}'.format(info['avg_empirical_loss']))

    # -----------------------------------------------------------------------------------------------------------#
    # Update Log file
    if verbose == 1:
        write_to_log(
            'Total number of steps: {}'.format(n_batches *
                                               prm.n_meta_test_epochs), prm)

    # -------------------------------------------------------------------------------------------
    #  Run epochs
    # -------------------------------------------------------------------------------------------
    start_time = timeit.default_timer()

    # Training loop:
    for i_epoch in range(prm.n_meta_test_epochs):
        run_train_epoch(i_epoch)

    # Test:
    test_acc, test_loss = run_test_Bayes(post_model, test_loader,
                                         loss_criterion, prm)

    stop_time = timeit.default_timer()
    cmn.write_final_result(test_acc,
                           stop_time - start_time,
                           prm,
                           result_name=prm.test_type,
                           verbose=verbose)

    test_err = 1 - test_acc
    return test_err, post_model
Beispiel #16
0
if info['type'] == 'multi_class':
    losses = ['Zero_One']
elif info['type'] == 'binary_class':
    losses = ['Logistic_Binary_Clipped', 'Zero_One']
else:
    raise ValueError

prt = deepcopy(prm)  # Temp parameters
for loss_type in losses:
    prt.loss_type = loss_type
    test_acc, test_loss = run_eval_Bayes(post_model, data_loader['test'], prt)
    train_acc, train_loss = run_eval_Bayes(post_model, data_loader['train'],
                                           prt)
    print('-' * 20)
    write_to_log(
        '-Loss func. {}, Train-loss :{:.4}, Test-loss:  {:.4}'.format(
            loss_type, train_loss, test_loss), prm)

    for divergence_type in ['KL', 'W_Sqr', 'W_NoSqr']:
        prt.divergence_type = divergence_type
        dvrg_val = get_net_densities_divergence(prior_model, post_model, prt)

        write_to_log(
            '\t--Divergence: {} = {:.4}'.format(prt.divergence_type, dvrg_val),
            prm)
        for complexity_type in ['McAllester', 'Seeger', 'Catoni']:
            prt.complexity_type = complexity_type
            bound_val = learn_single_Bayes.eval_bound(post_model, prior_model,
                                                      data_loader, prt,
                                                      train_loss, dvrg_val)
            write_to_log(
Beispiel #17
0
# Sampler that draws tasks from the environment's task distribution:
task_generator = Task_Generator(prm)
# -------------------------------------------------------------------------------------------
#  Run Meta-Training
# -------------------------------------------------------------------------------------------

start_time = timeit.default_timer()

if prm.mode == 'MetaTrain':

    n_train_tasks = prm.n_train_tasks
    if n_train_tasks:
        # In this case we generate a finite set of train (observed) task before meta-training.

        # Generate the data sets of the training-tasks:
        write_to_log('---- Generating {} training-tasks'.format(n_train_tasks),
                     prm)
        train_data_loaders = task_generator.create_meta_batch(
            prm, n_train_tasks, meta_split='meta_train')

        # Meta-training to learn meta-model (theta params):
        meta_model = meta_train_MAML_finite_tasks.run_meta_learning(
            train_data_loaders, prm)
    else:
        # In this case we observe new tasks generated from the task-distribution in each meta-iteration.
        write_to_log(
            '---- Infinite train tasks - New training tasks '
            'are drawn from tasks distribution in each iteration...', prm)

        # Meta-training to learn meta-model (theta params):
        meta_model = meta_train_MAML_infinite_tasks.run_meta_learning(
            prm, task_generator)
Beispiel #18
0
# -------------------------------------------------------------------------------------------
#  Run experiments
# -------------------------------------------------------------------------------------------

# Sampler that draws tasks from the environment's task distribution:
task_generator = Task_Generator(prm)

# Per-experiment test errors, one array per transfer-learning variant:
test_err_orig = np.zeros(n_experiments)
test_err_scratch = np.zeros(n_experiments)
test_err_scratch_bayes = np.zeros(n_experiments)
test_err_transfer = np.zeros(n_experiments)
test_err_scratch_reg = np.zeros(n_experiments)
test_err_freeze = np.zeros(n_experiments)

for i in range(n_experiments):
    write_to_log('--- Experiment #{} out of {}'.format(i+1, n_experiments), prm)

    # Generate the task #1 data set:
    task1_data = task_generator.get_data_loader(prm)
    n_samples_orig = task1_data['n_train_samples']

    #  Run learning of task 1 (its trained model is transferred to task 2):
    write_to_log('--- Standard learning of task #1', prm)
    test_err_orig[i], transferred_model = learn_single_standard.run_learning(task1_data, prm)

    # Generate the task 2 data set:

    write_to_log('--- Generating task #2 with at most {} samples'.format(limit_train_samples), prm)
    task2_data = task_generator.get_data_loader(prm, limit_train_samples = limit_train_samples)

    #  Run learning of task 2 from scratch:
    # NOTE(review): fragment ends here; the task-2 training calls that fill the
    # remaining test_err_* arrays are not visible in this chunk.
Beispiel #19
0
#set_trace()
# Sampler that draws tasks from the environment's task distribution:
task_generator = Task_Generator(prm)

# -------------------------------------------------------------------------------------------
#  Run Meta-Training
# -------------------------------------------------------------------------------------------

start_time = timeit.default_timer()

if prm.mode == 'MetaTrain':

    n_train_tasks = prm.n_train_tasks
    if n_train_tasks:
        # In this case we generate a finite set of train (observed) task before meta-training.
        # Generate the data sets of the training tasks:
        write_to_log('--- Generating {} training-tasks'.format(n_train_tasks),
                     prm)
        train_data_loaders = task_generator.create_meta_batch(
            prm, n_train_tasks, meta_split='meta_train')

        # Meta-training to learn prior:
        prior_model = meta_train_Bayes_finite_tasks.run_meta_learning(
            train_data_loaders, prm)
        # save learned prior:
        save_model_state(prior_model, save_path)
        write_to_log('Trained prior saved in ' + save_path, prm)
    else:
        # In this case we observe new tasks generated from the task-distribution in each meta-iteration.
        # NOTE(review): this branch only logs; the meta-training call for the
        # infinite-tasks case is not visible in this fragment — verify.
        write_to_log(
            '---- Infinite train tasks - New training tasks are '
            'drawn from tasks distribution in each iteration...', prm)
# Sampler that draws tasks from the environment's task distribution:
task_generator = Task_Generator(prm)

# -------------------------------------------------------------------------------------------
#  Run Meta-Training
# -------------------------------------------------------------------------------------------

start_time = timeit.default_timer()

if prm.mode == 'MetaTrain':

    n_train_tasks = prm.n_train_tasks
    if n_train_tasks:
        # In this case we generate a finite set of train (observed) task before meta-training.
        # Generate the data sets of the training tasks:
        # BUGFIX: corrected the typo in the printed message ('Traning' -> 'Training').
        print('--- Training PAC Bayes bound: {}'.format(prm.complexity_type))
        write_to_log('--- Generating {} training-tasks'.format(n_train_tasks),
                     prm)
        train_data_loaders = task_generator.create_meta_batch(
            prm, n_train_tasks, meta_split='meta_train')

        # Meta-training to learn prior:
        prior_model = meta_train_Bayes_finite_tasks.run_meta_learning(
            train_data_loaders, prm)
        # save learned prior:
        save_model_state(prior_model, save_path)
        write_to_log('Trained prior saved in ' + save_path, prm)
    else:
        # In this case we observe new tasks generated from the task-distribution in each meta-iteration.
        write_to_log(
            '---- Infinite train tasks - New training tasks are '
            'drawn from tasks distribution in each iteration...', prm)
Beispiel #21
0
# Experiment configuration: 5-way / 100-shot CIFAR-100 learned from scratch.
prm.N_Way = 5
prm.K_Shot_MetaTest = 100
prm.data_source = "CIFAR100"
prm.model_name = "CIFARNet"

learning_rate = 1e-3
loss_fn = nn.CrossEntropyLoss()

# Result directory named after the full experiment configuration:
prm.run_name = "scratch_cifar100_lr_{}_{}_way_{}_shot_nte_{}_epoch_{}".format(learning_rate, prm.N_Way, prm.K_Shot_MetaTest, num_tasks, epoch_num)
prm.result_dir = os.path.join("scratch_log", prm.run_name)
os.makedirs(prm.result_dir,  exist_ok=True)
#create_result_dir(prm)
time_str = datetime.now().strftime(' %Y-%m-%d %H:%M:%S')
# mode="w" truncates any previous log file for this run:
write_to_log("Written by scratch.py at {}".format(time_str), prm, mode = "w")

# Draw num_tasks meta-test tasks:
task_generator = Task_Generator(prm)
data_loaders = task_generator.create_meta_batch(prm, num_tasks, meta_split='meta_test')
assert len(data_loaders) == num_tasks

best_accs = []
best_losses = []

for i_task in range(num_tasks):
    # Re-initialize the network for every task (training from scratch):
    net = get_model(prm)
    net = net.cuda()
    optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)

    d = data_loaders[i_task]
Beispiel #22
0
def run_learning(data_loader, prm, verbose=1, initial_model=None):
    """Run standard (deterministic) supervised learning on a single task.

    Optionally warm-starts from `initial_model`, and can freeze parts of the
    network: modules named in `prm.freeze_list` are excluded from the
    optimizer, or, if `prm.not_freeze_list` is set, only those modules train.

    Args:
        data_loader: dict with 'train' and 'test' loaders and 'n_train_samples'.
        prm: hyper-parameter object (optim_func/optim_args, lr_schedule,
            loss_type, num_epochs, lr, ...).
        verbose: 0 suppresses the log-file update and final-result verbosity.
        initial_model: optional model whose state dict initializes the new model.

    Returns:
        (test_err, model): final test error (1 - test accuracy) and the
        trained model.
    """

    # Unpack parameters:
    optim_func, optim_args, lr_schedule = \
        prm.optim_func, prm.optim_args, prm.lr_schedule

    # Loss criterion
    loss_criterion = get_loss_criterion(prm.loss_type)

    # The data-sets:
    train_loader = data_loader['train']
    test_loader = data_loader['test']

    n_batches = len(train_loader)

    # Create model (a functional-style model if prm.func_model is set):
    if hasattr(prm, 'func_model') and prm.func_model:
        import Models.deterministic_models as func_models
        model = func_models.get_model(prm)
    else:
        model = get_model(prm)

    # Load initial weights:
    if initial_model:
        model.load_state_dict(initial_model.state_dict())

    # Gather modules list:
    modules_list = list(model.named_children())
    if hasattr(model, 'net'):
        # extract the modules from the 'net' field and drop the wrapper itself.
        # BUGFIX: was `m[0] is not 'net'` — identity comparison against a
        # string literal depends on interning (SyntaxWarning on Python >= 3.8);
        # use inequality instead.
        modules_list += list(model.net.named_children())
        modules_list = [m for m in modules_list if m[0] != 'net']

    # Determine which parameters are optimized and which are frozen:
    if hasattr(prm, 'freeze_list'):
        # Train everything except the listed modules:
        freeze_list = prm.freeze_list
        optimized_modules = [
            named_module[1] for named_module in modules_list
            if not named_module[0] in freeze_list
        ]
        optimized_params = sum(
            [list(mo.parameters()) for mo in optimized_modules], [])
    elif hasattr(prm, 'not_freeze_list'):
        # Train only the listed modules:
        not_freeze_list = prm.not_freeze_list
        optimized_modules = [
            named_module[1] for named_module in modules_list
            if named_module[0] in not_freeze_list
        ]
        optimized_params = sum(
            [list(mo.parameters()) for mo in optimized_modules], [])
    else:
        optimized_params = model.parameters()

    #  Get optimizer:
    optimizer = optim_func(optimized_params, **optim_args)

    # -------------------------------------------------------------------------------------------
    #  Training epoch  function
    # -------------------------------------------------------------------------------------------

    def run_train_epoch(i_epoch):
        """One pass over the training set, with a gradient step per batch."""
        log_interval = 500

        model.train()
        for batch_idx, batch_data in enumerate(train_loader):

            # get batch:
            inputs, targets = data_gen.get_batch_vars(batch_data, prm)

            # Calculate loss:
            outputs = model(inputs)
            loss = loss_criterion(outputs, targets)

            # Take gradient step:
            grad_step(loss, optimizer, lr_schedule, prm.lr, i_epoch)

            # Print status:
            if batch_idx % log_interval == 0:
                batch_acc = correct_rate(outputs, targets)
                print(
                    cmn.status_string(i_epoch, prm.num_epochs, batch_idx,
                                      n_batches, batch_acc, get_value(loss)))

    # -----------------------------------------------------------------------------------------------------------#
    # Update Log file
    # -----------------------------------------------------------------------------------------------------------#
    update_file = verbose != 0
    cmn.write_to_log(cmn.get_model_string(model), prm, update_file=update_file)
    cmn.write_to_log('Total number of steps: {}'.format(n_batches *
                                                        prm.num_epochs),
                     prm,
                     update_file=update_file)
    cmn.write_to_log('Number of training samples: {}'.format(
        data_loader['n_train_samples']),
                     prm,
                     update_file=update_file)

    # -------------------------------------------------------------------------------------------
    #  Run epochs
    # -------------------------------------------------------------------------------------------
    start_time = timeit.default_timer()

    # Training loop:
    for i_epoch in range(prm.num_epochs):
        run_train_epoch(i_epoch)

    # Test:
    test_acc = run_test(model, test_loader, loss_criterion, prm)

    stop_time = timeit.default_timer()
    cmn.write_final_result(test_acc,
                           stop_time - start_time,
                           prm,
                           verbose=verbose,
                           result_name='Standard')

    test_err = 1 - test_acc
    return test_err, model
Beispiel #23
0
def run_meta_learning(data_loaders, prm):
    """Meta-train a shared Bayesian prior over a finite set of training tasks.

    Jointly optimizes one posterior model per task and a single shared prior
    model with one optimizer, using the objective returned by `get_objective`.

    Args:
        data_loaders: list with one entry per training task; each entry is a
            dict holding that task's 'train' and 'test' loaders.
        prm: hyper-parameter object (optimizer, lr schedule, meta_batch_size,
            n_meta_train_epochs, test_type, ...).

    Returns:
        The learned prior model.
    """

    # -------------------------------------------------------------------------------------------
    #  Setting-up
    # -------------------------------------------------------------------------------------------
    # Unpack parameters:
    optim_func, optim_args, lr_schedule =\
        prm.optim_func, prm.optim_args, prm.lr_schedule

    # Loss criterion
    loss_criterion = get_loss_func(prm)

    n_train_tasks = len(data_loaders)

    # assert prm.meta_batch_size <= n_train_tasks

    # Create posterior models for each task:
    posteriors_models = [get_model(prm) for _ in range(n_train_tasks)]

    # Create a 'dummy' model to generate the set of parameters of the shared prior:
    prior_model = get_model(prm)

    # Gather all tasks posterior params:
    all_post_param = sum([list(posterior_model.parameters()) for posterior_model in posteriors_models], [])

    # Create optimizer for all parameters (posteriors + prior)
    prior_params = list(prior_model.parameters())
    all_params = all_post_param + prior_params
    all_optimizer = optim_func(all_params, **optim_args)

    # number of sample-batches in each task:
    n_batch_list = [len(data_loader['train']) for data_loader in data_loaders]

    # An epoch is sized by the task with the most batches:
    n_batches_per_task = np.max(n_batch_list)


    # -------------------------------------------------------------------------------------------
    #  Training epoch  function
    # -------------------------------------------------------------------------------------------
    def run_train_epoch(i_epoch, i_step = 0):
        """Run one meta-training epoch; returns the updated global step count."""

        # For each task, prepare an iterator to generate training batches:
        train_iterators = [iter(data_loaders[ii]['train']) for ii in range(n_train_tasks)]

        # The task order to take batches from:
        # The meta-batch will be balanced - i.e, each task will appear roughly the same number of times
        # note: if some tasks have less data that other tasks - it may be sampled more than once in an epoch
        task_order = []
        task_ids_list = list(range(n_train_tasks))
        for i_batch in range(n_batches_per_task):
            random.shuffle(task_ids_list)
            task_order += task_ids_list
        # Note: this method ensures each training sample in each task is drawn in each epoch.
        # If all the tasks have the same number of sample, then each sample is drawn exactly once in an epoch.

        # random.shuffle(task_ids_list) # --############ TEMP

        # ----------- meta-batches loop (batches of tasks) -----------------------------------#
        # each meta-batch includes several tasks
        # we take a grad step with theta after each meta-batch
        meta_batch_starts = list(range(0, len(task_order), prm.meta_batch_size))
        n_meta_batches = len(meta_batch_starts)

        for i_meta_batch in range(n_meta_batches):

            meta_batch_start = meta_batch_starts[i_meta_batch]
            task_ids_in_meta_batch = task_order[meta_batch_start: (meta_batch_start + prm.meta_batch_size)]
            # meta-batch size may be less than  prm.meta_batch_size at the last one
            # note: it is OK if some tasks appear several times in the meta-batch

            mb_data_loaders = [data_loaders[task_id] for task_id in task_ids_in_meta_batch]
            mb_iterators = [train_iterators[task_id] for task_id in task_ids_in_meta_batch]
            mb_posteriors_models = [posteriors_models[task_id] for task_id in task_ids_in_meta_batch]

            # prior_weight_steps = 10000
            # # prior_weight = 1 - math.exp(-i_step/prior_weight_steps)
            # prior_weight = min(i_step / prior_weight_steps, 1.0)
            i_step += 1

            # Get objective based on tasks in meta-batch:
            total_objective, info = get_objective(prior_model, prm, mb_data_loaders,
                                                  mb_iterators, mb_posteriors_models, loss_criterion, n_train_tasks)

            # Take gradient step with the shared prior and all tasks' posteriors:
            grad_step(total_objective, all_optimizer, lr_schedule, prm.lr, i_epoch)

            # Print status:
            log_interval = 200
            if i_meta_batch % log_interval == 0:
                batch_acc = info['correct_count'] / info['sample_count']
                print(cmn.status_string(i_epoch,  prm.n_meta_train_epochs, i_meta_batch,
                                        n_meta_batches, batch_acc, total_objective.item()) +
                      ' Empiric-Loss: {:.4}\t Task-Comp. {:.4}\t Meta-Comp.: {:.4}\t'.
                      format(info['avg_empirical_loss'], info['avg_intra_task_comp'], info['meta_comp']))
        # end  meta-batches loop
        return i_step
    # end run_epoch()

    # -------------------------------------------------------------------------------------------
    #  Test evaluation function -
    # Evaluate the mean loss on samples from the test sets of the training tasks
    # --------------------------------------------------------------------------------------------
    def run_test():
        """Average test accuracy of the per-task posteriors over tasks with test data."""
        test_acc_avg = 0.0
        n_tests = 0
        for i_task in range(n_train_tasks):
            model = posteriors_models[i_task]
            test_loader = data_loaders[i_task]['test']
            if len(test_loader) > 0:
                test_acc, test_loss = run_eval_Bayes(model, test_loader, prm)
                n_tests += 1
                test_acc_avg += test_acc

                n_test_samples = len(test_loader.dataset)

                write_to_log('Train Task {}, Test set: {} -  Average loss: {:.4}, Accuracy: {:.3} (of {} samples)\n'.format(
                    i_task, prm.test_type, test_loss, test_acc, n_test_samples), prm)
            else:
                print('Train Task {}, Test set: {} - No test data'.format(i_task, prm.test_type))

        if n_tests > 0:
            test_acc_avg /= n_tests
        return test_acc_avg

    # -----------------------------------------------------------------------------------------------------------#
    # Main script
    # -----------------------------------------------------------------------------------------------------------#

    # Update Log file

    write_to_log(cmn.get_model_string(prior_model), prm)
    write_to_log('---- Meta-Training set: {0} tasks'.format(len(data_loaders)), prm)

    # -------------------------------------------------------------------------------------------
    #  Run epochs
    # -------------------------------------------------------------------------------------------
    start_time = timeit.default_timer()
    i_step = 0
    # Training loop:
    for i_epoch in range(prm.n_meta_train_epochs):
        i_step = run_train_epoch(i_epoch, i_step)

    stop_time = timeit.default_timer()

    # Test:
    test_acc_avg = run_test()

    # Update Log file:
    cmn.write_final_result(test_acc_avg, stop_time - start_time, prm, result_name=prm.test_type)

    # Return learned prior:
    return prior_model
Beispiel #24
0
# path to save the learned meta-parameters
save_path = os.path.join(prm.result_dir, 'learned_prior.pt')

# -------------------------------------------------------------------------------------------
#  Run Meta-Training
# -------------------------------------------------------------------------------------------

start_time = timeit.default_timer()

if prm.mode == 'MetaTrain':

    n_train_tasks = prm.n_train_tasks
    # In this case we generate a finite set of train (observed) task before meta-training.
    # Generate the data sets of the training tasks:
    write_to_log(
        '-' * 5 + 'Generating {} training-tasks'.format(n_train_tasks) +
        '-' * 5, prm)
    train_data_loaders = task_generator.create_meta_batch(
        prm, n_train_tasks, meta_split='meta_train')

    # Run standard learning for each task and average the parameters:
    avg_param_vec = None
    for i_task in range(n_train_tasks):
        print('Learning train-task {} out of {}'.format(
            i_task + 1, n_train_tasks))
        data_loader = train_data_loaders[i_task]
        test_err, curr_model = learn_single_standard.run_learning(data_loader,
                                                                  prm,
                                                                  verbose=0)
        if i_task == 0:
            avg_param_vec = parameters_to_vector(
Beispiel #25
0
def run_learning(task_data, prior_model, prm, init_from_prior=True, verbose=1):
    """Learn a Bayesian posterior for a new (meta-test) task from a given prior.

    Trains with the PAC-Bayes task objective (empirical loss plus a weighted
    complexity term relative to the prior), evaluates on the test set after
    every epoch, and reports the best test accuracy reached.

    Args:
        task_data: dict with 'train' and 'test' data loaders for the task.
        prm: hyper-parameter object (optimizer, lr schedule, n_MC,
            task_complex_w, n_meta_test_epochs, ...).
        prior_model: trained stochastic prior network.
        init_from_prior: if True, initialize the posterior from the prior weights.
        verbose: 1 writes progress to the log file.

    Returns:
        (test_err, best_acc_comp, best_acc_loss, post_model): error at the
        best epoch (1 - best accuracy), the complexity term and test loss at
        that epoch, and the final posterior model.
    """

    # -------------------------------------------------------------------------------------------
    #  Setting-up
    # -------------------------------------------------------------------------------------------
    # Unpack parameters:
    optim_func, optim_args, lr_schedule =\
        prm.optim_func, prm.optim_args, prm.lr_schedule

    # Loss criterion
    loss_criterion = get_loss_criterion(prm.loss_type)

    # Create posterior model for the new task:
    post_model = get_model(prm)

    if init_from_prior:
        # Initialize the posterior at the prior's parameters:
        post_model.load_state_dict(prior_model.state_dict())

    # The data-sets of the new task:
    train_loader = task_data['train']
    test_loader = task_data['test']
    n_train_samples = len(train_loader.dataset)
    n_batches = len(train_loader)

    #  Get optimizer:
    optimizer = optim_func(post_model.parameters(), **optim_args)

    # -------------------------------------------------------------------------------------------
    #  Training epoch  function
    # -------------------------------------------------------------------------------------------

    def run_train_epoch(i_epoch):
        """One training epoch; returns per-batch averages of complexity and loss."""
        log_interval = 500

        post_model.train()

        train_info = {}
        train_info["task_comp"] = 0.0
        train_info["total_loss"] = 0.0

        cnt = 0
        for batch_idx, batch_data in enumerate(train_loader):
            cnt += 1

            correct_count = 0
            sample_count = 0

            # Monte-Carlo iterations (average the stochastic objective):
            n_MC = prm.n_MC
            task_empirical_loss = 0
            task_complexity = 0
            for i_MC in range(n_MC):
                # get batch:
                inputs, targets = data_gen.get_batch_vars(batch_data, prm)

                # Calculate empirical loss:
                outputs = post_model(inputs)
                curr_empirical_loss = loss_criterion(outputs, targets)

                # hyper_kl = 0 when testing
                curr_empirical_loss, curr_complexity, task_info = get_bayes_task_objective(
                    prm,
                    prior_model,
                    post_model,
                    n_train_samples,
                    curr_empirical_loss,
                    noised_prior=False)

                task_empirical_loss += (1 / n_MC) * curr_empirical_loss
                task_complexity += (1 / n_MC) * curr_complexity

                correct_count += count_correct(outputs, targets)
                sample_count += inputs.size(0)

            # Total objective:

            total_objective = task_empirical_loss + prm.task_complex_w * task_complexity

            # BUGFIX: `.data[0]` fails on 0-dim tensors in PyTorch >= 0.4;
            # use `.item()` (consistent with the rest of the code base).
            train_info["task_comp"] += task_complexity.item()
            train_info["total_loss"] += total_objective.item()

            # Take gradient step with the posterior:
            grad_step(total_objective, optimizer, lr_schedule, prm.lr, i_epoch)

            # Print status:
            if batch_idx % log_interval == 0:
                batch_acc = correct_count / sample_count
                write_to_log(
                    cmn.status_string(i_epoch, prm.n_meta_test_epochs,
                                      batch_idx, n_batches, batch_acc,
                                      total_objective.item()) +
                    ' Empiric Loss: {:.4}\t Intra-Comp. {:.4}, w_kld {:.4}, b_kld {:.4}'
                    .format(task_empirical_loss.item(),
                            task_complexity.item(), task_info["w_kld"],
                            task_info["b_kld"]), prm)

        # Report per-batch averages over the epoch:
        train_info["task_comp"] /= cnt
        train_info["total_loss"] /= cnt
        return train_info

    # -----------------------------------------------------------------------------------------------------------#
    # Update Log file
    if verbose == 1:
        write_to_log(
            'Total number of steps: {}'.format(n_batches *
                                               prm.n_meta_test_epochs), prm)

    # -------------------------------------------------------------------------------------------
    #  Run epochs
    # -------------------------------------------------------------------------------------------
    start_time = timeit.default_timer()

    # Training loop: track the epoch with the best test accuracy.
    best_acc = -1
    best_acc_loss = -1
    best_acc_comp = -1

    for i_epoch in range(prm.n_meta_test_epochs):
        train_info = run_train_epoch(i_epoch)
        test_acc, test_loss = run_test_Bayes(post_model, test_loader,
                                             loss_criterion, prm)
        if test_acc > best_acc:
            best_acc = test_acc
            best_acc_loss = test_loss
            best_acc_comp = train_info["task_comp"]

    # Final test evaluation (kept for its logging side effect; the values
    # returned to the caller are those of the best epoch above):
    test_acc, test_loss = run_test_Bayes(post_model, test_loader,
                                         loss_criterion, prm)

    stop_time = timeit.default_timer()
    cmn.write_final_result(best_acc,
                           stop_time - start_time,
                           prm,
                           result_name=prm.test_type,
                           verbose=verbose)

    test_err = 1 - best_acc
    return test_err, best_acc_comp, best_acc_loss, post_model
Beispiel #26
0
        for i_rep in range(n_reps):
            # Generate task data set:
            data_loader = task_generator.get_data_loader(
                prm, limit_train_samples=n_train_samples)

            # Learn a posterior which minimizes some bound with the training loss function
            post_model, test_err, test_loss, log_mat = learn_single_Bayes.run_learning(
                data_loader, prm, prior_model, init_from_prior=True, verbose=0)

            # evaluation
            _, train_loss = run_eval_Bayes(post_model, data_loader['train'],
                                           prm_eval)
            _, test_loss = run_eval_Bayes(post_model, data_loader['test'],
                                          prm_eval)
            write_to_log(
                'n_samples: {}, rep: {}.  Train-loss :{:.4}, Test-loss:  {:.4}'
                .format(n_train_samples, i_rep, train_loss, test_loss), prm)

            for i_val_type, val_type in enumerate(val_types):
                if val_type[0] == 'train_loss':
                    val = train_loss
                elif val_type[0] == 'test_loss':
                    val = test_loss
                elif val_type[0] == 'Bound':
                    prm_eval.complexity_type = val_type[1]
                    prm_eval.divergence_type = val_type[2]
                    val = learn_single_Bayes.eval_bound(
                        post_model, prior_model, data_loader, prm_eval,
                        train_loss)
                    write_to_log(str(val_type) + ' = ' + str(val), prm)
                elif val_type[0] == 'Divergence':