Example #1
0
    def run_train_epoch(i_epoch):
        """Run one training epoch of the stochastic (Bayesian) posterior model.

        Uses closure variables: post_model, train_loader, prm, prior_model,
        optimizer, lr_schedule, loss_criterion, n_train_samples, n_batches.
        """
        complexity_term = 0

        post_model.train()

        for batch_idx, batch_data in enumerate(train_loader):

            # Estimate the empirical loss by averaging over several
            # Monte-Carlo draws of the stochastic network weights:
            n_MC = prm.n_MC
            empirical_loss = 0
            for _ in range(n_MC):
                inputs, targets = data_gen.get_batch_vars(batch_data, prm)
                outputs = post_model(inputs)
                empirical_loss += (1 / n_MC) * loss_criterion(outputs, targets)

            # Complexity/prior term (only when a prior model is given):
            if prior_model:
                empirical_loss, complexity_term = get_bayes_task_objective(
                    prm, prior_model, post_model, n_train_samples,
                    empirical_loss)
            else:
                complexity_term = 0.0

            # Total objective and gradient step:
            objective = empirical_loss + complexity_term
            grad_step(objective, optimizer, lr_schedule, prm.lr, i_epoch)

            # Periodic status print (accuracy from the last MC forward pass):
            log_interval = 500
            if batch_idx % log_interval == 0:
                batch_acc = correct_rate(outputs, targets)
                print(
                    cmn.status_string(i_epoch, prm.num_epochs, batch_idx,
                                      n_batches, batch_acc, objective.data[0])
                    + ' Loss: {:.4}\t Comp.: {:.4}'.format(
                        get_value(empirical_loss), get_value(complexity_term)))
def get_params_statistics(param_list):
    """Print and return per-parameter mean/STD for a list of (name, values) pairs.

    Returns two numpy arrays (means, stds), one entry per parameter.
    """
    n_params = len(param_list)
    mean_list = np.zeros(n_params)
    std_list = np.zeros(n_params)
    for idx, (param_name, param_vals) in enumerate(param_list):
        param_mean = get_value(param_vals.mean())
        param_std = get_value(param_vals.std())
        mean_list[idx] = param_mean
        std_list[idx] = param_std
        print('Parameter name: {}, mean value: {:.3}, STD: {:.3}'.format(
            param_name, param_mean, param_std))
    return mean_list, std_list
def run_test_max_posterior(model, test_loader, loss_criterion, prm):
    """Evaluate the stochastic model deterministically (eps_std = 0).

    Each forward pass uses the posterior means ("max-posterior" weights);
    the model's previous eps_std setting is restored after every batch.
    Returns a dict of evaluation statistics.
    """
    n_test_samples = len(test_loader.dataset)

    model.eval()
    test_loss = 0
    n_correct = 0
    for batch_data in test_loader:
        inputs, targets = data_gen.get_batch_vars(batch_data,
                                                  prm,
                                                  is_test=True)
        # Temporarily disable weight noise, run the batch, then restore:
        old_eps_std = model.set_eps_std(0.0)
        outputs = model(inputs)
        model.set_eps_std(old_eps_std)
        test_loss += loss_criterion(outputs, targets)  # batch-mean loss
        n_correct += count_correct(outputs, targets)

    test_loss /= n_test_samples
    test_acc = n_correct / n_test_samples
    return {
        'test_acc': test_acc,
        'n_correct': n_correct,
        'test_type': 'max_posterior',
        'n_test_samples': n_test_samples,
        'test_loss': get_value(test_loss)
    }
Example #4
0
def get_objective(prior_model, prm, mb_data_loaders, feval, mb_iterators,
                  mb_posteriors_models, loss_criterion, n_train_tasks,
                  task_ids_in_meta_batch):
    '''  Calculate objective based on tasks in meta-batch '''
    # note: it is OK if some tasks appear several times in the meta-batch

    n_tasks_in_mb = len(mb_data_loaders)

    sum_empirical_loss = 0
    correct_count = 0
    sample_count = 0
    # One loss accumulator per training task; tasks absent from this
    # meta-batch keep a value of 0.
    task_loss_list = [0] * n_train_tasks

    # ----------- loop over tasks in meta-batch -----------------------------------#
    for i_task in range(n_tasks_in_mb):

        # Posterior model corresponding to this task:
        post_model = mb_posteriors_models[i_task]
        post_model.train()

        # Evaluate this task's loss with the supplied evaluation function:
        loss, cor_count, sum_count = feval(prm, post_model, loss_criterion,
                                           mb_iterators[i_task],
                                           mb_data_loaders[i_task]['train'])

        correct_count += cor_count
        sample_count += sum_count

        # Accumulate under the task's global id (tasks may repeat):
        task_loss_list[task_ids_in_meta_batch[i_task]] += loss
        sum_empirical_loss += loss

    # Average over the tasks actually present in the meta-batch:
    avg_empirical_loss = (1 / n_tasks_in_mb) * sum_empirical_loss

    info = {
        'sample_count': get_value(sample_count),
        'correct_count': get_value(correct_count),
        'avg_empirical_loss': get_value(avg_empirical_loss)
    }
    return task_loss_list, info
Example #5
0
def run_test(model, test_loader, loss_criterion, prm):
    """Evaluate the model on the whole test set; print and return accuracy."""
    model.eval()
    total_loss = 0
    n_correct = 0
    for batch_data in test_loader:
        inputs, targets = data_gen.get_batch_vars(batch_data,
                                                  prm,
                                                  is_test=True)
        outputs = model(inputs)
        total_loss += loss_criterion(outputs, targets)  # criterion returns batch mean
        n_correct += count_correct(outputs, targets)

    n_test_samples = len(test_loader.dataset)
    n_test_batches = len(test_loader)
    # Average of the per-batch mean losses:
    test_loss = get_value(total_loss) / n_test_batches
    test_acc = n_correct / n_test_samples
    print('\nTest set: Average loss: {:.4}, Accuracy: {:.3} ( {}/{})\n'.format(
        test_loss, test_acc, n_correct, n_test_samples))
    return test_acc
def run_test_majority_vote(model, test_loader, loss_criterion, prm, n_votes=9):
    """Evaluate by majority vote over `n_votes` stochastic forward passes.

    Each pass re-samples the stochastic network weights; the per-sample
    predicted class is the one receiving the most votes. Returns a dict
    of evaluation statistics.
    """
    n_test_samples = len(test_loader.dataset)
    model.eval()
    test_loss = 0
    n_correct = 0
    # Number of classes does not depend on the batch — fetch it once:
    n_labels = data_gen.get_info(prm)['n_classes']
    for batch_data in test_loader:
        inputs, targets = data_gen.get_batch_vars(batch_data,
                                                  prm,
                                                  is_test=True)

        batch_size = inputs.shape[0]  # actual size (last batch may be smaller)
        votes = cmn.zeros_gpu((batch_size, n_labels))
        for _ in range(n_votes):
            outputs = model(inputs)
            test_loss += loss_criterion(outputs, targets)
            # Index of the max output = predicted class of this pass:
            pred = outputs.data.max(1, keepdim=True)[1]
            for i_sample in range(batch_size):
                pred_val = pred[i_sample].cpu().numpy()[0]
                votes[i_sample, pred_val] += 1

        # Class with the most votes for each sample:
        majority_pred = votes.max(1, keepdim=True)[1]
        n_correct += majority_pred.eq(
            targets.data.view_as(majority_pred)).cpu().sum()

    # NOTE(review): test_loss is summed over all n_votes passes but only
    # normalized by n_test_samples — confirm whether a /n_votes factor
    # is intended (kept as-is to preserve reported numbers).
    test_loss /= n_test_samples
    test_acc = n_correct / n_test_samples
    info = {
        'test_acc': test_acc,
        'n_correct': n_correct,
        'test_type': 'majority_vote',
        'n_test_samples': n_test_samples,
        'test_loss': get_value(test_loss)
    }
    return info
Example #7
0
    def run_train_epoch(i_epoch):
        """One standard (deterministic) training pass over train_loader.

        Uses closure variables: model, train_loader, prm, optimizer,
        lr_schedule, loss_criterion, n_batches.
        """
        log_interval = 500

        model.train()
        for batch_idx, batch_data in enumerate(train_loader):

            inputs, targets = data_gen.get_batch_vars(batch_data, prm)

            # Forward pass and loss:
            outputs = model(inputs)
            loss = loss_criterion(outputs, targets)

            # Parameter update:
            grad_step(loss, optimizer, lr_schedule, prm.lr, i_epoch)

            # Periodic progress report:
            if batch_idx % log_interval == 0:
                batch_acc = correct_rate(outputs, targets)
                print(
                    cmn.status_string(i_epoch, prm.num_epochs, batch_idx,
                                      n_batches, batch_acc, get_value(loss)))
def run_test_avg_vote(model, test_loader, loss_criterion, prm, n_votes=5):
    """Evaluate by averaging the network outputs over `n_votes` stochastic passes.

    The per-sample prediction is the argmax of the summed output scores
    (equivalent to the argmax of their average). Returns a dict of
    evaluation statistics.
    """
    n_test_samples = len(test_loader.dataset)
    model.eval()
    test_loss = 0
    n_correct = 0
    # Number of classes does not depend on the batch — fetch it once:
    n_labels = data_gen.get_info(prm)['n_classes']
    for batch_data in test_loader:
        inputs, targets = data_gen.get_batch_vars(batch_data,
                                                  prm,
                                                  is_test=True)

        # BUG FIX: use the actual batch size (as run_test_majority_vote does)
        # instead of min(prm.test_batch_size, n_test_samples), which breaks
        # on a smaller final batch.
        batch_size = inputs.shape[0]
        votes = cmn.zeros_gpu((batch_size, n_labels))
        for _ in range(n_votes):
            outputs = model(inputs)
            test_loss += loss_criterion(outputs, targets)
            votes += outputs.data  # accumulate raw scores

        majority_pred = votes.max(1, keepdim=True)[1]
        n_correct += majority_pred.eq(
            targets.data.view_as(majority_pred)).cpu().sum()

    test_loss /= n_test_samples
    test_acc = n_correct / n_test_samples
    info = {
        'test_acc': test_acc,
        'n_correct': n_correct,
        'test_type': 'AvgVote',
        'n_test_samples': n_test_samples,
        'test_loss': get_value(test_loss)
    }
    return info
def get_objective(prior_model, prm, mb_data_loaders, mb_iterators,
                  mb_posteriors_models, loss_criterion, n_train_tasks):
    '''  Calculate objective based on tasks in meta-batch.

    Returns:
        total_objective: avg empirical loss + weighted intra-task complexity
            + weighted meta (hyper-prior) complexity term.
        info: dict of monitoring scalars (counts, losses, KLD terms).
    '''
    # note: it is OK if some tasks appear several times in the meta-batch

    n_tasks_in_mb = len(mb_data_loaders)

    sum_empirical_loss = 0
    sum_intra_task_comp = 0
    correct_count = 0
    sample_count = 0

    # KLD between hyper-posterior and hyper-prior:
    hyper_kl = (1 / (2 * prm.kappa_prior**2)) * net_norm(
        prior_model, p=2)  # net_norm is L2-regularization

    # Hyper-prior term:
    meta_complex_term = get_meta_complexity_term(hyper_kl, prm, n_train_tasks)
    sum_w_kld = 0.0
    sum_b_kld = 0.0

    # ----------- loop over tasks in meta-batch -----------------------------------#
    for i_task in range(n_tasks_in_mb):

        n_samples = mb_data_loaders[i_task]['n_train_samples']

        # get sample-batch data from current task to calculate the empirical loss estimate:
        batch_data = data_gen.get_next_batch_cyclic(
            mb_iterators[i_task], mb_data_loaders[i_task]['train'])

        # The posterior model corresponding to the task in the batch:
        post_model = mb_posteriors_models[i_task]
        post_model.train()

        # Monte-Carlo iterations:
        n_MC = prm.n_MC
        task_empirical_loss = 0
        task_complexity = 0
        # ----------- Monte-Carlo loop  -----------------------------------#
        for i_MC in range(n_MC):
            # get batch variables:
            inputs, targets = data_gen.get_batch_vars(batch_data, prm)

            # Empirical Loss on current task:
            outputs = post_model(inputs)
            curr_empirical_loss = loss_criterion(outputs, targets)

            correct_count += count_correct(outputs, targets)
            sample_count += inputs.size(0)

            # Intra-task complexity of current task:
            curr_empirical_loss, curr_complexity, task_info = get_bayes_task_objective(
                prm,
                prior_model,
                post_model,
                n_samples,
                curr_empirical_loss,
                hyper_kl,
                n_train_tasks=n_train_tasks)

            sum_w_kld += task_info["w_kld"]
            sum_b_kld += task_info["b_kld"]
            task_empirical_loss += (1 / n_MC) * curr_empirical_loss
            task_complexity += (1 / n_MC) * curr_complexity
        # end Monte-Carlo loop

        sum_empirical_loss += task_empirical_loss
        sum_intra_task_comp += task_complexity

    # end loop over tasks in meta-batch
    avg_empirical_loss = (1 / n_tasks_in_mb) * sum_empirical_loss
    avg_intra_task_comp = (1 / n_tasks_in_mb) * sum_intra_task_comp
    # BUG FIX: these two were augmented assignments (`+=`) to names that were
    # never initialized, raising UnboundLocalError on every call.
    avg_w_kld = (1 / n_tasks_in_mb) * sum_w_kld
    avg_b_kld = (1 / n_tasks_in_mb) * sum_b_kld

    # Approximated total objective:
    total_objective = avg_empirical_loss + prm.task_complex_w * avg_intra_task_comp + prm.meta_complex_w * meta_complex_term

    info = {
        'sample_count': get_value(sample_count),
        'correct_count': get_value(correct_count),
        'avg_empirical_loss': get_value(avg_empirical_loss),
        'avg_intra_task_comp': get_value(avg_intra_task_comp),
        'meta_comp': get_value(meta_complex_term),
        'w_kld': avg_w_kld,
        'b_kld': avg_b_kld
    }
    return total_objective, info
    def run_train_epoch(i_epoch):
        """One meta-training epoch: iterate over meta-batches of tasks and
        take a theta (meta-parameter) gradient step per meta-batch.

        Uses closure variables: train_data_loaders, n_tasks,
        n_batches_per_task, prm, model, meta_optimizer, lr_schedule,
        loss_criterion, num_epochs.
        """
        # One batch iterator per task:
        train_iterators = [iter(train_data_loaders[t]['train'])
                           for t in range(n_tasks)]

        # Build the order in which task batches are consumed: every task
        # appears once per "round", shuffled independently each round.
        task_order = []
        task_ids_list = list(range(n_tasks))
        for _ in range(n_batches_per_task):
            random.shuffle(task_ids_list)
            task_order.extend(task_ids_list)

        # Each meta-batch groups several tasks:
        meta_batch_starts = list(range(0, len(task_order), prm.meta_batch_size))
        n_meta_batches = len(meta_batch_starts)

        # ----------- meta-batches loop (batches of tasks) ----------- #
        for i_meta_batch, mb_start in enumerate(meta_batch_starts):

            task_ids_in_meta_batch = task_order[mb_start:mb_start + prm.meta_batch_size]
            # the last meta-batch may hold fewer than prm.meta_batch_size tasks;
            # repeated task ids are fine
            n_tasks_in_batch = len(task_ids_in_meta_batch)

            mb_data_loaders = [train_data_loaders[tid] for tid in task_ids_in_meta_batch]
            mb_iterators = [train_iterators[tid] for tid in task_ids_in_meta_batch]

            # Objective over the tasks in this meta-batch:
            total_objective, info = meta_step(prm, model, mb_data_loaders,
                                              mb_iterators, loss_criterion)

            # Gradient step on theta (based on validation data):
            grad_step(total_objective, meta_optimizer, lr_schedule, prm.lr, i_epoch)

            # Periodic status print:
            log_interval = 200
            if i_meta_batch % log_interval == 0:
                batch_acc = info['correct_count'] / info['sample_count']
                print(cmn.status_string(i_epoch, num_epochs, i_meta_batch, n_meta_batches, batch_acc, get_value(total_objective)))
Example #11
0
    def run_train_epoch(i_epoch):
        """One meta-training epoch: update the shared prior and all task
        posteriors from meta-batches of tasks.

        Uses closure variables: data_loaders, n_train_tasks,
        n_batches_per_task, prm, prior_model, posteriors_models,
        all_optimizer, lr_schedule, loss_criterion.
        """
        # One batch iterator per training task:
        train_iterators = [iter(data_loaders[t]['train'])
                           for t in range(n_train_tasks)]

        # Balanced task order: each task appears once per "round", with the
        # order shuffled every round. If some tasks have less data than
        # others they may be sampled more than once in an epoch.
        task_order = []
        task_ids_list = list(range(n_train_tasks))
        for _ in range(n_batches_per_task):
            random.shuffle(task_ids_list)
            task_order.extend(task_ids_list)
        # Note: this method ensures each training sample in each task is
        # drawn in each epoch; with equal-sized tasks, exactly once.

        # ----------- meta-batches loop (batches of tasks) ----------- #
        # A theta gradient step is taken after every meta-batch.
        meta_batch_starts = list(range(0, len(task_order), prm.meta_batch_size))
        n_meta_batches = len(meta_batch_starts)

        for i_meta_batch, mb_start in enumerate(meta_batch_starts):

            # The last meta-batch may hold fewer than prm.meta_batch_size
            # tasks; repeated task ids are fine.
            task_ids_in_meta_batch = task_order[mb_start:mb_start + prm.meta_batch_size]

            mb_data_loaders = [data_loaders[tid] for tid in task_ids_in_meta_batch]
            mb_iterators = [train_iterators[tid] for tid in task_ids_in_meta_batch]
            mb_posteriors_models = [posteriors_models[tid] for tid in task_ids_in_meta_batch]

            # Objective over the tasks in this meta-batch:
            total_objective, info = get_objective(
                prior_model, prm, mb_data_loaders, mb_iterators,
                mb_posteriors_models, loss_criterion, n_train_tasks)

            # Gradient step with the shared prior and all tasks' posteriors:
            grad_step(total_objective, all_optimizer, lr_schedule, prm.lr, i_epoch)

            # Periodic training-status log:
            log_interval = 200
            if i_meta_batch % log_interval == 0:
                batch_acc = info['correct_count'] / info['sample_count']
                write_to_log(cmn.status_string(i_epoch,  prm.n_meta_train_epochs, i_meta_batch, n_meta_batches, batch_acc, get_value(total_objective)) +
                        ' Empiric-Loss: {:.4}\t Task-Comp. {:.4}\t Meta-Comp.: {:.4}, w_kld : {:.4}, b_kld : {:.4}'.
                        format(info['avg_empirical_loss'], info['avg_intra_task_comp'], info['meta_comp'], info['w_kld'], info['b_kld']), prm)
def learn(data_set, complexity_type):

    n_tasks = len(data_set)
    n_dim = data_set[0].shape[1]
    n_samples_list = [task_data.shape[0] for task_data in data_set]

    from Utils import config
    if config.USE_GPU:
        # Define prior:
        w_P_mu = Variable(torch.randn(n_dim).cuda(), requires_grad=True)
        w_P_log_sigma = Variable(torch.randn(n_dim).cuda(), requires_grad=True)

        # Init posteriors:
        w_mu = Variable(torch.randn(n_tasks, n_dim).cuda(), requires_grad=True)
        w_log_sigma = Variable(torch.randn(n_tasks, n_dim).cuda(),
                               requires_grad=True)
    else:
        # Define prior:
        w_P_mu = Variable(torch.randn(n_dim), requires_grad=True)
        w_P_log_sigma = Variable(torch.randn(n_dim), requires_grad=True)

        # Init posteriors:
        w_mu = Variable(torch.randn(n_tasks, n_dim), requires_grad=True)
        w_log_sigma = Variable(torch.randn(n_tasks, n_dim), requires_grad=True)

    learning_rate = 1e-1

    # create your optimizer
    optimizer = optim.Adam([w_mu, w_log_sigma, w_P_mu, w_P_log_sigma],
                           lr=learning_rate)

    n_epochs = 800
    batch_size = 128

    for i_epoch in range(n_epochs):

        # Sample data batch:
        b_task = np.random.randint(0, n_tasks)  # sample a random task index
        batch_size_curr = min(n_samples_list[b_task], batch_size)
        batch_inds = np.random.choice(n_samples_list[b_task],
                                      batch_size_curr,
                                      replace=False)
        task_data = torch.from_numpy(data_set[b_task][batch_inds])

        from Utils import config
        if config.USE_GPU:
            task_data = Variable(task_data.cuda(), requires_grad=False)

            # Re-Parametrization:
            w_sigma = torch.exp(w_log_sigma[b_task])
            epsilon = Variable(torch.randn(n_dim).cuda(), requires_grad=False)
            w = w_mu[b_task] + w_sigma * epsilon
        else:
            task_data = Variable(task_data, requires_grad=False)

            # Re-Parametrization:
            w_sigma = torch.exp(w_log_sigma[b_task])
            epsilon = Variable(torch.randn(n_dim), requires_grad=False)
            w = w_mu[b_task] + w_sigma * epsilon

        # Empirical Loss:
        empirical_loss = (w - task_data).pow(2).mean()

        # Complexity terms:
        sigma_sqr_prior = torch.exp(2 * w_P_log_sigma)
        complex_term_sum = 0
        for i_task in range(n_tasks):

            sigma_sqr_post = torch.exp(2 * w_log_sigma[i_task])

            kl_dist = torch.sum(w_P_log_sigma - w_log_sigma[i_task] + (
                (w_mu[i_task] - w_P_mu).pow(2) + sigma_sqr_post) /
                                (2 * sigma_sqr_prior) - 0.5)

            n_samples = n_samples_list[i_task]

            if complexity_type == 'PAC_Bayes_McAllaster':
                delta = 0.95
                complex_term_sum += torch.sqrt(
                    (1 / (2 * n_samples)) *
                    (kl_dist + np.log(2 * np.sqrt(n_samples) / delta)))

            elif complexity_type == 'Variational_Bayes':
                complex_term_sum += (1 / n_samples) * kl_dist

            elif complexity_type == 'KL':
                complex_term_sum += kl_dist
            else:
                raise ValueError('Invalid complexity_type')

        hyper_prior_factor = 1e-6 * np.sqrt(1 / n_tasks)
        hyper_prior = torch.sum(sigma_sqr_prior +
                                w_P_mu.pow(2)) * hyper_prior_factor

        # Total objective:
        complex_term = (1 / n_tasks) * complex_term_sum
        objective = empirical_loss + complex_term + hyper_prior

        # Gradient step:
        optimizer.zero_grad()  # zero the gradient buffers
        objective.backward()
        optimizer.step()  # Does the update

        if i_epoch % 100 == 0:
            print('Step: {0}, objective: {1}'.format(i_epoch,
                                                     get_value(objective)))

    # Switch  back to numpy:
    w_mu = w_mu.data.cpu().numpy()
    w_log_sigma = w_log_sigma.data.cpu().numpy()
    w_sigma = np.exp(w_log_sigma)
    w_P_mu = w_P_mu.data.cpu().numpy()
    w_P_log_sigma = w_P_log_sigma.data.cpu().numpy()
    w_P_sigma = np.exp(w_P_log_sigma)

    #  Plots:
    fig1 = plt.figure()
    ax = plt.subplot(111, aspect='equal')
    # plot prior:
    plt.plot(w_P_mu[0], w_P_mu[1], 'o', label='prior mean ')
    ell = Ellipse(xy=(w_P_mu[0], w_P_mu[1]),
                  width=w_P_sigma[0],
                  height=w_P_sigma[1],
                  angle=0,
                  color='blue')
    ell.set_facecolor('none')
    ax.add_artist(ell)
    for i_task in range(n_tasks):
        # plot task data points:
        plt.plot(data_set[i_task][:, 0],
                 data_set[i_task][:, 1],
                 '.',
                 label='Task {0} samples'.format(1 + i_task))
        # plot posterior:
        plt.plot(w_mu[i_task][0],
                 w_mu[i_task][1],
                 'o',
                 label='posterior {0} mean'.format(1 + i_task))
        ell = Ellipse(xy=(w_mu[i_task][0], w_mu[i_task][1]),
                      width=w_sigma[i_task][0],
                      height=w_sigma[i_task][1],
                      angle=0,
                      color='black')
        ell.set_facecolor('none')
        ax.add_artist(ell)

    # plt.plot(0, 0, 'x', label='hyper-prior ')

    # plt.legend(loc='upper left')
    plt.legend()
    plt.xlabel('Dimension 1')
    plt.ylabel('Dimension 2')