def run_vcl(hidden_size, no_epochs, data_gen, coreset_method,
            coreset_size=0, batch_size=None, single_head=True, sd=0):
    print("seed ", sd)
    in_dim, out_dim = data_gen.get_dims()
    x_coresets, y_coresets = [], []
    x_testsets, y_testsets = [], []

    all_acc = np.array([])
    print("max iter ", data_gen.max_iter)
    for task_id in range(data_gen.max_iter):
        x_train, y_train, x_test, y_test = data_gen.next_task()
        x_testsets.append(x_test)
        y_testsets.append(y_test)

        # Set the readout head to train
        head = 0 if single_head else task_id
        bsize = x_train.shape[0] if (batch_size is None) else batch_size

        # Train network with maximum likelihood to initialize first model
        if task_id == 0:
            ml_model = Vanilla_NN(in_dim, hidden_size, out_dim, x_train.shape[0])
            ml_model.train(x_train, y_train, task_id, no_epochs, bsize)
            mf_weights = ml_model.get_weights()
            mf_variances = None
            ml_model.close_session()

        # Select coreset if needed
        if coreset_size > 0:
            x_coresets, y_coresets, x_train, y_train = coreset_method(
                x_coresets, y_coresets, x_train, y_train, coreset_size)

        # Train on non-coreset data
        s_time = time.time()
        mf_model = MFVI_NN(in_dim, hidden_size, out_dim, x_train.shape[0],
                           prev_means=mf_weights, prev_log_variances=mf_variances)
        mf_model.train(x_train, y_train, head, no_epochs, bsize)
        e_time = time.time()
        print("time train ", e_time - s_time)
        mf_weights, mf_variances = mf_model.get_weights()

        # Incorporate coreset data and make prediction
        acc = utils.get_scores(mf_model, x_testsets, y_testsets, x_coresets, y_coresets,
                               hidden_size, no_epochs, single_head, batch_size)
        all_acc = utils.concatenate_results(acc, all_acc)
        print(all_acc)
        write_data_to_file(all_acc, "result_vcl_split_seed" + str(sd) + ".csv")

        mf_model.close_session()

    return all_acc
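# The coreset_method argument above is any callable with the signature
# (x_coresets, y_coresets, x_train, y_train, coreset_size) that appends one
# coreset per task and returns the remaining training data. A minimal sketch
# of a random-selection variant follows; the name rand_from_batch and the
# exact selection rule are illustrative assumptions, not necessarily the
# repo's own helper:
def rand_from_batch(x_coresets, y_coresets, x_train, y_train, coreset_size):
    # Randomly pick coreset_size points without replacement
    idx = np.random.choice(x_train.shape[0], coreset_size, replace=False)
    x_coresets.append(x_train[idx, :])
    y_coresets.append(y_train[idx, :])
    # Remove the selected points from the remaining training set
    x_train = np.delete(x_train, idx, axis=0)
    y_train = np.delete(y_train, idx, axis=0)
    return x_coresets, y_coresets, x_train, y_train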
def run_vcl(hidden_size, no_epochs, data_gen, coreset_method,
            coreset_size=0, batch_size=None, single_head=True):
    in_dim, out_dim = data_gen.get_dims()
    x_coresets, y_coresets = [], []
    x_testsets, y_testsets = [], []

    all_acc = np.array([])
    for task_id in range(data_gen.max_iter):
        x_train, y_train, x_test, y_test = data_gen.next_task()
        x_testsets.append(x_test)
        y_testsets.append(y_test)

        # Set the readout head to train
        head = 0 if single_head else task_id
        bsize = x_train.shape[0] if (batch_size is None) else batch_size

        # Train network with maximum likelihood to initialize first model
        if task_id == 0:
            ml_model = Vanilla_NN(in_dim, hidden_size, out_dim, x_train.shape[0])
            ml_model.train(x_train, y_train, task_id, no_epochs, bsize)
            mf_weights = ml_model.get_weights()
            mf_variances = None
            ml_model.close_session()

        # Train on non-coreset data
        mf_model = MFVI_NN(in_dim, hidden_size, out_dim, x_train.shape[0],
                           prev_means=mf_weights, prev_log_variances=mf_variances)
        mf_model.train(x_train, y_train, head, no_epochs, bsize)
        mf_weights, mf_variances = mf_model.get_weights()

        # Select coreset if needed (done after training in this variant, since
        # the uncertainty-based method needs the freshly trained model)
        if coreset_size > 0:
            if isinstance(coreset_method, str) and coreset_method == "uncertainty_based":
                x_coresets, y_coresets, x_train, y_train = uncertainty_based(
                    mf_model, task_id, x_coresets, y_coresets, x_train, y_train, coreset_size)
            else:
                x_coresets, y_coresets, x_train, y_train = coreset_method(
                    x_coresets, y_coresets, x_train, y_train, coreset_size)

        # Incorporate coreset data and make prediction
        acc = utils.get_scores(mf_model, x_testsets, y_testsets, x_coresets, y_coresets,
                               hidden_size, no_epochs, single_head, batch_size)
        all_acc = utils.concatenate_results(acc, all_acc)

        mf_model.close_session()

    return all_acc
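# uncertainty_based above is dispatched by name rather than passed as a
# callable because it needs the trained mf_model. A minimal sketch, assuming
# prediction_prob(x, head) returns Monte Carlo samples of class probabilities
# (the same convention get_scores uses below); the actual implementation may
# rank points differently:
def uncertainty_based(model, task_id, x_coresets, y_coresets, x_train, y_train, coreset_size):
    # Average the MC samples, then score each point by predictive entropy
    probs = np.mean(model.prediction_prob(x_train, task_id), axis=0)
    entropy = -np.sum(probs * np.log(probs + 1e-12), axis=1)
    # Keep the coreset_size most uncertain points
    idx = np.argsort(entropy)[-coreset_size:]
    x_coresets.append(x_train[idx, :])
    y_coresets.append(y_train[idx, :])
    x_train = np.delete(x_train, idx, axis=0)
    y_train = np.delete(y_train, idx, axis=0)
    return x_coresets, y_coresets, x_train, y_train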
def run_vcl(hidden_size, no_epochs, data_gen, coreset_method,
            coreset_size=0, batch_size=None, single_head=True, train_info=None):
    in_dim, out_dim = data_gen.get_dims()
    x_coresets, y_coresets = [], []
    x_testsets, y_testsets = [], []

    all_acc = np.array([])
    all_acc_for_save = np.zeros((data_gen.max_iter, data_gen.max_iter), dtype=np.float32)
    for task_id in range(data_gen.max_iter):
        x_train, y_train, x_test, y_test = data_gen.next_task()
        x_testsets.append(x_test)
        y_testsets.append(y_test)

        # Set the readout head to train
        head = 0 if single_head else task_id
        bsize = x_train.shape[0] if (batch_size is None) else batch_size

        # Train network with maximum likelihood to initialize first model
        if task_id == 0:
            print('Vanilla NN train for task 0!')
            ml_model = Vanilla_NN(in_dim, hidden_size, out_dim, x_train.shape[0])
            ml_model.train(x_train, y_train, task_id, no_epochs, bsize)
            mf_weights = ml_model.get_weights()
            mf_variances = None
            ml_model.close_session()

        # Select coreset if needed
        if coreset_size > 0:
            x_coresets, y_coresets, x_train, y_train = coreset_method(
                x_coresets, y_coresets, x_train, y_train, coreset_size)

        print('Current task : {}'.format(task_id))

        # Train on non-coreset data
        mf_model = MFVI_NN(in_dim, hidden_size, out_dim, x_train.shape[0],
                           prev_means=mf_weights, prev_log_variances=mf_variances)
        mf_model.train(x_train, y_train, head, no_epochs, bsize)
        mf_weights, mf_variances = mf_model.get_weights()

        # Incorporate coreset data and make prediction
        acc = utils.get_scores(mf_model, x_testsets, y_testsets, x_coresets, y_coresets,
                               hidden_size, no_epochs, single_head, batch_size)
        all_acc = utils.concatenate_results(acc, all_acc)
        for u in range(task_id + 1):
            print('>>> Test on task {:2d} : acc={:5.1f}% <<<'.format(u, 100 * acc[u]))
            all_acc_for_save[task_id, u] = acc[u]

        # Save
        log_name = '{}_{}_{}_{}epochs_batch{}_{}_{}coreset_{}'.format(
            train_info['date'], train_info['experiment'], train_info['tasknum'],
            no_epochs, train_info['batch'], train_info['coreset_method'],
            coreset_size, train_info['trial'])
        if single_head:
            log_name += '_single'
        save_path = './results/' + log_name + '.txt'
        print('Save at ' + save_path)
        np.savetxt(save_path, all_acc_for_save, '%.4f')

        mf_model.close_session()

    return all_acc
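# A typical driver for the run_vcl variants above. SplitMnistGenerator is an
# assumed name modelled on the VCL experiment scripts; any generator exposing
# get_dims / max_iter / next_task works, and rand_from_batch is the sketch
# defined earlier in this file:
if __name__ == '__main__':
    hidden_size = [256, 256]
    no_epochs = 120
    coreset_size = 40
    data_gen = SplitMnistGenerator()
    all_acc = run_vcl(hidden_size, no_epochs, data_gen, rand_from_batch,
                      coreset_size, batch_size=None, single_head=False)
    # Average accuracy over tasks seen so far, ignoring NaN padding
    print(np.nanmean(all_acc, axis=1))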
def run_vcl_shared(hidden_size, no_epochs, data_gen, coreset_method, coreset_size=0,
                   batch_size=None, path='sandbox/', multi_head=False,
                   learning_rate=0.005, store_weights=False):
    in_dim, out_dim = data_gen.get_dims()
    x_coresets, y_coresets = [], []
    x_testsets, y_testsets = [], []
    x_trainsets, y_trainsets = [], []

    all_acc = np.array([])
    no_tasks = data_gen.max_iter

    # Store train and test sets (over all tasks)
    for i in range(no_tasks):
        x_train, y_train, x_test, y_test = data_gen.next_task()
        x_trainsets.append(x_train)
        y_trainsets.append(y_train)
        x_testsets.append(x_test)
        y_testsets.append(y_test)

    all_classes = range(data_gen.out_dim)
    training_loss_classes = []  # Training loss function depends on these classes
    training_classes = []       # Which classes' heads' weights change during training
    test_classes = []           # Which classes to compare between at test time
    for task_id in range(no_tasks):
        # The data input classes for this task
        data_classes = data_gen.classes[task_id]
        if multi_head:
            training_loss_classes.append(data_classes)
            training_classes.append(data_classes)
            test_classes.append(data_classes)
        else:
            # Single-head
            training_loss_classes.append(all_classes)
            training_classes.append(all_classes)
            test_classes.append(all_classes)

    # Create model
    no_heads = out_dim
    lower_size = [in_dim] + deepcopy(hidden_size)
    upper_sizes = [[hidden_size[-1], 1] for i in range(no_heads)]
    model = MFVI_NN(lower_size, upper_sizes,
                    training_loss_classes=training_loss_classes,
                    data_classes=data_gen.classes, use_float64=multi_head)
    no_lower_weights = model.lower_net.no_weights
    no_upper_weights = [net.no_weights for net in model.upper_nets]

    # Set up model weights at initial prior
    weights_storage = WeightsStorage(no_lower_weights, no_upper_weights,
                                     prior_mean=0.0, prior_var=1.0)

    for task_id in range(no_tasks):
        # tf init model
        model.init_session(task_id, learning_rate, training_classes[task_id])

        # Get data
        x_train, y_train = x_trainsets[task_id], y_trainsets[task_id]

        # Set batch size
        bsize = x_train.shape[0] if (batch_size is None) else batch_size

        # Select coreset if needed
        if coreset_size > 0:
            x_coresets, y_coresets, x_train, y_train = coreset_method(
                x_coresets, y_coresets, x_train, y_train, coreset_size)

        # Prior of weights is previous posterior (or, if first task, already in weights_storage)
        lower_weights_prior, upper_weights_prior = weights_storage.return_weights()
        # Initialise using random means + small variances
        lower_weights = initialise_weights(lower_weights_prior)
        upper_weights = deepcopy(upper_weights_prior)
        for class_id in training_classes[task_id]:
            upper_weights[class_id] = deepcopy(initialise_weights(upper_weights_prior[class_id]))

        # Assign initial weights to the model
        model.assign_weights(range(no_heads), lower_weights, upper_weights)

        # Train on non-coreset data
        model.reset_optimiser()
        start_time = time.time()
        _, _ = model.train(x_train, y_train, task_id, lower_weights_prior,
                           upper_weights_prior, no_epochs, bsize)
        end_time = time.time()
        print('Time taken to train (s):', end_time - start_time)

        # Get weights from model, and store in weights_storage
        lower_weights, upper_weights = model.get_weights(range(no_heads))
        weights_storage.store_weights(lower_weights, upper_weights)

        # Save model weights after training on non-coreset data
        if store_weights:
            np.savez(path + 'weights_%d.npz' % task_id,
                     lower=lower_weights, upper=upper_weights,
                     classes=data_gen.classes, MNISTdigits=data_gen.sets,
                     class_index_conversion=data_gen.class_list)

        model.close_session()

        # Train on coreset data, then calculate test accuracy
        if multi_head:
            acc = np.zeros(no_tasks)
            for test_task_id in range(task_id + 1):
                # Initialise session, and load weights into model
                model.init_session(test_task_id, learning_rate, training_classes[test_task_id])
                lower_weights, upper_weights = weights_storage.return_weights()
                model.assign_weights(range(no_heads), lower_weights, upper_weights)

                if len(x_coresets) > 0:
                    print('Training on coreset data...')
                    # Train on each task's coreset data just before testing on that task
                    x_train_coreset, y_train_coreset = x_coresets[test_task_id], y_coresets[test_task_id]
                    bsize = x_train_coreset.shape[0] if (batch_size is None) else batch_size
                    model.reset_optimiser()
                    _, _ = model.train(x_train_coreset, y_train_coreset, test_task_id,
                                       lower_weights, upper_weights, no_epochs, bsize)

                # Test-time: Calculate test accuracy
                acc_interm = utils.get_scores_output_pred(model, x_testsets, y_testsets,
                                                          test_classes, task_idx=[test_task_id],
                                                          multi_head=multi_head)
                acc[test_task_id] = acc_interm[0]
                model.close_session()
        else:
            acc = np.zeros(no_tasks)
            # Initialise session, and load weights into model
            model.init_session(task_id, learning_rate, training_classes[task_id])
            lower_weights, upper_weights = weights_storage.return_weights()
            model.assign_weights(range(no_heads), lower_weights, upper_weights)

            if len(x_coresets) > 0:
                print('Training on coreset data...')
                x_train_coreset, y_train_coreset = utils.merge_coresets(x_coresets, y_coresets)
                bsize = x_train_coreset.shape[0] if (batch_size is None) else batch_size
                _, _ = model.train(x_train_coreset, y_train_coreset, task_id,
                                   lower_weights, upper_weights, no_epochs, bsize)

            # Test-time: Calculate test accuracy
            acc_interm = utils.get_scores_output_pred(model, x_testsets, y_testsets,
                                                      test_classes, task_idx=range(task_id + 1),
                                                      multi_head=multi_head)
            acc[:task_id + 1] = acc_interm
            model.close_session()

        # Append accuracies to all_acc array
        if task_id == 0:
            all_acc = np.array(acc)
        else:
            all_acc = np.vstack([all_acc, acc])
        print(all_acc)

    return all_acc
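# initialise_weights, used above, is not defined in this file. A minimal
# sketch, assuming each weight object is a (means, log_variances) pair of
# flat numpy arrays (the actual WeightsStorage layout may differ): each new
# task restarts from small random means and a small initial variance instead
# of the prior's own means and variances.
def initialise_weights(prior_weights):
    means, log_variances = prior_weights
    init_means = np.random.normal(0.0, 0.1, size=means.shape)
    init_log_variances = np.full(log_variances.shape, np.log(1e-6))
    return [init_means, init_log_variances]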
def get_scores(model, x_testsets, y_testsets, x_coresets, y_coresets,
               hidden_size, no_epochs, single_head, batch_size=None):
    mf_weights, mf_variances = model.get_weights()
    acc = []

    if single_head:
        if len(x_coresets) > 0:
            x_train, y_train = merge_coresets(x_coresets, y_coresets)
            bsize = x_train.shape[0] if (batch_size is None) else batch_size
            final_model = MFVI_NN(x_train.shape[1], hidden_size, y_train.shape[1], x_train.shape[0],
                                  prev_means=mf_weights, prev_log_variances=mf_variances)
            final_model.train(x_train, y_train, 0, no_epochs, bsize)
        else:
            final_model = model

    for i in range(len(x_testsets)):
        if not single_head:
            if len(x_coresets) > 0:
                x_train, y_train = x_coresets[i], y_coresets[i]
                bsize = x_train.shape[0] if (batch_size is None) else batch_size
                final_model = MFVI_NN(x_train.shape[1], hidden_size, y_train.shape[1], x_train.shape[0],
                                      prev_means=mf_weights, prev_log_variances=mf_variances)
                final_model.train(x_train, y_train, i, no_epochs, bsize)
            else:
                final_model = model

        head = 0 if single_head else i
        x_test, y_test = x_testsets[i], y_testsets[i]

        pred = final_model.prediction_prob(x_test, head)
        pred_mean = np.mean(pred, axis=0)
        pred_y = np.argmax(pred_mean, axis=1)
        y = np.argmax(y_test, axis=1)
        cur_acc = len(np.where((pred_y - y) == 0)[0]) * 1.0 / y.shape[0]
        acc.append(cur_acc)

        if len(x_coresets) > 0 and not single_head:
            final_model.close_session()

    if len(x_coresets) > 0 and single_head:
        final_model.close_session()

    return acc
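# get_scores calls merge_coresets directly, and the run_vcl variants call
# utils.concatenate_results. Minimal sketches consistent with how they are
# used above (stacking per-task coresets; growing a per-task accuracy matrix
# padded with NaN); the actual utils implementations may differ:
def merge_coresets(x_coresets, y_coresets):
    # Stack every task's coreset into one training set
    merged_x = np.vstack(x_coresets)
    merged_y = np.vstack(y_coresets)
    return merged_x, merged_y

def concatenate_results(score, all_score):
    # Append this task's accuracies as a new row, padding earlier rows with NaN
    if all_score.size == 0:
        return np.reshape(score, (1, -1))
    padded = np.full((all_score.shape[0], len(score)), np.nan)
    padded[:, :all_score.shape[1]] = all_score
    return np.vstack([padded, score])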