import pickle
from math import ceil, log2

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

# Project-specific helpers assumed importable from the lifelong-forests codebase:
# LifeLongDNN, generate_gaussian_parity, generate_spirals, cross_val_data,
# image_aug, estimate_posteriors.


def LF_experiment(angle, reps=1, ntrees=10, acorn=None):
    # Train on the 0-degree Gaussian XOR task, then on a rotated copy, and
    # record task-1 error with and without the second representation.
    if acorn is not None:
        np.random.seed(acorn)

    errors = np.zeros(2)
    for rep in range(reps):
        print("Starting Rep {} of Angle {}".format(rep, angle))
        X_base_train, y_base_train = generate_gaussian_parity(n=100, angle_params=0)
        X_base_test, y_base_test = generate_gaussian_parity(n=10000, angle_params=0)
        X_rotated_train, y_rotated_train = generate_gaussian_parity(n=100, angle_params=angle)

        lifelong_forest = LifeLongDNN(model="uf", parallel=True)
        lifelong_forest.new_forest(X_base_train, y_base_train, n_estimators=ntrees)
        lifelong_forest.new_forest(X_rotated_train, y_rotated_train, n_estimators=ntrees)

        all_predictions_test = lifelong_forest.predict(X_base_test, representation='all', decider=0)
        base_predictions_test = lifelong_forest.predict(X_base_test, representation=0, decider=0)

        errors[1] += 1 - np.mean(all_predictions_test == y_base_test)
        errors[0] += 1 - np.mean(base_predictions_test == y_base_test)

    errors = errors / reps
    print("Errors For Angle {}: {}".format(angle, errors))

    with open('results/angle_' + str(angle) + '.pickle', 'wb') as f:
        pickle.dump(errors, f, protocol=2)
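# Usage sketch (an assumption, not part of the original script): sweep rotation
# angles in parallel with joblib; each angle writes its own pickle under
# 'results/'. The angle grid and hyperparameters below are illustrative.
def run_rotation_sweep(angles=range(0, 91, 5), reps=10, ntrees=10, _experiment=LF_experiment):
    # `_experiment=LF_experiment` binds the function above at definition time,
    # since later scripts in this collection reuse the same name.
    from joblib import Parallel, delayed
    Parallel(n_jobs=-1)(
        delayed(_experiment)(angle, reps=reps, ntrees=ntrees) for angle in angles
    )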
def exp(n_sample, n_test, angle_params, n_trees, reps, acorn=None):
    if acorn is not None:
        np.random.seed(acorn)

    error = np.zeros(reps, dtype=float)
    for i in range(reps):
        train, label = generate_gaussian_parity(n_sample, cov_scale=0.1, angle_params=angle_params)
        test, test_label = generate_gaussian_parity(n_test, cov_scale=0.1, angle_params=angle_params)

        l2f = LifeLongDNN()
        l2f.new_forest(train, label, n_estimators=n_trees, max_samples=ceil(log2(n_sample)))

        uf_task = l2f.predict(test, representation=0, decider=0)
        error[i] = 1 - np.sum(uf_task == test_label) / n_test

    return np.mean(error, axis=0), np.std(error, ddof=1, axis=0)
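# Usage sketch (assumed): trace mean and standard deviation of the single-forest
# generalization error as the training-set size grows. Grid values are illustrative.
def run_sample_size_sweep(sample_sizes=(50, 100, 200, 400), _exp=exp):
    # `_exp=exp` pins the function above by early binding.
    means, stds = zip(*[
        _exp(n, n_test=1000, angle_params=0, n_trees=10, reps=30, acorn=12345)
        for n in sample_sizes
    ])
    return np.array(means), np.array(stds)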
def LF_experiment(data_x, data_y, angle, model, granularity, reps=1, ntrees=29, acorn=None):
    if acorn is not None:
        np.random.seed(acorn)

    errors = np.zeros(2)
    for rep in range(reps):
        print("Starting Rep {} of Angle {}".format(rep, angle))
        train_x1, train_y1, train_x2, train_y2, test_x, test_y = cross_val_data(
            data_x, data_y, total_cls=10)

        # change data angle for second task
        tmp_data = train_x2.copy()
        total_data = tmp_data.shape[0]
        for i in range(total_data):
            tmp_data[i] = image_aug(tmp_data[i], angle)

        if model == "uf":
            # Uncertainty forests operate on flattened feature vectors.
            train_x1 = train_x1.reshape(
                (train_x1.shape[0], train_x1.shape[1] * train_x1.shape[2] * train_x1.shape[3]))
            tmp_data = tmp_data.reshape(
                (tmp_data.shape[0], tmp_data.shape[1] * tmp_data.shape[2] * tmp_data.shape[3]))
            test_x = test_x.reshape(
                (test_x.shape[0], test_x.shape[1] * test_x.shape[2] * test_x.shape[3]))

        # Spread angles across the four available GPUs.
        with tf.device('/gpu:' + str(int(angle // granularity) % 4)):
            lifelong_forest = LifeLongDNN(
                model=model, parallel=True if model == "uf" else False)
            lifelong_forest.new_forest(train_x1, train_y1, n_estimators=ntrees)
            lifelong_forest.new_forest(tmp_data, train_y2, n_estimators=ntrees)

            llf_task1 = lifelong_forest.predict(test_x, representation='all', decider=0)
            llf_single_task = lifelong_forest.predict(test_x, representation=0, decider=0)

        errors[1] += 1 - np.mean(llf_task1 == test_y)
        errors[0] += 1 - np.mean(llf_single_task == test_y)

    errors = errors / reps
    print("Errors For Angle {}: {}".format(angle, errors))

    with open('rotation_results/angle_' + str(angle) + '_' + model + '.pickle', 'wb') as f:
        pickle.dump(errors, f, protocol=2)
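# Read-back sketch (assumed): collect the per-angle error pairs written above.
# Index 0 holds the single-task error, index 1 the lifelong ('all') error.
def load_rotation_errors(angles, model="uf"):
    results = {}
    for angle in angles:
        with open('rotation_results/angle_' + str(angle) + '_' + model + '.pickle', 'rb') as f:
            results[angle] = pickle.load(f)
    return results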
def LF_experiment(data_x, data_y, ntrees, shift, slot, model, num_points_per_task, acorn=None):
    df = pd.DataFrame()
    shifts = []
    slots = []
    accuracies_across_tasks = []

    train_x_task0, train_y_task0, test_x_task0, test_y_task0 = cross_val_data(
        data_x, data_y, num_points_per_task, total_task=10, shift=shift, slot=slot)

    lifelong_forest = LifeLongDNN(model=model, parallel=True if model == "uf" else False)
    lifelong_forest.new_forest(
        train_x_task0,
        train_y_task0,
        max_depth=ceil(log2(num_points_per_task)),
        n_estimators=ntrees
    )
    task_0_predictions = lifelong_forest.predict(
        test_x_task0, representation='all', decider=0
    )

    shifts.append(shift)
    slots.append(slot)
    accuracies_across_tasks.append(np.mean(task_0_predictions == test_y_task0))
    print(accuracies_across_tasks)

    for task_ii in range(29):
        train_x, train_y, _, _ = cross_val_data(
            data_x, data_y, num_points_per_task, total_task=10,
            shift=shift, slot=slot, task=task_ii)
        print("Starting Task {} For Fold {} For Slot {}".format(task_ii, shift, slot))

        lifelong_forest.new_forest(
            train_x,
            train_y,
            max_depth=ceil(log2(num_points_per_task)),
            n_estimators=ntrees
        )
        task_0_predictions = lifelong_forest.predict(
            test_x_task0, representation='all', decider=0
        )

        shifts.append(shift)
        slots.append(slot)
        accuracies_across_tasks.append(np.mean(task_0_predictions == test_y_task0))
        print(accuracies_across_tasks)

    df['data_fold'] = shifts
    df['slot'] = slots
    df['accuracy'] = accuracies_across_tasks

    file_to_save = 'result/' + model + str(ntrees) + '_' + str(shift) + '_' + str(slot) + '.pickle'
    with open(file_to_save, 'wb') as f:
        pickle.dump(df, f)
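# Usage sketch (assumed): run the experiment over a grid of folds (shifts) and
# slots. `data_x`/`data_y` and the grid bounds are illustrative; the original
# driver is not shown here.
def run_shift_slot_grid(data_x, data_y, ntrees=10, num_points_per_task=500,
                        model="uf", n_shifts=6, n_slots=10, _experiment=LF_experiment):
    for shift in range(n_shifts):
        for slot in range(n_slots):
            _experiment(data_x, data_y, ntrees, shift, slot, model, num_points_per_task)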
def LF_experiment(train_x, train_y, test_x, test_y, ntrees, shift, slot, model,
                  num_points_per_task, acorn=None):
    df = pd.DataFrame()
    shifts = []
    base_tasks = []
    accuracies_across_tasks = []

    lifelong_forest = LifeLongDNN(model=model, parallel=True if model == "uf" else False)
    for task_ii in range(10):
        print("Starting Task {} For Fold {}".format(task_ii, shift))
        if acorn is not None:
            np.random.seed(acorn)

        tmp = train_y[task_ii * 5000 + slot * num_points_per_task:
                      task_ii * 5000 + (slot + 1) * num_points_per_task]
        if task_ii != 0:
            np.random.shuffle(tmp)
            # tmp = np.random.randint(low=0, high=10, size=num_points_per_task)

        lifelong_forest.new_forest(
            train_x[task_ii * 5000 + slot * num_points_per_task:
                    task_ii * 5000 + (slot + 1) * num_points_per_task, :],
            tmp,
            max_depth=ceil(log2(num_points_per_task)),
            n_estimators=ntrees)

        llf_task = lifelong_forest.predict(test_x[0:1000, :], representation='all', decider=0)

        shifts.append(shift)
        base_tasks.append(task_ii + 1)
        accuracies_across_tasks.append(np.mean(llf_task == test_y[0:1000]))

    df['data_fold'] = shifts
    df['task'] = base_tasks
    df['task_1_accuracy'] = accuracies_across_tasks

    file_to_save = 'result/' + model + str(ntrees) + '_' + str(shift) + '_' + str(slot) + '.pickle'
    with open(file_to_save, 'wb') as f:
        pickle.dump(df, f)
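# Aggregation sketch (assumed): concatenate the per-(shift, slot) DataFrames
# saved above into one table, e.g. for plotting task-1 accuracy against the
# number of (label-shuffled) tasks seen.
def collect_shuffle_results(model, ntrees, shifts, slots):
    frames = []
    for shift in shifts:
        for slot in slots:
            path = 'result/' + model + str(ntrees) + '_' + str(shift) + '_' + str(slot) + '.pickle'
            with open(path, 'rb') as f:
                frames.append(pickle.load(f))
    return pd.concat(frames, ignore_index=True)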
def LF_experiment(num_task_1_data, rep):
    l2f = LifeLongDNN(model="uf", parallel=False)

    X_train_task0, y_train_task0 = generate_gaussian_parity(n=num_task_1_data, angle_params=0, acorn=1)
    X_train_task1, y_train_task1 = generate_gaussian_parity(n=100, angle_params=10, acorn=1)
    X_test_task0, y_test_task0 = generate_gaussian_parity(n=10000, angle_params=0, acorn=2)

    l2f.new_forest(X_train_task0, y_train_task0, n_estimators=10)
    llf_task = l2f.predict(X_test_task0, representation=0, decider=0)
    single_task_accuracy = np.nanmean(llf_task == y_test_task0)
    single_task_error = 1 - single_task_accuracy

    l2f.new_forest(X_train_task1, y_train_task1, n_estimators=10)
    llf_task = l2f.predict(X_test_task0, representation="all", decider=0)
    double_task_accuracy = np.nanmean(llf_task == y_test_task0)
    double_task_error = 1 - double_task_accuracy

    # Transfer efficiency: single-task error over multi-task error. TE defaults
    # to 1 when either error is exactly zero; the small epsilon otherwise guards
    # against numerical blow-up for near-zero denominators.
    if double_task_error == 0 or single_task_error == 0:
        te = 1
    else:
        te = (single_task_error + 1e-6) / (double_task_error + 1e-6)

    df = pd.DataFrame()
    df['te'] = [te]

    file_to_save = 'result/' + str(num_task_1_data) + '_' + str(rep) + '.pickle'
    with open(file_to_save, 'wb') as f:
        pickle.dump(df, f)
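# Read-back sketch (assumed): average the saved transfer-efficiency values
# across repetitions for a given task-1 sample size.
def mean_te(num_task_1_data, n_reps):
    tes = []
    for rep in range(n_reps):
        with open('result/' + str(num_task_1_data) + '_' + str(rep) + '.pickle', 'rb') as f:
            tes.append(pickle.load(f)['te'].iloc[0])
    return np.mean(tes)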
std_accuracy_dict = {'hybrid': [], 'building': [], 'recruiting': [], 'UF': []}
for ns in task_10_sample:
    estimation_sample_no = ceil(estimation_set * ns)
    validation_sample_no = ns - estimation_sample_no

    for rep in range(reps):
        print("doing {} samples for {}th rep".format(ns, rep))

        ## estimation
        l2f = LifeLongDNN(model="uf", parallel=True)
        for task in range(9):
            indx = np.random.choice(num_points_per_task, num_points_per_forest, replace=False)
            l2f.new_forest(
                train_x_across_task[task][indx],
                train_y_across_task[task][indx],
                max_depth=ceil(log2(num_points_per_forest)),
                n_estimators=ntrees
            )

        task_10_train_indx = np.random.choice(num_points_per_task, ns, replace=False)
        l2f.new_forest(
            train_x_across_task[9][task_10_train_indx[:estimation_sample_no]],
            train_y_across_task[9][task_10_train_indx[:estimation_sample_no]],
            max_depth=ceil(log2(estimation_sample_no)),
            n_estimators=ntrees
        )

        ## L2F validation
        posteriors_across_trees = estimate_posteriors(
def LF_experiment(train_x, train_y, test_x, test_y, ntrees, shift, slot,
                  num_points_per_task, acorn=None):
    uf_accuracies = np.zeros(10, dtype=float)
    rf_accuracies = np.zeros(10, dtype=float)
    single_task_accuracies = np.zeros(10, dtype=float)
    l2f_accuracies = np.zeros(10, dtype=float)

    # Single-task baseline: one uncertainty forest per task.
    for task_ii in range(10):
        single_task_learner = LifeLongDNN(model="uf", parallel=True)
        if acorn is not None:
            np.random.seed(acorn)
        single_task_learner.new_forest(
            train_x[task_ii * 5000 + slot * num_points_per_task:
                    task_ii * 5000 + (slot + 1) * num_points_per_task, :],
            train_y[task_ii * 5000 + slot * num_points_per_task:
                    task_ii * 5000 + (slot + 1) * num_points_per_task],
            max_depth=ceil(log2(num_points_per_task)),
            n_estimators=ntrees
        )
        llf_task = single_task_learner.predict(
            test_x[task_ii * 1000:(task_ii + 1) * 1000, :],
            representation=0, decider=0
        )
        single_task_accuracies[task_ii] = np.mean(
            llf_task == test_y[task_ii * 1000:(task_ii + 1) * 1000]
        )

    # Lifelong forest (L2F): one learner accumulating a forest per task,
    # predicting each task with all representations.
    lifelong_forest = LifeLongDNN(model="uf", parallel=True)
    for task_ii in range(10):
        print("Starting L2F Task {} For Fold {}".format(task_ii, shift))
        if acorn is not None:
            np.random.seed(acorn)
        lifelong_forest.new_forest(
            train_x[task_ii * 5000 + slot * num_points_per_task:
                    task_ii * 5000 + (slot + 1) * num_points_per_task, :],
            train_y[task_ii * 5000 + slot * num_points_per_task:
                    task_ii * 5000 + (slot + 1) * num_points_per_task],
            max_depth=ceil(log2(num_points_per_task)),
            n_estimators=ntrees
        )
        llf_task = lifelong_forest.predict(
            test_x[task_ii * 1000:(task_ii + 1) * 1000, :],
            representation='all', decider=task_ii
        )
        l2f_accuracies[task_ii] = np.mean(
            llf_task == test_y[task_ii * 1000:(task_ii + 1) * 1000]
        )

    # Pooled uncertainty forest: retrained from scratch on all tasks seen so far.
    for task_ii in range(10):
        print("Starting UF Task {} For Fold {}".format(task_ii, shift))
        lifelong_forest = LifeLongDNN(model="uf", parallel=True)
        if acorn is not None:
            np.random.seed(acorn)

        if task_ii == 0:
            train_data_x = train_x[task_ii * 5000 + slot * num_points_per_task:
                                   task_ii * 5000 + (slot + 1) * num_points_per_task, :]
            train_data_y = train_y[task_ii * 5000 + slot * num_points_per_task:
                                   task_ii * 5000 + (slot + 1) * num_points_per_task]
        else:
            train_data_x = np.concatenate(
                (
                    train_data_x,
                    train_x[task_ii * 5000 + slot * num_points_per_task:
                            task_ii * 5000 + (slot + 1) * num_points_per_task, :]
                ),
                axis=0
            )
            train_data_y = np.concatenate(
                (
                    train_data_y,
                    train_y[task_ii * 5000 + slot * num_points_per_task:
                            task_ii * 5000 + (slot + 1) * num_points_per_task]
                ),
                axis=0
            )

        lifelong_forest.new_forest(
            train_data_x,
            train_data_y,
            max_depth=ceil(log2(num_points_per_task * (task_ii + 1))),
            n_estimators=(task_ii + 1) * ntrees
        )
        llf_task = lifelong_forest.predict(
            test_x[task_ii * 1000:(task_ii + 1) * 1000, :],
            representation=0, decider=0
        )
        uf_accuracies[task_ii] = np.mean(
            llf_task == test_y[task_ii * 1000:(task_ii + 1) * 1000]
        )

    # Random forest baseline: pooled bagging classifier, also retrained per task.
    for task_ii in range(10):
        print("Starting RF Task {} For Fold {}".format(task_ii, shift))
        RF = BaggingClassifier(
            DecisionTreeClassifier(
                max_depth=ceil(log2(num_points_per_task * (task_ii + 1))),
                min_samples_leaf=1,
                max_features="auto"
            ),
            n_estimators=(task_ii + 1) * ntrees,
            max_samples=0.63,
            n_jobs=-1
        )
        if acorn is not None:
            np.random.seed(acorn)

        if task_ii == 0:
            train_data_x = train_x[task_ii * 5000 + slot * num_points_per_task:
                                   task_ii * 5000 + (slot + 1) * num_points_per_task, :]
            train_data_y = train_y[task_ii * 5000 + slot * num_points_per_task:
                                   task_ii * 5000 + (slot + 1) * num_points_per_task]
        else:
            train_data_x = np.concatenate(
                (
                    train_data_x,
                    train_x[task_ii * 5000 + slot * num_points_per_task:
                            task_ii * 5000 + (slot + 1) * num_points_per_task, :]
                ),
                axis=0
            )
            train_data_y = np.concatenate(
                (
                    train_data_y,
                    train_y[task_ii * 5000 + slot * num_points_per_task:
                            task_ii * 5000 + (slot + 1) * num_points_per_task]
                ),
                axis=0
            )

        RF.fit(train_data_x, train_data_y)
        llf_task = RF.predict(test_x[task_ii * 1000:(task_ii + 1) * 1000, :])
        rf_accuracies[task_ii] = np.mean(
            llf_task == test_y[task_ii * 1000:(task_ii + 1) * 1000]
        )
        print(rf_accuracies[task_ii])

    return single_task_accuracies, uf_accuracies, rf_accuracies, l2f_accuracies
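# Usage sketch (assumed): run the four-way comparison for one fold and slot and
# tabulate the per-task accuracies. The data arrays come from loading code not
# shown here; hyperparameters are illustrative.
def run_comparison(train_x, train_y, test_x, test_y, _experiment=LF_experiment):
    single, uf, rf, l2f = _experiment(
        train_x, train_y, test_x, test_y,
        ntrees=10, shift=0, slot=0, num_points_per_task=500)
    return pd.DataFrame({
        'task': range(1, 11),
        'single_task': single,
        'uf': uf,
        'rf': rf,
        'l2f': l2f,
    })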
def experiment(n_xor, n_rxor, n_test, reps, n_trees, max_depth, acorn=None):
    if n_xor == 0 and n_rxor == 0:
        raise ValueError('Wake up and provide samples to train!!!')

    if acorn is not None:
        np.random.seed(acorn)

    # Columns: UF error on XOR, L2F error on XOR, UF error on R-XOR, L2F error on R-XOR.
    errors = np.zeros((reps, 4), dtype=float)

    for i in range(reps):
        l2f = LifeLongDNN()
        uf = LifeLongDNN()

        # source data
        xor, label_xor = generate_gaussian_parity(n_xor, cov_scale=0.1, angle_params=0)
        test_xor, test_label_xor = generate_gaussian_parity(n_test, cov_scale=0.1, angle_params=0)

        # target data
        nxor, label_nxor = generate_gaussian_parity(n_rxor, cov_scale=0.1, angle_params=np.pi / 4)
        test_nxor, test_label_nxor = generate_gaussian_parity(
            n_test, cov_scale=0.1, angle_params=np.pi / 4)

        if n_xor == 0:
            l2f.new_forest(nxor, label_nxor, n_estimators=n_trees, max_depth=max_depth)

            errors[i, 0] = 0.5
            errors[i, 1] = 0.5

            uf_task2 = l2f.predict(test_nxor, representation=0, decider=0)
            l2f_task2 = l2f.predict(test_nxor, representation='all', decider=0)

            errors[i, 2] = 1 - np.sum(uf_task2 == test_label_nxor) / n_test
            errors[i, 3] = 1 - np.sum(l2f_task2 == test_label_nxor) / n_test
        elif n_rxor == 0:
            l2f.new_forest(xor, label_xor, n_estimators=n_trees, max_depth=max_depth)

            uf_task1 = l2f.predict(test_xor, representation=0, decider=0)
            l2f_task1 = l2f.predict(test_xor, representation='all', decider=0)

            errors[i, 0] = 1 - np.sum(uf_task1 == test_label_xor) / n_test
            errors[i, 1] = 1 - np.sum(l2f_task1 == test_label_xor) / n_test
            errors[i, 2] = 0.5
            errors[i, 3] = 0.5
        else:
            l2f.new_forest(xor, label_xor, n_estimators=n_trees, max_depth=max_depth)
            l2f.new_forest(nxor, label_nxor, n_estimators=n_trees, max_depth=max_depth)

            # The UF baseline gets twice the trees so total forest size matches L2F.
            uf.new_forest(xor, label_xor, n_estimators=2 * n_trees, max_depth=max_depth)
            uf.new_forest(nxor, label_nxor, n_estimators=2 * n_trees, max_depth=max_depth)

            uf_task1 = uf.predict(test_xor, representation=0, decider=0)
            l2f_task1 = l2f.predict(test_xor, representation='all', decider=0)
            uf_task2 = uf.predict(test_nxor, representation=1, decider=1)
            l2f_task2 = l2f.predict(test_nxor, representation='all', decider=1)

            errors[i, 0] = 1 - np.sum(uf_task1 == test_label_xor) / n_test
            errors[i, 1] = 1 - np.sum(l2f_task1 == test_label_xor) / n_test
            errors[i, 2] = 1 - np.sum(uf_task2 == test_label_nxor) / n_test
            errors[i, 3] = 1 - np.sum(l2f_task2 == test_label_nxor) / n_test

    return np.mean(errors, axis=0)
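# Post-processing sketch (an interpretation, not from the original script): the
# four mean errors returned above are ordered (UF on XOR, L2F on XOR, UF on
# R-XOR, L2F on R-XOR), so transfer efficiency is the single-task error divided
# by the lifelong error for each task.
def transfer_efficiencies(mean_errors):
    bte = mean_errors[0] / mean_errors[1]  # backward TE on XOR
    fte = mean_errors[2] / mean_errors[3]  # forward TE on R-XOR
    return bte, fte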
def experiment(n_spiral3, n_spiral5, n_test, reps, n_trees, max_depth, acorn=None):
    if n_spiral3 == 0 and n_spiral5 == 0:
        raise ValueError('Wake up and provide samples to train!!!')

    if acorn is not None:
        np.random.seed(acorn)

    # Columns: UF error on 3-spiral, L2F error on 3-spiral,
    # UF error on 5-spiral, L2F error on 5-spiral.
    errors = np.zeros((reps, 4), dtype=float)

    for i in range(reps):
        l2f = LifeLongDNN()
        uf = LifeLongDNN()

        # source data
        spiral3, label_spiral3 = generate_spirals(n_spiral3, 2, 3, noise=2.5)
        test_spiral3, test_label_spiral3 = generate_spirals(n_test, 2, 3, noise=2.5)

        # target data
        spiral5, label_spiral5 = generate_spirals(n_spiral5, 2, 5, noise=2.5)
        test_spiral5, test_label_spiral5 = generate_spirals(n_test, 2, 5, noise=2.5)

        if n_spiral3 == 0:
            l2f.new_forest(spiral5, label_spiral5, n_estimators=n_trees, max_depth=max_depth)

            errors[i, 0] = 0.5
            errors[i, 1] = 0.5

            uf_task2 = l2f.predict(test_spiral5, representation=0, decider=0)
            l2f_task2 = l2f.predict(test_spiral5, representation='all', decider=0)

            errors[i, 2] = 1 - np.sum(uf_task2 == test_label_spiral5) / n_test
            errors[i, 3] = 1 - np.sum(l2f_task2 == test_label_spiral5) / n_test
        elif n_spiral5 == 0:
            l2f.new_forest(spiral3, label_spiral3, n_estimators=n_trees, max_depth=max_depth)

            uf_task1 = l2f.predict(test_spiral3, representation=0, decider=0)
            l2f_task1 = l2f.predict(test_spiral3, representation='all', decider=0)

            errors[i, 0] = 1 - np.sum(uf_task1 == test_label_spiral3) / n_test
            errors[i, 1] = 1 - np.sum(l2f_task1 == test_label_spiral3) / n_test
            errors[i, 2] = 0.5
            errors[i, 3] = 0.5
        else:
            l2f.new_forest(spiral3, label_spiral3, n_estimators=n_trees, max_depth=max_depth)
            l2f.new_forest(spiral5, label_spiral5, n_estimators=n_trees, max_depth=max_depth)

            # The UF baseline gets twice the trees so total forest size matches L2F.
            uf.new_forest(spiral3, label_spiral3, n_estimators=2 * n_trees, max_depth=max_depth)
            uf.new_forest(spiral5, label_spiral5, n_estimators=2 * n_trees, max_depth=max_depth)

            uf_task1 = uf.predict(test_spiral3, representation=0, decider=0)
            l2f_task1 = l2f.predict(test_spiral3, representation='all', decider=0)
            uf_task2 = uf.predict(test_spiral5, representation=1, decider=1)
            l2f_task2 = l2f.predict(test_spiral5, representation='all', decider=1)

            errors[i, 0] = 1 - np.sum(uf_task1 == test_label_spiral3) / n_test
            errors[i, 1] = 1 - np.sum(l2f_task1 == test_label_spiral3) / n_test
            errors[i, 2] = 1 - np.sum(uf_task2 == test_label_spiral5) / n_test
            errors[i, 3] = 1 - np.sum(l2f_task2 == test_label_spiral5) / n_test

    return np.mean(errors, axis=0)
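# Usage sketch (assumed): average spiral errors over many repetitions for one
# sample-size setting; all hyperparameters below are illustrative.
def run_spiral_experiment(_experiment=experiment):
    return _experiment(
        n_spiral3=750, n_spiral5=750, n_test=1000,
        reps=100, n_trees=10, max_depth=ceil(log2(750)), acorn=0)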
def LF_experiment(train_x, train_y, test_x, test_y, ntrees, shift, slot, model,
                  num_points_per_task, acorn=None):
    df = pd.DataFrame()
    single_task_accuracies = np.zeros(10, dtype=float)
    shifts = []
    tasks = []
    base_tasks = []
    accuracies_across_tasks = []

    # Single-task baselines: for "uf", trained separately here; for "dnn",
    # filled in from the lifelong learner's own representation below.
    for task_ii in range(10):
        if model == "uf":
            single_task_learner = LifeLongDNN(model="uf", parallel=True)
            if acorn is not None:
                np.random.seed(acorn)
            single_task_learner.new_forest(
                train_x[task_ii * 5000 + slot * num_points_per_task:
                        task_ii * 5000 + (slot + 1) * num_points_per_task, :],
                train_y[task_ii * 5000 + slot * num_points_per_task:
                        task_ii * 5000 + (slot + 1) * num_points_per_task],
                max_depth=ceil(log2(num_points_per_task)),
                n_estimators=ntrees * (task_ii + 1))
            llf_task = single_task_learner.predict(
                test_x[task_ii * 1000:(task_ii + 1) * 1000, :],
                representation=0, decider=0)
            single_task_accuracies[task_ii] = np.mean(
                llf_task == test_y[task_ii * 1000:(task_ii + 1) * 1000])

    lifelong_forest = LifeLongDNN(model=model, parallel=True if model == "uf" else False)
    for task_ii in range(10):
        print("Starting Task {} For Fold {}".format(task_ii, shift))
        if acorn is not None:
            np.random.seed(acorn)

        lifelong_forest.new_forest(
            train_x[task_ii * 5000 + slot * num_points_per_task:
                    task_ii * 5000 + (slot + 1) * num_points_per_task, :],
            train_y[task_ii * 5000 + slot * num_points_per_task:
                    task_ii * 5000 + (slot + 1) * num_points_per_task],
            max_depth=ceil(log2(num_points_per_task)),
            n_estimators=ntrees)

        if model == "dnn":
            # The DNN single-task accuracy uses only the task's own
            # representation and decider.
            llf_task = lifelong_forest.predict(
                test_x[task_ii * 1000:(task_ii + 1) * 1000, :],
                representation=task_ii, decider=task_ii)
            single_task_accuracies[task_ii] = np.mean(
                llf_task == test_y[task_ii * 1000:(task_ii + 1) * 1000])

        # Evaluate every task seen so far with all representations.
        for task_jj in range(task_ii + 1):
            llf_task = lifelong_forest.predict(
                test_x[task_jj * 1000:(task_jj + 1) * 1000, :],
                representation='all', decider=task_jj)

            shifts.append(shift)
            tasks.append(task_jj + 1)
            base_tasks.append(task_ii + 1)
            accuracies_across_tasks.append(
                np.mean(llf_task == test_y[task_jj * 1000:(task_jj + 1) * 1000]))

    df['data_fold'] = shifts
    df['task'] = tasks
    df['base_task'] = base_tasks
    df['accuracy'] = accuracies_across_tasks

    df_single_task = pd.DataFrame()
    df_single_task['task'] = range(1, 11)
    df_single_task['data_fold'] = shift
    df_single_task['accuracy'] = single_task_accuracies

    summary = (df, df_single_task)
    file_to_save = 'result/result/' + model + str(num_points_per_task) + '_' + \
        str(ntrees) + '_' + str(shift) + '_' + str(slot) + '.pickle'
    with open(file_to_save, 'wb') as f:
        pickle.dump(summary, f)
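# Read-back sketch (assumed): the pickle written above holds a
# (multi-task DataFrame, single-task DataFrame) tuple; unpack both for
# downstream transfer-efficiency plots.
def load_summary(model, num_points_per_task, ntrees, shift, slot):
    path = ('result/result/' + model + str(num_points_per_task) + '_' +
            str(ntrees) + '_' + str(shift) + '_' + str(slot) + '.pickle')
    with open(path, 'rb') as f:
        df, df_single_task = pickle.load(f)
    return df, df_single_task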