# print i1, i2, i3 # pool_dat = load_pool() # init_dat = load_init() # test_dat = load_test() # gc.collect() # pool_dat, init_dat, test_dat = reload_dat() ### ELLA + Active Task Selection ### fig_flag[2] = True test_acc_el_att, learned_size_el_att = run_lm_trials(pool_dat, init_dat, test_dat, ELLA, evaluation=config.EVAL_ME, engine=eng, do_active=False, task_ctrl=exp.TASK_ACTIVE, n_trials=5) print test_acc_el_att line_acc_el_att, = plt.plot(learned_size_el_att, test_acc_el_att, label='Active Task') util.curve_to_csv("intermediate/att_ella.csv", test_acc_el_att, learned_size_el_att) handles.append(line_acc_el_att) # pool_dat = load_pool() # init_dat = load_init() # test_dat = load_test() # # gc.collect() ### Active ELLA ### fig_flag[3] = True test_acc_el_al, learned_size_el_al = run_lm_trials(pool_dat, init_dat, test_dat, ELLA, evaluation=config.EVAL_ME, engine=eng, task_ctrl=exp.TASK_NONE, do_active=True, n_trials=5) print test_acc_el_al line_acc_el_al, = plt.plot(learned_size_el_al, test_acc_el_al, label='Active Learning') util.curve_to_csv("res/al_ella.csv", test_acc_el_al, learned_size_el_al)
    # Tail of a function defined above this chunk: hand back the learning
    # curve (accuracy per step, examples seen per step).
    return test_acc, learned_size


# --- Script section: single-task-learner (STL) landmine baselines ---
pool_dat = load_pool()
init_dat = load_init()
test_dat = load_test()

# Flatten the per-task pool into one shuffled numpy matrix so instances
# can be drawn without regard to task boundaries.
train_pool = np.array(gen_land_pool(pool_dat))
shuffle(train_pool)

print "[info]Start passive learning..."
# Passive baseline: instances are consumed in (shuffled) pool order.
test_acc_ps, learned_size_ps = run_stl_landm(pool_dat, init_dat, test_dat, do_active=False)
util.curve_to_csv("res/ps_stl_non.csv", test_acc_ps, learned_size_ps)

print "[info]Start active learning..."
# Active variant: the learner picks which pool instances to label next.
test_acc_ac, learned_size_ac = run_stl_landm(pool_dat, init_dat, test_dat, do_active=True)
util.curve_to_csv("res/acc_stl_non.csv", test_acc_ac, learned_size_ac)

# Debug output, kept disabled:
# print test_acc_ps
# print learned_size_ps
# print test_acc_ac
# print learned_size_ac

# Fresh figure for the curves plotted after this chunk.
figure = plt.figure()
        # (Interior of the ELLA training while-loop of a function whose
        # header is above this chunk.)
        # Bucket the freshly selected instances by task id: one x-list and
        # one y-list per task, T tasks total.
        next_train_x = [[] for i in range(0, T)]
        next_train_y = [[] for i in range(0, T)]
        for i in range(0, selected_x.shape[0]):
            t = int(tasks[i])
            next_train_x[t].append(selected_x[i, :])
            next_train_y[t].append(selected_y[i])

        # Training #
        # Incrementally refine the per-task model only for tasks that
        # actually received new instances this round.
        for t in range(0, T):
            if next_train_x[t]:
                ella_model.refine_model(np.array(next_train_x[t]), np.array(next_train_y[t]), t)

        # Yield results #
        # Plain accuracy scoring was replaced by AUC; the old call is kept
        # for reference:
        # acc = ella_score(ella_model, test_dat)
        acc = ella_auc_score(ella_model, test_dat)
        test_acc.append(acc)
        # Examples consumed so far = initial set + (original pool - remaining pool).
        learned_size.append(total_pool_size - prof.get_pool_size() + init_size)

        # Optional iteration cap: stop after N_ITER rounds when enabled.
        if ITER_ENABLE:
            if count < 0:
                break
            count -= 1

    # Post-loop reporting: dump the learning curve as figure and CSV.
    print test_acc
    print learned_size
    util.learning_curve("res/fig_ella_non.png", test_acc, learned_size)
    util.curve_to_csv("res/ella_non.csv", test_acc, learned_size)
        # (Interior of the single-task training while-loop of a function
        # whose header is above this chunk.)
        # Bucket selected raw pool rows by task. Row layout is presumably
        # columns 0-8 = features, 9 = label, 10 = task id — TODO confirm
        # against gen_land_pool.
        next_train_x = [[] for i in range(0, T)]
        next_train_y = [[] for i in range(0, T)]
        for row in selected:
            t = int(row[10])
            next_train_x[t].append(row[:9])
            next_train_y[t].append(row[9])

        # Refine only the tasks that received new instances this round.
        for t in range(0, T):
            if next_train_x[t]:
                learner.refine_model(np.array(next_train_x[t]), np.array(next_train_y[t]), t)

        # Consume the batch from the front of the (pre-shuffled) pool.
        train_pool = train_pool[INS_SIZE:, :]

        acc = util.model_roc_score(learner, test_dat)
        test_acc.append(acc)
        # Examples consumed so far = initial set + (original pool - remaining pool).
        learned_size.append(total_pool_size - len(train_pool) + init_size)

        # Optional iteration cap: stop after the configured round budget.
        if ITER_ENABLE:
            if count < 0:
                break
            count -= 1

    # Post-loop reporting: dump the learning curve as figure and CSV.
    print test_acc
    print learned_size
    util.learning_curve("res/fig_non_active.png", test_acc, learned_size)
    util.curve_to_csv("res/non_ac.csv", test_acc, learned_size)

    # Disabled per-model size dump:
    # for model in models:
    #     print "size ", model.get_trained_size()
def run_ml_landmine(pool_dat, init_dat, test_dat, learner_class, engine=None, do_active=False, task_ctrl=TASK_NONE, evaluation=config.EVAL_ACC): if task_ctrl == TASK_NONE: active_task = False rand_task = False elif task_ctrl == TASK_RAND: active_task = False rand_task = True elif task_ctrl == TASK_ACTIVE: active_task = True rand_task = False # Copy all data test_dat_cp = copy.deepcopy(test_dat) init_dat_cp = copy.deepcopy(init_dat) pool_dat_cp = copy.deepcopy(pool_dat) ###### Train Initial Model ###### init_size = dat_size(init_dat_cp) if learner_class is ELLA: # Add bias term test_dat_cp = util.add_bias(test_dat_cp) pool_dat_cp = util.add_bias(pool_dat_cp) init_dat_cp = util.add_bias(init_dat_cp) learner = learner_class(engine, init_dat_cp) else: learner = learner_class(LogisticRegression(), init_dat_cp) print "Start training..." test_acc = [] learned_size = [] prof = Professor(init_dat_cp, pool_dat_cp, random=True, do_active=do_active, multi_t=active_task or rand_task, rand_task=rand_task) total_pool_size = prof.get_pool_size() init_acc = model_eval(learner, test_dat_cp, evaluation) test_acc = [init_acc] learned_size = [init_size] task_rec = [] ## Training Until No more data available OR Reach the set N_ITER ### count = N_ITER while prof.has_next(): # print "pool", prof.get_pool_size() selected_x, selected_y, tasks = prof.next_train_set(INS_SIZE, learner=learner) task_rec.append(tasks[0]) # Group selected training set by tasks next_train_x = [[] for i in range(0, T)] next_train_y = [[] for i in range(0, T)] for i in range(0, selected_x.shape[0]): t = int(tasks[i]) next_train_x[t].append(selected_x[i, :]) next_train_y[t].append(selected_y[i]) for t in range(0, T): if next_train_x[t]: learner.refine_model(np.array(next_train_x[t]), np.array(next_train_y[t]), t) acc = model_eval(learner, test_dat_cp, evaluation) # print acc test_acc.append(acc) # print learner.get_trained_size() learned_size.append(total_pool_size - prof.get_pool_size() + init_size) if ITER_ENABLE: 
if count < 0: break count -= 1 if count % 20 == 0: if engine is not None: engine.clean_mem() util.curve_to_csv("res/task_save" + str(do_active) + " .csv", task_rec, [0]) return test_acc, learned_size