def hyperopt_GD(P, param_space, eval_step=5, max_evals=None, P_val=None):
    P.set('save_step', INF)
    F = ds.get_data(P)
    P.log("Data loaded.")

    DL_L, _, DL_V = pp.get_all_dataloader(P, F, P_val)
    P.log(f"Number of batches: Labelled = {len(DL_L)} | Validation = {len(DL_V)}")

    if P.get('CUDA') and torch.cuda.is_available():
        floatTensor = torch.cuda.FloatTensor
    else:
        floatTensor = torch.FloatTensor

    def obj(args):
        P0 = P.copy()
        P0.update(args)

        perf_mat = np.empty(shape=(2, P.get('runs')))
        for run in range(P0.get('runs')):
            G, D, _, _ = GAN.train_GD(P0, DL_L, DL_V, name=P0.get('name') + '_%d' % run)
            G.eval()
            D.eval()

            with torch.no_grad():
                # Accuracy of D against generated samples (target label 0.0 = fake)
                YV = floatTensor(pp.get_one_hot_labels(P0, num=P0.get('batch_size')))
                Z = floatTensor(np.random.normal(0, 1, (P0.get('batch_size'), P0.get('noise_shape'))))
                XG = G(torch.cat((Z, YV), dim=1))
                perf_mat[0, run] = calc_accuracy(
                    D(torch.cat((XG, YV), dim=1)),
                    floatTensor(P0.get('batch_size'), 1).fill_(0.0))

                # Accuracy of D against real validation pairs (target label 1.0 = real)
                perf_mat[1, run] = np.mean([
                    calc_accuracy(D(torch.cat((XV, YV), dim=1)),
                                  floatTensor(XV.shape[0], 1).fill_(1.0))
                    for XV, YV in DL_V])

        # D should sit near 50% against G and near 100% against real data
        val = np.mean((0.5 - perf_mat[0])**2) + 0.1 * np.mean((1 - perf_mat[1])**2)
        P0.log(
            f"loss = {val:.5f} Accuracy D = {np.mean(perf_mat):.5f} | "
            f"vs G = {np.mean(perf_mat[0]):.5f} | vs real = {np.mean(perf_mat[1]):.5f}",
            name='hyperopt')
        return val

    hyperopt_Search(P, param_space, obj, eval_step=eval_step, max_evals=max_evals)
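# A minimal invocation sketch, assuming `param_space` follows the hyperopt
# `hp.*` search-space convention consumed by hyperopt_Search. The parameter
# keys and ranges below are illustrative assumptions, not values taken from
# this codebase.
#
# from hyperopt import hp
# import numpy as np
#
# param_space_example = {
#     'GLR': hp.loguniform('GLR', np.log(1e-5), np.log(1e-2)),  # assumed generator learning-rate key
#     'DLR': hp.loguniform('DLR', np.log(1e-5), np.log(1e-2)),  # assumed discriminator learning-rate key
#     'batch_size': hp.choice('batch_size', [128, 256, 512]),
# }
#
# hyperopt_GD(P, param_space_example, eval_step=5, max_evals=100)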
def hyperopt_R(P, param_space, eval_step=5, max_evals=None, P_val=None):
    P.set('save_step', INF)
    F = ds.get_data(P)
    P.log("Data loaded.")

    DL_L, _, DL_V = pp.get_all_dataloader(P, F, P_val)
    P.log(f"Number of batches: Labelled = {len(DL_L)} | Validation = {len(DL_V)}")

    def obj(args):
        P0 = P.copy()
        P0.update(args)
        P0.log("Check Params: " + ", ".join(
            str(key) + ' = ' + ("'" + val + "'" if isinstance(val, str) else str(val))
            for key, val in args.items()), name='hyperopt')

        perf_mat = np.empty(shape=(2, P.get('runs'), len(DL_V)))
        for run in range(P0.get('runs')):
            C, _, _ = GAN.train_Base(P0, DL_L, DL_V, name=P0.get('name') + '_%d' % run)
            C.eval()
            with torch.no_grad():
                for i, (XV, YV) in enumerate(DL_V):
                    perf_mat[0, run, i] = calc_f1score(C(XV), YV, average='weighted')
                    perf_mat[1, run, i] = calc_accuracy(C(XV), YV)

        perf = np.mean(perf_mat.reshape(2, -1), axis=1)
        P0.log(f"F1: {perf[0]:.5f} | Accuracy: {perf[1]:.5f}", name='hyperopt')
        return -perf[0]

    hyperopt_Search(P, param_space, obj, eval_step=eval_step, max_evals=max_evals)
def accept_request():
    data = request.get_data()
    data_json = json.loads(data)
    session_id = data_json['session_id']
    username = data_json['username']

    brp_data = data_source.get_data(username)
    attribute_request = session_manager.get_session(session_id)['request']
    validator_response = validator.check(attribute_request, brp_data)

    random_color = "rgb({0},{1},{2})".format(
        random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))

    data = {
        'request_valid': validator_response,
        'request_status': 'ACCEPTED',
        'secret': random_color
    }
    active_session = session_manager.append_session_data(session_id, data)

    # The session cannot be ended here, since the requestor still needs to access the data.
    # session_manager.end_session(session_id)
    return json_response({'response': active_session})
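# Illustrative request body for accept_request; the field names come from the
# handler above, the values are made up.
#
# import json
#
# example_body = json.dumps({
#     'session_id': 'example-session-id',  # hypothetical session identifier
#     'username': 'alice',                 # hypothetical user whose BRP data is fetched
# })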
def do_GET(self):
    parsed_path = urlparse(self.path)
    self.send_response(200)
    self.send_header('Content-Type', 'application/json')
    self.end_headers()
    data = get_data(True)
    self.wfile.write(json.dumps(data).encode())
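# do_GET looks like a method of a BaseHTTPRequestHandler subclass from the
# standard library. A minimal serving sketch under that assumption follows;
# the class name and port are hypothetical, not taken from this codebase.
import json
from http.server import BaseHTTPRequestHandler, HTTPServer
from urllib.parse import urlparse

from data_source import get_data

class RequestHandler(BaseHTTPRequestHandler):
    def do_GET(self):
        # same body as the handler defined above
        parsed_path = urlparse(self.path)
        self.send_response(200)
        self.send_header('Content-Type', 'application/json')
        self.end_headers()
        data = get_data(True)
        self.wfile.write(json.dumps(data).encode())

if __name__ == '__main__':
    HTTPServer(('localhost', 8080), RequestHandler).serve_forever()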
def plt_FX_num(P, max_n=908, P_val=None, indeces=None):
    if indeces is not None:
        max_n = min(max_n, len(indeces))
    P.set('FX_indeces', None)

    V = ds.get_data(P)
    for i, (X, Y) in enumerate(V):
        count_string = ', '.join(
            f"{int(val)}: {count}" for val, count in zip(*np.unique(Y, return_counts=True)))
        P.log(f"V[{i+1}]: {X.shape} - {Y.shape} ({count_string})")

    def train_model(X, y, seed):
        mlp = MLP(hidden_layer_sizes=(150, 150), max_iter=2000, random_state=seed)
        return mlp.fit(X, y)

    mat = np.empty(shape=(3, max_n))
    FX = np.arange(1, max_n + 1, 1)
    for fx in FX:
        if indeces is None:
            P.set('FX_num', fx)
        else:
            P.set('FX_indeces', indeces[:fx])

        V0 = ds.select_features(V, P.get('FX_indeces'))
        F0 = pp.perform_preprocessing(P, V0, P_val)
        x_train, y_train = F0[0]
        x_test, y_test = F0[2]

        model_list = Parallel(n_jobs=8)(
            delayed(train_model)(x_train, y_train.ravel(), seed)
            for seed in range(P.get('runs')))

        res = np.empty(shape=(3, P.get('runs')))
        for run, mlp in enumerate(model_list):
            res[0, run] = accuracy_score(y_test.ravel(), mlp.predict(x_test))
            res[1, run] = f1_score(y_test.ravel(), mlp.predict(x_test), average='weighted')
            res[2, run] = mlp.n_iter_
        mat[:, fx - 1] = np.mean(res, axis=1)
        P.log(f"Fx_num = {fx}: [Acc = {mat[0,fx-1]:.2f}] [F1 = {mat[1,fx-1]:.2f}] [{mat[2,fx-1]:.2f} iterations]")

    plt.figure(figsize=(27, 9), dpi=300, clear=True)
    fig, ax = plt.subplots()

    ax.plot(FX, mat[0], linestyle='solid', label='Accuracy')
    ax.plot(FX, mat[1], linestyle='solid', label='F1 Score')
    ax.legend()
    ax.set_xlabel('FX_num')
    ax.set_ylabel('Performance')
    ax.set_xlim(1, max_n)
    ax.grid()
    save_fig(P, 'eval_fx_num', fig)

    ax.plot(FX, mat[2] / np.max(mat[2]), linestyle='solid', label='Iterations')
    ax.legend()
    save_fig(P, 'eval_fx_num_iterations', fig)
def hyperopt_GAN(P, param_space, eval_step=5, max_evals=None, P_val=None):
    P.set('save_step', INF)
    P.set('R_active', False)
    F = ds.get_data(P)
    P.log("Data loaded.")

    DL_L, DL_U_iter, DL_V = pp.get_all_dataloader(P, F, P_val)
    P.log(f"Number of batches: Labelled = {len(DL_L)} | Unlabelled = {len(DL_U_iter)} | Validation = {len(DL_V)}")

    if P.get('CUDA') and torch.cuda.is_available():
        floatTensor = torch.cuda.FloatTensor
    else:
        floatTensor = torch.FloatTensor

    def obj(args):
        P0 = P.copy()
        P0.update(args)
        P0.log("Check Params: " + ", ".join(
            str(key) + ' = ' + ("'" + val + "'" if isinstance(val, str) else str(val))
            for key, val in args.items()), name='hyperopt')

        perf_mat_C = np.empty(shape=(2, P.get('runs'), len(DL_V)))
        perf_mat_D = np.empty(shape=(3, P.get('runs')))
        for run in range(P0.get('runs')):
            G, D, C, _, _ = GAN.train_GAN(P0, DL_L, DL_U_iter, DL_V, name=P0.get('name') + '_%d' % run)
            G.eval()
            D.eval()
            C.eval()

            with torch.no_grad():
                acc_D = np.empty(shape=(2, len(DL_V)))
                for i, (XV, YV) in enumerate(DL_V):
                    YC = C(XV)
                    perf_mat_C[0, run, i] = calc_f1score(YC, YV, average='weighted')
                    perf_mat_C[1, run, i] = calc_accuracy(YC, YV)
                    acc_D[0, i] = calc_accuracy(D(torch.cat((XV, YV), dim=1)),
                                                floatTensor(XV.shape[0], 1).fill_(1.0))
                    acc_D[1, i] = calc_accuracy(D(torch.cat((XV, YC), dim=1)),
                                                floatTensor(XV.shape[0], 1).fill_(0.0))

                YV = floatTensor(pp.get_one_hot_labels(P0, num=P0.get('batch_size')))
                Z = floatTensor(np.random.normal(0, 1, (P0.get('batch_size'), P0.get('noise_shape'))))
                XG = G(torch.cat((Z, YV), dim=1))
                perf_mat_D[0, run] = calc_accuracy(D(torch.cat((XG, YV), dim=1)),
                                                   floatTensor(P0.get('batch_size'), 1).fill_(0.0))
                perf_mat_D[1, run] = np.mean(acc_D[0])
                perf_mat_D[2, run] = np.mean(acc_D[1])

        perf = np.mean(perf_mat_C.reshape(2, -1), axis=1)
        val = (
            0.15 * np.mean((0.5 - perf_mat_D[0])**2)  # G/D accuracy ideally sits around 50%
            + 0.05 * np.mean((1 - perf_mat_D[1])**2)  # D is rewarded for accurately classifying real pairs
            + 0.05 * np.mean((1 - perf_mat_D[2])**2)  # D is rewarded for accurately classifying classifier predictions
            - perf[0]                                 # the classifier's F1 score is most important
        )
        P0.log(
            f"loss = {val:.5f} [Accuracy D = {np.mean(perf_mat_D):.5f} | vs G = {np.mean(perf_mat_D[0]):.5f} | "
            f"vs C = {np.mean(perf_mat_D[2]):.5f} | vs real = {np.mean(perf_mat_D[1]):.5f}] "
            f"[C - F1: {perf[0]:.5f} | Accuracy: {perf[1]:.5f}]", name='hyperopt')
        return val

    hyperopt_Search(P, param_space, obj, eval_step=eval_step, max_evals=max_evals)
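# Written out, the objective minimised above combines the discriminator
# accuracies a_G (vs. generated pairs), a_r (vs. real pairs) and a_C
# (vs. classifier-labelled pairs) with the classifier's weighted F1 score:
#
#   val = 0.15*mean((0.5 - a_G)^2) + 0.05*mean((1 - a_r)^2)
#         + 0.05*mean((1 - a_C)^2) - F1_C
#
# A two-player GAN equilibrium would put a_G near 0.5, so the first term
# penalises a discriminator that either collapses or dominates the generator.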
def sklearn_baseline(P, V=None):
    P.log(P)
    F = pp.perform_preprocessing(P, ds.get_data(P, V), P.copy().set_keys(
        sample_no=None,
        undersampling=False,
        oversampling=False,
    ))

    x_train, y_train = F[0]
    x_test, y_test = F[2]
    y_train, y_test = y_train.ravel(), y_test.ravel()

    # P.log('cross_val: ' + str(P.get('cross_val')))
    # P.log('   FX_num: ' + str(P.get('FX_num')))

    # ''' Multi-layer Perceptron '''
    # res = np.empty(shape=(P.get('runs'), 5))
    # for run in range(P.get('runs')):
    #     clf = MLP(hidden_layer_sizes=(100, 100), max_iter=500)
    #     clf.fit(x_train, y_train)
    #
    #     y_pred = clf.predict(x_train)
    #     res[run, 0] = accuracy_score(y_train, y_pred)
    #     res[run, 1] = f1_score(y_train, y_pred, average='macro')
    #
    #     y_pred = clf.predict(x_test)
    #     res[run, 2] = accuracy_score(y_test, y_pred)
    #     res[run, 3] = f1_score(y_test, y_pred, average='macro')
    #     res[run, 4] = clf.n_iter_
    #
    # res = np.mean(res, axis=0)
    # P.log(f"MLP Acc Train: {res[0]:.2f}")
    # P.log(f"MLP F1 Train:  {res[1]:.2f}")
    # P.log(f"MLP Acc Test:  {res[2]:.2f}")
    # P.log(f"MLP F1 Test:   {res[3]:.2f}")
    # P.log(f"MLP Iterations = {res[4]}")

    ''' Random Forest Classifier '''
    res = np.empty(shape=(P.get('runs'), 4))
    for run in range(P.get('runs')):
        clf = RandomForestClassifier()
        clf.fit(x_train, y_train)

        y_pred = clf.predict(x_train)
        res[run, 0] = accuracy_score(y_train, y_pred)
        res[run, 1] = f1_score(y_train, y_pred, average='macro')

        y_pred = clf.predict(x_test)
        res[run, 2] = accuracy_score(y_test, y_pred)
        res[run, 3] = f1_score(y_test, y_pred, average='macro')

    res = np.mean(res, axis=0)
    P.log("")
    P.log(f"RFC Acc Train: {res[0]:.5f}")
    P.log(f"RFC F1 Train:  {res[1]:.5f}")
    P.log("")
    P.log(f"RFC Acc Test:  {res[2]:.5f}")
    P.log(f"RFC F1 Test:   {res[3]:.5f}")

    ''' Gaussian Naive Bayes '''
    res = np.empty(shape=(P.get('runs'), 4))
    for run in range(P.get('runs')):
        clf = GaussianNB()
        clf.fit(x_train, y_train)

        y_pred = clf.predict(x_train)
        res[run, 0] = accuracy_score(y_train, y_pred)
        res[run, 1] = f1_score(y_train, y_pred, average='macro')

        y_pred = clf.predict(x_test)
        res[run, 2] = accuracy_score(y_test, y_pred)
        res[run, 3] = f1_score(y_test, y_pred, average='macro')

    res = np.mean(res, axis=0)
    P.log("")
    P.log(f"GNB Acc Train: {res[0]:.5f}")
    P.log(f"GNB F1 Train:  {res[1]:.5f}")
    P.log("")
    P.log(f"GNB Acc Test:  {res[2]:.5f}")
    P.log(f"GNB F1 Test:   {res[3]:.5f}")
def run_cross_val(P, V=None, Base=True):
    P.log("Params: " + str(P))

    ACC = load_results(P, name='acc')
    F1S = load_results(P, name='f1')
    YF = load_results(P, name='YF')
    RF = load_results(P, name='RF')
    PF = load_results(P, name='PF')

    if any(mat is None for mat in (ACC, F1S, YF, PF)):
        if P.get('CUDA') and torch.cuda.is_available():
            P.log("CUDA Training.")
        else:
            P.log("CPU Training.")

        F = pp.perform_preprocessing(P, ds.get_data(P, V), P.copy().set_keys(
            sample_no=None,
            undersampling=False,
            oversampling=False,
        ))

        X, Y = F[0]
        XV, YV = F[2]
        x_test, y_test = XV, YV.ravel()
        DL_V = pp.get_dataloader(P, XV, YV, batch_size=1024)

        # DL_L, DL_U_iter, DL_V = pp.get_all_dataloader(P, ds.get_data(P, V), P_val)
        # P.log(f"Number of batches: Labelled = {len(DL_L)} | Unlabelled = {len(DL_U_iter)} | Validation = {len(DL_V)}")

        ACC = None
        F1S = None
        YF = None
        RF = None
        PF = None

        # Baseline results
        res = np.empty(shape=(P.get('runs'), 8))

        # -------------------
        #  Individual runs
        # -------------------
        skf = StratifiedKFold(n_splits=P.get('runs'), shuffle=True, random_state=42)
        for run, (train_index, test_index) in enumerate(skf.split(X, Y)):
            # The small fold serves as the labelled set, the large remainder as unlabelled data.
            DL_L = pp.get_dataloader(P, X[test_index], Y[test_index])
            DL_U_iter = pp.get_perm_dataloader(P, X[train_index], Y[train_index])
            P.log(f"Number of batches: Labelled = {len(DL_L)} | Unlabelled = {len(DL_U_iter)} | Validation = {len(DL_V)}")

            G, D, C, mat_accuracy, mat_f1_score = GAN.train_GAN(
                P, DL_L, DL_U_iter, DL_V, name=P.get('name') + '_%d' % run)
            if P.get('R_active'):
                R, acc_BASE, f1_BASE = GAN.train_Base(P, DL_L, DL_V, name=P.get('name') + '_%d' % run)
                mat_accuracy = np.concatenate((mat_accuracy, acc_BASE))
                mat_f1_score = np.concatenate((mat_f1_score, f1_BASE))

            if ACC is None:
                ACC = np.expand_dims(mat_accuracy, axis=2)
                F1S = np.expand_dims(mat_f1_score, axis=2)
            else:
                ACC = np.concatenate((ACC, np.expand_dims(mat_accuracy, axis=2)), axis=2)
                F1S = np.concatenate((F1S, np.expand_dims(mat_f1_score, axis=2)), axis=2)

            C.eval()
            if P.get('R_active'):
                R.eval()
            with torch.no_grad():
                for XV, YV in DL_V:
                    # Classify validation data
                    PC = C(XV)
                    if YF is None:
                        YF = YV
                        PF = PC
                    else:
                        YF = torch.cat((YF, YV), 0)
                        PF = torch.cat((PF, PC), 0)
                    if P.get('R_active'):
                        if RF is None:
                            RF = R(XV)
                        else:
                            RF = torch.cat((RF, R(XV).detach()), 0)

            if Base:
                # Baselines trained on the same labelled fold
                x_train, y_train = X[test_index], Y[test_index].ravel()

                ''' Random Forest Classifier '''
                clf = RandomForestClassifier()
                clf.fit(x_train, y_train)

                y_pred = clf.predict(x_train)
                res[run, 0] = accuracy_score(y_train, y_pred)
                res[run, 1] = f1_score(y_train, y_pred, average='macro')

                y_pred = clf.predict(x_test)
                res[run, 2] = accuracy_score(y_test, y_pred)
                res[run, 3] = f1_score(y_test, y_pred, average='macro')

                ''' Gaussian Naive Bayes '''
                clf = GaussianNB()
                clf.fit(x_train, y_train)

                y_pred = clf.predict(x_train)
                res[run, 4] = accuracy_score(y_train, y_pred)
                res[run, 5] = f1_score(y_train, y_pred, average='macro')

                y_pred = clf.predict(x_test)
                res[run, 6] = accuracy_score(y_test, y_pred)
                res[run, 7] = f1_score(y_test, y_pred, average='macro')

        save_results(P, ACC, name='acc')
        save_results(P, F1S, name='f1')
        save_results(P, YF, name='YF')
        save_results(P, PF, name='PF')
        if RF is not None:
            save_results(P, RF, name='RF')
        P.log("Saved Accuracy, F1 Score and predictions.")

        if Base:
            # Baseline evaluation
            res = np.mean(res, axis=0)
            P.log("")
            P.log(f"RFC Acc Train: {res[0]:.5f}")
            P.log(f"RFC F1 Train:  {res[1]:.5f}")
            P.log("")
            P.log(f"RFC Acc Test:  {res[2]:.5f}")
            P.log(f"RFC F1 Test:   {res[3]:.5f}")
            P.log("")
            P.log(f"GNB Acc Train: {res[4]:.5f}")
            P.log(f"GNB F1 Train:  {res[5]:.5f}")
            P.log("")
            P.log(f"GNB Acc Test:  {res[6]:.5f}")
            P.log(f"GNB F1 Test:   {res[7]:.5f}")
            P.log("")
    else:
        P.log("Loaded Accuracy, F1 Score and predictions.")

    plot_evaluation(P, ACC, F1S, YF, PF, RF, epoch_lst=list(range(50, P.get('epochs'), 50)))
from bokeh.io import curdoc
from bokeh.layouts import column
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure

# initialize
from data_source import get_data

data = get_data()
source = ColumnDataSource(data=dict(date=[], t1=[], t2=[]))
source.data = source.from_df(data[['x', 'y']])

# set up plots
corr = figure(plot_width=350, plot_height=350, tools='box_select,reset')
corr.circle('x', 'y', size=2, source=source,
            selection_color="orange", alpha=0.6,
            nonselection_alpha=0.1, selection_alpha=0.4)

def selection_change(attrname, old, new):
    selected = source.selected.indices
    if selected:
        xs = source.data['x'][selected]
        ys = source.data['y'][selected]
        xy = [(x, y) for x, y in zip(xs, ys)]
        # remainder of the selection handler is application-specific

# assumed wiring: register the selection callback and attach the layout
# to the served document (curdoc and column are imported but were unused)
source.selected.on_change('indices', selection_change)
curdoc().add_root(column(corr))
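# Assuming this is a standard Bokeh server application, it would be started
# with `bokeh serve --show <script>.py`; curdoc() then hands the layout to
# each browser session, and box-selecting points in the scatter plot fires
# selection_change with the new `indices`.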
#!/usr/bin/env python3
from data_source import get_data

print(get_data(False))
def pytorch_baseline(P, P_val=None, num=None):
    P.set('epochs_GD', 0)
    P.log("Params: " + str(P))

    if P.get('CUDA') and torch.cuda.is_available():
        P.log("CUDA Training.")
    else:
        P.log("CPU Training.")

    DL_L, _, DL_V = pp.get_all_dataloader(P, ds.get_data(P), P_val)
    P.log(f"Number of batches: Labelled = {len(DL_L)} | Validation = {len(DL_V)}")

    ACC = None
    F1S = None
    RF = None
    YF = None

    # -------------------
    #  Individual runs
    # -------------------
    for run in range(P.get('runs')):
        R, mat_accuracy, mat_f1_score = GAN.train_Base(P, DL_L, DL_V, name=P.get('name') + '_%d' % run)

        if ACC is None:
            ACC = mat_accuracy
            F1S = mat_f1_score
        else:
            ACC = np.concatenate((ACC, mat_accuracy), axis=0)
            F1S = np.concatenate((F1S, mat_f1_score), axis=0)

        R.eval()
        with torch.no_grad():
            for XV, YV in DL_V:
                if RF is None:
                    RF = R(XV)
                    YF = YV
                else:
                    RF = torch.cat((RF, R(XV).detach()), 0)
                    YF = torch.cat((YF, YV), 0)

    # -------------------
    #  Plot metrics
    # -------------------
    timeline = np.arange(0, (P.get('epochs_GD') + P.get('epochs')) + 1, P.get('save_step'))

    def get_label(name, model):
        if name == "Accuracy":
            return "Accuracy $A_%s$" % model
        elif name == "F1 Score":
            return "F1 Score $F_%s$" % model
        else:
            return "NO_NAME_" + model

    plt.figure(figsize=(27, 9), dpi=300, clear=True)
    fig, ax = plt.subplots()

    cmap = plt.get_cmap('gnuplot')
    indices = np.linspace(0, cmap.N, 7)
    colors = [cmap(int(i)) for i in indices]

    for k, (name, mat) in enumerate((('Accuracy', ACC), ("F1 Score", F1S))):
        mean_C = np.mean(mat, axis=0)
        std_C = np.std(mat, axis=0)
        P.log(f"R {name} Test: {mean_C[-1]:.2f}")

        ax.plot(timeline, mean_C, c=colors[k], linestyle='solid', label=get_label(name, 'R'))
        ax.fill_between(timeline, mean_C - std_C, mean_C + std_C, alpha=0.3, facecolor=colors[k])

    Y_max = 1.15
    ax.set_xlim(0.0, (P.get('epochs_GD') + P.get('epochs')))
    ax.set_ylim(0.0, Y_max)
    ax.legend()
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Performance')

    if num is None:
        name = 'eval_baseline'
    else:
        name = 'eval_baseline_' + str(num)
    ax.grid()
    save_fig(P, name, fig)

    YF = pp.one_hot_to_labels(P, YF)
    RF = pp.one_hot_to_labels(P, RF)

    con_mat = confusion_matrix(YF, RF, labels=None, sample_weight=None, normalize=None)
    plot_confusion_matrix(np.divide(con_mat, P.get('runs')).round().astype(int), P,
                          name=name, title='Confusion matrix', fmt='d')

    con_mat = confusion_matrix(YF, RF, labels=None, sample_weight=None, normalize='all')
    plot_confusion_matrix(con_mat, P, name=name + '_normalised', title='Confusion matrix', fmt='0.3f')
def get_Results(P, P_val=None, V=None):
    P.log("Params: " + str(P))

    ACC = load_results(P, name='acc')
    F1S = load_results(P, name='f1')
    YF = load_results(P, name='YF')
    RF = load_results(P, name='RF')
    PF = load_results(P, name='PF')

    if any(mat is None for mat in (ACC, F1S, YF, PF)):
        if P.get('CUDA') and torch.cuda.is_available():
            P.log("CUDA Training.")
        else:
            P.log("CPU Training.")

        DL_L, DL_U_iter, DL_V = pp.get_all_dataloader(P, ds.get_data(P, V), P_val)
        P.log(f"Number of batches: Labelled = {len(DL_L)} | Unlabelled = {len(DL_U_iter)} | Validation = {len(DL_V)}")

        ACC = None
        F1S = None
        YF = None
        RF = None
        PF = None

        # -------------------
        #  Individual runs
        # -------------------
        for run in range(P.get('runs')):
            G, D, C, mat_accuracy, mat_f1_score = GAN.train_GAN(
                P, DL_L, DL_U_iter, DL_V, name=P.get('name') + '_%d' % run)
            if P.get('R_active'):
                R, acc_BASE, f1_BASE = GAN.train_Base(P, DL_L, DL_V, name=P.get('name') + '_%d' % run)
                mat_accuracy = np.concatenate((mat_accuracy, acc_BASE))
                mat_f1_score = np.concatenate((mat_f1_score, f1_BASE))

            if ACC is None:
                ACC = np.expand_dims(mat_accuracy, axis=2)
                F1S = np.expand_dims(mat_f1_score, axis=2)
            else:
                ACC = np.concatenate((ACC, np.expand_dims(mat_accuracy, axis=2)), axis=2)
                F1S = np.concatenate((F1S, np.expand_dims(mat_f1_score, axis=2)), axis=2)

            C.eval()
            if P.get('R_active'):
                R.eval()
            with torch.no_grad():
                for XV, YV in DL_V:
                    # Classify validation data
                    PC = C(XV)
                    if YF is None:
                        YF = YV
                        PF = PC
                    else:
                        YF = torch.cat((YF, YV), 0)
                        PF = torch.cat((PF, PC), 0)
                    if P.get('R_active'):
                        if RF is None:
                            RF = R(XV)
                        else:
                            RF = torch.cat((RF, R(XV).detach()), 0)

        save_results(P, ACC, name='acc')
        save_results(P, F1S, name='f1')
        save_results(P, YF, name='YF')
        save_results(P, PF, name='PF')
        if RF is not None:
            save_results(P, RF, name='RF')
        P.log("Saved Accuracy, F1 Score and predictions.")
    else:
        P.log("Loaded Accuracy, F1 Score and predictions.")

    return ACC, F1S, (YF, RF, PF)