def evaluate(self, parser=rightParse):
    """Score ``parser`` against the gold bracketings and print the totals.

    Iterates over ``self.parsed()`` and ``self.tagged()`` in lockstep.  For
    every sentence with at least two tagged items, the gold span set is
    obtained with ``spannings(parsed, unary=False, root=True)`` and three
    counts are accumulated from a triple ``e``:

    * ``e[0]`` is added to the overlap total,
    * ``e[1]`` is added to the gold total,
    * ``e[2]`` is added to the model total.

    How ``e`` is obtained depends on ``parser``:

    * ``uBoundParse``: no parse is run; ``e`` is
      ``[len(golden), len(golden), len(tagged) - 1]``.
    * ``randomTreeParse``: the parser is called with the sentence length and
      its result is turned into a set.
    * anything else: the parser is called with the tagged sentence and its
      result is passed through ``spannings``.

    In the last two cases ``e`` comes from ``Metric(golden, model).evaluation()``.

    Sentences with fewer than two tagged items are not scored; a message is
    printed for them when the number of leaves of the gold tree differs from
    the number of tagged items.

    The accumulated totals are handed to ``print_metrics``; nothing is
    returned.
    """
    noverlap = 0
    nmodel = 0
    ngolden = 0
    for parsed, tagged in zip(self.parsed(), self.tagged()):
        if len(tagged) < 2:
            if len(parsed.leaves()) != len(tagged):
                # Single pre-formatted argument: prints the same text under
                # Python 2 and Python 3 (the original used a py2-only print
                # statement).
                print("%s doesn't match." % (tagged,))
            continue

        golden = spannings(parsed, unary=False, root=True)
        if parser != uBoundParse:
            if parser != randomTreeParse:
                model = spannings(parser(tagged), unary=False, root=True)
            else:
                # This parser only receives the sentence length.
                model = set(parser(len(tagged)))
            e = Metric(golden, model).evaluation()
        else:
            e = [len(golden), len(golden), len(tagged) - 1]

        noverlap += e[0]
        ngolden += e[1]
        nmodel += e[2]
    print_metrics(noverlap, ngolden, nmodel)
def evaluate(self, parser=forwardDependency, directed=True):
    """Compare ``parser``'s edges with the gold edges and print the totals.

    For each graph yielded by ``self.iterator(renum=True)`` whose
    ``number_of_nodes() - 1`` is at least 2, the gold edge set is
    ``set(dep.edges())`` and the model edge set is built from
    ``parser(number_of_nodes() - 1)``.

    Graphs whose gold and model edge sets differ in size are skipped.  For
    the others:

    * if ``directed`` is true, a gold edge counts as a hit when the very same
      pair is in the model set;
    * otherwise a gold edge ``(head, sink)`` counts as a hit when either
      ``(head, sink)`` or ``(sink, head)`` is in the model set.

    The hit count and the sizes of both edge sets are accumulated and finally
    passed to ``print_metrics``; nothing is returned.
    """
    hits = 0
    gold_total = 0
    model_total = 0
    for dep in self.iterator(renum=True):
        size = dep.number_of_nodes() - 1
        if size < 2:
            continue

        gold = set(dep.edges())
        model = set(parser(size))
        if len(gold) != len(model):
            continue

        if directed:
            hits += sum(1 for edge in gold if edge in model)
        else:
            hits += sum(
                1 for head, sink in gold
                if (head, sink) in model or (sink, head) in model)
        gold_total += len(gold)
        model_total += len(model)
    print_metrics(hits, gold_total, model_total)
def run_ann(X, y, ep, bs, perc, training_size, seed, out_dir):
    """Train the ANN on a stratified, under-sampled subset and log its scores.

    Pipeline, in order:

    1. ``strati_training_data`` draws a training subset from ``X``/``y``
       (``size=training_size``, ``p_class1=perc``, ``seed=int(seed)``).
    2. ``RandomUnderSampler(random_state=0)`` resamples that subset.
    3. ``train_test_split`` splits it using the module-level ``test_perc``.
    4. ``ann`` is called with the train split, the full ``X``, ``ep``, ``bs``
       and the test split; it returns predictions for train, full ``X`` and
       test.
    5. ``print_metrics`` is evaluated for the three prediction sets and one
       ``trainSize:seed:value`` line is appended to each of nine text files
       (``F1``/``PREC``/``REC`` x ``train``/``test``/``big``).
    6. The full-image prediction is saved as a PNG reshaped to 1000 x 1000.

    Args:
        X: Full feature matrix; also used for the "Big image" prediction.
        y: Labels for ``X``; ground truth for the "Big image" scores.
        ep: Forwarded to ``ann`` (presumably the epoch count - confirm).
        bs: Forwarded to ``ann`` (presumably the batch size - confirm).
        perc: Forwarded to ``strati_training_data`` as ``p_class1``.
        training_size: Forwarded to ``strati_training_data`` as ``size`` and
            used in the PNG file name.
        seed: Sampling seed; converted with ``int()`` for sampling and with
            ``str()`` for file names and log lines.
        out_dir: Root of the output path.  Files go to
            ``out_dir/<img_name>/ann_strati_0/``, which must already exist.

    Module-level names read: ``test_perc``, ``img_name``, ``trainSize``.
    Module-level name written: ``input_dim``.

    Returns:
        None.
    """
    global input_dim

    sizes = 1
    F1 = np.zeros((sizes, 3))
    PREC = np.zeros((sizes, 3))
    REC = np.zeros((sizes, 3))

    # Stratified random data
    x_train, y_train, _idx_0, _idx_1 = strati_training_data(
        X, y, size=training_size, p_class1=perc, seed=int(seed))
    print("x_train, y_train", x_train.shape, y_train.shape)

    rus = RandomUnderSampler(random_state=0)
    # NOTE(review): newer imbalanced-learn releases expose this as
    # fit_resample - confirm against the installed version before renaming.
    x_res, y_res = rus.fit_sample(x_train, y_train)

    x_train_2, x_test, y_train_2, y_test = train_test_split(
        x_res, y_res, test_size=test_perc, random_state=0)

    # Set before ann() is called, as in the original statement order.
    input_dim = x_train_2.shape[1]
    pred_training, pred_big_im, pred_testing = ann(
        x_train_2, y_train_2, X, ep, bs, x_test)

    # str(seed) keeps the join working whether seed arrives as str or int.
    preffix = out_dir + "/" + img_name + "/ann_strati_0/" + "_".join(
        [str(ep), str(bs), str(perc), str(seed), ""])
    print("preffix", preffix)

    metric_tables = (("F1", F1), ("PREC", PREC), ("REC", REC))
    # (file suffix, label given to print_metrics, ground truth, prediction);
    # the position in this tuple is the column used in F1/PREC/REC.
    splits = (
        ("train", "Trainng", y_train_2, pred_training),
        ("test", "Test", y_test, pred_testing),
        ("big", "Big image", y, pred_big_im),
    )

    i = 0
    logs = {}
    try:
        # Open every log up front (same order as before) so a bad output
        # path fails before any metric is computed.
        for metric_name, _table in metric_tables:
            for suffix, _label, _truth, _pred in splits:
                logs[metric_name, suffix] = open(
                    preffix + metric_name + "_" + suffix + ".txt", "a")

        for col, (suffix, label, truth, pred) in enumerate(splits):
            _cm, F1[i, col], PREC[i, col], REC[i, col] = print_metrics(
                label, truth, pred)
            # NOTE(review): trainSize is neither a parameter nor a local; it
            # has to exist at module level.  The PNG name below uses the
            # training_size argument instead - confirm they are meant to
            # carry the same value.
            for metric_name, table in metric_tables:
                logs[metric_name, suffix].write(
                    str(trainSize) + ":" + str(seed) + ":" +
                    str(table[i, col]) + "\n")
    finally:
        # The original closed only the three F1 files; close all nine, also
        # when an exception interrupts the run.
        for handle in logs.values():
            handle.close()

    scores = str(np.round(F1[i, 2], 2)) + "_" + str(np.round(
        PREC[i, 2], 2)) + "_" + str(np.round(REC[i, 2], 2))
    # Hard-coded image size: pred_big_im must hold exactly 1000 * 1000 values.
    plt.imsave(preffix + str(training_size) + "_" + scores + "_pred.png",
               np.reshape(pred_big_im, (1000, 1000)))
def run_svm(training_size, out_dir):
    """Grid-search an SVM on a stratified, under-sampled subset and log scores.

    Pipeline, in order:

    1. ``strati_training_data`` draws a training subset from the module-level
       ``X``/``y`` (``size=training_size``, ``p_class1=perc``,
       ``seed=int(seed)``).
    2. ``RandomUnderSampler(random_state=0)`` resamples that subset.
    3. ``train_test_split`` splits it using the module-level ``test_perc``.
    4. ``svm`` is called with the train split, the full ``X``, the test
       split, the kernel name ``'rbf'``, the ``gamma`` and ``C`` grids
       (``10 ** np.arange(-5, 5, 0.5)`` each) and the score name ``"f1"``.
       It returns predictions for train, full ``X`` and test, plus a dict
       from which ``"C"``, ``"gamma"`` and ``"kernel"`` are read.
    5. ``print_metrics`` is evaluated for the three prediction sets and one
       ``trainSize:seed:value`` line is appended to each of nine text files
       (``F1``/``PREC``/``REC`` x ``train``/``test``/``big``).
    6. The full-image prediction is saved as a PNG reshaped to 1000 x 1000.

    Args:
        training_size: Forwarded to ``strati_training_data`` as ``size`` and
            used in the PNG file name.
        out_dir: Root of the output path.  Files go to
            ``out_dir/<img_name>/svm_strati_0/``, which must already exist.

    Module-level names read: ``X``, ``y``, ``perc``, ``seed``, ``test_perc``,
    ``img_name``, ``trainSize``.

    Returns:
        None.
    """
    sizes = 1
    F1 = np.zeros((sizes, 3))
    PREC = np.zeros((sizes, 3))
    REC = np.zeros((sizes, 3))

    x_train, y_train, _idx_0, _idx_1 = strati_training_data(
        X, y, size=training_size, p_class1=perc, seed=int(seed))

    rus = RandomUnderSampler(random_state=0)
    # NOTE(review): newer imbalanced-learn releases expose this as
    # fit_resample - confirm against the installed version before renaming.
    x_res, y_res = rus.fit_sample(x_train, y_train)

    x_train_2, x_test, y_train_2, y_test = train_test_split(
        x_res, y_res, test_size=test_perc, random_state=0)

    kernel = 'rbf'
    C_r = [-5, 5]
    C_step = 0.5
    g_r = [-5, 5]
    g_step = 0.5
    C_range = 10.**np.arange(C_r[0], C_r[1], C_step)
    gamma_range = 10.**np.arange(g_r[0], g_r[1], g_step)

    # score can be: roc_auc, accuracy, recall, precision, f1, average_precision
    score = "f1"

    print(":::run_svm::: x_train_2.shape:", x_train_2.shape)
    pred_training, pred_big_im, pred_testing, best_params = svm(
        x_train_2, y_train_2, X, x_test, kernel, gamma_range, C_range, score)

    best_C = best_params["C"]
    best_gamma = best_params["gamma"]
    best_k = best_params["kernel"]

    # str(seed) keeps the join working whether seed is a str or an int.
    preffix = out_dir + "/" + img_name + "/svm_strati_0/" + "_".join(
        [str(best_C), str(best_gamma), best_k, str(seed), ""])
    print("preffix", preffix)

    metric_tables = (("F1", F1), ("PREC", PREC), ("REC", REC))
    # (file suffix, label given to print_metrics, ground truth, prediction);
    # the position in this tuple is the column used in F1/PREC/REC.
    splits = (
        ("train", "Trainng", y_train_2, pred_training),
        ("test", "Test", y_test, pred_testing),
        ("big", "Big image", y, pred_big_im),
    )

    i = 0
    logs = {}
    try:
        # Open every log up front (same order as before) so a bad output
        # path fails before any metric is computed.
        for metric_name, _table in metric_tables:
            for suffix, _label, _truth, _pred in splits:
                logs[metric_name, suffix] = open(
                    preffix + metric_name + "_" + suffix + ".txt", "a")

        for col, (suffix, label, truth, pred) in enumerate(splits):
            _cm, F1[i, col], PREC[i, col], REC[i, col] = print_metrics(
                label, truth, pred)
            # NOTE(review): trainSize is neither a parameter nor a local; it
            # has to exist at module level.  The PNG name below uses the
            # training_size argument instead - confirm they are meant to
            # carry the same value.
            for metric_name, table in metric_tables:
                logs[metric_name, suffix].write(
                    str(trainSize) + ":" + str(seed) + ":" +
                    str(table[i, col]) + "\n")
    finally:
        # The original closed only the three F1 files; close all nine, also
        # when an exception interrupts the run.
        for handle in logs.values():
            handle.close()

    scores = str(np.round(F1[i, 2], 2)) + "_" + str(np.round(
        PREC[i, 2], 2)) + "_" + str(np.round(REC[i, 2], 2))
    # Hard-coded image size: pred_big_im must hold exactly 1000 * 1000 values.
    plt.imsave(preffix + str(training_size) + "_" + scores + "_pred.png",
               np.reshape(pred_big_im, (1000, 1000)))