Ejemplo n.º 1
0
 def evaluate(self, parser=rightParse):
     """Score ``parser`` against the gold bracketings and print the totals.

     Walks ``self.parsed()`` and ``self.tagged()`` in lockstep.  For every
     sentence with at least two tagged items, the gold spans (from
     ``spannings``) are compared with the spans produced by ``parser`` and
     three counts -- overlap, gold, model -- are accumulated.  The totals
     are finally handed to ``print_metrics``.

     Args:
         parser: Parsing strategy to evaluate.
             * ``uBoundParse`` is never called; it is scored directly as
               ``[len(golden), len(golden), len(tagged) - 1]``.
             * ``randomTreeParse`` is called with the sentence length and
               its result is wrapped in a ``set``.
             * Any other parser is called with the tagged sentence and its
               result is passed through ``spannings``.

     Returns:
         None.  Results are reported through ``print_metrics``.
     """
     noverlap = 0
     nmodel = 0
     ngolden = 0

     for parsed, tagged in zip(self.parsed(), self.tagged()):
         if len(tagged) < 2:
             # NOTE(review): as in the original code, the length sanity
             # check only runs for these skipped short sentences -- confirm
             # that longer sentences are not meant to be checked as well.
             if len(parsed.leaves()) != len(tagged):
                 # Single-argument %-formatting prints the same text under
                 # both the Python 2 print statement and the Python 3
                 # print function.
                 print("%s doesn't match." % (tagged,))
             continue

         golden = spannings(parsed, unary=False, root=True)

         if parser != uBoundParse:
             if parser != randomTreeParse:
                 model = spannings(parser(tagged), unary=False, root=True)
             else:
                 model = set(parser(len(tagged)))
             e = Metric(golden, model).evaluation()
         else:
             # Upper bound: every gold span is counted as matched.
             e = [len(golden), len(golden), len(tagged) - 1]

         noverlap += e[0]
         ngolden += e[1]
         nmodel += e[2]

     print_metrics(noverlap, ngolden, nmodel)
Ejemplo n.º 2
0
 def evaluate(self, parser=forwardDependency, directed=True):
     """Compare ``parser``'s edges with the gold edges and print the totals.

     For each graph yielded by ``self.iterator(renum=True)`` whose node
     count minus one is at least 2, the gold edge set is compared with the
     edge set returned by ``parser``.  Graphs where the two sets differ in
     size are left out of all three counters.

     Args:
         parser: Callable receiving ``number_of_nodes() - 1`` and returning
             an iterable of edges.
         directed: When true, an edge matches only if the identical pair is
             in the model; otherwise the reversed pair also counts.

     Returns:
         None.  Results are reported through ``print_metrics``.
     """
     noverlap = ngolden = nmodel = 0

     for dep in self.iterator(renum=True):
         # presumably the extra node is an artificial root -- TODO confirm
         length = dep.number_of_nodes() - 1
         if length < 2:
             continue

         gold = set(dep.edges())
         model = set(parser(length))

         # Only graphs whose gold and model edge sets have equal size count.
         if len(gold) != len(model):
             continue

         if directed:
             noverlap += sum(1 for edge in gold if edge in model)
         else:
             noverlap += sum(
                 1 for head, sink in gold
                 if (head, sink) in model or (sink, head) in model)

         ngolden += len(gold)
         nmodel += len(model)

     print_metrics(noverlap, ngolden, nmodel)
Ejemplo n.º 3
0
def run_ann(X, y, ep, bs, perc, training_size, seed, out_dir):
    """Train the ANN on a stratified, under-sampled subset and log its scores.

    Steps, in order: draw a sample with ``strati_training_data``, balance it
    with ``RandomUnderSampler``, split it with ``train_test_split``, run
    ``ann``, score the three prediction sets with ``print_metrics``, append
    every score to its own text file and save the full prediction as a PNG.

    Relies on module-level names that are not defined in this function:
    ``test_perc``, ``img_name`` and ``trainSize``.  It also sets the module
    global ``input_dim`` to the number of feature columns.

    Args:
        X: Feature matrix; sampled for training and also passed whole to
            ``ann`` for the full prediction.
        y: Labels for ``X``; ground truth for the "Big image" score.
        ep: Forwarded to ``ann`` and used in the output file prefix
            (presumably the number of epochs -- confirm against ``ann``).
        bs: Forwarded to ``ann`` and used in the output file prefix
            (presumably the batch size -- confirm against ``ann``).
        perc: Passed to ``strati_training_data`` as ``p_class1``.
        training_size: Passed to ``strati_training_data`` as ``size`` and
            used in the PNG file name.
        seed: Must be a string: it is converted with ``int()`` for sampling
            and joined verbatim into the output file prefix.
        out_dir: Base directory; files are written under
            ``out_dir/img_name/ann_strati_0/``.

    Returns:
        None.
    """
    global input_dim

    sizes = 1
    F1 = np.zeros((sizes, 3))
    PREC = np.zeros((sizes, 3))
    REC = np.zeros((sizes, 3))

    # Stratified random data
    x_train, y_train, _idx_0, _idx_1 = strati_training_data(
        X, y, size=training_size, p_class1=perc, seed=int(seed))

    print("x_train, y_train", x_train.shape, y_train.shape)

    # NOTE(review): newer imbalanced-learn releases expose this method as
    # `fit_resample` -- confirm which version this project pins.
    rus = RandomUnderSampler(random_state=0)
    x_res, y_res = rus.fit_sample(x_train, y_train)

    # Feature selection is disabled: the resampled data is used as is.
    x_train_2, x_test, y_train_2, y_test = train_test_split(
        x_res, y_res, test_size=test_perc, random_state=0)

    # presumably read by ann() when building the network -- TODO confirm
    input_dim = x_train_2.shape[1]

    pred_training, pred_big_im, pred_testing = ann(x_train_2, y_train_2, X,
                                                   ep, bs, x_test)

    preffix = out_dir + "/" + img_name + "/ann_strati_0/" + "_".join(
        [str(ep), str(bs), str(perc), seed, ""])
    print("preffix", preffix)

    row = 0
    # (label for print_metrics, file-name suffix, ground truth, prediction)
    evaluations = (
        ("Trainng", "train", y_train_2, pred_training),
        ("Test", "test", y_test, pred_testing),
        ("Big image", "big", y, pred_big_im),
    )
    for col, (label, suffix, truth, predicted) in enumerate(evaluations):
        _cm, F1[row, col], PREC[row, col], REC[row, col] = print_metrics(
            label, truth, predicted)

        # Each log is opened in a `with` block so that every handle is
        # closed, including the PREC/REC ones and on error paths.
        for metric, table in (("F1", F1), ("PREC", PREC), ("REC", REC)):
            with open(preffix + metric + "_" + suffix + ".txt", "a") as log:
                # NOTE(review): `trainSize` is a module-level name, not the
                # `training_size` parameter -- confirm this is intended.
                log.write(
                    str(trainSize) + ":" + str(seed) + ":" +
                    str(table[row, col]) + "\n")

    scores = str(np.round(F1[row, 2], 2)) + "_" + str(np.round(
        PREC[row, 2], 2)) + "_" + str(np.round(REC[row, 2], 2))
    # The full prediction is reshaped to a fixed 1000x1000 image.
    plt.imsave(preffix + str(training_size) + "_" + scores + "_pred.png",
               np.reshape(pred_big_im, (1000, 1000)))
Ejemplo n.º 4
0
def run_svm(training_size, out_dir):
    """Grid-search an RBF SVM on a stratified subset and log its scores.

    Steps, in order: draw a sample with ``strati_training_data``, balance it
    with ``RandomUnderSampler``, split it with ``train_test_split``, run
    ``svm`` over a grid of ``C`` and ``gamma`` values, score the three
    prediction sets with ``print_metrics``, append every score to its own
    text file and save the full prediction as a PNG.

    Relies on module-level names that are not defined in this function:
    ``X``, ``y``, ``perc``, ``seed``, ``test_perc``, ``img_name`` and
    ``trainSize``.  ``seed`` must be a string: it is converted with
    ``int()`` for sampling and joined verbatim into the file prefix.

    Args:
        training_size: Passed to ``strati_training_data`` as ``size`` and
            used in the PNG file name.
        out_dir: Base directory; files are written under
            ``out_dir/img_name/svm_strati_0/``.

    Returns:
        None.
    """
    sizes = 1
    F1 = np.zeros((sizes, 3))
    PREC = np.zeros((sizes, 3))
    REC = np.zeros((sizes, 3))

    x_train, y_train, _idx_0, _idx_1 = strati_training_data(
        X, y, size=training_size, p_class1=perc, seed=int(seed))

    # NOTE(review): newer imbalanced-learn releases expose this method as
    # `fit_resample` -- confirm which version this project pins.
    rus = RandomUnderSampler(random_state=0)
    x_res, y_res = rus.fit_sample(x_train, y_train)

    # Feature selection is disabled: the resampled data is used as is.
    x_train_2, x_test, y_train_2, y_test = train_test_split(
        x_res, y_res, test_size=test_perc, random_state=0)

    kernel = 'rbf'
    # Both hyper-parameters span 10**-5 up to (not including) 10**5 in
    # half-decade steps.
    C_range = 10.**np.arange(-5, 5, 0.5)
    gamma_range = 10.**np.arange(-5, 5, 0.5)
    # score can be: roc_auc, accuracy, recall, precision, f1,
    # average_precision
    score = "f1"

    print(":::run_svm::: x_train_2.shape:", x_train_2.shape)
    pred_training, pred_big_im, pred_testing, best_params = svm(
        x_train_2, y_train_2, X, x_test, kernel, gamma_range, C_range, score)
    best_C = best_params["C"]
    best_gamma = best_params["gamma"]
    best_k = best_params["kernel"]

    preffix = out_dir + "/" + img_name + "/svm_strati_0/" + "_".join(
        [str(best_C), str(best_gamma), best_k, seed, ""])
    print("preffix", preffix)

    row = 0
    # (label for print_metrics, file-name suffix, ground truth, prediction)
    evaluations = (
        ("Trainng", "train", y_train_2, pred_training),
        ("Test", "test", y_test, pred_testing),
        ("Big image", "big", y, pred_big_im),
    )
    for col, (label, suffix, truth, predicted) in enumerate(evaluations):
        _cm, F1[row, col], PREC[row, col], REC[row, col] = print_metrics(
            label, truth, predicted)

        # Each log is opened in a `with` block so that every handle is
        # closed, including the PREC/REC ones and on error paths.
        for metric, table in (("F1", F1), ("PREC", PREC), ("REC", REC)):
            with open(preffix + metric + "_" + suffix + ".txt", "a") as log:
                # NOTE(review): `trainSize` is a module-level name, not the
                # `training_size` parameter -- confirm this is intended.
                log.write(
                    str(trainSize) + ":" + str(seed) + ":" +
                    str(table[row, col]) + "\n")

    scores = str(np.round(F1[row, 2], 2)) + "_" + str(np.round(
        PREC[row, 2], 2)) + "_" + str(np.round(REC[row, 2], 2))
    # The full prediction is reshaped to a fixed 1000x1000 image.
    plt.imsave(preffix + str(training_size) + "_" + scores + "_pred.png",
               np.reshape(pred_big_im, (1000, 1000)))