Exemple #1
0
def sim_getCorrelation(We, words, f, weight4ind, scoring_function, params):
    """Score the sentence pairs listed in a file and correlate with gold scores.

    Every line of the file is split on tab characters: field 0 and field 1
    are the two sentences and field 2 is parsed as the float gold score.

    Args:
        We: Passed through unchanged as the first argument of
            ``scoring_function``.
        words: Passed to ``data_io.getSeqs`` together with each sentence pair.
        f: Path of the tab-separated file to read.
        weight4ind: Passed to ``data_io.seq2weight`` together with the two
            values returned by ``data_io.prepare_data``.
        scoring_function: Callable invoked as
            ``scoring_function(We, x1, x2, m1, m2, params)``.
        params: Passed through unchanged to ``scoring_function``.

    Returns:
        A 2-tuple: element 0 of ``pearsonr(preds, golds)`` and element 0 of
        ``spearmanr(preds, golds)``.

    Raises:
        IndexError: If a line has fewer than three tab-separated fields, or
            the file has no lines (the debug prints index element 0).
        ValueError: If the third field of a line is not a valid float.
    """
    # The context manager closes the file even if parsing below fails.
    with open(f, 'r') as handle:
        lines = handle.readlines()

    golds = []
    seq1 = []
    seq2 = []
    for line in lines:
        fields = line.split("\t")
        p1 = fields[0]
        p2 = fields[1]
        score = float(fields[2])
        X1, X2 = data_io.getSeqs(p1, p2, words)
        seq1.append(X1)
        seq2.append(X2)
        golds.append(score)

    x1, m1 = data_io.prepare_data(seq1)
    x2, m2 = data_io.prepare_data(seq2)
    m1 = data_io.seq2weight(x1, m1, weight4ind)
    m2 = data_io.seq2weight(x2, m2, weight4ind)
    scores = scoring_function(We, x1, x2, m1, m2, params)

    # Debug output of the first pair and its score. The single-argument
    # parenthesised form prints identically on Python 2 and Python 3.
    print(seq1[0])
    print(seq2[0])
    print(scores[0])

    preds = np.squeeze(scores)
    return pearsonr(preds, golds)[0], spearmanr(preds, golds)[0]
Exemple #2
0
def sim_badSents(We, words, weight4ind, scoring_function, params, fpc, sent1,
                 sent2):
    """Score a single sentence pair and return the rescaled prediction.

    The pair is converted with ``data_io.getSeqs``, wrapped into
    one-element batches, and scored with
    ``scoring_function(We, x1, x2, m1, m2, params, fpc)``.

    Returns:
        ``np.squeeze(scores) * 2 + 3``.
    """
    ids_a, ids_b = data_io.getSeqs(sent1, sent2, words)

    # Each side is a batch holding exactly one sequence.
    batch_a, mask_a = data_io.prepare_data([ids_a])
    batch_b, mask_b = data_io.prepare_data([ids_b])
    weights_a = data_io.seq2weight(batch_a, mask_a, weight4ind)
    weights_b = data_io.seq2weight(batch_b, mask_b, weight4ind)

    raw_scores = scoring_function(We, batch_a, batch_b, weights_a, weights_b,
                                  params, fpc)
    return np.squeeze(raw_scores) * 2 + 3
Exemple #3
0
def sim_getCorrelation(We, words, f, weight4ind, scoring_function, params, fpc,
                       test_name):
    """Score the sentence pairs in a file; return Pearson r and RMSE vs. gold.

    Every line of the file is split on tab characters: field 0 and field 1
    are the two sentences and field 2 is parsed as the float gold score.

    Args:
        We: Passed through unchanged as the first argument of
            ``scoring_function``.
        words: Passed to ``data_io.getSeqs`` together with each sentence pair.
        f: Path of the tab-separated file to read.
        weight4ind: Passed to ``data_io.seq2weight`` together with the two
            values returned by ``data_io.prepare_data``.
        scoring_function: Callable invoked as
            ``scoring_function(We, x1, x2, m1, m2, params, fpc)``.
        params: Passed through unchanged to ``scoring_function``.
        fpc: Passed through unchanged to ``scoring_function``.
        test_name: Not used by this function; kept so existing callers
            continue to work.

    Returns:
        A 2-tuple: element 0 of ``pearsonr(preds, golds)`` and the square
        root of ``mean_squared_error(golds, preds)``.

    Raises:
        IndexError: If a line has fewer than three tab-separated fields.
        ValueError: If the third field of a line is not a valid float.
    """
    # The context manager closes the file even if parsing below fails.
    with open(f, 'r') as handle:
        lines = handle.readlines()

    golds = []
    seq1 = []
    seq2 = []
    for line in lines:
        fields = line.split("\t")
        p1 = fields[0]
        p2 = fields[1]
        score = float(fields[2])
        X1, X2 = data_io.getSeqs(p1, p2, words)
        seq1.append(X1)
        seq2.append(X2)
        golds.append(score)

    x1, m1 = data_io.prepare_data(seq1)
    x2, m2 = data_io.prepare_data(seq2)
    m1 = data_io.seq2weight(x1, m1, weight4ind)
    m2 = data_io.seq2weight(x2, m2, weight4ind)
    golds = np.asarray(golds)
    scores = scoring_function(We, x1, x2, m1, m2, params, fpc)
    preds = np.squeeze(scores)

    print(preds)

    # The square root is applied on top of the MSE, so the second returned
    # value is a root-mean-squared error.
    rmse = sqrt(mean_squared_error(golds, preds))
    return pearsonr(preds, golds)[0], rmse
Exemple #4
0
def getCorrelation(model, words, f, params=None):
    """Score the sentence pairs in a file with ``model`` and correlate with gold.

    Every line of the file is split on tab characters: field 0 and field 1
    are the two sentences and field 2 is parsed as the float gold score.

    Args:
        model: Object whose ``scoring_function(x1, x2, m1, m2)`` produces the
            predictions.
        words: Passed to ``data_io.getSeqs`` together with each sentence pair.
        f: Path of the tab-separated file to read.
        params: Optional settings object. When it is truthy and
            ``params.weightfile`` is truthy, ``data_io.seq2weight`` is applied
            with ``params.weight4ind`` before scoring. Any falsy value
            (``None``, ``[]``) skips that step.

    Returns:
        A 2-tuple: element 0 of ``pearsonr(preds, golds)`` and element 0 of
        ``spearmanr(preds, golds)``.

    Raises:
        IndexError: If a line has fewer than three tab-separated fields.
        ValueError: If the third field of a line is not a valid float.
    """
    # The context manager closes the file even if parsing below fails.
    with open(f, 'r') as handle:
        lines = handle.readlines()

    golds = []
    seq1 = []
    seq2 = []
    for line in lines:
        fields = line.split("\t")
        p1 = fields[0]
        p2 = fields[1]
        score = float(fields[2])
        X1, X2 = data_io.getSeqs(p1, p2, words)
        seq1.append(X1)
        seq2.append(X2)
        golds.append(score)

    x1, m1 = data_io.prepare_data(seq1)
    x2, m2 = data_io.prepare_data(seq2)
    if params and params.weightfile:
        m1 = data_io.seq2weight(x1, m1, params.weight4ind)
        m2 = data_io.seq2weight(x2, m2, params.weight4ind)
    scores = model.scoring_function(x1, x2, m1, m2)
    preds = np.squeeze(scores)
    return pearsonr(preds, golds)[0], spearmanr(preds, golds)[0]
Exemple #5
0
def _score_batch(model, seq1, seq2, params):
    """Score one batch of sequence pairs and return the scores as a list."""
    x1, m1 = data_io.prepare_data(seq1)
    x2, m2 = data_io.prepare_data(seq2)
    if params and params.weightfile:
        m1 = data_io.seq2weight(x1, m1, params.weight4ind)
        m2 = data_io.seq2weight(x2, m2, params.weight4ind)
    scores = model.scoring_function(x1, x2, m1, m2)
    # np.squeeze can collapse a one-pair batch to a 0-d array, whose tolist()
    # is a bare scalar that list.extend() rejects. atleast_1d keeps the
    # result a list for every batch size.
    return np.atleast_1d(np.squeeze(scores)).tolist()


def getAcc(model, words, f, params=None):
    """Score the sentence pairs in a file in batches and return ``acc``.

    Every line of the file is split on tab characters: field 0 and field 1
    are the two sentences and field 2 is the gold label. The label is kept
    exactly as read (a string, not converted or stripped). Pairs are scored
    100 at a time, plus one final partial batch if any pairs remain.

    Args:
        model: Object whose ``scoring_function(x1, x2, m1, m2)`` produces the
            predictions.
        words: Passed to ``data_io.getSeqs`` together with each sentence pair.
        f: Path of the tab-separated file to read.
        params: Optional settings object. When it is truthy and
            ``params.weightfile`` is truthy, ``data_io.seq2weight`` is applied
            with ``params.weight4ind`` before scoring. Any falsy value
            (``None``, ``[]``) skips that step.

    Returns:
        Whatever ``acc(preds, golds)`` returns.

    Raises:
        IndexError: If a line has fewer than three tab-separated fields.
    """
    batch_size = 100

    # The context manager closes the file even if parsing below fails.
    with open(f, 'r') as handle:
        lines = handle.readlines()

    preds = []
    golds = []
    seq1 = []
    seq2 = []
    for line in lines:
        fields = line.split("\t")
        p1 = fields[0]
        p2 = fields[1]
        score = fields[2]
        X1, X2 = data_io.getSeqs(p1, p2, words)
        seq1.append(X1)
        seq2.append(X2)
        golds.append(score)
        if len(seq1) == batch_size:
            preds.extend(_score_batch(model, seq1, seq2, params))
            seq1 = []
            seq2 = []

    # Flush the final partial batch, if any.
    if seq1:
        preds.extend(_score_batch(model, seq1, seq2, params))
    return acc(preds, golds)
Exemple #6
0
def sim_getCorrelation1(We, words, file_index, weight4ind, scoring_function,
                        params):
    """Correlate predicted scores with gold scores read from a separate file.

    ``file_index[0]`` holds one tab-separated sentence pair per line (fields
    0 and 1, lower-cased before use). ``file_index[1]`` holds the gold score
    for the pair on the same line number. A pair is skipped when its score
    line cannot be parsed as a float or ``data_io.getSeqs`` raises.

    Args:
        We: Passed through unchanged as the first argument of
            ``scoring_function``.
        words: Passed to ``data_io.getSeqs`` together with each sentence pair.
        file_index: Indexable whose element 0 is the sentence-pair file path
            and element 1 is the score file path.
        weight4ind: Passed to ``data_io.seq2weight`` together with the two
            values returned by ``data_io.prepare_data``.
        scoring_function: Callable invoked as
            ``scoring_function(We, x1, x2, m1, m2, params)``.
        params: Passed through unchanged to ``scoring_function``.

    Returns:
        Element 0 of ``pearsonr(preds, golds)``.

    Raises:
        IndexError: If the score file has fewer lines than the pair file, or
            a pair line has fewer than two tab-separated fields.
    """
    # Context managers close both files even if a read fails.
    with open(file_index[0], 'r') as pair_file:
        lines = pair_file.readlines()
    with open(file_index[1], 'r') as score_file:
        score_lines = score_file.readlines()

    golds = []
    seq1 = []
    seq2 = []
    for index, line in enumerate(lines):
        # Indexing (rather than zip) keeps the IndexError when the score file
        # is shorter than the pair file instead of silently truncating.
        score_text = score_lines[index]
        fields = line.split("\t")
        p1 = fields[0].lower()
        p2 = fields[1].lower()
        try:
            score = float(score_text)
            X1, X2 = data_io.getSeqs(p1, p2, words)
        except Exception:
            # Deliberate best-effort: skip pairs that cannot be parsed or
            # converted. Unlike a bare ``except``, this still lets
            # KeyboardInterrupt and SystemExit propagate.
            continue
        seq1.append(X1)
        seq2.append(X2)
        golds.append(score)

    x1, m1 = data_io.prepare_data(seq1)
    x2, m2 = data_io.prepare_data(seq2)
    m1 = data_io.seq2weight(x1, m1, weight4ind)
    m2 = data_io.seq2weight(x2, m2, weight4ind)
    scores = scoring_function(We, x1, x2, m1, m2, params)
    preds = np.squeeze(scores)
    return pearsonr(preds, golds)[0]