Example #1
0
def get_threshold(We, words, Rel, rel, tm, relSize):
    f = open('../commonsendata/Eval/conceptnet/new_omcs_dev1.txt', 'r')
    lines = f.readlines()
    Exp_S = []
    r_list = []
    t1_list = []
    t2_list = []
    for i in lines:
        i = i.strip()
        i = i.split('\t')
        (r, t1, t2, score) = (i[0].strip(), i[1].strip(), i[2].strip(),
                              float(i[3]))
        t1id = lookupwordID(We, words, t1)
        t2id = lookupwordID(We, words, t2)
        t1_list.append(t1id)
        t2_list.append(t2id)
        r_list.append(r)

    t1batch = t1_list
    t2batch = t2_list

    x1, x1_mask, x1_length = tm.prepare_data(t1batch)
    x2, x2_mask, x2_length = tm.prepare_data(t2batch)

    v1 = tm.GetVector(x1, x1_mask, x1_length)
    v2 = tm.GetVector(x2, x2_mask, x2_length)

    for j in range(len(lines)):

        v_r = Rel[rel[r_list[j].lower()], :].reshape((relSize))
        input_vec = np.concatenate((v1[j], v_r, v2[j]), axis=0)

        softmaxScore = tm.score_func(input_vec)
        Exp_S.append(softmaxScore[0][0])
    right = 0
    wrong = 0
    threshold = 0
    accurancy = 0
    binaryScore = []
    Exp_S_sorted = sorted(Exp_S)
    for j in xrange(len(Exp_S)):
        temp_thr = Exp_S_sorted[j]
        for j1 in xrange(int(len(Exp_S) / 2)):
            if (Exp_S[j1] >= temp_thr):
                right = right + 1
            else:
                wrong = wrong + 1
        for j2 in xrange(int(len(Exp_S) / 2), int(len(Exp_S)), 1):
            if (Exp_S[j2] < temp_thr):
                right = right + 1
            else:
                wrong = wrong + 1
        if ((right / (len(Exp_S))) > accurancy):
            accurancy = (right / (len(Exp_S)))
            threshold = temp_thr
        right = 0
        wrong = 0

    print 'Dev1-Accurancy', accurancy
    return threshold
Example #2
0
def evaluate_conceptNet(We, words, Rel, rel, tm, memsize, relSize, fin):
    threshold = get_threshold(We, words, Rel, rel, tm, memsize, relSize, fin)
    f = open('../../commonsendata/Eval/conceptnet/new_omcs_dev2.txt', 'r')
    lines = f.readlines()
    Exp_S = []
    r_list = []
    t1_list = []
    t2_list = []
    for i in lines:
        i = i.strip()
        i = i.split('\t')
        (r, t1, t2, score) = (i[0].strip(), i[1].strip(), i[2].strip(),
                              float(i[3]))
        t1id = lookupwordID(We, words, t1)
        t2id = lookupwordID(We, words, t2)
        t1_list.append(t1id)
        t2_list.append(t2id)
        r_list.append(r)

    t1batch = t1_list
    t2batch = t2_list

    delim = (lookupwordID(We, words, "#"))

    batchTuple = [a + delim + b for a, b in zip(t1batch, t2batch)]
    xx, xx_mask = tm.prepare_data(batchTuple)
    vector = tm.GetVector(xx, xx_mask)

    for j in range(len(lines)):

        v_r = Rel[rel[r_list[j].lower()], :].reshape((1, relSize))
        gvector = vector[j].reshape((1, memsize))
        input_vec = np.concatenate((gvector, v_r), axis=1)

        #input_vec = np.concatenate((gv1,v_r,gv2),axis = 1)
        softmaxScore = tm.GetVectorNew(input_vec)

        Exp_S.append(softmaxScore[0][0])

    right = 0
    wrong = 0
    accurancy = 0
    for j1 in xrange(int(len(Exp_S) / 2)):
        if (Exp_S[j1] >= threshold):
            right = right + 1
        else:
            wrong = wrong + 1
    for j2 in xrange(int(len(Exp_S) / 2), int(len(Exp_S)), 1):
        if (Exp_S[j2] < threshold):
            right = right + 1
        else:
            wrong = wrong + 1
    accurancy = (right / (len(Exp_S)))

    print 'Dev2-Accurancy', accurancy
    fin.write('Dev2-Accurancy' + str(accurancy) + "\n")
    print 'Threshold', threshold
    fin.write('Threshold' + str(threshold) + "\n")
    return accurancy
Example #3
0
def evaluate_conceptNet(words, We, rel, Rel, tm, relSize):
    """Tune a threshold on dev1 and measure accuracy with it on dev2.

    ``get_threshold`` supplies the threshold and the dev1 accuracy.  Each
    dev2 line is split on tabs into ``relation, term1, term2, score``.  A
    tuple's score is ``inner(dot(emb1, block), emb2)``, where ``block`` is
    the ``relSize``-row slice of ``Rel`` selected by the relation index.  A
    tuple in the first half of the file counts as correct when its score is
    ``>=`` the threshold, a tuple in the second half when its score is
    ``<=`` the threshold.

    Args:
        words, We: passed through to ``lookupwordID``.
        rel: mapping from lower-cased relation name to a relation index.
        Rel: stacked relation matrix; relation ``k`` occupies rows
            ``k * relSize`` to ``k * relSize + relSize``.
        tm: model object providing ``prepare_data`` and ``GetVector``.
        relSize: number of rows per relation block.

    Returns:
        ``(accurancy1, accurancy2, threshold)``: the dev1 accuracy reported
        by ``get_threshold``, the dev2 accuracy as a float (0.0 when no
        tuples were scored), and the threshold used.
    """
    threshold, accurancy1 = get_threshold(
        '../data/conceptnet/new_omcs_dev1.txt', We, words, rel, Rel, tm,
        relSize)

    with open('../data/conceptnet/new_omcs_dev2.txt', 'r') as f:
        lines = f.readlines()

    T1 = []
    T2 = []
    R = []
    for line in lines:
        fields = line.strip().split('\t')
        r, t1, t2 = fields[0].strip(), fields[1].strip(), fields[2].strip()
        float(fields[3])  # score column is unused but must still be numeric
        T1.append(lookupwordID(We, words, t1))
        T2.append(lookupwordID(We, words, t2))
        tp = rel[r.lower()]
        R.append(Rel[tp * relSize:tp * relSize + relSize, :])

    x1, x1_mask = tm.prepare_data(T1)
    x2, x2_mask = tm.prepare_data(T2)
    emb1 = tm.GetVector(x1, x1_mask)
    emb2 = tm.GetVector(x2, x2_mask)

    Exp_S = [
        np.inner(np.dot(emb1[j], v_r), emb2[j]) for j, v_r in enumerate(R)
    ]

    total = len(Exp_S)
    half = total // 2
    right = (sum(1 for s in Exp_S[:half] if s >= threshold) +
             sum(1 for s in Exp_S[half:] if s <= threshold))
    # float() forces true division; plain int / int floors under Python 2.
    accurancy2 = float(right) / total if total else 0.0

    return accurancy1, accurancy2, threshold
Example #4
0
def evaluate_conceptNet(We, words, Rel, rel, tm, relSize):
    threshold = get_threshold(We, words, Rel, rel, tm, relSize)
    f = open('../commonsendata/Eval/conceptnet/new_omcs_dev2.txt', 'r')
    lines = f.readlines()
    Exp_S = []
    r_list = []
    t1_list = []
    t2_list = []
    tuple_words = []
    tuple_vecs = []
    for i in lines:
        i = i.strip()
        i = i.split('\t')
        (r, t1, t2, score) = (i[0].strip(), i[1].strip(), i[2].strip(),
                              float(i[3]))
        t1id = lookupwordID(We, words, t1)
        t2id = lookupwordID(We, words, t2)
        t1_list.append(t1id)
        t2_list.append(t2id)
        r_list.append(r)
        tuple_words.append(i)

    t1batch = t1_list[0:len(lines)]
    t2batch = t2_list[0:len(lines)]

    x1, x1_mask, x1_length = tm.prepare_data(t1batch)
    x2, x2_mask, x2_length = tm.prepare_data(t2batch)

    v1 = tm.GetVector(x1, x1_mask, x1_length)
    v2 = tm.GetVector(x2, x2_mask, x2_length)

    for j in range(len(lines)):
        v_r = Rel[rel[r_list[j].lower()], :].reshape((relSize))
        input_vec = np.concatenate((v1[j], v_r, v2[j]), axis=0)
        softmaxScore = tm.score_func(input_vec)
        tuple_vecs.append(tm.hidden_func(input_vec))
        Exp_S.append(softmaxScore[0][0])

    right = 0
    wrong = 0
    accurancy = 0
    for j1 in xrange(int(len(Exp_S) / 2)):
        if (Exp_S[j1] >= threshold):
            right = right + 1
        else:
            wrong = wrong + 1
    for j2 in xrange(int(len(Exp_S) / 2), int(len(Exp_S)), 1):
        if (Exp_S[j2] < threshold):
            right = right + 1
        else:
            wrong = wrong + 1
    accurancy = (right / (len(Exp_S)))

    print 'Dev2-Accurancy', accurancy
    print 'Threshold', threshold
    return accurancy, tuple_words, tuple_vecs
Example #5
0
def evaluate_conceptNet(We, words, Rel, rel, tm, relSize):
    threshold = get_threshold(We, words, Rel, rel, tm, relSize)
    f = open('../commonsendata/Eval/conceptnet/new_omcs_dev2.txt', 'r')
    lines = f.readlines()
    Exp_S = []
    r_list = []
    t1_list = []
    t2_list = []
    for i in lines:
        i = i.strip()
        i = i.split('\t')
        (r, t1, t2, score) = (i[0].strip(), i[1].strip(), i[2].strip(),
                              float(i[3]))
        t1id = lookupwordID(We, words, t1)
        t2id = lookupwordID(We, words, t2)
        t1_list.append(t1id)
        t2_list.append(t2id)
        r_list.append(r)

    t1batch = t1_list[0:len(lines)]
    t2batch = t2_list[0:len(lines)]

    x1, x1_mask, x1_length = tm.prepare_data(t1batch)
    x2, x2_mask, x2_length = tm.prepare_data(t2batch)

    v1 = tm.GetVector(x1, x1_mask, x1_length)
    v2 = tm.GetVector(x2, x2_mask, x2_length)

    for j in range(len(lines)):
        v_r = Rel[rel[r_list[j].lower()] *
                  relSize:rel[r_list[j].lower()] * relSize + relSize, :]
        temp1 = np.dot(v1[j], v_r)
        exp_score = np.inner(temp1, v2[j])
        Exp_S.append(exp_score)

    right = 0
    wrong = 0
    accurancy = 0
    for j1 in xrange(int(len(Exp_S) / 2)):
        if (Exp_S[j1] >= threshold):
            right = right + 1
        else:
            wrong = wrong + 1
    for j2 in xrange(int(len(Exp_S) / 2), int(len(Exp_S)), 1):
        if (Exp_S[j2] < threshold):
            right = right + 1
        else:
            wrong = wrong + 1
    accurancy = (right / (len(Exp_S)))

    print 'Dev2-Accurancy', accurancy
    print 'Threshold', threshold
    return accurancy
Example #6
0
def get_accu(We, words, Rel, rel, tm, relSize, threshold, filename):
    """Return the thresholded classification accuracy on ``filename``.

    Each line is split on tabs into ``relation, term1, term2, score``.
    Every tuple is scored with ``tm.score_func`` on the concatenation of the
    two term vectors and the relation row of ``Rel``.  A tuple in the first
    half of the list counts as correct when its score is ``>=``
    ``threshold``, a tuple in the second half when its score is ``<``
    ``threshold``.

    Args:
        We, words: passed through to ``lookupwordID``.
        Rel: relation matrix, indexed by row with ``rel[name]``.
        rel: mapping from lower-cased relation name to a row index of ``Rel``.
        tm: model object providing ``prepare_data``, ``GetVector`` and
            ``score_func``.
        relSize: number of elements the relation row is reshaped to.
        threshold: score cut-off separating the two halves.
        filename: path of the tab-separated evaluation file.

    Returns:
        The fraction of correctly classified tuples as a float.
    """
    with open(filename, 'r') as f:
        lines = f.readlines()

    # NOTE(review): these two hard-coded rows are scored with the file's own
    # rows and move the half-way split; they look like leftover debugging --
    # confirm before removing.
    lines.append('ReceivesAction\thockey\tplay on ice\t1')
    lines.append('AtLocation\trestroom\trest area\t1')

    r_list = []
    t1_list = []
    t2_list = []
    for line in lines:
        fields = line.strip().split('\t')
        r, t1, t2 = fields[0].strip(), fields[1].strip(), fields[2].strip()
        float(fields[3])  # score column is unused but must still be numeric
        t1_list.append(lookupwordID(We, words, t1))
        t2_list.append(lookupwordID(We, words, t2))
        r_list.append(r)

    x1, x1_mask, x1_length = tm.prepare_data(t1_list)
    x2, x2_mask, x2_length = tm.prepare_data(t2_list)

    v1 = tm.GetVector(x1, x1_mask, x1_length)
    v2 = tm.GetVector(x2, x2_mask, x2_length)

    Exp_S = []
    for j, r in enumerate(r_list):
        v_r = Rel[rel[r.lower()], :].reshape((relSize))
        input_vec = np.concatenate((v1[j], v_r, v2[j]), axis=0)
        Exp_S.append(tm.score_func(input_vec)[0][0])

    total = len(Exp_S)
    half = total // 2
    right = (sum(1 for s in Exp_S[:half] if s >= threshold) +
             sum(1 for s in Exp_S[half:] if s < threshold))
    # float() forces true division; plain int / int floors under Python 2.
    return float(right) / total
Example #7
0
def get_accu(We, words, Rel, rel, tm, relSize, threshold, filename):
    """Return the thresholded bilinear-score accuracy on ``filename``.

    Each line is split on tabs into ``relation, term1, term2, score``.  A
    tuple's score is ``inner(dot(v1, block), v2)``, where ``block`` is the
    ``relSize``-row slice of ``Rel`` selected by the relation index.  A
    tuple in the first half of the file counts as correct when its score is
    ``>=`` ``threshold``, a tuple in the second half when its score is ``<``
    ``threshold``.

    Args:
        We, words: passed through to ``lookupwordID``.
        Rel: stacked relation matrix; relation ``k`` occupies rows
            ``k * relSize`` to ``k * relSize + relSize``.
        rel: mapping from lower-cased relation name to a relation index.
        tm: model object providing ``prepare_data`` and ``GetVector``.
        relSize: number of rows per relation block.
        threshold: score cut-off separating the two halves.
        filename: path of the tab-separated evaluation file.

    Returns:
        The fraction of correctly classified tuples as a float (0.0 when the
        file yields no tuples).
    """
    with open(filename, 'r') as f:
        lines = f.readlines()

    r_list = []
    t1_list = []
    t2_list = []
    for line in lines:
        fields = line.strip().split('\t')
        r, t1, t2 = fields[0].strip(), fields[1].strip(), fields[2].strip()
        float(fields[3])  # score column is unused but must still be numeric
        t1_list.append(lookupwordID(We, words, t1))
        t2_list.append(lookupwordID(We, words, t2))
        r_list.append(r)

    x1, x1_mask, x1_length = tm.prepare_data(t1_list)
    x2, x2_mask, x2_length = tm.prepare_data(t2_list)

    v1 = tm.GetVector(x1, x1_mask, x1_length)
    v2 = tm.GetVector(x2, x2_mask, x2_length)

    Exp_S = []
    for j, r in enumerate(r_list):
        start = rel[r.lower()] * relSize
        v_r = Rel[start:start + relSize, :]
        Exp_S.append(np.inner(np.dot(v1[j], v_r), v2[j]))

    total = len(Exp_S)
    half = total // 2
    right = (sum(1 for s in Exp_S[:half] if s >= threshold) +
             sum(1 for s in Exp_S[half:] if s < threshold))
    # float() forces true division; plain int / int floors under Python 2.
    return float(right) / total if total else 0.0
Example #8
0
    def getpairs2(self, batch, params):
        """Build positive and corrupted tuple batches for training.

        Every item of ``batch`` is converted with ``convertToIndex`` into
        ``(relation, term1, term2, score)``.  When ``params.type`` is
        ``'MAX'``, one replacement first term, second term and relation are
        drawn for each item uniformly at random from the *other* items of
        the batch.  For any other ``params.type`` no replacements are drawn,
        so the corrupted batches and ``p3`` are built from empty lists.

        The positive batch joins each item's two terms with the id list of
        ``"#"``; the first corrupted batch swaps in the sampled first term,
        the second swaps in the sampled second term.

        Args:
            batch: sequence of raw tuples accepted by ``convertToIndex``.
            params: object whose ``type`` attribute selects the sampling mode.

        Returns:
            ``(R, p3, pTuple, pTupleMask, neTuple1, neTuple1Mask, neTuple2,
            neTuple2Mask)``: the relations, the sampled relations, and the
            ``self.prepare_data`` outputs for the positive and the two
            corrupted batches.

        Raises:
            ValueError: if ``params.type`` is ``'MAX'`` and the batch holds
                exactly one item, since there is no other item to sample
                from (the previous loop never terminated in that case).
        """
        Rel = self.getRel()
        we = self.getWe()
        newd = [
            convertToIndex(i, self.words, we, self.rel, Rel) for i in batch
        ]

        g1 = []
        g2 = []
        R = []
        for (r, t1, t2, _s) in newd:
            g1.append(t1)
            g2.append(t2)
            R.append(r)

        def pick_other(pool, skip):
            # Redraw until the index differs from the current item.
            while True:
                index = random.randint(0, len(pool) - 1)
                if index != skip:
                    return pool[index]

        length = len(batch)
        p11 = []
        p22 = []
        p3 = []
        if params.type == 'MAX':
            if length == 1:
                raise ValueError(
                    'getpairs2 needs at least two items in the batch to '
                    'sample negatives')
            for i in range(length):
                # Same draw order as before: first term, second term,
                # relation.
                p11.append(pick_other(g1, i))
                p22.append(pick_other(g2, i))
                p3.append(pick_other(R, i))

        delim = (lookupwordID(we, self.words, "#"))

        pT = [a + delim + b for a, b in zip(g1, g2)]
        pTuple, pTupleMask = self.prepare_data(pT)
        neT1 = [a + delim + b for a, b in zip(p11, g2)]
        neTuple1, neTuple1Mask = self.prepare_data(neT1)
        neT2 = [a + delim + b for a, b in zip(g1, p22)]
        neTuple2, neTuple2Mask = self.prepare_data(neT2)

        return (R, p3, pTuple, pTupleMask, neTuple1, neTuple1Mask, neTuple2,
                neTuple2Mask)
    def prepare_aedata(self, list_of_seqs, contextsize, words):
        """Build context windows and surface-form flags for every token.

        For each sequence the ids returned by ``lookupwordID`` are padded
        with ``contextsize`` zeros on the left and ``contextsize`` ones on
        the right; each token then gets the window of
        ``2 * contextsize + 1`` ids centred on it.  Alongside, at most one of
        ten flag columns is set per token, using the first rule that
        matches: ``<@MENTION>`` (0), multi-character token starting with
        ``#`` (1), ``rt`` (2), contains ``URL`` (3), number with at most one
        dot (4), contains ``$`` (5), ``:`` (7), ``...`` (8), any other single
        punctuation character (9), any other all-punctuation token (6).

        An empty token prints ``error`` and the offending sequence, then
        exits the process.

        Returns:
            ``(x, n_samples, D)``: the int32 window matrix, the number of
            sequences, and the int32 flag matrix (one row per token).
        """
        window = 2 * contextsize + 1
        lengths = [len(seq) for seq in list_of_seqs]
        total_tokens = sum(lengths)
        n_samples = len(list_of_seqs)

        D = np.zeros((total_tokens, 10)).astype('int32')
        x = np.zeros((total_tokens, window)).astype('int32')

        row = 0
        for seq, seq_len in zip(list_of_seqs, lengths):
            seq_id = lookupwordID(words, seq)
            padded = [0] * contextsize + seq_id + [1] * contextsize
            for pos in range(seq_len):
                x[row, :] = padded[pos:pos + window]
                token = seq[pos]
                if len(token) == 0:
                    print 'error'
                    print seq
                    sys.exit()

                only_punct = all(ch in string.punctuation for ch in token)

                # First matching rule wins; tokens matching none keep an
                # all-zero flag row.
                if token == '<@MENTION>':
                    column = 0
                elif token[0] == '#' and len(token) != 1:
                    column = 1
                elif token == 'rt':
                    column = 2
                elif 'URL' in token:
                    column = 3
                elif token.replace('.', '', 1).isdigit():
                    column = 4
                elif '$' in token:
                    column = 5
                elif token == ':':
                    column = 7
                elif token == '...':
                    column = 8
                elif len(token) == 1 and token[0] in string.punctuation:
                    column = 9
                elif only_punct:
                    column = 6
                else:
                    column = None

                if column is not None:
                    D[row, column] = 1

                row += 1

        return x, n_samples, D
Example #10
0
def get_threshold(evafile, We, words, rel, Rel, tm, relSize):
    """Choose the score cut-off that classifies the tuples in ``evafile`` best.

    Each line is split on tabs into ``relation, term1, term2, score``.  A
    tuple's score is ``inner(dot(emb1, block), emb2)``, where ``block`` is
    the ``relSize``-row slice of ``Rel`` selected by the relation index.
    Each observed score is then tried as a threshold: a tuple in the first
    half of the file counts as correct when its score is ``>=`` the
    threshold, a tuple in the second half when its score is ``<=`` the
    threshold.

    Args:
        evafile: path of the tab-separated evaluation file.
        We, words: passed through to ``lookupwordID``.
        rel: mapping from lower-cased relation name to a relation index.
        Rel: stacked relation matrix; relation ``k`` occupies rows
            ``k * relSize`` to ``k * relSize + relSize``.
        tm: model object providing ``prepare_data`` and ``GetVector``.
        relSize: number of rows per relation block.

    Returns:
        ``(threshold, accurancy)``: the smallest candidate threshold reaching
        the best accuracy (0 when no candidate classifies any tuple
        correctly) and that accuracy as a float.
    """
    with open(evafile, 'r') as f1:
        lines = f1.readlines()

    T1 = []
    T2 = []
    R = []
    for line in lines:
        fields = line.strip().split('\t')
        r, t1, t2 = fields[0].strip(), fields[1].strip(), fields[2].strip()
        float(fields[3])  # score column is unused but must still be numeric
        T1.append(lookupwordID(We, words, t1))
        T2.append(lookupwordID(We, words, t2))
        tp = rel[r.lower()]
        R.append(Rel[tp * relSize:tp * relSize + relSize, :])

    x1, x1_mask = tm.prepare_data(T1)
    x2, x2_mask = tm.prepare_data(T2)
    emb1 = tm.GetVector(x1, x1_mask)
    emb2 = tm.GetVector(x2, x2_mask)

    Exp_S = [
        np.inner(np.dot(emb1[j], v_r), emb2[j]) for j, v_r in enumerate(R)
    ]

    total = len(Exp_S)
    half = total // 2
    first_half = Exp_S[:half]
    second_half = Exp_S[half:]

    threshold = 0
    best_right = 0
    for candidate in sorted(Exp_S):
        right = (sum(1 for s in first_half if s >= candidate) +
                 sum(1 for s in second_half if s <= candidate))
        # Compare raw counts: the old test used ``right / len(...)``, which
        # floors to 0 under Python 2 and so almost never updated the
        # threshold.
        if right > best_right:
            best_right = right
            threshold = candidate

    accurancy = float(best_right) / total if total else 0.0
    return threshold, accurancy
Example #11
0
def evaCOPA(evafile, words, We, rel, Rel, evaType, tm, relSize):
    """Evaluate two-alternative questions read from ``evafile``.

    Starting at line index 4 and stepping by 6, each item is read from four
    consecutive lines: a header carrying ``alternative=`` (the correct
    answer) and possibly the word ``effect``, then the ``<p>``, ``<a1>`` and
    ``<a2>`` lines.  Items are encoded in batches of 100.  When the header
    contains ``effect`` the pair is scored as ``score(premise, alternative)``,
    otherwise as ``score(alternative, premise)``.  ``evaType`` selects which
    element of the ``score`` result is compared: ``'max'`` -> 0,
    ``'sum'`` -> 1, ``'cause'`` -> 2.  Alternative 1 is chosen only when its
    score is strictly greater.

    Args:
        evafile: path of the question file.
        words, We, rel, Rel, relSize: passed through to ``lookupwordID`` and
            ``score``.
        evaType: ``'max'``, ``'sum'`` or ``'cause'`` (case-insensitive).
        tm: model object providing ``prepare_data`` and ``GetVector``.

    Returns:
        ``(accuracy, totalScore1, totalScore2, trueAns)``: the fraction of
        correctly answered items as a float (0.0 when the file yields no
        items), the full ``score`` results for each alternative, and the
        answer strings parsed from the file.

    Raises:
        ValueError: if ``evaType`` is not one of the supported modes.
    """
    score_index = {'max': 0, 'sum': 1, 'cause': 2}
    mode = evaType.lower()
    if mode not in score_index:
        # Previously an unknown mode left score1/score2 unbound.
        raise ValueError('unsupported evaType: %r' % (evaType,))
    pick = score_index[mode]

    with open(evafile, 'r') as f:
        lines = f.readlines()

    trueAns = []
    q = []
    alter1 = []
    alter2 = []
    causality = []
    for i in range(4, len(lines) - 1, 6):
        header = lines[i]
        trueAns.append(
            header[header.find('alternative=') + 13:header.find('>') - 1])
        causality.append(1 if header.find('effect') != -1 else 0)

        premise = lines[i + 1]
        singleq = premise[premise.find('<p>') + 3:premise.find('</p>') - 1]
        q.append(lookupwordID(We, words, singleq))

        first = lines[i + 2]
        singleAlter1 = first[first.find('<a1>') + 4:first.find('</a1>') - 1]
        alter1.append(lookupwordID(We, words, singleAlter1))

        second = lines[i + 3]
        singleAlter2 = second[second.find('<a2>') + 4:
                              second.find('</a2>') - 1]
        alter2.append(lookupwordID(We, words, singleAlter2))

    same = 0
    diff = 0
    totalScore1 = []
    totalScore2 = []
    batch_size = 100
    for idx in range(0, len(q), batch_size):
        stop = min(idx + batch_size, len(q))
        qq = q[idx:stop]
        alter1q = alter1[idx:stop]
        alter2q = alter2[idx:stop]
        causalityq = causality[idx:stop]

        x0, x0_mask = tm.prepare_data(qq)
        x1, x1_mask = tm.prepare_data(alter1q)
        x2, x2_mask = tm.prepare_data(alter2q)
        emb0 = tm.GetVector(x0, x0_mask)
        emb1 = tm.GetVector(x1, x1_mask)
        emb2 = tm.GetVector(x2, x2_mask)

        # Iterate over the real batch length so a short final batch does not
        # index past the end.
        for j in range(len(qq)):
            if causalityq[j] == 0:
                scores1 = score(emb1[j], emb0[j], words, We, rel, Rel, relSize)
                scores2 = score(emb2[j], emb0[j], words, We, rel, Rel, relSize)
            else:
                scores1 = score(emb0[j], emb1[j], words, We, rel, Rel, relSize)
                scores2 = score(emb0[j], emb2[j], words, We, rel, Rel, relSize)
            totalScore1.append(scores1)
            totalScore2.append(scores2)

            ans = 1 if scores1[pick] > scores2[pick] else 2
            # Compare with this item's own answer (idx + j), not with the
            # first answer of the batch.
            if ans == int(trueAns[idx + j]):
                same += 1
            else:
                diff += 1

    answered = same + diff
    # float() forces true division; plain int / int floors under Python 2.
    accuracy = float(same) / answered if answered else 0.0
    return accuracy, totalScore1, totalScore2, trueAns
Example #12
0
def get_threshold(We, words, Rel, rel, tm, relSize, dev1_file):
    f = open(dev1_file, 'r')
    lines = f.readlines()
    Exp_S = []
    r_list = []
    t1_list = []
    t2_list = []
    for i in lines:
        i = i.strip()
        i = i.split('\t')
        (r, t1, t2, score) = (i[0].strip(), i[1].strip(), i[2].strip(),
                              float(i[3]))
        t1id = lookupwordID(We, words, t1)
        t2id = lookupwordID(We, words, t2)
        t1_list.append(t1id)
        t2_list.append(t2id)
        r_list.append(r)

    t1batch = t1_list
    t2batch = t2_list

    x1, x1_mask, x1_length = tm.prepare_data(t1batch)
    x2, x2_mask, x2_length = tm.prepare_data(t2batch)

    v1 = tm.GetVector(x1, x1_mask, x1_length)
    v2 = tm.GetVector(x2, x2_mask, x2_length)

    for j in range(len(lines)):

        v_r = Rel[rel[r_list[j].lower()] *
                  relSize:rel[r_list[j].lower()] * relSize + relSize, :]

        temp1 = np.dot(v1[j], v_r)
        exp_score = np.inner(temp1, v2[j])

        Exp_S.append(exp_score)
    right = 0
    wrong = 0
    threshold = 0
    accurancy = 0
    binaryScore = []
    Exp_S_sorted = sorted(Exp_S)
    for j in xrange(len(Exp_S)):
        temp_thr = Exp_S_sorted[j]
        for j1 in xrange(int(len(Exp_S) / 2)):
            if (Exp_S[j1] >= temp_thr):
                right = right + 1
            else:
                wrong = wrong + 1
        for j2 in xrange(int(len(Exp_S) / 2), int(len(Exp_S)), 1):
            if (Exp_S[j2] < temp_thr):
                right = right + 1
            else:
                wrong = wrong + 1
        if ((right / (len(Exp_S))) > accurancy):
            accurancy = (right / (len(Exp_S)))
            threshold = temp_thr
        right = 0
        wrong = 0

    print 'Dev1-Accurancy', accurancy
    return threshold
Example #13
0
def get_threshold(We, words, Rel, rel, tm, memsize, relSize, fin):
    f = open('../../commonsendata/Eval/conceptnet/new_omcs_dev1.txt', 'r')
    lines = f.readlines()
    Exp_S = []
    r_list = []
    t1_list = []
    t2_list = []
    for i in lines:
        i = i.strip()
        i = i.split('\t')
        (r, t1, t2, score) = (i[0].strip(), i[1].strip(), i[2].strip(),
                              float(i[3]))
        t1id = lookupwordID(We, words, t1)
        t2id = lookupwordID(We, words, t2)
        t1_list.append(t1id)
        t2_list.append(t2id)
        r_list.append(r)

    t1batch = t1_list[0:len(lines)]
    t2batch = t2_list[0:len(lines)]
    print 't1batch: ', len(t1batch)

    delim = (lookupwordID(We, words, "#"))

    batchTuple = [a + delim + b for a, b in zip(t1batch, t2batch)]
    xx, xx_mask = tm.prepare_data(batchTuple)
    vector = tm.GetVector(xx, xx_mask)

    for j in range(len(lines)):

        v_r = Rel[rel[r_list[j].lower()], :].reshape((1, relSize))
        vectorg = vector[j].reshape((1, memsize))
        input_vec = np.concatenate((vectorg, v_r), axis=1)

        softmaxScore = tm.GetVectorNew(input_vec)
        Exp_S.append(softmaxScore[0][0])
    right = 0
    wrong = 0
    threshold = 0
    accurancy = 0
    binaryScore = []
    Exp_S_sorted = sorted(Exp_S)
    for j in xrange(len(Exp_S)):
        temp_thr = Exp_S_sorted[j]
        for j1 in xrange(int(len(Exp_S) / 2)):
            if (Exp_S[j1] >= temp_thr):
                right = right + 1
            else:
                wrong = wrong + 1
        for j2 in xrange(int(len(Exp_S) / 2), int(len(Exp_S)), 1):
            if (Exp_S[j2] < temp_thr):
                right = right + 1
            else:
                wrong = wrong + 1
        if ((right / (len(Exp_S))) > accurancy):
            accurancy = (right / (len(Exp_S)))
            threshold = temp_thr
        right = 0
        wrong = 0

    print 'Dev1-Accurancy', accurancy
    fin.write('Dev1-Accurancy' + str(accurancy) + "\n")
    return threshold