Code example #1
File: train.py Project: wywlds/textsim
def evaluateAll():
    """Levenshtein baseline on the SICK test split: the negated edit distance
    between the two sentences is used as a similarity score, calibrated to the
    1-5 relatedness range and evaluated overall and per score band."""
    # levendistance and metrics are project-local modules of textsim (not shown here)
    testds = open("../dataset/sick/test.txt")
    levendistances = []
    scores = []
    for l in testds.readlines():
        splits = l.split("\t")            # id, sentence A, sentence B, gold score
        sen1 = splits[1]
        sen2 = splits[2]
        score = float("%.1f" % float(splits[3]))    # gold relatedness, rounded to one decimal
        scores.append(score)
        levendistances.append(-levendistance.leven(sen1, sen2))  # negated: smaller distance means more similar
    calibrated = metrics.calibration(levendistances)
    metrics.evaluate(calibrated, scores)

    # partition the pairs into four gold-score bands ([1,2), [2,3), [3,4), [4,5])
    # and evaluate the calibrated predictions inside each band separately
    partitionScores = [[], [], [], []]
    calibratedScores = [[], [], [], []]
    for i in range(len(scores)):
        if scores[i] == 5.0:
            partitionScores[3].append(scores[i])
            calibratedScores[3].append(calibrated[i])
        else:
            position = int(scores[i]) - 1
            partitionScores[position].append(scores[i])
            calibratedScores[position].append(calibrated[i])
    print partitionScores[1][1:4]
    print calibratedScores[1][1:4]
    for i in range(4):
        # note: arguments here are (gold, calibrated), the reverse of the
        # (calibrated, gold) order used for the overall evaluation above
        metrics.evaluate(partitionScores[i], calibratedScores[i])
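
All six examples call the project-local helpers metrics.calibration and metrics.evaluate, and this one also calls levendistance.leven; none of them are shown on this page. The sketch below is only a guess at their behaviour: it assumes calibration() min-max rescales raw similarities onto the SICK 1-5 relatedness range and evaluate() reports Pearson correlation, which may differ from the real textsim implementation.

import numpy as np
from scipy.stats import pearsonr

def leven(s1, s2):
    # plain dynamic-programming Levenshtein (edit) distance between two strings
    prev = list(range(len(s2) + 1))
    for i, c1 in enumerate(s1, 1):
        cur = [i]
        for j, c2 in enumerate(s2, 1):
            cur.append(min(prev[j] + 1,                   # deletion
                           cur[j - 1] + 1,                # insertion
                           prev[j - 1] + (c1 != c2)))     # substitution
        prev = cur
    return prev[-1]

def calibration(raw_scores, lo=1.0, hi=5.0):
    # assumed behaviour: min-max rescale raw similarities onto the 1-5 range
    raw = np.asarray(raw_scores, dtype=float)
    span = raw.max() - raw.min()
    if span == 0:
        return np.full_like(raw, (lo + hi) / 2.0)
    return lo + (raw - raw.min()) * (hi - lo) / span

def evaluate(predicted, gold):
    # assumed behaviour: report Pearson correlation against the gold scores
    r, _ = pearsonr(predicted, gold)
    print("pearson r = %.4f" % r)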
Code example #2
File: ldapredict.py Project: wywlds/textsim
def predict(numt):
    """LDA baseline: load a pre-trained topic model and dictionary, score each
    SICK test pair by the similarity of the two sentences' topic vectors
    (numt is the number of topics/features), then calibrate and evaluate."""
    # models, corpora and similarities come from gensim; metrics is project-local
    lda = models.LdaModel.load("../dataset/sick/model.lda")
    #lda = models.LdaModel.load("../dataset/sick/modeltfidf.lda")
    dictionary = corpora.Dictionary.load("../dataset/sick/sick.dict")
    testds = open("../dataset/sick/test.txt")

    def splitSent(sent):
        # crude tokenizer: split on commas and spaces, dropping empty tokens
        words = re.split(",| ", sent)
        wordlist = []
        for word in words:
            if word == "":
                continue
            else:
                wordlist.append(word)
        return wordlist

    simscores = []
    scores = []
    for l in testds.readlines():
        items = l.split("\t")             # id, sentence A, sentence B, gold score
        sent1 = items[1]
        txt1 = dictionary.doc2bow(splitSent(sent1))
        sent2 = items[2]
        txt2 = dictionary.doc2bow(splitSent(sent2))
        corpus = [txt1, txt2]
        # similarity index over the two topic vectors; sim[0] is the similarity
        # between sentence 2 and sentence 1 in topic space
        index = similarities.MatrixSimilarity(lda[corpus], num_features=numt)
        sim = index[lda[txt2]]
        simscores.append(sim[0])

        score = float("%.1f" % float(items[3]))
        scores.append(score)
    calibrated = metrics.calibration(simscores)
    #print calibrated
    #print scores
    metrics.evaluate(calibrated, scores)

    partitionScores = [[], [], [], []]
    calibratedScores = [[], [], [], []]
    for i in range(len(scores)):
        if scores[i] == 5.0:
            partitionScores[3].append(scores[i])
            calibratedScores[3].append(calibrated[i])
        else:
            position = int(scores[i]) - 1
            partitionScores[position].append(scores[i])
            calibratedScores[position].append(calibrated[i])
    print partitionScores[1][1:4]
    print calibratedScores[1][1:4]
    for i in range(4):
        metrics.evaluate(partitionScores[i], calibratedScores[i])
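
predict() loads a pre-trained LDA model and dictionary from disk, but the code that builds them is not part of this example. Below is a minimal sketch of how such artefacts could be produced with gensim; the training file path ../dataset/sick/train.txt and the topic count are assumptions, not settings taken from the project.

import re
from gensim import corpora, models

def tokenize(sent):
    # same crude split on commas and spaces as splitSent() above
    return [w for w in re.split(",| ", sent) if w]

sentences = []
with open("../dataset/sick/train.txt") as f:            # assumed training split
    for line in f.readlines():
        items = line.split("\t")
        sentences.append(tokenize(items[1]))
        sentences.append(tokenize(items[2]))

dictionary = corpora.Dictionary(sentences)
bow_corpus = [dictionary.doc2bow(s) for s in sentences]
lda = models.LdaModel(bow_corpus, id2word=dictionary, num_topics=50)   # topic count is an assumption

dictionary.save("../dataset/sick/sick.dict")
lda.save("../dataset/sick/model.lda")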
Code example #3
File: newloss.py Project: wywlds/textsim
    #losses = tf.reduce_mean(loss)

    # excerpt: losses, prediction, the placeholders x1/x2/pivot, next_batch and
    # the training/test arrays are defined earlier in newloss.py
    train_op = tf.train.GradientDescentOptimizer(0.03).minimize(losses)
    lenth = len(inputs1)

    with tf.Session() as sess:
        sess.run(init)
        print "\n"
        for epoch in range(num_epoch):
            for i in range(lenth / batch_num):   # Python 2 integer division: batches per epoch
                (data1, data2, labels) = next_batch(batch_num, inputs1,
                                                    inputs2, originalTraining)
                sess.run(train_op,
                         feed_dict={
                             x1: data1,
                             x2: data2,
                             pivot: labels
                         })
            transform_result = sess.run(prediction,
                                        feed_dict={
                                            x1: test1[:lenthtest],
                                            x2: test2[:lenthtest],
                                            pivot: [[0, 0, 0, 0, 0]]  # dummy labels; unused at prediction time
                                        })
            # keep the first output column as the predicted score, then
            # calibrate it to the 1-5 range and evaluate after every epoch
            newScores = []
            for item in transform_result:
                newScores.append(item[0])

            calibrated = metrics.calibration(newScores)
            metrics.evaluate(calibrated, originalScores)
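
The excerpt calls a next_batch helper that is not shown. A plausible minimal version, assuming it simply draws a random mini-batch of aligned rows from the two input matrices and the label matrix:

import random

def next_batch(batch_num, inputs1, inputs2, labels):
    # hypothetical helper: sample batch_num aligned rows at random
    idx = random.sample(range(len(inputs1)), batch_num)
    data1 = [inputs1[i] for i in idx]
    data2 = [inputs2[i] for i in idx]
    batch_labels = [labels[i] for i in idx]
    return data1, data2, batch_labels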
Code example #4
File: directDAN.py Project: wywlds/textsim
    # excerpt: emb (a pre-trained word-embedding matrix) and the test* arrays
    # fed below are defined earlier in directDAN.py
    embedding = emb
    W = tf.Variable(emb,
                    trainable=False, name="W")   # frozen embedding lookup table

    pivot = tf.placeholder(tf.float32, shape=[None, 5], name="pivot")
    leftseqs = tf.placeholder(tf.int32, shape=[None, None], name="leftseqs")
    leftlength = tf.placeholder(tf.float32, shape=[None], name="leftlength")
    rightseqs = tf.placeholder(tf.int32, shape=[None, None], name="rightseqs")
    rightlength = tf.placeholder(tf.float32, shape=[None], name="rightlength")
    leftEmbedding = tf.nn.embedding_lookup(W, leftseqs)
    rightEmbedding = tf.nn.embedding_lookup(W, rightseqs)

    # sum the word embeddings of each sentence; multiplying by the length
    # placeholders yields an average only if reciprocal lengths (1/len) are fed
    leftSum = tf.reduce_sum(leftEmbedding, axis=1)
    leftAverage = tf.transpose(tf.multiply(tf.transpose(leftSum), leftlength))
    rightSum = tf.reduce_sum(rightEmbedding, axis=1)
    rightAverage = tf.transpose(tf.multiply(tf.transpose(rightSum), rightlength))

    product = tf.reduce_sum(tf.multiply(leftAverage, rightAverage), axis=1)   # dot product of the two sentence vectors
    init = tf.global_variables_initializer()

    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(init)
        print "\n"
        transform_result = sess.run(product,
                                    feed_dict={
                                        leftseqs: testlbatch,
                                        rightseqs: testrbatch,
                                        leftlength: testllength,
                                        rightlength: testrlength,
                                        pivot: testscores
                                    })
        print transform_result
        print len(transform_result)
        newScores = metrics.calibration(transform_result)
        metrics.evaluate(newScores, testscore)
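
leftAverage and rightAverage are obtained by multiplying the embedding sums by the leftlength/rightlength placeholders, so the graph only computes an average if reciprocal sentence lengths are fed. A small sketch of how those feeds might be prepared; this is an assumption about the calling code, which is not shown:

def reciprocal_lengths(batches_of_token_ids):
    # hypothetical feed preparation: feeding 1/len turns the embedding sum
    # into a mean; if the id sequences are padded, the true token count
    # (excluding padding) should be used instead of the padded length
    return [1.0 / max(len(seq), 1) for seq in batches_of_token_ids]

# e.g. testllength = reciprocal_lengths(testlbatch)
#      testrlength = reciprocal_lengths(testrbatch)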
Code example #5
File: predict.py Project: wywlds/textsim
    # excerpt from predict.py: Doc2Vec baseline - infer a vector for each test
    # sentence and use the cosine similarity of the pair as the prediction
    model = gensim.models.Doc2Vec.load("../dataset/sick/doc2vec")
    testdoc = open("../dataset/sick/test.txt")
    cosines = []
    scores = []
    for l in testdoc.readlines():
        items = l.split("\t")
        sent1 = items[1]
        sent2 = items[2]
        words1 = main.simple_preprocess(sent1)   # tokenisation helper from the project's main module
        words2 = main.simple_preprocess(sent2)
        vec1 = list(model.infer_vector(words1))
        vec2 = list(model.infer_vector(words2))
        cosines.append(1 - spatial.distance.cosine(vec1, vec2))   # cosine similarity
        score = float("%.1f" % float(items[3]))
        scores.append(score)
    calibrated = metrics.calibration(cosines)
    print calibrated
    print scores
    metrics.evaluate(calibrated, scores)

    partitionScores = [[], [], [], []]
    calibratedScores = [[], [], [], []]
    for i in range(len(scores)):
        if scores[i] == 5.0:
            partitionScores[3].append(scores[i])
            calibratedScores[3].append(calibrated[i])
        else:
            position = int(scores[i]) - 1
            partitionScores[position].append(scores[i])
            calibratedScores[position].append(calibrated[i])
    print partitionScores[1][1:4]
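
The Doc2Vec model loaded from ../dataset/sick/doc2vec is trained elsewhere in the project, and main.simple_preprocess is presumably the project's wrapper around gensim's tokenizer. The sketch below shows how an equivalent model could be trained with gensim; the training split, document tags and hyperparameters are assumptions, not project settings.

import gensim
from gensim.models.doc2vec import TaggedDocument
from gensim.utils import simple_preprocess

docs = []
with open("../dataset/sick/train.txt") as f:            # assumed training split
    for i, line in enumerate(f.readlines()):
        items = line.split("\t")
        for j, sent in enumerate(items[1:3]):           # sentence A and sentence B
            docs.append(TaggedDocument(simple_preprocess(sent), ["%d_%d" % (i, j)]))

# vector size, epoch count and min_count are assumptions
model = gensim.models.Doc2Vec(docs, vector_size=100, epochs=20, min_count=1)
model.save("../dataset/sick/doc2vec")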
Code example #6
File: main.py Project: wywlds/textsim

if __name__ == "__main__":
    # tf-idf baseline; id is a project-local module providing the idf map.
    # note that the loop below rebinds the name id to the pair id string,
    # shadowing both the module and the built-in
    idfmap = id.getIdf()
    testds = open("../dataset/sick/test.txt")
    tfidfs = []
    scores = []
    for l in testds.readlines():
        items = l.split("\t")             # id, sentence A, sentence B, gold score
        id = items[0]
        sentenceA = items[1]
        sentenceB = items[2]
        score = float("%.1f" % float(items[3]))
        tfidfs.append(tfidf(sentenceA, sentenceB))
        scores.append(score)
    calibrated = metrics.calibration(tfidfs)
    print calibrated
    print scores
    metrics.evaluate(calibrated, scores)

    partitionScores = [[], [], [], []]
    calibratedScores = [[], [], [], []]
    for i in range(len(scores)):
        if scores[i] == 5.0:
            partitionScores[3].append(scores[i])
            calibratedScores[3].append(calibrated[i])
        else:
            position = int(scores[i]) - 1
            partitionScores[position].append(scores[i])
            calibratedScores[position].append(calibrated[i])
    print partitionScores[1][1:4]
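
The tfidf(sentenceA, sentenceB) helper called in the loop is defined elsewhere in main.py. A hypothetical version consistent with the call site follows, assuming it computes a tf-idf weighted cosine similarity using the idf map returned by id.getIdf(); the real implementation may differ, and here the idf map is passed explicitly instead of being read from module scope.

import math
from collections import Counter

def tfidf_vector(sentence, idfmap):
    # raw term frequency weighted by idf; words missing from the map get idf 0
    counts = Counter(sentence.lower().split())
    return {w: c * idfmap.get(w, 0.0) for w, c in counts.items()}

def tfidf(sentenceA, sentenceB, idfmap=None):
    # cosine similarity between the tf-idf vectors of the two sentences
    if idfmap is None:
        idfmap = {}
    va = tfidf_vector(sentenceA, idfmap)
    vb = tfidf_vector(sentenceB, idfmap)
    dot = sum(va[w] * vb.get(w, 0.0) for w in va)
    na = math.sqrt(sum(v * v for v in va.values()))
    nb = math.sqrt(sum(v * v for v in vb.values()))
    return dot / (na * nb) if na and nb else 0.0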