Example #1
def ComputePrecisionK(modelfile, testfile, K_list):
    # Computes precision@K on the test set for every K in K_list and writes
    # the results to results/precAtK_model3_n.
    # Assumes the project's Model and DataParser classes are imported at module level.

    maxParagraphLength = 10
    maxParagraphs = 4
    #nlabels=1001
    #vocabularySize=76391
    labels = 8
    vocabularySize = 244
    model = Model(maxParagraphLength, maxParagraphs, labels, vocabularySize)

    testing = DataParser(maxParagraphLength, maxParagraphs, labels,
                         vocabularySize)
    print(testfile)
    testing.getDataFromfile(testfile)
    print("data loading done")
    print("no of test examples: " + str(testing.totalPages))

    model.load(modelfile)

    print("model loading done")

    batchSize = 1

    testing.restore()
    truePre = []
    pred = []
    # Predict one test page at a time, collecting the ground-truth label
    # vectors in truePre and the predicted label probabilities in pred.
    for itr in range(testing.totalPages):
        data = testing.nextBatch(1)
        truePre.append(data[0])
        pre = model.predict(data)
        pred.append(pre[0])

    precAtK = {}
    for itr in K_list:
        precAtK[itr] = 0

    # For every test page, rank labels by predicted probability and count how
    # many of the top-K ranked labels appear in the ground truth (precision@K).
    for i, v in enumerate(pred):
        temp = [(labId, labProb) for labId, labProb in enumerate(v)]
        #     print(temp)
        temp = sorted(temp, key=lambda x: x[1], reverse=True)
        for ele in K_list:
            pBag = 0
            for itr in range(ele):
                if truePre[i][0][temp[itr][0]] == 1:
                    pBag += 1
            #         print(float(pBag)/float(ele))
            precAtK[ele] += float(pBag) / float(ele)

    f = open("results/precAtK_model3_n", "w")
    for key in sorted(precAtK.keys()):
        #     print(key, precAtK[key]/len(pred))
        print(precAtK[key] / len(pred))
        f.write(str(key) + "\t" + str(precAtK[key] / len(pred)) + "\n")
    f.close()
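A minimal invocation sketch for the function above; the checkpoint path, test-file path, and K values are placeholders rather than names from the original project:

# Hypothetical call: paths and K values are illustrative only.
ComputePrecisionK("results/model3.ckpt", "data/test.txt", [1, 3, 5])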
Example #2
def ComputeFscore(modelfile, testfile, outputfile):
    # Tunes a per-label decision threshold on half of the data, then reports
    # per-label and averaged F-scores on the other half.
    # Assumes module-level imports of sys, numpy as np, sklearn.metrics'
    # f1_score and confusion_matrix, and the project's Model, DataParser
    # and thresholdTuning helpers.
    maxParagraphLength = int(sys.argv[1])
    maxParagraphs = int(sys.argv[2])
    filterSizes = [int(i) for i in sys.argv[3].split("-")]
    num_filters = int(sys.argv[4])
    wordEmbeddingDimension = int(sys.argv[5])
    # batchSize= int(sys.argv[6])
    # epochs= int(sys.argv[7])
    # folder_name = sys.argv[8]
    # output = sys.argv[9]
    lrate = float(sys.argv[10])  # cast so the model receives a numeric learning rate
    poolLength = int(sys.argv[11])

    labels = 8
    vocabularySize = 244

    model = Model(maxParagraphs, maxParagraphLength, labels, vocabularySize,
                  filterSizes, num_filters, poolLength, wordEmbeddingDimension,
                  lrate)

    testing = DataParser(maxParagraphs, maxParagraphLength, labels,
                         vocabularySize)
    testing.getDataFromfile(testfile)

    model.load(modelfile)

    print("loading done")

    testing.restore()
    truePre = []
    pred = []
    for itr in range(testing.totalPages):
        data = testing.nextBatch(1)
        truePre.append(data[0])
        pre = model.predict(data)
        pred.append(pre[0])

    # The test and cross-validation files were merged, so the first 50% of
    # examples is used for per-label threshold tuning and the rest for scoring.
    valid = int(len(truePre) * 0.5)
    ConfusionMa = {}
    fScr = {}
    thresLab = {}
    for la in range(labels):
        if la % 25 == 0:
            print("Current label", la)
        t = []
        p = []
        for i in range(valid):
            t.append(truePre[i][0][la])
            p.append(pred[i][la])
        bestF, bestThre = thresholdTuning(t, p)

        t = []
        p = []
        for i in range(valid, len(truePre)):
            t.append(truePre[i][0][la])
            p.append(pred[i][la])

        p = np.array(p)
        fScr[la] = f1_score(t, p >= bestThre)
        ConfusionMa[la] = confusion_matrix(t, p >= bestThre)  # same >= threshold as the F-score
        thresLab[la] = bestThre

    f = open(outputfile, "a")
    output = sys.argv[9]

    sum_fscore = 0.0
    for i in range(labels):
        sum_fscore = sum_fscore + fScr[i]
        output = output + "," + str(fScr[i])
    output += "," + str(sum_fscore / float(labels - 1))
    print("Fscore at " + sys.argv[7] + " epochs: " +
          str(sum_fscore / float(labels - 1)))
    f.write(output + "\n")
    f.close()
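This variant reads its hyperparameters from sys.argv (indices 1-5, 7, and 9-11), so it is meant to be launched from the command line. A hedged invocation sketch follows; the script name, paths, and argument values are placeholders:

# Hypothetical command line matching the argv indices read above:
#   python evaluate_cnn.py 20 10 3-4-5 128 100 32 50 runs/exp1 exp1 0.001 2
# (maxParagraphLength, maxParagraphs, filterSizes, num_filters,
#  wordEmbeddingDimension, batchSize, epochs, folder_name, output tag,
#  lrate, poolLength)
ComputeFscore("runs/exp1/model.ckpt", "data/test.txt", "results/fscores.csv")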
Example #3
def ComputeFscore(modelfile, testfile, outputfile):
    # Variant of Example #2 with hard-coded hyperparameters; writes per-label
    # thresholds and F-scores to outputfile and returns the averaged F-score.
    # Assumes module-level imports of numpy as np, sklearn.metrics' f1_score and
    # confusion_matrix, and the project's Model, DataParser and thresholdTuning helpers.
    maxParagraphLength = 20
    maxParagraphs = 10
    #nlabels=1001
    #vocabularySize=76391
    labels = 8
    vocabularySize = 244
    model = Model(maxParagraphLength, maxParagraphs, labels, vocabularySize)

    testing = DataParser(maxParagraphLength, maxParagraphs, labels,
                         vocabularySize)
    testing.getDataFromfile(testfile)

    model.load(modelfile)

    print("loading done")

    testing.restore()
    truePre = []
    pred = []
    for itr in range(testing.totalPages):
        data = testing.nextBatch(1)
        truePre.append(data[0])
        pre = model.predict(data)
        pred.append(pre[0])

    # The test and cross-validation files were merged, so the first 50% of
    # examples is used for per-label threshold tuning and the rest for scoring.
    valid = int(len(truePre) * 0.5)
    ConfusionMa = {}
    fScr = {}
    thresLab = {}
    for la in range(labels):
        if la % 25 == 0:
            print("Current label", la)
        t = []
        p = []
        for i in range(valid):
            t.append(truePre[i][0][la])
            p.append(pred[i][la])
        bestF, bestThre = thresholdTuning(t, p)

        t = []
        p = []
        for i in range(valid, len(truePre)):
            t.append(truePre[i][0][la])
            p.append(pred[i][la])

        p = np.array(p)
        fScr[la] = f1_score(t, p >= bestThre)
        ConfusionMa[la] = confusion_matrix(t, p >= bestThre)  # same >= threshold as the F-score
        thresLab[la] = bestThre

    f = open(outputfile, "w")
    sum_fscore = 0.0
    for i in range(labels):
        sum_fscore = sum_fscore + fScr[i]
        inp = str(i) + "," + str(thresLab[i]) + "," + str(fScr[i]) + "\n"
        f.write(inp)
    f.write(str(sum_fscore / float(labels - 1)))

    print(sum_fscore)
    print(sum_fscore / float((labels - 1)))
    f.close()
    return (sum_fscore / float((labels - 1)))
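A minimal usage sketch for this variant; the paths are placeholders, and the return value is the summed F-score divided by (labels - 1), as computed above:

# Hypothetical call: paths are illustrative only.
avg_fscore = ComputeFscore("results/model3.ckpt", "data/test.txt",
                           "results/fscore_model3.txt")
print("averaged F-score:", avg_fscore)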