Ejemplo n.º 1
0
def ComputePrecisionK(modelfile, testfile, K_list):

    CURRENT_DIR = os.path.dirname(os.path.abspath("./WikiCategoryLabelling/"))
    sys.path.append(os.path.dirname(CURRENT_DIR + "/WikiCategoryLabelling/"))

    maxParagraphLength = 250
    maxParagraphs = 10
    labels = 1001
    vocabularySize = 76390
    model = Model(maxParagraphLength, maxParagraphs, labels, vocabularySize)

    testing = DataParser(maxParagraphLength, maxParagraphs, labels,
                         vocabularySize)
    testing.getDataFromfile(testfile)
    print("data loading done")
    print("no of test examples: " + str(testing.totalPages))

    model.load(modelfile)

    print("model loading done")

    batchSize = 10

    testing.restore()
    truePre = []
    pred = []
    for i in range(math.ceil(testing.totalPages / batchSize)):
        if i < testing.totalPages / batchSize:
            data = testing.nextBatch(batchSize)
        else:
            data = testing.nextBatch(testing.totalPages % batchSize)
        truePre.extend(data[0])
        pre = model.predict(data)
        pred.extend(pre[0].tolist())

    avgPrecK = [0] * len(K_list)
    for i, p in enumerate(pred):
        sortedL = sorted(range(len(p)), key=p.__getitem__, reverse=True)
        for k, K in enumerate(K_list):
            labelK = sortedL[:K]
            precK = 0
            for l in labelK:
                if truePre[i][l] == 1:
                    precK += 1
            avgPrecK[k] += precK / float(K)
    avgPrecK = [float(a) / len(pred) for a in avgPrecK]

    for p in avgPrecK:
        print(str(p))
Ejemplo n.º 2
0
def ComputeFscore(modelfile, testfile, outputfile):

    CURRENT_DIR = os.path.dirname(os.path.abspath("./WikiCategoryLabelling/"))
    sys.path.append(os.path.dirname(CURRENT_DIR + "/WikiCategoryLabelling/"))

    maxParagraphLength = 250
    maxParagraphs = 10
    labels = 1000
    vocabularySize = 150000
    model = Model(maxParagraphLength, maxParagraphs, labels, vocabularySize)

    testing = DataParser(maxParagraphLength, maxParagraphs, labels,
                         vocabularySize)
    testing.getDataFromfile(testfile)

    model.load(modelfile)

    print("loading done")

    testing.restore()
    truePre = []
    pred = []
    for itr in range(testing.totalPages):
        data = testing.nextBatch()
        truePre.append(data[0])
        pre = model.predict(data)
        pred.append(pre[0])

    labelsCount = {}
    ConfusionMa = {}
    fScr = {}

    thres = 0.5
    valid = int(len(truePre) * 0.35)
    labelsCount = {}
    ConfusionMa = {}
    fScr = {}
    thresLab = {}
    for la in range(1000):
        if la % 25 == 0:
            print("Currnet label", la)
        t = []
        p = []
        for i in range(valid):
            t.append(truePre[i][la])
            p.append(pred[i][la])
        bestF, bestThre = thresholdTuning(t, p)

        t = []
        p = []
        for i in range(valid, len(truePre)):
            t.append(truePre[i][la])
            p.append(pred[i][la])

        p = np.array(p)
        fScr[la] = f1_score(t, p >= bestThre)
        ConfusionMa[la] = confusion_matrix(t, p > bestThre)
        thresLab[la] = bestThre

    f = open(outputfile, "w")
    for i in range(1000):
        inp = str(i) + "," + str(thresLab[i]) + "," + str(fScr[i]) + "\n"
        f.write(inp)
    f.close()