import math
import os
import sys

import numpy as np
from sklearn.metrics import confusion_matrix, f1_score

# Model and DataParser are expected to be importable from the
# WikiCategoryLabelling project once its directory is appended to sys.path
# inside the functions below.


def ComputePrecisionK(modelfile, testfile, K_list):
    # Make the WikiCategoryLabelling project importable.
    CURRENT_DIR = os.path.dirname(os.path.abspath("./WikiCategoryLabelling/"))
    sys.path.append(os.path.dirname(CURRENT_DIR + "/WikiCategoryLabelling/"))

    # Model/data dimensions used when the precision@K model was trained.
    maxParagraphLength = 250
    maxParagraphs = 10
    labels = 1001
    vocabularySize = 76390

    model = Model(maxParagraphLength, maxParagraphs, labels, vocabularySize)
    testing = DataParser(maxParagraphLength, maxParagraphs, labels, vocabularySize)
    testing.getDataFromfile(testfile)
    print("data loading done")
    print("no of test examples: " + str(testing.totalPages))
    model.load(modelfile)
    print("model loading done")

    batchSize = 10
    testing.restore()
    truePre = []
    pred = []
    # Run the whole test set through the model; the last batch may be smaller
    # than batchSize when totalPages is not a multiple of it.
    numBatches = int(math.ceil(testing.totalPages / float(batchSize)))
    for i in range(numBatches):
        if i < testing.totalPages // batchSize:
            data = testing.nextBatch(batchSize)
        else:
            data = testing.nextBatch(testing.totalPages % batchSize)
        truePre.extend(data[0])
        pre = model.predict(data)
        pred.extend(pre[0].tolist())

    # Precision@K: for each page, the fraction of the K highest-scoring labels
    # that are true labels, averaged over all pages.
    avgPrecK = [0.0] * len(K_list)
    for i, p in enumerate(pred):
        # Label indices sorted by predicted score, highest first.
        sortedL = sorted(range(len(p)), key=p.__getitem__, reverse=True)
        for k, K in enumerate(K_list):
            precK = sum(1 for l in sortedL[:K] if truePre[i][l] == 1)
            avgPrecK[k] += precK / float(K)
    avgPrecK = [a / len(pred) for a in avgPrecK]
    for p in avgPrecK:
        print(str(p))
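# --- Assumed helper -----------------------------------------------------------
# ComputeFscore below calls a thresholdTuning(t, p) helper that is not defined
# in this file (it presumably lives elsewhere in the WikiCategoryLabelling
# project). The following is only a minimal sketch of what such a helper could
# look like, assuming it sweeps a coarse grid of candidate thresholds for a
# single label and returns the best validation F1 together with the threshold
# that achieved it.
def thresholdTuning(t, p):
    t = np.asarray(t)
    p = np.asarray(p)
    bestF, bestThre = 0.0, 0.5
    for thre in np.arange(0.05, 1.0, 0.05):  # candidate thresholds in (0, 1)
        f = f1_score(t, p >= thre)
        if f > bestF:
            bestF, bestThre = f, thre
    return bestF, bestThre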
def ComputeFscore(modelfile, testfile, outputfile):
    # Make the WikiCategoryLabelling project importable.
    CURRENT_DIR = os.path.dirname(os.path.abspath("./WikiCategoryLabelling/"))
    sys.path.append(os.path.dirname(CURRENT_DIR + "/WikiCategoryLabelling/"))

    # Model/data dimensions used when the F-score model was trained.
    maxParagraphLength = 250
    maxParagraphs = 10
    labels = 1000
    vocabularySize = 150000

    model = Model(maxParagraphLength, maxParagraphs, labels, vocabularySize)
    testing = DataParser(maxParagraphLength, maxParagraphs, labels, vocabularySize)
    testing.getDataFromfile(testfile)
    model.load(modelfile)
    print("loading done")

    testing.restore()
    truePre = []
    pred = []
    # Score the test pages one at a time.
    for itr in range(testing.totalPages):
        data = testing.nextBatch()
        truePre.append(data[0])
        pre = model.predict(data)
        pred.append(pre[0])

    # Per-label threshold tuning: the first 35% of the test pages act as a
    # validation split for picking the threshold, the remaining 65% are used
    # to report F1 and the confusion matrix.
    valid = int(len(truePre) * 0.35)
    ConfusionMa = {}
    fScr = {}
    thresLab = {}
    for la in range(labels):
        if la % 25 == 0:
            print("Current label", la)
        # Tune the decision threshold for this label on the validation split.
        t = [truePre[i][la] for i in range(valid)]
        p = [pred[i][la] for i in range(valid)]
        bestF, bestThre = thresholdTuning(t, p)
        # Evaluate that threshold on the held-out remainder.
        t = [truePre[i][la] for i in range(valid, len(truePre))]
        p = np.array([pred[i][la] for i in range(valid, len(truePre))])
        fScr[la] = f1_score(t, p >= bestThre)
        ConfusionMa[la] = confusion_matrix(t, p >= bestThre)
        thresLab[la] = bestThre

    # Write one "label,threshold,f1" row per label.
    with open(outputfile, "w") as f:
        for i in range(labels):
            f.write(str(i) + "," + str(thresLab[i]) + "," + str(fScr[i]) + "\n")
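# Example driver, a minimal sketch: the checkpoint, test-set, and output paths
# below are placeholders chosen for illustration, not files that ship with the
# project.
if __name__ == "__main__":
    # Precision@1, @3, and @5 over the test pages.
    ComputePrecisionK("checkpoints/precision_model.ckpt",  # hypothetical checkpoint
                      "data/wiki_test.txt",                # hypothetical test file
                      K_list=[1, 3, 5])
    # Per-label threshold tuning and F1, written as "label,threshold,f1" rows.
    ComputeFscore("checkpoints/fscore_model.ckpt",         # hypothetical checkpoint
                  "data/wiki_test.txt",                    # hypothetical test file
                  "fscore_per_label.csv")                  # hypothetical output file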