def ComputePrecisionK(modelfile, testfile, K_list):
    """Evaluate a saved model on a test file and report precision@K.

    For each test example, label scores are ranked highest-first and
    precision@K (fraction of the top-K predicted labels that are true)
    is averaged over all examples, for every K in ``K_list``.  Each
    average is printed and written to ``results/precAtK_model3_n`` as a
    ``K<TAB>precision`` line.

    Args:
        modelfile: path of the saved model to load.
        testfile: path of the test data file.
        K_list: iterable of int cut-off ranks K.
    """
    maxParagraphLength = 10
    maxParagraphs = 4
    labels = 8
    vocabularySize = 244
    # NOTE(review): Model/DataParser are project classes defined elsewhere.
    # The argument order here is (maxParagraphLength, maxParagraphs), the
    # reverse of the order used by ComputeFscore below — confirm which the
    # constructors actually expect.
    model = Model(maxParagraphLength, maxParagraphs, labels, vocabularySize)
    testing = DataParser(maxParagraphLength, maxParagraphs, labels, vocabularySize)
    print(testfile)
    testing.getDataFromfile(testfile)
    print("data loading done")
    print("no of test examples: " + str(testing.totalPages))
    model.load(modelfile)
    print("model loading done")
    testing.restore()

    # Collect ground truth and per-label prediction scores, one example
    # (batch of 1) at a time.
    truePre = []
    pred = []
    for _ in range(testing.totalPages):
        data = testing.nextBatch(1)
        truePre.append(data[0])
        pred.append(model.predict(data)[0])

    # Accumulate precision@K over all examples for each requested K.
    precAtK = {k: 0 for k in K_list}
    for i, scores in enumerate(pred):
        # Rank label ids by predicted score, highest first.
        ranked = sorted(enumerate(scores), key=lambda x: x[1], reverse=True)
        for k in K_list:
            hits = sum(1 for labId, _ in ranked[:k] if truePre[i][0][labId] == 1)
            precAtK[k] += float(hits) / float(k)

    # `with` guarantees the file is closed even if a write fails
    # (original used bare open()/close()).
    with open("results/precAtK_model3_n", "w") as f:
        for key in sorted(precAtK.keys()):
            print(precAtK[key] / len(pred))
            f.write(str(key) + "\t" + str(precAtK[key] / len(pred)) + "\n")
def ComputeFscore(modelfile, testfile, outputfile):
    """Tune per-label thresholds and append per-label F-scores to outputfile.

    The first half of the test data tunes a decision threshold per label
    (via ``thresholdTuning``); the second half is scored with ``f1_score``
    at that threshold.  One CSV row — a tag from ``sys.argv[9]``, the
    per-label F-scores, and their average — is appended to ``outputfile``.

    NOTE(review): this function reads its model hyper-parameters from
    ``sys.argv`` (positions 1-5, 10, 11) rather than from parameters, so it
    only works when invoked from the matching driver script.  A second
    function of the same name later in this file shadows this definition
    when the whole file is loaded — confirm which one callers want.

    Args:
        modelfile: path of the saved model to load.
        testfile: path of the test data file.
        outputfile: path of the CSV file the result row is appended to.
    """
    maxParagraphLength = int(sys.argv[1])
    maxParagraphs = int(sys.argv[2])
    filterSizes = [int(i) for i in sys.argv[3].split("-")]
    num_filters = int(sys.argv[4])
    wordEmbeddingDimension = int(sys.argv[5])
    # NOTE(review): lrate stays a string while the other hyper-parameters
    # are int()-converted — confirm Model accepts a string learning rate.
    lrate = sys.argv[10]
    poolLength = int(sys.argv[11])
    labels = 8
    vocabularySize = 244
    model = Model(maxParagraphs, maxParagraphLength, labels, vocabularySize,
                  filterSizes, num_filters, poolLength, wordEmbeddingDimension,
                  lrate)
    testing = DataParser(maxParagraphs, maxParagraphLength, labels, vocabularySize)
    testing.getDataFromfile(testfile)
    model.load(modelfile)
    print("loading done")
    testing.restore()

    # Ground truth and per-label prediction scores, one example at a time.
    truePre = []
    pred = []
    for _ in range(testing.totalPages):
        data = testing.nextBatch(1)
        truePre.append(data[0])
        pred.append(model.predict(data)[0])

    # First 50% of the data tunes thresholds; the rest is evaluated
    # (test and cross-validation files were merged upstream).
    valid = int(len(truePre) * 0.5)
    fScr = {}
    ConfusionMa = {}
    thresLab = {}
    for la in range(labels):
        if la % 25 == 0:
            print("Current label", la)
        # Tuning split.
        t = [truePre[i][0][la] for i in range(valid)]
        p = [pred[i][la] for i in range(valid)]
        bestF, bestThre = thresholdTuning(t, p)
        # Evaluation split, scored at the tuned threshold.
        t = [truePre[i][0][la] for i in range(valid, len(truePre))]
        p = np.array([pred[i][la] for i in range(valid, len(truePre))])
        fScr[la] = f1_score(t, p >= bestThre)
        ConfusionMa[la] = confusion_matrix(t, p > bestThre)
        thresLab[la] = bestThre

    output = sys.argv[9]
    sum_fscore = 0.0
    for i in range(labels):
        sum_fscore = sum_fscore + fScr[i]
        output = output + "," + str(fScr[i])
    # NOTE(review): the sum covers all `labels` entries but the average
    # divides by labels - 1 — confirm this off-by-one is intentional.
    output += "," + str(sum_fscore / float(labels - 1))
    print("Fscore at " + sys.argv[7] + " epochs: " + str(sum_fscore / float(labels - 1)))
    # `with` guarantees the append handle is closed even on error
    # (original used bare open()/close()).
    with open(outputfile, "a") as f:
        f.write(output + "\n")
def ComputeFscore(modelfile, testfile, outputfile):
    """Tune per-label thresholds, write per-label F-scores, return the average.

    The first half of the test data tunes a decision threshold per label
    (via ``thresholdTuning``); the second half is scored with ``f1_score``
    at that threshold.  ``outputfile`` receives one ``label,threshold,fscore``
    line per label followed by the averaged score.

    NOTE(review): this shadows an earlier function of the same name in this
    file when the whole file is loaded — confirm which one callers want.

    Args:
        modelfile: path of the saved model to load.
        testfile: path of the test data file.
        outputfile: path of the results file (overwritten).

    Returns:
        The summed per-label F-score divided by (labels - 1).
    """
    maxParagraphLength = 20
    maxParagraphs = 10
    labels = 8
    vocabularySize = 244
    model = Model(maxParagraphLength, maxParagraphs, labels, vocabularySize)
    testing = DataParser(maxParagraphLength, maxParagraphs, labels, vocabularySize)
    testing.getDataFromfile(testfile)
    model.load(modelfile)
    print("loading done")
    testing.restore()

    # Ground truth and per-label prediction scores, one example at a time.
    truePre = []
    pred = []
    for _ in range(testing.totalPages):
        data = testing.nextBatch(1)
        truePre.append(data[0])
        pred.append(model.predict(data)[0])

    # First 50% of the data tunes thresholds; the rest is evaluated
    # (test and cross-validation files were merged upstream).
    valid = int(len(truePre) * 0.5)
    fScr = {}
    ConfusionMa = {}
    thresLab = {}
    for la in range(labels):
        if la % 25 == 0:
            print("Current label", la)
        # Tuning split.
        t = [truePre[i][0][la] for i in range(valid)]
        p = [pred[i][la] for i in range(valid)]
        bestF, bestThre = thresholdTuning(t, p)
        # Evaluation split, scored at the tuned threshold.
        t = [truePre[i][0][la] for i in range(valid, len(truePre))]
        p = np.array([pred[i][la] for i in range(valid, len(truePre))])
        fScr[la] = f1_score(t, p >= bestThre)
        ConfusionMa[la] = confusion_matrix(t, p > bestThre)
        thresLab[la] = bestThre

    # `with` guarantees the file is closed even if a write fails
    # (original used bare open()/close()).
    sum_fscore = 0.0
    with open(outputfile, "w") as f:
        for i in range(labels):
            sum_fscore = sum_fscore + fScr[i]
            f.write(str(i) + "," + str(thresLab[i]) + "," + str(fScr[i]) + "\n")
        # NOTE(review): the sum covers all `labels` entries but the average
        # divides by labels - 1 — confirm this off-by-one is intentional.
        f.write(str(sum_fscore / float(labels - 1)))
    print(sum_fscore)
    print(sum_fscore / float((labels - 1)))
    return (sum_fscore / float((labels - 1)))