# Collapse per-(referee, reference) predictions into one results line per referee,
# then emit an empty results line for every id in the full-id list that got none.
#
# NOTE(review): this section was received with its line breaks and indentation
# stripped; the nesting below is a reconstruction and should be confirmed against
# the original file. It relies on mapFile, predictionFile, resultsFile,
# currentReferee, refCounts and fullIdsLocation being set up before this point
# (not visible here).
refs = []
# Referees whose results line has already been written (used as a set; the
# stored value is always 1).
doneRefs = {}
# The map file and the prediction file are consumed in lockstep, one line each.
for mapEntry, prediction in izip(mapFile, predictionFile):
    # Map lines are "referee:reference"; prediction lines carry exactly three
    # whitespace-separated fields. Either unpacking raises ValueError otherwise.
    referee, reference = mapEntry.strip().split(":")
    label, confidence, invConfidence = prediction.strip().split()
    if referee != currentReferee:
        # Referee changed: flush whatever was collected for the previous one.
        if currentReferee:
            predictionOut = [x[0] for x in refs]
            resultsFile.write(currentReferee + ", " + " ".join(predictionOut) + "\n")
            doneRefs[currentReferee] = 1
            refs = []
        currentReferee = referee
    else:
        # NOTE(review): because candidate handling lives in this else branch, the
        # row that triggers the referee switch is never considered as a candidate
        # itself — confirm this is intended.
        if label == "1":
            if reference in refCounts:
                # A known count for this reference replaces the confidence field
                # read from the prediction line (which is still a string here).
                confidence = refCounts[reference]
            # NOTE(review): whether this call belongs inside the refCounts check
            # above cannot be determined from the flattened source — confirm.
            # appendMax is defined in the papers module; presumably the 10 caps
            # the number of kept candidates — TODO confirm.
            refs = papers.appendMax((reference, confidence), refs, 10)
# Flush the final referee, which the loop never writes on its own.
predictionOut = [x[0] for x in refs]
resultsFile.write(currentReferee + ", " + " ".join(predictionOut) + "\n")
doneRefs[currentReferee] = 1
# Pad the results with an empty prediction line for every id not written above.
with open(fullIdsLocation, "r") as fullIds:
    for line in fullIds:
        # Requires exactly one comma per line; preds is read but not used.
        evalId, preds = line.split(",")
        if evalId not in doneRefs:
            resultsFile.write(evalId + ", " + "\n")
def testMaxAppendEnd(self):
    # A pair whose second element (9) is larger than every existing one is
    # expected to come back as the last entry of the returned list.
    existing = [(1, 3), (2, 5), (3, 8)]
    expected = [(1, 3), (2, 5), (3, 8), (4, 9)]
    actual = papers.appendMax((4, 9), existing, 6)
    self.assertEqual(actual, expected)
def testMaxAppendEmpty(self):
    # Appending to an empty list is expected to yield a list holding only the
    # new pair.
    empty = []
    expected = [(1, 2)]
    actual = papers.appendMax((1, 2), empty, 3)
    self.assertEqual(actual, expected)
def testMaxAppendLess(self):
    # A pair whose second element (6) falls between existing ones (5 and 8) is
    # expected to be placed between them in the returned list.
    existing = [(1, 3), (2, 5), (3, 8)]
    expected = [(1, 3), (2, 5), (4, 6), (3, 8)]
    actual = papers.appendMax((4, 6), existing, 4)
    self.assertEqual(actual, expected)
print "----------------------------------------------------------------------------" #heurisitic based prediction predictions = [] with open(predictionLocation, "w", 0) as file: file.write("Id,References\n") for paper in validationData.papersByRef: currentPaper = validationData.papersByRef[paper] print "-Processing paper: ", paper refs = [] if currentPaper.canonicalVenue in trainingData.venueReferences: print "For paper: ", paper, " there are ", len(trainingData.venueReferences[currentPaper.canonicalVenue]), " venues" for venue in trainingData.venueReferences[currentPaper.canonicalVenue]: for nextPaperId in trainingData.indicesByCanonicalVenue[venue]: nextPaper = trainingData.papersByRef[nextPaperId] if(nextPaper.year < currentPaper.year and nextPaper.index != currentPaper.index): sim = currentPaper.abstractCosineSimilarity(nextPaper.abstractList) refs = papers.appendMax((nextPaper.index, sim), refs, 10) prediction = [x[0] for x in refs] # predictions.append((paper, )) else: prediction = [] # predictions.append((paper, [])) file.write(paper + ", " + " ".join(prediction) + "\n") print "----------------------------------------------------------------------------" print "- Number of training papers: ", len(trainingData.papersByRef) print "- Number of validation papers: ", len(validationData.papersByRef) print "----------------------------------------------------------------------------"
def testMaxAppendEmpty(self):
    # With nothing stored yet, the returned list is expected to contain just
    # the appended pair.
    newEntry = (1, 2)
    stored = []
    outcome = papers.appendMax(newEntry, stored, 3)
    self.assertEqual(outcome, [(1, 2)])
def testMaxAppendEnd(self):
    # The new pair carries the largest second element, so the expected result
    # lists it after (3, 8).
    newEntry = (4, 9)
    stored = [(1, 3), (2, 5), (3, 8)]
    outcome = papers.appendMax(newEntry, stored, 6)
    self.assertEqual(outcome, [(1, 3), (2, 5), (3, 8), (4, 9)])
def testMaxAppendLess(self):
    # The new pair's second element (6) lies between 5 and 8, so the expected
    # result lists it between (2, 5) and (3, 8).
    newEntry = (4, 6)
    stored = [(1, 3), (2, 5), (3, 8)]
    outcome = papers.appendMax(newEntry, stored, 4)
    self.assertEqual(outcome, [(1, 3), (2, 5), (4, 6), (3, 8)])