import numpy

def greedyMC2(lists, itemList, trainList, n):
    """
    Greedily select a subset of the output lists such that the average
    precision of the MC2 rank aggregation is maximised.
    """
    # list() is needed so that pop() works below (range objects are immutable)
    currentListsInds = list(range(len(lists)))
    newListsInds = []
    currentAvPrecision = 0
    lastAvPrecision = -0.1

    while currentAvPrecision - lastAvPrecision > 0 and len(currentListsInds) != 0:
        lastAvPrecision = currentAvPrecision
        averagePrecisions = numpy.zeros(len(currentListsInds))

        # Try adding each remaining list in turn and score the aggregation
        for i, j in enumerate(currentListsInds):
            newListsInds.append(j)
            newLists = []
            for k in newListsInds:
                newLists.append(lists[k])

            rankAggregate, scores = RankAggregator.MC2(newLists, itemList)
            averagePrecisions[i] = Evaluator.averagePrecisionFromLists(trainList, rankAggregate[0:n], n)
            newListsInds.remove(j)

        # Keep the best candidate list only if it improves the average precision
        j = numpy.argmax(averagePrecisions)
        currentAvPrecision = averagePrecisions[j]

        if currentAvPrecision > lastAvPrecision:
            newListsInds.append(currentListsInds.pop(j))

    return newListsInds
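# Hedged usage sketch (illustrative only, not from the original source): the toy
# rankings below are hypothetical, and RankAggregator/Evaluator must be
# importable as elsewhere in this module.
#
#   exampleLists = [[0, 1, 2, 3], [2, 0, 3, 1], [3, 2, 1, 0]]  # three rankings of the same items
#   exampleItems = [0, 1, 2, 3]                                # the full item universe
#   exampleRelevant = [0, 2]                                   # relevant (training) items
#   selectedInds = greedyMC2(exampleLists, exampleItems, exampleRelevant, 3)
#   # selectedInds holds indices into exampleLists, in greedy selection order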
# Split each output list into train and test versions by filtering out the
# other partition's expert matches
for item in outputList:
    if item not in testExpertMatchesInds:
        newTrainOutputList.append(item)
    if item not in trainExpertMatchesInds:
        newTestOutputList.append(item)
trainOutputLists.append(newTrainOutputList)
testOutputLists.append(newTestOutputList)

# Precision and average precision at each cutoff n, on both partitions
for i, n in enumerate(ns):
    for j, trainOutputList in enumerate(trainOutputLists):
        testOutputList = testOutputLists[j]
        trainPrecisions[i, j] = Evaluator.precisionFromIndLists(trainExpertMatchesInds, trainOutputList[0:n])
        testPrecisions[i, j] = Evaluator.precisionFromIndLists(testExpertMatchesInds, testOutputList[0:n])
        averageTrainPrecisions[s, i, j] = Evaluator.averagePrecisionFromLists(trainExpertMatchesInds, trainOutputList[0:n], n)
        averageTestPrecisions[s, i, j] = Evaluator.averagePrecisionFromLists(testExpertMatchesInds, testOutputList[0:n], n)

# Now look at rank aggregations
relevantItems = set()
for trainOutputList in trainOutputLists:
    relevantItems = relevantItems.union(trainOutputList)
relevantItems = list(relevantItems)

# Select lists on the training partition, then gather the corresponding test lists
listInds = RankAggregator.greedyMC2(trainOutputLists, relevantItems, trainExpertMatchesInds, 20)

newOutputList = []
for listInd in listInds:
    newOutputList.append(testOutputLists[listInd])
"""
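# The aggregation of the selected test lists appears truncated above; a hedged
# sketch of a plausible next step, reusing the MC2 call signature from
# greedyMC2 (the cutoff 20 mirrors the greedyMC2 call and is an assumption):
#
#   testAggregate, testScores = RankAggregator.MC2(newOutputList, relevantItems)
#   aggregateAvPrecision = Evaluator.averagePrecisionFromLists(testExpertMatchesInds, testAggregate[0:20], 20)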
if runLSI:
    outputFilename = dataset.getOutputFieldDir(field) + "outputListsLSI.npz"
else:
    outputFilename = dataset.getOutputFieldDir(field) + "outputListsLDA.npz"
Util.savePickle([outputLists, trainExpertMatchesInds, testExpertMatchesInds], outputFilename, debug=True)

numMethods = len(outputLists)
precisions = numpy.zeros((len(ns), numMethods))
averagePrecisions = numpy.zeros(numMethods)

# Precision at each cutoff n for every method's output list
for i, n in enumerate(ns):
    for j in range(len(outputLists)):
        precisions[i, j] = Evaluator.precisionFromIndLists(testExpertMatchesInds, outputLists[j][0:n])

for j in range(len(outputLists)):
    averagePrecisions[j] = Evaluator.averagePrecisionFromLists(testExpertMatchesInds, outputLists[j][0:averagePrecisionN], averagePrecisionN)

# Prepend the cutoffs as the first column for the LaTeX table
precisions2 = numpy.c_[numpy.array(ns), precisions]

logging.debug(Latex.listToRow(methodNames))
logging.debug("Computing Precision")
logging.debug(Latex.array2DToRows(precisions2))
logging.debug("Computing Average Precision")
logging.debug(Latex.array1DToRow(averagePrecisions))

# Close the file
fich.close()
logging.debug("All done!")
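# A minimal sketch of average precision at n, assuming the standard definition;
# this is an illustration, not the project's Evaluator.averagePrecisionFromLists,
# and the helper name is hypothetical.
def averagePrecisionSketch(relevantInds, rankedList, n):
    relevantSet = set(relevantInds)
    hits = 0
    precisionSum = 0.0
    for pos, item in enumerate(rankedList[0:n]):
        if item in relevantSet:
            hits += 1
            precisionSum += hits / float(pos + 1)  # precision at this rank
    # Normalise by the best achievable number of hits within the top n
    return precisionSum / max(min(len(relevantSet), n), 1)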