def test(self, absList, modelfilename='', fold=None): """ Apply ensemble of classifiers to given list of abstracts. Ignores any given model file. """ for i in range(self.nClassifiers): print 'test:', self.entityTypesString, i if self.type == 'abstract': self.finder.test(absList, self.modelFilenames[i]) else: self.useBaggedFeatures(self.baggedFeatures[i], absList, self.modelFilenames[i], self.finder.test) self.renameLabels(absList, i) # resultFilename = '%s%s.r%d.ensemble.txt'%(self.entityTypesString, self.getFoldString(fold), self.randomSeed) # resultsOut = open(resultFilename,'w') print self.entityTypesString for abstract in absList: # resultsOut.write('---%s---' % abstract.id) for sentence in abstract.sentences: for token in sentence: token.topKLabels[self.entityTypesString] = [] for i in range(self.nClassifiers): token.topKLabels[self.entityTypesString].append(TokenLabel('other')) eLabelMatches = token.getLabelMatches(self.ensembleTypes) for eLabel in eLabelMatches: [label, i] = self.toRegularLabel(eLabel) tLabel = TokenLabel(label) token.topKLabels[self.entityTypesString][i] = tLabel # tLabel.prob = prob # tLabel.sequenceProb = sequenceProb[i/2] token.removeLabel(eLabel) # if label != 'other': # token.addLabel(label) # resultsOut.write(str(ensembleLabels)+'\n') # resultsOut.write('%s, %s\n' %(token.text.ljust(12), eLabelMatches)) self.finder.rerankLabelsAndAssign(absList, rerankType=self.rerankType, topKMax=5, fold=fold, countOther=self.countOther)
def getTopKLabelings(self, sentence, finder, topK):
    """ Collect up to topK alternative label sequences for one sentence.

        sentence -- iterable of tokens, each with a topKLabels mapping
        finder   -- supplies entityTypesString (the key into topKLabels) and
                    tokenClassifier.topK (an upper bound on the number of ranks)
        topK     -- maximum number of labelings requested

        Returns a list of labelings; each labeling holds one label per token.
        Rank 0 is always included when at least one rank is requested. A later
        rank is included only if some token actually has a label at that rank.
    """
    rankLimit = min(topK, finder.tokenClassifier.topK)
    results = []
    for rank in range(rankLimit):
        foundRealLabel = False
        labelsAtRank = []
        for token in sentence:
            typesKey = finder.entityTypesString
            hasRank = (typesKey in token.topKLabels
                       and rank < len(token.topKLabels[typesKey]))
            if not hasRank:
                # Every token needs a label; fill gaps with a zero-probability 'other'.
                filler = TokenLabel('other')
                filler.prob = 0
                labelsAtRank.append(filler)
                continue
            labelsAtRank.append(token.topKLabels[typesKey][rank])
            foundRealLabel = True
        # The first labeling is kept even if it is all filler; later ones must be real.
        if rank == 0 or foundRealLabel:
            results.append(labelsAtRank)
    return results