예제 #1
0
파일: ensemble.py 프로젝트: olabknbit/acres
  def test(self, absList, modelfilename='', fold=None):
    """ Apply the ensemble of classifiers to the given list of abstracts.

        Each classifier is run with its own entry of self.modelFilenames and
        its output labels are renamed via renameLabels. Afterwards, every
        token gets token.topKLabels[self.entityTypesString] filled with one
        slot per classifier (defaulting to 'other'), the ensemble-specific
        labels are removed from the token, and the finder reranks the
        candidates and assigns the final labels.

        absList       -- abstracts to label; they are modified in place.
        modelfilename -- ignored; the stored per-classifier model files are used.
        fold          -- passed through to finder.rerankLabelsAndAssign.
        """
    for i in range(self.nClassifiers):
      # A single parenthesized argument prints identically on Python 2 and 3.
      print('test: %s %s' % (self.entityTypesString, i))
      if self.type == 'abstract':
        self.finder.test(absList, self.modelFilenames[i])
      else:
        self.useBaggedFeatures(self.baggedFeatures[i], absList, self.modelFilenames[i], self.finder.test)
      self.renameLabels(absList, i)

    print(self.entityTypesString)
    typeKey = self.entityTypesString
    for abstract in absList:
      for sentence in abstract.sentences:
        for token in sentence:
          # One independent 'other' placeholder per classifier; slots are
          # overwritten below for classifiers that produced a label.
          slots = [TokenLabel('other') for _ in range(self.nClassifiers)]
          token.topKLabels[typeKey] = slots

          for eLabel in token.getLabelMatches(self.ensembleTypes):
            # toRegularLabel yields the plain label plus the index of the
            # classifier slot it belongs to.
            label, classifierIndex = self.toRegularLabel(eLabel)
            slots[classifierIndex] = TokenLabel(label)
            # The ensemble-specific label is no longer needed on the token.
            token.removeLabel(eLabel)

    self.finder.rerankLabelsAndAssign(absList, rerankType=self.rerankType, topKMax=5, fold=fold, countOther=self.countOther)
예제 #2
0
 def getTopKLabelings(self, sentence, finder, topK):
   """ Collect the ranked label sequences available for a sentence.

       For each rank below min(topK, finder.tokenClassifier.topK) a sequence
       with one label per token is built from
       token.topKLabels[finder.entityTypesString]. Tokens that have no label
       at that rank receive an 'other' TokenLabel with prob 0. The rank-0
       sequence is always returned; later ranks are returned only when at
       least one token actually had a label at that rank.
       """
   results = []
   rankLimit = min(topK, finder.tokenClassifier.topK)
   for rank in range(rankLimit):
     foundRanked = False
     sequence = []
     for tok in sentence:
       hasRanked = (finder.entityTypesString in tok.topKLabels
                    and rank < len(tok.topKLabels[finder.entityTypesString]))
       if not hasRanked:
         # Pad so every token carries a label; some finders label only a
         # subset of the tokens.
         filler = TokenLabel('other')
         filler.prob = 0
         sequence.append(filler)
         continue
       sequence.append(tok.topKLabels[finder.entityTypesString][rank])
       foundRanked = True
     # Rank 0 is kept even if it is all padding; deeper ranks need a real label.
     if rank == 0 or foundRanked:
       results.append(sequence)

   return results