def readLabelFile(self, labelFilename, entityTypes):
    """Read a mallet label file and return the list of labels.

    Every non-blank line is split on whitespace; the first field is parsed
    as an integer class index and the second as a float probability. Any
    further fields are ignored.

    The class index selects a label name from ``['other'] + entityTypes``.
    When exactly one entity type is given, the class index read from the
    file is discarded and recomputed from the probability instead:
    index 0 ('other') if it is below ``self.binaryThreshold``, otherwise
    index 1 (the single entity type).

    Args:
        labelFilename: path of the label file to read.
        entityTypes: sequence of entity type names; position ``i`` maps to
            class index ``i + 1``.

    Returns:
        A list with one entry per non-blank line. Each entry is a
        one-element list holding a ``TokenLabel`` whose ``prob`` is the
        parsed probability and whose ``sequenceProb`` is 1.

    Raises:
        IndexError: a non-blank line has fewer than two fields, or the
            class index is outside the label table.
        ValueError: a field cannot be parsed as a number.
    """
    # Index 0 is reserved for 'other'; entity types follow in the given order.
    labelConversionList = ['other'] + list(entityTypes)
    binaryClassification = len(entityTypes) == 1

    labelList = []
    # The context manager guarantees the file is closed, even if a line
    # fails to parse and the exception propagates to the caller.
    with open(labelFilename, 'r') as labelFile:
        for line in labelFile:
            parsedLine = line.split()
            if not parsedLine:
                continue  # blank line: contributes no label

            mClass = int(parsedLine[0])
            prob = float(parsedLine[1])
            if binaryClassification:
                # Binary case: the threshold decides, not the file's class.
                mClass = 0 if prob < self.binaryThreshold else 1

            tLabel = TokenLabel(labelConversionList[mClass])
            tLabel.prob = prob
            tLabel.sequenceProb = 1
            # Wrapped in a list: each entry is a list of TokenLabel objects.
            labelList.append([tLabel])
    return labelList
def readLabelFile(self, labelFilename, entityTypes):
    """Read a mallet label file and return the list of labels.

    Two kinds of non-blank line are recognised after whitespace splitting:

    * Header line, first field ``k``: the second field is the number of
      label sequences K for the lines that follow, and fields 3 onward are
      parsed as K float sequence probabilities.
    * Token line with exactly ``2 * K`` fields: alternating
      ``label prob`` pairs, one pair per sequence rank.

    Lines matching neither shape are ignored. K starts at ``self.topK``
    with all sequence probabilities 0.0 until the first header line.

    A line that raises while being parsed is reported on stdout with its
    1-based line number and skipped; reading then continues.

    Args:
        labelFilename: path of the label file to read.
        entityTypes: accepted for interface compatibility; not used here.

    Returns:
        A list with one entry per token line. Each entry is a list of K
        ``TokenLabel`` objects (one per rank) carrying ``prob`` from the
        token line and ``sequenceProb`` from the most recent header line.
    """
    labels = []
    currentTopK = self.topK
    sequenceProb = [0.0] * currentTopK

    # The context manager guarantees the file is closed on every exit path.
    with open(labelFilename, 'r') as labelFile:
        for lineNo, line in enumerate(labelFile, 1):
            try:
                topKLabels = line.split()
                if not topKLabels:
                    continue  # blank line

                if topKLabels[0] == 'k':
                    newTopK = int(topKLabels[1])
                    if newTopK != currentTopK:
                        # K changed: resize the probability table to match.
                        currentTopK = newTopK
                        sequenceProb = [0.0] * currentTopK
                    # Sequence probabilities start at the third field.
                    for rank in range(currentTopK):
                        sequenceProb[rank] = float(topKLabels[rank + 2])
                elif len(topKLabels) == 2 * currentTopK:
                    tokenLabelList = []
                    for rank in range(currentTopK):
                        # Pair for this rank sits at fields 2*rank, 2*rank+1.
                        # Indexing by rank avoids the original ``i/2``,
                        # which is a float under true division.
                        tLabel = TokenLabel(topKLabels[2 * rank])
                        tLabel.prob = float(topKLabels[2 * rank + 1])
                        tLabel.sequenceProb = sequenceProb[rank]
                        tokenLabelList.append(tLabel)
                    labels.append(tokenLabelList)
            except Exception:
                # Best-effort: report and skip the bad line. Unlike a bare
                # ``except:``, this lets KeyboardInterrupt and SystemExit
                # propagate. The single-argument parenthesised print emits
                # the same text on Python 2 and Python 3.
                print('%s: Error at line number %d' % (labelFilename, lineNo))
    return labels