def setFeaturesTrain(self, data):
    """Build feature-annotated copies of the training data.

    For every element of ``data`` a new ``Datum`` is created from its
    ``word`` and ``label`` and given three extra attributes:

    * ``previousLabel`` -- the label of the preceding element, or ``"O"``
      for the first element;
    * ``followingLabel`` -- the label of the next element; the last
      element uses its own label because it has no successor;
    * ``features`` -- the result of ``self.computeFeatures(words,
      previousLabel, position, followingLabel)``.

    Args:
        data: indexable sequence of objects exposing ``word`` and ``label``.

    Returns:
        A new list with one ``Datum`` per input element, in input order.
    """
    wordList = [entry.word for entry in data]
    lastPosition = len(data) - 1
    annotated = []

    ## The first token has no predecessor, so it starts from the "O" label.
    priorLabel = "O"
    for position, current in enumerate(data):
        ## Look one token ahead; the final token falls back to itself.
        upcoming = current if position == lastPosition else data[position + 1]

        fresh = Datum(current.word, current.label)
        fresh.followingLabel = upcoming.label
        fresh.features = self.computeFeatures(
            wordList, priorLabel, position, upcoming.label)
        fresh.previousLabel = priorLabel
        annotated.append(fresh)

        ## The true label of this token becomes the history of the next one.
        priorLabel = current.label
    return annotated
def setFeaturesTest(self, data):
    """Build feature-annotated test data for every candidate previous label.

    The first element of ``data`` yields a single ``Datum`` whose
    ``previousLabel`` is ``"O"``.  Every later element yields one ``Datum``
    per distinct label occurring in ``data`` (in order of first
    appearance), each carrying that label as ``previousLabel``.  The
    result therefore holds ``1 + (len(data) - 1) * len(labels)`` entries
    for non-empty input, and is empty for empty input.

    Each produced ``Datum`` is created from the element's ``word`` and
    ``label`` and receives ``followingLabel`` (label of the next element;
    the last element uses its own label), ``previousLabel`` and
    ``features`` (from ``self.computeFeatures(words, previousLabel,
    position, followingLabel)``).

    Side effect: ``features`` on each *input* element is overwritten as
    well; after the call it holds the features computed for the last
    candidate previous label tried for that element.

    Args:
        data: indexable sequence of objects exposing ``word`` and ``label``.

    Returns:
        A new list of ``Datum`` objects as described above.
    """
    newData = []
    words = []
    labels = []
    seenLabels = set()
    for datum in data:
        words.append(datum.word)
        ## Membership test via `in`: dict.has_key() no longer exists in
        ## Python 3, and the index values were never read anyway.
        if datum.label not in seenLabels:
            seenLabels.add(datum.label)
            labels.append(datum.label)

    lastIndex = len(data) - 1
    for i, datum in enumerate(data):
        ## Look one token ahead; the final token falls back to itself.
        followingDatum = data[i + 1] if i != lastIndex else datum

        ## The first token has a fixed "O" history; every other token is
        ## expanded once per label observed in the data.
        candidateLabels = ["O"] if i == 0 else labels

        for previousLabel in candidateLabels:
            ## NOTE(review): computeFeatures is called twice with the same
            ## arguments so the input datum and the copy each get their own
            ## result object, as before. It is defined outside this block;
            ## confirm it is pure before collapsing the two calls.
            datum.features = self.computeFeatures(
                words, previousLabel, i, followingDatum.label)
            newDatum = Datum(datum.word, datum.label)
            newDatum.followingLabel = followingDatum.label
            newDatum.features = self.computeFeatures(
                words, previousLabel, i, followingDatum.label)
            newDatum.previousLabel = previousLabel
            newData.append(newDatum)
    return newData