コード例 #1
0
    def setFeaturesTrain(self, data):
        """Build feature-annotated copies of the training data.

        For every element of ``data`` a new ``Datum`` is created with the
        same word and label. Its features are computed from the full word
        sequence, the label of the preceding element ("O" for the first
        element) and the label of the following element.

        Args:
            data: indexable sequence of objects exposing ``word`` and
                ``label`` attributes.

        Returns:
            A new list holding one ``Datum`` per input element, each with
            ``features``, ``previousLabel`` and ``followingLabel`` set.
            This method itself does not assign to the input objects.
        """
        words = [datum.word for datum in data]
        newData = []
        lastIndex = len(data) - 1

        ## Training uses the true neighbouring labels. The first element
        ## has no predecessor, so its previous label is "O".
        previousLabel = "O"
        for i, datum in enumerate(data):
            ## MY EDIT: the label of the following element is passed to
            ## the feature computation as well.
            ## The last element has no successor, so it stands in as its
            ## own following element.
            ## NOTE(review): this means the last element's own label is
            ## used as its following label; confirm this is intended
            ## rather than a boundary label such as "O".
            followingDatum = data[i + 1] if i != lastIndex else datum
            followingLabel = followingDatum.label

            newDatum = Datum(datum.word, datum.label)
            newDatum.followingLabel = followingLabel
            newDatum.features = self.computeFeatures(words, previousLabel, i,
                                                     followingLabel)
            newDatum.previousLabel = previousLabel
            newData.append(newDatum)

            ## The current true label is the next element's previous label.
            previousLabel = datum.label

        return newData
コード例 #2
0
    def setFeaturesTest(self, data):
        """Build feature-annotated test data for each candidate previous label.

        The first element of ``data`` yields a single new ``Datum`` whose
        previous label is "O". Every later element yields one new ``Datum``
        per distinct label found in ``data`` (in order of first
        appearance), each computed with that label as the previous label.

        Args:
            data: indexable sequence of objects exposing ``word`` and
                ``label`` attributes. ``features`` is assigned on each of
                these objects as a side effect; after the call it holds
                the value computed for the last candidate previous label.

        Returns:
            A new list of ``Datum`` objects, each with ``features``,
            ``previousLabel`` and ``followingLabel`` set.
        """
        words = []
        labels = []
        seenLabels = set()

        ## Collect the words and the distinct labels in first-seen order.
        ## A membership test with ``in`` replaces ``dict.has_key``, which
        ## does not exist on Python 3.
        for datum in data:
            words.append(datum.word)
            if datum.label not in seenLabels:
                seenLabels.add(datum.label)
                labels.append(datum.label)

        newData = []
        lastIndex = len(data) - 1

        ## The true previous label is never passed in here: every candidate
        ## label is tried instead.
        ## NOTE(review): the following label passed in IS the true label
        ## taken from the data; confirm that this is intended at test time.
        for i, datum in enumerate(data):
            ## The last element has no successor, so it stands in as its
            ## own following element.
            followingDatum = data[i + 1] if i != lastIndex else datum

            ## Only "O" can precede the first element; any known label may
            ## precede the others.
            if i == 0:
                candidateLabels = ["O"]
            else:
                candidateLabels = labels

            for previousLabel in candidateLabels:
                ## Features are computed separately for the input datum
                ## and for the new one so the two do not share one result
                ## object.
                datum.features = self.computeFeatures(
                    words, previousLabel, i, followingDatum.label)

                newDatum = Datum(datum.word, datum.label)
                newDatum.followingLabel = followingDatum.label
                newDatum.features = self.computeFeatures(
                    words, previousLabel, i, followingDatum.label)
                newDatum.previousLabel = previousLabel
                newData.append(newDatum)

        return newData