예제 #1
0
 def addCharInfo(self):
     """Run every dataset split through ``addCharInformation``.

     The train, dev and test sentence lists stored on the instance are
     each replaced, in that order, by the value ``addCharInformation``
     returns for them.
     """
     # format: [['EU', ['E', 'U'], 'B-ORG\n'], ...]
     for split_attr in ("trainSentences", "devSentences", "testSentences"):
         sentences = getattr(self, split_attr)
         setattr(self, split_attr, addCharInformation(sentences))
예제 #2
0
파일: nn.py 프로젝트: topolphukhanh/aiclass
        correctLabels.append(labels)
        predLabels.append(pred)
        b.update(i)
    return predLabels, correctLabels


if __name__ == "__main__":
    epochs = 50
    embed_dim = 100
    pre_embed = False

    trainSentences = readfile("data/train.txt")
    devSentences = readfile("data/valid.txt")
    testSentences = readfile("data/test.txt")

    trainSentences = addCharInformation(trainSentences)
    devSentences = addCharInformation(devSentences)
    testSentences = addCharInformation(testSentences)

    labelSet = set()
    words = {}

    for dataset in [trainSentences, devSentences, testSentences]:
        for sentence in dataset:
            for token, char, label in sentence:
                labelSet.add(label)
                words[token.lower()] = True

    # :: Create a mapping for the labels ::
    label2Idx = {}
    for label in labelSet: