def addCharInfo(self):
    """Run every dataset split through ``addCharInformation``.

    The train, dev and test sentence collections stored on the instance
    are each replaced, in that order, by the value ``addCharInformation``
    returns for them.
    """
    # Format note carried over from the original code (not verifiable from
    # this block alone -- confirm against addCharInformation):
    #   [['EU', ['E', 'U'], 'B-ORG\n'], ...]
    for split_attr in ("trainSentences", "devSentences", "testSentences"):
        enriched = addCharInformation(getattr(self, split_attr))
        setattr(self, split_attr, enriched)
correctLabels.append(labels) predLabels.append(pred) b.update(i) return predLabels, correctLabels if __name__ == "__main__": epochs = 50 embed_dim = 100 pre_embed = False trainSentences = readfile("data/train.txt") devSentences = readfile("data/valid.txt") testSentences = readfile("data/test.txt") trainSentences = addCharInformation(trainSentences) devSentences = addCharInformation(devSentences) testSentences = addCharInformation(testSentences) labelSet = set() words = {} for dataset in [trainSentences, devSentences, testSentences]: for sentence in dataset: for token, char, label in sentence: labelSet.add(label) words[token.lower()] = True # :: Create a mapping for the labels :: label2Idx = {} for label in labelSet: