import random

import torch

# Assumed to be provided by the surrounding project modules:
# HyperParams, Reader, Feature, Example, Eval and RNNLabeler.


class Labeler:
    def __init__(self):
        self.word_state = {}
        self.label_state = {}
        self.hyperParams = HyperParams()

    def createAlphabet(self, trainInsts, devInsts, testInsts):
        # Build the word and label alphabets from the training data; dev/test
        # words are added only when the word embeddings are not fine-tuned.
        print("create alpha.................")
        for inst in trainInsts:
            for w in inst.words:
                if w not in self.word_state:
                    self.word_state[w] = 1
                else:
                    self.word_state[w] += 1
            for l in inst.labels:
                if l not in self.label_state:
                    self.label_state[l] = 1
                else:
                    self.label_state[l] += 1
        print("word state:", len(self.word_state))
        self.addTestAlpha(devInsts)
        print("word state:", len(self.word_state))
        self.addTestAlpha(testInsts)
        print("word state:", len(self.word_state))

        self.word_state[self.hyperParams.unk] = self.hyperParams.wordCutOff + 1
        self.hyperParams.wordAlpha.initial(self.word_state, self.hyperParams.wordCutOff)
        self.hyperParams.wordAlpha.set_fixed_flag(True)
        self.hyperParams.wordNum = self.hyperParams.wordAlpha.m_size
        self.hyperParams.unkWordID = self.hyperParams.wordAlpha.from_string(self.hyperParams.unk)

        self.hyperParams.labelAlpha.initial(self.label_state)
        self.hyperParams.labelAlpha.set_fixed_flag(True)
        self.hyperParams.labelSize = self.hyperParams.labelAlpha.m_size

        print("Label num: ", self.hyperParams.labelSize)
        print("Word num: ", self.hyperParams.wordNum)

    def addTestAlpha(self, insts):
        # When embeddings are frozen, dev/test words can safely join the vocabulary.
        print("Add test alpha.............")
        if not self.hyperParams.wordFineTune:
            for inst in insts:
                for w in inst.words:
                    if w not in self.word_state:
                        self.word_state[w] = 1
                    else:
                        self.word_state[w] += 1

    def extractFeature(self, inst):
        # Map each word to its index, falling back to the UNK index for OOV words.
        feat = Feature()
        for w in inst.words:
            wordId = self.hyperParams.wordAlpha.from_string(w)
            if wordId == -1:
                feat.wordIndexs.append(self.hyperParams.unkWordID)
            else:
                feat.wordIndexs.append(wordId)
        feat.wordIndexs = torch.autograd.Variable(torch.LongTensor(feat.wordIndexs))
        return feat

    def instance2Example(self, insts):
        exams = []
        for inst in insts:
            example = Example()
            example.feat = self.extractFeature(inst)
            for l in inst.labels:
                labelId = self.hyperParams.labelAlpha.from_string(l)
                example.labelIndexs.append(labelId)
            example.labelIndexs = torch.autograd.Variable(torch.LongTensor(example.labelIndexs))
            exams.append(example)
        return exams

    def train(self, train_file, dev_file, test_file):
        self.hyperParams.show()
        torch.set_num_threads(self.hyperParams.thread)
        reader = Reader(self.hyperParams.maxInstance)

        trainInsts = reader.readInstances(train_file)
        devInsts = reader.readInstances(dev_file)
        testInsts = reader.readInstances(test_file)
        print("Training Instance: ", len(trainInsts))
        print("Dev Instance: ", len(devInsts))
        print("Test Instance: ", len(testInsts))

        self.createAlphabet(trainInsts, devInsts, testInsts)

        trainExamples = self.instance2Example(trainInsts)
        devExamples = self.instance2Example(devInsts)
        testExamples = self.instance2Example(testInsts)

        self.model = RNNLabeler(self.hyperParams)
        parameters = filter(lambda p: p.requires_grad, self.model.parameters())
        optimizer = torch.optim.Adagrad(parameters, lr=self.hyperParams.learningRate)

        indexes = []
        for idx in range(len(trainExamples)):
            indexes.append(idx)

        for iter in range(self.hyperParams.maxIter):
            print('###Iteration' + str(iter) + "###")
            random.shuffle(indexes)
            for idx in range(len(trainExamples)):
                # One sentence per update: LSTM features feed the CRF's
                # negative log-likelihood loss.
                self.model.zero_grad()
                self.model.LSTMHidden = self.model.init_hidden()
                exam = trainExamples[indexes[idx]]
                lstm_feats = self.model(exam.feat)
                loss = self.model.crf.neg_log_likelihood(lstm_feats, exam.labelIndexs)
                loss.backward()
                optimizer.step()
                if (idx + 1) % self.hyperParams.verboseIter == 0:
                    print('current: ', idx + 1, ", cost:", loss.data[0])

            # Evaluate on dev and test after every iteration.
            eval_dev = Eval()
            for idx in range(len(devExamples)):
                predictLabels = self.predict(devExamples[idx])
                devInsts[idx].evalPRF(predictLabels, eval_dev)
            print('Dev: ', end="")
            eval_dev.getFscore()

            eval_test = Eval()
            for idx in range(len(testExamples)):
                predictLabels = self.predict(testExamples[idx])
                testInsts[idx].evalPRF(predictLabels, eval_test)
            print('Test: ', end="")
            eval_test.getFscore()

    def predict(self, exam):
        # Viterbi-decode the best label sequence from the CRF.
        tag_hiddens = self.model(exam.feat)
        _, best_path = self.model.crf._viterbi_decode(tag_hiddens)
        predictLabels = []
        for idx in range(len(best_path)):
            predictLabels.append(self.hyperParams.labelAlpha.from_id(best_path[idx]))
        return predictLabels

    def getMaxIndex(self, tag_score):
        max = tag_score.data[0]
        maxIndex = 0
        for idx in range(1, self.hyperParams.labelSize):
            if tag_score.data[idx] > max:
                max = tag_score.data[idx]
                maxIndex = idx
        return maxIndex
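For reference, a minimal driver for the class above could look like the following; the data paths are placeholders, not part of the original code.

if __name__ == '__main__':
    labeler = Labeler()
    # Hypothetical file locations; replace with the project's actual data paths.
    labeler.train('data/train.txt', 'data/dev.txt', 'data/test.txt')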
# Variant without a CRF: each token's label is predicted independently and the
# model is trained with per-token cross-entropy. Alphabet is another assumed
# project class.
class Labeler:
    def __init__(self):
        self.word_state = {}
        self.label_state = {}
        self.hyperParams = HyperParams()
        self.wordAlpha = Alphabet()
        self.labelAlpha = Alphabet()

    def createAlphabet(self, trainInsts):
        for inst in trainInsts:
            for w in inst.words:
                if w not in self.word_state:
                    self.word_state[w] = 1
                else:
                    self.word_state[w] += 1
            for l in inst.labels:
                if l not in self.label_state:
                    self.label_state[l] = 1
                else:
                    self.label_state[l] += 1
        self.wordAlpha.initial(self.word_state, self.hyperParams.wordCutOff)
        self.labelAlpha.initial(self.label_state)
        self.labelAlpha.set_fixed_flag(True)
        self.wordAlpha.set_fixed_flag(True)
        self.hyperParams.wordNum = self.wordAlpha.m_size
        self.hyperParams.labelSize = self.labelAlpha.m_size
        print("word num: ", self.hyperParams.wordNum)
        print("label num: ", self.hyperParams.labelSize)

    def extractFeature(self, inst):
        feat = Feature()
        for w in inst.words:
            wordId = self.wordAlpha.from_string(w)
            feat.wordIndexs.append(wordId)
        feat.wordIndexs = torch.autograd.Variable(torch.LongTensor(feat.wordIndexs))
        return feat

    def instance2Example(self, insts):
        exams = []
        for inst in insts:
            example = Example()
            example.feat = self.extractFeature(inst)
            for l in inst.labels:
                labelId = self.labelAlpha.from_string(l)
                example.labelIndexs.append(labelId)
            example.labelIndexs = torch.autograd.Variable(torch.LongTensor(example.labelIndexs))
            exams.append(example)
        return exams

    def train(self, train_file, dev_file, test_file):
        self.hyperParams.show()
        torch.set_num_threads(self.hyperParams.thread)
        reader = Reader(self.hyperParams.maxInstance)

        trainInsts = reader.readInstances(train_file)
        devInsts = reader.readInstances(dev_file)
        print("Training Instance: ", len(trainInsts))
        print("Dev Instance: ", len(devInsts))

        # The alphabets must be built before instances are converted to index examples.
        self.createAlphabet(trainInsts)
        trainExamples = self.instance2Example(trainInsts)
        devExamples = self.instance2Example(devInsts)

        self.model = RNNLabeler(self.hyperParams)
        optimizer = torch.optim.Adagrad(self.model.parameters(), lr=self.hyperParams.learningRate)

        indexes = []
        for idx in range(len(trainExamples)):
            indexes.append(idx)

        for iter in range(self.hyperParams.maxIter):
            print('###Iteration' + str(iter) + "###")
            random.shuffle(indexes)
            for idx in range(len(trainExamples)):
                self.model.zero_grad()
                self.model.LSTMHidden = self.model.init_hidden()
                exam = trainExamples[indexes[idx]]
                tag_scores = self.model(exam.feat)
                loss = torch.nn.functional.cross_entropy(tag_scores, exam.labelIndexs)
                loss.backward()
                optimizer.step()
                if (idx + 1) % self.hyperParams.verboseIter == 0:
                    print('current: ', idx + 1, ", cost:", loss.data[0])

            eval_dev = Eval()
            for idx in range(len(devExamples)):
                predictLabels = self.predict(devExamples[idx])
                devInsts[idx].evalPRF(predictLabels, eval_dev)
            eval_dev.getFscore()

    def predict(self, exam):
        tag_scores = self.model(exam.feat)
        if len(tag_scores) != len(exam.labelIndexs) or len(tag_scores.data[0]) != self.hyperParams.labelSize:
            print("error")
        predictIndexs = []
        for idx in range(len(tag_scores)):
            pred_idx = self.getMaxIndex(tag_scores[idx])
            predictIndexs.append(pred_idx)
        predictLabels = []
        for idx in range(len(tag_scores)):
            predictLabels.append(self.labelAlpha.from_id(predictIndexs[idx]))
        return predictLabels

    def getMaxIndex(self, tag_score):
        # Argmax over the label scores of a single token.
        max = tag_score.data[0]
        maxIndex = 0
        for idx in range(1, self.hyperParams.labelSize):
            if tag_score.data[idx] > max:
                max = tag_score.data[idx]
                maxIndex = idx
        return maxIndex
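As a side note, the hand-written argmax in getMaxIndex can also be expressed with torch.max; this is only a sketch under the assumption that tag_score is a 1-D tensor of label scores (for example, tag_score.data from the code above).

import torch

def get_max_index(tag_score, label_size):
    # Argmax over the first label_size scores; mirrors Labeler.getMaxIndex.
    _, max_index = torch.max(tag_score[:label_size], 0)
    return int(max_index)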
# Sentence-classification variant: each instance carries a single label and
# training runs over padded mini-batches.
class Labeler:
    def __init__(self):
        self.word_state = {}
        self.label_state = {}
        self.hyperParams = HyperParams()

    def createAlphabet(self, trainInsts, devInsts, testInsts):
        print("create alpha.................")
        for inst in trainInsts:
            for w in inst.words:
                if w not in self.word_state:
                    self.word_state[w] = 1
                else:
                    self.word_state[w] += 1
            l = inst.label
            if l not in self.label_state:
                self.label_state[l] = 1
            else:
                self.label_state[l] += 1
        print("word state:", len(self.word_state))
        self.addTestAlpha(devInsts)
        print("word state:", len(self.word_state))
        self.addTestAlpha(testInsts)
        print("word state:", len(self.word_state))

        self.word_state[self.hyperParams.unk] = self.hyperParams.wordCutOff + 1
        self.word_state[self.hyperParams.padding] = self.hyperParams.wordCutOff + 1
        self.hyperParams.wordAlpha.initial(self.word_state, self.hyperParams.wordCutOff)
        self.hyperParams.wordAlpha.set_fixed_flag(True)
        self.hyperParams.wordNum = self.hyperParams.wordAlpha.m_size
        self.hyperParams.unkWordID = self.hyperParams.wordAlpha.from_string(self.hyperParams.unk)
        self.hyperParams.paddingID = self.hyperParams.wordAlpha.from_string(self.hyperParams.padding)

        self.hyperParams.labelAlpha.initial(self.label_state)
        self.hyperParams.labelAlpha.set_fixed_flag(True)
        self.hyperParams.labelSize = self.hyperParams.labelAlpha.m_size

        print("Label num: ", self.hyperParams.labelSize)
        print("Word num: ", self.hyperParams.wordNum)
        print("Padding ID: ", self.hyperParams.paddingID)
        print("UNK ID: ", self.hyperParams.unkWordID)

    def addTestAlpha(self, insts):
        print("Add test alpha.............")
        if not self.hyperParams.wordFineTune:
            for inst in insts:
                for w in inst.words:
                    if w not in self.word_state:
                        self.word_state[w] = 1
                    else:
                        self.word_state[w] += 1

    def extractFeature(self, inst):
        feat = Feature()
        feat.sentLen = len(inst.words)
        feat.wordIndexs = torch.autograd.Variable(torch.LongTensor(1, feat.sentLen))
        for idx in range(len(inst.words)):
            w = inst.words[idx]
            wordId = self.hyperParams.wordAlpha.from_string(w)
            if wordId == -1:
                wordId = self.hyperParams.unkWordID
            feat.wordIndexs.data[0][idx] = wordId
        return feat

    def instance2Example(self, insts):
        exams = []
        for inst in insts:
            example = Example()
            example.labelIndex = torch.autograd.Variable(torch.LongTensor(1))
            example.feat = self.extractFeature(inst)
            l = inst.label
            labelId = self.hyperParams.labelAlpha.from_string(l)
            example.labelIndex.data[0] = labelId
            exams.append(example)
        return exams

    def getBatchFeatLabel(self, exams):
        # Pad (or truncate at 40 tokens) every sentence in the batch to the same length.
        maxSentSize = 0
        for e in exams:
            if maxSentSize < e.feat.sentLen:
                maxSentSize = e.feat.sentLen
        if maxSentSize > 40:
            maxSentSize = 40
        batch_feats = torch.autograd.Variable(torch.LongTensor(self.hyperParams.batch, maxSentSize))
        batch_labels = torch.autograd.Variable(torch.LongTensor(self.hyperParams.batch))
        for idx in range(len(batch_feats.data)):
            e = exams[idx]
            batch_labels.data[idx] = e.labelIndex.data[0]
            for idy in range(maxSentSize):
                if idy < e.feat.sentLen:
                    batch_feats.data[idx][idy] = e.feat.wordIndexs.data[0][idy]
                else:
                    batch_feats.data[idx][idy] = self.hyperParams.paddingID
        return batch_feats, batch_labels

    def train(self, train_file, dev_file, test_file):
        self.hyperParams.show()
        torch.set_num_threads(self.hyperParams.thread)
        reader = Reader()

        trainInsts = reader.readInstances(train_file, self.hyperParams.maxInstance)
        devInsts = reader.readInstances(dev_file, self.hyperParams.maxInstance)
        testInsts = reader.readInstances(test_file, self.hyperParams.maxInstance)
        print("Training Instance: ", len(trainInsts))
        print("Dev Instance: ", len(devInsts))
        print("Test Instance: ", len(testInsts))

        self.createAlphabet(trainInsts, devInsts, testInsts)

        trainExamples = self.instance2Example(trainInsts)
        devExamples = self.instance2Example(devInsts)
        testExamples = self.instance2Example(testInsts)

        self.model = RNNLabeler(self.hyperParams)
        parameters = filter(lambda p: p.requires_grad, self.model.parameters())
        optimizer = torch.optim.Adam(parameters, lr=self.hyperParams.learningRate)

        indexes = []
        for idx in range(len(trainExamples)):
            indexes.append(idx)

        batchBlock = len(trainExamples) // self.hyperParams.batch
        for iter in range(self.hyperParams.maxIter):
            print('###Iteration' + str(iter) + "###")
            random.shuffle(indexes)
            self.model.train()
            for updateIter in range(batchBlock):
                # self.model.zero_grad()
                optimizer.zero_grad()
                exams = []
                start_pos = updateIter * self.hyperParams.batch
                end_pos = (updateIter + 1) * self.hyperParams.batch
                for idx in range(start_pos, end_pos):
                    exams.append(trainExamples[indexes[idx]])
                feats, labels = self.getBatchFeatLabel(exams)
                output = self.model(feats, self.hyperParams.batch)
                loss = torch.nn.functional.cross_entropy(output, labels)
                loss.backward()
                optimizer.step()
                if (updateIter + 1) % self.hyperParams.verboseIter == 0:
                    print('current: ', idx + 1, ", cost:", loss.data[0])

            self.model.eval()
            eval_dev = Eval()
            for idx in range(len(devExamples)):
                predictLabel = self.predict(devExamples[idx])
                devInsts[idx].evalACC(predictLabel, eval_dev)
            print("dev: ", end='')
            eval_dev.getACC()

            eval_test = Eval()
            for idx in range(len(testExamples)):
                predictLabel = self.predict(testExamples[idx])
                testInsts[idx].evalACC(predictLabel, eval_test)
            print("test: ", end='')
            eval_test.getACC()

    def predict(self, exam):
        output = self.model(exam.feat.wordIndexs)
        labelID = self.getMaxIndex(output)
        return self.hyperParams.labelAlpha.from_id(labelID)

    def getMaxIndex(self, tag_score):
        # Argmax over the class scores of a single sentence (shape [1, labelSize]).
        max = tag_score.data[0][0]
        maxIndex = 0
        for idx in range(1, self.hyperParams.labelSize):
            if tag_score.data[0][idx] > max:
                max = tag_score.data[0][idx]
                maxIndex = idx
        return maxIndex
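To illustrate what getBatchFeatLabel produces, here is a small standalone sketch of the same padding scheme; the sentences and the padding index are made up for the example.

import torch

paddingID = 0                                           # hypothetical padding index
sentences = [[5, 9, 2], [7, 3], [4, 8, 6, 1]]           # made-up word-index sequences
maxSentSize = min(max(len(s) for s in sentences), 40)   # same 40-token cap as getBatchFeatLabel
batch_feats = torch.LongTensor(len(sentences), maxSentSize).fill_(paddingID)
for i, sent in enumerate(sentences):
    for j, wordId in enumerate(sent[:maxSentSize]):
        batch_feats[i][j] = wordId
print(batch_feats)  # one row per sentence, right-padded with paddingID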
# Encoder plus CRF decoder variant, trained in padded mini-batches. DataSet,
# Alphabet, IndexSet, PretrainEmb, Encoder, CRF and BatchBucket are assumed to
# come from the surrounding project modules.
class Labeler:
    def __init__(self):
        self.hyperParams = HyperParams()

    def train(self, trainFile, devFile, testFile):
        self.hyperParams.show()

        train = DataSet(trainFile)
        dev = DataSet(devFile)
        test = DataSet(testFile)

        vocab = Alphabet(train)
        self.hyperParams.vocabSize = vocab.size()
        self.hyperParams.labelSize = vocab.label_size()
        print('vocab_size:', self.hyperParams.vocabSize)
        print('label_size:', self.hyperParams.labelSize)

        train = IndexSet(train, vocab)
        dev = IndexSet(dev, vocab)
        test = IndexSet(test, vocab)
        print('trainset_size: ', train.size())
        print('devset_size: ', dev.size())
        print('testset_size: ', test.size())

        if self.hyperParams.embedFile != '':
            pretrain = PretrainEmb(self.hyperParams.embedFile, vocab.word2id)
        else:
            pretrain = None

        self.model = Encoder(self.hyperParams, pretrain)  # encoder
        self.crf = CRF(self.hyperParams.labelSize, vocab.label2id['<start>'],
                       vocab.label2id['<padding>'], vocab)  # decoder
        parameters = filter(lambda p: p.requires_grad, self.model.parameters())
        optimizer_rnn = torch.optim.Adam(parameters, lr=self.hyperParams.learningRate)
        optimizer_crf = torch.optim.Adam(self.crf.parameters(), lr=self.hyperParams.learningRate)

        indexes = []
        for idx in range(train.size()):
            indexes.append(idx)

        batchBlock = len(train.word_mat) // self.hyperParams.batch
        for iter in range(self.hyperParams.maxIter):
            print('###Iteration' + str(iter) + "###")
            random.shuffle(indexes)
            self.model.train()
            for updateIter in range(batchBlock):
                # self.model.zero_grad()
                optimizer_rnn.zero_grad()
                optimizer_crf.zero_grad()
                start_pos = updateIter * self.hyperParams.batch
                end_pos = (updateIter + 1) * self.hyperParams.batch
                feats = []
                labels = []
                for idx in range(start_pos, end_pos):
                    feats.append(train.word_mat[indexes[idx]])
                    labels.append(train.label_mat[indexes[idx]])
                batch = BatchBucket(len(feats), self.hyperParams.maxSentSize, feats, labels,
                                    vocab.word2id['<padding>'], vocab.label2id['<padding>'])
                tag_scores = self.model(batch.batch_words, self.hyperParams.batch)
                # print(tag_scores.size())
                loss = self.crf.neg_log_likelihood(tag_scores, batch.batch_labels, batch.masks)
                loss.backward()
                optimizer_rnn.step()
                optimizer_crf.step()
                if (updateIter + 1) % self.hyperParams.verboseIter == 0:
                    print('current: ', idx + 1, ", cost:", loss.data[0])

            self.model.eval()
            self.eval_predict(dev, vocab)
            self.eval_predict(test, vocab)

    def eval_predict(self, indexset, vocab):
        # Token-level accuracy over all full batches, counted only at unmasked
        # (non-padding) positions.
        correct_num = 0
        total_num = 0
        batchBlock = len(indexset.label_mat) // self.hyperParams.batch
        for updateIter in range(batchBlock):
            start_pos = updateIter * self.hyperParams.batch
            end_pos = (updateIter + 1) * self.hyperParams.batch
            feats = []
            labels = []
            for idx in range(start_pos, end_pos):
                feats.append(indexset.word_mat[idx])
                labels.append(indexset.label_mat[idx])
            batch = BatchBucket(len(feats), self.hyperParams.maxSentSize, feats, labels,
                                vocab.word2id['<padding>'], vocab.label2id['<padding>'])
            tag_scores = self.model(batch.batch_words, self.hyperParams.batch)
            predict_labels = self.crf.viterbi_decode(tag_scores, batch.masks)
            predict_labels = predict_labels.masked_select(batch.masks)
            gold_labels = batch.batch_labels.masked_select(batch.masks)
            # A position is correct when the predicted label equals the gold label.
            correct_num += torch.sum(torch.eq(predict_labels, gold_labels)).data[0]
            total_num += torch.sum(batch.masks).data[0]
        rate = correct_num / total_num
        print('total_num: {} , correct_num: {}'.format(total_num, correct_num))
        print('rate: ', rate)
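eval_predict counts a position as correct only where the mask is set. Below is a tiny self-contained illustration of that masked comparison, written with the current tensor API rather than the old Variable-based one used above; all tensors here are made up.

import torch

predict = torch.tensor([[1, 2, 0], [3, 0, 0]])
gold = torch.tensor([[1, 4, 9], [3, 9, 9]])
masks = torch.tensor([[True, True, False], [True, False, False]])  # True = real token

p = predict.masked_select(masks)        # predictions at unmasked positions
g = gold.masked_select(masks)           # gold labels at unmasked positions
correct = (p == g).sum().item()         # 2 correct positions
total = masks.sum().item()              # 3 unmasked positions
print(correct / total)                  # ~0.667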