def getInitHidden(self, rnnType, numLayers, useBiRNN, batchSize, hiddenDim):
    """Build the initial hidden state for an RNN.

    The strategy is read from ``self.Config.train`` under the key
    ``'rnn_hidden_initalization'`` (spelling as used by the configuration):

    * ``"none"`` (default): return ``None``.
    * ``"randn"``: sample every state tensor with ``torch.randn``.
    * ``"normal"``: sample every state tensor with ``torch.normal`` around a
      zero mean.

    :param rnnType: ``"lstm"`` (returns a pair of tensors) or ``"gru"``
        (returns a single tensor).
    :param numLayers: number of stacked layers; doubled when ``useBiRNN``.
    :param useBiRNN: when true, ``hiddenDim`` is halved and the layer
        dimension is doubled.
    :param batchSize: size of the second dimension of each state tensor.
    :param hiddenDim: total hidden size before the bidirectional split.
    :return: ``None``, a tensor, or a tuple of two tensors, each of shape
        ``[layers, batchSize, hiddenDim]``.
    :raises ValueError: if the configured initialization is not one of
        ``"none"``, ``"randn"`` or ``"normal"``.
    :raises SystemExit: with code 1 if ``rnnType`` is not supported while a
        random initialization is requested.
    """
    initMode = self.Config.train.get('rnn_hidden_initalization', 'none')
    if initMode == "none":
        return None

    if initMode == "randn":
        def sample(*shape):
            return torch.randn(*shape)
    elif initMode == "normal":
        def sample(*shape):
            return torch.normal(mean=torch.zeros(*shape))
    else:
        # Previously this fell through to `return hidden` with `hidden`
        # never assigned, surfacing as an opaque UnboundLocalError.
        raise ValueError(
            "Unknown rnn_hidden_initalization {!r}; expected 'none', "
            "'randn' or 'normal'".format(initMode))

    if rnnType not in ("lstm", "gru"):
        print("Invalid rnn type {}".format(rnnType))
        raise SystemExit(1)

    if useBiRNN:
        hiddenDim = hiddenDim // 2
    shape = (numLayers * 2 if useBiRNN else numLayers, batchSize, hiddenDim)

    if rnnType == "lstm":
        # Two independently sampled tensors, drawn in this order.
        hidden = (sample(*shape), sample(*shape))
    else:
        hidden = sample(*shape)

    if self.Config.use_gpu:
        hidden = move2cuda(hidden)
    return hidden
def getInitAlpha(self, batchSize):
    """Return a ``[batchSize, tagSize]`` tensor of initial scores.

    Every entry is ``-10000`` except the column
    ``self.Config.data.TAG_START_ID``, which is ``0``.

    :param batchSize: number of rows to create.
    :return: the score tensor, passed through ``move2cuda`` when
        ``self.Config.use_gpu`` is set.
    """
    # A float fill value keeps the result floating point; with an integer
    # literal, recent PyTorch releases infer an integer dtype instead.
    initAlpha = torch.full((batchSize, self.tagSize), -10000.0)
    # One column assignment replaces the per-row Python loop.
    initAlpha[:, self.Config.data.TAG_START_ID] = 0
    if self.Config.use_gpu:
        initAlpha = move2cuda(initAlpha)
    return initAlpha
def viterbiDecode(self, features, seqLens, scores=None):
    """Batched Viterbi decoding.

    :param features: tensor viewed as ``[batchSize, sentLength, tagSize]``.
    :param seqLens: int64 tensor with one length per batch row.
    :param scores: optional precomputed ``[batchSize, sentLength, tagSize,
        tagSize]`` scores. When omitted they are built as
        ``transitions[i, j] + features[b, w, i]``; the last axis ``j`` is the
        one maximised over at each step.
    :return: ``(bestScores, decodeIdx)``. ``bestScores`` is
        ``[batchSize, 1]``. ``decodeIdx`` is ``[batchSize, sentLength]`` and
        is filled back to front: column 0 holds the tag chosen at position
        ``seqLens - 1``, column 1 the tag followed back from it, and so on.
    """
    batchSize = len(features)
    sentLength = features.shape[1]
    tagSize = self.tagSize

    scoresRecord = torch.zeros([batchSize, sentLength, tagSize])
    idxRecord = torch.zeros([batchSize, sentLength, tagSize], dtype=torch.int64)
    mask = torch.ones_like(seqLens, dtype=torch.int64)
    startIds = torch.full((batchSize, tagSize), self.tagStartId, dtype=torch.int64)
    decodeIdx = torch.LongTensor(batchSize, sentLength)
    if self.useGpu:
        scoresRecord = move2cuda(scoresRecord)
        idxRecord = move2cuda(idxRecord)
        mask = move2cuda(mask)
        decodeIdx = move2cuda(decodeIdx)
        startIds = move2cuda(startIds)

    if scores is None:
        transitionPart = self.transitions.view(1, 1, tagSize, tagSize).expand(
            batchSize, sentLength, tagSize, tagSize)
        featurePart = features.view(batchSize, sentLength, tagSize, 1).expand(
            batchSize, sentLength, tagSize, tagSize)
        scores = transitionPart + featurePart

    # First position: the only predecessor considered is the start tag.
    scoresRecord[:, 0, :] = scores[:, 0, :, self.tagStartId]
    idxRecord[:, 0, :] = startIds

    # Forward pass: keep the best predecessor and its score for every tag.
    for wordIdx in range(1, sentLength):
        previous = scoresRecord[:, wordIdx - 1, :].view(batchSize, 1, tagSize).expand(
            batchSize, tagSize, tagSize)
        candidates = previous + scores[:, wordIdx, :, :]
        idxRecord[:, wordIdx, :] = torch.argmax(candidates, 2)
        chosen = idxRecord[:, wordIdx, :].view(batchSize, tagSize, 1)
        scoresRecord[:, wordIdx, :] = torch.gather(candidates, 2, chosen).view(
            batchSize, tagSize)

    # Read the scores at each row's last position and add the end-tag row
    # of the transition matrix.
    lastPositions = seqLens.view(batchSize, 1, 1).expand(batchSize, 1, tagSize) - 1
    lastScores = torch.gather(scoresRecord, 1, lastPositions).view(batchSize, tagSize)
    endTransitions = self.transitions[self.tagEndId].view(1, tagSize).expand(
        batchSize, tagSize)
    lastScores = lastScores + endTransitions
    decodeIdx[:, 0] = torch.argmax(lastScores, 1)
    bestScores = torch.gather(lastScores, 1, decodeIdx[:, 0].view(batchSize, 1))

    # Backtrace. Positions that are not greater than 0 are replaced by 1
    # (the all-ones `mask`), so rows shorter than sentLength keep re-reading
    # the back-pointers stored at index 1.
    for distance2Last in range(sentLength - 1):
        position = seqLens - distance2Last - 1
        position = torch.where(position > 0, position, mask)
        position = position.view(batchSize, 1, 1).expand(batchSize, 1, tagSize)
        lastNIdxRecord = torch.gather(idxRecord, 1, position).view(batchSize, tagSize)
        currentTag = decodeIdx[:, distance2Last].view(batchSize, 1)
        decodeIdx[:, distance2Last + 1] = torch.gather(
            lastNIdxRecord, 1, currentTag).view(batchSize)
    return bestScores, decodeIdx
def forward(self, inputs, inputSeqLengths):
    """Run ``self.rnn`` over a padded batch and return padded outputs.

    Rows are sorted by decreasing length, packed, fed to ``self.rnn`` with
    the state from ``self.initHiddenWithBatchSize``, unpacked, and restored
    to their original order. The time axis is zero-padded back to
    ``inputs.shape[1]`` when the longest sequence is shorter than that.

    :param inputs: batch-first tensor ``[batchSize, sentLength, dim]``.
    :param inputSeqLengths: 1-D tensor with the length of every row.
    :return: tensor ``[batchSize, sentLength, rnnOutputDim]`` in the
        original row order.
    """
    sentLength = inputs.shape[1]
    sortedSeqLengths, permIdx = inputSeqLengths.sort(0, descending=True)
    _, recoverPermIdx = permIdx.sort(0, descending=False)
    sortedSeqTensors = inputs[permIdx]
    # pack_padded_sequence requires a lengths tensor to live on the CPU;
    # .cpu() is a no-op when it already does.
    packedWords = pack_padded_sequence(sortedSeqTensors, sortedSeqLengths.cpu(), True)
    hidden = self.initHiddenWithBatchSize(len(inputs))
    lstmOut, hidden = self.rnn(packedWords, hidden)
    lstmOut, _ = pad_packed_sequence(lstmOut, batch_first=True)
    lstmOut = lstmOut[recoverPermIdx]

    missing = sentLength - lstmOut.shape[1]
    if missing > 0:
        # Allocate the padding from lstmOut so it shares device and dtype.
        # The previous unconditional move2cuda(pad) broke CPU-only runs.
        pad = lstmOut.new_zeros(inputs.shape[0], missing, lstmOut.shape[2])
        lstmOut = torch.cat([lstmOut, pad], 1)
    return lstmOut
def forward(self, features, seqLens, mask):
    """Accumulate log-sum-exp scores over all tag sequences.

    :param features: tensor viewed as ``[batchSize, sentLength, tagSize]``.
    :param seqLens: int64 tensor with one length per batch row; the
        accumulated scores are read at position ``seqLens - 1``.
    :param mask: accepted for interface compatibility; not used here.
    :return: ``(total, scores)`` where ``total`` is the sum over the batch
        of the final log-sum-exp values and ``scores`` is the
        ``[batchSize, sentLength, tagSize, tagSize]`` tensor
        ``transitions[i, j] + features[b, w, i]``.
    """
    batchSize = len(features)
    sentLength = features.shape[1]
    tagSize = self.tagSize

    transitionPart = self.transitions.view(1, 1, tagSize, tagSize).expand(
        batchSize, sentLength, tagSize, tagSize)
    featurePart = features.view(batchSize, sentLength, tagSize, 1).expand(
        batchSize, sentLength, tagSize, tagSize)
    scores = transitionPart + featurePart

    alpha = torch.zeros([batchSize, sentLength, tagSize])
    if self.useGpu:
        alpha = move2cuda(alpha)

    # First position: the only predecessor considered is the start tag.
    alpha[:, 0, :] = scores[:, 0, :, self.tagStartId]
    for wordIdx in range(1, sentLength):
        previous = alpha[:, wordIdx - 1, :].view(batchSize, 1, tagSize).expand(
            batchSize, tagSize, tagSize)
        alpha[:, wordIdx, :] = torch.logsumexp(previous + scores[:, wordIdx, :, :], 2)

    # Pick each row's last position, add the end-tag transition row, and
    # reduce over tags.
    lastPositions = seqLens.view(batchSize, 1, 1).expand(batchSize, 1, tagSize) - 1
    lastAlpha = torch.gather(alpha, 1, lastPositions).view(batchSize, tagSize)
    endTransitions = self.transitions[self.tagEndId].view(1, tagSize).expand(
        batchSize, tagSize)
    lastAlpha = torch.logsumexp(lastAlpha + endTransitions, 1).view(batchSize)
    return torch.sum(lastAlpha), scores
def forward(self, batchInput, negMode=False):
    """Decode a batch, or compute its loss when ``negMode`` is set.

    :param batchInput: 14-tuple unpacked as (wordSeqTensor, tagSeqTensor,
        wordSeqLengths, charSeqTensor, charSeqLengths, seq2NodeTensor,
        node2SeqTensor, adjMatrixTensor0, gazNode2Idxs, gazNodeLengths,
        nodeNums, gazBlankState, fwbigramTensor, bwbigramTensor).
    :param negMode: when true, return
        ``self.negLogLikelihoodLoss(batchInput)`` instead of decoding.
    :return: the loss in ``negMode``; otherwise the ``(bestScores,
        decodeIdx)`` pair produced by ``self.crf.viterbiDecode``.
    """
    if negMode:
        return self.negLogLikelihoodLoss(batchInput)

    (wordSeqTensor, tagSeqTensor, wordSeqLengths, charSeqTensor,
     charSeqLengths, seq2NodeTensor, node2SeqTensor, adjMatrixTensor0,
     gazNode2Idxs, gazNodeLengths, nodeNums, gazBlankState, fwbigramTensor,
     bwbigramTensor) = batchInput
    batchSize = wordSeqTensor.shape[0]
    sentLength = wordSeqTensor.shape[1]

    # Token embedding: char-aware, word + bigram, or word only.
    if self.useChar:
        rawEmbedding = self.wordEmbedding(wordSeqTensor, charSeqTensor, charSeqLengths)
    elif self.useBigram:
        rawEmbedding = torch.cat([
            self.wordEmbedding(wordSeqTensor),
            self.fwbigramEmbedding(fwbigramTensor),
            self.bwbigramEmbedding(bwbigramTensor),
        ], 2)
    else:
        rawEmbedding = self.wordEmbedding(wordSeqTensor)
    wordSeqEmbedding = self.dropout(rawEmbedding)
    wordStateEmbedding = self.embStateLinear(wordSeqEmbedding)

    # Gather token states into node order.
    maxNodeLength = node2SeqTensor.shape[1]
    nodeGatherIdx = node2SeqTensor.expand(
        batchSize, maxNodeLength, wordStateEmbedding.shape[2])
    mainNodeState = torch.gather(wordStateEmbedding, 1, nodeGatherIdx)

    if self.gaNum > 0:
        # Append blank slots that the gazetteer states are scattered into.
        blankState = gazBlankState.view(batchSize, -1, 1).expand(
            batchSize, -1, self.stateDim)
        initNodeStateEmbedding = torch.cat([mainNodeState, blankState], dim=1)
    else:
        initNodeStateEmbedding = mainNodeState

    startNodeIdx = nodeNums.clone()
    for gazIdx in range(self.gaNum):
        gazState = self.gaLinear[gazIdx](self.gaEmb[gazIdx](gazNode2Idxs[gazIdx]))
        gazLength = gazState.shape[1]
        gazMaskRaw = torch.arange(0, gazLength, dtype=torch.int64).view(
            1, gazLength, 1).expand(batchSize, gazLength, self.stateDim)
        if self.useGpu:
            gazMaskRaw = move2cuda(gazMaskRaw)
        lengthLimit = gazNodeLengths[gazIdx].view(batchSize, 1, 1)
        # Offsets at or beyond a row's gazetteer length collapse onto that
        # length.
        gazMask = torch.where(gazMaskRaw < lengthLimit, gazMaskRaw, lengthLimit)
        if self.useGpu:
            gazMask = move2cuda(gazMask)
        gazMask = gazMask + startNodeIdx.view(batchSize, 1, 1).expand(
            batchSize, gazLength, self.stateDim)
        initNodeStateEmbedding.scatter_(1, gazMask, gazState)
        startNodeIdx = startNodeIdx + gazNodeLengths[gazIdx]

    # Each graph layer consumes the output of the previous one.
    nodeGraphEmbedding = initNodeStateEmbedding
    for layerIdx in range(self.nLayer):
        nodeGraphEmbedding = self.graphEmb[layerIdx](
            nodeGraphEmbedding, adjMatrixTensor0, adjMatrixTensor0.shape[1])

    # Gather node states back into token order.
    wordGatherIdx = seq2NodeTensor.expand(
        [batchSize, sentLength, nodeGraphEmbedding.shape[2]])
    wordGraphEmbedding = torch.gather(nodeGraphEmbedding, 1, wordGatherIdx)

    if self.useRnn:
        encoded = self.encoder(wordGraphEmbedding, wordSeqLengths)
    else:
        encoded = wordGraphEmbedding
    wordFeatures = self.logsoftmax(self.embFeatureLinear(encoded))

    bestScores, decodeIdx = self.crf.viterbiDecode(wordFeatures, wordSeqLengths)
    return bestScores, decodeIdx
def evaluate(model, batchInput, tag2Idx, idx2Tag, dumpFile=None, id2Word=None,
             rawSentenceBatch=None, useGpu=False):
    """Score ``model`` on ``batchInput`` with strict and loose entity matching.

    A gold entity is a strict hit when the aligned predicted entity has the
    same type and the same span, and a loose hit when it has the same type
    and an overlapping span. Strict hits also count as loose hits.

    :param model: callable returning ``(bestScores, decodeIdx)`` for a batch;
        ``model.eval()`` is called first.
    :param batchInput: iterable of batches; items 0, 1 and 2 of each batch
        are read as word ids, gold tag ids and sequence lengths.
    :param tag2Idx: forwarded to ``getEntities``.
    :param idx2Tag: tag id -> tag string lookup; also forwarded to
        ``getEntities``.
    :param dumpFile: optional sequence of six writable files, indexed as
        0 gold entities with no type-matching overlap, 1 loose-only matches,
        2 predicted tags per token, 3 gold entities, 4 predicted entities,
        5 gold tags per token.
    :param id2Word: unused; kept for interface compatibility.
    :param rawSentenceBatch: ``rawSentenceBatch[batchId][batchPos]`` is the
        token list of a sentence. Only needed when ``dumpFile`` is given.
    :param useGpu: pass every batch through ``move2cuda`` first.
    :return: ``(strictP, looseP, strictR, looseR, strictF1, looseF1)``, each
        scaled to a percentage.
    :raises ValueError: if ``dumpFile`` is given without ``rawSentenceBatch``.
    """
    dumping = dumpFile is not None
    if dumping and rawSentenceBatch is None:
        raise ValueError("rawSentenceBatch is required when dumpFile is given")

    model.eval()
    predTotal = 0
    goldTotal = 0
    strictHits = 0
    looseHits = 0
    for batchId, batchItem in enumerate(batchInput):
        if useGpu:
            batchItem = move2cuda(batchItem)
        wordSeqTensor = batchItem[0]
        tagSeqTensor = batchItem[1]
        wordSeqLengths = batchItem[2]
        _, decodeIdx = model(batchItem)
        tagSeq = tagSeqTensor.data
        predSeq = decodeIdx.data
        for batchPos in range(len(wordSeqTensor)):
            sentLength = wordSeqLengths[batchPos]
            gold = tagSeq[batchPos][: sentLength]
            # The decoded row is flipped before use; viterbiDecode in this
            # file fills its output last-token-first.
            pred = reversed(predSeq[batchPos][: sentLength])

            # Raw sentences are only needed for the dump files, so plain
            # metric computation no longer requires rawSentenceBatch.
            tokens = None
            sentenceString = None
            if dumping:
                tokens = rawSentenceBatch[batchId][batchPos]
                sentenceString = ' '.join(tokens)
                for wordIdx in range(len(pred)):
                    dumpFile[2].write(tokens[wordIdx] + '\t' + idx2Tag[pred[wordIdx]] + '\n')
                dumpFile[2].write('\n')
                for wordIdx in range(len(gold)):
                    dumpFile[5].write(tokens[wordIdx] + '\t' + idx2Tag[gold[wordIdx]] + '\n')
                dumpFile[5].write('\n')

            goldEntities = getEntities(gold, tag2Idx, idx2Tag)
            predEntities = getEntities(pred, tag2Idx, idx2Tag)

            if dumping:
                dumpFile[3].write(sentenceString + '\n')
                dumpFile[4].write(sentenceString + '\n')
                for entity in goldEntities:
                    dumpFile[3].write(' '.join(tokens[entity[0]: entity[1] + 1]) + '\t' + str(entity[0]) + '\t' + str(entity[1]) + '\t' + entity[2] + '\n')
                for entity in predEntities:
                    dumpFile[4].write(' '.join(tokens[entity[0]: entity[1] + 1]) + '\t' + str(entity[0]) + '\t' + str(entity[1]) + '\t' + entity[2] + '\n')
                dumpFile[3].write('\n')
                dumpFile[4].write('\n')

            predTotal += len(predEntities)
            goldTotal += len(goldEntities)

            if len(predEntities) == 0:
                # Every gold entity is a miss. The sentence header now ends
                # with a newline and is only written when there is at least
                # one missed entity, matching the general path below.
                if dumping and len(goldEntities) > 0:
                    dumpFile[0].write(sentenceString + '\n')
                    for entity in goldEntities:
                        dumpFile[0].write('\t'.join([' '.join(tokens[entity[0]: entity[1] + 1]), str(entity[0]), str(entity[1]), entity[2]]) + '\n')
                continue

            predIdx = 0
            strictFalse = []
            looseFalse = []
            for goldEntity in goldEntities:
                looseMatch = False
                strictMatch = False
                entityStart, entityEnd, entityType = goldEntity
                # Skip predictions that end before this gold entity starts;
                # never advance past the last prediction.
                while predIdx < len(predEntities) - 1 and predEntities[predIdx][1] < entityStart:
                    predIdx += 1
                candidate = predEntities[predIdx]
                if entityType == candidate[2]:
                    if entityStart == candidate[0] and entityEnd == candidate[1]:
                        strictHits += 1
                        looseHits += 1
                        strictMatch = True
                    elif max(candidate[0], entityStart) <= min(entityEnd, candidate[1]):
                        looseHits += 1
                        looseMatch = True
                if dumping and not strictMatch:
                    if not looseMatch:
                        strictFalse.append(goldEntity)
                    else:
                        looseFalse.append([goldEntity, candidate])

            if dumping:
                if len(strictFalse) > 0:
                    dumpFile[0].write(sentenceString + '\n')
                    for entity in strictFalse:
                        dumpFile[0].write('\t'.join([''.join(tokens[entity[0]: entity[1] + 1]), str(entity[0]), str(entity[1]), entity[2]]) + '\n')
                if len(looseFalse) > 0:
                    dumpFile[1].write(sentenceString + '\n')
                    for entity in looseFalse:
                        dumpFile[1].write('\t'.join([''.join(tokens[entity[0][0]: entity[0][1] + 1]), str(entity[0][0]), str(entity[0][1]), entity[0][2], ''.join(tokens[entity[1][0]: entity[1][1] + 1]), str(entity[1][0]), str(entity[1][1]), entity[1][2]]) + '\n')

    ps = strictHits / predTotal if predTotal > 0 else 0
    pl = looseHits / predTotal if predTotal > 0 else 0
    rs = strictHits / goldTotal if goldTotal > 0 else 0
    rl = looseHits / goldTotal if goldTotal > 0 else 0
    fs = 2 * ps * rs / (ps + rs) if ps + rs > 0 else 0
    fl = 2 * pl * rl / (pl + rl) if pl + rl > 0 else 0
    return ps * 100, pl * 100, rs * 100, rl * 100, fs * 100, fl * 100
def train(Config):
    """Train the model described by ``Config`` and optionally test it.

    Steps: seed the RNGs, load the train corpus (plus dev/test when
    ``Config.use_dev`` / ``Config.eval.do_eval`` are set), match gazetteers,
    build the model and its optimizer, then run ``Config.train.epoch``
    epochs. Progress goes to stdout and ``<log_folder>/<model_name>/train.log``.
    On dev epochs the model is evaluated and saved whenever the strict dev
    F1 is at least the best seen so far. With ``do_eval`` the best
    checkpoint is reloaded at the end and evaluated on the test set, writing
    ``test.log`` and the per-sentence dump files.

    :param Config: configuration object; the ``train``, ``data``, ``model``,
        ``eval``, ``use_gpu``, ``use_dev``, ``gpu_num`` and ``log_folder``
        entries are read here.
    """
    import math
    from contextlib import ExitStack

    setSeed(Config.train.seed, Config.use_gpu)

    # load data
    dataPath = os.path.join(Config.data.data_base_path)
    testData = None
    devData = None
    if Config.eval.do_eval:
        testData = Corpus(os.path.join(dataPath, 'test.txt'), Config.data.use_normalized_word)
    trainData = Corpus(os.path.join(dataPath, 'train.txt'), Config.data.use_normalized_word)
    if Config.use_dev:
        devData = Corpus(os.path.join(dataPath, 'dev.txt'), Config.data.use_normalized_word)
    # Only corpora that were actually loaded; previously a disabled dev or
    # test set caused a NameError further down.
    heldOutData = [corpus for corpus in (testData, devData) if corpus is not None]

    if Config.data.get('word_embedding', False):
        for corpus in heldOutData:
            trainData.words.update(corpus.words)
        for corpus in heldOutData:
            trainData.bwbigrams.update(corpus.bwbigrams)
        for corpus in heldOutData:
            trainData.fwbigrams.update(corpus.fwbigrams)

    # gazetteers
    gazetters = []
    if Config.model.get("graph_emb", None) is not None and Config.model.graph_emb.get('gazetter', None) is not None:
        gazetterConfig = Config.model.graph_emb.gazetter
        for name in gazetterConfig.to_dict():
            if name == 'get_tag':
                continue
            gaItem = gazetterConfig.get(name)
            gazetteer = Gazetteer(name, gaItem['path'], Config.data.use_normalized_word,
                                  gaItem['emb_dim'], gaItem['method'], gaItem['space'],
                                  gaItem['match_ignore_case'],
                                  embedding=gaItem.get('embedding', None))
            gazetters.append(gazetteer)
            for corpus in heldOutData + [trainData]:
                gazetteer.matchCorpus(corpus)

    # generate train data meta
    print("Generating corpus meta...")
    trainMeta = CorpusMeta(trainData, Config)
    for corpus in (devData, testData):
        if corpus is not None:
            trainMeta.updateMaxSentLength(corpus)
    trainMeta.gazetters = gazetters

    # initialize model
    print("Initializing model...")
    layerUtils = LayerUtils(Config, trainMeta)
    layerHelper = LayerHelper(Config, layerUtils)
    modelHelper = ModelHelper(Config, layerHelper)
    if Config.model.get('load_from_pretrain', False):
        model = modelHelper.loadModel()
    else:
        model = modelHelper.getModel()
    if Config.use_gpu:
        model.cuda()
    trainer = modelHelper.getTrainer(model)
    multiGpu = len(Config.gpu_num) > 1
    if multiGpu:
        # Wrap the model, not the optimizer: every multi-GPU branch below
        # goes through model.module and trainer.step().
        # NOTE(review): device_ids is not defined in this function and is
        # assumed to exist at module level -- confirm.
        model = nn.DataParallel(model, device_ids=device_ids)
    # Underlying model exposing the batching helpers.
    coreModel = model.module if multiGpu else model

    # generate batch input
    print("Generating batch input...")
    if Config.use_dev:
        devRawSentenceBatch = coreModel.getRawSentenceBatches(devData, trainMeta, Config.train.batch_size)
    if Config.eval.do_eval:
        testRawSentenceBatch = coreModel.getRawSentenceBatches(testData, trainMeta, Config.train.batch_size)

    logFolder = os.path.join(Config.log_folder, Config.model.model_name)
    os.makedirs(logFolder, exist_ok=True)

    # train
    with open(os.path.join(logFolder, "train.log"), 'w', encoding='utf-8') as trainLog:
        trainLog.write(Config.__str__() + '\n')
        print(Config.__str__())
        trainStart = time.time()
        bestF1 = -1
        bestEpoch = -1
        for epoch in range(1, Config.train.epoch + 1):
            random.shuffle(trainData.utterances)
            batchInput = coreModel.generateBatchInput(trainData, trainMeta, Config.train.batch_size)
            if Config.train.optimizer == "sgd":
                trainer = modelHelper.lrDecay(trainer, epoch)
            model.train()
            model.zero_grad()
            sampleLoss = 0.0
            sampleCount = 0
            tempStart = time.time()
            epochStart = time.time()
            epochLoss = 0
            for batchItem in batchInput:
                if Config.use_gpu:
                    batchItem = move2cuda(batchItem)
                if multiGpu:
                    loss = model(batchItem, negMode=True).sum()
                else:
                    loss = model.negLogLikelihoodLoss(batchItem)
                # Accumulate plain floats so the NaN check below can work.
                batchLoss = float(loss.data)
                sampleLoss += batchLoss
                epochLoss += batchLoss
                sampleCount += len(batchItem[0])
                loss.backward()
                trainer.step()
                model.zero_grad()
                if sampleCount >= Config.train.report_frequence:
                    tempTime = time.time()
                    tempCost = tempTime - tempStart
                    tempStart = tempTime
                    # Report the loss accumulated over the window, not just
                    # the loss of the last batch.
                    report = "Process {} sentences. Loss: {:.2f}. Time: {:.2f}".format(
                        sampleCount, sampleLoss / sampleCount, tempCost)
                    print(report)
                    trainLog.write(report + '\n')
                    if sampleLoss > 1e8 or math.isnan(sampleLoss):
                        print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
                        trainLog.write("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...." + '\n')
                        sys.exit(1)
                    sys.stdout.flush()
                    sampleLoss = 0.0
                    sampleCount = 0
            epochCost = time.time() - epochStart

            if Config.use_dev:
                devBatchInput = coreModel.generateBatchInput(devData, trainMeta, Config.train.batch_size)
            if Config.eval.do_eval:
                testBatchInpit = coreModel.generateBatchInput(testData, trainMeta, Config.train.batch_size)

            if Config.use_dev and epoch % Config.train.dev_epoch_frequence == 0:
                model.eval()
                devP, _, devR, _, devF, _ = evaluate(
                    model, devBatchInput, trainMeta.tag2Idx, trainMeta.idx2Tag,
                    rawSentenceBatch=devRawSentenceBatch, useGpu=Config.use_gpu)
                if devF >= bestF1:
                    bestF1 = devF
                    bestEpoch = epoch
                    modelHelper.saveModel(model, epoch)
                print("Epoch {}. Loss: {:.2f}. Time: {:.2f}".format(epoch, epochLoss, epochCost))
                print('Dev P: {:.2f} R: {:.2f} F1: {:.2f}'.format(devP, devR, devF))
                if Config.eval.do_eval:
                    testP, _, testR, _, testF, _ = evaluate(
                        model, testBatchInpit, trainMeta.tag2Idx, trainMeta.idx2Tag,
                        rawSentenceBatch=testRawSentenceBatch, useGpu=Config.use_gpu)
                    print('Test P: {:.2f} R: {:.2f} F1: {:.2f}'.format(testP, testR, testF))
                # Log the dev metrics; they used to be overwritten by the
                # test metrics before this line was written.
                trainLog.write("Epoch {}. Loss: {:.2f}. Dev P: {:.2f} R: {:.2f} F1: {:.2f}. Time: {:.2f}".format(
                    epoch, epochLoss, devP, devR, devF, epochCost) + '\n')
            else:
                print("Epoch {}. Loss: {:.2f}. Time: {:.2f}".format(epoch, epochLoss, epochCost))
                trainLog.write("Epoch {}. Loss: {:.2f}. Time: {:.2f}".format(epoch, epochLoss, epochCost) + '\n')
            sys.stdout.flush()

        if bestF1 == 0:
            modelHelper.saveModel(model, 0)
        print("Finish training. Best epoch {}. F1 {:.2f}. Time {:.2f}".format(
            bestEpoch, bestF1, time.time() - trainStart))
        trainLog.write("Finish training. Time {:.2f}".format(time.time() - trainStart) + '\n')

    if Config.eval.do_eval:
        # NOTE(review): without dev evaluation no checkpoint is saved above
        # and bestEpoch stays -1 -- confirm loadModel(-1) is meaningful.
        model = modelHelper.loadModel(bestEpoch)
        if Config.use_gpu:
            model.cuda()
        testBatchInpit = model.generateBatchInput(testData, trainMeta, 20)
        testRawSentenceBatch = model.getRawSentenceBatches(testData, trainMeta, 20)
        # ExitStack closes every output file, including the four dump files
        # that were previously left open.
        with ExitStack() as stack:
            def openLog(fileName):
                return stack.enter_context(
                    open(os.path.join(logFolder, fileName), 'w', encoding='utf-8'))

            testLog = openLog('test.log')
            strictFalse = openLog('strictFalse')
            looseFalse = openLog('looseFalse')
            rawOutput = openLog('testOutput')
            goldOutput = openLog('testGold')
            goldEntityOutput = openLog('testGoldEntities')
            predEntityOutput = openLog('testPredEntities')
            ps, pl, rs, rl, fs, fl = evaluate(
                model, testBatchInpit, trainMeta.tag2Idx, trainMeta.idx2Tag,
                (strictFalse, looseFalse, rawOutput, goldEntityOutput,
                 predEntityOutput, goldOutput),
                trainMeta.idx2Word, testRawSentenceBatch, useGpu=Config.use_gpu)
            print("Test P: {:.2f} R: {:.2f} F1: {:.2f}".format(ps, rs, fs))
            testLog.write("Test P: {:.2f} R: {:.2f} F1: {:.2f}".format(ps, rs, fs) + '\n')
def getInitAlphaVector(self):
    """Return a 1-D tensor of ``tagSize`` initial scores.

    Every entry is ``-10000`` except index
    ``self.Config.data.TAG_START_ID``, which is ``0``.

    :return: the score vector, passed through ``move2cuda`` when
        ``self.Config.use_gpu`` is set.
    """
    # `(self.tagSize)` is just an int; torch.full needs a real size tuple.
    # The float fill value keeps the result floating point.
    initAlpha = torch.full((self.tagSize,), -10000.0)
    initAlpha[self.Config.data.TAG_START_ID] = 0
    if self.Config.use_gpu:
        initAlpha = move2cuda(initAlpha)
    return initAlpha