# Excerpt overview (this text continues a call that was opened before the
# excerpt; the first tokens below are the tail of its argument list).
# After that call is closed, the code:
#   * when useSmallSoftmax is truthy, builds a VocGenerator, loads its weights
#     from vocGenFile, moves it to CUDA and switches it to eval mode;
#   * assigns embedding.targetEmbedding.weight to
#     encdec.softmaxLayer.weight.weight, so both attributes refer to the same
#     object;
#   * contains a bare triple-quoted string holding disabled example code (how
#     to split a combined checkpoint into 'Embedding' keys and the rest); it
#     is a string expression and is never executed;
#   * moves embedding and encdec to CUDA.
# NOTE(review): the source is whitespace-collapsed, so which statements are
# nested under `if useSmallSoftmax:` cannot be determined from here -- confirm
# against the properly indented file.
targetEmbedDim, hiddenDim, corpus.targetVoc.size(), useSmallSoftmax=useSmallSoftmax, dropoutRate=dropoutRate, numLayers=numLayers) if useSmallSoftmax: vocGen = VocGenerator(vocGenHiddenDim, corpus.targetVoc.size(), corpus.sourceVoc.size()) vocGen.load_state_dict(torch.load(vocGenFile)) vocGen.cuda() vocGen.eval() encdec.softmaxLayer.weight.weight = embedding.targetEmbedding.weight ''' # how to load NMT model parameters all_state_dict = torch.load(nmtFile) embedding_state_dict = {} encdec_state_dict = {} for s in all_state_dict: if 'Embedding' in s: embedding_state_dict[s] = all_state_dict[s] else: encdec_state_dict[s] = all_state_dict[s] embedding.load_state_dict(embedding_state_dict) encdec.load_state_dict(encdec_state_dict) ''' embedding.cuda() encdec.cuda()
# Excerpt overview (begins inside a loop over a state dict whose header lies
# before the excerpt, and ends on a `for` header whose body lies after it).
# In order, the code:
#   * replaces each state-dict entry with `.to(cpu)` of itself and saves the
#     dict to './params/embedding.bin'; then does the same for
#     encdec.state_dict(), saved to './params/encdec.bin'
#     (`cpu` is defined outside this excerpt -- presumably a CPU torch.device;
#     TODO confirm);
#   * stores devGleu in prevDevGleu;
#   * calls exit(0) when `train` is truthy, so the remainder only runs
#     otherwise;
#   * disables gradient tracking, reloads both saved state dicts, moves both
#     modules to `device` and puts them in eval mode;
#   * opens './trans.txt' and './gold.txt' for writing and zeroes the devPerp
#     and totalTokenCount accumulators before iterating over batchListDev.
# NOTE(review): f_trans / f_gold are opened without a `with` block -- confirm
# they are closed after the loop, which is not visible here.
# NOTE(review): the source is whitespace-collapsed; the nesting of these
# statements must be confirmed against the properly indented file.
stateDict[elem] = stateDict[elem].to(cpu) torch.save(stateDict, './params/embedding.bin') stateDict = encdec.state_dict() for elem in stateDict: stateDict[elem] = stateDict[elem].to(cpu) torch.save(stateDict, './params/encdec.bin') prevDevGleu = devGleu if train: exit(0) torch.set_grad_enabled(False) embedding.load_state_dict(torch.load('./params/embedding.bin')) encdec.load_state_dict(torch.load('./params/encdec.bin')) embedding.to(device) encdec.to(device) embedding.eval() encdec.eval() f_trans = open('./trans.txt', 'w') f_gold = open('./gold.txt', 'w') devPerp = 0.0 totalTokenCount = 0.0 for batch in batchListDev:
# Initialise the word and character embedding tables. A cached PyTorch state
# dict is preferred; when it is missing, the table is filled through
# utils.loadEmbeddings from the raw embedding file and the resulting state
# dict is written to the cache path so later runs find it.
#
# map_location='cpu' keeps torch.load from restoring tensors onto the device
# they were saved from, which fails on a host without CUDA (the fallback
# handled further down). load_state_dict copies values into the existing
# parameters, and the modules are only moved to the GPU afterwards.
#
# NOTE(review): the nesting below was reconstructed from whitespace-collapsed
# source -- confirm that each torch.save belongs to its `else` branch and that
# both loads under `if test:` are conditional.
if os.path.exists(wordParamsFile):
    embedding.wordEmbedding.load_state_dict(
        torch.load(wordParamsFile, map_location='cpu'))
else:
    utils.loadEmbeddings(embedding.wordEmbedding, corpus.voc,
                         wordEmbeddingFile)
    torch.save(embedding.wordEmbedding.state_dict(), wordParamsFile)

if os.path.exists(charParamsFile):
    embedding.charEmbedding.load_state_dict(
        torch.load(charParamsFile, map_location='cpu'))
else:
    utils.loadEmbeddings(embedding.charEmbedding, corpus.charVoc,
                         charEmbeddingFile)
    torch.save(embedding.charEmbedding.state_dict(), charParamsFile)

# In test mode, restore the saved tagger and embedding parameters.
if test:
    tagger.load_state_dict(
        torch.load(taggerParamsFile, map_location='cpu'))
    embedding.load_state_dict(
        torch.load(embeddingParamsFile, map_location='cpu'))

# Move both modules to the requested GPU, or fall back to CPU with a warning
# when CUDA is unavailable.
if useGpu:
    if torch.cuda.is_available():
        torch.cuda.set_device(args.gpuId)
        torch.cuda.manual_seed(seed)
        embedding.cuda()
        tagger.cuda()
        print('**** Running with GPU-' + str(args.gpuId) + ' ****\n')
    else:
        useGpu = False
        print('**** Warning: GPU is not available ****\n')

# Summed (not averaged) cross-entropy; targets equal to -1 are ignored.
# reduction='sum' is the supported spelling of the deprecated
# size_average=False and yields the same loss value.
criterionTagger = nn.CrossEntropyLoss(reduction='sum', ignore_index=-1)

batchListTrain = utils.buildBatchList(len(corpus.trainData), batchSize)
# Excerpt overview (ends on a `for` header whose body lies after the excerpt).
# In order, the code:
#   * moves vocGen to CUDA and switches it to eval mode;
#   * assigns embedding.targetEmbedding.weight to
#     encdec.softmaxLayer.weight.weight, so both attributes refer to the same
#     object;
#   * if nmtFile exists, loads it with torch.load and splits the entries into
#     two dicts -- keys containing the substring 'Embedding' go to the
#     embedding module, all other keys go to encdec -- then loads each dict
#     into its module; otherwise prints a notice that training starts from
#     scratch;
#   * moves embedding, encdec and bse to CUDA;
#   * builds the train and dev batch lists with utils.buildBatchList from the
#     sizes of corpus.trainData / corpus.devData and batchSize;
#   * creates the empty withoutWeightDecay / withWeightDecay lists and starts
#     iterating over the named parameters of embedding followed by encdec.
# NOTE(review): the source is whitespace-collapsed; the nesting of these
# statements (e.g. whether the first two calls sit under an `if`) must be
# confirmed against the properly indented file.
vocGen.cuda() vocGen.eval() encdec.softmaxLayer.weight.weight = embedding.targetEmbedding.weight # how to load NMT model parameters if os.path.exists(nmtFile): all_state_dict = torch.load(nmtFile) embedding_state_dict = {} encdec_state_dict = {} for s in all_state_dict: if 'Embedding' in s: embedding_state_dict[s] = all_state_dict[s] else: encdec_state_dict[s] = all_state_dict[s] embedding.load_state_dict(embedding_state_dict) encdec.load_state_dict(encdec_state_dict) else: print('****** No pre-trained model found --> training from scratch ******') embedding.cuda() encdec.cuda() bse.cuda() batchListTrain = utils.buildBatchList(len(corpus.trainData), batchSize) batchListDev = utils.buildBatchList(len(corpus.devData), batchSize) withoutWeightDecay = [] withWeightDecay = [] for name, param in list(embedding.named_parameters()) + list( encdec.named_parameters()):
# Excerpt overview (the leading `}))` closes a call opened before the excerpt,
# and the final LstmCrf(...) call continues past its end).
# In order, the code:
#   * builds `lstm` from a Config whose input size is
#     word_embed.output_size + char_cnn.output_size and whose hidden size is
#     train_args['lstm_hidden_size'], with forget_bias 1.0, batch_first and
#     bidirectional enabled;
#   * builds `crf` from a Config holding label_vocab;
#   * builds `output_linear` mapping lstm.output_size features to
#     len(label_vocab) outputs;
#   * restores word_embed, char_cnn, char_highway, lstm, crf and output_linear
#     from the matching entries of state['model'];
#   * starts constructing LstmCrf from the vocabularies, the restored
#     components and the dropout probabilities in train_args.
# NOTE(review): char_highway is restored here but is not among the LstmCrf
# arguments visible in this excerpt -- the call is truncated, so check the
# remaining arguments.
})) lstm = LSTM( Config({ 'input_size': word_embed.output_size + char_cnn.output_size, 'hidden_size': train_args['lstm_hidden_size'], 'forget_bias': 1.0, 'batch_first': True, 'bidirectional': True })) crf = CRF(Config({'label_vocab': label_vocab})) output_linear = Linear( Config({ 'in_features': lstm.output_size, 'out_features': len(label_vocab) })) word_embed.load_state_dict(state['model']['word_embed']) char_cnn.load_state_dict(state['model']['char_cnn']) char_highway.load_state_dict(state['model']['char_highway']) lstm.load_state_dict(state['model']['lstm']) crf.load_state_dict(state['model']['crf']) output_linear.load_state_dict(state['model']['output_linear']) lstm_crf = LstmCrf(token_vocab=token_vocab, label_vocab=label_vocab, char_vocab=char_vocab, word_embedding=word_embed, char_embedding=char_cnn, crf=crf, lstm=lstm, univ_fc_layer=output_linear, embed_dropout_prob=train_args['embed_dropout'], lstm_dropout_prob=train_args['lstm_dropout'],