def showAtt():
    attn_file = '/search/odin/zll/NQG/data/models/NQG_plus/attentions/'
    srcBatch = [['after', 'the', 'death', 'of', 'tugh', 'temür', 'in', '1332',
                 'and', 'subsequent', 'death', 'of', 'rinchinbal', '-lrb-',
                 'emperor', 'ningzong', '-rrb-', 'the', 'same', 'year', ',',
                 'the', '13-year-old', 'toghun', 'temür', '-lrb-', 'emperor',
                 'huizong', '-rrb-', ',', 'the', 'last', 'of', 'the', 'nine',
                 'successors', 'of', 'kublai', 'khan', ',', 'was', 'summoned',
                 'back', 'from', 'guangxi', 'and', 'succeeded', 'to', 'the',
                 'throne', '.']]
    feats_batch = [[
        # POS tags
        ['IN', 'DT', 'NN', 'IN', 'NNP', 'NNP', 'IN', 'CD', 'CC', 'JJ', 'NN',
         'IN', 'NNP', '-LRB-', 'NNP', 'NNP', '-RRB-', 'DT', 'JJ', 'NN', ',',
         'DT', 'JJ', 'NNP', 'NNP', '-LRB-', 'NNP', 'NNP', '-RRB-', ',', 'DT',
         'JJ', 'IN', 'DT', 'CD', 'NNS', 'IN', 'NNP', 'NNP', ',', 'VBD', 'VBN',
         'RB', 'IN', 'NNP', 'CC', 'VBD', 'TO', 'DT', 'NN', '.'],
        # NER tags
        ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'DATE', 'O', 'O', 'O', 'O', 'O',
         'O', 'O', 'PERSON', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'PERSON',
         'PERSON', 'O', 'O', 'PERSON', 'O', 'O', 'O', 'O', 'O', 'O', 'NUMBER',
         'O', 'O', 'PERSON', 'PERSON', 'O', 'O', 'O', 'O', 'O', 'LOCATION',
         'O', 'O', 'O', 'O', 'O', 'O'],
        # case features
        ['UP', 'LOW', 'LOW', 'LOW', 'UP', 'UP', 'LOW', 'LOW', 'LOW', 'LOW',
         'LOW', 'LOW', 'UP', 'LOW', 'UP', 'UP', 'LOW', 'LOW', 'LOW', 'LOW',
         'LOW', 'LOW', 'LOW', 'UP', 'UP', 'LOW', 'UP', 'UP', 'LOW', 'LOW',
         'LOW', 'LOW', 'LOW', 'LOW', 'LOW', 'LOW', 'LOW', 'UP', 'UP', 'LOW',
         'LOW', 'LOW', 'LOW', 'LOW', 'UP', 'LOW', 'LOW', 'LOW', 'LOW', 'LOW',
         'LOW']
    ]]
    tgtBatch = [['when', 'did', 'tugh', 'temur', 'die', '?\n']]
    # [[['what', 'was', 'the', 'last', 'year', 'of', '<unk>', 'khan', '?']]]

    opt = parser.parse_args()
    logger.info(opt)
    opt.cuda = opt.gpu > -1
    if opt.cuda:
        torch.cuda.set_device(opt.gpu)

    translator = s2s.Translator(opt)
    predBatch, predScore, mul_attn, goldScore = translator.translate(
        srcBatch, feats_batch, tgtBatch)

    # split the multi-head attention of the first (only) sentence into one
    # (tgt_len, 1, src_len) slice per head and plot each head separately
    mul_attns = torch.split(mul_attn[0], 1, 1)
    src = ' '.join(srcBatch[0])
    tgt = ' '.join(tgtBatch[0])
    out = ' '.join(predBatch[0][0])
    for i in range(len(mul_attns)):
        attn_ofn = attn_file + 'head_' + str(i) + '.jpg'
        attn = mul_attns[i].squeeze(1)
        display_attention(src, out, attn, attn_ofn)
    print(len(mul_attns))
    print(mul_attns[0].shape)
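# `display_attention` is called above but not defined in this file. A minimal
# sketch of a compatible helper, assuming `attention` is a (tgt_len, src_len)
# tensor; the name and signature follow the call above, while the matplotlib
# rendering is an assumption.
import matplotlib
matplotlib.use('Agg')  # render to file without a display
import matplotlib.pyplot as plt


def display_attention(src_sentence, out_sentence, attention, out_file):
    src_tokens = src_sentence.split(' ')
    out_tokens = out_sentence.split(' ')
    fig, ax = plt.subplots(figsize=(14, 6))
    # one row per generated token, one column per source token
    ax.matshow(attention.detach().cpu().numpy(), cmap='bone')
    ax.set_xticks(range(len(src_tokens)))
    ax.set_xticklabels(src_tokens, rotation=90, fontsize=6)
    ax.set_yticks(range(len(out_tokens)))
    ax.set_yticklabels(out_tokens, fontsize=8)
    fig.savefig(out_file, bbox_inches='tight')
    plt.close(fig)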
def main():
    opt = parser.parse_args()
    logger.info(opt)
    opt.cuda = opt.gpu > -1
    if opt.cuda:
        torch.cuda.set_device(opt.gpu)

    translator = s2s.Translator(opt)
    outF = open(opt.output, 'w', encoding='utf-8')

    predScoreTotal, predWordsTotal, goldScoreTotal, goldWordsTotal = 0, 0, 0, 0
    srcBatch, tgtBatch = [], []
    bio_batch, feats_batch = [], []
    count = 0

    tgtF = open(opt.tgt) if opt.tgt else None
    bioF = open(opt.bio, encoding='utf-8')
    featFs = [open(x, encoding='utf-8') for x in opt.feats]

    # addone() yields every source line followed by a final None, so the
    # last (possibly short) batch falls through to the translate call below
    for line in addone(open(opt.src, encoding='utf-8')):
        if line is not None:
            srcTokens = line.strip().split(' ')
            srcBatch += [srcTokens]
            bio_tokens = bioF.readline().strip().split(' ')
            bio_batch += [bio_tokens]
            feats_tokens = [reader.readline().strip().split(' ')
                            for reader in featFs]
            feats_batch += [feats_tokens]
            if tgtF:
                tgtTokens = tgtF.readline().split(' ')
                tgtBatch += [tgtTokens]
            if len(srcBatch) < opt.batch_size:
                continue
        else:
            # at the end of file, check last batch
            if len(srcBatch) == 0:
                break

        predBatch, predScore, goldScore = translator.translate(
            srcBatch, bio_batch, feats_batch, tgtBatch)
        predScoreTotal += sum(score[0] for score in predScore)
        predWordsTotal += sum(len(x[0]) for x in predBatch)
        # if tgtF is not None:
        #     goldScoreTotal += sum(goldScore)
        #     goldWordsTotal += sum(len(x) for x in tgtBatch)

        for b in range(len(predBatch)):
            count += 1
            outF.write(" ".join(predBatch[b][0]) + '\n')
            outF.flush()

            if opt.verbose:
                srcSent = ' '.join(srcBatch[b])
                if translator.tgt_dict.lower:
                    srcSent = srcSent.lower()
                logger.info('SENT %d: %s' % (count, srcSent))
                logger.info('PRED %d: %s' % (count, " ".join(predBatch[b][0])))
                logger.info("PRED SCORE: %.4f" % predScore[b][0])

                if tgtF is not None:
                    tgtSent = ' '.join(tgtBatch[b])
                    if translator.tgt_dict.lower:
                        tgtSent = tgtSent.lower()
                    logger.info('GOLD %d: %s' % (count, tgtSent))
                    # logger.info("GOLD SCORE: %.4f" % goldScore[b])

                if opt.n_best > 1:
                    logger.info('\nBEST HYP:')
                    for n in range(opt.n_best):
                        logger.info("[%.4f] %s" % (predScore[b][n],
                                                   " ".join(predBatch[b][n])))
                logger.info('')

        srcBatch, tgtBatch = [], []
        bio_batch, feats_batch = [], []

    reportScore('PRED', predScoreTotal, predWordsTotal)
    # if tgtF:
    #     reportScore('GOLD', goldScoreTotal, goldWordsTotal)

    if tgtF:
        tgtF.close()
    logger.info('{0} copy'.format(translator.copyCount))
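# `addone` and `reportScore` are called above but not defined in this file.
# Minimal sketches consistent with the call sites; the bodies are assumptions
# (only the names and arguments come from the code above).
import math


def addone(f):
    # yield every line, then a final None so the loop above flushes the
    # last (possibly smaller-than-batch_size) batch
    for line in f:
        yield line
    yield None


def reportScore(name, scoreTotal, wordsTotal):
    # average log-likelihood per word and the corresponding perplexity
    logger.info("%s AVG SCORE: %.4f, %s PPL: %.4f" % (
        name, scoreTotal / wordsTotal,
        name, math.exp(-scoreTotal / wordsTotal)))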
def main():
    import onlinePreprocess
    onlinePreprocess.lower = opt.lower_input
    onlinePreprocess.seq_length = opt.max_sent_length
    onlinePreprocess.shuffle = 1 if opt.process_shuffle else 0
    from onlinePreprocess import prepare_data_online

    dataset = prepare_data_online(opt.train_src, opt.src_vocab,
                                  opt.train_bio, opt.bio_vocab,
                                  opt.train_feats, opt.feat_vocab,
                                  opt.train_tgt, opt.tgt_vocab)

    trainData = s2s.Dataset(dataset['train']['src'], dataset['train']['bio'],
                            dataset['train']['feats'], dataset['train']['tgt'],
                            dataset['train']['switch'], dataset['train']['c_tgt'],
                            opt.batch_size, opt.gpus)
    dicts = dataset['dicts']
    logger.info(' * vocabulary size. source = %d; target = %d' %
                (dicts['src'].size(), dicts['tgt'].size()))
    logger.info(' * number of training sentences. %d' %
                len(dataset['train']['src']))
    logger.info(' * maximum batch size. %d' % opt.batch_size)

    logger.info('Building model...')

    encoder = s2s.Models.Encoder(opt, dicts['src'])
    decoder = s2s.Models.Decoder(opt, dicts['tgt'])
    decIniter = s2s.Models.DecInit(opt)

    generator = nn.Sequential(
        nn.Linear(opt.dec_rnn_size // opt.maxout_pool_size, dicts['tgt'].size()),
        # TODO: fix here
        # nn.LogSoftmax(dim=1)
        nn.Softmax(dim=1)
    )

    model = s2s.Models.NMTModel(encoder, decoder, decIniter)
    model.generator = generator
    translator = s2s.Translator(opt, model, dataset)

    if len(opt.gpus) >= 1:
        model.cuda()
        generator.cuda()
    else:
        model.cpu()
        generator.cpu()

    # if len(opt.gpus) > 1:
    #     model = nn.DataParallel(model, device_ids=opt.gpus, dim=1)
    #     generator = nn.DataParallel(generator, device_ids=opt.gpus, dim=0)

    for pr_name, p in model.named_parameters():
        logger.info(pr_name)
        # p.data.uniform_(-opt.param_init, opt.param_init)
        if p.dim() == 1:
            # biases and other vectors: zero-mean Gaussian init
            # p.data.zero_()
            p.data.normal_(0, math.sqrt(6 / (1 + p.size(0))))
        else:
            # weight matrices: Xavier-normal init with gain sqrt(3)
            nn.init.xavier_normal_(p, math.sqrt(3))

    encoder.load_pretrained_vectors(opt)
    decoder.load_pretrained_vectors(opt)

    optim = s2s.Optim(
        opt.optim, opt.learning_rate,
        max_grad_norm=opt.max_grad_norm,
        max_weight_value=opt.max_weight_value,
        lr_decay=opt.learning_rate_decay,
        start_decay_at=opt.start_decay_at,
        decay_bad_count=opt.halve_lr_bad_count
    )
    optim.set_parameters(model.parameters())

    validData = None
    if opt.dev_input_src and opt.dev_ref:
        validData = load_dev_data(translator, opt.dev_input_src, opt.dev_bio,
                                  opt.dev_feats, opt.dev_ref)
    trainModel(model, translator, trainData, validData, dataset, optim)
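# A quick shape check for the generator built above (a standalone sketch with
# made-up sizes, not part of the training script): the maxout decoder emits
# vectors of size dec_rnn_size // maxout_pool_size, and the generator maps
# each one to a probability distribution over the target vocabulary.
import torch
import torch.nn as nn

dec_rnn_size, maxout_pool_size, vocab_size = 512, 2, 20000  # assumed values
demo_generator = nn.Sequential(
    nn.Linear(dec_rnn_size // maxout_pool_size, vocab_size),
    nn.Softmax(dim=1))
out = demo_generator(torch.randn(8, dec_rnn_size // maxout_pool_size))
print(out.shape)       # torch.Size([8, 20000])
print(out.sum(dim=1))  # each row sums to 1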
def main():
    opt = parser.parse_args()
    logger.info(opt)
    opt.cuda = opt.gpu > -1
    if opt.cuda:
        torch.cuda.set_device(opt.gpu)

    translator = s2s.Translator(opt)
    outF = open(opt.output, 'w', encoding='utf-8')

    predScoreTotal, predWordsTotal, goldScoreTotal, goldWordsTotal = 0, 0, 0, 0
    srcBatch, tgtBatch = [], []
    lda_batch = []
    count = 0
    all_attn = []
    all_topic_attn = []
    all_mix_gate = []

    tgtF = open(opt.tgt) if opt.tgt else None

    # addPair() yields aligned (source, lda) line pairs, then a final
    # (None, None) so the last batch is flushed
    for line, lda in addPair(open(opt.src, encoding='utf-8'),
                             open(opt.lda, encoding='utf-8')):
        if (line is not None) and (lda is not None):
            srcTokens = line.strip().split(' ')
            srcBatch += [srcTokens]
            lda_tokens = lda.strip().split(' ')[:opt.max_lda_words]
            lda_batch += [lda_tokens]
            if tgtF:
                tgtTokens = tgtF.readline().split(' ')
                tgtBatch += [tgtTokens]
            if len(srcBatch) < opt.batch_size:
                continue
        else:
            # at the end of file, check last batch
            if len(srcBatch) == 0:
                break

        predBatch, predScore, goldScore = translator.translate(
            srcBatch, lda_batch, tgtBatch)
        predScoreTotal += sum(score[0] for score in predScore)
        predWordsTotal += sum(len(x[0]) for x in predBatch)
        # if tgtF is not None:
        #     goldScoreTotal += sum(goldScore)
        #     goldWordsTotal += sum(len(x) for x in tgtBatch)

        for b in range(len(predBatch)):
            count += 1
            # each hypothesis is a tuple: (tokens, attn, topic_attn, mix_gate)
            outF.write(" ".join(predBatch[b][0][0]) + '\n')
            all_attn.append(predBatch[b][0][1].cpu())
            all_topic_attn.append(predBatch[b][0][2].cpu())
            all_mix_gate.append(predBatch[b][0][3].cpu())
            outF.flush()

            if opt.verbose:
                srcSent = ' '.join(srcBatch[b])
                if translator.tgt_dict.lower:
                    srcSent = srcSent.lower()
                logger.info('SENT %d: %s' % (count, srcSent))
                logger.info('PRED %d: %s' % (count, " ".join(predBatch[b][0][0])))
                logger.info("PRED SCORE: %.4f" % predScore[b][0])

                if tgtF is not None:
                    tgtSent = ' '.join(tgtBatch[b])
                    if translator.tgt_dict.lower:
                        tgtSent = tgtSent.lower()
                    logger.info('GOLD %d: %s' % (count, tgtSent))
                    # logger.info("GOLD SCORE: %.4f" % goldScore[b])

                if opt.n_best > 1:
                    logger.info('\nBEST HYP:')
                    for n in range(opt.n_best):
                        logger.info("[%.4f] %s" % (predScore[b][n],
                                                   " ".join(predBatch[b][n][0])))
                logger.info('')

        srcBatch, tgtBatch = [], []
        lda_batch = []

    reportScore('PRED', predScoreTotal, predWordsTotal)
    # if tgtF:
    #     reportScore('GOLD', goldScoreTotal, goldWordsTotal)

    if tgtF:
        tgtF.close()
    logger.info('{0} copy'.format(translator.copyCount))

    torch.save(all_attn, opt.output + '.attn.pt')
    torch.save(all_topic_attn, opt.output + '.topicattn.pt')
    torch.save(all_mix_gate, opt.output + '.mixgate.pt')
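# `addPair` is used above but not defined in this file. A minimal sketch, the
# twin of the assumed `addone` helper: it zips the two files and emits a final
# (None, None) sentinel so the loop above flushes its last batch.
def addPair(f1, f2):
    for x, y in zip(f1, f2):
        yield x, y
    yield None, None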
def main():
    import onlinePreprocess
    onlinePreprocess.seq_length = opt.max_sent_length
    onlinePreprocess.MAX_LDA_WORDS = opt.max_lda_words
    onlinePreprocess.shuffle = 1 if opt.process_shuffle else 0
    from onlinePreprocess import prepare_data_online

    dataset = prepare_data_online(opt.train_src, opt.src_vocab,
                                  opt.train_tgt, opt.tgt_vocab,
                                  opt.train_lda, opt.lda_vocab)

    dict_checkpoint = opt.train_from if opt.train_from else opt.train_from_state_dict
    if dict_checkpoint:
        logger.info('Loading dicts from checkpoint at %s' % dict_checkpoint)
        checkpoint = torch.load(dict_checkpoint)
        dataset['dicts'] = checkpoint['dicts']

    trainData = s2s.Dataset(dataset['train']['src'], dataset['train']['eq_mask'],
                            dataset['train']['lda'], dataset['train']['tgt'],
                            opt.batch_size, opt.gpus)
    # validData = s2s.Dataset(dataset['valid']['src'], dataset['valid']['bio'],
    #                         dataset['valid']['tgt'], None, None,
    #                         opt.batch_size, opt.gpus, volatile=True)

    dicts = dataset['dicts']
    logger.info(' * vocabulary size. source = %d; target = %d' %
                (dicts['src'].size(), dicts['tgt'].size()))
    logger.info(' * number of training sentences. %d' %
                len(dataset['train']['src']))
    logger.info(' * maximum batch size. %d' % opt.batch_size)

    logger.info('Building model...')

    encoder = s2s.Models.Encoder(opt, dicts['src'])
    topic_encoder = s2s.Models.TopicEncoder(opt, dicts['lda'])
    decoder = s2s.Models.MPGDecoder(opt, dicts['tgt'])
    decIniter = s2s.Models.DecInit(opt)

    generator = nn.Sequential(
        nn.Linear(opt.dec_rnn_size // opt.maxout_pool_size, dicts['tgt'].size()),
        # TODO: fix here
        nn.LogSoftmax(dim=1))

    model = s2s.Models.NMTModel(encoder, topic_encoder, decoder, decIniter)
    model.generator = generator
    translator = s2s.Translator(opt, model, dataset)

    if opt.train_from:
        logger.info('Loading model from checkpoint at %s' % opt.train_from)
        chk_model = checkpoint['model']
        generator_state_dict = chk_model.generator.state_dict()
        # drop the generator weights from the encoder/decoder state dict
        model_state_dict = {k: v for k, v in chk_model.state_dict().items()
                            if 'generator' not in k}
        model.load_state_dict(model_state_dict)
        generator.load_state_dict(generator_state_dict)
        opt.start_epoch = checkpoint['epoch'] + 1

    if opt.train_from_state_dict:
        logger.info('Loading model from checkpoint at %s' % opt.train_from_state_dict)
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
        opt.start_epoch = checkpoint['epoch'] + 1

    if len(opt.gpus) >= 1:
        model.cuda()
        generator.cuda()
    else:
        model.cpu()
        generator.cpu()

    # if len(opt.gpus) > 1:
    #     model = nn.DataParallel(model, device_ids=opt.gpus, dim=1)
    #     generator = nn.DataParallel(generator, device_ids=opt.gpus, dim=0)

    if not opt.train_from_state_dict and not opt.train_from:
        for pr_name, p in model.named_parameters():
            logger.info(pr_name)
            # p.data.uniform_(-opt.param_init, opt.param_init)
            if p.dim() == 1:
                # p.data.zero_()
                p.data.normal_(0, math.sqrt(6 / (1 + p.size(0))))
            else:
                nn.init.xavier_normal_(p, math.sqrt(3))

        encoder.load_pretrained_vectors(opt)
        decoder.load_pretrained_vectors(opt)

        optim = s2s.Optim(opt.optim, opt.learning_rate,
                          max_grad_norm=opt.max_grad_norm,
                          max_weight_value=opt.max_weight_value,
                          lr_decay=opt.learning_rate_decay,
                          start_decay_at=opt.start_decay_at,
                          decay_bad_count=opt.halve_lr_bad_count)
    else:
        logger.info('Loading optimizer from checkpoint:')
        optim = checkpoint['optim']
        logger.info(optim)

    optim.set_parameters(model.parameters())

    if opt.train_from or opt.train_from_state_dict:
        optim.optimizer.load_state_dict(
            checkpoint['optim'].optimizer.state_dict())

    validData = None
    if opt.dev_input_src and opt.dev_ref:
        validData = load_dev_data(translator, opt.dev_input_src,
                                  opt.dev_input_lda, opt.dev_ref)
    testData = None
    if opt.test_input_src and opt.test_ref:
        testData = load_dev_data(translator, opt.test_input_src,
                                 opt.test_input_lda, opt.test_ref)
    trainModel(model, translator, trainData, validData, testData, dataset, optim)
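# The resume branches above expect checkpoints saved as a dict with the keys
# 'model', 'generator', 'dicts', 'epoch', and 'optim' (read directly by the
# code above). A runnable toy round-trip with stand-in modules; the real save
# call lives in the training loop elsewhere in this repo.
import torch
import torch.nn as nn

toy_model = nn.Linear(4, 4)       # stand-in for the NMT model
toy_generator = nn.Linear(4, 2)   # stand-in for the generator head
torch.save({
    'model': toy_model.state_dict(),      # -train_from_state_dict expects state dicts
    'generator': toy_generator.state_dict(),
    'dicts': {},                          # vocab dicts in the real script
    'epoch': 3,
    'optim': None,                        # the s2s.Optim object in the real script
}, 'toy_checkpoint.pt')

ckpt = torch.load('toy_checkpoint.pt')
toy_model.load_state_dict(ckpt['model'])
toy_generator.load_state_dict(ckpt['generator'])
start_epoch = ckpt['epoch'] + 1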
def main():
    import onlinePreprocess
    onlinePreprocess.lower = opt.lower_input
    onlinePreprocess.seq_length = opt.max_sent_length
    onlinePreprocess.shuffle = 1 if opt.process_shuffle else 0
    from onlinePreprocess import prepare_data_online

    # opt.train_src    source sentences, e.g. 'it is a replica of the grotto at
    #                  lourdes , france where the virgin mary reputedly appeared
    #                  to saint bernadette soubirous in 1858 .'
    # opt.src_vocab    source vocab: 'the(word) 4(index) 256272(frequency) 0.0674...'
    # opt.train_bio    answer-position tags: 'O O O ... B I I O O O'
    # opt.bio_vocab    answer-position vocab: 'O(tag) 4(index) 2525015(frequency) 0.8958...'
    # opt.train_feats  POS/NER/case features: 'PERSON/UPCASE/NN ...' (3 separate embeddings)
    # opt.feat_vocab   feature vocab files
    # opt.train_tgt    target questions, e.g. 'to whom did the virgin mary
    #                  allegedly appear in 1858 in lourdes france ?'
    # opt.tgt_vocab    target vocab; the same file as opt.src_vocab!
    dataset = prepare_data_online(opt.train_src, opt.src_vocab,
                                  opt.train_bio, opt.bio_vocab,
                                  opt.train_feats, opt.feat_vocab,
                                  opt.train_tgt, opt.tgt_vocab,
                                  opt.train_guide_src, opt.guide_src_vocab)

    trainData = s2s.Dataset(dataset['train']['src'], dataset['train']['bio'],
                            dataset['train']['feats'], dataset['train']['tgt'],
                            dataset['train']['switch'], dataset['train']['c_tgt'],
                            opt.batch_size, opt.gpus,
                            dataset['train']['guide_src'])
    dicts = dataset['dicts']
    logger.info(' * vocabulary size. source = %d; target = %d' %
                (dicts['src'].size(), dicts['tgt'].size()))
    logger.info(' * number of training sentences. %d' %
                len(dataset['train']['src']))
    logger.info(' * maximum batch size. %d' % opt.batch_size)

    logger.info('Building Model ...')

    encoder = s2s.Models.Encoder(opt, dicts['src'], dicts['guide_src'])
    decoder = s2s.Models.Decoder(opt, dicts['tgt'])
    decIniter = s2s.Models.DecInit(opt)

    # the generator maps the decoder output to a vocab-sized vector, then softmax
    generator = nn.Sequential(
        nn.Linear(opt.dec_rnn_size // opt.maxout_pool_size, dicts['tgt'].size()),
        nn.Softmax(dim=1))
    classifier = nn.Sequential(
        nn.Linear(opt.dec_rnn_size + 300, dicts['guide_src'].size()),
        nn.Softmax(dim=1))
    nlu_generator = nn.Sequential(
        nn.Linear(opt.dec_rnn_size * 2, dicts['guide_src'].size()),
        nn.Softmax(dim=1))

    model = s2s.Models.NMTModel(encoder, decoder, decIniter)
    model.generator = generator
    model.classifier = classifier
    model.nlu_generator = nlu_generator
    translator = s2s.Translator(opt, model, dataset)

    if len(opt.gpus) >= 1:
        model.cuda()
        generator.cuda()
        classifier.cuda()
        nlu_generator.cuda()
    else:
        model.cpu()
        generator.cpu()
        classifier.cpu()
        nlu_generator.cpu()

    # if len(opt.gpus) > 1:
    #     model = nn.DataParallel(model, device_ids=opt.gpus, dim=1)
    #     generator = nn.DataParallel(generator, device_ids=opt.gpus, dim=0)

    for pr_name, p in model.named_parameters():
        logger.info(pr_name)
        # p.data.uniform_(-opt.param_init, opt.param_init)
        if p.dim() == 1:
            # p.data.zero_()
            p.data.normal_(0, math.sqrt(6 / (1 + p.size(0))))
        else:
            nn.init.xavier_normal_(p, math.sqrt(3))

    encoder.load_pretrained_vectors(opt)
    decoder.load_pretrained_vectors(opt)

    optim = s2s.Optim(opt.optim, opt.learning_rate,
                      max_grad_norm=opt.max_grad_norm,
                      max_weight_value=opt.max_weight_value,
                      lr_decay=opt.learning_rate_decay,
                      start_decay_at=opt.start_decay_at,
                      decay_bad_count=opt.halve_lr_bad_count)
    optim.set_parameters(model.parameters())

    validData = None
    if opt.dev_input_src and opt.dev_ref:
        validData = load_dev_data(translator, opt.dev_input_src, opt.dev_bio,
                                  opt.dev_feats, opt.dev_ref, opt.dev_guide_src)
    testData = None
    if opt.test_input_src and opt.test_ref:
        testData = load_dev_data(translator, opt.test_input_src, opt.test_bio,
                                 opt.test_feats, opt.test_ref, opt.test_guide_src)
    trainModel(model, translator, trainData, validData, testData, dataset, optim)