Example #1
def showAtt():
    # Visualise multi-head attention for one hard-coded source/target example:
    # one heatmap image is written per attention head.
    attn_file = '/search/odin/zll/NQG/data/models/NQG_plus/attentions/'
    srcBatch = [[
        'after', 'the', 'death', 'of', 'tugh', 'temür', 'in', '1332', 'and',
        'subsequent', 'death', 'of', 'rinchinbal', '-lrb-', 'emperor',
        'ningzong', '-rrb-', 'the', 'same', 'year', ',', 'the', '13-year-old',
        'toghun', 'temür', '-lrb-', 'emperor', 'huizong', '-rrb-', ',', 'the',
        'last', 'of', 'the', 'nine', 'successors', 'of', 'kublai', 'khan', ',',
        'was', 'summoned', 'back', 'from', 'guangxi', 'and', 'succeeded', 'to',
        'the', 'throne', '.'
    ]]
    feats_batch = [[[
        'IN', 'DT', 'NN', 'IN', 'NNP', 'NNP', 'IN', 'CD', 'CC', 'JJ', 'NN',
        'IN', 'NNP', '-LRB-', 'NNP', 'NNP', '-RRB-', 'DT', 'JJ', 'NN', ',',
        'DT', 'JJ', 'NNP', 'NNP', '-LRB-', 'NNP', 'NNP', '-RRB-', ',', 'DT',
        'JJ', 'IN', 'DT', 'CD', 'NNS', 'IN', 'NNP', 'NNP', ',', 'VBD', 'VBN',
        'RB', 'IN', 'NNP', 'CC', 'VBD', 'TO', 'DT', 'NN', '.'
    ],
                    [
                        'O', 'O', 'O', 'O', 'O', 'O', 'O', 'DATE', 'O', 'O',
                        'O', 'O', 'O', 'O', 'O', 'PERSON', 'O', 'O', 'O', 'O',
                        'O', 'O', 'O', 'PERSON', 'PERSON', 'O', 'O', 'PERSON',
                        'O', 'O', 'O', 'O', 'O', 'O', 'NUMBER', 'O', 'O',
                        'PERSON', 'PERSON', 'O', 'O', 'O', 'O', 'O',
                        'LOCATION', 'O', 'O', 'O', 'O', 'O', 'O'
                    ],
                    [
                        'UP', 'LOW', 'LOW', 'LOW', 'UP', 'UP', 'LOW', 'LOW',
                        'LOW', 'LOW', 'LOW', 'LOW', 'UP', 'LOW', 'UP', 'UP',
                        'LOW', 'LOW', 'LOW', 'LOW', 'LOW', 'LOW', 'LOW', 'UP',
                        'UP', 'LOW', 'UP', 'UP', 'LOW', 'LOW', 'LOW', 'LOW',
                        'LOW', 'LOW', 'LOW', 'LOW', 'LOW', 'UP', 'UP', 'LOW',
                        'LOW', 'LOW', 'LOW', 'LOW', 'UP', 'LOW', 'LOW', 'LOW',
                        'LOW', 'LOW', 'LOW'
                    ]]]
    tgtBatch = [['when', 'did', 'tugh', 'temur', 'die', '?\n']]
    #[[['what', 'was', 'the', 'last', 'year', 'of', '<unk>', 'khan', '?']]]
    opt = parser.parse_args()
    logger.info(opt)
    opt.cuda = opt.gpu > -1
    if opt.cuda:
        torch.cuda.set_device(opt.gpu)

    translator = s2s.Translator(opt)
    predBatch, predScore, mul_attn, goldScore = translator.translate(
        srcBatch, feats_batch, tgtBatch)
    # Split the returned attention along dim 1, one slice per attention head.
    mul_attns = torch.split(mul_attn[0], 1, 1)
    for i in range(len(mul_attns)):
        attn_ofn = attn_file + 'head_' + str(i) + '.jpg'
        attn = mul_attns[i].squeeze(1)
        src = ' '.join(srcBatch[0])
        tgt = ' '.join(tgtBatch[0])
        out = ' '.join(predBatch[0][0])
        display_attention(src, out, attn, attn_ofn)
    print(len(mul_attns))
    print(mul_attns[0].shape)
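showAtt() relies on a display_attention helper that is not part of this listing. The sketch below is only an illustration of what such a helper could look like, assuming attn is a (target_length, source_length) tensor of attention weights; it is not the project's actual implementation.

import matplotlib
matplotlib.use('Agg')  # headless backend: write the figure straight to disk
import matplotlib.pyplot as plt


def display_attention(src, out, attn, filename):
    # src/out: whitespace-joined token strings; attn: (len(out_tokens), len(src_tokens)) weights.
    src_tokens = src.split(' ')
    out_tokens = out.split(' ')
    weights = attn.detach().cpu().numpy()

    fig, ax = plt.subplots(figsize=(max(8, len(src_tokens) // 3), 6))
    ax.matshow(weights, cmap='bone')
    ax.set_xticks(range(len(src_tokens)))
    ax.set_xticklabels(src_tokens, rotation=90, fontsize=6)
    ax.set_yticks(range(len(out_tokens)))
    ax.set_yticklabels(out_tokens, fontsize=8)
    fig.savefig(filename, bbox_inches='tight')
    plt.close(fig)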
Example #2
def main():
    opt = parser.parse_args()
    logger.info(opt)
    opt.cuda = opt.gpu > -1
    if opt.cuda:
        torch.cuda.set_device(opt.gpu)

    translator = s2s.Translator(opt)

    outF = open(opt.output, 'w', encoding='utf-8')

    predScoreTotal, predWordsTotal, goldScoreTotal, goldWordsTotal = 0, 0, 0, 0

    srcBatch, tgtBatch = [], []
    bio_batch, feats_batch = [], []

    count = 0

    tgtF = open(opt.tgt) if opt.tgt else None
    bioF = open(opt.bio, encoding='utf-8')
    featFs = [open(x, encoding='utf-8') for x in opt.feats]
    for line in addone(open(opt.src, encoding='utf-8')):

        if (line is not None):
            srcTokens = line.strip().split(' ')
            srcBatch += [srcTokens]
            bio_tokens = bioF.readline().strip().split(' ')
            bio_batch += [bio_tokens]
            feats_tokens = [
                reader.readline().strip().split(' ') for reader in featFs
            ]
            feats_batch += [feats_tokens]
            if tgtF:
                tgtTokens = tgtF.readline().split(' ') if tgtF else None
                tgtBatch += [tgtTokens]

            if len(srcBatch) < opt.batch_size:
                continue
        else:
            # at the end of file, check last batch
            if len(srcBatch) == 0:
                break

        predBatch, predScore, goldScore = translator.translate(
            srcBatch, bio_batch, feats_batch, tgtBatch)

        predScoreTotal += sum(score[0] for score in predScore)
        predWordsTotal += sum(len(x[0]) for x in predBatch)
        # if tgtF is not None:
        #     goldScoreTotal += sum(goldScore)
        #     goldWordsTotal += sum(len(x) for x in tgtBatch)

        for b in range(len(predBatch)):
            count += 1
            outF.write(" ".join(predBatch[b][0]) + '\n')
            outF.flush()

            if opt.verbose:
                srcSent = ' '.join(srcBatch[b])
                if translator.tgt_dict.lower:
                    srcSent = srcSent.lower()
                logger.info('SENT %d: %s' % (count, srcSent))
                logger.info('PRED %d: %s' % (count, " ".join(predBatch[b][0])))
                logger.info("PRED SCORE: %.4f" % predScore[b][0])

                if tgtF is not None:
                    tgtSent = ' '.join(tgtBatch[b])
                    if translator.tgt_dict.lower:
                        tgtSent = tgtSent.lower()
                    logger.info('GOLD %d: %s ' % (count, tgtSent))
                    # logger.info("GOLD SCORE: %.4f" % goldScore[b])

                if opt.n_best > 1:
                    logger.info('\nBEST HYP:')
                    for n in range(opt.n_best):
                        logger.info(
                            "[%.4f] %s" %
                            (predScore[b][n], " ".join(predBatch[b][n])))

                logger.info('')

        srcBatch, tgtBatch = [], []
        bio_batch, feats_batch = [], []

    reportScore('PRED', predScoreTotal, predWordsTotal)
    # if tgtF:
    #     reportScore('GOLD', goldScoreTotal, goldWordsTotal)

    if tgtF:
        tgtF.close()

    logger.info('{0} copy'.format(translator.copyCount))
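The loop above uses two helpers that are not shown in this listing, addone and reportScore. The sketches below are inferred from how they are consumed (addone must yield a trailing None so the final partial batch is still translated; reportScore presumably logs an average per-word score and the corresponding perplexity). They are illustrative only, not the project's actual code.

import logging
import math

logger = logging.getLogger(__name__)  # stands in for the module-level logger used above


def addone(iterable):
    # Yield every line, then one final None so the caller can flush the last batch.
    for item in iterable:
        yield item
    yield None


def reportScore(name, scoreTotal, wordsTotal):
    # Average log-likelihood per word and the corresponding perplexity.
    logger.info("%s AVG SCORE: %.4f, %s PPL: %.4f" % (
        name, scoreTotal / wordsTotal, name, math.exp(-scoreTotal / wordsTotal)))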
Example #3
def main():
    import onlinePreprocess
    onlinePreprocess.lower = opt.lower_input
    onlinePreprocess.seq_length = opt.max_sent_length
    onlinePreprocess.shuffle = 1 if opt.process_shuffle else 0
    from onlinePreprocess import prepare_data_online
    dataset = prepare_data_online(opt.train_src, opt.src_vocab, opt.train_bio, opt.bio_vocab, opt.train_feats,
                                  opt.feat_vocab, opt.train_tgt, opt.tgt_vocab)

    trainData = s2s.Dataset(dataset['train']['src'], dataset['train']['bio'], dataset['train']['feats'],
                            dataset['train']['tgt'],
                            dataset['train']['switch'], dataset['train']['c_tgt'],
                            opt.batch_size, opt.gpus)
    dicts = dataset['dicts']
    logger.info(' * vocabulary size. source = %d; target = %d' %
                (dicts['src'].size(), dicts['tgt'].size()))
    logger.info(' * number of training sentences. %d' %
                len(dataset['train']['src']))
    logger.info(' * maximum batch size. %d' % opt.batch_size)

    logger.info('Building model...')

    encoder = s2s.Models.Encoder(opt, dicts['src'])
    decoder = s2s.Models.Decoder(opt, dicts['tgt'])
    decIniter = s2s.Models.DecInit(opt)

    generator = nn.Sequential(
        nn.Linear(opt.dec_rnn_size // opt.maxout_pool_size, dicts['tgt'].size()),  # TODO: fix here
        # nn.LogSoftmax(dim=1)
        nn.Softmax(dim=1)
    )

    model = s2s.Models.NMTModel(encoder, decoder, decIniter)
    model.generator = generator
    translator = s2s.Translator(opt, model, dataset)

    if len(opt.gpus) >= 1:
        model.cuda()
        generator.cuda()
    else:
        model.cpu()
        generator.cpu()

    # if len(opt.gpus) > 1:
    #     model = nn.DataParallel(model, device_ids=opt.gpus, dim=1)
    #     generator = nn.DataParallel(generator, device_ids=opt.gpus, dim=0)

    for pr_name, p in model.named_parameters():
        logger.info(pr_name)
        # p.data.uniform_(-opt.param_init, opt.param_init)
        if p.dim() == 1:
            # p.data.zero_()
            p.data.normal_(0, math.sqrt(6 / (1 + p.size(0))))
        else:
            nn.init.xavier_normal_(p, math.sqrt(3))

    encoder.load_pretrained_vectors(opt)
    decoder.load_pretrained_vectors(opt)

    optim = s2s.Optim(
        opt.optim, opt.learning_rate,
        max_grad_norm=opt.max_grad_norm,
        max_weight_value=opt.max_weight_value,
        lr_decay=opt.learning_rate_decay,
        start_decay_at=opt.start_decay_at,
        decay_bad_count=opt.halve_lr_bad_count
    )
    optim.set_parameters(model.parameters())

    validData = None
    if opt.dev_input_src and opt.dev_ref:
        validData = load_dev_data(translator, opt.dev_input_src, opt.dev_bio, opt.dev_feats, opt.dev_ref)
    trainModel(model, translator, trainData, validData, dataset, optim)
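The generator's input size above is opt.dec_rnn_size // opt.maxout_pool_size because the decoder readout is assumed to pass through a maxout pooling step before the output projection. The snippet below is a hedged, self-contained illustration of that pooling under this assumption; it is not the code in s2s.Models.

import torch


def maxout(readout, pool_size):
    # readout: (batch, dec_rnn_size) -> (batch, dec_rnn_size // pool_size),
    # keeping the maximum over each group of pool_size consecutive units.
    batch, dim = readout.size()
    return readout.view(batch, dim // pool_size, pool_size).max(dim=2)[0]


# e.g. with dec_rnn_size=512 and maxout_pool_size=2 the generator sees 256-d vectors:
# maxout(torch.randn(4, 512), 2).shape == torch.Size([4, 256])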
Example #4
def main():
    opt = parser.parse_args()
    logger.info(opt)
    opt.cuda = opt.gpu > -1
    if opt.cuda:
        torch.cuda.set_device(opt.gpu)

    translator = s2s.Translator(opt)

    outF = open(opt.output, 'w', encoding='utf-8')

    predScoreTotal, predWordsTotal, goldScoreTotal, goldWordsTotal = 0, 0, 0, 0

    srcBatch, tgtBatch = [], []
    lda_batch = []

    count = 0
    all_attn = []
    all_topic_attn = []
    all_mix_gate = []

    tgtF = open(opt.tgt) if opt.tgt else None
    for line, lda in addPair(open(opt.src, encoding='utf-8'),
                             open(opt.lda, encoding='utf-8')):

        if (line is not None) and (lda is not None):
            srcTokens = line.strip().split(' ')
            srcBatch += [srcTokens]
            lda_tokens = lda.strip().split(' ')[:opt.max_lda_words]
            lda_batch += [lda_tokens]
            if tgtF:
                tgtTokens = tgtF.readline().split(' ') if tgtF else None
                tgtBatch += [tgtTokens]

            if len(srcBatch) < opt.batch_size:
                continue
        else:
            # at the end of file, check last batch
            if len(srcBatch) == 0:
                break

        predBatch, predScore, goldScore = translator.translate(
            srcBatch, lda_batch, tgtBatch)

        predScoreTotal += sum(score[0] for score in predScore)
        predWordsTotal += sum(len(x[0]) for x in predBatch)
        # if tgtF is not None:
        #     goldScoreTotal += sum(goldScore)
        #     goldWordsTotal += sum(len(x) for x in tgtBatch)

        for b in range(len(predBatch)):
            count += 1
            outF.write(" ".join(predBatch[b][0][0]) + '\n')
            all_attn.append(predBatch[b][0][1].cpu())
            all_topic_attn.append(predBatch[b][0][2].cpu())
            all_mix_gate.append(predBatch[b][0][3].cpu())
            outF.flush()

            if opt.verbose:
                srcSent = ' '.join(srcBatch[b])
                if translator.tgt_dict.lower:
                    srcSent = srcSent.lower()
                logger.info('SENT %d: %s' % (count, srcSent))
                # predBatch entries are (tokens, attn, topic_attn, mix_gate); join the tokens.
                logger.info('PRED %d: %s' % (count, " ".join(predBatch[b][0][0])))
                logger.info("PRED SCORE: %.4f" % predScore[b][0])

                if tgtF is not None:
                    tgtSent = ' '.join(tgtBatch[b])
                    if translator.tgt_dict.lower:
                        tgtSent = tgtSent.lower()
                    logger.info('GOLD %d: %s ' % (count, tgtSent))
                    # logger.info("GOLD SCORE: %.4f" % goldScore[b])

                if opt.n_best > 1:
                    logger.info('\nBEST HYP:')
                    for n in range(opt.n_best):
                        logger.info(
                            "[%.4f] %s" %
                            (predScore[b][n], " ".join(predBatch[b][n][0])))

                logger.info('')

        srcBatch, tgtBatch = [], []
        lda_batch = []

    reportScore('PRED', predScoreTotal, predWordsTotal)
    # if tgtF:
    #     reportScore('GOLD', goldScoreTotal, goldWordsTotal)

    if tgtF:
        tgtF.close()

    logger.info('{0} copy'.format(translator.copyCount))

    torch.save(all_attn, opt.output + '.attn.pt')
    torch.save(all_topic_attn, opt.output + '.topicattn.pt')
    torch.save(all_mix_gate, opt.output + '.mixgate.pt')
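addPair, used to walk the source and LDA files in lockstep, is not defined in this listing. A sketch consistent with how the loop consumes it (pairs of lines followed by a final (None, None) so the last partial batch is flushed); illustrative only:

def addPair(f1, f2):
    for x, y in zip(f1, f2):
        yield (x, y)
    yield (None, None)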
Example #5
File: train.py Project: scape1989/MAGNET
def main():
    import onlinePreprocess
    onlinePreprocess.seq_length = opt.max_sent_length
    onlinePreprocess.MAX_LDA_WORDS = opt.max_lda_words
    onlinePreprocess.shuffle = 1 if opt.process_shuffle else 0
    from onlinePreprocess import prepare_data_online
    dataset = prepare_data_online(opt.train_src, opt.src_vocab, opt.train_tgt,
                                  opt.tgt_vocab, opt.train_lda, opt.lda_vocab)

    dict_checkpoint = opt.train_from if opt.train_from else opt.train_from_state_dict
    if dict_checkpoint:
        logger.info('Loading dicts from checkpoint at %s' % dict_checkpoint)
        checkpoint = torch.load(dict_checkpoint)
        dataset['dicts'] = checkpoint['dicts']

    trainData = s2s.Dataset(dataset['train']['src'],
                            dataset['train']['eq_mask'],
                            dataset['train']['lda'], dataset['train']['tgt'],
                            opt.batch_size, opt.gpus)
    # validData = s2s.Dataset(dataset['valid']['src'], dataset['valid']['bio'], dataset['valid']['tgt'],
    #                          None, None, opt.batch_size, opt.gpus,
    #                          volatile=True)
    dicts = dataset['dicts']
    logger.info(' * vocabulary size. source = %d; target = %d' %
                (dicts['src'].size(), dicts['tgt'].size()))
    logger.info(' * number of training sentences. %d' %
                len(dataset['train']['src']))
    logger.info(' * maximum batch size. %d' % opt.batch_size)

    logger.info('Building model...')

    encoder = s2s.Models.Encoder(opt, dicts['src'])
    topic_encoder = s2s.Models.TopicEncoder(opt, dicts['lda'])
    decoder = s2s.Models.MPGDecoder(opt, dicts['tgt'])
    decIniter = s2s.Models.DecInit(opt)

    generator = nn.Sequential(
        nn.Linear(opt.dec_rnn_size // opt.maxout_pool_size,
                  dicts['tgt'].size()),  # TODO: fix here
        nn.LogSoftmax(dim=1))

    model = s2s.Models.NMTModel(encoder, topic_encoder, decoder, decIniter)
    model.generator = generator
    translator = s2s.Translator(opt, model, dataset)

    if opt.train_from:
        logger.info('Loading model from checkpoint at %s' % opt.train_from)
        chk_model = checkpoint['model']
        generator_state_dict = chk_model.generator.state_dict()
        model_state_dict = {
            k: v
            for k, v in chk_model.state_dict().items() if 'generator' not in k
        }
        model.load_state_dict(model_state_dict)
        generator.load_state_dict(generator_state_dict)
        opt.start_epoch = checkpoint['epoch'] + 1

    if opt.train_from_state_dict:
        logger.info('Loading model from checkpoint at %s' %
                    opt.train_from_state_dict)
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
        opt.start_epoch = checkpoint['epoch'] + 1

    if len(opt.gpus) >= 1:
        model.cuda()
        generator.cuda()
    else:
        model.cpu()
        generator.cpu()

    # if len(opt.gpus) > 1:
    #     model = nn.DataParallel(model, device_ids=opt.gpus, dim=1)
    #     generator = nn.DataParallel(generator, device_ids=opt.gpus, dim=0)

    if not opt.train_from_state_dict and not opt.train_from:
        for pr_name, p in model.named_parameters():
            logger.info(pr_name)
            # p.data.uniform_(-opt.param_init, opt.param_init)
            if p.dim() == 1:
                # p.data.zero_()
                p.data.normal_(0, math.sqrt(6 / (1 + p.size(0))))
            else:
                nn.init.xavier_normal_(p, math.sqrt(3))

        encoder.load_pretrained_vectors(opt)
        decoder.load_pretrained_vectors(opt)

        optim = s2s.Optim(opt.optim,
                          opt.learning_rate,
                          max_grad_norm=opt.max_grad_norm,
                          max_weight_value=opt.max_weight_value,
                          lr_decay=opt.learning_rate_decay,
                          start_decay_at=opt.start_decay_at,
                          decay_bad_count=opt.halve_lr_bad_count)
    else:
        logger.info('Loading optimizer from checkpoint:')
        optim = checkpoint['optim']
        logger.info(optim)

    optim.set_parameters(model.parameters())

    if opt.train_from or opt.train_from_state_dict:
        optim.optimizer.load_state_dict(
            checkpoint['optim'].optimizer.state_dict())

    validData = None
    if opt.dev_input_src and opt.dev_ref:
        validData = load_dev_data(translator, opt.dev_input_src,
                                  opt.dev_input_lda, opt.dev_ref)
    testData = None
    if opt.test_input_src and opt.test_ref:
        testData = load_dev_data(translator, opt.test_input_src,
                                 opt.test_input_lda, opt.test_ref)
    trainModel(model, translator, trainData, validData, testData, dataset,
               optim)
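The -train_from_state_dict branch above expects a checkpoint dictionary containing 'model', 'generator', 'dicts', 'epoch' and 'optim'. Below is a hedged sketch of how such a checkpoint might be written at the end of an epoch, reusing the names from main(); the project's own saving code is not shown, the variable epoch is hypothetical, and the filename is only a placeholder.

checkpoint = {
    'model': model.state_dict(),        # weights only, matching -train_from_state_dict
    'generator': generator.state_dict(),
    'dicts': dataset['dicts'],
    'epoch': epoch,                     # hypothetical epoch counter
    'optim': optim,
}
torch.save(checkpoint, 'checkpoint_e%d.pt' % epoch)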
Example #6
File: train.py Project: xdong2ps/InjType
def main():

    import onlinePreprocess
    onlinePreprocess.lower = opt.lower_input
    onlinePreprocess.seq_length = opt.max_sent_length
    onlinePreprocess.shuffle = 1 if opt.process_shuffle else 0
    from onlinePreprocess import prepare_data_online

    # opt.train_src (source file of sequence) 'it is a replica of the grotto at lourdes , france where the virgin mary reputedly appeared to saint bernadette soubirous in 1858 .'
    # opt.src_vocab (source file of vocab) 'the(word) 4(index) 256272(frequency) 0.06749202214022335'
    # opt.train_bio (answer position embedding) 'O O O O O O O O O O O O O O O O O O B I I O O O'
    # opt.bio_vocab (source file of answer position vocab) 'O(bio) 4(index) 2525015(frequency) 0.8958601572376024'
    # opt.train_feats (source file of postag/ner/case) 'PERSON/UPCASE/NN ...' (3 different embeddings)
    # opt.feat_vocab (source file of answer feat vocab)
    # opt.train_tgt (source file of question) 'to whom did the virgin mary allegedly appear in 1858 in lourdes france ?'
    # opt.tgt_vocab (target vocab file) -- the same file as opt.src_vocab
    dataset = prepare_data_online(opt.train_src, opt.src_vocab, opt.train_bio,
                                  opt.bio_vocab, opt.train_feats,
                                  opt.feat_vocab, opt.train_tgt, opt.tgt_vocab,
                                  opt.train_guide_src, opt.guide_src_vocab)

    trainData = s2s.Dataset(dataset['train']['src'], dataset['train']['bio'],
                            dataset['train']['feats'], dataset['train']['tgt'],
                            dataset['train']['switch'],
                            dataset['train']['c_tgt'], opt.batch_size,
                            opt.gpus, dataset['train']['guide_src'])

    dicts = dataset['dicts']
    logger.info(' * vocabulary size. source = %d; target = %d' %
                (dicts['src'].size(), dicts['tgt'].size()))
    logger.info(' * number of training sentences. %d' %
                len(dataset['train']['src']))
    logger.info(' * maximum batch size. %d' % opt.batch_size)

    logger.info('Building Model ...')
    encoder = s2s.Models.Encoder(opt, dicts['src'], dicts['guide_src'])
    decoder = s2s.Models.Decoder(opt, dicts['tgt'])
    decIniter = s2s.Models.DecInit(opt)
    '''The generator maps the decoder output to a vocab-sized vector, then applies softmax.'''
    generator = nn.Sequential(
        nn.Linear(opt.dec_rnn_size // opt.maxout_pool_size,
                  dicts['tgt'].size()), nn.Softmax(dim=1))
    classifier = nn.Sequential(
        nn.Linear(opt.dec_rnn_size + 300, dicts['guide_src'].size()),
        nn.Softmax(dim=1))
    nlu_generator = nn.Sequential(
        nn.Linear(opt.dec_rnn_size * 2, dicts['guide_src'].size()),
        nn.Softmax(dim=1))

    model = s2s.Models.NMTModel(encoder, decoder, decIniter)
    model.generator = generator
    model.classifier = classifier
    model.nlu_generator = nlu_generator
    translator = s2s.Translator(opt, model, dataset)

    if len(opt.gpus) >= 1:
        model.cuda()
        generator.cuda()
        classifier.cuda()
        nlu_generator.cuda()
    else:
        model.cpu()
        generator.cpu()
        classifier.cpu()
        nlu_generator.cpu()

    # if len(opt.gpus) > 1:
    #     model = nn.DataParallel(model, device_ids=opt.gpus, dim=1)
    #     generator = nn.DataParallel(generator, device_ids=opt.gpus, dim=0)

    for pr_name, p in model.named_parameters():
        logger.info(pr_name)
        # p.data.uniform_(-opt.param_init, opt.param_init)
        if p.dim() == 1:
            # p.data.zero_()
            p.data.normal_(0, math.sqrt(6 / (1 + p.size(0))))
        else:
            nn.init.xavier_normal_(p, math.sqrt(3))

    encoder.load_pretrained_vectors(opt)
    decoder.load_pretrained_vectors(opt)

    optim = s2s.Optim(opt.optim,
                      opt.learning_rate,
                      max_grad_norm=opt.max_grad_norm,
                      max_weight_value=opt.max_weight_value,
                      lr_decay=opt.learning_rate_decay,
                      start_decay_at=opt.start_decay_at,
                      decay_bad_count=opt.halve_lr_bad_count)
    optim.set_parameters(model.parameters())

    validData = None
    if opt.dev_input_src and opt.dev_ref:
        validData = load_dev_data(translator, opt.dev_input_src, opt.dev_bio,
                                  opt.dev_feats, opt.dev_ref,
                                  opt.dev_guide_src)

    testData = None
    if opt.test_input_src and opt.test_ref:
        testData = load_dev_data(translator, opt.test_input_src, opt.test_bio,
                                 opt.test_feats, opt.test_ref,
                                 opt.test_guide_src)

    trainModel(model, translator, trainData, validData, testData, dataset,
               optim)
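The classifier above takes dec_rnn_size + 300 inputs, which suggests the decoder hidden state is concatenated with a 300-dimensional embedding before predicting over dicts['guide_src']. The wiring below is only a hypothetical illustration of that concatenation under this assumption; the real forward pass lives in s2s.Models and is not shown here.

import torch

dec_rnn_size, emb_size, batch = 512, 300, 4               # illustrative sizes only
hidden = torch.randn(batch, dec_rnn_size)                 # decoder hidden state
guide_emb = torch.randn(batch, emb_size)                  # assumed 300-d embedding
classifier_input = torch.cat([hidden, guide_emb], dim=1)  # (batch, dec_rnn_size + 300)
# classifier(classifier_input) then yields a distribution over dicts['guide_src']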