예제 #1
0
def main(opt):
    """Score a saved model on the validation set and dump the generated outputs.

    Args:
        opt: Parsed options. This function reads ``cuda``, ``model``, ``gpus``,
            ``beam_size``, ``batch_size``, ``sequence_data``, ``valid_data``
            and ``output``.
    """
    device = torch.device('cuda' if opt.cuda else 'cpu')

    # map_location keeps a checkpoint that was saved on a GPU loadable when
    # evaluating on a CPU-only host.
    # NOTE(review): torch.load unpickles the file; only pass trusted paths.
    checkpoint = torch.load(opt.model, map_location=device)
    model_opt = checkpoint['settings']
    # Command-line values take precedence over those stored in the checkpoint.
    model_opt.gpus = opt.gpus
    model_opt.beam_size = opt.beam_size
    model_opt.batch_size = opt.batch_size

    ### Prepare Data ###
    sequences = torch.load(opt.sequence_data)
    seq_vocabularies = sequences['dict']

    validData = torch.load(opt.valid_data)
    # Re-batch the stored dataset object to the requested batch size.
    validData.batchSize = opt.batch_size
    validData.numBatches = math.ceil(len(validData.src) / validData.batchSize)
    validData.device = device

    ### Prepare Model ###
    model, _ = build_model(model_opt, device)
    model.load_state_dict(checkpoint['model'])
    model.eval()

    translator = Translator(model_opt, seq_vocabularies['tgt'],
                            sequences['valid']['tokens'],
                            seq_vocabularies['src'])

    bleu, outputs = translator.eval_all(model, validData, output_sent=True)

    print('\nbleu-4', bleu, '\n')

    dump(outputs, opt.output)
예제 #2
0
def main(opt):
    """Score a saved model on the validation set and dump the generated outputs.

    Args:
        opt: Parsed options. This function reads ``cuda``, ``gpus``, ``model``,
            ``beam_size``, ``batch_size``, ``n_best``, ``sequence_data``,
            ``valid_data`` (or ``valid_data_txt`` when that attribute is
            absent) and ``output``.
    """
    if opt.cuda:
        cuda.set_device(opt.gpus[0])
    device = torch.device('cuda' if opt.cuda else 'cpu')

    # map_location keeps a checkpoint that was saved on a GPU loadable when
    # evaluating on a CPU-only host.
    # NOTE(review): torch.load unpickles the file; only pass trusted paths.
    checkpoint = torch.load(opt.model, map_location=device)
    model_opt = checkpoint['settings']
    # Command-line values take precedence over those stored in the checkpoint.
    model_opt.gpus = opt.gpus
    model_opt.beam_size = opt.beam_size
    model_opt.batch_size = opt.batch_size
    model_opt.n_best = opt.n_best

    ### Prepare Data ###
    sequences = torch.load(opt.sequence_data)
    seq_vocabularies = sequences['dict']

    if hasattr(opt, "valid_data"):
        validData = torch.load(opt.valid_data)
    else:
        # NOTE(review): this fallback looks unfinished -- a string array is
        # handed to torch.tensor and the result is then used like a dataset
        # object (.src, .device) below. Confirm the intended handling of
        # plain-text validation data before relying on this branch.
        validData = loadtxt(opt.valid_data_txt, delimiter='\n', comments=None, dtype=str)
        validData = torch.tensor(validData, requires_grad=False)
    # Re-batch the dataset object to the requested batch size.
    validData.batchSize = opt.batch_size
    validData.numBatches = math.ceil(len(validData.src) / validData.batchSize)
    validData.device = device

    ### Prepare Model ###
    model, _ = build_model(model_opt, device)
    model.load_state_dict(checkpoint['model'])
    model.eval()

    translator = Translator(model_opt, seq_vocabularies['tgt'],
                            sequences['valid']['tokens'],
                            seq_vocabularies['src'])

    bleu, outputs = translator.eval_all(model, validData, output_sent=True)

    print('\nbleu-4', bleu, '\n')

    dump(outputs, opt.output)
예제 #3
0
def main(opt):
    """Evaluate a saved model on the validation split and dump the outputs.

    BLEU is reported twice: first the value returned by the translator, then a
    lower-cased corpus BLEU computed with NLTK. The NLTK value is the one
    passed on to ``dump``.

    Args:
        opt: Parsed options. This function reads ``cuda``, ``model``, ``gpus``,
            ``beam_size``, ``batch_size``, ``data`` and ``output``.
    """
    device = torch.device('cuda' if opt.cuda else 'cpu')

    # map_location keeps a checkpoint that was saved on a GPU loadable when
    # evaluating on a CPU-only host.
    # NOTE(review): torch.load unpickles the file; only pass trusted paths.
    checkpoint = torch.load(opt.model, map_location=device)
    model_opt = checkpoint['options']
    # Command-line values take precedence over those stored in the checkpoint.
    model_opt.gpus = opt.gpus
    model_opt.beam_size = opt.beam_size
    model_opt.batch_size = opt.batch_size

    ### Prepare Data ###
    data = torch.load(opt.data)

    src_vocab, tgt_vocab = data['dict']['src'], data['dict']['tgt']
    validData = Dataset(data['valid'],
                        model_opt.batch_size,
                        copy=model_opt.copy,
                        answer=model_opt.answer == 'enc',
                        ans_feature=model_opt.ans_feature,
                        feature=model_opt.feature,
                        opt_cuda=model_opt.gpus)

    ### Prepare Model ###
    model, _ = build_model(model_opt, device)
    model.load_state_dict(checkpoint['model state dict'])
    model.eval()

    translator = Translator(model_opt, tgt_vocab, data['valid']['tokens'],
                            src_vocab)

    bleu, outputs = translator.eval_all(model, validData, output_sent=True)

    print('\nbleu-4', bleu, '\n')

    ### Case-insensitive corpus BLEU ###
    golds, preds = outputs[0], outputs[1]
    # Only the first reference of each example is kept; corpus_bleu expects a
    # list of references per hypothesis, hence the extra nesting.
    golds = [[[w.lower() for w in g[0]]] for g in golds]
    preds = [[w.lower() for w in p] for p in preds]
    from nltk.translate import bleu_score
    bleu = bleu_score.corpus_bleu(golds, preds)
    print('\nbleu-4', bleu, '\n')

    dump(outputs, opt.output, bleu)
예제 #4
0
def main(opt):
    """Evaluate a saved model on the validation split and dump the outputs.

    Args:
        opt: Parsed options. This function reads ``cuda``, ``model``, ``gpus``,
            ``beam_size``, ``batch_size``, ``data`` and ``output``.
    """
    device = torch.device('cuda' if opt.cuda else 'cpu')

    # map_location keeps a checkpoint that was saved on a GPU loadable when
    # evaluating on a CPU-only host.
    # NOTE(review): torch.load unpickles the file; only pass trusted paths.
    checkpoint = torch.load(opt.model, map_location=device)
    model_opt = checkpoint['settings']
    # Command-line values take precedence over those stored in the checkpoint.
    model_opt.gpus = opt.gpus
    model_opt.beam_size = opt.beam_size
    model_opt.batch_size = opt.batch_size

    ### Prepare Data ###
    data = torch.load(opt.data)

    src_vocab, tgt_vocab = data['dict']['src'], data['dict']['tgt']
    validData = Dataset(data['valid'],
                        model_opt.batch_size,
                        copy=model_opt.copy,
                        answer=model_opt.answer == 'enc',
                        ans_feature=model_opt.ans_feature,
                        feature=model_opt.feature,
                        opt_cuda=model_opt.gpus)

    ### Prepare Model ###
    model, _ = build_model(model_opt, device)
    model.load_state_dict(checkpoint['model'])
    model.eval()

    translator = Translator(model_opt, tgt_vocab, data['valid']['tokens'],
                            src_vocab)

    bleu, outputs = translator.eval_all(model, validData, output_sent=True)

    print('\nbleu-4', bleu, '\n')

    dump(outputs, opt.output)
예제 #5
0
def main(opt, logger):
    """Set up data, model, optimizer, loss and translator, then run training.

    Depending on ``opt.rl`` either an ``RLTrainer`` (with a discriminator
    loaded through ``load_rl_model``) or a ``SupervisedTrainer`` is built and
    its ``train`` method is invoked.

    Args:
        opt: Parsed options. Several derived fields (vocabulary sizes,
            pre-trained embeddings, ``rl_device``) are written back onto it.
        logger: Object exposing ``info``; used for all progress messages.
    """
    logger.info('My PID is {0}'.format(os.getpid()))
    logger.info('PyTorch version: {0}'.format(str(torch.__version__)))
    logger.info(opt)

    cuda_available = torch.cuda.is_available()
    if cuda_available and not opt.gpus:
        logger.info("WARNING: You have a CUDA device, so you should probably run with -gpus 0")
    if opt.seed > 0:
        torch.manual_seed(opt.seed)
    if opt.gpus and opt.cuda_seed > 0:
        torch.cuda.manual_seed(opt.cuda_seed)
    logger.info('My seed is {0}'.format(torch.initial_seed()))
    # Querying the CUDA seed initializes CUDA, which fails on hosts without a
    # usable GPU, so it is only reported when CUDA is actually available.
    if cuda_available:
        logger.info('My cuda seed is {0}'.format(torch.cuda.initial_seed()))

    ###### ==================== Loading Dataset ==================== ######
    data = torch.load(opt.data)
    vocabularies = data['dict']
    if isinstance(vocabularies['src'], str):
        # The stored source vocabulary is just the name of a pre-trained
        # model; materialize the real vocabulary from it.
        assert vocabularies['src'] == opt.pretrained
        options = {
            'transf': opt.answer != 'enc',
            'separate': opt.answer == 'sep',
            'tgt': False,
        }
        vocabularies['src'] = Vocab.from_opt(pretrained=opt.pretrained, opt=options)
    train_data, valid_data = data['train'], data['valid']

    ### ===== load pre-trained vocabulary ===== ###
    if opt.pre_trained_vocab:
        if not opt.pretrained:
            opt.pre_trained_src_emb = vocabularies['pre-trained']['src']
        opt.pre_trained_tgt_emb = vocabularies['pre-trained']['tgt']
        if opt.answer == 'enc':
            opt.pre_trained_ans_emb = vocabularies['pre-trained']['ans']

    ### ===== wrap datasets ===== ###
    # NOTE(review): the same '.train.npy' mask file is handed to the
    # validation dataset as well -- confirm that this is intended.
    if opt.defined_slf_attn_mask:
        attn_mask_file = opt.defined_slf_attn_mask + '.train.npy'
    else:
        attn_mask_file = ''
    if 'gpt2' in opt.pretrained:
        pad_id = vocabularies['src'].lookup('<|endoftext|>')
    else:
        pad_id = Constants.PAD
    encode_answer = opt.answer == 'enc'
    trainData = Dataset(train_data, opt.batch_size, copy=opt.copy,
                        answer=encode_answer, ans_feature=opt.ans_feature,
                        feature=opt.feature, attn_mask_file=attn_mask_file,
                        opt_cuda=opt.gpus, pad=pad_id)
    validData = Dataset(valid_data, opt.eval_batch_size, copy=opt.copy,
                        answer=encode_answer, ans_feature=opt.ans_feature,
                        feature=opt.feature, attn_mask_file=attn_mask_file,
                        opt_cuda=opt.gpus, pad=pad_id)

    opt.src_vocab_size = vocabularies['src'].size
    opt.tgt_vocab_size = vocabularies['tgt'].size
    opt.feat_vocab = [fv.size for fv in vocabularies['feature']] if opt.feature else None
    opt.ans_feat_vocab = [fv.size for fv in vocabularies['ans_feature']] if opt.ans_feature else None

    logger.info(' * vocabulary size. source = %d; target = %d' % (opt.src_vocab_size, opt.tgt_vocab_size))
    logger.info(' * number of training batches. %d' % len(trainData))
    logger.info(' * maximum batch size. %d' % opt.batch_size)

    ##### =================== Prepare Model =================== #####
    separate = vocabularies['src'].lookup(Constants.SEP_WORD) if opt.answer == 'sep' else -1
    # Truthiness (rather than len()) matches the other ``opt.gpus`` checks in
    # this function and also tolerates ``None``.
    device = torch.device('cuda:' + str(opt.gpus[0]) if opt.gpus else 'cpu')
    # map_location keeps a GPU-saved checkpoint loadable on the chosen device.
    # NOTE(review): torch.load unpickles the file; only pass trusted paths.
    checkpoint = torch.load(opt.checkpoint, map_location=device) if opt.checkpoint else None
    if opt.rl:
        rl_devices = [torch.device('cuda:' + str(gpu)) for gpu in opt.rl_gpu]
        opt.rl_device = dict(zip(opt.rl, rl_devices))
        discriminator = load_rl_model(opt, device, opt.rl_device)
    model, parameters_cnt = build_model(opt, device, separate=separate, checkpoint=checkpoint)
    logger.info(' * Number of parameters to learn = %d' % parameters_cnt)

    ##### ==================== Prepare Optimizer ==================== #####
    optimizer = Optimizer.from_opt(model, opt)

    ##### ==================== Prepare Loss ==================== #####
    weight = torch.ones(opt.tgt_vocab_size)
    # Give the padding index zero weight in the loss.
    weight[Constants.PAD] = 0
    loss = NLLLoss(opt, weight=weight, size_average=False)
    if opt.gpus:
        cuda.set_device(opt.gpus[0])
        loss.cuda()

    ##### ==================== Prepare Translator ==================== #####
    translator = Translator(opt, vocabularies['tgt'], data['valid']['tokens'], vocabularies['src'])

    ##### ==================== Training ==================== #####
    if opt.rl:
        trainer = RLTrainer(model, discriminator, loss, optimizer, translator, logger,
                            opt, trainData, validData, vocabularies['src'], vocabularies['tgt'])
    else:
        trainer = SupervisedTrainer(model, loss, optimizer, translator, logger,
                                    opt, trainData, validData, vocabularies['src'])
    trainer.train(device)
예제 #6
0
def main(opt):
    """Set up data, model, optimizer, loss and translator, then run training.

    Sequential and graph vocabularies are loaded from disk, pre-built dataset
    objects are re-batched, and a ``SupervisedTrainer`` is created and started.

    Args:
        opt: Parsed options. Several derived fields (vocabulary sizes,
            pre-trained embeddings, ``sparse``) are written back onto it.
    """
    logging.info('My PID is {0}'.format(os.getpid()))
    logging.info('PyTorch version: {0}'.format(str(torch.__version__)))
    logging.info(opt)

    cuda_available = torch.cuda.is_available()
    if cuda_available and not opt.gpus:
        logging.info(
            "WARNING: You have a CUDA device, so you should probably run with -gpus 0"
        )
    if opt.seed > 0:
        torch.manual_seed(opt.seed)
    if opt.gpus:
        if opt.cuda_seed > 0:
            torch.cuda.manual_seed(opt.cuda_seed)
        cuda.set_device(opt.gpus[0])
    logging.info('My seed is {0}'.format(torch.initial_seed()))
    # Querying the CUDA seed initializes CUDA, which fails on hosts without a
    # usable GPU, so it is only reported when CUDA is actually available.
    if cuda_available:
        logging.info('My cuda seed is {0}'.format(torch.cuda.initial_seed()))

    device = torch.device('cuda' if opt.gpus else 'cpu')

    ###### ==================== Loading Options ==================== ######
    # Always bind ``checkpoint`` so later code never sees an undefined name.
    # map_location keeps a GPU-saved checkpoint loadable on the chosen device.
    # NOTE(review): torch.load unpickles the file; only pass trusted paths.
    if opt.checkpoint:
        checkpoint = torch.load(opt.checkpoint, map_location=device)
    else:
        checkpoint = None

    ###### ==================== Loading Dataset ==================== ######
    opt.sparse = bool(opt.sparse)

    ### ===== load pre-trained vocabulary ===== ###
    logging.info('Loading sequential data ......')
    sequences = torch.load(opt.sequence_data)
    seq_vocabularies = sequences['dict']
    logging.info('Loading pre-trained vocabulary ......')
    if opt.pre_trained_vocab:
        if not opt.pretrained:
            opt.pre_trained_src_emb = seq_vocabularies['pre-trained']['src']
        opt.pre_trained_tgt_emb = seq_vocabularies['pre-trained']['tgt']
        if opt.answer:
            # NOTE(review): the answer embedding is taken from the 'src'
            # entry -- confirm that this is intended.
            opt.pre_trained_ans_emb = seq_vocabularies['pre-trained']['src']

    ### ===== wrap datasets ===== ###
    logging.info('Loading Dataset objects ......')
    trainData = torch.load(opt.train_dataset)
    validData = torch.load(opt.valid_dataset)
    # Re-batch the stored dataset objects to the requested batch size.
    trainData.batchSize = validData.batchSize = opt.batch_size
    trainData.numBatches = math.ceil(len(trainData.src) / trainData.batchSize)
    validData.numBatches = math.ceil(len(validData.src) / validData.batchSize)

    logging.info('Preparing vocabularies ......')
    opt.src_vocab_size = seq_vocabularies['src'].size
    opt.tgt_vocab_size = seq_vocabularies['tgt'].size
    opt.feat_vocab = [fv.size for fv in seq_vocabularies['feature']
                      ] if opt.feature else None

    logging.info('Loading structural data ......')
    graphs = torch.load(opt.graph_data)
    graph_vocabularies = graphs['dict']
    # Only the vocabularies are needed; drop the reference to the rest.
    del graphs

    opt.edge_vocab_size = graph_vocabularies['edge']['in'].size
    opt.node_feat_vocab = [
        fv.size for fv in graph_vocabularies['feature'][1:-1]
    ] if opt.node_feature else None

    logging.info(' * vocabulary size. source = %d; target = %d' %
                 (opt.src_vocab_size, opt.tgt_vocab_size))
    logging.info(' * number of training batches. %d' % len(trainData))
    logging.info(' * maximum batch size. %d' % opt.batch_size)

    ##### =================== Prepare Model =================== #####
    trainData.device = validData.device = device

    model, parameters_cnt = build_model(opt, device, checkpoint=checkpoint)
    del checkpoint

    logging.info(' * Number of parameters to learn = %d' % parameters_cnt)

    ##### ==================== Prepare Optimizer ==================== #####
    optimizer = Optimizer.from_opt(model, opt)

    ##### ==================== Prepare Loss ==================== #####
    weight = torch.ones(opt.tgt_vocab_size)
    # Give the padding index zero weight in the loss.
    weight[Constants.PAD] = 0
    loss = NLLLoss(opt, weight, size_average=False)
    if opt.gpus:
        loss.cuda()

    ##### ==================== Prepare Translator ==================== #####
    translator = Translator(opt, seq_vocabularies['tgt'],
                            sequences['valid']['tokens'],
                            seq_vocabularies['src'])

    ##### ==================== Training ==================== #####
    trainer = SupervisedTrainer(model, loss, optimizer, translator, opt,
                                trainData, validData, seq_vocabularies['src'],
                                graph_vocabularies['feature'])
    # The trainer now holds these objects; drop the local references and the
    # dictionary entries before the training loop starts.
    del model
    del trainData
    del validData
    del seq_vocabularies['src']
    del graph_vocabularies['feature']
    trainer.train(device)