def main(opt):
    """Evaluate a saved model on a pre-built validation dataset.

    Loads the checkpoint at ``opt.model``, rebuilds the model from the
    settings stored under ``checkpoint['settings']``, runs
    ``Translator.eval_all`` over the dataset loaded from ``opt.valid_data``,
    prints the returned BLEU value and hands the outputs to ``dump``.

    Args:
        opt: Options object. Attributes read here: ``cuda``, ``model``,
            ``gpus``, ``beam_size``, ``batch_size``, ``sequence_data``,
            ``valid_data`` and ``output``.
    """
    device = torch.device('cuda' if opt.cuda else 'cpu')

    # map_location lets a checkpoint that was saved on a GPU be restored on
    # whatever device this run uses (in particular on a CPU-only run).
    # NOTE: torch.load unpickles arbitrary objects -- only load trusted files.
    checkpoint = torch.load(opt.model, map_location=device)
    model_opt = checkpoint['settings']
    # Decoding options given for this run override the ones saved at training.
    model_opt.gpus = opt.gpus
    model_opt.beam_size = opt.beam_size
    model_opt.batch_size = opt.batch_size

    ### Prepare Data ###
    sequences = torch.load(opt.sequence_data)
    seq_vocabularies = sequences['dict']

    validData = torch.load(opt.valid_data)
    # Re-batch the stored dataset for the evaluation batch size.
    validData.batchSize = opt.batch_size
    validData.numBatches = math.ceil(len(validData.src) / validData.batchSize)
    validData.device = device

    ### Prepare Model ###
    model, _ = build_model(model_opt, device)
    model.load_state_dict(checkpoint['model'])
    model.eval()

    translator = Translator(model_opt, seq_vocabularies['tgt'],
                            sequences['valid']['tokens'],
                            seq_vocabularies['src'])

    bleu, outputs = translator.eval_all(model, validData, output_sent=True)

    print('\nbleu-4', bleu, '\n')

    dump(outputs, opt.output)
def main(opt):
    """Evaluate a saved model on the validation data and dump its outputs.

    Loads the checkpoint at ``opt.model``, rebuilds the model from the
    settings stored under ``checkpoint['settings']``, runs
    ``Translator.eval_all`` over the validation data, prints the returned
    BLEU value and hands the outputs to ``dump``.

    Args:
        opt: Options object. Attributes read here: ``cuda``, ``gpus``,
            ``model``, ``beam_size``, ``batch_size``, ``n_best``,
            ``sequence_data``, ``valid_data`` (or ``valid_data_txt`` when
            ``valid_data`` is absent) and ``output``.
    """
    if opt.cuda:
        cuda.set_device(opt.gpus[0])
    device = torch.device('cuda' if opt.cuda else 'cpu')

    # map_location lets a checkpoint that was saved on a GPU be restored on
    # whatever device this run uses (in particular on a CPU-only run).
    # NOTE: torch.load unpickles arbitrary objects -- only load trusted files.
    checkpoint = torch.load(opt.model, map_location=device)
    model_opt = checkpoint['settings']
    # Decoding options given for this run override the ones saved at training.
    model_opt.gpus = opt.gpus
    model_opt.beam_size = opt.beam_size
    model_opt.batch_size = opt.batch_size
    model_opt.n_best = opt.n_best

    ### Prepare Data ###
    sequences = torch.load(opt.sequence_data)
    seq_vocabularies = sequences['dict']

    if hasattr(opt, "valid_data"):
        validData = torch.load(opt.valid_data)
    else:
        # NOTE(review): this fallback looks unfinished -- it builds a tensor
        # from an array of strings, and the code below reads ``.src`` and
        # sets attributes on the result. Confirm the intended behaviour
        # before relying on ``valid_data_txt``.
        validData = loadtxt(opt.valid_data_txt, delimiter='\n',
                            comments=None, dtype=str)
        validData = torch.tensor(validData, requires_grad=False)

    # Re-batch the dataset for the evaluation batch size.
    validData.batchSize = opt.batch_size
    validData.numBatches = math.ceil(len(validData.src) / validData.batchSize)
    validData.device = device

    ### Prepare Model ###
    model, _ = build_model(model_opt, device)
    model.load_state_dict(checkpoint['model'])
    model.eval()

    translator = Translator(model_opt, seq_vocabularies['tgt'],
                            sequences['valid']['tokens'],
                            seq_vocabularies['src'])

    bleu, outputs = translator.eval_all(model, validData, output_sent=True)

    print('\nbleu-4', bleu, '\n')

    dump(outputs, opt.output)
def main(opt):
    """Evaluate a saved model and report a lower-cased corpus BLEU.

    Loads the checkpoint at ``opt.model`` (settings under ``'options'``,
    weights under ``'model state dict'``), wraps ``data['valid']`` in a
    ``Dataset``, runs ``Translator.eval_all`` and prints the BLEU it returns.
    It then recomputes BLEU with NLTK's ``corpus_bleu`` on lower-cased tokens,
    prints that value too, and passes it to ``dump`` with the outputs.

    Args:
        opt: Options object. Attributes read here: ``cuda``, ``model``,
            ``gpus``, ``beam_size``, ``batch_size``, ``data`` and ``output``.
    """
    # Imported up front so a missing NLTK install fails before the
    # evaluation run rather than after it.
    from nltk.translate import bleu_score

    device = torch.device('cuda' if opt.cuda else 'cpu')

    # map_location lets a checkpoint that was saved on a GPU be restored on
    # whatever device this run uses (in particular on a CPU-only run).
    # NOTE: torch.load unpickles arbitrary objects -- only load trusted files.
    checkpoint = torch.load(opt.model, map_location=device)
    model_opt = checkpoint['options']
    # Decoding options given for this run override the ones saved at training.
    model_opt.gpus = opt.gpus
    model_opt.beam_size = opt.beam_size
    model_opt.batch_size = opt.batch_size

    ### Prepare Data ###
    data = torch.load(opt.data)
    src_vocab, tgt_vocab = data['dict']['src'], data['dict']['tgt']

    validData = Dataset(data['valid'], model_opt.batch_size,
                        copy=model_opt.copy,
                        answer=model_opt.answer == 'enc',
                        ans_feature=model_opt.ans_feature,
                        feature=model_opt.feature,
                        opt_cuda=model_opt.gpus)

    ### Prepare Model ###
    model, _ = build_model(model_opt, device)
    model.load_state_dict(checkpoint['model state dict'])
    model.eval()

    translator = Translator(model_opt, tgt_vocab, data['valid']['tokens'],
                            src_vocab)

    bleu, outputs = translator.eval_all(model, validData, output_sent=True)

    print('\nbleu-4', bleu, '\n')

    # Case-insensitive re-scoring: only the first reference of every example
    # is kept, and every token is lower-cased on both sides.
    golds, preds = outputs[0], outputs[1]
    golds = [[[w.lower() for w in g[0]]] for g in golds]
    preds = [[w.lower() for w in p] for p in preds]
    bleu = bleu_score.corpus_bleu(golds, preds)

    print('\nbleu-4', bleu, '\n')

    dump(outputs, opt.output, bleu)
def main(opt):
    """Evaluate a saved model on the validation split and dump its outputs.

    Loads the checkpoint at ``opt.model``, rebuilds the model from the
    settings stored under ``checkpoint['settings']``, wraps ``data['valid']``
    in a ``Dataset``, runs ``Translator.eval_all``, prints the returned BLEU
    value and hands the outputs to ``dump``.

    Args:
        opt: Options object. Attributes read here: ``cuda``, ``model``,
            ``gpus``, ``beam_size``, ``batch_size``, ``data`` and ``output``.
    """
    device = torch.device('cuda' if opt.cuda else 'cpu')

    # map_location lets a checkpoint that was saved on a GPU be restored on
    # whatever device this run uses (in particular on a CPU-only run).
    # NOTE: torch.load unpickles arbitrary objects -- only load trusted files.
    checkpoint = torch.load(opt.model, map_location=device)
    model_opt = checkpoint['settings']
    # Decoding options given for this run override the ones saved at training.
    model_opt.gpus = opt.gpus
    model_opt.beam_size = opt.beam_size
    model_opt.batch_size = opt.batch_size

    ### Prepare Data ###
    data = torch.load(opt.data)
    src_vocab, tgt_vocab = data['dict']['src'], data['dict']['tgt']

    validData = Dataset(data['valid'], model_opt.batch_size,
                        copy=model_opt.copy,
                        answer=model_opt.answer == 'enc',
                        ans_feature=model_opt.ans_feature,
                        feature=model_opt.feature,
                        opt_cuda=model_opt.gpus)

    ### Prepare Model ###
    model, _ = build_model(model_opt, device)
    model.load_state_dict(checkpoint['model'])
    model.eval()

    translator = Translator(model_opt, tgt_vocab, data['valid']['tokens'],
                            src_vocab)

    bleu, outputs = translator.eval_all(model, validData, output_sent=True)

    print('\nbleu-4', bleu, '\n')

    dump(outputs, opt.output)
def main(opt, logger):
    """Set up data, model, optimizer and loss, then run training.

    Logs process and seed information, loads the dataset file at
    ``opt.data``, wraps the train/valid splits in ``Dataset`` objects,
    builds the model (optionally from ``opt.checkpoint``), the optimizer,
    the loss and the translator, and finally calls ``trainer.train(device)``
    on either an ``RLTrainer`` (when ``opt.rl`` is set) or a
    ``SupervisedTrainer``.

    Args:
        opt: Options object. It is read for the run configuration and also
            written to: vocabulary sizes, pre-trained embeddings and
            ``rl_device`` are stored on it for later components.
        logger: Object whose ``info`` method receives progress messages.
    """
    logger.info('My PID is {0}'.format(os.getpid()))
    logger.info('PyTorch version: {0}'.format(str(torch.__version__)))
    logger.info(opt)

    if torch.cuda.is_available() and not opt.gpus:
        logger.info("WARNING: You have a CUDA device, so you should probably run with -gpus 0")
    if opt.seed > 0:
        torch.manual_seed(opt.seed)
    if opt.gpus and opt.cuda_seed > 0:
        torch.cuda.manual_seed(opt.cuda_seed)
    logger.info('My seed is {0}'.format(torch.initial_seed()))
    # Querying the CUDA seed initialises CUDA, which fails on hosts without
    # it, so the message is only emitted when CUDA is usable.
    if torch.cuda.is_available():
        logger.info('My cuda seed is {0}'.format(torch.cuda.initial_seed()))

    ###### ==================== Loading Dataset ==================== ######
    data = torch.load(opt.data)
    vocabularies = data['dict']
    if isinstance(vocabularies['src'], str):
        # The data file stores only the name of a pre-trained vocabulary;
        # build the actual vocabulary object from it.
        assert vocabularies['src'] == opt.pretrained
        options = {
            'transf': opt.answer != 'enc',
            'separate': opt.answer == 'sep',
            'tgt': False,
        }
        vocabularies['src'] = Vocab.from_opt(pretrained=opt.pretrained, opt=options)
    train_data, valid_data = data['train'], data['valid']

    ### ===== load pre-trained vocabulary ===== ###
    if opt.pre_trained_vocab:
        if not opt.pretrained:
            opt.pre_trained_src_emb = vocabularies['pre-trained']['src']
        opt.pre_trained_tgt_emb = vocabularies['pre-trained']['tgt']
        if opt.answer == 'enc':
            opt.pre_trained_ans_emb = vocabularies['pre-trained']['ans']

    ### ===== wrap datasets ===== ###
    if opt.defined_slf_attn_mask:
        attn_mask_file = opt.defined_slf_attn_mask + '.train.npy'
    else:
        attn_mask_file = ''

    # For GPT-2 style vocabularies the '<|endoftext|>' entry is used as the
    # padding id. An unset ``opt.pretrained`` selects the default PAD.
    if opt.pretrained and opt.pretrained.count('gpt2'):
        pad_id = vocabularies['src'].lookup('<|endoftext|>')
    else:
        pad_id = Constants.PAD

    trainData = Dataset(train_data, opt.batch_size, copy=opt.copy,
                        answer=opt.answer == 'enc',
                        ans_feature=opt.ans_feature,
                        feature=opt.feature,
                        attn_mask_file=attn_mask_file,
                        opt_cuda=opt.gpus, pad=pad_id)
    validData = Dataset(valid_data, opt.eval_batch_size, copy=opt.copy,
                        answer=opt.answer == 'enc',
                        ans_feature=opt.ans_feature,
                        feature=opt.feature,
                        attn_mask_file=attn_mask_file,
                        opt_cuda=opt.gpus, pad=pad_id)

    opt.src_vocab_size = vocabularies['src'].size
    opt.tgt_vocab_size = vocabularies['tgt'].size
    opt.feat_vocab = (
        [fv.size for fv in vocabularies['feature']] if opt.feature else None
    )
    opt.ans_feat_vocab = (
        [fv.size for fv in vocabularies['ans_feature']] if opt.ans_feature else None
    )

    logger.info(' * vocabulary size. source = %d; target = %d' % (opt.src_vocab_size, opt.tgt_vocab_size))
    logger.info(' * number of training batches. %d' % len(trainData))
    logger.info(' * maximum batch size. %d' % opt.batch_size)

    ##### =================== Prepare Model =================== #####
    if opt.answer == 'sep':
        separate = vocabularies['src'].lookup(Constants.SEP_WORD)
    else:
        separate = -1

    # Truthiness (rather than len()) matches the other ``opt.gpus`` checks in
    # this function and also copes with ``opt.gpus`` being None.
    device = torch.device('cuda:' + str(opt.gpus[0]) if opt.gpus else 'cpu')

    # map_location restores the checkpoint onto the device used by this run,
    # whatever device it was saved from.
    # NOTE: torch.load unpickles arbitrary objects -- only load trusted files.
    checkpoint = (
        torch.load(opt.checkpoint, map_location=device) if opt.checkpoint else None
    )

    if opt.rl:
        # One device per reward name, paired in the order given on ``opt``.
        rl_device = {
            name: torch.device('cuda:' + str(gpu))
            for name, gpu in zip(opt.rl, opt.rl_gpu)
        }
        opt.rl_device = rl_device
        discriminator = load_rl_model(opt, device, rl_device)

    model, parameters_cnt = build_model(opt, device, separate=separate, checkpoint=checkpoint)
    logger.info(' * Number of parameters to learn = %d' % parameters_cnt)

    ##### ==================== Prepare Optimizer ==================== #####
    optimizer = Optimizer.from_opt(model, opt)

    ##### ==================== Prepare Loss ==================== #####
    # Padding positions get zero weight in the loss.
    weight = torch.ones(opt.tgt_vocab_size)
    weight[Constants.PAD] = 0
    loss = NLLLoss(opt, weight=weight, size_average=False)
    if opt.gpus:
        cuda.set_device(opt.gpus[0])
        loss.cuda()

    ##### ==================== Prepare Translator ==================== #####
    translator = Translator(opt, vocabularies['tgt'], data['valid']['tokens'], vocabularies['src'])

    ##### ==================== Training ==================== #####
    if opt.rl:
        trainer = RLTrainer(model, discriminator, loss, optimizer, translator,
                            logger, opt, trainData, validData,
                            vocabularies['src'], vocabularies['tgt'])
    else:
        trainer = SupervisedTrainer(model, loss, optimizer, translator,
                                    logger, opt, trainData, validData,
                                    vocabularies['src'])
    trainer.train(device)
def main(opt):
    """Set up sequence/graph data, model, optimizer and loss, then train.

    Logs process and seed information, loads the sequential data, the
    pre-built train/valid dataset objects and the graph vocabularies, builds
    the model (optionally from ``opt.checkpoint``), the optimizer, the loss
    and the translator, and finally calls ``trainer.train(device)`` on a
    ``SupervisedTrainer``.

    Args:
        opt: Options object. It is read for the run configuration and also
            written to: ``sparse`` is normalised to a bool, and vocabulary
            sizes and pre-trained embeddings are stored on it for later
            components.
    """
    logging.info('My PID is {0}'.format(os.getpid()))
    logging.info('PyTorch version: {0}'.format(str(torch.__version__)))
    logging.info(opt)

    if torch.cuda.is_available() and not opt.gpus:
        logging.info(
            "WARNING: You have a CUDA device, so you should probably run with -gpus 0"
        )
    if opt.seed > 0:
        torch.manual_seed(opt.seed)
    if opt.gpus:
        if opt.cuda_seed > 0:
            torch.cuda.manual_seed(opt.cuda_seed)
        cuda.set_device(opt.gpus[0])
    logging.info('My seed is {0}'.format(torch.initial_seed()))
    # Querying the CUDA seed initialises CUDA, which fails on hosts without
    # it, so the message is only emitted when CUDA is usable.
    if torch.cuda.is_available():
        logging.info('My cuda seed is {0}'.format(torch.cuda.initial_seed()))

    device = torch.device('cuda' if opt.gpus else 'cpu')

    ###### ==================== Loading Options ==================== ######
    # Bound unconditionally so later code never meets an undefined name.
    # map_location restores the checkpoint onto the device used by this run,
    # whatever device it was saved from.
    # NOTE: torch.load unpickles arbitrary objects -- only load trusted files.
    checkpoint = (
        torch.load(opt.checkpoint, map_location=device) if opt.checkpoint else None
    )

    ###### ==================== Loading Dataset ==================== ######
    opt.sparse = bool(opt.sparse)

    ### ===== load pre-trained vocabulary ===== ###
    logging.info('Loading sequential data ......')
    sequences = torch.load(opt.sequence_data)
    seq_vocabularies = sequences['dict']

    logging.info('Loading pre-trained vocabulary ......')
    if opt.pre_trained_vocab:
        if not opt.pretrained:
            opt.pre_trained_src_emb = seq_vocabularies['pre-trained']['src']
        opt.pre_trained_tgt_emb = seq_vocabularies['pre-trained']['tgt']
        if opt.answer:
            # The answer embeddings are taken from the source-side table.
            opt.pre_trained_ans_emb = seq_vocabularies['pre-trained']['src']

    ### ===== wrap datasets ===== ###
    logging.info('Loading Dataset objects ......')
    trainData = torch.load(opt.train_dataset)
    validData = torch.load(opt.valid_dataset)
    # Re-batch the stored datasets for the batch size of this run.
    trainData.batchSize = validData.batchSize = opt.batch_size
    trainData.numBatches = math.ceil(len(trainData.src) / trainData.batchSize)
    validData.numBatches = math.ceil(len(validData.src) / validData.batchSize)

    logging.info('Preparing vocabularies ......')
    opt.src_vocab_size = seq_vocabularies['src'].size
    opt.tgt_vocab_size = seq_vocabularies['tgt'].size
    opt.feat_vocab = (
        [fv.size for fv in seq_vocabularies['feature']] if opt.feature else None
    )

    logging.info('Loading structural data ......')
    graphs = torch.load(opt.graph_data)
    graph_vocabularies = graphs['dict']
    del graphs  # only the vocabularies are needed from the graph file

    opt.edge_vocab_size = graph_vocabularies['edge']['in'].size
    opt.node_feat_vocab = (
        [fv.size for fv in graph_vocabularies['feature'][1:-1]]
        if opt.node_feature else None
    )

    logging.info(' * vocabulary size. source = %d; target = %d' %
                 (opt.src_vocab_size, opt.tgt_vocab_size))
    logging.info(' * number of training batches. %d' % len(trainData))
    logging.info(' * maximum batch size. %d' % opt.batch_size)

    ##### =================== Prepare Model =================== #####
    trainData.device = validData.device = device

    model, parameters_cnt = build_model(opt, device, checkpoint=checkpoint)
    del checkpoint

    logging.info(' * Number of parameters to learn = %d' % parameters_cnt)

    ##### ==================== Prepare Optimizer ==================== #####
    optimizer = Optimizer.from_opt(model, opt)

    ##### ==================== Prepare Loss ==================== #####
    # Padding positions get zero weight in the loss.
    weight = torch.ones(opt.tgt_vocab_size)
    weight[Constants.PAD] = 0
    loss = NLLLoss(opt, weight, size_average=False)
    if opt.gpus:
        loss.cuda()

    ##### ==================== Prepare Translator ==================== #####
    translator = Translator(opt, seq_vocabularies['tgt'],
                            sequences['valid']['tokens'],
                            seq_vocabularies['src'])

    ##### ==================== Training ==================== #####
    trainer = SupervisedTrainer(model, loss, optimizer, translator, opt,
                                trainData, validData,
                                seq_vocabularies['src'],
                                graph_vocabularies['feature'])

    # Drop this function's own references now that the trainer has received
    # the objects.
    del model
    del trainData
    del validData
    del seq_vocabularies['src']
    del graph_vocabularies['feature']

    trainer.train(device)