def main():
    """Entry point for training: parse options, set up logging, build the
    model and optimizers, and run the training loop.

    Raises:
        Any exception from data loading / model init / training is logged
        with a traceback and re-raised so the process exits non-zero.
    """
    # load settings for training
    opt = init_opt(description='train.py')
    # Named `logger` (not `logging`) to avoid shadowing the stdlib module.
    logger = init_logging(logger_name='train.py',
                          log_file=opt.log_file,
                          redirect_to_stdout=False)

    logger.info('EXP_PATH : ' + opt.exp_path)
    logger.info('Parameters:')
    # Plain loop instead of a side-effect-only list comprehension.
    for k, v in opt.__dict__.items():
        logger.info('%s : %s' % (k, str(v)))

    logger.info('====================== Checking GPU Availability =========================')
    if torch.cuda.is_available():
        # Normalize a single device id into a list so downstream code can
        # treat single- and multi-GPU runs uniformly.
        if isinstance(opt.device_ids, int):
            opt.device_ids = [opt.device_ids]
        logger.info('Running on %s! devices=%s' % ('MULTIPLE GPUs' if len(opt.device_ids) > 1 else '1 GPU',
                                                   str(opt.device_ids)))
    else:
        logger.info('Running on CPU!')

    try:
        train_data_loader, valid_data_loader, test_data_loader, word2id, id2word, vocab = load_data_vocab(opt)
        model = init_model(opt)
        optimizer_ml, optimizer_rl, criterion = init_optimizer_criterion(model, opt)
        train_model(model, optimizer_ml, optimizer_rl, criterion,
                    train_data_loader, valid_data_loader, test_data_loader, opt)
    except Exception as e:
        # Log with traceback, then re-raise so failures are not swallowed.
        logger.error(e, exc_info=True)
        raise
def __init__(self):
    """Initialize the predictor: parse model options, then load the model.

    Option parsing is delegated to ``config.init_opt``; ``self.load()``
    performs the actual model restoration.
    """
    super(KeyphrasePredictor, self).__init__()
    # Parsed options are kept on the instance for later use by load()/predict.
    self.model_opts = config.init_opt(description='predictor')
    self.load()
def main():
    """Entry point for keyphrase prediction: load options, datasets and the
    model, then run beam-search prediction on the kp20k test set.

    Exceptions are logged with a traceback and suppressed (script convention
    for the predict_* entry points).
    """
    opt = config.init_opt(description='predict_keyphrase.py')
    logger = config.init_logging('predict_keyphrase',
                                 opt.exp_path + '/output.log',
                                 redirect_to_stdout=False)

    logger.info('EXP_PATH : ' + opt.exp_path)
    logger.info('Parameters:')
    # Plain loop instead of a side-effect-only list comprehension.
    for k, v in opt.__dict__.items():
        logger.info('%s : %s' % (k, str(v)))

    logger.info('====================== Checking GPU Availability =========================')
    if torch.cuda.is_available():
        # Normalize a single device id into a list for uniform handling.
        if isinstance(opt.device_ids, int):
            opt.device_ids = [opt.device_ids]
        logger.info('Running on %s! devices=%s' % ('MULTIPLE GPUs' if len(opt.device_ids) > 1 else '1 GPU',
                                                   str(opt.device_ids)))
    else:
        logger.info('Running on CPU!')

    try:
        one2one, one2many = generate_dataset()
        test_data_loaders, word2id, id2word, vocab = load_vocab_and_testsets(opt, one2one, one2many)
        model = init_model(opt)
        generator = SequenceGenerator(model,
                                      eos_id=opt.word2id[pykp.io.EOS_WORD],
                                      beam_size=opt.beam_size,
                                      max_sequence_length=opt.max_sent_length)

        for testset_name, test_data_loader in zip(['kp20k'], test_data_loaders):
            logger.info('Evaluating %s' % testset_name)
            # predict_save_path=None: results are printed, not written to disk.
            output = predict_beam_search(generator, test_data_loader, opt,
                                         title='test_%s' % testset_name,
                                         predict_save_path=None)
            print(output)
    except Exception as e:
        logger.error(e, exc_info=True)
def main():
    """Entry point for training: parse options, build the model and
    optimizers, and train with per-dataset valid/test loaders.

    Raises:
        Any exception during setup or training is logged with a traceback
        and re-raised so the process exits non-zero.
    """
    # load settings for training
    opt = init_opt(description='train.py')
    # Bug fix: the original bound the handle as `logging` but later called
    # `logger.info(...)`, which raised a NameError — one name is used throughout.
    logger = init_logging(logger_name='train.py',
                          log_file=opt.log_file,
                          redirect_to_stdout=False)

    logger.info('EXP_PATH : ' + opt.exp_path)
    logger.info('Parameters:')
    # Plain loop instead of a side-effect-only list comprehension.
    for k, v in opt.__dict__.items():
        logger.info('%s : %s' % (k, str(v)))

    logger.info('====================== Checking GPU Availability =========================')
    logger.info("torch.cuda.is_available()={}".format(torch.cuda.is_available()))
    if torch.cuda.is_available():
        # Normalize a single gpu id into a list for uniform handling.
        if isinstance(opt.gpuid, int):
            opt.gpuid = [opt.gpuid]
        logger.info('Running on %s! devices=%s' % ('MULTIPLE GPUs' if len(opt.gpuid) > 1 else '1 GPU',
                                                   str(opt.gpuid)))
    else:
        logger.info('Running on CPU!')

    try:
        # ignore the previous test_data_loader
        train_data_loader, valid_data_loader, _, _, _, _ = load_data_vocab_for_training(opt)
        valid_data_loaders, _, _, _ = load_vocab_and_datasets_for_testing(
            dataset_names=opt.test_dataset_names, type='valid', opt=opt)
        test_data_loaders, _, _, _ = load_vocab_and_datasets_for_testing(
            dataset_names=opt.test_dataset_names, type='test', opt=opt)

        model = init_model(opt)
        optimizer_ml, optimizer_rl, criterion = init_optimizer_criterion(model, opt)
        train_model(model, optimizer_ml, optimizer_rl, criterion,
                    train_data_loader, valid_data_loaders, test_data_loaders, opt)
    except Exception as e:
        logger.error(e, exc_info=True)
        raise
def main():
    """Entry point for training with hard-coded experiment overrides.

    Base options are parsed, then overridden per encoder type
    (transformer / bert / rnn) before building the model and training.

    Raises:
        Any exception during setup or training is logged with a traceback
        and re-raised so the process exits non-zero.
    """
    # load settings for training
    opt = init_opt(description='train.py')
    # Hard-coded experiment overrides (stand in for command-line flags).
    opt.useGpu = 1
    opt.encoder_type = 'bert'
    opt.useCLF = True
    opt.data = 'data/kp20k/kp20k'
    opt.vocab = 'data/kp20k/kp20k.vocab.pt'

    if opt.encoder_type == 'transformer':
        opt.batch_size = 32
        opt.d_inner = 2048
        opt.enc_n_layers = 4
        opt.dec_n_layers = 2
        opt.n_head = 8
        opt.d_k = 64
        opt.d_v = 64
        opt.d_model = 512
        opt.word_vec_size = 512
        opt.run_valid_every = 5000000
        opt.save_model_every = 20000
        opt.decode_old = True
    elif opt.encoder_type == 'bert':
        opt.useOnlyTwo = False
        opt.avgHidden = False
        opt.useZeroDecodeHidden = True
        opt.useSameEmbeding = False
        opt.batch_size = 10
        opt.max_sent_length = 10
        opt.run_valid_every = 40000
        opt.decode_old = False
        opt.beam_search_batch_size = 10
        opt.bert_model = 'bert-base-uncased'
        opt.tokenizer = BertTokenizer.from_pretrained(opt.bert_model)
    else:
        opt.enc_layers = 2
        opt.bidirectional = True
        opt.decode_old = True
        opt.run_valid_every = 2

    opt.onlyTest = False
    if opt.onlyTest:
        # Evaluation-only mode: disable ML training and restore a checkpoint.
        opt.train_ml = False
        opt.run_valid_every = 5
        opt.beam_size = 64
        opt.beam_search_batch_size = 128
        opt.train_from = 'exp/kp20k.ml.copy.20181128-153121/model/kp20k.ml.copy.epoch=2.batch=15495.total_batch=38000.model'

    logging = init_logging(logger_name='train.py',
                           log_file=opt.log_file,
                           redirect_to_stdout=False)
    logging.info('EXP_PATH : ' + opt.exp_path)
    logging.info('Parameters:')
    # Plain loop instead of a side-effect-only list comprehension.
    for k, v in opt.__dict__.items():
        logging.info('%s : %s' % (k, str(v)))

    logging.info('====================== Checking GPU Availability =========================')
    if torch.cuda.is_available() and opt.useGpu:
        # Normalize a single gpu id into a list for uniform handling.
        if isinstance(opt.gpuid, int):
            opt.gpuid = [opt.gpuid]
        logging.info('Running on %s! devices=%s' % ('MULTIPLE GPUs' if len(opt.gpuid) > 1 else '1 GPU',
                                                    str(opt.gpuid)))
    else:
        logging.info('Running on CPU!')

    try:
        train_data_loader, valid_data_loader, test_data_loader, word2id, id2word, vocab = load_data_vocab(opt)
        model = init_model(opt)
        if torch.cuda.is_available() and opt.useGpu:
            model.cuda()
        print("model:")
        print(model)
        print()
        optimizer_ml, optimizer_rl, criterion = init_optimizer_criterion(model, opt)
        if torch.cuda.is_available() and opt.useGpu:
            criterion.cuda()
        train_model(model, optimizer_ml, optimizer_rl, criterion,
                    train_data_loader, valid_data_loader, test_data_loader, opt)
    except Exception as e:
        logging.error(e, exc_info=True)
        raise
def main():
    """Entry point for prediction: restore the model and evaluate it with
    beam search on every configured valid and test dataset.

    Exceptions are logged with a traceback and suppressed (script convention
    for the predict_* entry points).
    """
    opt = config.init_opt(description='predict.py')
    logger = config.init_logging('predict',
                                 opt.exp_path + '/output.log',
                                 redirect_to_stdout=False)

    logger.info('EXP_PATH : ' + opt.exp_path)
    logger.info('Parameters:')
    # Plain loop instead of a side-effect-only list comprehension.
    for k, v in opt.__dict__.items():
        logger.info('%s : %s' % (k, str(v)))

    logger.info('====================== Checking GPU Availability =========================')
    if torch.cuda.is_available():
        # Normalize a single device id into a list for uniform handling.
        if isinstance(opt.device_ids, int):
            opt.device_ids = [opt.device_ids]
        logger.info('Running on %s! devices=%s' % ('MULTIPLE GPUs' if len(opt.device_ids) > 1 else '1 GPU',
                                                   str(opt.device_ids)))
    else:
        logger.info('Running on CPU!')

    try:
        valid_data_loaders, word2id, id2word, vocab = load_vocab_and_datasets_for_testing(
            dataset_names=opt.test_dataset_names, type='valid', opt=opt)
        test_data_loaders, _, _, _ = load_vocab_and_datasets_for_testing(
            dataset_names=opt.test_dataset_names, type='test', opt=opt)
        # Expose the vocabulary on opt so downstream components can reach it.
        opt.word2id = word2id
        opt.id2word = id2word
        opt.vocab = vocab

        model = init_model(opt)
        generator = SequenceGenerator(model,
                                      eos_id=opt.word2id[pykp.io.EOS_WORD],
                                      beam_size=opt.beam_size,
                                      max_sequence_length=opt.max_sent_length)

        valid_score_dict = evaluate_multiple_datasets(generator, valid_data_loaders, opt,
                                                      title='valid',
                                                      predict_save_path=opt.pred_path)
        test_score_dict = evaluate_multiple_datasets(generator, test_data_loaders, opt,
                                                     title='test',
                                                     predict_save_path=opt.pred_path)
    except Exception as e:
        logger.error(e, exc_info=True)
def main():
    """Entry point for prediction with hard-coded experiment overrides.

    Base options are parsed, overridden per encoder type
    (transformer / bert / rnn), then the model checkpoint is restored and
    evaluated with beam search on every configured test dataset.

    Exceptions are logged with a traceback and suppressed (script convention
    for the predict_* entry points).
    """
    opt = config.init_opt(description='predict.py')
    # Hard-coded experiment overrides (stand in for command-line flags).
    opt.data = 'data3/kp20k/kp20k'
    opt.vocab = 'data3/kp20k/kp20k.vocab.pt'
    opt.train_from = 'exp/kp20k.ml.copy.20181128-153121/model/kp20k.ml.copy.epoch=2.batch=15495.total_batch=38000.model'
    opt.useGpu = 0
    opt.encoder_type = 'rnn'
    opt.useCLF = False

    if opt.encoder_type.startswith('transformer'):
        opt.batch_size = 32
        opt.d_inner = 2048
        opt.enc_n_layers = 4
        opt.dec_n_layers = 2
        opt.n_head = 8
        opt.d_k = 64
        opt.d_v = 64
        opt.d_model = 512
        opt.word_vec_size = 512
        opt.run_valid_every = 5000000
        opt.save_model_every = 20000
        opt.decode_old = True
    elif opt.encoder_type.startswith('bert'):
        opt.useOnlyTwo = False
        opt.avgHidden = True
        opt.useZeroDecodeHidden = False
        opt.useSameEmbeding = False
        opt.batch_size = 10
        opt.max_sent_length = 10
        opt.run_valid_every = 20000
        opt.decode_old = False
        opt.beam_search_batch_size = 10
        opt.bert_model = 'bert-base-uncased'
        opt.tokenizer = BertTokenizer.from_pretrained(opt.bert_model)
        if opt.encoder_type == 'bert_low':
            opt.copy_attention = False
    else:
        opt.enc_layers = 2
        opt.bidirectional = True
        opt.decode_old = True

    logger = config.init_logging('predict',
                                 opt.exp_path + '/output.log',
                                 redirect_to_stdout=False)
    logger.info('EXP_PATH : ' + opt.exp_path)
    logger.info('Parameters:')
    # Plain loop instead of a side-effect-only list comprehension.
    for k, v in opt.__dict__.items():
        logger.info('%s : %s' % (k, str(v)))

    logger.info('====================== Checking GPU Availability =========================')
    if torch.cuda.is_available() and opt.useGpu:
        # Normalize a single gpu id into a list for uniform handling.
        if isinstance(opt.gpuid, int):
            opt.gpuid = [opt.gpuid]
        logger.info('Running on %s! devices=%s' % ('MULTIPLE GPUs' if len(opt.gpuid) > 1 else '1 GPU',
                                                   str(opt.gpuid)))
    else:
        logger.info('Running on CPU!')

    try:
        test_data_loaders, word2id, id2word, vocab = load_vocab_and_testsets(opt)
        model = init_model(opt)
        if torch.cuda.is_available() and opt.useGpu:
            model.cuda()
        generator = SequenceGenerator(model,
                                      opt.word_vec_size if opt.encoder_type == 'transformer' else opt.vocab_size,
                                      eos_id=opt.word2id[pykp.io.EOS_WORD],
                                      beam_size=opt.beam_size,
                                      max_sequence_length=opt.max_sent_length,
                                      useGpu=opt.useGpu)

        for testset_name, test_data_loader in zip(opt.test_dataset_names, test_data_loaders):
            logger.info('Evaluating %s' % testset_name)
            evaluate_beam_search(generator, test_data_loader, opt,
                                 title='test_%s' % testset_name,
                                 predict_save_path=opt.pred_path + '/%s_test_result/' % (testset_name))
    except Exception as e:
        logger.error(e, exc_info=True)