def main():
    """Entry point: parse options, set up logging and launch training.

    Registers the preprocess, model, train, predict and transformer option
    groups, post-processes the parsed options with ``process_opt`` and
    switches both input-feeding flags off.  Data, model and optimizer are
    then built and handed to ``train_model``.  Any exception raised while
    loading or training is logged with its traceback and not re-raised.
    """
    arg_parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description='train.py')

    # Each helper contributes its own group of command-line options.
    option_groups = (
        config.preprocess_opts,
        config.model_opts,
        config.train_opts,
        config.predict_opts,
        config.transformer_opts,
    )
    for register_options in option_groups:
        register_options(arg_parser)

    opt = process_opt(arg_parser.parse_args())

    # Input feeding is switched off unconditionally for this entry point.
    opt.input_feeding = False
    opt.copy_input_feeding = False

    log_path = opt.exp_path + '/output.log'
    logger = config.init_logging(logger_name=None, log_file=log_path, stdout=True)

    try:
        # A checkpoint can be resumed by assigning opt.train_from here, e.g.
        # opt.train_from = 'model/kp20k.ml.copy.uni-directional.20180817-021054/kp20k.ml.copy.uni-directional.epoch=6.batch=6735.total_batch=57300.model'
        (train_data_loader, _word2id, _id2word, _vocab,
         eval_dataloader) = load_data_vocab(opt)

        model = init_model(opt)

        # The middle value is not used in this function; it is only
        # forwarded to train_model in the same position.
        optimizer_ml, second_optimizer, criterion = init_optimizer_criterion(model, opt)

        train_model(model, optimizer_ml, second_optimizer, criterion,
                    train_data_loader, opt, eval_dataloader)
    except Exception:
        # Top-level boundary: record the traceback in the experiment log.
        logger.exception("message")
def main():
    """Parse training options, log them, then build and train the model.

    Registers the preprocess, model, train and predict option groups,
    post-processes the parsed options with ``process_opt`` and switches both
    input-feeding flags off.  Every parsed option is written to the log
    before the data loaders, model, optimizers and criterion are created and
    passed to ``train_model``.  Any exception raised while loading or
    training is logged with its traceback and not re-raised.
    """
    # load settings for training
    parser = argparse.ArgumentParser(
        description='train.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    config.preprocess_opts(parser)
    config.model_opts(parser)
    config.train_opts(parser)
    config.predict_opts(parser)
    opt = parser.parse_args()

    opt = process_opt(opt)
    opt.input_feeding = False
    opt.copy_input_feeding = False

    # Named ``logger`` so the local does not shadow the stdlib ``logging`` module.
    logger = config.init_logging(logger_name=None,
                                 log_file=opt.exp_path + '/output.log',
                                 stdout=True)

    logger.info('Parameters:')
    # Plain loop instead of a list comprehension used only for side effects;
    # %-style arguments let the logging framework do the formatting.
    for key, value in vars(opt).items():
        logger.info('%s : %s', key, value)

    try:
        (train_data_loader, valid_data_loader, test_data_loader,
         word2id, id2word, vocab) = load_data_vocab(opt)
        model = init_model(opt)
        optimizer_ml, optimizer_rl, criterion = init_optimizer_criterion(model, opt)
        train_model(model, optimizer_ml, optimizer_rl, criterion,
                    train_data_loader, valid_data_loader, test_data_loader, opt)
    except Exception:
        # Top-level boundary: record the traceback in the experiment log.
        logger.exception("message")
def main():
    """Parse prediction options, prepare output directories and run beam search.

    Registers the preprocess, model, train, predict and transformer option
    groups, seeds torch when ``opt.seed`` is positive, derives the experiment
    name (``'predict.'`` prefix plus ``.copy`` / direction suffixes), fills
    the experiment name and time mark into the path templates and creates
    the experiment and prediction directories.  The model is then loaded and
    evaluated with ``evaluate_beam_search`` on the test data loader.  Any
    exception raised while loading or evaluating is logged with its
    traceback and not re-raised.
    """
    # load settings for training
    parser = argparse.ArgumentParser(
        description='predict.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    config.preprocess_opts(parser)
    config.model_opts(parser)
    config.train_opts(parser)
    config.predict_opts(parser)
    config.transformer_opts(parser)
    opt = parser.parse_args()

    if opt.seed > 0:
        torch.manual_seed(opt.seed)

    # Fall back to device 0 when CUDA is available and opt.gpuid is falsy.
    if torch.cuda.is_available() and not opt.gpuid:
        opt.gpuid = 0

    opt.exp = 'predict.' + opt.exp
    if hasattr(opt, 'copy_model') and opt.copy_model:
        opt.exp += '.copy'

    if hasattr(opt, 'bidirectional'):
        if opt.bidirectional:
            opt.exp += '.bi-directional'
        else:
            opt.exp += '.uni-directional'

    # fill time into the name
    # ``in`` rather than ``find(...) > 0``: a template that *starts* with '%s'
    # (index 0) must be filled in as well.
    if '%s' in opt.exp_path:
        opt.exp_path = opt.exp_path % (opt.exp, opt.timemark)
        opt.pred_path = opt.pred_path % (opt.exp, opt.timemark)

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(opt.exp_path, exist_ok=True)
    os.makedirs(opt.pred_path, exist_ok=True)

    # Named ``logger`` so the local does not shadow the stdlib ``logging`` module.
    logger = config.init_logging(logger_name=None,
                                 log_file=opt.exp_path + '/output.log',
                                 stdout=True)

    try:
        # NOTE(review): this hard-coded checkpoint overrides any train_from
        # value supplied through the options -- confirm this is intended.
        opt.train_from = 'model/kp20k.ml.copy.bi-directional.20180908-054257/kp20k.ml.copy.bi-directional.epoch=9.batch=2932.model'
        test_data_loader, word2id, id2word, vocab = load_data_vocab(opt, load_train=False)
        model = init_model(opt)
        generator = SequenceGenerator(model, opt,
                                      eos_id=opt.word2id[pykp.io.EOS_WORD],
                                      beam_size=opt.beam_size,
                                      max_sequence_length=opt.max_sent_length,
                                      )

        evaluate_beam_search(
            generator, test_data_loader, opt,
            title='predict',
            save_path=opt.pred_path + '/[epoch=%d,batch=%d,total_batch=%d]test_result.csv' % (0, 0, 0))
    except Exception:
        # Top-level boundary: record the traceback in the experiment log.
        logger.exception("message")
def main():
    """Parse training options, prepare experiment directories and train.

    Registers the preprocess, model, train and predict option groups, seeds
    torch when ``opt.seed`` is positive, appends ``.copy`` and a direction
    suffix to the experiment name, fills the experiment name and time mark
    into the path templates and creates the experiment and save directories.
    After logging every parsed option, the data loaders, model, optimizer
    and criterion are built and passed to ``train_model``.  Any exception
    raised while loading or training is logged with its traceback and not
    re-raised.
    """
    # load settings for training
    parser = argparse.ArgumentParser(
        description='train.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    config.preprocess_opts(parser)
    config.model_opts(parser)
    config.train_opts(parser)
    config.predict_opts(parser)
    opt = parser.parse_args()

    if opt.seed > 0:
        torch.manual_seed(opt.seed)

    print(opt.gpuid)
    # Fall back to device 0 when CUDA is available and opt.gpuid is falsy.
    if torch.cuda.is_available() and not opt.gpuid:
        opt.gpuid = 0

    if hasattr(opt, 'copy_model') and opt.copy_model:
        opt.exp += '.copy'

    if hasattr(opt, 'bidirectional'):
        if opt.bidirectional:
            opt.exp += '.bi-directional'
        else:
            opt.exp += '.uni-directional'

    # fill time into the name
    # ``in`` rather than ``find(...) > 0``: a template that *starts* with '%s'
    # (index 0) must be filled in as well.
    if '%s' in opt.exp_path:
        opt.exp_path = opt.exp_path % (opt.exp, opt.timemark)
        opt.save_path = opt.save_path % (opt.exp, opt.timemark)

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(opt.exp_path, exist_ok=True)
    os.makedirs(opt.save_path, exist_ok=True)

    config.init_logging(opt.exp_path + '/output.log')

    logging.info('Parameters:')
    # Plain loop instead of a list comprehension used only for side effects;
    # %-style arguments let the logging framework do the formatting.
    for key, value in vars(opt).items():
        logging.info('%s : %s', key, value)

    try:
        (train_data_loader, valid_data_loader, test_data_loader,
         word2id, id2word, vocab) = load_data_vocab(opt)
        model = init_model(opt)
        optimizer, criterion = init_optimizer_criterion(model, opt)
        train_model(model, optimizer, criterion, train_data_loader,
                    valid_data_loader, test_data_loader, opt)
    except Exception:
        # Top-level boundary: record the traceback in the log.
        logging.exception("message")
def main():
    """Run a training job configured entirely from the command line.

    Steps, in order: register the preprocess/model/train/predict option
    groups and parse them; seed torch if ``opt.seed`` is positive; default
    ``opt.gpuid`` to 0 when CUDA is available and no id is set; extend the
    experiment name with ``.copy`` and a direction suffix; expand the
    ``exp_path`` / ``save_path`` templates with the experiment name and time
    mark and create both directories; initialise logging and dump all
    options; finally load the data, build the model, optimizer and criterion
    and call ``train_model``.  Exceptions from the load/train stage are
    logged with a traceback and not re-raised.
    """
    # load settings for training
    parser = argparse.ArgumentParser(
        description='train.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    config.preprocess_opts(parser)
    config.model_opts(parser)
    config.train_opts(parser)
    config.predict_opts(parser)
    opt = parser.parse_args()

    if opt.seed > 0:
        torch.manual_seed(opt.seed)

    print(opt.gpuid)
    if torch.cuda.is_available() and not opt.gpuid:
        opt.gpuid = 0

    if hasattr(opt, 'copy_model') and opt.copy_model:
        opt.exp += '.copy'

    if hasattr(opt, 'bidirectional'):
        if opt.bidirectional:
            opt.exp += '.bi-directional'
        else:
            opt.exp += '.uni-directional'

    # fill time into the name
    # Membership test instead of ``find(...) > 0`` so that a template with
    # '%s' at position 0 is expanded too.
    if '%s' in opt.exp_path:
        opt.exp_path = opt.exp_path % (opt.exp, opt.timemark)
        opt.save_path = opt.save_path % (opt.exp, opt.timemark)

    # Create both directories without a separate existence check, which
    # would leave a window for another process to create them first.
    for directory in (opt.exp_path, opt.save_path):
        os.makedirs(directory, exist_ok=True)

    config.init_logging(opt.exp_path + '/output.log')

    logging.info('Parameters:')
    # Explicit loop: the previous list comprehension existed only for its
    # side effects and built a list that was thrown away.
    for name, value in vars(opt).items():
        logging.info('%s : %s', name, value)

    try:
        (train_data_loader, valid_data_loader, test_data_loader,
         word2id, id2word, vocab) = load_data_vocab(opt)
        model = init_model(opt)
        optimizer, criterion = init_optimizer_criterion(model, opt)
        train_model(model, optimizer, criterion, train_data_loader,
                    valid_data_loader, test_data_loader, opt)
    except Exception:
        # Top-level boundary: record the traceback in the log.
        logging.exception("message")
def main():
    """Entry point: train a model initialised from a saved embedding file.

    Registers the preprocess, model, train, predict and transformer option
    groups, post-processes the parsed options with ``process_opt`` and
    switches both input-feeding flags off.  After the model is built, the
    object loaded from ``embedding50004.pt`` is passed to
    ``model.init_embedding`` and the learning rate is overridden to 0.001
    before the optimizer and criterion are created.  Any exception raised
    while loading or training is logged with its traceback and not re-raised.
    """
    arg_parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description='train.py')

    # Each helper contributes its own group of command-line options.
    option_groups = (
        config.preprocess_opts,
        config.model_opts,
        config.train_opts,
        config.predict_opts,
        config.transformer_opts,
    )
    for register_options in option_groups:
        register_options(arg_parser)

    opt = process_opt(arg_parser.parse_args())

    # Input feeding is switched off unconditionally for this entry point.
    opt.input_feeding = False
    opt.copy_input_feeding = False

    log_path = opt.exp_path + '/output.log'
    logger = config.init_logging(logger_name=None, log_file=log_path, stdout=True)

    try:
        # A checkpoint can be resumed by assigning opt.train_from here, e.g.
        # opt.train_from = 'model/kp20k.ml.copy.bi-directional.20180901-025437/kp20k.ml.copy.bi-directional.epoch=9.batch=938.model'
        (train_data_loader, _word2id, _id2word, _vocab,
         eval_dataloader) = load_data_vocab(opt)

        model = init_model(opt)

        # Alternative kept from the original: make_embedding(word2id, id2word)
        pretrained_embedding = torch.load('embedding50004.pt')
        model.init_embedding(pretrained_embedding)

        # Override whatever learning rate the options carried.
        opt.learning_rate = 0.001

        optimizer_ml, criterion = init_optimizer_criterion(model, opt)
        train_model(model, optimizer_ml, criterion,
                    train_data_loader, opt, eval_dataloader)
    except Exception:
        # Top-level boundary: record the traceback in the experiment log.
        logger.exception("message")
# NOTE(review): the line below is a whitespace-collapsed fragment and is left
# byte-identical; restore its original line breaks and indentation before use.
# It holds two unrelated pieces:
#   1. The inner ``wrapper`` of a timing decorator: it records time.time()
#      before and after calling ``func(*args, **kwargs)``, prints ``fname``
#      with the elapsed seconds and returns func's result.  ``func`` and
#      ``fname`` come from an enclosing definition that is not visible here.
#   2. Module-level setup: author metadata, an argparse parser for 'train.py'
#      with the preprocess/model/train option groups, torch seeding when
#      opt.seed > 0, and defaulting opt.gpuid to 0 when CUDA is available and
#      opt.gpuid is falsy.  The fragment ends mid-script.
def wrapper(*args, **kwargs): beg_ts = time.time() retval = func(*args, **kwargs) end_ts = time.time() print(fname, "elapsed time: %f" % (end_ts - beg_ts)) return retval return wrapper __author__ = "Rui Meng" __email__ = "*****@*****.**" # load settings for training parser = argparse.ArgumentParser( description='train.py', formatter_class=argparse.ArgumentDefaultsHelpFormatter) config.preprocess_opts(parser) config.model_opts(parser) config.train_opts(parser) opt = parser.parse_args() if opt.seed > 0: torch.manual_seed(opt.seed) print(opt.gpuid) if torch.cuda.is_available() and not opt.gpuid: opt.gpuid = 0 # if opt.gpuid: # cuda.set_device(0) # fill time into the name
import pykp.io

# Command-line interface of the preprocessing script.
parser = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    description='preprocess.py')

# **Preprocess Options**
parser.add_argument(
    '-dataset_name',
    help="Name of dataset",
    required=True)
parser.add_argument(
    '-source_dataset_dir',
    help="The path to the source data (raw json).",
    required=True)
parser.add_argument(
    '-output_path_prefix',
    help="Output file for the prepared data",
    default='data')

# Project-wide preprocessing options are added on top of the three above.
config.preprocess_opts(parser)
opt = parser.parse_args()

# Raw JSON inputs: one file per split, each named after the dataset.
opt.source_train_file = os.path.join(
    opt.source_dataset_dir, '%s_training.json' % opt.dataset_name)
opt.source_valid_file = os.path.join(
    opt.source_dataset_dir, '%s_validation.json' % opt.dataset_name)
opt.source_test_file = os.path.join(
    opt.source_dataset_dir, '%s_testing.json' % opt.dataset_name)

# Output location for the processed dataset.
opt.output_path = os.path.join(opt.output_path_prefix, opt.dataset_name)

# Sibling output location carrying a '_small' suffix.
opt.subset_output_path = os.path.join(
    opt.output_path_prefix, opt.dataset_name + '_small')
def main():
    """Parse prediction options, prepare output directories and run beam search.

    Registers the preprocess, model, train and predict option groups, seeds
    torch when ``opt.seed`` is positive, derives the experiment name
    (``'predict.'`` prefix plus ``.copy`` / direction suffixes), fills the
    experiment name and time mark into the path templates and creates the
    experiment and prediction directories.  After logging every parsed
    option, the data is loaded with ``load_train=False``, the model is built
    and ``evaluate_beam_search`` is run on the test data loader.  Any
    exception raised while loading or evaluating is logged with its
    traceback and not re-raised.
    """
    # load settings for training
    parser = argparse.ArgumentParser(
        description='predict.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    config.preprocess_opts(parser)
    config.model_opts(parser)
    config.train_opts(parser)
    config.predict_opts(parser)
    opt = parser.parse_args()

    if opt.seed > 0:
        torch.manual_seed(opt.seed)

    print(opt.gpuid)
    # Fall back to device 0 when CUDA is available and opt.gpuid is falsy.
    if torch.cuda.is_available() and not opt.gpuid:
        opt.gpuid = 0

    opt.exp = 'predict.' + opt.exp
    if hasattr(opt, 'copy_model') and opt.copy_model:
        opt.exp += '.copy'

    if hasattr(opt, 'bidirectional'):
        if opt.bidirectional:
            opt.exp += '.bi-directional'
        else:
            opt.exp += '.uni-directional'

    # fill time into the name
    # ``in`` rather than ``find(...) > 0``: a template that *starts* with '%s'
    # (index 0) must be filled in as well.
    if '%s' in opt.exp_path:
        opt.exp_path = opt.exp_path % (opt.exp, opt.timemark)
        opt.pred_path = opt.pred_path % (opt.exp, opt.timemark)

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(opt.exp_path, exist_ok=True)
    os.makedirs(opt.pred_path, exist_ok=True)

    # Named ``logger`` so the local does not shadow the stdlib ``logging`` module.
    logger = config.init_logging('train', opt.exp_path + '/output.log')

    logger.info('Parameters:')
    # Plain loop instead of a list comprehension used only for side effects;
    # %-style arguments let the logging framework do the formatting.
    for key, value in vars(opt).items():
        logger.info('%s : %s', key, value)

    try:
        (train_data_loader, valid_data_loader, test_data_loader,
         word2id, id2word, vocab) = load_data_vocab(opt, load_train=False)
        model = init_model(opt)
        generator = SequenceGenerator(model,
                                      eos_id=opt.word2id[pykp.io.EOS_WORD],
                                      beam_size=opt.beam_size,
                                      max_sequence_length=opt.max_sent_length)

        evaluate_beam_search(
            generator, test_data_loader, opt,
            title='predict',
            save_path=opt.pred_path + '/[epoch=%d,batch=%d,total_batch=%d]test_result.csv' % (0, 0, 0))
    except Exception:
        # Top-level boundary: record the traceback in the experiment log.
        logger.exception("message")
def generate_dataset():
    """Load the kp20k vocabulary and test split and return the processed test set.

    Options come from the parser defaults only: ``parse_args([])`` is called
    with an empty argument list, so nothing is read from the command line.
    The vocabulary is loaded from ``<output_path_prefix>/kp20k/kp20k.vocab.pt``,
    the test pairs from ``<source_dataset_root_dir>/kp20k_testing.json``, and
    the output directory ``<output_path_prefix>/kp20k`` is created if missing.

    Returns:
        Whatever ``pykp.io.process_dataset`` returns for the tokenized test
        pairs (called with ``data_type='test'``).
    """
    test_dataset_name = 'kp20k'
    src_fields = ['title', 'abstract']
    trg_fields = ['keyword']

    parser = argparse.ArgumentParser(
        description='preprocess_testset.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # **Preprocess Options**
    parser.add_argument('-source_dataset_root_dir', default='test/',
                        help="The path to the source data (raw json).")
    parser.add_argument('-output_path_prefix', default='data/',
                        help="Output file for the prepared data")
    config.preprocess_opts(parser)
    opt = parser.parse_args([])

    print("Loading Vocab...")
    opt.vocab_path = os.path.join(opt.output_path_prefix, 'kp20k', 'kp20k.vocab.pt')
    print(os.path.abspath(opt.vocab_path))
    # torch.load's second positional parameter is ``map_location``, not a
    # file mode, so the stray 'rb' argument has been dropped.
    word2id, id2word, vocab = torch.load(opt.vocab_path)
    print('Vocab size = %d' % len(vocab))

    opt.source_test_file = os.path.join(
        opt.source_dataset_root_dir, '%s_testing.json' % (test_dataset_name))

    # output path for exporting the processed dataset
    opt.output_path = os.path.join(opt.output_path_prefix, test_dataset_name)
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(opt.output_path, exist_ok=True)

    print("Loading test data...")
    tokenized_test_pairs = pykp.io.load_src_trgs_pairs(
        source_json_path=opt.source_test_file,
        dataset_name=test_dataset_name,
        src_fields=src_fields,
        trg_fields=trg_fields,
        valid_check=True,
        opt=opt)

    print("Exporting complete dataset")
    # A call to pykp.io.process_and_export_dataset with the same arguments
    # was commented out here in the original.
    return pykp.io.process_dataset(tokenized_test_pairs,
                                   word2id, id2word,
                                   opt,
                                   opt.output_path,
                                   dataset_name=test_dataset_name,
                                   data_type='test')