def validate(args, device_id, pt, step):
    """Evaluate a saved extractive checkpoint on the validation split.

    Loads the checkpoint named by ``pt`` (or ``args.test_from`` when ``pt``
    is empty), restores any model flags recorded in the checkpoint, and
    returns the validation cross-entropy at ``step``.
    """
    device = "cuda" if args.visible_gpus != '-1' else "cpu"
    test_from = pt if pt != '' else args.test_from
    logger.info('Loading checkpoint from %s' % test_from)
    checkpoint = torch.load(test_from,
                            map_location=lambda storage, loc: storage)

    # Architecture flags saved at training time override the CLI args.
    for flag, value in vars(checkpoint['opt']).items():
        if flag in model_flags:
            setattr(args, flag, value)
    print(args)

    if args.ext_sum_dec:
        model = SentenceTransformer(args, device, checkpoint, sum_or_jigsaw=0)
    else:
        model = ExtSummarizer(args, device, checkpoint)
    model.eval()

    valid_iter = data_loader.Dataloader(args,
                                        load_dataset(args, 'valid', shuffle=False),
                                        args.batch_size, device,
                                        shuffle=False, is_test=False)
    trainer = build_trainer(args, device_id, model, None)
    return trainer.validate(valid_iter, step).xent()
def test_ext(args, device_id, pt, step):
    """Score the test split with a saved extractive summarizer checkpoint."""
    device = "cuda" if args.visible_gpus != '-1' else "cpu"
    test_from = pt if pt != '' else args.test_from
    logger.info('Loading checkpoint from %s' % test_from)
    checkpoint = torch.load(test_from,
                            map_location=lambda storage, loc: storage)

    # Restore architecture flags recorded in the checkpoint.
    for flag, value in vars(checkpoint['opt']).items():
        if flag in model_flags:
            setattr(args, flag, value)
    print(args)

    model = ExtSummarizer(args, device, checkpoint)
    model.eval()

    test_iter = data_loader.Dataloader(args,
                                       load_dataset(args, 'test', shuffle=False),
                                       args.batch_size, device,
                                       shuffle=False, is_test=True)
    trainer = build_trainer(args, device_id, model, None)
    trainer.test(test_iter, step)
def test_ext(args, device_id, pt, step):
    """Run inference with a saved Ext_summarizer and yield per-batch results.

    Yields tuples of (ref_patents, summaries, output_probas,
    prediction_contradiction, str_context) produced by the trainer.

    Fix: removed the duplicated ``trainer = trainer = ...`` assignment.
    """
    device = "cpu" if device_id == -1 else "cuda"
    # NOTE(review): device_id is joined with ',' below, so it appears to be
    # a list of GPU ids here rather than a single int — confirm at call site.
    logger.info('Device ID %s' % ','.join(map(str, device_id)))
    logger.info('Device %s' % device)

    test_from = pt if pt != '' else args.load_model
    logger.info('Loading model_checkpoint from %s' % test_from)
    model_checkpoint = torch.load(test_from,
                                  map_location=lambda storage, loc: storage)
    # Architecture options must match the checkpoint, not the CLI.
    args.doc_classifier = model_checkpoint['opt'].doc_classifier
    args.nbr_class_neurons = model_checkpoint['opt'].nbr_class_neurons

    model = Ext_summarizer(args, device, model_checkpoint)
    test_iter = data_loader.Dataloader(args,
                                       load_dataset(args, 'test', shuffle=False),
                                       args.test_batch_size, device,
                                       shuffle=False)
    # was: trainer = trainer = build_trainer(...)
    trainer = build_trainer(args, device_id, model, None, None, None)
    for (ref_patents, summaries, output_probas,
            prediction_contradiction, str_context) in trainer.test(test_iter):
        yield (ref_patents, summaries, output_probas,
               prediction_contradiction, str_context)
def validate(args, device_id, pt, step):
    """Validate a saved Summarizer checkpoint and return its cross-entropy.

    Fix: ``device`` was referenced without ever being assigned in this
    function (a NameError unless a module-level global happened to exist).
    It is now derived from ``args.visible_gpus`` exactly as the sibling
    entry points in this file do.
    """
    if pt != '':
        test_from = pt
    else:
        test_from = args.test_from
    print('Loading checkpoint from %s' % test_from)
    logger.info('Loading checkpoint from %s' % test_from)
    checkpoint = torch.load(test_from, map_location=lambda storage, loc: storage)

    # Checkpointed model flags override the CLI args.
    opt = vars(checkpoint['opt'])
    for k in opt.keys():
        if k in model_flags:
            setattr(args, k, opt[k])
    print(args)

    # Bug fix: define the device this function uses below.
    device = "cpu" if args.visible_gpus == '-1' else "cuda"

    model = Summarizer(args, device, checkpoint)
    valid_iter = DataLoader.Dataloader(args,
                                       load_dataset(args, 'valid', shuffle=False),
                                       args.batch_size, device,
                                       shuffle=False, is_test=False)
    trainer = build_trainer(args, device_id, model, None)
    stats = trainer.validate(valid_iter, step)
    return stats.xent()
def train_single_ext(args, device_id):
    """Train the extractive summarizer on a single device.

    Optionally resumes from ``args.train_from``, restoring any model flags
    stored in that checkpoint before building the model and optimizer.
    """
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True

    # TODO -> add ability to load model from chkpt
    checkpoint = None
    if args.train_from != '':
        checkpoint = torch.load(args.train_from,
                                map_location=lambda storage, loc: storage)
        # Checkpointed model flags win over the CLI.
        for flag, value in vars(checkpoint['opt']).items():
            if flag in model_flags:
                setattr(args, flag, value)

    def train_iter_fct():
        # Fresh dataloader each epoch over the shuffled training set.
        return data_loader.Dataloader(args,
                                      load_dataset(args, 'train', shuffle=True),
                                      args.batch_size, args.device,
                                      shuffle=True, is_test=False)

    model = ExtSummarizer(args, checkpoint)
    optim = model_builder.build_optim(args, model, checkpoint)
    trainer = build_trainer(args, device_id, model, optim)
    trainer.train(train_iter_fct, args.train_steps)
def train_single_ext(args, device_id):
    """Single-device training loop for the extractive summarizer, with a
    debug mode that iterates the test split instead of the training split.

    Fix: the CPU seeding block (``torch.manual_seed`` / ``random.seed`` /
    ``cudnn.deterministic``) was executed twice verbatim; the duplicate is
    removed. Seeding is idempotent, so behavior is unchanged.
    """
    init_logger(args.log_file)
    device = "cpu" if args.visible_gpus == '-1' else "cuda"
    logger.info('Device ID %d' % device_id)
    logger.info('Device %s' % device)

    # Seed every RNG once for reproducibility.
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True
    if device_id >= 0:
        torch.cuda.set_device(device_id)
        torch.cuda.manual_seed(args.seed)

    if args.train_from != '':
        logger.info('Loading checkpoint from %s' % args.train_from)
        checkpoint = torch.load(args.train_from,
                                map_location=lambda storage, loc: storage)
        # Checkpointed model flags override the CLI args.
        opt = vars(checkpoint['opt'])
        for k in opt.keys():
            if k in model_flags:
                setattr(args, k, opt[k])
    else:
        checkpoint = None

    def train_iter_fct():
        # Debug mode feeds the (smaller) test split without shuffling.
        if args.is_debugging:
            print("YES it is debugging")
            return data_loader.Dataloader(args,
                                          load_dataset(args, 'test', shuffle=False),
                                          args.batch_size, device,
                                          shuffle=False, is_test=False)
        else:
            return data_loader.Dataloader(args,
                                          load_dataset(args, 'train', shuffle=True),
                                          args.batch_size, device,
                                          shuffle=True, is_test=False)

    model = ExtSummarizer(args, device, checkpoint)
    optim = model_builder.build_optim(args, model, checkpoint)
    logger.info(model)

    trainer = build_trainer(args, device_id, model, optim)
    trainer.train(train_iter_fct, args.train_steps)
def train_single_ext(args, device_id):
    """Single-device training loop for the extractive model, or for the
    SentenceTransformer variant when ``args.ext_sum_dec`` is set.

    Fixes: removed the duplicated CPU seeding block (seeding is idempotent,
    behavior unchanged) and deleted the dead commented-out apex/amp fp16
    initialization code.
    """
    init_logger(args.log_file)
    device = "cpu" if args.visible_gpus == '-1' else "cuda"
    logger.info('Device ID %d' % device_id)
    logger.info('Device %s' % device)

    # Seed every RNG once for reproducibility.
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True
    if device_id >= 0:
        torch.cuda.set_device(device_id)
        torch.cuda.manual_seed(args.seed)

    if args.train_from != '':
        logger.info('Loading checkpoint from %s' % args.train_from)
        checkpoint = torch.load(args.train_from,
                                map_location=lambda storage, loc: storage)
        # Checkpointed model flags override the CLI args.
        opt = vars(checkpoint['opt'])
        for k in opt.keys():
            if k in model_flags:
                setattr(args, k, opt[k])
    else:
        checkpoint = None

    def train_iter_fct():
        return data_loader.Dataloader(args,
                                      load_dataset(args, 'train', shuffle=True),
                                      args.batch_size, device,
                                      shuffle=True, is_test=False)

    if args.ext_sum_dec:
        model = SentenceTransformer(args, device, checkpoint, sum_or_jigsaw=0)
    else:
        model = ExtSummarizer(args, device, checkpoint)
    optim = model_builder.build_optim(args, model, checkpoint)
    logger.info(model)

    trainer = build_trainer(args, device_id, model, optim)
    trainer.train(train_iter_fct, args.train_steps)
def __init__(
    self,
    ext_model_file,
):
    """Wrap a trained BERT-Ext checkpoint for inference.

    Loads the extractive summarizer weights from ``ext_model_file``, puts
    the model in eval mode, restores the decision config from
    ``<ext_model_file>.config``, and builds an inference-only trainer.
    """
    # Local imports keep the heavy model modules off the import path
    # until this wrapper is actually constructed.
    import models.model_builder as model
    import models.trainer_ext as trainer_ext

    args = self._build_ext_args()
    ckpt = torch.load(ext_model_file,
                      map_location=lambda storage, loc: storage)

    self.name = 'BERT-Ext'
    self.model_file = ext_model_file
    self.model_ext = model.ExtSummarizer(args, args.device, ckpt)
    self.model_ext.eval()

    self.decider = ExtDecider(logger)
    self.decider.load(ext_model_file + '.config')

    self.trainer = trainer_ext.build_trainer(args, args.device_id,
                                             self.model_ext, None)
def test_text_ext(args):
    """Summarize raw text input (args.text_src) with a saved checkpoint."""
    logger.info('Loading checkpoint from %s' % args.test_from)
    checkpoint = torch.load(args.test_from,
                            map_location=lambda storage, loc: storage)

    # Architecture flags stored in the checkpoint override the CLI args.
    for flag, value in vars(checkpoint['opt']).items():
        if flag in model_flags:
            setattr(args, flag, value)
    print(args)

    device = "cuda" if args.visible_gpus != '-1' else "cpu"
    device_id = 0 if device == "cuda" else -1

    model = ExtSummarizer(args, device, checkpoint)
    model.eval()

    test_iter = data_loader.load_text(args, args.text_src, args.text_tgt, device)
    trainer = build_trainer(args, device_id, model, None)
    # step -1: ad-hoc text input, not tied to a training step.
    trainer.test(test_iter, -1)
def test_ext(args, device_id, pt, step, is_joint=False):
    """Evaluate a saved (optionally joint) extractive checkpoint on the
    split named by ``args.exp_set`` via ROUGE baseline scoring, writing
    the scores to a pickle."""
    device = "cuda" if args.visible_gpus != '-1' else "cpu"
    test_from = pt if pt != '' else args.test_from
    logger.info('Loading checkpoint from %s' % test_from)
    checkpoint = torch.load(test_from,
                            map_location=lambda storage, loc: storage)

    # Architecture flags recorded in the checkpoint win over the CLI.
    for flag, value in vars(checkpoint['opt']).items():
        if flag in model_flags:
            setattr(args, flag, value)
    print(args)

    def test_iter_fct():
        # Fresh dataloader each time the trainer re-iterates the split.
        return data_loader.Dataloader(args,
                                      load_dataset(args, args.exp_set, shuffle=False),
                                      args.test_batch_size, device,
                                      shuffle=False, is_test=True)

    model = ExtSummarizer(args, device, checkpoint, is_joint=is_joint)
    model.eval()

    trainer = build_trainer(args, device_id, model, None)
    trainer.validate_rouge_baseline(test_iter_fct, step,
                                    write_scores_to_pickle=True)
def train_ext(args, device_id):
    """Train the BertSum-style Summarizer on a single device, optionally
    resuming from ``args.train_from``."""
    device = "cpu" if args.visible_gpus == '-1' else "cuda"
    print('Device ID %d' % device_id)
    print('Device %s' % device)

    if device_id >= 0:
        torch.cuda.set_device(device_id)
        torch.cuda.manual_seed(args.seed)
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True

    checkpoint = None
    if args.train_from != '':
        print('Loading checkpoint from %s' % args.train_from)
        checkpoint = torch.load(args.train_from,
                                map_location=lambda storage, loc: storage)
        # Model flags stored in the checkpoint override the CLI.
        for flag, value in vars(checkpoint['opt']).items():
            if flag in model_flags:
                setattr(args, flag, value)

    def train_iter_fct():
        return DataLoader.Dataloader(args,
                                     load_dataset(args, 'train', shuffle=True),
                                     args.batch_size, device,
                                     shuffle=True, is_test=False)

    model = Summarizer(args, device, checkpoint)
    optim = build_optim(args, model, checkpoint)
    trainer = build_trainer(args, device_id, model, optim)
    trainer.train(train_iter_fct, args.train_steps)
def test_ext(args, device_id, pt, step):
    """Score the test split with a saved Summarizer checkpoint.

    Fix: ``device`` was used without being defined anywhere in this
    function (a NameError unless a module-level global existed). It is now
    derived from ``args.visible_gpus`` like the sibling entry points.
    """
    if pt != '':
        test_from = pt
    else:
        test_from = args.test_from
    print('Loading checkpoint from %s' % test_from)
    checkpoint = torch.load(test_from, map_location=lambda storage, loc: storage)

    # Checkpointed model flags override the CLI args.
    opt = vars(checkpoint['opt'])
    for k in opt.keys():
        if k in model_flags:
            setattr(args, k, opt[k])
    print(args)

    # Bug fix: define the device used below.
    device = "cpu" if args.visible_gpus == '-1' else "cuda"

    # NOTE(review): unlike the other test paths, no model.eval() here —
    # confirm whether that omission is intentional.
    model = Summarizer(args, device, checkpoint)
    test_iter = DataLoader.Dataloader(args,
                                      load_dataset(args, 'test', shuffle=False),
                                      args.test_batch_size, device,
                                      shuffle=False, is_test=True)
    trainer = build_trainer(args, device_id, model, None)
    trainer.test(test_iter, step)
# NOTE(review): this chunk starts mid-function — `device`, `args` and
# `checkpoint` are defined by code outside this view.
device_id = 0 if device == "cuda" else -1
model = ExtSummarizer(args, device, checkpoint)
model.eval()

# load data_files
# args.text_src and args.result_path change for every paper
file_dir_papers = "N:/Organisatorisches/Bereiche_Teams/ID/03_Studenten/Korte/Newsletter/Automatic Text Summarization/PreSumm_dev/cnndm/papers/"
file_dir_results = "N:/Organisatorisches/Bereiche_Teams/ID/03_Studenten/Korte/Newsletter/Automatic Text Summarization/PreSumm_dev/cnndm/results/"

for filename in os.listdir(file_dir_papers):
    print(filename)
    print("Inference for ", filename)
    # change parameter for every trial
    args.text_src = file_dir_papers + filename
    resultname = filename.replace('.raw_src', '')
    args.result_path = file_dir_results + "result_" + resultname
    try:
        test_iter = data_loader.load_text(args, args.text_src, args.text_tgt, device)
        trainer = build_trainer(args, device_id, model, None)
        trainer.test(test_iter, -1)
    except Exception as err:
        # Bug fix: was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit and discarded the error entirely.
        print("Encoding Error at file ", filename)
        print(err)
def train_ext(args, device_id):
    """Train the Ext_summarizer, optionally with a GAN generator, and
    validate periodically via ``valid_iter_fct``.

    Fix: the CPU seeding statements (``torch.manual_seed`` /
    ``random.seed``) were repeated verbatim; the duplicate is removed.
    Seeding is idempotent, so behavior is unchanged.
    """
    init_logger(args.log_file)
    device = "cpu" if device_id == -1 else "cuda"
    # NOTE(review): device_id is joined with ',' — it appears to be a list
    # of GPU ids here (indexed with [0] below); confirm against callers.
    logger.info('Device ID %s' % ','.join(map(str, device_id)))
    logger.info('Device %s' % device)

    # Seed every RNG once for reproducibility.
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True
    if device_id != -1:
        torch.cuda.set_device(device_id[0])
        torch.cuda.manual_seed(args.seed)

    # Load checkpoint if necessary
    if args.load_model is not None:
        logger.info('Loading model_checkpoint from %s' % args.load_model)
        model_checkpoint = torch.load(
            args.load_model, map_location=lambda storage, loc: storage)
        if not args.transfer_learning:
            # Architecture options must follow the checkpoint.
            args.doc_classifier = model_checkpoint['opt'].doc_classifier
            args.nbr_class_neurons = model_checkpoint['opt'].nbr_class_neurons
    else:
        model_checkpoint = None

    if args.gan_mode and args.load_generator is not None:
        logger.info('Loading generator_checkpoint from %s' % args.load_generator)
        generator_checkpoint = torch.load(
            args.load_generator, map_location=lambda storage, loc: storage)
        args.generator = generator_checkpoint['opt'].generator
    else:
        generator_checkpoint = None

    # Data generator for training
    def train_iter_fct():
        return data_loader.Dataloader(args,
                                      load_dataset(args, 'train', shuffle=True),
                                      args.batch_size, device,
                                      shuffle=True)

    # Data generator for validation
    def valid_iter_fct():
        return data_loader.Dataloader(args,
                                      load_dataset(args, 'valid', shuffle=False),
                                      args.test_batch_size, device,
                                      shuffle=False)

    # Creation model
    model = Ext_summarizer(args, device, model_checkpoint)
    optim = model_builder.build_optim(args, model, model_checkpoint)
    logger.info(model)

    if args.gan_mode:
        # Creation generator if gan
        generator = Generator(args, model.length_embeddings, device,
                              generator_checkpoint)
        optim_generator = model_builder.build_optim_generator(
            args, generator, generator_checkpoint)
        logger.info(generator)
    else:
        generator = None
        optim_generator = None

    trainer = build_trainer(args, device_id, model, generator,
                            optim, optim_generator)
    trainer.train(train_iter_fct, args.train_steps, valid_iter_fct)
def call_train():
    """Build the extractive-summarization CLI, load the checkpoint named by
    -test_from, and return (model, args, trainer) ready for text inference.

    Returns:
        model: ExtSummarizer in eval mode with the checkpoint weights.
        args: parsed argparse Namespace, overridden by checkpointed flags.
        trainer: inference-only trainer (no optimizer).
    """
    parser = argparse.ArgumentParser()
    # Task / data / path options.
    parser.add_argument("-task", default='ext', type=str, choices=['ext'])
    parser.add_argument("-encoder", default='bert', type=str, choices=['bert', 'baseline'])
    parser.add_argument("-mode", default='test_text', type=str, choices=['test_text'])
    parser.add_argument("-bert_data_path", default='bert_data/bert_data_cnndm_final/cnndm')
    parser.add_argument("-model_path", default='models/')
    parser.add_argument("-result_path", default='logs/ext_bert_cnndm')
    parser.add_argument("-temp_dir", default='./temp')
    parser.add_argument("-text_src", default='raw_data/temp.raw_src')
    parser.add_argument("-text_tgt", default='')
    # Batch / input-size options.
    parser.add_argument("-batch_size", default=140, type=int)
    parser.add_argument("-test_batch_size", default=200, type=int)
    parser.add_argument("-max_ndocs_in_batch", default=6, type=int)
    parser.add_argument("-max_pos", default=512, type=int)
    parser.add_argument("-use_interval", type=str2bool, nargs='?', const=True, default=True)
    parser.add_argument("-large", type=str2bool, nargs='?', const=True, default=False)
    parser.add_argument("-load_from_extractive", default='', type=str)
    # Optimizer / decoder architecture options.
    parser.add_argument("-sep_optim", type=str2bool, nargs='?', const=True, default=True)
    parser.add_argument("-lr_bert", default=2e-3, type=float)
    parser.add_argument("-lr_dec", default=2e-3, type=float)
    parser.add_argument("-use_bert_emb", type=str2bool, nargs='?', const=True, default=False)
    parser.add_argument("-share_emb", type=str2bool, nargs='?', const=True, default=False)
    parser.add_argument("-finetune_bert", type=str2bool, nargs='?', const=True, default=True)
    parser.add_argument("-dec_dropout", default=0.2, type=float)
    parser.add_argument("-dec_layers", default=6, type=int)
    parser.add_argument("-dec_hidden_size", default=768, type=int)
    parser.add_argument("-dec_heads", default=8, type=int)
    parser.add_argument("-dec_ff_size", default=2048, type=int)
    parser.add_argument("-enc_hidden_size", default=512, type=int)
    parser.add_argument("-enc_ff_size", default=512, type=int)
    parser.add_argument("-enc_dropout", default=0.2, type=float)
    parser.add_argument("-enc_layers", default=6, type=int)
    # params for EXT
    parser.add_argument("-ext_dropout", default=0.2, type=float)
    parser.add_argument("-ext_layers", default=2, type=int)
    parser.add_argument("-ext_hidden_size", default=768, type=int)
    parser.add_argument("-ext_heads", default=8, type=int)
    parser.add_argument("-ext_ff_size", default=2048, type=int)
    # Decoding / training hyperparameters.
    parser.add_argument("-label_smoothing", default=0.1, type=float)
    parser.add_argument("-generator_shard_size", default=32, type=int)
    parser.add_argument("-alpha", default=0.95, type=float)
    parser.add_argument("-beam_size", default=5, type=int)
    parser.add_argument("-min_length", default=50, type=int)
    parser.add_argument("-max_length", default=200, type=int)
    parser.add_argument("-max_tgt_len", default=140, type=int)
    parser.add_argument("-param_init", default=0, type=float)
    parser.add_argument("-param_init_glorot", type=str2bool, nargs='?', const=True, default=True)
    parser.add_argument("-optim", default='adam', type=str)
    parser.add_argument("-lr", default=1, type=float)
    parser.add_argument("-beta1", default=0.9, type=float)
    parser.add_argument("-beta2", default=0.999, type=float)
    parser.add_argument("-warmup_steps", default=8000, type=int)
    parser.add_argument("-warmup_steps_bert", default=8000, type=int)
    parser.add_argument("-warmup_steps_dec", default=8000, type=int)
    parser.add_argument("-max_grad_norm", default=0, type=float)
    parser.add_argument("-save_checkpoint_steps", default=5, type=int)
    parser.add_argument("-accum_count", default=1, type=int)
    parser.add_argument("-report_every", default=1, type=int)
    parser.add_argument("-train_steps", default=1000, type=int)
    parser.add_argument("-recall_eval", type=str2bool, nargs='?', const=True, default=False)
    # Device / logging / checkpoint options.
    parser.add_argument('-visible_gpus', default='0', type=str)
    parser.add_argument('-gpu_ranks', default='0', type=str)
    parser.add_argument('-log_file', default='logs/cnndm.log')
    parser.add_argument('-seed', default=666, type=int)
    parser.add_argument("-test_all", type=str2bool, nargs='?', const=True, default=False)
    parser.add_argument("-test_from", default='models/bertext_cnndm_transformer.pt')
    parser.add_argument("-test_start_from", default=-1, type=int)
    parser.add_argument("-train_from", default='')
    parser.add_argument("-report_rouge", type=str2bool, nargs='?', const=True, default=True)
    parser.add_argument("-block_trigram", type=str2bool, nargs='?', const=True, default=True)

    args = parser.parse_args()
    # Derive gpu_ranks/world_size from -visible_gpus and restrict CUDA to them.
    args.gpu_ranks = [int(i) for i in range(len(args.visible_gpus.split(',')))]
    args.world_size = len(args.gpu_ranks)
    os.environ["CUDA_VISIBLE_DEVICES"] = args.visible_gpus

    init_logger(args.log_file)
    logger.info('Loading checkpoint from %s' % args.test_from)
    checkpoint = torch.load(args.test_from, map_location=lambda storage, loc: storage)
    # Model-architecture flags stored in the checkpoint override the CLI.
    opt = vars(checkpoint['opt'])
    for k in opt.keys():
        if (k in model_flags):
            setattr(args, k, opt[k])

    device = "cpu" if args.visible_gpus == '-1' else "cuda"
    device_id = 0 if device == "cuda" else -1
    logger.info('Coming here:1')
    model = ExtSummarizer(args, device, checkpoint)
    model.eval()
    trainer = build_trainer(args, device_id, model, None)
    logger.info('Coming here:2')
    logger.info('args: %s' % args)
    return model, args, trainer