def cluster_sentence(premise, hypothesis, tokenizer, test_collater, model):
    """Score one premise/hypothesis pair and return the prediction as text.

    The pair is converted into a single feature record, wrapped in a one-item
    ``RunTimeDataset``/``DataLoader`` and handed to ``eval_model`` together
    with the 'mnli' entries of the module-level ``task_defs``.

    Args:
        premise: first sentence, forwarded to ``bert_feature_extractor``.
        hypothesis: second sentence, forwarded to ``bert_feature_extractor``.
        tokenizer: passed to the feature extractor as ``tokenize_fn``.
        test_collater: object whose ``collate_fn`` batches the example.
        model: model handed to ``eval_model``.

    Returns:
        The string ``'{"label": ' + pred + '}'`` where ``pred`` is the value
        returned by ``eval_model``.

    Side effects:
        Sets ``args.cuda`` to ``False`` on the module-level ``args``.
    """
    task_name = 'mnli'

    token_ids, _, segment_ids = bert_feature_extractor(
        premise, hypothesis, max_seq_length=64, tokenize_fn=tokenizer)
    example = {
        'uid': '0',
        'label': '0',
        'token_id': token_ids,
        'type_id': segment_ids,
    }
    runtime_set = RunTimeDataset(
        [example],
        None,
        False,
        maxlen=args.max_seq_len,
        task_id=0,
        task_type=TaskType.Classification,
        data_type=DataFormat.PremiseAndOneHypothesis)
    loader = DataLoader(
        runtime_set,
        batch_size=args.batch_size_eval,
        collate_fn=test_collater.collate_fn,
        pin_memory=args.cuda)

    # The loader above still saw the caller's cuda flag; from here on the
    # shared ``args`` object is switched to CPU.
    args.cuda = False

    with torch.no_grad():
        label_mapper = task_defs.global_map.get('mnli', None)
        pred = eval_model(
            model,
            loader,
            metric_meta=task_defs.metric_meta_map[task_name],
            use_cuda=args.cuda,
            label_mapper=label_mapper,
            with_label=False,
            task_type=task_defs.task_type_map[task_name])
        # NOTE(review): the concatenation below only works if ``eval_model``
        # returns a str here -- confirm against its definition.
        return '{"label": ' + pred + '}'
def main(args):
    """Evaluate a saved checkpoint on one preprocessed input file.

    Args:
        args: options object; the attributes read here are ``task_def``,
            ``task``, ``checkpoint``, ``cuda``, ``prep_input``,
            ``max_seq_len``, ``batch_size_eval``, ``with_label`` and
            ``score``.

    Returns:
        None.  The metrics, predictions, uids and scores are written with
        ``dump`` to ``args.score``; the metrics are also printed when
        ``args.with_label`` is set.

    Raises:
        AssertionError: if ``args.task`` is missing from one of the task
            definition maps or ``args.checkpoint`` does not exist.
    """
    task_defs = TaskDefs(args.task_def)

    # The task must be described in every lookup table used below.
    for table_name in ('task_type_map', 'data_type_map', 'metric_meta_map'):
        assert args.task in getattr(task_defs, table_name)
    data_type = task_defs.data_type_map[args.task]  # looked up but not used below
    task_type = task_defs.task_type_map[args.task]
    metric_meta = task_defs.metric_meta_map[args.task]

    # Restore the model from the checkpoint.
    checkpoint_path = args.checkpoint
    assert os.path.exists(checkpoint_path)
    # When CUDA is off, the stored tensors are mapped onto the CPU on load.
    load_options = {} if args.cuda else {"map_location": "cpu"}
    state_dict = torch.load(checkpoint_path, **load_options)
    config = state_dict['config']
    config["cuda"] = args.cuda
    model = MTDNNModel(config, state_dict=state_dict)
    model.load(checkpoint_path)
    encoder_type = config.get('encoder_type', EncoderModelType.BERT)

    # Build the evaluation loader.
    eval_set = SingleTaskDataset(
        args.prep_input, False, task_type=task_type, maxlen=args.max_seq_len)
    eval_collater = Collater(is_train=False, encoder_type=encoder_type)
    loader = DataLoader(
        eval_set,
        batch_size=args.batch_size_eval,
        collate_fn=eval_collater.collate_fn,
        pin_memory=args.cuda)

    with torch.no_grad():
        outcome = eval_model(
            model,
            loader,
            metric_meta=metric_meta,
            use_cuda=args.cuda,
            with_label=args.with_label)
    metrics, predictions, scores, _golds, uids = outcome

    results = dict(
        metrics=metrics,
        predictions=predictions,
        uids=uids,
        scores=scores)
    dump(args.score, results)
    if args.with_label:
        print(metrics)
def evaluation(model, datasets, data_list, task_defs, output_dir='checkpoints', epoch=0, n_updates=-1, with_label=False, tensorboard=None, glue_format_on=False, test_on=False, device=None, logger=None):
    """Evaluate ``model`` on every dataset that has a loader and store the scores.

    Args:
        model: model passed to ``eval_model``; ``model.updates`` is read when
            ``n_updates > 0``.
        datasets: dataset names; the part before the first ``'_'`` selects
            the task definition.
        data_list: loaders aligned by index with ``datasets``; ``None``
            entries are skipped.
        task_defs: object providing ``get_task_def(prefix)``.
        output_dir: directory that receives the score files.
        epoch: step value used for logging and file names when
            ``n_updates <= 0``.
        n_updates: when positive, ``model.updates`` is used as the step value
            instead of ``epoch``.
        with_label: forwarded to ``eval_model``.
        tensorboard: optional writer exposing ``add_scalar``.
        glue_format_on: additionally write a ``.tsv`` file through
            ``experiments.glue.glue_utils.submit``.
        test_on: label the run "Test" instead of "Dev".
        device: forwarded to ``eval_model``.
        logger: forwarded to ``print_message``.

    Returns:
        None.  Metrics are logged; score files are written only when the
        module-level ``args.local_rank`` is -1 or 0.

    Note:
        Metric values that are neither ``str`` nor ``float`` are replaced by
        their ``str()`` form in the metrics dict before it is dumped.
    """
    print_message(logger, "Evaluation")
    test_prefix = "Test" if test_on else "Dev"
    step_by_updates = n_updates > 0
    updates_str = "updates" if step_by_updates else "epoch"
    updates = model.updates if step_by_updates else epoch

    for idx, dataset in enumerate(datasets):
        prefix = dataset.split('_')[0]
        task_def = task_defs.get_task_def(prefix)
        label_dict = task_def.label_vocab
        test_data = data_list[idx]
        if test_data is None:
            continue

        with torch.no_grad():
            test_metrics, test_predictions, test_scores, test_golds, test_ids = eval_model(
                model,
                test_data,
                metric_meta=task_def.metric_meta,
                device=device,
                with_label=with_label,
                label_mapper=label_dict,
                task_type=task_def.task_type)

        # Iterate over a snapshot because values are rewritten in the loop.
        for key, val in list(test_metrics.items()):
            # Textual metrics cannot be plotted as scalars; passing them to
            # add_scalar would abort the whole evaluation.
            # NOTE(review): other non-numeric values are still forwarded
            # unchanged, as before.
            if tensorboard and not isinstance(val, str):
                tensorboard.add_scalar('{}/{}/{}'.format(test_prefix, dataset, key), val, global_step=updates)
            if isinstance(val, str):
                print_message(logger, 'Task {0} -- {1} {2} -- {3} {4}: {5}'.format(dataset, updates_str, updates, test_prefix, key, val), level=1)
            elif isinstance(val, float):
                print_message(logger, 'Task {0} -- {1} {2} -- {3} {4}: {5:.3f}'.format(dataset, updates_str, updates, test_prefix, key, val), level=1)
            else:
                # Store the printable form so the metrics dict can be dumped.
                test_metrics[key] = str(val)
                print_message(logger, 'Task {0} -- {1} {2} -- {3} {4}: \n{5}'.format(dataset, updates_str, updates, test_prefix, key, val), level=1)

        # Only the process with local_rank -1 or 0 writes files.
        if args.local_rank in [-1, 0]:
            score_file = os.path.join(output_dir, '{}_{}_scores_{}_{}.json'.format(dataset, test_prefix.lower(), updates_str, updates))
            results = {'metrics': test_metrics, 'predictions': test_predictions, 'uids': test_ids, 'scores': test_scores}
            dump(score_file, results)
            if glue_format_on:
                from experiments.glue.glue_utils import submit
                # NOTE(review): unlike the JSON name, this name does not
                # contain the step value, so successive evaluations use the
                # same path -- confirm this is intended.
                official_score_file = os.path.join(output_dir, '{}_{}_scores_{}.tsv'.format(dataset, test_prefix.lower(), updates_str))
                submit(official_score_file, results, label_dict)
def main():
    """Train the MT-DNN model and evaluate it after every epoch.

    Everything is driven by module-level state: ``args``, ``task_defs``,
    ``data_dir``, ``output_dir``, ``encoder_type`` and ``logger``.  The
    function

    1. builds one ``SingleTaskDataset`` per distinct training prefix and a
       multi-task ``DataLoader`` over them,
    2. builds dev/test loaders for every entry of ``args.test_datasets``
       (``None`` where the json file does not exist),
    3. creates the ``MTDNNModel`` from ``args.init_checkpoint`` (a saved
       state dict if the path exists, otherwise a pretrained config),
    4. writes ``config.json`` into ``output_dir``,
    5. either stores encodings of the test sets (``args.encode_mode``) or
       runs the training loop, scoring dev/test data and saving a checkpoint
       at the end of each epoch.

    Returns:
        None.

    Raises:
        ValueError: if nothing exists at ``args.init_checkpoint`` and
            ``opt['encoder_type']`` is not a value of ``EncoderModelType``.
    """
    logger.info('Launching the MT-DNN training')
    # ``vars`` hands back the attribute dict of ``args`` itself, so writes to
    # ``opt`` are also visible through ``args``.
    opt = vars(args)
    opt['data_dir'] = data_dir

    # ---- training data: one task id per distinct dataset prefix ----------
    tasks = {}
    task_def_list = []
    train_datasets = []
    for dataset in args.train_datasets:
        prefix = dataset.split('_')[0]
        if prefix in tasks:
            continue
        task_id = len(tasks)
        tasks[prefix] = task_id
        task_def = task_defs.get_task_def(prefix)
        task_def_list.append(task_def)
        train_path = os.path.join(data_dir, '{}_train.json'.format(dataset))
        logger.info('Loading {} as task {}'.format(train_path, task_id))
        train_data_set = SingleTaskDataset(train_path, True, maxlen=args.max_seq_len, task_id=task_id, task_def=task_def)
        train_datasets.append(train_data_set)
    train_collater = Collater(dropout_w=args.dropout_w, encoder_type=encoder_type, soft_label=args.mkd_opt > 0)
    multi_task_train_dataset = MultiTaskDataset(train_datasets)
    multi_task_batch_sampler = MultiTaskBatchSampler(train_datasets, args.batch_size, args.mix_opt, args.ratio)
    multi_task_train_data = DataLoader(multi_task_train_dataset, batch_sampler=multi_task_batch_sampler, collate_fn=train_collater.collate_fn, pin_memory=args.cuda)

    opt['task_def_list'] = task_def_list

    # ---- dev / test loaders (None marks a missing file) ------------------
    dev_data_list = []
    test_data_list = []
    test_collater = Collater(is_train=False, encoder_type=encoder_type)
    for dataset in args.test_datasets:
        prefix = dataset.split('_')[0]
        task_def = task_defs.get_task_def(prefix)
        task_id = tasks[prefix]

        dev_path = os.path.join(data_dir, '{}_dev.json'.format(dataset))
        dev_data = None
        if os.path.exists(dev_path):
            dev_data_set = SingleTaskDataset(dev_path, False, maxlen=args.max_seq_len, task_id=task_id, task_def=task_def)
            dev_data = DataLoader(dev_data_set, batch_size=args.batch_size_eval, collate_fn=test_collater.collate_fn, pin_memory=args.cuda)
        dev_data_list.append(dev_data)

        test_path = os.path.join(data_dir, '{}_test.json'.format(dataset))
        test_data = None
        if os.path.exists(test_path):
            test_data_set = SingleTaskDataset(test_path, False, maxlen=args.max_seq_len, task_id=task_id, task_def=task_def)
            test_data = DataLoader(test_data_set, batch_size=args.batch_size_eval, collate_fn=test_collater.collate_fn, pin_memory=args.cuda)
        test_data_list.append(test_data)

    logger.info('#' * 20)
    logger.info(opt)
    logger.info('#' * 20)

    # Total batch count divided by the gradient accumulation step.
    num_all_batches = args.epochs * len(multi_task_train_data) // args.grad_accumulation_step
    logger.info('############# Gradient Accumulation Info #############')
    logger.info('number of step: {}'.format(args.epochs * len(multi_task_train_data)))
    logger.info('number of grad grad_accumulation step: {}'.format(args.grad_accumulation_step))
    logger.info('adjusted number of step: {}'.format(num_all_batches))
    logger.info('############# Gradient Accumulation Info #############')

    # ---- model configuration ---------------------------------------------
    init_model = args.init_checkpoint
    state_dict = None
    if os.path.exists(init_model):
        state_dict = torch.load(init_model)
        config = state_dict['config']
    else:
        if opt['encoder_type'] not in EncoderModelType._value2member_map_:
            raise ValueError("encoder_type is out of pre-defined types")
        literal_encoder_type = EncoderModelType(opt['encoder_type']).name.lower()
        config_class, _, _ = MODEL_CLASSES[literal_encoder_type]
        config = config_class.from_pretrained(init_model, output_hidden_states=True).to_dict()
    # change here to enable multi-layer output
    config['output_hidden_states'] = True
    config['attention_probs_dropout_prob'] = args.bert_dropout_p
    config['hidden_dropout_prob'] = args.bert_dropout_p
    config['multi_gpu_on'] = opt["multi_gpu_on"]
    if args.num_hidden_layers != -1:
        config['num_hidden_layers'] = args.num_hidden_layers
    opt.update(config)

    model = MTDNNModel(opt, state_dict=state_dict, num_train_step=num_all_batches)
    if args.resume and args.model_ckpt:
        logger.info('loading model from {}'.format(args.model_ckpt))
        model.load(args.model_ckpt)

    # Log the network and dump the effective configuration next to it.
    headline = '############# Model Arch of MT-DNN #############'
    logger.info('\n{}\n{}\n'.format(headline, model.network))
    config_file = os.path.join(output_dir, 'config.json')
    with open(config_file, 'w', encoding='utf-8') as writer:
        writer.write('{}\n'.format(json.dumps(opt)))
        writer.write('\n{}\n{}\n'.format(headline, model.network))
    logger.info("Total number of params: {}".format(model.total_param))

    # ---- tensorboard ------------------------------------------------------
    tensorboard = None
    if args.tensorboard:
        args.tensorboard_logdir = os.path.join(args.output_dir, args.tensorboard_logdir)
        tensorboard = SummaryWriter(log_dir=args.tensorboard_logdir)

    # The writer is closed in ``finally`` so that the encode-mode early
    # return and any exception still release it.
    try:
        if args.encode_mode:
            for idx, dataset in enumerate(args.test_datasets):
                test_data = test_data_list[idx]
                if test_data is None:
                    # No test file was found for this dataset above.
                    logger.warning('No test data for {}; skipping encoding'.format(dataset))
                    continue
                with torch.no_grad():
                    encoding = extract_encoding(model, test_data, use_cuda=args.cuda)
                torch.save(encoding, os.path.join(output_dir, '{}_encoding.pt'.format(dataset)))
            return

        for epoch in range(0, args.epochs):
            logger.warning('At epoch {}'.format(epoch))
            start = datetime.now()
            for i, (batch_meta, batch_data) in enumerate(multi_task_train_data):
                batch_meta, batch_data = Collater.patch_data(args.cuda, batch_meta, batch_data)
                task_id = batch_meta['task_id']
                model.update(batch_meta, batch_data)

                if (model.local_updates) % (args.log_per_updates * args.grad_accumulation_step) == 0 or model.local_updates == 1:
                    # Average time per batch so far times the batches left.
                    remaining_time = str(
                        (datetime.now() - start) / (i + 1) *
                        (len(multi_task_train_data) - i - 1)).split('.')[0]
                    logger.info(
                        'Task [{0:2}] updates[{1:6}] train loss[{2:.5f}] remaining[{3}]'
                        .format(task_id, model.updates, model.train_loss.avg, remaining_time))
                    if tensorboard is not None:
                        tensorboard.add_scalar('train/loss', model.train_loss.avg, global_step=model.updates)

                if args.save_per_updates_on and (
                        (model.local_updates) % (args.save_per_updates * args.grad_accumulation_step) == 0):
                    model_file = os.path.join(output_dir, 'model_{}_{}.pt'.format(epoch, model.updates))
                    logger.info('Saving mt-dnn model to {}'.format(model_file))
                    model.save(model_file)

            # ---- end-of-epoch evaluation ---------------------------------
            for idx, dataset in enumerate(args.test_datasets):
                prefix = dataset.split('_')[0]
                task_def = task_defs.get_task_def(prefix)
                label_dict = task_def.label_vocab

                dev_data = dev_data_list[idx]
                if dev_data is not None:
                    with torch.no_grad():
                        dev_metrics, dev_predictions, scores, golds, dev_ids = eval_model(
                            model,
                            dev_data,
                            metric_meta=task_def.metric_meta,
                            use_cuda=args.cuda,
                            label_mapper=label_dict,
                            task_type=task_def.task_type)
                    for key, val in dev_metrics.items():
                        # Textual metrics cannot be plotted as scalars.
                        if tensorboard is not None and not isinstance(val, str):
                            tensorboard.add_scalar('dev/{}/{}'.format(dataset, key), val, global_step=epoch)
                        if isinstance(val, str):
                            logger.warning('Task {0} -- epoch {1} -- Dev {2}:\n {3}'.format(dataset, epoch, key, val))
                        else:
                            logger.warning('Task {0} -- epoch {1} -- Dev {2}: {3:.3f}'.format(dataset, epoch, key, val))
                    score_file = os.path.join(output_dir, '{}_dev_scores_{}.json'.format(dataset, epoch))
                    results = {
                        'metrics': dev_metrics,
                        'predictions': dev_predictions,
                        'uids': dev_ids,
                        'scores': scores
                    }
                    dump(score_file, results)
                    if args.glue_format_on:
                        from experiments.glue.glue_utils import submit
                        official_score_file = os.path.join(output_dir, '{}_dev_scores_{}.tsv'.format(dataset, epoch))
                        submit(official_score_file, results, label_dict)

                # Test split: evaluated without labels.
                test_data = test_data_list[idx]
                if test_data is not None:
                    with torch.no_grad():
                        test_metrics, test_predictions, scores, golds, test_ids = eval_model(
                            model,
                            test_data,
                            metric_meta=task_def.metric_meta,
                            use_cuda=args.cuda,
                            with_label=False,
                            label_mapper=label_dict,
                            task_type=task_def.task_type)
                    score_file = os.path.join(output_dir, '{}_test_scores_{}.json'.format(dataset, epoch))
                    results = {
                        'metrics': test_metrics,
                        'predictions': test_predictions,
                        'uids': test_ids,
                        'scores': scores
                    }
                    dump(score_file, results)
                    if args.glue_format_on:
                        from experiments.glue.glue_utils import submit
                        official_score_file = os.path.join(output_dir, '{}_test_scores_{}.tsv'.format(dataset, epoch))
                        submit(official_score_file, results, label_dict)
                    logger.info('[new test scores saved.]')

            model_file = os.path.join(output_dir, 'model_{}.pt'.format(epoch))
            model.save(model_file)
    finally:
        if tensorboard is not None:
            tensorboard.close()
def main():
    """Train MT-DNN using the map-based task definitions and score each epoch.

    Works entirely on module-level state (``args``, ``task_defs``,
    ``data_dir``, ``output_dir``, ``encoder_type``, ``logger``):

    * builds the per-task option lists and training datasets,
    * builds dev/test loaders for ``args.test_datasets`` (``None`` when the
      json file is missing),
    * loads the initial weights/config depending on ``encoder_type``,
    * writes ``config.json`` to ``output_dir``,
    * trains for ``args.epochs`` epochs; after each epoch the dev and test
      sets are evaluated, score files are dumped and ``model_{epoch}.pt`` is
      saved.

    Returns nothing.
    """
    logger.info('Launching the MT-DNN training')
    # vars() returns the attribute dict of ``args``; writes to ``opt`` are
    # therefore writes to ``args`` as well.
    opt = vars(args)
    # update data dir
    opt['data_dir'] = data_dir
    batch_size = args.batch_size  # not used further in this function
    tasks = {}          # dataset prefix -> index in order of first appearance
    tasks_class = {}    # number of classes -> index in order of first appearance
    nclass_list = []
    decoder_opts = []
    task_types = []
    dropout_list = []
    loss_types = []
    kd_loss_types = []
    train_datasets = []
    for dataset in args.train_datasets:
        prefix = dataset.split('_')[0]
        # Only the first dataset of each prefix is loaded.
        if prefix in tasks:
            continue
        assert prefix in task_defs.n_class_map
        assert prefix in task_defs.data_type_map
        data_type = task_defs.data_type_map[prefix]
        nclass = task_defs.n_class_map[prefix]
        task_id = len(tasks)
        # With mtl_opt > 0 the task id is keyed by the class count, so
        # prefixes with the same number of classes get the same id.
        if args.mtl_opt > 0:
            task_id = tasks_class[nclass] if nclass in tasks_class else len(
                tasks_class)
        task_type = task_defs.task_type_map[prefix]
        dopt = generate_decoder_opt(task_defs.enable_san_map[prefix],
                                    opt['answer_opt'])
        # An id that already has a decoder option keeps the smaller value.
        if task_id < len(decoder_opts):
            decoder_opts[task_id] = min(decoder_opts[task_id], dopt)
        else:
            decoder_opts.append(dopt)
        # NOTE(review): these three lists grow once per prefix, while
        # decoder_opts grows once per task id; with mtl_opt > 0 their lengths
        # can differ -- confirm this is what the model expects.
        task_types.append(task_type)
        loss_types.append(task_defs.loss_map[prefix])
        kd_loss_types.append(task_defs.kd_loss_map[prefix])
        # Register the prefix / class count only now, after task_id has been
        # derived from the previous state of both dicts.
        if prefix not in tasks:
            tasks[prefix] = len(tasks)
            if args.mtl_opt < 1:
                nclass_list.append(nclass)
        if (nclass not in tasks_class):
            tasks_class[nclass] = len(tasks_class)
            if args.mtl_opt > 0:
                nclass_list.append(nclass)
        # Task-specific dropout, falling back to the global default.
        dropout_p = task_defs.dropout_p_map.get(prefix, args.dropout_p)
        dropout_list.append(dropout_p)
        train_path = os.path.join(data_dir, '{}_train.json'.format(dataset))
        logger.info('Loading {} as task {}'.format(train_path, task_id))
        train_data_set = SingleTaskDataset(train_path,
                                           True,
                                           maxlen=args.max_seq_len,
                                           task_id=task_id,
                                           task_type=task_type,
                                           data_type=data_type)
        train_datasets.append(train_data_set)
    train_collater = Collater(dropout_w=args.dropout_w,
                              encoder_type=encoder_type)
    multi_task_train_dataset = MultiTaskDataset(train_datasets)
    multi_task_batch_sampler = MultiTaskBatchSampler(train_datasets,
                                                     args.batch_size,
                                                     args.mix_opt, args.ratio)
    multi_task_train_data = DataLoader(multi_task_train_dataset,
                                       batch_sampler=multi_task_batch_sampler,
                                       collate_fn=train_collater.collate_fn,
                                       pin_memory=args.cuda)
    # Publish the per-task lists through ``opt`` (and thus ``args``).
    opt['answer_opt'] = decoder_opts
    opt['task_types'] = task_types
    opt['tasks_dropout_p'] = dropout_list
    opt['loss_types'] = loss_types
    opt['kd_loss_types'] = kd_loss_types
    args.label_size = ','.join([str(l) for l in nclass_list])
    logger.info(args.label_size)
    # dev_data_list / test_data_list are aligned by index with
    # args.test_datasets; an entry is None when the file does not exist.
    dev_data_list = []
    test_data_list = []
    test_collater = Collater(is_train=False, encoder_type=encoder_type)
    for dataset in args.test_datasets:
        prefix = dataset.split('_')[0]
        # Same id scheme as in the training loop above.
        task_id = tasks_class[
            task_defs.
            n_class_map[prefix]] if args.mtl_opt > 0 else tasks[prefix]
        task_type = task_defs.task_type_map[prefix]
        pw_task = False  # set below but not read again in this function
        if task_type == TaskType.Ranking:
            pw_task = True
        assert prefix in task_defs.data_type_map
        data_type = task_defs.data_type_map[prefix]
        dev_path = os.path.join(data_dir, '{}_dev.json'.format(dataset))
        dev_data = None
        if os.path.exists(dev_path):
            dev_data_set = SingleTaskDataset(dev_path,
                                             False,
                                             maxlen=args.max_seq_len,
                                             task_id=task_id,
                                             task_type=task_type,
                                             data_type=data_type)
            dev_data = DataLoader(dev_data_set,
                                  batch_size=args.batch_size_eval,
                                  collate_fn=test_collater.collate_fn,
                                  pin_memory=args.cuda)
        dev_data_list.append(dev_data)
        test_path = os.path.join(data_dir, '{}_test.json'.format(dataset))
        test_data = None
        if os.path.exists(test_path):
            test_data_set = SingleTaskDataset(test_path,
                                              False,
                                              maxlen=args.max_seq_len,
                                              task_id=task_id,
                                              task_type=task_type,
                                              data_type=data_type)
            test_data = DataLoader(test_data_set,
                                   batch_size=args.batch_size_eval,
                                   collate_fn=test_collater.collate_fn,
                                   pin_memory=args.cuda)
        test_data_list.append(test_data)
    logger.info('#' * 20)
    logger.info(opt)
    logger.info('#' * 20)
    # Total number of batches over all epochs, divided by the gradient
    # accumulation step.
    num_all_batches = args.epochs * len(
        multi_task_train_data) // args.grad_accumulation_step
    logger.info('############# Gradient Accumulation Info #############')
    logger.info('number of step: {}'.format(args.epochs *
                                            len(multi_task_train_data)))
    logger.info('number of grad grad_accumulation step: {}'.format(
        args.grad_accumulation_step))
    logger.info('adjusted number of step: {}'.format(num_all_batches))
    logger.info('############# Gradient Accumulation Info #############')
    bert_model_path = args.init_checkpoint
    state_dict = None
    if encoder_type == EncoderModelType.BERT:
        if os.path.exists(bert_model_path):
            # Take the config stored in the checkpoint and override the
            # dropout and multi-GPU settings with the current options.
            state_dict = torch.load(bert_model_path)
            config = state_dict['config']
            config['attention_probs_dropout_prob'] = args.bert_dropout_p
            config['hidden_dropout_prob'] = args.bert_dropout_p
            config['multi_gpu_on'] = opt["multi_gpu_on"]
            opt.update(config)
        else:
            # No checkpoint: continue with a default BertConfig and no
            # state dict.
            logger.error('#' * 20)
            logger.error(
                'Could not find the init model!\n The parameters will be initialized randomly!'
            )
            logger.error('#' * 20)
            config = BertConfig(vocab_size_or_config_json_file=30522).to_dict()
            config['multi_gpu_on'] = opt["multi_gpu_on"]
            opt.update(config)
    elif encoder_type == EncoderModelType.ROBERTA:
        bert_model_path = '{}/model.pt'.format(bert_model_path)
        # If the file is missing, state_dict stays None and nothing is logged.
        if os.path.exists(bert_model_path):
            new_state_dict = {}
            state_dict = torch.load(bert_model_path)
            # Keep only the sentence-encoder and classification-head weights,
            # re-keyed under the 'bert.model.' prefix.
            for key, val in state_dict['model'].items():
                if key.startswith('decoder.sentence_encoder'):
                    key = 'bert.model.{}'.format(key)
                    new_state_dict[key] = val
                elif key.startswith('classification_heads'):
                    key = 'bert.model.{}'.format(key)
                    new_state_dict[key] = val
            state_dict = {'state': new_state_dict}
    model = MTDNNModel(opt,
                       state_dict=state_dict,
                       num_train_step=num_all_batches)
    if args.resume and args.model_ckpt:
        logger.info('loading model from {}'.format(args.model_ckpt))
        model.load(args.model_ckpt)
    # Headline placed above the network description in the log and in
    # config.json.
    headline = '############# Model Arch of MT-DNN #############'
    # Log the network structure.
    logger.info('\n{}\n{}\n'.format(headline, model.network))
    # Dump the options as JSON, followed by the network description.
    config_file = os.path.join(output_dir, 'config.json')
    with open(config_file, 'w', encoding='utf-8') as writer:
        writer.write('{}\n'.format(json.dumps(opt)))
        writer.write('\n{}\n{}\n'.format(headline, model.network))
    logger.info("Total number of params: {}".format(model.total_param))
    # tensorboard: the writer only exists when args.tensorboard is set, so
    # every later use is guarded by the same flag.
    if args.tensorboard:
        args.tensorboard_logdir = os.path.join(args.output_dir,
                                               args.tensorboard_logdir)
        tensorboard = SummaryWriter(log_dir=args.tensorboard_logdir)
    for epoch in range(0, args.epochs):
        logger.warning('At epoch {}'.format(epoch))
        start = datetime.now()
        for i, (batch_meta, batch_data) in enumerate(multi_task_train_data):
            batch_meta, batch_data = Collater.patch_data(
                args.cuda, batch_meta, batch_data)
            task_id = batch_meta['task_id']
            model.update(batch_meta, batch_data)
            # Log on the first local update and then every
            # log_per_updates * grad_accumulation_step local updates.
            if (model.local_updates) % (args.log_per_updates *
                                        args.grad_accumulation_step
                                        ) == 0 or model.local_updates == 1:
                # Average time per batch so far times the batches left;
                # fractional seconds are cut off.
                ramaining_time = str(
                    (datetime.now() - start) / (i + 1) *
                    (len(multi_task_train_data) - i - 1)).split('.')[0]
                logger.info(
                    'Task [{0:2}] updates[{1:6}] train loss[{2:.5f}] remaining[{3}]'
                    .format(task_id, model.updates, model.train_loss.avg,
                            ramaining_time))
                if args.tensorboard:
                    tensorboard.add_scalar('train/loss',
                                           model.train_loss.avg,
                                           global_step=model.updates)
            # Optional intermediate checkpoint.
            if args.save_per_updates_on and (
                (model.local_updates) %
                (args.save_per_updates * args.grad_accumulation_step) == 0):
                model_file = os.path.join(
                    output_dir, 'model_{}_{}.pt'.format(epoch, model.updates))
                logger.info('Saving mt-dnn model to {}'.format(model_file))
                model.save(model_file)
        # End of epoch: evaluate every dataset that has a loader.
        for idx, dataset in enumerate(args.test_datasets):
            prefix = dataset.split('_')[0]
            label_dict = task_defs.global_map.get(prefix, None)
            dev_data = dev_data_list[idx]
            if dev_data is not None:
                with torch.no_grad():
                    dev_metrics, dev_predictions, scores, golds, dev_ids = eval_model(
                        model,
                        dev_data,
                        metric_meta=task_defs.metric_meta_map[prefix],
                        use_cuda=args.cuda,
                        label_mapper=label_dict,
                        task_type=task_defs.task_type_map[prefix])
                for key, val in dev_metrics.items():
                    # NOTE(review): add_scalar is called for every value,
                    # including the str case handled below -- confirm that
                    # textual metrics cannot occur when tensorboard is on.
                    if args.tensorboard:
                        tensorboard.add_scalar('dev/{}/{}'.format(
                            dataset, key),
                                               val,
                                               global_step=epoch)
                    if isinstance(val, str):
                        logger.warning(
                            'Task {0} -- epoch {1} -- Dev {2}:\n {3}'.format(
                                dataset, epoch, key, val))
                    else:
                        logger.warning(
                            'Task {0} -- epoch {1} -- Dev {2}: {3:.3f}'.format(
                                dataset, epoch, key, val))
                score_file = os.path.join(
                    output_dir, '{}_dev_scores_{}.json'.format(dataset, epoch))
                results = {
                    'metrics': dev_metrics,
                    'predictions': dev_predictions,
                    'uids': dev_ids,
                    'scores': scores
                }
                dump(score_file, results)
                if args.glue_format_on:
                    from experiments.glue.glue_utils import submit
                    official_score_file = os.path.join(
                        output_dir,
                        '{}_dev_scores_{}.tsv'.format(dataset, epoch))
                    submit(official_score_file, results, label_dict)
            # Test split: evaluated with with_label=False; the metrics are
            # stored but not logged.
            test_data = test_data_list[idx]
            if test_data is not None:
                with torch.no_grad():
                    test_metrics, test_predictions, scores, golds, test_ids = eval_model(
                        model,
                        test_data,
                        metric_meta=task_defs.metric_meta_map[prefix],
                        use_cuda=args.cuda,
                        with_label=False,
                        label_mapper=label_dict,
                        task_type=task_defs.task_type_map[prefix])
                score_file = os.path.join(
                    output_dir, '{}_test_scores_{}.json'.format(dataset, epoch))
                results = {
                    'metrics': test_metrics,
                    'predictions': test_predictions,
                    'uids': test_ids,
                    'scores': scores
                }
                dump(score_file, results)
                if args.glue_format_on:
                    from experiments.glue.glue_utils import submit
                    official_score_file = os.path.join(
                        output_dir,
                        '{}_test_scores_{}.tsv'.format(dataset, epoch))
                    submit(official_score_file, results, label_dict)
                logger.info('[new test scores saved.]')
        # One checkpoint per epoch.
        model_file = os.path.join(output_dir, 'model_{}.pt'.format(epoch))
        model.save(model_file)
    if args.tensorboard:
        tensorboard.close()
assert os.path.exists(checkpoint_path) if args.cuda: state_dict = torch.load(checkpoint_path) else: state_dict = torch.load(checkpoint_path, map_location="cpu") config = state_dict['config'] config["cuda"] = args.cuda task_def = task_defs.get_task_def(prefix) task_def_list = [task_def] config['task_def_list'] = task_def_list ## temp fix config['answer_opt'] = 0 config['adv_train'] = False model = MTDNNModel(config, state_dict=state_dict) model.load(checkpoint_path) encoder_type = config.get('encoder_type', EncoderModelType.BERT) # load data test_data_set = SingleTaskDataset(args.prep_input, False, maxlen=args.max_seq_len, task_id=args.task_id, task_def=task_def) collater = Collater(is_train=False, encoder_type=encoder_type) test_data = DataLoader(test_data_set, batch_size=args.batch_size_eval, collate_fn=collater.collate_fn, pin_memory=args.cuda) with torch.no_grad(): test_metrics, test_predictions, scores, golds, test_ids = eval_model(model, test_data, metric_meta=metric_meta, use_cuda=args.cuda, with_label=args.with_label) results = {'metrics': test_metrics, 'predictions': test_predictions, 'uids': test_ids, 'scores': scores} dump(args.score, results) if args.with_label: print(test_metrics)
False, maxlen=args.max_seq_len, task_id=args.task_id, task_def=task_def, ) collater = Collater(is_train=False, encoder_type=encoder_type) test_data = DataLoader( test_data_set, batch_size=args.batch_size_eval, collate_fn=collater.collate_fn, pin_memory=args.cuda, ) with torch.no_grad(): test_metrics, test_predictions, scores, golds, test_ids = eval_model( model, test_data, metric_meta=metric_meta, device=device, with_label=args.with_label, ) results = { "metrics": test_metrics, "predictions": test_predictions, "uids": test_ids, "scores": scores, } dump(args.score, results) if args.with_label: print(test_metrics)
def main(): logger.info('Launching the MT-DNN training') opt = vars(args) # update data dir opt['data_dir'] = data_dir batch_size = args.batch_size # tensorboard tensorboard = None if args.tensorboard: args.tensorboard_logdir = os.path.join(args.output_dir, args.tensorboard_logdir) tensorboard = SummaryWriter(log_dir=args.tensorboard_logdir) json_logfile = os.path.join(args.output_dir, "runtime_log.json") tasks = {} tasks_class = {} nclass_list = [] decoder_opts = [] task_types = [] dropout_list = [] loss_types = [] kd_loss_types = [] train_datasets = [] for dataset in args.train_datasets: prefix = dataset.split('_')[0] if prefix in tasks: continue assert prefix in task_defs.n_class_map assert prefix in task_defs.data_type_map data_type = task_defs.data_type_map[prefix] nclass = task_defs.n_class_map[prefix] task_id = len(tasks) if args.mtl_opt > 0: task_id = tasks_class[nclass] if nclass in tasks_class else len( tasks_class) task_type = task_defs.task_type_map[prefix] dopt = generate_decoder_opt(task_defs.enable_san_map[prefix], opt['answer_opt']) if task_id < len(decoder_opts): decoder_opts[task_id] = min(decoder_opts[task_id], dopt) else: decoder_opts.append(dopt) task_types.append(task_type) loss_types.append(task_defs.loss_map[prefix]) kd_loss_types.append(task_defs.kd_loss_map[prefix]) if prefix not in tasks: tasks[prefix] = len(tasks) if args.mtl_opt < 1: nclass_list.append(nclass) if (nclass not in tasks_class): tasks_class[nclass] = len(tasks_class) if args.mtl_opt > 0: nclass_list.append(nclass) dropout_p = task_defs.dropout_p_map.get(prefix, args.dropout_p) dropout_list.append(dropout_p) train_path = os.path.join(data_dir, '{}_train.json'.format(dataset)) logger.info('Loading {} as task {}'.format(train_path, task_id)) train_data_set = SingleTaskDataset(train_path, True, maxlen=args.max_seq_len, task_id=task_id, task_type=task_type, data_type=data_type) train_datasets.append(train_data_set) train_collater = Collater(dropout_w=args.dropout_w, 
encoder_type=encoder_type) multi_task_train_dataset = MultiTaskDataset(train_datasets) # MTSampler = SAMPLERS[args.sampler] n_tasks = len(tasks) dataset_sizes = [len(dataset) for dataset in train_datasets] if "random" in args.controller: controller = CONTROLLERS[args.controller]( n_task=n_tasks, dataset_names=args.train_datasets, dataset_sizes=dataset_sizes, batch_size=args.batch_size, rebatch_size=args.batch_size_train, tensorboard=tensorboard, log_filename=json_logfile) else: controller = CONTROLLERS[args.controller]( n_task=n_tasks, phi=args.phi, K=args.concurrent_cnt, dataset_names=args.train_datasets, dataset_sizes=dataset_sizes, max_cnt=args.max_queue_cnt, batch_size=args.batch_size, rebatch_size=args.batch_size_train, tensorboard=tensorboard, log_filename=json_logfile) multi_task_batch_sampler = ACLSampler(train_datasets, args.batch_size, controller=controller) # controller.max_step = len(multi_task_batch_sampler) multi_task_train_data = DataLoader(multi_task_train_dataset, batch_sampler=multi_task_batch_sampler, collate_fn=train_collater.collate_fn, pin_memory=args.cuda) opt['answer_opt'] = decoder_opts opt['task_types'] = task_types opt['tasks_dropout_p'] = dropout_list opt['loss_types'] = loss_types opt['kd_loss_types'] = kd_loss_types args.label_size = ','.join([str(l) for l in nclass_list]) logger.info(args.label_size) dev_data_list = [] test_data_list = [] test_collater = Collater(is_train=False, encoder_type=encoder_type) for dataset in args.test_datasets: prefix = dataset.split('_')[0] task_id = tasks_class[ task_defs. 
n_class_map[prefix]] if args.mtl_opt > 0 else tasks[prefix] task_type = task_defs.task_type_map[prefix] pw_task = False if task_type == TaskType.Ranking: pw_task = True assert prefix in task_defs.data_type_map data_type = task_defs.data_type_map[prefix] dev_path = os.path.join(data_dir, '{}_dev.json'.format(dataset)) dev_data = None if os.path.exists(dev_path): dev_data_set = SingleTaskDataset(dev_path, False, maxlen=args.max_seq_len, task_id=task_id, task_type=task_type, data_type=data_type) dev_data = DataLoader(dev_data_set, batch_size=args.batch_size_eval, collate_fn=test_collater.collate_fn, pin_memory=args.cuda) dev_data_list.append(dev_data) test_path = os.path.join(data_dir, '{}_test.json'.format(dataset)) test_data = None if os.path.exists(test_path): test_data_set = SingleTaskDataset(test_path, False, maxlen=args.max_seq_len, task_id=task_id, task_type=task_type, data_type=data_type) test_data = DataLoader(test_data_set, batch_size=args.batch_size_eval, collate_fn=test_collater.collate_fn, pin_memory=args.cuda) test_data_list.append(test_data) logger.info('#' * 20) logger.info(opt) logger.info('#' * 20) # div number of grad accumulation. 
num_all_batches = args.epochs * len( multi_task_train_data) // args.grad_accumulation_step logger.info('############# Gradient Accumulation Info #############') logger.info('number of step: {}'.format(args.epochs * len(multi_task_train_data))) logger.info('number of grad grad_accumulation step: {}'.format( args.grad_accumulation_step)) logger.info('adjusted number of step: {}'.format(num_all_batches)) logger.info('############# Gradient Accumulation Info #############') bert_model_path = args.init_checkpoint state_dict = None if encoder_type == EncoderModelType.BERT: if os.path.exists(bert_model_path): state_dict = torch.load(bert_model_path) config = state_dict['config'] config['attention_probs_dropout_prob'] = args.bert_dropout_p config['hidden_dropout_prob'] = args.bert_dropout_p config['multi_gpu_on'] = opt["multi_gpu_on"] opt.update(config) else: logger.error('#' * 20) logger.error( 'Could not find the init model!\n The parameters will be initialized randomly!' ) logger.error('#' * 20) config = BertConfig(vocab_size_or_config_json_file=30522).to_dict() config['multi_gpu_on'] = opt["multi_gpu_on"] opt.update(config) elif encoder_type == EncoderModelType.ROBERTA: bert_model_path = '{}/model.pt'.format(bert_model_path) if os.path.exists(bert_model_path): new_state_dict = {} state_dict = torch.load(bert_model_path) for key, val in state_dict['model'].items(): if key.startswith('decoder.sentence_encoder'): key = 'bert.model.{}'.format(key) new_state_dict[key] = val elif key.startswith('classification_heads'): key = 'bert.model.{}'.format(key) new_state_dict[key] = val state_dict = {'state': new_state_dict} # add score history score_history = [[] for _ in range(len(args.test_datasets))] total_scores = [] model = MTDNNModel(opt, state_dict=state_dict, num_train_step=num_all_batches) if args.resume and args.model_ckpt: logger.info('loading model from {}'.format(args.model_ckpt)) model.load(args.model_ckpt) #### model meta str headline = '############# Model Arch of 
MT-DNN #############' ### print network logger.info('\n{}\n{}\n'.format(headline, model.network)) # dump config config_file = os.path.join(output_dir, 'config.json') with open(config_file, 'w', encoding='utf-8') as writer: writer.write('{}\n'.format(json.dumps(opt))) writer.write('\n{}\n{}\n'.format(headline, model.network)) logger.info("Total number of params: {}".format(model.total_param)) for epoch in range(0, args.epochs): logger.warning('At epoch {0}/{1}'.format(epoch + 1, args.epochs)) start = datetime.now() total_len = len(controller) controller.set_epoch(epoch) for i, (batch_meta, batch_data) in enumerate(multi_task_train_data): batch_meta, batch_data = Collater.patch_data( args.cuda, batch_meta, batch_data) task_id = batch_meta['task_id'] loss = model.calculate_loss(batch_meta, batch_data) controller.insert(task_id, (batch_meta, batch_data), loss.item()) if i % args.log_per_updates == 0: ramaining_time = str( (datetime.now() - start) / (controller.cur_step + 1) * (total_len - controller.cur_step - 1)).split('.')[0] logger.info("Epoch {0} Progress {1} / {2} ({3:.2%})".format( epoch + 1, controller.cur_step, total_len, controller.cur_step * 1.0 / total_len)) # logger.info("Progress {0} / {1} ({2:.2f}%)".format(i, total_len, i*100.0/total_len)) logger.info( 'Task [{0:2}] updates[{1:6}] train loss[{2:.5f}] remaining[{3}]' .format(task_id, model.updates, model.train_loss.avg, ramaining_time)) summary_str = controller.summary() for line in summary_str.split("\n"): logger.info(line) # avg_loss, out_loss, loss_change, min_loss, min_out_loss = controller.get_loss() # logger.info('List of loss {}'.format(",".join(avg_loss))) # logger.info('List of out_loss {}'.format(",".join(out_loss))) # logger.info('List of loss_change {}'.format(",".join(loss_change))) # logger.info('List of min_loss {}'.format(",".join(min_loss))) # logger.info('List of min_out_loss {}'.format(",".join(min_out_loss))) # chosen = [ "%s:%.3f "%(k,v) for k, v in controller.scaled_dict.items()] # 
logger.info('List of Scaled Choosen time {}'.format(",".join(chosen))) if args.tensorboard: tensorboard.add_scalar('train/loss', model.train_loss.avg, global_step=model.updates) controller.step(model=model) if args.save_per_updates_on and ( (model.local_updates) % (args.save_per_updates * args.grad_accumulation_step) == 0): model_file = os.path.join( output_dir, 'model_{}_{}.pt'.format(epoch, model.updates)) logger.info('Saving mt-dnn model to {}'.format(model_file)) model.save(model_file) total_average_score = 0.0 scoring_cnt = 0 score_dict = dict() scoring_datasets = "cola,sst,mrpc,stsb,qqp,mnli,qnli,rte,wnli".split( ",") logger.info('Start Testing') for idx, dataset in enumerate(args.test_datasets): prefix = dataset.split('_')[0] label_dict = task_defs.global_map.get(prefix, None) dev_data = dev_data_list[idx] if dev_data is not None: with torch.no_grad(): dev_metrics, dev_predictions, scores, golds, dev_ids = eval_model( model, dev_data, metric_meta=task_defs.metric_meta_map[prefix], use_cuda=args.cuda, label_mapper=label_dict, task_type=task_defs.task_type_map[prefix]) task_score = 0.0 for key, val in dev_metrics.items(): if args.tensorboard: tensorboard.add_scalar('dev/{}/{}'.format( dataset, key), val, global_step=epoch) if isinstance(val, str): logger.warning( 'Task {0} -- epoch {1} -- Dev {2}:\n {3}'.format( dataset, epoch + 1, key, val)) else: logger.warning( 'Task {0} -- epoch {1} -- Dev {2}: {3:.2f}'.format( dataset, epoch + 1, key, val)) task_score += val if len(dev_metrics) > 1: task_score /= len(dev_metrics) logger.warning( 'Task {0} -- epoch {1} -- Dev {2}: {3:.2f}'.format( dataset, epoch + 1, "Average", task_score)) if prefix in scoring_datasets: scoring_cnt += 1 if prefix not in score_dict: score_dict[prefix] = task_score else: score_dict[prefix] = (score_dict[prefix] + task_score) / 2 total_average_score += task_score score_history[idx].append("%.2f" % task_score) logger.warning('Task {0} -- epoch {1} -- Dev {2}: {3}'.format( dataset, epoch + 1, 
"History", score_history[idx])) score_file = os.path.join( output_dir, '{}_dev_scores_{}.json'.format(dataset, epoch)) results = { 'metrics': dev_metrics, 'predictions': dev_predictions, 'uids': dev_ids, 'scores': scores } dump(score_file, results) if args.glue_format_on: from experiments.glue.glue_utils import submit official_score_file = os.path.join( output_dir, '{}_dev_scores_{}.tsv'.format(dataset, epoch)) submit(official_score_file, results, label_dict) # test eval test_data = test_data_list[idx] if test_data is not None: with torch.no_grad(): test_metrics, test_predictions, scores, golds, test_ids = eval_model( model, test_data, metric_meta=task_defs.metric_meta_map[prefix], use_cuda=args.cuda, with_label=False, label_mapper=label_dict, task_type=task_defs.task_type_map[prefix]) score_file = os.path.join( output_dir, '{}_test_scores_{}.json'.format(dataset, epoch)) results = { 'metrics': test_metrics, 'predictions': test_predictions, 'uids': test_ids, 'scores': scores } dump(score_file, results) if args.glue_format_on: from experiments.glue.glue_utils import submit official_score_file = os.path.join( output_dir, '{}_test_scores_{}.tsv'.format(dataset, epoch)) submit(official_score_file, results, label_dict) logger.info('[new test scores saved.]') scoreing_cnt = len(score_dict) if scoreing_cnt > 0: mean_value = np.mean([v for k, v in score_dict.items()]) logger.warning( 'Epoch {0} -- Dev {1} Tasks, Average Score : {2:.3f}'.format( epoch + 1, scoring_cnt, mean_value)) score_dict['avg'] = mean_value total_scores.append(score_dict) model_file = os.path.join(output_dir, 'model_{}.pt'.format(epoch)) model.save(model_file) for i, total_score in enumerate(total_scores): logger.info(total_score) if args.tensorboard: tensorboard.close()