def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)

  processors = {
      "cola": run_classifier.ColaProcessor,
      "mnli": run_classifier.MnliProcessor,
      "mrpc": run_classifier.MrpcProcessor,
  }

  if not FLAGS.do_train and not FLAGS.do_eval:
    raise ValueError("At least one of `do_train` or `do_eval` must be True.")

  tf.gfile.MakeDirs(FLAGS.output_dir)

  task_name = FLAGS.task_name.lower()

  if task_name not in processors:
    raise ValueError("Task not found: %s" % (task_name))

  processor = processors[task_name]()

  label_list = processor.get_labels()

  tokenizer = create_tokenizer_from_hub_module(FLAGS.bert_hub_module_handle)

  tpu_cluster_resolver = None
  if FLAGS.use_tpu and FLAGS.tpu_name:
    tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
        FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

  is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
  run_config = tf.contrib.tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      master=FLAGS.master,
      model_dir=FLAGS.output_dir,
      save_checkpoints_steps=FLAGS.save_checkpoints_steps,
      tpu_config=tf.contrib.tpu.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_tpu_cores,
          per_host_input_for_training=is_per_host))

  train_examples = None
  num_train_steps = None
  num_warmup_steps = None
  if FLAGS.do_train:
    train_examples = processor.get_train_examples(FLAGS.data_dir)
    num_train_steps = int(
        len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
    num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

  model_fn = model_fn_builder(
      num_labels=len(label_list),
      learning_rate=FLAGS.learning_rate,
      num_train_steps=num_train_steps,
      num_warmup_steps=num_warmup_steps,
      use_tpu=FLAGS.use_tpu,
      bert_hub_module_handle=FLAGS.bert_hub_module_handle)

  # If TPU is not available, this will fall back to normal Estimator on CPU
  # or GPU.
  estimator = tf.contrib.tpu.TPUEstimator(
      use_tpu=FLAGS.use_tpu,
      model_fn=model_fn,
      config=run_config,
      train_batch_size=FLAGS.train_batch_size,
      eval_batch_size=FLAGS.eval_batch_size,
      predict_batch_size=FLAGS.predict_batch_size)

  if FLAGS.do_train:
    train_features = run_classifier.convert_examples_to_features(
        train_examples, label_list, FLAGS.max_seq_length, tokenizer)
    tf.logging.info("***** Running training *****")
    tf.logging.info("  Num examples = %d", len(train_examples))
    tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
    tf.logging.info("  Num steps = %d", num_train_steps)
    train_input_fn = run_classifier.input_fn_builder(
        features=train_features,
        seq_length=FLAGS.max_seq_length,
        is_training=True,
        drop_remainder=True)
    estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

  if FLAGS.do_eval:
    eval_examples = processor.get_dev_examples(FLAGS.data_dir)
    eval_features = run_classifier.convert_examples_to_features(
        eval_examples, label_list, FLAGS.max_seq_length, tokenizer)

    tf.logging.info("***** Running evaluation *****")
    tf.logging.info("  Num examples = %d", len(eval_examples))
    tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

    # This tells the estimator to run through the entire set.
    eval_steps = None
    # However, if running eval on the TPU, you will need to specify the
    # number of steps.
    if FLAGS.use_tpu:
      # Eval will be slightly WRONG on the TPU because it will truncate
      # the last batch.
      eval_steps = int(len(eval_examples) / FLAGS.eval_batch_size)

    eval_drop_remainder = True if FLAGS.use_tpu else False
    eval_input_fn = run_classifier.input_fn_builder(
        features=eval_features,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=eval_drop_remainder)

    result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)

    output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
    with tf.gfile.GFile(output_eval_file, "w") as writer:
      tf.logging.info("***** Eval results *****")
      for key in sorted(result.keys()):
        tf.logging.info("  %s = %s", key, str(result[key]))
        writer.write("%s = %s\n" % (key, str(result[key])))

  if FLAGS.do_predict:
    predict_examples = processor.get_test_examples(FLAGS.data_dir)
    if FLAGS.use_tpu:
      # Discard batch remainder if running on TPU
      n = len(predict_examples)
      predict_examples = predict_examples[:(n - n % FLAGS.predict_batch_size)]

    predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
    run_classifier.file_based_convert_examples_to_features(
        predict_examples, label_list, FLAGS.max_seq_length, tokenizer,
        predict_file)

    tf.logging.info("***** Running prediction*****")
    tf.logging.info("  Num examples = %d", len(predict_examples))
    tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

    predict_input_fn = run_classifier.file_based_input_fn_builder(
        input_file=predict_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=FLAGS.use_tpu)

    result = estimator.predict(input_fn=predict_input_fn)

    output_predict_file = os.path.join(FLAGS.output_dir, "test_results.tsv")
    with tf.gfile.GFile(output_predict_file, "w") as writer:
      tf.logging.info("***** Predict results *****")
      for prediction in result:
        probabilities = prediction["probabilities"]
        output_line = "\t".join(
            str(class_probability)
            for class_probability in probabilities) + "\n"
        writer.write(output_line)
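# `create_tokenizer_from_hub_module` is called in main() above but is not shown in
# this excerpt. A minimal sketch of that helper, assuming the standard TF-Hub BERT
# recipe (hedged -- the actual implementation used here may differ):
def create_tokenizer_from_hub_module_sketch(bert_hub_module_handle):
  """Builds a FullTokenizer from the vocab exported by the BERT hub module."""
  import tensorflow_hub as hub  # assumed dependency of the TF-Hub variant
  with tf.Graph().as_default():
    bert_module = hub.Module(bert_hub_module_handle)
    tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
    with tf.Session() as sess:
      vocab_file, do_lower_case = sess.run(
          [tokenization_info["vocab_file"], tokenization_info["do_lower_case"]])
  return tokenization.FullTokenizer(
      vocab_file=vocab_file, do_lower_case=do_lower_case)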
optimizer = BertAdam(optimizer_grouped_parameters,
                     lr=learning_rate,
                     warmup=warmup_proportion,
                     t_total=t_total)

# optimizer = AdamW(optimizer_grouped_parameters,
#                   lr = learning_rate,  # args.learning_rate - default is 5e-5, our notebook had 2e-5
#                   eps = 1e-8,  # args.adam_epsilon - default is 1e-8.
#                   correct_bias=False
#                   )
# scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=num_warmup_steps, num_training_steps=t_total)  # PyTorch scheduler

# In[15]:

global_step = 0
train_features = convert_examples_to_features(train_examples, label_list,
                                              max_seq_length, tokenizer)
claim_features = convert_claims_to_features(train_examples, label_list,
                                            max_seq_length, tokenizer)
logger.info("***** Running training *****")
logger.info("  Num examples = %d", len(train_examples))
logger.info("  Batch size = %d", train_batch_size)
logger.info("  Num steps = %d", num_train_steps)
all_input_ids = torch.tensor([f.input_ids for f in train_features],
                             dtype=torch.long)
all_input_mask = torch.tensor([f.input_mask for f in train_features],
                              dtype=torch.long)
all_segment_ids = torch.tensor([f.segment_ids for f in train_features],
                               dtype=torch.long)
all_label_ids = torch.tensor([f.label_id for f in train_features],
                             dtype=torch.long)
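# Hedged sketch (not from the original notebook): the `transformers`-style
# replacement for the commented-out AdamW / scheduler lines above. Unlike BertAdam,
# AdamW does not handle warmup itself, so the linear schedule must be stepped
# explicitly after every optimizer.step().
def build_adamw_with_warmup(grouped_params, lr, warmup_proportion, t_total):
    # assumes a transformers release that still ships AdamW
    from transformers import AdamW, get_linear_schedule_with_warmup
    optimizer = AdamW(grouped_params, lr=lr, eps=1e-8, correct_bias=False)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=int(warmup_proportion * t_total),
        num_training_steps=t_total)
    return optimizer, scheduler  # call scheduler.step() right after optimizer.step()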
def train_and_test(data_dir, bert_model="bert-base-uncased", task_name=None, output_dir=None, max_seq_length=32, do_train=False, do_eval=False, do_lower_case=False, train_batch_size=32, eval_batch_size=8, learning_rate=5e-5, num_train_epochs=5, warmup_proportion=0.1, no_cuda=False, local_rank=-1, seed=42, gradient_accumulation_steps=1, optimize_on_cpu=False, fp16=False, loss_scale=128, saved_model=""): # ## Required parameters # parser.add_argument("--data_dir", # default=None, # type=str, # required=True, # help="The input data dir. Should contain the .tsv files (or other data files) for the task.") # parser.add_argument("--bert_model", default=None, type=str, required=True, # help="Bert pre-trained model selected in the list: bert-base-uncased, " # "bert-large-uncased, bert-base-cased, bert-base-multilingual, bert-base-chinese.") # parser.add_argument("--task_name", # default=None, # type=str, # required=True, # help="The name of the task to train.") # parser.add_argument("--output_dir", # default=None, # type=str, # required=True, # help="The output directory where the model checkpoints will be written.") ## Other parameters # parser.add_argument("--max_seq_length", # default=128, # type=int, # help="The maximum total input sequence length after WordPiece tokenization. \n" # "Sequences longer than this will be truncated, and sequences shorter \n" # "than this will be padded.") # parser.add_argument("--do_train", # default=False, # action='store_true', # help="Whether to run training.") # parser.add_argument("--do_eval", # default=False, # action='store_true', # help="Whether to run eval on the dev set.") # parser.add_argument("--do_lower_case", # default=False, # action='store_true', # help="Set this flag if you are using an uncased model.") # parser.add_argument("--train_batch_size", # default=32, # type=int, # help="Total batch size for training.") # parser.add_argument("--eval_batch_size", # default=8, # type=int, # help="Total batch size for eval.") # parser.add_argument("--learning_rate", # default=5e-5, # type=float, # help="The initial learning rate for Adam.") # parser.add_argument("--num_train_epochs", # default=3.0, # type=float, # help="Total number of training epochs to perform.") # parser.add_argument("--warmup_proportion", # default=0.1, # type=float, # help="Proportion of training to perform linear learning rate warmup for. 
" # "E.g., 0.1 = 10%% of training.") # parser.add_argument("--no_cuda", # default=False, # action='store_true', # help="Whether not to use CUDA when available") # parser.add_argument("--local_rank", # type=int, # default=-1, # help="local_rank for distributed training on gpus") # parser.add_argument('--seed', # type=int, # default=42, # help="random seed for initialization") # parser.add_argument('--gradient_accumulation_steps', # type=int, # default=1, # help="Number of updates steps to accumulate before performing a backward/update pass.") # parser.add_argument('--optimize_on_cpu', # default=False, # action='store_true', # help="Whether to perform optimization and keep the optimizer averages on CPU") # parser.add_argument('--fp16', # default=False, # action='store_true', # help="Whether to use 16-bit float precision instead of 32-bit") # parser.add_argument('--loss_scale', # type=float, default=128, # help='Loss scaling, positive power of 2 values can improve fp16 convergence.') # args = parser.parse_args() processors = { # "cola": ColaProcessor, # "mnli": MnliProcessor, "mrpc": MrpcProcessor, "stance": StanceProcessor } if local_rank == -1 or no_cuda: device = torch.device( "cuda" if torch.cuda.is_available() and not no_cuda else "cpu") n_gpu = torch.cuda.device_count() else: device = torch.device("cuda", local_rank) n_gpu = 1 # Initializes the distributed backend which will take care of sychronizing nodes/GPUs torch.distributed.init_process_group(backend='nccl') if fp16: logger.info( "16-bits training currently not supported in distributed training" ) fp16 = False # (see https://github.com/pytorch/pytorch/pull/13496) logger.info("device %s n_gpu %d distributed training %r", device, n_gpu, bool(local_rank != -1)) if gradient_accumulation_steps < 1: raise ValueError( "Invalid gradient_accumulation_steps parameter: {}, should be >= 1" .format(gradient_accumulation_steps)) train_batch_size = int(train_batch_size / gradient_accumulation_steps) random.seed(seed) np.random.seed(seed) torch.manual_seed(seed) if n_gpu > 0: torch.cuda.manual_seed_all(seed) if not do_train and not do_eval: raise ValueError( "At least one of `do_train` or `do_eval` must be True.") if do_train: # if os.path.exists(output_dir) and os.listdir(output_dir): # raise ValueError("Output directory ({}) already exists and is not emp1ty.".format(output_dir)) os.makedirs(output_dir, exist_ok=True) task_name = task_name.lower() if task_name not in processors: raise ValueError("Task not found: %s" % (task_name)) processor = processors[task_name]() label_list = processor.get_labels() # tokenizer = BertTokenizer.from_pretrained(bert_model, do_lower_case=do_lower_case) tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') train_examples = None num_train_steps = None if do_train: train_examples = processor.get_train_examples(data_dir) num_train_steps = int( len(train_examples) / train_batch_size / gradient_accumulation_steps * num_train_epochs) # Prepare model # model = BertForSequenceClassification.from_pretrained(bert_model, # cache_dir=PYTORCH_PRETRAINED_BERT_CACHE / 'distributed_{}'.format(local_rank), num_labels = 2) model = BertForConsistencyCueClassification.from_pretrained( 'bert-base-uncased', num_labels=2) model.to(device) if fp16: model.half() if local_rank != -1: model = torch.nn.parallel.DistributedDataParallel( model, device_ids=[local_rank], output_device=local_rank) elif n_gpu > 1: model = torch.nn.DataParallel(model) # Prepare optimizer if fp16: param_optimizer = [ (n, 
param.clone().detach().to('cpu').float().requires_grad_()) for n, param in model.named_parameters() ] elif optimize_on_cpu: param_optimizer = [ (n, param.clone().detach().to('cpu').requires_grad_()) for n, param in model.named_parameters() ] else: param_optimizer = list(model.named_parameters()) no_decay = ['bias', 'gamma', 'beta'] optimizer_grouped_parameters = [{ 'params': [ p for n, p in param_optimizer if not any(nd in n for nd in no_decay) ], 'weight_decay_rate': 0.01 }, { 'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay_rate': 0.0 }] t_total = num_train_steps # print(t_total) if local_rank != -1: t_total = t_total // torch.distributed.get_world_size() if do_train: optimizer = BertAdam(optimizer_grouped_parameters, lr=learning_rate, warmup=warmup_proportion, t_total=t_total) global_step = 0 if do_train: train_features = convert_examples_to_features(train_examples, label_list, max_seq_length, tokenizer) logger.info("***** Running training *****") logger.info(" Num examples = %d", len(train_examples)) logger.info(" Batch size = %d", train_batch_size) logger.info(" Num steps = %d", num_train_steps) all_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long) all_input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.long) all_segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.long) all_label_ids = torch.tensor([f.label_id for f in train_features], dtype=torch.long) train_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids) if local_rank == -1: train_sampler = RandomSampler(train_data) else: train_sampler = DistributedSampler(train_data) train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=train_batch_size) model.train() for _ in trange(int(num_train_epochs), desc="Epoch"): tr_loss = 0 nb_tr_examples, nb_tr_steps = 0, 0 for step, batch in enumerate( tqdm(train_dataloader, desc="Iteration")): batch = tuple(t.to(device) for t in batch) input_ids, input_mask, segment_ids, label_ids, = batch loss = model(input_ids, segment_ids, input_mask, label_ids) if n_gpu > 1: loss = loss.mean() # mean() to average on multi-gpu. 
if fp16 and loss_scale != 1.0: # rescale loss for fp16 training # see https://docs.nvidia.com/deeplearning/sdk/mixed-precision-training/index.html loss = loss * loss_scale if gradient_accumulation_steps > 1: loss = loss / gradient_accumulation_steps loss.backward() tr_loss += loss.item() nb_tr_examples += input_ids.size(0) nb_tr_steps += 1 if (step + 1) % gradient_accumulation_steps == 0: if fp16 or optimize_on_cpu: if fp16 and loss_scale != 1.0: # scale down gradients for fp16 training for param in model.parameters(): if param.grad is not None: param.grad.data = param.grad.data / loss_scale is_nan = set_optimizer_params_grad( param_optimizer, model.named_parameters(), test_nan=True) if is_nan: logger.info( "FP16 TRAINING: Nan in gradients, reducing loss scaling" ) loss_scale = loss_scale / 2 model.zero_grad() continue optimizer.step() copy_optimizer_params_to_model( model.named_parameters(), param_optimizer) else: optimizer.step() model.zero_grad() global_step += 1 torch.save(model.state_dict(), output_dir + "ibmcs_non_reverse_bertcons_epoch5.pth") if do_eval and (local_rank == -1 or torch.distributed.get_rank() == 0): eval_examples = processor.get_test_examples(data_dir) # eval_examples = processor.get_dev_examples(data_dir) eval_features = convert_examples_to_features(eval_examples, label_list, max_seq_length, tokenizer) claim_features = convert_claims_to_features(eval_examples, label_list, max_seq_length, tokenizer) logger.info("***** Running evaluation *****") logger.info(" Num examples = %d", len(eval_examples)) logger.info(" Batch size = %d", eval_batch_size) all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long) all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long) all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long) all_label_ids = torch.tensor([f.label_id for f in eval_features], dtype=torch.long) claims_input_ids = torch.tensor([f.input_ids for f in claim_features], dtype=torch.long) claims_input_mask = torch.tensor( [f.input_mask for f in claim_features], dtype=torch.long) claims_segment_ids = torch.tensor( [f.segment_ids for f in claim_features], dtype=torch.long) claims_label_ids = torch.tensor([f.label_id for f in claim_features], dtype=torch.long) eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids, claims_input_ids, claims_input_mask, claims_segment_ids, claims_label_ids) # Run prediction for full data # eval_sampler = SequentialSampler(eval_data) eval_sampler = SequentialSampler(eval_data) eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=eval_batch_size) # print('all_input_ids:') # print(all_input_ids) # model.load_state_dict(torch.load(saved_model)) model_state_dict = torch.load(saved_model) model = BertForConsistencyCueClassification.from_pretrained( 'bert-base-uncased', num_labels=2, state_dict=model_state_dict) model.to(device) model.eval() # eval_loss, eval_accuracy = 0, 0 eval_tp, eval_pred_c, eval_gold_c = 0, 0, 0 eval_loss, eval_macro_p, eval_macro_r = 0, 0, 0 raw_score = [] nb_eval_steps, nb_eval_examples = 0, 0 for input_ids, input_mask, segment_ids, label_ids, claim_input_ids, claim_input_mask, claim_segment_ids, claim_label_ids in eval_dataloader: input_ids = input_ids.to(device) input_mask = input_mask.to(device) segment_ids = segment_ids.to(device) label_ids = label_ids.to(device) claim_input_ids = claim_input_ids.to(device) claim_input_mask = claim_input_mask.to(device) claim_segment_ids = 
claim_segment_ids.to(device) claim_label_ids = claim_label_ids.to(device) # print("start") # print(input_ids) # print(input_mask) # print(segment_ids) # print(label_ids) # print(claim_input_ids) # print(claim_input_mask) # print(claim_segment_ids) # print(claim_label_ids) # print("end") with torch.no_grad(): tmp_eval_loss = model(input_ids=input_ids, token_type_ids=segment_ids, attention_mask=input_mask, labels=label_ids, input_ids2=claim_input_ids, token_type_ids2=claim_segment_ids, attention_mask2=claim_input_mask, labels2=claim_label_ids) logits = model(input_ids=input_ids, token_type_ids=segment_ids, attention_mask=input_mask, input_ids2=claim_input_ids, token_type_ids2=claim_segment_ids, attention_mask2=claim_input_mask) # print(logits) # print(logits[0]) logits = logits.detach().cpu().numpy() # print(logits) label_ids = label_ids.to('cpu').numpy() # print(label_ids) # Micro F1 (aggregated tp, fp, fn counts across all examples) tmp_tp, tmp_pred_c, tmp_gold_c = tp_pcount_gcount( logits, label_ids) eval_tp += tmp_tp eval_pred_c += tmp_pred_c eval_gold_c += tmp_gold_c pred_label = np.argmax(logits, axis=1) raw_score += zip(logits, pred_label, label_ids) # Macro F1 (averaged P, R across mini batches) tmp_eval_p, tmp_eval_r, tmp_eval_f1 = p_r_f1(logits, label_ids) eval_macro_p += tmp_eval_p eval_macro_r += tmp_eval_r eval_loss += tmp_eval_loss.mean().item() nb_eval_examples += input_ids.size(0) nb_eval_steps += 1 # Micro F1 (aggregated tp, fp, fn counts across all examples) eval_micro_p = eval_tp / eval_pred_c eval_micro_r = eval_tp / eval_gold_c eval_micro_f1 = 2 * eval_micro_p * eval_micro_r / (eval_micro_p + eval_micro_r) # Macro F1 (averaged P, R across mini batches) eval_macro_p = eval_macro_p / nb_eval_steps eval_macro_r = eval_macro_r / nb_eval_steps eval_macro_f1 = 2 * eval_macro_p * eval_macro_r / (eval_macro_p + eval_macro_r) eval_loss = eval_loss / nb_eval_steps result = { 'eval_loss': eval_loss, 'eval_micro_p': eval_micro_p, 'eval_micro_r': eval_micro_r, 'eval_micro_f1': eval_micro_f1, 'eval_macro_p': eval_macro_p, 'eval_macro_r': eval_macro_r, 'eval_macro_f1': eval_macro_f1, # 'global_step': global_step, # 'loss': tr_loss/nb_tr_steps } output_eval_file = os.path.join( output_dir, "train_on_ibmcs_eval_on_ibmcs_bert_cons_epoch5_eval_results.txt") output_raw_score = os.path.join( output_dir, "train_on_ibmcs_eval_on_ibmcs_bert_cons_epoch5_raw_score.csv") with open(output_eval_file, "w") as writer: logger.info("***** Eval results *****") for key in sorted(result.keys()): logger.info(" %s = %s", key, str(result[key])) writer.write("%s = %s\n" % (key, str(result[key]))) with open(output_raw_score, 'w') as fout: fields = [ "undermine_score", "support_score", "predict_label", "gold" ] writer = csv.DictWriter(fout, fieldnames=fields) writer.writeheader() for score, pred, gold in raw_score: writer.writerow({ "undermine_score": str(score[0]), "support_score": str(score[1]), "predict_label": str(pred), "gold": str(gold) })
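# Hypothetical driver for train_and_test() above; the paths are placeholders, not
# values from the original experiments. The saved_model path matches the checkpoint
# name written by the do_train branch.
def _example_stance_run():
    train_and_test(
        data_dir="data/stance/",   # placeholder
        task_name="stance",
        output_dir="output/",      # placeholder
        do_train=True,
        do_eval=True,
        saved_model="output/ibmcs_non_reverse_bertcons_epoch5.pth")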
def main(_): """For current work, we only use the following four categories, but you can add others if you would like to.""" categories = [ 'Tools_and_Home_Improvement', 'Patio_Lawn_and_Garden', 'Electronics', 'Baby', ] models = ['FLTR', 'BertQA'] data_path = os.path.join(FLAGS.data_dir, 'Annotated_Data.txt') data = pd.read_csv(data_path, sep='\t', encoding='utf-8', converters={ 'annotation_score': ast.literal_eval, 'reviews': ast.literal_eval }) data = data.reset_index() data['qr'] = data[['index', 'question', 'reviews' ]].apply(lambda x: [[x['index'], x['question'], i] for i in x['reviews']], axis=1) d = [] for category in categories: qr = data[data['category'] == category]['qr'].tolist() qr = [item for sublist in qr for item in sublist] qr = pd.DataFrame(columns=['index', 'question', 'review'], data=qr) qr['label'] = 1 temp = qr.copy() temp['question'] = temp['question'].apply(str) temp['review'] = temp['review'].apply(str) DATA_COLUMN_A = 'question' DATA_COLUMN_B = 'review' LABEL_COLUMN = 'label' label_list = [0, 1] tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file, do_lower_case=True) test_InputExamples = temp.apply( lambda x: run_classifier.InputExample(guid=None, text_a=x[DATA_COLUMN_A], text_b=x[DATA_COLUMN_B], label=x[LABEL_COLUMN]), axis=1) test_features = run_classifier.convert_examples_to_features( test_InputExamples, label_list, FLAGS.max_seq_length, tokenizer) t = data[data['category'] == category] t = t.reset_index(drop=True) for model in models: OUTPUT_DIR = os.path.join(FLAGS.model_output_dir, category + '_' + model) run_config = tf.estimator.RunConfig(model_dir=OUTPUT_DIR, save_summary_steps=100, save_checkpoints_steps=100) model_fn = None if model == 'BertQA': model_fn = model_fn_builder_BertQA( bert_config=modeling.BertConfig.from_json_file( FLAGS.bert_config_file), num_labels=len(label_list), init_checkpoint=OUTPUT_DIR, learning_rate=FLAGS.learning_rate, num_train_steps=100, num_warmup_steps=100) else: model_fn = model_fn_builder_FLTR( bert_config=modeling.BertConfig.from_json_file( FLAGS.bert_config_file), num_labels=len(label_list), init_checkpoint=OUTPUT_DIR, learning_rate=FLAGS.learning_rate, num_train_steps=100, num_warmup_steps=100) estimator = tf.estimator.Estimator( model_fn=model_fn, config=run_config, params={"batch_size": FLAGS.train_batch_size}) test_input_fn = run_classifier.input_fn_builder( features=test_features, seq_length=FLAGS.max_seq_length, is_training=False, drop_remainder=False) predictions = estimator.predict(test_input_fn) probabilities = [ prediction['probabilities'] for prediction in predictions ] probabilities = [list(item) for item in probabilities] if model == 'FLTR': probabilities = [item[1] for item in probabilities] else: probabilities = [item[0] for item in probabilities] print(model, ' :', probabilities[:10]) temp[model + '_score'] = probabilities temp_groupby = temp.groupby( ['index', 'question'], sort=False)[model + '_score'].apply(list).reset_index( name=model + '_score') t = pd.concat([t, temp_groupby[model + '_score']], axis=1) if len(d) == 0: d = t else: d = pd.concat([d, t], axis=0, ignore_index=True) d.to_csv(os.path.join(FLAGS.data_dir, 'test_predictions.txt'), index=None, sep='\t', mode='w')
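# Assumed entry point for main(_) above (hedged): the tf.flags definitions for
# data_dir, vocab_file, bert_config_file, model_output_dir, etc. are not shown in
# this excerpt and are expected to live elsewhere in the script.
if __name__ == "__main__":
    tf.app.run(main)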
def main(argv): BERT_MODEL = 'uncased_L-12_H-768_A-12' VOCAB_FILE = '/root/cyliu/tftuner/selftf/tf_job/nlp/zmwu/bert_tf2/vocab.txt' CONFIG_FILE = '/root/cyliu/tftuner/selftf/tf_job/nlp/zmwu/bert_tf2/bert_config.json' INIT_CHECKPOINT = '/root/cyliu/tftuner/selftf/tf_job/nlp/zmwu/bert_tf2/bert_model.ckpt' DO_LOWER_CASE = BERT_MODEL.startswith('uncased') model_dir = "{}/{}".format("/opt/tftuner", mltunerUtil.get_job_id()) # model fix parameter TRAIN_BATCH_SIZE = mltunerUtil.get_batch_size() NUM_TRAIN_EPOCHS = 3 LEARNING_RATE = mltunerUtil.get_learning_rate() WARMUP_PROPORTION = 0.05 EVAL_BATCH_SIZE = 8 MAX_SEQ_LENGTH = 128 #data loading train_df = pd.read_csv( '/root/cyliu/tftuner/selftf/tf_job/nlp/zmwu/bert_tf2/train.csv') train_df = train_df.sample(1000) train, test = train_test_split(train_df, test_size=0.1, random_state=42) train_lines, train_labels = train.question_text.values, train.target.values test_lines, test_labels = test.question_text.values, test.target.values label_list = ['0', '1'] tokenizer = tokenization.FullTokenizer(vocab_file=VOCAB_FILE, do_lower_case=DO_LOWER_CASE) train_examples = create_examples(train_lines, 'train', labels=train_labels) num_train_steps = int( len(train_examples) / TRAIN_BATCH_SIZE * NUM_TRAIN_EPOCHS) num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION) strategy = tf.distribute.experimental.ParameterServerStrategy() session_config = mltunerUtil.get_tf_session_config() config = tf.compat.v1.estimator.tpu.RunConfig( train_distribute=strategy, model_dir=model_dir, save_checkpoints_steps=None, save_checkpoints_secs=None, session_config=session_config) model_fn = run_classifier.model_fn_builder( bert_config=modeling.BertConfig.from_json_file(CONFIG_FILE), num_labels=len(label_list), init_checkpoint=None, learning_rate=LEARNING_RATE, num_train_steps=num_train_steps, num_warmup_steps=num_warmup_steps, use_tpu= False, #If False training will fall on CPU or GPU, depending on what is available use_one_hot_embeddings=True) estimator = tf.compat.v1.estimator.tpu.TPUEstimator( use_tpu= False, #If False training will fall on CPU or GPU, depending on what is available model_fn=model_fn, config=config, train_batch_size=TRAIN_BATCH_SIZE, eval_batch_size=EVAL_BATCH_SIZE) class LoggerHook(tf.estimator.SessionRunHook): """Logs loss and runtime.""" def __init__(self): self.last_run_timestamp = time.time() def after_run(self, run_context, run_values): session: tf.Session = run_context.session loss, step = session.run([ tf.compat.v1.get_collection("losses")[0], tf.compat.v1.get_collection("global_step_read_op_cache")[0] ]) logging.debug("step:{} loss:{}".format(step, loss)) mltunerUtil.report_iter_loss(step, loss, time.time() - self.last_run_timestamp) self.last_run_timestamp = time.time() # prepare for train train_features = run_classifier.convert_examples_to_features( train_examples, label_list, MAX_SEQ_LENGTH, tokenizer) train_input_fn = input_fn_builder(features=train_features, seq_length=MAX_SEQ_LENGTH, is_training=True, drop_remainder=True) predict_examples = create_examples(test_lines, 'test') predict_features = run_classifier.convert_examples_to_features( predict_examples, label_list, MAX_SEQ_LENGTH, tokenizer) predict_input_fn = input_fn_builder(features=predict_features, seq_length=MAX_SEQ_LENGTH, is_training=False, drop_remainder=False) train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=num_train_steps, hooks=[LoggerHook()]) eval_spec = tf.estimator.EvalSpec(input_fn=predict_input_fn) # wait for chief ready? 
    if not (mltunerUtil.is_chief() or mltunerUtil.is_ps()):
        time.sleep(1)
        if not tf.io.gfile.exists(model_dir):
            logging.debug("wait for chief init")
            time.sleep(1)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
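# ParameterServerStrategy in the script above resolves the cluster from the
# TF_CONFIG environment variable. Illustrative only (hostnames/ports are
# placeholders, and the tftuner harness may export this itself): each
# chief/worker/ps process needs the same "cluster" block plus its own "task" entry.
def _example_tf_config(task_type="worker", task_index=0):
    """Writes an illustrative TF_CONFIG; hosts below are placeholders."""
    import json
    import os
    os.environ["TF_CONFIG"] = json.dumps({
        "cluster": {
            "chief": ["host0:2222"],
            "worker": ["host1:2222"],
            "ps": ["host2:2222"],
        },
        "task": {"type": task_type, "index": task_index},
    })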
model_fn = run_classifier.model_fn_builder(
    bert_config=modeling.BertConfig.from_json_file(CONFIG_FILE),
    num_labels=len(label_list),
    init_checkpoint=INIT_CHECKPOINT,
    learning_rate=LEARNING_RATE,
    num_train_steps=num_train_steps,
    num_warmup_steps=num_warmup_steps,
    use_tpu=False,
    use_one_hot_embeddings=True)

estimator = tf.contrib.tpu.TPUEstimator(use_tpu=False,
                                        model_fn=model_fn,
                                        config=run_config,
                                        train_batch_size=TRAIN_BATCH_SIZE,
                                        eval_batch_size=EVAL_BATCH_SIZE)
# estimator = tf.contrib.estimator.SavedModelEstimator(BERT_PRETRAINED_DIR)

# Eval the model.
eval_examples = processor.get_dev_examples(TASK_DATA_DIR)
eval_features = run_classifier.convert_examples_to_features(
    eval_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
eval_steps = int(len(eval_examples) / EVAL_BATCH_SIZE)
eval_input_fn = run_classifier.input_fn_builder(features=eval_features,
                                                seq_length=MAX_SEQ_LENGTH,
                                                is_training=False,
                                                drop_remainder=True)
result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)
print(result)
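# Note on the eval_steps computation above: int(len / batch) floors, and with
# drop_remainder=True the leftover examples are never evaluated. A tiny helper to
# make the arithmetic explicit (illustrative numbers in the comment, not from the
# source data):
def dropped_eval_examples(num_examples, batch_size):
    """How many examples a truncating eval (drop_remainder=True) skips."""
    return num_examples - (num_examples // batch_size) * batch_size
# e.g. dropped_eval_examples(1043, 8) == 3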
LABEL_COLUMN = 'label'
label_list = [0, 1]

# Use the InputExample class from BERT's run_classifier code to create examples from the data
train_InputExamples = train.apply(
    lambda x: run_classifier.InputExample(
        guid=None,  # Globally unique ID for bookkeeping, unused in this example
        text_a=x[DATA_COLUMN_A],
        text_b=x[DATA_COLUMN_B],
        label=x[LABEL_COLUMN]),
    axis=1)

test_InputExamples = test.apply(
    lambda x: run_classifier.InputExample(
        guid=None,
        text_a=x[DATA_COLUMN_A],
        text_b=x[DATA_COLUMN_B],
        label=x[LABEL_COLUMN]),
    axis=1)

# Convert our train and test features to InputFeatures that BERT understands.
train_features = run_classifier.convert_examples_to_features(
    train_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
test_features = run_classifier.convert_examples_to_features(
    test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)

# Compute the number of train and warmup steps from batch size
num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

# Specify output directory and number of checkpoint steps to save
run_config = tf.estimator.RunConfig(
    model_dir=OUTPUT_DIR,
    save_summary_steps=SAVE_SUMMARY_STEPS,
    save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)

model_fn = model_fn_builder(
    num_labels=len(label_list),
def main(_):
    """For current work, we only use the following four categories,
    but you can add others if you would like to."""
    categories = [
        'Tools_and_Home_Improvement',
        'Patio_Lawn_and_Garden',
        'Electronics',
        'Baby',
    ]

    data = None
    """Cross-domain pre-training (All_Categories) to boost the performance."""
    if FLAGS.category_name == "All_Categories":
        for category in categories:
            category_data_path = os.path.join(FLAGS.data_dir, category + '.txt')
            category_data = pd.read_csv(
                category_data_path, sep='\t', encoding='utf-8', nrows=10000,
                converters={'reviewText': ast.literal_eval,
                            'FLTR_scores': ast.literal_eval})
            if data is None:
                data = category_data
            else:
                data = pd.concat([data, category_data], axis=0)
        data = data.sample(n=len(data))
    else:
        data_path = os.path.join(FLAGS.data_dir, FLAGS.category_name + '.txt')
        data = pd.read_csv(
            data_path, sep='\t', encoding='utf-8',  # nrows=10000,
            converters={'reviewText': ast.literal_eval,
                        'FLTR_scores': ast.literal_eval})

    # data['len_questions'] = data["question"].apply(lambda x: len(x.split()))
    # data = data[data['len_questions'] <= 10]
    data['FLTR_Top10'] = data.apply(FLTR_Top10, axis=1)

    list_of_answers = list(data['answer'])
    list_of_answers = shuffle(list_of_answers)
    data['non_answer'] = list_of_answers

    train = data[:int(len(data) * 0.8)]
    train = train.sample(n=min(20000, len(train)))
    test = data[int(len(data) * 0.8):]
    print(train.shape, test.shape)

    DATA_COLUMN_A = 'senA'
    DATA_COLUMN_B = 'senB'
    LABEL_COLUMN = 'Label'
    label_list = [0, 1]

    train = train.apply(qar_pair, axis=1)
    test = test.apply(qar_pair, axis=1)

    temp = train.tolist()
    flat_list = [item for sublist in temp for item in sublist]
    train = pd.DataFrame(flat_list, columns=['senA', 'senB'])
    train['Label'] = 1
    train['senA'] = train['senA'].apply(str)
    train['senB'] = train['senB'].apply(str)

    temp = test.tolist()
    flat_list = [item for sublist in temp for item in sublist]
    test = pd.DataFrame(flat_list, columns=['senA', 'senB'])
    test['Label'] = 1
    test['senA'] = test['senA'].apply(str)
    test['senB'] = test['senB'].apply(str)
    print(train.shape, test.shape)

    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=True)

    train_InputExamples = train.apply(
        lambda x: run_classifier.InputExample(guid=None,
                                              text_a=x[DATA_COLUMN_A],
                                              text_b=x[DATA_COLUMN_B],
                                              label=x[LABEL_COLUMN]),
        axis=1)
    test_InputExamples = test.apply(
        lambda x: run_classifier.InputExample(guid=None,
                                              text_a=x[DATA_COLUMN_A],
                                              text_b=x[DATA_COLUMN_B],
                                              label=x[LABEL_COLUMN]),
        axis=1)

    train_features = run_classifier.convert_examples_to_features(
        train_InputExamples, label_list, FLAGS.max_seq_length, tokenizer)
    test_features = run_classifier.convert_examples_to_features(
        test_InputExamples, label_list, FLAGS.max_seq_length, tokenizer)

    OUTPUT_DIR = os.path.join(FLAGS.model_output_dir,
                              FLAGS.category_name + "_BertQA")
    tf.gfile.MakeDirs(OUTPUT_DIR)

    run_config = tf.estimator.RunConfig(
        model_dir=OUTPUT_DIR,
        keep_checkpoint_max=2,
        save_summary_steps=FLAGS.save_summary_steps,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps)

    num_train_steps = int(
        len(train_features) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
    num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    model_fn = model_fn_builder(
        bert_config=modeling.BertConfig.from_json_file(FLAGS.bert_config_file),
        num_labels=len(label_list),
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps)

    estimator = tf.estimator.Estimator(
        model_fn=model_fn,
        config=run_config,
        params={"batch_size": FLAGS.train_batch_size})

    train_input_fn = run_classifier.input_fn_builder(
        features=train_features,
        seq_length=FLAGS.max_seq_length,
        is_training=True,
        drop_remainder=True)

    print("Beginning Training!")
    current_time = datetime.now()
    # early_stopping = tf.contrib.estimator.stop_if_no_decrease_hook(
    #     estimator, metric_name='loss', max_steps_without_decrease=1000, min_steps=100)
    estimator.train(input_fn=train_input_fn,
                    max_steps=num_train_steps)  # ,hooks=[early_stopping]
    print("Training took time ", datetime.now() - current_time)

    test_input_fn = run_classifier.input_fn_builder(
        features=test_features,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=True)

    predictions = estimator.predict(test_input_fn)
    x = [prediction['scores'] for prediction in predictions]
    print('\n')
    print("The accuracy of BertQA on " + FLAGS.category_name + " is: " +
          str(sum(i > 0 for i in x) / len(x)))
    print('\n')
def main(): parser = argparse.ArgumentParser() ## Required parameters parser.add_argument( "--data_dir", default="data/VSNLI/", type=str, help= "The input data dir. Should contain the .tsv files (or other data files) for the task." ) parser.add_argument( "--bert_model", default="bert-base-uncased", type=str, help="Bert pre-trained model selected in the list: bert-base-uncased, " "bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, " "bert-base-multilingual-cased, bert-base-chinese.") parser.add_argument("--task_name", default="snliimg", type=str, help="The name of the task to train.") parser.add_argument( "--output_dir", default="output_vsnli", type=str, help= "The output directory where the model predictions and checkpoints will be written." ) parser.add_argument( "--tagger_path", default=None, type=str, help= "tagger_path for predictions if needing real-time tagging. Default: None, by loading pre-tagged data" "For example, the trained models by AllenNLP") parser.add_argument("--best_epochs", default=1.0, type=float, help="Best training epochs for prediction.") parser.add_argument("--max_num_aspect", default=3, type=int, help="max_num_aspect") ## Other parameters parser.add_argument("--grounding", action='store_true', help="whether to enable grounding.") parser.add_argument("--hypothesis_only", action='store_true', help="whether to enable grounding.") parser.add_argument( "--cache_dir", default="", type=str, help= "Where do you want to store the pre-trained models downloaded from s3") parser.add_argument( "--max_seq_length", default=128, type=int, help= "The maximum total input sequence length after WordPiece tokenization. \n" "Sequences longer than this will be truncated, and sequences shorter \n" "than this will be padded.") parser.add_argument("--do_train", action='store_true', help="Whether to run training.") parser.add_argument("--do_eval", action='store_true', help="Whether to run eval on the dev set.") parser.add_argument("--do_predict", action='store_true', help="Whether to run eval on the dev set.") parser.add_argument( "--do_lower_case", action='store_true', help="Set this flag if you are using an uncased model.") parser.add_argument("--train_batch_size", default=32, type=int, help="Total batch size for training.") parser.add_argument("--eval_batch_size", default=8, type=int, help="Total batch size for eval.") parser.add_argument("--learning_rate", default=3e-5, type=float, help="The initial learning rate for Adam.") parser.add_argument("--num_train_epochs", default=3.0, type=float, help="Total number of training epochs to perform.") parser.add_argument( "--warmup_proportion", default=0.1, type=float, help= "Proportion of training to perform linear learning rate warmup for. " "E.g., 0.1 = 10%% of training.") parser.add_argument("--no_cuda", action='store_true', help="Whether not to use CUDA when available") parser.add_argument("--local_rank", type=int, default=-1, help="local_rank for distributed training on gpus") parser.add_argument('--seed', type=int, default=42, help="random seed for initialization") parser.add_argument( '--gradient_accumulation_steps', type=int, default=1, help= "Number of updates steps to accumulate before performing a backward/update pass." ) parser.add_argument( '--fp16', action='store_true', help="Whether to use 16-bit float precision instead of 32-bit") parser.add_argument( '--loss_scale', type=float, default=0, help= "Loss scaling to improve fp16 numeric stability. 
Only used when fp16 set to True.\n" "0 (default value): dynamic loss scaling.\n" "Positive power of 2: static loss scaling value.\n") parser.add_argument('--server_ip', type=str, default='', help="Can be used for distant debugging.") parser.add_argument('--server_port', type=str, default='', help="Can be used for distant debugging.") args = parser.parse_args() if args.server_ip and args.server_port: # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script import ptvsd print("Waiting for debugger attach") ptvsd.enable_attach(address=(args.server_ip, args.server_port), redirect_output=True) ptvsd.wait_for_attach() processors = {"snliimg": SnliImgProcessor, "gsnliimg": GSnliImgProcessor} if args.local_rank == -1 or args.no_cuda: device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") n_gpu = torch.cuda.device_count() else: torch.cuda.set_device(args.local_rank) device = torch.device("cuda", args.local_rank) n_gpu = 1 # Initializes the distributed backend which will take care of sychronizing nodes/GPUs torch.distributed.init_process_group(backend='nccl') logger.info( "device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}". format(device, n_gpu, bool(args.local_rank != -1), args.fp16)) if args.gradient_accumulation_steps < 1: raise ValueError( "Invalid gradient_accumulation_steps parameter: {}, should be >= 1" .format(args.gradient_accumulation_steps)) args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps random.seed(args.seed) np.random.seed(args.seed) torch.manual_seed(args.seed) if n_gpu > 0: torch.cuda.manual_seed_all(args.seed) if not args.do_train and not args.do_eval and not args.do_predict: raise ValueError( "At least one of `do_train` or `do_eval` must be True.") if os.path.exists(args.output_dir) and os.listdir( args.output_dir) and args.do_train: raise ValueError( "Output directory ({}) already exists and is not empty.".format( args.output_dir)) if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) task_name = args.task_name.lower() if task_name not in processors: raise ValueError("Task not found: %s" % (task_name)) processor = processors[task_name]() # num_labels = num_labels_task[task_name] label_list = processor.get_labels() num_labels = len(label_list) tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case) if args.tagger_path != None: srl_predictor = SRLPredictor(args.tagger_path) else: srl_predictor = None train_examples = None num_train_optimization_steps = None if args.do_train: if args.grounding: train_premise_examples, train_hypothesis_examples = processor.get_train_examples( args.data_dir) train_examples = (train_premise_examples, train_hypothesis_examples) num_train_optimization_steps = int( len(train_premise_examples) / args.train_batch_size / args.gradient_accumulation_steps) * args.num_train_epochs else: train_examples = processor.get_train_examples(args.data_dir) num_train_optimization_steps = int( len(train_examples) / args.train_batch_size / args.gradient_accumulation_steps) * args.num_train_epochs if args.local_rank != -1: num_train_optimization_steps = num_train_optimization_steps // torch.distributed.get_world_size( ) train_features = None if args.do_train: if args.grounding: hypothesis_features = convert_examples_to_features( train_examples[1], label_list, args.max_seq_length, tokenizer, srl_predictor=srl_predictor) premises_features = convert_examples_to_features( train_examples[0], 
label_list, args.max_seq_length, tokenizer, srl_predictor=srl_predictor) train_features = (premises_features, hypothesis_features) else: train_features = convert_examples_to_features( train_examples, label_list, args.max_seq_length, tokenizer, srl_predictor=srl_predictor) # TagTokenizer.make_tag_vocab("tag_vocab", tag_vocab) tag_tokenizer = TagTokenizer() vocab_size = len(tag_tokenizer.ids_to_tags) print("tokenizer vocab size: ", str(vocab_size)) tag_config = TagConfig(tag_vocab_size=vocab_size, hidden_size=10, layer_num=1, output_dim=10, dropout_prob=0.1, num_aspect=args.max_num_aspect) # Prepare model cache_dir = args.cache_dir if args.cache_dir else os.path.join( PYTORCH_PRETRAINED_BERT_CACHE, 'distributed_{}'.format( args.local_rank)) if args.grounding: if args.hypothesis_only: model = GroundedImgClassificationTag.from_pretrained( args.bert_model, cache_dir=PYTORCH_PRETRAINED_BERT_CACHE / 'distributed_{}'.format(args.local_rank), num_labels=num_labels, tag_config=tag_config, image_emb_size=2048, hypothesis_only=True) else: model = GroundedImgClassificationTag.from_pretrained( args.bert_model, cache_dir=PYTORCH_PRETRAINED_BERT_CACHE / 'distributed_{}'.format(args.local_rank), num_labels=num_labels, tag_config=tag_config, image_emb_size=2048) else: model = BertForSequenceImgClassificationTag.from_pretrained( args.bert_model, cache_dir=PYTORCH_PRETRAINED_BERT_CACHE / 'distributed_{}'.format(args.local_rank), num_labels=num_labels, tag_config=tag_config, image_emb_size=2048) if args.fp16: model.half() model.to(device) if args.local_rank != -1: try: from apex.parallel import DistributedDataParallel as DDP except ImportError: raise ImportError( "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training." ) model = DDP(model) elif n_gpu > 1: model = torch.nn.DataParallel(model) # Prepare optimizer param_optimizer = list(model.named_parameters()) no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight'] optimizer_grouped_parameters = [{ 'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01 }, { 'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0 }] if args.fp16: try: from apex.optimizers import FP16_Optimizer from apex.optimizers import FusedAdam except ImportError: raise ImportError( "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training." 
) optimizer = FusedAdam(optimizer_grouped_parameters, lr=args.learning_rate, bias_correction=False, max_grad_norm=1.0) if args.loss_scale == 0: optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True) else: optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale) else: optimizer = BertAdam(optimizer_grouped_parameters, lr=args.learning_rate, warmup=args.warmup_proportion, t_total=num_train_optimization_steps) global_step = 0 nb_tr_steps = 0 tr_loss = 0 best_epoch = 0 best_result = 0.0 transform = transforms.Compose([ transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) if args.do_train: if not args.grounding: train_features = transform_tag_features(args.max_num_aspect, train_features, tag_tokenizer, args.max_seq_length) logger.info("***** Running training *****") logger.info(" Num examples = %d", len(train_examples)) logger.info(" Batch size = %d", args.train_batch_size) logger.info(" Num steps = %d", num_train_optimization_steps) # prepare data training data all_input_ids = [f.input_ids for f in train_features] all_input_mask = [f.input_mask for f in train_features] all_segment_ids = [f.segment_ids for f in train_features] all_label_ids = [f.label_id for f in train_features] all_start_end_idx = [ f.orig_to_token_split_idx for f in train_features ] all_input_tag_ids = [f.input_tag_ids for f in train_features] all_images = [f.image for f in train_features] train_data = SequenceImageDataset(all_input_ids, all_input_mask, all_segment_ids, all_start_end_idx, all_input_tag_ids, all_label_ids, all_images, transform, IMAGE_DIR) else: premises_train_features = transform_tag_features( args.max_num_aspect, train_features[0], tag_tokenizer, args.max_seq_length) hypothesis_train_features = transform_tag_features( args.max_num_aspect, train_features[1], tag_tokenizer, args.max_seq_length) assert len(premises_train_features) == len( hypothesis_train_features) logger.info("***** Running training *****") logger.info(" Num examples = %d", len(train_examples[0])) logger.info(" Batch size = %d", args.train_batch_size) logger.info(" Num steps = %d", num_train_optimization_steps) # prepare the premise training data all_premises_input_ids = [ f.input_ids for f in premises_train_features ] all_premises_input_mask = [ f.input_mask for f in premises_train_features ] all_premises_segment_ids = [ f.segment_ids for f in premises_train_features ] all_premises_start_end_idx = [ f.orig_to_token_split_idx for f in premises_train_features ] all_premises_input_tag_ids = [ f.input_tag_ids for f in premises_train_features ] # prepare the hypothesis training data all_hypothesis_input_ids = [ f.input_ids for f in hypothesis_train_features ] all_hypothesis_input_mask = [ f.input_mask for f in hypothesis_train_features ] all_hypothesis_segment_ids = [ f.segment_ids for f in hypothesis_train_features ] all_hypothesis_start_end_idx = [ f.orig_to_token_split_idx for f in hypothesis_train_features ] all_hypothesis_input_tag_ids = [ f.input_tag_ids for f in hypothesis_train_features ] all_images = [f.image for f in premises_train_features] all_label_ids = [f.label_id for f in premises_train_features] train_data = GroundedSequenceImageDataset( all_premises_input_ids, all_hypothesis_input_ids, all_premises_input_mask, all_hypothesis_input_mask, all_premises_segment_ids, all_hypothesis_segment_ids, all_premises_start_end_idx, all_hypothesis_start_end_idx, all_premises_input_tag_ids, all_hypothesis_input_tag_ids, 
all_label_ids, all_images, transform, IMAGE_DIR) if args.local_rank == -1: train_sampler = RandomSampler(train_data) else: train_sampler = DistributedSampler(train_data) train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=args.train_batch_size) # prepare validation data if args.grounding: eval_premise_examples, eval_hypothesis_examples = processor.get_dev_examples( args.data_dir) eval_hypothesis_features = convert_examples_to_features( eval_hypothesis_examples, label_list, args.max_seq_length, tokenizer, srl_predictor=srl_predictor) eval_premises_features = convert_examples_to_features( eval_premise_examples, label_list, args.max_seq_length, tokenizer, srl_predictor=srl_predictor) eval_premises_features = transform_tag_features( args.max_num_aspect, eval_premises_features, tag_tokenizer, args.max_seq_length) eval_hypothesis_features = transform_tag_features( args.max_num_aspect, eval_hypothesis_features, tag_tokenizer, args.max_seq_length) # prepare the premise training data all_premises_input_ids = [ f.input_ids for f in eval_premises_features ] all_premises_input_mask = [ f.input_mask for f in eval_premises_features ] all_premises_segment_ids = [ f.segment_ids for f in eval_premises_features ] all_premises_start_end_idx = [ f.orig_to_token_split_idx for f in eval_premises_features ] all_premises_input_tag_ids = [ f.input_tag_ids for f in eval_premises_features ] # prepare the hypothesis training data all_hypothesis_input_ids = [ f.input_ids for f in eval_hypothesis_features ] all_hypothesis_input_mask = [ f.input_mask for f in eval_hypothesis_features ] all_hypothesis_segment_ids = [ f.segment_ids for f in eval_hypothesis_features ] all_hypothesis_start_end_idx = [ f.orig_to_token_split_idx for f in eval_hypothesis_features ] all_hypothesis_input_tag_ids = [ f.input_tag_ids for f in eval_hypothesis_features ] all_images = [f.image for f in eval_hypothesis_features] all_label_ids = [f.label_id for f in eval_hypothesis_features] eval_data = GroundedSequenceImageDataset( all_premises_input_ids, all_hypothesis_input_ids, all_premises_input_mask, all_hypothesis_input_mask, all_premises_segment_ids, all_hypothesis_segment_ids, all_premises_start_end_idx, all_hypothesis_start_end_idx, all_premises_input_tag_ids, all_hypothesis_input_tag_ids, all_label_ids, all_images, transform, IMAGE_DIR) else: eval_examples = processor.get_dev_examples(args.data_dir) eval_features = convert_examples_to_features( eval_examples, label_list, args.max_seq_length, tokenizer, srl_predictor=srl_predictor) eval_features = transform_tag_features(args.max_num_aspect, eval_features, tag_tokenizer, args.max_seq_length) all_input_ids = [f.input_ids for f in eval_features] all_input_mask = [f.input_mask for f in eval_features] all_segment_ids = [f.segment_ids for f in eval_features] all_label_ids = [f.label_id for f in eval_features] all_start_end_idx = [ f.orig_to_token_split_idx for f in eval_features ] all_input_tag_ids = [f.input_tag_ids for f in eval_features] all_images = [f.image for f in eval_features] eval_data = SequenceImageDataset(all_input_ids, all_input_mask, all_segment_ids, all_start_end_idx, all_input_tag_ids, all_label_ids, all_images, transform, IMAGE_DIR) logger.info("***** Evaluation data *****") logger.info(" Num examples = %d", len(eval_examples)) logger.info(" Batch size = %d", args.eval_batch_size) eval_sampler = SequentialSampler(eval_data) eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=args.eval_batch_size) for epoch in 
trange(int(args.num_train_epochs), desc="Epoch"): model.train() tr_loss = 0 nb_tr_examples, nb_tr_steps = 0, 0 for step, batch in enumerate( tqdm(train_dataloader, desc="Iteration")): batch = tuple(t.to(device) for t in batch) input_ids, input_mask, segment_ids, start_end_idx, input_tag_ids, images, label_ids = batch loss = model(input_ids, segment_ids, input_mask, start_end_idx, input_tag_ids, images, label_ids) if n_gpu > 1: loss = loss.mean() # mean() to average on multi-gpu. if args.gradient_accumulation_steps > 1: loss = loss / args.gradient_accumulation_steps if args.fp16: optimizer.backward(loss) else: loss.backward() tr_loss += loss.item() nb_tr_examples += input_ids.size(0) nb_tr_steps += 1 if (step + 1) % args.gradient_accumulation_steps == 0: # modify learning rate with special warm up BERT uses lr_this_step = args.learning_rate * warmup_linear( global_step / num_train_optimization_steps, args.warmup_proportion) for param_group in optimizer.param_groups: param_group['lr'] = lr_this_step optimizer.step() optimizer.zero_grad() global_step += 1 # Save a trained model model_to_save = model.module if hasattr( model, 'module') else model # Only save the model it-self output_model_file = os.path.join(args.output_dir, str(epoch) + "_pytorch_model.bin") if args.do_train: torch.save(model_to_save.state_dict(), output_model_file) # run evaluation on dev data model_state_dict = torch.load(output_model_file) if not args.grounding: predict_model = BertForSequenceImgClassificationTag.from_pretrained( args.bert_model, state_dict=model_state_dict, num_labels=num_labels, tag_config=tag_config, image_emb_size=2048) else: predict_model = GroundedImgClassificationTag.from_pretrained( args.bert_model, state_dict=model_state_dict, num_labels=num_labels, tag_config=tag_config, image_emb_size=2048) predict_model.to(device) predict_model.eval() eval_loss, eval_accuracy = 0, 0 nb_eval_steps, nb_eval_examples = 0, 0 total_precision = np.zeros(3) total_recall = np.zeros(3) total_fscore = np.zeros(3) total_support = np.zeros(3, dtype=int) output_logits_file = os.path.join( args.output_dir, str(epoch) + "_eval_logits_results.tsv") with open(output_logits_file, "w") as writer: writer.write("index" + "\t" + "\t".join( ["logits " + str(i) for i in range(len(label_list))]) + "\n") for batch_number, batch in enumerate( tqdm(eval_dataloader, desc="Evaluating")): input_ids, input_mask, segment_ids, start_end_idx, input_tag_ids, images, label_ids = batch input_ids = input_ids.to(device) input_mask = input_mask.to(device) segment_ids = segment_ids.to(device) label_ids = label_ids.to(device) start_end_idx = start_end_idx.to(device) input_tag_ids = input_tag_ids.to(device) images = images.to(device) with torch.no_grad(): tmp_eval_loss = predict_model(input_ids, segment_ids, input_mask, start_end_idx, input_tag_ids, images, label_ids) logits = predict_model(input_ids, segment_ids, input_mask, start_end_idx, input_tag_ids, images, None) logits = logits.detach().cpu().numpy() label_ids = label_ids.to('cpu').numpy() tmp_eval_accuracy = accuracy_score(label_ids, np.argmax(logits, axis=1), normalize=False) eval_loss += tmp_eval_loss.mean().item() eval_accuracy += tmp_eval_accuracy precision, recall, fscore, support = precision_recall_fscore_support( label_ids, np.argmax(logits, axis=1), labels=[0, 1, 2]) total_precision = total_precision + precision total_recall = total_recall + recall total_fscore = total_fscore + fscore total_support = total_support + support nb_eval_examples += input_ids.size(0) nb_eval_steps += 1 del 
predict_model eval_loss = eval_loss / nb_eval_steps eval_accuracy = eval_accuracy / nb_eval_examples total_precision = total_precision / (batch_number + 1) total_recall = total_recall / (batch_number + 1) total_fscore = total_fscore / (batch_number + 1) if eval_accuracy > best_result: best_epoch = epoch best_result = eval_accuracy loss = tr_loss / nb_tr_steps if args.do_train else None result = { 'eval_loss': eval_loss, 'loss': loss, 'eval_accuracy': eval_accuracy, 'total_precision': { k: total_precision.tolist()[v] for k, v in processor.get_labels_map().items() }, 'total_recall': { k: total_recall.tolist()[v] for k, v in processor.get_labels_map().items() }, 'total_fscore': { k: total_fscore.tolist()[v] for k, v in processor.get_labels_map().items() }, 'total_support': { k: total_support.tolist()[v] for k, v in processor.get_labels_map().items() }, 'macro_precision': total_precision.mean(), 'macro_recall': total_recall.mean(), 'macro_support': total_support.sum(), 'macro_f1score': total_fscore.mean(), 'number_of_examples': nb_eval_examples } output_eval_file = os.path.join(args.output_dir, "eval_results.txt") with open(output_eval_file, "a") as writer: logger.info("***** Eval results *****") for key in sorted(result.keys()): logger.info("Epoch: %s, %s = %s", str(epoch), key, str(result[key])) writer.write("Epoch: %s, %s = %s\n" % (str(epoch), key, str(result[key]))) logger.info("best epoch: %s, result: %s", str(best_epoch), str(best_result))
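# warmup_linear() used in the manual learning-rate update above comes from
# pytorch-pretrained-bert; for reference, it is roughly the following (sketch):
def warmup_linear_sketch(x, warmup=0.002):
    """Linear warmup to the peak LR, then linear decay; x = fraction of training done."""
    if x < warmup:
        return x / warmup
    return 1.0 - x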
def BerQA_train_predict(data, is_training=True): d = data.copy() scores = [] max_inputs = 30000 LEARNING_RATE = 2e-5 NUM_TRAIN_EPOCHS = 2 WARMUP_PROPORTION = 0.1 # Model configs SAVE_CHECKPOINTS_STEPS = 1000 SAVE_SUMMARY_STEPS = 500 num_train_steps = 100 max_steps = 100000 DATA_COLUMN_A = 'senA' DATA_COLUMN_B = 'senB' LABEL_COLUMN = 'Label' label_list = [0, 1] while (len(d) > 0 and num_train_steps <= max_steps): line = min(max_inputs, len(d)) temp = d[:line] temp_t = temp.apply(qar_pair, axis=1) temp_t = temp_t.tolist() flat_list = [item for sublist in temp_t for item in sublist] temp_t = pd.DataFrame(flat_list, columns=['senA', 'senB']) temp_t['Label'] = 1 temp_t['senA'] = temp_t['senA'].apply(str) temp_t['senB'] = temp_t['senB'].apply(str) temp_InputExamples = temp_t.apply( lambda x: run_classifier.InputExample(guid=None, text_a=x[DATA_COLUMN_A], text_b=x[DATA_COLUMN_B], label=x[LABEL_COLUMN]), axis=1) temp_features = run_classifier.convert_examples_to_features( temp_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer) num_train_steps = int( len(temp_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS) + num_train_steps num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION) # Specify outpit directory and number of checkpoint steps to save run_config = tf.estimator.RunConfig( model_dir=OUTPUT_DIR, save_summary_steps=SAVE_SUMMARY_STEPS, save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS) model_fn = model_fn_builder(num_labels=len(label_list), learning_rate=LEARNING_RATE, num_train_steps=num_train_steps, num_warmup_steps=num_warmup_steps) estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config, params={"batch_size": BATCH_SIZE}) input_fn = input_fn_builder(features=temp_features, seq_length=MAX_SEQ_LENGTH, is_training=is_training, drop_remainder=True) if is_training: print('Beginning Training!') early_stopping = tf.contrib.estimator.stop_if_no_decrease_hook( estimator, metric_name='loss', max_steps_without_decrease=1000, min_steps=100) current_time = datetime.now() #tf.estimator.train_and_evaluate(estimator,train_spec=tf.estimator.TrainSpec(input_fn, hooks=[early_stopping])) estimator.train(input_fn=input_fn, max_steps=num_train_steps, hooks=[early_stopping]) print("Training took time ", datetime.now() - current_time) else: predictions = estimator.predict(input_fn) outputs = [(prediction['probabilities'], prediction['crq']) for prediction in predictions] x = [i[0] for i in outputs] y = [i[1] for i in outputs] print('\n') print('Accuracy of ' + category + ' is: ' + str(sum(i > 0 for i in x) / len(x))) print('\n') scores = scores + y if len(d) > max_inputs: d = d[line:] d = d.reset_index(drop=True) else: d = [] if is_training is False: data = data[:len(scores)] scores = [item.tolist() for item in scores] #BERTQA_scores = pd.DataFrame(data=scores) data['BERTQA_scores'] = scores #data = pd.concat([data,BERTQA_scores],axis=1,ignore_index=True) return data
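# Hypothetical usage of BerQA_train_predict() above (hedged): it relies on
# module-level globals (tokenizer, OUTPUT_DIR, MAX_SEQ_LENGTH, BATCH_SIZE, category)
# being defined before it is called.
def _score_with_bertqa(train_df, test_df):
    """Fine-tune on train_df in chunks, then score test_df."""
    BerQA_train_predict(train_df, is_training=True)
    return BerQA_train_predict(test_df, is_training=False)  # adds a 'BERTQA_scores' column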