def classify(search_sentence):
    examples = []
    guid = "dev-1"
    text_a = tokenization.convert_to_unicode(search_sentence)
    label = tokenization.convert_to_unicode("1")  # dummy label; ignored at prediction time
    examples.append(
        run_classifier.InputExample(guid=guid, text_a=text_a, text_b=None, label=label))
    predict_features = run_classifier.convert_examples_to_features(
        examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    predict_input_fn = run_classifier.input_fn_builder(
        features=predict_features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=True)
    result = estimator.predict(input_fn=predict_input_fn)
    # Only one example was submitted, so grab the first prediction and stop.
    for (i, prediction) in enumerate(result):
        probabilities = prediction["probabilities"]
        if i >= 1:
            break
    emotion = {"happy": 0, "sad": 0, "angry": 0, "surprised": 0}
    _emotion = ["happy", "sad", "angry", "surprised"]
    for (i, class_probability) in enumerate(probabilities):
        emotion[_emotion[i]] = class_probability
        print(_emotion[i] + ": " + str(class_probability))
    return emotion
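# A minimal, stand-alone illustration of the label mapping classify() performs
# (the probability values here are invented for the example):
probabilities = [0.05, 0.10, 0.05, 0.80]
_emotion = ["happy", "sad", "angry", "surprised"]
emotion = dict(zip(_emotion, probabilities))
assert max(emotion, key=emotion.get) == "surprised"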
def model_eval(estimator):
    # Eval the model.
    eval_examples = processor.get_dev_examples(TASK_DATA_DIR)
    eval_features = run_classifier.convert_examples_to_features(
        eval_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    print('***** Started evaluation at {} *****'.format(datetime.datetime.now()))
    print('  Num examples = {}'.format(len(eval_examples)))
    print('  Batch size = {}'.format(EVAL_BATCH_SIZE))
    # Eval will be slightly WRONG on the TPU because it will truncate
    # the last batch.
    eval_steps = int(len(eval_examples) / EVAL_BATCH_SIZE)
    eval_input_fn = run_classifier.input_fn_builder(
        features=eval_features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=True)
    result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)
    print('***** Finished evaluation at {} *****'.format(datetime.datetime.now()))
    output_eval_file = os.path.join(OUTPUT_DIR, "eval_results.txt")
    with tf.gfile.GFile(output_eval_file, "w") as writer:
        print("***** Eval results *****")
        for key in sorted(result.keys()):
            print('  {} = {}'.format(key, str(result[key])))
            writer.write("%s = %s\n" % (key, str(result[key])))
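# Why eval is "slightly WRONG" on the TPU: int() floors the step count, so the
# trailing partial batch is never evaluated. A stand-alone check with invented
# numbers:
example_count, batch_size = 1042, 8
steps = int(example_count / batch_size)
assert steps == 130
assert steps * batch_size == 1040  # the last 2 examples are silently dropped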
def predict(sentence):
    input_example = run_classifier.InputExample(
        guid="", text_a=sentence, text_b=None,
        label="0")  # "0" is just a dummy label; it is ignored at prediction time
    input_examples = [input_example]
    input_features = run_classifier.convert_examples_to_features(
        input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    predict_input_fn = run_classifier.input_fn_builder(
        features=input_features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)
    predictions = estimator.predict(input_fn=predict_input_fn)
    df = pd.DataFrame(predictions)
    label_index = df.iat[0, 0].argmax()
    confidence = df.iat[0, 0].max()
    predicted_label = emotions.iat[int(label_index), 0]
    return {
        'predicted_label': predicted_label,
        'confidence': confidence.item()
    }
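# The argmax/confidence extraction in predict(), isolated on an invented
# probability vector (requires only numpy):
import numpy as np
probs = np.array([0.07, 0.85, 0.08])
assert probs.argmax() == 1             # index of the predicted label
assert abs(probs.max() - 0.85) < 1e-9  # its probability, i.e. the confidence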
def getPrediction(in_sentences):
    '''Provide class predictions for a list of input sentences.
    :param in_sentences: list of raw input sentences
    :return: list of (sentence, probabilities, labels) tuples
    '''
    # pre-process input; "" and 0 are just dummy guid/label values
    input_examples = [
        run_classifier.InputExample(guid="", text_a=x, text_b=None, label=0)
        for x in in_sentences
    ]
    input_features = run_classifier.convert_examples_to_features(
        input_examples, label_list, FLAGS.MAX_SEQ_LENGTH, tokenizer)
    # create model function input
    predict_input_fn = run_classifier.input_fn_builder(
        features=input_features,
        seq_length=FLAGS.MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)
    # calculate the predictions
    predictions = estimator.predict(predict_input_fn)
    output = [(sentence, prediction['probabilities'], prediction['labels'])
              for sentence, prediction in zip(in_sentences, predictions)]
    return output
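# How the final zip in getPrediction() pairs sentences with predictions, shown
# with dummy prediction dicts in place of the estimator output:
in_sents = ["great movie", "terrible plot"]
preds = [{'probabilities': [0.1, 0.9], 'labels': 1},
         {'probabilities': [0.8, 0.2], 'labels': 0}]
out = [(s, p['probabilities'], p['labels']) for s, p in zip(in_sents, preds)]
assert out[0] == ("great movie", [0.1, 0.9], 1)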
def FLTR_Prediction(d, tokenizer, estimator):
    """Rank all reviews according to how relevant they are to a given question."""
    d = d.reset_index(drop=True)
    test_t = d.apply(QR_pair, axis=1)
    test_t = test_t.tolist()
    flat_list = [item for sublist in test_t for item in sublist]
    test_t = pd.DataFrame(flat_list, columns=['question', 'review'])
    test_t['question'] = test_t['question'].apply(str)
    test_t['review'] = test_t['review'].apply(str)

    DATA_COLUMN_A = 'question'
    DATA_COLUMN_B = 'review'
    label_list = [0, 1]
    max_inputs = 1000000
    probs = []
    temp_test = test_t.copy()
    # Predict in chunks of at most max_inputs question-review pairs.
    while len(temp_test) > 0:
        line = min(max_inputs, len(temp_test))
        temp = temp_test[:line]
        inputExamples = temp.apply(
            lambda x: run_classifier.InputExample(guid=None,
                                                  text_a=x[DATA_COLUMN_A],
                                                  text_b=x[DATA_COLUMN_B],
                                                  label=0),  # dummy label
            axis=1)
        input_features = run_classifier.convert_examples_to_features(
            inputExamples, label_list, FLAGS.max_seq_length, tokenizer)
        predict_input_fn = run_classifier.input_fn_builder(
            features=input_features,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=False)
        predictions = estimator.predict(predict_input_fn)
        probabilities = [prediction['probabilities'] for prediction in predictions]
        probs = probs + [item.tolist()[1] for item in probabilities]
        if len(temp_test) > max_inputs:
            temp_test = temp_test[line:]
            temp_test = temp_test.reset_index(drop=True)
        else:
            temp_test = []
    test_t['probabilities'] = probs

    # Distribute the flat probability list back onto the per-question rows.
    num_reviews = d['num_reviews'].tolist()
    d['FLTR_scores'] = ''
    for i in range(0, len(d)):
        n = num_reviews[i]
        d.at[i, 'FLTR_scores'] = probs[:n]
        if i != len(d) - 1:
            probs = probs[n:]
    return d
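# The per-question slicing at the end of FLTR_Prediction(), isolated with toy
# numbers: three questions with 2, 1 and 3 reviews respectively get their own
# slices of the flat probability list.
flat_probs = [0.9, 0.1, 0.7, 0.2, 0.4, 0.6]
review_counts = [2, 1, 3]
per_question = []
for n in review_counts:
    per_question.append(flat_probs[:n])
    flat_probs = flat_probs[n:]
assert per_question == [[0.9, 0.1], [0.7], [0.2, 0.4, 0.6]]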
def predict(self, pred_data):
    input_features = run_classifier.convert_examples_to_features(
        pred_data, self.label_list, MAX_SEQ_LENGTH, self.tokenizer)
    predict_input_fn = run_classifier.input_fn_builder(
        features=input_features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)
    predictions = self.estimator.predict(predict_input_fn)
    return predictions
def model_predict(estimator):
    # Make predictions on a subset of eval examples
    prediction_examples = processor.get_dev_examples(TASK_DATA_DIR)[:PREDICT_BATCH_SIZE]
    input_features = run_classifier.convert_examples_to_features(
        prediction_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    predict_input_fn = run_classifier.input_fn_builder(
        features=input_features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=True)
    predictions = estimator.predict(predict_input_fn)
    for example, prediction in zip(prediction_examples, predictions):
        print('text_a: %s\ntext_b: %s\nlabel: %s\nprediction: %s\n' %
              (example.text_a, example.text_b, str(example.label),
               prediction['probabilities']))
def getPrediction(self, in_sentences):
    label_list = [0, 1]
    input_examples = [
        run_classifier.InputExample(guid="", text_a=x, text_b=None, label=0)
        for x in in_sentences
    ]  # guid and label are dummy values; they are ignored at prediction time
    input_features = run_classifier.convert_examples_to_features(
        input_examples, label_list, self.MAX_SEQ_LENGTH, self.tokenizer)
    predict_input_fn = run_classifier.input_fn_builder(
        features=input_features,
        seq_length=self.MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)
    predictions = self.estimator.predict(predict_input_fn)
    return [(sentence, prediction['encode_vec'])
            for sentence, prediction in zip(in_sentences, predictions)]
def model_train(estimator):
    print('MRPC/CoLA on BERT base model normally takes about 2-3 minutes. Please wait...')
    # We'll set sequences to be at most 128 tokens long.
    train_features = run_classifier.convert_examples_to_features(
        train_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    print('***** Started training at {} *****'.format(datetime.datetime.now()))
    print('  Num examples = {}'.format(len(train_examples)))
    print('  Batch size = {}'.format(TRAIN_BATCH_SIZE))
    tf.logging.info("  Num steps = %d", num_train_steps)
    train_input_fn = run_classifier.input_fn_builder(
        features=train_features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=True,
        drop_remainder=True)
    estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
    print('***** Finished training at {} *****'.format(datetime.datetime.now()))
def scorePredict(self):
    sentences = self.sentences
    input_examples = [
        run_classifier.InputExample(guid="", text_a=x, text_b=None, label="0")
        for x in sentences
    ]  # "0" is just a dummy label; it is ignored at prediction time
    input_features = run_classifier.convert_examples_to_features(
        input_examples, ['0', '1', '-1'], self.MAX_SEQ_LENGTH, self.tokenizer)
    predict_input_fn = run_classifier.input_fn_builder(
        features=input_features,
        seq_length=self.MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)
    predictions = self.estimator.predict(input_fn=predict_input_fn)
    self.pred = list(predictions)
    return self.pred
def predict(xs, max_seq_length, processor, estimator, tokenizer, dataset_name):
    if dataset_name == 'yelp':
        lines = [["0", x] for x in xs]
    else:
        # Non-yelp processors appear to skip the first row, so prepend an
        # empty placeholder row before the real examples.
        lines = [[]]
        lines += [["0", x] for x in xs]
    predict_examples = processor._create_examples(lines, 'test')
    tf.logging.info("***** Running prediction *****")
    tf.logging.info("  Num examples = %d", len(predict_examples))
    features = convert_examples_to_features(
        predict_examples, processor.get_labels(), max_seq_length, tokenizer)
    predict_input_fn = input_fn_builder(
        features,
        seq_length=max_seq_length,
        is_training=False,
        drop_remainder=False)
    result = estimator.predict(input_fn=predict_input_fn)
    return np.array(list(result))
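# The two `lines` layouts built above, with toy inputs. The non-yelp branch
# prepends an empty row, presumably because those processors treat row 0 as a
# header (an assumption; check the processor's _create_examples):
xs = ["some text", "more text"]
yelp_lines = [["0", x] for x in xs]
other_lines = [[]] + [["0", x] for x in xs]
assert yelp_lines[0] == ["0", "some text"]
assert other_lines[0] == [] and other_lines[1] == ["0", "some text"]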
def fit(self, train_data):
    params = self.params
    # Compute the number of train and warmup steps from batch size
    num_train_steps = int(
        len(train_data) / TRAIN_BATCH_SIZE * params['NUM_TRAIN_EPOCHS'])
    num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)
    run_config = tf.estimator.RunConfig(
        model_dir=self.OUTPUT_DIR,
        save_summary_steps=SAVE_SUMMARY_STEPS,
        save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)
    model_fn = self.model_fn_builder(
        num_labels=len(self.label_list),
        learning_rate=LEARNING_RATE,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps)
    self.estimator = tf.estimator.Estimator(
        model_fn=model_fn,
        model_dir=self.OUTPUT_DIR,
        config=run_config,
        params={"batch_size": TRAIN_BATCH_SIZE})
    train_features = run_classifier.convert_examples_to_features(
        train_data, self.label_list, MAX_SEQ_LENGTH, self.tokenizer)
    print('***** Started training at {} *****'.format(datetime.datetime.now()))
    print('  Num examples = {}'.format(len(train_data)))
    print('  Batch size = {}'.format(TRAIN_BATCH_SIZE))
    tf.logging.info("  Num steps = %d", num_train_steps)
    train_input_fn = run_classifier.input_fn_builder(
        features=train_features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=True,
        drop_remainder=False)
    self.estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
    print('***** Finished training at {} *****'.format(datetime.datetime.now()))
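# A quick stand-alone check of the step arithmetic used in fit(), with
# invented sizes: 10,000 examples, batch size 32, 3 epochs, 10% warmup.
examples, batch, epochs, warmup = 10000, 32, 3.0, 0.1
train_steps = int(examples / batch * epochs)
warmup_steps = int(train_steps * warmup)
assert train_steps == 937
assert warmup_steps == 93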
def main(_):
    """For the current work we only use the following four categories,
    but you can add others if you would like to."""
    categories = [
        'Tools_and_Home_Improvement',
        'Patio_Lawn_and_Garden',
        'Electronics',
        'Baby',
    ]
    data = None
    if FLAGS.category_name == "All_Categories":
        # Cross-domain pre-training (All_Categories).
        for category in categories:
            category_data_path = os.path.join(FLAGS.data_dir, category + '.txt')
            category_data = pd.read_csv(
                category_data_path, sep='\t', encoding='utf-8',
                converters={'QA': ast.literal_eval,
                            'reviewText': ast.literal_eval})
            # Keep the first 80% (the training split) of each category.
            # The original sliced with len(data), which is None on the first
            # pass; len(category_data) appears to be what was intended.
            category_data = category_data[:int(len(category_data) * 0.8)]
            if data is None:
                data = category_data
            else:
                data = pd.concat([data, category_data], axis=0)
        data = data.sample(n=min(len(data), 60000))
    else:
        data_path = os.path.join(FLAGS.data_dir, FLAGS.category_name + '.txt')
        data = pd.read_csv(
            data_path, sep='\t', encoding='utf-8',
            converters={'QA': ast.literal_eval,
                        'reviewText': ast.literal_eval})

    data['question'] = data['QA'].apply(lambda x: x['questionText'])
    data['answer'] = data['QA'].apply(
        lambda x: x['answers'][0]['answerText'] if len(x['answers']) > 0 else PAD_WORD)
    data['num_reviews'] = data['reviewText'].apply(lambda x: len(x))

    # Build positive (question, answer) pairs plus negatives made by
    # shuffling the true answers.
    train = data[:int(len(data) * 0.8)]
    list_of_answers = list(train['answer'])
    list_of_answers = shuffle(list_of_answers)
    qa = train[['question', 'answer']]
    nqa = pd.DataFrame({'question': train['question'].tolist(),
                        'answer': list_of_answers})
    qa['label'] = 1
    nqa['label'] = 0
    d = pd.concat([qa, nqa], axis=0)
    d = shuffle(d)
    d['question'] = d['question'].apply(str)
    d['answer'] = d['answer'].apply(str)
    split = int(len(d) * 0.9)
    dtrain = d[0:split]
    dtest = d[split:]

    DATA_COLUMN_A = 'question'
    DATA_COLUMN_B = 'answer'
    LABEL_COLUMN = 'label'
    label_list = [0, 1]
    tokenizer = tokenization.FullTokenizer(
        vocab_file=FLAGS.vocab_file, do_lower_case=True)
    train_InputExamples = dtrain.apply(
        lambda x: run_classifier.InputExample(guid=None,
                                              text_a=x[DATA_COLUMN_A],
                                              text_b=x[DATA_COLUMN_B],
                                              label=x[LABEL_COLUMN]),
        axis=1)
    test_InputExamples = dtest.apply(
        lambda x: run_classifier.InputExample(guid=None,
                                              text_a=x[DATA_COLUMN_A],
                                              text_b=x[DATA_COLUMN_B],
                                              label=x[LABEL_COLUMN]),
        axis=1)
    train_features = run_classifier.convert_examples_to_features(
        train_InputExamples, label_list, FLAGS.max_seq_length, tokenizer)
    test_features = run_classifier.convert_examples_to_features(
        test_InputExamples, label_list, FLAGS.max_seq_length, tokenizer)

    OUTPUT_DIR = os.path.join(FLAGS.model_output_dir,
                              FLAGS.category_name + '_FLTR')
    tf.gfile.MakeDirs(OUTPUT_DIR)
    run_config = tf.estimator.RunConfig(
        model_dir=OUTPUT_DIR,
        save_summary_steps=FLAGS.save_summary_steps,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps)
    num_train_steps = int(
        len(train_features) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
    num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)
    model_fn = model_fn_builder(
        bert_config=modeling.BertConfig.from_json_file(FLAGS.bert_config_file),
        num_labels=len(label_list),
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps)
    estimator = tf.estimator.Estimator(
        model_fn=model_fn,
        config=run_config,
        params={"batch_size": FLAGS.train_batch_size})
    train_input_fn = run_classifier.input_fn_builder(
        features=train_features,
        seq_length=FLAGS.max_seq_length,
        is_training=True,
        drop_remainder=True)
    print("Beginning Training!")
    current_time = datetime.now()
    estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
    print("Training took time ", datetime.now() - current_time)
    test_input_fn = run_classifier.input_fn_builder(
        features=test_features,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=False)
    predictions = estimator.predict(test_input_fn)
    x = [np.argmax(prediction['probabilities']) for prediction in predictions]
    dtest['prediction'] = x
    print("The accuracy of FLTR on " + FLAGS.category_name + " is: " +
          str(accuracy_score(dtest.label, dtest.prediction)))

    if FLAGS.do_predict:
        print("Beginning Prediction!")
        data_with_FLTR_predictions = FLTR_Prediction(data, tokenizer, estimator)
        if data_with_FLTR_predictions.isnull().values.any():
            data_with_FLTR_predictions = data_with_FLTR_predictions.replace(
                np.nan, "[PAD]", regex=True)
        data_with_FLTR_predictions.to_csv(
            os.path.join(FLAGS.data_dir, FLAGS.category_name + '.txt'),
            index=None, sep='\t', mode='w')
        print("Prediction End!")
# For TPU, we will append `PaddingInputExample`s to keep the batch size fixed.
if USE_TPU:
    while len(predict_examples) % EVAL_BATCH_SIZE != 0:
        predict_examples.append(run_classifier.PaddingInputExample())

# Converting to features
predict_features = run_classifier.convert_examples_to_features(
    predict_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
print('  Num examples = {}'.format(num_predict_examples))
print('  Batch size = {}'.format(PREDICT_BATCH_SIZE))

# Input function for prediction
predict_input_fn = run_classifier.input_fn_builder(
    predict_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=False,
    drop_remainder=False)
result = list(estimator.predict(input_fn=predict_input_fn))
print(result)
for ex_i in range(num_predict_examples):
    print("****** Example {} ******".format(ex_i))
    print("Question1 :", sent_pairs[ex_i][0])
    print("Question2 :", sent_pairs[ex_i][1])
    print("Prediction :", result[ex_i]['probabilities'][1])

# Converting test examples to features
print("################ Processing Test Data #####################")
TEST_TF_RECORD = os.path.join(OUTPUT_DIR, "test.tf_record")
test_examples = processor.get_test_examples(TASK_DATA_DIR)
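# The TPU padding loop above, isolated: pad the example list up to a multiple
# of the batch size with sentinel objects (a toy stand-in for
# run_classifier.PaddingInputExample):
class _Pad:
    pass

examples = list(range(10))  # 10 real examples
batch = 8
while len(examples) % batch != 0:
    examples.append(_Pad())
assert len(examples) == 16  # 6 padding examples appended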
def main(_):
    """For the current work we only use the following four categories,
    but you can add others if you would like to."""
    categories = [
        'Tools_and_Home_Improvement',
        'Patio_Lawn_and_Garden',
        'Electronics',
        'Baby',
    ]
    models = ['FLTR', 'BertQA']
    data_path = os.path.join(FLAGS.data_dir, 'Annotated_Data.txt')
    data = pd.read_csv(data_path, sep='\t', encoding='utf-8',
                       converters={'annotation_score': ast.literal_eval,
                                   'reviews': ast.literal_eval})
    data = data.reset_index()
    # Expand each row into one [index, question, review] triple per review.
    data['qr'] = data[['index', 'question', 'reviews']].apply(
        lambda x: [[x['index'], x['question'], i] for i in x['reviews']],
        axis=1)

    d = []
    for category in categories:
        qr = data[data['category'] == category]['qr'].tolist()
        qr = [item for sublist in qr for item in sublist]
        qr = pd.DataFrame(columns=['index', 'question', 'review'], data=qr)
        qr['label'] = 1
        temp = qr.copy()
        temp['question'] = temp['question'].apply(str)
        temp['review'] = temp['review'].apply(str)

        DATA_COLUMN_A = 'question'
        DATA_COLUMN_B = 'review'
        LABEL_COLUMN = 'label'
        label_list = [0, 1]
        tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                               do_lower_case=True)
        test_InputExamples = temp.apply(
            lambda x: run_classifier.InputExample(guid=None,
                                                  text_a=x[DATA_COLUMN_A],
                                                  text_b=x[DATA_COLUMN_B],
                                                  label=x[LABEL_COLUMN]),
            axis=1)
        test_features = run_classifier.convert_examples_to_features(
            test_InputExamples, label_list, FLAGS.max_seq_length, tokenizer)

        t = data[data['category'] == category]
        t = t.reset_index(drop=True)
        for model in models:
            OUTPUT_DIR = os.path.join(FLAGS.model_output_dir,
                                      category + '_' + model)
            run_config = tf.estimator.RunConfig(
                model_dir=OUTPUT_DIR,
                save_summary_steps=100,
                save_checkpoints_steps=100)
            if model == 'BertQA':
                model_fn = model_fn_builder_BertQA(
                    bert_config=modeling.BertConfig.from_json_file(
                        FLAGS.bert_config_file),
                    num_labels=len(label_list),
                    init_checkpoint=OUTPUT_DIR,
                    learning_rate=FLAGS.learning_rate,
                    num_train_steps=100,
                    num_warmup_steps=100)
            else:
                model_fn = model_fn_builder_FLTR(
                    bert_config=modeling.BertConfig.from_json_file(
                        FLAGS.bert_config_file),
                    num_labels=len(label_list),
                    init_checkpoint=OUTPUT_DIR,
                    learning_rate=FLAGS.learning_rate,
                    num_train_steps=100,
                    num_warmup_steps=100)
            estimator = tf.estimator.Estimator(
                model_fn=model_fn,
                config=run_config,
                params={"batch_size": FLAGS.train_batch_size})
            test_input_fn = run_classifier.input_fn_builder(
                features=test_features,
                seq_length=FLAGS.max_seq_length,
                is_training=False,
                drop_remainder=False)
            predictions = estimator.predict(test_input_fn)
            probabilities = [prediction['probabilities']
                             for prediction in predictions]
            probabilities = [list(item) for item in probabilities]
            # FLTR's relevance probability is column 1; BertQA's score is
            # read from column 0.
            if model == 'FLTR':
                probabilities = [item[1] for item in probabilities]
            else:
                probabilities = [item[0] for item in probabilities]
            print(model, ' :', probabilities[:10])
            temp[model + '_score'] = probabilities
            temp_groupby = temp.groupby(
                ['index', 'question'],
                sort=False)[model + '_score'].apply(list).reset_index(
                    name=model + '_score')
            t = pd.concat([t, temp_groupby[model + '_score']], axis=1)
        if len(d) == 0:
            d = t
        else:
            d = pd.concat([d, t], axis=0, ignore_index=True)
    d.to_csv(os.path.join(FLAGS.data_dir, 'test_predictions.txt'),
             index=None, sep='\t', mode='w')
def main():
    tf.logging.set_verbosity(tf.logging.INFO)
    if not FLAGS.do_train and not FLAGS.do_eval:
        raise ValueError("At least one of `do_train` or `do_eval` must be True.")
    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))
    tf.gfile.MakeDirs(FLAGS.output_dir)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
    is_per_host = tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))
    session_config = tf.ConfigProto(log_device_placement=True)
    session_config.gpu_options.allow_growth = True
    # RunConfig.replace() returns a new config rather than mutating in place,
    # so the result must be reassigned.
    run_config = run_config.replace(session_config=session_config)

    num_train_steps = None
    num_warmup_steps = None
    with open('cqa_data.pkl', 'rb') as fr:
        train_features, dev_cid, dev_features = pkl.load(fr)
    dev_label = [feature.label_id for feature in dev_features]
    if FLAGS.do_train:
        num_train_steps = int(
            len(train_features) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    model_fn = model_fn_builder(
        bert_config=bert_config,
        num_labels=2,
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps,
        use_tpu=FLAGS.use_tpu,
        use_one_hot_embeddings=FLAGS.use_tpu,
        dev_cid=dev_cid)

    # If TPU is not available, this will fall back to a normal Estimator on
    # CPU or GPU.
    estimator = tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        # params={'batch_size': FLAGS.train_batch_size},
        train_batch_size=FLAGS.train_batch_size,
        predict_batch_size=FLAGS.eval_batch_size)

    if FLAGS.do_train:
        tf.logging.info("***** Running training *****")
        tf.logging.info("  Num examples = %d", len(train_features))
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", num_train_steps)
        train_input_fn = input_fn_builder(
            features=train_features,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True)
        estimator.train(
            input_fn=train_input_fn,
            max_steps=num_train_steps,
            hooks=[
                EvalHook(estimator=estimator,
                         dev_features=dev_features,
                         dev_label=dev_label,
                         dev_cid=dev_cid,
                         max_seq_length=FLAGS.max_seq_length,
                         eval_steps=FLAGS.save_checkpoints_steps,
                         checkpoint_dir=FLAGS.output_dir)
            ])

    if FLAGS.do_eval:
        tf.logging.info("***** Running evaluation *****")
        tf.logging.info("  Num examples = %d", len(dev_features))
        tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)
        # This tells the estimator to run through the entire set.
        eval_steps = None
        # However, if running eval on the TPU, you will need to specify the
        # number of steps.
        if FLAGS.use_tpu:
            # Eval will be slightly WRONG on the TPU because it will truncate
            # the last batch.
            eval_steps = int(len(dev_features) / FLAGS.eval_batch_size)
        eval_drop_remainder = True if FLAGS.use_tpu else False
        eval_input_fn = input_fn_builder(
            features=dev_features,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=eval_drop_remainder)
        predictions = estimator.predict(eval_input_fn, yield_single_examples=False)
        res = np.concatenate([a for a in predictions], axis=0)
        print(res.shape, np.array(dev_label).shape)
        metrics = PRF(np.array(dev_label), res.argmax(axis=-1))
        MAP, AvgRec, MRR = eval_reranker(dev_cid, dev_label, res[:, 0])
        metrics['MAP'] = MAP
        metrics['AvgRec'] = AvgRec
        metrics['MRR'] = MRR
        print_metrics(metrics, 'dev')
    num_labels=len(label_list),
    learning_rate=FLAGS.LEARNING_RATE,
    num_train_steps=num_train_steps,
    num_warmup_steps=num_warmup_steps)

# initializes the estimator
estimator = tf.estimator.Estimator(
    model_fn=model_fn,
    config=run_config,
    params={"batch_size": FLAGS.BATCH_SIZE})

# <------------------ Begin training
# Create an input function for training. drop_remainder = True for using TPUs.
train_input_fn = run_classifier.input_fn_builder(
    features=train_features,
    seq_length=FLAGS.MAX_SEQ_LENGTH,
    is_training=True,
    drop_remainder=False)
tf.logging.info('Beginning Training!')
current_time = datetime.now()
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
tf.logging.info("Training took time %s" % (datetime.now() - current_time))

# <------------------ Begin evaluation on test data
tf.logging.info('Beginning Evaluation!')
test_input_fn = run_classifier.input_fn_builder(
    features=test_features,
    seq_length=FLAGS.MAX_SEQ_LENGTH,
    is_training=False,
model_fn = run_classifier.model_fn_builder(
    bert_config=modeling.BertConfig.from_json_file(CONFIG_FILE),
    num_labels=len(label_list),
    init_checkpoint=INIT_CHECKPOINT,
    learning_rate=LEARNING_RATE,
    num_train_steps=num_train_steps,
    num_warmup_steps=num_warmup_steps,
    use_tpu=False,
    use_one_hot_embeddings=True)

estimator = tf.contrib.tpu.TPUEstimator(
    use_tpu=False,
    model_fn=model_fn,
    config=run_config,
    train_batch_size=TRAIN_BATCH_SIZE,
    eval_batch_size=EVAL_BATCH_SIZE)

print('\n__________\nStarted training at {} '.format(datetime.datetime.now()))
print('\nNum examples = {}'.format(len(train_examples)))
print('\nBatch size = {}'.format(TRAIN_BATCH_SIZE))
tf.logging.info("Num steps = %d", num_train_steps)
train_input_fn = run_classifier.input_fn_builder(
    features=train_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=True,
    drop_remainder=True)
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
print('\n__________\nFinished training at {}'.format(datetime.datetime.now()))
print('\nTotal time taken to fine tune the model ',
      round(time.time() - t, 2), ' s')
def run_experiment(experiments, use_tpu, tpu_address, repeat, num_train_steps,
                   username, comment, store_last_layer):
    logger.info(
        f'Getting ready to run the following experiments for {repeat} repeats: {experiments}')

    def get_run_config(output_dir):
        return tf.contrib.tpu.RunConfig(
            cluster=tpu_cluster_resolver,
            model_dir=output_dir,
            save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS,
            tpu_config=tf.contrib.tpu.TPUConfig(
                iterations_per_loop=ITERATIONS_PER_LOOP,
                num_shards=NUM_TPU_CORES,
                per_host_input_for_training=tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2))

    def parse_experiments_argument(experiments):
        """Returns the list of experiment IDs parsed from a comma-separated
        string; ranges like '3-5' are expanded inclusively."""
        exp_list = []
        for part in experiments.split(','):
            if '-' in part:
                a, b = part.split('-')
                a, b = int(a), int(b)
                exp_list.extend(range(a, b + 1))
            else:
                exp_list.append(part)
        exp_list = [str(s) for s in exp_list]
        return exp_list

    experiments = parse_experiments_argument(experiments)
    last_completed_train = ""
    completed_train_dirs = []

    for exp_nr in experiments:
        logger.info(f"***** Starting Experiment {exp_nr} *******")
        logger.info(f"***** {experiment_definitions[exp_nr]['name']} ******")
        logger.info("***********************************************")

        # Get a unique ID for every experiment run
        experiment_id = str(uuid.uuid4())

        ###########################
        ######### TRAINING ########
        ###########################
        # Only train a new model if a similar model wasn't just trained;
        # this saves considerable computation time.
        train_annot_dataset = experiment_definitions[exp_nr]["train_annot_dataset"]
        if train_annot_dataset != last_completed_train:
            # Set a fresh output directory every time training starts, and
            # point the TF Hub cache at it.
            temp_output_dir = os.path.join(TEMP_OUTPUT_BASEDIR, experiment_id)
            os.environ['TFHUB_CACHE_DIR'] = temp_output_dir
            logger.info(f"***** Setting temporary dir {temp_output_dir} **")
            logger.info(f"***** Train started in {temp_output_dir} **")
            tokenizer = tokenization.FullTokenizer(
                vocab_file=os.path.join(BERT_MODEL_DIR, 'vocab.txt'),
                do_lower_case=LOWER_CASED)
            if tpu_address:
                tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
                    tpu_address)
            else:
                tpu_cluster_resolver = None
            processor = vaccineStanceProcessor()
            label_list = processor.get_labels()
            label_mapping = dict(zip(range(len(label_list)), label_list))
            train_examples = processor.get_train_examples(
                os.path.join('data', train_annot_dataset))
            num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

            # Initiation
            bert_config = modeling.BertConfig.from_json_file(
                os.path.join(BERT_MODEL_DIR, 'bert_config.json'))
            model_fn = model_fn_builder(
                bert_config=bert_config,
                num_labels=len(label_list),
                init_checkpoint=BERT_MODEL_FILE,
                learning_rate=LEARNING_RATE,
                num_train_steps=num_train_steps,
                num_warmup_steps=num_warmup_steps,
                use_tpu=use_tpu,
                use_one_hot_embeddings=True,
                extract_last_layer=store_last_layer)
            estimator = tf.contrib.tpu.TPUEstimator(
                use_tpu=use_tpu,
                model_fn=model_fn,
                config=get_run_config(temp_output_dir),
                train_batch_size=TRAIN_BATCH_SIZE,
                eval_batch_size=EVAL_BATCH_SIZE,
                predict_batch_size=PREDICT_BATCH_SIZE)
            train_features = run_classifier.convert_examples_to_features(
                train_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
            logger.info(
                '***** Fine-tuning the BERT base model normally takes a few minutes. Please wait...'
            )
            logger.info('***** Started training using {} at {} *****'.format(
                train_annot_dataset, datetime.datetime.now()))
            logger.info('  Num examples = {}'.format(len(train_examples)))
            logger.info('  Batch size = {}'.format(TRAIN_BATCH_SIZE))
            logger.info('  Train steps = {}'.format(num_train_steps))
            train_input_fn = run_classifier.input_fn_builder(
                features=train_features,
                seq_length=MAX_SEQ_LENGTH,
                is_training=True,
                drop_remainder=True)
            estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
            logger.info('***** Finished training using {} at {} *****'.format(
                train_annot_dataset, datetime.datetime.now()))
            last_completed_train = train_annot_dataset
            completed_train_dirs.append(temp_output_dir)

        ######################################
        ######### TRAINING PREDICTION ########
        ######################################
        train_pred_input_fn = run_classifier.input_fn_builder(
            features=train_features,
            seq_length=MAX_SEQ_LENGTH,
            is_training=False,
            drop_remainder=False)
        predictions = estimator.predict(input_fn=train_pred_input_fn)
        probabilities, last_layer = list(
            zip(*[[p['probabilities'], p['last_layer']] for p in predictions]))
        probabilities = np.array(probabilities)
        if store_last_layer:
            # extract the hidden state for the CLS token
            last_layer = [_l[0] for _l in last_layer]
        else:
            last_layer = None
        y_true = [e.label_id for e in train_features]
        guid = [e.guid for e in train_examples]
        predictions_output = get_predictions_output(
            experiment_id,
            guid,
            probabilities,
            y_true,
            cls_hidden_state=last_layer,
            label_mapping=label_mapping,
            dataset='train')
        save_to_json(
            predictions_output,
            os.path.join(PREDICTIONS_JSON_DIR, f'train_{experiment_id}.json'))

        #############################
        ######### EVALUATING ########
        #############################
        eval_annot_dataset = experiment_definitions[exp_nr]["eval_annot_dataset"]
        eval_examples = processor.get_dev_examples(
            os.path.join('data', eval_annot_dataset))
        eval_features = run_classifier.convert_examples_to_features(
            eval_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
        logger.info('***** Started evaluation of {} at {} *****'.format(
            experiment_definitions[exp_nr]["name"], datetime.datetime.now()))
        logger.info('Num examples = {}'.format(len(eval_examples)))
        logger.info('Batch size = {}'.format(EVAL_BATCH_SIZE))
        # Eval will be slightly WRONG on the TPU because it will truncate
        # the last batch.
        eval_steps = int(len(eval_examples) / EVAL_BATCH_SIZE)
        eval_input_fn = run_classifier.input_fn_builder(
            features=eval_features,
            seq_length=MAX_SEQ_LENGTH,
            is_training=False,
            drop_remainder=True)
        result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)
        logger.info('***** Finished first half of evaluation of {} at {} *****'.format(
            experiment_definitions[exp_nr]["name"], datetime.datetime.now()))
        output_eval_file = os.path.join(temp_output_dir, 'eval_results.txt')
        with tf.gfile.GFile(output_eval_file, 'w') as writer:
            logger.info('***** Eval results *****')
            for key in sorted(result.keys()):
                logger.info('  {} = {}'.format(key, str(result[key])))
                writer.write('%s = %s\n' % (key, str(result[key])))

        predictions = estimator.predict(eval_input_fn)
        probabilities = np.array([p['probabilities'] for p in predictions])
        y_pred = np.argmax(probabilities, axis=1)
        y_true = [e.label_id for e in eval_features]
        guid = [e.guid for e in eval_examples]
        scores = performance_metrics(y_true, y_pred, label_mapping=label_mapping)
        logger.info('Final scores:')
        logger.info(scores)
        logger.info('***** Finished second half of evaluation of {} at {} *****'.format(
            experiment_definitions[exp_nr]["name"], datetime.datetime.now()))

        # write full dev prediction output
        predictions_output = get_predictions_output(
            experiment_id,
            guid,
            probabilities,
            y_true,
            label_mapping=label_mapping,
            dataset='dev')
        save_to_json(
            predictions_output,
            os.path.join(PREDICTIONS_JSON_DIR, f'dev_{experiment_id}.json'))

        # Write log to Training Log File
        data = {
            'Experiment_Name': experiment_definitions[exp_nr]["name"],
            'Experiment_Id': experiment_id,
            'Date': format(datetime.datetime.now()),
            'User': username,
            'Model': BERT_MODEL_NAME,
            'Num_Train_Steps': num_train_steps,
            'Train_Annot_Dataset': train_annot_dataset,
            'Eval_Annot_Dataset': eval_annot_dataset,
            'Learning_Rate': LEARNING_RATE,
            'Max_Seq_Length': MAX_SEQ_LENGTH,
            'Eval_Loss': result['eval_loss'],
            'Loss': result['loss'],
            'Comment': comment,
            **scores
        }
        append_to_csv(data, os.path.join(LOG_CSV_DIR, 'fulltrainlog.csv'))
        logger.info(f"***** Completed Experiment {exp_nr} *******")

    logger.info(
        f"***** Completed all experiments in {repeat} repeats. We should now clean up all remaining files *****")
    for c in completed_train_dirs:
        logger.info("Deleting these directories: ")
        logger.info("gsutil -m rm -r " + c)
        os.system("gsutil -m rm -r " + c)
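# What parse_experiments_argument() expands, restated stand-alone for
# illustration: single IDs pass through, 'a-b' ranges are inclusive.
def _parse(experiments):
    exp_list = []
    for part in experiments.split(','):
        if '-' in part:
            a, b = (int(x) for x in part.split('-'))
            exp_list.extend(range(a, b + 1))
        else:
            exp_list.append(part)
    return [str(s) for s in exp_list]

assert _parse("1,3-5,9") == ['1', '3', '4', '5', '9']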
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)
    processors = {
        "cola": run_classifier.ColaProcessor,
        "mnli": run_classifier.MnliProcessor,
        "mrpc": run_classifier.MrpcProcessor,
    }
    if not FLAGS.do_train and not FLAGS.do_eval:
        raise ValueError("At least one of `do_train` or `do_eval` must be True.")
    tf.gfile.MakeDirs(FLAGS.output_dir)
    task_name = FLAGS.task_name.lower()
    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))
    processor = processors[task_name]()
    label_list = processor.get_labels()
    tokenizer = create_tokenizer_from_hub_module(FLAGS.bert_hub_module_handle)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tf.contrib.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=tf.contrib.tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    train_examples = None
    num_train_steps = None
    num_warmup_steps = None
    if FLAGS.do_train:
        train_examples = processor.get_train_examples(FLAGS.data_dir)
        num_train_steps = int(
            len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    model_fn = model_fn_builder(
        num_labels=len(label_list),
        learning_rate=FLAGS.learning_rate,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps,
        use_tpu=FLAGS.use_tpu,
        bert_hub_module_handle=FLAGS.bert_hub_module_handle)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        train_features = run_classifier.convert_examples_to_features(
            train_examples, label_list, FLAGS.max_seq_length, tokenizer)
        tf.logging.info("***** Running training *****")
        tf.logging.info("  Num examples = %d", len(train_examples))
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", num_train_steps)
        train_input_fn = run_classifier.input_fn_builder(
            features=train_features,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True)
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    if FLAGS.do_eval:
        eval_examples = processor.get_dev_examples(FLAGS.data_dir)
        eval_features = run_classifier.convert_examples_to_features(
            eval_examples, label_list, FLAGS.max_seq_length, tokenizer)
        tf.logging.info("***** Running evaluation *****")
        tf.logging.info("  Num examples = %d", len(eval_examples))
        tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)
        # This tells the estimator to run through the entire set.
        eval_steps = None
        # However, if running eval on the TPU, you will need to specify the
        # number of steps.
        if FLAGS.use_tpu:
            # Eval will be slightly WRONG on the TPU because it will truncate
            # the last batch.
            eval_steps = int(len(eval_examples) / FLAGS.eval_batch_size)
        eval_drop_remainder = True if FLAGS.use_tpu else False
        eval_input_fn = run_classifier.input_fn_builder(
            features=eval_features,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=eval_drop_remainder)
        result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)
        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            tf.logging.info("***** Eval results *****")
            for key in sorted(result.keys()):
                tf.logging.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))

    if FLAGS.do_predict:
        predict_examples = processor.get_test_examples(FLAGS.data_dir)
        if FLAGS.use_tpu:
            # Discard the batch remainder if running on TPU
            n = len(predict_examples)
            predict_examples = predict_examples[:(n - n % FLAGS.predict_batch_size)]
        predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
        run_classifier.file_based_convert_examples_to_features(
            predict_examples, label_list, FLAGS.max_seq_length, tokenizer,
            predict_file)
        tf.logging.info("***** Running prediction *****")
        tf.logging.info("  Num examples = %d", len(predict_examples))
        tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)
        predict_input_fn = run_classifier.file_based_input_fn_builder(
            input_file=predict_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=FLAGS.use_tpu)
        result = estimator.predict(input_fn=predict_input_fn)
        output_predict_file = os.path.join(FLAGS.output_dir, "test_results.tsv")
        with tf.gfile.GFile(output_predict_file, "w") as writer:
            tf.logging.info("***** Predict results *****")
            for prediction in result:
                probabilities = prediction["probabilities"]
                output_line = "\t".join(
                    str(class_probability)
                    for class_probability in probabilities) + "\n"
                writer.write(output_line)
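# The TPU remainder-discard above, isolated: instead of padding, the test set
# is trimmed down to a multiple of the batch size (toy numbers):
n, batch = 1042, 8
kept = n - n % batch
assert kept == 1040  # the last 2 examples are discarded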
def main(_):
    """For the current work we only use the following four categories,
    but you can add others if you would like to."""
    categories = [
        'Tools_and_Home_Improvement',
        'Patio_Lawn_and_Garden',
        'Electronics',
        'Baby',
    ]
    data = None
    # Cross-domain pre-training (All_Categories) to boost the performance.
    if FLAGS.category_name == "All_Categories":
        for category in categories:
            category_data_path = os.path.join(FLAGS.data_dir, category + '.txt')
            category_data = pd.read_csv(
                category_data_path, sep='\t', encoding='utf-8', nrows=10000,
                converters={'reviewText': ast.literal_eval,
                            'FLTR_scores': ast.literal_eval})
            if data is None:
                data = category_data
            else:
                data = pd.concat([data, category_data], axis=0)
        data = data.sample(n=len(data))
    else:
        data_path = os.path.join(FLAGS.data_dir, FLAGS.category_name + '.txt')
        data = pd.read_csv(
            data_path, sep='\t', encoding='utf-8',
            converters={'reviewText': ast.literal_eval,
                        'FLTR_scores': ast.literal_eval})

    data['FLTR_Top10'] = data.apply(FLTR_Top10, axis=1)
    # Negative answers come from shuffling the true answers.
    list_of_answers = list(data['answer'])
    list_of_answers = shuffle(list_of_answers)
    data['non_answer'] = list_of_answers
    train = data[:int(len(data) * 0.8)]
    train = train.sample(n=min(20000, len(train)))
    test = data[int(len(data) * 0.8):]
    print(train.shape, test.shape)

    DATA_COLUMN_A = 'senA'
    DATA_COLUMN_B = 'senB'
    LABEL_COLUMN = 'Label'
    label_list = [0, 1]
    train = train.apply(qar_pair, axis=1)
    test = test.apply(qar_pair, axis=1)
    temp = train.tolist()
    flat_list = [item for sublist in temp for item in sublist]
    train = pd.DataFrame(flat_list, columns=['senA', 'senB'])
    train['Label'] = 1
    train['senA'] = train['senA'].apply(str)
    train['senB'] = train['senB'].apply(str)
    temp = test.tolist()
    flat_list = [item for sublist in temp for item in sublist]
    test = pd.DataFrame(flat_list, columns=['senA', 'senB'])
    test['Label'] = 1
    test['senA'] = test['senA'].apply(str)
    test['senB'] = test['senB'].apply(str)
    print(train.shape, test.shape)

    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=True)
    train_InputExamples = train.apply(
        lambda x: run_classifier.InputExample(guid=None,
                                              text_a=x[DATA_COLUMN_A],
                                              text_b=x[DATA_COLUMN_B],
                                              label=x[LABEL_COLUMN]),
        axis=1)
    test_InputExamples = test.apply(
        lambda x: run_classifier.InputExample(guid=None,
                                              text_a=x[DATA_COLUMN_A],
                                              text_b=x[DATA_COLUMN_B],
                                              label=x[LABEL_COLUMN]),
        axis=1)
    train_features = run_classifier.convert_examples_to_features(
        train_InputExamples, label_list, FLAGS.max_seq_length, tokenizer)
    test_features = run_classifier.convert_examples_to_features(
        test_InputExamples, label_list, FLAGS.max_seq_length, tokenizer)

    OUTPUT_DIR = os.path.join(FLAGS.model_output_dir,
                              FLAGS.category_name + "_BertQA")
    tf.gfile.MakeDirs(OUTPUT_DIR)
    run_config = tf.estimator.RunConfig(
        model_dir=OUTPUT_DIR,
        keep_checkpoint_max=2,
        save_summary_steps=FLAGS.save_summary_steps,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps)
    num_train_steps = int(
        len(train_features) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
    num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)
    model_fn = model_fn_builder(
        bert_config=modeling.BertConfig.from_json_file(FLAGS.bert_config_file),
        num_labels=len(label_list),
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps)
    estimator = tf.estimator.Estimator(
        model_fn=model_fn,
        config=run_config,
        params={"batch_size": FLAGS.train_batch_size})
    train_input_fn = run_classifier.input_fn_builder(
        features=train_features,
        seq_length=FLAGS.max_seq_length,
        is_training=True,
        drop_remainder=True)
    print("Beginning Training!")
    current_time = datetime.now()
    # Optional early stopping:
    # early_stopping = tf.contrib.estimator.stop_if_no_decrease_hook(
    #     estimator, metric_name='loss', max_steps_without_decrease=1000,
    #     min_steps=100)
    estimator.train(input_fn=train_input_fn,
                    max_steps=num_train_steps)  # , hooks=[early_stopping]
    print("Training took time ", datetime.now() - current_time)

    test_input_fn = run_classifier.input_fn_builder(
        features=test_features,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=True)
    predictions = estimator.predict(test_input_fn)
    x = [prediction['scores'] for prediction in predictions]
    print('\n')
    print("The accuracy of BertQA on " + FLAGS.category_name + " is: " +
          str(sum(i > 0 for i in x) / len(x)))
    print('\n')