def classify(search_sentence):

    examples = []
    guid = "dev-1"
    text_a = tokenization.convert_to_unicode(search_sentence)
    label = tokenization.convert_to_unicode("1")  # dummy label; ignored at prediction time
    examples.append(
        run_classifier.InputExample(guid=guid,
                                    text_a=text_a,
                                    text_b=None,
                                    label=label))

    predict_features = run_classifier.convert_examples_to_features(
        examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    predict_input_fn = run_classifier.input_fn_builder(
        features=predict_features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=True)  # caution: a predict batch size > 1 would drop this lone example
    result = estimator.predict(input_fn=predict_input_fn)
    emotion = {"happy": 0, "sad": 0, "angry": 0, "surprised": 0}
    _emotions = ["happy", "sad", "angry", "surprised"]
    for prediction in result:
        probabilities = prediction["probabilities"]
        for label_name, class_probability in zip(_emotions, probabilities):
            emotion[label_name] = class_probability
            print(label_name + ": " + str(class_probability))
        break  # only one example was submitted, so stop after the first prediction

    return emotion
Example #2
def model_eval(estimator):
    # Eval the model.
    eval_examples = processor.get_dev_examples(TASK_DATA_DIR)
    eval_features = run_classifier.convert_examples_to_features(
        eval_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    print('***** Started evaluation at {} *****'.format(
        datetime.datetime.now()))
    print('  Num examples = {}'.format(len(eval_examples)))
    print('  Batch size = {}'.format(EVAL_BATCH_SIZE))

    # Eval will be slightly WRONG on the TPU because it will truncate
    # the last batch.
    eval_steps = int(len(eval_examples) / EVAL_BATCH_SIZE)
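    # e.g. 1,043 eval examples with EVAL_BATCH_SIZE = 8 give eval_steps = 130,
    # so the last 3 examples are never evaluated.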
    eval_input_fn = run_classifier.input_fn_builder(features=eval_features,
                                                    seq_length=MAX_SEQ_LENGTH,
                                                    is_training=False,
                                                    drop_remainder=True)
    result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)
    print('***** Finished evaluation at {} *****'.format(
        datetime.datetime.now()))
    output_eval_file = os.path.join(OUTPUT_DIR, "eval_results.txt")
    with tf.gfile.GFile(output_eval_file, "w") as writer:
        print("***** Eval results *****")
        for key in sorted(result.keys()):
            print('  {} = {}'.format(key, str(result[key])))
            writer.write("%s = %s\n" % (key, str(result[key])))
Example #3
def predict(sentence):
    input_example = run_classifier.InputExample(
        guid="", text_a=sentence, text_b=None,
        label="0")  # here, "" is just a dummy label
    input_examples = [input_example]

    input_features = run_classifier.convert_examples_to_features(
        input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)

    predict_input_fn = run_classifier.input_fn_builder(
        features=input_features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)

    predictions = estimator.predict(input_fn=predict_input_fn)

    df = pd.DataFrame(predictions)
    # Access the probabilities by column name rather than by position.
    probabilities = df.iloc[0]['probabilities']
    label_index = probabilities.argmax()
    confidence = probabilities.max()
    predicted_label = emotions.iat[int(label_index), 0]

    return {
        'predicted_label': predicted_label,
        'confidence': confidence.item()
    }
Example #4
def getPrediction(in_sentences):
    '''
    Provide class predictions for a list of input sentences.
    :param in_sentences: list of raw sentence strings
    :return: list of (sentence, probabilities, predicted label) tuples
    '''

    #labels = ["Negative", "Positive"]

    # pre-process input
    input_examples = [
        run_classifier.InputExample(guid="", text_a=x, text_b=None, label=0)
        for x in in_sentences
    ]  # guid is left empty and 0 is just a dummy label
    input_features = run_classifier.convert_examples_to_features(
        input_examples, label_list, FLAGS.MAX_SEQ_LENGTH, tokenizer)

    # create model function input
    predict_input_fn = run_classifier.input_fn_builder(
        features=input_features,
        seq_length=FLAGS.MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)

    # calculate the predictions
    predictions = estimator.predict(predict_input_fn)

    # pair each sentence with its predicted probabilities and label
    output = [(sentence, prediction['probabilities'], prediction['labels'])
              for sentence, prediction in zip(in_sentences, predictions)]
    return output
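
A minimal usage sketch for the snippet above; it assumes `estimator`, `tokenizer`, `label_list`, and `FLAGS.MAX_SEQ_LENGTH` are already initialized as in the rest of this script, and the input sentences are made up:

# Hypothetical call; inputs are illustrative only.
results = getPrediction(["great product", "battery died after a week"])
for sentence, probabilities, label in results:
    print(sentence, probabilities, label)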
Example #5
def FLTR_Prediction(d,tokenizer,estimator):
    """ Ranking all of reviews accoring to how they are relevant to a given quetion."""
    
    d = d.reset_index(drop=True)
    test_t = d.apply(QR_pair,axis=1)
    test_t = test_t.tolist()
    flat_list = [item for sublist in test_t for item in sublist]
    test_t = pd.DataFrame(flat_list,columns=['question','review'])
    test_t['question'] = test_t['question'].apply(str)
    test_t['review'] = test_t['review'].apply(str)
    DATA_COLUMN_A = 'question'
    DATA_COLUMN_B = 'review'
    label_list = [0, 1]
    max_inputs = 1000000
    probs = []
    temp_test = test_t.copy()
    
    while len(temp_test)>0:
        line = min(max_inputs,len(temp_test))
        temp = temp_test[:line]
        
        inputExamples = temp.apply(lambda x: run_classifier.InputExample(guid=None,
                                                                         text_a = x[DATA_COLUMN_A],
                                                                         text_b = x[DATA_COLUMN_B],
                                                                         label = 0), 
                                   axis = 1)
                                    
        input_features = run_classifier.convert_examples_to_features(inputExamples, 
                                                                     label_list, 
                                                                      FLAGS.max_seq_length, 
                                                                      tokenizer)  
        
        predict_input_fn = run_classifier.input_fn_builder(features=input_features, 
                                                           seq_length=FLAGS.max_seq_length,
                                                           is_training=False, 
                                                           drop_remainder=False)
        
        predictions = estimator.predict(predict_input_fn)
        probabilities = [prediction['probabilities'] for prediction in  predictions]
        probs = probs+[item.tolist()[1] for item in probabilities]
        
        if len(temp_test)>max_inputs:
            temp_test = temp_test[line:]
            temp_test = temp_test.reset_index(drop=True)
        else:
            temp_test = []

    test_t['probabilities'] = probs
    num_reviews = d['num_reviews'].tolist()
    d['FLTR_scores'] = ''
    for i in range(0,len(d)):
        n = num_reviews[i]
        #print(probs[:n])
        d.at[i,'FLTR_scores'] = probs[:n]
        #print(d.at[i,'FLTR_scores'])
        if i!=len(d)-1:
            probs=probs[n:]
            
    return d
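
A hedged usage sketch: assuming `data` is a DataFrame with `question`, `reviewText`, and `num_reviews` columns (as prepared in the `main` function of Example #13) and that `tokenizer` and `estimator` are already built, the call below attaches one relevance score per review:

# Hypothetical call; FLTR_scores gets one score per review of each question.
scored = FLTR_Prediction(data, tokenizer, estimator)
print(scored[['question', 'FLTR_scores']].head())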
Example #6
    def predict(self, pred_data):
        input_features = run_classifier.convert_examples_to_features(
            pred_data, self.label_list, MAX_SEQ_LENGTH, self.tokenizer)
        predict_input_fn = run_classifier.input_fn_builder(features=input_features, seq_length=MAX_SEQ_LENGTH,
                                                           is_training=False, drop_remainder=False)
        predictions = self.estimator.predict(predict_input_fn)

        return predictions
Example #7
def model_predict(estimator):
    # Make predictions on a subset of eval examples
    prediction_examples = processor.get_dev_examples(
        TASK_DATA_DIR)[:PREDICT_BATCH_SIZE]
    input_features = run_classifier.convert_examples_to_features(
        prediction_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    predict_input_fn = run_classifier.input_fn_builder(
        features=input_features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=True)
    predictions = estimator.predict(predict_input_fn)

    for example, prediction in zip(prediction_examples, predictions):
        print('text_a: %s\ntext_b: %s\nlabel:%s\nprediction:%s\n' %
              (example.text_a, example.text_b, str(
                  example.label), prediction['probabilities']))
Example #8
 def getPrediction(self, in_sentences):
     label_list = [0, 1]
     input_examples = [
         run_classifier.InputExample(guid="",
                                     text_a=x,
                                     text_b=None,
                                     label=0) for x in in_sentences
     ]  # guid is left empty and 0 is just a dummy label
     input_features = run_classifier.convert_examples_to_features(
         input_examples, label_list, self.MAX_SEQ_LENGTH, self.tokenizer)
     predict_input_fn = run_classifier.input_fn_builder(
         features=input_features,
         seq_length=self.MAX_SEQ_LENGTH,
         is_training=False,
         drop_remainder=False)
     predictions = self.estimator.predict(predict_input_fn)
     return [(sentence, prediction['encode_vec'])
             for sentence, prediction in zip(in_sentences, predictions)]
Example #9
def model_train(estimator):
    print(
        'MRPC/CoLA on BERT base model normally takes about 2-3 minutes. Please wait...'
    )
    # We'll set sequences to be at most 128 tokens long.
    train_features = run_classifier.convert_examples_to_features(
        train_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    print('***** Started training at {} *****'.format(datetime.datetime.now()))
    print('  Num examples = {}'.format(len(train_examples)))
    print('  Batch size = {}'.format(TRAIN_BATCH_SIZE))
    tf.logging.info("  Num steps = %d", num_train_steps)
    train_input_fn = run_classifier.input_fn_builder(features=train_features,
                                                     seq_length=MAX_SEQ_LENGTH,
                                                     is_training=True,
                                                     drop_remainder=True)
    estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
    print('***** Finished training at {} *****'.format(
        datetime.datetime.now()))
Example #10
 def scorePredict(self):
     sentences = self.sentences
     input_examples = [
         run_classifier.InputExample(guid="",
                                     text_a=x,
                                     text_b=None,
                                     label="0") for x in sentences
     ]  # guid is left empty and "0" is just a dummy label
     input_features = run_classifier.convert_examples_to_features(
         input_examples, ['0', '1', '-1'], self.MAX_SEQ_LENGTH,
         self.tokenizer)
     predict_input_fn = run_classifier.input_fn_builder(
         features=input_features,
         seq_length=self.MAX_SEQ_LENGTH,
         is_training=False,
         drop_remainder=False)
     predictions = self.estimator.predict(input_fn=predict_input_fn)
     self.pred = list(predictions)
     return self.pred
Example #11
def predict(xs, max_seq_length, processor, estimator, tokenizer, dataset_name): 
  if dataset_name == 'yelp':
    lines = [["0", x] for x in xs]
  else:
    lines = [[]]  # empty first row; the processor presumably skips it as a header
    lines += [["0", x] for x in xs]
    

  predict_examples = processor._create_examples(lines, 'test')   
  tf.logging.info("***** Running prediction*****")
  tf.logging.info("  Num examples = %d", len(predict_examples))

  features = convert_examples_to_features(predict_examples, processor.get_labels(), max_seq_length, tokenizer)

  predict_input_fn = input_fn_builder(features, seq_length = max_seq_length, is_training = False, drop_remainder = False)


  result = estimator.predict(input_fn=predict_input_fn)


  return np.array(list(result))
Example #12
    def fit(self, train_data):
        params = self.params

        # Compute number of train and warmup steps from batch size
        num_train_steps = int(len(train_data) / TRAIN_BATCH_SIZE * params['NUM_TRAIN_EPOCHS'])
        num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)
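        # e.g. 8,000 training rows with TRAIN_BATCH_SIZE = 32 and
        # NUM_TRAIN_EPOCHS = 3 give num_train_steps = 750; WARMUP_PROPORTION = 0.1
        # then yields num_warmup_steps = 75.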

        run_config = tf.estimator.RunConfig(
            model_dir=self.OUTPUT_DIR,
            save_summary_steps=SAVE_SUMMARY_STEPS,
            save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)

        model_fn = self.model_fn_builder(
            num_labels=len(self.label_list),
            learning_rate=LEARNING_RATE,
            num_train_steps=num_train_steps,
            num_warmup_steps=num_warmup_steps)

        self.estimator = tf.estimator.Estimator(
            model_fn=model_fn,
            model_dir=self.OUTPUT_DIR,
            config=run_config,
            params={"batch_size": TRAIN_BATCH_SIZE})

        train_features = run_classifier.convert_examples_to_features(
            train_data, self.label_list, MAX_SEQ_LENGTH, self.tokenizer)
        print('***** Started training at {} *****'.format(datetime.datetime.now()))
        print('  Num examples = {}'.format(len(train_data)))
        print('  Batch size = {}'.format(TRAIN_BATCH_SIZE))
        tf.logging.info("  Num steps = %d", num_train_steps)
        train_input_fn = run_classifier.input_fn_builder(
            features=train_features,
            seq_length=MAX_SEQ_LENGTH,
            is_training=True,
            drop_remainder=False)

        self.estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
        print('***** Finished training at {} *****'.format(datetime.datetime.now()))
Example #13
def main(_):
    """For current work, we only use the following four categories, 
    but you can add others if you would like to."""
    categories = [
              'Tools_and_Home_Improvement', 
              'Patio_Lawn_and_Garden',
              'Electronics',
              'Baby',
    ]
    
    data = None
    
    if FLAGS.category_name == "All_Categories":
        """Cross-domain pre-training (All_Categories)."""
        for category in categories:
            category_data_path = os.path.join(FLAGS.data_dir,category+'.txt')
            category_data = pd.read_csv(category_data_path,sep='\t',encoding='utf-8',#nrows=5000,
                  converters={'QA':ast.literal_eval,'reviewText':ast.literal_eval})

            # Keep only the first 80% of each category (its training split);
            # note `len(category_data)`, not `len(data)`, since `data` may still be None here.
            category_data = category_data[:int(len(category_data)*0.8)]
            if data is None:
                data = category_data
            else:
                data = pd.concat([data,category_data],axis=0)
        data = data.sample(n=min(len(data),60000))
    else:
        data_path = os.path.join(FLAGS.data_dir,FLAGS.category_name+'.txt')
        data = pd.read_csv(data_path,sep='\t',encoding='utf-8',#nrows=10000,
                  converters={'QA':ast.literal_eval,'reviewText':ast.literal_eval})
    
    data['question'] = data['QA'].apply(lambda x: x['questionText'])
    data['answer'] = data['QA'].apply(lambda x: x['answers'][0]['answerText'] if len(x['answers'])>0 else PAD_WORD)
    data['num_reviews']= data['reviewText'].apply(lambda x: len(x))

    train = data[:int(len(data)*0.8)]
    
    list_of_answers = list(train['answer'])
    list_of_answers=shuffle(list_of_answers)
    qa = train[['question','answer']].copy()  # .copy() avoids SettingWithCopyWarning below
    nqa = pd.DataFrame({'question': train['question'].tolist(),'answer':list_of_answers})
    qa['label']=1
    nqa['label']=0

    d = pd.concat([qa,nqa],axis=0)
    d=shuffle(d)
    d['question']=d['question'].apply(str)
    d['answer']=d['answer'].apply(str)
    split = int(len(d)*0.9)
    dtrain = d[0:split]
    dtest = d[split:]

    DATA_COLUMN_A = 'question'
    DATA_COLUMN_B = 'answer'
    LABEL_COLUMN = 'label'
    label_list = [0, 1]

    tokenizer = tokenization.FullTokenizer(
                                           vocab_file=FLAGS.vocab_file, 
                                           do_lower_case=True)
    
    train_InputExamples = dtrain.apply(lambda x: run_classifier.InputExample(guid=None, 
                                                                        text_a = x[DATA_COLUMN_A],
                                                                        text_b = x[DATA_COLUMN_B],
                                                                        label = x[LABEL_COLUMN]), 
                                       axis = 1)

    test_InputExamples = dtest.apply(lambda x: run_classifier.InputExample(guid=None,
                                                                           text_a = x[DATA_COLUMN_A],
                                                                           text_b = x[DATA_COLUMN_B],
                                                                           label = x[LABEL_COLUMN]), 
                                     axis = 1)
                                            
    train_features = run_classifier.convert_examples_to_features(train_InputExamples, 
                                                  label_list, 
                                                  FLAGS.max_seq_length, 
                                                  tokenizer)
    test_features = run_classifier.convert_examples_to_features(test_InputExamples, 
                                                 label_list, 
                                                 FLAGS.max_seq_length, 
                                                 tokenizer)
   

    OUTPUT_DIR = os.path.join(FLAGS.model_output_dir,FLAGS.category_name+'_FLTR')
    tf.gfile.MakeDirs(OUTPUT_DIR)
    
    run_config = tf.estimator.RunConfig(
                                    model_dir=OUTPUT_DIR,
                                    save_summary_steps=FLAGS.save_summary_steps,
                                    save_checkpoints_steps=FLAGS.save_checkpoints_steps)

    num_train_steps = int(len(train_features) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
    num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)
    
    
    model_fn = model_fn_builder(
                            bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file),
                            num_labels=len(label_list),
                            init_checkpoint=FLAGS.init_checkpoint,
                            learning_rate=FLAGS.learning_rate,
                            num_train_steps=num_train_steps,
                            num_warmup_steps=num_warmup_steps)

    estimator = tf.estimator.Estimator(
                                   model_fn=model_fn,
                                   config=run_config,
                                   params={"batch_size": FLAGS.train_batch_size})

    train_input_fn = run_classifier.input_fn_builder(
                                                 features=train_features,
                                                 seq_length=FLAGS.max_seq_length,
                                                 is_training=True,
                                                 drop_remainder=True)

    print("Beginning Training!")
    current_time = datetime.now()
    estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
    print("Training took time ", datetime.now() - current_time)
    
    test_input_fn = run_classifier.input_fn_builder(
                                                features=test_features,
                                                seq_length=FLAGS.max_seq_length,
                                                is_training=False,
                                                drop_remainder=False)
                                                

    predictions = estimator.predict(test_input_fn)
    x=[np.argmax(prediction['probabilities']) for prediction in predictions]
    dtest['prediction']=x
    print("The accuracy of FLTR on "+FLAGS.category_name+" is: "+str(accuracy_score(dtest.label,dtest.prediction)))
    
    if FLAGS.do_predict:
        print("Beginning Prediction!")
        data_with_FLTR_predictions = FLTR_Prediction(data,tokenizer,estimator)
        
        if(data_with_FLTR_predictions.isnull().values.any()):
            data_with_FLTR_predictions = data_with_FLTR_predictions.replace(np.nan, "[PAD]", regex=True)
            
        data_with_FLTR_predictions.to_csv(os.path.join(FLAGS.data_dir, 
                                                       FLAGS.category_name+'.txt'), 
                                          index=None, sep='\t', mode='w') 
        print("Prediction End!")
Example #14
# For TPU, we append `PaddingInputExample`s so the example count stays a
# multiple of the batch size (this snippet predicts with PREDICT_BATCH_SIZE)
if USE_TPU:
    while (len(predict_examples) % PREDICT_BATCH_SIZE != 0):
        predict_examples.append(run_classifier.PaddingInputExample())

# Converting to features
predict_features = run_classifier.convert_examples_to_features(
    predict_examples, label_list, MAX_SEQ_LENGTH, tokenizer)

print('  Num examples = {}'.format(num_predict_examples))
print('  Batch size = {}'.format(PREDICT_BATCH_SIZE))

# Input function for prediction
predict_input_fn = run_classifier.input_fn_builder(predict_features,
                                                   seq_length=MAX_SEQ_LENGTH,
                                                   is_training=False,
                                                   drop_remainder=USE_TPU)  # TPUs need fixed batch shapes
result = list(estimator.predict(input_fn=predict_input_fn))
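# Only the first `num_predict_examples` entries of `result` are real
# predictions; any remaining entries come from the padding examples.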
print(result)
for ex_i in range(num_predict_examples):
    print("****** Example {} ******".format(ex_i))
    print("Question1 :", sent_pairs[ex_i][0])
    print("Question2 :", sent_pairs[ex_i][1])
    print("Prediction :", result[ex_i]['probabilities'][1])

# Converting test examples to features
print("################  Processing Test Data #####################")
TEST_TF_RECORD = os.path.join(OUTPUT_DIR, "test.tf_record")
test_examples = processor.get_test_examples(TASK_DATA_DIR)
Example #15
def main(_):
    """For current work, we only use the following four categories, 
    but you can add others if you would like to."""
    categories = [
        'Tools_and_Home_Improvement',
        'Patio_Lawn_and_Garden',
        'Electronics',
        'Baby',
    ]

    models = ['FLTR', 'BertQA']

    data_path = os.path.join(FLAGS.data_dir, 'Annotated_Data.txt')
    data = pd.read_csv(data_path,
                       sep='\t',
                       encoding='utf-8',
                       converters={
                           'annotation_score': ast.literal_eval,
                           'reviews': ast.literal_eval
                       })
    data = data.reset_index()
    data['qr'] = data[['index', 'question', 'reviews'
                       ]].apply(lambda x: [[x['index'], x['question'], i]
                                           for i in x['reviews']],
                                axis=1)

    d = []
    for category in categories:
        qr = data[data['category'] == category]['qr'].tolist()
        qr = [item for sublist in qr for item in sublist]
        qr = pd.DataFrame(columns=['index', 'question', 'review'], data=qr)
        qr['label'] = 1

        temp = qr.copy()
        temp['question'] = temp['question'].apply(str)
        temp['review'] = temp['review'].apply(str)
        DATA_COLUMN_A = 'question'
        DATA_COLUMN_B = 'review'
        LABEL_COLUMN = 'label'
        label_list = [0, 1]

        tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                               do_lower_case=True)

        test_InputExamples = temp.apply(
            lambda x: run_classifier.InputExample(guid=None,
                                                  text_a=x[DATA_COLUMN_A],
                                                  text_b=x[DATA_COLUMN_B],
                                                  label=x[LABEL_COLUMN]),
            axis=1)

        test_features = run_classifier.convert_examples_to_features(
            test_InputExamples, label_list, FLAGS.max_seq_length, tokenizer)

        t = data[data['category'] == category]
        t = t.reset_index(drop=True)
        for model in models:

            OUTPUT_DIR = os.path.join(FLAGS.model_output_dir,
                                      category + '_' + model)
            run_config = tf.estimator.RunConfig(model_dir=OUTPUT_DIR,
                                                save_summary_steps=100,
                                                save_checkpoints_steps=100)

            model_fn = None
            if model == 'BertQA':
                model_fn = model_fn_builder_BertQA(
                    bert_config=modeling.BertConfig.from_json_file(
                        FLAGS.bert_config_file),
                    num_labels=len(label_list),
                    init_checkpoint=OUTPUT_DIR,
                    learning_rate=FLAGS.learning_rate,
                    num_train_steps=100,
                    num_warmup_steps=100)
            else:
                model_fn = model_fn_builder_FLTR(
                    bert_config=modeling.BertConfig.from_json_file(
                        FLAGS.bert_config_file),
                    num_labels=len(label_list),
                    init_checkpoint=OUTPUT_DIR,
                    learning_rate=FLAGS.learning_rate,
                    num_train_steps=100,
                    num_warmup_steps=100)

            estimator = tf.estimator.Estimator(
                model_fn=model_fn,
                config=run_config,
                params={"batch_size": FLAGS.train_batch_size})

            test_input_fn = run_classifier.input_fn_builder(
                features=test_features,
                seq_length=FLAGS.max_seq_length,
                is_training=False,
                drop_remainder=False)

            predictions = estimator.predict(test_input_fn)
            probabilities = [
                prediction['probabilities'] for prediction in predictions
            ]
            probabilities = [list(item) for item in probabilities]

            if model == 'FLTR':
                probabilities = [item[1] for item in probabilities]
            else:
                probabilities = [item[0] for item in probabilities]

            print(model, ' :', probabilities[:10])
            temp[model + '_score'] = probabilities
            temp_groupby = temp.groupby(
                ['index', 'question'],
                sort=False)[model + '_score'].apply(list).reset_index(
                    name=model + '_score')
            t = pd.concat([t, temp_groupby[model + '_score']], axis=1)

        if len(d) == 0:
            d = t
        else:
            d = pd.concat([d, t], axis=0, ignore_index=True)

    d.to_csv(os.path.join(FLAGS.data_dir, 'test_predictions.txt'),
             index=None,
             sep='\t',
             mode='w')
Example #16
def main():
    tf.logging.set_verbosity(tf.logging.INFO)
    if not FLAGS.do_train and not FLAGS.do_eval:
        raise ValueError(
            "At least one of `do_train` or `do_eval` must be True.")

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    tf.gfile.MakeDirs(FLAGS.output_dir)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=tpu.TPUConfig(iterations_per_loop=FLAGS.iterations_per_loop,
                                 num_shards=FLAGS.num_tpu_cores,
                                 per_host_input_for_training=is_per_host))

    session_config = tf.ConfigProto(log_device_placement=True)
    session_config.gpu_options.allow_growth = True
    run_config = run_config.replace(session_config=session_config)  # replace() returns a new RunConfig

    num_train_steps = None
    num_warmup_steps = None

    with open('cqa_data.pkl', 'rb') as fr:
        train_features, dev_cid, dev_features = pkl.load(fr)
        dev_label = [feature.label_id for feature in dev_features]

    if FLAGS.do_train:
        num_train_steps = int(
            len(train_features) / FLAGS.train_batch_size *
            FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    model_fn = model_fn_builder(bert_config=bert_config,
                                num_labels=2,
                                init_checkpoint=FLAGS.init_checkpoint,
                                learning_rate=FLAGS.learning_rate,
                                num_train_steps=num_train_steps,
                                num_warmup_steps=num_warmup_steps,
                                use_tpu=FLAGS.use_tpu,
                                use_one_hot_embeddings=FLAGS.use_tpu,
                                dev_cid=dev_cid)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        # params={'batch_size': FLAGS.train_batch_size},
        train_batch_size=FLAGS.train_batch_size,
        predict_batch_size=FLAGS.eval_batch_size)

    if FLAGS.do_train:
        tf.logging.info("***** Running training *****")
        tf.logging.info("  Num examples = %d", len(train_features))
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", num_train_steps)
        train_input_fn = input_fn_builder(features=train_features,
                                          seq_length=FLAGS.max_seq_length,
                                          is_training=True,
                                          drop_remainder=True)

        estimator.train(input_fn=train_input_fn,
                        max_steps=num_train_steps,
                        hooks=[
                            EvalHook(estimator=estimator,
                                     dev_features=dev_features,
                                     dev_label=dev_label,
                                     dev_cid=dev_cid,
                                     max_seq_length=FLAGS.max_seq_length,
                                     eval_steps=FLAGS.save_checkpoints_steps,
                                     checkpoint_dir=FLAGS.output_dir)
                        ])

    if FLAGS.do_eval:
        tf.logging.info("***** Running evaluation *****")
        tf.logging.info("  Num examples = %d", len(dev_features))
        tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

        # This tells the estimator to run through the entire set.
        eval_steps = None
        # However, if running eval on the TPU, you will need to specify the
        # number of steps.
        if FLAGS.use_tpu:
            # Eval will be slightly WRONG on the TPU because it will truncate
            # the last batch.
            eval_steps = int(len(dev_features) / FLAGS.eval_batch_size)

        eval_drop_remainder = True if FLAGS.use_tpu else False
        eval_input_fn = input_fn_builder(features=dev_features,
                                         seq_length=FLAGS.max_seq_length,
                                         is_training=False,
                                         drop_remainder=eval_drop_remainder)

        predictions = estimator.predict(eval_input_fn,
                                        yield_single_examples=False)
        res = np.concatenate([a for a in predictions], axis=0)
        print(res.shape, np.array(dev_label).shape)
        metrics = PRF(np.array(dev_label), res.argmax(axis=-1))
        # print((np.array(dev_label) != res.argmax(axis=-1))[:1000])
        MAP, AvgRec, MRR = eval_reranker(dev_cid, dev_label, res[:, 0])
        metrics['MAP'] = MAP
        metrics['AvgRec'] = AvgRec
        metrics['MRR'] = MRR

        print_metrics(metrics, 'dev')
Example #17
                            num_labels=len(label_list),
                            learning_rate=FLAGS.LEARNING_RATE,
                            num_train_steps=num_train_steps,
                            num_warmup_steps=num_warmup_steps)

# initializes the estimator
estimator = tf.estimator.Estimator(model_fn=model_fn,
                                   config=run_config,
                                   params={"batch_size": FLAGS.BATCH_SIZE})

# <------------------ Begin training

# Create an input function for training. Set drop_remainder=True when training on TPUs.
train_input_fn = run_classifier.input_fn_builder(
    features=train_features,
    seq_length=FLAGS.MAX_SEQ_LENGTH,
    is_training=True,
    drop_remainder=False)

tf.logging.info('Beginning Training!')
current_time = datetime.now()
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
tf.logging.info("Training took time %s" % (datetime.now() - current_time))

# <------------------ Begin evaluation on test data

tf.logging.info('Beginning Evaluation!')
test_input_fn = run_classifier.input_fn_builder(
    features=test_features,
    seq_length=FLAGS.MAX_SEQ_LENGTH,
    is_training=False,
Example #18
model_fn = run_classifier.model_fn_builder(
    bert_config=modeling.BertConfig.from_json_file(CONFIG_FILE),
    num_labels=len(label_list),
    init_checkpoint=INIT_CHECKPOINT,
    learning_rate=LEARNING_RATE,
    num_train_steps=num_train_steps,
    num_warmup_steps=num_warmup_steps,
    use_tpu=False,
    use_one_hot_embeddings=True)

estimator = tf.contrib.tpu.TPUEstimator(use_tpu=False,
                                        model_fn=model_fn,
                                        config=run_config,
                                        train_batch_size=TRAIN_BATCH_SIZE,
                                        eval_batch_size=EVAL_BATCH_SIZE)

print('\n__________\nStarted training at {} '.format(datetime.datetime.now()))
print('\nNum examples = {}'.format(len(train_examples)))
print('\nBatch size = {}'.format(TRAIN_BATCH_SIZE))
tf.logging.info("Num steps = %d", num_train_steps)

train_input_fn = run_classifier.input_fn_builder(features=train_features,
                                                 seq_length=MAX_SEQ_LENGTH,
                                                 is_training=True,
                                                 drop_remainder=True)
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
print('\n__________\nFinished training at {}'.format(datetime.datetime.now()))
print('\nTotal time taken to fine tune the model ', round(time.time() - t, 2),
      ' s')
Example #19
def run_experiment(experiments, use_tpu, tpu_address, repeat, num_train_steps,
                   username, comment, store_last_layer):
    logger.info(
        f'Getting ready to run the following experiments for {repeat} repeats: {experiments}'
    )

    def get_run_config(output_dir):
        return tf.contrib.tpu.RunConfig(
            cluster=tpu_cluster_resolver,
            model_dir=output_dir,
            save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS,
            tpu_config=tf.contrib.tpu.TPUConfig(
                iterations_per_loop=ITERATIONS_PER_LOOP,
                num_shards=NUM_TPU_CORES,
                per_host_input_for_training=tf.contrib.tpu.InputPipelineConfig.
                PER_HOST_V2))

    def parse_experiments_argument(experiments):
        """Returns list of experiments from comma-separated string as list"""
        exp_list = []
        for part in experiments.split(','):
            if '-' in part:
                a, b = part.split('-')
                a, b = int(a), int(b)
                exp_list.extend(range(a, b + 1))
            else:
                exp_list.append(part)
        exp_list = [str(s) for s in exp_list]
        return exp_list

    experiments = parse_experiments_argument(experiments)
    last_completed_train = ""
    completed_train_dirs = []

    for exp_nr in experiments:
        logger.info(f"***** Starting Experiment {exp_nr} *******")
        logger.info(f"***** {experiment_definitions[exp_nr]['name']} ******")
        logger.info("***********************************************")

        #Get a unique ID for every experiment run
        experiment_id = str(uuid.uuid4())

        ###########################
        ######### TRAINING ########
        ###########################

        #We should only train a new model if an identical model hasn't just been trained; this saves considerable computation time.
        train_annot_dataset = experiment_definitions[exp_nr][
            "train_annot_dataset"]

        if train_annot_dataset != last_completed_train:
            #Set a fresh new output directory every time training starts, and set the cache to this directory
            temp_output_dir = os.path.join(TEMP_OUTPUT_BASEDIR, experiment_id)

            os.environ['TFHUB_CACHE_DIR'] = temp_output_dir
            logger.info(f"***** Setting temporary dir {temp_output_dir} **")
            logger.info(f"***** Train started in {temp_output_dir} **")

            tokenizer = tokenization.FullTokenizer(vocab_file=os.path.join(
                BERT_MODEL_DIR, 'vocab.txt'),
                                                   do_lower_case=LOWER_CASED)

            if tpu_address:
                tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
                    tpu_address)
            else:
                tpu_cluster_resolver = None

            processor = vaccineStanceProcessor()
            label_list = processor.get_labels()
            label_mapping = dict(zip(range(len(label_list)), label_list))

            train_examples = processor.get_train_examples(
                os.path.join('data', train_annot_dataset))
            num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

            #Initiation

            bert_config = modeling.BertConfig.from_json_file(
                os.path.join(BERT_MODEL_DIR, 'bert_config.json'))
            model_fn = model_fn_builder(bert_config=bert_config,
                                        num_labels=len(label_list),
                                        init_checkpoint=BERT_MODEL_FILE,
                                        learning_rate=LEARNING_RATE,
                                        num_train_steps=num_train_steps,
                                        num_warmup_steps=num_warmup_steps,
                                        use_tpu=use_tpu,
                                        use_one_hot_embeddings=True,
                                        extract_last_layer=store_last_layer)

            estimator = tf.contrib.tpu.TPUEstimator(
                use_tpu=use_tpu,
                model_fn=model_fn,
                config=get_run_config(temp_output_dir),
                train_batch_size=TRAIN_BATCH_SIZE,
                eval_batch_size=EVAL_BATCH_SIZE,
                predict_batch_size=PREDICT_BATCH_SIZE,
            )

            train_features = run_classifier.convert_examples_to_features(
                train_examples, label_list, MAX_SEQ_LENGTH, tokenizer)

            logger.info(
                '***** Fine tuning BERT base model normally takes a few minutes. Please wait...'
            )
            logger.info('***** Started training using {} at {} *****'.format(
                train_annot_dataset, datetime.datetime.now()))
            logger.info('  Num examples = {}'.format(len(train_examples)))
            logger.info('  Batch size = {}'.format(TRAIN_BATCH_SIZE))
            logger.info('  Train steps = {}'.format(num_train_steps))

            tf.logging.info('  Num steps = %d', num_train_steps)
            train_input_fn = run_classifier.input_fn_builder(
                features=train_features,
                seq_length=MAX_SEQ_LENGTH,
                is_training=True,
                drop_remainder=True)

            estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
            logger.info('***** Finished training using {} at {} *****'.format(
                train_annot_dataset, datetime.datetime.now()))

            last_completed_train = train_annot_dataset
            completed_train_dirs.append(temp_output_dir)

            ######################################
            ######### TRAINING PREDICTION ########
            ######################################
            train_pred_input_fn = run_classifier.input_fn_builder(
                features=train_features,
                seq_length=MAX_SEQ_LENGTH,
                is_training=False,
                drop_remainder=False)

            predictions = estimator.predict(input_fn=train_pred_input_fn)
            probabilities, last_layer = list(
                zip(*[[p['probabilities'], p['last_layer']]
                      for p in predictions]))
            probabilities = np.array(probabilities)
            if store_last_layer:
                # extract state for CLS token
                last_layer = [_l[0] for _l in last_layer]
            else:
                last_layer = None
            y_true = [e.label_id for e in train_features]
            guid = [e.guid for e in train_examples]
            predictions_output = get_predictions_output(
                experiment_id,
                guid,
                probabilities,
                y_true,
                cls_hidden_state=last_layer,
                label_mapping=label_mapping,
                dataset='train')
            save_to_json(
                predictions_output,
                os.path.join(PREDICTIONS_JSON_DIR,
                             f'train_{experiment_id}.json'))

        #############################
        ######### EVALUATING ########
        #############################
        eval_annot_dataset = experiment_definitions[exp_nr][
            "eval_annot_dataset"]

        eval_examples = processor.get_dev_examples(
            os.path.join('data', eval_annot_dataset))
        eval_features = run_classifier.convert_examples_to_features(
            eval_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
        logger.info('***** Started evaluation of {} at {} *****'.format(
            experiment_definitions[exp_nr]["name"], datetime.datetime.now()))
        logger.info('Num examples = {}'.format(len(eval_examples)))
        logger.info('Batch size = {}'.format(EVAL_BATCH_SIZE))

        # Eval will be slightly WRONG on the TPU because it will truncate the last batch.
        eval_steps = int(len(eval_examples) / EVAL_BATCH_SIZE)
        eval_input_fn = run_classifier.input_fn_builder(
            features=eval_features,
            seq_length=MAX_SEQ_LENGTH,
            is_training=False,
            drop_remainder=True)
        result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)

        logger.info(
            '***** Finished first half of evaluation of {} at {} *****'.format(
                experiment_definitions[exp_nr]["name"],
                datetime.datetime.now()))

        output_eval_file = os.path.join(temp_output_dir, 'eval_results.txt')
        with tf.gfile.GFile(output_eval_file, 'w') as writer:
            logger.info('***** Eval results *****')
            for key in sorted(result.keys()):
                logger.info('  {} = {}'.format(key, str(result[key])))
                writer.write('%s = %s\n' % (key, str(result[key])))

        # Note: the eval input_fn uses drop_remainder=True, so y_pred below can
        # be shorter than y_true unless the eval set size is a multiple of
        # EVAL_BATCH_SIZE.
        predictions = estimator.predict(eval_input_fn)
        probabilities = np.array([p['probabilities'] for p in predictions])
        y_pred = np.argmax(probabilities, axis=1)
        y_true = [e.label_id for e in eval_features]
        guid = [e.guid for e in eval_examples]
        scores = performance_metrics(y_true,
                                     y_pred,
                                     label_mapping=label_mapping)
        logger.info('Final scores:')
        logger.info(scores)
        logger.info(
            '***** Finished second half of evaluation of {} at {} *****'.
            format(experiment_definitions[exp_nr]["name"],
                   datetime.datetime.now()))

        # write full dev prediction output
        predictions_output = get_predictions_output(
            experiment_id,
            guid,
            probabilities,
            y_true,
            label_mapping=label_mapping,
            dataset='dev')
        save_to_json(
            predictions_output,
            os.path.join(PREDICTIONS_JSON_DIR, f'dev_{experiment_id}.json'))

        # Write log to Training Log File
        data = {
            'Experiment_Name': experiment_definitions[exp_nr]["name"],
            'Experiment_Id': experiment_id,
            'Date': format(datetime.datetime.now()),
            'User': username,
            'Model': BERT_MODEL_NAME,
            'Num_Train_Steps': num_train_steps,
            'Train_Annot_Dataset': train_annot_dataset,
            'Eval_Annot_Dataset': eval_annot_dataset,
            'Learning_Rate': LEARNING_RATE,
            'Max_Seq_Length': MAX_SEQ_LENGTH,
            'Eval_Loss': result['eval_loss'],
            'Loss': result['loss'],
            'Comment': comment,
            **scores
        }

        append_to_csv(data, os.path.join(LOG_CSV_DIR, 'fulltrainlog.csv'))
        logger.info(f"***** Completed Experiment {exp_nr} *******")

    logger.info(
        f"***** Completed all experiments in {repeat} repeats. We should now clean up all remaining files *****"
    )
    for c in completed_train_dirs:
        logger.info("Deleting these directories: ")
        logger.info("gsutil -m rm -r " + c)
        os.system("gsutil -m rm -r " + c)
Example #20
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    processors = {
        "cola": run_classifier.ColaProcessor,
        "mnli": run_classifier.MnliProcessor,
        "mrpc": run_classifier.MrpcProcessor,
    }

    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
        raise ValueError(
            "At least one of `do_train`, `do_eval` or `do_predict` must be True.")

    tf.gfile.MakeDirs(FLAGS.output_dir)

    task_name = FLAGS.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()

    label_list = processor.get_labels()

    tokenizer = create_tokenizer_from_hub_module(FLAGS.bert_hub_module_handle)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tf.contrib.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=tf.contrib.tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    train_examples = None
    num_train_steps = None
    num_warmup_steps = None
    if FLAGS.do_train:
        train_examples = processor.get_train_examples(FLAGS.data_dir)
        num_train_steps = int(
            len(train_examples) / FLAGS.train_batch_size *
            FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    model_fn = model_fn_builder(
        num_labels=len(label_list),
        learning_rate=FLAGS.learning_rate,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps,
        use_tpu=FLAGS.use_tpu,
        bert_hub_module_handle=FLAGS.bert_hub_module_handle)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        train_features = run_classifier.convert_examples_to_features(
            train_examples, label_list, FLAGS.max_seq_length, tokenizer)
        tf.logging.info("***** Running training *****")
        tf.logging.info("  Num examples = %d", len(train_examples))
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", num_train_steps)
        train_input_fn = run_classifier.input_fn_builder(
            features=train_features,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True)
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    if FLAGS.do_eval:
        eval_examples = processor.get_dev_examples(FLAGS.data_dir)
        eval_features = run_classifier.convert_examples_to_features(
            eval_examples, label_list, FLAGS.max_seq_length, tokenizer)

        tf.logging.info("***** Running evaluation *****")
        tf.logging.info("  Num examples = %d", len(eval_examples))
        tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

        # This tells the estimator to run through the entire set.
        eval_steps = None
        # However, if running eval on the TPU, you will need to specify the
        # number of steps.
        if FLAGS.use_tpu:
            # Eval will be slightly WRONG on the TPU because it will truncate
            # the last batch.
            eval_steps = int(len(eval_examples) / FLAGS.eval_batch_size)

        eval_drop_remainder = True if FLAGS.use_tpu else False
        eval_input_fn = run_classifier.input_fn_builder(
            features=eval_features,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=eval_drop_remainder)

        result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)

        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            tf.logging.info("***** Eval results *****")
            for key in sorted(result.keys()):
                tf.logging.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))

    if FLAGS.do_predict:
        predict_examples = processor.get_test_examples(FLAGS.data_dir)
        if FLAGS.use_tpu:
            # Discard batch remainder if running on TPU
            n = len(predict_examples)
            predict_examples = predict_examples[:(
                n - n % FLAGS.predict_batch_size)]
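            # e.g. 1,000 examples with predict_batch_size = 32 keeps the first
            # 992 and drops the last 8.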

        predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
        run_classifier.file_based_convert_examples_to_features(
            predict_examples, label_list, FLAGS.max_seq_length, tokenizer,
            predict_file)

        tf.logging.info("***** Running prediction*****")
        tf.logging.info("  Num examples = %d", len(predict_examples))
        tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

        predict_input_fn = run_classifier.file_based_input_fn_builder(
            input_file=predict_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=FLAGS.use_tpu)

        result = estimator.predict(input_fn=predict_input_fn)

        output_predict_file = os.path.join(FLAGS.output_dir,
                                           "test_results.tsv")
        with tf.gfile.GFile(output_predict_file, "w") as writer:
            tf.logging.info("***** Predict results *****")
            for prediction in result:
                probabilities = prediction["probabilities"]
                output_line = "\t".join(
                    str(class_probability)
                    for class_probability in probabilities) + "\n"
                writer.write(output_line)
Example #21
def main(_):
    """For current work, we only use the following four categories, 
    but you can add others if you would like to."""
    categories = [
              'Tools_and_Home_Improvement', 
              'Patio_Lawn_and_Garden',
              'Electronics',
              'Baby',
    ]
    
    data = None
    
    """Cross-domain pre-training (All_Categories) to boost the performance."""
    if FLAGS.category_name == "All_Categories":
        for category in categories:
            category_data_path = os.path.join(FLAGS.data_dir,category+'.txt')
            category_data = pd.read_csv(category_data_path,sep='\t',encoding='utf-8',nrows=10000,
                  converters={'reviewText':ast.literal_eval,'FLTR_scores':ast.literal_eval})
            if data is None:
                data = category_data
            else:
                data = pd.concat([data,category_data],axis=0)
        data = data.sample(n=len(data))
    else:
        data_path = os.path.join(FLAGS.data_dir,FLAGS.category_name+'.txt')
        data = pd.read_csv(data_path,sep='\t',encoding='utf-8',#nrows=10000,
                  converters={'reviewText':ast.literal_eval,'FLTR_scores':ast.literal_eval})
    
    #data['len_questions'] = data["question"].apply(lambda x: len(x.split()))
    #data = data[data['len_questions']<=10]
    
    data['FLTR_Top10'] = data.apply(FLTR_Top10,axis=1)
    list_of_answers = list(data['answer'])
    list_of_answers=shuffle(list_of_answers)
    data['non_answer']= list_of_answers
    
    train = data[:int(len(data)*0.8)]
    train = train.sample(n=min(20000,len(train)))
    test = data[int(len(data)*0.8):]
    print(train.shape,test.shape)

    DATA_COLUMN_A = 'senA'
    DATA_COLUMN_B = 'senB'
    LABEL_COLUMN = 'Label'
    label_list = [0, 1]
         
    train = train.apply(qar_pair,axis=1)    
    test = test.apply(qar_pair,axis=1)
    
    temp = train.tolist()
    flat_list = [item for sublist in temp for item in sublist]
    train =pd.DataFrame(flat_list,columns=['senA','senB'])
    train['Label'] =1
    train['senA']=train['senA'].apply(str)
    train['senB']=train['senB'].apply(str)
    
    temp = test.tolist()
    flat_list = [item for sublist in temp for item in sublist]
    test = pd.DataFrame(flat_list,columns=['senA','senB'])
    test['Label'] = 1
    test['senA'] = test['senA'].apply(str)
    test['senB'] = test['senB'].apply(str)
    print(train.shape,test.shape)
    
    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file, 
                                           do_lower_case=True)
    
    train_InputExamples = train.apply(lambda x: run_classifier.InputExample(guid=None, 
                                                                        text_a = x[DATA_COLUMN_A],
                                                                        text_b = x[DATA_COLUMN_B],
                                                                        label = x[LABEL_COLUMN]), 
                                       axis = 1)

    test_InputExamples = test.apply(lambda x: run_classifier.InputExample(guid=None,
                                                                           text_a = x[DATA_COLUMN_A],
                                                                           text_b = x[DATA_COLUMN_B],
                                                                           label = x[LABEL_COLUMN]), 
                                     axis = 1)
                                            
    train_features = run_classifier.convert_examples_to_features(train_InputExamples, 
                                                  label_list, 
                                                  FLAGS.max_seq_length, 
                                                  tokenizer)
    test_features = run_classifier.convert_examples_to_features(test_InputExamples, 
                                                 label_list, 
                                                 FLAGS.max_seq_length, 
                                                 tokenizer)

    
    OUTPUT_DIR = os.path.join(FLAGS.model_output_dir,FLAGS.category_name+"_BertQA")
    tf.gfile.MakeDirs(OUTPUT_DIR)
    
    run_config = tf.estimator.RunConfig(
                                    model_dir=OUTPUT_DIR,
                                    keep_checkpoint_max=2,
                                    save_summary_steps=FLAGS.save_summary_steps,
                                    save_checkpoints_steps=FLAGS.save_checkpoints_steps)

    num_train_steps = int(len(train_features) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
    num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)
    
    
    model_fn = model_fn_builder(
                            bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file),
                            num_labels = len(label_list),
                            init_checkpoint = FLAGS.init_checkpoint,
                            learning_rate = FLAGS.learning_rate,
                            num_train_steps = num_train_steps,
                            num_warmup_steps = num_warmup_steps)

    estimator = tf.estimator.Estimator(
                                   model_fn = model_fn,
                                   config = run_config,
                                   params = {"batch_size": FLAGS.train_batch_size})

    train_input_fn = run_classifier.input_fn_builder(
                                                 features = train_features,
                                                 seq_length = FLAGS.max_seq_length,
                                                 is_training = True,
                                                 drop_remainder = True)

    print("Beginning Training!")
    current_time = datetime.now()
    #early_stopping = tf.contrib.estimator.stop_if_no_decrease_hook(
    #                 estimator,metric_name='loss',max_steps_without_decrease=1000,min_steps=100)

    estimator.train(input_fn = train_input_fn, max_steps = num_train_steps) #,hooks=[early_stopping]
    print("Training took time ", datetime.now() - current_time)
    
    test_input_fn = run_classifier.input_fn_builder(
                                                features = test_features,
                                                seq_length = FLAGS.max_seq_length,
                                                is_training = False,
                                                drop_remainder = True)
                                                

    predictions = estimator.predict(test_input_fn)
    x=[prediction['scores'] for prediction in predictions]
    print('\n')
    print("The accuracy of BertQA on "+FLAGS.category_name+" is: "+str(sum(i > 0 for i in x)/len(x)))
    print('\n')