def evaluate_and_get_hidden_context(estimator, input_fn_for_test,
                                    input_fn_for_hidden,
                                    is_progressive = False,
                                    hidden_context=None):
    """Evaluate ``estimator`` and, when not progressive, collect hidden contexts.

    Args:
        estimator: Object exposing ``evaluate`` and ``predict``.
        input_fn_for_test: Passed as ``features=`` to the input-fn builder
            used for the evaluation pass.
        input_fn_for_hidden: Passed as ``features=`` to the builder used for
            the prediction pass; only read when ``is_progressive`` is falsy.
        is_progressive: Selects the progressive branch, which uses the
            unqualified ``input_fn_builder`` and forwards ``hidden_context``.
        hidden_context: Forwarded to ``input_fn_builder`` in the progressive
            branch; ignored otherwise.

    Returns:
        A NumPy array built from the ``"hidden_context"`` entry of every
        prediction in the non-progressive branch, ``None`` in the progressive
        branch.
    """
    max_seq_length = 128

    if is_progressive:
        progressive_eval_fn = input_fn_builder(
            features=input_fn_for_test,
            hidden_context=hidden_context,
            seq_length=max_seq_length,
            is_training=False,
            drop_remainder=False)
        estimator.evaluate(input_fn=progressive_eval_fn, steps=None)
        return None

    # Plain path: evaluate first, then run prediction to harvest contexts.
    eval_fn = run_classifier.input_fn_builder(
        features=input_fn_for_test,
        seq_length=max_seq_length,
        is_training=False,
        drop_remainder=False)
    estimator.evaluate(input_fn=eval_fn, steps=None)

    context_fn = run_classifier.input_fn_builder(
        features=input_fn_for_hidden,
        seq_length=max_seq_length,
        is_training=False,
        drop_remainder=False)
    return np.array(
        [row["hidden_context"] for row in estimator.predict(context_fn)])
def run_on_dfs(train, test, predict, DATA_COLUMN, LABEL_COLUMN,
               MAX_SEQ_LENGTH=128,
               BATCH_SIZE=32,
               LEARNING_RATE=2e-5,
               NUM_TRAIN_EPOCHS=3.0,
               WARMUP_PROPORTION=0.1,
               SAVE_SUMMARY_STEPS=100,
               SAVE_CHECKPOINTS_STEPS=10000,
               bert_model_hub=""):  # e.g. "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"
    """Train on ``train``, evaluate on ``test`` and predict on ``predict``.

    Args:
        train: Frame used for training; its ``LABEL_COLUMN`` values define
            the label list.
        test: Frame used for evaluation.
        predict: Frame whose features are fed to ``estimator.predict``.
        DATA_COLUMN: Column name forwarded to ``make_features``.
        LABEL_COLUMN: Column name forwarded to ``make_features``.
        MAX_SEQ_LENGTH: Sequence length used for features and input fns.
        BATCH_SIZE: Batch size used for the step count and the estimator.
        LEARNING_RATE: Forwarded to ``estimator_builder``.
        NUM_TRAIN_EPOCHS: Multiplier used to compute the number of train steps.
        WARMUP_PROPORTION: Fraction of train steps used as warmup steps.
        SAVE_SUMMARY_STEPS: Forwarded to ``estimator_builder``.
        SAVE_CHECKPOINTS_STEPS: Forwarded to ``estimator_builder``.
        bert_model_hub: Hub handle forwarded to the tokenizer and estimator
            builders.

    Returns:
        ``(result_dict, result_predict, estimator)`` where ``result_dict`` is
        what ``estimator.evaluate`` returned for the test features and
        ``result_predict`` is what ``estimator.predict`` returned for the
        predict features.
    """
    label_list = train[LABEL_COLUMN].unique().tolist()

    tokenizer = create_tokenizer_from_hub_module(bert_model_hub)

    train_features = make_features(train, label_list, MAX_SEQ_LENGTH, tokenizer, DATA_COLUMN, LABEL_COLUMN)
    test_features = make_features(test, label_list, MAX_SEQ_LENGTH, tokenizer, DATA_COLUMN, LABEL_COLUMN)
    predict_features = make_features(predict, label_list, MAX_SEQ_LENGTH, tokenizer, DATA_COLUMN, LABEL_COLUMN)

    num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)
    num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

    # OUTPUT_DIR is read from module scope.
    estimator, model_fn, run_config = estimator_builder(
        bert_model_hub,
        OUTPUT_DIR,
        SAVE_SUMMARY_STEPS,
        SAVE_CHECKPOINTS_STEPS,
        label_list,
        LEARNING_RATE,
        num_train_steps,
        num_warmup_steps,
        BATCH_SIZE)

    train_input_fn = bert.run_classifier.input_fn_builder(
        features=train_features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=True,
        drop_remainder=False)

    estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    test_input_fn = run_classifier.input_fn_builder(
        features=test_features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)

    predict_input_fn = run_classifier.input_fn_builder(
        features=predict_features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)

    result_dict = estimator.evaluate(input_fn=test_input_fn, steps=None)

    # Predict on the dedicated prediction set. The previous code built
    # predict_input_fn but then passed test_input_fn here, so the ``predict``
    # frame never influenced the returned predictions.
    result_predict = estimator.predict(input_fn=predict_input_fn)

    return result_dict, result_predict, estimator
def train_and_evaluate(train_sents,test_sents,labels_train,labels_test):
    """Train the module-level ``estimator`` and then evaluate it.

    Sentences and labels are paired positionally with ``zip``. ``label_list``,
    ``MAX_SEQUENCE_LENGTH``, ``tokenizer``, ``estimator`` and
    ``num_train_steps`` are read from module scope. Nothing is returned.
    """

    def _make_input_fn(sentences, labels, is_training):
        # One single-sentence example per (sentence, label) pair.
        examples = []
        for sentence, label in zip(sentences, labels):
            examples.append(
                run_classifier.InputExample(guid=None,
                                            text_a=sentence,
                                            text_b=None,
                                            label=label))
        features = convert_examples_to_features_RE(
            examples, label_list, MAX_SEQUENCE_LENGTH, tokenizer)
        return run_classifier.input_fn_builder(
            features=features,
            seq_length=MAX_SEQUENCE_LENGTH,
            is_training=is_training,
            drop_remainder=False)

    training_fn = _make_input_fn(train_sents, labels_train, True)
    print("############ Beginning of training #####################")
    estimator.train(input_fn=training_fn, max_steps=num_train_steps)
    print("############ Ending of training #####################")

    evaluation_fn = _make_input_fn(test_sents, labels_test, False)
    estimator.evaluate(input_fn=evaluation_fn, steps=None)
 def evaluate(self, examples):
     """Evaluate ``self.estimator`` on ``examples``.

     The examples are converted to features with ``self.label_list``,
     ``self.max_len`` and ``self.tokenizer``. The evaluation result is not
     returned.
     """
     eval_features = run_classifier.convert_examples_to_features(
         examples,
         self.label_list,
         self.max_len,
         self.tokenizer,
     )
     eval_input_fn = run_classifier.input_fn_builder(
         features=eval_features,
         seq_length=self.max_len,
         is_training=False,
     )
     self.estimator.evaluate(input_fn=eval_input_fn, steps=None)
Exemple #5
0
def get_final_predictions(in_contexts, in_last_sentences, tokenizer,
                          estimator: tf.estimator.Estimator, label_list):
    """
    Score every (context, ending) pair with the estimator.

    Parameters
    ----------
    in_contexts:            story contexts, used as ``text_a``
    in_last_sentences:      proposed endings, used as ``text_b``; paired
                            positionally with ``in_contexts``
    tokenizer:              tokenizer forwarded to feature conversion
    estimator:              tf.estimator used for prediction
    label_list:             possible label values

    Returns
    -------
    list with the ``'probabilities'`` entry of each prediction
    """
    # The label is a placeholder; every example carries label=0.
    pair_examples = []
    for context, ending in zip(in_contexts, in_last_sentences):
        pair_examples.append(
            run_classifier.InputExample(guid="",
                                        text_a=context,
                                        text_b=ending,
                                        label=0))

    pair_features = run_classifier.convert_examples_to_features(
        pair_examples, label_list, flags.max_seq_length, tokenizer)
    pair_input_fn = run_classifier.input_fn_builder(
        features=pair_features,
        seq_length=flags.max_seq_length,
        is_training=False,
        drop_remainder=False)

    return [
        outcome['probabilities']
        for outcome in estimator.predict(pair_input_fn)
    ]
    def getListPrediction(self, in_sentences):
        """Return ``self.estimator.predict`` output for ``in_sentences``.

        Each sentence becomes a single-text example with the placeholder
        label ``"0"``. ``label_list`` and ``MAX_SEQ_LENGTH`` come from module
        scope. The prediction object is returned as-is, without being
        iterated here.
        """
        # Step 1: wrap the raw sentences.
        examples = []
        for sentence in in_sentences:
            examples.append(
                run_classifier.InputExample(guid="",
                                            text_a=sentence,
                                            text_b=None,
                                            label="0"))

        # Step 2: tokenise into model features.
        features = run_classifier.convert_examples_to_features(
            examples, label_list, MAX_SEQ_LENGTH, self.tokenizer)

        # Step 3: build the inference input function.
        inference_fn = run_classifier.input_fn_builder(
            features=features,
            seq_length=MAX_SEQ_LENGTH,
            is_training=False,
            drop_remainder=False)

        # Step 4: hand over to the estimator.
        return self.estimator.predict(input_fn=inference_fn)
Exemple #7
0
def getPrediction(in_sentences):
    """Classify sentences as "Non-Sensitive" or "Sensitive".

    The estimator is obtained by calling the module-level ``run()`` after the
    input function has been built. ``label_list``, ``MAX_SEQ_LENGTH`` and
    ``tokenizer`` are read from module scope.

    Returns:
        List of ``(sentence, probabilities, label_name)`` tuples.
    """
    class_names = ["Non-Sensitive", "Sensitive"]

    # label=0 is only a placeholder required by InputExample.
    examples = []
    for text in in_sentences:
        examples.append(
            run_classifier.InputExample(guid="",
                                        text_a=text,
                                        text_b=None,
                                        label=0))

    features = run_classifier.convert_examples_to_features(
        examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    inference_fn = run_classifier.input_fn_builder(
        features=features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)

    model = run()
    outcomes = model.predict(inference_fn)

    scored = []
    for sentence, outcome in zip(in_sentences, outcomes):
        scored.append((sentence,
                       outcome['probabilities'],
                       class_names[outcome['labels']]))
    return scored


# pred_sentences = [
#   "He drinks apple",
#   "He drinks milk",
#   "A mosquito stings me",
#   "I sting a mosquito",
#   "A niece is a person.",
#   "A giraffe is a person.",
#   "I like to ride my chocolate",
#   "I like to ride my bike",
#   "he put elephant into the jug",
# ]
# predictions = getPrediction(pred_sentences)

# print(predictions,"predictions")
def getPrediction(in_sentences, type_output="features"):
    """Predict for ``in_sentences`` and return features or labelled results.

    Args:
        in_sentences: Texts to run through the module-level ``estimator``.
        type_output: ``"features"`` returns each prediction's
            ``'pooled_output'``; any other value returns
            ``(sentence, probabilities, label_index, label_name)`` tuples.

    ``train``, ``label_list``, ``MAX_SEQ_LENGTH``, ``tokenizer`` and
    ``estimator`` are read from module scope.
    """
    # Sorted unique training labels, used to map predicted indices to names.
    class_names = np.unique(train['label'])

    examples = []
    for text in in_sentences:
        examples.append(
            run_classifier.InputExample(guid="",
                                        text_a=text,
                                        text_b=None,
                                        label=0))
    features = run_classifier.convert_examples_to_features(
        examples, label_list, MAX_SEQ_LENGTH, tokenizer)

    inference_fn = run_classifier.input_fn_builder(
        features=features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)
    outcomes = estimator.predict(inference_fn)

    if type_output != "features":
        return [(sentence,
                 outcome['probabilities'],
                 outcome['labels'],
                 class_names[outcome['labels']])
                for sentence, outcome in zip(in_sentences, outcomes)]

    return [outcome['pooled_output'] for outcome in outcomes]
Exemple #9
0
def getPrediction(in_sentences):
    """Predict "Not an error" / "Is an error" for sentence pairs.

    Each item of ``in_sentences`` is indexed with ``[0]`` and ``[1]`` to get
    ``text_a`` and ``text_b``. Prediction runs from a fixed checkpoint under
    ``OUTPUT_DIR`` with ``yield_single_examples=False``, so one yielded
    prediction may hold several examples; both the multi-example and the
    scalar form are handled.

    Returns:
        List of ``(input_item, probabilities, label_name)`` tuples, consumed
        from ``in_sentences`` in order.
    """
    class_names = ["Not an error", "Is an error"]

    # label=0 is only a placeholder required by InputExample.
    examples = []
    for pair in in_sentences:
        examples.append(
            run_classifier.InputExample(guid="",
                                        text_a=pair[0],
                                        text_b=pair[1],
                                        label=0))
    features = run_classifier.convert_examples_to_features(
        examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    inference_fn = run_classifier.input_fn_builder(
        features=features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)
    batches = estimator.predict(inference_fn,
                                checkpoint_path="./" + OUTPUT_DIR +
                                "/model.ckpt-56165",
                                yield_single_examples=False)

    results = []
    cursor = 0  # position of the next unconsumed entry in in_sentences
    for batch in batches:
        if not hasattr(batch["labels"], '__len__'):
            # Scalar label: the prediction describes exactly one example.
            results.append((in_sentences[cursor],
                            batch["probabilities"],
                            class_names[batch["labels"]]))
            cursor += 1
            continue

        for offset in range(len(batch["labels"])):
            results.append((in_sentences[cursor],
                            batch["probabilities"][offset],
                            class_names[batch["labels"][offset]]))
            cursor += 1
    return results
 def evaluateModel(self, test_features, estimator):
     """Evaluate ``estimator`` on ``test_features``.

     The sequence length comes from ``ConfigFile.MAX_SEQ_LENGTH``. The
     evaluation result is not returned.
     """
     builder_options = {
         "features": test_features,
         "seq_length": ConfigFile.MAX_SEQ_LENGTH,
         "is_training": False,
         "drop_remainder": False,
     }
     evaluation_fn = run_classifier.input_fn_builder(**builder_options)
     estimator.evaluate(input_fn=evaluation_fn, steps=None)
Exemple #11
0
def getPrediction(in_sentences):
    """Predict a one-hot encoded class (0, 1 or 2) for each sentence.

    ``label_list``, ``MAX_SEQ_LENGTH``, ``tokenizer`` and ``estimator`` are
    read from module scope.

    Returns:
        List of ``(sentence, probabilities, one_hot_label)`` tuples. This is
        best effort: if anything fails while predictions are consumed, the
        exception is printed and the tuples collected so far are returned.
    """
    labels = [0, 1, 2]
    # label=0 is only a placeholder required by InputExample.
    input_examples = [
        run_classifier.InputExample(guid="", text_a=x, text_b=None, label=0)
        for x in in_sentences
    ]
    input_features = run_classifier.convert_examples_to_features(
        input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    predict_input_fn = run_classifier.input_fn_builder(
        features=input_features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)
    predictions = estimator.predict(predict_input_fn)

    resultado = []
    try:
        # Imported once instead of on every iteration. It stays inside the
        # try so a missing scikit-learn is reported like any other failure.
        from sklearn.preprocessing import label_binarize

        for indice, prediction in enumerate(predictions):
            resultado.append((
                in_sentences[indice],
                prediction['probabilities'],
                label_binarize([labels[prediction['labels']]],
                               classes=labels)[0],
            ))
            # Progress trace: "<index>###<total>".
            print(str(indice) + "###" + str(len(in_sentences)))
    except Exception as e:
        # Deliberate best effort: report the problem and keep partial results.
        print(e)

    return resultado
    '''
Exemple #12
0
    def getRatings(self, in_sentences):
        """Return ``self.estimator.predict`` output for ``in_sentences``.

        Each sentence becomes a single-text example with the placeholder
        label ``"1"``. ``label_list`` and ``MAX_SEQ_LENGTH`` come from module
        scope. The prediction object is returned without being iterated.
        """
        # Wrap the sentences; the label value is only a placeholder.
        rating_examples = []
        for sentence in in_sentences:
            rating_examples.append(
                run_classifier.InputExample(guid="",
                                            text_a=sentence,
                                            text_b=None,
                                            label="1"))

        # Tokenise the examples into InputFeatures objects.
        rating_features = run_classifier.convert_examples_to_features(
            rating_examples, label_list, MAX_SEQ_LENGTH, self.tokenizer)

        # Build the input function for inference.
        rating_input_fn = run_classifier.input_fn_builder(
            features=rating_features,
            seq_length=MAX_SEQ_LENGTH,
            is_training=False,
            drop_remainder=False)

        return self.estimator.predict(input_fn=rating_input_fn)
    def do_eval(self):
        """Evaluate ``self.estimator`` on ``self.test`` and print the metrics.

        Rows of ``self.test`` are read through their ``'sentence'`` and
        ``'label'`` fields. The label list is ``0 .. len(self.labels) - 1``.
        Metrics are printed sorted by key; nothing is returned.
        """

        def _row_to_example(row):
            return run_classifier.InputExample(guid=None,
                                               text_a=row['sentence'],
                                               text_b=None,
                                               label=row['label'])

        print("[INFO] Started working on evaluation...")
        print("[INFO] Preparing test InputExample...")
        examples = self.test.apply(_row_to_example, axis=1)
        print("[INFO] Done preparing test InputExample...\n")

        label_ids = list(range(len(self.labels)))
        print("[INFO] Preparing test features...")
        features = run_classifier.convert_examples_to_features(
            examples, label_ids, self.max_seq_length, self.tokenizer)
        print("[INFO] Done preparing test features...\n")

        evaluation_fn = run_classifier.input_fn_builder(
            features=features,
            seq_length=self.max_seq_length,
            is_training=False,
            drop_remainder=False)

        print('[INFO] Begin evaluating...!')
        metrics = self.estimator.evaluate(input_fn=evaluation_fn, steps=None)
        print("[INFO] Done evaluating...\n")
        for name in sorted(metrics.keys()):
            print(f"[INFO]  {name} = {metrics[name]}")
Exemple #14
0
def model_eval(estimator, processor, input_dir, label_list,
               max_sequence_length, tokenizer, eval_batch_size, output_dir):
    """Evaluate on the processor's dev examples and save the metrics.

    Args:
        estimator: Object exposing ``evaluate``.
        processor: Object exposing ``get_dev_examples(input_dir)``.
        input_dir: Forwarded to ``processor.get_dev_examples``.
        label_list: Forwarded to feature conversion.
        max_sequence_length: Sequence length for features and the input fn.
        tokenizer: Forwarded to feature conversion.
        eval_batch_size: Used to derive the number of evaluation steps.
        output_dir: Directory that receives ``eval_results.txt``.

    The metrics are printed and written as ``key = value`` lines, sorted by
    key. Nothing is returned.
    """
    dev_examples = processor.get_dev_examples(input_dir)
    dev_features = run_classifier.convert_examples_to_features(
        dev_examples, label_list, max_sequence_length, tokenizer)

    started_at = datetime.datetime.now()
    print('***** Started evaluation at {} *****'.format(started_at))
    print('  Num examples = {}'.format(len(dev_examples)))
    print('  Batch size = {}'.format(eval_batch_size))

    # Only whole batches are evaluated: the step count is truncated and the
    # input fn drops the remainder, so a trailing partial batch is skipped.
    whole_batches = int(len(dev_examples) / eval_batch_size)
    dev_input_fn = run_classifier.input_fn_builder(
        features=dev_features,
        seq_length=max_sequence_length,
        is_training=False,
        drop_remainder=True)
    metrics = estimator.evaluate(input_fn=dev_input_fn, steps=whole_batches)

    finished_at = datetime.datetime.now()
    print('***** Finished evaluation at {} *****'.format(finished_at))

    results_path = os.path.join(output_dir, "eval_results.txt")
    with tf.gfile.GFile(results_path, "w") as writer:
        print("***** Eval results *****")
        for name in sorted(metrics.keys()):
            rendered = str(metrics[name])
            print('  {} = {}'.format(name, rendered))
            writer.write("%s = %s\n" % (name, rendered))
Exemple #15
0
def getPrediction(in_sentences):
    """Classify sentences as "Negative" or "Positive".

    ``label_list``, ``MAX_SEQ_LENGTH``, ``tokenizer`` and ``estimator`` are
    read from module scope.

    Returns:
        List of ``(sentence, probabilities, label_name)`` tuples.
    """
    sentiment_names = ["Negative", "Positive"]

    # label=0 is only a placeholder required by InputExample.
    examples = []
    for text in in_sentences:
        examples.append(
            run_classifier.InputExample(guid="",
                                        text_a=text,
                                        text_b=None,
                                        label=0))

    features = run_classifier.convert_examples_to_features(
        examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    inference_fn = run_classifier.input_fn_builder(
        features=features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)
    outcomes = estimator.predict(inference_fn)

    scored = []
    for sentence, outcome in zip(in_sentences, outcomes):
        scored.append((sentence,
                       outcome['probabilities'],
                       sentiment_names[outcome['labels']]))
    return scored
def evaluate_bert(test_features, estimator):
    """Evaluate ``estimator`` on ``test_features`` and return the result.

    The sequence length is the module-level ``MAX_SEQ_LENGTH``.
    """
    evaluation_fn = run_classifier.input_fn_builder(
        features=test_features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False,
    )
    return estimator.evaluate(input_fn=evaluation_fn, steps=None)
 def predict(self, examples):
     """Return the predicted probabilities for each of ``examples``.

     The examples are converted to features with ``self.label_list``,
     ``self.max_len`` and ``self.tokenizer`` and run through
     ``self.estimator.predict``.

     Returns:
         List with the ``'probabilities'`` entry of every prediction, in the
         order the estimator yields them.
     """
     features = run_classifier.convert_examples_to_features(
         examples, self.label_list, self.max_len, self.tokenizer)
     input_fn = run_classifier.input_fn_builder(features=features,
                                                seq_length=self.max_len,
                                                is_training=False)
     predictions = self.estimator.predict(input_fn=input_fn)
     # Assuming self.estimator is a tf.estimator.Estimator, predict() yields
     # one dict per example instead of returning a single dict. Subscripting
     # the result with 'probabilities' therefore fails, so each prediction is
     # read individually.
     return [prediction['probabilities'] for prediction in predictions]
Exemple #18
0
def main():
    """Build an estimator from ``args``, train it and evaluate it.

    Features come from ``data_processor()``. All hyper-parameters are read
    from the module-level ``args``. Nothing is returned.
    """
    train_features, test_features = data_processor()

    # Step budget derived from dataset size, batch size and epoch count.
    total_steps = int(
        len(train_features) / args.BATCH_SIZE * args.NUM_TRAIN_EPOCHS)
    warmup_steps = int(total_steps * args.WARMUP_PROPORTION)

    # Output directory plus summary and checkpoint cadence.
    config = tf.estimator.RunConfig(
        model_dir=args.output_dir,
        save_summary_steps=args.SAVE_SUMMARY_STEPS,
        save_checkpoints_steps=args.SAVE_CHECKPOINTS_STEPS)

    model_fn = model_fn_builder(
        num_labels=len(args.label_list),
        learning_rate=args.LEARNING_RATE,
        num_train_steps=total_steps,
        num_warmup_steps=warmup_steps)

    estimator = tf.estimator.Estimator(
        model_fn=model_fn,
        config=config,
        params={"batch_size": args.BATCH_SIZE})

    # Training input function; the remainder batch is kept.
    training_fn = run_classifier.input_fn_builder(
        features=train_features,
        seq_length=args.MAX_SEQ_LENGTH,
        is_training=True,
        drop_remainder=False)

    print('Beginning Training!')
    started = datetime.now()
    estimator.train(input_fn=training_fn, max_steps=total_steps)
    print("Training took time ", datetime.now() - started)

    evaluation_fn = run_classifier.input_fn_builder(
        features=test_features,
        seq_length=args.MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)

    estimator.evaluate(input_fn=evaluation_fn, steps=None)
def predict(in_sentences):
    """Predict the output relation of each sentence.

    ``label_list``, ``MAX_SEQUENCE_LENGTH``, ``tokenizer`` and ``estimator``
    are read from module scope.

    Returns:
        List of ``(sentence, probabilities, labels)`` tuples.
    """
    # label=0 is only a placeholder required by InputExample.
    examples = []
    for text in in_sentences:
        examples.append(
            run_classifier.InputExample(guid="",
                                        text_a=text,
                                        text_b=None,
                                        label=0))

    features = convert_examples_to_features_RE(
        examples, label_list, MAX_SEQUENCE_LENGTH, tokenizer)
    inference_fn = run_classifier.input_fn_builder(
        features=features,
        seq_length=MAX_SEQUENCE_LENGTH,
        is_training=False,
        drop_remainder=False)
    outcomes = estimator.predict(inference_fn, yield_single_examples=True)

    relations = []
    for sentence, outcome in zip(in_sentences, outcomes):
        relations.append(
            (sentence, outcome['probabilities'], outcome['labels']))
    return relations
    def test(self, df):
        """Evaluate ``self.estimator`` on ``df`` and return the metrics.

        Features come from ``self._feature_extractor(df)`` and the sequence
        length from ``self.config['max_seq_length']``. The metrics are also
        stored on ``self.test_metrics``.
        """
        features = self._feature_extractor(df)
        evaluation_fn = run_classifier.input_fn_builder(
            features=features,
            seq_length=self.config['max_seq_length'],
            is_training=False,
            drop_remainder=False,
        )
        metrics = self.estimator.evaluate(input_fn=evaluation_fn, steps=None)
        self.test_metrics = metrics
        return self.test_metrics
Exemple #21
0
def getPrediction(in_sentences):
    """Classify sentences into the classes "0" through "3".

    ``label_list``, ``MAX_SEQ_LENGTH``, ``tokenizer`` and ``estimator`` are
    read from module scope.

    Returns:
        List of ``(sentence, probabilities, label_index, label_name)`` tuples.
    """
    # Maps a predicted label index to its display name.
    class_names = ["0", "1","2","3"]

    # Wrap the texts in InputExample objects; label=0 is a placeholder.
    examples = []
    for text in in_sentences:
        examples.append(
            run_classifier.InputExample(guid="",
                                        text_a=text,
                                        text_b=None,
                                        label=0))

    # Tokenise into model features.
    features = run_classifier.convert_examples_to_features(
        examples, label_list, MAX_SEQ_LENGTH, tokenizer)

    # Run inference.
    inference_fn = run_classifier.input_fn_builder(
        features=features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)
    outcomes = estimator.predict(inference_fn)

    scored = []
    for sentence, outcome in zip(in_sentences, outcomes):
        scored.append((sentence,
                       outcome['probabilities'],
                       outcome['labels'],
                       class_names[outcome['labels']]))
    return scored
def getPrediction(estimator, in_sentences, labels, label_list, max_seq_len,
                  tokenizer):
    """Classify ``in_sentences`` with an explicitly supplied estimator.

    Args:
        estimator: Object exposing ``predict``.
        in_sentences: Texts to classify.
        labels: Sequence indexed by the predicted ``'labels'`` value to get
            the display name.
        label_list: Forwarded to feature conversion.
        max_seq_len: Sequence length for features and the input fn.
        tokenizer: Forwarded to feature conversion.

    Returns:
        List of ``(sentence, probabilities, label_name)`` tuples.
    """
    # label=0 is only a placeholder required by InputExample.
    examples = []
    for text in in_sentences:
        examples.append(
            run_classifier.InputExample(guid="",
                                        text_a=text,
                                        text_b=None,
                                        label=0))

    features = run_classifier.convert_examples_to_features(
        examples, label_list, max_seq_len, tokenizer)
    inference_fn = run_classifier.input_fn_builder(
        features=features,
        seq_length=max_seq_len,
        is_training=False,
        drop_remainder=False)
    outcomes = estimator.predict(inference_fn)

    scored = []
    for sentence, outcome in zip(in_sentences, outcomes):
        scored.append(
            (sentence, outcome['probabilities'], labels[outcome['labels']]))
    return scored
    def do_train(self):
        """Build ``self.estimator`` and train it on ``self.train``.

        Rows of ``self.train`` are read through their ``'sentence'`` and
        ``'label'`` fields. The label list is ``0 .. len(self.labels) - 1``.
        The estimator returned by ``get_estimator`` is stored on
        ``self.estimator`` before training. Nothing is returned.
        """

        def _row_to_example(row):
            return run_classifier.InputExample(guid=None,
                                               text_a=row['sentence'],
                                               text_b=None,
                                               label=row['label'])

        print("[INFO] Preparing train InputExample...")
        examples = self.train.apply(_row_to_example, axis=1)
        print("[INFO] Done preparing train InputExample...\n")

        label_ids = list(range(len(self.labels)))
        print("[INFO] Preparing train features...")
        features = run_classifier.convert_examples_to_features(
            examples, label_ids, self.max_seq_length, self.tokenizer)
        print("[INFO] Done preparing train features...\n")

        training_fn = run_classifier.input_fn_builder(
            features=features,
            seq_length=self.max_seq_length,
            is_training=True,
            drop_remainder=False)

        # Step budget derived from dataset size, batch size and epoch count.
        total_steps = int(
            len(features) / self.train_batch_size * self.num_train_epochs)
        warmup_steps = int(total_steps * self.warmup_proportion)

        print(f"[INFO] No. of train steps: {total_steps}")
        print(f"[INFO] No. of warmup steps: {warmup_steps}")

        self.estimator = get_estimator(
            self.init_checkpoint,
            self.bert_config_file,
            self.labels,
            self.train_batch_size,
            self.model_dir,
            save_summary_steps=self.save_summary_steps,
            save_checkpoints_steps=self.save_checkpoints_steps,
            learning_rate=self.learning_rate,
            num_train_steps=total_steps,
            num_warmup_steps=warmup_steps)

        print('[INFO] Begin Training...!')
        started = datetime.now()
        self.estimator.train(input_fn=training_fn, max_steps=total_steps)
        print(
            f"[INFO] Training took time {datetime.now() - started} sec..!\n"
        )
Exemple #24
0
def model_predict(estimator, processor, input_dir, predict_batch_size,
                  label_list, max_sequence_length, tokenizer):
    """Print predictions for the first ``predict_batch_size`` dev examples.

    Args:
        estimator: Object exposing ``predict``.
        processor: Object exposing ``get_dev_examples(input_dir)``.
        input_dir: Forwarded to ``processor.get_dev_examples``.
        predict_batch_size: Number of leading dev examples to keep.
        label_list: Forwarded to feature conversion.
        max_sequence_length: Sequence length for features and the input fn.
        tokenizer: Forwarded to feature conversion.

    For each example its texts, its label and the predicted
    ``'probabilities'`` are printed. Nothing is returned.
    """
    dev_examples = processor.get_dev_examples(input_dir)
    sample = dev_examples[:predict_batch_size]

    sample_features = run_classifier.convert_examples_to_features(
        sample, label_list, max_sequence_length, tokenizer)
    sample_input_fn = run_classifier.input_fn_builder(
        features=sample_features,
        seq_length=max_sequence_length,
        is_training=False,
        drop_remainder=True)

    outcomes = estimator.predict(sample_input_fn)
    for example, outcome in zip(sample, outcomes):
        report_fields = (example.text_a,
                         example.text_b,
                         str(example.label),
                         outcome['probabilities'])
        print('text_a: %s\ntext_b: %s\nlabel:%s\nprediction:%s\n' %
              report_fields)
def classify_sentences(estimator, tokenizer, test_features, testfile):
    """Evaluate the estimator, then classify the sentences in ``testfile``.

    Args:
        estimator: Object exposing ``evaluate``; also forwarded to
            ``getPrediction``.
        tokenizer: Forwarded to ``getPrediction``.
        test_features: Features used for the evaluation pass.
        testfile: Path handed to ``pandas.read_csv``. Its single column is
            renamed to ``'Text'`` and each row is treated as one sentence.

    Returns:
        Whatever ``getPrediction(pred_sentences, estimator, tokenizer)``
        returns.
    """
    test_input_fn = run_classifier.input_fn_builder(
        features=test_features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)

    estimator.evaluate(input_fn=test_input_fn, steps=None)

    # NOTE(review): with default read_csv settings the first line of the file
    # becomes the header and is not classified - confirm this is intended.
    # NOTE(review): recent pandas releases may reject "\n" as a separator -
    # confirm against the pinned pandas version.
    ul_test = pd.read_csv(testfile, sep='\n')
    ul_test.columns = ['Text']
    count, col = ul_test.shape
    print("count, col", count, col)

    # The previous index-by-index copy is replaced by a direct conversion.
    # The no-op ``head`` expressions and the tokenisation step whose result
    # was never used are dropped; getPrediction receives the raw sentences.
    pred_sentences = ul_test["Text"].tolist()

    print(len(pred_sentences))
    print(pred_sentences)

    predictions = getPrediction(pred_sentences, estimator, tokenizer)

    print(predictions)

    return predictions
Exemple #26
0
def getPrediction(input_features, estimator):
    """Predict 'negative'/'positive' for prepared features, printing timings.

    Args:
        input_features: Features forwarded to the input-fn builder.
        estimator: Object exposing ``predict``.

    Returns:
        List of dicts with ``'labels'`` (the class name) and ``'prediction'``
        (each probability converted with ``str``).

    All three printed timings are measured from the start of the function.
    """
    started = datetime.datetime.now()

    inference_fn = run_classifier.input_fn_builder(
        features=input_features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)
    print('bert feature time taken: %s' % (datetime.datetime.now() - started))

    outcomes = estimator.predict(inference_fn)
    print('bert model time taken: %s' % (datetime.datetime.now() - started))

    class_names = ['negative', 'positive']
    rendered = []
    for outcome in outcomes:
        rendered.append({
            'labels': class_names[outcome['labels']],
            'prediction': [str(value) for value in outcome['probabilities']],
        })
    print('bert prediction time taken: %s' %
          (datetime.datetime.now() - started))
    return rendered
def predict_class(sentences):
    """Classify sentences into the classes '1' through '5'.

    ``label_list``, ``MAX_SEQ_LENGTH``, ``tokenizer`` and ``estimator`` are
    read from module scope.

    Returns:
        List of ``(sentence, probabilities, label_name)`` tuples.
    """
    class_names = ['1', '2', '3', '4', '5']

    # label=4 is only a placeholder required by InputExample.
    examples = []
    for text in sentences:
        examples.append(
            run_classifier.InputExample(guid="",
                                        text_a=text,
                                        text_b=None,
                                        label=4))

    features = run_classifier.convert_examples_to_features(
        examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    inference_fn = run_classifier.input_fn_builder(
        features=features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)
    outcomes = estimator.predict(inference_fn)

    scored = []
    for sentence, outcome in zip(sentences, outcomes):
        scored.append((sentence,
                       outcome['probabilities'],
                       class_names[outcome['labels']]))
    return scored
Exemple #28
0
 def getPrediction(data_to_predict):
     """Predict a label for every row of ``data_to_predict``.

     Rows are read through the ``GUID`` and ``DATA_COLUMN`` fields, with
     ``'mentioning'`` as placeholder label. The sentences in the result are
     taken from the ``'text'`` column. ``label_list``, ``MAX_SEQ_LENGTH``,
     ``tokenizer`` and ``estimator`` are read from the enclosing scope.

     Returns:
         List of ``(sentence, label)`` tuples, where the label is
         ``label_list`` indexed by the predicted ``'labels'`` value.
     """

     def _row_to_example(row):
         return bert.run_classifier.InputExample(guid=row[GUID],
                                                 text_a=row[DATA_COLUMN],
                                                 text_b=None,
                                                 label='mentioning')

     examples = data_to_predict.apply(_row_to_example, axis=1)
     features = run_classifier.convert_examples_to_features(
         examples, label_list, MAX_SEQ_LENGTH, tokenizer)
     inference_fn = run_classifier.input_fn_builder(
         features=features,
         seq_length=MAX_SEQ_LENGTH,
         is_training=False,
         drop_remainder=False)
     outcomes = estimator.predict(inference_fn)

     labelled = []
     for sentence, outcome in zip(list(data_to_predict['text']), outcomes):
         labelled.append((sentence, label_list[outcome['labels']]))
     return labelled
Exemple #29
0
 def predict(self, input_sentences: list):
     """Return every prediction produced for ``input_sentences`` as a list.

     Each sentence becomes a single-text example labelled with
     ``self.label_list[0]`` as a placeholder. ``MAX_SEQ_LENGTH`` is read
     from module scope.
     """
     examples = []
     for sentence in input_sentences:
         examples.append(
             run_classifier.InputExample(guid="",
                                         text_a=sentence,
                                         text_b=None,
                                         label=self.label_list[0]))

     features = run_classifier.convert_examples_to_features(
         examples, self.label_list, MAX_SEQ_LENGTH, self.tokenizer)
     inference_fn = run_classifier.input_fn_builder(
         features=features,
         seq_length=MAX_SEQ_LENGTH,
         is_training=False,
         drop_remainder=False)

     # Materialise the estimator output so callers receive a plain list.
     return list(self.estimator.predict(inference_fn))
Exemple #30
0
    def predict(self, df):
        """
        Run inference over a pandas dataframe.

        Params:
        df -- Pandas dataframe; its 'text' and 'type' columns are handed to
              the feature builder

        Returns:

        When ``self.model`` is exactly a ``tf.estimator.Estimator``, a list
        of the predictions it yields. Otherwise whatever calling
        ``self.model`` on the assembled input dict returns.
        """
        # TODO: REMOVE type column

        hub_tokenizer = self.__create_tokenizer_from_hub_module()
        # NOTE(review): the labels come from the module-level ``test_other``
        # frame, not from ``df`` - confirm this is intended.
        known_labels = test_other[LABEL_COLUMN].unique().tolist()
        features = self.__create_features(
            df, known_labels,
            self.max_seq_len, hub_tokenizer, 'text', 'type'
        )

        if type(self.model) == tf.estimator.Estimator:
            # Trained in this process: go through the estimator API.
            inference_fn = input_fn_builder(
                features=features,
                seq_length=self.max_seq_len,
                is_training=False,
                drop_remainder=False)
            return list(self.model.predict(input_fn=inference_fn))

        # Loaded from a SavedModel: assemble the named input arrays by hand.
        width = self.max_seq_len
        model_inputs = {
            'label_ids': np.array(
                [f.label_id for f in features]).reshape(-1,),
            'input_ids': np.array(
                [f.input_ids for f in features]).reshape(-1, width),
            'input_mask': np.array(
                [f.input_mask for f in features]).reshape(-1, width),
            'segment_ids': np.array(
                [f.segment_ids for f in features]).reshape(-1, width),
        }
        return self.model(model_inputs)