def get_prediction(in_sentences):
    labels = ["Negative", "Positive"]
    # guid="" and label=0 are dummy values; neither is used at prediction time
    input_examples = [run_classifier.InputExample(guid="", text_a=x, text_b=None, label=0)
                      for x in in_sentences]
    input_features = run_classifier.convert_examples_to_features(
        input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    predict_input_fn = run_classifier.input_fn_builder(features=input_features,
                                                       seq_length=MAX_SEQ_LENGTH,
                                                       is_training=False,
                                                       drop_remainder=False)
    predictions = estimator.predict(predict_input_fn, checkpoint_path=checkpoint_path)
    return [(sentence, prediction['probabilities'], labels[prediction['labels']])
            for sentence, prediction in zip(in_sentences, predictions)]
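# A minimal usage sketch for get_prediction(). Assumptions: the module-level
# `estimator`, `tokenizer` and `checkpoint_path` globals have already been set up
# (e.g. by the training/evaluation code below); the helper name and the example
# sentences are illustrative, not part of this project.
def example_get_prediction():
    sentences = ["first example sentence", "second example sentence"]
    for sentence, probabilities, label in get_prediction(sentences):
        print(sentence, probabilities, label)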
def run_train(data_file_path, output_dir):
    print('***** Model output directory: {} *****'.format(output_dir))

    # Get the data from the data loader
    train, _, _ = ContextualRelevance(data_file_path).get_data()
    print(train.columns)

    # Use the InputExample class from BERT's run_classifier code to create examples from the data
    train_InputExamples = train.apply(
        lambda x: run_classifier.InputExample(
            guid=None,  # Globally unique ID for bookkeeping, unused in this example
            text_a=x[DATA_COLUMN],
            text_b=x[ANSWER_COLUMN],
            label=x[LABEL_COLUMN]),
        axis=1)

    # Get the BERT tokenizer from the hub module
    tokenizer = create_tokenizer_from_hub_module(BERT_MODEL_HUB, False)
    print(tokenizer.tokenize("מריצים אימון..."))  # Hebrew for "Running training..."

    # Convert our train examples to InputFeatures that BERT understands
    train_features = run_classifier.convert_examples_to_features(
        train_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)

    # Compute the number of train and warmup steps from the batch size
    num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)
    num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

    # Specify the output directory and the number of steps between checkpoints
    run_config = tf.compat.v1.estimator.RunConfig(
        model_dir=output_dir,
        save_summary_steps=SAVE_SUMMARY_STEPS,
        save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)

    model_fn = model_fn_builder(num_labels=len(label_list),
                                learning_rate=LEARNING_RATE,
                                num_train_steps=num_train_steps,
                                num_warmup_steps=num_warmup_steps,
                                bert_model_hub=BERT_MODEL_HUB)

    estimator = tf.compat.v1.estimator.Estimator(
        model_fn=model_fn,
        config=run_config,
        params={"batch_size": BATCH_SIZE})

    # Create an input function for training. drop_remainder=True is required for TPUs.
    train_input_fn = run_classifier.input_fn_builder(features=train_features,
                                                     seq_length=MAX_SEQ_LENGTH,
                                                     is_training=True,
                                                     drop_remainder=False)

    print('Beginning Training!')
    current_time = datetime.now()
    estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
    print("Training took time ", datetime.now() - current_time)
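# A minimal usage sketch for run_train(). The data file and output directory below
# are illustrative placeholders, not paths from this project; all hyperparameters
# come from the module-level configuration constants used inside run_train().
def example_run_train():
    run_train('./data/contextual_relevance.csv', './bert_output')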
def run(checkpoint_path, data_file_path):
    # Get the test data (make sure the checkpoint matches the model in the configuration file)
    _, test, false_negatives_test_set = ContextualRelevance(data_file_path).get_data()

    # Get the BERT tokenizer from the hub module
    tokenizer = create_tokenizer_from_hub_module(BERT_MODEL_HUB)

    test_InputExamples = test.apply(
        lambda x: run_classifier.InputExample(guid=None,
                                              text_a=x[DATA_COLUMN],
                                              text_b=x[ANSWER_COLUMN],
                                              label=x[LABEL_COLUMN]),
        axis=1)

    test_features = run_classifier.convert_examples_to_features(
        test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)

    test_input_fn = run_classifier.input_fn_builder(features=test_features,
                                                    seq_length=MAX_SEQ_LENGTH,
                                                    is_training=False,
                                                    drop_remainder=False)

    # num_train_steps and num_warmup_steps only configure the (unused) training
    # schedule here; they are assumed to be defined at module level.
    model_fn = model_fn_builder(num_labels=len(label_list),
                                learning_rate=LEARNING_RATE,
                                num_train_steps=num_train_steps,
                                num_warmup_steps=num_warmup_steps,
                                bert_model_hub=BERT_MODEL_HUB)

    estimator = tf.compat.v1.estimator.Estimator(
        model_fn, params={"batch_size": BATCH_SIZE})

    metric_result = estimator.evaluate(input_fn=test_input_fn,
                                       steps=None,
                                       checkpoint_path=checkpoint_path)

    # Add the false negatives filtered out by the data loader, then recompute the metrics
    metric_result['false_negatives'] += false_negatives_test_set
    metric_result['recall'] = metric_result['true_positives'] / (
        metric_result['true_positives'] + metric_result['false_negatives'])
    metric_result['eval_accuracy'] = (
        metric_result['true_positives'] + metric_result['true_negatives']) / (
        metric_result['true_positives'] + metric_result['false_negatives'] +
        metric_result['true_negatives'] + metric_result['false_positives'])
    precision = metric_result['precision']
    recall = metric_result['recall']
    metric_result['F1'] = 2 * (precision * recall) / (precision + recall)
    return metric_result
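# A minimal usage sketch for run(). The checkpoint and data paths are illustrative
# assumptions; run() returns the estimator's evaluation metrics with recall,
# accuracy and F1 recomputed after adding the data loader's false negatives.
def example_run_evaluation():
    metrics = run('./bert_output/model.ckpt-1000', './data/contextual_relevance.csv')
    for name in ('precision', 'recall', 'eval_accuracy', 'F1'):
        print(name, metrics[name])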
# Fragment: single-sentence evaluation setup (text_b=None). The enclosing function
# definition is missing from the source; `test` is assumed to be loaded as in run().

# Get the BERT tokenizer from the hub module
tokenizer = create_tokenizer_from_hub_module(BERT_MODEL_HUB)

test_InputExamples = test.apply(
    lambda x: run_classifier.InputExample(
        guid=None,  # Globally unique ID for bookkeeping, unused in this example
        text_a=x[DATA_COLUMN],
        text_b=None,
        label=x[LABEL_COLUMN]),
    axis=1)

test_features = run_classifier.convert_examples_to_features(
    test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)

test_input_fn = run_classifier.input_fn_builder(features=test_features,
                                                seq_length=MAX_SEQ_LENGTH,
                                                is_training=False,
                                                drop_remainder=False)

model_fn = model_fn_builder(num_labels=len(label_list),
                            learning_rate=LEARNING_RATE,
                            num_train_steps=num_train_steps,
                            num_warmup_steps=num_warmup_steps,
                            bert_model_hub=BERT_MODEL_HUB)

estimator = tf.compat.v1.estimator.Estimator(model_fn,
                                             params={"batch_size": BATCH_SIZE})