Code Example #1
def get_prediction(in_sentences):
  labels = ["Negative", "Positive"]
  input_examples = [run_classifier.InputExample(guid="", text_a=x, text_b=None, label=0) for x in in_sentences]  # guid="" and label=0 are just dummy values; they are not used at prediction time
  input_features = run_classifier.convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
  predict_input_fn = run_classifier.input_fn_builder(features=input_features, seq_length=MAX_SEQ_LENGTH, is_training=False, drop_remainder=False)
  predictions = estimator.predict(predict_input_fn, checkpoint_path=checkpoint_path)
  return [(sentence, prediction['probabilities'], labels[prediction['labels']]) for sentence, prediction in zip(in_sentences, predictions)]
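A minimal usage sketch for get_prediction, assuming label_list, MAX_SEQ_LENGTH, tokenizer, estimator, and checkpoint_path are already defined in the surrounding scope; the sentences below are purely illustrative:

# Hypothetical input sentences; any list of strings works here.
pred_sentences = [
    "That movie was absolutely awful",
    "The acting was a bit lacking but the plot carried it"
]
for sentence, probabilities, label in get_prediction(pred_sentences):
    print(sentence, probabilities, label)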
Code Example #2
File: train.py  Project: rinagalperin/biomedical_nel
def run_train(data_file_path, output_dir):
    print('***** Model output directory: {} *****'.format(output_dir))

    # get data from data loader
    train, _, _ = ContextualRelevance(data_file_path).get_data()
    print(train.columns)

    # Use the InputExample class from BERT's run_classifier code to create examples from the data
    train_InputExamples = train.apply(
        lambda x: run_classifier.InputExample(
            guid=None,  # globally unique ID for bookkeeping; unused in this example
            text_a=x[DATA_COLUMN],
            text_b=x[ANSWER_COLUMN],
            label=x[LABEL_COLUMN]),
        axis=1)

    # get the BERT tokenizer from the hub model
    tokenizer = create_tokenizer_from_hub_module(BERT_MODEL_HUB, False)
    print(tokenizer.tokenize("מריצים אימון..."))  # sanity check; the string is Hebrew for "running training..."

    # Convert the training examples to InputFeatures that BERT understands.
    train_features = run_classifier.convert_examples_to_features(
        train_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)

    # Compute the number of training and warmup steps from the batch size
    num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)
    num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

    # Specify the output directory and the number of steps between checkpoints
    run_config = tf.compat.v1.estimator.RunConfig(
        model_dir=output_dir,
        save_summary_steps=SAVE_SUMMARY_STEPS,
        save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)

    model_fn = model_fn_builder(num_labels=len(label_list),
                                learning_rate=LEARNING_RATE,
                                num_train_steps=num_train_steps,
                                num_warmup_steps=num_warmup_steps,
                                bert_model_hub=BERT_MODEL_HUB)

    estimator = tf.compat.v1.estimator.Estimator(
        model_fn=model_fn,
        config=run_config,
        params={"batch_size": BATCH_SIZE})

    # Create an input function for training. drop_remainder = True for using TPUs.
    train_input_fn = run_classifier.input_fn_builder(features=train_features,
                                                     seq_length=MAX_SEQ_LENGTH,
                                                     is_training=True,
                                                     drop_remainder=False)

    print('Beginning Training!')
    current_time = datetime.now()

    estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
    print("Training took time ", datetime.now() - current_time)
Code Example #3
def run(checkpoint_path, data_file_path):
    # get the test data (make sure the checkpoint matches the model set in the configuration file)
    _, test, false_negatives_test_set = ContextualRelevance(
        data_file_path).get_data()
    # get the BERT tokenizer from the hub model
    tokenizer = create_tokenizer_from_hub_module(BERT_MODEL_HUB)

    test_InputExamples = test.apply(
        lambda x: run_classifier.InputExample(guid=None,
                                              text_a=x[DATA_COLUMN],
                                              text_b=x[ANSWER_COLUMN],
                                              label=x[LABEL_COLUMN]),
        axis=1)

    test_features = run_classifier.convert_examples_to_features(
        test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
    test_input_fn = run_classifier.input_fn_builder(features=test_features,
                                                    seq_length=MAX_SEQ_LENGTH,
                                                    is_training=False,
                                                    drop_remainder=False)

    model_fn = model_fn_builder(num_labels=len(label_list),
                                learning_rate=LEARNING_RATE,
                                num_train_steps=num_train_steps,
                                num_warmup_steps=num_warmup_steps,
                                bert_model_hub=BERT_MODEL_HUB)

    estimator = tf.compat.v1.estimator.Estimator(
        model_fn, params={"batch_size": BATCH_SIZE})
    metric_result = estimator.evaluate(input_fn=test_input_fn,
                                       steps=None,
                                       checkpoint_path=checkpoint_path)
    metric_result['false_negatives'] += false_negatives_test_set
    metric_result['recall'] = metric_result['true_positives'] / (
        metric_result['true_positives'] + metric_result['false_negatives'])
    metric_result['eval_accuracy'] = (
        metric_result['true_positives'] + metric_result['true_negatives']
    ) / (metric_result['true_positives'] + metric_result['false_negatives'] +
         metric_result['true_negatives'] + metric_result['false_positives'])
    precision = metric_result['precision']
    recall = metric_result['recall']

    metric_result['F1'] = 2 * (precision * recall) / (precision + recall)
    return metric_result
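A usage sketch for run; the paths below are placeholders, and num_train_steps and num_warmup_steps are assumed to be defined at module level just as in the training example above:

# Hypothetical paths -- replace with the real checkpoint and data file.
metrics = run(checkpoint_path="output/model.ckpt-1000",
              data_file_path="data/contextual_relevance.csv")
print("precision:", metrics["precision"])
print("recall:", metrics["recall"])
print("F1:", metrics["F1"])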
Code Example #4
train_InputExamples = train.apply(
    lambda x: run_classifier.InputExample(
        guid=None,  # globally unique ID for bookkeeping; unused in this example
        text_a=x[DATA_COLUMN],
        text_b=None,
        label=x[LABEL_COLUMN]),
    axis=1)

# get the BERT tokenizer from the hub model
tokenizer = create_tokenizer_from_hub_module(BERT_MODEL_HUB)

test_InputExamples = test.apply(lambda x: run_classifier.InputExample(
    guid=None, text_a=x[DATA_COLUMN], text_b=None, label=x[LABEL_COLUMN]),
                                axis=1)

test_features = run_classifier.convert_examples_to_features(
    test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)

test_input_fn = run_classifier.input_fn_builder(features=test_features,
                                                seq_length=MAX_SEQ_LENGTH,
                                                is_training=False,
                                                drop_remainder=False)

model_fn = model_fn_builder(num_labels=len(label_list),
                            learning_rate=LEARNING_RATE,
                            num_train_steps=num_train_steps,
                            num_warmup_steps=num_warmup_steps,
                            bert_model_hub=BERT_MODEL_HUB)

estimator = tf.compat.v1.estimator.Estimator(model_fn,
                                             params={"batch_size": BATCH_SIZE})