コード例 #1
0
def load_input_data(genre, level, data_type, input_config, add_features, scale_features):
    """Assemble the model inputs for one input configuration.

    For every configuration except ``'bert'`` the result is a dict with the
    keys ``'x'`` (a list of inputs) and ``'y'`` (the labels). For ``'bert'``
    the result is a list of ``run_classifier.InputExample`` objects.

    Args:
        genre, level, data_type: forwarded to the project data loaders.
        input_config: one of ``'token'``, ``'elmo_id'``,
            ``'token_combine_elmo_id'``, ``'elmo_s'``,
            ``'token_combine_elmo_s'`` or ``'bert'``.
        add_features: when truthy (and the config is not ``'bert'``), the
            output of ``load_features`` is appended to ``'x'``.
        scale_features: forwarded to ``load_features``.

    Raises:
        ValueError: if ``input_config`` is none of the values above.
    """
    def _as_column(texts):
        # Object-dtype array with an extra trailing axis.
        return np.array(texts, dtype=object)[:, np.newaxis]

    if input_config == 'bert':
        # BERT consumes InputExample objects and never receives the extra
        # features, so this branch returns directly.
        text = load_processed_text_data(genre, data_type)
        if text['label'] is None:
            return [run_classifier.InputExample(guid=None, text_a=premise, text_b=hypothesis)
                    for premise, hypothesis in zip(text['premise'], text['hypothesis'])]
        return [run_classifier.InputExample(guid=None, text_a=premise, text_b=hypothesis, label=gold)
                for premise, hypothesis, gold
                in zip(text['premise'], text['hypothesis'], text['label'])]

    if input_config in ('token', 'elmo_id', 'token_combine_elmo_id'):
        encoded = load_processed_data(genre, level, data_type)
        inputs = [encoded['premise'], encoded['hypothesis']]
        targets = encoded['label']
    elif input_config == 'elmo_s':
        # The loaded dict is updated in place, exactly as the caller may expect.
        text = load_processed_text_data(genre, data_type)
        text['premise'] = _as_column(text['premise'])
        text['hypothesis'] = _as_column(text['hypothesis'])
        text['label'] = load_processed_data(genre, level, data_type)['label']
        inputs = [text['premise'], text['hypothesis']]
        targets = text['label']
    elif input_config == 'token_combine_elmo_s':
        encoded = load_processed_data(genre, level, data_type)
        text = load_processed_text_data(genre, data_type)
        # NOTE(review): unlike the 'elmo_s' branch, the text columns are
        # wrapped in a one-element list here -- confirm this is intended.
        text['premise'] = [_as_column(text['premise'])]
        text['hypothesis'] = [_as_column(text['hypothesis'])]
        inputs = [encoded['premise'], encoded['hypothesis'], text['premise'], text['hypothesis']]
        targets = encoded['label']
    else:
        raise ValueError('input config Not Understood: {}'.format(input_config))

    if add_features:
        inputs.append(load_features(genre, data_type, scale_features))
    return {'x': inputs, 'y': targets}
コード例 #2
0
def train_and_evaluate(train_sents,test_sents,labels_train,labels_test):
    """Train the module-level ``estimator`` and then evaluate it.

    Sentences are wrapped as single-text ``InputExample`` objects, converted
    with ``convert_examples_to_features_RE`` and fed through
    ``run_classifier.input_fn_builder``. Relies on the module-level names
    ``label_list``, ``MAX_SEQUENCE_LENGTH``, ``tokenizer``, ``estimator`` and
    ``num_train_steps``.

    Args:
        train_sents: training sentences.
        test_sents: evaluation sentences.
        labels_train: labels paired with ``train_sents``.
        labels_test: labels paired with ``test_sents``.
    """
    def _make_input_fn(sentences, labels, is_training):
        # Sentence/label pairs -> InputExamples -> features -> estimator input_fn.
        examples = []
        for sentence, label in zip(sentences, labels):
            examples.append(
                run_classifier.InputExample(guid=None, text_a=sentence, text_b=None, label=label))
        features = convert_examples_to_features_RE(
            examples, label_list, MAX_SEQUENCE_LENGTH, tokenizer)
        return run_classifier.input_fn_builder(
            features=features,
            seq_length=MAX_SEQUENCE_LENGTH,
            is_training=is_training,
            drop_remainder=False)

    train_input_fn = _make_input_fn(train_sents, labels_train, True)
    print("############ Beginning of training #####################")
    estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
    print("############ Ending of training #####################")

    test_input_fn = _make_input_fn(test_sents, labels_test, False)
    estimator.evaluate(input_fn=test_input_fn, steps=None)
コード例 #3
0
    def do_eval(self):
        """Evaluate ``self.estimator`` on ``self.test`` and print each metric.

        Every row of ``self.test`` (columns ``'sentence'`` and ``'label'``)
        becomes a single-text ``InputExample``. The label list passed to the
        feature converter is ``0 .. len(self.labels) - 1``.
        """
        def _row_to_example(row):
            return run_classifier.InputExample(
                guid=None, text_a=row['sentence'], text_b=None, label=row['label'])

        print("[INFO] Started working on evaluation...")
        print("[INFO] Preparing test InputExample...")
        examples = self.test.apply(_row_to_example, axis=1)
        print("[INFO] Done preparing test InputExample...\n")

        label_ids = list(range(len(self.labels)))
        print("[INFO] Preparing test features...")
        features = run_classifier.convert_examples_to_features(
            examples, label_ids, self.max_seq_length, self.tokenizer)
        print("[INFO] Done preparing test features...\n")

        eval_input_fn = run_classifier.input_fn_builder(
            features=features,
            seq_length=self.max_seq_length,
            is_training=False,
            drop_remainder=False)

        print('[INFO] Begin evaluating...!')
        metrics = self.estimator.evaluate(input_fn=eval_input_fn, steps=None)
        print("[INFO] Done evaluating...\n")
        # Metrics are printed in sorted key order.
        for name in sorted(metrics):
            print(f"[INFO]  {name} = {metrics[name]}")
コード例 #4
0
def getPrediction(in_sentences, type_output="features"):
    """Run the module-level ``estimator`` on sentences.

    Args:
        in_sentences: sentences to run through the model.
        type_output: ``"features"`` returns each prediction's
            ``'pooled_output'``; any other value returns one
            ``(sentence, probabilities, label_index, label_value)`` tuple per
            sentence.

    Relies on the module-level names ``train``, ``label_list``,
    ``MAX_SEQ_LENGTH``, ``tokenizer`` and ``estimator``.
    """
    # Unique values of the training label column; indexed by the predicted
    # label index to recover the label value.
    labels = np.unique(train['label'])

    examples = []
    for sentence in in_sentences:
        # label=0 is only a placeholder required by InputExample.
        examples.append(
            run_classifier.InputExample(guid="", text_a=sentence, text_b=None, label=0))

    features = run_classifier.convert_examples_to_features(
        examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    input_fn = run_classifier.input_fn_builder(
        features=features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)
    predictions = estimator.predict(input_fn)

    if type_output == "features":
        return [prediction['pooled_output'] for prediction in predictions]

    rows = []
    for sentence, prediction in zip(in_sentences, predictions):
        rows.append((sentence, prediction['probabilities'], prediction['labels'],
                     labels[prediction['labels']]))
    return rows
コード例 #5
0
def getPrediction(in_sentences):
  """Classify sentences as "Negative" or "Positive".

  Uses the module-level ``label_list``, ``MAX_SEQ_LENGTH``, ``tokenizer`` and
  ``estimator``. Returns a list of ``(sentence, probabilities, label_name)``
  tuples, one per input sentence.
  """
  labels = ["Negative", "Positive"]

  examples = []
  for sentence in in_sentences:
    # guid is left empty; label 0 is only a placeholder.
    examples.append(
        run_classifier.InputExample(guid="", text_a=sentence, text_b=None, label=0))

  features = run_classifier.convert_examples_to_features(
      examples, label_list, MAX_SEQ_LENGTH, tokenizer)
  input_fn = run_classifier.input_fn_builder(
      features=features,
      seq_length=MAX_SEQ_LENGTH,
      is_training=False,
      drop_remainder=False)

  rows = []
  for sentence, prediction in zip(in_sentences, estimator.predict(input_fn)):
    rows.append((sentence, prediction['probabilities'], labels[prediction['labels']]))
  return rows
コード例 #6
0
    def getRatings(self, in_sentences):
        """Return the estimator's predictions for the given sentences.

        The value returned is whatever ``self.estimator.predict`` returns; it
        is not consumed here. Uses the module-level ``label_list`` and
        ``MAX_SEQ_LENGTH``.
        """
        # Step 1: wrap each sentence; guid is empty and label "1" is a placeholder.
        examples = []
        for sentence in in_sentences:
            examples.append(
                run_classifier.InputExample(guid="", text_a=sentence, text_b=None, label="1"))

        # Step 2: convert the examples to BERT input features.
        features = run_classifier.convert_examples_to_features(
            examples, label_list, MAX_SEQ_LENGTH, self.tokenizer)

        # Step 3: build the prediction input function.
        input_fn = run_classifier.input_fn_builder(
            features=features,
            seq_length=MAX_SEQ_LENGTH,
            is_training=False,
            drop_remainder=False)

        # Step 4: hand the input function to the estimator.
        return self.estimator.predict(input_fn=input_fn)
コード例 #7
0
def getPrediction(in_sentences):
    """Predict a one-hot label over the classes 0, 1, 2 for each sentence.

    Uses the module-level ``label_list``, ``MAX_SEQ_LENGTH``, ``tokenizer``
    and ``estimator``. Progress is printed as ``<index>###<total>`` after each
    prediction.

    Args:
        in_sentences: indexable sequence of sentences.

    Returns:
        A list of ``(sentence, probabilities, one_hot_label)`` tuples. If an
        exception occurs while predictions are consumed, it is printed and
        the results collected so far are returned.
    """
    labels = [0, 1, 2]
    # guid is left empty; label 0 is only a placeholder.
    input_examples = [run_classifier.InputExample(guid="", text_a=x, text_b=None, label=0) for x in
                      in_sentences]
    input_features = run_classifier.convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    predict_input_fn = run_classifier.input_fn_builder(features=input_features, seq_length=MAX_SEQ_LENGTH,
                                                       is_training=False, drop_remainder=False)
    predictions = estimator.predict(predict_input_fn)

    resultado = []
    try:
        # Imported once instead of on every iteration. It stays inside the
        # try so an import failure is still reported the best-effort way.
        from sklearn.preprocessing import label_binarize

        for indice, prediction in enumerate(predictions):
            one_hot = label_binarize([labels[prediction['labels']]], classes=[0, 1, 2])[0]
            resultado.append((in_sentences[indice], prediction['probabilities'], one_hot))
            print(str(indice) + "###" + str(len(in_sentences)))
    except Exception as e:
        # Deliberate best effort: report the failure and fall through so the
        # caller still receives the partial results.
        print(e)

    return resultado
    '''
コード例 #8
0
def predict(sentences, predict_fn):
    """Run ``predict_fn`` on a batch of sentences.

    Each sentence is converted to BERT features and the per-feature fields
    are gathered into one dict of lists that is passed to ``predict_fn``.
    Uses the module-level ``MAX_SEQ_LENGTH`` and ``tokenizer``.

    Args:
        sentences: sentences to classify.
        predict_fn: callable taking a dict with the keys ``'input_ids'``,
            ``'input_mask'``, ``'segment_ids'`` and ``'label_ids'`` and
            returning a mapping with ``'probabilities'`` and ``'labels'``.

    Returns:
        A list of ``(sentence, probabilities, label)`` tuples.
    """
    labels = [0, 1]
    # guid is left empty; label 0 is only a placeholder.
    input_examples = [
        run_classifier.InputExample(guid="", text_a=x, text_b=None, label=0)
        for x in sentences
    ]
    input_features = run_classifier.convert_examples_to_features(
        input_examples, labels, MAX_SEQ_LENGTH, tokenizer)

    pred_dict = {
        'input_ids': [feature.input_ids for feature in input_features],
        'input_mask': [feature.input_mask for feature in input_features],
        'segment_ids': [feature.segment_ids for feature in input_features],
        'label_ids': [feature.label_id for feature in input_features],
    }

    predictions = predict_fn(pred_dict)
    # Pair results with the `sentences` argument. The original zipped over
    # `pred_sentences`, a name that is not defined in this function.
    return list(zip(sentences, predictions['probabilities'],
                    predictions['labels']))
コード例 #9
0
def create_bert_input_example(row: pd.Series) -> run_classifier.InputExample:
    """Wrap one data row in a single-text BERT ``InputExample``.

    The text is read from ``row[DATA_COLUMN]`` and the label from
    ``row[LABEL_COLUMN]``; ``guid`` and ``text_b`` are left as ``None``.
    """
    text = row[DATA_COLUMN]
    label = row[LABEL_COLUMN]
    # guid is a bookkeeping ID that is not used here.
    return run_classifier.InputExample(guid=None, text_a=text, text_b=None, label=label)
コード例 #10
0
def get_final_predictions(in_contexts, in_last_sentences, tokenizer,
                          estimator: tf.estimator.Estimator, label_list):
    """
    Return the estimator's 'probabilities' output for each (context, ending) pair

    NOTE(review): the original docstring called these log probabilities;
    the code only reads the 'probabilities' key -- confirm which it is.

    Parameters
    ----------
    in_contexts:            story contexts, used as text_a
    in_last_sentences:      proposed last sentences, used as text_b
    tokenizer:              bert tokenizer
    estimator:              tf.estimator
    label_list:             possible label values

    The sequence length is read from the module-level ``flags.max_seq_length``.
    """
    examples = []
    for context, ending in zip(in_contexts, in_last_sentences):
        # guid is left empty; label 0 is only a placeholder.
        examples.append(
            run_classifier.InputExample(guid="", text_a=context, text_b=ending, label=0))

    features = run_classifier.convert_examples_to_features(
        examples, label_list, flags.max_seq_length, tokenizer)
    input_fn = run_classifier.input_fn_builder(
        features=features,
        seq_length=flags.max_seq_length,
        is_training=False,
        drop_remainder=False)

    probabilities = []
    for prediction in estimator.predict(input_fn):
        probabilities.append(prediction['probabilities'])
    return probabilities
コード例 #11
0
ファイル: train.py プロジェクト: soumyagulla/commonsense
def getPrediction(in_sentences):
    """Classify sentences as "Non-Sensitive" or "Sensitive".

    The estimator is obtained by calling ``run()`` on every invocation, after
    the input function has been built. Uses the module-level ``label_list``,
    ``MAX_SEQ_LENGTH`` and ``tokenizer``.

    Returns:
        A list of ``(sentence, probabilities, label_name)`` tuples.
    """
    labels = ["Non-Sensitive", "Sensitive"]

    examples = []
    for sentence in in_sentences:
        # guid is left empty; label 0 is only a placeholder.
        examples.append(
            run_classifier.InputExample(guid="", text_a=sentence, text_b=None, label=0))

    features = run_classifier.convert_examples_to_features(
        examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    input_fn = run_classifier.input_fn_builder(
        features=features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)

    estimator = run()
    rows = []
    for sentence, prediction in zip(in_sentences, estimator.predict(input_fn)):
        rows.append((sentence, prediction['probabilities'],
                     labels[prediction['labels']]))
    return rows


# pred_sentences = [
#   "He drinks apple",
#   "He drinks milk",
#   "A mosquito stings me",
#   "I sting a mosquito",
#   "A niece is a person.",
#   "A giraffe is a person.",
#   "I like to ride my chocolate",
#   "I like to ride my bike",
#   "he put elephant into the jug",
# ]
# predictions = getPrediction(pred_sentences)

# print(predictions,"predictions")
コード例 #12
0
    def getListPrediction(self, in_sentences):
        """Return the estimator's predictions for a list of sentences.

        The value returned is whatever ``self.estimator.predict`` returns; it
        is not consumed here. Uses the module-level ``label_list`` and
        ``MAX_SEQ_LENGTH``.
        """
        # Step 1: wrap each sentence; guid is empty and label "0" is a placeholder.
        examples = []
        for sentence in in_sentences:
            examples.append(
                run_classifier.InputExample(guid="", text_a=sentence, text_b=None, label="0"))

        # Step 2: convert the examples to BERT input features.
        features = run_classifier.convert_examples_to_features(
            examples, label_list, MAX_SEQ_LENGTH, self.tokenizer)

        # Step 3: build the prediction input function.
        input_fn = run_classifier.input_fn_builder(
            features=features,
            seq_length=MAX_SEQ_LENGTH,
            is_training=False,
            drop_remainder=False)

        # Step 4: hand the input function to the estimator.
        return self.estimator.predict(input_fn=input_fn)
コード例 #13
0
def main():
    """Classify each submitted form field through TensorFlow Serving.

    Every (key, sentence) pair of the request form is converted to BERT
    features and sent to the served model named ``model_name`` at ``server``.
    The up-to-three highest-probability labels are collected per key.

    Returns:
        A JSON ``Response`` mapping each form key to a list of
        ``{"label": ..., "score": ...}`` dicts ordered by descending
        probability. Scores are serialized as strings.
    """
    MAX_SEQ_LENGTH, LABELS_LIST, VOCAB_FILE_PATH = get_config("cn")
    # Connect to the model server.
    channel = grpc.insecure_channel(server)
    stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

    # Sentences submitted with the request, keyed by form field.
    sentences = globals.request.form.to_dict()

    tokenizer = tokenization.FullTokenizer(
        vocab_file=VOCAB_FILE_PATH, do_lower_case=True)

    # One request object is reused; its inputs are overwritten per sentence.
    request = predict_pb2.PredictRequest()
    request.model_spec.name = model_name
    request.model_spec.signature_name = 'serving_default'

    results = {}
    for key, sentence in sentences.items():
        # The label is only a placeholder required by the feature converter.
        example = run_classifier.InputExample(
            guid="test-0", text_a=tokenization.convert_to_unicode(sentence), text_b=None, label=LABELS_LIST[0])
        feature = run_classifier.convert_single_example(
            0, example, LABELS_LIST, MAX_SEQ_LENGTH, tokenizer)

        # All three sequence inputs are shaped as a batch of one, matching
        # the tensor shapes declared below. segment_ids was previously
        # flattened to 1-D, inconsistent with its declared shape.
        input_ids = np.reshape([feature.input_ids], (1, MAX_SEQ_LENGTH))
        input_mask = np.reshape([feature.input_mask], (1, MAX_SEQ_LENGTH))
        segment_ids = np.reshape([feature.segment_ids], (1, MAX_SEQ_LENGTH))
        label_ids = [feature.label_id]

        # Package the inputs; names and shapes must follow the served model.
        request.inputs['input_ids'].CopyFrom(
            tf.contrib.util.make_tensor_proto(input_ids, shape=[1, MAX_SEQ_LENGTH], dtype=tf.int32))
        request.inputs['input_mask'].CopyFrom(
            tf.contrib.util.make_tensor_proto(input_mask, shape=[1, MAX_SEQ_LENGTH], dtype=tf.int32))
        request.inputs['label_ids'].CopyFrom(
            tf.contrib.util.make_tensor_proto(label_ids, shape=[1], dtype=tf.int32))
        request.inputs['segment_ids'].CopyFrom(
            tf.contrib.util.make_tensor_proto(segment_ids, shape=[1, MAX_SEQ_LENGTH], dtype=tf.int32))

        result = stub.Predict(request, 10.0)  # 10 secs timeout

        # Rank the classes by probability, highest first, keeping at most 3.
        probabilities_np = np.array(list(result.outputs["probabilities"].float_val))
        top3_index_np = probabilities_np.argsort()[-3:][::-1]
        probabilities_top3 = probabilities_np[top3_index_np]
        label_top3 = np.array(LABELS_LIST)[top3_index_np]

        # Iterate over what is actually available so a model with fewer than
        # three classes no longer raises IndexError.
        results[key] = [
            {"label": label, "score": str(score)}
            for label, score in zip(label_top3, probabilities_top3)
        ]
    return Response(json.dumps(results), mimetype='application/json')
コード例 #14
0
def getPrediction(in_sentences):
    """Label sentence pairs as "Not an error" or "Is an error".

    Predictions come from a fixed checkpoint under ``OUTPUT_DIR`` with
    ``yield_single_examples=False``. A yielded prediction may therefore hold
    several examples; both the batched and the scalar form are handled.

    Args:
        in_sentences: indexable sequence of items where ``item[0]`` is used
            as text_a and ``item[1]`` as text_b.

    Returns:
        A list of ``(item, probabilities, label_name)`` tuples.
    """
    labels = ["Not an error", "Is an error"]

    examples = []
    for pair in in_sentences:
        # guid is left empty; label 0 is only a placeholder.
        examples.append(
            run_classifier.InputExample(guid="", text_a=pair[0], text_b=pair[1], label=0))

    features = run_classifier.convert_examples_to_features(
        examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    input_fn = run_classifier.input_fn_builder(
        features=features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)
    predictions = estimator.predict(input_fn,
                                    checkpoint_path="./" + OUTPUT_DIR +
                                    "/model.ckpt-56165",
                                    yield_single_examples=False)

    collected = []
    cursor = 0  # index of the next input item to pair with a prediction
    for batch in predictions:
        batch_labels = batch["labels"]
        if not hasattr(batch_labels, '__len__'):
            # Scalar prediction: exactly one example.
            collected.append(
                (in_sentences[cursor], batch["probabilities"],
                 labels[batch_labels]))
            cursor += 1
            continue
        for offset, label_id in enumerate(batch_labels):
            collected.append(
                (in_sentences[cursor], batch["probabilities"][offset],
                 labels[label_id]))
            cursor += 1
    return collected
コード例 #15
0
ファイル: handler.py プロジェクト: rogervaas/cortex
def pre_inference(payload, signature, metadata):
    """Turn a request payload into the model input dict.

    Only ``payload["review"]`` is read; ``signature`` and ``metadata`` are
    not used. The review is tokenized with the module-level ``tokenizer``
    using the label list ``[0, 1]`` and a sequence length of 128.

    Returns:
        ``{"input_ids": [<token ids of the review>]}``
    """
    review_text = payload["review"]
    # guid is left empty; label 0 is only a placeholder.
    example = run_classifier.InputExample(guid="", text_a=review_text, label=0)
    feature = run_classifier.convert_single_example(
        0, example, [0, 1], 128, tokenizer)
    model_input = {"input_ids": [feature.input_ids]}
    return model_input
コード例 #16
0
    def predict(self):
        """
        Predict and store the predicted label in self.predicted_class

        Does nothing when ``self.empty`` is truthy. Otherwise
        ``self.clean_text`` is converted to BERT features (also stored in
        ``self.input_features``) and run through the shared
        ``SinglePredict.sess`` session.
        """
        if self.empty:
            return

        # See run_classifier.py in the BERT repository for details.
        # guid is left empty; the first label is only a placeholder.
        example = run_classifier.InputExample(guid="",
                                              text_a=self.clean_text,
                                              text_b=None,
                                              label=self.label_list[0])
        self.input_features = run_classifier.convert_examples_to_features(
            [example], self.label_list, self.max_seq_length, self.tokenizer)

        feature = self.input_features[0]
        sequence_shape = (-1, self.max_seq_length)
        input_ids = np.array(feature.input_ids).reshape(sequence_shape)
        input_mask = np.array(feature.input_mask).reshape(sequence_shape)
        label_ids = np.array(feature.label_id).reshape(-1, )
        segment_ids = np.array(feature.segment_ids).reshape(sequence_shape)

        feed = {
            SinglePredict.tensor_input_ids: input_ids,
            SinglePredict.tensor_input_mask: input_mask,
            SinglePredict.tensor_label_ids: label_ids,
            SinglePredict.tensor_segment_ids: segment_ids,
        }
        result = SinglePredict.sess.run(SinglePredict.tensor_outputs,
                                        feed_dict=feed)
        self.predicted_class = self.label_list[result]
コード例 #17
0
def data_processor():
    """Load (or create) the train/test splits and convert them to BERT features.

    If ``sentiment_data/train.csv`` exists, the train and test CSVs are read
    as they are. Otherwise the raw mood dataset is read, its numeric labels
    are replaced by mood names, and train/test/dev CSVs are written.

    Column names, label list, sequence length and vocabulary file come from
    the module-level ``args``.

    Returns:
        ``(train_features, test_features)``
    """
    if os.path.exists("sentiment_data/train.csv"):
        x_train = pd.read_csv("sentiment_data/train.csv")
        x_test = pd.read_csv("sentiment_data/test.csv")
    else:
        data = pd.read_csv("sentiment_data/simplifyweibo_4_moods.csv")

        # Replace the numeric label codes with mood names, one at a time.
        moods = data["label"]
        for code, mood in ((0, "happy"), (1, "angry"), (2, "disgust"), (3, "sad")):
            moods = moods.replace(code, mood)
        data["label"] = moods

        # First split: 60% train, 40% held out.
        x_train, held_out, _, held_out_labels = train_test_split(
            data, data["label"], test_size=0.4, random_state=0)
        # Second split: the held-out part becomes 60% test, 40% dev.
        x_test, x_dev, _, _ = train_test_split(
            held_out, held_out_labels, test_size=0.4, random_state=0)

        x_train.to_csv("sentiment_data/train.csv", index=False)
        x_test.to_csv("sentiment_data/test.csv", index=False)
        x_dev.to_csv("sentiment_data/dev.csv", index=False)

    def _row_to_example(row):
        # guid is a bookkeeping ID that is not used here.
        return run_classifier.InputExample(guid=None,
                                           text_a=row[args.DATA_COLUMN],
                                           text_b=None,
                                           label=row[args.LABEL_COLUMN])

    train_InputExamples = x_train.apply(_row_to_example, axis=1)
    test_InputExamples = x_test.apply(_row_to_example, axis=1)

    # Build the tokenizer.
    tokenizer = tokenization.FullTokenizer(vocab_file=args.vocab_file,
                                           do_lower_case=True)

    # Build the features.
    train_features = bert.run_classifier.convert_examples_to_features(
        train_InputExamples, args.label_list, args.MAX_SEQ_LENGTH, tokenizer)
    test_features = bert.run_classifier.convert_examples_to_features(
        test_InputExamples, args.label_list, args.MAX_SEQ_LENGTH, tokenizer)

    return train_features, test_features
コード例 #18
0
    def embed(self,
              sequences: Union[str, Iterable[str]],
              per_token: bool = _DEFAULT_PER_TOKEN_EMBEDDING) -> np.ndarray:
        """
        Embeds a sequence or multiple sequences using the BERT model

        Args:
            sequences (Union[str, Iterable[str]]): the sequence(s) to embed
            per_token (bool): whether to produce an embedding per token or a pooled embedding for the whole sequence

        Returns:
            a numpy array with the embedding(s) of the sequence(s); for a
            single string input the leading axis is dropped
        """
        is_single = isinstance(sequences, str)
        texts = [sequences] if is_single else sequences

        # Convert sequences into BERT input format; label 0 is a placeholder.
        examples = []
        for text in texts:
            examples.append(
                run_classifier.InputExample(guid=None,
                                            text_a=text,
                                            text_b=None,
                                            label=0))
        features = run_classifier.convert_examples_to_features(
            examples, [0], self._input_ids.shape[1], self._tokenizer)

        output_name = "sequence_output" if per_token else "pooled_output"

        def _feed():
            # Placeholder -> per-example feature lists.
            return {
                self._input_ids: [item.input_ids for item in features],
                self._input_mask: [item.input_mask for item in features],
                self._segment_ids: [item.segment_ids for item in features],
            }

        # Execute the computation graph on the inputs
        if self._session is not None:
            output = self._session.run(self._outputs[output_name],
                                       feed_dict=_feed())
        else:
            # No stored session: open a temporary one on the stored graph.
            with tf.compat.v1.Session(graph=self._graph) as session:
                session.run(tf.global_variables_initializer())
                output = session.run(self._outputs[output_name],
                                     feed_dict=_feed())

        if is_single:
            output = output.reshape(output.shape[1:])
        return output
コード例 #19
0
 def predict(self, payload):
     """Return the label name predicted for ``payload["review"]``.

     The review is tokenized with ``self._tokenizer`` (label list ``[0, 1]``,
     sequence length 128) and only its input ids are sent to
     ``self.client.predict``. The first predicted label index is looked up in
     the module-level ``labels``.
     """
     review_text = payload["review"]
     # guid is left empty; label 0 is only a placeholder.
     example = run_classifier.InputExample(guid="", text_a=review_text, label=0)
     feature = run_classifier.convert_single_example(
         0, example, [0, 1], 128, self._tokenizer
     )
     response = self.client.predict({"input_ids": [feature.input_ids]})
     predicted_index = response["labels"][0]
     return labels[predicted_index]
def data_prep(stressors_list, label_list, MAX_SEQ_LENGTH, tokenizer):
    """Convert a list of texts into BERT input features.

    Args:
        stressors_list: texts to convert, each used as text_a.
        label_list: label values passed to the feature converter.
        MAX_SEQ_LENGTH: sequence length passed to the feature converter.
        tokenizer: BERT tokenizer.

    Returns:
        The features from ``run_classifier.convert_examples_to_features``.
    """
    examples = []
    for text in stressors_list:
        # guid is left empty; every example is given the label 1.
        examples.append(
            run_classifier.InputExample(guid="", text_a=text, text_b=None, label=1))

    return run_classifier.convert_examples_to_features(
        examples, label_list, MAX_SEQ_LENGTH, tokenizer)
コード例 #21
0
def predict(in_sentences):
    """Predict the output relation of each sentence.

    Uses the module-level ``label_list``, ``MAX_SEQUENCE_LENGTH``,
    ``tokenizer`` and ``estimator``.

    Returns:
        A list of ``(sentence, probabilities, label)`` tuples.
    """
    examples = []
    for sentence in in_sentences:
        # guid is left empty; label 0 is only a placeholder.
        examples.append(
            run_classifier.InputExample(guid="", text_a=sentence, text_b=None, label=0))

    features = convert_examples_to_features_RE(
        examples, label_list, MAX_SEQUENCE_LENGTH, tokenizer)
    input_fn = run_classifier.input_fn_builder(
        features=features,
        seq_length=MAX_SEQUENCE_LENGTH,
        is_training=False,
        drop_remainder=False)
    predictions = estimator.predict(input_fn, yield_single_examples=True)

    rows = []
    for sentence, prediction in zip(in_sentences, predictions):
        rows.append((sentence, prediction['probabilities'], prediction['labels']))
    return rows
コード例 #22
0
def get_bert_features(text):
    """Convert a sequence of texts into BERT input features.

    A one-element input is extended with the filler sentence ``'aha'`` before
    conversion, so two features are returned in that case.
    NOTE(review): the reason for the filler is not visible here -- confirm.

    Uses the module-level ``label_list``, ``MAX_SEQ_LENGTH`` and ``tokenizer``.
    """
    in_sentences = [text[0], 'aha'] if len(text) == 1 else text

    examples = []
    for sentence in in_sentences:
        # guid is left empty; label 0 is only a placeholder.
        examples.append(
            run_classifier.InputExample(guid="", text_a=sentence, text_b=None, label=0))

    return run_classifier.convert_examples_to_features(
        examples, label_list, MAX_SEQ_LENGTH, tokenizer)
コード例 #23
0
ファイル: run_directly.py プロジェクト: zhc0757/bert-api
 def _create_examples(self, lines, set_type):
     """Creates examples for the training and dev sets.

     Each line supplies the label in column 0 and the text in column 1. The
     guid is ``"<set_type>-<line index>"``.
     """
     to_unicode = bert.tokenization.convert_to_unicode
     return [
         brc.InputExample(guid="%s-%s" % (set_type, index),
                          text_a=to_unicode(row[1]),
                          text_b=None,
                          label=to_unicode(row[0]))
         for index, row in enumerate(lines)
     ]
コード例 #24
0
def predict_serve(in_sentences):
    """Predict the output relation of each sentence via ``predict_fn``.

    Each sentence is converted with ``convert_single_example_RE`` and the
    feature fields are passed to the module-level ``predict_fn`` as a dict of
    lists. Also uses the module-level ``label_list``,
    ``MAX_SEQUENCE_LENGTH`` and ``tokenizer``.

    Returns:
        A list of ``(sentence, probabilities, label)`` tuples.
    """
    features = []
    for sentence in in_sentences:
        # guid is left empty; label 0 is only a placeholder.
        example = run_classifier.InputExample(guid="", text_a=sentence, text_b=None, label=0)
        features.append(
            convert_single_example_RE(0, example, label_list, MAX_SEQUENCE_LENGTH, tokenizer))

    model_input = {
        'input_ids': [feature.input_ids for feature in features],
        'label_ids': [feature.label_id for feature in features],
        'input_mask': [feature.input_mask for feature in features],
        'segment_ids': [feature.segment_ids for feature in features],
    }
    predictions = predict_fn(model_input)

    rows = []
    for sentence, probs, label in zip(in_sentences, predictions['probabilities'], predictions['labels']):
        rows.append((sentence, probs, label))
    return rows
コード例 #25
0
def convert_sent_to_vec(tokenizer,
                        dataset,
                        DATA_COLUMN='text_1',
                        DATA_COLUMN2='text_2'):
    """Convert sentence pairs from a data frame into BERT input features.

    Args:
        tokenizer: BERT tokenizer.
        dataset: object whose ``apply(func, axis=1)`` calls ``func`` with one
            row at a time (presumably a pandas DataFrame -- confirm).
        DATA_COLUMN: row key of the first text (text_a).
        DATA_COLUMN2: row key of the second text (text_b).

    Returns:
        The features from ``run_classifier.convert_examples_to_features``.
        The label list and sequence length come from the module-level
        ``cfg``.
    """
    # The original assigned a local `label_list = [1, 0]` that was never
    # read. The converter has always received `cfg.label_list`, so the dead
    # local is removed. NOTE(review): confirm `cfg.label_list` is the
    # intended list.

    def _row_to_example(row):
        # Every example is given the label 1.
        return run_classifier.InputExample(
            guid=None, text_a=row[DATA_COLUMN], text_b=row[DATA_COLUMN2], label=1)

    input_example = dataset.apply(_row_to_example, axis=1)
    features = run_classifier.convert_examples_to_features(
        input_example, cfg.label_list, cfg.MAX_SEQ_LENGTH, tokenizer)

    return features
コード例 #26
0
def getPrediction(in_sentences):
  """Classify sentences into one of the classes "0" to "3".

  Uses the module-level ``label_list``, ``MAX_SEQ_LENGTH``, ``tokenizer`` and
  ``estimator``. Returns a list of
  ``(sentence, probabilities, label_index, label_name)`` tuples.
  """
  # Maps a predicted label index to its label name.
  labels = ["0", "1","2","3"]

  # Wrap the sentences in the form BERT accepts; label 0 is a placeholder.
  examples = []
  for sentence in in_sentences:
    examples.append(
        run_classifier.InputExample(guid="", text_a=sentence, text_b=None, label=0))

  # Build the input features.
  features = run_classifier.convert_examples_to_features(
      examples, label_list, MAX_SEQ_LENGTH, tokenizer)

  # Predict the classes.
  input_fn = run_classifier.input_fn_builder(
      features=features,
      seq_length=MAX_SEQ_LENGTH,
      is_training=False,
      drop_remainder=False)

  rows = []
  for sentence, prediction in zip(in_sentences, estimator.predict(input_fn)):
    rows.append((sentence, prediction['probabilities'], prediction['labels'],
                 labels[prediction['labels']]))
  return rows
コード例 #27
0
def create_bert_examples(lines, set_type, labels=None):
    """Generate ``InputExample`` objects for the BERT model.

    Args:
        lines: texts, each used as text_a.
        set_type: name of the data set; its string form is the guid of every
            example.
        labels: labels paired with ``lines``. Only read when the set type is
            ``'train'``, where each label is converted with ``str``.

    Returns:
        A list of examples. For any set type other than ``'train'`` every
        example carries the label ``'0'``.
    """
    guid = f'{set_type}'
    if guid == 'train':
        labelled = ((text, str(raw_label)) for text, raw_label in zip(lines, labels))
    else:
        labelled = ((text, '0') for text in lines)

    return [
        run_classifier.InputExample(guid=guid,
                                    text_a=text,
                                    text_b=None,
                                    label=label)
        for text, label in labelled
    ]
コード例 #28
0
    def fit(self, X, y):
        """Train a BERT classifier on texts ``X`` with labels ``y``.

        The sorted distinct labels are stored in ``self.labels_`` and the
        trained estimator in ``self.estimator_``.

        Returns:
            ``self``
        """
        self.labels_ = sorted(set(y))

        examples = []
        for text, label in zip(X, y):
            examples.append(
                run_classifier.InputExample(guid=None,
                                            text_a=text,
                                            text_b=None,
                                            label=label))

        # Convert the examples to InputFeatures that BERT understands.
        features = bert.run_classifier.convert_examples_to_features(
            examples, self.labels_, self.max_seq_length, self.tokenizer)

        # Train and warmup step counts follow from the batch size.
        total_steps = int(
            len(features) / self.batch_size * self.num_train_epochs)
        warmup_steps = int(total_steps * self.warmup_proportion)

        # Output directory and how often summaries/checkpoints are saved.
        config = tf.estimator.RunConfig(
            model_dir=self.model_dir,
            save_summary_steps=self.save_summary_steps,
            save_checkpoints_steps=self.save_checkpoints_steps)

        build_model = model_fn_builder(num_labels=len(self.labels_),
                                       learning_rate=self.learning_rate,
                                       num_train_steps=total_steps,
                                       num_warmup_steps=warmup_steps)

        self.estimator_ = tf.estimator.Estimator(
            model_fn=build_model,
            config=config,
            params={"batch_size": self.batch_size})

        # Input function for training. drop_remainder = True for using TPUs.
        input_fn = bert.run_classifier.input_fn_builder(
            features=features,
            seq_length=self.max_seq_length,
            is_training=True,
            drop_remainder=False)

        print('Beginning Training!')
        started = datetime.now()
        self.estimator_.train(input_fn=input_fn, max_steps=total_steps)
        print("Training took time ", datetime.now() - started)

        return self
コード例 #29
0
def getPrediction(estimator, in_sentences, labels, label_list, max_seq_len,
                  tokenizer):
    """Classify sentences with the given estimator.

    Args:
        estimator: object whose ``predict(input_fn)`` yields dicts with the
            keys ``'probabilities'`` and ``'labels'``.
        in_sentences: sentences to classify.
        labels: sequence mapping a predicted label index to its label name.
        label_list: label values passed to the feature converter.
        max_seq_len: sequence length for the features and input function.
        tokenizer: BERT tokenizer.

    Returns:
        A list of ``(sentence, probabilities, label_name)`` tuples.
    """
    examples = []
    for sentence in in_sentences:
        # guid is left empty; label 0 is only a placeholder.
        examples.append(
            run_classifier.InputExample(guid="", text_a=sentence, text_b=None, label=0))

    features = run_classifier.convert_examples_to_features(
        examples, label_list, max_seq_len, tokenizer)
    input_fn = run_classifier.input_fn_builder(features=features,
                                               seq_length=max_seq_len,
                                               is_training=False,
                                               drop_remainder=False)

    rows = []
    for sentence, pred in zip(in_sentences, estimator.predict(input_fn)):
        rows.append((sentence, pred['probabilities'], labels[pred['labels']]))
    return rows
コード例 #30
0
    def do_train(self):
        """Build the estimator and train it on ``self.train``.

        Every row of ``self.train`` (columns ``'sentence'`` and ``'label'``)
        becomes a single-text ``InputExample``. The estimator created by
        ``get_estimator`` is stored in ``self.estimator`` before training.
        """
        def _row_to_example(row):
            return run_classifier.InputExample(
                guid=None, text_a=row['sentence'], text_b=None, label=row['label'])

        print("[INFO] Preparing train InputExample...")
        examples = self.train.apply(_row_to_example, axis=1)
        print("[INFO] Done preparing train InputExample...\n")

        label_ids = list(range(len(self.labels)))
        print("[INFO] Preparing train features...")
        features = run_classifier.convert_examples_to_features(
            examples, label_ids, self.max_seq_length, self.tokenizer)
        print("[INFO] Done preparing train features...\n")

        input_fn = run_classifier.input_fn_builder(
            features=features,
            seq_length=self.max_seq_length,
            is_training=True,
            drop_remainder=False)

        # Step counts follow from the data size, batch size and epoch count.
        num_train_steps = int(
            len(features) / self.train_batch_size * self.num_train_epochs)
        num_warmup_steps = int(num_train_steps * self.warmup_proportion)

        print(f"[INFO] No. of train steps: {num_train_steps}")
        print(f"[INFO] No. of warmup steps: {num_warmup_steps}")

        self.estimator = get_estimator(
            self.init_checkpoint,
            self.bert_config_file,
            self.labels,
            self.train_batch_size,
            self.model_dir,
            save_summary_steps=self.save_summary_steps,
            save_checkpoints_steps=self.save_checkpoints_steps,
            learning_rate=self.learning_rate,
            num_train_steps=num_train_steps,
            num_warmup_steps=num_warmup_steps)

        print('[INFO] Begin Training...!')
        started = datetime.now()
        self.estimator.train(input_fn=input_fn, max_steps=num_train_steps)
        print(
            f"[INFO] Training took time {datetime.now() - started} sec..!\n"
        )