Exemple #1
0
 def convert(self,line):
     """Convert one input line into the four arrays fed to the model.

     The line is turned into a feature object by ``convert_single_example``
     using this instance's ``label2id`` mapping and ``tokenizer``; each of the
     feature's id lists is then reshaped to
     ``[batch_size, FLAGS.max_seq_length]``.

     NOTE(review): ``batch_size`` is not defined inside this method, so it is
     presumably a module-level name -- confirm against the enclosing file.

     Returns:
         Tuple ``(input_ids, input_mask, segment_ids, label_ids)`` of arrays.
     """
     def _as_batch(values):
         # Every field shares the same target shape.
         return np.reshape(values, [batch_size, FLAGS.max_seq_length])

     feature = convert_single_example(
         0, line, self.label2id, FLAGS.max_seq_length, self.tokenizer, 'xxxx')
     input_ids = _as_batch(feature.input_ids)
     input_mask = _as_batch(feature.input_mask)
     segment_ids = _as_batch(feature.segment_ids)
     label_ids = _as_batch(feature.label_ids)
     return input_ids, input_mask, segment_ids, label_ids
Exemple #2
0
def pre_inference(payload, signature, metadata):
    """Build the model input for a single review payload.

    Args:
        payload: Mapping that contains the text to classify under ``"review"``.
        signature: Not used by this function.
        metadata: Not used by this function.

    Returns:
        A dict with a single key ``"input_ids"`` holding a one-element list
        with the token ids produced for the review.
    """
    # The label is a placeholder (0); the label list is [0, 1] and the
    # sequence length is fixed at 128.
    example = run_classifier.InputExample(
        guid="", text_a=payload["review"], label=0)
    feature = run_classifier.convert_single_example(
        0, example, [0, 1], 128, tokenizer)
    batched_ids = [feature.input_ids]
    return {"input_ids": batched_ids}
def main():
    """Classify every submitted form field with a TensorFlow Serving model.

    Each key/value pair of ``globals.request.form`` is treated as one
    sentence. The sentence is converted to BERT features, sent to the
    prediction service over an insecure gRPC channel, and the highest-scoring
    labels (at most three) are collected.

    Returns:
        A ``Response`` with mimetype ``application/json`` whose body maps each
        form key to a list of ``{"label": ..., "score": ...}`` dicts ordered
        from most to least probable. ``score`` is the probability rendered as
        a string.
    """
    MAX_SEQ_LENGTH, LABELS_LIST, VOCAB_FILE_PATH = get_config("cn")
    # get model from the server
    channel = grpc.insecure_channel(server)
    stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

    # get the sentences of input
    sentences = globals.request.form.to_dict()

    # convert single sentence to feature
    tokenizer = tokenization.FullTokenizer(
        vocab_file=VOCAB_FILE_PATH, do_lower_case=True)

    # Construct the request to tensorflow serving; it is reused for every
    # sentence because CopyFrom overwrites the previous inputs.
    request = predict_pb2.PredictRequest()
    request.model_spec.name = model_name
    request.model_spec.signature_name = 'serving_default'

    results = {}
    for key, sentence in sentences.items():
        example = run_classifier.InputExample(
            guid="test-0", text_a=tokenization.convert_to_unicode(sentence), text_b=None, label=LABELS_LIST[0])
        feature = run_classifier.convert_single_example(
            0, example, LABELS_LIST, MAX_SEQ_LENGTH, tokenizer)

        # get the input of model (all sequence inputs share one batch shape)
        input_ids = np.reshape([feature.input_ids], (1, MAX_SEQ_LENGTH))
        input_mask = np.reshape([feature.input_mask], (1, MAX_SEQ_LENGTH))
        segment_ids = np.reshape([feature.segment_ids], (1, MAX_SEQ_LENGTH))
        label_ids = [feature.label_id]

        # package the input into request, Note the format of the input(follow the model)
        request.inputs['input_ids'].CopyFrom(
            tf.contrib.util.make_tensor_proto(input_ids, shape=[1, MAX_SEQ_LENGTH], dtype=tf.int32))
        request.inputs['input_mask'].CopyFrom(
            tf.contrib.util.make_tensor_proto(input_mask, shape=[1, MAX_SEQ_LENGTH], dtype=tf.int32))
        request.inputs['label_ids'].CopyFrom(
            tf.contrib.util.make_tensor_proto(label_ids, shape=[1], dtype=tf.int32))
        request.inputs['segment_ids'].CopyFrom(
            tf.contrib.util.make_tensor_proto(segment_ids, shape=[1, MAX_SEQ_LENGTH], dtype=tf.int32))

        # do predict
        result = stub.Predict(request, 10.0)  # 10 secs timeout

        # parse the result
        probabilities = list(result.outputs["probabilities"].float_val)
        probabilities_np = np.array(probabilities)
        # Indices of the best scores, best first. The slice simply yields
        # fewer entries when the model has fewer than three classes, so the
        # loop below never indexes past the end.
        top_indices = probabilities_np.argsort()[-3:][::-1]
        # Look labels up in LABELS_LIST itself so they stay plain Python
        # objects that json.dumps can serialize.
        results[key] = [
            {"label": LABELS_LIST[int(position)],
             "score": str(probabilities_np[position])}
            for position in top_indices
        ]
    return Response(json.dumps(results), mimetype='application/json')
Exemple #4
0
 def predict(self, payload):
     """Return the label predicted for the review text in ``payload``.

     The text under ``payload["review"]`` is converted to BERT features with
     this instance's ``_tokenizer`` (placeholder label 0, label list [0, 1],
     sequence length 128). Only the token ids are sent to ``self.client``.

     Returns:
         The entry of ``labels`` selected by the first element of the
         client's ``"labels"`` output.
     """
     example = run_classifier.InputExample(
         guid="", text_a=payload["review"], label=0)
     feature = run_classifier.convert_single_example(
         0, example, [0, 1], 128, self._tokenizer)
     response = self.client.predict({"input_ids": [feature.input_ids]})
     predicted_index = response["labels"][0]
     return labels[predicted_index]
def convert_examples_to_features(examples, label_list, max_seq_length,
                                 tokenizer):
    """Turn a sequence of `InputExample`s into a list of `InputFeatures`.

    Progress is logged through ``tf.logging`` for every 10000th example,
    starting with the first one.

    Args:
        examples: Sized iterable of examples to convert.
        label_list: Labels passed through to ``convert_single_example``.
        max_seq_length: Sequence length passed through unchanged.
        tokenizer: Tokenizer passed through unchanged.

    Returns:
        List with one converted feature per example, in input order.
    """
    converted = []
    for position, item in enumerate(examples):
        if not position % 10000:
            tf.logging.info(
                "Writing example %d of %d" % (position, len(examples)))
        # A constant index (10) is handed to the converter instead of the
        # real position.
        converted.append(
            run_classifier.convert_single_example(
                10, item, label_list, max_seq_length, tokenizer))
    return converted
Exemple #6
0
def generateInferenceRequest(sentence: str):
    """Serialize a sentence into a JSON inference request.

    The sentence is converted to BERT features using the module-level
    ``LABELS_LIST``, ``MAX_SEQ_LENGTH`` and ``TOKENIZER``; the first label of
    ``LABELS_LIST`` serves as a placeholder label.

    Returns:
        JSON string with ``signature_name`` set to ``"serving_default"`` and
        an ``inputs`` object holding ``input_ids``, ``input_mask``,
        ``segment_ids`` and ``label_ids``.
    """
    example = run_classifier.InputExample(
        guid="test-0",
        text_a=tokenization.convert_to_unicode(sentence),
        text_b=None,
        label=LABELS_LIST[0])
    feature = run_classifier.convert_single_example(
        0, example, LABELS_LIST, MAX_SEQ_LENGTH, TOKENIZER)
    # Key order matches the order in which the request was originally built.
    payload = {
        'signature_name': "serving_default",
        'inputs': {
            'input_ids': feature.input_ids,
            'input_mask': feature.input_mask,
            'segment_ids': feature.segment_ids,
            'label_ids': feature.label_id,
        },
    }
    return json.dumps(payload)
Exemple #7
0
    def convert_line(line, label_list, max_seq_length, tokenizer):
        """Build BERT input arrays for a single line awaiting prediction.

        A mock label ("0") is attached because the example constructor is
        given a label even though none is known at prediction time.

        Returns:
            Tuple ``(input_ids, input_mask, segment_ids, label_ids)`` where
            ``input_ids`` and ``input_mask`` have shape
            ``(1, max_seq_length)``, ``segment_ids`` is reshaped to
            ``(max_seq_length)``, and ``label_ids`` is a one-element list.
        """
        mock_label = tokenization.convert_to_unicode("0")
        unicode_line = tokenization.convert_to_unicode(line)
        example = rc.InputExample(
            guid=0, text_a=unicode_line, text_b=None, label=mock_label)
        feature = rc.convert_single_example(
            0, example, label_list, max_seq_length, tokenizer)

        batch_shape = (1, max_seq_length)
        return (
            np.reshape([feature.input_ids], batch_shape),
            np.reshape([feature.input_mask], batch_shape),
            np.reshape([feature.segment_ids], (max_seq_length)),
            [feature.label_id],
        )
def convert_tfrecord_for_bert(filenames, input_data_path, output_data_path,
                              bert_tfhub_url, text_key, label_key,
                              max_seq_length):
    """Converts input TFRecords into the format expected by the BERT model.

    For every file name, each record is parsed as a ``tf.train.Example``; the
    first bytes value under ``text_key`` is the text and the first float value
    under ``label_key`` (rounded) is the label. The resulting BERT features
    are written to a file of the same name under ``output_data_path``.

    Args:
        filenames: Iterable of file names to convert.
        input_data_path: Prefix concatenated directly with each file name to
            form the input path (no separator is inserted).
        output_data_path: Prefix concatenated directly with each file name to
            form the output path (no separator is inserted).
        bert_tfhub_url: Passed to ``create_tokenizer_from_hub_module``.
        text_key: Feature key of the text field in the input records.
        label_key: Feature key of the label field in the input records.
        max_seq_length: Sequence length passed to the feature converter.
    """
    tokenizer = create_tokenizer_from_hub_module(bert_tfhub_url)
    # The label set is fixed, so build it once instead of per record.
    label_list = [0, 1]
    for filename in filenames:
        print('Working on {}...'.format(filename))
        in_filepath = '{}{}'.format(input_data_path, filename)
        #TODO: Check if file exists, if not write new file
        #TODO: Have the filename reflect the max_sequence_length and path reflect model
        out_filepath = '{}{}'.format(output_data_path, filename)
        record_iterator = tf.python_io.tf_record_iterator(path=in_filepath)
        # The context manager closes the writer even when a record fails to
        # parse or convert, so the output file handle is never leaked.
        with tf.python_io.TFRecordWriter(out_filepath) as writer:
            for ex_index, string_record in enumerate(record_iterator):
                example = tf.train.Example()
                example.ParseFromString(string_record)
                text = example.features.feature[text_key].bytes_list.value[0]
                label = example.features.feature[label_key].float_list.value[0]
                label = round(label)
                ex = run_classifier.InputExample(
                    guid=None,  # Globally unique ID for bookkeeping
                    text_a=text,
                    text_b=None,
                    label=label)
                feature = run_classifier.convert_single_example(
                    ex_index, ex, label_list, max_seq_length, tokenizer)
                features = collections.OrderedDict()
                features["input_ids"] = create_int_feature(feature.input_ids)
                features["input_mask"] = create_int_feature(feature.input_mask)
                features["segment_ids"] = create_int_feature(
                    feature.segment_ids)
                features["label_ids"] = create_int_feature([feature.label_id])
                features["is_real_example"] = create_int_feature(
                    [int(feature.is_real_example)])

                tf_example = tf.train.Example(features=tf.train.Features(
                    feature=features))
                writer.write(tf_example.SerializeToString())
        print('... Done!')
def main():
    """Classify the sentence from the ``Text`` request header.

    The first value of the ``Text`` header of ``globals.request`` is converted
    to BERT features and sent to the prediction service over a TLS-secured
    gRPC channel. The highest-scoring labels (at most three) are returned.

    Returns:
        A ``Response`` with mimetype ``application/json`` whose body has the
        form ``{"predictions": [{"results": [...]}]}``; each result is a
        ``{"label": ..., "score": ...}`` dict, ordered from most to least
        probable, with ``score`` rendered as a string.
    """
    credentials = grpc.ssl_channel_credentials(
        root_certificates=ROOT_CERT.encode())
    channel = grpc.secure_channel(
        '{}:{}'.format(MODEL_SERVER_HOST, MODEL_SERVER_PORT), credentials)
    stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

    # get the sentence of input
    sentence = str(globals.request.headers.getlist('Text')[0])

    # convert single sentence to feature
    tokenizer = tokenization.FullTokenizer(vocab_file=VOCAB_FILE_PATH,
                                           do_lower_case=True)
    example = run_classifier.InputExample(
        guid="test-0",
        text_a=tokenization.convert_to_unicode(sentence),
        text_b=None,
        label=LABELS_LIST[0])
    feature = run_classifier.convert_single_example(0, example, LABELS_LIST,
                                                    MAX_SEQ_LENGTH, tokenizer)

    # get the input of model (all sequence inputs share one batch shape)
    input_ids = np.reshape([feature.input_ids], (1, MAX_SEQ_LENGTH))
    input_mask = np.reshape([feature.input_mask], (1, MAX_SEQ_LENGTH))
    segment_ids = np.reshape([feature.segment_ids], (1, MAX_SEQ_LENGTH))
    label_ids = [feature.label_id]

    # Construct the request to tensorflow serving
    request = predict_pb2.PredictRequest()
    request.model_spec.name = MODEL_NAME
    request.model_spec.signature_name = 'serving_default'

    # package the input into request, Note the format of the input(follow the model)
    request.inputs['input_ids'].CopyFrom(
        tf.contrib.util.make_tensor_proto(input_ids,
                                          shape=[1, MAX_SEQ_LENGTH],
                                          dtype=tf.int32))
    request.inputs['input_mask'].CopyFrom(
        tf.contrib.util.make_tensor_proto(input_mask,
                                          shape=[1, MAX_SEQ_LENGTH],
                                          dtype=tf.int32))
    request.inputs['label_ids'].CopyFrom(
        tf.contrib.util.make_tensor_proto(label_ids, shape=[1],
                                          dtype=tf.int32))
    request.inputs['segment_ids'].CopyFrom(
        tf.contrib.util.make_tensor_proto(segment_ids,
                                          shape=[1, MAX_SEQ_LENGTH],
                                          dtype=tf.int32))

    # do predict
    result = stub.Predict(request, 100,
                          metadata=metadata_transformer())  # 100 secs timeout

    # parse the result
    probabilities = list(result.outputs["probabilities"].float_val)
    probabilities_np = np.array(probabilities)
    # Indices of the best scores, best first. The slice simply yields fewer
    # entries when the model has fewer than three classes, so building the
    # result list never indexes past the end.
    top_indices = probabilities_np.argsort()[-3:][::-1]
    # Look labels up in LABELS_LIST itself so they stay plain Python objects
    # that json.dumps can serialize.
    result_list = [
        {
            "label": LABELS_LIST[int(position)],
            "score": str(probabilities_np[position])
        }
        for position in top_indices
    ]
    output_json = {"predictions": [{"results": result_list}]}
    return Response(json.dumps(output_json), mimetype='application/json')