def convert(self, line):
    """Convert a single input line into model-ready numpy arrays."""
    feature = convert_single_example(0, line, self.label2id,
                                     FLAGS.max_seq_length, self.tokenizer, 'xxxx')
    input_ids = np.reshape(feature.input_ids, [batch_size, FLAGS.max_seq_length])
    input_mask = np.reshape(feature.input_mask, [batch_size, FLAGS.max_seq_length])
    segment_ids = np.reshape(feature.segment_ids, [batch_size, FLAGS.max_seq_length])
    label_ids = np.reshape(feature.label_ids, [batch_size, FLAGS.max_seq_length])
    return input_ids, input_mask, segment_ids, label_ids
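# Hedged usage sketch for convert() above: `converter` stands in for whatever
# object owns the method, and `batch_size` / `FLAGS.max_seq_length` are assumed
# to be defined as module-level globals, as the reshapes in convert() require.
input_ids, input_mask, segment_ids, label_ids = converter.convert("a test sentence")
assert input_ids.shape == (batch_size, FLAGS.max_seq_length)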
def pre_inference(payload, signature, metadata):
    """Build the model input dict from a raw request payload."""
    input_example = run_classifier.InputExample(
        guid="", text_a=payload["review"], label=0)
    input_feature = run_classifier.convert_single_example(
        0, input_example, [0, 1], 128, tokenizer)
    return {"input_ids": [input_feature.input_ids]}
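# Example payload for pre_inference() above, a sketch assuming the hook is
# called with a JSON body carrying a "review" field; the module-level
# `tokenizer` must already be initialized.
model_input = pre_inference({"review": "Great acting and a tight script."},
                            signature=None, metadata=None)
# model_input["input_ids"] is a [1, 128] batch of WordPiece ids.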
def main():
    MAX_SEQ_LENGTH, LABELS_LIST, VOCAB_FILE_PATH = get_config("cn")
    # Connect to the model server.
    channel = grpc.insecure_channel(server)
    stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
    # Get the input sentences from the request form.
    sentences = globals.request.form.to_dict()
    # Tokenizer used to convert each sentence into features.
    tokenizer = tokenization.FullTokenizer(
        vocab_file=VOCAB_FILE_PATH, do_lower_case=True)
    # Construct the request to TensorFlow Serving.
    request = predict_pb2.PredictRequest()
    request.model_spec.name = model_name
    request.model_spec.signature_name = 'serving_default'
    results = {}
    for key, sentence in sentences.items():
        example = run_classifier.InputExample(
            guid="test-0",
            text_a=tokenization.convert_to_unicode(sentence),
            text_b=None,
            label=LABELS_LIST[0])
        feature = run_classifier.convert_single_example(
            0, example, LABELS_LIST, MAX_SEQ_LENGTH, tokenizer)
        # Build the model inputs.
        input_ids = np.reshape([feature.input_ids], (1, MAX_SEQ_LENGTH))
        input_mask = np.reshape([feature.input_mask], (1, MAX_SEQ_LENGTH))
        segment_ids = np.reshape([feature.segment_ids], (1, MAX_SEQ_LENGTH))
        label_ids = [feature.label_id]
        # Package the inputs into the request; the names and shapes must match
        # the exported model's serving signature.
        request.inputs['input_ids'].CopyFrom(
            tf.contrib.util.make_tensor_proto(
                input_ids, shape=[1, MAX_SEQ_LENGTH], dtype=tf.int32))
        request.inputs['input_mask'].CopyFrom(
            tf.contrib.util.make_tensor_proto(
                input_mask, shape=[1, MAX_SEQ_LENGTH], dtype=tf.int32))
        request.inputs['label_ids'].CopyFrom(
            tf.contrib.util.make_tensor_proto(
                label_ids, shape=[1], dtype=tf.int32))
        request.inputs['segment_ids'].CopyFrom(
            tf.contrib.util.make_tensor_proto(
                segment_ids, shape=[1, MAX_SEQ_LENGTH], dtype=tf.int32))
        # Run the prediction.
        result = stub.Predict(request, 10.0)  # 10 secs timeout
        # Parse the result: keep the top-3 labels by probability.
        probabilities_tensor_proto = result.outputs["probabilities"]
        probabilities_np = np.array(list(probabilities_tensor_proto.float_val))
        top3_index_np = probabilities_np.argsort()[-3:][::-1]
        probabilities_top3 = probabilities_np[top3_index_np]
        label_top3 = np.array(LABELS_LIST)[top3_index_np]
        result_list = []
        for index in range(3):
            result_list.append({"label": label_top3[index],
                                "score": str(probabilities_top3[index])})
        results[key] = result_list
    return Response(json.dumps(results), mimetype='application/json')
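# Hedged client-side sketch for the handler above; SERVICE_URL is a placeholder,
# and each form field is treated as one sentence to classify.
import requests

resp = requests.post(SERVICE_URL, data={"s1": "first sentence", "s2": "second one"})
print(resp.json())  # {"s1": [{"label": ..., "score": ...}, ...], "s2": [...]}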
def predict(self, payload):
    """Classify a single review and return its predicted label."""
    input_example = run_classifier.InputExample(
        guid="", text_a=payload["review"], label=0)
    input_feature = run_classifier.convert_single_example(
        0, input_example, [0, 1], 128, self._tokenizer)
    model_input = {"input_ids": [input_feature.input_ids]}
    prediction = self.client.predict(model_input)
    return labels[prediction["labels"][0]]
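# Hedged invocation of predict() above; `predictor` stands in for an instance
# of the (unnamed) class owning the method, with self._tokenizer, self.client,
# and the module-level `labels` list already set up.
print(predictor.predict({"review": "The movie was great!"}))  # e.g. "positive"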
def convert_examples_to_features(examples, label_list, max_seq_length, tokenizer):
    """Convert a set of `InputExample`s to a list of `InputFeatures`."""
    features = []
    for (ex_index, example) in enumerate(examples):
        if ex_index % 10000 == 0:
            tf.logging.info("Writing example %d of %d" % (ex_index, len(examples)))
        feature = run_classifier.convert_single_example(
            ex_index, example, label_list, max_seq_length, tokenizer)
        features.append(feature)
    return features
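# A minimal sketch of feeding the features into an Estimator via BERT's own
# run_classifier.input_fn_builder; the label list and sequence length here are
# assumptions and must match the values the model was trained with.
examples = [run_classifier.InputExample(guid="ex-0", text_a="a test sentence",
                                        text_b=None, label="0")]
features = convert_examples_to_features(examples, label_list=["0", "1"],
                                        max_seq_length=128, tokenizer=tokenizer)
predict_input_fn = run_classifier.input_fn_builder(
    features=features, seq_length=128, is_training=False, drop_remainder=False)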
def generateInferenceRequest(sentence: str):
    """Serialize a single sentence into a TF Serving REST predict request body."""
    req = {}
    req['signature_name'] = "serving_default"
    req['inputs'] = {}
    example = run_classifier.InputExample(
        guid="test-0",
        text_a=tokenization.convert_to_unicode(sentence),
        text_b=None,
        label=LABELS_LIST[0])
    feature = run_classifier.convert_single_example(
        0, example, LABELS_LIST, MAX_SEQ_LENGTH, TOKENIZER)
    req['inputs']['input_ids'] = feature.input_ids
    req['inputs']['input_mask'] = feature.input_mask
    req['inputs']['segment_ids'] = feature.segment_ids
    req['inputs']['label_ids'] = feature.label_id
    return json.dumps(req)
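# Sketch: POST the serialized request to TensorFlow Serving's REST API. The
# host, port, and model name ("bert") are assumptions, and the "probabilities"
# output key depends on how the model was exported.
import requests

resp = requests.post("http://localhost:8501/v1/models/bert:predict",
                     data=generateInferenceRequest("a test sentence"))
probabilities = resp.json()["outputs"]["probabilities"]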
def convert_line(line, label_list, max_seq_length, tokenizer):
    """Convert a line that should be predicted into BERT input features."""
    label = tokenization.convert_to_unicode("0")  # mock label
    text_a = tokenization.convert_to_unicode(line)
    example = rc.InputExample(guid=0, text_a=text_a, text_b=None, label=label)
    feature = rc.convert_single_example(0, example, label_list,
                                        max_seq_length, tokenizer)
    input_ids = np.reshape([feature.input_ids], (1, max_seq_length))
    input_mask = np.reshape([feature.input_mask], (1, max_seq_length))
    segment_ids = np.reshape([feature.segment_ids], (1, max_seq_length))
    label_ids = [feature.label_id]
    return input_ids, input_mask, segment_ids, label_ids
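# Hedged sketch of consuming convert_line()'s output with a locally exported
# SavedModel (TF 1.x); the export path and the signature's input names are
# assumptions, and `tokenizer` is assumed to be constructed as elsewhere above.
from tensorflow.contrib import predictor

predict_fn = predictor.from_saved_model("exported_model/1")  # hypothetical path
input_ids, input_mask, segment_ids, label_ids = convert_line(
    "a line to classify", label_list=["0", "1"], max_seq_length=128,
    tokenizer=tokenizer)
out = predict_fn({"input_ids": input_ids, "input_mask": input_mask,
                  "segment_ids": segment_ids, "label_ids": label_ids})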
def convert_tfrecord_for_bert(filenames, input_data_path, output_data_path,
                              bert_tfhub_url, text_key, label_key,
                              max_seq_length):
    """Converts input TFRecords into the format expected by the BERT model."""
    tokenizer = create_tokenizer_from_hub_module(bert_tfhub_url)
    for filename in filenames:
        print('Working on {}...'.format(filename))
        in_filepath = '{}{}'.format(input_data_path, filename)
        # TODO: Check if the file exists; if not, write a new file.
        # TODO: Have the filename reflect max_seq_length and the path reflect the model.
        out_filepath = '{}{}'.format(output_data_path, filename)
        record_iterator = tf.python_io.tf_record_iterator(path=in_filepath)
        writer = tf.python_io.TFRecordWriter(out_filepath)
        for ex_index, string_record in enumerate(record_iterator):
            example = tf.train.Example()
            example.ParseFromString(string_record)
            text = example.features.feature[text_key].bytes_list.value[0]
            label = example.features.feature[label_key].float_list.value[0]
            label = round(label)  # binarize the float label
            ex = run_classifier.InputExample(
                guid=None,  # globally unique ID for bookkeeping
                text_a=text,
                text_b=None,
                label=label)
            label_list = [0, 1]
            feature = run_classifier.convert_single_example(
                ex_index, ex, label_list, max_seq_length, tokenizer)
            features = collections.OrderedDict()
            features["input_ids"] = create_int_feature(feature.input_ids)
            features["input_mask"] = create_int_feature(feature.input_mask)
            features["segment_ids"] = create_int_feature(feature.segment_ids)
            features["label_ids"] = create_int_feature([feature.label_id])
            features["is_real_example"] = create_int_feature(
                [int(feature.is_real_example)])
            tf_example = tf.train.Example(features=tf.train.Features(
                feature=features))
            writer.write(tf_example.SerializeToString())
        writer.close()
        print('... Done!')
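# Hypothetical invocation of convert_tfrecord_for_bert(); the paths and record
# keys are placeholders, and the TF-Hub URL is the public uncased BERT-Base.
convert_tfrecord_for_bert(
    filenames=["train.tfrecord", "eval.tfrecord"],
    input_data_path="gs://my-bucket/raw/",
    output_data_path="gs://my-bucket/bert/",
    bert_tfhub_url="https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1",
    text_key="text",
    label_key="label",
    max_seq_length=128)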
def main():
    credentials = grpc.ssl_channel_credentials(
        root_certificates=ROOT_CERT.encode())
    channel = grpc.secure_channel(
        '{}:{}'.format(MODEL_SERVER_HOST, MODEL_SERVER_PORT), credentials)
    stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
    # Get the input sentence from the request headers.
    sentence = str(globals.request.headers.getlist('Text')[0])
    # Convert the single sentence into BERT features.
    tokenizer = tokenization.FullTokenizer(
        vocab_file=VOCAB_FILE_PATH, do_lower_case=True)
    example = run_classifier.InputExample(
        guid="test-0",
        text_a=tokenization.convert_to_unicode(sentence),
        text_b=None,
        label=LABELS_LIST[0])
    feature = run_classifier.convert_single_example(
        0, example, LABELS_LIST, MAX_SEQ_LENGTH, tokenizer)
    # Build the model inputs.
    input_ids = np.reshape([feature.input_ids], (1, MAX_SEQ_LENGTH))
    input_mask = np.reshape([feature.input_mask], (1, MAX_SEQ_LENGTH))
    segment_ids = np.reshape([feature.segment_ids], (1, MAX_SEQ_LENGTH))
    label_ids = [feature.label_id]
    # Construct the request to TensorFlow Serving.
    request = predict_pb2.PredictRequest()
    request.model_spec.name = MODEL_NAME
    request.model_spec.signature_name = 'serving_default'
    # Package the inputs into the request; the names and shapes must match
    # the exported model's serving signature.
    request.inputs['input_ids'].CopyFrom(
        tf.contrib.util.make_tensor_proto(
            input_ids, shape=[1, MAX_SEQ_LENGTH], dtype=tf.int32))
    request.inputs['input_mask'].CopyFrom(
        tf.contrib.util.make_tensor_proto(
            input_mask, shape=[1, MAX_SEQ_LENGTH], dtype=tf.int32))
    request.inputs['label_ids'].CopyFrom(
        tf.contrib.util.make_tensor_proto(
            label_ids, shape=[1], dtype=tf.int32))
    request.inputs['segment_ids'].CopyFrom(
        tf.contrib.util.make_tensor_proto(
            segment_ids, shape=[1, MAX_SEQ_LENGTH], dtype=tf.int32))
    # Run the prediction.
    result = stub.Predict(request, 100, metadata=metadata_transformer())  # 100 secs timeout
    # Parse the result: keep the top-3 labels by probability.
    probabilities_tensor_proto = result.outputs["probabilities"]
    probabilities_np = np.array(list(probabilities_tensor_proto.float_val))
    top3_index_np = probabilities_np.argsort()[-3:][::-1]
    probabilities_top3 = probabilities_np[top3_index_np]
    label_top3 = np.array(LABELS_LIST)[top3_index_np]
    result_list = []
    for index in range(3):
        result_list.append({
            "label": label_top3[index],
            "score": str(probabilities_top3[index])
        })
    output_json = {"predictions": [{"results": result_list}]}
    return Response(json.dumps(output_json), mimetype='application/json')
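# Hedged client-side sketch for the handler above; SERVICE_URL is a placeholder
# and the sentence travels in the 'Text' header, as main() expects.
import requests

resp = requests.get(SERVICE_URL, headers={"Text": "an example sentence"})
print(resp.json()["predictions"][0]["results"])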