def pack_user_func_return_value(
    self,
    return_result: ApiFuncReturnValue,
    tasks: Sequence[InferenceTask],
) -> Sequence[InferenceResult[str]]:
    results = []
    for json_obj, task in regroup_return_value(return_result, tasks):
        # per-task CLI args may override the output format ("json" or "str")
        args = task.cli_args
        if args:
            parser = argparse.ArgumentParser()
            parser.add_argument(
                "-o", "--output", default="json", choices=["str", "json"]
            )
            parsed_args, _ = parser.parse_known_args(args)
            output = parsed_args.output
        else:
            output = "json"
        try:
            if output == "json":
                json_str = json.dumps(json_obj, cls=NumpyJsonEncoder)
            else:
                json_str = str(json_obj)
            results.append(
                InferenceResult(
                    data=json_str,
                    http_status=200,
                    http_headers={"Content-Type": "application/json"},
                )
            )
        except AssertionError as e:
            results.append(InferenceError(err_msg=str(e), http_status=400))
        except Exception as e:  # pylint: disable=broad-except
            results.append(InferenceError(err_msg=str(e), http_status=500))
    return tuple(results)
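# NumpyJsonEncoder is referenced above but not defined in this snippet. A
# minimal sketch of what such an encoder needs to do, assuming it only has
# to handle numpy arrays and scalars; the class name matches the code above,
# but this body is an illustration, not the library's actual implementation.
import json

import numpy as np


class NumpyJsonEncoder(json.JSONEncoder):
    def default(self, o):
        if isinstance(o, np.ndarray):
            return o.tolist()  # arrays become (nested) lists
        if isinstance(o, np.generic):
            return o.item()  # numpy scalars become native Python scalars
        return super().default(o)


# usage: json.dumps({"scores": np.array([0.1, 0.9])}, cls=NumpyJsonEncoder)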
def predict(self, parsed_json: JsonSerializable): text = parsed_json.get("text") model, tokenizer = self.get_artifacts() # tokenize inputs = tokenizer.encode("summarize: " + text, return_tensors="pt", max_length=512) # invalidate token lengths of less than 10 if len(inputs[0]) < 10: return InferenceError(err_msg="text too short", http_status=400) # summarize text, top 4 results output = model.generate(inputs, max_length=150, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True) # decode most likely output = tokenizer.decode(output[0], skip_special_tokens=True).replace(" .", ".") json_out = json.dumps({"result": output}) return InferenceResult( data=json_out, http_status=200, http_headers={"Content-Type": "application/json"}, )
def pack_user_func_return_value(
    self,
    return_result,
    tasks: Sequence[InferenceTask],
) -> Sequence[InferenceResult[str]]:
    rv = []
    results = tf_to_numpy(return_result)
    for result, _ in regroup_return_value(results, tasks):
        try:
            result_str = json.dumps(result, cls=TfTensorJsonEncoder)
            rv.append(InferenceResult(data=result_str, http_status=200))
        except Exception as e:  # pylint: disable=broad-except
            rv.append(InferenceError(err_msg=str(e), http_status=500))
    return rv
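# Neither tf_to_numpy nor TfTensorJsonEncoder is defined in this snippet. A
# minimal sketch of the conversion step, under the assumption that the user
# function returns a tf.Tensor or a list of tensors; the names mirror the
# code above, but the bodies are illustrative.
import json

import tensorflow as tf


def tf_to_numpy(result):
    # recurse through lists/tuples, turning each tensor into a numpy array
    if isinstance(result, (list, tuple)):
        return [tf_to_numpy(r) for r in result]
    if tf.is_tensor(result):
        return result.numpy()
    return result


class TfTensorJsonEncoder(json.JSONEncoder):
    def default(self, o):
        # numpy values coming out of tf_to_numpy become plain lists/scalars
        if hasattr(o, "tolist"):
            return o.tolist()
        return super().default(o)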
def pack_user_func_return_value(
    self,
    return_result,
    tasks: Sequence[InferenceTask],
) -> Sequence[InferenceResult[str]]:
    rv = []
    i = 0
    for task in tasks:
        # slice this task's rows out of the concatenated result;
        # a task with batch=None contributed a single row
        if task.batch is None:
            result = return_result[i : i + 1]
            i += 1
        else:
            result = return_result[i : i + task.batch]
            i += task.batch
        try:
            result = df_to_json(result, self.output_orient)
            rv.append(InferenceResult(http_status=200, data=result))
        except Exception as e:  # pylint: disable=broad-except
            rv.append(InferenceError(err_msg=str(e), http_status=500))
    return rv
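# A small driver for the batch-slicing logic above, assuming return_result is
# a pandas DataFrame whose rows cover all tasks in order and that df_to_json
# is roughly DataFrame.to_json; the Task stand-in and the data are made up.
from types import SimpleNamespace

import pandas as pd

df = pd.DataFrame({"pred": [0.1, 0.9, 0.4]})
tasks = [SimpleNamespace(batch=2), SimpleNamespace(batch=None)]

i = 0
for task in tasks:
    n = 1 if task.batch is None else task.batch
    chunk = df[i : i + n]
    i += n
    print(chunk.to_json(orient="records"))
# prints [{"pred":0.1},{"pred":0.9}] for the first task, [{"pred":0.4}] for the second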
def predict(self, parsed_json: JsonSerializable): text = parsed_json.get("text") model, tokenizer = self.get_artifacts() # tokenize tokens = tokenizer.tokenize(tokenizer.decode(tokenizer.encode(text))) inputs = tokenizer.encode(text, return_tensors="pt") # invalidate token lengths of less than 10 if len(inputs[0]) < 10: return InferenceError(err_msg="text too short", http_status=400) # get logits and argmax outputs = model(inputs).logits output = torch.argmax(outputs, dim=2)[0].numpy() # token fragment grouping res = [] prev_decoded = 'O' for token, prediction in zip(tokens, output): decoded = self.label_list[prediction] if decoded != 'O': if decoded == prev_decoded: if token.startswith('##'): new_token = res[-1][0] + token[2:] else: new_token = res[-1][0] + ' ' + token res[-1] = (new_token, decoded) else: res.append((token, decoded)) prev_decoded = decoded json_out = json.dumps({"result": res}) return InferenceResult( data=json_out, http_status=200, http_headers={"Content-Type": "application/json"}, )
def predict_direct_json(self, input_data, task: InferenceTask = None):
    # the framework supplies the InferenceTask for the current request
    if task.http_headers.content_type != "application/json":
        return InferenceError(http_status=400, err_msg="application/json only")
    result = self.artifacts.model.predict_json([input_data])[0]
    return InferenceResult(http_status=200, data=json.dumps(result))
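# A hedged client-side sketch for exercising a handler like this one; the
# endpoint URL and payload are assumptions about deployment, not part of the
# snippet above. requests sets Content-Type: application/json for the json=
# argument, which satisfies the header check.
import requests

resp = requests.post(
    "http://localhost:5000/predict_direct_json",  # hypothetical endpoint
    json={"feature_a": 1.0, "feature_b": 2.0},    # hypothetical payload
)
print(resp.status_code, resp.text)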