def pack_user_func_return_value(
    self, return_result, tasks: Sequence[InferenceTask],
) -> Sequence[InferenceResult[str]]:
    rv = []
    i = 0
    for task in tasks:
        if task.batch is None:
            result = return_result[i:i + 1]
            i += 1
        else:
            result = return_result[i:i + task.batch]
            i += task.batch
        try:
            result = df_to_json(result, self.output_orient)
            rv.append(
                InferenceResult(
                    context=InferenceContext(http_status=200),
                    data=result,
                ))
        except Exception as e:  # pylint: disable=broad-except
            rv.append(
                InferenceResult(
                    context=DefaultErrorContext(err_msg=str(e), http_status=500),
                ))
    return rv
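# A minimal, hypothetical sketch (not part of the source) of the slicing bookkeeping
# above: tasks submitted without a batch consume one row of the combined result,
# batched tasks consume `batch` rows. The helper name and batch sizes are made up
# for illustration only.
def _slice_by_batch(return_result, batch_sizes):
    slices, i = [], 0
    for batch in batch_sizes:
        step = 1 if batch is None else batch
        slices.append(return_result[i:i + step])
        i += step
    return slices

# e.g. _slice_by_batch(list(range(6)), [3, None, 2]) -> [[0, 1, 2], [3], [4, 5]]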
def infer(self, inf_tasks: Iterable[InferenceTask]) -> Sequence[InferenceResult]:
    inf_tasks = tuple(inf_tasks)

    # extract args
    user_args = self.input_adapter.extract_user_func_args(inf_tasks)
    filtered_tasks = tuple(t for t in inf_tasks if not t.is_discarded)

    # call user function
    if not self.batch:
        # For single inputs
        user_return = []
        for task, legacy_user_args in zip(
            filtered_tasks,
            self.input_adapter.iter_batch_args(user_args, tasks=filtered_tasks),
        ):
            ret = self.user_func(*legacy_user_args, task=task)
            if task.is_discarded:
                continue
            else:
                user_return.append(ret)
        if (isinstance(user_return, (list, tuple)) and len(user_return)
                and isinstance(user_return[0], InferenceResult)):
            inf_results = user_return
        else:
            # pack return value
            filtered_tasks = tuple(t for t in inf_tasks if not t.is_discarded)
            inf_results = self.output_adapter.pack_user_func_return_value(
                user_return, tasks=filtered_tasks)
    else:
        user_return = self.user_func(*user_args, tasks=filtered_tasks)
        if (isinstance(user_return, (list, tuple)) and len(user_return)
                and isinstance(user_return[0], InferenceResult)):
            inf_results = user_return
        else:
            # pack return value
            filtered_tasks = tuple(t for t in inf_tasks if not t.is_discarded)
            inf_results = self.output_adapter.pack_user_func_return_value(
                user_return, tasks=filtered_tasks)

    full_results = InferenceResult.complete_discarded(inf_tasks, inf_results)

    log_data = dict(
        service_name=self.service.name if self.service else "",
        service_version=self.service.version if self.service else "",
        api=self.name,
    )
    for task, result in zip(inf_tasks, inf_results):
        prediction_logger.info(
            dict(
                log_data,
                task=task.to_json(),
                result=result.to_json(),
                request_id=task.task_id,
            ))
    return tuple(full_results)
def pack_user_func_return_value(
    self, return_result: ApiFuncReturnValue, tasks: Sequence[InferenceTask],
) -> Sequence[InferenceResult[str]]:
    results = []
    for json_obj, task in regroup_return_value(return_result, tasks):
        args = task.cli_args
        if args:
            parser = argparse.ArgumentParser()
            parser.add_argument(
                "-o", "--output", default="json", choices=["str", "json"]
            )
            parsed_args, _ = parser.parse_known_args(args)
            output = parsed_args.output
        else:
            output = "json"
        try:
            if output == "json":
                json_str = json.dumps(json_obj, cls=NumpyJsonEncoder)
            else:
                json_str = str(json_obj)
            results.append(
                InferenceResult(
                    data=json_str,
                    http_status=200,
                    http_headers={"Content-Type": "application/json"},
                ))
        except AssertionError as e:
            results.append(InferenceError(err_msg=str(e), http_status=400))
        except Exception as e:  # pylint: disable=broad-except
            results.append(InferenceError(err_msg=str(e), http_status=500))
    return tuple(results)
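# NumpyJsonEncoder is referenced above but not defined in this excerpt. A rough,
# assumed sketch of what such an encoder usually does: convert numpy scalars and
# arrays into plain Python values so json.dumps can serialize them. The class name
# here is hypothetical.
import json

import numpy as np


class NumpyJsonEncoderSketch(json.JSONEncoder):
    def default(self, o):
        if isinstance(o, np.generic):   # numpy scalar -> Python scalar
            return o.item()
        if isinstance(o, np.ndarray):   # numpy array -> nested lists
            return o.tolist()
        return super().default(o)

# json.dumps({"label": np.int64(3)}, cls=NumpyJsonEncoderSketch) -> '{"label": 3}'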
def infer(self, inf_tasks: Iterable[InferenceTask]) -> Sequence[InferenceResult]:
    # task validation
    inf_tasks = tuple(inf_tasks)

    # extract args
    user_args = self.input_adapter.extract_user_func_args(
        tuple(self._filter_tasks(inf_tasks)))
    filtered_tasks = tuple(t for t in inf_tasks if not t.is_discarded)

    # call user function
    if not self.input_adapter.BATCH_MODE_SUPPORTED:
        # For legacy input adapters
        user_return = tuple(
            self.user_func(*legacy_user_args, tasks=(task,))
            for task, *legacy_user_args in zip(filtered_tasks, *user_args))
    else:
        user_return = self.user_func(*user_args, tasks=filtered_tasks)

    if (isinstance(user_return, (list, tuple)) and len(user_return)
            and isinstance(user_return[0], InferenceResult)):
        inf_results = user_return
    else:
        # pack return value
        inf_results = self.output_adapter.pack_user_func_return_value(
            user_return, tasks=filtered_tasks)

    full_results = InferenceResult.complete_discarded(inf_tasks, inf_results)
    return tuple(full_results)
def predict(self, parsed_json: JsonSerializable):
    text = parsed_json.get("text")
    model, tokenizer = self.get_artifacts()

    # tokenize
    inputs = tokenizer.encode(
        "summarize: " + text, return_tensors="pt", max_length=512)

    # invalidate token lengths of less than 10
    if len(inputs[0]) < 10:
        return InferenceError(err_msg="text too short", http_status=400)

    # summarize text with beam search (4 beams)
    output = model.generate(
        inputs,
        max_length=150,
        min_length=40,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )

    # decode the most likely sequence
    output = tokenizer.decode(output[0], skip_special_tokens=True).replace(" .", ".")
    json_out = json.dumps({"result": output})

    return InferenceResult(
        data=json_out,
        http_status=200,
        http_headers={"Content-Type": "application/json"},
    )
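# Hypothetical call showing the contract of the summarization predict() above: it
# expects parsed JSON with a "text" field and returns an InferenceResult whose data
# is a JSON string. `service` is a placeholder for an instance of the service class.
#
#   result = service.predict({"text": "Some long article to summarize ..."})
#   summary = json.loads(result.data)["result"]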
def predict(self, parsed_json: JsonSerializable):
    text = parsed_json.get("text")
    model, tokenizer = self.get_artifacts()

    def encode(hypothesis):
        return tokenizer.encode(
            text,
            hypothesis,
            padding='longest',
            return_tensors='pt',
            truncation_strategy='only_first',
        )

    hypotheses = self._get_hypotheses()
    inputs = [encode(hypothesis) for hypothesis in hypotheses]
    stacked = torch.stack(inputs, dim=1)

    logits = model(stacked[0])[0]

    # keep only the contradiction and entailment logits, then take the softmax
    # probability of entailment for each hypothesis
    entail_contradiction_logits = logits[:, [0, 2]]
    probs = entail_contradiction_logits.softmax(dim=1)[:, 1]

    res = {}
    for label, prob in zip(self.categories, probs):
        res[label] = prob.item()

    json_out = json.dumps({"result": res})
    return InferenceResult(
        data=json_out,
        http_status=200,
        http_headers={"Content-Type": "application/json"},
    )
def pack_user_func_return_value(
    self, return_result, tasks: Sequence[InferenceTask],
) -> Sequence[InferenceResult[str]]:
    rv = []
    results = tf_to_numpy(return_result)
    assert isinstance(results, np.ndarray)
    for result, _ in regroup_return_value(results, tasks):
        try:
            result_str = json.dumps(result, cls=TfTensorJsonEncoder)
            rv.append(InferenceResult(data=result_str))
        except Exception as e:  # pylint: disable=broad-except
            rv.append(
                InferenceResult(
                    context=DefaultErrorContext(err_msg=str(e), http_status=500),
                ))
    return rv
def predict_direct_json(self, input_datas, tasks: Sequence[InferenceTask] = None):
    filtered_jsons = []
    for j, t in zip(input_datas, tasks):
        if t.http_headers.content_type != "application/json":
            t.discard(http_status=400, err_msg="application/json only")
        else:
            filtered_jsons.append(j)

    rets = self.artifacts.model.predict_json(filtered_jsons)
    return [
        InferenceResult(http_status=200, data=json.dumps(result))
        for result in rets
    ]
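# Assuming the pre-1.0 BentoML decorator API (not shown in this excerpt), a
# batch-capable handler like predict_direct_json above is typically registered with
# batch=True so the server can group concurrent requests into a single call:
#
#   @api(input=JsonInput(), batch=True)
#   def predict_direct_json(self, input_datas, tasks=None):
#       ...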
def infer(self, inf_tasks: Iterable[InferenceTask]) -> Sequence[InferenceResult]:
    # task validation
    inf_tasks = tuple(inf_tasks)

    # extract args
    user_args = self.input_adapter.extract_user_func_args(
        tuple(self._filter_tasks(inf_tasks)))
    filtered_tasks = tuple(t for t in inf_tasks if not t.is_discarded)

    # call user function
    if not self.input_adapter.BATCH_MODE_SUPPORTED:
        # For legacy input adapters
        user_return = tuple(
            self.user_func(*legacy_user_args, tasks=(task,))
            for task, *legacy_user_args in zip(filtered_tasks, *user_args))
    else:
        user_return = self.user_func(*user_args, tasks=filtered_tasks)

    if (isinstance(user_return, (list, tuple)) and len(user_return)
            and isinstance(user_return[0], InferenceResult)):
        inf_results = user_return
    else:
        # pack return value
        inf_results = self.output_adapter.pack_user_func_return_value(
            user_return, tasks=filtered_tasks)

    full_results = InferenceResult.complete_discarded(inf_tasks, inf_results)

    log_data = dict(
        service_name=self.service.name if self.service else "",
        service_version=self.service.version if self.service else "",
        api=self.name,
    )
    for task, result in zip(inf_tasks, inf_results):
        prediction_logger.info(
            dict(
                log_data,
                task=task.to_json(),
                result=result.to_json(),
                request_id=task.task_id,
            ))
    return tuple(full_results)
def predict(self, parsed_json: JsonSerializable):
    text = parsed_json.get("text")
    model, tokenizer = self.get_artifacts()

    # tokenize
    tokens = tokenizer.tokenize(tokenizer.decode(tokenizer.encode(text)))
    inputs = tokenizer.encode(text, return_tensors="pt")

    # invalidate token lengths of less than 10
    if len(inputs[0]) < 10:
        return InferenceError(err_msg="text too short", http_status=400)

    # get logits and argmax
    outputs = model(inputs).logits
    output = torch.argmax(outputs, dim=2)[0].numpy()

    # token fragment grouping
    res = []
    prev_decoded = 'O'
    for token, prediction in zip(tokens, output):
        decoded = self.label_list[prediction]
        if decoded != 'O':
            if decoded == prev_decoded:
                # merge WordPiece continuations ('##') back into the previous token
                if token.startswith('##'):
                    new_token = res[-1][0] + token[2:]
                else:
                    new_token = res[-1][0] + ' ' + token
                res[-1] = (new_token, decoded)
            else:
                res.append((token, decoded))
        prev_decoded = decoded

    json_out = json.dumps({"result": res})
    return InferenceResult(
        data=json_out,
        http_status=200,
        http_headers={"Content-Type": "application/json"},
    )
def predict_direct_json(self, input_data, task: InferenceTask = None):
    if task.http_headers.content_type != "application/json":
        return InferenceError(http_status=400, err_msg="application/json only")

    result = self.artifacts.model.predict_json([input_data])[0]
    return InferenceResult(http_status=200, data=json.dumps(result))