Example 1: packing user-function return values as JSON (or plain-string) InferenceResult objects
def pack_user_func_return_value(
    self, return_result: ApiFuncReturnValue, tasks: Sequence[InferenceTask],
) -> Sequence[InferenceResult[str]]:
    results = []
    for json_obj, task in regroup_return_value(return_result, tasks):
        # CLI args attached to the task may select the output format.
        args = task.cli_args
        if args:
            parser = argparse.ArgumentParser()
            parser.add_argument(
                "-o", "--output", default="json", choices=["str", "json"]
            )
            parsed_args, _ = parser.parse_known_args(args)
            output = parsed_args.output
        else:
            output = "json"
        try:
            if output == "json":
                json_str = json.dumps(json_obj, cls=NumpyJsonEncoder)
            else:
                json_str = str(json_obj)
            results.append(
                InferenceResult(
                    data=json_str,
                    http_status=200,
                    http_headers={"Content-Type": "application/json"},
                )
            )
        except AssertionError as e:
            # Validation failures are reported as client errors.
            results.append(InferenceError(err_msg=str(e), http_status=400))
        except Exception as e:  # pylint: disable=broad-except
            # Anything else is reported as a server error.
            results.append(InferenceError(err_msg=str(e), http_status=500))
    return tuple(results)
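The example above relies on NumpyJsonEncoder to make NumPy scalars and arrays JSON-serializable; that class comes from the surrounding library and is not shown here. A minimal stand-in, assumed rather than taken from the library, could look like this:

import json
import numpy as np

class NumpyJsonEncoder(json.JSONEncoder):
    # Assumed sketch: convert NumPy values into plain Python types for json.dumps.
    def default(self, o):
        if isinstance(o, np.generic):  # np.int64, np.float32, ...
            return o.item()
        if isinstance(o, np.ndarray):
            return o.tolist()
        return super().default(o)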
Example 2: text-summarization predict API built on a seq2seq model and tokenizer
    def predict(self, parsed_json: JsonSerializable):
        text = parsed_json.get("text")
        model, tokenizer = self.get_artifacts()

        # tokenize
        inputs = tokenizer.encode("summarize: " + text,
                                  return_tensors="pt",
                                  max_length=512,
                                  truncation=True)

        # reject inputs shorter than 10 tokens
        if len(inputs[0]) < 10:
            return InferenceError(err_msg="text too short", http_status=400)

        # summarize with beam search (4 beams)
        output = model.generate(inputs,
                                max_length=150,
                                min_length=40,
                                length_penalty=2.0,
                                num_beams=4,
                                early_stopping=True)

        # decode the highest-scoring sequence
        output = tokenizer.decode(output[0],
                                  skip_special_tokens=True).replace(" .", ".")
        json_out = json.dumps({"result": output})
        return InferenceResult(
            data=json_out,
            http_status=200,
            http_headers={"Content-Type": "application/json"},
        )
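Assuming the predict method above is exposed over HTTP under an endpoint named after the API (the host, port and path below are hypothetical), a client call could look like this:

import requests

# Hypothetical local dev server; adjust the URL to the actual deployment.
resp = requests.post(
    "http://127.0.0.1:5000/predict",
    json={"text": "Long article text to be summarized ..."},
)
print(resp.status_code, resp.text)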
Example 3: packing TensorFlow tensor results into JSON InferenceResult objects
def pack_user_func_return_value(
    self,
    return_result,
    tasks: Sequence[InferenceTask],
) -> Sequence[InferenceResult[str]]:
    rv = []
    # Convert TensorFlow tensors to NumPy before regrouping per task.
    results = tf_to_numpy(return_result)
    for result, _ in regroup_return_value(results, tasks):
        try:
            result_str = json.dumps(result, cls=TfTensorJsonEncoder)
            rv.append(InferenceResult(data=result_str, http_status=200))
        except Exception as e:  # pylint: disable=broad-except
            rv.append(InferenceError(err_msg=str(e), http_status=500))
    return rv
Example 4: packing DataFrame results as JSON, sliced per task batch
def pack_user_func_return_value(
    self,
    return_result,
    tasks: Sequence[InferenceTask],
) -> Sequence[InferenceResult[str]]:
    rv = []
    i = 0
    for task in tasks:
        # Slice the combined DataFrame back into per-task chunks:
        # one row for non-batch tasks, task.batch rows otherwise.
        if task.batch is None:
            result = return_result[i:i + 1]
            i += 1
        else:
            result = return_result[i:i + task.batch]
            i += task.batch
        try:
            result = df_to_json(result, self.output_orient)
            rv.append(InferenceResult(http_status=200, data=result))
        except Exception as e:  # pylint: disable=broad-except
            rv.append(InferenceError(err_msg=str(e), http_status=500))
    return rv
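df_to_json is a helper from the surrounding codebase and is not shown here; pandas provides the equivalent behaviour through DataFrame.to_json, so an assumed stand-in could be:

import pandas as pd

def df_to_json(df: pd.DataFrame, output_orient: str = "records") -> str:
    # Assumed stand-in: serialize the per-task DataFrame slice using the configured orient.
    return df.to_json(orient=output_orient)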
Example 5: token-classification (NER) predict API with WordPiece fragment grouping
    def predict(self, parsed_json: JsonSerializable):
        text = parsed_json.get("text")
        model, tokenizer = self.get_artifacts()

        # tokenize
        tokens = tokenizer.tokenize(tokenizer.decode(tokenizer.encode(text)))
        inputs = tokenizer.encode(text, return_tensors="pt")

        # reject inputs shorter than 10 tokens
        if len(inputs[0]) < 10:
            return InferenceError(err_msg="text too short", http_status=400)

        # get logits and argmax
        outputs = model(inputs).logits
        output = torch.argmax(outputs, dim=2)[0].numpy()

        # merge WordPiece fragments and consecutive tokens that share a label
        res = []
        prev_decoded = 'O'
        for token, prediction in zip(tokens, output):
            decoded = self.label_list[prediction]
            if decoded != 'O':
                if decoded == prev_decoded:
                    if token.startswith('##'):
                        new_token = res[-1][0] + token[2:]
                    else:
                        new_token = res[-1][0] + ' ' + token
                    res[-1] = (new_token, decoded)
                else:
                    res.append((token, decoded))
            prev_decoded = decoded

        json_out = json.dumps({"result": res})
        return InferenceResult(
            data=json_out,
            http_status=200,
            http_headers={"Content-Type": "application/json"},
        )
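self.label_list maps model output indices to NER tags and depends on how the model was trained; a CoNLL-2003-style list, shown here only as an assumed example, would be:

# Assumed example only; the real label_list must match the model's training labels.
label_list = [
    "O", "B-MISC", "I-MISC", "B-PER", "I-PER",
    "B-ORG", "I-ORG", "B-LOC", "I-LOC",
]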
Example 6: JSON-only predict API that validates the request Content-Type
def predict_direct_json(self, input_data, task: InferenceTask = None):
    # Reject requests that are not application/json, based on the task's HTTP headers.
    if task.http_headers.content_type != "application/json":
        return InferenceError(http_status=400, err_msg="application/json only")
    result = self.artifacts.model.predict_json([input_data])[0]
    return InferenceResult(http_status=200, data=json.dumps(result))