Beispiel #1
0
    def postprocess(self, response: Response) -> Response:
        """Turn the tag sequences in ``response.data`` into offset sequences.

        Each tag sequence is paired positionally with a raw text taken from
        ``self.request_query`` and handed to ``self.decoder.to_offset``.
        Pairing uses ``zip``, so it stops at the shorter of the two inputs.

        A tag sequence the decoder rejects with ``TagSetDecodeError`` does not
        abort the batch: that item gets an empty ``Sequence`` built from its
        raw text, is flagged as failed, and carries the error message.

        Returns:
            The same ``response`` object, after ``update_data`` has been
            called with the list of ``PredictResult`` items.
        """
        from tokenizer_tools.tagset.exceptions import TagSetDecodeError
        from tokenizer_tools.tagset.offset.sequence import Sequence

        tag_batches = response.data
        texts = self.request_query

        predictions = []

        for text, tag_batch in zip(texts, tag_batches):
            # Tags may arrive as bytes; normalise every one of them to str.
            decoded_tags = []
            for tag in tag_batch:
                if isinstance(tag, bytes):
                    decoded_tags.append(tag.decode())
                else:
                    decoded_tags.append(tag)

            # Tag sequence (BILUO) -> offset representation.
            try:
                offsets = self.decoder.to_offset(decoded_tags, text)
            except TagSetDecodeError as err:
                # An invalid tag sequence raises; substitute an empty result
                # so one bad item does not fail the whole batch.
                message = str(err)
                offsets = Sequence(text)
                failed = True
            else:
                message = None
                failed = False

            predictions.append(PredictResult(offsets, failed, message))

        response.update_data(predictions)

        return response
    def postprocess(self, response: Response) -> Response:
        """Map every element of ``response.data`` back through the tag table.

        ``response.data`` is iterated as a sequence of rows; each element of
        each row is passed to ``inverse_lookup`` on the table registered under
        the ``"tag"`` key of ``self.lookup_table_registry``.

        Returns:
            The same ``response`` object, after ``update_data`` has been
            called with the list of converted rows.
        """
        table = self.lookup_table_registry["tag"]

        # One output row per input row, preserving element order.
        converted_rows = [
            [table.inverse_lookup(item) for item in row]
            for row in response.data
        ]

        response.update_data(converted_rows)

        return response