Example #1
0
 def __init__(self):
     """
     Initialize the account-label lookup tables.

     Loads the account-label YAML file located via ``SystemInfo`` and
     builds a reverse index from each account value back to its label.
     """
     system_info = SystemInfo()
     entity = EntityCode()
     self.account_label = entity.get_account_label()
     self.entity_code = entity.get_entity_code()
     # Use a context manager so the YAML file handle is closed promptly
     # (the original passed a bare open() to yaml.load and leaked it).
     with open(system_info.get_account_label_path(),
               encoding='utf-8') as fp:
         self.account_identify = yaml.load(fp, Loader=yaml.SafeLoader)
     # Invert {label: [values]} into {value: label} for O(1) reverse lookup.
     self.identity_account = {
         value: label
         for label, values in self.account_identify.items()
         for value in values
     }
Example #2
0
    def __init__(self, test_mode=False):
        """
        Wire up system info, the NER model-serving client and entity
        metadata, then build the label tables and seed the segmenter.

        :param test_mode: when True, run SystemInfo/ModelServing in test mode
        """
        self.system_info = SystemInfo(is_test=test_mode)
        self.client = ModelServing(self.system_info.MODE_NER,
                                   is_test=test_mode)
        self.config = self.system_info.get_config()

        self.entity_code = EntityCode()
        self.ner_entities = self.entity_code.get_ner_entities()
        self.code = self.entity_code.get_entity_code()

        self.labels_list, self.labels_list_split = [], []
        self.__init_specific_label_combine()
        self.__init_jieba()
Example #3
0
    def __init__(self):
        """
        Wire up system info, the NER serving client and entity metadata,
        then build the label tables and seed the segmenter.
        """
        self.system_info = SystemInfo()
        self.client = ModelServing(self.system_info.MODE_NER)
        self.config = self.system_info.get_config()

        self.entity_code = EntityCode()
        self.ner_entities = self.entity_code.get_ner_entities()
        self.code = self.entity_code.get_entity_code()
        # Map the model's tag names onto the project's entity type labels.
        self.entity_map_dic = {
            "ORG": "CPNY_NAME",
            "PER": "NAME",
            "DATE": "DATE",
            "LOC": "ADDR_VALUE",
        }

        self.labels_list, self.labels_list_split = [], []
        self.__init_specific_label_combine()
        self.__init_jieba()
 def __init__(self):
     """
     Read the dependency-parser connection settings from the system config.
     """
     config = SystemInfo().get_config()
     self.config = config
     self.server_ip = config['dependence_parser_ip']
     self.server_port = config['dependence_parser_port']
     self.annotators = config['server_type']['depparse']
Example #5
0
    def __init__(self, mode, is_test=False):
        """
        Build the TF-Serving client state: config, label tables, tokenizer
        and the gRPC stub for the NER model.

        :param mode: which service to connect to; in production only
                     SystemInfo.MODE_NER opens a channel
        :param is_test: when True, load config/labels/vocab from relative
                        test paths instead of the deployed locations
        """
        self.system_info = SystemInfo()

        # Resolve the per-environment file locations first; everything that
        # follows is identical for test and production (the original
        # duplicated the whole setup in both branches).
        if is_test:
            # Test mode: load the configuration from the local test paths.
            self.config = load_config('../../config/')
            label_list_file = '../../output/labels/label_list.pkl'
            label2id_file = '../../output/labels/label2id.pkl'
            vocab_file = '../../chinese_L-12_H-768_A-12/vocab.txt'
        else:
            self.config = self.system_info.get_config()
            # Only the production branch exposes max_seq_length, as before.
            self.max_seq_length = self.config["max_seq_length"]
            label_path = self.system_info.get_labels_path()
            label_list_file = os.path.join(label_path, 'label_list.pkl')
            label2id_file = os.path.join(label_path, 'label2id.pkl')
            vocab_file = os.getcwd() + self.config["vocab_file"]

        self.time_out = self.config["grpc_request_timeout"]
        self.batch_size = self.config["pred_batch_size"]
        self.hidden_size = self.config["hidden_size"]

        with open(label_list_file, 'rb') as rf:
            self.label_list = pickle.load(rf)
        with open(label2id_file, 'rb') as rf:
            self.label2id = pickle.load(rf)
        self.id2label = {value: key for key, value in self.label2id.items()}

        # Labels are indexed starting from 1 (0 is used for padding).
        self.label_map = {
            label: i for i, label in enumerate(self.label_list, 1)
        }

        self.tokenizer = tokenization.FullTokenizer(
            vocab_file=vocab_file,
            do_lower_case=self.config["do_lower_case"])

        # Test mode always connects; production connects only for NER.
        if is_test or mode == self.system_info.MODE_NER:
            channel = grpc.insecure_channel(self.config["model_ner_address"])
            self.stub = prediction_service_pb2_grpc.PredictionServiceStub(
                channel)
        else:
            # NOTE(review): self.stub is left unset on this path, so a later
            # request would raise AttributeError — confirm intended.
            logger.error('Please config ip address and port first.')
Example #6
0
class ModelServing(object):
    """
    Query the TensorFlow Serving deployment (running in Docker) over gRPC.

    Two services are provided: NER and SEN.
    NER: recognize the entities in a sentence
    SEN: represent a sentence as a vector
    """
    def __init__(self, mode, is_test=False):
        """
        Load config, label tables and the BERT tokenizer, then open the
        gRPC channel to the NER model.

        :param mode: which service to connect to; in production only
                     SystemInfo.MODE_NER opens a channel
        :param is_test: when True, read config/labels/vocab from relative
                        test paths
        """
        self.system_info = SystemInfo()
        if is_test:
            # Test mode: load the configuration from the local test paths.
            self.config = load_config('../../config/')
            self.time_out = self.config["grpc_request_timeout"]
            self.batch_size = self.config["pred_batch_size"]
            self.hidden_size = self.config["hidden_size"]

            with open('../../output/labels/label_list.pkl', 'rb') as rf:
                self.label_list = pickle.load(rf)
            with open('../../output/labels/label2id.pkl', 'rb') as rf:
                self.label2id = pickle.load(rf)
                self.id2label = {
                    value: key
                    for key, value in self.label2id.items()
                }

            self.label_map = {}
            for (i, label) in enumerate(self.label_list, 1):
                self.label_map[label] = i
            self.tokenizer = tokenization.FullTokenizer(
                vocab_file='../../chinese_L-12_H-768_A-12/vocab.txt',
                do_lower_case=self.config["do_lower_case"])
            channel = grpc.insecure_channel(self.config["model_ner_address"])
            self.stub = prediction_service_pb2_grpc.PredictionServiceStub(
                channel)
        else:
            # Production mode: everything below mirrors the test branch but
            # reads the deployed paths from SystemInfo.
            self.config = self.system_info.get_config()
            self.time_out = self.config["grpc_request_timeout"]
            self.batch_size = self.config["pred_batch_size"]
            self.hidden_size = self.config["hidden_size"]
            self.max_seq_length = self.config["max_seq_length"]

            label_path = self.system_info.get_labels_path()

            with open(os.path.join(label_path, 'label_list.pkl'), 'rb') as rf:
                self.label_list = pickle.load(rf)

            with open(os.path.join(label_path, 'label2id.pkl'), 'rb') as rf:
                self.label2id = pickle.load(rf)
                self.id2label = {
                    value: key
                    for key, value in self.label2id.items()
                }

            self.label_map = {}
            # The 1 means labels are indexed starting from 1.
            for (i, label) in enumerate(self.label_list, 1):
                self.label_map[label] = i

            self.tokenizer = tokenization.FullTokenizer(
                vocab_file=os.getcwd() + self.config["vocab_file"],
                do_lower_case=self.config["do_lower_case"])

            if mode == self.system_info.MODE_NER:
                channel = grpc.insecure_channel(
                    self.config["model_ner_address"])
                self.stub = prediction_service_pb2_grpc.PredictionServiceStub(
                    channel)
            else:
                # NOTE(review): self.stub is left unset on this path, so a
                # later request would raise AttributeError — confirm intended.
                logger.error('Please config ip address and port first.')

    def convert_single_example(self, ex_index, example, max_seq_length,
                               tokenizer, mode):
        """
        Convert one tokenized sample into id form (token ids and label ids)
        and wrap the result in an InputFeatures object.

        :param ex_index: example index (unused in this implementation)
        :param example: one tokenized sample (sequence of tokens)
        :param max_seq_length: fixed sequence length after padding
        :param tokenizer: tokenizer that maps tokens to vocabulary ids
        :param mode: unused in this implementation
        :return: InputFeatures with input_ids/input_mask/segment_ids/label_ids
        """

        tokens = example
        # Truncate the sequence if necessary.
        if len(tokens) >= max_seq_length - 1:
            tokens = tokens[0:(max_seq_length - 2)]  # -2 because sentence start/end markers will be added
        ntokens = []
        segment_ids = []
        label_ids = []
        ntokens.append("[CLS]")  # [CLS] marks the start of the sentence
        segment_ids.append(0)
        label_ids.append(
            self.label_map["[CLS]"]
        )  # Using O or CLS here makes no real difference; O would reduce the label count, but distinct start/end markers work just as well
        for i, token in enumerate(tokens):
            ntokens.append(token)
            segment_ids.append(0)
            label_ids.append(0)
        ntokens.append("[SEP]")  # append the [SEP] end-of-sentence marker
        segment_ids.append(0)
        label_ids.append(self.label_map["[SEP]"])
        input_ids = tokenizer.convert_tokens_to_ids(
            ntokens)  # convert the tokens (ntokens) into vocabulary ids
        input_mask = [1] * len(input_ids)

        # Pad every list out to max_seq_length.
        while len(input_ids) < max_seq_length:
            input_ids.append(0)
            input_mask.append(0)
            segment_ids.append(0)
            label_ids.append(0)
            ntokens.append("**NULL**")
        assert len(input_ids) == max_seq_length
        assert len(input_mask) == max_seq_length
        assert len(segment_ids) == max_seq_length
        assert len(label_ids) == max_seq_length

        # Wrap everything into a feature object.
        feature = InputFeatures(
            input_ids=input_ids,
            input_mask=input_mask,
            segment_ids=segment_ids,
            label_ids=label_ids,
        )
        return feature

    def convert(self, line, seq_length=128):
        """
        Convert one tokenized sentence into the four batched input lists
        expected by the serving model.

        :param line: tokenized sentence
        :param seq_length: fixed sequence length after padding
        :return: (input_ids, input_mask, segment_ids, label_ids), each a
                 nested list shaped (batch_size, seq_length)
        """
        feature = self.convert_single_example(0, line, seq_length,
                                              self.tokenizer, 'p')

        input_ids = np.reshape([feature.input_ids],
                               (self.batch_size, seq_length)).tolist()
        input_mask = np.reshape([feature.input_mask],
                                (self.batch_size, seq_length)).tolist()
        segment_ids = np.reshape([feature.segment_ids],
                                 (self.batch_size, seq_length)).tolist()
        label_ids = np.reshape([feature.label_ids],
                               (self.batch_size, seq_length)).tolist()
        return input_ids, input_mask, segment_ids, label_ids

    def send_grpc_request_ner(self, raw_sen):
        """
        Send a gRPC request to the server and get the entity-recognition
        result for the sentence.

        :param raw_sen: the sentence to analyse
        :return: (tokenized sentence, predicted label sequence), or
                 (None, None) when the request failed
        """
        sentence = self.tokenizer.tokenize(raw_sen)

        input_ids, input_mask, segment_ids, label_ids = self.convert(
            sentence, self.config["max_seq_length"])

        # create the request object and set the name and signature_name params
        request = predict_pb2.PredictRequest()
        request.model_spec.name = MODEL_NAME_NER
        request.model_spec.signature_name = MODEL_SIGNATURE_NER

        # fill in the request object with the necessary data
        request.inputs['input_ids'].CopyFrom(
            tf.contrib.util.make_tensor_proto(input_ids))
        request.inputs['input_mask'].CopyFrom(
            tf.contrib.util.make_tensor_proto(input_mask))
        request.inputs['segment_ids'].CopyFrom(
            tf.contrib.util.make_tensor_proto(segment_ids))
        request.inputs['label_ids'].CopyFrom(
            tf.contrib.util.make_tensor_proto(label_ids))

        result_future = self.stub.Predict.future(request, self.time_out)
        exception = result_future.exception()

        if exception:
            logger.error('process sentence: {0}, raise exception: {1}'.format(
                raw_sen, exception))
            return None, None
        else:
            pred_ids_result = np.array(
                result_future.result().outputs['pred_ids'].int_val)
            pred_label_result = convert_id_to_label(pred_ids_result,
                                                    self.id2label)

            return sentence, pred_label_result

    def test_send_grpc_request_ner(self, raw_sen):
        """
        Smoke-test that the NER service is reachable and returns a result.

        :param raw_sen: sentence to send
        :return: True on success, False when the request raised an exception
        """
        sentence = self.tokenizer.tokenize(raw_sen)

        input_ids, input_mask, segment_ids, label_ids = self.convert(
            sentence, self.config["max_seq_length"])

        # create the request object and set the name and signature_name params
        request = predict_pb2.PredictRequest()
        request.model_spec.name = MODEL_NAME_NER
        request.model_spec.signature_name = MODEL_SIGNATURE_NER

        # fill in the request object with the necessary data
        request.inputs['input_ids'].CopyFrom(
            tf.contrib.util.make_tensor_proto(input_ids))
        request.inputs['input_mask'].CopyFrom(
            tf.contrib.util.make_tensor_proto(input_mask))
        request.inputs['segment_ids'].CopyFrom(
            tf.contrib.util.make_tensor_proto(segment_ids))
        request.inputs['label_ids'].CopyFrom(
            tf.contrib.util.make_tensor_proto(label_ids))

        result_future = self.stub.Predict.future(request, self.time_out)
        exception = result_future.exception()

        if exception:
            print('process sentence: {0}, raise exception: {1}'.format(
                raw_sen, exception))
            return False
        else:
            pred_ids_result = np.array(
                result_future.result().outputs['pred_ids'].int_val)
            pred_label_result = convert_id_to_label(pred_ids_result,
                                                    self.id2label)

            print(sentence)
            print(pred_label_result)
            return True
Example #7
0
class SemanticSearch(object):
    """
    Recognize person names, organization names, locations and dates in a
    query by calling the BERT NER service (see ``sentence_ner_entities``).
    """
    def __init__(self):

        self.system_info = SystemInfo()

        # gRPC client for the NER model served by TensorFlow Serving.
        self.client = ModelServing(self.system_info.MODE_NER)

        self.config = self.system_info.get_config()

        self.entity_code = EntityCode()
        self.ner_entities = self.entity_code.get_ner_entities()
        self.code = self.entity_code.get_entity_code()
        # Map the model's tag names onto the project's entity type labels.
        self.entity_map_dic = {
            "ORG": "CPNY_NAME",
            "PER": "NAME",
            "DATE": "DATE",
            "LOC": "ADDR_VALUE"
        }

        self.labels_list = []
        self.labels_list_split = []
        self.__init_specific_label_combine()
        self.__init_jieba()

    def __init_specific_label_combine(self):
        """
        Populate labels_list and labels_list_split with every ordered pair
        of entity labels, so that concatenated tags such as "NAMECOMPANY"
        can be split into "NAME#COMPANY".
        """
        entities = self.entity_code.get_entities()
        for i in range(0, len(entities)):
            for j in range(0, len(entities)):
                if i != j:
                    self.labels_list.append(entities[i] + entities[j])
                    self.labels_list_split.append(
                        (entities[i] + "#" + entities[j]))

                    self.labels_list.append(entities[j] + entities[i])
                    self.labels_list_split.append(
                        (entities[j] + "#" + entities[i]))

    def __init_jieba(self):
        """
        Register domain vocabulary (the entity labels) with the segmenter;
        this noticeably improves segmentation of company names.
        """
        entities = self.entity_code.get_entities()
        for label in entities:
            jieba.add_word(label)

    @staticmethod
    def __combine_label(entities, label=None):
        """
        Merge adjacent entities in the list that share the given label.

        :param entities: list of [word, label] pairs
        :param label: the label whose consecutive runs should be merged
        :return: a new list with runs of ``label`` merged into one entry
        """
        pre_label = False
        first_label = None
        entities_copy = []
        for i in range(len(entities)):
            if entities[i][1] != label:
                pre_label = False
                if first_label is not None:
                    entities_copy.append(first_label)
                    first_label = None
                entities_copy.append(entities[i])
            elif pre_label is False and entities[i][1] == label:
                pre_label = True
                first_label = entities[i]
            elif pre_label and first_label is not None and entities[i][
                    1] == label:
                temp = first_label
                first_label = [temp[0] + entities[i][0], temp[1]]

        if first_label is not None:
            entities_copy.append(first_label)

        return entities_copy

    def __combine_com_add(self, entities):
        """
        Merge adjacent COMPANY/ADDR pairs (COMPANYADDR or ADDRCOMPANY) into
        a single COMPANY entity.

        :param entities: list of [word, label] pairs, modified in place
        :return: None
        """
        company_index = -1
        addr_index = -1

        for i, entity in enumerate(entities):
            if self.ner_entities['COMPANY'] == entity[1]:
                company_index = i
            if self.ner_entities['ADDR'] == entity[1]:
                addr_index = i

        if company_index != -1 and addr_index != -1:
            if company_index == addr_index + 1:
                entities[company_index][
                    0] = entities[addr_index][0] + entities[company_index][0]
                entities.remove(entities[addr_index])
            elif company_index == addr_index - 1:
                entities[company_index][
                    0] = entities[company_index][0] + entities[addr_index][0]
                entities.remove(entities[addr_index])

    def __split_diff_labels(self, template_sen):
        """
        Detect directly-concatenated labels in a template sentence (e.g.
        "ADDRNAME"), which the segmenter cannot split, and separate them
        with "#".

        :param template_sen: template sentence
        :return: the sentence with concatenated labels separated
        """
        for i, label in enumerate(self.labels_list):
            if label in template_sen:
                template_sen = template_sen.replace(label,
                                                    self.labels_list_split[i])
        return template_sen

    def __get_entities(self, sentence, pred_label_result):
        """
        Extract all entities from the BIO tags predicted for a sentence.

        :param sentence: tokenized sentence
        :param pred_label_result: predicted BIO label for each token
        :return: list of [word, label] pairs; wordpiece markers ("##") are
                 stripped and single-character words are dropped
        """
        word = ""
        label = ""
        entities = []
        for i in range(len(sentence)):
            temp_label = pred_label_result[i]
            if temp_label[0] == 'B':
                if word != "":
                    if "##" in word:
                        word = word.replace('##', '')
                    if len(word) > 1:
                        entities.append([word, label])
                    word = ""

                label = self.entity_map_dic[temp_label[2:]]

                word += sentence[i]
            elif temp_label[0] == 'I' and word != "":
                word += sentence[i]
            elif temp_label == 'O' and word != "":
                if "##" in word:
                    word = word.replace('##', '')
                if len(word) > 1:
                    entities.append([word, label])
                word = ""
                label = ""
        if word != "":
            if "##" in word:
                word = word.replace('##', '')
            if len(word) > 1:
                entities.append([word, label])

        return entities

    def get_ner_result(self, query):
        """
        Send a gRPC request to the Docker service and run named-entity
        recognition on the query.

        :param query: the question sentence
        :return: None when recognition failed; otherwise a tuple
                 (entity, entities) where ``entity`` is a list of dicts for
                 downstream modules and ``entities`` the raw [word, label]
                 pairs
        """
        sentence, pred_label_result = self.client.send_grpc_request_ner(query)

        if pred_label_result is None:
            logger.error("句子: {0}\t实体识别结果为空".format(query))
            return None

        entities = self.__get_entities(sentence, pred_label_result)

        entity = []
        for word, label in entities:
            begin = query.find(word)
            # Skip words that no longer appear in the raw query and words
            # that are pure digits.
            if begin != -1 and word.isdigit() is False:
                entity.append({
                    "type": label,
                    "value": word,
                    "code": self.code[label],
                    "begin": begin,
                    # NOTE(review): end is begin + len(word) + 1, one past
                    # the usual exclusive end — confirm consumers expect it.
                    "end": begin + len(word) + 1
                })
        return entity, entities

    def sentence_ner_entities(self, result_intent):
        """
        Run BERT entity recognition over the query and merge the result
        into the intent dict.

        :param result_intent: output of the intent-recognition module; must
            contain "query", "relation" and "accounts"
        :return:
            result_intent: with "entity" filled in and conflicting
                relations/entities removed
            unlabel_result: dict describing UNLABEL accounts, or None when
                there are none
        """
        sentence = result_intent["query"]

        ner_result = self.get_ner_result(sentence)
        # get_ner_result returns None on failure; degrade gracefully instead
        # of crashing on tuple unpacking (original bug).
        if ner_result is None:
            entity, entities = [], []
        else:
            entity, entities = ner_result

        result_intent["entity"] = entity

        # If a word was tagged as a named entity but also detected as a
        # relation, drop the relation.  Rebuild the list instead of popping
        # while iterating (the original skipped elements after each pop).
        result_intent["relation"] = [
            rel for rel in result_intent["relation"]
            if not any(word.find(rel["value"]) != -1 for word, _ in entities)
        ]

        # An entity already recognized as an account is more likely to be an
        # account, so remove it from the entity list (same rebuild fix).
        result_intent["entity"] = [
            ent for ent in result_intent["entity"]
            if not any(account["value"].find(ent["value"]) != -1
                       for account in result_intent["accounts"])
        ]

        # Collect every UNLABEL tag produced by the account-labelling module.
        unlabels = [
            value["value"] for value in result_intent["accounts"]
            if value["type"] == "UNLABEL"
        ]
        if not unlabels:
            unlabel_result = None
        else:
            unlabel_result = {
                "sentence": sentence,
                "unlabels": unlabels,
                "error": "账户类型不明确"
            }
        return result_intent, unlabel_result
Example #8
0
class SemanticSearch(object):
    """
    Recognize person names, organization names, locations and dates in a
    sentence by calling the BERT NER service (see ``sentence_ner_entities``).
    """
    def __init__(self, test_mode=False):
        """
        Wire up system info, the NER serving client and entity metadata,
        then build the label tables and seed the segmenter.

        :param test_mode: when True, run SystemInfo/ModelServing in test mode
        """
        self.system_info = SystemInfo(is_test=test_mode)

        self.client = ModelServing(self.system_info.MODE_NER,
                                   is_test=test_mode)

        self.config = self.system_info.get_config()

        self.entity_code = EntityCode()
        self.ner_entities = self.entity_code.get_ner_entities()
        self.code = self.entity_code.get_entity_code()

        self.labels_list = []
        self.labels_list_split = []
        self.__init_specific_label_combine()
        self.__init_jieba()

    def __init_specific_label_combine(self):
        """
        Populate labels_list and labels_list_split with every ordered pair
        of entity labels, so that concatenated tags such as "NAMECOMPANY"
        can be split into "NAME#COMPANY".
        """
        entities = self.entity_code.get_entities()
        for i in range(0, len(entities)):
            for j in range(0, len(entities)):
                if i != j:
                    self.labels_list.append(entities[i] + entities[j])
                    self.labels_list_split.append(
                        (entities[i] + "#" + entities[j]))

                    self.labels_list.append(entities[j] + entities[i])
                    self.labels_list_split.append(
                        (entities[j] + "#" + entities[i]))

    def __init_jieba(self):
        """
        Register domain vocabulary (the entity labels) with the segmenter;
        this noticeably improves segmentation of company names.
        """
        entities = self.entity_code.get_entities()
        for label in entities:
            jieba.add_word(label)

    def __combine_label(self, entities, label=None):
        """
        Merge adjacent entities in the list that share the given label.

        :param entities: list of [word, label] pairs
        :param label: the label whose consecutive runs should be merged
        :return: a new list with runs of ``label`` merged into one entry
        """
        pre_label = False
        first_label = None
        entities_copy = []
        for i in range(len(entities)):
            if entities[i][1] != label:
                pre_label = False
                if first_label is not None:
                    entities_copy.append(first_label)
                    first_label = None
                entities_copy.append(entities[i])
            elif pre_label is False and entities[i][1] == label:
                pre_label = True
                first_label = entities[i]
            elif pre_label and first_label is not None and entities[i][
                    1] == label:
                temp = first_label
                first_label = [temp[0] + entities[i][0], temp[1]]

        if first_label is not None:
            entities_copy.append(first_label)

        return entities_copy

    def __combine_com_add(self, entities):
        """
        Merge adjacent COMPANY/ADDR pairs (COMPANYADDR or ADDRCOMPANY) into
        a single COMPANY entity.

        :param entities: list of [word, label] pairs, modified in place
        :return: None
        """
        company_index = -1
        addr_index = -1

        for i, entity in enumerate(entities):
            if self.ner_entities['COMPANY'] == entity[1]:
                company_index = i
            if self.ner_entities['ADDR'] == entity[1]:
                addr_index = i

        if company_index != -1 and addr_index != -1:
            if company_index == addr_index + 1:
                entities[company_index][
                    0] = entities[addr_index][0] + entities[company_index][0]
                entities.remove(entities[addr_index])
            elif company_index == addr_index - 1:
                entities[company_index][
                    0] = entities[company_index][0] + entities[addr_index][0]
                entities.remove(entities[addr_index])

    def __split_diff_labels(self, template_sen):
        """
        Detect directly-concatenated labels in a template sentence (e.g.
        "ADDRNAME"), which the segmenter cannot split, and separate them
        with "#".

        :param template_sen: template sentence
        :return: the sentence with concatenated labels separated
        """
        for i, label in enumerate(self.labels_list):
            if label in template_sen:
                template_sen = template_sen.replace(label,
                                                    self.labels_list_split[i])
        return template_sen

    def __convert_output_data_format(self, data_param):
        """
        Convert data_param into the format required by the question-answer
        graph module.

        :param data_param: dict with "raw_input", "new_input" and "labels"
        :return: dict with "query", "template" and "entity" keys
        """
        output = defaultdict()
        output["query"] = data_param["raw_input"]
        output["template"] = data_param["new_input"]
        entity = []
        for key, values in data_param["labels"].items():
            for v in values:
                begin = data_param["raw_input"].find(v)
                entity.append({
                    "type": key,
                    "value": v,
                    "code": self.code[key],
                    "begin": begin,
                    # NOTE(review): end is begin + len(v) + 1, one past the
                    # usual exclusive end — confirm consumers expect this.
                    "end": begin + len(v) + 1 if begin != -1 else -1
                })
        output["entity"] = entity
        return output

    def sentence_ner_entities(self, result):
        """
        Run BERT entity recognition over the sentence and substitute the
        recognized entities with their labels in the template.

        :param result: output of the account_label module; must contain
            "new_input" and "labels"
        :return: the converted output of __convert_output_data_format.
            NOTE(review): the early-exit path below returns a tuple
            (entities, result) while the normal path returns a single dict —
            callers see inconsistent types; confirm which is intended.
        """
        sentence = result["new_input"]

        sentence, pred_label_result = self.client.send_grpc_request_ner(
            sentence)

        word = ""
        label = ""
        entities = []

        if sentence is None or pred_label_result is None:
            return entities, result

        # Walk the BIO tag sequence and collect [word, label] entities.
        for i in range(len(sentence)):
            temp_label = pred_label_result[i]
            if temp_label[0] == 'B':
                if word != "":
                    if "##" in word:
                        word = word.replace('##', '')
                    entities.append([word, label])
                    word = ""

                if temp_label[2:] == 'ORG':
                    label = self.ner_entities['COMPANY']
                elif temp_label[2:] == 'PER':
                    label = self.ner_entities['NAME']
                elif temp_label[2:] == 'DATE':
                    label = self.ner_entities['DATE']
                else:
                    label = self.ner_entities['ADDR']

                word += sentence[i]
            elif temp_label[0] == 'I' and word != "":
                word += sentence[i]
            elif temp_label == 'O' and word != "":
                if "##" in word:
                    word = word.replace('##', '')
                entities.append([word, label])
                word = ""
                label = ""
        if word != "":
            if "##" in word:
                word = word.replace('##', '')
            entities.append([word, label])

        # Merge adjacent same-label entities, then collapse COMPANY+ADDR
        # pairs into a single COMPANY before a final COMPANY merge.
        if len(entities) != 0:
            entities = self.__combine_label(entities,
                                            label=self.ner_entities['ADDR'])
            entities = self.__combine_label(entities,
                                            label=self.ner_entities['COMPANY'])
            entities = self.__combine_label(entities,
                                            label=self.ner_entities['NAME'])
            entities = self.__combine_label(entities,
                                            label=self.ner_entities['DATE'])

            self.__combine_com_add(entities)
            entities = self.__combine_label(entities,
                                            label=self.ner_entities['COMPANY'])

        # Replace each recognized word with its label in the template and
        # record the word under that label.
        for (word, label) in entities:
            result["new_input"] = result["new_input"].replace(word, label)
            result["labels"].setdefault(label, []).append(word)

        result = self.__convert_output_data_format(result)
        return result