def __preHandling(self):
    """Pre-process ``self.target`` in place before it is matched.

    Two regex patterns are handed to ``StringPreHandler.delKeyword`` in
    turn, then the result is passed through
    ``StringPreHandler.numberTranslator``.  According to the original
    author's notes these steps strip whitespace, strip the particle
    "的", and convert written-out (Chinese) numerals to digits; the
    helpers themselves are defined outside this block.

    :return: None; ``self.target`` is overwritten with the cleaned text.
    """
    removal_patterns = (
        u"\\s+",    # runs of whitespace
        u"[的]+",   # runs of the particle "的"
    )
    for pattern in removal_patterns:
        self.target = StringPreHandler.delKeyword(self.target, pattern)

    # Numeral conversion runs last, on the already-cleaned text.
    self.target = StringPreHandler.numberTranslator(self.target)
def _filter(self, input_query):
    """Normalise a raw query string before the time rules are applied.

    Steps, in order:

    1. Run the text through ``StringPreHandler.numberTranslator``
       (defined elsewhere; presumably converts written-out numerals to
       digits, which the digit-based patterns below rely on).
    2. If the text contains ``<digit>月<digits>`` and there is no "日" or
       "号" anywhere from the first "月" onwards, insert "号" right after
       the day digits, e.g. "3月5" becomes "3月5号".
    3. If the text contains no "月" at all, remove every "个", so that
       phrases such as "下个周末" become "下周末".
    4. Rewrite a fixed list of colloquial terms to the forms used by the
       rest of the pipeline, and normalise the full-width colon.

    :param input_query: the raw query text.
    :return: the normalised query text.
    """
    input_query = StringPreHandler.numberTranslator(input_query)

    # A single search is enough: "[0-9]月[0-9]+" matches exactly when
    # "[0-9]月[0-9]" does, and starts at the same leftmost position.
    day_match = re.search(u"[0-9]月[0-9]+", input_query)
    if day_match is not None:
        # Look for a day marker from the first "月" in the text onwards.
        month_pos = input_query.find('月')
        if re.search(u"日|号", input_query[month_pos:]) is None:
            end = day_match.end()
            input_query = input_query[:end] + '号' + input_query[end:]

    # "个" is only dropped when no month is mentioned, so expressions such
    # as "3个月" keep their measure word.
    if u"月" not in input_query:
        input_query = input_query.replace('个', '')

    # Applied strictly in this order.
    substitutions = (
        ('中旬', '15号'),
        ('傍晚', '午后'),
        ('大年', ''),
        ('五一', '劳动节'),
        ('白天', '早上'),
        # Full-width colon (U+FF1A) -> ASCII colon.  The previous code
        # replaced ':' with itself, which had no effect.
        (u'\uff1a', ':'),
    )
    for old, new in substitutions:
        input_query = input_query.replace(old, new)
    return input_query