def pattern_hour(cls):
    """Return a compiled, case-insensitive pattern for the hour digits.

    The core expression is ``\\d+``. It is bounded on the left by
    ``RegexTool.left_wordbounds()`` and on the right by
    ``RegexTool.right_wordbounds()`` extended with a literal ``:``.
    """
    bounds_before = RegexTool.left_wordbounds()

    # A colon is accepted as a right bound in addition to the regular
    # right word bounds (presumably for "HH:MM"-like input -- confirm).
    bounds_after = lchain(
        RegexTool.right_wordbounds(),
        [r":"],
    )

    bounded = RegexTool.rstr2bounded(r"\d+", bounds_before, bounds_after)
    return re.compile(bounded, re.I)
def lang2pattern(cls, lang):
    """Return the compiled, case-insensitive pattern for ``lang``.

    The raw expression comes from ``cls.lang2rstr(lang)``. It is bounded on
    the left by ``RateEntity.rstr_last_char()`` or whitespace, and on the
    right by ``RegexTool.right_wordbounds()``.

    :param lang: language key forwarded to ``cls.lang2rstr``.
    :return: compiled ``re`` pattern (``re.I``).
    """
    # Function-scope import kept as in the original
    # (presumably to avoid a circular import -- confirm).
    from henrique.main.document.price.rate.rate_entity import RateEntity

    logger = HenriqueLogger.func_level2logger(cls.lang2pattern, logging.DEBUG)

    bounds_before = [RateEntity.rstr_last_char(), r"\s"]
    bounds_after = RegexTool.right_wordbounds()

    bounded = RegexTool.rstr2bounded(
        cls.lang2rstr(lang),
        bounds_before,
        bounds_after,
    )
    logger.debug({"left_bounds": bounds_before, "rstr": bounded})

    return re.compile(bounded, re.I)
def texts2pattern(texts):
    """Return a compiled, case-insensitive pattern matching any of ``texts``.

    Each text is escaped with ``re.escape`` and the results are combined
    with ``RegexTool.rstr_iter2or``. The alternation is then bounded:

    * left: every left word bound suffixed with ``\\d`` (via
      ``RegexTool.bounds2suffixed``), followed by the plain left word bounds;
    * right: ``RegexTool.right_wordbounds()``.

    :param texts: iterable of literal strings to match.
    :return: compiled ``re`` pattern (``re.I``).
    """
    rstr_raw = RegexTool.rstr_iter2or(map(re.escape, texts))

    # Raw string: a plain "\d" is an invalid escape sequence and triggers a
    # DeprecationWarning/SyntaxWarning on modern Python. Runtime value is
    # unchanged.
    left_bounds = lchain(
        RegexTool.bounds2suffixed(RegexTool.left_wordbounds(), r"\d"),
        RegexTool.left_wordbounds(),
    )
    right_bounds = RegexTool.right_wordbounds()

    rstr = RegexTool.rstr2bounded(rstr_raw, left_bounds, right_bounds)

    # NOTE(review): `logger` is not bound inside this function; presumably a
    # module-level logger -- confirm it exists in the enclosing scope.
    logger.debug({
        "rstr": rstr,
        "rstr_raw": rstr_raw,
    })
    return re.compile(rstr, re.I)
def texts2pattern_word(cls, texts):
    """Return a compiled pattern matching ``texts`` between word bounds.

    The raw expression comes from ``cls.texts2regex(texts)`` and is wrapped
    with ``RegexTool.left_wordbounds()`` / ``RegexTool.right_wordbounds()``.

    :param texts: texts forwarded to ``cls.texts2regex``.
    :return: compiled ``re`` pattern, compiled without flags.
    """
    core = cls.texts2regex(texts)
    bounds_before = RegexTool.left_wordbounds()
    bounds_after = RegexTool.right_wordbounds()
    bounded = RegexTool.rstr2bounded(core, bounds_before, bounds_after)

    # No re.I here: per the original note, case-insensitivity can be
    # handled by a normalizer instead.
    return re.compile(bounded)