def lang2pattern(cls, lang):
    """Compile the case-insensitive, right-bounded pattern for ``lang``.

    The pattern is ``cls.rstr()`` constrained on its right side by a list
    of bounds made of two groups, in this order:

    1. the right word bounds combined with the trend regex for ``lang``
       via ``RegexTool.bounds2prefixed`` (presumably so the entity may be
       written directly next to a trend word -- confirm against
       ``RegexTool``);
    2. the plain right word bounds.

    :param lang: language passed to ``TrendEntity.lang2rstr``.
    :return: pattern compiled with ``re.I``.
    """
    # Imported locally, as in the original (likely to avoid an import cycle
    # -- TODO confirm).
    from henrique.main.document.price.trend.trend_entity import TrendEntity

    logger = HenriqueLogger.func_level2logger(cls.lang2pattern, logging.DEBUG)

    trend_rstr = TrendEntity.lang2rstr(lang)

    trend_prefixed_bounds = RegexTool.bounds2prefixed(
        RegexTool.right_wordbounds(),
        trend_rstr,
    )
    plain_bounds = RegexTool.right_wordbounds()
    bounds = lchain(trend_prefixed_bounds, plain_bounds)

    rstr_rightbound = RegexTool.rstr2right_bounded(cls.rstr(), bounds)
    logger.debug({"rstr_rightbound": rstr_rightbound})

    return re.compile(rstr_rightbound, re.I)
def test_03(self):
    # Exercise one-sided bounding of the literal "asdf" with the default
    # word bounds provided by RegexTool.
    target = "asdf"
    text_ending_with_target = "ijilijasdf"
    text_starting_with_target = "asdfuhuef"

    # Right-bounded: matches only when nothing word-like follows "asdf".
    right_bounded = RegexTool.rstr2right_bounded(
        target,
        RegexTool.right_wordbounds(),
    )
    self.assertTrue(re.search(right_bounded, text_ending_with_target))
    self.assertFalse(re.search(right_bounded, text_starting_with_target))

    # Left-bounded: matches only when nothing word-like precedes "asdf".
    left_bounded = RegexTool.rstr2left_bounded(
        target,
        RegexTool.left_wordbounds(),
    )
    self.assertFalse(re.search(left_bounded, text_ending_with_target))
    self.assertTrue(re.search(left_bounded, text_starting_with_target))
def pattern_number(cls):
    """Compile a case-insensitive pattern for a bounded 1-2 digit number.

    The digits must be preceded by one of ``RegexTool.left_wordbounds()``
    and followed by either one of ``RegexTool.right_wordbounds()`` or any
    entry from the flattened values of
    ``TimedeltaEntityUnit.gazetteer_all()``.

    :return: pattern compiled with ``re.I``.
    """
    digits_left_bounded = RegexTool.rstr2left_bounded(
        r"\d{1,2}",
        RegexTool.left_wordbounds(),
    )

    word_bounds = RegexTool.right_wordbounds()
    # Every gazetteer value is an iterable of strings; flatten them all
    # into one list so each string is accepted as a right bound.
    unit_bounds = lchain(*TimedeltaEntityUnit.gazetteer_all().values())
    right_bounds = lchain(word_bounds, unit_bounds)

    bounded = RegexTool.rstr2right_bounded(digits_left_bounded, right_bounds)
    return re.compile(bounded, re.I)
def pattern_suffix(cls):
    """Compile a pattern for a run of digits bounded on both sides.

    Right side: any of ``RegexTool.right_wordbounds()``, or any of those
    bounds combined with the literal ``시`` via ``RegexTool.bound2prefixed``
    (presumably the Korean "o'clock" marker -- confirm with callers).

    Left side: each of ``RegexTool.left_wordbounds()``, plus the same bound
    with a ``{1,2}`` quantifier appended.

    :return: compiled pattern (no flags).
    """
    right_bounds = lchain(
        RegexTool.right_wordbounds(),
        [
            RegexTool.bound2prefixed(bound, r"시")
            for bound in RegexTool.right_wordbounds()
        ],
    )
    rstr_rightbounded = RegexTool.rstr2right_bounded(r"\d+", right_bounds)

    # Each left bound is offered twice: as-is, and repeated one or two times.
    left_bounds = []
    for bound in RegexTool.left_wordbounds():
        left_bounds.append(bound)
        left_bounds.append(r"{}{}".format(bound, r"{1,2}"))

    # The original passed the undefined name ``rstr_rightbound`` here, which
    # raised NameError on every call; the right-bounded regex built above is
    # what must be wrapped.
    rstr_bound = RegexTool.rstr2left_bounded(rstr_rightbounded, left_bounds)
    return re.compile(rstr_bound)