def texts2pattern(texts):
    """Compile a case-insensitive pattern matching any of the given literal texts.

    Each text is passed through ``re.escape`` and the results are combined by
    ``RegexTool.rstr_iter2or``. The combined expression is then wrapped by
    ``RegexTool.rstr2bounded`` using:

    * left bounds: the standard left word bounds suffixed with a digit class
      (via ``RegexTool.bounds2suffixed``), followed by the plain left word
      bounds;
    * right bounds: the standard right word bounds.

    :param texts: iterable of literal strings to match.
    :return: compiled regular expression (``re.I``).
    """
    rstr_raw = RegexTool.rstr_iter2or(map(re.escape, texts))

    left_bounds = lchain(
        # Raw string: "\d" in a plain literal is an invalid escape sequence
        # (SyntaxWarning on modern Python); the runtime value is unchanged.
        RegexTool.bounds2suffixed(RegexTool.left_wordbounds(), r"\d"),
        RegexTool.left_wordbounds(),
    )
    right_bounds = RegexTool.right_wordbounds()

    rstr = RegexTool.rstr2bounded(rstr_raw, left_bounds, right_bounds)
    logger.debug({
        "rstr": rstr,
        "rstr_raw": rstr_raw,
    })
    return re.compile(rstr, re.I)
def pattern_hour(cls):
    """Return a compiled, case-insensitive pattern for a run of digits.

    The digit expression is handed to ``RegexTool.rstr2bounded`` together with
    the standard left word bounds and, on the right, the standard right word
    bounds extended with a literal colon.
    """
    bounds_before = RegexTool.left_wordbounds()
    # A colon is accepted as an additional right-hand bound alternative.
    bounds_after = lchain(RegexTool.right_wordbounds(), [r":"])

    bounded_digits = RegexTool.rstr2bounded(r"\d+", bounds_before, bounds_after)
    return re.compile(bounded_digits, re.I)
def test_03(self):
    """Check the right- and left-bounded variants of the literal ``asdf``.

    The right-bounded pattern must be found when ``asdf`` ends the string and
    not when more letters follow; the left-bounded pattern must behave the
    opposite way with respect to preceding letters.
    """
    base = "asdf"
    ends_with_base = "ijilijasdf"
    starts_with_base = "asdfuhuef"

    right_bounded = RegexTool.rstr2right_bounded(base, RegexTool.right_wordbounds())
    self.assertTrue(re.search(right_bounded, ends_with_base))
    self.assertFalse(re.search(right_bounded, starts_with_base))

    left_bounded = RegexTool.rstr2left_bounded(base, RegexTool.left_wordbounds())
    self.assertFalse(re.search(left_bounded, ends_with_base))
    self.assertTrue(re.search(left_bounded, starts_with_base))
def pattern_number(cls):
    """Return a compiled, case-insensitive pattern for a one- or two-digit number.

    The digits are first left-bounded with the standard left word bounds, then
    right-bounded with the standard right word bounds plus every string found
    in the value lists of ``TimedeltaEntityUnit.gazetteer_all()``.
    """
    digits_left_bounded = RegexTool.rstr2left_bounded(
        r"\d{1,2}",
        RegexTool.left_wordbounds(),
    )

    plain_right_bounds = RegexTool.right_wordbounds()
    # Flatten all gazetteer value lists into one list of bound alternatives.
    # NOTE(review): these strings are passed on without re.escape here;
    # confirm they are safe to use as regex fragments.
    unit_bounds = lchain(*TimedeltaEntityUnit.gazetteer_all().values())
    all_right_bounds = lchain(plain_right_bounds, unit_bounds)

    fully_bounded = RegexTool.rstr2right_bounded(digits_left_bounded, all_right_bounds)
    return re.compile(fully_bounded, re.I)
def pattern_suffix(cls):
    """Return a compiled pattern for a digit run with extended word bounds.

    Right bounds are the standard right word bounds plus each of them
    prefixed with "시" (via ``RegexTool.bound2prefixed``; presumably the
    Korean hour marker, confirm with the caller). Left bounds are the
    standard left word bounds, each also offered with a ``{1,2}`` quantifier
    appended.

    :return: compiled regular expression (no flags).
    """
    right_bounds = lchain(
        RegexTool.right_wordbounds(),
        [
            RegexTool.bound2prefixed(b, r"시")
            for b in RegexTool.right_wordbounds()
        ],
    )
    rstr_rightbounded = RegexTool.rstr2right_bounded(r"\d+", right_bounds)

    def bound_iter_left():
        # Yield every left bound as-is and again with "{1,2}" appended.
        for b in RegexTool.left_wordbounds():
            yield b
            yield r"{}{}".format(b, r"{1,2}")

    bound_list_left = list(bound_iter_left())

    # Fix: the original passed the undefined name `rstr_rightbound` here,
    # which raised NameError on every call.
    rstr_bound = RegexTool.rstr2left_bounded(rstr_rightbounded, bound_list_left)
    return re.compile(rstr_bound)
def bound_iter_left():
    """Yield each left word bound, then that bound with ``{1,2}`` appended."""
    for bound in RegexTool.left_wordbounds():
        yield from (bound, r"{}{}".format(bound, r"{1,2}"))
def texts2pattern_word(cls, texts):
    """Return a compiled pattern for ``texts`` wrapped in word bounds.

    The expression produced by ``cls.texts2regex(texts)`` is wrapped by
    ``RegexTool.rstr2bounded`` with the standard left and right word bounds.
    No flags are passed to ``re.compile``.
    """
    inner = cls.texts2regex(texts)
    bounds_before = RegexTool.left_wordbounds()
    bounds_after = RegexTool.right_wordbounds()
    word_bounded = RegexTool.rstr2bounded(inner, bounds_before, bounds_after)
    # re.I can be dealt with normalizer
    return re.compile(word_bounded)