Beispiel #1
0
        def texts2pattern(texts):
            """Compile a case-insensitive pattern matching any of ``texts`` between bounds.

            Each text is escaped with ``re.escape`` and the escaped alternatives
            are combined through ``RegexTool.rstr_iter2or``. The combined
            expression is then wrapped by ``RegexTool.rstr2bounded``.

            Args:
                texts: Iterable of literal strings to match.

            Returns:
                The pattern produced by ``re.compile`` with the ``re.I`` flag.
            """
            rstr_raw = RegexTool.rstr_iter2or(map(re.escape, texts))

            # Left bounds: the ``\d``-suffixed variants of the left word bounds
            # come first, followed by the plain left word bounds.
            # NOTE(review): presumably this lets a digit sit directly before the
            # matched text -- confirm against RegexTool.bounds2suffixed.
            left_bounds = lchain(
                # Raw string on purpose: "\d" in a plain literal is an invalid
                # escape sequence and triggers a warning on modern Python.
                RegexTool.bounds2suffixed(RegexTool.left_wordbounds(), r"\d"),
                RegexTool.left_wordbounds(),
            )
            right_bounds = RegexTool.right_wordbounds()

            rstr = RegexTool.rstr2bounded(rstr_raw, left_bounds, right_bounds)
            logger.debug({
                "rstr": rstr,
                "rstr_raw": rstr_raw,
            })
            return re.compile(rstr, re.I)
Beispiel #2
0
    def pattern_hour(cls):
        """Return a compiled pattern for a run of digits between bounds.

        The left side uses ``RegexTool.left_wordbounds()``; the right side
        accepts ``RegexTool.right_wordbounds()`` plus a literal colon.
        The pattern is compiled with ``re.I``.
        """
        bounds_before = RegexTool.left_wordbounds()
        # A colon is accepted as an additional right-hand bound.
        bounds_after = lchain(RegexTool.right_wordbounds(), [r":"])

        return re.compile(
            RegexTool.rstr2bounded(r"\d+", bounds_before, bounds_after),
            re.I,
        )
Beispiel #3
0
    def test_03(self):
        """Check one-sided bounding of a regex string with word bounds.

        A right-bounded pattern must be found when the word ends the sample
        and not when more letters follow it; a left-bounded pattern behaves
        the opposite way.
        """
        word = "asdf"
        word_at_end = "ijilijasdf"
        word_at_start = "asdfuhuef"

        right_bounded = RegexTool.rstr2right_bounded(
            word,
            RegexTool.right_wordbounds(),
        )
        self.assertTrue(re.search(right_bounded, word_at_end))
        self.assertFalse(re.search(right_bounded, word_at_start))

        left_bounded = RegexTool.rstr2left_bounded(
            word,
            RegexTool.left_wordbounds(),
        )
        self.assertFalse(re.search(left_bounded, word_at_end))
        self.assertTrue(re.search(left_bounded, word_at_start))
Beispiel #4
0
    def pattern_number(cls):
        """Return a compiled, case-insensitive pattern for a 1-2 digit number.

        The number is left-bounded by ``RegexTool.left_wordbounds()`` and
        right-bounded by either a right word bound or any value taken from
        ``TimedeltaEntityUnit.gazetteer_all()``.
        """
        left_bounded = RegexTool.rstr2left_bounded(
            r"\d{1,2}",
            RegexTool.left_wordbounds(),
        )

        wordbounds_after = RegexTool.right_wordbounds()
        # Flatten every gazetteer value list into one list of extra bounds.
        unit_bounds = lchain(*TimedeltaEntityUnit.gazetteer_all().values())
        bounds_after = lchain(wordbounds_after, unit_bounds)

        fully_bounded = RegexTool.rstr2right_bounded(left_bounded, bounds_after)
        return re.compile(fully_bounded, re.I)
Beispiel #5
0
    def pattern_suffix(cls):
        """Return a compiled pattern for a run of digits between bounds.

        Right side: ``RegexTool.right_wordbounds()`` plus each of those bounds
        prefixed with ``시`` through ``RegexTool.bound2prefixed``.
        Left side: every ``RegexTool.left_wordbounds()`` entry, both as-is and
        with the ``{1,2}`` quantifier appended.

        Returns:
            The pattern produced by ``re.compile`` (no flags).
        """
        right_bounds = lchain(
            RegexTool.right_wordbounds(),
            [
                RegexTool.bound2prefixed(b, r"시")
                for b in RegexTool.right_wordbounds()
            ],
        )

        rstr_rightbounded = RegexTool.rstr2right_bounded(r"\d+", right_bounds)

        def bound_iter_left():
            """Yield each left word bound, then the same bound with ``{1,2}`` appended."""
            for b in RegexTool.left_wordbounds():
                yield b
                yield r"{}{}".format(b, r"{1,2}")

        bound_list_left = list(bound_iter_left())
        # Fixed: this previously referenced the undefined name
        # ``rstr_rightbound``, which raised NameError on every call.
        rstr_bound = RegexTool.rstr2left_bounded(rstr_rightbounded,
                                                 bound_list_left)

        return re.compile(rstr_bound)
Beispiel #6
0
 def bound_iter_left():
     """Yield each left word bound, then that bound with ``{1,2}`` appended."""
     for bound in RegexTool.left_wordbounds():
         yield bound
         # Same bound with the ``{1,2}`` quantifier text appended.
         quantified = r"{}{}".format(bound, r"{1,2}")
         yield quantified
Beispiel #7
0
 def texts2pattern_word(cls, texts):
     """Return a compiled pattern for ``texts`` wrapped in word bounds.

     The raw expression comes from ``cls.texts2regex(texts)`` and is bounded
     on both sides through ``RegexTool.rstr2bounded``.
     """
     bounded = RegexTool.rstr2bounded(
         cls.texts2regex(texts),
         RegexTool.left_wordbounds(),
         RegexTool.right_wordbounds(),
     )
     # No re.I flag: case-insensitivity can be dealt with by the normalizer.
     return re.compile(bounded)