Beispiel #1
0
def get_numbers_from_tree(tree):
    """Collect number-like captured groups from the text lines of ``tree``.

    Every non-blank line that is not wrapped in ``<`` ... ``>`` is turned into
    a pattern via ``regex.build_regex``, compiled with ``re.compile`` (compiled
    patterns are memoised in the module-level ``compiled_regexes`` mapping) and
    matched against the line itself.  Each captured group whose text matches
    ``numbers_compiled_regex`` is appended to the result.

    Args:
        tree: Newline-separated text.

    Returns:
        A list of captured group strings, ordered by line and then by group.
    """
    res = []
    for line in tree.split('\n'):
        stripped = line.strip()
        if not stripped:
            continue
        # Lines that look like a tag (``<...>``) are skipped.
        if stripped.startswith('<') and stripped.endswith('>'):
            continue

        text_regex = regex.build_regex(line)
        if text_regex not in compiled_regexes:
            compiled_regexes[text_regex] = re.compile(text_regex)
        compiled_regex = compiled_regexes[text_regex]

        match = compiled_regex.match(line)
        if match is None:
            continue

        # ``groups()`` yields capture groups 1..N (group 0, the whole match,
        # is not included).  Iterating it directly also visits the last
        # group, which an index loop over ``range(len(groups))`` that skips
        # index 0 never reaches.
        for text in match.groups():
            # A group that did not take part in the match is ``None``.
            if text is None:
                continue
            if numbers_compiled_regex.match(text) is not None:
                res.append(text)

    return res
Beispiel #2
0
 def test_zero_or_one(self):
     # "ba?" is expected to match "ba" and "b", and not to match "aa".
     pattern = regex.build_regex("ba?")
     self.assertTrue(regex.match(pattern, "ba"))
     self.assertTrue(regex.match(pattern, "b"))
     self.assertFalse(regex.match(pattern, "aa"))
Beispiel #3
0
 def test_zero_or_one(self):
     # "ba?" is expected to match "ba" and "b", and not to match "aa".
     pattern = regex.build_regex("ba?")
     self.assertTrue(regex.match(pattern, "ba"))
     self.assertTrue(regex.match(pattern, "b"))
     self.assertFalse(regex.match(pattern, "aa"))
Beispiel #4
0
 def test_match_many(self):
     # "ab[cde]fg" is expected to match when one of c/d/e sits between
     # "ab" and "fg", and not to match "abfg".
     pattern = regex.build_regex("ab[cde]fg")
     for accepted in ("abcfg", "abdfg", "abefg"):
         self.assertTrue(regex.match(pattern, accepted))
     self.assertFalse(regex.match(pattern, "abfg"))
Beispiel #5
0
 def test_match_many(self):
     # "ab[cde]fg" is expected to match when one of c/d/e sits between
     # "ab" and "fg", and not to match "abfg".
     pattern = regex.build_regex("ab[cde]fg")
     for accepted in ("abcfg", "abdfg", "abefg"):
         self.assertTrue(regex.match(pattern, accepted))
     self.assertFalse(regex.match(pattern, "abfg"))
Beispiel #6
0
def build_regexes_for_all_unique_trees():
    """Build a per-line regex list for every entry of ``html_analysis[unique_trees]``.

    For each unique tree, the tree text is looked up in
    ``html_analysis[unique_trees_actual]`` under the value returned by
    ``get_adler_crc``.  The text is split on newlines and one entry is
    produced per line: ``None`` for blank lines and for lines wrapped in
    ``<`` ... ``>``, otherwise ``regex.build_regex`` of the stripped line with
    surrounding double quotes removed.  The resulting list is handed to
    ``add_regex_for_tree``.
    """
    print('len(html_analysis[unique_trees]):',
          len(html_analysis[unique_trees]))
    for unique_tree in html_analysis[unique_trees]:
        checksum = get_adler_crc(unique_tree)
        tree_text = html_analysis[unique_trees_actual][checksum]
        line_regexes = []
        for raw_line in tree_text.split('\n'):
            stripped = raw_line.strip()
            looks_like_tag = stripped.startswith('<') and stripped.endswith('>')
            if not stripped or looks_like_tag:
                # Blank lines and tag lines get a placeholder so the list
                # stays aligned with the tree's lines.
                line_regexes.append(None)
            else:
                line_regexes.append(regex.build_regex(stripped.strip('"')))
        add_regex_for_tree(unique_tree, line_regexes)
Beispiel #7
0
 def test_one_or_more_exception(self):
     # build_regex is expected to raise SyntaxError for the pattern "+".
     # assertRaises fails the test if no SyntaxError is raised.
     with self.assertRaises(SyntaxError):
         regex.build_regex("+")
Beispiel #8
0
 def test_multi_many3(self):
     # ".[cd]+.f" is expected NOT to match "cddcdccdchef".
     pattern = regex.build_regex(".[cd]+.f")
     self.assertFalse(regex.match(pattern, "cddcdccdchef"))
Beispiel #9
0
 def test_one_or_more3(self):
     # "a+" is expected NOT to match "b".
     pattern = regex.build_regex("a+")
     self.assertFalse(regex.match(pattern, "b"))
Beispiel #10
0
 def test_one_or_more3(self):
     # "a+" is expected NOT to match "b".
     pattern = regex.build_regex("a+")
     self.assertFalse(regex.match(pattern, "b"))
Beispiel #11
0
 def test_match_any_end(self):
     # "ab." is expected to match "abk".
     pattern = regex.build_regex("ab.")
     self.assertTrue(regex.match(pattern, "abk"))
Beispiel #12
0
 def test_match_zero_or_more_matches_any(self):
     # "a.*c" is expected to match a long string that starts with "a" and
     # ends with "c".
     subject = "aasdhfjkli ieuxnreu;anjanxeearunjkljadsxnfldjc"
     pattern = regex.build_regex("a.*c")
     self.assertTrue(regex.match(pattern, subject))
Beispiel #13
0
 def test_zero_or_one_with_many(self):
     # "h[abc]?d" is expected to match "had".
     pattern = regex.build_regex("h[abc]?d")
     self.assertTrue(regex.match(pattern, "had"))
Beispiel #14
0
 def test_dont_match_any1(self):
     # "ab[^cde]fg" is expected to match "abhfg".
     pattern = regex.build_regex("ab[^cde]fg")
     self.assertTrue(regex.match(pattern, "abhfg"))
Beispiel #15
0
 def test_dont_match_any1(self):
     # "ab[^cde]fg" is expected to match "abhfg".
     pattern = regex.build_regex("ab[^cde]fg")
     self.assertTrue(regex.match(pattern, "abhfg"))
Beispiel #16
0
 def test_multi_many3(self):
     # ".[cd]+.f" is expected NOT to match "cddcdccdchef".
     pattern = regex.build_regex(".[cd]+.f")
     self.assertFalse(regex.match(pattern, "cddcdccdchef"))
Beispiel #17
0
 def test_dont_match_any(self):
     # "ab[^cde]fg" is expected NOT to match "abcdefg".
     pattern = regex.build_regex("ab[^cde]fg")
     self.assertFalse(regex.match(pattern, "abcdefg"))
Beispiel #18
0
 def test_dont_match1(self):
     # "ab!cdef" is expected NOT to match "abcdef".
     pattern = regex.build_regex("ab!cdef")
     self.assertFalse(regex.match(pattern, "abcdef"))
Beispiel #19
0
 def test_dont_match(self):
     # "ab!cdef" is expected to match "abqdef".
     pattern = regex.build_regex("ab!cdef")
     self.assertTrue(regex.match(pattern, "abqdef"))
Beispiel #20
0
 def test_multi_many5(self):
     # ".+" is expected to match "aaasdf".
     pattern = regex.build_regex(".+")
     self.assertTrue(regex.match(pattern, "aaasdf"))
Beispiel #21
0
 def test_single_letter_at_end_of_long_string(self):
     # "e" is expected to match "abcde", where the letter is the last
     # character of the subject.
     pattern = regex.build_regex("e")
     self.assertTrue(regex.match(pattern, "abcde"))
Beispiel #22
0
 def test_dont_match1(self):
     # "ab!cdef" is expected NOT to match "abcdef".
     pattern = regex.build_regex("ab!cdef")
     self.assertFalse(regex.match(pattern, "abcdef"))
Beispiel #23
0
 def test_letter_not_in_string(self):
     # "e" is expected NOT to match "abcd", which contains no "e".
     pattern = regex.build_regex("e")
     self.assertFalse(regex.match(pattern, "abcd"))
Beispiel #24
0
 def test_multi_many5(self):
     # ".+" is expected to match "aaasdf".
     pattern = regex.build_regex(".+")
     self.assertTrue(regex.match(pattern, "aaasdf"))
Beispiel #25
0
 def test_char_pattern_in_string(self):
     # "This" is expected to match inside "Is This The Real Life".
     pattern = regex.build_regex("This")
     self.assertTrue(regex.match(pattern, "Is This The Real Life"))
Beispiel #26
0
 def test_match_zero_or_more_end1(self):
     # "a*b*c*" is expected to match "aaaaabbbbbbbc".
     pattern = regex.build_regex("a*b*c*")
     self.assertTrue(regex.match(pattern, "aaaaabbbbbbbc"))
Beispiel #27
0
 def test_match_any_char(self):
     # "." is expected to match the single character "a".
     pattern = regex.build_regex(".")
     self.assertTrue(regex.match(pattern, "a"))
Beispiel #28
0
 def test_escaped_chars(self):
     # The pattern (backslash, star, backslash, dot) is expected to match
     # the literal text "*.".
     # A raw string is used because "\*" and "\." are not valid Python
     # escape sequences; the runtime value of the pattern is unchanged.
     pattern = regex.build_regex(r"\*\.")
     self.assertTrue(regex.match(pattern, "*."))
Beispiel #29
0
 def test_match_any_char_longer(self):
     # "a.c" is expected to match "abc".
     pattern = regex.build_regex("a.c")
     self.assertTrue(regex.match(pattern, "abc"))
Beispiel #30
0
 def test_multi_many1(self):
     # "[cd]*" is expected to match "cddcdccdce".
     pattern = regex.build_regex("[cd]*")
     self.assertTrue(regex.match(pattern, "cddcdccdce"))
Beispiel #31
0
 def test_match_beginning(self):
     # ".bc" is expected to match "abc".
     pattern = regex.build_regex(".bc")
     self.assertTrue(regex.match(pattern, "abc"))
Beispiel #32
0
 def test_one_or_more_exception(self):
     # build_regex is expected to raise SyntaxError for the pattern "+".
     # assertRaises fails the test if no SyntaxError is raised.
     with self.assertRaises(SyntaxError):
         regex.build_regex("+")
Beispiel #33
0
 def test_match_any_end(self):
     # "ab." is expected to match "abk".
     pattern = regex.build_regex("ab.")
     self.assertTrue(regex.match(pattern, "abk"))
Beispiel #34
0
 def test_multi_many1(self):
     # "[cd]*" is expected to match "cddcdccdce".
     pattern = regex.build_regex("[cd]*")
     self.assertTrue(regex.match(pattern, "cddcdccdce"))
Beispiel #35
0
 def test_single_letter_at_end_of_long_string(self):
     # "e" is expected to match "abcde", where the letter is the last
     # character of the subject.
     pattern = regex.build_regex("e")
     self.assertTrue(regex.match(pattern, "abcde"))
Beispiel #36
0
 def test_multi_many4(self):
     # ".[cd]+.f" is expected to match "cddcdccdchf".
     pattern = regex.build_regex(".[cd]+.f")
     self.assertTrue(regex.match(pattern, "cddcdccdchf"))
Beispiel #37
0
 def test_letter_not_in_string(self):
     # "e" is expected NOT to match "abcd", which contains no "e".
     pattern = regex.build_regex("e")
     self.assertFalse(regex.match(pattern, "abcd"))
Beispiel #38
0
 def test_dont_match(self):
     # "ab!cdef" is expected to match "abqdef".
     pattern = regex.build_regex("ab!cdef")
     self.assertTrue(regex.match(pattern, "abqdef"))
Beispiel #39
0
 def test_match_any_char(self):
     # "." is expected to match the single character "a".
     pattern = regex.build_regex(".")
     self.assertTrue(regex.match(pattern, "a"))
Beispiel #40
0
 def test_dont_match_any(self):
     # "ab[^cde]fg" is expected NOT to match "abcdefg".
     pattern = regex.build_regex("ab[^cde]fg")
     self.assertFalse(regex.match(pattern, "abcdefg"))
Beispiel #41
0
 def test_match_beginning(self):
     # ".bc" is expected to match "abc".
     pattern = regex.build_regex(".bc")
     self.assertTrue(regex.match(pattern, "abc"))
Beispiel #42
0
 def test_single_letter(self):
     # "a" is expected to match "a".
     pattern = regex.build_regex("a")
     self.assertTrue(regex.match(pattern, "a"))
Beispiel #43
0
 def test_multi_many4(self):
     # ".[cd]+.f" is expected to match "cddcdccdchf".
     pattern = regex.build_regex(".[cd]+.f")
     self.assertTrue(regex.match(pattern, "cddcdccdchf"))
Beispiel #44
0
 def test_zero_or_one_with_many(self):
     # "h[abc]?d" is expected to match "had".
     pattern = regex.build_regex("h[abc]?d")
     self.assertTrue(regex.match(pattern, "had"))
Beispiel #45
0
 def test_match_zero_or_more_begining(self):
     # "a*b*c" is expected to match "bbbbbbbc", which contains no "a".
     pattern = regex.build_regex("a*b*c")
     self.assertTrue(regex.match(pattern, "bbbbbbbc"))
Beispiel #46
0
 def test_char_pattern_in_string(self):
     # "This" is expected to match inside "Is This The Real Life".
     pattern = regex.build_regex("This")
     self.assertTrue(regex.match(pattern, "Is This The Real Life"))
Beispiel #47
0
 def test_match_zero_or_more_end1(self):
     # "a*b*c*" is expected to match "aaaaabbbbbbbc".
     pattern = regex.build_regex("a*b*c*")
     self.assertTrue(regex.match(pattern, "aaaaabbbbbbbc"))
Beispiel #48
0
 def test_match_any_char_longer(self):
     # "a.c" is expected to match "abc".
     pattern = regex.build_regex("a.c")
     self.assertTrue(regex.match(pattern, "abc"))
Beispiel #49
0
 def test_match_zero_or_more_matches_empty(self):
     # "a*b*c*" is expected to match the empty string.
     pattern = regex.build_regex("a*b*c*")
     self.assertTrue(regex.match(pattern, ""))
Beispiel #50
0
 def test_single_letter(self):
     # "a" is expected to match "a".
     pattern = regex.build_regex("a")
     self.assertTrue(regex.match(pattern, "a"))
Beispiel #51
0
 def test_match_zero_or_more_matches_any(self):
     # "a.*c" is expected to match a long string that starts with "a" and
     # ends with "c".
     subject = "aasdhfjkli ieuxnreu;anjanxeearunjkljadsxnfldjc"
     pattern = regex.build_regex("a.*c")
     self.assertTrue(regex.match(pattern, subject))
Beispiel #52
0
 def test_match_zero_or_more_begining(self):
     # "a*b*c" is expected to match "bbbbbbbc", which contains no "a".
     pattern = regex.build_regex("a*b*c")
     self.assertTrue(regex.match(pattern, "bbbbbbbc"))
Beispiel #53
0
 def test_match_complicated(self):
     # A pattern mixing "." and "*" is expected to match a full sentence.
     sentence = "a dog was walking down the street"
     pattern = regex.build_regex("a .og was* wa*lking down .h. stre*t")
     self.assertTrue(regex.match(pattern, sentence))
Beispiel #54
0
 def test_match_zero_or_more_matches_empty(self):
     # "a*b*c*" is expected to match the empty string.
     pattern = regex.build_regex("a*b*c*")
     self.assertTrue(regex.match(pattern, ""))
Beispiel #55
0
 def test_escaped_chars(self):
     # The pattern (backslash, star, backslash, dot) is expected to match
     # the literal text "*.".
     # A raw string is used because "\*" and "\." are not valid Python
     # escape sequences; the runtime value of the pattern is unchanged.
     pattern = regex.build_regex(r"\*\.")
     self.assertTrue(regex.match(pattern, "*."))
Beispiel #56
0
 def test_match_complicated(self):
     # A pattern mixing "." and "*" is expected to match a full sentence.
     sentence = "a dog was walking down the street"
     pattern = regex.build_regex("a .og was* wa*lking down .h. stre*t")
     self.assertTrue(regex.match(pattern, sentence))
Beispiel #57
0
 def test_exception(self):
     # build_regex is expected to raise SyntaxError for the pattern "**.".
     # assertRaises fails the test if no SyntaxError is raised.
     with self.assertRaises(SyntaxError):
         regex.build_regex("**.")
Beispiel #58
0
 def test_exception(self):
     # build_regex is expected to raise SyntaxError for the pattern "**.".
     # assertRaises fails the test if no SyntaxError is raised.
     with self.assertRaises(SyntaxError):
         regex.build_regex("**.")
Beispiel #59
0
 def test_one_or_more2(self):
     # "a+" is expected to match "bab".
     pattern = regex.build_regex("a+")
     self.assertTrue(regex.match(pattern, "bab"))