Example #1
0
 def check_strictness_low(self, annotations: List[str],
                          texts: List[Tuple[str, str]]):
     """Assert that LOW annotation strictness matches every given text.

     For each annotation, a regex is built from the first element of each
     text pair; that regex must be found in the concatenation of both
     elements of the pair.

     :param annotations: Annotations handed to ``make_annotation_regex``,
         one at a time.
     :param texts: Pairs whose first element is passed to the builder and
         whose second element is appended to form the string searched.
     """
     builder = regen.RegexBuilder(annotation_mode=regen.Strictness.LOW)
     for annotation in annotations:
         for pair in texts:
             stroke = pair[0]
             pattern = builder.make_annotation_regex(stroke, annotation)
             self.assertRegex(stroke + pair[1], pattern)
Example #2
0
 def test_starting_letters_no_fix_first_uncertainty_one(self):
     """Check starting letters with uncertainty 1 and ``fix_first`` off.

     Each table entry is ``(interpretations, expectations)``:
     ``interpretations`` is a list of token lists and ``expectations`` is
     a list of ``(letter, expected)`` pairs handed to ``run_tests``.
     """
     builder = regen.RegexBuilder(search_mode=regen.SearchMode.MATCH,
                                  uncertainty=1,
                                  fix_first=False)
     tests = [([["A"]], [("A", True), ("E", True), ("I", True)]),
              # The interpretation list must be closed before the
              # expectations begin; otherwise the entry collapses into a
              # single list and the token list is read as the
              # interpretations.
              ([["'", "I"]], [("I", True), ("E", False), ("A", True)]),
              ([["NT"], ["DN"]], [("N", True), ("T", True), ("D", True),
                                  ("M", True)])]
     self.run_tests(builder, tests)
Example #3
0
 def run_tests(self, uncertainty, tests):
     """Check uncertainty regexes against expected full-match outcomes.

     :param uncertainty: Passed through to ``make_uncertainty_regex``.
     :param tests: Iterable of ``(stroke, cases)`` entries, where ``cases``
         yields ``(text, expected)`` pairs. ``expected`` is truthy when the
         regex built for ``stroke`` should fully match ``text``.
     """
     builder = regen.RegexBuilder()
     for test in tests:
         stroke = test[0]
         for text, expected in test[1]:
             regex = builder.make_uncertainty_regex(stroke, uncertainty)
             # Label the sub-test so a failure report names the exact case
             # instead of an anonymous sub-test.
             with self.subTest(stroke=stroke,
                               uncertainty=uncertainty,
                               text=text):
                 if expected:
                     self.assertTrue(re.fullmatch(regex, text))
                 else:
                     self.assertFalse(re.fullmatch(regex, text))
Example #4
0
 def check_strictness_medium(self, strokes, tests):
     """Run annotation-regex expectations under MEDIUM strictness.

     :param strokes: Strokes to combine with every test entry.
     :param tests: Entries indexed as ``(annotations, cases)``; ``cases``
         yields ``(text, expected)`` pairs. When ``expected`` is truthy the
         regex must be found in ``stroke + text``, otherwise it must not.
     """
     builder = regen.RegexBuilder(annotation_mode=regen.Strictness.MEDIUM)
     for stroke in strokes:
         for entry in tests:
             annotations = entry[0]
             for suffix, should_match in entry[1]:
                 pattern = builder.make_annotation_regex(stroke,
                                                         annotations)
                 candidate = stroke + suffix
                 with self.subTest(annotations=annotations, text=candidate):
                     check = (self.assertRegex if should_match
                              else self.assertNotRegex)
                     check(candidate, pattern)
Example #5
0
 def test_strictness_high(self):
     """Check disjoiner handling with ``disjoiner_mode`` set to HIGH.

     The table expects exactly one candidate text to match each
     interpretation: the one equal to the interpretation's tokens joined
     together. Every other candidate is expected not to match.
     """
     builder = regen.RegexBuilder(disjoiner_mode=regen.Strictness.HIGH)
     two_letter = ["AB", "A^B"]
     three_letter = ["ABD", "A^BD", "AB^D", "A^B^D"]
     interpretations = [
         (["A", "B"], two_letter),
         (["A", "^", "B"], two_letter),
         (["A", "B", "D"], three_letter),
         (["A", "^", "B", "D"], three_letter),
         (["A", "B", "^", "D"], three_letter),
         (["A", "^", "B", "^", "D"], three_letter),
     ]
     tests = [
         (tokens, [(text, text == "".join(tokens)) for text in candidates])
         for tokens, candidates in interpretations
     ]
     self.run_tests(builder, tests)
Example #6
0
 def check_strictness_high(self, strokes, tests):
     """Run annotation-regex expectations under HIGH strictness.

     :param strokes: Strokes to combine with every test entry.
     :param tests: Entries indexed as ``(annotations, cases)``; ``cases``
         yields ``(text, expected)`` pairs. When ``expected`` is truthy the
         regex must fully match ``stroke + text``, otherwise it must not.
     """
     builder = regen.RegexBuilder(annotation_mode=regen.Strictness.HIGH)
     for stroke in strokes:
         for entry in tests:
             annotations = entry[0]
             for suffix, should_match in entry[1]:
                 pattern = builder.make_annotation_regex(stroke,
                                                         annotations)
                 candidate = stroke + suffix
                 with self.subTest(annotations=annotations, text=candidate):
                     verdict = (self.assertTrue if should_match
                                else self.assertFalse)
                     verdict(re.fullmatch(pattern, candidate))
Example #7
0
    def search(self, **kwargs):
        """Run a search for a grascii string and return the results.

        :param grascii: [Required] The grascii string to use in the search.
        :param uncertainty: The uncertainty of the grascii string.
        :param search_mode: The search mode to use.
        :param annotation_mode: How to handle annotations in the search.
        :param aspirate_mode: How to handle aspirates in the search.
        :param disjoiner_mode: How to handle disjoiners in the search.
        :param fix_first: Apply an uncertainty of 0 to the first token.
        :param interpretation: How to handle ambiguous grascii strings.
        :type grascii: str
        :type uncertainty: int: 0, 1, or 2
        :type search_mode: str: one of regen.SearchMode values
        :type annotation_mode: one of regen.Strictness values
        :type aspirate_mode: one of regen.Strictness values
        :type disjoiner_mode: one of regen.Strictness values
        :type fix_first: bool
        :type interpretation: "best" or "all"
        :returns: A list of search results.
        :rtype: List[str]
        :raises KeyError: If the ``grascii`` keyword argument is missing.
        :raises ValueError: If parsing the grascii string yields nothing.
        """

        # The string is upper-cased before it is parsed.
        grascii = kwargs["grascii"].upper()
        self.extract_search_args(**kwargs)
        tree = self.parse_grascii(grascii)
        if not tree:
            # ValueError is a subclass of Exception, so callers that catch
            # Exception keep working while getting a usable message.
            raise ValueError(
                "Could not parse grascii string: {!r}".format(grascii))

        interpretations = self.flatten_tree(tree)
        interpretations = list(
            self.get_unique_interpretations(interpretations).values())
        builder = regen.RegexBuilder(uncertainty=self.uncertainty,
                                     search_mode=self.search_mode,
                                     aspirate_mode=self.aspirate_mode,
                                     annotation_mode=self.annotation_mode,
                                     disjoiner_mode=self.disjoiner_mode,
                                     fix_first=self.fix_first)

        # "best" keeps only the first interpretation; any other mode
        # searches with all of them.
        interps = interpretations[
            0:1] if self.interpretation_mode == "best" else interpretations
        patterns = builder.generate_patterns_map(interps)
        starting_letters = builder.get_starting_letters(interps)

        results = self.perform_search(patterns, starting_letters,
                                      metrics.standard)
        return list(results)
Example #8
0
    def interactive_search(self, previous: str = None) -> Optional[str]:
        """Prompt for a search, run it, and page through the results.

        :param previous: The previous search performed in this interactive
            session.
        :returns: The search string used, or ``previous`` when no search
            string was obtained.
        """

        search, tree = self.get_grascii_search(previous)
        if search is None:
            return previous

        unique = self.get_unique_interpretations(self.flatten_tree(tree))
        interpretations = list(unique.values())
        choice = self.choose_interpretation(interpretations)
        builder = regen.RegexBuilder(uncertainty=self.uncertainty,
                                     search_mode=self.search_mode,
                                     fix_first=self.fix_first,
                                     annotation_mode=self.annotation_mode,
                                     aspirate_mode=self.aspirate_mode,
                                     disjoiner_mode=self.disjoiner_mode)
        # A choice of 0 selects every interpretation; otherwise the choice
        # is a 1-based position into the list.
        interps = (interpretations if choice == 0
                   else interpretations[choice - 1:choice])
        patterns = builder.generate_patterns_map(interps)
        starting_letters = builder.get_starting_letters(interps)
        results = self.perform_search(patterns, starting_letters,
                                      metrics.standard)

        count = 0
        display_all = False
        for count, result in enumerate(results, 1):
            if display_all:
                action = "Next"
            else:
                action = questionary.select(
                    "Search Results",
                    ["Next", "Display All", "End Search"]).ask()
            # The current result is printed before the chosen action is
            # acted upon.
            print(result.strip())
            if action in (None, "End Search"):
                break
            if action == "Display All":
                display_all = True

        print("Results:", count)
        print()
        return search
Example #9
0
 def test_strictness_low(self):
     """Check aspirate handling with ``aspirate_mode`` set to LOW.

     Every interpretation in the table is expected to match every listed
     text, whatever the placement of aspirate marks in either of them.
     """
     builder = regen.RegexBuilder(aspirate_mode=regen.Strictness.LOW)
     one_letter = ["A", "'A"]
     three_letter = ["ADE", "'ADE", "A'DE", "AD'E",
                     "'A'DE", "'AD'E", "A'D'E", "'A'D'E"]
     interpretations = [
         (["A"], one_letter),
         (["'", "A"], one_letter),
         (["A", "D", "E"], three_letter),
         (["'", "A", "D", "E"], three_letter),
         (["'", "A", "'", "D", "E"], three_letter),
         (["'", "A", "'", "D", "'", "E"], three_letter),
         (["A", "'", "D", "'", "E"], three_letter),
         (["'", "A", "D", "'", "E"], three_letter),
         (["A", "'", "D", "E"], three_letter),
         (["A", "D", "'", "E"], three_letter),
     ]
     # A fresh expectation list is built for each interpretation.
     tests = [
         (tokens, [(text, True) for text in candidates])
         for tokens, candidates in interpretations
     ]
     self.run_tests(builder, tests)
Example #10
0
 def test_starting_letters_search_mode_contains(self):
     """Check starting letters when the search mode is CONTAIN.

     For every listed set of interpretations, the starting letters must
     equal ``grammar.HARD_CHARACTERS``.
     """
     builder = regen.RegexBuilder(search_mode=regen.SearchMode.CONTAIN)
     for interpretations in ([["A"]], [["B"]], [["K", "P"]], [["'", "I"]]):
         starting = builder.get_starting_letters(interpretations)
         self.assertSetEqual(starting, grammar.HARD_CHARACTERS)
Example #11
0
 def test_fix_first_on(self):
     """Check matching with ``fix_first`` enabled and uncertainty 1.

     The table expects texts that start with a letter other than the
     interpretation's first token to be rejected.
     """
     expectations = [("ABD", True), ("EBD", False), ("IBD", False),
                     ("APT", True), ("EPDD", False), ("IBDT", False)]
     builder = regen.RegexBuilder(uncertainty=1, fix_first=True)
     self.run_tests(builder, [(["A", "B", "D"], expectations)])
Example #12
0
 def test_contains(self):
     """Check the CONTAIN search mode.

     The interpretation ``AB`` is expected to match texts that equal it,
     start with it, or end with it.
     """
     expectations = [("AB", True), ("ABU", True), ("DAB", True)]
     builder = regen.RegexBuilder(search_mode=regen.SearchMode.CONTAIN)
     self.run_tests(builder, [(["A", "B"], expectations)])
Example #13
0
 def test_start(self):
     """Check the START search mode.

     The interpretation ``AB`` is expected to match texts that equal it or
     start with it, but not a text where it appears only at the end.
     """
     expectations = [("AB", True), ("ABU", True), ("DAB", False)]
     builder = regen.RegexBuilder(search_mode=regen.SearchMode.START)
     self.run_tests(builder, [(["A", "B"], expectations)])