def check_strictness_low(self, annotations: List[str], texts: List[Tuple[str, str]]):
    """Assert that low annotation strictness finds a match in every text.

    For every annotation and every pair in ``texts``, an annotation regex
    is built for the pair's first item and must be found (``assertRegex``)
    in the concatenation of the pair's two items.

    :param annotations: Values passed one at a time as the second argument
        of ``make_annotation_regex``.
    :param texts: Pairs whose first item is the stroke and whose second
        item is appended to the stroke to form the text under test.
    """
    low_builder = regen.RegexBuilder(annotation_mode=regen.Strictness.LOW)
    for annotation in annotations:
        for pair in texts:
            stroke = pair[0]
            pattern = low_builder.make_annotation_regex(stroke, annotation)
            self.assertRegex(stroke + pair[1], pattern)
def test_starting_letters_no_fix_first_uncertainty_one(self):
    """Check starting letters with uncertainty 1 and ``fix_first`` disabled.

    Each case is a 2-tuple: a list of interpretations (each interpretation
    being a list of tokens) and a list of ``(letter, bool)`` expectations.
    The cases are handed to ``run_tests`` together with the builder.
    """
    builder = regen.RegexBuilder(
        search_mode=regen.SearchMode.MATCH,
        uncertainty=1,
        fix_first=False,
    )
    tests = [
        ([["A"]], [("A", True), ("E", True), ("I", True)]),
        # The interpretation list must be closed before the expectations
        # start; otherwise the bare token list ["'", "I"] is passed where a
        # list of interpretations is expected.
        ([["'", "I"]], [("I", True), ("E", False), ("A", True)]),
        ([["NT"], ["DN"]],
         [("N", True), ("T", True), ("D", True), ("M", True)]),
    ]
    self.run_tests(builder, tests)
def run_tests(self, uncertainty, tests):
    """Check uncertainty regexes against texts that should or should not match.

    :param uncertainty: Passed as the second argument of
        ``make_uncertainty_regex``.
    :param tests: Iterable of cases. Item 0 of a case is the stroke and
        item 1 is an iterable of ``(text, expected)`` pairs, where
        ``expected`` says whether the regex must fully match ``text``.
    """
    builder = regen.RegexBuilder()
    for test in tests:
        stroke = test[0]
        # The regex depends only on the stroke and the uncertainty, so it is
        # built once per case rather than once per text.
        regex = builder.make_uncertainty_regex(stroke, uncertainty)
        for text, expected in test[1]:
            # Label the sub-test so a failure report identifies the case.
            with self.subTest(stroke=stroke, uncertainty=uncertainty,
                              text=text):
                if expected:
                    self.assertTrue(re.fullmatch(regex, text))
                else:
                    self.assertFalse(re.fullmatch(regex, text))
def check_strictness_medium(self, strokes, tests):
    """Check annotation regexes built with medium annotation strictness.

    :param strokes: Strokes to combine with every case in ``tests``.
    :param tests: Iterable of cases. Item 0 of a case is the annotations
        passed to ``make_annotation_regex``; item 1 is an iterable of
        ``(text, expected)`` pairs. ``text`` is appended to the stroke, and
        ``expected`` says whether the regex must be found in the result.
    """
    medium_builder = regen.RegexBuilder(
        annotation_mode=regen.Strictness.MEDIUM)
    for stroke in strokes:
        for case in tests:
            annotations = case[0]
            for suffix, should_match in case[1]:
                pattern = medium_builder.make_annotation_regex(
                    stroke, annotations)
                candidate = stroke + suffix
                with self.subTest(annotations=annotations, text=candidate):
                    verify = (self.assertRegex if should_match
                              else self.assertNotRegex)
                    verify(candidate, pattern)
def test_strictness_high(self):
    """Exercise the builder with high disjoiner strictness.

    For every token list, the only text expected to match is the one equal
    to the tokens joined together; every other text in the same group is
    expected not to match.
    """
    builder = regen.RegexBuilder(disjoiner_mode=regen.Strictness.HIGH)

    two_stroke_texts = ("AB", "A^B")
    three_stroke_texts = ("ABD", "A^BD", "AB^D", "A^B^D")
    groups = [
        (["A", "B"], two_stroke_texts),
        (["A", "^", "B"], two_stroke_texts),
        (["A", "B", "D"], three_stroke_texts),
        (["A", "^", "B", "D"], three_stroke_texts),
        (["A", "B", "^", "D"], three_stroke_texts),
        (["A", "^", "B", "^", "D"], three_stroke_texts),
    ]

    tests = []
    for tokens, texts in groups:
        exact = "".join(tokens)
        tests.append((tokens, [(text, text == exact) for text in texts]))

    self.run_tests(builder, tests)
def check_strictness_high(self, strokes, tests):
    """Check annotation regexes built with high annotation strictness.

    :param strokes: Strokes to combine with every case in ``tests``.
    :param tests: Iterable of cases. Item 0 of a case is the annotations
        passed to ``make_annotation_regex``; item 1 is an iterable of
        ``(text, expected)`` pairs. ``text`` is appended to the stroke, and
        ``expected`` says whether the regex must fully match the result.
    """
    strict_builder = regen.RegexBuilder(annotation_mode=regen.Strictness.HIGH)
    for stroke in strokes:
        for case in tests:
            annotations = case[0]
            for suffix, should_match in case[1]:
                pattern = strict_builder.make_annotation_regex(
                    stroke, annotations)
                with self.subTest(annotations=annotations,
                                  text=stroke + suffix):
                    verify = (self.assertTrue if should_match
                              else self.assertFalse)
                    verify(re.fullmatch(pattern, stroke + suffix))
def search(self, **kwargs):
    """Run a search for a grascii string and return the results as a list.

    :param grascii: [Required] The grascii string to use in the search.
        It is upper-cased before parsing.
    :param uncertainty: The uncertainty of the grascii string.
    :param search_mode: The search mode to use.
    :param annotation_mode: How to handle annotations in the search.
    :param aspirate_mode: How to handle aspirates in the search.
    :param disjoiner_mode: How to handle disjoiners in the search.
    :param fix_first: Apply an uncertainty of 0 to the first token.
    :param interpretation: How to handle ambiguous grascii strings.
    :type grascii: str
    :type uncertainty: int: 0, 1, or 2
    :type search_mode: str: one of regen.SearchMode values
    :type annotation_mode: one of regen.Strictness values
    :type aspirate_mode: one of regen.Strictness values
    :type disjoiner_mode: one of regen.Strictness values
    :type fix_first: bool
    :type interpretation: "best" or "all"

    :raises KeyError: If the ``grascii`` keyword argument is missing.
    :raises ValueError: If ``parse_grascii`` produces no parse tree for the
        grascii string. ``ValueError`` is an ``Exception`` subclass, so
        callers that catch ``Exception`` keep working.
    :returns: A list of search results.
    :rtype: List[str]
    """
    grascii = kwargs["grascii"].upper()
    self.extract_search_args(**kwargs)

    tree = self.parse_grascii(grascii)
    if not tree:
        raise ValueError(
            "Unable to parse grascii string: {!r}".format(grascii))

    interpretations = self.flatten_tree(tree)
    interpretations = list(
        self.get_unique_interpretations(interpretations).values())

    builder = regen.RegexBuilder(
        uncertainty=self.uncertainty,
        search_mode=self.search_mode,
        aspirate_mode=self.aspirate_mode,
        annotation_mode=self.annotation_mode,
        disjoiner_mode=self.disjoiner_mode,
        fix_first=self.fix_first,
    )

    # "best" keeps only the first interpretation; any other mode searches
    # with all of them.
    if self.interpretation_mode == "best":
        interps = interpretations[0:1]
    else:
        interps = interpretations

    patterns = builder.generate_patterns_map(interps)
    starting_letters = builder.get_starting_letters(interps)
    results = self.perform_search(patterns, starting_letters,
                                  metrics.standard)
    return list(results)
def interactive_search(self, previous: Optional[str] = None) -> Optional[str]:
    """Run an interactive search.

    :param previous: The previous search performed in this interactive
        session, if any.
    :returns: The search string used, or ``previous`` when
        ``get_grascii_search`` yields no search string.
    """
    search, tree = self.get_grascii_search(previous)
    if search is None:
        return previous

    parses = self.flatten_tree(tree)
    display_interpretations = self.get_unique_interpretations(parses)
    interpretations = list(display_interpretations.values())

    index = self.choose_interpretation(interpretations)
    builder = regen.RegexBuilder(
        uncertainty=self.uncertainty,
        search_mode=self.search_mode,
        fix_first=self.fix_first,
        annotation_mode=self.annotation_mode,
        aspirate_mode=self.aspirate_mode,
        disjoiner_mode=self.disjoiner_mode,
    )

    # Index 0 keeps every interpretation; any other index is used as a
    # 1-based position selecting a single interpretation.
    if index == 0:
        interps = interpretations
    else:
        interps = interpretations[index - 1:index]

    patterns = builder.generate_patterns_map(interps)
    starting_letters = builder.get_starting_letters(interps)
    results = self.perform_search(patterns, starting_letters,
                                  metrics.standard)

    count = 0
    display_all = False
    for result in results:
        count += 1
        action = "Next"
        # Once "Display All" has been chosen, stop prompting per result.
        if not display_all:
            action = questionary.select(
                "Search Results",
                ["Next", "Display All", "End Search"]).ask()
        # NOTE(review): the result is printed (and counted) before the
        # action is checked, so "End Search" still shows this result --
        # confirm this is intended.
        print(result.strip())
        if action is None or action == "End Search":
            break
        elif action == "Display All":
            display_all = True

    print("Results:", count)
    print()
    return search
def test_strictness_low(self):
    """Exercise the builder with low aspirate strictness.

    Every token list in a group is paired with the same texts, and every
    text is expected to match, wherever aspirates (``'``) appear in the
    tokens or in the text.
    """
    builder = regen.RegexBuilder(aspirate_mode=regen.Strictness.LOW)

    one_stroke_tokens = [
        ["A"],
        ["'", "A"],
    ]
    one_stroke_texts = ["A", "'A"]

    three_stroke_tokens = [
        ["A", "D", "E"],
        ["'", "A", "D", "E"],
        ["'", "A", "'", "D", "E"],
        ["'", "A", "'", "D", "'", "E"],
        ["A", "'", "D", "'", "E"],
        ["'", "A", "D", "'", "E"],
        ["A", "'", "D", "E"],
        ["A", "D", "'", "E"],
    ]
    three_stroke_texts = [
        "ADE", "'ADE", "A'DE", "AD'E",
        "'A'DE", "'AD'E", "A'D'E", "'A'D'E",
    ]

    tests = []
    for token_lists, texts in (
            (one_stroke_tokens, one_stroke_texts),
            (three_stroke_tokens, three_stroke_texts)):
        for tokens in token_lists:
            # A fresh expectation list per case, as in a literal table.
            tests.append((tokens, [(text, True) for text in texts]))

    self.run_tests(builder, tests)
def test_starting_letters_search_mode_contains(self):
    """Check starting letters when the search mode is ``CONTAIN``.

    For every interpretation list tried here, the starting letters are
    expected to equal ``grammar.HARD_CHARACTERS``.
    """
    contain_builder = regen.RegexBuilder(search_mode=regen.SearchMode.CONTAIN)
    interpretation_lists = [
        [["A"]],
        [["B"]],
        [["K", "P"]],
        [["'", "I"]],
    ]
    for interpretations in interpretation_lists:
        starting = contain_builder.get_starting_letters(interpretations)
        self.assertSetEqual(starting, grammar.HARD_CHARACTERS)
def test_fix_first_on(self):
    """Exercise a builder with ``fix_first=True`` and uncertainty 1.

    For the tokens A, B, D the texts starting with "A" are expected to
    match, and the texts starting with "E" or "I" are expected not to.
    """
    builder = regen.RegexBuilder(fix_first=True, uncertainty=1)
    tokens = ["A", "B", "D"]
    expectations = [
        ("ABD", True),
        ("EBD", False),
        ("IBD", False),
        ("APT", True),
        ("EPDD", False),
        ("IBDT", False),
    ]
    self.run_tests(builder, [(tokens, expectations)])
def test_contains(self):
    """Exercise a builder with the ``CONTAIN`` search mode.

    For the tokens A, B every text tried here contains "AB" and is
    expected to match.
    """
    builder = regen.RegexBuilder(search_mode=regen.SearchMode.CONTAIN)
    tokens = ["A", "B"]
    expectations = [
        ("AB", True),
        ("ABU", True),
        ("DAB", True),
    ]
    self.run_tests(builder, [(tokens, expectations)])
def test_start(self):
    """Exercise a builder with the ``START`` search mode.

    For the tokens A, B the texts beginning with "AB" are expected to
    match, while "DAB" is expected not to.
    """
    builder = regen.RegexBuilder(search_mode=regen.SearchMode.START)
    tokens = ["A", "B"]
    expectations = [
        ("AB", True),
        ("ABU", True),
        ("DAB", False),
    ]
    self.run_tests(builder, [(tokens, expectations)])