def test_GraphTransliterator_ignore_errors():
    # If ignore_errors is not set and there is no matching transliteration
    # rule, a NoMatchingTransliterationRuleException is raised.
    yaml_str = """
    tokens:
      a: [class1]
      b: [class1]
      ' ': [wb]
    rules:
      a a: B2
      b: B
    whitespace:
      default: ' '
      consolidate: true
      token_class: wb
    """
    # check that ignore_errors works
    assert (
        GraphTransliterator.from_yaml(yaml_str, ignore_errors=True).transliterate("a")
        == ""
    )
    with pytest.raises(NoMatchingTransliterationRuleException):
        gt = GraphTransliterator.from_yaml(yaml_str, ignore_errors=False)
        assert gt.ignore_errors is False
        gt.transliterate("a")
    with pytest.raises(UnrecognizableInputTokenException):
        gt = GraphTransliterator.from_yaml(yaml_str, ignore_errors=False)
        assert gt.ignore_errors is False
        gt.transliterate("!")
    gt = GraphTransliterator.from_yaml(yaml_str, ignore_errors=True)
    assert gt.ignore_errors is True
    assert gt.tokenize("b!b") == [" ", "b", "b", " "]
    assert gt.transliterate("b!b") == "BB"
    with pytest.raises(UnrecognizableInputTokenException):
        gt = GraphTransliterator.from_yaml(yaml_str, ignore_errors=False)
        assert gt.ignore_errors is False
        gt.transliterate("b!")
    # # test ignore_errors keyword value checking on init
    # with pytest.raises(ValueError):
    #     GraphTransliterator.from_yaml(yaml_str, ignore_errors="maybe")
    # test ignore_errors setter and property
    gt.ignore_errors = True
    assert gt.ignore_errors is True
    gt.ignore_errors = False
    assert gt.ignore_errors is False
def load_transliterator(source, **kwargs):
    """Load a transliterator from a (format, parameter) tuple."""
    format, parameter = source
    if format == "bundled":
        mod = __import__("graphtransliterator.transliterators")
        transliterators_mod = mod.transliterators
        transliterator_class = getattr(transliterators_mod, parameter)
        return transliterator_class(**kwargs)
    elif format == "json":
        return GraphTransliterator.loads(parameter, **kwargs)
    elif format == "json_file":
        with open(parameter, "r") as f:
            return GraphTransliterator.loads(f.read(), **kwargs)
    elif format == "yaml_file":
        return GraphTransliterator.from_yaml_file(parameter, **kwargs)
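# A minimal usage sketch (not part of the original file) showing how
# load_transliterator might be called. The bundled name "Example" is taken
# from the CLI tests below; assuming bundled transliterators support dumps(),
# this round-trips one through the "json" format.
def _demo_load_transliterator():
    gt = load_transliterator(("bundled", "Example"))
    assert gt.transliterate("a") == "A"
    json_ = gt.dumps()
    assert load_transliterator(("json", json_)).transliterate("a") == "A"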
def test_GraphTransliterator_productions():
    """Test productions."""
    tokens = {"ab": ["class_ab"], " ": ["wb"]}
    whitespace = {"default": " ", "token_class": "wb", "consolidate": True}
    rules = {"ab": "AB", " ": "_"}
    settings = {"tokens": tokens, "rules": rules, "whitespace": whitespace}
    assert set(GraphTransliterator.from_easyreading_dict(settings).productions) == set(
        ["AB", "_"]
    )
def test_GraphTransliterator_graph():
    """Test graph."""
    tokens = {"ab": ["class_ab"], " ": ["wb"]}
    whitespace = {"default": " ", "token_class": "wb", "consolidate": True}
    rules = {"ab": "AB", " ": "_"}
    settings = {"tokens": tokens, "rules": rules, "whitespace": whitespace}
    gt = GraphTransliterator.from_easyreading_dict(settings)
    assert gt._graph
    assert gt._graph.node[0]["type"] == "Start"  # test for Start
    assert gt
def test_GraphParser_check_ambiguity():
    """Test for rules that can both match the same thing."""
    yaml_for_test = r"""
    tokens:
      a: [token, class1, class2]
      b: [token, class1, class2]
      ' ': [wb]
    rules:
      a <class1>: A<class1>  # these should be ambiguous
      a <class2>: A<class2>
      <class1> a: <class1>A  # these should be ambiguous
      <class2> a: <class2>A  # these should be ambiguous
      (<class1> b) a (b <class2>): A  # ambiguous
      (<class2> b) a (b <class1>): A  # ambiguous
      a: A  # not ambiguous
    whitespace:
      default: ' '
      token_class: 'wb'
      consolidate: true
    """
    with pytest.raises(AmbiguousTransliterationRulesException):
        GraphTransliterator.from_yaml(yaml_for_test, check_for_ambiguity=True)
    # check that ambiguity matches if rules are of different shape
    yaml = """
    tokens:
      a: []
      ' ': [wb]
    rules:
      <wb> a: _A
      a <wb>: A_
      a: a
      ' ': ' '
    whitespace:
      default: " "       # default whitespace token
      consolidate: true  # whitespace should be consolidated
      token_class: wb    # whitespace token class
    """
    with pytest.raises(AmbiguousTransliterationRulesException):
        GraphTransliterator.from_yaml(yaml, check_for_ambiguity=True)
def test_cli_transliterate_tests(tmpdir):
    """Test transliterate command and loading of all formats."""
    transliterator = GraphTransliterator.from_yaml(test_yaml)
    runner = CliRunner()
    # test bundled
    bundled_result = runner.invoke(
        cli.main, ["transliterate", "--from", "bundled", "Example", "a"]
    )
    assert bundled_result.exit_code == 0
    assert bundled_result.output.strip() == "A"
    # test multiple inputs with python output
    bundled_multiple_result = runner.invoke(
        cli.main, ["transliterate", "--from", "bundled", "Example", "a", "a"]
    )
    assert bundled_multiple_result.exit_code == 0
    assert bundled_multiple_result.output.strip() == str(["A", "A"])
    # test multiple inputs with json output
    bundled_multiple_json_result = runner.invoke(
        cli.main,
        ["transliterate", "--from", "bundled", "Example", "--to", "json", "a", "a"],
    )
    assert bundled_multiple_json_result.exit_code == 0
    assert bundled_multiple_json_result.output.strip() == json.dumps(["A", "A"])
    # test transliterate from JSON
    json_ = transliterator.dumps()
    json_result = runner.invoke(
        cli.main, ["transliterate", "--from", "json", json_, "a"]
    )
    assert json_result.exit_code == 0
    assert json_result.output.strip() == "A"
    # test transliterate from JSON file
    json_file = tmpdir.mkdir("sub").join("test.json")
    json_file.write(json_)
    json_file_result = runner.invoke(
        cli.main, ["transliterate", "--from", "json_file", json_file.strpath, "a"]
    )
    assert json_file_result.exit_code == 0
    assert json_file_result.output.strip() == "A"
    # test transliterate from YAML file
    yaml_file = tmpdir.join("test.yaml")
    yaml_file.write(test_yaml)
    yaml_file_result = runner.invoke(
        cli.main, ["transliterate", "--from", "yaml_file", yaml_file.strpath, "a"]
    )
    assert yaml_file_result.exit_code == 0
    assert yaml_file_result.output.strip() == "A"
def test_cli_dump():
    """Test `dump` command."""
    runner = CliRunner()
    dump_result = runner.invoke(cli.main, ["dump", "--from", "bundled", "Example"])
    assert dump_result.exit_code == 0
    json_ = dump_result.output
    assert GraphTransliterator.loads(json_).transliterate("a") == "A"
    # check that dump remains the same (important for version control)
    for i in range(0, 50):
        _ = runner.invoke(cli.main, ["dump", "--from", "bundled", "Example"])
        assert _.output == json_, "JSON dump varies"
def __init__(
    self, meters_list=None, find_feet=None, meters_filter=None, with_mir=True
):
    if not meters_list:
        meters_list = _load_yaml(meters_filename)
        if with_mir:
            mir_meters = _load_yaml(mir_meters_filename)
            meters_list = meters_list + mir_meters
    self._scans_with_feet = _gen_possible_feet(meters_list)
    if find_feet is None:  # default to the class's own foot finder
        find_feet = self.find_feet
    if meters_filter:
        meters_list = meters_filter(meters_list)
    Scanner.__init__(
        self,
        GraphTransliterator.from_yaml_file(transcription_filename),
        GraphTransliterator.from_yaml_file(long_parser_filename),
        GraphTransliterator.from_yaml_file(short_parser_filename),
        _load_yaml(constraints_filename),
        meters_list,
        find_feet=find_feet,
        post_scan_filter=filter_scans,
    )
def test_compression():
    gt = GraphTransliterator.from_yaml(test_config)
    compressed_config = compression.compress_config(gt.dump())
    decompressed_config = compression.decompress_config(compressed_config)
    gt_from_decompressed = GraphTransliterator.load(decompressed_config)
    # Compare JSON dumps with sorted keys.
    assert json.dumps(gt.dump(), sort_keys=True) == json.dumps(
        gt_from_decompressed.dump(), sort_keys=True
    )
    # Test bad compression level
    with pytest.raises(ValueError):
        gt.dump(compression_level=graphtransliterator.HIGHEST_COMPRESSION_LEVEL + 1)
    # Test compression at level 0 (should likely not be called)
    assert "compressed_settings" not in compression.compress_config(
        gt.dump(), compression_level=0
    )
    # Test compression levels
    assert '"tokens": ' in gt.dumps(compression_level=0)
    assert '"compressed_settings"' in gt.dumps(compression_level=1)
    assert '"compressed_settings"' in gt.dumps(compression_level=2)
    for i in range(0, graphtransliterator.HIGHEST_COMPRESSION_LEVEL + 1):
        x = gt.dumps(compression_level=i)
        y = gt.loads(x)
        assert y.transliterate("a") == "A"
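# An illustrative sketch (not part of the test suite) comparing the length of
# serialized dumps across compression levels; it assumes the same test_config
# used in test_compression above. Higher levels should typically yield shorter
# JSON strings.
def _compare_compression_sizes():
    gt = GraphTransliterator.from_yaml(test_config)
    for level in range(0, graphtransliterator.HIGHEST_COMPRESSION_LEVEL + 1):
        dumped = gt.dumps(compression_level=level)
        print("level {}: {} characters".format(level, len(dumped)))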
def test_GraphTransliterator_pruned_of():
    gt = GraphTransliterator.from_yaml(
        """
        tokens:
          a: [class1]
          b: [class2]
          ' ': [wb]
        rules:
          a: A
          b: B
        whitespace:
          default: ' '
          consolidate: true
          token_class: wb
        """
    )
    assert len(gt.rules) == 2
    assert len(gt.pruned_of("B").rules) == 1
    assert gt.pruned_of("B").rules[0].production == "A"
    assert gt.pruned_of(["A", "B"])  # if no rules present will still work
def test_match_all():
    """Test GraphTransliterator match_at with match_all."""
    YAML = r"""
    tokens:
      a: [class_a]
      " ": [wb]
    rules:
      a: A
      a a: A*2
    whitespace:
      default: ' '
      consolidate: True
      token_class: wb
    """
    gt = GraphTransliterator.from_yaml(YAML)
    assert gt.rules[0].cost < gt.rules[1].cost
    tokens = gt.tokenize("aa")
    assert gt.match_at(1, tokens, match_all=False) == 0
    assert gt.match_at(1, tokens, match_all=True) == [0, 1]
def test_GraphTransliterator_from_YAML():
    """Test YAML loading of GraphTransliterator."""
    good_yaml = """
    tokens:
      a: [class1]
      ' ': [wb]
    rules:
      a: A
    whitespace:
      default: ' '
      consolidate: true
      token_class: wb
    """
    assert GraphTransliterator.from_yaml(good_yaml)

    # tokens values are not lists
    bad_yaml = """
    tokens:
      a: class1
      ' ': wb
    rules:
      a: A
    whitespace:
      default: ' '
      consolidate: true
      token_class: wb
    """
    with pytest.raises(ValidationError):
        GraphTransliterator.from_yaml(bad_yaml)

    # tokens are missing
    bad_yaml = """
    rules:
      a: A
    whitespace:
      default: ' '
      consolidate: true
      token_class: wb
    """
    with pytest.raises(ValidationError):
        GraphTransliterator.from_yaml(bad_yaml)

    # whitespace errors
    bad_yaml = """
    rules:
      a: A
    tokens:
      a: [token]
      ' ': [wb]
    whitespace:
      default: 'BAD'
      consolidate: true
      token_class: bad
    """
    with pytest.raises(ValidationError):
        GraphTransliterator.from_yaml(bad_yaml)

    # rules are missing
    bad_yaml = """
    tokens:
      a: [class1]
      ' ': [wb]
    whitespace:
      default: ' '
      consolidate: true
      token_class: wb
    """
    with pytest.raises(ValidationError):
        GraphTransliterator.from_yaml(bad_yaml)

    # rule token not in tokens
    bad_yaml = """
    tokens:
      a: [class1]
      ' ': [wb]
    rules:
      b: A
    whitespace:
      default: ' '
      consolidate: true
      token_class: wb
    """
    with pytest.raises(ValidationError):
        GraphTransliterator.from_yaml(bad_yaml)

    # previous token not in tokens
    bad_yaml = """
    tokens:
      a: [class1]
      ' ': [wb]
    rules:
      (b) a: A
    whitespace:
      default: ' '
      consolidate: true
      token_class: wb
    """
    with pytest.raises(ValidationError):
        GraphTransliterator.from_yaml(bad_yaml)

    # next token not in tokens
    bad_yaml = """
    tokens:
      a: [class1]
      ' ': [wb]
    rules:
      a (b): A
    whitespace:
      default: ' '
      consolidate: true
      token_class: wb
    """
    with pytest.raises(ValidationError):
        GraphTransliterator.from_yaml(bad_yaml)

    # token class does not exist
    bad_yaml = """
    tokens:
      a: [class1]
      ' ': [wb]
    rules:
      a <class_nonexisting>: A
    whitespace:
      default: ' '
      consolidate: true
      token_class: wb
    """
    with pytest.raises(ValidationError):
        GraphTransliterator.from_yaml(bad_yaml)

    # test for bad tokens
    bad_yaml = """
    tokens: '7'
    rules:
      a <class_nonexisting>: A
    whitespace:
      default: ' '
      consolidate: true
      token_class: wb
    """
    with pytest.raises(ValidationError):
        GraphTransliterator.from_yaml(bad_yaml)
def test_GraphTransliterator(tmpdir):
    """Test GraphTransliterator."""
    yaml_str = r"""
    tokens:
      a: [token, class1]
      b: [token, class2]
      u: [token]
      ' ': [wb]
    rules:
      a: A
      b: B
      <wb> u: \N{DEVANAGARI LETTER U}
    onmatch_rules:
      - <class1> + <class2>: ","
      - <class1> + <token>: \N{DEVANAGARI SIGN VIRAMA}
    whitespace:
      default: ' '
      token_class: 'wb'
      consolidate: true
    metadata:
      author: Author
    """
    input_dict = yaml.safe_load(yaml_str)
    assert "a" in GraphTransliterator.from_easyreading_dict(input_dict).tokens.keys()
    gt = GraphTransliterator.from_easyreading_dict(input_dict)
    assert gt.onmatch_rules[0].production == ","
    assert gt.tokens
    assert gt.rules
    assert gt.whitespace
    assert gt.whitespace.default
    assert gt.whitespace.token_class
    assert gt.whitespace.consolidate
    assert gt.metadata["author"] == "Author"
    assert type(gt.graph) == DirectedGraph
    yaml_file = tmpdir.join("yaml_test.yaml")
    yaml_filename = str(yaml_file)
    yaml_file.write(yaml_str)
    assert yaml_file.read() == yaml_str
    assert GraphTransliterator.from_yaml_file(yaml_filename)
    assert len(set(GraphTransliterator.from_easyreading_dict(input_dict).tokens)) == 4
    assert GraphTransliterator.from_yaml(yaml_str).transliterate("ab") == "A,B"
    assert (
        GraphTransliterator.from_yaml_file(yaml_filename).transliterate("ab") == "A,B"
    )
    assert (
        GraphTransliterator.from_easyreading_dict(
            {
                "tokens": {"a": ["class_a"], "b": ["class_b"], " ": ["wb"]},
                "onmatch_rules": [{"<class_a> + <class_b>": ","}],
                "whitespace": {
                    "default": " ",
                    "token_class": "wb",
                    "consolidate": True,
                },
                "rules": {"a": "A", "b": "B"},
            }
        ).transliterate("ab")
        == "A,B"
    )
def test_serialization():
    """Test serialization of GraphTransliterator."""
    # Field definitions
    required_fields = ["tokens", "rules", "whitespace"]
    optional_fields = [
        "onmatch_rules",
        "metadata",
        "ignore_errors",
        "onmatch_rules_lookup",
        "tokens_by_class",
        "graph",
        "tokenizer_pattern",
        "graphtransliterator_version",
    ]
    ordered_fields = required_fields + optional_fields
    yaml_ = """
    tokens:
      a: [vowel]
      ' ': [wb]
    rules:
      a: A
      ' ': ' '
    whitespace:
      default: " "
      consolidate: false
      token_class: wb
    onmatch_rules:
      - <vowel> + <vowel>: ','  # add a comma between vowels
    metadata:
      author: "Author McAuthorson"
    """
    gt = GraphTransliterator.from_yaml(yaml_)
    # test dump
    dump = gt.dump()
    assert dump["graph"]["edge"]
    # test ordering of dump fields
    assert list(dump.keys()) == ordered_fields
    # test dump version
    assert dump["graphtransliterator_version"] == graphtransliterator.__version__
    assert re.match(r"\d+\.\d+\.\d+$", gt.dump()["graphtransliterator_version"])
    # test dumps
    x = gt.dumps()
    assert "graph" in gt.dumps()
    assert type(x) == str
    # test loads
    new_gt = GraphTransliterator.loads(x)
    assert GraphTransliterator.loads(gt.dumps()).dumps()
    assert type(new_gt) == GraphTransliterator
    # test load
    settings = gt.dump()
    assert type(GraphTransliterator.load(settings)) == GraphTransliterator
    # confirm settings not affected by load
    assert settings == gt.dump()
    # confirm compacting (dropping) optional settings works
    for length in range(1, len(optional_fields)):
        for to_drop in combinations(optional_fields, length):
            settings = gt.dump()
            for _ in to_drop:
                settings.pop(_)
            # Confirm ValidationError if onmatch_rules_lookup is present but
            # onmatch_rules is not (the chances of this ever being the case
            # are slim!)
            if settings.get("onmatch_rules_lookup") and not settings.get(
                "onmatch_rules"
            ):
                with pytest.raises(ValidationError):
                    assert GraphTransliterator.load(settings)
            else:
                assert GraphTransliterator.load(settings)
    bad_settings = gt.dump()
    bad_settings.pop("onmatch_rules")
    with pytest.raises(ValidationError):
        assert GraphTransliterator.load(bad_settings)
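# A condensed sketch of the compacting behavior exercised above: a dump with
# derived optional fields removed still loads into a working transliterator,
# presumably because load() regenerates what is missing. The field names come
# from optional_fields in test_serialization; the helper name is illustrative.
def _demo_compact_dump(gt):
    settings = gt.dump()
    for field in ["onmatch_rules_lookup", "tokens_by_class", "graph",
                  "tokenizer_pattern"]:
        settings.pop(field, None)
    return GraphTransliterator.load(settings)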
def test_GraphTransliterator_transliterate(tmpdir):
    """Test GraphTransliterator transliterate."""
    YAML = r"""
    tokens:
      a: [class_a]
      b: [class_b]
      c: [class_c]
      " ": [wb]
      d: []
      Aa: [constrained_rule]
    rules:
      a: A
      b: B
      <class_c> <class_c> a: A(AFTER_CLASS_C_AND_CLASS_C)
      (<class_c> b) a: A(AFTER_B_AND_CLASS_C)
      (<class_c> b b) a a: AA(AFTER_BB_AND_CLASS_C)
      a <class_c>: A(BEFORE_CLASS_C)
      a b (c <class_b>): AB(BEFORE_C_AND_CLASS_B)
      c: C
      c c: C*2
      a (b b b): A(BEFORE_B_B_B)
      d (c <class_a>): D(BEFORE_C_AND_CLASS_A)
      (b b) a: A(AFTER_B_B)
      <wb> Aa: A(ONLY_A_CONSTRAINED_RULE)
    onmatch_rules:
      - <class_a> <class_b> + <class_a> <class_b>: "!"
      - <class_a> + <class_b>: ","
    whitespace:
      default: ' '
      consolidate: True
      token_class: wb
    """
    gt = GraphTransliterator.from_yaml(YAML)
    # rules with single token
    assert gt.transliterate("a") == "A"
    # rules with multiple tokens
    assert gt.transliterate("aa") == "AA"
    # rules with multiple tokens (for rule_key)
    assert gt.transliterate("cc") == "C*2"
    # # rules with multiple tokens overlapping end of tokens
    # assert gt.transliterate('c') == 'C'
    # rules with prev class
    assert gt.transliterate("ca") == "CA"
    # rules with prev class and prev token
    assert gt.transliterate("dca") == "D(BEFORE_C_AND_CLASS_A)CA"
    # rules with prev class and prev tokens
    assert gt.transliterate("cbba") == "CBBA(AFTER_B_B)"
    # rules with next class
    assert gt.transliterate("ac") == "A(BEFORE_CLASS_C)C"
    # rules with next class and next tokens
    assert gt.transliterate("acb") == "A(BEFORE_CLASS_C)CB"
    # rules with onmatch rule of length 1
    assert gt.transliterate("ab") == "A,B"
    # rules that only have constraints on first element
    assert gt.transliterate("Aa") == "A(ONLY_A_CONSTRAINED_RULE)"
    # test whitespace consolidation
    assert gt.transliterate(" a") == "A"
    # test whitespace consolidation following
    assert gt.transliterate("a ") == "A"
    # rules with longer onmatch rules
    assert gt.transliterate("abab") == "A,B!A,B"
    # test last_input_tokens
    assert gt.last_input_tokens == [" ", "a", "b", "a", "b", " "]
    # test last_matched_rule_tokens
    assert gt.last_matched_rule_tokens == [["a"], ["b"], ["a"], ["b"]]
    # test last_matched_rules
    assert len(gt.last_matched_rules) == 4
def test_validator():
    """Test validator."""
    transcriptionYAML_ok = r"""
    whitespace:
      token_class: wb
      default: ' '
      consolidate: True
    tokens:
      a: [short_vowel]
      b: [consonant]
      aa: [long_vowel]
      ' ': [wb]
      '\t': [wb]
    rules:
      a: s
      b: c
      aa: l
      ' ': b
    """
    transcriptionYAML_bad = r"""
    whitespace:
      token_class: wb
      default: ' '
      consolidate: True
    tokens:
      a: [short_vowel]
      b: [consonant]
      aa: [long_vowel]
      ' ': [wb]
      '\t': [wb]
    rules:
      a: s
      b: c
      aa: X
    """
    shortYAML_ok = r"""
    whitespace:
      token_class: wb
      default: 'b'
      consolidate: True
    tokens:
      b: [wb]
      s: [short_vowel]
      c: [consonant]
      l: [long_vowel]
    rules:
      c: s<c>
      b c s: s<bcs>
      (l) c (b): s<(l)c(b)>
      (c) c (b): s<(c)c(b)>
      b c l (b): s<bcl(b)>
      c l (b): s<cl(b)>
    """
    shortYAML_bad = r"""
    whitespace:
      token_class: wb
      default: 'b'
      consolidate: True
    tokens:
      b: [wb]
      s: [short_vowel]
      c: [consonant]
      l: [long_vowel]
      X: [extra bad token]
    rules:
      b c s: s<bcs>
      (l) c (b): s<(l)c(b)>
      (c) c (b): s<(c)c(b)>
      b c l (b): s<bcl(b)>
      c l (b): s<cl(b)>
    """
    longYAML_ok = r"""
    whitespace:
      token_class: wb
      default: 'b'
      consolidate: True
    tokens:
      b: [wb]
      s: [short_vowel]
      c: [consonant]
      l: [long_vowel]
    rules:
      b c l: l<bcl>
      c l: l<cl>
    """
    longYAML_bad = r"""
    whitespace:
      token_class: wb
      default: 'b'
      consolidate: True
    tokens:
      b: [wb]
      s: [short_vowel]
      c: [consonant]
      l: [long_vowel]
      X: [extra bad token]
    rules:
      b c l: l<bcl>
      c l: l<cl>
    """
    constraints = None
    # yaml.safe_load("""
    # "-":
    #   "-":
    #     "<bcss>": [s<c>]
    # """)
    constraints_bad = {"bad": "constraints"}
    meters_list = yaml.safe_load("""
    - id: "1"
      regex_pattern: ===(-)
      name: three longs and maybe a short
    - id: "2"
      regex_pattern: (-|=)==(=|-)
      name: a long or short, two longs, and a long or short
    - id: "2"
      regex_pattern: (=-=|===)+==(=|-)
      name: meter with cycles
    """)
    meters_list_bad = {"bad": "meters_list"}
    transcription_parser_ok = GraphTransliterator.from_yaml(transcriptionYAML_ok)
    long_parser_ok = GraphTransliterator.from_yaml(longYAML_ok)
    short_parser_ok = GraphTransliterator.from_yaml(shortYAML_ok)
    transcription_parser_bad = GraphTransliterator.from_yaml(transcriptionYAML_bad)
    long_parser_bad = GraphTransliterator.from_yaml(longYAML_bad)
    short_parser_bad = GraphTransliterator.from_yaml(shortYAML_bad)
    assert Scanner(
        transcription_parser_ok,
        long_parser_ok,
        short_parser_ok,
        constraints,
        meters_list,
    )
    with pytest.raises(ValueError):
        Scanner(
            transcription_parser_ok,
            long_parser_bad,
            short_parser_ok,
            constraints,
            meters_list,
        )
    with pytest.raises(ValueError):
        Scanner(
            transcription_parser_ok,
            long_parser_ok,
            short_parser_bad,
            constraints,
            meters_list,
        )
    with pytest.raises(ValueError):
        Scanner(
            transcription_parser_bad,
            long_parser_ok,
            short_parser_ok,
            constraints,
            meters_list,
        )
    # test bad constraints
    with pytest.raises(ValueError):
        Scanner(
            transcription_parser_ok,
            long_parser_ok,
            short_parser_ok,
            constraints_bad,
            meters_list,
        )
    # test bad meters_list
    with pytest.raises(ValueError):
        Scanner(
            transcription_parser_ok,
            long_parser_ok,
            short_parser_ok,
            constraints,
            meters_list_bad,
        )
    for i in range(len(final) - a_id):
        # print(final[a_id + i])
        final2.append(final[a_id + i])
    return final2


## ConfusionMatrix
confusion_matrix = ConfusionDictionary()
possibilities = set(x for x in confusion_matrix.getPreds())
for x in confusion_matrix.getGolds():
    possibilities.add(x)

import json

from graphtransliterator import GraphTransliterator

gt = GraphTransliterator.from_yaml_file(
    "/Users/mosaix/orthographic-ASR/transliterate/transliterators/latin_prealignment.yml"
)
tf = GraphTransliterator.from_yaml_file(
    "/Users/mosaix/orthographic-ASR/transliterate/transliterators/tifinagh_to_latin.yml"
)
no_lm_store = {}
gold_aligned = []
pred_aligned = []
with open('transliterate/output/latin_norm/no_lm/inferences.json') as f:
    data = json.load(f)
with open('transliterate/output/latin_norm/no_lm/alignments.txt', "w+") as l:
    for i in data:
        try:
            wavfile = i['wav_filename'].split('/')[-1]
            compare_tuple = (gt.transliterate(i['src']),
    # normalize visually identical variants of 'ṭ' (likely decomposed form
    # replaced by the precomposed codepoint)
    norm_string = re.sub(r"ṭ", "ṭ", norm_string)
    translated = norm_string.translate(norm_string.maketrans(normalization_dict))
    norm_string = re.sub(r"-", " ", translated)
    norm_string = re.sub(r'( ){2,}', " ", norm_string)  # collapse repeated spaces
    return norm_string


df = pd.read_csv(input_file, sep='\t')
df_augmented = df.copy()
# normalize each sentence, storing the result in both frames
df_augmented["normalized"] = df["sentence"] = df.apply(
    lambda row: normalize(row['sentence'], orthography), axis=1)
if orthography in [
        'tifinagh_ahaggar', 'tifinagh_ahaggar_lig', 'tifinagh_ircam', 'arabic'
]:
    gt = GraphTransliterator.from_yaml_file(paths[orthography])
    df_augmented['transliteration'] = df["sentence"] = df_augmented.apply(
        lambda row: gt.transliterate(row['normalized']), axis=1)
df.to_csv(output_folder + '/' + os.path.basename(input_file),
          sep='\t',
          index=False,
          header=True)
input_base = os.path.splitext(os.path.basename(input_file))[0]
with open(output_folder + '/' + input_base + "_compare.txt", "w+") as f:
    for (idx, row) in df_augmented.iterrows():
        f.write(row.sentence + "\n")
        f.write("\t" + row.normalized + "\n")
        if orthography in [
                'tifinagh_ahaggar', 'tifinagh_ahaggar_lig', 'tifinagh_ircam',
                'arabic'
def test_constraints():
    """Test constraints."""
    transcription_parser = GraphTransliterator.from_yaml("""
    whitespace:
      token_class: wb
      default: ' '
      consolidate: True
    tokens:
      A: []
      ' ': [wb]
    rules:
      A: "a"
      ' ': "b"
    """)
    short_parser = GraphTransliterator.from_yaml("""
    whitespace:
      token_class: wb
      default: 'b'
      consolidate: True
    tokens:
      a: []
      b: [wb]
    rules:
      b a: s<ba>
      a: s<a>
    """)
    long_parser = GraphTransliterator.from_yaml("""
    whitespace:
      token_class: wb
      default: 'b'
      consolidate: True
    tokens:
      a: []
      b: [wb]
    rules:
      b a a: "l<baa>"
      a a: "l<aa>"
    """)
    constraints = yaml.safe_load("""
    '-':
      '-':
        's<a>': [s<ba>, s<a>]
    """)  # cannot have s<ba> s<a>; it must be long
    meters_list = yaml.safe_load("""
    - id: "1"
      name: long long
      regex_pattern: "=="
    - id: "2"
      name: short short long
      notes: should not be possible due to constraints
      regex_pattern: "--="
    """)
    scanner = Scanner(
        transcription_parser, long_parser, short_parser, constraints, meters_list
    )
    assert scanner._constrained_parsers["-"]["-"]["s<a>"]._graph.node[0] == {
        "ordered_children": {},
        "type": "Start",
    }