Example #1
0
def compare_tokens(content,
                   expected_types,
                   expected_values=None,
                   expected_lineno=None,
                   expected_lexpos=None):
    """Lex *content* and assert the token stream matches the expectations.

    Parameters:
      content -- ATF source text fed to the lexer.
      expected_types -- token type names, in order, compared against
                        every token produced by the lexer.
      expected_values, expected_lineno, expected_lexpos --
                        optional per-token expectations; a ``None`` entry
                        (or omitting the whole sequence) skips that check
                        for the corresponding token.
    """
    lexer = AtfLexer().lexer
    lexer.input(content)
    # Missing expectation sequences become endless streams of None so
    # that zip_longest is never terminated by them.
    if expected_values is None:
        expected_values = repeat(None)
    if expected_lineno is None:
        expected_lineno = repeat(None)
    if expected_lexpos is None:
        expected_lexpos = repeat(None)
    for e_type, e_value, e_lineno, e_lexpos, token in zip_longest(
            expected_types, expected_values, expected_lineno, expected_lexpos,
            lexer):
        if token is None and e_type is None:
            # Both the token stream and the expectations are exhausted.
            break
        assert token.type == e_type
        # Compare against None explicitly: falsy expectations such as a
        # lexpos of 0, a lineno of 0 or an empty-string value must still
        # be asserted, not silently skipped.
        if e_value is not None:
            assert token.value == e_value
        if e_lineno is not None:
            assert token.lineno == e_lineno
        if e_lexpos is not None:
            assert token.lexpos == e_lexpos
Example #2
0
 def test_resolve_keyword_no_extra():
     '''Test that resolve_keyword works correctly when extra is not passed.

     This never happens in actual code, hence this test.'''
     mylexer = AtfLexer()
     result = mylexer.resolve_keyword('obverse',
                                      mylexer.structures)
     assert result == 'OBVERSE'
Example #3
0
 def ensure_raises_and_not(self, string, nwarnings):
     """Assert *string* raises SyntaxError under strict lexing, but only
     emits *nwarnings* UserWarnings when invalid input is skipped."""
     self.lexer.input(string)
     # Strict lexing must reject the invalid input outright.
     # (The exception info was bound to an unused name before; dropped.)
     with pytest.raises(SyntaxError):
         for i in self.lexer:
             pass
     # If we allow invalid syntax this should not raise
     self.lexer = AtfLexer(skipinvalid=True).lexer
     self.lexer.input(string)
     with pytest.warns(UserWarning) as record:
         for i in self.lexer:
             pass
     assert len(record) == nwarnings
Example #4
0
 def ensure_raises_and_not(self, string, nwarnings):
     """Assert *string* raises SyntaxError under strict lexing, but only
     emits *nwarnings* UserWarnings when invalid input is skipped."""
     self.lexer.input(string)
     # Strict lexing must reject the invalid input outright.
     # (The exception info was bound to an unused name before; dropped.)
     with pytest.raises(SyntaxError):
         for i in self.lexer:
             pass
     # If we allow invalid syntax this should not raise
     self.lexer = AtfLexer(skipinvalid=True).lexer
     self.lexer.input(string)
     with pytest.warns(UserWarning) as record:
         for i in self.lexer:
             pass
     assert len(record) == nwarnings
Example #5
0
 def __init__(self, content, atftype='oracc', debug=False, skipinvalid=False):
     """Parse ATF *content* with the lexer/parser pair for *atftype*.

     Parameters:
       content -- ATF source text; a trailing newline is appended if absent.
       atftype -- 'cdli', 'oracc' (default) or anything else for the
                  generic lexer/parser pair.
       debug -- when True, the log is passed through to the parser.
       skipinvalid -- when True, the lexer skips invalid tokens instead of
                  raising (was a hard-coded False local; now a
                  backward-compatible keyword parameter).
     """
     # endswith copes with empty input; content[-1] raised IndexError on "".
     if not content.endswith('\n'):
         content += "\n"
     if atftype == 'cdli':
         lexer = AtfCDLILexer(debug=debug, skipinvalid=skipinvalid,
                              log=log).lexer
         parser = AtfCDLIParser(debug=debug, log=log).parser
     elif atftype == 'oracc':
         lexer = AtfOraccLexer(debug=debug, skipinvalid=skipinvalid,
                               log=log).lexer
         parser = AtfOraccParser(debug=debug, log=log).parser
     else:
         lexer = AtfLexer(debug=debug, skipinvalid=skipinvalid,
                          log=log).lexer
         parser = AtfParser(debug=debug, log=log).parser
     if debug:
         self.text = parser.parse(content, lexer=lexer, debug=log)
     else:
         self.text = parser.parse(content, lexer=lexer)
Example #6
0
 def __init__(self, content, atftype='oracc', debug=False, skip=False):
     """Parse ATF *content*, recording lexer and parser error lists.

     Parameters:
       content -- ATF source text; a trailing newline is appended if absent.
       atftype -- 'cdli', 'oracc' (default) or anything else for the
                  generic lexer/parser pair.
       debug -- when True, the log is passed through to the parser.
       skip -- forwarded to lexer and parser to skip invalid input.
     """
     # endswith copes with empty input; content[-1] raised IndexError on "".
     if not content.endswith("\n"):
         content += "\n"
     if atftype == 'cdli':
         atflexer = AtfCDLILexer(debug=debug, skip=skip, log=log)
         atfparser = AtfCDLIParser(debug=debug, skip=skip, log=log)
     elif atftype == 'oracc':
         atflexer = AtfOraccLexer(debug=debug, skip=skip, log=log)
         atfparser = AtfOraccParser(debug=debug, skip=skip, log=log)
     else:
         atflexer = AtfLexer(debug=debug, skip=skip, log=log)
         atfparser = AtfParser(debug=debug, skip=skip, log=log)
     lexer = atflexer.lexer
     parser = atfparser.parser
     # Expose the error lists collected by lexing and parsing.
     self.errors_lex = atflexer.errors
     self.errors_yacc = atfparser.errors
     if debug:
         self.text = parser.parse(content, lexer=lexer, debug=log)
     else:
         self.text = parser.parse(content, lexer=lexer)
Example #7
0
def test_resolve_keyword_no_extra():
    '''Test that resolve_keyword works correctly when extra is not passed.

    This never happens in actual code, hence this test.'''
    mylexer = AtfLexer()
    result = mylexer.resolve_keyword('obverse', mylexer.structures)
    assert result == 'OBVERSE'
Example #8
0
 def setUp(self):
     # Build a fresh PLY lexer for every test so no state leaks between tests.
     self.lexer = AtfLexer().lexer
Example #9
0
class TestLexer(TestCase):
    """A class that contains all tests of the ATFLexer"""
    def setUp(self):
        # Build a fresh PLY lexer for every test so no state leaks between tests.
        self.lexer = AtfLexer().lexer

    def compare_tokens(self,
                       content,
                       expected_types,
                       expected_values=None,
                       expected_lineno=None,
                       expected_lexpos=None):
        """Lex *content* and assert the token stream matches expectations.

        ``expected_types`` is compared against every token's ``.type``;
        the other three sequences are optional and a ``None`` entry (or an
        omitted sequence) skips that check for the corresponding token.
        """
        self.lexer.input(content)
        # Missing expectation sequences become endless streams of None so
        # that zip_longest is never terminated by them.
        if expected_values is None:
            expected_values = repeat(None)
        if expected_lineno is None:
            expected_lineno = repeat(None)
        if expected_lexpos is None:
            expected_lexpos = repeat(None)
        for e_type, e_value, e_lineno, e_lexpos, token in zip_longest(
                expected_types, expected_values, expected_lineno,
                expected_lexpos, self.lexer):
            if token is None and e_type is None:
                # Both the token stream and the expectations are exhausted.
                break
            assert token.type == e_type
            # Compare against None explicitly: falsy expectations such as
            # a lexpos of 0, a lineno of 0 or an empty-string value must
            # still be asserted, not silently skipped.
            if e_value is not None:
                assert token.value == e_value
            if e_lineno is not None:
                assert token.lineno == e_lineno
            if e_lexpos is not None:
                assert token.lexpos == e_lexpos

    def ensure_raises_and_not(self, string, nwarnings):
        """Assert *string* raises SyntaxError under strict lexing, but only
        emits *nwarnings* UserWarnings when invalid input is skipped."""
        self.lexer.input(string)
        # Strict lexing must reject the invalid input outright.
        # (The exception info was bound to an unused name before; dropped.)
        with pytest.raises(SyntaxError):
            for i in self.lexer:
                pass
        # If we allow invalid syntax this should not raise
        self.lexer = AtfLexer(skipinvalid=True).lexer
        self.lexer.input(string)
        with pytest.warns(UserWarning) as record:
            for i in self.lexer:
                pass
        assert len(record) == nwarnings

    def test_code(self):
        self.compare_tokens("&X001001 = JCS 48, 089\n",
                            ["AMPERSAND", "ID", "EQUALS", "ID", "NEWLINE"],
                            [None, "X001001", None, "JCS 48, 089"])

    def test_crlf(self):
        self.compare_tokens(
            "&X001001 = JCS 48, 089\r\n" + "#project: cams/gkab\n\r",
            ["AMPERSAND", "ID", "EQUALS", "ID", "NEWLINE"] +
            ["PROJECT", "ID", "NEWLINE"])

    def test_project(self):
        self.compare_tokens("#project: cams/gkab\n",
                            ["PROJECT", "ID", "NEWLINE"],
                            [None, "cams/gkab", None])

    def test_key(self):
        self.compare_tokens("#key: cdli=ND 02688\n",
                            ["KEY", "ID", "EQUALS", "ID", "NEWLINE"],
                            [None, "cdli", None, "ND 02688", None])

    def test_language_protocol(self):
        self.compare_tokens("#atf: lang akk-x-stdbab\n",
                            ["ATF", "LANG", "ID", "NEWLINE"],
                            [None, None, "akk-x-stdbab"])

    def test_use_unicode(self):
        self.compare_tokens("#atf: use unicode\n",
                            ["ATF", "USE", "UNICODE", "NEWLINE"])

    def test_use_math(self):
        self.compare_tokens("#atf: use math\n",
                            ["ATF", "USE", "MATH", "NEWLINE"])

    def test_use_legacy(self):
        self.compare_tokens("#atf: use legacy\n",
                            ["ATF", "USE", "LEGACY", "NEWLINE"])

    def test_bib(self):
        self.compare_tokens("#bib:  MEE 15 54\n", ["BIB", "ID", "NEWLINE"])

    def test_bib_long(self):
        # not documented but common
        self.compare_tokens("#bib:  MEE 4 73 = EV a\n",
                            ["BIB", "ID", "EQUALS", "ID", "NEWLINE"])

    def test_link(self):
        self.compare_tokens(
            "#link: def A = P363716 = TCL 06, 44\n" + "@tablet\n", [
                "LINK", "DEF", "ID", "EQUALS", "ID", "EQUALS", "ID", "NEWLINE",
                "TABLET", "NEWLINE"
            ], [None, None, "A", None, "P363716", None, "TCL 06, 44"])

    def test_link_parallel_slash(self):
        self.compare_tokens(
            "#link: parallel dcclt/obale:P274929 = IM 070209\n" + "@tablet\n",
            [
                "LINK", "PARALLEL", "ID", "EQUALS", "ID", "NEWLINE", "TABLET",
                "NEWLINE"
            ], [None, None, "dcclt/obale:P274929", None, "IM 070209"])

    def test_link_parallel(self):
        self.compare_tokens(
            "#link: parallel abcd:P363716 = TCL 06, 44\n" + "@tablet\n", [
                "LINK", "PARALLEL", "ID", "EQUALS", "ID", "NEWLINE", "TABLET",
                "NEWLINE"
            ], [None, None, "abcd:P363716", None, "TCL 06, 44"])

    def test_link_reference(self):
        self.compare_tokens("|| A o ii 10\n",
                            ["PARBAR", "ID", "ID", "ID", "ID", "NEWLINE"])

    def test_link_reference_range(self):
        self.compare_tokens("|| A o ii 10 -  o ii 12 \n", [
            "PARBAR", "ID", "ID", "ID", "ID", "MINUS", "ID", "ID", "ID",
            "NEWLINE"
        ])

    def test_link_reference_prime_range(self):
        self.compare_tokens("|| A o ii 10' -  o ii' 12 \n", [
            "PARBAR", "ID", "ID", "ID", "ID", "MINUS", "ID", "ID", "ID",
            "NEWLINE"
        ])

    def test_score(self):
        self.compare_tokens("@score matrix parsed word\n",
                            ["SCORE", "ID", "ID", "ID", "NEWLINE"])

    def test_division_tablet(self):
        self.compare_tokens("@tablet", ["TABLET"])

    def test_text_linenumber(self):
        self.compare_tokens("1.    [MU] 1.03-KAM {iti}AB GE₆ U₄ 2-KAM",
                            ["LINELABEL"] + ['ID'] * 6)

    def test_lemmatize(self):
        self.compare_tokens(
            "#lem: šatti[year]N; n; Ṭebetu[1]MN; " +
            "mūša[at night]AV; ūm[day]N; n",
            ["LEM"] + ['ID', 'SEMICOLON'] * 5 + ['ID'])

    def test_loose_dollar(self):
        self.compare_tokens("$ (a loose dollar line)",
                            ["DOLLAR", "PARENTHETICALID"],
                            [None, "(a loose dollar line)"])

    def test_loose_nested_dollar(self):
        self.compare_tokens("$ (a (very) loose dollar line)",
                            ["DOLLAR", "PARENTHETICALID"],
                            [None, "(a (very) loose dollar line)"])

    def test_loose_end_nested_dollar(self):
        self.compare_tokens("$ (a loose dollar line (wow))",
                            ["DOLLAR", "PARENTHETICALID"],
                            [None, "(a loose dollar line (wow))"])

    def test_strict_dollar(self):
        self.compare_tokens("$ reverse blank",
                            ["DOLLAR", "REFERENCE", "BLANK"])

    def test_translation_intro(self):
        self.compare_tokens("@translation parallel en project",
                            ["TRANSLATION", "PARALLEL", "ID", "PROJECT"])

    def test_translation_text(self):
        self.compare_tokens(
            "@translation parallel en project\n" +
            "1.    Year 63, Ṭebetu (Month X), night of day 2:^1^", [
                "TRANSLATION", "PARALLEL", "ID", "PROJECT", "NEWLINE",
                "LINELABEL", "ID", "HAT", "ID", "HAT"
            ], [
                None, "parallel", "en", "project", None, "1",
                "Year 63, Ṭebetu (Month X), night of day 2:", None, '1', None
            ])

    def test_translation_multiline_text(self):
        self.compare_tokens(
            "@translation parallel en project\n" +
            "1.    Year 63, Ṭebetu (Month X)\n" + " , night of day 2\n", [
                "TRANSLATION", "PARALLEL", "ID", "PROJECT", "NEWLINE",
                "LINELABEL", "ID", "NEWLINE"
            ], [
                None, "parallel", "en", "project", None, "1",
                "Year 63, Ṭebetu (Month X) , night of day 2", None
            ])

    def test_translation_labeled_text(self):
        self.compare_tokens(
            "@translation labeled en project\n" + "@label o 4\n" +
            "Then it will be taken for the rites and rituals.\n\n", [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "ID", "ID", "NEWLINE", "ID", "NEWLINE"
            ], [
                None, "labeled", "en", "project", None, None, "o", "4", None,
                'Then it will be taken for the rites and rituals.', None
            ])

    def test_translation_labeled_noted_text(self):
        self.compare_tokens(
            "@translation labeled en project\n" + "@label r 8\n" +
            "The priest says the gods have performed these actions. ^1^\n\n" +
            "@note ^1^ Parenthesised text follows Neo-Assyrian source\n", [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "ID", "ID", "NEWLINE", "ID", "HAT", "ID", "HAT", "NEWLINE",
                "NOTE", "HAT", "ID", "HAT", "ID", 'NEWLINE'
            ], [
                None, "labeled", "en", "project", None, None, "r", "8", None,
                'The priest says the gods have performed these actions.', None,
                "1", None, None, None, None, "1", None,
                "Parenthesised text follows Neo-Assyrian source"
            ])

    def test_translation_labeled_dashlabel(self):
        self.compare_tokens(
            "@translation labeled en project\n" + "@label o 14-15 - o 20\n" +
            "You strew all (kinds of) seed.\n\n", [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "ID", "ID", "MINUS", "ID", "ID", "NEWLINE", "ID", "NEWLINE"
            ], [
                None, "labeled", "en", "project", None, None, "o", "14-15",
                None, "o", "20", None
            ])

    def test_translation_labeled_atlabel(self):
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@(o 20) You strew all (kinds of) seed.\n" +
            "@(o i 2) No-one will occupy the king of Akkad's throne.\n\n", [
                "TRANSLATION",
                "LABELED",
                "ID",
                "PROJECT",
                "NEWLINE",
                "OPENR",
                "ID",
                "ID",
                "CLOSER",
                "ID",
                "NEWLINE",
                "OPENR",
                "ID",
                "ID",
                "ID",
                "CLOSER",
                "ID",
                "NEWLINE",
            ], [
                None,
                "labeled",
                "en",
                "project",
                None,
                None,
                "o",
                "20",
                None,
                "You strew all (kinds of) seed.",
                None,
                None,
                "o",
                "i",
                "2",
                None,
                "No-one will occupy the king of Akkad's throne.",
                None,
            ])

    def test_translation_range_label_prime(self):
        self.compare_tokens(
            "@translation labeled en project\n" + "@label r 1' - r 2'\n", [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "ID", "ID", "MINUS", "ID", "ID", "NEWLINE"
            ], [
                None, "labeled", "en", "project", None, None, "r", "1'", None,
                "r", "2'", None
            ])

    def test_translation_label_unicode_suffix(self):
        self.compare_tokens(
            "@translation labeled en project\n" + u'@label r A\u2081\n', [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "ID", "ID", "NEWLINE"
            ], [None, "labeled", "en", "project", None, None, "r", u"A\u2081"])

    def test_translation_label_unicode_prime(self):
        self.compare_tokens(
            "@translation labeled en project\n" + u'@label r 1\u2019\n', [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "ID", "ID", "NEWLINE"
            ], [None, "labeled", "en", "project", None, None, "r", "1'", None])

    def test_translation_label_unicode_prime2(self):
        self.compare_tokens(
            "@translation labeled en project\n" + u'@label r 1\xb4\n', [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "ID", "ID", "NEWLINE"
            ], [None, "labeled", "en", "project", None, None, "r", "1'", None])

    def test_translation_label_unicode_prime3(self):
        self.compare_tokens(
            "@translation labeled en project\n" + u'@label r 1\u2032\n', [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "ID", "ID", "NEWLINE"
            ], [None, "labeled", "en", "project", None, None, "r", "1'", None])

    def test_translation_label_unicode_prime4(self):
        self.compare_tokens(
            "@translation labeled en project\n" + u'@label r 1\u02CA\n', [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "ID", "ID", "NEWLINE"
            ], [None, "labeled", "en", "project", None, None, "r", "1'", None])

    def test_translation_range_label_plus(self):
        self.compare_tokens(
            "@translation labeled en project\n" + "@label+ o 28\n", [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "ID", "ID", "NEWLINE"
            ])

    def test_translation_label_long_reference(self):
        "Translations can have full surface names rather than single letter"
        self.compare_tokens(
            "@translation labeled en project\n" + "@label obverse 28\n", [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "REFERENCE", "ID", "NEWLINE"
            ])

    def test_translation_symbols_in_translation(self):
        self.compare_tokens(
            "@translation labeled en project\n" + "@label o 1'\n" +
            "[...] ... (zodiacal sign) 8, 10° = (sign) 12, 10°\n\n", [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "ID", "ID", "NEWLINE", "ID", "NEWLINE"
            ])

    def test_translation_ats_in_translation(self):
        self.compare_tokens(
            "@translation labeled en project\n" + "@label o 1'\n" +
            "@kupputu (means): affliction (@? and) reduction?@;" +
            " they are ... like cisterns.\n\n", [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "ID", "ID", "NEWLINE", "ID", "NEWLINE"
            ])

    def test_translation_blank_line_begins_translation(self):
        # A double newline normally ends a translation paragraph
        # But this is NOT the case at the beginning of a section,
        # Apparently.
        self.compare_tokens(
            "@translation labeled en project\n" + "@label o 16\n" + "\n" +
            "@šipir @ṭuhdu @DU means: a message of abundance" +
            " will come triumphantly.\n", [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "ID", "ID", "NEWLINE", "ID", "NEWLINE"
            ])

    def test_translation_blank_line_amid_translation(self):
        # A double newline normally ends a translation paragraph
        # But this is NOT the case at the beginning of a section,
        # Apparently.
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@(4) their [cri]mes [have been forgiven] by the king." +
            " (As to) all [the\n" + "\n" +
            "    libe]ls that [have been uttered against me " +
            "in the palace, which] he has\n" + "\n" +
            "    heard, [I am not guilty of] any [of them! " +
            "N]ow, should there be a\n", [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "OPENR",
                "ID", "CLOSER", "ID", "NEWLINE", "ID", "NEWLINE", "ID",
                "NEWLINE"
            ])

    def test_translation_no_blank_line_in_labeled_translation(self):
        # This functionality is expressly forbidden at
        # http://build.oracc.org/doc2/help/editinginatf/translations/index.html
        # But appears is in cm_31_139 anyway
        self.compare_tokens(
            "@translation labeled en project\n" + "@label o 13\n" +
            "@al-@ŋa₂-@ŋa₂ @al-@ŋa₂-@ŋa₂ @šag₄-@ba-@ni" +
            " @nu-@sed-@da (means) he will" +
            "remove (... and) he will place (...); his heart will not rest" +
            "It is said in the textual corpus of the lamentation-priests.\n" +
            "@label o 15\n" + "Text\n\n", [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "ID", "ID", "NEWLINE", "ID", "NEWLINE", "LABEL", "ID", "ID",
                "NEWLINE", "ID", "NEWLINE"
            ])

    def test_translation_ATlines_in_translation(self):
        # @ within Translations mark Foreign
        # http://oracc.museum.upenn.edu/doc/help/editinginatf/translations/index.html
        self.compare_tokens(
            "@translation labeled en project\n" + "@obverse\n" +
            "1'. @MUD (means) trembling. @MUD (means) dark.", [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
                "OBVERSE", "NEWLINE", "ID"
            ])

    def test_translation_range_label_periods(self):
        self.compare_tokens(
            "@translation labeled en project\n" + "@label t.e. 1\n", [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "ID", "ID", "NEWLINE"
            ], [None, "labeled", "en", "project", None, None, "t.e.", "1"])

    def test_interlinear_translation(self):
        self.compare_tokens("@tablet\n" + "1'. ⸢x⸣\n" + "#tr: English\n", [
            "TABLET", "NEWLINE", "LINELABEL", "ID", "NEWLINE", "TR", "ID",
            "NEWLINE"
        ])

    def test_multilineinterlinear_translation(self):
        self.compare_tokens(
            "@tablet\n" + "1'. ⸢x⸣\n" + "#tr: English\n" +
            " on multiple lines\n", [
                "TABLET", "NEWLINE", "LINELABEL", "ID", "NEWLINE", "TR", "ID",
                "NEWLINE"
            ])

    def test_note_internalflag(self):
        self.compare_tokens("@note Hello James's World", ["NOTE", "ID"],
                            [None, "Hello James's World"])

    def test_note_internalspace(self):
        self.compare_tokens("@note Hello James", ["NOTE", "ID"],
                            [None, "Hello James"])

    def test_note_onechar(self):
        self.compare_tokens("@note H", ["NOTE", "ID"], [None, "H"])

    def test_note_short(self):
        self.compare_tokens("@note I'm", ["NOTE", "ID"], [None, "I'm"])

    def test_division_note(self):
        self.compare_tokens(
            "@note ^1^ A note to the translation.\n",
            ["NOTE", "HAT", "ID", "HAT", "ID", "NEWLINE"],
            [None, None, "1", None, "A note to the translation.", None])

    def test_hash_note(self):
        self.compare_tokens(
            "@tablet\n" + "@obverse\n" +
            "3.    U₄!-BI? 20* [(ina)] 9.30 ina(DIŠ) MAŠ₂!(BAR)\n" +
            "#note: Note to line.\n",
            ["TABLET", "NEWLINE", "OBVERSE", "NEWLINE", "LINELABEL"] +
            ["ID"] * 6 + ["NEWLINE", "NOTE", "ID", "NEWLINE"])

    def test_hash_note_UPPERCASE(self):
        # Some files in the corpus such as ctn_4_168.atf
        # Contains #NOTE: even if the line should be #note:
        self.compare_tokens(
            "@tablet\n" + "@obverse\n" +
            "3.    U₄!-BI? 20* [(ina)] 9.30 ina(DIŠ) MAŠ₂!(BAR)\n" +
            "#NOTE: Note to line.\n",
            ["TABLET", "NEWLINE", "OBVERSE", "NEWLINE", "LINELABEL"] +
            ["ID"] * 6 + ["NEWLINE", "NOTE", "ID", "NEWLINE"])

    def test_open_text_with_dots(self):
        # This must not come out as a linelabel of Hello.
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@label o 1\nHello. World\n\n", [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "ID", "ID", "NEWLINE", "ID", "NEWLINE"
            ])

    def test_flagged_object(self):
        self.compare_tokens("@object which is remarkable and broken!#\n",
                            ["OBJECT", "ID", "EXCLAIM", "HASH", "NEWLINE"])

    def test_comment(self):
        self.compare_tokens("# I've added various things for test purposes\n",
                            ['COMMENT', "ID", "NEWLINE"])

    def test_nospace_comment(self):
        self.compare_tokens("#I've added various things for test purposes\n",
                            ['COMMENT', "ID", "NEWLINE"])

    def test_check_comment(self):
        self.compare_tokens(
            "#CHECK: I've added various things for test purposes\n",
            ['CHECK', "ID", "NEWLINE"])

    def test_dotline(self):
        self.compare_tokens(". \n", ['NEWLINE'])

    def test_translation_heading(self):
        self.compare_tokens(
            "@translation parallel en project\n" +
            "@h1 A translation heading\n",
            ["TRANSLATION", "PARALLEL", "ID", "PROJECT", "NEWLINE"] +
            ["HEADING", "ID", "NEWLINE"])

    def test_heading(self):
        self.compare_tokens("@obverse\n" + "@h1 A heading\n",
                            ["OBVERSE", "NEWLINE"] +
                            ["HEADING", "ID", "NEWLINE"])

    def test_double_comment(self):
        """Not sure if this is correct; but can't find
        anything in structure or lemmatization doc"""
        self.compare_tokens(
            "## papān libbi[belly] (already in gloss, same spelling)\n",
            ['COMMENT', 'ID', 'NEWLINE'])

    def test_ruling(self):
        self.compare_tokens("$ single ruling", ["DOLLAR", "SINGLE", "RULING"])

    def test_described_object(self):
        self.compare_tokens("@object An object that fits no other category\n",
                            ["OBJECT", "ID", "NEWLINE"],
                            [None, "An object that fits no other category"])

    def test_nested_object(self):
        self.compare_tokens("@tablet\n" + "@obverse\n",
                            ["TABLET", "NEWLINE", "OBVERSE", "NEWLINE"])

    def test_object_line(self):
        self.compare_tokens(
            "@tablet\n" + "@obverse\n" +
            "1.    [MU] 1.03-KAM {iti}AB GE₆ U₄ 2-KAM\n"
            "#lem: šatti[year]N; n; Ṭebetu[1]MN; mūša[at night]AV; " +
            "ūm[day]N; n\n",
            ['TABLET', 'NEWLINE', "OBVERSE", 'NEWLINE', 'LINELABEL'] +
            ['ID'] * 6 + ['NEWLINE', 'LEM'] + ['ID', 'SEMICOLON'] * 5 +
            ['ID', "NEWLINE"])

    def test_dot_in_linelabel(self):
        self.compare_tokens("1.1.    [MU]\n", ['LINELABEL', 'ID', 'NEWLINE'])

    def test_score_lines(self):
        self.compare_tokens(
            "@score matrix parsed\n" +
            "1.4′. %n ḫašḫūr [api] lal[laga imḫur-līm?]\n" +
            "#lem: ḫašḫūr[apple (tree)]N; api[reed-bed]N\n\n" +
            "A₁_obv_i_4′: [x x x x x] {ú}la-al-[la-ga? {ú}im-ḫu-ur-lim?]\n" +
            "#lem: u; u; u; u; u; " +
            "+lalangu[(a leguminous vegetable)]N$lallaga\n\n" +
            "e_obv_15′–16′: {giš}ḪAŠḪUR [GIŠ.GI] — // [{ú}IGI-lim]\n" +
            "#lem: +hašhūru[apple (tree)]N$hašhūr; api[reed-bed]N;" +
            " imhur-līm['heals-a-thousand'-plant]N\n\n",
            ['SCORE', 'ID', 'ID', "NEWLINE"] + ['LINELABEL'] + ['ID'] * 5 +
            ['NEWLINE'] + ['LEM', 'ID', 'SEMICOLON', 'ID', 'NEWLINE'] +
            ['SCORELABEL'] + ['ID'] * 7 + ['NEWLINE'] + ['LEM'] +
            ['ID', 'SEMICOLON'] * 5 + ['ID', 'NEWLINE'] + ['SCORELABEL'] +
            ['ID'] * 5 + ['NEWLINE'] + ['LEM'] + ['ID', 'SEMICOLON'] * 2 +
            ['ID', 'NEWLINE'])

    def test_composite(self):
        self.compare_tokens(
            "&Q002769 = SB Anzu 1\n" + "@composite\n" +
            "#project: cams/gkab\n" +
            "1.   bi#-in šar da-ad-mi šu-pa-a na-ram {d}ma#-mi\n" +
            "&Q002770 = SB Anzu 2\n" + "#project: cams/gkab\n" +
            "1.   bi-riq ur-ha šuk-na a-dan-na\n",
            ["AMPERSAND", "ID", "EQUALS", "ID", "NEWLINE"] +
            ['COMPOSITE', 'NEWLINE'] + ["PROJECT", "ID", "NEWLINE"] +
            ["LINELABEL"] + ['ID'] * 6 + ['NEWLINE'] +
            ["AMPERSAND", "ID", "EQUALS", "ID", "NEWLINE"] +
            ["PROJECT", "ID", "NEWLINE"] + ["LINELABEL"] + ['ID'] * 4 +
            ["NEWLINE"])

    def test_translated_composite(self):
        self.compare_tokens(
            "&Q002769 = SB Anzu 1\n" + "@composite\n" +
            "#project: cams/gkab\n" +
            "1.   bi#-in šar da-ad-mi šu-pa-a na-ram {d}ma#-mi\n" +
            "@translation labeled en project\n" + "@(1) English\n"
            "&Q002770 = SB Anzu 2\n" + "#project: cams/gkab\n" +
            "1.   bi-riq ur-ha šuk-na a-dan-na\n",
            ["AMPERSAND", "ID", "EQUALS", "ID", "NEWLINE"] +
            ['COMPOSITE', 'NEWLINE'] + ["PROJECT", "ID", "NEWLINE"] +
            ["LINELABEL"] + ['ID'] * 6 + ['NEWLINE'] +
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE"] +
            ["OPENR", "ID", "CLOSER", "ID", "NEWLINE"] +
            ["AMPERSAND", "ID", "EQUALS", "ID", "NEWLINE"] +
            ["PROJECT", "ID", "NEWLINE"] + ["LINELABEL"] + ['ID'] * 4 +
            ["NEWLINE"])

    def test_equalbrace(self):
        self.compare_tokens(
            "@tablet\n" + "@reverse\n" + "2'.    ITI# an-ni-u2#\n" +
            "={    ur-hu\n",
            ['TABLET', "NEWLINE"] + ["REVERSE", "NEWLINE"] + ["LINELABEL"] +
            ['ID'] * 2 + ["NEWLINE"] + ["EQUALBRACE", "ID", "NEWLINE"])

    def test_multilingual_interlinear(self):
        self.compare_tokens(
            "@tablet\n" + "@obverse\n" + "1. dim₃#-me-er# [...]\n" +
            "#lem: diŋir[deity]N; u\n" +
            "== %sb DINGIR-MEŠ GAL#-MEŠ# [...]\n" +
            "#lem: ilū[god]N; rabûtu[great]AJ; u\n" +
            "# ES dim₃-me-er = diŋir\n" + "|| A o ii 15\n",
            ['TABLET', "NEWLINE"] + ["OBVERSE", "NEWLINE"] + ["LINELABEL"] +
            ['ID'] * 2 + ["NEWLINE"] + ["LEM"] + ["ID", "SEMICOLON"] + ["ID"] +
            ["NEWLINE"] + ["MULTILINGUAL", "ID"] + ["ID"] * 3 + ["NEWLINE"] +
            ["LEM"] + ["ID", "SEMICOLON"] * 2 + ["ID"] + ["NEWLINE"] +
            ["COMMENT", "ID", "NEWLINE"] +
            ["PARBAR", "ID", "ID", "ID", "ID", "NEWLINE"])

    def test_strict_in_parallel(self):
        self.compare_tokens(
            "@translation parallel en project\n" + "$ reverse blank",
            ["TRANSLATION", "PARALLEL", "ID", "PROJECT", "NEWLINE"] +
            ["DOLLAR", "ID"])

    def test_query_in_parallel(self):
        """The parallel ID regex was too general and identified ? after
            @obverse as an ID token, not a QUERY."""
        self.compare_tokens(
            "@translation parallel en project\n" + "@obverse?",
            ["TRANSLATION", "PARALLEL", "ID", "PROJECT", "NEWLINE"] +
            ["OBVERSE", "QUERY"])

    def test_loose_in_labeled(self):
        self.compare_tokens(
            "@translation labeled en project\n" + "$ (Break)\n" +
            "@(r 2) I am\n\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE"] +
            ["DOLLAR", "ID", "NEWLINE"] +
            ["OPENR", "ID", "ID", "CLOSER", "ID", "NEWLINE"])

    def test_ati_in_translation(self):
        self.compare_tokens(
            "@translation labeled en project\n" + "@(r 2) I am\n" +
            "@i{eššēšu-}festival\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE"] +
            ["OPENR", "ID", "ID", "CLOSER", "ID", "NEWLINE"])

    def test_blank_after_para_transctrl_windows(self):
        """[...] should not exit the para state, but did previously
           due to a regex that was not strict enough."""
        self.compare_tokens(
            "@translation labeled en project\r\n" + "@(o i 1')\r\n" +
            "[...]\r\n\r\n" + "@(o i 2')\r\n" +
            "[... you put] inside [his] ears [and the evil] " +
            "afflicting his head [will be eradicated].\r\n\r\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE"] +
            ["OPENR", "ID", "ID", "ID", "CLOSER"] + ["ID", "NEWLINE"] +
            ["OPENR", "ID", "ID", "ID", "CLOSER"] + ["ID", "NEWLINE"])

    def test_blank_after_para_transctrl(self):
        """[...] should not exit the para state, but previously did,
           due to an insufficiently strict regex."""
        self.compare_tokens(
            "@translation labeled en project\n" + "@(o i 1)\n" +
            "[(If) in] Tašritu (month VII), on day 1, " +
            "a solar eclipse takes place: [...].\n\n" + "@(o i 2)\n" +
            "[...], on day 7, a solar eclipse takes place: [...].\n\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE"] +
            ["OPENR", "ID", "ID", "ID", "CLOSER"] + ["ID", "NEWLINE"] +
            ["OPENR", "ID", "ID", "ID", "CLOSER"] + ["ID", "NEWLINE"])

    def test_strict_in_labelled_parallel(self):
        """Strict $-lines also lex loosely inside labeled translations."""
        self.compare_tokens(
            "@translation labeled en project\n$ reverse blank",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "DOLLAR", "ID"])

    def test_strict_as_loose_in_translation(self):
        """A $-line that is not strict-form lexes as one loose ID."""
        self.compare_tokens(
            "@translation parallel en project\n"
            "$ Continued in text no. 2\n",
            ["TRANSLATION", "PARALLEL", "ID", "PROJECT", "NEWLINE",
             "DOLLAR", "ID", "NEWLINE"])

    def test_punctuated_translation(self):
        """Punctuation in translation text stays inside the ID value."""
        expected_types = ["TRANSLATION", "PARALLEL", "ID", "PROJECT",
                          "NEWLINE", "LINELABEL", "ID", "NEWLINE"]
        expected_values = [None, None, "en", None, None,
                           "1", "'What is going on?', said the King!", None]
        self.compare_tokens(
            "@translation parallel en project\n"
            "1. 'What is going on?', said the King!\n",
            expected_types, expected_values)

    def test_translation_note(self):
        """#note: lines are recognised inside a parallel translation."""
        self.compare_tokens(
            "@translation parallel en project\n"
            "@reverse\n"
            "#note: reverse uninscribed\n",
            ["TRANSLATION", "PARALLEL", "ID", "PROJECT", "NEWLINE",
             "REVERSE", "NEWLINE",
             "NOTE", "ID", "NEWLINE"])

    def test_equals_in_translation_note(self):
        # An equals sign inside a #note: body must stay part of the note's
        # single ID value, not lex as a separate EQUALS token.
        self.compare_tokens(
            "@translation parallel en project\n" + "@reverse\n" +
            '#note: The CAD translation šarriru = "humble",\n',
            ["TRANSLATION", "PARALLEL", "ID", "PROJECT", "NEWLINE"] +
            ["REVERSE", "NEWLINE"] + ["NOTE", "ID", "NEWLINE"])

    def test_note_ended_by_strucuture(self):
        # A following @-structure line (@reverse) terminates the note body.
        # NOTE(review): method name has a long-standing typo ("strucuture");
        # kept as-is to preserve the public test name.
        self.compare_tokens(
            "@translation parallel en project\n" + "@obverse\n" +
            '#note: The CAD translation šarriru = "humble",\n' + '@reverse',
            ["TRANSLATION", "PARALLEL", "ID", "PROJECT", "NEWLINE"] +
            ["OBVERSE", "NEWLINE"] + ["NOTE", "ID", "NEWLINE"] + ["REVERSE"])

    def test_milestone(self):
        """@m=locator milestones lex as M, EQUALS, then the locator ID."""
        self.compare_tokens(
            "@tablet\n"
            "@obverse\n"
            "@m=locator catchline\n"
            "16'. si-i-ia-a-a-ku\n",
            ["TABLET", "NEWLINE", "OBVERSE", "NEWLINE",
             "M", "EQUALS", "ID", "NEWLINE",
             "LINELABEL", "ID", "NEWLINE"])

    def test_include(self):
        """@include lines carry a source ID, EQUALS and a reference ID."""
        self.compare_tokens(
            "@tablet\n"
            "@obverse\n"
            "@include dcclt:P229061 = MSL 07, 197 V02, 210 V11\n",
            ["TABLET", "NEWLINE", "OBVERSE", "NEWLINE",
             "INCLUDE", "ID", "EQUALS", "ID", "NEWLINE"])

    def test_double_newline_and_lexpos(self):
        # A blank line is absorbed into the preceding NEWLINE token's value
        # ("\n\n"); lineno and lexpos must still point at each token's
        # origin in the raw input.
        self.compare_tokens("@obverse\n" + "\n" + "#note:\n",
                            ["OBVERSE", "NEWLINE", "NOTE", "NEWLINE"],
                            ["obverse", "\n\n", "note", "\n"], [1, 1, 3, 3],
                            [1, 8, 11, 16])

    def test_blankline_with_tab_inadsorb(self):
        # A "blank" line containing only a tab is absorbed into the previous
        # NEWLINE token's value ("\n\t\n") rather than breaking the lex.
        # (The second fixture line below is a literal tab character.)
        self.compare_tokens(
            "# ES mu-lu = lu₂, ša₃-ab = šag\n" + "	\n" + "7. keš₂-da",
            ["COMMENT", "ID", "NEWLINE", "LINELABEL", "ID"],
            ["#", "ES mu-lu = lu₂, ša₃-ab = šag", "\n\t\n", '7', "keš₂-da"])

    def test_invalid_at_raises_syntax_error(self):
        """A misspelled @-keyword raises, or warns once when skipping."""
        self.ensure_raises_and_not(u"@obversel\n", nwarnings=1)

    def test_invalid_hash_raises_syntax_error(self):
        # "#lems:" is not a valid protocol ("#lem:" is). Expects two
        # warnings in skip mode — presumably one per unlexable fragment;
        # TODO(review) confirm against AtfLexer's skipinvalid handling.
        string = u"#lems: Ṣalbatanu[Mars]CN\n"
        self.ensure_raises_and_not(string, nwarnings=2)

    def test_invalid_id_syntax_error(self):
        # A bare transliteration with no line label or protocol prefix is
        # invalid at the top level.
        string = u"Ṣalbatanu[Mars]CN\n"
        self.ensure_raises_and_not(string, nwarnings=1)

    @staticmethod
    def test_resolve_keyword_no_extra():
        '''Test that resolve_keyword works correctly when extra is not
        passed. This never happens in actual code. Hence this test.'''
        mylexer = AtfLexer()
        result = mylexer.resolve_keyword('obverse', mylexer.structures)
        assert result == 'OBVERSE'
Example #10
0
 def setUp(self):
     self.lexer = AtfLexer().lexer
Example #11
0
class TestLexer(TestCase):
    """A class that contains all tests of the ATFLexer"""
    def setUp(self):
        # Build a fresh PLY lexer for every test so lexer state (e.g. the
        # current lexing mode) cannot leak between tests.
        self.lexer = AtfLexer().lexer

    def compare_tokens(self, content, expected_types, expected_values=None,
                       expected_lineno=None, expected_lexpos=None):
        """Lex *content* and compare the token stream against expectations.

        ``expected_types`` is checked for every token.  The other three
        sequences are optional; when omitted they default to an infinite
        stream of ``None`` so every per-token check is skipped.  They may
        also be shorter than the token stream: ``zip_longest`` pads the
        tail with ``None``.
        """
        self.lexer.input(content)
        if expected_values is None:
            expected_values = repeat(None)
        if expected_lineno is None:
            expected_lineno = repeat(None)
        if expected_lexpos is None:
            expected_lexpos = repeat(None)
        for e_type, e_value, e_lineno, e_lexpos, token in zip_longest(
                expected_types,
                expected_values,
                expected_lineno,
                expected_lexpos,
                self.lexer):
            # Both the token stream and the expectations ran out together.
            if token is None and e_type is None:
                break
            assert token.type == e_type
            # NOTE(review): truthiness tests mean falsy expectations ("", 0)
            # are skipped, not asserted; callers only ever pass None as the
            # "don't check" placeholder, so this is safe as used.
            if e_value:
                assert token.value == e_value
            if e_lineno:
                assert token.lineno == e_lineno
            if e_lexpos:
                assert token.lexpos == e_lexpos

    def ensure_raises_and_not(self, string, nwarnings):
        """Check invalid input raises normally but only warns when skipped.

        Lexing *string* with the default lexer must raise SyntaxError;
        re-lexing it with ``AtfLexer(skipinvalid=True)`` must instead emit
        exactly *nwarnings* UserWarnings.
        """
        self.lexer.input(string)
        # The excinfo binding was unused, so don't capture it.
        with pytest.raises(SyntaxError):
            for _ in self.lexer:
                pass
        # If we allow invalid syntax this should not raise.
        self.lexer = AtfLexer(skipinvalid=True).lexer
        self.lexer.input(string)
        with pytest.warns(UserWarning) as record:
            for _ in self.lexer:
                pass
        assert len(record) == nwarnings

    def test_code(self):
        """The &-line: catalogue code, EQUALS, publication reference."""
        expected_types = ["AMPERSAND", "ID", "EQUALS", "ID", "NEWLINE"]
        expected_values = [None, "X001001", None, "JCS 48, 089"]
        self.compare_tokens("&X001001 = JCS 48, 089\n",
                            expected_types, expected_values)

    def test_crlf(self):
        """CRLF and reversed LF-CR line endings both lex as one NEWLINE."""
        content = ("&X001001 = JCS 48, 089\r\n"
                   "#project: cams/gkab\n\r")
        self.compare_tokens(
            content,
            ["AMPERSAND", "ID", "EQUALS", "ID", "NEWLINE",
             "PROJECT", "ID", "NEWLINE"])

    def test_project(self):
        """#project: protocol yields PROJECT plus the project path."""
        self.compare_tokens("#project: cams/gkab\n",
                            ["PROJECT", "ID", "NEWLINE"],
                            [None, "cams/gkab", None])

    def test_key(self):
        """#key: protocol lexes as key name, EQUALS, value."""
        self.compare_tokens("#key: cdli=ND 02688\n",
                            ["KEY", "ID", "EQUALS", "ID", "NEWLINE"],
                            [None, "cdli", None, "ND 02688", None])

    def test_language_protocol(self):
        self.compare_tokens(
            "#atf: lang akk-x-stdbab\n",
            ["ATF", "LANG", "ID", "NEWLINE"],
            [None, None, "akk-x-stdbab"]
        )

    def test_use_unicode(self):
        """#atf: use unicode protocol line."""
        self.compare_tokens("#atf: use unicode\n",
                            ["ATF", "USE", "UNICODE", "NEWLINE"])

    def test_use_math(self):
        """#atf: use math protocol line."""
        self.compare_tokens("#atf: use math\n",
                            ["ATF", "USE", "MATH", "NEWLINE"])

    def test_use_legacy(self):
        """#atf: use legacy protocol line."""
        self.compare_tokens("#atf: use legacy\n",
                            ["ATF", "USE", "LEGACY", "NEWLINE"])

    def test_bib(self):
        """#bib: bibliography protocol; extra spaces are tolerated."""
        self.compare_tokens("#bib:  MEE 15 54\n",
                            ["BIB", "ID", "NEWLINE"])

    def test_bib_long(self):
        # Undocumented but common: an "=" alias inside a #bib: line.
        self.compare_tokens("#bib:  MEE 4 73 = EV a\n",
                            ["BIB", "ID", "EQUALS", "ID", "NEWLINE"])

    def test_link(self):
        self.compare_tokens(
            "#link: def A = P363716 = TCL 06, 44\n" +
            "@tablet\n",
            ["LINK", "DEF", "ID", "EQUALS", "ID", "EQUALS", "ID", "NEWLINE",
             "TABLET", "NEWLINE"],
            [None, None, "A", None, "P363716", None, "TCL 06, 44"]
        )

    def test_link_parallel_slash(self):
        self.compare_tokens(
            "#link: parallel dcclt/obale:P274929 = IM 070209\n" +
            "@tablet\n",
            ["LINK", "PARALLEL", "ID", "EQUALS", "ID", "NEWLINE",
             "TABLET", "NEWLINE"],
            [None, None, "dcclt/obale:P274929", None, "IM 070209"]
        )

    def test_link_parallel(self):
        self.compare_tokens(
            "#link: parallel abcd:P363716 = TCL 06, 44\n" +
            "@tablet\n",
            ["LINK", "PARALLEL", "ID", "EQUALS", "ID", "NEWLINE",
             "TABLET", "NEWLINE"],
            [None, None, "abcd:P363716", None, "TCL 06, 44"]
        )

    def test_link_reference(self):
        """A || link-reference line: PARBAR followed by four IDs."""
        self.compare_tokens("|| A o ii 10\n",
                            ["PARBAR", "ID", "ID", "ID", "ID", "NEWLINE"])

    def test_link_reference_range(self):
        self.compare_tokens(
            "|| A o ii 10 -  o ii 12 \n",
            ["PARBAR", "ID", "ID", "ID", "ID", "MINUS",
             "ID", "ID", "ID", "NEWLINE"]
        )

    def test_link_reference_prime_range(self):
        self.compare_tokens(
            "|| A o ii 10' -  o ii' 12 \n",
            ["PARBAR", "ID", "ID", "ID", "ID", "MINUS",
             "ID", "ID", "ID", "NEWLINE"]
        )

    def test_score(self):
        """@score header followed by its three option words."""
        self.compare_tokens("@score matrix parsed word\n",
                            ["SCORE", "ID", "ID", "ID", "NEWLINE"])

    def test_division_tablet(self):
        """A bare @tablet division with no trailing newline."""
        self.compare_tokens("@tablet", ["TABLET"])

    def test_text_linenumber(self):
        self.compare_tokens(
            "1.    [MU] 1.03-KAM {iti}AB GE₆ U₄ 2-KAM",
            ["LINELABEL"] + ['ID'] * 6
        )

    def test_lemmatize(self):
        self.compare_tokens(
            "#lem: šatti[year]N; n; Ṭebetu[1]MN; " +
            "mūša[at night]AV; ūm[day]N; n",
            ["LEM"] + ['ID', 'SEMICOLON'] * 5 + ['ID']
        )

    def test_loose_dollar(self):
        """A parenthesised $-line lexes as a single PARENTHETICALID."""
        self.compare_tokens("$ (a loose dollar line)",
                            ["DOLLAR", "PARENTHETICALID"],
                            [None, "(a loose dollar line)"])

    def test_loose_nested_dollar(self):
        self.compare_tokens(
            "$ (a (very) loose dollar line)",
            ["DOLLAR", "PARENTHETICALID"],
            [None, "(a (very) loose dollar line)"]
        )

    def test_loose_end_nested_dollar(self):
        self.compare_tokens(
            "$ (a loose dollar line (wow))",
            ["DOLLAR", "PARENTHETICALID"],
            [None, "(a loose dollar line (wow))"]
        )

    def test_strict_dollar(self):
        """Strict $-line keywords get dedicated token types."""
        self.compare_tokens("$ reverse blank",
                            ["DOLLAR", "REFERENCE", "BLANK"])

    def test_translation_intro(self):
        """The @translation header line, without a trailing newline."""
        self.compare_tokens("@translation parallel en project",
                            ["TRANSLATION", "PARALLEL", "ID", "PROJECT"])

    def test_translation_text(self):
        self.compare_tokens(
            "@translation parallel en project\n" +
            "1.    Year 63, Ṭebetu (Month X), night of day 2:^1^",
            ["TRANSLATION", "PARALLEL", "ID", "PROJECT", "NEWLINE",
             "LINELABEL", "ID", "HAT", "ID", "HAT"],
            [None, "parallel", "en", "project", None,
             "1", "Year 63, Ṭebetu (Month X), night of day 2:",
             None, '1', None]
        )

    def test_translation_multiline_text(self):
        self.compare_tokens(
            "@translation parallel en project\n" +
            "1.    Year 63, Ṭebetu (Month X)\n" +
            " , night of day 2\n",
            ["TRANSLATION", "PARALLEL", "ID", "PROJECT", "NEWLINE",
             "LINELABEL", "ID", "NEWLINE"],
            [None, "parallel", "en", "project", None,
             "1", "Year 63, Ṭebetu (Month X) , night of day 2", None]
        )

    def test_translation_labeled_text(self):
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@label o 4\n" +
            "Then it will be taken for the rites and rituals.\n\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "ID", "ID", "NEWLINE",
             "ID", "NEWLINE"],
            [None, "labeled", "en", "project", None,
             None, "o", "4", None,
             'Then it will be taken for the rites and rituals.', None]
        )

    def test_translation_labeled_noted_text(self):
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@label r 8\n" +
            "The priest says the gods have performed these actions. ^1^\n\n" +
            "@note ^1^ Parenthesised text follows Neo-Assyrian source\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "ID", "ID", "NEWLINE",
             "ID", "HAT", "ID", "HAT", "NEWLINE",
             "NOTE", "HAT", "ID", "HAT", "ID", 'NEWLINE'],
            [None, "labeled", "en", "project", None,
             None, "r", "8", None,
             'The priest says the gods have performed these actions.',
             None, "1", None, None,
             None, None, "1", None,
             "Parenthesised text follows Neo-Assyrian source"]

        )

    def test_translation_labeled_dashlabel(self):
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@label o 14-15 - o 20\n" +
            "You strew all (kinds of) seed.\n\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "ID", "ID", "MINUS", "ID", "ID", "NEWLINE",
             "ID", "NEWLINE"],
            [None, "labeled", "en", "project", None,
             None, "o", "14-15", None, "o", "20", None]
        )

    def test_translation_labeled_atlabel(self):
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@(o 20) You strew all (kinds of) seed.\n" +
            "@(o i 2) No-one will occupy the king of Akkad's throne.\n\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "OPENR", "ID", "ID", "CLOSER", "ID", "NEWLINE",
             "OPENR", "ID", "ID", "ID", "CLOSER", "ID", "NEWLINE", ],
            [None, "labeled", "en", "project", None,
             None, "o", "20", None, "You strew all (kinds of) seed.", None,
             None, "o", "i", "2", None,
             "No-one will occupy the king of Akkad's throne.", None, ]
        )

    def test_translation_range_label_prime(self):
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@label r 1' - r 2'\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "ID", "ID", "MINUS", "ID", "ID", "NEWLINE"],
            [None, "labeled", "en", "project", None,
             None, "r", "1'", None, "r", "2'", None]
        )

    def test_translation_label_unicode_suffix(self):
        self.compare_tokens(
            "@translation labeled en project\n" +
            u'@label r A\u2081\n',
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "ID", "ID", "NEWLINE"],
            [None, "labeled", "en", "project", None,
             None, "r", u"A\u2081"]
        )

    def test_translation_label_unicode_prime(self):
        self.compare_tokens(
            "@translation labeled en project\n" +
            u'@label r 1\u2019\n',
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "ID", "ID", "NEWLINE"],
            [None, "labeled", "en", "project", None,
             None, "r", "1'", None]
        )

    def test_translation_label_unicode_prime2(self):
        self.compare_tokens(
            "@translation labeled en project\n" +
            u'@label r 1\xb4\n',
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "ID", "ID", "NEWLINE"],
            [None, "labeled", "en", "project", None,
             None, "r", "1'", None]
        )

    def test_translation_label_unicode_prime3(self):
        self.compare_tokens(
            "@translation labeled en project\n" +
            u'@label r 1\u2032\n',
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "ID", "ID", "NEWLINE"],
            [None, "labeled", "en", "project", None,
             None, "r", "1'", None]
        )

    def test_translation_label_unicode_prime4(self):
        self.compare_tokens(
            "@translation labeled en project\n" +
            u'@label r 1\u02CA\n',
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "ID", "ID", "NEWLINE"],
            [None, "labeled", "en", "project", None,
             None, "r", "1'", None]
        )

    def test_translation_range_label_plus(self):
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@label+ o 28\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "ID", "ID", "NEWLINE"]
        )

    def test_translation_label_long_reference(self):
        "Translations can have full surface names rather than single letter"
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@label obverse 28\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "REFERENCE", "ID", "NEWLINE"]
        )

    def test_translation_symbols_in_translation(self):
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@label o 1'\n" +
            "[...] ... (zodiacal sign) 8, 10° = (sign) 12, 10°\n\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "ID", "ID", "NEWLINE",
             "ID", "NEWLINE"]
        )

    def test_translation_ats_in_translation(self):
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@label o 1'\n" +
            "@kupputu (means): affliction (@? and) reduction?@;" +
            " they are ... like cisterns.\n\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "ID", "ID", "NEWLINE",
             "ID", "NEWLINE"]
        )

    def test_translation_blank_line_begins_translation(self):
        # A double newline normally ends a translation paragraph
        # But this is NOT the case at the beginning of a section,
        # Apparently.
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@label o 16\n" +
            "\n" +
            "@šipir @ṭuhdu @DU means: a message of abundance" +
            " will come triumphantly.\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "ID", "ID", "NEWLINE",
             "ID", "NEWLINE"]
        )

    def test_translation_blank_line_amid_translation(self):
        # A double newline normally ends a translation paragraph
        # But this is NOT the case at the beginning of a section,
        # Apparently.
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@(4) their [cri]mes [have been forgiven] by the king." +
            " (As to) all [the\n" +
            "\n" +
            "    libe]ls that [have been uttered against me " +
            "in the palace, which] he has\n" +
            "\n" +
            "    heard, [I am not guilty of] any [of them! " +
            "N]ow, should there be a\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "OPENR", "ID", "CLOSER", "ID", "NEWLINE",
             "ID", "NEWLINE", "ID", "NEWLINE"]
        )

    def test_translation_no_blank_line_in_labeled_translation(self):
        # This functionality is expressly forbidden at
        # http://build.oracc.org/doc2/help/editinginatf/translations/index.html
        # But appears is in cm_31_139 anyway
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@label o 13\n" +
            "@al-@ŋa₂-@ŋa₂ @al-@ŋa₂-@ŋa₂ @šag₄-@ba-@ni" +
            " @nu-@sed-@da (means) he will" +
            "remove (... and) he will place (...); his heart will not rest" +
            "It is said in the textual corpus of the lamentation-priests.\n" +
            "@label o 15\n" +
            "Text\n\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "ID", "ID", "NEWLINE",
             "ID", "NEWLINE",
             "LABEL", "ID", "ID", "NEWLINE",
             "ID", "NEWLINE"]
        )

    def test_translation_ATlines_in_translation(self):
        # @ within Translations mark Foreign
        # http://oracc.museum.upenn.edu/doc/help/editinginatf/translations/index.html
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@obverse\n" +
            "1'. @MUD (means) trembling. @MUD (means) dark.",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "OBVERSE", "NEWLINE", "ID"]
        )

    def test_translation_range_label_periods(self):
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@label t.e. 1\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "ID", "ID", "NEWLINE"],
            [None, "labeled", "en", "project", None,
             None, "t.e.", "1"])

    def test_interlinear_translation(self):
        self.compare_tokens(
            "@tablet\n" +
            "1'. ⸢x⸣\n" +
            "#tr: English\n",
            ["TABLET", "NEWLINE",
             "LINELABEL", "ID", "NEWLINE",
             "TR", "ID", "NEWLINE"])

    def test_multilineinterlinear_translation(self):
        self.compare_tokens(
            "@tablet\n" +
            "1'. ⸢x⸣\n" +
            "#tr: English\n" +
            " on multiple lines\n",
            ["TABLET", "NEWLINE",
             "LINELABEL", "ID", "NEWLINE",
             "TR", "ID", "NEWLINE"])

    def test_note_internalflag(self):
        self.compare_tokens(
            "@note Hello James's World",
            ["NOTE", "ID"],
            [None, "Hello James's World"]
        )

    def test_note_internalspace(self):
        self.compare_tokens(
            "@note Hello James",
            ["NOTE", "ID"],
            [None, "Hello James"]
        )

    def test_note_onechar(self):
        self.compare_tokens(
            "@note H",
            ["NOTE", "ID"],
            [None, "H"]
        )

    def test_note_short(self):
        self.compare_tokens(
            "@note I'm",
            ["NOTE", "ID"],
            [None, "I'm"]
        )

    def test_division_note(self):
        self.compare_tokens(
            "@note ^1^ A note to the translation.\n",
            ["NOTE", "HAT", "ID", "HAT", "ID", "NEWLINE"],
            [None, None, "1", None, "A note to the translation.", None]
        )

    def test_hash_note(self):
        self.compare_tokens(
            "@tablet\n" +
            "@obverse\n" +
            "3.    U₄!-BI? 20* [(ina)] 9.30 ina(DIŠ) MAŠ₂!(BAR)\n" +
            "#note: Note to line.\n",
            ["TABLET", "NEWLINE", "OBVERSE", "NEWLINE",
             "LINELABEL"] + ["ID"] * 6 + ["NEWLINE", "NOTE", "ID", "NEWLINE"]
        )

    def test_hash_note_UPPERCASE(self):
        # Some files in the corpus such as ctn_4_168.atf
        # Contains #NOTE: even if the line should be #note:
        self.compare_tokens(
            "@tablet\n" +
            "@obverse\n" +
            "3.    U₄!-BI? 20* [(ina)] 9.30 ina(DIŠ) MAŠ₂!(BAR)\n" +
            "#NOTE: Note to line.\n",
            ["TABLET", "NEWLINE", "OBVERSE", "NEWLINE",
             "LINELABEL"] + ["ID"] * 6 + ["NEWLINE", "NOTE", "ID", "NEWLINE"]
        )

    def test_open_text_with_dots(self):
        # This must not come out as a linelabel of Hello.
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@label o 1\nHello. World\n\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "ID", "ID", "NEWLINE",
             "ID", "NEWLINE"]
        )

    def test_flagged_object(self):
        """Flags (! and #) after an @object description become tokens."""
        self.compare_tokens(
            "@object which is remarkable and broken!#\n",
            ["OBJECT", "ID", "EXCLAIM", "HASH", "NEWLINE"])

    def test_comment(self):
        self.compare_tokens(
            "# I've added various things for test purposes\n",
            ['COMMENT', "ID", "NEWLINE"]
        )

    def test_nospace_comment(self):
        self.compare_tokens(
            "#I've added various things for test purposes\n",
            ['COMMENT', "ID", "NEWLINE"]
        )

    def test_check_comment(self):
        self.compare_tokens(
            "#CHECK: I've added various things for test purposes\n",
            ['CHECK', "ID", "NEWLINE"]
        )

    def test_dotline(self):
        """A line containing only a dot collapses to a bare NEWLINE."""
        self.compare_tokens(". \n", ["NEWLINE"])

    def test_translation_heading(self):
        self.compare_tokens(
            "@translation parallel en project\n" +
            "@h1 A translation heading\n",
            ["TRANSLATION", "PARALLEL", "ID", "PROJECT", "NEWLINE"] +
            ["HEADING", "ID", "NEWLINE"]
        )

    def test_heading(self):
        self.compare_tokens(
            "@obverse\n" +
            "@h1 A heading\n",
            ["OBVERSE", "NEWLINE"] +
            ["HEADING", "ID", "NEWLINE"]
        )

    def test_double_comment(self):
        """Not sure if this is correct; but can't find
        anything in structure or lemmatization doc"""
        self.compare_tokens(
            "## papān libbi[belly] (already in gloss, same spelling)\n",
            ['COMMENT', 'ID', 'NEWLINE']
        )

    def test_ruling(self):
        """Strict "$ single ruling" keywords get dedicated token types."""
        self.compare_tokens("$ single ruling",
                            ["DOLLAR", "SINGLE", "RULING"])

    def test_described_object(self):
        self.compare_tokens(
            "@object An object that fits no other category\n",
            ["OBJECT", "ID", "NEWLINE"],
            [None, "An object that fits no other category"]
        )

    def test_nested_object(self):
        self.compare_tokens(
            "@tablet\n" +
            "@obverse\n",
            ["TABLET", "NEWLINE", "OBVERSE", "NEWLINE"]
        )

    def test_object_line(self):
        """A transliteration line followed by its #lem: lemmatization."""
        self.compare_tokens(
            "@tablet\n" +
            "@obverse\n" +
            # Explicit "+" here: the original relied on implicit string
            # literal concatenation mid-chain, inconsistent with the
            # surrounding lines (behaviour is identical either way).
            "1.    [MU] 1.03-KAM {iti}AB GE₆ U₄ 2-KAM\n" +
            "#lem: šatti[year]N; n; Ṭebetu[1]MN; mūša[at night]AV; " +
            "ūm[day]N; n\n",
            ['TABLET', 'NEWLINE',
             "OBVERSE", 'NEWLINE',
             'LINELABEL'] + ['ID'] * 6 + ['NEWLINE', 'LEM'] +
            ['ID', 'SEMICOLON'] * 5 + ['ID', "NEWLINE"]
        )

    def test_dot_in_linelabel(self):
        self.compare_tokens(
            "1.1.    [MU]\n",
            ['LINELABEL', 'ID', 'NEWLINE']
        )

    def test_score_lines(self):
        self.compare_tokens(
            "@score matrix parsed\n" +
            "1.4′. %n ḫašḫūr [api] lal[laga imḫur-līm?]\n" +
            "#lem: ḫašḫūr[apple (tree)]N; api[reed-bed]N\n\n" +
            "A₁_obv_i_4′: [x x x x x] {ú}la-al-[la-ga? {ú}im-ḫu-ur-lim?]\n" +
            "#lem: u; u; u; u; u; " +
            "+lalangu[(a leguminous vegetable)]N$lallaga\n\n" +
            "e_obv_15′–16′: {giš}ḪAŠḪUR [GIŠ.GI] — // [{ú}IGI-lim]\n" +
            "#lem: +hašhūru[apple (tree)]N$hašhūr; api[reed-bed]N;" +
            " imhur-līm['heals-a-thousand'-plant]N\n\n",
            ['SCORE', 'ID', 'ID', "NEWLINE"] +
            ['LINELABEL'] + ['ID'] * 5 + ['NEWLINE'] +
            ['LEM', 'ID', 'SEMICOLON', 'ID', 'NEWLINE'] +
            ['SCORELABEL'] + ['ID'] * 7 + ['NEWLINE'] +
            ['LEM'] + ['ID', 'SEMICOLON'] * 5 + ['ID', 'NEWLINE'] +
            ['SCORELABEL'] + ['ID'] * 5 + ['NEWLINE'] +
            ['LEM'] + ['ID', 'SEMICOLON'] * 2 + ['ID', 'NEWLINE']
        )

    def test_composite(self):
        self.compare_tokens(
            "&Q002769 = SB Anzu 1\n" +
            "@composite\n" +
            "#project: cams/gkab\n" +
            "1.   bi#-in šar da-ad-mi šu-pa-a na-ram {d}ma#-mi\n" +
            "&Q002770 = SB Anzu 2\n" +
            "#project: cams/gkab\n" +
            "1.   bi-riq ur-ha šuk-na a-dan-na\n",
            ["AMPERSAND", "ID", "EQUALS", "ID", "NEWLINE"] +
            ['COMPOSITE', 'NEWLINE'] +
            ["PROJECT", "ID", "NEWLINE"] +
            ["LINELABEL"] + ['ID'] * 6 + ['NEWLINE'] +
            ["AMPERSAND", "ID", "EQUALS", "ID", "NEWLINE"] +
            ["PROJECT", "ID", "NEWLINE"] +
            ["LINELABEL"] + ['ID'] * 4 + ["NEWLINE"]
        )

    def test_translated_composite(self):
        """Two composite texts where the first also carries a translation."""
        self.compare_tokens(
            "&Q002769 = SB Anzu 1\n" +
            "@composite\n" +
            "#project: cams/gkab\n" +
            "1.   bi#-in šar da-ad-mi šu-pa-a na-ram {d}ma#-mi\n" +
            "@translation labeled en project\n" +
            # Explicit "+" here: the original relied on implicit string
            # literal concatenation, unlike every neighbouring line
            # (behaviour is identical either way).
            "@(1) English\n" +
            "&Q002770 = SB Anzu 2\n" +
            "#project: cams/gkab\n" +
            "1.   bi-riq ur-ha šuk-na a-dan-na\n",
            ["AMPERSAND", "ID", "EQUALS", "ID", "NEWLINE"] +
            ['COMPOSITE', 'NEWLINE'] +
            ["PROJECT", "ID", "NEWLINE"] +
            ["LINELABEL"] + ['ID'] * 6 + ['NEWLINE'] +
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE"] +
            ["OPENR", "ID", "CLOSER", "ID", "NEWLINE"] +
            ["AMPERSAND", "ID", "EQUALS", "ID", "NEWLINE"] +
            ["PROJECT", "ID", "NEWLINE"] +
            ["LINELABEL"] + ['ID'] * 4 + ["NEWLINE"]
        )

    def test_equalbrace(self):
        self.compare_tokens(
            "@tablet\n" +
            "@reverse\n" +
            "2'.    ITI# an-ni-u2#\n" +
            "={    ur-hu\n",
            ['TABLET', "NEWLINE"] +
            ["REVERSE", "NEWLINE"] +
            ["LINELABEL"] + ['ID'] * 2 + ["NEWLINE"] +
            ["EQUALBRACE", "ID", "NEWLINE"]
        )

    def test_multilingual_interlinear(self):
        self.compare_tokens(
            "@tablet\n" +
            "@obverse\n" +
            "1. dim₃#-me-er# [...]\n" +
            "#lem: diŋir[deity]N; u\n" +
            "== %sb DINGIR-MEŠ GAL#-MEŠ# [...]\n" +
            "#lem: ilū[god]N; rabûtu[great]AJ; u\n" +
            "# ES dim₃-me-er = diŋir\n" +
            "|| A o ii 15\n",
            ['TABLET', "NEWLINE"] +
            ["OBVERSE", "NEWLINE"] +
            ["LINELABEL"] + ['ID'] * 2 + ["NEWLINE"] +
            ["LEM"] + ["ID", "SEMICOLON"] + ["ID"] + ["NEWLINE"] +
            ["MULTILINGUAL", "ID"] + ["ID"] * 3 + ["NEWLINE"] +
            ["LEM"] + ["ID", "SEMICOLON"] * 2 + ["ID"] + ["NEWLINE"] +
            ["COMMENT", "ID", "NEWLINE"] +
            ["PARBAR", "ID", "ID", "ID", "ID", "NEWLINE"]
        )

    def test_strict_in_parallel(self):
        self.compare_tokens(
            "@translation parallel en project\n" +
            "$ reverse blank",
            ["TRANSLATION", "PARALLEL", "ID", "PROJECT", "NEWLINE"] +
            ["DOLLAR", "ID"]
        )

    def test_query_in_parallel(self):
        """The parallel ID regex was too general and identified ? after
            @obverse as an ID token, not a QUERY."""
        self.compare_tokens(
            "@translation parallel en project\n" +
            "@obverse?",
            ["TRANSLATION", "PARALLEL", "ID", "PROJECT", "NEWLINE"] +
            ["OBVERSE", "QUERY"]
        )

    def test_loose_in_labeled(self):
        """A loose $-line followed by a labeled paragraph lexes as
        DOLLAR ID then OPENR ... CLOSER ID."""
        atf = ("@translation labeled en project\n"
               "$ (Break)\n"
               "@(r 2) I am\n\n")
        expected = ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
                    "DOLLAR", "ID", "NEWLINE",
                    "OPENR", "ID", "ID", "CLOSER", "ID", "NEWLINE"]
        self.compare_tokens(atf, expected)

    def test_ati_in_translation(self):
        """An @i{...} inline-formatting line inside a labeled translation
        paragraph yields no extra tokens (the expected list ends with the
        paragraph's own ID and NEWLINE)."""
        atf = ("@translation labeled en project\n"
               "@(r 2) I am\n"
               "@i{eššēšu-}festival\n")
        expected = ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
                    "OPENR", "ID", "ID", "CLOSER", "ID", "NEWLINE"]
        self.compare_tokens(atf, expected)

    def test_blank_after_para_transctrl_windows(self):
        """[...] should not exit the para state but did previously
           due to an insufficiently strict regex (CRLF line endings)."""
        self.compare_tokens(
            "@translation labeled en project\r\n" +
            "@(o i 1')\r\n" +
            "[...]\r\n\r\n" +
            "@(o i 2')\r\n" +
            "[... you put] inside [his] ears [and the evil] " +
            "afflicting his head [will be eradicated].\r\n\r\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE"] +
            ["OPENR", "ID", "ID", "ID", "CLOSER"] +
            ["ID", "NEWLINE"] +
            ["OPENR", "ID", "ID", "ID", "CLOSER"] +
            ["ID", "NEWLINE"]
        )

    def test_blank_after_para_transctrl(self):
        """[...] should not exit the para state but did previously
           due to an insufficiently strict regex (LF line endings)."""
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@(o i 1)\n" +
            "[(If) in] Tašritu (month VII), on day 1, " +
            "a solar eclipse takes place: [...].\n\n" +
            "@(o i 2)\n" +
            "[...], on day 7, a solar eclipse takes place: [...].\n\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE"] +
            ["OPENR", "ID", "ID", "ID", "CLOSER"] +
            ["ID", "NEWLINE"] +
            ["OPENR", "ID", "ID", "ID", "CLOSER"] +
            ["ID", "NEWLINE"]
        )

    def test_strict_in_labelled_parallel(self):
        """A strict $-line in a labeled translation lexes as DOLLAR ID."""
        atf = ("@translation labeled en project\n"
               "$ reverse blank")
        self.compare_tokens(atf,
                            ["TRANSLATION", "LABELED", "ID", "PROJECT",
                             "NEWLINE", "DOLLAR", "ID"])

    def test_strict_as_loose_in_translation(self):
        """A $-line with free text in a translation lexes as DOLLAR ID."""
        atf = ("@translation parallel en project\n"
               "$ Continued in text no. 2\n")
        self.compare_tokens(atf,
                            ["TRANSLATION", "PARALLEL", "ID", "PROJECT",
                             "NEWLINE", "DOLLAR", "ID", "NEWLINE"])

    def test_punctuated_translation(self):
        """Punctuation in a translation line is kept inside one ID value."""
        atf = ("@translation parallel en project\n"
               "1. 'What is going on?', said the King!\n")
        types = ["TRANSLATION", "PARALLEL", "ID", "PROJECT", "NEWLINE",
                 "LINELABEL", "ID", "NEWLINE"]
        values = [None, None, "en", None, None,
                  "1", "'What is going on?', said the King!", None]
        self.compare_tokens(atf, types, values)

    def test_translation_note(self):
        """A #note: line inside a translation lexes as NOTE ID."""
        atf = ("@translation parallel en project\n"
               "@reverse\n"
               "#note: reverse uninscribed\n")
        self.compare_tokens(atf,
                            ["TRANSLATION", "PARALLEL", "ID", "PROJECT",
                             "NEWLINE", "REVERSE", "NEWLINE",
                             "NOTE", "ID", "NEWLINE"])

    def test_equals_in_translation_note(self):
        """An equals sign inside a #note: body stays part of its ID."""
        atf = ("@translation parallel en project\n"
               "@reverse\n"
               '#note: The CAD translation šarriru = "humble",\n')
        self.compare_tokens(atf,
                            ["TRANSLATION", "PARALLEL", "ID", "PROJECT",
                             "NEWLINE", "REVERSE", "NEWLINE",
                             "NOTE", "ID", "NEWLINE"])

    def test_note_ended_by_strucuture(self):
        """A following @-structure line terminates a #note: body."""
        atf = ("@translation parallel en project\n"
               "@obverse\n"
               '#note: The CAD translation šarriru = "humble",\n'
               "@reverse")
        self.compare_tokens(atf,
                            ["TRANSLATION", "PARALLEL", "ID", "PROJECT",
                             "NEWLINE", "OBVERSE", "NEWLINE",
                             "NOTE", "ID", "NEWLINE", "REVERSE"])

    def test_milestone(self):
        """An @m=locator milestone lexes as M EQUALS ID."""
        atf = ("@tablet\n"
               "@obverse\n"
               "@m=locator catchline\n"
               "16'. si-i-ia-a-a-ku\n")
        expected = ["TABLET", "NEWLINE",
                    "OBVERSE", "NEWLINE",
                    "M", "EQUALS", "ID", "NEWLINE",
                    "LINELABEL", "ID", "NEWLINE"]
        self.compare_tokens(atf, expected)

    def test_include(self):
        """An @include directive lexes as INCLUDE ID EQUALS ID."""
        atf = ("@tablet\n"
               "@obverse\n"
               "@include dcclt:P229061 = MSL 07, 197 V02, 210 V11\n")
        expected = ["TABLET", "NEWLINE",
                    "OBVERSE", "NEWLINE",
                    "INCLUDE", "ID", "EQUALS", "ID", "NEWLINE"]
        self.compare_tokens(atf, expected)

    def test_double_newline_and_lexpos(self):
        """A blank line is absorbed into one NEWLINE token whose value is
        "\\n\\n"; line numbers and lexer positions are also checked."""
        atf = ("@obverse\n"
               "\n"
               "#note:\n")
        self.compare_tokens(atf,
                            ["OBVERSE", "NEWLINE", "NOTE", "NEWLINE"],
                            ["obverse", "\n\n", "note", "\n"],
                            [1, 1, 3, 3],
                            [1, 8, 11, 16])

    def test_blankline_with_tab_inadsorb(self):
        """A whitespace-only (tab) line is absorbed into the surrounding
        NEWLINE token's value ("\\n\\t\\n")."""
        atf = ("# ES mu-lu = lu₂, ša₃-ab = šag\n"
               "\t\n"
               "7. keš₂-da")
        self.compare_tokens(atf,
                            ["COMMENT", "ID", "NEWLINE", "LINELABEL", "ID"],
                            ["#", "ES mu-lu = lu₂, ša₃-ab = šag",
                             "\n\t\n", "7", "keš₂-da"])

    def test_invalid_at_raises_syntax_error(self):
        """An unknown @-keyword raises SyntaxError, or emits exactly one
        warning when the lexer is built with skipinvalid=True."""
        self.ensure_raises_and_not(u"@obversel\n", nwarnings=1)

    def test_invalid_hash_raises_syntax_error(self):
        """An unknown #-protocol raises SyntaxError, or emits exactly two
        warnings when the lexer is built with skipinvalid=True."""
        self.ensure_raises_and_not(u"#lems: Ṣalbatanu[Mars]CN\n", nwarnings=2)

    def test_invalid_id_syntax_error(self):
        """A bare, unlabeled line raises SyntaxError, or emits exactly one
        warning when the lexer is built with skipinvalid=True."""
        self.ensure_raises_and_not(u"Ṣalbatanu[Mars]CN\n", nwarnings=1)

    @staticmethod
    def test_resolve_keyword_no_extra():
        """Test that resolve_keyword works correctly when extra is not
        passed. This never happens in actual code, hence this test."""
        mylexer = AtfLexer()
        result = mylexer.resolve_keyword('obverse',
                                         mylexer.structures)
        assert result == 'OBVERSE'
Example #12
0
def try_parse(content):
    """Lex and parse an ATF string, first ensuring it ends with a newline.

    Uses ``str.endswith`` rather than indexing ``content[-1]`` so that an
    empty string is handled (it gets a newline appended) instead of raising
    IndexError.
    """
    if not content.endswith('\n'):
        content += "\n"
    lexer = AtfLexer().lexer
    parser = AtfParser().parser
    return parser.parse(content, lexer=lexer)
Example #13
0
 def setUp(self):
     """
     Initialize a fresh lexer and parser before each test.

     NOTE(review): AtfLexer/AtfParser are imported outside this fragment;
     presumably pyoracc's ATF lexer/parser classes — confirm against the
     file's import block.
     """
     self.lexer = AtfLexer().lexer
     self.parser = AtfParser().parser