Example #1
0
def compare_tokens(content,
                   expected_types,
                   expected_values=None,
                   expected_lineno=None,
                   expected_lexpos=None):
    """Lex *content* and assert the token stream matches the expectations.

    Parameters:
      content -- ATF source text fed to the lexer.
      expected_types -- token type names, in order, compared against
                        every token produced by the lexer.
      expected_values, expected_lineno, expected_lexpos --
                        optional per-token expectations; a ``None`` entry
                        (or omitting the whole sequence) skips that check
                        for the corresponding token.
    """
    lexer = AtfLexer().lexer
    lexer.input(content)
    # Missing expectation sequences become endless streams of None so
    # that zip_longest is never terminated by them.
    if expected_values is None:
        expected_values = repeat(None)
    if expected_lineno is None:
        expected_lineno = repeat(None)
    if expected_lexpos is None:
        expected_lexpos = repeat(None)
    for e_type, e_value, e_lineno, e_lexpos, token in zip_longest(
            expected_types, expected_values, expected_lineno, expected_lexpos,
            lexer):
        if token is None and e_type is None:
            # Both the token stream and the expectations are exhausted.
            break
        assert token.type == e_type
        # Compare against None explicitly: falsy expectations such as a
        # lexpos of 0, a lineno of 0 or an empty-string value must still
        # be asserted, not silently skipped.
        if e_value is not None:
            assert token.value == e_value
        if e_lineno is not None:
            assert token.lineno == e_lineno
        if e_lexpos is not None:
            assert token.lexpos == e_lexpos
Example #2
0
 def test_resolve_keyword_no_extra():
     '''Test that resolve_keyword works correctly when extra is not passed.

     This never happens in actual code, hence this test.'''
     mylexer = AtfLexer()
     result = mylexer.resolve_keyword('obverse',
                                      mylexer.structures)
     assert result == 'OBVERSE'
Example #3
0
 def ensure_raises_and_not(self, string, nwarnings):
     """Assert *string* raises SyntaxError under strict lexing, but only
     emits *nwarnings* UserWarnings when invalid input is skipped."""
     self.lexer.input(string)
     # Strict lexing must reject the invalid input outright.
     # (The exception info was bound to an unused name before; dropped.)
     with pytest.raises(SyntaxError):
         for i in self.lexer:
             pass
     # If we allow invalid syntax this should not raise
     self.lexer = AtfLexer(skipinvalid=True).lexer
     self.lexer.input(string)
     with pytest.warns(UserWarning) as record:
         for i in self.lexer:
             pass
     assert len(record) == nwarnings
Example #4
0
 def ensure_raises_and_not(self, string, nwarnings):
     """Assert *string* raises SyntaxError under strict lexing, but only
     emits *nwarnings* UserWarnings when invalid input is skipped."""
     self.lexer.input(string)
     # Strict lexing must reject the invalid input outright.
     # (The exception info was bound to an unused name before; dropped.)
     with pytest.raises(SyntaxError):
         for i in self.lexer:
             pass
     # If we allow invalid syntax this should not raise
     self.lexer = AtfLexer(skipinvalid=True).lexer
     self.lexer.input(string)
     with pytest.warns(UserWarning) as record:
         for i in self.lexer:
             pass
     assert len(record) == nwarnings
Example #5
0
 def __init__(self, content, atftype='oracc', debug=False, skipinvalid=False):
     """Parse ATF *content* with the lexer/parser pair for *atftype*.

     Parameters:
       content -- ATF source text; a trailing newline is appended if absent.
       atftype -- 'cdli', 'oracc' (default) or anything else for the
                  generic lexer/parser pair.
       debug -- when True, the log is passed through to the parser.
       skipinvalid -- when True, the lexer skips invalid tokens instead of
                  raising (was a hard-coded False local; now a
                  backward-compatible keyword parameter).
     """
     # endswith copes with empty input; content[-1] raised IndexError on "".
     if not content.endswith('\n'):
         content += "\n"
     if atftype == 'cdli':
         lexer = AtfCDLILexer(debug=debug, skipinvalid=skipinvalid,
                              log=log).lexer
         parser = AtfCDLIParser(debug=debug, log=log).parser
     elif atftype == 'oracc':
         lexer = AtfOraccLexer(debug=debug, skipinvalid=skipinvalid,
                               log=log).lexer
         parser = AtfOraccParser(debug=debug, log=log).parser
     else:
         lexer = AtfLexer(debug=debug, skipinvalid=skipinvalid,
                          log=log).lexer
         parser = AtfParser(debug=debug, log=log).parser
     if debug:
         self.text = parser.parse(content, lexer=lexer, debug=log)
     else:
         self.text = parser.parse(content, lexer=lexer)
Example #6
0
 def __init__(self, content, atftype='oracc', debug=False, skip=False):
     """Parse ATF *content*, recording lexer and parser error lists.

     Parameters:
       content -- ATF source text; a trailing newline is appended if absent.
       atftype -- 'cdli', 'oracc' (default) or anything else for the
                  generic lexer/parser pair.
       debug -- when True, the log is passed through to the parser.
       skip -- forwarded to lexer and parser to skip invalid input.
     """
     # endswith copes with empty input; content[-1] raised IndexError on "".
     if not content.endswith("\n"):
         content += "\n"
     if atftype == 'cdli':
         atflexer = AtfCDLILexer(debug=debug, skip=skip, log=log)
         atfparser = AtfCDLIParser(debug=debug, skip=skip, log=log)
     elif atftype == 'oracc':
         atflexer = AtfOraccLexer(debug=debug, skip=skip, log=log)
         atfparser = AtfOraccParser(debug=debug, skip=skip, log=log)
     else:
         atflexer = AtfLexer(debug=debug, skip=skip, log=log)
         atfparser = AtfParser(debug=debug, skip=skip, log=log)
     lexer = atflexer.lexer
     parser = atfparser.parser
     # Expose the error lists collected by lexing and parsing.
     self.errors_lex = atflexer.errors
     self.errors_yacc = atfparser.errors
     if debug:
         self.text = parser.parse(content, lexer=lexer, debug=log)
     else:
         self.text = parser.parse(content, lexer=lexer)
Example #7
0
def test_resolve_keyword_no_extra():
    '''Test that resolve_keyword works correctly when extra is not passed.

    This never happens in actual code, hence this test.'''
    mylexer = AtfLexer()
    result = mylexer.resolve_keyword('obverse', mylexer.structures)
    assert result == 'OBVERSE'
Example #8
0
 def setUp(self):
     # Build a fresh PLY lexer for every test so no state leaks between tests.
     self.lexer = AtfLexer().lexer
Example #9
0
class TestLexer(TestCase):
    """A class that contains all tests of the ATFLexer"""
    def setUp(self):
        # Build a fresh PLY lexer for every test so no state leaks between tests.
        self.lexer = AtfLexer().lexer

    def compare_tokens(self,
                       content,
                       expected_types,
                       expected_values=None,
                       expected_lineno=None,
                       expected_lexpos=None):
        """Lex *content* and assert the token stream matches expectations.

        ``expected_types`` is compared against every token's ``.type``;
        the other three sequences are optional and a ``None`` entry (or an
        omitted sequence) skips that check for the corresponding token.
        """
        self.lexer.input(content)
        # Missing expectation sequences become endless streams of None so
        # that zip_longest is never terminated by them.
        if expected_values is None:
            expected_values = repeat(None)
        if expected_lineno is None:
            expected_lineno = repeat(None)
        if expected_lexpos is None:
            expected_lexpos = repeat(None)
        for e_type, e_value, e_lineno, e_lexpos, token in zip_longest(
                expected_types, expected_values, expected_lineno,
                expected_lexpos, self.lexer):
            if token is None and e_type is None:
                # Both the token stream and the expectations are exhausted.
                break
            assert token.type == e_type
            # Compare against None explicitly: falsy expectations such as
            # a lexpos of 0, a lineno of 0 or an empty-string value must
            # still be asserted, not silently skipped.
            if e_value is not None:
                assert token.value == e_value
            if e_lineno is not None:
                assert token.lineno == e_lineno
            if e_lexpos is not None:
                assert token.lexpos == e_lexpos

    def ensure_raises_and_not(self, string, nwarnings):
        """Assert *string* raises SyntaxError under strict lexing, but only
        emits *nwarnings* UserWarnings when invalid input is skipped."""
        self.lexer.input(string)
        # Strict lexing must reject the invalid input outright.
        # (The exception info was bound to an unused name before; dropped.)
        with pytest.raises(SyntaxError):
            for i in self.lexer:
                pass
        # If we allow invalid syntax this should not raise
        self.lexer = AtfLexer(skipinvalid=True).lexer
        self.lexer.input(string)
        with pytest.warns(UserWarning) as record:
            for i in self.lexer:
                pass
        assert len(record) == nwarnings

    def test_code(self):
        self.compare_tokens("&X001001 = JCS 48, 089\n",
                            ["AMPERSAND", "ID", "EQUALS", "ID", "NEWLINE"],
                            [None, "X001001", None, "JCS 48, 089"])

    def test_crlf(self):
        self.compare_tokens(
            "&X001001 = JCS 48, 089\r\n" + "#project: cams/gkab\n\r",
            ["AMPERSAND", "ID", "EQUALS", "ID", "NEWLINE"] +
            ["PROJECT", "ID", "NEWLINE"])

    def test_project(self):
        self.compare_tokens("#project: cams/gkab\n",
                            ["PROJECT", "ID", "NEWLINE"],
                            [None, "cams/gkab", None])

    def test_key(self):
        self.compare_tokens("#key: cdli=ND 02688\n",
                            ["KEY", "ID", "EQUALS", "ID", "NEWLINE"],
                            [None, "cdli", None, "ND 02688", None])

    def test_language_protocol(self):
        self.compare_tokens("#atf: lang akk-x-stdbab\n",
                            ["ATF", "LANG", "ID", "NEWLINE"],
                            [None, None, "akk-x-stdbab"])

    def test_use_unicode(self):
        self.compare_tokens("#atf: use unicode\n",
                            ["ATF", "USE", "UNICODE", "NEWLINE"])

    def test_use_math(self):
        self.compare_tokens("#atf: use math\n",
                            ["ATF", "USE", "MATH", "NEWLINE"])

    def test_use_legacy(self):
        self.compare_tokens("#atf: use legacy\n",
                            ["ATF", "USE", "LEGACY", "NEWLINE"])

    def test_bib(self):
        self.compare_tokens("#bib:  MEE 15 54\n", ["BIB", "ID", "NEWLINE"])

    def test_bib_long(self):
        # not documented but common
        self.compare_tokens("#bib:  MEE 4 73 = EV a\n",
                            ["BIB", "ID", "EQUALS", "ID", "NEWLINE"])

    def test_link(self):
        self.compare_tokens(
            "#link: def A = P363716 = TCL 06, 44\n" + "@tablet\n", [
                "LINK", "DEF", "ID", "EQUALS", "ID", "EQUALS", "ID", "NEWLINE",
                "TABLET", "NEWLINE"
            ], [None, None, "A", None, "P363716", None, "TCL 06, 44"])

    def test_link_parallel_slash(self):
        self.compare_tokens(
            "#link: parallel dcclt/obale:P274929 = IM 070209\n" + "@tablet\n",
            [
                "LINK", "PARALLEL", "ID", "EQUALS", "ID", "NEWLINE", "TABLET",
                "NEWLINE"
            ], [None, None, "dcclt/obale:P274929", None, "IM 070209"])

    def test_link_parallel(self):
        self.compare_tokens(
            "#link: parallel abcd:P363716 = TCL 06, 44\n" + "@tablet\n", [
                "LINK", "PARALLEL", "ID", "EQUALS", "ID", "NEWLINE", "TABLET",
                "NEWLINE"
            ], [None, None, "abcd:P363716", None, "TCL 06, 44"])

    def test_link_reference(self):
        self.compare_tokens("|| A o ii 10\n",
                            ["PARBAR", "ID", "ID", "ID", "ID", "NEWLINE"])

    def test_link_reference_range(self):
        self.compare_tokens("|| A o ii 10 -  o ii 12 \n", [
            "PARBAR", "ID", "ID", "ID", "ID", "MINUS", "ID", "ID", "ID",
            "NEWLINE"
        ])

    def test_link_reference_prime_range(self):
        self.compare_tokens("|| A o ii 10' -  o ii' 12 \n", [
            "PARBAR", "ID", "ID", "ID", "ID", "MINUS", "ID", "ID", "ID",
            "NEWLINE"
        ])

    def test_score(self):
        self.compare_tokens("@score matrix parsed word\n",
                            ["SCORE", "ID", "ID", "ID", "NEWLINE"])

    def test_division_tablet(self):
        self.compare_tokens("@tablet", ["TABLET"])

    def test_text_linenumber(self):
        self.compare_tokens("1.    [MU] 1.03-KAM {iti}AB GE₆ U₄ 2-KAM",
                            ["LINELABEL"] + ['ID'] * 6)

    def test_lemmatize(self):
        self.compare_tokens(
            "#lem: šatti[year]N; n; Ṭebetu[1]MN; " +
            "mūša[at night]AV; ūm[day]N; n",
            ["LEM"] + ['ID', 'SEMICOLON'] * 5 + ['ID'])

    def test_loose_dollar(self):
        self.compare_tokens("$ (a loose dollar line)",
                            ["DOLLAR", "PARENTHETICALID"],
                            [None, "(a loose dollar line)"])

    def test_loose_nested_dollar(self):
        self.compare_tokens("$ (a (very) loose dollar line)",
                            ["DOLLAR", "PARENTHETICALID"],
                            [None, "(a (very) loose dollar line)"])

    def test_loose_end_nested_dollar(self):
        self.compare_tokens("$ (a loose dollar line (wow))",
                            ["DOLLAR", "PARENTHETICALID"],
                            [None, "(a loose dollar line (wow))"])

    def test_strict_dollar(self):
        self.compare_tokens("$ reverse blank",
                            ["DOLLAR", "REFERENCE", "BLANK"])

    def test_translation_intro(self):
        self.compare_tokens("@translation parallel en project",
                            ["TRANSLATION", "PARALLEL", "ID", "PROJECT"])

    def test_translation_text(self):
        self.compare_tokens(
            "@translation parallel en project\n" +
            "1.    Year 63, Ṭebetu (Month X), night of day 2:^1^", [
                "TRANSLATION", "PARALLEL", "ID", "PROJECT", "NEWLINE",
                "LINELABEL", "ID", "HAT", "ID", "HAT"
            ], [
                None, "parallel", "en", "project", None, "1",
                "Year 63, Ṭebetu (Month X), night of day 2:", None, '1', None
            ])

    def test_translation_multiline_text(self):
        self.compare_tokens(
            "@translation parallel en project\n" +
            "1.    Year 63, Ṭebetu (Month X)\n" + " , night of day 2\n", [
                "TRANSLATION", "PARALLEL", "ID", "PROJECT", "NEWLINE",
                "LINELABEL", "ID", "NEWLINE"
            ], [
                None, "parallel", "en", "project", None, "1",
                "Year 63, Ṭebetu (Month X) , night of day 2", None
            ])

    def test_translation_labeled_text(self):
        self.compare_tokens(
            "@translation labeled en project\n" + "@label o 4\n" +
            "Then it will be taken for the rites and rituals.\n\n", [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "ID", "ID", "NEWLINE", "ID", "NEWLINE"
            ], [
                None, "labeled", "en", "project", None, None, "o", "4", None,
                'Then it will be taken for the rites and rituals.', None
            ])

    def test_translation_labeled_noted_text(self):
        self.compare_tokens(
            "@translation labeled en project\n" + "@label r 8\n" +
            "The priest says the gods have performed these actions. ^1^\n\n" +
            "@note ^1^ Parenthesised text follows Neo-Assyrian source\n", [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "ID", "ID", "NEWLINE", "ID", "HAT", "ID", "HAT", "NEWLINE",
                "NOTE", "HAT", "ID", "HAT", "ID", 'NEWLINE'
            ], [
                None, "labeled", "en", "project", None, None, "r", "8", None,
                'The priest says the gods have performed these actions.', None,
                "1", None, None, None, None, "1", None,
                "Parenthesised text follows Neo-Assyrian source"
            ])

    def test_translation_labeled_dashlabel(self):
        self.compare_tokens(
            "@translation labeled en project\n" + "@label o 14-15 - o 20\n" +
            "You strew all (kinds of) seed.\n\n", [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "ID", "ID", "MINUS", "ID", "ID", "NEWLINE", "ID", "NEWLINE"
            ], [
                None, "labeled", "en", "project", None, None, "o", "14-15",
                None, "o", "20", None
            ])

    def test_translation_labeled_atlabel(self):
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@(o 20) You strew all (kinds of) seed.\n" +
            "@(o i 2) No-one will occupy the king of Akkad's throne.\n\n", [
                "TRANSLATION",
                "LABELED",
                "ID",
                "PROJECT",
                "NEWLINE",
                "OPENR",
                "ID",
                "ID",
                "CLOSER",
                "ID",
                "NEWLINE",
                "OPENR",
                "ID",
                "ID",
                "ID",
                "CLOSER",
                "ID",
                "NEWLINE",
            ], [
                None,
                "labeled",
                "en",
                "project",
                None,
                None,
                "o",
                "20",
                None,
                "You strew all (kinds of) seed.",
                None,
                None,
                "o",
                "i",
                "2",
                None,
                "No-one will occupy the king of Akkad's throne.",
                None,
            ])

    def test_translation_range_label_prime(self):
        self.compare_tokens(
            "@translation labeled en project\n" + "@label r 1' - r 2'\n", [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "ID", "ID", "MINUS", "ID", "ID", "NEWLINE"
            ], [
                None, "labeled", "en", "project", None, None, "r", "1'", None,
                "r", "2'", None
            ])

    def test_translation_label_unicode_suffix(self):
        self.compare_tokens(
            "@translation labeled en project\n" + u'@label r A\u2081\n', [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "ID", "ID", "NEWLINE"
            ], [None, "labeled", "en", "project", None, None, "r", u"A\u2081"])

    def test_translation_label_unicode_prime(self):
        self.compare_tokens(
            "@translation labeled en project\n" + u'@label r 1\u2019\n', [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "ID", "ID", "NEWLINE"
            ], [None, "labeled", "en", "project", None, None, "r", "1'", None])

    def test_translation_label_unicode_prime2(self):
        self.compare_tokens(
            "@translation labeled en project\n" + u'@label r 1\xb4\n', [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "ID", "ID", "NEWLINE"
            ], [None, "labeled", "en", "project", None, None, "r", "1'", None])

    def test_translation_label_unicode_prime3(self):
        self.compare_tokens(
            "@translation labeled en project\n" + u'@label r 1\u2032\n', [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "ID", "ID", "NEWLINE"
            ], [None, "labeled", "en", "project", None, None, "r", "1'", None])

    def test_translation_label_unicode_prime4(self):
        self.compare_tokens(
            "@translation labeled en project\n" + u'@label r 1\u02CA\n', [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "ID", "ID", "NEWLINE"
            ], [None, "labeled", "en", "project", None, None, "r", "1'", None])

    def test_translation_range_label_plus(self):
        self.compare_tokens(
            "@translation labeled en project\n" + "@label+ o 28\n", [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "ID", "ID", "NEWLINE"
            ])

    def test_translation_label_long_reference(self):
        "Translations can have full surface names rather than single letter"
        self.compare_tokens(
            "@translation labeled en project\n" + "@label obverse 28\n", [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "REFERENCE", "ID", "NEWLINE"
            ])

    def test_translation_symbols_in_translation(self):
        self.compare_tokens(
            "@translation labeled en project\n" + "@label o 1'\n" +
            "[...] ... (zodiacal sign) 8, 10° = (sign) 12, 10°\n\n", [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "ID", "ID", "NEWLINE", "ID", "NEWLINE"
            ])

    def test_translation_ats_in_translation(self):
        self.compare_tokens(
            "@translation labeled en project\n" + "@label o 1'\n" +
            "@kupputu (means): affliction (@? and) reduction?@;" +
            " they are ... like cisterns.\n\n", [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "ID", "ID", "NEWLINE", "ID", "NEWLINE"
            ])

    def test_translation_blank_line_begins_translation(self):
        # A double newline normally ends a translation paragraph
        # But this is NOT the case at the beginning of a section,
        # Apparently.
        self.compare_tokens(
            "@translation labeled en project\n" + "@label o 16\n" + "\n" +
            "@šipir @ṭuhdu @DU means: a message of abundance" +
            " will come triumphantly.\n", [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "ID", "ID", "NEWLINE", "ID", "NEWLINE"
            ])

    def test_translation_blank_line_amid_translation(self):
        # A double newline normally ends a translation paragraph
        # But this is NOT the case at the beginning of a section,
        # Apparently.
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@(4) their [cri]mes [have been forgiven] by the king." +
            " (As to) all [the\n" + "\n" +
            "    libe]ls that [have been uttered against me " +
            "in the palace, which] he has\n" + "\n" +
            "    heard, [I am not guilty of] any [of them! " +
            "N]ow, should there be a\n", [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "OPENR",
                "ID", "CLOSER", "ID", "NEWLINE", "ID", "NEWLINE", "ID",
                "NEWLINE"
            ])

    def test_translation_no_blank_line_in_labeled_translation(self):
        # This functionality is expressly forbidden at
        # http://build.oracc.org/doc2/help/editinginatf/translations/index.html
        # But appears is in cm_31_139 anyway
        self.compare_tokens(
            "@translation labeled en project\n" + "@label o 13\n" +
            "@al-@ŋa₂-@ŋa₂ @al-@ŋa₂-@ŋa₂ @šag₄-@ba-@ni" +
            " @nu-@sed-@da (means) he will" +
            "remove (... and) he will place (...); his heart will not rest" +
            "It is said in the textual corpus of the lamentation-priests.\n" +
            "@label o 15\n" + "Text\n\n", [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "ID", "ID", "NEWLINE", "ID", "NEWLINE", "LABEL", "ID", "ID",
                "NEWLINE", "ID", "NEWLINE"
            ])

    def test_translation_ATlines_in_translation(self):
        # @ within Translations mark Foreign
        # http://oracc.museum.upenn.edu/doc/help/editinginatf/translations/index.html
        self.compare_tokens(
            "@translation labeled en project\n" + "@obverse\n" +
            "1'. @MUD (means) trembling. @MUD (means) dark.", [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
                "OBVERSE", "NEWLINE", "ID"
            ])

    def test_translation_range_label_periods(self):
        self.compare_tokens(
            "@translation labeled en project\n" + "@label t.e. 1\n", [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "ID", "ID", "NEWLINE"
            ], [None, "labeled", "en", "project", None, None, "t.e.", "1"])

    def test_interlinear_translation(self):
        self.compare_tokens("@tablet\n" + "1'. ⸢x⸣\n" + "#tr: English\n", [
            "TABLET", "NEWLINE", "LINELABEL", "ID", "NEWLINE", "TR", "ID",
            "NEWLINE"
        ])

    def test_multilineinterlinear_translation(self):
        self.compare_tokens(
            "@tablet\n" + "1'. ⸢x⸣\n" + "#tr: English\n" +
            " on multiple lines\n", [
                "TABLET", "NEWLINE", "LINELABEL", "ID", "NEWLINE", "TR", "ID",
                "NEWLINE"
            ])

    def test_note_internalflag(self):
        self.compare_tokens("@note Hello James's World", ["NOTE", "ID"],
                            [None, "Hello James's World"])

    def test_note_internalspace(self):
        self.compare_tokens("@note Hello James", ["NOTE", "ID"],
                            [None, "Hello James"])

    def test_note_onechar(self):
        self.compare_tokens("@note H", ["NOTE", "ID"], [None, "H"])

    def test_note_short(self):
        self.compare_tokens("@note I'm", ["NOTE", "ID"], [None, "I'm"])

    def test_division_note(self):
        self.compare_tokens(
            "@note ^1^ A note to the translation.\n",
            ["NOTE", "HAT", "ID", "HAT", "ID", "NEWLINE"],
            [None, None, "1", None, "A note to the translation.", None])

    def test_hash_note(self):
        self.compare_tokens(
            "@tablet\n" + "@obverse\n" +
            "3.    U₄!-BI? 20* [(ina)] 9.30 ina(DIŠ) MAŠ₂!(BAR)\n" +
            "#note: Note to line.\n",
            ["TABLET", "NEWLINE", "OBVERSE", "NEWLINE", "LINELABEL"] +
            ["ID"] * 6 + ["NEWLINE", "NOTE", "ID", "NEWLINE"])

    def test_hash_note_UPPERCASE(self):
        # Some files in the corpus such as ctn_4_168.atf
        # Contains #NOTE: even if the line should be #note:
        self.compare_tokens(
            "@tablet\n" + "@obverse\n" +
            "3.    U₄!-BI? 20* [(ina)] 9.30 ina(DIŠ) MAŠ₂!(BAR)\n" +
            "#NOTE: Note to line.\n",
            ["TABLET", "NEWLINE", "OBVERSE", "NEWLINE", "LINELABEL"] +
            ["ID"] * 6 + ["NEWLINE", "NOTE", "ID", "NEWLINE"])

    def test_open_text_with_dots(self):
        # This must not come out as a linelabel of Hello.
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@label o 1\nHello. World\n\n", [
                "TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE", "LABEL",
                "ID", "ID", "NEWLINE", "ID", "NEWLINE"
            ])

    def test_flagged_object(self):
        self.compare_tokens("@object which is remarkable and broken!#\n",
                            ["OBJECT", "ID", "EXCLAIM", "HASH", "NEWLINE"])

    def test_comment(self):
        self.compare_tokens("# I've added various things for test purposes\n",
                            ['COMMENT', "ID", "NEWLINE"])

    def test_nospace_comment(self):
        self.compare_tokens("#I've added various things for test purposes\n",
                            ['COMMENT', "ID", "NEWLINE"])

    def test_check_comment(self):
        self.compare_tokens(
            "#CHECK: I've added various things for test purposes\n",
            ['CHECK', "ID", "NEWLINE"])

    def test_dotline(self):
        self.compare_tokens(". \n", ['NEWLINE'])

    def test_translation_heading(self):
        self.compare_tokens(
            "@translation parallel en project\n" +
            "@h1 A translation heading\n",
            ["TRANSLATION", "PARALLEL", "ID", "PROJECT", "NEWLINE"] +
            ["HEADING", "ID", "NEWLINE"])

    def test_heading(self):
        self.compare_tokens("@obverse\n" + "@h1 A heading\n",
                            ["OBVERSE", "NEWLINE"] +
                            ["HEADING", "ID", "NEWLINE"])

    def test_double_comment(self):
        """Not sure if this is correct; but can't find
        anything in structure or lemmatization doc"""
        self.compare_tokens(
            "## papān libbi[belly] (already in gloss, same spelling)\n",
            ['COMMENT', 'ID', 'NEWLINE'])

    def test_ruling(self):
        self.compare_tokens("$ single ruling", ["DOLLAR", "SINGLE", "RULING"])

    def test_described_object(self):
        self.compare_tokens("@object An object that fits no other category\n",
                            ["OBJECT", "ID", "NEWLINE"],
                            [None, "An object that fits no other category"])

    def test_nested_object(self):
        self.compare_tokens("@tablet\n" + "@obverse\n",
                            ["TABLET", "NEWLINE", "OBVERSE", "NEWLINE"])

    def test_object_line(self):
        self.compare_tokens(
            "@tablet\n" + "@obverse\n" +
            "1.    [MU] 1.03-KAM {iti}AB GE₆ U₄ 2-KAM\n"
            "#lem: šatti[year]N; n; Ṭebetu[1]MN; mūša[at night]AV; " +
            "ūm[day]N; n\n",
            ['TABLET', 'NEWLINE', "OBVERSE", 'NEWLINE', 'LINELABEL'] +
            ['ID'] * 6 + ['NEWLINE', 'LEM'] + ['ID', 'SEMICOLON'] * 5 +
            ['ID', "NEWLINE"])

    def test_dot_in_linelabel(self):
        self.compare_tokens("1.1.    [MU]\n", ['LINELABEL', 'ID', 'NEWLINE'])

    def test_score_lines(self):
        self.compare_tokens(
            "@score matrix parsed\n" +
            "1.4′. %n ḫašḫūr [api] lal[laga imḫur-līm?]\n" +
            "#lem: ḫašḫūr[apple (tree)]N; api[reed-bed]N\n\n" +
            "A₁_obv_i_4′: [x x x x x] {ú}la-al-[la-ga? {ú}im-ḫu-ur-lim?]\n" +
            "#lem: u; u; u; u; u; " +
            "+lalangu[(a leguminous vegetable)]N$lallaga\n\n" +
            "e_obv_15′–16′: {giš}ḪAŠḪUR [GIŠ.GI] — // [{ú}IGI-lim]\n" +
            "#lem: +hašhūru[apple (tree)]N$hašhūr; api[reed-bed]N;" +
            " imhur-līm['heals-a-thousand'-plant]N\n\n",
            ['SCORE', 'ID', 'ID', "NEWLINE"] + ['LINELABEL'] + ['ID'] * 5 +
            ['NEWLINE'] + ['LEM', 'ID', 'SEMICOLON', 'ID', 'NEWLINE'] +
            ['SCORELABEL'] + ['ID'] * 7 + ['NEWLINE'] + ['LEM'] +
            ['ID', 'SEMICOLON'] * 5 + ['ID', 'NEWLINE'] + ['SCORELABEL'] +
            ['ID'] * 5 + ['NEWLINE'] + ['LEM'] + ['ID', 'SEMICOLON'] * 2 +
            ['ID', 'NEWLINE'])

    def test_composite(self):
        self.compare_tokens(
            "&Q002769 = SB Anzu 1\n" + "@composite\n" +
            "#project: cams/gkab\n" +
            "1.   bi#-in šar da-ad-mi šu-pa-a na-ram {d}ma#-mi\n" +
            "&Q002770 = SB Anzu 2\n" + "#project: cams/gkab\n" +
            "1.   bi-riq ur-ha šuk-na a-dan-na\n",
            ["AMPERSAND", "ID", "EQUALS", "ID", "NEWLINE"] +
            ['COMPOSITE', 'NEWLINE'] + ["PROJECT", "ID", "NEWLINE"] +
            ["LINELABEL"] + ['ID'] * 6 + ['NEWLINE'] +
            ["AMPERSAND", "ID", "EQUALS", "ID", "NEWLINE"] +
            ["PROJECT", "ID", "NEWLINE"] + ["LINELABEL"] + ['ID'] * 4 +
            ["NEWLINE"])

    def test_translated_composite(self):
        self.compare_tokens(
            "&Q002769 = SB Anzu 1\n" + "@composite\n" +
            "#project: cams/gkab\n" +
            "1.   bi#-in šar da-ad-mi šu-pa-a na-ram {d}ma#-mi\n" +
            "@translation labeled en project\n" + "@(1) English\n"
            "&Q002770 = SB Anzu 2\n" + "#project: cams/gkab\n" +
            "1.   bi-riq ur-ha šuk-na a-dan-na\n",
            ["AMPERSAND", "ID", "EQUALS", "ID", "NEWLINE"] +
            ['COMPOSITE', 'NEWLINE'] + ["PROJECT", "ID", "NEWLINE"] +
            ["LINELABEL"] + ['ID'] * 6 + ['NEWLINE'] +
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE"] +
            ["OPENR", "ID", "CLOSER", "ID", "NEWLINE"] +
            ["AMPERSAND", "ID", "EQUALS", "ID", "NEWLINE"] +
            ["PROJECT", "ID", "NEWLINE"] + ["LINELABEL"] + ['ID'] * 4 +
            ["NEWLINE"])

    def test_equalbrace(self):
        self.compare_tokens(
            "@tablet\n" + "@reverse\n" + "2'.    ITI# an-ni-u2#\n" +
            "={    ur-hu\n",
            ['TABLET', "NEWLINE"] + ["REVERSE", "NEWLINE"] + ["LINELABEL"] +
            ['ID'] * 2 + ["NEWLINE"] + ["EQUALBRACE", "ID", "NEWLINE"])

    def test_multilingual_interlinear(self):
        self.compare_tokens(
            "@tablet\n" + "@obverse\n" + "1. dim₃#-me-er# [...]\n" +
            "#lem: diŋir[deity]N; u\n" +
            "== %sb DINGIR-MEŠ GAL#-MEŠ# [...]\n" +
            "#lem: ilū[god]N; rabûtu[great]AJ; u\n" +
            "# ES dim₃-me-er = diŋir\n" + "|| A o ii 15\n",
            ['TABLET', "NEWLINE"] + ["OBVERSE", "NEWLINE"] + ["LINELABEL"] +
            ['ID'] * 2 + ["NEWLINE"] + ["LEM"] + ["ID", "SEMICOLON"] + ["ID"] +
            ["NEWLINE"] + ["MULTILINGUAL", "ID"] + ["ID"] * 3 + ["NEWLINE"] +
            ["LEM"] + ["ID", "SEMICOLON"] * 2 + ["ID"] + ["NEWLINE"] +
            ["COMMENT", "ID", "NEWLINE"] +
            ["PARBAR", "ID", "ID", "ID", "ID", "NEWLINE"])

    def test_strict_in_parallel(self):
        self.compare_tokens(
            "@translation parallel en project\n" + "$ reverse blank",
            ["TRANSLATION", "PARALLEL", "ID", "PROJECT", "NEWLINE"] +
            ["DOLLAR", "ID"])

    def test_query_in_parallel(self):
        """The parallel ID regex was too general and identified ? after
            @obverse as an ID token, not a QUERY."""
        self.compare_tokens(
            "@translation parallel en project\n" + "@obverse?",
            ["TRANSLATION", "PARALLEL", "ID", "PROJECT", "NEWLINE"] +
            ["OBVERSE", "QUERY"])

    def test_loose_in_labeled(self):
        self.compare_tokens(
            "@translation labeled en project\n" + "$ (Break)\n" +
            "@(r 2) I am\n\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE"] +
            ["DOLLAR", "ID", "NEWLINE"] +
            ["OPENR", "ID", "ID", "CLOSER", "ID", "NEWLINE"])

    def test_ati_in_translation(self):
        self.compare_tokens(
            "@translation labeled en project\n" + "@(r 2) I am\n" +
            "@i{eššēšu-}festival\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE"] +
            ["OPENR", "ID", "ID", "CLOSER", "ID", "NEWLINE"])

    def test_blank_after_para_transctrl_windows(self):
        """[...] should not exit the para state, but did previously
           due to a regex that was not strict enough."""
        self.compare_tokens(
            "@translation labeled en project\r\n" + "@(o i 1')\r\n" +
            "[...]\r\n\r\n" + "@(o i 2')\r\n" +
            "[... you put] inside [his] ears [and the evil] " +
            "afflicting his head [will be eradicated].\r\n\r\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE"] +
            ["OPENR", "ID", "ID", "ID", "CLOSER"] + ["ID", "NEWLINE"] +
            ["OPENR", "ID", "ID", "ID", "CLOSER"] + ["ID", "NEWLINE"])

    def test_blank_after_para_transctrl(self):
        """[...] should not exit the para state, but previously did,
           due to an insufficiently strict regex."""
        self.compare_tokens(
            "@translation labeled en project\n" + "@(o i 1)\n" +
            "[(If) in] Tašritu (month VII), on day 1, " +
            "a solar eclipse takes place: [...].\n\n" + "@(o i 2)\n" +
            "[...], on day 7, a solar eclipse takes place: [...].\n\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE"] +
            ["OPENR", "ID", "ID", "ID", "CLOSER"] + ["ID", "NEWLINE"] +
            ["OPENR", "ID", "ID", "ID", "CLOSER"] + ["ID", "NEWLINE"])

    def test_strict_in_labelled_parallel(self):
        """Strict $-lines also lex loosely inside labeled translations."""
        self.compare_tokens(
            "@translation labeled en project\n$ reverse blank",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "DOLLAR", "ID"])

    def test_strict_as_loose_in_translation(self):
        """A $-line that is not strict-form lexes as one loose ID."""
        self.compare_tokens(
            "@translation parallel en project\n"
            "$ Continued in text no. 2\n",
            ["TRANSLATION", "PARALLEL", "ID", "PROJECT", "NEWLINE",
             "DOLLAR", "ID", "NEWLINE"])

    def test_punctuated_translation(self):
        """Punctuation in translation text stays inside the ID value."""
        expected_types = ["TRANSLATION", "PARALLEL", "ID", "PROJECT",
                          "NEWLINE", "LINELABEL", "ID", "NEWLINE"]
        expected_values = [None, None, "en", None, None,
                           "1", "'What is going on?', said the King!", None]
        self.compare_tokens(
            "@translation parallel en project\n"
            "1. 'What is going on?', said the King!\n",
            expected_types, expected_values)

    def test_translation_note(self):
        """#note: lines are recognised inside a parallel translation."""
        self.compare_tokens(
            "@translation parallel en project\n"
            "@reverse\n"
            "#note: reverse uninscribed\n",
            ["TRANSLATION", "PARALLEL", "ID", "PROJECT", "NEWLINE",
             "REVERSE", "NEWLINE",
             "NOTE", "ID", "NEWLINE"])

    def test_equals_in_translation_note(self):
        # An equals sign inside a #note: body must stay part of the note's
        # single ID value, not lex as a separate EQUALS token.
        self.compare_tokens(
            "@translation parallel en project\n" + "@reverse\n" +
            '#note: The CAD translation šarriru = "humble",\n',
            ["TRANSLATION", "PARALLEL", "ID", "PROJECT", "NEWLINE"] +
            ["REVERSE", "NEWLINE"] + ["NOTE", "ID", "NEWLINE"])

    def test_note_ended_by_strucuture(self):
        # A following @-structure line (@reverse) terminates the note body.
        # NOTE(review): method name has a long-standing typo ("strucuture");
        # kept as-is to preserve the public test name.
        self.compare_tokens(
            "@translation parallel en project\n" + "@obverse\n" +
            '#note: The CAD translation šarriru = "humble",\n' + '@reverse',
            ["TRANSLATION", "PARALLEL", "ID", "PROJECT", "NEWLINE"] +
            ["OBVERSE", "NEWLINE"] + ["NOTE", "ID", "NEWLINE"] + ["REVERSE"])

    def test_milestone(self):
        """@m=locator milestones lex as M, EQUALS, then the locator ID."""
        self.compare_tokens(
            "@tablet\n"
            "@obverse\n"
            "@m=locator catchline\n"
            "16'. si-i-ia-a-a-ku\n",
            ["TABLET", "NEWLINE", "OBVERSE", "NEWLINE",
             "M", "EQUALS", "ID", "NEWLINE",
             "LINELABEL", "ID", "NEWLINE"])

    def test_include(self):
        """@include lines carry a source ID, EQUALS and a reference ID."""
        self.compare_tokens(
            "@tablet\n"
            "@obverse\n"
            "@include dcclt:P229061 = MSL 07, 197 V02, 210 V11\n",
            ["TABLET", "NEWLINE", "OBVERSE", "NEWLINE",
             "INCLUDE", "ID", "EQUALS", "ID", "NEWLINE"])

    def test_double_newline_and_lexpos(self):
        # A blank line is absorbed into the preceding NEWLINE token's value
        # ("\n\n"); lineno and lexpos must still point at each token's
        # origin in the raw input.
        self.compare_tokens("@obverse\n" + "\n" + "#note:\n",
                            ["OBVERSE", "NEWLINE", "NOTE", "NEWLINE"],
                            ["obverse", "\n\n", "note", "\n"], [1, 1, 3, 3],
                            [1, 8, 11, 16])

    def test_blankline_with_tab_inadsorb(self):
        # A "blank" line containing only a tab is absorbed into the previous
        # NEWLINE token's value ("\n\t\n") rather than breaking the lex.
        # (The second fixture line below is a literal tab character.)
        self.compare_tokens(
            "# ES mu-lu = lu₂, ša₃-ab = šag\n" + "	\n" + "7. keš₂-da",
            ["COMMENT", "ID", "NEWLINE", "LINELABEL", "ID"],
            ["#", "ES mu-lu = lu₂, ša₃-ab = šag", "\n\t\n", '7', "keš₂-da"])

    def test_invalid_at_raises_syntax_error(self):
        """A misspelled @-keyword raises, or warns once when skipping."""
        self.ensure_raises_and_not(u"@obversel\n", nwarnings=1)

    def test_invalid_hash_raises_syntax_error(self):
        # "#lems:" is not a valid protocol ("#lem:" is). Expects two
        # warnings in skip mode — presumably one per unlexable fragment;
        # TODO(review) confirm against AtfLexer's skipinvalid handling.
        string = u"#lems: Ṣalbatanu[Mars]CN\n"
        self.ensure_raises_and_not(string, nwarnings=2)

    def test_invalid_id_syntax_error(self):
        # A bare transliteration with no line label or protocol prefix is
        # invalid at the top level.
        string = u"Ṣalbatanu[Mars]CN\n"
        self.ensure_raises_and_not(string, nwarnings=1)

    @staticmethod
    def test_resolve_keyword_no_extra():
        '''Test that resolve_keyword works correctly when extra is not
        passed. This never happens in actual code. Hence this test.'''
        mylexer = AtfLexer()
        result = mylexer.resolve_keyword('obverse', mylexer.structures)
        assert result == 'OBVERSE'
Example #10
0
 def setUp(self):
     self.lexer = AtfLexer().lexer
Example #11
0
class TestLexer(TestCase):
    """A class that contains all tests of the ATFLexer"""
    def setUp(self):
        # Build a fresh PLY lexer for every test so lexer state (e.g. the
        # current lexing mode) cannot leak between tests.
        self.lexer = AtfLexer().lexer

    def compare_tokens(self, content, expected_types, expected_values=None,
                       expected_lineno=None, expected_lexpos=None):
        """Lex *content* and compare the token stream against expectations.

        ``expected_types`` is checked for every token.  The other three
        sequences are optional; when omitted they default to an infinite
        stream of ``None`` so every per-token check is skipped.  They may
        also be shorter than the token stream: ``zip_longest`` pads the
        tail with ``None``.
        """
        self.lexer.input(content)
        if expected_values is None:
            expected_values = repeat(None)
        if expected_lineno is None:
            expected_lineno = repeat(None)
        if expected_lexpos is None:
            expected_lexpos = repeat(None)
        for e_type, e_value, e_lineno, e_lexpos, token in zip_longest(
                expected_types,
                expected_values,
                expected_lineno,
                expected_lexpos,
                self.lexer):
            # Both the token stream and the expectations ran out together.
            if token is None and e_type is None:
                break
            assert token.type == e_type
            # NOTE(review): truthiness tests mean falsy expectations ("", 0)
            # are skipped, not asserted; callers only ever pass None as the
            # "don't check" placeholder, so this is safe as used.
            if e_value:
                assert token.value == e_value
            if e_lineno:
                assert token.lineno == e_lineno
            if e_lexpos:
                assert token.lexpos == e_lexpos

    def ensure_raises_and_not(self, string, nwarnings):
        """Check invalid input raises normally but only warns when skipped.

        Lexing *string* with the default lexer must raise SyntaxError;
        re-lexing it with ``AtfLexer(skipinvalid=True)`` must instead emit
        exactly *nwarnings* UserWarnings.
        """
        self.lexer.input(string)
        # The excinfo binding was unused, so don't capture it.
        with pytest.raises(SyntaxError):
            for _ in self.lexer:
                pass
        # If we allow invalid syntax this should not raise.
        self.lexer = AtfLexer(skipinvalid=True).lexer
        self.lexer.input(string)
        with pytest.warns(UserWarning) as record:
            for _ in self.lexer:
                pass
        assert len(record) == nwarnings

    def test_code(self):
        """The &-line: catalogue code, EQUALS, publication reference."""
        expected_types = ["AMPERSAND", "ID", "EQUALS", "ID", "NEWLINE"]
        expected_values = [None, "X001001", None, "JCS 48, 089"]
        self.compare_tokens("&X001001 = JCS 48, 089\n",
                            expected_types, expected_values)

    def test_crlf(self):
        """CRLF and reversed LF-CR line endings both lex as one NEWLINE."""
        content = ("&X001001 = JCS 48, 089\r\n"
                   "#project: cams/gkab\n\r")
        self.compare_tokens(
            content,
            ["AMPERSAND", "ID", "EQUALS", "ID", "NEWLINE",
             "PROJECT", "ID", "NEWLINE"])

    def test_project(self):
        """#project: protocol yields PROJECT plus the project path."""
        self.compare_tokens("#project: cams/gkab\n",
                            ["PROJECT", "ID", "NEWLINE"],
                            [None, "cams/gkab", None])

    def test_key(self):
        """#key: protocol lexes as key name, EQUALS, value."""
        self.compare_tokens("#key: cdli=ND 02688\n",
                            ["KEY", "ID", "EQUALS", "ID", "NEWLINE"],
                            [None, "cdli", None, "ND 02688", None])

    def test_language_protocol(self):
        self.compare_tokens(
            "#atf: lang akk-x-stdbab\n",
            ["ATF", "LANG", "ID", "NEWLINE"],
            [None, None, "akk-x-stdbab"]
        )

    def test_use_unicode(self):
        """#atf: use unicode protocol line."""
        self.compare_tokens("#atf: use unicode\n",
                            ["ATF", "USE", "UNICODE", "NEWLINE"])

    def test_use_math(self):
        """#atf: use math protocol line."""
        self.compare_tokens("#atf: use math\n",
                            ["ATF", "USE", "MATH", "NEWLINE"])

    def test_use_legacy(self):
        """#atf: use legacy protocol line."""
        self.compare_tokens("#atf: use legacy\n",
                            ["ATF", "USE", "LEGACY", "NEWLINE"])

    def test_bib(self):
        """#bib: bibliography protocol; extra spaces are tolerated."""
        self.compare_tokens("#bib:  MEE 15 54\n",
                            ["BIB", "ID", "NEWLINE"])

    def test_bib_long(self):
        # Undocumented but common: an "=" alias inside a #bib: line.
        self.compare_tokens("#bib:  MEE 4 73 = EV a\n",
                            ["BIB", "ID", "EQUALS", "ID", "NEWLINE"])

    def test_link(self):
        self.compare_tokens(
            "#link: def A = P363716 = TCL 06, 44\n" +
            "@tablet\n",
            ["LINK", "DEF", "ID", "EQUALS", "ID", "EQUALS", "ID", "NEWLINE",
             "TABLET", "NEWLINE"],
            [None, None, "A", None, "P363716", None, "TCL 06, 44"]
        )

    def test_link_parallel_slash(self):
        self.compare_tokens(
            "#link: parallel dcclt/obale:P274929 = IM 070209\n" +
            "@tablet\n",
            ["LINK", "PARALLEL", "ID", "EQUALS", "ID", "NEWLINE",
             "TABLET", "NEWLINE"],
            [None, None, "dcclt/obale:P274929", None, "IM 070209"]
        )

    def test_link_parallel(self):
        self.compare_tokens(
            "#link: parallel abcd:P363716 = TCL 06, 44\n" +
            "@tablet\n",
            ["LINK", "PARALLEL", "ID", "EQUALS", "ID", "NEWLINE",
             "TABLET", "NEWLINE"],
            [None, None, "abcd:P363716", None, "TCL 06, 44"]
        )

    def test_link_reference(self):
        """A || link-reference line: PARBAR followed by four IDs."""
        self.compare_tokens("|| A o ii 10\n",
                            ["PARBAR", "ID", "ID", "ID", "ID", "NEWLINE"])

    def test_link_reference_range(self):
        self.compare_tokens(
            "|| A o ii 10 -  o ii 12 \n",
            ["PARBAR", "ID", "ID", "ID", "ID", "MINUS",
             "ID", "ID", "ID", "NEWLINE"]
        )

    def test_link_reference_prime_range(self):
        self.compare_tokens(
            "|| A o ii 10' -  o ii' 12 \n",
            ["PARBAR", "ID", "ID", "ID", "ID", "MINUS",
             "ID", "ID", "ID", "NEWLINE"]
        )

    def test_score(self):
        """@score header followed by its three option words."""
        self.compare_tokens("@score matrix parsed word\n",
                            ["SCORE", "ID", "ID", "ID", "NEWLINE"])

    def test_division_tablet(self):
        """A bare @tablet division with no trailing newline."""
        self.compare_tokens("@tablet", ["TABLET"])

    def test_text_linenumber(self):
        self.compare_tokens(
            "1.    [MU] 1.03-KAM {iti}AB GE₆ U₄ 2-KAM",
            ["LINELABEL"] + ['ID'] * 6
        )

    def test_lemmatize(self):
        self.compare_tokens(
            "#lem: šatti[year]N; n; Ṭebetu[1]MN; " +
            "mūša[at night]AV; ūm[day]N; n",
            ["LEM"] + ['ID', 'SEMICOLON'] * 5 + ['ID']
        )

    def test_loose_dollar(self):
        """A parenthesised $-line lexes as a single PARENTHETICALID."""
        self.compare_tokens("$ (a loose dollar line)",
                            ["DOLLAR", "PARENTHETICALID"],
                            [None, "(a loose dollar line)"])

    def test_loose_nested_dollar(self):
        self.compare_tokens(
            "$ (a (very) loose dollar line)",
            ["DOLLAR", "PARENTHETICALID"],
            [None, "(a (very) loose dollar line)"]
        )

    def test_loose_end_nested_dollar(self):
        self.compare_tokens(
            "$ (a loose dollar line (wow))",
            ["DOLLAR", "PARENTHETICALID"],
            [None, "(a loose dollar line (wow))"]
        )

    def test_strict_dollar(self):
        """Strict $-line keywords get dedicated token types."""
        self.compare_tokens("$ reverse blank",
                            ["DOLLAR", "REFERENCE", "BLANK"])

    def test_translation_intro(self):
        """The @translation header line, without a trailing newline."""
        self.compare_tokens("@translation parallel en project",
                            ["TRANSLATION", "PARALLEL", "ID", "PROJECT"])

    def test_translation_text(self):
        self.compare_tokens(
            "@translation parallel en project\n" +
            "1.    Year 63, Ṭebetu (Month X), night of day 2:^1^",
            ["TRANSLATION", "PARALLEL", "ID", "PROJECT", "NEWLINE",
             "LINELABEL", "ID", "HAT", "ID", "HAT"],
            [None, "parallel", "en", "project", None,
             "1", "Year 63, Ṭebetu (Month X), night of day 2:",
             None, '1', None]
        )

    def test_translation_multiline_text(self):
        self.compare_tokens(
            "@translation parallel en project\n" +
            "1.    Year 63, Ṭebetu (Month X)\n" +
            " , night of day 2\n",
            ["TRANSLATION", "PARALLEL", "ID", "PROJECT", "NEWLINE",
             "LINELABEL", "ID", "NEWLINE"],
            [None, "parallel", "en", "project", None,
             "1", "Year 63, Ṭebetu (Month X) , night of day 2", None]
        )

    def test_translation_labeled_text(self):
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@label o 4\n" +
            "Then it will be taken for the rites and rituals.\n\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "ID", "ID", "NEWLINE",
             "ID", "NEWLINE"],
            [None, "labeled", "en", "project", None,
             None, "o", "4", None,
             'Then it will be taken for the rites and rituals.', None]
        )

    def test_translation_labeled_noted_text(self):
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@label r 8\n" +
            "The priest says the gods have performed these actions. ^1^\n\n" +
            "@note ^1^ Parenthesised text follows Neo-Assyrian source\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "ID", "ID", "NEWLINE",
             "ID", "HAT", "ID", "HAT", "NEWLINE",
             "NOTE", "HAT", "ID", "HAT", "ID", 'NEWLINE'],
            [None, "labeled", "en", "project", None,
             None, "r", "8", None,
             'The priest says the gods have performed these actions.',
             None, "1", None, None,
             None, None, "1", None,
             "Parenthesised text follows Neo-Assyrian source"]

        )

    def test_translation_labeled_dashlabel(self):
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@label o 14-15 - o 20\n" +
            "You strew all (kinds of) seed.\n\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "ID", "ID", "MINUS", "ID", "ID", "NEWLINE",
             "ID", "NEWLINE"],
            [None, "labeled", "en", "project", None,
             None, "o", "14-15", None, "o", "20", None]
        )

    def test_translation_labeled_atlabel(self):
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@(o 20) You strew all (kinds of) seed.\n" +
            "@(o i 2) No-one will occupy the king of Akkad's throne.\n\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "OPENR", "ID", "ID", "CLOSER", "ID", "NEWLINE",
             "OPENR", "ID", "ID", "ID", "CLOSER", "ID", "NEWLINE", ],
            [None, "labeled", "en", "project", None,
             None, "o", "20", None, "You strew all (kinds of) seed.", None,
             None, "o", "i", "2", None,
             "No-one will occupy the king of Akkad's throne.", None, ]
        )

    def test_translation_range_label_prime(self):
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@label r 1' - r 2'\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "ID", "ID", "MINUS", "ID", "ID", "NEWLINE"],
            [None, "labeled", "en", "project", None,
             None, "r", "1'", None, "r", "2'", None]
        )

    def test_translation_label_unicode_suffix(self):
        self.compare_tokens(
            "@translation labeled en project\n" +
            u'@label r A\u2081\n',
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "ID", "ID", "NEWLINE"],
            [None, "labeled", "en", "project", None,
             None, "r", u"A\u2081"]
        )

    def test_translation_label_unicode_prime(self):
        self.compare_tokens(
            "@translation labeled en project\n" +
            u'@label r 1\u2019\n',
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "ID", "ID", "NEWLINE"],
            [None, "labeled", "en", "project", None,
             None, "r", "1'", None]
        )

    def test_translation_label_unicode_prime2(self):
        self.compare_tokens(
            "@translation labeled en project\n" +
            u'@label r 1\xb4\n',
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "ID", "ID", "NEWLINE"],
            [None, "labeled", "en", "project", None,
             None, "r", "1'", None]
        )

    def test_translation_label_unicode_prime3(self):
        self.compare_tokens(
            "@translation labeled en project\n" +
            u'@label r 1\u2032\n',
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "ID", "ID", "NEWLINE"],
            [None, "labeled", "en", "project", None,
             None, "r", "1'", None]
        )

    def test_translation_label_unicode_prime4(self):
        self.compare_tokens(
            "@translation labeled en project\n" +
            u'@label r 1\u02CA\n',
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "ID", "ID", "NEWLINE"],
            [None, "labeled", "en", "project", None,
             None, "r", "1'", None]
        )

    def test_translation_range_label_plus(self):
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@label+ o 28\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "ID", "ID", "NEWLINE"]
        )

    def test_translation_label_long_reference(self):
        "Translations can have full surface names rather than single letter"
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@label obverse 28\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "REFERENCE", "ID", "NEWLINE"]
        )

    def test_translation_symbols_in_translation(self):
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@label o 1'\n" +
            "[...] ... (zodiacal sign) 8, 10° = (sign) 12, 10°\n\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "ID", "ID", "NEWLINE",
             "ID", "NEWLINE"]
        )

    def test_translation_ats_in_translation(self):
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@label o 1'\n" +
            "@kupputu (means): affliction (@? and) reduction?@;" +
            " they are ... like cisterns.\n\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "ID", "ID", "NEWLINE",
             "ID", "NEWLINE"]
        )

    def test_translation_blank_line_begins_translation(self):
        # A double newline normally ends a translation paragraph
        # But this is NOT the case at the beginning of a section,
        # Apparently.
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@label o 16\n" +
            "\n" +
            "@šipir @ṭuhdu @DU means: a message of abundance" +
            " will come triumphantly.\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "ID", "ID", "NEWLINE",
             "ID", "NEWLINE"]
        )

    def test_translation_blank_line_amid_translation(self):
        # A double newline normally ends a translation paragraph
        # But this is NOT the case at the beginning of a section,
        # Apparently.
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@(4) their [cri]mes [have been forgiven] by the king." +
            " (As to) all [the\n" +
            "\n" +
            "    libe]ls that [have been uttered against me " +
            "in the palace, which] he has\n" +
            "\n" +
            "    heard, [I am not guilty of] any [of them! " +
            "N]ow, should there be a\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "OPENR", "ID", "CLOSER", "ID", "NEWLINE",
             "ID", "NEWLINE", "ID", "NEWLINE"]
        )

    def test_translation_no_blank_line_in_labeled_translation(self):
        # This functionality is expressly forbidden at
        # http://build.oracc.org/doc2/help/editinginatf/translations/index.html
        # But appears is in cm_31_139 anyway
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@label o 13\n" +
            "@al-@ŋa₂-@ŋa₂ @al-@ŋa₂-@ŋa₂ @šag₄-@ba-@ni" +
            " @nu-@sed-@da (means) he will" +
            "remove (... and) he will place (...); his heart will not rest" +
            "It is said in the textual corpus of the lamentation-priests.\n" +
            "@label o 15\n" +
            "Text\n\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "ID", "ID", "NEWLINE",
             "ID", "NEWLINE",
             "LABEL", "ID", "ID", "NEWLINE",
             "ID", "NEWLINE"]
        )

    def test_translation_ATlines_in_translation(self):
        # @ within Translations mark Foreign
        # http://oracc.museum.upenn.edu/doc/help/editinginatf/translations/index.html
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@obverse\n" +
            "1'. @MUD (means) trembling. @MUD (means) dark.",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "OBVERSE", "NEWLINE", "ID"]
        )

    def test_translation_range_label_periods(self):
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@label t.e. 1\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "ID", "ID", "NEWLINE"],
            [None, "labeled", "en", "project", None,
             None, "t.e.", "1"])

    def test_interlinear_translation(self):
        self.compare_tokens(
            "@tablet\n" +
            "1'. ⸢x⸣\n" +
            "#tr: English\n",
            ["TABLET", "NEWLINE",
             "LINELABEL", "ID", "NEWLINE",
             "TR", "ID", "NEWLINE"])

    def test_multilineinterlinear_translation(self):
        self.compare_tokens(
            "@tablet\n" +
            "1'. ⸢x⸣\n" +
            "#tr: English\n" +
            " on multiple lines\n",
            ["TABLET", "NEWLINE",
             "LINELABEL", "ID", "NEWLINE",
             "TR", "ID", "NEWLINE"])

    def test_note_internalflag(self):
        self.compare_tokens(
            "@note Hello James's World",
            ["NOTE", "ID"],
            [None, "Hello James's World"]
        )

    def test_note_internalspace(self):
        self.compare_tokens(
            "@note Hello James",
            ["NOTE", "ID"],
            [None, "Hello James"]
        )

    def test_note_onechar(self):
        self.compare_tokens(
            "@note H",
            ["NOTE", "ID"],
            [None, "H"]
        )

    def test_note_short(self):
        self.compare_tokens(
            "@note I'm",
            ["NOTE", "ID"],
            [None, "I'm"]
        )

    def test_division_note(self):
        self.compare_tokens(
            "@note ^1^ A note to the translation.\n",
            ["NOTE", "HAT", "ID", "HAT", "ID", "NEWLINE"],
            [None, None, "1", None, "A note to the translation.", None]
        )

    def test_hash_note(self):
        self.compare_tokens(
            "@tablet\n" +
            "@obverse\n" +
            "3.    U₄!-BI? 20* [(ina)] 9.30 ina(DIŠ) MAŠ₂!(BAR)\n" +
            "#note: Note to line.\n",
            ["TABLET", "NEWLINE", "OBVERSE", "NEWLINE",
             "LINELABEL"] + ["ID"] * 6 + ["NEWLINE", "NOTE", "ID", "NEWLINE"]
        )

    def test_hash_note_UPPERCASE(self):
        # Some files in the corpus such as ctn_4_168.atf
        # Contains #NOTE: even if the line should be #note:
        self.compare_tokens(
            "@tablet\n" +
            "@obverse\n" +
            "3.    U₄!-BI? 20* [(ina)] 9.30 ina(DIŠ) MAŠ₂!(BAR)\n" +
            "#NOTE: Note to line.\n",
            ["TABLET", "NEWLINE", "OBVERSE", "NEWLINE",
             "LINELABEL"] + ["ID"] * 6 + ["NEWLINE", "NOTE", "ID", "NEWLINE"]
        )

    def test_open_text_with_dots(self):
        # This must not come out as a linelabel of Hello.
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@label o 1\nHello. World\n\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
             "LABEL", "ID", "ID", "NEWLINE",
             "ID", "NEWLINE"]
        )

    def test_flagged_object(self):
        """Flags (! and #) after an @object description become tokens."""
        self.compare_tokens(
            "@object which is remarkable and broken!#\n",
            ["OBJECT", "ID", "EXCLAIM", "HASH", "NEWLINE"])

    def test_comment(self):
        self.compare_tokens(
            "# I've added various things for test purposes\n",
            ['COMMENT', "ID", "NEWLINE"]
        )

    def test_nospace_comment(self):
        self.compare_tokens(
            "#I've added various things for test purposes\n",
            ['COMMENT', "ID", "NEWLINE"]
        )

    def test_check_comment(self):
        self.compare_tokens(
            "#CHECK: I've added various things for test purposes\n",
            ['CHECK', "ID", "NEWLINE"]
        )

    def test_dotline(self):
        """A line containing only a dot collapses to a bare NEWLINE."""
        self.compare_tokens(". \n", ["NEWLINE"])

    def test_translation_heading(self):
        self.compare_tokens(
            "@translation parallel en project\n" +
            "@h1 A translation heading\n",
            ["TRANSLATION", "PARALLEL", "ID", "PROJECT", "NEWLINE"] +
            ["HEADING", "ID", "NEWLINE"]
        )

    def test_heading(self):
        self.compare_tokens(
            "@obverse\n" +
            "@h1 A heading\n",
            ["OBVERSE", "NEWLINE"] +
            ["HEADING", "ID", "NEWLINE"]
        )

    def test_double_comment(self):
        """Not sure if this is correct; but can't find
        anything in structure or lemmatization doc"""
        self.compare_tokens(
            "## papān libbi[belly] (already in gloss, same spelling)\n",
            ['COMMENT', 'ID', 'NEWLINE']
        )

    def test_ruling(self):
        """Strict "$ single ruling" keywords get dedicated token types."""
        self.compare_tokens("$ single ruling",
                            ["DOLLAR", "SINGLE", "RULING"])

    def test_described_object(self):
        self.compare_tokens(
            "@object An object that fits no other category\n",
            ["OBJECT", "ID", "NEWLINE"],
            [None, "An object that fits no other category"]
        )

    def test_nested_object(self):
        self.compare_tokens(
            "@tablet\n" +
            "@obverse\n",
            ["TABLET", "NEWLINE", "OBVERSE", "NEWLINE"]
        )

    def test_object_line(self):
        """A transliteration line followed by its #lem: lemmatization."""
        self.compare_tokens(
            "@tablet\n" +
            "@obverse\n" +
            # Explicit "+" here: the original relied on implicit string
            # literal concatenation mid-chain, inconsistent with the
            # surrounding lines (behaviour is identical either way).
            "1.    [MU] 1.03-KAM {iti}AB GE₆ U₄ 2-KAM\n" +
            "#lem: šatti[year]N; n; Ṭebetu[1]MN; mūša[at night]AV; " +
            "ūm[day]N; n\n",
            ['TABLET', 'NEWLINE',
             "OBVERSE", 'NEWLINE',
             'LINELABEL'] + ['ID'] * 6 + ['NEWLINE', 'LEM'] +
            ['ID', 'SEMICOLON'] * 5 + ['ID', "NEWLINE"]
        )

    def test_dot_in_linelabel(self):
        self.compare_tokens(
            "1.1.    [MU]\n",
            ['LINELABEL', 'ID', 'NEWLINE']
        )

    def test_score_lines(self):
        self.compare_tokens(
            "@score matrix parsed\n" +
            "1.4′. %n ḫašḫūr [api] lal[laga imḫur-līm?]\n" +
            "#lem: ḫašḫūr[apple (tree)]N; api[reed-bed]N\n\n" +
            "A₁_obv_i_4′: [x x x x x] {ú}la-al-[la-ga? {ú}im-ḫu-ur-lim?]\n" +
            "#lem: u; u; u; u; u; " +
            "+lalangu[(a leguminous vegetable)]N$lallaga\n\n" +
            "e_obv_15′–16′: {giš}ḪAŠḪUR [GIŠ.GI] — // [{ú}IGI-lim]\n" +
            "#lem: +hašhūru[apple (tree)]N$hašhūr; api[reed-bed]N;" +
            " imhur-līm['heals-a-thousand'-plant]N\n\n",
            ['SCORE', 'ID', 'ID', "NEWLINE"] +
            ['LINELABEL'] + ['ID'] * 5 + ['NEWLINE'] +
            ['LEM', 'ID', 'SEMICOLON', 'ID', 'NEWLINE'] +
            ['SCORELABEL'] + ['ID'] * 7 + ['NEWLINE'] +
            ['LEM'] + ['ID', 'SEMICOLON'] * 5 + ['ID', 'NEWLINE'] +
            ['SCORELABEL'] + ['ID'] * 5 + ['NEWLINE'] +
            ['LEM'] + ['ID', 'SEMICOLON'] * 2 + ['ID', 'NEWLINE']
        )

    def test_composite(self):
        self.compare_tokens(
            "&Q002769 = SB Anzu 1\n" +
            "@composite\n" +
            "#project: cams/gkab\n" +
            "1.   bi#-in šar da-ad-mi šu-pa-a na-ram {d}ma#-mi\n" +
            "&Q002770 = SB Anzu 2\n" +
            "#project: cams/gkab\n" +
            "1.   bi-riq ur-ha šuk-na a-dan-na\n",
            ["AMPERSAND", "ID", "EQUALS", "ID", "NEWLINE"] +
            ['COMPOSITE', 'NEWLINE'] +
            ["PROJECT", "ID", "NEWLINE"] +
            ["LINELABEL"] + ['ID'] * 6 + ['NEWLINE'] +
            ["AMPERSAND", "ID", "EQUALS", "ID", "NEWLINE"] +
            ["PROJECT", "ID", "NEWLINE"] +
            ["LINELABEL"] + ['ID'] * 4 + ["NEWLINE"]
        )

    def test_translated_composite(self):
        """Two composite texts where the first also carries a translation."""
        self.compare_tokens(
            "&Q002769 = SB Anzu 1\n" +
            "@composite\n" +
            "#project: cams/gkab\n" +
            "1.   bi#-in šar da-ad-mi šu-pa-a na-ram {d}ma#-mi\n" +
            "@translation labeled en project\n" +
            # Explicit "+" here: the original relied on implicit string
            # literal concatenation, unlike every neighbouring line
            # (behaviour is identical either way).
            "@(1) English\n" +
            "&Q002770 = SB Anzu 2\n" +
            "#project: cams/gkab\n" +
            "1.   bi-riq ur-ha šuk-na a-dan-na\n",
            ["AMPERSAND", "ID", "EQUALS", "ID", "NEWLINE"] +
            ['COMPOSITE', 'NEWLINE'] +
            ["PROJECT", "ID", "NEWLINE"] +
            ["LINELABEL"] + ['ID'] * 6 + ['NEWLINE'] +
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE"] +
            ["OPENR", "ID", "CLOSER", "ID", "NEWLINE"] +
            ["AMPERSAND", "ID", "EQUALS", "ID", "NEWLINE"] +
            ["PROJECT", "ID", "NEWLINE"] +
            ["LINELABEL"] + ['ID'] * 4 + ["NEWLINE"]
        )

    def test_equalbrace(self):
        self.compare_tokens(
            "@tablet\n" +
            "@reverse\n" +
            "2'.    ITI# an-ni-u2#\n" +
            "={    ur-hu\n",
            ['TABLET', "NEWLINE"] +
            ["REVERSE", "NEWLINE"] +
            ["LINELABEL"] + ['ID'] * 2 + ["NEWLINE"] +
            ["EQUALBRACE", "ID", "NEWLINE"]
        )

    def test_multilingual_interlinear(self):
        self.compare_tokens(
            "@tablet\n" +
            "@obverse\n" +
            "1. dim₃#-me-er# [...]\n" +
            "#lem: diŋir[deity]N; u\n" +
            "== %sb DINGIR-MEŠ GAL#-MEŠ# [...]\n" +
            "#lem: ilū[god]N; rabûtu[great]AJ; u\n" +
            "# ES dim₃-me-er = diŋir\n" +
            "|| A o ii 15\n",
            ['TABLET', "NEWLINE"] +
            ["OBVERSE", "NEWLINE"] +
            ["LINELABEL"] + ['ID'] * 2 + ["NEWLINE"] +
            ["LEM"] + ["ID", "SEMICOLON"] + ["ID"] + ["NEWLINE"] +
            ["MULTILINGUAL", "ID"] + ["ID"] * 3 + ["NEWLINE"] +
            ["LEM"] + ["ID", "SEMICOLON"] * 2 + ["ID"] + ["NEWLINE"] +
            ["COMMENT", "ID", "NEWLINE"] +
            ["PARBAR", "ID", "ID", "ID", "ID", "NEWLINE"]
        )

    def test_strict_in_parallel(self):
        self.compare_tokens(
            "@translation parallel en project\n" +
            "$ reverse blank",
            ["TRANSLATION", "PARALLEL", "ID", "PROJECT", "NEWLINE"] +
            ["DOLLAR", "ID"]
        )

    def test_query_in_parallel(self):
        """The parallel ID regex was too general and identified ? after
            @obverse as an ID token, not a QUERY."""
        self.compare_tokens(
            "@translation parallel en project\n" +
            "@obverse?",
            ["TRANSLATION", "PARALLEL", "ID", "PROJECT", "NEWLINE"] +
            ["OBVERSE", "QUERY"]
        )

    def test_loose_in_labeled(self):
        """A loose $-line followed by a labeled paragraph lexes as
        DOLLAR ID then OPENR ... CLOSER ID."""
        atf = ("@translation labeled en project\n"
               "$ (Break)\n"
               "@(r 2) I am\n\n")
        expected = ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
                    "DOLLAR", "ID", "NEWLINE",
                    "OPENR", "ID", "ID", "CLOSER", "ID", "NEWLINE"]
        self.compare_tokens(atf, expected)

    def test_ati_in_translation(self):
        """An @i{...} inline-formatting line inside a labeled translation
        paragraph yields no extra tokens (the expected list ends with the
        paragraph's own ID and NEWLINE)."""
        atf = ("@translation labeled en project\n"
               "@(r 2) I am\n"
               "@i{eššēšu-}festival\n")
        expected = ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE",
                    "OPENR", "ID", "ID", "CLOSER", "ID", "NEWLINE"]
        self.compare_tokens(atf, expected)

    def test_blank_after_para_transctrl_windows(self):
        """[...] should not exit the para state but did previously
           due to an insufficiently strict regex (CRLF line endings)."""
        self.compare_tokens(
            "@translation labeled en project\r\n" +
            "@(o i 1')\r\n" +
            "[...]\r\n\r\n" +
            "@(o i 2')\r\n" +
            "[... you put] inside [his] ears [and the evil] " +
            "afflicting his head [will be eradicated].\r\n\r\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE"] +
            ["OPENR", "ID", "ID", "ID", "CLOSER"] +
            ["ID", "NEWLINE"] +
            ["OPENR", "ID", "ID", "ID", "CLOSER"] +
            ["ID", "NEWLINE"]
        )

    def test_blank_after_para_transctrl(self):
        """[...] should not exit the para state but did previously
           due to an insufficiently strict regex (LF line endings)."""
        self.compare_tokens(
            "@translation labeled en project\n" +
            "@(o i 1)\n" +
            "[(If) in] Tašritu (month VII), on day 1, " +
            "a solar eclipse takes place: [...].\n\n" +
            "@(o i 2)\n" +
            "[...], on day 7, a solar eclipse takes place: [...].\n\n",
            ["TRANSLATION", "LABELED", "ID", "PROJECT", "NEWLINE"] +
            ["OPENR", "ID", "ID", "ID", "CLOSER"] +
            ["ID", "NEWLINE"] +
            ["OPENR", "ID", "ID", "ID", "CLOSER"] +
            ["ID", "NEWLINE"]
        )

    def test_strict_in_labelled_parallel(self):
        """A strict $-line in a labeled translation lexes as DOLLAR ID."""
        atf = ("@translation labeled en project\n"
               "$ reverse blank")
        self.compare_tokens(atf,
                            ["TRANSLATION", "LABELED", "ID", "PROJECT",
                             "NEWLINE", "DOLLAR", "ID"])

    def test_strict_as_loose_in_translation(self):
        """A $-line with free text in a translation lexes as DOLLAR ID."""
        atf = ("@translation parallel en project\n"
               "$ Continued in text no. 2\n")
        self.compare_tokens(atf,
                            ["TRANSLATION", "PARALLEL", "ID", "PROJECT",
                             "NEWLINE", "DOLLAR", "ID", "NEWLINE"])

    def test_punctuated_translation(self):
        """Punctuation in a translation line is kept inside one ID value."""
        atf = ("@translation parallel en project\n"
               "1. 'What is going on?', said the King!\n")
        types = ["TRANSLATION", "PARALLEL", "ID", "PROJECT", "NEWLINE",
                 "LINELABEL", "ID", "NEWLINE"]
        values = [None, None, "en", None, None,
                  "1", "'What is going on?', said the King!", None]
        self.compare_tokens(atf, types, values)

    def test_translation_note(self):
        """A #note: line inside a translation lexes as NOTE ID."""
        atf = ("@translation parallel en project\n"
               "@reverse\n"
               "#note: reverse uninscribed\n")
        self.compare_tokens(atf,
                            ["TRANSLATION", "PARALLEL", "ID", "PROJECT",
                             "NEWLINE", "REVERSE", "NEWLINE",
                             "NOTE", "ID", "NEWLINE"])

    def test_equals_in_translation_note(self):
        """An equals sign inside a #note: body stays part of its ID."""
        atf = ("@translation parallel en project\n"
               "@reverse\n"
               '#note: The CAD translation šarriru = "humble",\n')
        self.compare_tokens(atf,
                            ["TRANSLATION", "PARALLEL", "ID", "PROJECT",
                             "NEWLINE", "REVERSE", "NEWLINE",
                             "NOTE", "ID", "NEWLINE"])

    def test_note_ended_by_strucuture(self):
        """A following @-structure line terminates a #note: body."""
        atf = ("@translation parallel en project\n"
               "@obverse\n"
               '#note: The CAD translation šarriru = "humble",\n'
               "@reverse")
        self.compare_tokens(atf,
                            ["TRANSLATION", "PARALLEL", "ID", "PROJECT",
                             "NEWLINE", "OBVERSE", "NEWLINE",
                             "NOTE", "ID", "NEWLINE", "REVERSE"])

    def test_milestone(self):
        """An @m=locator milestone lexes as M EQUALS ID."""
        atf = ("@tablet\n"
               "@obverse\n"
               "@m=locator catchline\n"
               "16'. si-i-ia-a-a-ku\n")
        expected = ["TABLET", "NEWLINE",
                    "OBVERSE", "NEWLINE",
                    "M", "EQUALS", "ID", "NEWLINE",
                    "LINELABEL", "ID", "NEWLINE"]
        self.compare_tokens(atf, expected)

    def test_include(self):
        """An @include directive lexes as INCLUDE ID EQUALS ID."""
        atf = ("@tablet\n"
               "@obverse\n"
               "@include dcclt:P229061 = MSL 07, 197 V02, 210 V11\n")
        expected = ["TABLET", "NEWLINE",
                    "OBVERSE", "NEWLINE",
                    "INCLUDE", "ID", "EQUALS", "ID", "NEWLINE"]
        self.compare_tokens(atf, expected)

    def test_double_newline_and_lexpos(self):
        """A blank line is absorbed into one NEWLINE token whose value is
        "\\n\\n"; line numbers and lexer positions are also checked."""
        atf = ("@obverse\n"
               "\n"
               "#note:\n")
        self.compare_tokens(atf,
                            ["OBVERSE", "NEWLINE", "NOTE", "NEWLINE"],
                            ["obverse", "\n\n", "note", "\n"],
                            [1, 1, 3, 3],
                            [1, 8, 11, 16])

    def test_blankline_with_tab_inadsorb(self):
        """A whitespace-only (tab) line is absorbed into the surrounding
        NEWLINE token's value ("\\n\\t\\n")."""
        atf = ("# ES mu-lu = lu₂, ša₃-ab = šag\n"
               "\t\n"
               "7. keš₂-da")
        self.compare_tokens(atf,
                            ["COMMENT", "ID", "NEWLINE", "LINELABEL", "ID"],
                            ["#", "ES mu-lu = lu₂, ša₃-ab = šag",
                             "\n\t\n", "7", "keš₂-da"])

    def test_invalid_at_raises_syntax_error(self):
        """An unknown @-keyword raises SyntaxError, or emits exactly one
        warning when the lexer is built with skipinvalid=True."""
        self.ensure_raises_and_not(u"@obversel\n", nwarnings=1)

    def test_invalid_hash_raises_syntax_error(self):
        """An unknown #-protocol raises SyntaxError, or emits exactly two
        warnings when the lexer is built with skipinvalid=True."""
        self.ensure_raises_and_not(u"#lems: Ṣalbatanu[Mars]CN\n", nwarnings=2)

    def test_invalid_id_syntax_error(self):
        """A bare, unlabeled line raises SyntaxError, or emits exactly one
        warning when the lexer is built with skipinvalid=True."""
        self.ensure_raises_and_not(u"Ṣalbatanu[Mars]CN\n", nwarnings=1)

    @staticmethod
    def test_resolve_keyword_no_extra():
        """Test that resolve_keyword works correctly when extra is not
        passed. This never happens in actual code, hence this test."""
        mylexer = AtfLexer()
        result = mylexer.resolve_keyword('obverse',
                                         mylexer.structures)
        assert result == 'OBVERSE'
Example #12
0
def try_parse(content):
    """Lex and parse an ATF string, first ensuring it ends with a newline.

    Uses ``str.endswith`` rather than indexing ``content[-1]`` so that an
    empty string is handled (it gets a newline appended) instead of raising
    IndexError.
    """
    if not content.endswith('\n'):
        content += "\n"
    lexer = AtfLexer().lexer
    parser = AtfParser().parser
    return parser.parse(content, lexer=lexer)
Example #13
0
 def setUp(self):
     """
     Initialize a fresh lexer and parser before each test.

     NOTE(review): AtfLexer/AtfParser are imported outside this fragment;
     presumably pyoracc's ATF lexer/parser classes — confirm against the
     file's import block.
     """
     self.lexer = AtfLexer().lexer
     self.parser = AtfParser().parser