Exemple #1
0
 def t_INITIAL_parallel_labeled_HASHID(self, t):
     r'\#[a-zA-Z][a-zA-Z0-9\[\]]+\:'
     # The string above is this token rule's regular expression (PLY reads
     # it from the docstring), so it must not be turned into prose.  It is
     # written as a raw string: the value is identical, but the backslash
     # escapes no longer trigger Python's invalid-escape-sequence warning.
     #
     # The pattern requires the trailing colon, so drop both the leading
     # '#' and the trailing ':' and advance lexpos past the '#'.
     t.value = t.value[1:-1]
     t.lexpos += 1
     # Use lower here since there are some ATF files with
     # the protocol incorrectly written as #NOTE:
     t.type = self.resolve_keyword(t.value.lower(),
                                   AtfLexer.protocols,
                                   extra={'CHECK': 'CHECK'})

     # Lexer state to enter after each protocol keyword; token types that
     # are not listed leave the state stack untouched.
     state_for_type = {
         'KEY': 'nonequals',
         'LEM': 'lemmatize',
         'TR': 'interlinear',
         'PROJECT': 'flagged',
         'BIB': 'flagged',
         'CHECK': 'absorb',
         'NOTE': 'para',
     }
     next_state = state_for_type.get(t.type)
     if next_state is not None:
         t.lexer.push_state(next_state)

     if t.type is None:
         # Unknown protocol keyword: warn and drop the token when
         # self.skipinvalid is set, otherwise raise a SyntaxError that
         # carries the line number, offset and offending text.
         formatstring = u"Illegal #STRING '{}'".format(t.value)
         valuestring = t.value
         if _pyversion() == 2:
             formatstring = formatstring.encode('UTF-8')
             valuestring = valuestring.encode('UTF-8')
         if self.skipinvalid:
             warnings.warn(formatstring, UserWarning)
             return
         else:
             raise SyntaxError(formatstring,
                               (None, t.lineno, t.lexpos, valuestring))
     return t
Exemple #2
0
 def p_error(self, p):
     """Print (without raising) a SyntaxError for an unparsable token.

     ``p`` is the offending token, or ``None`` when the parser ran out of
     input; PLY calls the error hook with ``None`` at end of file, and
     that case previously crashed on ``p.value``.
     """
     if p is None:
         # No token to describe: the error is at the end of the input.
         print(SyntaxError("PyOracc could not parse the input: "
                           "unexpected end of input."))
         return
     formatstring = u"PyOracc could not parse token '{}'.".format(p)
     valuestring = p.value
     if _pyversion() == 2:
         formatstring = formatstring.encode('UTF-8')
         valuestring = valuestring.encode('UTF-8')
     print(SyntaxError(formatstring,
                       (None, p.lineno, p.lexpos, valuestring)))
Exemple #3
0
 def p_error(self, p):
     """Raise a SyntaxError for a token the parser could not handle.

     ``p`` is the offending token, or ``None`` when the parser ran out of
     input; PLY calls the error hook with ``None`` at end of file, and
     that case previously failed with AttributeError on ``p.value``.

     Raises:
         SyntaxError: always; it carries the line number, offset and token
             value when a token is available.
     """
     if p is None:
         # No token to describe: the error is at the end of the input.
         raise SyntaxError("PyOracc could not parse the input: "
                           "unexpected end of input.")
     formatstring = u"PyOracc could not parse token '{}'.".format(p)
     valuestring = p.value
     if _pyversion() == 2:
         formatstring = formatstring.encode('UTF-8')
         valuestring = valuestring.encode('UTF-8')
     raise SyntaxError(formatstring,
                       (None, p.lineno, p.lexpos, valuestring))
Exemple #4
0
 def t_ANY_error(self, t):
     # Lexer error hook: report the first character of the unmatched text.
     message = u"Illegal character '{}'".format(t.value[0])
     if _pyversion() == 2:
         message = message.encode('UTF-8')
     if not self.skipinvalid:
         raise SyntaxError(message)
     # Lenient mode: step over the offending character and only warn.
     t.lexer.skip(1)
     warnings.warn(message, UserWarning)
Exemple #5
0
 def p_error(self, p):
     """Raise a SyntaxError describing the token the parser rejected.

     ``p`` is the offending token, or ``None`` when the parser ran out of
     input; PLY calls the error hook with ``None`` at end of file, and
     that case previously failed with AttributeError on ``p.type``.

     Raises:
         SyntaxError: always; it carries the line number, offset and token
             value when a token is available.
     """
     if p is None:
         # No token to describe: the error is at the end of the input.
         raise SyntaxError("PyOracc could not parse the input: "
                           "unexpected end of input.")
     formatstring = u"PyOracc could not parse token {} at line {} at " \
                    u"offset {} with value '{}'.".format(p.type,
                                                         p.lineno,
                                                         p.lexpos, p.value)
     valuestring = p.value
     if _pyversion() == 2:
         valuestring = valuestring.encode('UTF-8')
         formatstring = formatstring.encode('UTF-8')
     raise SyntaxError(formatstring,
                       (None, p.lineno, p.lexpos, valuestring))
Exemple #6
0
 def p_error(self, p):
     """Raise a SyntaxError describing the token the parser rejected.

     ``p`` is the offending token, or ``None`` when the parser ran out of
     input; PLY calls the error hook with ``None`` at end of file, and
     that case previously failed with AttributeError on ``p.type``.

     Raises:
         SyntaxError: always; it carries the line number, offset and token
             value when a token is available.
     """
     if p is None:
         # No token to describe: the error is at the end of the input.
         raise SyntaxError("PyOracc could not parse the input: "
                           "unexpected end of input.")
     formatstring = u"PyOracc could not parse token {} at line {} at " \
                    u"offset {} with value '{}'.".format(p.type,
                                                         p.lineno,
                                                         p.lexpos, p.value)
     valuestring = p.value
     if _pyversion() == 2:
         valuestring = valuestring.encode('UTF-8')
         formatstring = formatstring.encode('UTF-8')
     raise SyntaxError(formatstring,
                       (None, p.lineno, p.lexpos, valuestring))
Exemple #7
0
 def t_ANY_error(self, t):
     # Lexer error hook: the message names the first unmatched character,
     # while the SyntaxError details carry the whole unmatched text.
     message = u"PyOracc got an illegal character '{}'".format(t.value[0])
     offending_text = t.value
     if _pyversion() == 2:
         message = message.encode('UTF-8')
         offending_text = offending_text.encode('UTF-8')
     if not self.skipinvalid:
         raise SyntaxError(message,
                           (None, t.lineno, t.lexpos, offending_text))
     # Lenient mode: step over the offending character and only warn.
     t.lexer.skip(1)
     warnings.warn(message, UserWarning)
     return
Exemple #8
0
 def t_ANY_error(self, t):
     # Lexer error hook: the message names the first unmatched character,
     # while the SyntaxError details carry the whole unmatched text.
     message = u"PyOracc got an illegal character '{}'".format(t.value[0])
     offending_text = t.value
     if _pyversion() == 2:
         message = message.encode('UTF-8')
         offending_text = offending_text.encode('UTF-8')
     if not self.skipinvalid:
         location = (None, t.lineno, t.lexpos, offending_text)
         raise SyntaxError(message, location)
     # Lenient mode: step over the offending character and only warn.
     t.lexer.skip(1)
     warnings.warn(message, UserWarning)
     return
Exemple #9
0
    def t_INITIAL_parallel_labeled_ATID(self, t):
        r'^\@[a-zA-Z][a-zA-Z0-9\[\]]*\+?'
        # The string above is this token rule's regular expression (PLY
        # reads it from the docstring), so it must not be turned into prose.
        # It is written as a raw string: the value is identical, but the
        # backslash escapes no longer trigger Python's
        # invalid-escape-sequence warning.
        #
        # Drop the leading '@' and advance lexpos past it.
        t.value = t.value[1:]
        t.lexpos += 1

        # Keywords that map to a token type other than their own name.
        aliases = {
            "h1": "HEADING",
            "h2": "HEADING",
            "h3": "HEADING",
            "label+": "LABEL",
            "end": "END"
        }
        known_structures = (AtfLexer.structures +
                            AtfLexer.long_argument_structures)
        t.type = self.resolve_keyword(t.value, known_structures,
                                      extra=aliases)

        # The checks below run in the original order because a token type
        # may satisfy more than one of them.
        if t.type == "INCLUDE":
            t.lexer.push_state('nonequals')

        if t.type == "END":
            # In lenient mode (self.skipinvalid) the pop is skipped when the
            # lexer is already in the INITIAL state.
            if not self.skipinvalid or t.lexer.current_state() != 'INITIAL':
                t.lexer.pop_state()
            t.lexer.push_state('transctrl')

        if t.type == "LABEL":
            t.lexer.push_state("para")
            t.lexer.push_state("transctrl")

        if t.type == "TRANSLATION":
            t.lexer.push_state("transctrl")

        if t.type == "SCORE":
            t.lexer.push_state('score')

        if t.type in AtfLexer.long_argument_structures + ["NOTE"]:
            t.lexer.push_state('flagged')

        if t.type is None:
            # Unknown @-keyword: the token is dropped either way.  Lenient
            # mode issues a warning; otherwise the SyntaxError is printed
            # rather than raised.
            formatstring = u"Illegal @STRING '{}'".format(t.value)
            valuestring = t.value
            if _pyversion() == 2:
                formatstring = formatstring.encode('UTF-8')
                valuestring = valuestring.encode('UTF-8')
            if self.skipinvalid:
                warnings.warn(formatstring, UserWarning)
                return
            else:
                print(
                    SyntaxError(formatstring,
                                (None, t.lineno, t.lexpos, valuestring)))
                return
        return t
Exemple #10
0
    def t_INITIAL_parallel_labeled_ATID(self, t):
        r'^\@[a-zA-Z][a-zA-Z0-9\[\]]*\+?'
        # The string above is this token rule's regular expression (PLY
        # reads it from the docstring), so it must not be turned into prose.
        # It is written as a raw string: the value is identical, but the
        # backslash escapes no longer trigger Python's
        # invalid-escape-sequence warning.
        #
        # Drop the leading '@' and advance lexpos past it.
        t.value = t.value[1:]
        t.lexpos += 1

        # Keywords that map to a token type other than their own name.
        aliases = {
            "h1": "HEADING",
            "h2": "HEADING",
            "h3": "HEADING",
            "label+": "LABEL",
            "end": "END"
        }
        known_structures = (AtfLexer.structures +
                            AtfLexer.long_argument_structures)
        t.type = self.resolve_keyword(t.value, known_structures,
                                      extra=aliases)

        # The checks below run in the original order because a token type
        # may satisfy more than one of them.
        if t.type == "INCLUDE":
            t.lexer.push_state('nonequals')

        if t.type == "END":
            # In lenient mode (self.skipinvalid) the pop is skipped when the
            # lexer is already in the INITIAL state.
            if not self.skipinvalid or t.lexer.current_state() != 'INITIAL':
                t.lexer.pop_state()
            t.lexer.push_state('transctrl')

        if t.type == "LABEL":
            t.lexer.push_state("para")
            t.lexer.push_state("transctrl")

        if t.type == "TRANSLATION":
            t.lexer.push_state("transctrl")

        if t.type == "SCORE":
            t.lexer.push_state('score')

        if t.type in AtfLexer.long_argument_structures + ["NOTE"]:
            t.lexer.push_state('flagged')

        if t.type is None:
            # Unknown @-keyword: warn and drop the token in lenient mode,
            # otherwise raise a SyntaxError that carries the line number,
            # offset and offending text.
            formatstring = u"Illegal @STRING '{}'".format(t.value)
            valuestring = t.value
            if _pyversion() == 2:
                formatstring = formatstring.encode('UTF-8')
                valuestring = valuestring.encode('UTF-8')
            if self.skipinvalid:
                warnings.warn(formatstring, UserWarning)
                return
            else:
                raise SyntaxError(formatstring,
                                  (None, t.lineno, t.lexpos, valuestring))
        return t
Exemple #11
0
def test_note_ended_by_line(line_label):
    """A #note: runs as free text and stops at the following line label."""
    # Sample text for the two labelled lines around the note.
    first_text = u"a-šar _saḫar.ḫi.a_ bu-bu-su-nu"
    second_text = u"a-kal-ši-na ṭi-id-di"
    note_text = "Does this combine with the next line?"
    first_words = first_text.split()
    second_words = second_text.split()

    # The second label reuses everything after the first character of the
    # given label and increments that first character as a number.
    incremented = int(line_label[:1]) + 1
    to_text = unicode if _pyversion() == 2 else str
    following_label = to_text(incremented) + line_label[1:]

    content = (line_label + ". " + first_text + "\n" +
               "#note: " + note_text + "\n" +
               following_label + ". " + second_text + "\n")

    expected_types = ["LINELABEL"]
    expected_types += ["ID"] * len(first_words)
    expected_types += ["NEWLINE", "NOTE", "ID", "NEWLINE", "LINELABEL"]
    expected_types += ["ID"] * len(second_words)
    expected_types.append("NEWLINE")

    expected_values = [line_label]
    expected_values += first_words
    expected_values += [None, None, note_text, None, following_label]
    expected_values += second_words
    expected_values.append(None)

    compare_tokens(content, expected_types, expected_values)
Exemple #12
0
PyORACC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with PyORACC. If not, see <http://www.gnu.org/licenses/>.
'''

from __future__ import print_function
from itertools import repeat
import pytest
from pyoracc.atf.common.atflex import AtfLexer
from pyoracc import _pyversion
if _pyversion() == 2:
    from itertools import izip_longest as zip_longest
else:
    from itertools import zip_longest


def compare_tokens(content,
                   expected_types,
                   expected_values=None,
                   expected_lineno=None,
                   expected_lexpos=None):
    lexer = AtfLexer().lexer
    lexer.input(content)
    if expected_values is None:
        expected_values = repeat(None)
    if expected_lineno is None:
Exemple #13
0
 def yacc_default(self, value, line, pos, etype):
     """Fill the ``yacc_tmp_default`` template with the given values.

     The text is UTF-8 encoded when ``_pyversion()`` reports 2.
     """
     text = self.yacc_tmp_default.format(value, line, pos, etype)
     if _pyversion() == 2:
         return text.encode('UTF-8')
     return text
Exemple #14
0
 def lex_default(self, value, line, pos):
     """Fill the ``lex_tmp_default`` template with the given values.

     The text is UTF-8 encoded when ``_pyversion()`` reports 2.
     """
     text = self.lex_tmp_default.format(value, line, pos)
     if _pyversion() == 2:
         return text.encode('UTF-8')
     return text
Exemple #15
0
 def wrong_path(self, log_path):
     """Fill the ``wrong_logpath_tmp`` template with ``log_path``.

     The text is UTF-8 encoded when ``_pyversion()`` reports 2.
     """
     text = self.wrong_logpath_tmp.format(log_path)
     if _pyversion() == 2:
         return text.encode('UTF-8')
     return text
Exemple #16
0
 def summary_num(self, lex_num, yacc_num, pathname):
     """Fill the ``summary_num_tmp`` template with the given values.

     The text is UTF-8 encoded when ``_pyversion()`` reports 2.
     """
     text = self.summary_num_tmp.format(lex_num, yacc_num, pathname)
     if _pyversion() == 2:
         return text.encode('UTF-8')
     return text
Exemple #17
0
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with PyORACC. If not, see <http://www.gnu.org/licenses/>.
'''


from __future__ import print_function
from itertools import repeat
from unittest import TestCase
import pytest
from pyoracc.atf.common.atflex import AtfLexer
from pyoracc import _pyversion
if _pyversion() == 2:
    from itertools import izip_longest as zip_longest
else:
    from itertools import zip_longest


class TestLexer(TestCase):
    """A class that contains all tests of the ATFLexer"""
    def setUp(self):
        """Build a new AtfLexer and keep its ``lexer`` on the test case."""
        self.lexer = AtfLexer().lexer

    def compare_tokens(self, content, expected_types, expected_values=None,
                       expected_lineno=None, expected_lexpos=None):
        self.lexer.input(content)
        if expected_values is None:
            expected_values = repeat(None)
Exemple #18
0
 def head_default(self, idx, ID, path):
     """Fill the ``head_tmp_default`` template with the given values.

     The text is UTF-8 encoded when ``_pyversion()`` reports 2.
     """
     text = self.head_tmp_default.format(idx, ID, path)
     if _pyversion() == 2:
         return text.encode('UTF-8')
     return text
Exemple #19
0
 def summary_end(self, pathname):
     """Fill the ``summary_end_tmp`` template with ``pathname``.

     The text is UTF-8 encoded when ``_pyversion()`` reports 2.
     """
     text = self.summary_end_tmp.format(pathname)
     if _pyversion() == 2:
         return text.encode('UTF-8')
     return text
Exemple #20
0
 def raise_error(self, e, pathname):
     """Fill the ``raise_tmp`` template with ``e`` and ``pathname``.

     Despite the name, nothing is raised here: the formatted text is
     returned, UTF-8 encoded when ``_pyversion()`` reports 2.
     """
     text = self.raise_tmp.format(e, pathname)
     if _pyversion() == 2:
         return text.encode('UTF-8')
     return text