Example #1
0
    def test_scanner(self):
        """Check that ``Scanner`` tokenises a small arithmetic expression.

        Identifiers pass through unchanged, operators get an ``op`` prefix,
        numeric literals are converted to ``float``/``int`` and whitespace
        produces no token at all.
        """
        def keep_identifier(scanner, token):
            return token

        def tag_operator(scanner, token):
            return "op%s" % token

        def to_float(scanner, token):
            return float(token)

        def to_int(scanner, token):
            return int(token)

        # The float rule is listed before the int rule so that "312.50" is
        # taken as one float instead of an int followed by unmatched text.
        lexicon = [
            (r"[a-zA-Z_]\w*", keep_identifier),
            (r"\d+\.\d*", to_float),
            (r"\d+", to_int),
            (r"=|\+|-|\*|/", tag_operator),
            (r"\s+", None),
        ]
        scanner = Scanner(lexicon)

        # The compiled pattern must be reachable through the low-level scanner.
        self.assertNotEqual(scanner.scanner.scanner("").pattern, None)

        expected_tokens = [
            'sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5, 'op+', 'bar',
        ]
        self.assertEqual(
            scanner.scan("sum = 3*foo + 312.50 + bar"),
            (expected_tokens, ''))
Example #2
0
    def scan(self, string):
        """Split ``string`` into ``(type, text)`` tokens.

        Four token classes are recognised, each described by a pattern
        attribute on ``self`` and tagged with the matching ``*_type``
        attribute.  The rules are tried in the order listed below.

        Returns the list of ``(type, text)`` tuples.

        Raises ``LexicalException`` when part of the input cannot be
        tokenised; the message quotes at most the first 10 characters of
        the unmatched remainder.
        """
        def as_constant(_, tok):
            return (self.constant_type, tok)

        def as_numerical(_, tok):
            return (self.numerical_type, tok)

        def as_sentntial(_, tok):
            return (self.sentntial_type, tok)

        def as_predicate(_, tok):
            return (self.predicate_type, tok)

        lexicon = [
            (self.constant_signs, as_constant),
            (self.numerical_variables, as_numerical),
            (self.sentntial_variables, as_sentntial),
            (self.predicate_variables, as_predicate),
        ]

        tokens, remainder = Scanner(lexicon).scan(string)
        if not remainder:
            return tokens

        # Slicing already copes with remainders shorter than 10 characters.
        raise LexicalException("error lexing {0} ..".format(remainder[:10]))
Example #3
0
def vt_parse(str):
    """Tokenise and parse a textual variable-tree description.

    ``str`` is the text to read (the parameter name shadows the builtin;
    it is kept for compatibility with existing callers).  Results are
    looked up in ``vt_parse.memory`` first, so a previously seen input is
    returned without re-parsing.

    On a cache miss the input is tokenised into ``(KIND, text)`` pairs
    (``LBRACE``, ``RBRACE``, ``COMMA``, ``VAR``; whitespace is dropped) and
    handed to ``Parser().parse``.  If the input cannot be fully tokenised,
    or the parser raises ``ParseErrors``, a message is printed and the
    process exits.

    NOTE(review): the code shown here neither stores the parsed tree in
    ``vt_parse.memory`` nor returns it -- confirm whether that step is
    missing.
    """
    # We'll memoise this function so several calls on the same input don't
    # require re-parsing.

    if str in vt_parse.memory:
        return vt_parse.memory[str]

    # Use the built in re.Scanner to tokenise the input string.

    def s_lbrace(scanner, token):
        return ("LBRACE", token)

    def s_rbrace(scanner, token):
        return ("RBRACE", token)

    def s_comma(scanner, token):
        return ("COMMA", token)

    def s_varname(scanner, token):
        return ("VAR", token)

    scanner = Scanner([(r'{', s_lbrace), (r'}', s_rbrace), (r',', s_comma),
                       (r'[a-zA-Z_]\w*', s_varname), (r'\s+', None)])

    tokens = scanner.scan(str)

    # tokens is a pair of the tokenised string and any "uneaten" part.
    # check the entire string was eaten.

    if tokens[1] != '':
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Could not read the variable tree given:")
        print(str)
        exit()

    tokens = tokens[0]  # Just the list of tokens.

    p = Parser()
    try:
        tree = p.parse(tokens)
    except p.ParseErrors:
        print("Could not read the variable tree given:")
        print(str)
        exit()
Example #4
0
    def parse_code(self):
        """Rewrite ``self._func`` so free variable names become ``a[i]``.

        The expression text is tokenised; every identifier other than
        ``caller``, ``e`` and ``pi`` is replaced by ``a[<index>]``, where the
        index is the position at which that name was first seen.  ``x``,
        dotted prefixes, function-call prefixes, numbers, operators,
        parentheses and commas are copied through; whitespace is dropped.

        Side effects: resets ``self._count`` and ``self._keys``, stores the
        rewritten text in ``self.code`` and creates one ``FittingParameter``
        (value 1.0) per collected name in ``self._parameters``.

        Raises ``Exception`` if some part of the text cannot be tokenised.
        """
        def keep(_scanner, text):
            # Pass the matched text through unchanged.
            return text

        def var_found(
                scanner,
                name: str
        ):
            if name in ('caller', 'e', 'pi'):
                return name
            try:
                slot = self._keys.index(name)
            except ValueError:
                # First occurrence: the name gets the next free slot.
                slot = self._count
                self._keys.append(name)
                self._count += 1
            return 'a[%d]' % slot

        source = self._func
        scanner = Scanner([
            (r"x", keep),
            (r"[a-zA-Z]+\.", keep),
            (r"[a-z]+\(", keep),
            (r"[a-zA-Z_]\w*", var_found),
            (r"\d+\.\d*", keep),
            (r"\d+", keep),
            (r"\+|-|\*|/", keep),
            (r"\s+", None),
            (r"\)+", keep),
            (r"\(+", keep),
            (r",", keep),
        ])

        # Start from a clean slate before var_found begins collecting names.
        self._count = 0
        self._keys = list()

        pieces, leftover = scanner.scan(source)
        rewritten = ''.join(pieces)
        if leftover:
            raise Exception(
                'parsed: %s, rubbish %s' % (rewritten, leftover))
        self.code = rewritten

        # Define parameters
        self._parameters = list()
        self._parameters.extend(
            FittingParameter(name=key, value=1.0) for key in self._keys
        )
Example #5
0
 def read(self, value):
     """Tokenise ``value`` and return the result of ``self.parse``.

     Reader state (``result``, ``paren_stack``, ``source``, ``pos``,
     ``quoted``) is reset, a fresh Scanner is stored in ``self.scanner`` and
     run over the source.  Each rule's action is obtained by calling
     ``self(<name>)``.  If ``paren_stack`` is non-empty after scanning,
     ``self.raise_error`` is called.
     """
     self.result = []
     self.paren_stack = []
     self.source = value
     self.pos = 0
     self.quoted = False

     # Rules are listed in the order they are handed to Scanner; the two
     # catch-all rules come last.
     rules = [
         (r"\s+", self("skip")),
         (r";[^\n]*\n", self("skip")),
         (r""""(((?<=\\)")|[^"])*((?<!\\)")""", self("str")),
         (r"(\(|\[)", self("open")),
         (r"(\)|\])", self("close")),
         (r"(([\d]+|(((\d+)?\.[\d]+)|([\d]+\.)))e[\+\-]?[\d]+)|(((\d+)?\.[\d]+)|([\d]+\.))",
          self("number")),
         (r"\-?((0x[\da-f]+)|(0[0-7]+)|([1-9][\d]*)|0)[l]?",
          self("number")),
         (r"""%s([^\(\[\)\]\s"]+)""" % self.symbol_marker, self("symbol")),
         (r"'", self("quote")),
         (r"""([^\(\[\)\]\s"]+)""", self("ident")),
         (r"""".*""", self("unterm_str")),
         (r".*", self("unknown_token")),
     ]
     flags = re.M | re.S | re.I
     self.scanner = Scanner(rules, flags)

     self.scanner.scan(self.source)
     if self.paren_stack:
         self.raise_error("missing closing parenthesis.")
     return self.parse(self.result)
Example #6
0
 def _scan_int(self, string, const):
     """Scan ``string`` as an integer and record it on ``self``.

     Sets ``self.type`` to ``'INT'`` and ``self.value`` to the concatenated
     text of every token found.  When ``const`` is ``'SIGNED_INT'`` a
     leading sign rule (one space/tab character at the very start) is tried
     before the value rule (any one character followed by a run of
     space/tab characters).
     """
     # TODO: Add better invalid integer handling
     #       Check for integer sign, possibly treat unsigned integer
     #       as POSITIVE
     blanks = "{}{}".format(CHAR_MAP['space'], CHAR_MAP['tab'])

     def sign_token(scanner, token):
         return ("INT_SIGN", token)

     def value_token(scanner, token):
         return ("INT_VAL", token)

     patterns = []
     if const == 'SIGNED_INT':
         patterns.append((r"^[{}]".format(blanks), sign_token))
     patterns.append((r".[{}]*".format(blanks), value_token))

     found, _unmatched = Scanner(patterns).scan(string)
     self.type = 'INT'
     try:
         self.value = ''.join(item[1] for item in found)
     except IndexError:
         print("Hit IndexError, string trying to check is: {}".format(
             dbg(string)))
Example #7
0
 def __init__(self):
     """Build ``self.s1``, a Scanner whose two rules both call ``self.got``."""
     handler = self.got
     rules = (
         (r'^@@', handler),
         (r'aa', handler),
     )
     self.s1 = Scanner(rules)
Example #8
0
def _scan_bracket(scanner, token):
    return token


def _scan_float(scanner, token):
    return float(token)


def _scan_int(scanner, token):
    return int(token)


def _scan_dstr(scanner, token):
    return token[1:-1].replace('\\"', '"')


def _scan_sstr(scanner, token):
    return token[1:-1].replace("\\'", "'")


# Module-level tokenizer.  Each entry pairs a pattern with the action that
# converts the matched text into a token value.
# NOTE(review): assuming Scanner is re.Scanner, rules are tried in the order
# listed, so the numeric rules must stay ahead of the identifier rule (whose
# character class also accepts digits, '-' and '.').
# NOTE(review): _scan_identifier is not defined in the code shown here --
# confirm it exists before this statement runs.
_scanner = Scanner([
    (r'-?\d+\.\d*', _scan_float),  # optional '-', digits, '.', optional digits
    (r'-?\d+', _scan_int),  # optional '-', digits
    (r'[•\w!@$%^&*()_+<>?|\/;:`~,.=-]+', _scan_identifier),
    (r'\[|\]', _scan_bracket),  # a single '[' or ']'
    (r'"(?:[^"\\]|\\.)*"', _scan_dstr),  # double-quoted, backslash escapes allowed
    (r"'(?:[^'\\]|\\.)*'", _scan_sstr),  # single-quoted, backslash escapes allowed
    (r'\s+', None),  # no action: whitespace yields no token
])
Example #9
0
from pprint import pformat
import logging
import re

log = logging.getLogger()
D = log.debug

logging.basicConfig(level=logging.DEBUG)


def callback(scanner, text):
    """Scanner action: log the matched text; returns None so no token is kept."""
    D("CALL %r", text)


def ignore(scanner, text):
    """Scanner action: log skipped text; returns None so no token is kept."""
    D("IGNORE %r", text)


# Scanner is reached through the ``re`` module imported above; the bare name
# ``Scanner`` is never imported in this script.
s = re.Scanner((
    (r'{{{', callback),
    (r'##', callback),
    (r'\s+', ignore),
    (r'(.+)(?=##)', callback),
))

text = "## {{{  aa##"
while text:
    D("%r", text)
    remainder = s.scan(text)[1]
    if remainder == text:
        # Nothing was consumed, so scanning again would loop forever.
        break
    text = remainder
Example #10
0
 def _scan_file(self):
     """Tokenise the text from ``self._read_file()`` and return the tokens.

     The Scanner is built from ``token_patterns`` with ``FLAGS['s']``; only
     the first element of the scan result is returned, so any unmatched
     remainder is discarded.
     """
     lexer = Scanner(token_patterns, FLAGS['s'])
     outcome = lexer.scan(self._read_file())
     return outcome[0]
Example #11
0
 def _scan_command(self, line, pos, const):
     patterns = [(r"^{}".format(i[0]), i[1]) for i in const]
     scanner = Scanner(patterns)
     found, remainder = scanner.scan(line[pos:])
     self.type = found[0]
     self.value = [i[0] for i in const if i[1] == self.type][0]
Example #12
0
'''
from re import Scanner
from .utils.stack import list_to_stack
from .utils.snippets import (
    pat as SNIPPETS,
    from_string,
    Snippet,
)

# Patterns used by the tokenizer below.
BRACKETS = r'\[|\]'  # a single '[' or ']'
BLANKS = r'\s+'  # a run of whitespace
WORDS = r'[^[\]\s]+'  # a run of characters that are neither brackets nor whitespace

# Rules are tried in the order listed, so SNIPPETS gets the first chance at
# the input and WORDS acts as the fallback for everything that is not a
# bracket or whitespace.
token_scanner = Scanner([
    (SNIPPETS, lambda _, token: from_string(token)),  # converted via from_string
    (BRACKETS, lambda _, token: token),  # emitted verbatim
    (BLANKS, None),  # no action: whitespace yields no token
    (WORDS, lambda _, token: token),  # emitted verbatim
])


class Symbol(str):
    '''A ``str`` subclass marking Joy function names.

    Its ``repr`` is the bare text without surrounding quotes.
    '''

    def __repr__(self):
        return str.__str__(self)


def text_to_expression(text):
    '''Convert a string to a Joy expression.

    When supplied with a string this function returns a Python datastructure
    that represents the Joy datastructure described by the text expression.
    Any unbalanced square brackets will raise a ParseError.
Example #13
0
from re import Scanner

# Tokenizer: every rule but the last emits a (TAG, text) pair.  The tagging
# actions are generated from the (pattern, tag) table; the tag is bound as a
# default argument so each action keeps its own tag.
scanner = Scanner(
    list(
        (regex, (lambda sc, token, _tag=tag: (_tag, token)))
        for regex, tag in (
            (r'\(', 'BEGIN-BRACE'),
            (r'\)', 'END-BRACE'),
            (r'\[', 'BEGIN-NUM'),
            (r'\]', 'END-NUM'),
            (r'\,', 'SEP'),
            (r'\w+', 'KEY'),
        )
    )
    # A dot is consumed but yields no token (its action returns None).
    + [(r'\.', lambda sc, token: None)]
)


class LinkedScope(object):
    def __init__(self):
        self.stack = [[]]

    @property
    def top(self):
        return self.stack[-1]

    def push(self):
        top = []
        self.stack[-1].append(top)
        self.stack.append(top)
        return top

    def pop(self):
        self.stack.pop()

    def reduce_all(self):