def test_scanner(self):
    """Check that ``Scanner`` tokenises a small arithmetic expression.

    Identifiers are returned unchanged, operators are prefixed with
    ``op``, numeric lexemes are converted with ``float``/``int`` and
    whitespace produces no token.  The expected remainder is the empty
    string, i.e. the whole input has to be consumed.
    """
    def keep(scanner, token):
        return token

    def tag_operator(scanner, token):
        return "op%s" % token

    def to_float(scanner, token):
        return float(token)

    def to_int(scanner, token):
        return int(token)

    # The float rule is listed before the int rule so that "312.50" is
    # not split at the digits preceding the dot.
    lexicon = [
        (r"[a-zA-Z_]\w*", keep),
        (r"\d+\.\d*", to_float),
        (r"\d+", to_int),
        (r"=|\+|-|\*|/", tag_operator),
        (r"\s+", None),
    ]
    scanner = Scanner(lexicon)

    # The low-level scanner object has to expose a pattern.
    low_level = scanner.scanner.scanner("")
    self.assertNotEqual(low_level.pattern, None)

    expected_tokens = [
        'sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5, 'op+', 'bar',
    ]
    self.assertEqual(
        scanner.scan("sum = 3*foo + 312.50 + bar"),
        (expected_tokens, ''))
def scan(self, string):
    """Split *string* into ``(type, text)`` token pairs.

    The lexicon is listed in this order: constant signs, numerical
    variables, sentential variables, predicate variables.  Each pattern
    and each type label is read from the instance.

    Returns:
        The list of ``(type, text)`` tuples produced by the scanner.

    Raises:
        LexicalException: if some tail of *string* matches no rule.  The
            message quotes at most the first ten unmatched characters.
    """
    # The callbacks look the type label up when a token is matched, not
    # when the scanner is built.
    def as_constant(_scanner, text):
        return (self.constant_type, text)

    def as_numerical(_scanner, text):
        return (self.numerical_type, text)

    def as_sentential(_scanner, text):
        return (self.sentntial_type, text)

    def as_predicate(_scanner, text):
        return (self.predicate_type, text)

    lexer = Scanner([
        (self.constant_signs, as_constant),
        (self.numerical_variables, as_numerical),
        (self.sentntial_variables, as_sentential),
        (self.predicate_variables, as_predicate),
    ])
    tokens, leftover = lexer.scan(string)
    if not leftover:
        return tokens
    # Slicing is a no-op for leftovers of ten characters or fewer.
    raise LexicalException("error lexing {0} ..".format(leftover[:10]))
def vt_parse(str):
    """Tokenise and parse the variable-tree description held in ``str``.

    The lexer recognises ``{``, ``}``, ``,``, identifiers and whitespace
    (whitespace yields no token).  The resulting token list is handed to
    ``Parser().parse``.

    If ``str`` is already a key of ``vt_parse.memory`` the stored value is
    returned immediately.  When the input cannot be lexed completely, or
    the parser raises ``ParseErrors``, a message and the input are printed
    and ``exit()`` is called.

    NOTE(review): the part of the function visible here neither stores
    the result in ``vt_parse.memory`` nor returns ``tree``; presumably
    that happens in code that follows this excerpt -- confirm.
    NOTE(review): this is Python 2 syntax (``print`` statements and
    ``except X, e``).
    """
    # We'll memoise this function so several calls on the same input don't
    # require re-parsing.
    if (str in vt_parse.memory):
        return vt_parse.memory[str]
    # Use the built in re.Scanner to tokenise the input string.
    # Each callback tags the matched text with its token kind.
    def s_lbrace(scanner, token):
        return ("LBRACE", token)
    def s_rbrace(scanner, token):
        return ("RBRACE", token)
    def s_comma(scanner, token):
        return ("COMMA", token)
    def s_varname(scanner, token):
        return ("VAR", token)
    scanner = Scanner([(r'{', s_lbrace),
                       (r'}', s_rbrace),
                       (r',', s_comma),
                       (r'[a-zA-Z_]\w*', s_varname),
                       (r'\s+', None)])
    tokens = scanner.scan(str)
    # tokens is a pair of the tokenised string and any "uneaten" part.
    # check the entire string was eaten.
    if (tokens[1] != ''):
        print "Could not read the variable tree given:"
        print str
        #print "could not lex: " + tokens[1].__str__()
        exit()
    tokens = tokens[0] # Just the list of tokens.
    p = Parser()
    try:
        tree = p.parse(tokens)
    except p.ParseErrors, e:
        # Same report as for a lexing failure, then terminate.
        print "Could not read the variable tree given:"
        print str
        exit()
def parse_code(self):
    """Rewrite ``self._func`` so that free names index into an array ``a``.

    Every identifier other than ``caller``, ``e`` and ``pi`` is replaced
    by ``a[i]``, where ``i`` is the position at which that name was first
    seen.  The character ``x``, dotted prefixes (``name.``), call heads
    (``name(``), numbers, the four arithmetic operators, parentheses and
    commas are copied through unchanged; whitespace is dropped.

    Side effects:
        ``self._count`` and ``self._keys`` are reset and then filled
        while scanning, ``self.code`` receives the rewritten expression
        and ``self._parameters`` gets one ``FittingParameter`` (initial
        value ``1.0``) per collected name.

    Raises:
        Exception: if a tail of the expression matches none of the rules.
    """
    reserved = ['caller', 'e', 'pi']

    def echo(_scanner, text):
        # Copy the lexeme into the output as-is.
        return text

    def substitute(scanner, name: str):
        if name in reserved:
            return name
        if name in self._keys:
            slot = self._keys.index(name)
        else:
            self._keys.append(name)
            slot = self._count
            self._count += 1
        return 'a[%d]' % slot

    source = self._func
    # Rule order matters: "x", dotted prefixes and call heads are listed
    # ahead of the generic identifier rule.
    lexer = Scanner([
        (r"x", echo),
        (r"[a-zA-Z]+\.", echo),
        (r"[a-z]+\(", echo),
        (r"[a-zA-Z_]\w*", substitute),
        (r"\d+\.\d*", echo),
        (r"\d+", echo),
        (r"\+|-|\*|/", echo),
        (r"\s+", None),
        (r"\)+", echo),
        (r"\(+", echo),
        (r",", echo),
    ])

    self._count = 0
    self._keys = list()
    pieces, leftover = lexer.scan(source)
    rendered = ''.join(pieces)
    if leftover != '':
        raise Exception('parsed: %s, rubbish %s' % (rendered, leftover))
    self.code = rendered

    # Define parameters
    self._parameters = [
        FittingParameter(name=key, value=1.0) for key in self._keys
    ]
def read(self, value):
    """Scan *value* and return ``self.parse`` applied to the result.

    The reader state (``result``, ``paren_stack``, ``source``, ``pos``,
    ``quoted``) is reset first.  A ``Scanner`` is then built whose
    callbacks are obtained by calling the instance with a token-kind
    name, and it is run over the source.  If ``paren_stack`` is not empty
    after scanning, ``raise_error`` is called.
    """
    self.result, self.paren_stack = [], []
    self.source = value
    self.pos = 0
    self.quoted = False

    lexicon = [
        # Whitespace, and ";" up to and including the end of the line.
        (r"\s+", self("skip")),
        (r";[^\n]*\n", self("skip")),
        # Double-quoted string.
        (r""""(((?<=\\)")|[^"])*((?<!\\)")""", self("str")),
        # Opening and closing round/square brackets.
        (r"(\(|\[)", self("open")),
        (r"(\)|\])", self("close")),
        # Numbers with a fractional part and/or an exponent.
        (r"(([\d]+|(((\d+)?\.[\d]+)|([\d]+\.)))e[\+\-]?[\d]+)|(((\d+)?\.[\d]+)|([\d]+\.))", self("number")),
        # Optionally negative integers: 0x..., leading-zero digits 0-7,
        # decimal, with an optional "l" suffix.
        (r"\-?((0x[\da-f]+)|(0[0-7]+)|([1-9][\d]*)|0)[l]?", self("number")),
        # Anything introduced by the configured symbol marker.
        (r"""%s([^\(\[\)\]\s"]+)""" % self.symbol_marker, self("symbol")),
        (r"'", self("quote")),
        (r"""([^\(\[\)\]\s"]+)""", self("ident")),
        # Fallbacks: a string that is never closed, then anything else.
        (r"""".*""", self("unterm_str")),
        (r".*", self("unknown_token"))
    ]
    flags = re.M | re.S | re.I
    self.scanner = Scanner(lexicon, flags)
    self.scanner.scan(self.source)

    if self.paren_stack:
        self.raise_error("missing closing parenthesis.")
    return self.parse(self.result)
def _scan_int(self, string, const):
    """Lex an integer from *string* and record it on the instance.

    ``self.type`` is set to ``'INT'`` and ``self.value`` to the
    concatenation of all matched lexemes.  A sign rule (one leading
    space or tab character, as given by ``CHAR_MAP``) is only part of the
    lexicon when *const* equals ``'SIGNED_INT'``.  Any unmatched
    remainder is ignored.
    """
    # TODO: Add better invalid integer handling
    # Check for integer sign, possibly treat unsigned integer
    # as POSITIVE
    space, tab = CHAR_MAP['space'], CHAR_MAP['tab']
    sign_rule = (r"^[{}{}]".format(space, tab),
                 lambda scanner, token: ("INT_SIGN", token))
    value_rule = (r".[{}{}]*".format(space, tab),
                  lambda scanner, token: ("INT_VAL", token))

    if const == 'SIGNED_INT':
        rules = [sign_rule, value_rule]
    else:
        rules = [value_rule]

    found, _unmatched = Scanner(rules).scan(string)
    self.type = 'INT'
    try:
        # Keep only the lexeme of each (kind, lexeme) pair.
        self.value = ''.join(pair[1] for pair in found)
    except IndexError:
        print("Hit IndexError, string trying to check is: {}".format(
            dbg(string)))
def __init__(self):
    """Create the ``s1`` scanner; both of its rules call ``self.got``."""
    handler = self.got
    rules = (
        # "^" restricts this rule to the very start of the scanned text.
        (r'^@@', handler),
        (r'aa', handler),
    )
    self.s1 = Scanner(rules)
def _scan_bracket(scanner, token):
    """Return a ``[`` or ``]`` lexeme unchanged."""
    return token


def _scan_float(scanner, token):
    """Convert the matched lexeme with ``float``."""
    number = float(token)
    return number


def _scan_int(scanner, token):
    """Convert the matched lexeme with ``int``."""
    number = int(token)
    return number


def _scan_dstr(scanner, token):
    """Strip the enclosing double quotes and unescape escaped ones."""
    inner = token[1:-1]
    return inner.replace('\\"', '"')


def _scan_sstr(scanner, token):
    """Strip the enclosing single quotes and unescape escaped ones."""
    inner = token[1:-1]
    return inner.replace("\\'", "'")


# Rules are listed in priority order: numbers first (float ahead of int),
# then identifiers, brackets and the two quoted-string forms.  Whitespace
# yields no token.
_scanner = Scanner([
    (r'-?\d+\.\d*', _scan_float),
    (r'-?\d+', _scan_int),
    (r'[•\w!@$%^&*()_+<>?|\/;:`~,.=-]+', _scan_identifier),
    (r'\[|\]', _scan_bracket),
    (r'"(?:[^"\\]|\\.)*"', _scan_dstr),
    (r"'(?:[^'\\]|\\.)*'", _scan_sstr),
    (r'\s+', None),
])
from pprint import pformat
import logging
import re

log = logging.getLogger()
D = log.debug
logging.basicConfig(level=logging.DEBUG)


def callback(scanner, text):
    """Log a lexeme matched by one of the token rules.

    Returns ``None``, so the scanner adds nothing to its result list.
    """
    D("CALL %r", text)


def ignore(scanner, text):
    """Log a run of whitespace; like ``callback`` it returns ``None``."""
    D("IGNORE %r", text)


# ``Scanner`` is reached through the ``re`` module imported above; the
# bare name is not in scope in this script.
s = re.Scanner((
    (r'{{{', callback),
    (r'##', callback),
    (r'\s+', ignore),
    # Any text that is directly followed by "##" (the "##" itself is not
    # consumed because it sits in a lookahead).
    (r'(.+)(?=##)', callback),
))

text = "## {{{ aa##"
while text:
    D("%r", text)
    remainder = s.scan(text)[1]
    if remainder == text:
        # Nothing was consumed, so scanning again would give the same
        # result forever.  Stop instead of spinning.
        D("STUCK %r", text)
        break
    text = remainder
def _scan_file(self):
    """Tokenise the text returned by ``self._read_file()``.

    The scanner is built from ``token_patterns`` with the flags stored
    under ``FLAGS['s']``.

    Returns:
        The list of tokens; any text the scanner could not match is
        discarded.
    """
    lexer = Scanner(token_patterns, FLAGS['s'])
    tokens, _unmatched = lexer.scan(self._read_file())
    return tokens
def _scan_command(self, line, pos, const):
    """Identify the command found at ``line[pos:]``.

    Args:
        line: the text being lexed.
        pos: index in *line* where scanning starts.
        const: entries whose first element is a pattern and whose second
            element is the scanner action for that pattern.

    ``self.type`` is set to the first token the scanner produced and
    ``self.value`` to the pattern of the first *const* entry whose action
    equals that token.

    Raises:
        IndexError: if the scanner produced no token at all.
    """
    # Anchor every pattern with "^" before handing it to the scanner.
    anchored = []
    for entry in const:
        anchored.append((r"^{}".format(entry[0]), entry[1]))

    found, _rest = Scanner(anchored).scan(line[pos:])
    self.type = found[0]

    matching = [entry[0] for entry in const if entry[1] == self.type]
    self.value = matching[0]
'''
from re import Scanner
from .utils.stack import list_to_stack
from .utils.snippets import (
    pat as SNIPPETS,
    from_string,
    Snippet,
    )


# Token patterns: a single square bracket, a run of whitespace, and a run
# of characters that are neither brackets nor whitespace.
BRACKETS = r'\[|\]'
BLANKS = r'\s+'
WORDS = r'[^[\]\s]+'


# The snippet rule is listed first; its matches are converted with
# from_string().  Brackets and words are returned as plain text, and
# blanks produce no token (their action is None).
token_scanner = Scanner([
    (SNIPPETS, lambda _, token: from_string(token)),
    (BRACKETS, lambda _, token: token),
    (BLANKS, None),
    (WORDS, lambda _, token: token),
    ])


class Symbol(str):
    '''A string class that represents Joy function names.'''
    # repr() of a Symbol is its bare text, without quotes.
    __repr__ = str.__str__


def text_to_expression(text):
    '''Convert a string to a Joy expression.

    When supplied with a string this function returns a Python datastructure
    that represents the Joy datastructure described by the text expression.
    Any unbalanced square brackets will raise a ParseError.
from re import Scanner

# Lexer: each rule tags the matched text with a token kind.
scanner = Scanner([
    (r'\(', lambda sc, token: ('BEGIN-BRACE', token)),
    (r'\)', lambda sc, token: ('END-BRACE', token)),
    (r'\[', lambda sc, token: ('BEGIN-NUM', token)),
    (r'\]', lambda sc, token: ('END-NUM', token)),
    (r'\,', lambda sc, token: ('SEP', token)),
    (r'\w+', lambda sc, token: ('KEY', token)),
    # A dot produces no token: the callback returns None.
    (r'\.', lambda sc, token: None),
    ])


class LinkedScope(object):
    """Stack of nested lists; each pushed list is also stored in its parent."""

    def __init__(self):
        # The stack starts with a single empty root list.
        self.stack = [[]]

    @property
    def top(self):
        """Return the list currently on top of the stack."""
        return self.stack[-1]

    def push(self):
        """Create a new list inside the current top, make it the top and return it."""
        top = []
        # The new list is appended to its parent before it becomes the top.
        self.stack[-1].append(top)
        self.stack.append(top)
        return top

    def pop(self):
        """Remove the top list from the stack (it stays inside its parent)."""
        self.stack.pop()

    def reduce_all(self):