Example no. 1
def tokenize(str):
  'str -> Sequence(Token)'
  specs = [
    ('comment',                 (r'//.*',)),
    ('newline',                 (r'[\r\n]+',)),
    ('space',                   (r'[ \t\r\n]+',)),
    ('name',                    (r'(?!(?:as|exists|priority|reaction)\b)[A-Za-z\200-\377_]([A-Za-z\200-\377_0-9])*',)),
    ('kw_exists',               (r'exists',)),
    ('kw_reaction',             (r'reaction',)),
    ('kw_as',                   (r'as',)),
    ('kw_priority',             (r'priority',)),
    ('op_priority_maximal',     (r'>>',)),
    ('op_tilde',                (r'~',)),
    ('op_production',           (r'::',)),
    ('op_dissolve',             (r'\$',)),
    ('op_osmose_location',      (r'!!',)),
    ('op_osmose',               (r'!',)),
    ('mod_catalyst',            (r'\*',)),
    ('mod_charge_positive',     (r'\+',)),
    ('mod_charge_negative',     (r'-',)),
    ('env_open',                (r'\[',)),
    ('env_close',               (r'\]',)),
    ('membrane_open',           (r'\(',)),
    ('membrane_close',          (r'\)',)),
    ('number',                  (r'-?(\.[0-9]+)|([0-9]+(\.[0-9]*)?)',))
  ]
  useless = ['comment', 'space', 'newline']
  t = make_tokenizer(specs)
  return [x for x in t(str) if x.type not in useless]
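All of the snippets collected here use make_tokenizer from funcparserlib.lexer but omit their imports and surrounding helpers. As a point of reference, a minimal self-contained sketch in the same (name, (regex, [optional flags])) spec format could look like the following; it assumes funcparserlib is installed, and the toy arithmetic grammar is hypothetical, not taken from any of the quoted projects:

# A minimal sketch, assuming funcparserlib is installed.
from funcparserlib.lexer import make_tokenizer

def tokenize_arith(text):
    'Hypothetical example: tokenize a tiny arithmetic language.'
    specs = [
        ('space',  (r'[ \t\r\n]+',)),
        ('number', (r'-?(0|[1-9][0-9]*)',)),
        ('name',   (r'[A-Za-z_][A-Za-z_0-9]*',)),
        ('op',     (r'[+\-*/()]',)),
    ]
    t = make_tokenizer(specs)
    # Filter out whitespace, as the examples here do via their `useless` lists.
    return [tok for tok in t(text) if tok.type != 'space']

# Each resulting Token exposes .type and .value (plus source positions), e.g.:
# tokenize_arith('x + 42')  ->  [Token('name', 'x'), Token('op', '+'), Token('number', '42')]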
Example no. 2
def tokenize(str):
    """
    Generates a list of tokens from the given string.
    """
    specs = [
        ('Space',		(r'[ \t\r\n]+',)),
        ('True',		('true',)),
        ('False',		('false',)),
        ('If',  		('if',)),
        ('Then',		('then',)),
        ('Else',		('else',)),
        ('Fi',  		('fi',)),
        ('Call',		('call',)),
        ('Lp',  		('\(',)),
        ('Comma',  		(',',)),
        ('Rp',  		('\)',)),
        ('Let', 		('let',)),
        ('In',  		('in',)),
        ('End', 		('end',)),
        ('Fun', 		('fun',)),
        ('Arrow', 		('=>',)),
        ('Prog',    	('prog',)),
        ('Op',          (r'[\-+/*=<>]',)),
        ('Var', 		(r'[A-Za-z][A-Za-z_0-9]*',)),
        ('Number',      (r'(0|([1-9][0-9]*))', VERBOSE)),
        ('Semicolon',	(';',)),
        ]
    useless = ['Space']
    t = make_tokenizer(specs)
    return [x for x in t(str) if x.type not in useless]
Example no. 3
def tokenize(string):
    """
    Generates a list of tokens from the given string.
    """
    specs = [
        ('Space',		(r'[ \t\r\n]+',)),
        ('Or',          ('\|\|',)),
        ('And',         ('&&',)),
        ('Neq',         ('!=',)),
        ('Not',         ('!',)),
        ('Eq',          ('==',)),
        ('Assign',      (':=',)),
        ('Le',          ('<=',)),
        ('Ge',          ('>=',)),
        ('Dot',         ('\.',)),
        ('Op',          (r'[\-+/*=<>%]',)),
        ('Ident', 		(r'[A-Za-z][A-Za-z_0-9]*',)),
        ('Number',      (r'(0|([1-9][0-9]*))', VERBOSE)),
        ('Semicolon',	(';',)),
        ('Comma',	    (',',)),
        ('Lb',          ('{',)),
        ('Rb',          ('}',)),
        ('Lp',          ('\(',)),
        ('Rp',          ('\)',)),
        ('String',      ('"[^"]*"',)),
        ]
    useless = ['Space']
    t = make_tokenizer(specs)
    return [x for x in t(string) if x.type not in useless]
Example no. 4
def tokenize(string):
    """str -> Sequence(Token)"""
    # flake8: NOQA
    specs = [  # NOQA
        ('Comment', (r'/\*(.|[\r\n])*?\*/', MULTILINE)),  # NOQA
        ('Comment', (r'(//|#).*', )),  # NOQA
        ('NL', (r'[\r\n]+', )),  # NOQA
        ('QuotedRackItem',
         (r'(?<=[:*\-])\s*(?P<quote>"|\').*?(?<!\\)(?P=quote)',
          DOTALL)),  # NOQA
        ('RackItem', (r'(?<=[:*\-])[^\r\n\[;}]+', )),  # NOQA
        ('Space', (r'[ \t\r\n]+', )),  # NOQA
        ('RackHeight', (r'[0-9]+U', )),  # NOQA
        ('Units', (r'[0-9]+(?:\.[0-9]+)?(A|kg)', )),  # NOQA
        ('Number', (r'[0-9]+', )),  # NOQA
        (
            'Name',
            (
                u('[A-Za-z_0-9\u0080-\uffff]') +  # NOQA
                u('[A-Za-z_\\-.0-9\u0080-\uffff]*'), )),  # NOQA
        ('Op', (r'[{}:;,*\-=\[\]]', )),  # NOQA
        ('String', (r'(?P<quote>"|\').*?(?<!\\)(?P=quote)', DOTALL)),  # NOQA
    ]
    useless = ['Comment', 'NL', 'Space']
    t = make_tokenizer(specs)
    return [x for x in t(string) if x.type not in useless]
Example no. 5
def tokenize(s):
    """Tokenize a type string.

    :type s: unicode
    """
    f = make_tokenizer(token_specs)
    return [t for t in f(s) if t.type != 'space']
Example no. 6
def token_phase(characters: str):
    specs = [
        ("comment", (r"#.*",)),
        ("space", (r"[ \t\r]+",)),
        (
            "float",
            (
                r"""
            -?                  # Minus
            ([0-9]+)            # Int
            (\.[0-9]+)          # Frac
            ([Ee][+-]?[0-9]+)?  # Exp""",
                VERBOSE,
            ),
        ),
        ("integer", (r"0|([1-9][0-9]*)",)),
        ("name", (r"[A-Za-z_][A-Za-z_0-9]*",)),
        ("grouping", (r"[\(\)\[\]\{\}]",)),
        ("operator", (r"[~!@#$%^&*<>:?/\\|\-\+=]+",)),
        ("prime", (r"'",)),
    ]

    useless = ["comment", "space"]

    tokenizer = make_tokenizer(specs)

    return tuple(token for token in tokenizer(characters) if token.type not in useless)
Example no. 7
File: kll.py Project: slezier/kll
def tokenize( string ):
	"""str -> Sequence(Token)"""

	# Basic Tokens Spec
	specs = [
		( 'Comment',          ( r' *#.*', ) ),
		( 'Space',            ( r'[ \t\r\n]+', ) ),
		( 'USBCode',          ( r'U(("[^"]+")|(0x[0-9a-fA-F]+)|([0-9]+))', ) ),
		( 'USBCodeStart',     ( r'U\[', ) ),
		( 'ScanCode',         ( r'S((0x[0-9a-fA-F]+)|([0-9]+))', ) ),
		( 'ScanCodeStart',    ( r'S\[', ) ),
		( 'CodeEnd',          ( r'\]', ) ),
		( 'String',           ( r'"[^"]*"', VERBOSE ) ),
		( 'SequenceString',   ( r"'[^']*'", ) ),
		( 'Operator',         ( r'=>|:\+|:-|:|=', ) ),
		( 'Comma',            ( r',', ) ),
		( 'Dash',             ( r'-', ) ),
		( 'Plus',             ( r'\+', ) ),
		( 'Parenthesis',      ( r'\(|\)', ) ),
		( 'Number',           ( r'-?(0x[0-9a-fA-F]+)|(0|([1-9][0-9]*))', VERBOSE ) ),
		( 'Name',             ( r'[A-Za-z_][A-Za-z_0-9]*', ) ),
		( 'VariableContents', ( r'''[^"' ;:=>()]+''', ) ),
		( 'EndOfLine',        ( r';', ) ),
	]

	# Tokens to filter out of the token stream
	useless = ['Space', 'Comment']

	tokens = make_tokenizer( specs )
	return [x for x in tokens( string ) if x.type not in useless]
Example no. 8
def tokenize(str):
    """str -> Sequence(Token)"""
    specs = [
        ('COMMENT', (r'//.*', )),
        ('COMMENT', (r'/\*(.|[\r\n])*?\*/', re.MULTILINE)),
        ('NL', (r'[\r\n]+', )),
        ('SPACE', (r'[ \t\r\n]+', )),
        ('REAL', (r'[0-9]+\.[0-9]*([Ee][+\-]?[0-9]+)*', )),
        ('INT', (r'[0-9]+', )),
        ('INT', (r'\$[0-9A-Fa-f]+', )),
        ('OP', (r'(::)|(:)|(-)|(=)|(\()|(\))', )),
        ('FIELD',
         (r'(bool|char|date|datetime|decimal|duration|email|float|int|slug|text|time|url|uuid|fk|m2m|o2o)',
          )),
        ('NONE', (r'none', )),
        ('FALSE', (r'false', )),
        ('TRUE', (r'true', )),
        ('NAME', (r'([A-Za-z_.][A-Za-z_0-9.]*)', )),
        #('OP', (r'(\.\.)|(<>)|(<=)|(>=)|(:=)|[;,=\(\):\[\]\.+\-<>\*/@\^]',)),
        ('STRING', (r"'([^']|(''))*'", )),
        ('ESCAPES', (r',', )),
    ]

    useless = ['SPACE', 'NL', 'COMMENT', 'ESCAPES']

    t = make_tokenizer(specs)
    ret = [x for x in t(str) if x.type not in useless]

    return ret
Example no. 9
def tokenize(string):
    specs = [
            ('Regex', (r'({.+}|<re>.+?</re>)', re.UNICODE)),
            ('Op', (r':',)),
            ('String', (r"[^<{|}>:][^<{|}>:]*", re.UNICODE)),
            ]
    tok = make_tokenizer(specs)
    return [t for t in tok(string)]
Example no. 10
def lex_braced_expr(string):
    """Lex a braced expression."""
    tokenizer = lexer.make_tokenizer([
        ('lbrace',  [r'{']),
        ('rbrace',  [r'}']),
        ('content', [r'[^{}]+']),
    ])

    return remove_whitespace_tokens(tokenizer(string))
Example no. 11
def lex_date(date_string):
    """Lex a string into biblatex date tokens."""
    tokenizer = lexer.make_tokenizer([
        ('number', [r'[0-9]+']),
        ('dash',   [r'-']),
        ('slash',  [r'/'])
    ])

    return tokenizer(date_string)
Example no. 12
def tokenize(str):
	"""Returns tokens of the given string."""
	specs = [
		('Op',     (r'[|\(\)\*]',)),
		('Char',   (r'[A-Za-z0-9]',)),
	]
	useless = ['Space']
	t = make_tokenizer(specs)
	return [x for x in t(str) if x.type not in useless]
Example no. 13
def tokenize(s):
    specs = [
        ('whitespace', (r'[ \t]', )),
        ('newline', (r'[\n]', )),
        ('instruction', (r'(inc|dec|zero|else|stop|else)', )),
        ('variable', (r'[xyz]', )),
        ('number', (r'[0-9]+', )),
    ]
    f = make_tokenizer(specs)
    return [t for t in f(s) if t.type != 'whitespace']
Example no. 14
def str_tokenize(string):
    'unicode -> Sequence(Token)'
    specs = [
        ('JunkSpace', (r'[\r\n\t]+', )),
        ('Space', (r'[ ]+', )),
        ('Op', (r'[:/\[\]]', )),
        ('Name', (r'[^:/ \[\]\r\t\n]+', re.UNICODE)),
    ]
    useless = ['JunkSpace']
    tok = make_tokenizer(specs)
    return [x for x in tok(string) if x.type not in useless]
Example no. 15
def test_error_info():
    tokenize = make_tokenizer([
        Spec('keyword', r'(is|end)'),
        Spec('id',      r'[a-z]+'),
        Spec('space',   r'[ \t]+'),
        Spec('nl',      r'[\n\r]+'),
    ])
    try:
        list(tokenize(u'f is ф'))
    except LexerError, e:
        eq_(unicode(e), u'1,6-1,6: cannot tokenize data: "f is \u0444"')
Example no. 16
def tokenize(string):
    """ str -> Sequence(Token) """
    specs = [
        ('Space', (r'[ \t\r\n]+', )),
        ('String', (r'"(%(unescaped)s | %(escaped)s)*"' % regexps, VERBOSE)),
        ('Op', (r'[\[\],()]', )),
        ('Name', (r'[A-Za-z_][A-Za-z_0-9]*', )),
    ]
    empty = ['Space']
    t = make_tokenizer(specs)
    return [x for x in t(string) if x.type not in empty]
Example no. 17
def str_tokenize(string):
    'unicode -> Sequence(Token)'
    specs = [
            ('JunkSpace', (r'[\r\n\t]+',)),
            ('Space', (r'[ ]+',)),
            ('Op', (r'[:/\[\]]',)),
            ('Name', (ur'[^:/ \[\]\r\t\n]+', re.UNICODE)),
            ]
    useless = ['JunkSpace']
    tok = make_tokenizer(specs)
    return [x for x in tok(string) if x.type not in useless]
Example no. 18
def test_error_info():
    tokenize = make_tokenizer([
        Spec('keyword', r'(is|end)'),
        Spec('id', r'[a-z]+'),
        Spec('space', r'[ \t]+'),
        Spec('nl', r'[\n\r]+'),
    ])
    try:
        list(tokenize(u'f is ф'))
    except LexerError, e:
        eq_(unicode(e), u'1,6-1,6: cannot tokenize data: "f is \u0444"')
Example no. 19
def tokenize(str):
	"""Returns tokens of the given string."""
	specs = [
		('Space',		(r'[ \t\r\n]+',)),
		('Dot',			(r'\.',)),
		('Name',		(r'[A-Za-z_][A-Za-z_0-9]*',)),
		('Lambda',		(u'λ',)),
		('Parentheses',	(r'[\(\)]',)),
	]
	useless = ['Space']
	t = make_tokenizer(specs)
	return [x for x in t(str) if x.type not in useless]
Example no. 20
def tokenize(to_tokenize_str):
    specs = [
        ('Space', (r'[ \t\r\n]+', )),
        ('Word', (r'[\w\-\_]+', )),
        ('Op', (r'[\\(\\)\\|\\!\\&]{1}', )),
    ]
    useless = [u'Space']
    tokenizer = make_tokenizer(specs)
    return [
        token for token in tokenizer(to_tokenize_str)
        if token.type not in useless
    ]
Example no. 21
def tokenize(input):
    token_specs = [
        ('NAME', (r'[A-Za-z_][A-Za-z_0-9-]*',)),
        ('REGEXP', (r'/.*/',)),
        ('STRING', (r'"((\\")|[^"])*"',)),
        ('OP', (r'([{}\[\]?$:,|@%!/&]|\.{3})',)),
        ('NUMBER', (r'-?(0|[1-9]\d*)(\.\d+)?',)),
        ('COMMENT', (r'#.*',)),
        ('NL', (r'[\r\n]+([ \t]+[\r\n]+)*',)),
        ('SPACE', (r'[ \t]+',))
    ]
    return indentation(make_tokenizer(token_specs)(input + "\n"))
Example no. 22
def test_error_info(self):
    tokenize = make_tokenizer([
        (u'keyword', (ur'(is|end)',)),
        (u'id', (ur'[a-z]+',)),
        (u'space', (ur'[ \t]+',)),
        (u'nl', (ur'[\n\r]+',)),
    ])
    try:
        list(tokenize(u'f is ф'))
    except LexerError, e:
        self.assertEqual(unicode(e),
                         u'cannot tokenize data: 1,6: "f is \u0444"')
Example no. 23
def tokenize(to_tokenize_str):
    specs = [
        ('Space', (r'[ \t\r\n]+',)),
        ('Word', (r'[\w\-\_]+',)),
        ('Op', (r'[\\(\\)\\|\\!\\&]{1}',)),
    ]
    useless = [u'Space']
    tokenizer = make_tokenizer(specs)
    return [
        token
        for token in tokenizer(to_tokenize_str)
        if token.type not in useless
    ]
Example no. 24
def tokenize(str):
	"""Returns tokens of the given string."""
	specs = [
		('Space',  (r'[ \t\r\n]+',)),
		('Number', (r'''
			(0|([1-9][0-9]*))   # Int
			''', VERBOSE)),
		('Op',     (r'[\-+/*\(\),]',)),
		('Name',   (r'[A-Za-z_][A-Za-z_0-9]*',)),
	]
	useless = ['Space']
	t = make_tokenizer(specs)
	return [x for x in t(str) if x.type not in useless]
Example no. 25
def tokenizer(str):
    'str -> Sequence(Token)'
    specs = [
        ('space',  (r'[ \t\r\n]+',)),
        ('int', (r'-?[1-9][0-9]*|0',)),
        ('true', (r'#t',)),
        ('false', (r'#f',)),
        ('char', (r'#\\[A-Za-z_0-9]',)),
        ('op', (r'[\[\]\(\)\']', re.VERBOSE)),
        ('name', (r'[A-Za-z_0-9\&\*\+\-\~!\=<>\^/,\?:;.]*',)),
    ]
    useless = ['space']
    t = make_tokenizer(specs)
    return [x for x in t(str) if x.type not in useless]
Example no. 26
def tokenize(s):
    regexps = {
        'escaped':   r'\\(?P<standard>["\\/bfnrt])',
        'unescaped': r'[^"\\]' }
    grammar_specifications = [
        ('space',    (r'[ \t\r\n]+',)),
        ('number',   (r'-?(0|([1-9][0-9]*))(\.[0-9]+)?([Ee][+-][0-9]+)?',)),
        ('string',   (r'"[^"]*"',)),                                # unsupported escaped quotes
        ('operator', (r'(\*\*)|([><=!]=)|(and)|(or)|(not)|(in)|[{}\[\]\(\)\-\+\*/=><\.,:]',)),
        ('name',     (r'[A-Za-z_][A-Za-z_0-9]*',)),
    ]

    t = make_tokenizer(grammar_specifications)
    return [x for x in t(s) if x.type not in ['space']]
Example no. 27
def tokenize(str):
    """str -> Sequence(Token)"""
    specs = [
        ('Comment', (r'/\*(.|[\r\n])*?\*/', MULTILINE)),
        ('Comment', (r'//.*', )),
        ('NL', (r'[\r\n]+', )),
        ('Space', (r'[ \t\r\n]+', )),
        ('Name', (r'[A-Za-z\200-\377_][A-Za-z\200-\377_0-9]*', )),
        ('Op', (r'[{};,=\[\]]|(->)|(--)', )),
        ('Number', (r'-?(\.[0-9]+)|([0-9]+(\.[0-9]*)?)', )),
        ('String', (r'"[^"]*"', )),  # '\"' escapes are ignored
    ]
    useless = ['Comment', 'NL', 'Space']
    t = make_tokenizer(specs)
    return [x for x in t(str) if x.type not in useless]
Example no. 28
def tokenize(str):
    'str -> Sequence(Token)'
    specs = [
        make_multiline_comment(r'/\*', r'\*/'),
        make_comment(r'//'),
        newline,
        space,
        Spec('name',    r'[A-Za-z\200-\377_][A-Za-z\200-\377_0-9]*'),
        Spec('op',      r'[{};,=\[\]]|(->)|(--)'),
        Spec('number',  r'-?(\.[0-9]+)|([0-9]+(\.[0-9]*)?)'),
        Spec('string',  r'"[^"]*"'), # '\"' escapes are ignored
    ]
    useless = ['comment', 'newline', 'space']
    t = make_tokenizer(specs)
    return [x for x in t(str) if x.type not in useless]
Example no. 29
def lex_string_expr(string):
    """Lex a string expression."""
    tokenizer = lexer.make_tokenizer([
        ('concat', [r'#']),
        ('string', [r'"[^"]+"']),
        ('name',   [r'[A-Za-z_][A-Za-z_0-9\-:?\'\.\s]*']),
        ('space',  [r'[ \t\r\n]+']),
    ])

    try:
        return remove_whitespace_tokens(tokenizer(string))
    except lexer.LexerError:
        # If we fail to lex the string, it is not a valid string expression so
        # just return it as a single token
        return [Token('string', string)]
Example no. 30
def tokenize(s):
    """str -> Sequence(Token)"""
    specs = [
        (u'Space', (ur'[ \t\r\n]+', )),
        (u'String', (ur'"(%(unescaped_str)s | %(escaped)s)*"' % regexps,
                     re.VERBOSE)),
        (u'Regex',
         (ur'/(%(unescaped_regex)s | %(escaped_regex)s)*/[i]*' % regexps,
          re.VERBOSE)),
        (u'Op', (ur'or|and|not|[\(\)]', )),
        (u'Prefix', (ur'client:|server:|any:', )),
    ]
    useless = [u'Space']
    t = make_tokenizer(specs)
    return [x for x in t(s) if x.type not in useless]
Example no. 31
def tokenize(str):
    """str -> Sequence(Token)"""
    specs = [
        ('Comment', (r'/\*(.|[\r\n])*?\*/', MULTILINE)),
        ('Comment', (r'//.*',)),
        ('NL', (r'[\r\n]+',)),
        ('Space', (r'[ \t\r\n]+',)),
        ('Name', (r'[A-Za-z\200-\377_][A-Za-z\200-\377_0-9]*',)),
        ('Op', (r'[{};,=\[\]]|(->)|(--)',)),
        ('Number', (r'-?(\.[0-9]+)|([0-9]+(\.[0-9]*)?)',)),
        ('String', (r'"[^"]*"',)), # '\"' escapes are ignored
    ]
    useless = ['Comment', 'NL', 'Space']
    t = make_tokenizer(specs)
    return [x for x in t(str) if x.type not in useless]
Example no. 32
def tokenize(str):
    'str -> Sequence(Token)'
    specs = [
        ('Comment', (r'/\*(.|[\r\n])*?\*/', MULTILINE)),
        ('Comment', (r'//.*',)),
        ('NL',      (r'[\r\n]+',)),
        ('Space',   (r'[ \t\r\n]+',)),
        ('Name',    (ur'[A-Za-z_\u0080-\uffff][A-Za-z_0-9\.\u0080-\uffff]*',)),
        ('Op',      (r'[{}():;,=\[\]]',)),
        ('Color',  (r'[A-Za-z0-9]+',)),
        ('Number',  (r'-?(\.[0-9]+)|([0-9]+(\.[0-9]*)?)',)),
        ('String',  (r'(?P<quote>"|\').*?(?<!\\)(?P=quote)', DOTALL)),
    ]
    useless = ['Comment', 'NL', 'Space']
    t = make_tokenizer(specs)
    return [x for x in t(str) if x.type not in useless]
Example no. 33
def create_tokenizer():
    specs = [
        ('Space', (r'[ \t\r\n]+', )),
        ('String', (r'"(%(unescaped)s | %(escaped)s)*"' % REGEXPS,
                    re.VERBOSE)),
        ('Number', (r'''
            -?                  # Minus
            (0|([1-9][0-9]*))   # Int
            (\.[0-9]+)?         # Frac
            ([Ee][+-][0-9]+)?   # Exp
            ''', re.VERBOSE)),
        ('Op', (r'[{}\[\]\-,:]', )),
        ('Name', (r'[A-Za-z_][A-Za-z_0-9]*', )),
    ]

    return make_tokenizer(specs)
Example no. 34
def tokenize(s):
    specs = [
        ('comment', (r'#.*',)),
        ('newline', (r'[\r\n]+',)),
        ('space',   (r'[ \t\r\n]+',)),
        ('name',    (r'[a-zA-Z_][a-zA-Z_0-9]*',)),           # _FooBar9_Baz
        ('number',  (r'-?(\.[0-9]+)|([0-9]+(\.[0-9]*)?)',)), # -3.1415
        ('op',      (r'[\+\-/\*\(\),]',)),                   # + - / * ( ) ,
    ]
    f = make_tokenizer(specs)
    useless = ['comment', 'newline', 'space']
    try:
        return [tok for tok in f(s) if tok.type not in useless]
    except LexerError, e:
        logger.error(unicode(e))
        raise
Example no. 35
def tokenize(str):
    'str -> Sequence(Token)'
    specs = [
        ('Comment', (r'/\*(.|[\r\n])*?\*/', MULTILINE)),
        ('Comment', (r'//.*', )),
        ('NL', (r'[\r\n]+', )),
        ('Space', (r'[ \t\r\n]+', )),
        ('Name', (ur'[A-Za-z_\u0080-\uffff][A-Za-z_0-9\.\u0080-\uffff]*', )),
        ('Op', (r'[{}():;,=\[\]]', )),
        ('Color', (r'[A-Za-z0-9]+', )),
        ('Number', (r'-?(\.[0-9]+)|([0-9]+(\.[0-9]*)?)', )),
        ('String', (r'(?P<quote>"|\').*?(?<!\\)(?P=quote)', DOTALL)),
    ]
    useless = ['Comment', 'NL', 'Space']
    t = make_tokenizer(specs)
    return [x for x in t(str) if x.type not in useless]
Example no. 36
def tokenize(str):
    specs = [
        ('With', (r'WITH',)),
        ('In', (r'IN',)),
        ('Set', (r'SET',)),
        ('Equals', (r'=',)),
        ('Space', (r'[ \t\r\n]+',)),
        ('Value', (r'\".*?\"',)),
        ('Attribute', (r'[A-Za-z][A-Za-z0-9]*',)),
    ]
    useless = ['Space']
    return list(
        filter(
            lambda x: x.type not in useless,
            make_tokenizer(specs)(str)
        )
    )
Example no. 37
def tokenize(string):
    """str -> Sequence(Token)"""
    # flake8: NOQA
    specs = [                                                                 # NOQA
        ('Comment', (r'/\*(.|[\r\n])*?\*/', MULTILINE)),                      # NOQA
        ('Comment', (r'(//|#).*',)),                                          # NOQA
        ('NL',      (r'[\r\n]+',)),                                           # NOQA
        ('Space',   (r'[ \t\r\n]+',)),                                        # NOQA
        ('Name',    ('[A-Za-z_0-9\u0080-\uffff]' +                            # NOQA
                     '[A-Za-z_\\-.0-9\u0080-\uffff]*',)),                     # NOQA
        ('Op',      (r'[{};,=\[\]]|(<->)|(<-)|(--)|(->)|(>-<)|(-<)|(>-)',)),  # NOQA
        ('Number',  (r'-?(\.[0-9]+)|([0-9]+(\.[0-9]*)?)',)),                  # NOQA
        ('String',  (r'(?P<quote>"|\').*?(?<!\\)(?P=quote)', DOTALL)),        # NOQA
    ]
    useless = ['Comment', 'NL', 'Space']
    t = make_tokenizer(specs)
    return [x for x in t(string) if x.type not in useless]
Example no. 38
def tokenize(s):
    regexps = {
        'escaped': r'\\(?P<standard>["\\/bfnrt])',
        'unescaped': r'[^"\\]'
    }
    grammar_specifications = [
        ('space', (r'[ \t\r\n]+', )),
        ('number', (r'-?(0|([1-9][0-9]*))(\.[0-9]+)?([Ee][+-][0-9]+)?', )),
        ('string', (r'"[^"]*"', )),  # unsupported escaped quotes
        ('operator',
         (r'(\*\*)|([><=!]=)|(and)|(or)|(not)|(in)|[{}\[\]\(\)\-\+\*/=><\.,:]',
          )),
        ('name', (r'[A-Za-z_][A-Za-z_0-9]*', )),
    ]

    t = make_tokenizer(grammar_specifications)
    return [x for x in t(s) if x.type not in ['space']]
Example no. 39
def tokenize(str):
    'str -> Sequence(Token)'
    specs = [
        Spec('space', r'[ \t\r\n]+'),
        Spec('string', ur'"(%(unescaped)s | %(escaped)s)*"' % regexps, VERBOSE),
        Spec('number', r'''
            -?                  # Minus
            (0|([1-9][0-9]*))   # Int
            (\.[0-9]+)?         # Frac
            ([Ee][+-][0-9]+)?   # Exp
            ''', VERBOSE),
        Spec('op', r'[{}\[\]\-,:]'),
        Spec('name', r'[A-Za-z_][A-Za-z_0-9]*'),
    ]
    useless = ['space']
    t = make_tokenizer(specs)
    return [x for x in t(str) if x.type not in useless]
Example no. 40
def tokenize(str):
    """str -> Sequence(Token)"""
    specs = [(u'keyword', (ur'({|}|def|context|environment|\.|\(|\))', )),
             (u'Space', (ur'[ \t\r\n]+', )),
             (u'String', (ur'"(%(unescaped)s | %(escaped)s)*"' % regexps,
                          VERBOSE)),
             (u'Number', (ur'''
            -?                  # Minus
            (0|([1-9][0-9]*))   # Int
            (\.[0-9]+)?         # Frac
            ([Ee][+-][0-9]+)?   # Exp
            ''', VERBOSE)), (u'Eq', (ur'=', )), (u'Sep', (ur',', )),
             (u'Op', (ur'[{}\[\]\-,:]', )),
             (u'Name', (ur'[A-Za-z_][A-Za-z_0-9]*', ))]
    useless = [u'Space']
    t = make_tokenizer(specs)
    return [x for x in t(str) if x.type not in useless]
Example no. 41
def tokenize(str):
    'str -> Sequence(Token)'
    specs = [
        Spec('space', r'[ \t\r\n]+'),
        Spec('number', r'''
            -?                  # Minus
            (0|([1-9][0-9]*))   # Int
            (\.[0-9]+)?         # Frac
            ([Ee][+-][0-9]+)?   # Exp
            (L)?                # Long
            ''', VERBOSE),
        Spec('op', r'[()\[\]\-,:]'),
        Spec('name', r'[A-Za-z_][A-Za-z_]*'),
    ]
    useless = ['space']
    t = make_tokenizer(specs)
    return [x for x in t(str) if x.type not in useless]
Example no. 42
def tokenize(string):
    """str -> Sequence(Token)"""
    # flake8: NOQA
    specs = [                                                           # NOQA
        ('Comment', (r'/\*(.|[\r\n])*?\*/', MULTILINE)),                # NOQA
        ('Comment', (r'(//|#).*',)),                                    # NOQA
        ('NL',      (r'[\r\n]+',)),                                     # NOQA
        ('Space',   (r'[ \t\r\n]+',)),                                  # NOQA
        ('Name',    (u('[A-Za-z_0-9\u0080-\uffff]') +                   # NOQA
                     u('[A-Za-z_\\-.0-9\u0080-\uffff]*'),)),            # NOQA
        ('Op',      (r'[{};,=\[\]]|(<->)|(<-)|(--)|(->)',)),            # NOQA
        ('Number',  (r'-?(\.[0-9]+)|([0-9]+(\.[0-9]*)?)',)),            # NOQA
        ('String',  (r'(?P<quote>"|\').*?(?<!\\)(?P=quote)', DOTALL)),  # NOQA
    ]
    useless = ['Comment', 'NL', 'Space']
    t = make_tokenizer(specs)
    return [x for x in t(string) if x.type not in useless]
Example no. 43
def tokenize(str):
    """str -> Sequence(Token)"""
    specs = [
        ('Space', (r'[ \t\r\n]+', )),
        ('String', (r'"(%(unescaped)s | %(escaped)s)*"' % regexps, VERBOSE)),
        ('Number', (r'''
            -?                  # Minus
            (0|([1-9][0-9]*))   # Int
            (\.[0-9]+)?         # Frac
            ([Ee][+-][0-9]+)?   # Exp
            ''', VERBOSE)),
        ('Op', (r'[{}\[\]\-,:]', )),
        ('Name', (r'[A-Za-z_][A-Za-z_0-9]*', )),
    ]
    useless = ['Space']
    t = make_tokenizer(specs)
    return [x for x in t(str) if x.type not in useless]
Example no. 44
def tokenize(str):
    """str -> Sequence(Token)"""
    specs = [
        ('Space', (r'[ \t\r\n]+',)),
        ('String', (r'"(%(unescaped)s | %(escaped)s)*"' % regexps, VERBOSE)),
        ('Number', (r'''
            -?                  # Minus
            (0|([1-9][0-9]*))   # Int
            (\.[0-9]+)?         # Frac
            ([Ee][+-][0-9]+)?   # Exp
            ''', VERBOSE)),
        ('Op', (r'[{}\[\]\-,:]',)),
        ('Name', (r'[A-Za-z_][A-Za-z_0-9]*',)),
    ]
    useless = ['Space']
    t = make_tokenizer(specs)
    return [x for x in t(str) if x.type not in useless]
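Several of the JSON-style snippets above (Examples 16, 39, 43 and 44, among others) interpolate a module-level regexps dict that is not shown. Judging from Examples 26 and 38, which define the same keys inline, it presumably looks like the sketch below; note that the VERBOSE flag on those String specs means the literal spaces written around the | in the pattern are ignored rather than matched.

# Presumed definition of the `regexps` dict interpolated into the String
# pattern above; mirrors the inline definitions in Examples 26 and 38.
regexps = {
    'escaped':   r'\\(?P<standard>["\\/bfnrt])',
    'unescaped': r'[^"\\]',
}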
Example no. 45
def tokenize(string):
    """str -> Sequence(Token)"""

    # Basic Tokens Spec
    specs = [
        ('Comment', (r' *#.*', )),
        ('Space', (r'[ \t\r\n]+', )),
        ('USBCode', (r'U(("[^"]+")|(0x[0-9a-fA-F]+)|([0-9]+))', )),
        ('USBCodeStart', (r'U\[', )),
        ('ConsCode', (r'CONS(("[^"]+")|(0x[0-9a-fA-F]+)|([0-9]+))', )),
        ('ConsCodeStart', (r'CONS\[', )),
        ('SysCode', (r'SYS(("[^"]+")|(0x[0-9a-fA-F]+)|([0-9]+))', )),
        ('SysCodeStart', (r'SYS\[', )),
        ('LedCode', (r'LED(("[^"]+")|(0x[0-9a-fA-F]+)|([0-9]+))', )),
        ('LedCodeStart', (r'LED\[', )),
        ('ScanCode', (r'S((0x[0-9a-fA-F]+)|([0-9]+))', )),
        ('ScanCodeStart', (r'S\[', )),
        ('PixelCodeStart', (r'P\[.*', )),  # Discarded, needs KLL 0.5
        ('AnimationStart', (r'A\[.*', )),  # Discarded, needs KLL 0.5
        ('CodeStart', (r'\[', )),
        ('CodeEnd', (r'\]', )),
        ('String', (r'"[^"]*"', )),
        ('SequenceString', (r"'[^']*'", )),
        ('Position', (r'r?[xyz]:-?[0-9]+(.[0-9]+)?', )),
        ('Operator', (r'<=|=>|:\+|:-|::|:|=', )),
        ('Number', (r'(-[ \t]*)?((0x[0-9a-fA-F]+)|(0|([1-9][0-9]*)))',
                    VERBOSE)),
        ('Comma', (r',', )),
        ('Dash', (r'-', )),
        ('Plus', (r'\+', )),
        ('Parenthesis', (r'\(|\)', )),
        ('None', (r'None', )),
        ('Name', (r'[A-Za-z_][A-Za-z_0-9]*', )),
        ('VariableContents', (r'''[^"' ;:=>()]+''', )),
        ('EndOfLine', (r';', )),
    ]

    # Tokens to filter out of the token stream
    useless = ['Space', 'Comment']

    # Discarded expressions (KLL 0.4+)
    useless.extend(['PixelCodeStart', 'AnimationStart'])

    tokens = make_tokenizer(specs)
    return [x for x in tokens(string) if x.type not in useless]
Example no. 46
    def test_error_info(self):
        tokenize = make_tokenizer([
            ('keyword', (r'(is|end)',)),
            ('id', (r'[a-z]+',)),
            ('space', (r'[ \t]+',)),
            ('nl', (r'[\n\r]+',)),
        ])
        try:
            list(tokenize('f is ф'))
        except LexerError as e:
            self.assertEqual(str(e),
                             'cannot tokenize data: 1,6: "f is \u0444"')
        else:
            self.fail('must raise LexerError')

        sometok = lambda type: some(lambda t: t.type == type)
        keyword = lambda s: a(Token('keyword', s))

        id = sometok('id')
        is_ = keyword('is')
        end = keyword('end')
        nl = sometok('nl')

        equality = id + skip(is_) + id >> tuple
        expr = equality + skip(nl)
        file = many(expr) + end

        msg = """\
spam is eggs
eggs isnt spam
end"""
        toks = [x for x in tokenize(msg) if x.type != 'space']
        try:
            file.parse(toks)
        except NoParseError as e:
            self.assertEqual(e.msg,
                             "got unexpected token: 2,11-2,14: id 'spam'")
            self.assertEqual(e.state.pos, 4)
            self.assertEqual(e.state.max, 7)
            # May raise KeyError
            t = toks[e.state.max]
            self.assertEqual(t, Token('id', 'spam'))
            self.assertEqual((t.start, t.end), ((2, 11), (2, 14)))
        else:
            self.fail('must raise NoParseError')
Example no. 47
    def test_error_info(self):
        tokenize = make_tokenizer([
            ('keyword', (r'(is|end)',)),
            ('id', (r'[a-z]+',)),
            ('space', (r'[ \t]+',)),
            ('nl', (r'[\n\r]+',)),
        ])
        try:
            list(tokenize('f is ф'))
        except LexerError as e:
            self.assertEqual(six.text_type(e),
                             'cannot tokenize data: 1,6: "f is \u0444"')
        else:
            self.fail('must raise LexerError')

        sometok = lambda type: some(lambda t: t.type == type)
        keyword = lambda s: a(Token('keyword', s))

        id = sometok('id')
        is_ = keyword('is')
        end = keyword('end')
        nl = sometok('nl')

        equality = id + skip(is_) + id >> tuple
        expr = equality + skip(nl)
        file = many(expr) + end

        msg = """\
spam is eggs
eggs isnt spam
end"""
        toks = [x for x in tokenize(msg) if x.type != 'space']
        try:
            file.parse(toks)
        except NoParseError as e:
            self.assertEqual(e.msg,
                             "got unexpected token: 2,11-2,14: id 'spam'")
            self.assertEqual(e.state.pos, 4)
            self.assertEqual(e.state.max, 7)
            # May raise KeyError
            t = toks[e.state.max]
            self.assertEqual(t, Token('id', 'spam'))
            self.assertEqual((t.start, t.end), ((2, 11), (2, 14)))
        else:
            self.fail('must raise NoParseError')
Example no. 48
def tokenize(str):
  'str -> Sequence(Token)'
  specs = [
    ('comment',       (r'//.*',)),
    ('newline',       (r'[\r\n]+',)),
    ('space',         (r'[ \t\r\n]+',)),
    ('number',        (r'-?(\.[0-9]+)|([0-9]+(\.[0-9]*)?)',)),
    ('name',          (r'([A-Za-z\200-\377_0-9]|!|\$|%|&|\*|\+|-|/|\?|=|\<|\>)+',)),
    ('kw_bind',       (r'#bind',)),
    ('kw_halt',       (r'#halt',)),
    ('op_lambda',     (r'\\',)),
    ('op_map',        (r'::',)),
    ('form_open',     (r'\(',)),
    ('form_close',    (r'\)',))
  ]
  useless = ['comment', 'space', 'newline']
  t = make_tokenizer(specs)
  return [x for x in t(str) if x.type not in useless]
Example no. 49
def tokenize(s):
    tokenizer = make_tokenizer([
        ('comment', (r'#.*\n', )),
        ('newline', (r'[\r\n]+', )),
        ('space', (r'[ \t\v]+', )),
        ('operator',
         (r'\->|not|and|or|is|as[\?!]|===|>>=?|<<=?|[&\^\|\+\-\*\/%~><!=]=?|'
          r'[\.:,\?@{}\[\]\(\)]', )),
        ('name', (r'[^\W\d][\w]*', )),
        ('number', (r'[-+]?(0|([1-9][0-9]*))(\.[0-9]+)?([Ee][+-]?[0-9]+)?', )),
        ('string', (r'\'[^\']*\'', )),
    ])

    # Ignore whitespaces and comments.
    return [
        token for token in tokenizer(s)
        if token.type not in ['space', 'newline', 'comment']
    ]
Example no. 50
def tokenize(str):
    'str -> Sequence(Token)'
    specs = [
        Spec('space', r'[ \t\r\n]+'),
        Spec('string', ur'"(%(unescaped)s | %(escaped)s)*"' % regexps, VERBOSE),
        Spec('number', r'''
            -?                  # Minus
            (0|([1-9][0-9]*))   # Int
            (\.[0-9]+)?         # Frac
            ([Ee][+-][0-9]+)?   # Exp
            ''', VERBOSE),
        Spec('op', r'[()\-\*\+/,]'),
        Spec('func', r'A0001|A0002|DECUMULATE|FUTURES|JOIN|SAME_TSRANGE_OF|SUM|TEST|TSERIES|WEIGHTED|FLOAT'),
        Spec('name', r'[A-Za-z_][A-Za-z_0-9]*'),
    ]
    useless = ['space']
    t = make_tokenizer(specs)
    return [x for x in t(str) if x.type not in useless]
Example no. 51
File: kll.py Project: cryham/kll
def tokenize( string ):
	"""str -> Sequence(Token)"""

	# Basic Tokens Spec
	specs = [
		( 'Comment',          ( r' *#.*', ) ),
		( 'Space',            ( r'[ \t\r\n]+', ) ),
		( 'USBCode',          ( r'U(("[^"]+")|(0x[0-9a-fA-F]+)|([0-9]+))', ) ),
		( 'USBCodeStart',     ( r'U\[', ) ),
		( 'ConsCode',         ( r'CONS(("[^"]+")|(0x[0-9a-fA-F]+)|([0-9]+))', ) ),
		( 'ConsCodeStart',    ( r'CONS\[', ) ),
		( 'SysCode',          ( r'SYS(("[^"]+")|(0x[0-9a-fA-F]+)|([0-9]+))', ) ),
		( 'SysCodeStart',     ( r'SYS\[', ) ),
		( 'LedCode',          ( r'LED(("[^"]+")|(0x[0-9a-fA-F]+)|([0-9]+))', ) ),
		( 'LedCodeStart',     ( r'LED\[', ) ),
		( 'ScanCode',         ( r'S((0x[0-9a-fA-F]+)|([0-9]+))', ) ),
		( 'ScanCodeStart',    ( r'S\[', ) ),
		( 'PixelCodeStart',   ( r'P\[.*', ) ), # Discarded, needs KLL 0.5
		( 'AnimationStart',   ( r'A\[.*', ) ), # Discarded, needs KLL 0.5
		( 'CodeStart',        ( r'\[', ) ),
		( 'CodeEnd',          ( r'\]', ) ),
		( 'String',           ( r'"[^"]*"', ) ),
		( 'SequenceString',   ( r"'[^']*'", ) ),
		( 'Position',         ( r'r?[xyz]:-?[0-9]+(.[0-9]+)?', ) ),
		( 'Operator',         ( r'<=|=>|:\+|:-|::|:|=', ) ),
		( 'Number',           ( r'(-[ \t]*)?((0x[0-9a-fA-F]+)|(0|([1-9][0-9]*)))', VERBOSE ) ),
		( 'Comma',            ( r',', ) ),
		( 'Dash',             ( r'-', ) ),
		( 'Plus',             ( r'\+', ) ),
		( 'Parenthesis',      ( r'\(|\)', ) ),
		( 'None',             ( r'None', ) ),
		( 'Name',             ( r'[A-Za-z_][A-Za-z_0-9]*', ) ),
		( 'VariableContents', ( r'''[^"' ;:=>()]+''', ) ),
		( 'EndOfLine',        ( r';', ) ),
	]

	# Tokens to filter out of the token stream
	useless = ['Space', 'Comment']

	# Discarded expressions (KLL 0.4+)
	useless.extend( ['PixelCodeStart', 'AnimationStart'] )

	tokens = make_tokenizer( specs )
	return [x for x in tokens( string ) if x.type not in useless]
Example no. 52
def tokenize(string):
    'unicode -> Sequence(Token)'
    specs = [
        ('Comment', (r'#.*', )),
        ('NL', (r'[\r\n]+', )),
        ('Space', (r'[ ]+', )),
        ('JunkSpace', (r'[\t]+', )),
        ('Op', (r'[\[\]|:{}]', )),
        #('Regex', (r'<(\w|[-={}\[\]|().,^$+*?:\\])*>', re.UNICODE)),
        ('Regex', (r'<re>.*?</re>', re.UNICODE)),
        ('QuotedName', (r'"[^"\n]+"', re.UNICODE)),
        ('Name', (r'[^:<>\[\]{}| \n]+', re.UNICODE)),
        #('Name', (r"(\w[\u0300\u0301\u0302]?)+([-./](\w[\u0300\u0301\u0302]?)+)*['\u2019]?",re.UNICODE)),
        #('Name', (r'(\w[\u0300\u0301]?([-./](\w[\u0300\u0301]?)+)*|[-0-9][-0-9]*)',re.UNICODE))
    ]
    useless = ['Comment', 'NL', 'JunkSpace']
    tok = make_tokenizer(specs)
    #print("DEBUG TOKENIZER: ", [x for x in tok(string)])
    return [x for x in tok(string) if x.type not in useless]
Example no. 53
def tokenize(str):
    'str -> Sequence(Token)'
    specs = [
        Spec('space', r'[ \t\r\n]+'),
        Spec('string', ur'"(%(unescaped)s | %(escaped)s)*"' % regexps,
             VERBOSE),
        Spec(
            'number', r'''
            -?                  # Minus
            (0|([1-9][0-9]*))   # Int
            (\.[0-9]+)?         # Frac
            ([Ee][+-][0-9]+)?   # Exp
            ''', VERBOSE),
        Spec('op', r'[{}\[\]\-,:]'),
        Spec('name', r'[A-Za-z_][A-Za-z_0-9]*'),
    ]
    useless = ['space']
    t = make_tokenizer(specs)
    return [x for x in t(str) if x.type not in useless]
Example no. 54
def tokenize(str):
    'str -> Sequence(Token)'
    specs = [
        Spec('space', r'[ \t\r\n]+'),
        Spec('string', ur'"(%(unescaped)s | %(escaped)s)*"' % regexps, VERBOSE),
        # NOTE: sometimes number gets into names place thus we shouldn't use them
        # TODO: consider removing or updating it
        # Spec('number', r'''
        #     -?                  # Minus
        #     (0|([1-9][0-9]*))   # Int
        #     (\.[0-9]+)?         # Frac
        #     ([Ee][+-][0-9]+)?   # Exp
        #     \b''', VERBOSE),
        Spec('op', r'[{}\(\),;=]'),
        Spec('comment', r'/\*([^*]|[\r\n]|(\*+([^*/]|[\r\n])))*\*+/'),
        Spec('name', r'[/.A-Za-z_0-9]+'),
    ]
    useless = ['space']
    t = make_tokenizer(specs)
    return [x for x in t(str) if x.type not in useless]
Example no. 55
def test_error_info():
    tokenize = make_tokenizer([
        Spec('keyword', r'(is|end)'),
        Spec('id', r'[a-z]+'),
        Spec('space', r'[ \t]+'),
        Spec('nl', r'[\n\r]+'),
    ])
    try:
        list(tokenize('f is ф'))
    except LexerError as e:
        pass
    else:
        ok_(False, 'must raise LexerError')

    keyword = lambda s: tok('keyword', s)

    id = tok('id')
    is_ = keyword('is')
    end = keyword('end')
    nl = tok('nl')

    equality = id + skip(is_) + id >> tuple
    expr = equality + skip(nl)
    file = many(expr) + end

    msg = """\
rake is eggs
eggs isnt spam
end"""
    toks = [x for x in tokenize(msg) if x.type != 'space']
    try:
        file.parse(toks)
    except ParserError as e:
        msg, pos, i = e.args
        eq_(msg, "got unexpected token: id 'spam'")
        eq_(pos, ((2, 11), (2, 14)))
        # May raise KeyError
        t = toks[i]
        eq_(t, Token('id', 'spam'))
    else:
        ok_(False, 'must raise ParserError')
Example no. 56
def tokenize(str):
    'str -> Sequence(Token)'
    specs = [
        (u'level', (r'^Level \d+.*$', MULTILINE)),
        (u'snes_monitor', (r'^\s+\d+ SNES Function norm.*$', MULTILINE)),
        (u'snes_converged', (r'^\s*Nonlinear solve converged due to \w+$',
                             MULTILINE)),
        (u'snes_diverged',
         (r'^\s*Nonlinear solve did not converge due to \w+$', MULTILINE)),
        (u'ksp_monitor', (r'^\s+\d+ KSP Residual norm.*$', MULTILINE)),
        (u'ksp_converged', (r'^\s*Linear solve converged due to \w+$',
                            MULTILINE)),
        (u'ksp_diverged', (r'^\s*Linear solve did not converge due to \w+$',
                           MULTILINE)),
        (u'max_wall_time',
         (r'^Time \(sec\):\s+\d\.\d{3}e[-+]\d\d\s+\d\.\d{5}\s+\d\.\d{3}e[-+]\d\d$',
          MULTILINE)),
        (u'event',
         (r'^\S{1,16}\s+\d+ \d\.\d \d\.\d{4}e[-+]\d\d \d\.\d \d\.\d\de[-+]\d\d \d\.\d (\d\.\de[-+]\d\d ){2}.*$',
          MULTILINE)),
        (u'stage', (r'^--- Event Stage \d+: .*$', MULTILINE)),
        (u'memory_usage', (r'^Memory usage is given in bytes:', MULTILINE)),
        (u'summary_begin',
         (r'^---------------------------------------------- PETSc Performance Summary: ----------------------------------------------$',
          MULTILINE)),
        (u'hostline', (r'^\S+ on a \S+ named \S+ with \d+ processors?, by .*$',
                       MULTILINE)),
        (u'option_table_begin', (r'^#PETSc Option Table entries:$',
                                 MULTILINE)),
        (u'option_table_entry', (r'^-\w+(\s+\w+)?$', MULTILINE)),
        (u'option_table_end', (r'^#End of? PETSc Option Table entries$',
                               MULTILINE)),
        (u'nl', (r'[\r\n]+', )),
        (u'other', (r'^.*$',
                    MULTILINE)),  # Catches all lines that we don't understand
    ]
    ignored = 'nl other'.split()
    t = make_tokenizer(specs)
    return [x for x in t(str) if x.type not in ignored]
Example no. 57
def lex_generic_query(query):
    """Lex a query string.

    Used by bibpy's accompanying tools.

    """
    tokenizer = lexer.make_tokenizer([
        ('not',    [r'\^']),
        ('equals', [r'=']),
        ('approx', [r'~']),
        ('le',     [r'<=']),
        ('lt',     [r'<']),
        ('ge',     [r'>=']),
        ('gt',     [r'>']),
        ('comma',  [r',']),
        ('dash',   [r'-']),
        ('number', [r'-?(0|([1-9][0-9]*))']),
        ('name',   [r'\w+']),
        ('space',  [r'[ \t\r\n]+']),
        ('any',    [r'[^<><=>=\s=\^~]+'])
    ])

    return remove_whitespace_tokens(tokenizer(query))