Example #1
0
 def define_identifier(self):
     """
     Build the pyparsing expression that matches an identifier.

     The expression matches zero or more namespace parts (reported under
     the results name ``namespace_parts``) followed by the identifier
     itself (reported under ``identifier``), combined into one token.

     """
     grammar = self._grammar

     # --- A single identifier: word/spacing characters, but the first
     # character must not be a digit (checked against every character in
     # the 0x0000-0xFFFF range for which ``isdigit()`` is true).
     digit_chars = "".join(
         char for char in (unichr(code) for code in xrange(0x10000))
         if char.isdigit()
     )
     leading_digit = Regex("[%s]" % digit_chars, re.UNICODE)
     spacing = re.escape(grammar.get_token("identifier_spacing"))
     word = Regex(r"[\w%s]+" % spacing, re.UNICODE)
     single = Combine(~leading_digit + word)
     single.setName("individual_identifier")

     # --- The namespace: identifiers each followed by the separator.
     separator = Suppress(grammar.get_token("namespace_separator"))
     namespace = Group(ZeroOrMore(single + separator))
     namespace.setName("namespace")

     # --- Optional namespace plus the final identifier.
     full = Combine(namespace.setResultsName("namespace_parts") +
                    single.setResultsName("identifier"))
     full.setName("full_identifier")

     return full
def nexus_iter(infile):
    """
    Iterate over the trees of the first ``begin trees;`` block of *infile*.

    Lines before ``begin trees;`` are skipped and reading stops at the first
    ``end;`` or ``endblock;`` line.  A ``translate`` table, when present, is
    collected into a dict and passed along with every tree yielded after it.

    :param infile: iterable of text lines (for example an open file).
    :returns: generator yielding ``nexus.Newick(match, ttable)`` for every
        line whose first word is ``tree``.
    """
    import pyparsing
    pyparsing.ParserElement.enablePackrat()
    from pyparsing import Word, QuotedString, CaselessKeyword, \
         Optional, Suppress, Regex

    # "[&...]" comment that may follow the tree name or the "=" sign
    comment = Optional(Suppress("[&") + Regex(r'[^]]+') + Suppress("]"))
    name = Word(string.letters+string.digits+"_.") | QuotedString("'")
    # everything up to and including the terminating ";"
    newick = Regex(r'[^;]+;')
    tree = (CaselessKeyword("tree").suppress() +
            Optional("*").suppress() +
            name.setResultsName("tree_name") +
            comment.setResultsName("tree_comment") +
            Suppress("=") +
            comment.setResultsName("root_comment") +
            newick.setResultsName("newick"))

    def not_begin(s):
        return s.strip().lower() != "begin trees;"

    def not_end(s):
        return s.strip().lower() not in ("end;", "endblock;")

    def parse_ttable(lines):
        """Collect "key name" translate entries up to the closing ';'."""
        table = {}
        for line in lines:
            entry = line.strip()
            if not entry:
                continue
            if entry == ";":
                break
            # The table ends with a lone ";" or with a ";" glued to the
            # last entry; in the latter case the ";" is not part of the name.
            is_last = entry.endswith(";")
            if is_last or entry.endswith(","):
                entry = entry[:-1].strip()
            if entry:
                key, value = entry.split()
                table[key] = value
            if is_last:
                break
        return table

    # read lines between "begin trees;" and "end;"
    block = itertools.takewhile(not_end,
                                itertools.dropwhile(not_begin, infile))
    first = next(block, None)
    if first is None:
        # No trees block at all: finish quietly instead of letting
        # StopIteration escape from the generator body.
        return
    first = first.strip().lower()
    if first != "begin trees;":
        sys.stderr.write("Expecting 'begin trees;', got %s\n" % first)
        return

    ttable = {}
    for line in block:
        s = line.strip()
        if not s:
            continue
        if s.lower() == "translate":
            ttable = parse_ttable(block)
            print("ttable: %s" % len(ttable))
        elif s.split()[0].lower() == 'tree':
            match = tree.parseString(s)
            yield nexus.Newick(match, ttable)
Example #3
0
 def parseDate(self, dateString):
   """
   Split a date string into its year, month, day, hours and minutes parts.

   The grammar expects a four-digit year, then two-digit month, day and
   hours, a ":" and two-digit minutes.  Each part is returned as the
   matched string under the key of the same name.
   """
   fields = ("year", "month", "day", "hours", "minutes")
   year = Regex(r"\d{4}")("year")
   month, day, hours, minutes = [Regex(r"\d{2}")(label)
                                 for label in fields[1:]]
   dateGrammar = year + month + day + hours + Suppress(":") + minutes
   parsed = dateGrammar.parseString(dateString)
   return dict((label, parsed[label]) for label in fields)
Example #4
0
    def _make_grammar(self):
        """
        Build and return the pyparsing grammar for a search expression.

        An expression is any number of arguments optionally separated by
        whitespace.  An argument is an ``or`` / ``and`` keyword, an
        exclusion (introduced by ``-`` or ``^``) or an inclusion; both of
        the latter may be a quoted string, a ``name:value`` command or a
        plain word.  Matches are turned into CommandExpr, IncludeExpr,
        ExcludeExpr, OrKeywordExpr and AndKeywordExpr via parse actions.
        """
        from pyparsing import (QuotedString, ZeroOrMore, Combine,
                               Literal, Optional, OneOrMore,
                               Regex, CaselessKeyword)

        # Backslash sequences with a dedicated replacement; any other
        # escaped character simply loses its backslash.
        replacements = {
            '\\\\': "\\",
            '\\\'': "'",
            '\\"': '"',
            '\\f': "\f",
            '\\n': "\n",
            '\\r': "\r",
            '\\t': "\t",
            '\\ ': " ",
        }

        def escape_handler(s, loc, toks):
            sequence = toks[0]
            return replacements.get(sequence, sequence[1:])

        escape = Combine(Regex(r'\\.')).setParseAction(escape_handler)
        bare_word = Combine(OneOrMore(escape | Regex(r'[^\s\\]+')))
        blanks = Regex(r'\s+').suppress()
        double_quoted = QuotedString('"', escChar='\\')
        single_quoted = QuotedString("'", escChar='\\')
        quoted = Combine(OneOrMore(double_quoted | single_quoted))

        command = (Regex(r'[^\s:]+') + Literal(":").suppress()
                   + (quoted | bare_word))
        include = quoted | command | bare_word
        negation = (Literal("-") | Literal("^")).suppress()
        exclude = negation + (quoted | command | bare_word)

        or_keyword = CaselessKeyword("or")
        and_keyword = CaselessKeyword("and")

        argument = ((or_keyword | and_keyword) | exclude | include)
        expr = ZeroOrMore(Optional(blanks) + argument)

        # Attach the result classes once the whole grammar is assembled.
        for element, action in ((command, CommandExpr),
                                (include, IncludeExpr),
                                (exclude, ExcludeExpr),
                                (or_keyword, OrKeywordExpr),
                                (and_keyword, AndKeywordExpr)):
            element.setParseAction(action)

        return expr
Example #5
0
    def __init__(self, EvaluateVariableChild=None, EvaluateNumberChild=None):
        """
        Build the boolean expression grammar and store it on the instance.

        EvaluateVariableChild and EvaluateNumberChild are the parse actions
        applied to variables and numbers; when omitted (or falsy),
        EvaluateVariable and EvaluateNumber are used.  The finished grammar
        is stored as ``self.boolExpression``.
        """
        variable_action = EvaluateVariableChild or EvaluateVariable
        number_action = EvaluateNumberChild or EvaluateNumber

        # a float: optional minus, digits, optional fraction and exponent
        floatNumber = Regex(r'[-]?\d+(\.\d*)?([eE][-+]?\d+)?')
        floatNumber.setParseAction(number_action)
        # a variable: letters, digits and underscores
        variable = Word(alphanums + "_")
        variable.setParseAction(variable_action)
        # an operand is whichever of the two gives the longer match
        operand = floatNumber ^ variable

        # comparisons (lt, le, gt, ge, eq, ne) bind tighter than the
        # unary "!" negation
        comparison_levels = [
            (oneOf("< <= > >= == !="), 2, opAssoc.LEFT, EvaluateComparison),
            (oneOf("!"), 1, opAssoc.RIGHT, EvaluateNot),
        ]
        comparisonExpression = operatorPrecedence(operand, comparison_levels)

        # NOTE(review): this Forward is never given an expression in this
        # method, so only the comparisonExpression alternative can match.
        boolExpression = Forward()
        boolPossible = boolExpression | comparisonExpression

        # comparisons joined by "&" / "|"
        bool_levels = [(oneOf("& |"), 2, opAssoc.RIGHT, EvaluateOrAnd)]
        self.boolExpression = operatorPrecedence(boolPossible, bool_levels)
Example #6
0
File: nexus.py Project: rhr/ivy
def parse_treesblock(infile):
    """
    Yield a Newick object for every tree statement read from *infile*.

    A line equal to ``translate`` (case-insensitive) starts a translate
    table, which is read up to its closing ``;`` and passed along with
    every tree yielded afterwards.  Blank lines are skipped; every other
    line is parsed as a ``tree name = newick;`` statement.

    :param infile: iterable of the lines to parse.
    :returns: generator of ``Newick(match, ttable)`` objects.
    :raises pyparsing.ParseException: if a non-blank line is not a valid
        tree statement.
    """
    from pyparsing import Optional, Word, Regex, CaselessKeyword, Suppress
    from pyparsing import QuotedString, alphanums

    # "[&...]" comment that may follow the tree name or the "=" sign
    comment = Optional(Suppress("[&") + Regex(r'[^]]+') + Suppress("]"))
    name = Word(alphanums+"_") | QuotedString("'")
    # everything up to and including the terminating ";"
    newick = Regex(r'[^;]+;')
    tree = (CaselessKeyword("tree").suppress() +
            Optional("*").suppress() +
            name.setResultsName("tree_name") +
            comment.setResultsName("tree_comment") +
            Suppress("=") +
            comment.setResultsName("root_comment") +
            newick.setResultsName("newick"))

    def parse_ttable(lines):
        """Collect "key name" translate entries up to the closing ';'."""
        table = {}
        for line in lines:
            entry = line.strip()
            if not entry:
                continue
            if entry == ";":
                break
            # Remember whether this entry carries the terminating ";"
            # *before* stripping it, otherwise the table never ends.
            is_last = entry.endswith(";")
            if is_last or entry.endswith(","):
                entry = entry[:-1].strip()
            if entry:
                key, value = entry.split()
                table[key] = value
            if is_last:
                break
        return table

    # One shared iterator, so the translate table reader continues where
    # the main loop stopped.
    lines = iter(infile)
    ttable = {}
    for line in lines:
        s = line.strip()
        if not s:
            continue
        if s.lower() == "translate":
            ttable = parse_ttable(lines)
        else:
            match = tree.parseString(s)
            yield Newick(match, ttable)
Example #7
0
	def getkw_bnf(self):
		"""
		Build and return the pyparsing grammar for the input format.

		The input consists of ``name = value`` keywords, ``name = [...]``
		vectors, ``$name ... $end`` data blocks and ``name { ... }`` or
		``name<tag> { ... }`` sections, which may nest.  Python-style
		comments are ignored.  The conv_*, store_*, add_sect and pop_sect
		methods of this object are attached as parse actions.
		"""
		# Punctuation, all suppressed from the results
		sect_begin = Literal("{").suppress()
		sect_end = Literal("}").suppress()
		array_begin, array_end = Literal("[").suppress(), Literal("]").suppress()
		tag_begin, tag_end = Literal("<").suppress(), Literal(">").suppress()
		eql = Literal("=").suppress()
		dmark = Literal('$').suppress()
		end_data = Literal('$end').suppress()

		# Scalar values: integer, real, logical, quoted or bare string
		printable = alphanums + r'!$%&*+-./<>?@^_|~'
		ival = Regex(r'[-]?\d+')
		dval = Regex(r'-?\d+\.\d*([eE]?[+-]?\d+)?')
		lval = Regex('([Yy]es|[Nn]o|[Tt]rue|[Ff]alse|[Oo]n|[Oo]ff)')
		quoted = quotedString.setParseAction(removeQuotes)

		# Helper definitions
		kstr = quoted ^ dval ^ ival ^ lval ^ Word(printable)
		name = Word(alphas + "_", alphanums + "_")
		element = (dval ^ ival ^ lval ^ Word(printable) ^
				Literal("\n").suppress() ^ quoted)
		vec = array_begin + delimitedList(element) + array_end
		sect = name + sect_begin
		tag_sect = name + Group(tag_begin + name + tag_end) + sect_begin

		# Grammar
		keyword = name + eql + kstr
		vector = name + eql + vec
		data = Combine(dmark + name) + SkipTo(end_data) + end_data
		section = Forward()
		entry = section | data | vector | keyword
		section << ((sect | tag_sect) + ZeroOrMore(entry) + sect_end)

		# Parsing actions
		for element, action in (
				(ival, self.conv_ival),
				(dval, self.conv_dval),
				(lval, self.conv_lval),
				(keyword, self.store_key),
				(vector, self.store_vector),
				(data, self.store_data),
				(sect, self.add_sect),
				(tag_sect, self.add_sect),
				(sect_end, self.pop_sect)):
			element.setParseAction(action)

		bnf = ZeroOrMore(entry) + StringEnd().setFailAction(parse_error)
		bnf.ignore(pythonStyleComment)
		return bnf
Example #8
0
def make_sexp_parser ():
    """
    Return a function that parses nested lists of real numbers.

    Lists are written with round parens, as in lisps.  A number needs a
    decimal point and is converted to ``float``.  The returned function
    takes the text and gives back the first parsed element.
    """
    open_paren = Suppress("(")
    close_paren = Suppress(")")

    # Real numbers, converted to float as they are matched
    number = Regex(r"[+-]?\d+\.\d*([eE][+-]?\d+)?")
    number.setParseAction(lambda tokens: float(tokens[0]))

    # An element is a number or a parenthesised group of elements
    element = Forward()
    element << (number | Group(open_paren + ZeroOrMore(element) + close_paren))

    def parse(s):
        return element.parseString(s)[0]

    return parse
Example #9
0
def ListParser():
	"""
	Return a function that parses a list column made of pairs of values.

	The accepted text has the form ``[(a, b), (c, d), ...]``; the parse
	function returns a list of ``(float, float)`` tuples and raises
	ValueError when the text does not match.
	"""
	number = Regex(r'[-+]?[0-9]+(?:\.[0-9]*)?(?:e[-+]?[0-9]+)?', IGNORECASE)
	number.setParseAction(lambda toks: float(toks[0]))

	pair = Suppress('(') + number + Suppress(',') + number + Suppress(')')
	pair.setParseAction(tuple)

	pairs = Suppress('[') + delimitedList(pair) + Suppress(']')
	pairs.setParseAction(list)

	def parse(s):
		try:
			result = pairs.parseString(s)
		except ParseBaseException as e:
			# callers only need to deal with ValueError
			raise ValueError(e)
		return result.asList()

	return parse
Example #10
0
    def translate(self, text, filename):
        """
        Translate template source *text* and return the generated source.

        The text is split into ``<%@ %>`` directives, ``<%! %>``
        declarations, ``<%= %>`` expressions, ``<% %>`` scriptlets and
        plain text; each piece is passed to the matching ``compile_*``
        method.  The result joins the declaration, block and body lines,
        prefixed by ``self.super.module_source`` when ``self.super`` is
        set.  *filename* is not used in this method.
        """
        # Reset the translation state
        self.source = text
        self.super = None
        self.inheritance = 0
        self.declaration_lines = ['inheritance = 0']
        self.block_lines = []
        self.body_lines = ['def body():']

        # target_lines is the very same list object as body_lines
        self.target_lines = self.body_lines
        self.indent = 1

        close_tag = Literal('%>')
        spaces = White()

        def tag(opener):
            # opener, then everything up to the closing "%>"
            return opener + SkipTo(close_tag) + close_tag

        attribute = (Word(alphanums + '_') + Literal('=')
                     + QuotedString('"') + Optional(spaces))
        directive = ("<%@" + Optional(spaces) + Word(alphanums + '_')
                     + spaces + ZeroOrMore(attribute) + close_tag)
        declaration = tag("<%!")
        expression = tag("<%=")
        scriptlet = tag('<%')

        template_text = directive | declaration | expression | scriptlet
        plain_text = Regex(r'((?!<%).|\s)+', re.MULTILINE)
        document = OneOrMore(template_text | plain_text)

        for element, handler in ((directive, self.compile_directive),
                                 (declaration, self.compile_declaration),
                                 (expression, self.compile_expression),
                                 (scriptlet, self.compile_scriptlet),
                                 (plain_text, self.compile_plain_text)):
            element.setParseAction(handler)

        document.leaveWhitespace()
        document.parseString(self.source)

        sections = (self.declaration_lines + ['\n'] + self.block_lines
                    + ['\n'] + self.body_lines)
        translated = '\n' + '\n'.join(sections)
        if self.super:
            translated = self.super.module_source + translated
        return translated
Example #11
0
# ===============> Standard libraries and third-party <========================
from plams import (Atom, Molecule)
from pyparsing import (CaselessKeyword, Combine, Literal, nums, Optional,
                       ParseException, Regex, SkipTo, Suppress, Word)
import numpy as np

# Literals
point = Literal('.')
e = CaselessKeyword('E')
minusOrplus = Literal('+') | Literal('-')

# Parsing Floats
# natural: one or more digits; integer: the same with an optional sign,
# combined into a single token.
natural = Word(nums)
integer = Combine(Optional(minusOrplus) + natural)
# A float needs digits before a mandatory decimal point; an exponent, when
# present, must carry an explicit sign.
floatNumber = Regex(r'(\-)?\d+(\.)(\d*)?([eE][\-\+]\d+)?')

# Same as floatNumber, but the digits before the decimal point are optional.
floatNumberDot = Regex(r'(\-)?(\d+)?(\.)(\d*)?([eE][\-\+]\d+)?')

# Parse Utilities
# anyChar matches a single character (the regex '.'); skipAnyChar drops it.
anyChar = Regex('.')
skipAnyChar = Suppress(anyChar)
# skipSupress(z): skip ahead to `z` and drop the skipped text from the results.
skipSupress = lambda z: Suppress(SkipTo(z))
# skipLine: skip up to the next newline character.
skipLine = Suppress(skipSupress('\n'))

# Generic Functions


def parse_file(p, file_name):
    """
    Wrapper over the parseFile method
Example #12
0
    def __init__(self, preferences_dir=None):
        '''
        Creates a new ConfigShell.

        Builds the command line grammar, then sets up the log, the
        preferences and, when a preferences directory is given, the
        command history file inside it.

        @param preferences_dir: Directory to load/save preferences from/to
        @type preferences_dir: str
        '''
        self._current_node = None
        self._root_node = None
        self._exit = False

        # Grammar of the command line
        command = locatedExpr(Word(alphanums + '_'))('command')
        var = Word(alphanums + r'_\+/.<>()~@:-%]')
        value = var
        keyword = Word(alphanums + r'_\-')
        kparam = locatedExpr(keyword + Suppress('=') + Optional(value, default=''))('kparams*')
        pparam = locatedExpr(var)('pparams*')
        parameter = kparam | pparam
        parameters = OneOrMore(parameter)
        bookmark = Regex('@([A-Za-z0-9:_.]|-)+')
        pathstd = Regex('([A-Za-z0-9:_.]|-)*' + '/' + '([A-Za-z0-9:_./]|-)*') \
                | '..' | '.'
        path = locatedExpr(bookmark | pathstd | '*')('path')
        parser = Optional(path) + Optional(command) + Optional(parameters)
        self._parser = parser

        if tty:
            readline.set_completer_delims('\t\n ~!#$^&()[{]}\\|;\'",?')

        self.log = log.Log()

        # Defined up front so that the load-failure warning further down
        # can always format its message, even without a preferences dir.
        self._prefs_file = None

        if preferences_dir is not None:
            preferences_dir = os.path.expanduser(preferences_dir)
            if not os.path.exists(preferences_dir):
                os.makedirs(preferences_dir)
            self._prefs_file = preferences_dir + '/prefs.bin'
            self.prefs = prefs.Prefs(self._prefs_file)
            self._cmd_history = preferences_dir + '/history.txt'
            self._save_history = True
            if not os.path.isfile(self._cmd_history):
                try:
                    open(self._cmd_history, 'w').close()
                except (IOError, OSError):
                    # Best effort: the shell still works without history.
                    self.log.warning("Cannot create history file %s, "
                                     % self._cmd_history
                                     + "command history will not be saved.")
                    self._save_history = False

            if os.path.isfile(self._cmd_history) and tty:
                try:
                    readline.read_history_file(self._cmd_history)
                except IOError:
                    self.log.warning("Cannot read command history file %s."
                                     % self._cmd_history)

            if self.prefs['logfile'] is None:
                self.prefs['logfile'] = preferences_dir + '/' + 'log.txt'

            self.prefs.autosave = True

        else:
            self.prefs = prefs.Prefs()
            self._save_history = False

        try:
            self.prefs.load()
        except IOError:
            self.log.warning("Could not load preferences file %s."
                             % self._prefs_file)

        # Fill in every default preference that is not set yet
        for pref, value in self.default_prefs.items():
            if pref not in self.prefs:
                self.prefs[pref] = value

        self.con = console.Console()
# Alternation of every reserved word; used below to keep identifiers from
# matching a keyword.
keywords = (SELECT|FROM|WHERE|AS|NULL|NOT|AND|OR|DISTINCT|ALL|INSERT|
            INTO|VALUES|DELETE|UPDATE|SET|CREATE|INDEX|USING|BTREE|HASH|
            ON|INTEGER|FLOAT|DATETIME|DATE|VARCHAR|CHAR|TABLE|DATABASE|
            DROP|ORDER|BY|ASC|DESC)

# Define basic symbols
LPAR, RPAR = map(Suppress, '()')
dot = Literal(".").suppress()
comma = Literal(",").suppress()
semi_colon  = Literal(";").suppress()

# Basic identifier used to define vars, tables, columns
# (a letter followed by letters, digits or underscores; never a keyword)
identifier = ~keywords + Word(alphas, alphanums + '_')

# Literal Values
# Integers: an optionally signed number without leading zeros, or a single 0.
integer_literal = Regex(r"([+-]?[1-9][0-9]*|0)")
integer_literal = integer_literal.setResultsName('integer_literal')
# Floats: the integer form followed by "." and at least one digit.
float_literal = Regex(r"([+-]?[1-9][0-9]*|0)\.[0-9]+")
float_literal = float_literal.setResultsName('float_literal')
# float_literal is tried first, so "1.5" is not split after the "1".
numeric_literal = float_literal | integer_literal
string_literal = QuotedString("'").setResultsName('string_literal')
literal_value = (numeric_literal|string_literal|NULL)

# SQL-Type-names
# The type keywords are rebound here, after `keywords` was built above, so
# that a matched type is reported under the results name 'type_name'.
INTEGER = INTEGER.setResultsName('type_name')
FLOAT = FLOAT.setResultsName('type_name')
DATETIME = DATETIME.setResultsName('type_name')
DATE = DATE.setResultsName('type_name')
VARCHAR = VARCHAR.setResultsName('type_name')
CHAR = CHAR.setResultsName('type_name')
Example #14
0
from pyparsing import Regex, Literal, ZeroOrMore, SkipTo, Group, \
    ParseException, StringEnd

import multiprocessing
import collections
import subprocess
import argparse
import json
import sys
import os

# GCC *.map file grammar for parsing code size per file.
# hex_word: a lowercase "0x..." literal, converted to int as it is matched.
hex_word = Regex(r"0x[a-f0-9]+").setParseAction(lambda x: int(x[0], 16))

# An address is a hex word or the literal "[!provide]" marker.
address = hex_word ^ Literal("[!provide]")
size = hex_word
# meta: the text up to the next address (or the end of input), stripped.
meta = SkipTo(address ^ StringEnd()).setParseAction(lambda x: x[0].strip())

# The three line shapes the grammar accepts:
#   line_a: address size
#   line_b: address size meta, followed by any number of (address meta)
#   line_c: address meta
line_a = Group(address + size)
line_b = Group(address + size + meta) + ZeroOrMore(Group(address + meta))
line_c = Group(address + meta)

# Skip to the first address (or the end of input), then match one of the
# line shapes or the end of input.
grammar = SkipTo(address ^ StringEnd()) + (line_a ^ line_b ^ line_c
                                           ^ StringEnd())


def parse_arguments():
    """
    Parse command line arguments.
    """
Example #15
0
           "SELECTOR_GROUP", "SELECTOR", "MIXIN", "INCLUDE", "MIXIN_PARAM",
           "EXTEND", "FONT_FACE", "OPTION", "FUNCTION_DEFINITION",
           "FUNCTION_RETURN", "IF", "ELSE", "IF_BODY", "FOR", "FOR_BODY",
           "CHARSET", "MEDIA", "WARN", "SEP_VAL_STRING", "POINT")

# Base css word and literals
# COMMA..RPAREN are suppressed from the parse results; the L*/R* names on
# the last line are plain literals and stay in the results.
COMMA, COLON, SEMICOLON = [Suppress(c) for c in ",:;"]
OPT_SEMICOLON = Optional(SEMICOLON)
LACC, RACC, LPAREN, RPAREN = [Suppress(c) for c in "{}()"]
LLACC, LRACC, LBRACK, RBRACK = [Literal(c) for c in "{}[]"]

# Comment
CSS_COMMENT = cStyleComment + Optional(lineEnd)
SCSS_COMMENT = dblSlashComment

# Identifier: optional leading "-", a letter or "_", then letters, digits,
# "-" or "_".
IDENT = Regex(r"-?[a-zA-Z_][-a-zA-Z0-9_]*")
# "#" followed by 3 to 6 alphanumeric characters.
COLOR_VALUE = Regex(r"#[a-zA-Z0-9]{3,6}")
VARIABLE = Regex(r"-?\$[-a-zA-Z_][-a-zA-Z0-9_]*")
# A number, optionally followed by a unit that is not itself followed by a
# word character or "-".
NUMBER_VALUE = Regex(r"-?\d+(?:\.\d*)?|\.\d+") + Optional(
    Regex(r"(em|ex|px|cm|mm|in|pt|pc|deg|s|%)(?![-\w])"))
# Either a path containing "/" or "//", or an http(s)/ftp/file URL.
# NOTE(review): "\+-=" inside the URL character class is a range from "+"
# to "=", not three separate characters - confirm this is intended.
PATH = Regex(r"[-\w\d_\.]*\/{1,2}[-\w\d_\.\/]*") | Regex(
    r"((https?|ftp|file):((//)|(\\\\))+[\w\d:#@%/;$()~_?\+-=\\\.&]*)")
# A point is two parts, each a number or one of top/bottom/left/right.
POINT_PART = (NUMBER_VALUE | Regex(r"(top|bottom|left|right)"))
POINT = POINT_PART + POINT_PART

# Values
# EXPRESSION is a forward declaration; it is not given a definition within
# these lines.
EXPRESSION = Forward()
# "#{ expression }"
INTERPOLATION_VAR = Suppress("#") + LACC + EXPRESSION + RACC
SIMPLE_VALUE = NUMBER_VALUE | PATH | IDENT | COLOR_VALUE | quotedString
# Two or more simple values separated by "/".
DIV_STRING = SIMPLE_VALUE + OneOrMore(Literal("/") + SIMPLE_VALUE)
Example #16
0
# Forward declaration of the whole query expression.
query_expr = Forward()

# "+" marks a required clause, "-" a prohibited one.
required_modifier = Literal('+')('required')
prohibit_modifier = Literal('-')('prohibit')
# Characters that may not appear inside a plain word.
special_characters = '=><(){}[]^"~*?:\\/'
valid_word = Word(printables, excludeChars=special_characters).setName('word')
# Un-escape a word: "\\" becomes a single backslash, any other backslash is
# dropped.  chr(127) serves as a temporary placeholder for "\\".
valid_word.setParseAction(
    lambda t: t[0].replace('\\\\', chr(127)).replace('\\', '').replace(chr(127), '\\')
)

clause = Forward()
field_name = valid_word()('fieldname')
single_term = valid_word()('singleterm')
phrase = QuotedString('"', unquoteResults=True)('phrase')
# A term containing "?" or "*"; rewritten into regex syntax when matched.
wildcard = Regex(r'[a-z0-9]*[\?\*][a-z0-9]*')('wildcard')
wildcard.setParseAction(
    lambda t: t[0].replace('?', '.?').replace('*', '.*')
)
regex = QuotedString('/', unquoteResults=True)('regex')

# Range bounds: each bracket records whether its end of the range is
# inclusive or exclusive.  The upper bound uses the same 'exclusive'
# results name as the lower bound (it was previously misspelled).
_all = Literal('*')
lower_range = Group((LBRACK('inclusive') | LBRACE('exclusive')) + (valid_word | _all)('lowerbound'))
upper_range = Group((valid_word | _all)('upperbound') + (RBRACK('inclusive') | RBRACE('exclusive')))
_range = (lower_range + to_ + upper_range)('range')

# Comparison operators
GT = Literal('>')
GTE = Literal('>=')
LT = Literal('<')
LTE = Literal('<=')
Example #17
0
# vim: set encoding=utf-8
"""Atomic components; probably shouldn't use these directly"""
import string

from pyparsing import CaselessLiteral, Optional, Regex, Suppress, Word

from regparser.grammar.utils import Marker, SuffixMarker, WordBoundaries

# Parenthesised paragraph markers.  Each captures the text between the
# parens under a results name p1..p6.
lower_p = (Suppress("(") +
           Regex(r"[ivx]{1}|[a-hj-uwyz]{1,2}").setResultsName("p1") +
           Suppress(")"))
digit_p = (Suppress("(") + Word(string.digits).setResultsName("p2") +
           Suppress(")"))
roman_p = (Suppress("(") + Word("ivxlcdm").setResultsName("p3") +
           Suppress(")"))
upper_p = (Suppress("(") + Word(string.ascii_uppercase).setResultsName("p4") +
           Suppress(")"))

# Markers wrapped in an <E ...> element: "(<E ...>1</E>)" and
# "(<E ...>iv</E>)".  Digits are p5 and roman numerals p6, mirroring the
# plaintext_p5 / plaintext_p6 markers below; the roman marker previously
# reused the name "p5".
em_digit_p = (Suppress(Regex(r"\(<E[^>]*>")) +
              Word(string.digits).setResultsName("p5") + Suppress("</E>)"))
em_roman_p = (Suppress(Regex(r"\(<E[^>]*>")) +
              Word("ivxlcdm").setResultsName("p6") + Suppress("</E>)"))

# Allow a plaintext version of italic paragraph markers
plaintext_level5_p = (Suppress("(") +
                      Word(string.digits).setResultsName("plaintext_p5") +
                      Suppress(")"))
plaintext_level6_p = (Suppress("(") +
                      Word("ivxlcdm").setResultsName("plaintext_p6") +
                      Suppress(")"))
Example #18
0
                hyphen_minus,
                newline)
'''
Rec. ITU-T X.680 (08/2015)
12.7 Empty lexical item
Page 18
'''
# Matches nothing and contributes no token to the results.
empty = Empty().suppress()
'''
Rec. ITU-T X.680 (08/2015)
12.8 Numbers
Page 18
'''
# number = Word(digits.replace('0', ''), digits, min=2) | \
#          Word(digits, max=1)
# One or more digits; unlike the commented-out variant above, leading
# zeros are accepted.
number = Regex(r'\d+')
'''
Rec. ITU-T X.680 (08/2015)
12.9 Real numbers
Page 18
'''
# This is a poor reflection of its definition but for time being will be enough
# realnumber = Word(digits, '.' + digits+ 'eE-')
# Digits, a decimal point, digits; no sign and no exponent.
realnumber = Regex(r'\d+\.\d+')
'''
Rec. ITU-T X.680 (08/2015)
12.10 Binary strings
Page 18
'''
# This is a poor reflection of its definition but for time being will be enough
# bstring = Word("'", "01B'")
Example #19
0
            raise ValueError("Missing required parameter")
        if maximum and len(val) > maximum:
            raise ValueError("Too many parameters")
        return [func(x) for x in val]

    return parse_values


class RawQuotedString(QuotedString):
    """QuotedString variant that only un-escapes the quote character.

    Whitespace escapes are not converted, and the escape-replacement
    pattern is narrowed so that only an escaped quote character loses its
    escape character.
    """

    def __init__(self, quoteChar, escChar="\\"):  # noqa: N803
        super().__init__(quoteChar, escChar=escChar, convertWhitespaceEscapes=False)
        # unlike the QuotedString this replaces only escaped quotes and not all chars
        escaped_quote = "%s(%s)" % (re.escape(escChar), re.escape(quoteChar))
        self.escCharReplacePattern = escaped_quote


# Characters that have a meaning in the flag syntax.
SYNTAXCHARS = {",", ":", '"', "'", "\\"}

# A flag name is any run of characters that are not syntax characters.
FlagName = Regex(r"""[^,:"'\\]+""")

# r"..." - a double-quoted string prefixed with the letter r.
RegexString = "r" + RawQuotedString('"')

# A parameter is a regex string, a bare name or a quoted string; it may
# also be absent altogether.
FlagParam = Optional(
    RegexString | FlagName | RawQuotedString("'") | RawQuotedString('"')
)

# name[:param[:param...]]
Flag = FlagName + ZeroOrMore(":" + FlagParam)

# Comma-separated flags; the items between the commas may be empty.
FlagsParser = Optional(Flag) + ZeroOrMore("," + Optional(Flag))
Example #20
0
from pyparsing import (CharsNotIn, Optional, Suppress, Word, Regex,
                       ParseException, alphas, nums)

from brian2.utils.caching import cached

# Variable name: a letter or "_" followed by letters, digits or "_".
VARIABLE = Word(f"{alphas}_", f"{alphas + nums}_").setResultsName('variable')

# Plain "=" or an augmented assignment such as "+=", "//=", "**=" or ">>=".
OP = Regex(r'(\+|\-|\*|/|//|%|\*\*|>>|<<|&|\^|\|)?=').setResultsName(
    'operation')
# Both the expression and the comment are runs of characters other than "#".
EXPR = CharsNotIn('#').setResultsName('expression')
COMMENT = CharsNotIn('#').setResultsName('comment')
# var op expr, optionally followed by "#" and a comment.
STATEMENT = VARIABLE + OP + EXPR + Optional(Suppress('#') + COMMENT)


@cached
def parse_statement(code):
    """
    parse_statement(code)

    Parses a single line of code into "var op expr".
    
    Parameters
    ----------
    code : str
        A string containing a single statement of the form
        ``var op expr # comment``, where the ``# comment`` part is optional.
    
    Returns
    -------
    var, op, expr, comment : str, str, str, str
        The four parts of the statement.
Example #21
0
# Punctuation; all of it is suppressed from the parse results.
LBRACKET = L("[").suppress()
RBRACKET = L("]").suppress()
LPAREN = L("(").suppress()
RPAREN = L(")").suppress()
COMMA = L(",").suppress()
SEMICOLON = L(";").suppress()
AT = L("@").suppress()

# An identifier starts with ALPHANUM; every later "-", "_" or "." run must
# be followed by another ALPHANUM, so an identifier cannot end in
# punctuation.
PUNCTUATION = Word("-_.")
IDENTIFIER_END = ALPHANUM | (ZeroOrMore(PUNCTUATION) + ALPHANUM)
IDENTIFIER = Combine(ALPHANUM + ZeroOrMore(IDENTIFIER_END))

NAME = IDENTIFIER("name")
EXTRA = IDENTIFIER

# "@ <url>": the URL is any run of characters other than space and ";".
URI = Regex(r"[^ ;]+")("url")
URL = AT + URI

# "[extra, extra, ...]"; the list between the brackets may be empty.
EXTRAS_LIST = EXTRA + ZeroOrMore(COMMA + EXTRA)
EXTRAS = (LBRACKET + Optional(EXTRAS_LIST) + RBRACKET)("extras")

VERSION_PEP440 = Regex(REGEX, re.VERBOSE | re.IGNORECASE)
VERSION_LEGACY = Regex(LEGACY_REGEX, re.VERBOSE | re.IGNORECASE)

# One or more comma-separated version clauses, re-joined with "," into a
# single string stored under "_raw_spec".
VERSION_ONE = VERSION_PEP440 ^ VERSION_LEGACY
VERSION_MANY = Combine(VERSION_ONE + ZeroOrMore(COMMA + VERSION_ONE),
                       joinString=",",
                       adjacent=False)("_raw_spec")
# The version spec is optional and may be wrapped in parentheses; the parse
# action returns the raw spec string, or "" when no spec was given.
_VERSION_SPEC = Optional(((LPAREN + VERSION_MANY + RPAREN) | VERSION_MANY))
_VERSION_SPEC.setParseAction(lambda s, l, t: t._raw_spec or "")
Example #22
0
    def __init__(self):
        """
        Set up the evaluation state and build the expression grammar.

        Core arithmetic grammar:

        expop   :: '^'
        multop  :: '*' | '/'
        addop   :: '+' | '-'
        integer :: ['+' | '-'] '0'..'9'+
        atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
        factor  :: atom [ expop factor ]*
        term    :: factor [ multop factor ]*
        expr    :: term [ addop term ]*

        On top of this the code below also accepts identifiers as atoms,
        comparisons between expressions, a ``cond ? a : b`` ternary and an
        optional leading ``name =`` assignment.  The complete grammar is
        stored in ``self.bnf``.
        """
        self.kwargs = {}
        self.expr_stack = []
        self.assignment_stack = []
        self.expression_string = None
        self.results = None
        self.container = None

        # binary operator symbol -> method implementing it
        self.opn = {
            "+": self.add,
            "-": self.subtract,
            "*": self.multiply,
            "/": self.divide,
            "^": self.pow,
        }

        # function name -> method implementing it
        self.fn = {"exp": self.exp, "clamp": self.clamp}
        self.conditionals = ["==", "!=", ">", ">=", "<", "<="]

        # use CaselessKeyword for e and pi, to avoid accidentally matching
        # functions that start with 'e' or 'pi' (such as 'exp'); Keyword
        # and CaselessKeyword only match whole words
        e = CaselessKeyword("E")
        pi = CaselessKeyword("PI")
        # fnumber = Combine(Word("+-"+nums, nums) +
        #                    Optional("." + Optional(Word(nums))) +
        #                    Optional(e + Word("+-"+nums, nums)))
        # or use provided pyparsing_common.number, but convert back to str:
        # fnumber = ppc.number().addParseAction(lambda t: str(t[0]))
        fnumber = Regex(r"[+-]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?")
        ident = Word(alphas, alphanums + "_$")

        plus, minus, mult, div = map(Literal, "+-*/")
        lpar, rpar = map(Suppress, "()")
        addop = plus | minus
        multop = mult | div
        expop = Literal("^")
        comparison_op = oneOf(" ".join(self.conditionals))
        qm, colon = map(Literal, "?:")
        assignment = Literal("=")
        # "name =" that is not followed by a second "=", so that "==" is
        # left for the comparison operator
        assignment_op = ident + assignment + ~FollowedBy(assignment)

        expr = Forward()
        expr_list = delimitedList(Group(expr))
        # add parse action that replaces the function identifier with a (name, number of args) tuple
        fn_call = (ident + lpar - Group(expr_list) + rpar).setParseAction(
            lambda t: t.insert(0, (t.pop(0), len(t[0])))
        )
        # any number of leading signs, then a pushed operand or a
        # parenthesised sub-expression
        atom = (
            addop[...]
            + (
                (fn_call | pi | e | fnumber | ident).setParseAction(self.push_first)
                | Group(lpar + expr + rpar)
            )
        ).setParseAction(self.push_unary_minus)

        # by defining exponentiation as "atom [ ^ factor ]..." instead of "atom [ ^ atom ]...", we get right-to-left
        # exponents, instead of left-to-right that is, 2^3^2 = 2^(3^2), not (2^3)^2.
        factor = Forward()
        factor <<= atom + (expop + factor).setParseAction(self.push_first)[...]
        term = factor + (multop + factor).setParseAction(self.push_first)[...]
        expr <<= term + (addop + term).setParseAction(self.push_first)[...]
        comparison = expr + (comparison_op + expr).setParseAction(self.push_first)[...]
        ternary = (
            comparison + (qm + expr + colon + expr).setParseAction(self.push_first)[...]
        )
        # self.bnf = ternary
        # the name `assignment` is reused here for the complete statement:
        # an optional "name =" prefix followed by the ternary expression
        assignment = Optional(assignment_op).setParseAction(self.push_last) + ternary

        self.bnf = assignment
Example #23
0
# protobuf_parser.py
#
#  simple parser for parsing protobuf .proto files
#
#  Copyright 2010, Paul McGuire
#

from pyparsing import (Word, alphas, alphanums, Regex, Suppress, Forward,
                       Group, oneOf, ZeroOrMore, Optional, delimitedList,
                       restOfLine, quotedString, Dict, Keyword)

# An identifier starts with a letter or underscore and continues with
# letters, digits or underscores.
ident = Word(alphas + "_", alphanums + "_").setName("identifier")
# Optionally signed decimal integer literal.
integer = Regex(r"[+-]?\d+")

# Punctuation is matched but suppressed from the parse results.
LBRACE, RBRACE, LBRACK, RBRACK, LPAR, RPAR, EQ, SEMI = map(
    Suppress, "{}[]()=;")

kwds = """message required optional repeated enum extensions extends extend
          to package service rpc returns true false option import"""
# Bind one module-level Keyword per reserved word, named <WORD>_ (for example
# MESSAGE_, TRUE_, FALSE_).  Assigning through globals() creates the same
# names the previous string-built exec() did, without evaluating source text.
for kw in kwds.split():
    globals()["{0}_".format(kw.upper())] = Keyword(kw)

# Forward declaration: the message body is filled in later in the file.
messageBody = Forward()

# "message <name> { <body> }"; the '-' after MESSAGE_ makes everything that
# follows mandatory once the keyword has been seen.
messageDefn = MESSAGE_ - ident("messageId") + LBRACE + messageBody(
    "body") + RBRACE

# A field type is one of the built-in scalar type names or any identifier.
typespec = oneOf("""double float int32 int64 uint32 uint64 sint32 sint64
                    fixed32 fixed64 sfixed32 sfixed64 bool string bytes"""
                 ) | ident
# Right-hand side of an assignment: integer, boolean keyword or identifier.
rvalue = integer | TRUE_ | FALSE_ | ident
Example #24
0
def header_parser():
  """Build and return a pyparsing grammar for Verilog module headers.

  The returned expression skips everything up to the first ``module``
  keyword, then matches one or more module definitions (C/C++ style
  comments are ignored), and finally consumes the rest of the input.
  Each module is a group holding the module name (``module_name``), an
  optional ``#( ... )`` parameter list (``params``), an optional
  ``( ... )`` port list (``ports``) and the raw text up to ``endmodule``.
  """

  # Raw string: "\$" is not a valid Python string escape, so the non-raw
  # spelling triggers an invalid-escape warning.  The pattern is unchanged.
  identifier  = Regex(r"[a-zA-Z_][a-zA-Z0-9_\$]*")
  comment     = cppStyleComment.suppress()

  # Optional "[ ... ]" range; the bracket contents are kept as raw text.
  size = Group(
    Optional( Suppress('[') + SkipTo(']') + Suppress(']') )
  )

  # Params

  # A parameter value ends at ", parameter" or at ") (".  '+' binds tighter
  # than '|', so this is an alternation of two two-token sequences.
  end_param = Literal(',') + 'parameter' | Literal(')') + '('
  ptype     = Optional( oneOf('integer real realtime time') )

  # NOTE: this isn't completely right, good enough for parsing valid Verilog
  param = Group(
    'parameter' + ptype + size + identifier +
    Suppress('=') + SkipTo(end_param)
  )

  list_of_params = Group(
    Suppress('#(') + delimitedList( param ) + Suppress(')')
  )

  # Ports

  dir_    = Optional( oneOf('input output inout') )
  type_   = Optional( oneOf('wire reg') )
  port    = Group(
    dir_ + type_ + size + identifier
  )

  list_of_ports = Group(
    Suppress('(') + delimitedList( port ) + Suppress(')')
  )

  # Module

  module_identifier = identifier

  # The module body is not parsed; it is captured verbatim up to 'endmodule'.
  module = Group(
    Suppress('module') +
    module_identifier('module_name') +
    Optional( list_of_params('params') )  +
    Optional( list_of_ports ('ports' ) )  +
    Suppress(';') +
    SkipTo('endmodule') + Suppress('endmodule')
  )

  # Debug
  #print
  #module_identifier.setParseAction( dbg('modname') )#.setDebug()
  #param            .setParseAction( dbg('param')  )#.setDebug()
  #port             .setParseAction( dbg('port' )  )#.setDebug()
  #module           .setParseAction( dbg('module', 1) )#.setDebug()

  # Leading and trailing text around the module definitions is discarded.
  file_ = SkipTo('module', ignore=comment ).suppress() + \
          OneOrMore( module ).ignore( comment )        + \
          SkipTo( StringEnd() ).suppress()

  return file_
Example #25
0
File: expr.py Project: zachgk/slng
    def parse(expression, equation=False, subs=None, main=None, returnVars=False):
        """Parse an slng expression string and evaluate it while parsing.

        :param expression: text to parse; any value that is not a ``str`` is
            returned unchanged
        :param equation: when true, parse ``<expr> = <expr>`` and build
            ``Eq(lhs, rhs)``; otherwise parse a single expression
        :param subs: optional mapping from a variable name to the name that
            should be passed to ``symbols`` instead
        :param main: optional object providing ``fromDotRef``; when given,
            dotted property references are resolved through
            ``hypergraph.treeCompute`` and ``solve`` instead of being turned
            into plain symbols
        :param returnVars: when true, return the set of names seen by
            ``getSymbol`` instead of the parsed value
        :return: the first token of the parse result, or the set of variable
            names when ``returnVars`` is true
        """
        if not isinstance(expression,str): return expression
        # Avoid a shared mutable default; an omitted mapping means "no substitutions".
        if subs is None: subs = {}

        varSet = set()

        lparen = Literal("(").suppress()
        rparen = Literal(")").suppress()
        equal = Literal("=").suppress()
        # Capitalised names that map to constants.
        spec = {
            "E": exp(1),
            "Pi": pi
        }

        def getSymbol(s):
            # Record the name as written, then apply any substitution.
            varSet.add(s)
            if s in subs: s = subs[s]
            return symbols(s)

        def getFunction(s):
            # s is [function name, argument]
            if s[0] == "len":
                return SetLength(s[1])
            elif s[0] == "sum":
                return SetSummation(s[1])
            else:
                # NOTE(review): Error is called, not raised; presumably it
                # reports or raises itself - confirm against its definition.
                Error('Unknown slng function ' + s[0])

        integer = Word(nums).setParseAction( lambda t: [ int(t[0]) ] )
        # One or more fractional digits; the previous pattern accepted exactly
        # one, so "1.25" was read as 1.2 and the trailing "5" was left unparsed.
        decimal = Regex(r"[0-9]+\.[0-9]+").setParseAction( lambda t: [float(t[0])])
        special = Regex("[A-Z][a-zA-Z]*").setParseAction( lambda t: [spec[t[0]]])
        var = Regex("[a-z][a-zA-Z]*").setParseAction( lambda t: [getSymbol(t[0])])
        lowerName = Regex("[a-z][a-zA-Z]*").setParseAction( lambda t: [t[0]])
        prop = Regex(r"[a-z][a-zA-Z]*\.[a-z][a-zA-Z]*").setParseAction( lambda t: [getSymbol(t[0])])
        ref = Regex(r"\{[0-9]+\}").setParseAction( lambda t: [getSymbol(t[0])])
        string = Regex('"[-0-9a-zA-Z: ]*"').setParseAction( lambda t: [t[0][1:-1]])

        # Binary operators applied while folding a flat token list.
        opn = {
            "+": (lambda a,b: a+b ),
            "-": (lambda a,b: a-b ),
            "*": (lambda a,b: a*b ),
            "/": (lambda a,b: a/b ),
            "^": (lambda a,b: a**b )
        }
        def opClean(t):
            # Fold [a, op, b, op, c, ...] left to right into a single value.
            if len(t)==1: return t
            res = opClean([opn[t[1]](t[0],t[2])]+t[3:])
            return res

        if main is not None:
            def treeCompute(p):
                try:
                    node = main.fromDotRef(p)
                    comp = hypergraph.treeCompute(node)
                    res = solve(comp,symbols(p))
                    return res[0]
                except Exception as e:
                    logging.exception(e)
                    Error("Error with tree Compute: ")
            # With a main object, dotted references are computed, not symbolised.
            prop = prop.setParseAction( lambda t: [treeCompute(t[0])])

        expr = Forward()
        paren = (lparen + expr + rparen).setParseAction( lambda s,l,t: t)
        function = (lowerName + lparen + (prop | var) + rparen).setParseAction( lambda t: getFunction(t) )
        # Order matters: more specific alternatives must be tried first.
        atom = function | string | paren | decimal | integer | ref | prop | special | var
        multExpr = (atom + ZeroOrMore( Word("*/") + atom)).setParseAction( lambda s,l,t: opClean(t))
        expr << (multExpr + ZeroOrMore( Word("+-") + multExpr)).setParseAction( lambda s,l,t: opClean(t))
        # The '=' token is suppressed, so t holds just [lhs, rhs].
        equality = (expr + equal + expr).setParseAction( lambda s,l,t: Eq(t[0],t[1]) )



        if equation: res = equality.parseString(expression)[0]
        else: res = expr.parseString(expression)[0]

        if returnVars: return varSet
        else: return res
Example #26
0
    if len(url_protocol.searchString(unquoted)) > 0:
        result = [url_to_resource(unquoted)]
    else:
        result = [unquoted]
    return result


# Numeric literal: optional '-', then either '0' or a word that starts with a
# nonzero_nums character (defined elsewhere, presumably 1-9), an optional
# fraction and an optional exponent, all combined into a single token.
# The token is handed to convert_number (defined elsewhere); per the original
# note, numbers are converted to ints if possible.
cql_number = Combine(
    Optional('-') + ('0' | Word(nonzero_nums, nums)) +
    Optional('.' + Word(nums)) +
    Optional(Word('eE', exact=1) +
             Word(nums + '+-', nums))).setParseAction(convert_number)
# Dates are parsed as double-quoted ISO8601 strings and converted to datetime
# objects. The surrounding quotes are suppressed before convert_date runs.
cql_date = Combine(dbl_quote.suppress() + Regex(ISO8601_REGEX) +
                   dbl_quote.suppress()).setParseAction(convert_date)
# Any other quoted string, double- or single-quoted, is passed to
# convert_string (per the original note, returned with its quotes removed).
cql_string = (dblQuotedString | sglQuotedString).setParseAction(convert_string)

# URLs are detected as strings starting with the http(s) protocol.
url_protocol = Combine(Literal('http') + Optional('s'))

# Number range: two numbers separated by '-', grouped and passed to
# convert_range (defined elsewhere).
# FIXME: char ranges are not supported yet
cql_number_range = Group(cql_number + '-' +
                         cql_number).setParseAction(convert_range)

cql_values = Group(
    delimitedList(
Example #27
0
    FollowedBy,empty

# Names exported by "from <module> import *".
__all__ = ['tapOutputParser', 'TAPTest', 'TAPSummary']

# newlines are significant whitespace, so set default skippable
# whitespace to just spaces and tabs
# This changes a pyparsing-wide default, so it must run before the
# expressions below are created.
ParserElement.setDefaultWhitespaceChars(" \t")
# End of line, matched but dropped from the results.
NL = LineEnd().suppress()

integer = Word(nums)
# Plan line: "1..<n>"; the count is available under the name "ubound".
plan = '1..' + integer("ubound")

OK,NOT_OK = map(Literal,['ok','not ok'])
testStatus = (OK | NOT_OK)

# Free text up to a '#' or the end of the line, with leading '-' and space
# characters stripped.
description = Regex("[^#\n]+")
description.setParseAction(lambda t:t[0].lstrip('- '))

TODO,SKIP = map(CaselessLiteral,'TODO SKIP'.split())
# Directive: '#' followed by either "TODO <rest of line>" or a line that
# starts with SKIP. For SKIP the lookahead consumes nothing, and the whole
# rest of the line is returned as ['SKIP', <text>].
directive = Group(Suppress('#') + (TODO + restOfLine | 
    FollowedBy(SKIP) + 
        restOfLine.copy().setParseAction(lambda t:['SKIP',t[0]]) ))

# Comment line: '#' then the remaining text; `empty` sits between the two so
# leading whitespace is skipped before restOfLine captures the text.
commentLine = Suppress("#") + empty + restOfLine

testLine = Group(
    Optional(OneOrMore(commentLine + NL))("comments") +
    testStatus("passed") +
    Optional(integer)("testNumber") + 
    Optional(description)("description") + 
    Optional(directive)("directive")
Example #28
0
import operator
from typing import Dict

from pyparsing import Literal, Word, ZeroOrMore, Forward, alphas, Regex, Suppress, oneOf, Optional, Group

# __all__ must be a sequence of names.  A bare string is iterated character
# by character, so "from <module> import *" would look up 'b', 'e', 'r', ...
# and fail with AttributeError.
__all__ = ['berekenen']

# Based on https://github.com/pyparsing/pyparsing/blob/master/examples/fourFn.py
# Stripped down to a minimum
# Implementation can be considered quick and dirty

point = Literal(".")
# Optionally signed number with optional fraction and optional exponent.
fnumber = Regex(r"[+-]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?")
# Identifier made of letters and underscores only.
ident = Word(alphas + "_")

plus = Literal("+")
minus = Literal("-")
mult = Literal("*")
div = Literal("/")

# Parentheses are matched but dropped from the parse results.
lpar, rpar = map(Suppress, "()")
addop = plus | minus
multop = mult | div

# Functions of one argument, keyed by the name used in an expression.
unary_op = {
    'ABS': abs,
}


def veilig_delen(x, y):
    if abs(float(y)) < 1e-6:
Example #29
0
    def eval(self, archive, context, app, exp_context):
        """Call every callable in ``self._evals`` with the given arguments.

        Returns the individual results as a list, in the order of
        ``self._evals``.
        """
        results = []
        for evaluate in self._evals:
            results.append(evaluate(archive, context, app, exp_context))
        return results


# Unsigned integer and "digits.digits" real literals.
integer = Word(nums)
real = Combine(Word(nums) + '.' + Word(nums))

# Literal constants; WordEnd() stops a longer word that merely starts with one
# of these (e.g. "nothing") from matching.
constant = (Literal('True') |
            Literal('False') |
            Literal('None') |
            Literal('yes') |
            Literal('no')) + WordEnd()

# Reference containing a '#': optional word/dot characters, '#', then one or
# more word/dot characters.
model_reference = Regex(r'([\w\.]*#[\w\.]+)')
# Variable name made of letters, digits, dots and underscores.
variable = Regex(r'([a-zA-Z0-9\._]+)')

# Double- or single-quoted string with backslash as the escape character.
string = QuotedString('"', escChar="\\") | QuotedString('\'', escChar="\\")
# Order matters: `variable` would also match numbers and constants, so it is
# tried last.
operand = model_reference | real | integer | constant | string | variable

plusop = oneOf('+ -')
multop = oneOf('* / // %')
groupop = Literal(',')

# Forward declaration; the expression grammar is defined further down.
expr = Forward()

# A modifier is an alphanumeric word immediately followed by ':'.
modifier = Combine(Word(alphas + nums) + ':')

# Attach evaluator classes (defined elsewhere) to the numeric literals.
integer.setParseAction(EvalInteger)
real.setParseAction(EvalReal)
        alphas, alphanums, delimitedList, originalTextFor, ParseBaseException, \
        Literal, quotedString, Keyword, empty, Suppress, Combine, NotAny, Regex


def eachMostOnce(*args, or_=operator.ior, and_=operator.add):
    """Combine ``args`` so that each may appear at most once, in any order.

    For every length from ``len(args)`` down to 1, every ordered selection of
    that many arguments is joined with ``and_``; the selections of one length
    are joined with ``or_``, and the per-length results are joined with
    ``or_`` again, longest first.  Calling it without arguments raises the
    ``TypeError`` that ``reduce`` gives for an empty sequence.
    """
    def alternatives_of_length(count):
        # Lazily build each ordered selection, then fold them with or_.
        sequences = (reduce(and_, ordering)
                     for ordering in permutations(args, count))
        return reduce(or_, sequences)

    lengths = range(len(args), 0, -1)
    return reduce(or_, (alternatives_of_length(count) for count in lengths))


# Identifier: a letter followed by letters, digits or underscores.
NAME = Word(alphas, alphanums + '_')
INTEGER = Word(nums).setName('integer')
# Integer with an optional "_<suffix>", where the suffix is an integer or a name.
INTEGER_K = Combine(INTEGER + Optional('_' + (INTEGER | NAME)))
EOL = p.LineEnd()
# A '!' comment runs to the end of the line; the parse action prefixes the
# comment text with a single space.
FortranComment = Regex(r'!.*$')
FortranComment.setParseAction(lambda s,loc,toks: [' '+toks[0]])
# End of a logical line: optional trailing comment, then the line end.
EOLL = Optional(FortranComment) + EOL
# Fractional part (".digits") and exponent (d/e, optional sign, digits).
precision = Combine('.' + INTEGER)
exponent = Combine(oneOf('d e D E') + Optional(oneOf('+ -')) +  INTEGER)
# A real needs a fraction, an exponent, or both; a bare integer is not a REAL.
REAL = Combine(INTEGER + ((precision + exponent) | precision | exponent))
STRING = quotedString
# Forward declaration; comparison operators are defined later in the file.
comp_op = Forward()
# User-defined operator ".name.", rejected when the input at that point is a
# comparison operator or one of the listed built-in operators/literals.
user_op = NotAny(comp_op | oneOf('.not. .and. .or. .eqv. .neqv. ** // % .true. .false.')) \
        + Combine('.' + NAME + '.')

# Forward declarations for the recursive parts of the grammar.
atom = Forward()

calllist = Forward()
# Array section "( [lower] : [upper] [: stride] )".
array_sub = '(' + Optional(atom)+':'+Optional(atom) + Optional(':'+atom) + ')'
# Derived-type component access "%name".
type_sub = '%' + NAME
Example #31
0
def _getoptblank(pa, boundarychars):
    """Return an optional matcher for a run of blank characters.

    The run consists of whitespace characters that are not listed in
    ``boundarychars``.  Whitespace skipping is disabled on the matcher so the
    blanks themselves are matched, and ``pa`` is attached as its parse action.
    """
    excluded = re.escape(boundarychars)
    blank = Regex(r"[^\S%s]+" % excluded)
    blank = blank.leaveWhitespace()
    blank = blank.setParseAction(pa)
    return Optional(blank)
Example #32
0
class ActivityParser13(object):
    """Grammar and parser for the activity.

    The class attributes build a pyparsing grammar for text of the form
    ``activity = [ ... ];``.  The helpers ``sep``, ``key``, ``list_of``,
    ``make_int``, ``make_bool``, ``make_list``, ``make_dict`` and the
    ``verify`` module are defined elsewhere in the file.
    """

    # --- Scalar values ---
    variable = Word(alphas)
    integer = Word(nums).setParseAction(make_int)
    # Single- or double-quoted string, backslash escapes, may span lines.
    string = (
        QuotedString('\'', escChar='\\', multiline=True) ^
        QuotedString('"', escChar='\\', multiline=True))
    boolean = (
        Literal('true').setParseAction(make_bool(True)) ^
        Literal('false').setParseAction(make_bool(False)))

    # Either a /.../i literal or regex("..."), the latter wrapped in a
    # verify.Term tagged verify.REGEX.
    regex = (
        Regex('/(.*)/i') ^
        Combine(
            sep('regex(') +
            QuotedString('"', escChar='\\') +
            sep(')')
        ).setParseAction(lambda x: verify.Term(verify.REGEX, x[0]))
    )

    # --- Multiple choice question ---
    # One choice: [ string, boolean, string ]
    choice_decl = Group(
        sep('[') +
        string + sep(',') +
        boolean + sep(',') +
        string +
        sep(']')
    )

    choices_decl = Group(
        sep('[') +
        Optional(list_of(choice_decl)) +
        sep(']')
    ).setParseAction(make_list)

    multiple_choice_decl = (
        key('questionType') + sep(':') + key('multiple choice') +
        Optional(sep(','))
    )

    multiple_choice = (
        sep('{') +
        multiple_choice_decl +
        Each(
            Optional(
                key('questionHTML') + sep(':') +
                string + Optional(sep(','))) +
            Optional(
                key('choices') + sep(':') +
                choices_decl + Optional(sep(',')))
        ) +
        sep('}')
    ).setParseAction(make_dict)

    # --- Free text question ---
    free_text_decl = (
        key('questionType') + sep(':') + key('freetext') +
        Optional(sep(','))
    )

    free_text = (
        sep('{') +
        free_text_decl +
        Each(
            Optional(
                key('questionHTML') + sep(':') +
                string + Optional(sep(','))) +
            Optional(
                key('correctAnswerRegex') + sep(':') +
                regex + Optional(sep(','))) +
            Optional(
                key('correctAnswerOutput') + sep(':') +
                string + Optional(sep(','))) +
            Optional(
                key('incorrectAnswerOutput') + sep(':') +
                string + Optional(sep(','))) +
            Optional(
                key('showAnswerPrompt') + sep(':') +
                string + Optional(sep(','))) +
            Optional(
                key('showAnswerOutput') + sep(':') +
                string + Optional(sep(','))) +
            Optional(
                key('outputHeight') + sep(':') +
                string + Optional(sep(',')))
        ) +
        sep('}')
    ).setParseAction(make_dict)

    # --- Multiple choice group ---
    # One question inside a group; correctIndex is a single integer or a
    # bracketed list of integers.
    question_list_decl = (
        sep('{') +
        Each(
            Optional(
                key('questionHTML') + sep(':') +
                string + Optional(sep(','))) +
            Optional(
                key('choices') + sep(':') +
                sep('[') +
                Group(list_of(string)).setParseAction(make_list) +
                sep(']') +
                Optional(sep(','))) +
            Optional(
                key('correctIndex') + sep(':') +
                (integer ^ (
                    sep('[') +
                    Group(list_of(integer)).setParseAction(make_list) +
                    sep(']'))) +
                Optional(sep(','))) +
            Optional(
                key('multiSelect') + sep(':') +
                boolean + Optional(sep(','))),
        ) +
        sep('}')).setParseAction(make_dict)

    questions_list_decl = Group(
        sep('[') +
        Optional(list_of(question_list_decl)) +
        sep(']')
    ).setParseAction(make_list)

    multiple_choice_group_decl = (
        key('questionType') + sep(':') + key('multiple choice group') +
        Optional(sep(','))
    )

    multiple_choice_group = (
        sep('{') +
        multiple_choice_group_decl +
        Each(
            Optional(
                key('questionGroupHTML') + sep(':') +
                string + Optional(sep(','))) +
            Optional(
                key('allCorrectMinCount') + sep(':') +
                integer + Optional(sep(','))) +
            Optional(
                key('allCorrectOutput') + sep(':') +
                string + Optional(sep(','))) +
            Optional(
                key('someIncorrectOutput') + sep(':') +
                string + Optional(sep(','))) +
            Optional(
                key('questionsList') + sep(':') +
                questions_list_decl + Optional(sep(',')))
        ) +
        sep('}')
    ).setParseAction(make_dict)

    # --- Top level: activity = [ item, item, ... ]; ---
    # Each item is a plain string or one of the three question kinds.
    activity_grammar = (
        sep('activity') +
        sep('=') +
        sep('[') +
        Optional(list_of(
            string ^ multiple_choice ^ free_text ^ multiple_choice_group)) +
        sep(']') +
        Optional(sep(';')))

    @classmethod
    def parse_string(cls, content):
        """Parse ``content`` with ``activity_grammar`` and return the parse results."""
        return cls.activity_grammar.parseString(content)

    @classmethod
    def parse_string_in_scope(cls, content, scope, root_name):
        """Parses activity text following grammar.

        Returns a new dict holding the items of ``scope``, an empty
        ``__builtins__`` entry, and the parsed activity (as a list) stored
        under ``root_name``.  Later entries override earlier ones.

        Raises:
            Exception: if ``root_name`` is not ``'activity'``.
        """
        if 'activity' != root_name:
            # Format the message; passing root_name as a second argument
            # left the '%s' placeholder unformatted.
            raise Exception('Unsupported schema: %s' % root_name)
        # Built step by step rather than by adding .items() results, which
        # only works where items() returns lists (Python 2).
        namespace = dict(scope.items())
        namespace['__builtins__'] = {}
        namespace[root_name] = cls.parse_string(content).asList()
        return namespace
Example #33
0
    def parse(cls,
              content,
              basedir=None,
              resolve=True,
              unresolved_value=DEFAULT_SUBSTITUTION):
        """Parse HOCON content.

        :param content: HOCON content to parse
        :type content: basestring
        :param basedir: directory that relative ``include`` file paths are
            joined to; when None the path is used as written
        :type basedir: basestring
        :param resolve: if true, resolve substitutions
        :type resolve: boolean
        :param unresolved_value: value assigned to unresolved substitutions.
        If overridden with a default value, it will replace every unresolved value with that default.
        If it is set to pyhocon.STR_SUBSTITUTION then it will replace the value by its substitution expression (e.g., ${x})
        :type unresolved_value: one of the substitution sentinels (compared by identity) or a replacement value
        :return: a ConfigTree or a list
        """

        unescape_pattern = re.compile(r'\\.')

        def replace_escape_sequence(match):
            # Unknown escape sequences are kept as written.
            value = match.group(0)
            return cls.REPLACEMENTS.get(value, value)

        def norm_string(value):
            return unescape_pattern.sub(replace_escape_sequence, value)

        def unescape_string(tokens):
            return ConfigUnquotedString(norm_string(tokens[0]))

        def parse_multi_string(tokens):
            # remove the first and last 3 "
            return tokens[0][3:-3]

        def convert_number(tokens):
            # Prefer int; fall back to float (a ValueError from float propagates).
            n = tokens[0]
            try:
                return int(n, 10)
            except ValueError:
                return float(n)

        def safe_convert_number(tokens):
            # Like convert_number, but returns the text unchanged when it is
            # neither an int nor a float.
            n = tokens[0]
            try:
                return int(n, 10)
            except ValueError:
                try:
                    return float(n)
                except ValueError:
                    return n

        def convert_period(tokens):

            period_value = int(tokens.value)
            period_identifier = tokens.unit

            # First unit whose list of accepted spellings contains the identifier.
            period_unit = next((single_unit for single_unit, values in
                                cls.get_supported_period_type_map().items()
                                if period_identifier in values))

            return period(period_value, period_unit)

        # ${path} or ${?path} for optional substitution
        SUBSTITUTION_PATTERN = r"\$\{(?P<optional>\?)?(?P<variable>[^}]+)\}(?P<ws>[ \t]*)"

        def create_substitution(instring, loc, token):
            # remove the ${ and }
            match = re.match(SUBSTITUTION_PATTERN, token[0])
            variable = match.group('variable')
            ws = match.group('ws')
            optional = match.group('optional') == '?'
            substitution = ConfigSubstitution(variable, optional, ws, instring,
                                              loc)
            return substitution

        # double-quoted string with backslash escapes, followed by optional
        # trailing spaces/tabs
        STRING_PATTERN = '"(?P<value>(?:[^"\\\\]|\\\\.)*)"(?P<ws>[ \t]*)'

        def create_quoted_string(instring, loc, token):
            # strip the surrounding quotes and keep the trailing whitespace separately
            match = re.match(STRING_PATTERN, token[0])
            value = norm_string(match.group('value'))
            ws = match.group('ws')
            return ConfigQuotedString(value, ws, instring, loc)

        def include_config(instring, loc, token):
            url = None
            file = None
            required = False

            if token[0] == 'required':
                required = True
                final_tokens = token[1:]
            else:
                final_tokens = token

            if len(final_tokens) == 1:  # include "test"
                value = final_tokens[0].value if isinstance(
                    final_tokens[0], ConfigQuotedString) else final_tokens[0]
                if value.startswith("http://") or value.startswith(
                        "https://") or value.startswith("file://"):
                    url = value
                else:
                    file = value
            elif len(final_tokens) == 2:  # include url("test") or file("test")
                # Inspect final_tokens, not token: with a leading 'required'
                # token[1] is the url/file keyword, so the quoted string was
                # never unwrapped.
                value = final_tokens[1].value if isinstance(
                    final_tokens[1], ConfigQuotedString) else final_tokens[1]
                if final_tokens[0] == 'url':
                    url = value
                else:
                    file = value

            if url is not None:
                logger.debug('Loading config from url %s', url)
                obj = ConfigFactory.parse_URL(url,
                                              resolve=False,
                                              required=required,
                                              unresolved_value=NO_SUBSTITUTION)
            elif file is not None:
                path = file if basedir is None else os.path.join(basedir, file)
                logger.debug('Loading config from file %s', path)
                obj = ConfigFactory.parse_file(
                    path,
                    resolve=False,
                    required=required,
                    unresolved_value=NO_SUBSTITUTION)
            else:
                raise ConfigException(
                    'No file or URL specified at: {loc}: {instring}',
                    loc=loc,
                    instring=instring)

            return ConfigInclude(obj if isinstance(obj, list) else obj.items())

        @contextlib.contextmanager
        def set_default_white_spaces():
            # Newlines are significant in this grammar, so only spaces and
            # tabs are skipped while it is built and used.  The previous
            # pyparsing-wide default is restored even when parsing raises.
            default = ParserElement.DEFAULT_WHITE_CHARS
            ParserElement.setDefaultWhitespaceChars(' \t')
            try:
                yield
            finally:
                ParserElement.setDefaultWhitespaceChars(default)

        with set_default_white_spaces():
            assign_expr = Forward()
            true_expr = Keyword("true", caseless=True).setParseAction(
                replaceWith(True))
            false_expr = Keyword("false", caseless=True).setParseAction(
                replaceWith(False))
            null_expr = Keyword("null", caseless=True).setParseAction(
                replaceWith(NoneValue()))
            # key = QuotedString('"', escChar='\\', unquoteResults=False) | Word(alphanums + alphas8bit + '._- /')
            key = QuotedString('"', escChar='\\', unquoteResults=False) | \
                  Word("0123456789.").setParseAction(safe_convert_number) | Word(alphanums + alphas8bit + '._- /')

            eol = Word('\n\r').suppress()
            eol_comma = Word('\n\r,').suppress()
            comment = (Literal('#') | Literal('//')) - SkipTo(eol
                                                              | StringEnd())
            comment_eol = Suppress(Optional(eol_comma) + comment)
            comment_no_comma_eol = (comment | eol).suppress()
            # The lookahead only accepts a number that is followed by the end
            # of input or by a delimiter/comment, so text that merely starts
            # with digits is not split.
            number_expr = Regex(
                r'[+-]?(\d*\.\d+|\d+(\.\d+)?)([eE][+\-]?\d+)?(?=$|[ \t]*([\$\}\],#\n\r]|//))',
                re.DOTALL).setParseAction(convert_number)

            period_types = itertools.chain.from_iterable(
                cls.get_supported_period_type_map().values())
            period_expr = Regex(r'(?P<value>\d+)\s*(?P<unit>' +
                                '|'.join(period_types) +
                                ')$').setParseAction(convert_period)

            # multi line string using """
            # Using fix described in http://pyparsing.wikispaces.com/share/view/3778969
            multiline_string = Regex(
                '""".*?"*"""',
                re.DOTALL | re.UNICODE).setParseAction(parse_multi_string)
            # single quoted line string
            quoted_string = Regex(
                r'"(?:[^"\\\n]|\\.)*"[ \t]*',
                re.UNICODE).setParseAction(create_quoted_string)
            # unquoted string that takes the rest of the line until an optional comment
            # we support .properties multiline support which is like this:
            # line1  \
            # line2 \
            # so a backslash precedes the \n
            unquoted_string = Regex(
                r'(?:[^^`+?!@*&"\[\{\s\]\}#,=\$\\]|\\.)+[ \t]*',
                re.UNICODE).setParseAction(unescape_string)
            substitution_expr = Regex(r'[ \t]*\$\{[^\}]+\}[ \t]*'
                                      ).setParseAction(create_substitution)
            string_expr = multiline_string | quoted_string | unquoted_string

            value_expr = period_expr | number_expr | true_expr | false_expr | null_expr | string_expr

            include_content = (quoted_string | (
                (Keyword('url') | Keyword('file')) - Literal('(').suppress() -
                quoted_string - Literal(')').suppress()))
            include_expr = (Keyword("include", caseless=True).suppress() +
                            (include_content |
                             (Keyword("required") - Literal('(').suppress() -
                              include_content - Literal(')').suppress()))
                            ).setParseAction(include_config)

            root_dict_expr = Forward()
            dict_expr = Forward()
            list_expr = Forward()
            multi_value_expr = ZeroOrMore(comment_eol | include_expr
                                          | substitution_expr | dict_expr
                                          | list_expr | value_expr
                                          | (Literal('\\') - eol).suppress())
            # for a dictionary : or = is optional
            # last zeroOrMore is because we can have t = {a:4} {b: 6} {c: 7} which is dictionary concatenation
            inside_dict_expr = ConfigTreeParser(
                ZeroOrMore(comment_eol | include_expr | assign_expr
                           | eol_comma))
            inside_root_dict_expr = ConfigTreeParser(
                ZeroOrMore(comment_eol | include_expr | assign_expr
                           | eol_comma),
                root=True)
            dict_expr << Suppress('{') - inside_dict_expr - Suppress('}')
            root_dict_expr << Suppress('{') - inside_root_dict_expr - Suppress(
                '}')
            list_entry = ConcatenatedValueParser(multi_value_expr)
            list_expr << Suppress('[') - ListParser(list_entry - ZeroOrMore(
                eol_comma - list_entry)) - Suppress(']')

            # special case when we have a value assignment where the string can potentially be the remainder of the line
            assign_expr << Group(key - ZeroOrMore(comment_no_comma_eol) - (
                dict_expr | (Literal('=') | Literal(':') | Literal('+=')) -
                ZeroOrMore(comment_no_comma_eol) -
                ConcatenatedValueParser(multi_value_expr)))

            # the file can be { ... } where {} can be omitted or []
            config_expr = ZeroOrMore(comment_eol | eol) + (
                list_expr | root_dict_expr
                | inside_root_dict_expr) + ZeroOrMore(comment_eol | eol_comma)
            config = config_expr.parseString(content, parseAll=True)[0]

            if resolve:
                allow_unresolved = resolve and unresolved_value is not DEFAULT_SUBSTITUTION and unresolved_value is not MANDATORY_SUBSTITUTION
                has_unresolved = cls.resolve_substitutions(
                    config, allow_unresolved)
                if has_unresolved and unresolved_value is MANDATORY_SUBSTITUTION:
                    raise ConfigSubstitutionException(
                        'resolve cannot be set to True and unresolved_value to MANDATORY_SUBSTITUTION'
                    )

            if unresolved_value is not NO_SUBSTITUTION and unresolved_value is not DEFAULT_SUBSTITUTION:
                cls.unresolve_substitutions_to_value(config, unresolved_value)
        return config
Example #34
0
class AssessmentParser13(object):
    """Grammar and parser for the assessment.

    The class attributes build a pyparsing grammar for text of the form
    ``assessment = { ... };``.  The helpers ``sep``, ``key``, ``list_of``,
    ``make_bool``, ``make_float``, ``make_int``, ``make_list``,
    ``make_dict`` and the ``verify`` module are defined elsewhere in the file.
    """

    # --- Scalar values ---
    # Single- or double-quoted string, backslash escapes, may span lines.
    string = (
        QuotedString('\'', escChar='\\', multiline=True) ^
        QuotedString('"', escChar='\\', multiline=True))

    boolean = (
        Literal('true').setParseAction(make_bool(True)) ^
        Literal('false').setParseAction(make_bool(False)))

    # Digits with an optional ".digits" fraction.  The attribute name shadows
    # the builtin inside the class body only.
    float = Combine(
        Word(nums) + Optional(Literal('.') + Word(nums))
    ).setParseAction(make_float)

    integer = Word(nums).setParseAction(make_int)

    # A choice is a plain string or correct("..."), the latter wrapped in a
    # verify.Term tagged verify.CORRECT.
    choice_decl = (
        string ^
        Combine(
            sep('correct(') + string + sep(')')
        ).setParseAction(lambda x: verify.Term(verify.CORRECT, x[0]))
    )

    # Either a /.../i literal or regex("..."), the latter wrapped in a
    # verify.Term tagged verify.REGEX.
    regex = (
        Regex('/(.*)/i') ^
        Combine(
            sep('regex(') +
            QuotedString('"', escChar='\\') +
            sep(')')
        ).setParseAction(lambda x: verify.Term(verify.REGEX, x[0]))
    )

    # --- One question: { key: value, ... } with every key optional ---
    question_decl = (
        sep('{') +
        Each(
            Optional(
                key('questionHTML') + sep(':') +
                string + Optional(sep(','))) +
            Optional(
                key('lesson') + sep(':') +
                string + Optional(sep(','))) +
            Optional(
                key('correctAnswerString') + sep(':') +
                string + Optional(sep(','))) +
            Optional(
                key('correctAnswerRegex') + sep(':') +
                regex + Optional(sep(','))) +
            Optional(
                key('correctAnswerNumeric') + sep(':') +
                float + Optional(sep(','))) +
            Optional(
                key('choiceScores') + sep(':') +
                sep('[') +
                Group(list_of(float)).setParseAction(make_list) +
                sep(']') +
                Optional(sep(','))) +
            Optional(
                key('weight') + sep(':') + integer + Optional(sep(','))) +
            Optional(
                key('multiLine') + sep(':') +
                boolean + Optional(sep(','))) +
            Optional(
                key('choices') + sep(':') +
                sep('[') +
                Group(list_of(choice_decl)).setParseAction(make_list) +
                sep(']') +
                Optional(sep(',')))
        ) +
        sep('}')).setParseAction(make_dict)

    # --- Top level: assessment = { ... }; ---
    assessment_grammar = (
        sep('assessment') +
        sep('=') +
        sep('{') +
        Each(
            Optional(
                key('assessmentName') + sep(':') +
                string + Optional(sep(','))) +
            Optional(
                key('preamble') + sep(':') +
                string + Optional(sep(','))) +
            Optional(
                key('checkAnswers') + sep(':') +
                boolean + Optional(sep(','))) +
            Optional(
                key('questionsList') + sep(':') +
                sep('[') +
                Group(list_of(question_decl)).setParseAction(make_list) +
                sep(']') +
                Optional(sep(',')))
        ) +
        sep('}') +
        Optional(sep(';'))).setParseAction(make_dict)

    @classmethod
    def parse_string(cls, content):
        """Parse ``content`` with ``assessment_grammar`` and return the parse results."""
        return cls.assessment_grammar.parseString(content)

    @classmethod
    def parse_string_in_scope(cls, content, scope, root_name):
        """Parses assessment text following grammar.

        Returns a new dict holding the items of ``scope``, an empty
        ``__builtins__`` entry, and the parsed assessment stored under
        ``root_name``.  Later entries override earlier ones.

        Raises:
            Exception: if ``root_name`` is not ``'assessment'``.
        """
        if 'assessment' != root_name:
            # Format the message; passing root_name as a second argument
            # left the '%s' placeholder unformatted.
            raise Exception('Unsupported schema: %s' % root_name)

        # we need to extract the results as a dictionary; so we remove the
        # outer array holding it
        ast = cls.parse_string(content).asList()
        if len(ast) == 1:
            ast = ast[0]

        # Built step by step rather than by adding .items() results, which
        # only works where items() returns lists (Python 2).
        namespace = dict(scope.items())
        namespace['__builtins__'] = {}
        namespace[root_name] = ast
        return namespace
Example #35
0
    Word,
    Regex,
    Group,
    oneOf,
    Forward,
    CaselessKeyword,
    Suppress,
    delimitedList,
    operatorPrecedence,
    opAssoc,
    ParseException,
)


# Variables
# A variable reference is "<table>.<tag>.<attr>" where the table is one of
# ai, di or sv.  The table is written as an alternation: the former character
# class "[ai|di|sv]{2}" accepted any two of the characters a, i, |, d, s, v
# (for example "ad" or "||").
variable = Regex(r"(?P<table>ai|di|sv)\.(?P<tag>[\w\d]+)\.(?P<attr>\w+)")


def var_parse_action(text, index, context):
    """Parse action that returns the first matched token unchanged.

    ``text`` and ``index`` are accepted to fit the three-argument
    parse-action signature but are not used.
    """
    matched = context[0]
    return matched


# Attach the parse action to the variable expression (modifies it in place).
variable.setParseAction(var_parse_action)

# Numbers
# Optional leading '-', digits, and an optional ".digits" fraction.
numeric_literal = Regex(r"\-?\d+(\.\d+)?")


def number_prase_action(text, index, data):
    number = data[0]
    if "." in number:
Example #36
0
class ChoiceTree:
    '''
    Parse a string representing possible combinations and enumerate them.

    e.g.
        "abc[de|fg]" → [ "abcde", "abcfg" ]
        "I [eat|like] [|hot]dogs" → [ "I eat dogs", "I like dogs", "I eat hotdogs", "I like hotdogs" ]

    Escape symbol is '~'
    e.g.
        "abc~[def~]" → [ "abc[def]" ]
    Due to reasons, an escaped escape '~~' is not turned into a literal '~',
        if this is not up to liking, simply .replace('~~', '~') yourself after parsing.

    Essentially, consider the noncommutative Semiring of (unordered) lists of strings,
        so that in python notation: list1+list2 == [*list1, *list2] the concatenation of lists
        and list1*list2 == [a+b for a in list1 for b in list2] the concatenation of each pair of strings.
        (This ring has as neutral element the list of the empty string, and as zero element the empty list.)
    We write addition using the "|" symbol, the product is implicit (i.e. a*b == ab), and use [] as parentheses,
        so that in python notation e.g. "abc" == ["abc"] and "a|b|c" == ["a", "b", "c"]

    What ChoiceTree does is parse such expressions, and using the distributivity rule ( [a|b]c == ab|ac )
        it simplifies the expression to a sum of products.

    Every node type (Text, Choice, Group) exposes the same small protocol:
        count     -- number of distinct strings the node can produce
        done      -- True once next() has produced the node's last string
        next()    -- return the current string and advance
        current() -- return the current string without advancing
        random()  -- return one string chosen at random
        reset()   -- rewind to the first string
    '''
    class Text:
        """Leaf node: a literal piece of text with exactly one value."""

        def __init__(self, text):
            # `text` is either the literal '' (from the `empty` rule) or a
            # parsed group whose fragments are concatenated.
            self.text = text if text == '' else ''.join(text.asList())
            self.count = 1
            self.reset()

        __str__ = __repr__ = lambda s: s.text

        def next(self):
            # A leaf is exhausted as soon as it has been emitted once.
            self.done = True
            return self.text

        def random(self):
            return self.text

        def reset(self):
            self.done = False

        def current(self):
            return self.text

    class Choice:
        """Alternation node: produces the strings of each child in turn."""

        def __init__(self, vals):
            self.vals = vals.asList()
            # Alternatives add up.
            self.count = sum(v.count for v in self.vals)
            self.reset()

        __str__ = __repr__ = lambda s: '[{}]'.format('|'.join(
            [str(v) for v in s.vals]))

        def next(self):
            child = self.vals[self.i]
            out = child.next()
            if child.done:
                # Move on to the next alternative; finished after the last.
                self.i += 1
                if self.i == len(self.vals):
                    self.done = True
            return out

        def random(self):
            # Weighted based on the number of different possible branches each child has.
            return np.random.choice(self.vals,
                                    p=list(v.count / self.count
                                           for v in self.vals)).random()

        def reset(self):
            self.i = 0
            self.done = False
            for child in self.vals:
                child.reset()

        def current(self):
            return self.vals[self.i].current()

    class Group:
        """Concatenation node: produces every combination of its children."""

        def __init__(self, vals):
            self.vals = vals.asList()
            # Concatenated parts multiply.
            self.count = functools.reduce(lambda x, y: x * y,
                                          (c.count for c in self.vals), 1)
            self.reset()

        __str__ = __repr__ = lambda s: ''.join([str(v) for v in s.vals])

        def next(self):
            # Works like an odometer whose fastest digit is the first child:
            # advance children from the left, and whenever one wraps around
            # reset it and carry into the next child.
            i = 0
            out = ''
            while True:
                out += self.vals[i].next()
                if self.vals[i].done:
                    if i == len(self.vals) - 1:
                        # The last child wrapped: every combination was seen.
                        self.done = True
                        break
                    else:
                        self.vals[i].reset()
                else:
                    break
                i += 1
            i += 1

            # Children to the right of the carry keep their current value.
            while i < len(self.vals):
                out += self.vals[i].current()
                i += 1

            return out

        def random(self):
            return ''.join(v.random() for v in self.vals)

        def reset(self):
            self.done = False
            for child in self.vals:
                child.reset()

        def current(self):
            return ''.join([c.current() for c in self.vals])

    # --- Grammar -----------------------------------------------------------
    # '~' followed by one of '[', '|' or ']' yields that symbol literally.
    escapedSymbol = Char('~').suppress() + Char('[|]')
    escapedEsc = Literal('~~')
    soleEsc = Char('~')
    lbr = Literal('[').suppress()
    rbr = Literal(']').suppress()
    div = Literal('|').suppress()
    _text = Regex(
        r'[^\[\|\]~]+'
    )  # any sequence of characters not containing '[', ']', '|' or '~'

    text = pGroup(
        OneOrMore(escapedSymbol | escapedEsc | soleEsc
                  | _text)).setParseAction(lambda t: ChoiceTree.Text(t[0]))
    group = Forward()
    choice = pGroup(lbr + group + ZeroOrMore(div + group) +
                    rbr).setParseAction(lambda t: ChoiceTree.Choice(t[0]))
    empty = Empty().setParseAction(lambda t: ChoiceTree.Text(''))
    group <<= pGroup(OneOrMore(text | choice) | empty).setParseAction(
        lambda t: ChoiceTree.Group(t[0])).leaveWhitespace()

    def __init__(self,
                 text,
                 parse_flags=False,
                 add_brackets=False,
                 leave_escapes=False):
        """Parse `text` into a tree of Text/Choice/Group nodes.

        text:          the expression to parse.
        parse_flags:   when true, a leading '[?]' is stripped from `text` and
                       makes iteration yield a single random string.
        add_brackets:  when true, `text` is wrapped in '[' and ']' before
                       parsing, so top-level '|' separates alternatives.
        leave_escapes: accepted for interface compatibility; not used by this
                       implementation.
        """
        self.flag_random = False
        if parse_flags and text.startswith('[?]'):
            text = text[3:]
            self.flag_random = True

        if add_brackets:
            text = '[' + text + ']'

        self.root: ChoiceTree.Group = ChoiceTree.group.parseString(text)[0]
        self.count = self.root.count

    def __iter__(self):
        """Yield every combination, or one random string if flagged random."""
        if self.flag_random:
            yield self.random()
            return
        # Rewind first: an earlier iteration that was abandoned part-way
        # never reached the reset below and would otherwise make this one
        # resume in the middle.
        self.root.reset()
        while not self.root.done:
            yield self.root.next()
        self.root.reset()

    def random(self):
        """Return one randomly chosen combination."""
        return self.root.random()
Example #37
0
                    joint = imusimModel.getJoint(bone.name)
                    if joint.hasParent:
                        parentRot = joint.parent.rotationKeyFrames.latestValue
                        parentRotOffset = bonedata.parent.rotationOffset
                        rotation = parentRot * parentRotOffset * rotation
                    else:
                        rotation = convertCGtoNED(rotation)
                    joint.rotationKeyFrames.add(t, rotation)
                t += framePeriod

        return imusimModel

# Define parser tokens
# Zero or more '#' comments, each running to the end of its line; suppressed
# from the results.
comments = ZeroOrMore(Suppress(Literal('#') + SkipTo(LineEnd())))
# Unsigned integer, converted to int.
intValue = Word(nums).setParseAction( lambda s,l,t: int(t[0]) )
# Float with optional sign, fraction and lowercase-'e' exponent, converted
# with float().
# NOTE(review): the exponent part allows zero digits, so text such as "1e"
# matches the pattern but would make float() raise -- confirm inputs never
# contain that.
floatValue = Regex(r'-?\d+(\.\d*)?(e-?\d*)?').setParseAction(lambda s,l,t: float(t[0]))
# Three consecutive floats, grouped.
floatVector = Group(floatValue + floatValue + floatValue)
# One "( a b )" pair of floats; the parentheses are dropped.
limit = Group(
        Suppress(Literal("(")) +
        floatValue +
        floatValue +
        Suppress(Literal(")")))
limits = Group(OneOrMore(limit))
# Channel name: first character from "TRtr", following characters from
# "XYZxyz".
channel = Word("TRtr","XYZxyz")
channels = Group(OneOrMore(channel))
# Exactly three characters drawn from X, Y, Z.
rotationOrder = Word("XYZ", exact=3)
begin = Suppress(Keyword("begin"))
end = Suppress(Keyword("end"))
# A bone name is a word of alphanumerics, '_' or '-' that is not the keyword
# "end"; only the space character is treated as skippable whitespace here.
bonename = Combine(~end + Word(alphanums+"_-")).setWhitespaceChars(' ')

# Version header: the keyword ":version" followed by the literal "1.10".
version = Keyword(":version") + Literal("1.10")
Example #38
0
def formula_grammar(table):
    """
    Construct a parser for molecular formulas.

    :Parameters:

        *table* : PeriodicTable
             Periodic table used to resolve element symbols; every matched
             symbol is looked up with ``table.symbol(...)``.
             NOTE(review): the signature has no default value, so a table
             must always be supplied.

    :Returns:
        *parser* : pyparsing.ParserElement.
            The ``parser.parseString()`` method returns a list of
            pairs (*count,fragment*), where fragment is an *isotope*,
            an *element* or a list of pairs (*count,fragment*).

    """
    # Recursive
    composite = Forward()
    mixture = Forward()

    # whitespace and separators
    # `space` is optional whitespace that is dropped from the results;
    # `separator` is a '+' with optional whitespace on both sides.
    space = Optional(White().suppress())
    separator = space+Literal('+').suppress()+space

    # Lookup the element in the element table
    symbol = Regex("[A-Z][a-z]*")
    symbol = symbol.setParseAction(lambda s,l,t: table.symbol(t[0]))

    # Translate isotope
    # Written as "[N]" directly after the symbol (~White() forbids whitespace
    # in between); yields 0 when absent.
    openiso = Literal('[').suppress()
    closeiso = Literal(']').suppress()
    isotope = Optional(~White()+openiso+Regex("[1-9][0-9]*")+closeiso,
                       default='0')
    isotope = isotope.setParseAction(lambda s,l,t: int(t[0]) if t[0] else 0)

    # Translate ion
    # Written as "{N+}", "{N-}", "{+}" or "{-}".  The parse action moves the
    # trailing sign to the front so int() can read it, and uses a magnitude
    # of 1 when no digits are given; the default '0+' therefore yields 0.
    openion = Literal('{').suppress()
    closeion = Literal('}').suppress()
    ion = Optional(~White()
                     +openion
                     +Regex("([1-9][0-9]*)?[+-]")
                     +closeion,
                   default='0+')
    ion = ion.setParseAction(lambda s,l,t: int(t[0][-1]+(t[0][:-1] if len(t[0])>1 else '1')))

    # Translate counts
    # `fract` always contains a '.'; `whole` is a positive integer without a
    # leading zero.  A count must follow without whitespace and defaults to 1.
    fract = Regex("(0|[1-9][0-9]*|)([.][0-9]*)")
    fract = fract.setParseAction(lambda s,l,t: float(t[0]) if t[0] else 1)
    whole = Regex("[1-9][0-9]*")
    whole = whole.setParseAction(lambda s,l,t: int(t[0]) if t[0] else 1)
    count = Optional(~White()+(fract|whole),default=1)

    # Convert symbol,isotope,ion,count to (count,isotope)
    element = symbol+isotope+ion+count
    def convert_element(string,location,tokens):
        """Turn (symbol, isotope, ion, count) tokens into (count, symbol)."""
        #print "convert_element received",tokens
        symbol,isotope,ion,count = tokens[0:4]
        # A non-zero isotope / ion selects the matching variant of the
        # element by indexing.
        if isotope != 0: symbol = symbol[isotope]
        if ion != 0: symbol = symbol.ion[ion]
        return (count,symbol)
    element = element.setParseAction(convert_element)

    # Convert "count elements" to a pair
    implicit_group = count+OneOrMore(element)
    def convert_implicit(string,location,tokens):
        """Return the elements alone for count 1, else (count, elements)."""
        #print "implicit",tokens
        count = tokens[0]
        fragment = tokens[1:]
        return fragment if count==1 else (count,fragment)
    implicit_group = implicit_group.setParseAction(convert_implicit)

    # Convert "(composite) count" to a pair
    opengrp = space + Literal('(').suppress() + space
    closegrp = space + Literal(')').suppress() + space
    explicit_group = opengrp + composite + closegrp + count
    def convert_explicit(string,location,tokens):
        """Return the group alone for count 1, else (count, group)."""
        #print "explicit",tokens
        count = tokens[-1]
        fragment = tokens[:-1]
        return fragment if count == 1 else (count,fragment)
    explicit_group = explicit_group.setParseAction(convert_explicit)

    # Build composite from a set of groups
    group = implicit_group | explicit_group
    implicit_separator = separator | space
    composite << group + ZeroOrMore(implicit_separator + group)

    # Optional density suffix: "@<count>" followed by an optional 'n' or 'i'
    # flag, which defaults to 'i'.
    density = Literal('@').suppress() + count + Optional(Regex("[ni]"),default='i')
    compound = composite + Optional(density,default=None)
    def convert_compound(string,location,tokens):
        """Build a Formula, passing the density when one was given."""
        #print "compound",tokens
        if tokens[-1] is None:
            # No density suffix: every token belongs to the structure.
            return Formula(structure=_immutable(tokens[:-1]))
        elif tokens[-1] == 'n':
            return Formula(structure=_immutable(tokens[:-2]), natural_density=tokens[-2])
        else:
            return Formula(structure=_immutable(tokens[:-2]), density=tokens[-2])
    compound = compound.setParseAction(convert_compound)

    partsep = space + Literal('//').suppress() + space
    percent = Literal('%').suppress()

    # Mixture by weight: "<pct>%wt <part> // <pct>% <part> // ... // <base>".
    # The percent markers are suppressed, so the tokens alternate
    # percentage, part, ..., ending with the base part that carries no
    # percentage.
    weight_percent = Regex("%(w((eigh)?t)?|m(ass)?)").suppress() + space
    by_weight = count + weight_percent + mixture + ZeroOrMore(partsep+count+(weight_percent|percent)+mixture) + partsep + mixture
    def convert_by_weight(string,location,tokens):
        """Mix parts by weight; the base part receives the remaining percent."""
        #print "by weight",tokens
        piece = tokens[1:-1:2] + [tokens[-1]]
        fract = [float(v) for v in tokens[:-1:2]]
        fract.append(100-sum(fract))
        #print piece, fract
        if len(piece) != len(fract): raise ValueError("Missing base component of mixture")
        if fract[-1] < 0: raise ValueError("Formula percentages must sum to less than 100%")
        return _mix_by_weight_pairs(zip(piece,fract))
    mixture_by_weight = by_weight.setParseAction(convert_by_weight)

    # Mixture by volume: same layout as by weight, using "%v", "%vol" or
    # "%volume" as the marker.
    volume_percent = Regex("%v(ol(ume)?)?").suppress() + space
    by_volume = count + volume_percent + mixture + ZeroOrMore(partsep+count+(volume_percent|percent)+mixture) + partsep + mixture
    def convert_by_volume(string,location,tokens):
        """Mix parts by volume; the base part receives the remaining percent."""
        #print "by volume",tokens
        piece = tokens[1:-1:2] + [tokens[-1]]
        fract = [float(v) for v in tokens[:-1:2]]
        fract.append(100-sum(fract))
        #print piece, fract
        if len(piece) != len(fract): raise ValueError("Missing base component of mixture "+string)
        if fract[-1] < 0: raise ValueError("Formula percentages must sum to less than 100%")
        return _mix_by_volume_pairs(zip(piece,fract))
    mixture_by_volume = by_volume.setParseAction(convert_by_volume)

    # Mixture by layer thickness: each part is either "<thickness><unit>
    # <mixture>" or a parenthesised layer mixture followed by a repeat count.
    mixture_by_layer = Forward()
    layer_thick = Group(count + Regex("(nm|um|mm)") + space)
    layer_part = (layer_thick + mixture ) | (opengrp + mixture_by_layer + closegrp +count)
    mixture_by_layer <<   layer_part + ZeroOrMore(partsep + layer_part)
    def convert_by_layer(string,location,tokens):
        """Mix layers by volume in proportion to their thickness in metres."""

        units = {'nm': 1e-9,
                 'um': 1e-6,
                 'mm': 1e-3,
                 }
        if len (tokens) < 2:
            return tokens
        piece = []
        fract = []
        # Tokens come in pairs: (thickness group, formula) for a plain layer,
        # or (nested formula, repeat count) for a parenthesised group.
        for p1, p2 in zip(tokens[0::2], tokens[1::2]):
            if isinstance(p1, Formula):
                f = p1.absthick * float(p2)
                p = p1
            else:
                f = float(p1[0]) * units[p1[1]]
                p = p2
            piece.append(p)
            fract.append(f)
        total = sum(fract)
        vfract = [ (v/total)*100 for v in fract]
        result = _mix_by_volume_pairs(zip(piece,vfract))
        # Keep the total so an enclosing group can scale it by its count.
        result.absthick = total
        return result
    mixture_by_layer = mixture_by_layer.setParseAction(convert_by_layer)

    # Mixture by absolute mass: same structure as layers, with mass units.
    mixture_by_absmass = Forward()
    absmass_mass = Group(count + Regex("(ng|ug|mg|g|kg)") + space)
    absmass_part = (absmass_mass + mixture) | (opengrp + mixture_by_absmass + closegrp + count)
    mixture_by_absmass << absmass_part + ZeroOrMore( partsep + absmass_part)
    def convert_by_absmass(string,location,tokens):
        """Mix parts by weight in proportion to their mass in grams."""

        units = {'ng': 1e-9,
                 'ug': 1e-6,
                 'mg': 1e-3,
                 'g': 1e+0,
                 'kg': 1e+3,
                 }
        if len (tokens) < 2:
            return tokens
        piece = []
        fract = []
        # Pairs are (mass group, formula) or (nested formula, repeat count).
        for p1, p2 in zip(tokens[0::2], tokens[1::2]):
            if isinstance(p1, Formula):
                f = p1.absmass * float(p2)
                p = p1
            else:
                f = float(p1[0]) * units[p1[1]]
                p = p2
            piece.append(p)
            fract.append(f)

        total = sum(fract)
        mfract = [ (m/total)*100 for m in fract]
        result = _mix_by_weight_pairs(zip(piece,mfract))
        result.absmass=total
        return result
    mixture_by_absmass = mixture_by_absmass.setParseAction(convert_by_absmass)

    # Mixture by absolute volume: same structure again, with volume units.
    mixture_by_absvolume = Forward()
    absvolume_vol = Group(count + Regex("(nl|ul|ml|l)") + space)
    absvolume_part = ( absvolume_vol + mixture )|(opengrp + mixture_by_absvolume + closegrp + count)
    mixture_by_absvolume << absvolume_part + ZeroOrMore( partsep + absvolume_part)
    def convert_by_absvolume(string,location,tokens):
        """Mix parts by volume in proportion to their volume in litres."""

        units = {'nl': 1e-9,
                 'ul': 1e-6,
                 'ml': 1e-3,
                 'l': 1e+0,
                 }
        if len (tokens) < 2:
            return tokens
        piece = []
        fract = []
        # Pairs are (volume group, formula) or (nested formula, repeat count).
        for p1, p2 in zip(tokens[0::2], tokens[1::2]):
            if isinstance(p1, Formula):
                f = p1.absvolume * float(p2)
                p = p1
            else:
                f = float(p1[0]) * units[p1[1]]
                p = p2
            piece.append(p)
            fract.append(f)
        total = sum(fract)
        vfract = [ (v/total)*100 for v in fract]
        # NOTE(review): piece and fract are appended in lockstep above, so the
        # length check cannot fire, and the second message talks about
        # percentages although fract holds absolute volumes -- these two
        # checks look copied from the percent-based converters; confirm.
        if len(piece) != len(fract): raise ValueError("Missing base component of mixture "+string)
        if fract[-1] < 0: raise ValueError("Formula percentages must sum to less than 100%")
        result = _mix_by_volume_pairs(zip(piece,vfract))
        result.absvolume = total
        return result
    mixture_by_absvolume = mixture_by_absvolume.setParseAction(convert_by_absvolume)

    # A nested mixture is a plain compound or a parenthesised percent-based
    # mixture.
    mixture << (compound | (opengrp + (mixture_by_weight | mixture_by_volume ) + closegrp))
    formula = compound | mixture_by_weight | mixture_by_volume | mixture_by_layer | mixture_by_absmass | mixture_by_absvolume
    # An empty input yields an empty Formula; StringEnd() requires the whole
    # input to be consumed.
    grammar = Optional(formula, default=Formula()) + StringEnd()

    grammar.setName('Chemical Formula')
    return grammar
Example #39
0
# The bracketed numbers below are production numbers quoted from the grammar
# being implemented.  The *_re names are character-class fragments meant to be
# placed inside a regex [...]; PN_CHARS_U_re and PN_CHARS_BASE_re are defined
# outside this excerpt.

# [167] PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]
PN_CHARS_re = '\\-0-9\u00B7\u0300-\u036F\u203F-\u2040' + PN_CHARS_U_re
# PN_CHARS = Regex(u'[%s]'%PN_CHARS_re, flags=re.U)

# [168] PN_PREFIX ::= PN_CHARS_BASE ((PN_CHARS|'.')* PN_CHARS)?
PN_PREFIX = Regex(r'[%s](?:[%s\.]*[%s])?' % (PN_CHARS_BASE_re,
                  PN_CHARS_re, PN_CHARS_re), flags=re.U)

# [140] PNAME_NS ::= PN_PREFIX? ':'
# The ':' is suppressed from the results and must follow without whitespace.
PNAME_NS = Optional(
    Param('prefix', PN_PREFIX)) + Suppress(':').leaveWhitespace()

# [173] PN_LOCAL_ESC ::= '\' ( '_' | '~' | '.' | '-' | '!' | '$' | '&' | "'" | '(' | ')' | '*' | '+' | ',' | ';' | '=' | '/' | '?' | '#' | '@' | '%' )
# NOTE(review): the character class below also accepts '"', which is not in
# the production quoted above -- confirm whether that is intended.

PN_LOCAL_ESC = Regex('\\\\[_~\\.\\-!$&"\'()*+,;=/?#@%]')
# Drop the leading backslash, keeping only the escaped character.
PN_LOCAL_ESC.setParseAction(lambda x: x[0][1:])

# [172] HEX ::= [0-9] | [A-F] | [a-f]
# HEX = Regex('[0-9A-Fa-f]') # not needed

# [171] PERCENT ::= '%' HEX HEX
PERCENT = Regex('%[0-9a-fA-F]{2}')
# Replace "%XX" with the character whose code point is hex XX.
PERCENT.setParseAction(lambda x: chr(int(x[0][1:], 16)))

# [170] PLX ::= PERCENT | PN_LOCAL_ESC
PLX = PERCENT | PN_LOCAL_ESC

# [169] PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX ) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX) )?
# NOTE(review): the Optional "last character" term sits inside the ZeroOrMore
# rather than after it, which is not the shape of the production above --
# confirm that names ending in '.' are handled as intended.
PN_LOCAL = Combine((Regex('[%s0-9:]' % PN_CHARS_U_re, flags=re.U) | PLX) + ZeroOrMore((Regex(
    '[%s\\.:]' % PN_CHARS_re, flags=re.U) | PLX) + Optional(Regex('[%s:]' % PN_CHARS_re, flags=re.U) | PLX)))
Example #40
0
        # val = val.replace(".", "\\.")
    elif val.startswith('`') and val.endswith('`'):
          val = "'" + val[1:-1].replace("``","`") + "'"
    elif val.startswith("+"):
        val = val[1:]
    un = ast.literal_eval(val)
    return un


def to_string(instring, tokensStart, retTokens):
    """Parse action: turn a quoted SQL string token into ``{"literal": str}``.

    The first token is expected to include its surrounding quote characters.
    They are stripped, doubled single quotes are rewritten as backslash
    escapes, and the result is evaluated as a Python single-quoted string
    literal.  ``instring`` and ``tokensStart`` are not used.
    """
    inner = retTokens[0][1:-1]
    escaped = inner.replace("''", "\\'")
    python_literal = "'" + escaped + "'"
    return {"literal": ast.literal_eval(python_literal)}

# NUMBERS
# realNum needs a decimal point (digits on at least one side of it) and may
# carry a signed exponent; intNum has no decimal point and only allows a
# non-negative exponent.  Both are post-processed by unquote.
realNum = Regex(r"[+-]?(\d+\.\d*|\.\d+)([eE][+-]?\d+)?").addParseAction(unquote)
intNum = Regex(r"[+-]?\d+([eE]\+?\d+)?").addParseAction(unquote)

# STRINGS, NUMBERS, VARIABLES
# Each quoted form accepts a doubled quote character or a backslash escape
# inside the quotes: '...' is a string literal, "..." and `...` are quoted
# identifiers.
sqlString = Regex(r"\'(\'\'|\\.|[^'])*\'").addParseAction(to_string)
identString = Regex(r'\"(\"\"|\\.|[^"])*\"').addParseAction(unquote)
mysqlidentString = Regex(r'\`(\`\`|\\.|[^`])*\`').addParseAction(unquote)
# A dotted identifier whose parts are '*', a bare word or a quoted
# identifier, combined into one token; it must not start with a RESERVED word.
ident = Combine(~RESERVED + (delimitedList(Literal("*") | Word(alphas + "_", alphanums + "_$") | identString | mysqlidentString, delim=".", combine=True))).setName("identifier")

# EXPRESSIONS
# Forward declaration; the expression grammar is defined later.
expr = Forward()

# CASE
case = (
    CASE +
    Group(ZeroOrMore((WHEN + expr("when") + THEN + expr("then")).addParseAction(to_when_call)))("case") +
Example #41
0
 def getToken(self):
     """Return the token for a line join: a backslash followed by a newline.

     The match is replaced by an empty string and named "linejoin".
     """
     line_join = Regex(r"\\\n").setParseAction(lambda s, l, t: u"")
     return line_join("linejoin")
Example #42
0
    """Write the strategy encoded by the subtree rooted at 'root' in modified Newick format.
    V(H, L) represents the tree with root node V, high subtree H, and low subtree L.
    A node name followed by * indicates that the gusher is being opened solely for information and the Goldie will
    never be found there."""
    if root.high and root.low:
        return f'{root}({write_tree(root.high)}, {write_tree(root.low)})'
    elif root.high:
        return f'{root}({write_tree(root.high)},)'
    elif root.low:
        return f'{root}(,{write_tree(root.low)})'
    else:
        return f'{root}'


# Strategy tree grammar
# A tree is a node name optionally followed by "(high, low)", where either
# subtree may be empty.
# A node is a word optionally followed by one character from NEVER_FIND_FLAG.
node = Regex(rf'\w+[{NEVER_FIND_FLAG}]?')
LPAREN, COMMA, RPAREN = map(Suppress, '(,)')
tree = Forward()
# Optional(tree) lets a subtree be empty; Group keeps each subtree's tokens
# nested.
subtree = Group(Optional(tree))
# pyparsing's '-' joins like '+' but disables backtracking past that point,
# so a malformed "(...)" is reported where it fails.
subtrees = LPAREN - subtree.setResultsName(
    'high') - COMMA - subtree.setResultsName('low') - RPAREN
tree << node.setResultsName('root') - Optional(subtrees)


def read_tree(tree_str, gusher_map, start=BASKET_LABEL):
    """Read the strategy encoded in tree_str and build the corresponding decision tree.
    V(H, L) represents the tree with root node V, high subtree H, and low subtree L.
    A node name followed by * indicates that the gusher is being opened solely for information and the Goldie will
    never be found there."""
    def build_tree(
        tokens
Example #43
0
import re

from pyparsing import (
    Word, Keyword, NotAny, alphanums, nums, alphas, OneOrMore, srange,
    ZeroOrMore, Regex
)

from whispy_lispy import ast

# Integer: a run of digits that is not followed by '.'.
int_literal = Word(nums) + NotAny('.')
int_literal.setParseAction(ast.Int.from_parsed_result)

# Float: digits, dots, digits, delivered as three separate tokens.
# NOTE(review): Word('.') matches one or more dots, so "1..2" is accepted --
# confirm whether a single literal '.' was intended.
float_literal = Word(nums) + Word('.') + Word(nums)
float_literal.setParseAction(ast.Float.from_parsed_result)

# Booleans are the keywords #t and #f.
bool_literal = Keyword('#t') | Keyword('#f')
bool_literal.setParseAction(ast.Bool.from_parsed_result)

# Double-quoted string, matched non-greedily up to the first '"' that is not
# preceded by a backslash; DOTALL lets it span lines.
# NOTE(review): this uses `from_parse_result` while the other literals use
# `from_parsed_result` -- verify the method name against ast.String.
string_literal = Regex(r'\".*?(?<!\\)\"', re.DOTALL)
string_literal.setParseAction(ast.String.from_parse_result)

# Floats are tried before ints so that "1.5" is not split at the '.'.
grammar = OneOrMore(float_literal | int_literal | bool_literal | string_literal)
def _transform_until_absent(expr, datestr, keywords):
    """Apply ``expr.transformString`` until no keyword remains in the text.

    Raises DSLError if a keyword is still present after the pass limit.
    """
    count = 0
    while any(word in datestr for word in keywords):
        datestr = expr.transformString(datestr)
        if count > 10:
            raise DSLError(f"Recursion limit reached, got as far as {datestr}")
        count += 1
    return datestr


def dsl_parser(datestr: str, year: int) -> date:
    """
    Parse dsl str for a given year.

    >>> from officiumdivinum.DSL.dsl_parser import dsl_parser
    >>> dsl_parser("Easter", 2020)
    datetime.date(2020, 4, 12)

    >>> dsl_parser("1 Jan", 2020)
    datetime.date(2020, 1, 1)

    >>> dsl_parser("Sun between 2 Jan 4 Jan OR 2 Jan", 2016)
    datetime.date(2016, 1, 3)

    >>> dsl_parser("Sun between 2 Jan 4 Jan OR 2 Jan", 2017)
    datetime.date(2017, 1, 2)

    >>> dsl_parser("22nd Sun after Pentecost", 2021)
    datetime.date(2021, 10, 24)

    Parameters
    ----------
    datestr: str : Expression to be parsed.

    year: int : Year in which to evaluate expression


    Returns
    -------
    date
        a date in the year in question.

    Raises
    ------
    DSLError
        If an expression cannot be reduced within the pass limit, or if no
        date is left once every expression has been reduced.
    """

    # First we convert all possible date representations into isodate
    # strings (yyyy-mm-dd).

    # convert specials
    special = oneOf(specials.keys())
    special.setParseAction(lambda t: str(specials[t[0]](year)) + " ")
    datestr = special[...].transformString(datestr)

    # convert yearless date expressions into dates
    yearless = Word(nums) + oneOf(months)
    yearless.setParseAction(
        lambda t: str(date(year,
                           months.index(t[1]) + 1, int(t[0]))) + " ")
    datestr = yearless[...].transformString(datestr)

    # All dates are now isodates.
    isodate = Regex(r"[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]")

    # handle [ordinals] weekdays + timedeltas
    timedelta = Group(
        Optional(oneOf(ordinals))("ordinal") + oneOf(days)("day") +
        oneOf(["before", "after"])("delta") + isodate("date"))
    timedelta.setParseAction(_parse_timedelta)

    # handle betweens
    between = Group(
        Optional(oneOf(ordinals))("ordinal") + oneOf(days)("day") + "between" +
        isodate("date1") + isodate("date2"))
    between.setParseAction(_parse_between)

    # Relative expressions may nest, so keep rewriting until none is left.
    relative = between[...]
    relative += timedelta[...]
    datestr = _transform_until_absent(relative, datestr,
                                      ("after", "before", "between"))

    # At this point we only have calendar dates, components of date
    # expressions ('before', 'after', 'on or before' or 'on or after';
    # ordinal weekdays and 'between' expressions) and operators ('AND'
    # 'OR', 'NOT').  Since operators operate on the *logical status*
    # of operands, and this logical status is False if the operand
    # doesn't evaluate to a date, and otherwise a calendar date, we
    # deal with them last.

    # fail here if we match anything except isodates, 'or' or 'and'
    # illegal = ~(isodate | oneOf(["OR", "AND"]))[1...]

    # The datestr is now composed entirely of evaluated date expressions
    # split by logical operators; reduce them one operator kind at a time.
    # The operator grammars do not change between passes, so each is built
    # once rather than on every pass.
    or_expr = Group((isodate("lhs") ^ "False") + "OR" +
                    (isodate("rhs") ^ "False"))
    or_expr.setParseAction(_parse_or)
    datestr = _transform_until_absent(or_expr[...], datestr, ("OR",))

    and_expr = Group((isodate("lhs") ^ "False") + "AND" +
                     (isodate("rhs") ^ "False"))
    and_expr.setParseAction(_parse_and)
    datestr = _transform_until_absent(and_expr[...], datestr, ("AND",))

    # convert dates to datetime.date() objects
    isodate.setParseAction(lambda s, l, t: date.fromisoformat(t[0]))
    parsed = isodate[...].parseString(datestr)
    try:
        return parsed[0]
    except IndexError as err:
        # Nothing that looks like a date survived the reductions.
        raise DSLError("Unable to parse") from err
Example #45
0
        return self.name == other.name
    def __ne__(self, other):
        """Return whether the ``name`` attributes of the two objects differ."""
        mine, theirs = self.name, other.name
        return mine != theirs


# Character literals
# Each punctuation character becomes a Suppress element, so it has to be
# present in the input but is left out of the parse results.
LCURLY,RCURLY,LPAREN,RPAREN,QUOTE,COMMA,AT,EQUALS,HASH = map(Suppress,'{}()",@=#')


def bracketed(expr):
    """ Return a matcher for `expr` wrapped in parentheses or in curly brackets

    The parenthesised form is tried first.
    """
    in_parens = LPAREN + expr + RPAREN
    in_curlies = LCURLY + expr + RCURLY
    return in_parens | in_curlies


# Define parser components for strings (the hard bit)
# Runs of characters containing no curly (and, for the second, no double
# quote either).  leaveWhitespace() keeps leading whitespace as part of the
# match instead of skipping it.
chars_no_curly = Regex(r"[^{}]+")
chars_no_curly.leaveWhitespace()
chars_no_quotecurly = Regex(r'[^"{}]+')
chars_no_quotecurly.leaveWhitespace()
# Curly string is some stuff without curlies, or nested curly sequences
# (each nested sequence becomes its own Group in the results)
curly_string = Forward()
curly_item = Group(curly_string) | chars_no_curly
curly_string << LCURLY + ZeroOrMore(curly_item) + RCURLY
# quoted string is either just stuff within quotes, or stuff within quotes, within
# which there is nested curliness
quoted_item = Group(curly_string) | chars_no_quotecurly
quoted_string = QUOTE + ZeroOrMore(quoted_item) + QUOTE

# Numbers can just be numbers. Only integers though.
# The match is kept as a string of digits.
number = Regex('[0-9]+')
Example #46
0
# Unary operators: the symbols - + ~ and the keyword NOT.
unary_op = oneOf('- + ~', caseless=True)
unary_op |= CKeyword('NOT')

# TODO this does not encode precedence
binary_op = oneOf("|| * / % + - << >> & | < <= > >= = == != <>", caseless=True)
# Keyword operators are OR-ed together in the order listed.
# NOTE(review): 'IS' is listed before 'IS NOT'; if CKeyword is a keyword
# matcher and '|' picks the first alternative that matches, 'IS NOT' can
# never be chosen as one operator -- confirm whether the order should be
# swapped.
binary_op |= reduce(lambda x,y: x|y, [CKeyword(x) for x in 'IS,IS NOT,IN,LIKE,GLOB,MATCH,REGEXP,AND,OR'.split(',')])

# these direct from the SQLite docs
KEYWORDS = 'ABORT ACTION ADD AFTER ALL ALTER ANALYZE AND AS ASC ATTACH AUTOINCREMENT BEFORE BEGIN BETWEEN BY CASCADE CASE CAST CHECK COLLATE COLUMN COMMIT CONFLICT CONSTRAINT CREATE CROSS CURRENT_DATE CURRENT_TIME CURRENT_TIMESTAMP DATABASE DEFAULT DEFERRABLE DEFERRED DELETE DESC DETACH DISTINCT DROP EACH ELSE END ESCAPE EXCEPT EXCLUSIVE EXISTS EXPLAIN FAIL FOR FOREIGN FROM FULL GLOB GROUP HAVING IF IGNORE IMMEDIATE IN INDEX INDEXED INITIALLY INNER INSERT INSTEAD INTERSECT INTO IS ISNULL JOIN KEY LEFT LIKE LIMIT MATCH NATURAL NO NOT NOTNULL NULL OF OFFSET ON OR ORDER OUTER PLAN PRAGMA PRIMARY QUERY RAISE REFERENCES REGEXP REINDEX RELEASE RENAME REPLACE RESTRICT RIGHT ROLLBACK ROW SAVEPOINT SELECT SET TABLE TEMP TEMPORARY THEN TO TRANSACTION TRIGGER UNION UNIQUE UPDATE USING VACUUM VALUES VIEW VIRTUAL WHEN WHERE'

# TODO probably not right charset & does not account for escaping identifiers
# https://www.sqlite.org/lang_keywords.html
# An identifier is a letter or underscore followed by letters, digits or
# underscores, rejected when any of the KEYWORDS matches at that position.
identifier = NotAny(
	reduce(lambda x,y: x|y, [CKeyword(x) for x in KEYWORDS.split(' ')])
	) + Regex('[a-zA-Z_][a-zA-Z0-9_]*')

# for the purposes of attaching parse actions to these
# objects they need to all be separate. table_in_column
# is to distinguish between tables as found in the grammar
# and those specifically found (optionally) in a column spec
# (which gets triggered whether there's actually a table part
# or not.)
table_name = identifier.copy()
table_in_column = table_name.copy()
database_name = identifier.copy()
column_name = identifier.copy()

# A column reference: optional "database." and optional "table." prefixes
# followed by the column name.
column = Optional(database_name + '.') + Optional(table_in_column + '.') + column_name

# Unsigned integer, kept as a string of digits.
integer_num = Regex('[0-9]+')
Example #47
0
    ParserElement,
)

# Turn on pyparsing's packrat memoization for all parser elements.
ParserElement.enablePackrat()

# Punctuation kept in the results (Literal) ...
COLON, LBRACK, RBRACK, LBRACE, RBRACE, TILDE, CARAT = map(Literal, ":[]{}~^")
# ... and parentheses dropped from the results (Suppress).
LPAR, RPAR = map(Suppress, "()")
and_ = CaselessKeyword("AND")
or_ = CaselessKeyword("OR")
not_ = CaselessKeyword("NOT")
to_ = CaselessKeyword("TO")
keyword = and_ | or_ | not_

expression = Forward()

# A word is a run of plain characters and backslash-escaped special
# characters.
valid_word = Regex(r'([a-zA-Z0-9*_+.-]|\\[!(){}\[\]^"~*?\\:])+').setName("word")
# Unescape: protect "\\" with a chr(127) placeholder, strip the remaining
# backslashes, then turn the placeholder back into a single backslash.
valid_word.setParseAction(lambda t: t[0].replace("\\\\", chr(127)).replace("\\", "").replace(chr(127), "\\"))

string = QuotedString('"')

# Modifiers; the name in the trailing call is the results name.
required_modifier = Literal("+")("required")
prohibit_modifier = Literal("-")("prohibit")
integer = Regex(r"\d+").setParseAction(lambda t: int(t[0]))
proximity_modifier = Group(TILDE + integer("proximity"))
number = Regex(r"\d+(\.\d+)?").setParseAction(lambda t: float(t[0]))
# '~' with an optional number; 0.5 is used when the number is omitted.
fuzzy_modifier = TILDE + Optional(number, default=0.5)("fuzzy")

term = Forward()
field_name = valid_word.copy().setName("fieldname")
# Range searches: [lower TO upper] and {lower TO upper}.
incl_range_search = Group(LBRACK + term("lower") + to_ + term("upper") + RBRACK)
excl_range_search = Group(LBRACE + term("lower") + to_ + term("upper") + RBRACE)
Example #48
0
    ":<=": "lte",
    ":>": "gt",
    ":>=": "gte",
}

# Parsing grammar

AND = CaselessKeyword("AND")
# OR is wrapped in Optional, so it may be left out where this element is used.
OR = Optional(CaselessKeyword("OR"))
NOT = CaselessKeyword("NOT")

# Search operator
OPERATOR = oneOf(OPERATOR_MAP.keys())

# Field name, explicitly excluding URL-like patterns.  The negative lookahead
# rejects any name that starts with http, ftp, https or mailto.
FIELD = Regex(r"""(?!http|ftp|https|mailto)[a-zA-Z_]+""")

# Match token
# A word starts with any character other than a space or a parenthesis; when
# longer than one character it contains no space or quote and does not end
# with a closing parenthesis.
WORD = Regex(r"""[^ \(\)]([^ '"]*[^ '"\)])?""")
DATE = Word("0123456789:.-T")

# Date range
RANGE = "[" + DATE + "to" + DATE + "]"

# Match value
# r"..." marks a regular-expression string; otherwise a single- or
# double-quoted string or a bare word.
REGEX_STRING = "r" + RawQuotedString('"')
STRING = REGEX_STRING | RawQuotedString("'") | RawQuotedString('"') | WORD

# Single term, either field specific or not
TERM = (FIELD + OPERATOR + (RANGE | STRING)) | STRING
Example #49
0
    def define_dot_parser(self):
        """Build and return the pyparsing grammar for the DOT language.

        Based on the grammar http://www.graphviz.org/doc/info/lang.html

        The grammar elements are wired to this object's ``_proc_*`` and
        ``_main_graph_stmt`` methods as parse actions before the top-level
        expression is returned.

        Returns:
            The top-level ``graphparser`` expression, configured to ignore
            ``//`` and ``#`` line comments as well as C-style comments.
        """
        # punctuation
        colon  = Literal(":")
        lbrace = Suppress("{")
        rbrace = Suppress("}")
        lbrack = Suppress("[")
        rbrack = Suppress("]")
        lparen = Literal("(")
        rparen = Literal(")")
        equals = Suppress("=")
        comma  = Literal(",")
        semi   = Suppress(";")
        at     = Literal("@")
        minus  = Literal("-")
        pluss  = Suppress("+")

        # keywords
        strict_    = CaselessLiteral("strict")
        graph_     = CaselessLiteral("graph")
        digraph_   = CaselessLiteral("digraph")
        subgraph_  = CaselessLiteral("subgraph")
        node_      = CaselessLiteral("node")
        edge_      = CaselessLiteral("edge")

        # Every punctuation character except '_' plus all whitespace; these
        # are the characters that terminate an unquoted alphastring_.
        punctuation_ = "".join( [ c for c in string.punctuation if c not in '_' ] ) +string.whitespace
        # token definitions

        identifier = Word(alphanums + "_" ).setName("identifier")

        #double_quoted_string = QuotedString('"', multiline=True,escChar='\\',
        #    unquoteResults=True) # dblQuotedString
        double_quoted_string = Regex(r'\"(?:\\\"|\\\\|[^"])*\"', re.MULTILINE)
        double_quoted_string.setParseAction(removeQuotes)
        # Several double-quoted strings joined by '+' are combined into a
        # single token (the '+' itself is suppressed).
        quoted_string = Combine(double_quoted_string+
            Optional(OneOrMore(pluss+double_quoted_string)),adjacent=False)
        alphastring_ = OneOrMore(CharsNotIn(punctuation_))

        def parse_html(s, loc, toks):
            # Re-wrap the text matched between the outer '<' '>' pair.
            return '<<%s>>' % ''.join(toks[0])


        opener = '<'
        closer = '>'
        try:
            html_text = pyparsing.nestedExpr( opener, closer,
                (( CharsNotIn(
                    opener + closer ).setParseAction( lambda t:t[0] ))
                )).setParseAction(parse_html)
        except Exception:
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt / SystemExit are not swallowed while falling
            # back to the simpler html label expression.
            log.debug('nestedExpr not available.')
            log.warning('Old version of pyparsing detected. Version 1.4.8 or '
                        'later is recommended. Parsing of html labels may not '
                        'work properly.')
            html_text = Combine(Literal("<<") + OneOrMore(CharsNotIn(",]")))


        # Alternatives are tried in the order written.
        ID = ( alphastring_ | html_text |
            quoted_string | #.setParseAction(strip_quotes) |
            identifier ).setName("ID")


        float_number = Combine(Optional(minus) +
            OneOrMore(Word(nums + "."))).setName("float_number")

        righthand_id =  (float_number | ID ).setName("righthand_id")

        port_angle = (at + ID).setName("port_angle")

        port_location = ((OneOrMore(Group(colon + ID)) |
            Group(colon + lparen + ID + comma + ID + rparen))).setName("port_location")

        port = Combine((Group(port_location + Optional(port_angle)) |
            Group(port_angle + Optional(port_location)))).setName("port")

        node_id = (ID + Optional(port))
        a_list = OneOrMore(ID + Optional(equals + righthand_id) +
            Optional(comma.suppress())).setName("a_list")

        attr_list = OneOrMore(lbrack + Optional(a_list) +
            rbrack).setName("attr_list").setResultsName('attrlist')

        attr_stmt = ((graph_ | node_ | edge_) + attr_list).setName("attr_stmt")

        edgeop = (Literal("--") | Literal("->")).setName("edgeop")

        # stmt_list and edge_point are recursive, so they are declared as
        # Forward placeholders here and filled in with '<<' further down.
        stmt_list = Forward()
        graph_stmt = (lbrace + Optional(stmt_list) +
            rbrace + Optional(semi) ).setName("graph_stmt")


        edge_point = Forward()

        edgeRHS = OneOrMore(edgeop + edge_point)
        edge_stmt = edge_point + edgeRHS + Optional(attr_list)

        subgraph = (Optional(subgraph_,'') + Optional(ID,'') + Group(graph_stmt)).setName("subgraph").setResultsName('ssubgraph')

        edge_point << (subgraph | graph_stmt | node_id )

        node_stmt = (node_id + Optional(attr_list) + Optional(semi)).setName("node_stmt")

        assignment = (ID + equals + righthand_id).setName("assignment")
        stmt =  (assignment | edge_stmt | attr_stmt | subgraph | graph_stmt | node_stmt).setName("stmt")
        stmt_list << OneOrMore(stmt + Optional(semi))

        # Top level: optional 'strict' (defaulting to 'notstrict'), the graph
        # kind, an optional ID (defaulting to ''), then the braced body.
        graphparser = ( (Optional(strict_,'notstrict') + ((graph_ | digraph_)) +
            Optional(ID,'') + lbrace + Group(Optional(stmt_list)) +rbrace).setResultsName("graph") )

        singleLineComment = Group("//" + restOfLine) | Group("#" + restOfLine)


        # actions
        graphparser.ignore(singleLineComment)
        graphparser.ignore(cStyleComment)
        node_id.setParseAction(self._proc_node_id)
        assignment.setParseAction(self._proc_attr_assignment)
        a_list.setParseAction(self._proc_attr_list)
        edge_stmt.setParseAction(self._proc_edge_stmt)
        node_stmt.setParseAction(self._proc_node_stmt)
        attr_stmt.setParseAction(self._proc_default_attr_stmt)
        attr_list.setParseAction(self._proc_attr_list_combine)
        subgraph.setParseAction(self._proc_subgraph_stmt)
        #graph_stmt.setParseAction(self._proc_graph_stmt)
        graphparser.setParseAction(self._main_graph_stmt)
        return graphparser
Example #50
0
# Punctuation tokens; all are suppressed so they never appear in results.
LBRACKET = L("[").suppress()
RBRACKET = L("]").suppress()
LPAREN = L("(").suppress()
RPAREN = L(")").suppress()
COMMA = L(",").suppress()
SEMICOLON = L(";").suppress()
AT = L("@").suppress()

# An identifier starts with ALPHANUM; any run of '-', '_' or '.' inside it
# must be followed by another ALPHANUM, so it cannot end in punctuation.
PUNCTUATION = Word("-_.")
IDENTIFIER_END = ALPHANUM | (ZeroOrMore(PUNCTUATION) + ALPHANUM)
IDENTIFIER = Combine(ALPHANUM + ZeroOrMore(IDENTIFIER_END))

NAME = IDENTIFIER("name")
EXTRA = IDENTIFIER

# A URL is '@' followed by any run of non-space characters (named "url").
URI = Regex(r'[^ ]+')("url")
URL = (AT + URI)

# Comma-separated extras inside square brackets; the list may be empty.
EXTRAS_LIST = EXTRA + ZeroOrMore(COMMA + EXTRA)
EXTRAS = (LBRACKET + Optional(EXTRAS_LIST) + RBRACKET)("extras")

# Version specifier patterns are taken from the Specifier / LegacySpecifier
# classes' own regex strings.
VERSION_PEP440 = Regex(Specifier._regex_str, re.VERBOSE | re.IGNORECASE)
VERSION_LEGACY = Regex(LegacySpecifier._regex_str, re.VERBOSE | re.IGNORECASE)

# '^' (pyparsing Or) evaluates both alternatives and keeps the longest match.
VERSION_ONE = VERSION_PEP440 ^ VERSION_LEGACY
# One or more specifiers; the suppressed commas are restored via joinString
# so the combined token reads "spec,spec,...".
VERSION_MANY = Combine(VERSION_ONE + ZeroOrMore(COMMA + VERSION_ONE),
                       joinString=",",
                       adjacent=False)("_raw_spec")
# The specifier list may be parenthesised, bare, or absent altogether.
_VERSION_SPEC = Optional(((LPAREN + VERSION_MANY + RPAREN) | VERSION_MANY))
# Replace the tokens with the raw specifier string, or '' when it is empty.
_VERSION_SPEC.setParseAction(lambda s, l, t: t._raw_spec or '')
Example #51
0
        return [(t[0][0].port, t[0][1].port)]

def normalize_ip(t):
    """Return the parsed address as an ``ip/mask`` string.

    ``t`` must expose ``ip`` and ``mask`` attributes. When ``t.mask`` is
    falsy the suffix defaults to "32"; otherwise ``t.mask.mask`` is used.
    """
    if t.mask:
        suffix = str(t.mask.mask)
    else:
        suffix = "32"
    return t.ip + "/" + suffix


# A single port number, converted by to_int and exposed as 'port'.
port = Group(Word(nums).setParseAction(to_int)('port'))
# Two ports separated by one or more '-' characters (the dash is suppressed).
port_range = Group((port + Word("-").suppress() + port)('range'))

# '^' keeps the longest match, so "80-90" is read as a range rather than as
# the single port 80; to_port_range then post-processes the result.
normalized_port_range = (port ^ port_range).setParseAction(to_port_range)

ports  = delimitedList(normalized_port_range)('ports')

# IP addresses, name of another group, or sg-*
# Raw strings keep \w, \d and \- as regex escapes instead of relying on
# Python passing unknown string escapes through (which emits a warning on
# current interpreters). The pattern text itself is unchanged.
security_group = Regex(r"sg-[\w\d]+")
group_name = Regex(r"[\w\d\-]+")

# Optional "/<digits>" suffix; the digits are converted by to_int.
mask = Word("/") + Word(nums).setParseAction(to_int)('mask')
# Dotted quad (four digit groups) plus the optional mask, rewritten to an
# "ip/mask" string by normalize_ip.
ip= (Combine(Word(nums) + ('.' + Word(nums))*3)('ip') + Optional(mask)('mask')).setParseAction(normalize_ip)

# Full rule: optional protocol, optional port_ marker, the port list, then
# exactly one target (ip, security group id, or group name; longest wins).
parser = Optional(protocol)('protocol') + \
         Optional(port_) + \
         ports + \
         (ip.setResultsName('ip_and_mask') ^ security_group.setResultsName('security_group') ^ group_name('group_name'))


class Rule(object):

    def __init__(self, protocol, from_port, to_port, address=None, group=None, group_name=None):
        """constructs a new rule
Example #52
0
	def getkw_bnf(self):
		"""Build and return the pyparsing grammar for the input format.

		The grammar accepts any number of entries followed by end of input.
		An entry is a section (``name {`` ... ``}``, optionally tagged as
		``name <tag> {``), a ``$name ... $end`` data block, a vector
		(``name = [ ... ]``) or a keyword (``name = value``). Python-style
		``#`` comments are ignored.

		Parse actions are bound to this object's ``conv_*``, ``store_*``,
		``add_sect`` and ``pop_sect`` methods.
		"""
		sect_begin   = Literal("{").suppress()
		sect_end   = Literal("}").suppress()
		array_begin   = Literal("[").suppress()
		array_end   = Literal("]").suppress()
		tag_begin   = Literal("<").suppress()
		tag_end   = Literal(">").suppress()
		eql   = Literal("=").suppress()
		dmark = Literal('$').suppress()
		end_data=Literal('$end').suppress()
		prtable = alphanums+r'!$%&*+-./<>?@^_|~'
		# Raw strings: \d and \. are regex escapes, not Python string escapes.
		ival=Regex(r'[-]?\d+')
		dval=Regex(r'-?\d+\.\d*([eE]?[+-]?\d+)?')
		lval=Regex('([Yy]es|[Nn]o|[Tt]rue|[Ff]alse|[Oo]n|[Oo]ff)')
	
		# Helper definitions

		# NOTE(review): setParseAction is called on the shared quotedString
		# object itself, not on a copy -- confirm that this is intended.
		kstr= quotedString.setParseAction(removeQuotes) ^ \
				dval ^ ival ^ lval ^ Word(prtable)
		name = Word(alphas+"_",alphanums+"_")
		vec=array_begin+delimitedList(dval ^ ival ^ lval ^ Word(prtable) ^ \
				Literal("\n").suppress() ^ \
				quotedString.setParseAction(removeQuotes))+array_end
		sect=name+sect_begin
		tag_sect=name+Group(tag_begin+name+tag_end)+sect_begin

		# Grammar
		keyword = name + eql + kstr
		vector = name + eql + vec
		data=Combine(dmark+name)+SkipTo(end_data)+end_data
		section=Forward()
		sect_def=(sect | tag_sect ) #| vec_sect)
		# Named 'entry' rather than 'input' so the builtin is not shadowed.
		entry=section | data | vector | keyword
		section << sect_def+ZeroOrMore(entry) + sect_end

		# Parsing actions
		ival.setParseAction(self.conv_ival)
		dval.setParseAction(self.conv_dval)
		lval.setParseAction(self.conv_lval)
		keyword.setParseAction(self.store_key)
		vector.setParseAction(self.store_vector)
		data.setParseAction(self.store_data)
		sect.setParseAction(self.add_sect)
		tag_sect.setParseAction(self.add_sect)
		sect_end.setParseAction(self.pop_sect)

		bnf=ZeroOrMore(entry) + StringEnd().setFailAction(parse_error)
		bnf.ignore(pythonStyleComment)
		return bnf
Example #53
0
from pyparsing import (Literal, CaselessKeyword, Forward, Regex, QuotedString, Suppress,
    Optional, Group, FollowedBy, operatorPrecedence, opAssoc, ParseException, ParserElement)
ParserElement.enablePackrat()

# One Literal per punctuation character, in the order listed in the string.
COLON,LBRACK,RBRACK,LBRACE,RBRACE,TILDE,CARAT = map(Literal,":[]{}~^")
# Parentheses are suppressed from the parse results.
LPAR,RPAR = map(Suppress,"()")
and_ = CaselessKeyword("AND")
or_ = CaselessKeyword("OR")
not_ = CaselessKeyword("NOT")
to_ = CaselessKeyword("TO")
# Boolean operator keywords only; to_ is deliberately not part of this set.
keyword = and_ | or_ | not_

# Placeholder for the recursive expression; it is not filled in within this
# excerpt.
expression = Forward()

# A word is a run of plain characters and/or backslash-escaped special
# characters.
valid_word = Regex(r'([a-zA-Z0-9*_+.-]|\\[!(){}\[\]^"~*?\\:])+').setName("word")
# Unescape: park each escaped backslash as chr(127), strip the remaining
# backslashes, then turn every chr(127) back into a single backslash.
valid_word.setParseAction(
    lambda t : t[0].replace('\\\\',chr(127)).replace('\\','').replace(chr(127),'\\')
    )

# NOTE(review): this name would shadow the stdlib `string` module if that is
# imported elsewhere in the file -- verify.
string = QuotedString('"')

required_modifier = Literal("+")("required")
prohibit_modifier = Literal("-")("prohibit")
integer = Regex(r"\d+").setParseAction(lambda t:int(t[0]))
# "~N": proximity given as an integer.
proximity_modifier = Group(TILDE + integer("proximity"))
number = Regex(r'\d+(\.\d+)?').setParseAction(lambda t:float(t[0]))
# "~" optionally followed by a number; 0.5 is used when the number is absent.
fuzzy_modifier = TILDE + Optional(number, default=0.5)("fuzzy")

term = Forward()
# Copy so that renaming does not affect valid_word itself.
field_name = valid_word.copy().setName("fieldname")
Example #54
0
class DdlParse(DdlParseBase):
    """DDL parser.

    Holds a pyparsing grammar for ``CREATE TABLE`` statements as class
    attributes and exposes :meth:`parse`, which fills a ``DdlParseTable``
    with the schema, table name, columns and table-level constraints found
    in the DDL text.
    """

    # Single-character punctuation, all suppressed from the parse results.
    _LPAR, _RPAR, _COMMA, _SEMICOLON, _DOT, _DOUBLEQUOTE, _BACKQUOTE, _SPACE = map(
        Suppress, "(),;.\"` ")
    # Keyword tokens are built from a ", "-separated list; note that
    # _CHAR_SEMANTICS and _BYTE_SEMANTICS map to the keywords CHAR and BYTE.
    _CREATE, _TABLE, _TEMP, _CONSTRAINT, _NOT_NULL, _PRIMARY_KEY, _UNIQUE, _UNIQUE_KEY, _FOREIGN_KEY, _REFERENCES, _KEY, _CHAR_SEMANTICS, _BYTE_SEMANTICS = \
        map(CaselessKeyword, "CREATE, TABLE, TEMP, CONSTRAINT, NOT NULL, PRIMARY KEY, UNIQUE, UNIQUE KEY, FOREIGN KEY, REFERENCES, KEY, CHAR, BYTE".replace(", ", ",").split(","))
    _TYPE_UNSIGNED, _TYPE_ZEROFILL = \
        map(CaselessKeyword, "UNSIGNED, ZEROFILL".replace(", ", ",").split(","))
    _COL_ATTR_DISTKEY, _COL_ATTR_SORTKEY, _COL_ATTR_CHARACTER_SET = \
        map(CaselessKeyword, "DISTKEY, SORTKEY, CHARACTER SET".replace(", ", ",").split(","))
    _FK_MATCH = \
        CaselessKeyword("MATCH") + Word(alphanums + "_")
    _FK_ON, _FK_ON_OPT_RESTRICT, _FK_ON_OPT_CASCADE, _FK_ON_OPT_SET_NULL, _FK_ON_OPT_NO_ACTION = \
        map(CaselessKeyword, "ON, RESTRICT, CASCADE, SET NULL, NO ACTION".replace(", ", ",").split(","))
    _FK_ON_DELETE = \
        _FK_ON + CaselessKeyword("DELETE") + (_FK_ON_OPT_RESTRICT | _FK_ON_OPT_CASCADE | _FK_ON_OPT_SET_NULL | _FK_ON_OPT_NO_ACTION)
    _FK_ON_UPDATE = \
        _FK_ON + CaselessKeyword("UPDATE") + (_FK_ON_OPT_RESTRICT | _FK_ON_OPT_CASCADE | _FK_ON_OPT_SET_NULL | _FK_ON_OPT_NO_ACTION)
    # Identifier quoting character (backquote or double quote), dropped from
    # the results.
    _SUPPRESS_QUOTE = _BACKQUOTE | _DOUBLEQUOTE

    # "--" line comment, suppressed.
    _COMMENT = Suppress("--" + Regex(r".+"))


    # CREATE [TEMP] TABLE [IF NOT EXISTS] [schema.]table ( <items> ), where
    # each item is a comment, an ignored KEY index, a table-level constraint
    # (named "constraint") or a column definition (named "column").
    _CREATE_TABLE_STATEMENT = Suppress(_CREATE) + Optional(_TEMP)("temp") + Suppress(_TABLE) + Optional(Suppress(CaselessKeyword("IF NOT EXISTS"))) \
        + Optional(_SUPPRESS_QUOTE) + Optional(Word(alphanums + "_")("schema") + Optional(_SUPPRESS_QUOTE) + _DOT + Optional(_SUPPRESS_QUOTE)) + Word(alphanums + "_<>")("table") + Optional(_SUPPRESS_QUOTE) \
        + _LPAR \
        + delimitedList(
            OneOrMore(
                _COMMENT
                |
                # Ignore Index
                Suppress(_KEY + Word(alphanums + "_'`() "))
                |
                # Table-level constraint
                Group(
                    Optional(Suppress(_CONSTRAINT) + Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_")("name") + Optional(_SUPPRESS_QUOTE))
                    + (
                        (
                            (_PRIMARY_KEY ^ _UNIQUE ^ _UNIQUE_KEY ^ _NOT_NULL)("type")
                            + Optional(_SUPPRESS_QUOTE) + Optional(Word(alphanums + "_"))("name") + Optional(_SUPPRESS_QUOTE)
                            + _LPAR + Group(delimitedList(Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_") + Optional(_SUPPRESS_QUOTE)))("constraint_columns") + _RPAR
                        )
                        |
                        (
                            (_FOREIGN_KEY)("type")
                            + _LPAR + Group(delimitedList(Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_") + Optional(_SUPPRESS_QUOTE)))("constraint_columns") + _RPAR
                            + Optional(Suppress(_REFERENCES)
                                + Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_")("references_table") + Optional(_SUPPRESS_QUOTE)
                                + _LPAR + Group(delimitedList(Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_") + Optional(_SUPPRESS_QUOTE)))("references_columns") + _RPAR
                                + Optional(_FK_MATCH)("references_fk_match")  # MySQL
                                + Optional(_FK_ON_DELETE)("references_fk_on_delete")  # MySQL
                                + Optional(_FK_ON_UPDATE)("references_fk_on_update")  # MySQL
                            )
                        )
                    )
                )("constraint")
                |
                # Column definition: name, type (with optional length and
                # modifiers), optional array brackets, optional attributes.
                Group(
                    ((_SUPPRESS_QUOTE + Word(alphanums + " _")("name") + _SUPPRESS_QUOTE) ^ (Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_")("name") + Optional(_SUPPRESS_QUOTE)))
                    + Group(
                        Group(
                            Word(alphanums + "_")
                            + Optional(CaselessKeyword("WITHOUT TIME ZONE") ^ CaselessKeyword("WITH TIME ZONE") ^ CaselessKeyword("PRECISION") ^ CaselessKeyword("VARYING"))
                        )("type_name")
                        + Optional(_LPAR + Regex(r"[\d\*]+\s*,*\s*\d*")("length") + Optional(_CHAR_SEMANTICS | _BYTE_SEMANTICS)("semantics") + _RPAR)
                        + Optional(_TYPE_UNSIGNED)("unsigned")
                        + Optional(_TYPE_ZEROFILL)("zerofill")
                    )("type")
                    + Optional(Word(r"\[\]"))("array_brackets")
                    + Optional(
                        # Zero-width guard: do not read a trailing "--"
                        # comment as column attributes.
                        Regex(r"(?!--)", re.IGNORECASE)
                        # '&' (pyparsing Each): attributes may appear in any
                        # order.
                        + Group(
                            Optional(Regex(r"\b(?:NOT\s+)NULL?\b", re.IGNORECASE))("null")
                            & Optional(Regex(r"\bAUTO_INCREMENT\b", re.IGNORECASE))("auto_increment")
                            & Optional(Regex(r"\b(UNIQUE|PRIMARY)(?:\s+KEY)?\b", re.IGNORECASE))("key")
                            & Optional(Regex(
                                r"\bDEFAULT\b\s+(?:((?:[A-Za-z0-9_\.\'\" -\{\}]|[^\x01-\x7E])*\:\:(?:character varying)?[A-Za-z0-9\[\]]+)|(?:\')((?:\\\'|[^\']|,)+)(?:\')|(?:\")((?:\\\"|[^\"]|,)+)(?:\")|([^,\s]+))",
                                re.IGNORECASE))("default")
                            & Optional(Regex(r"\bCOMMENT\b\s+(\'(\\\'|[^\']|,)+\'|\"(\\\"|[^\"]|,)+\"|[^,\s]+)", re.IGNORECASE))("comment")
                            & Optional(Regex(r"\bENCODE\s+[A-Za-z0-9]+\b", re.IGNORECASE))("encode")  # Redshift
                            & Optional(_COL_ATTR_DISTKEY)("distkey")  # Redshift
                            & Optional(_COL_ATTR_SORTKEY)("sortkey")  # Redshift
                            & Optional(Suppress(_COL_ATTR_CHARACTER_SET) + Word(alphanums + "_")("character_set"))  # MySQL
                        )("constraint")
                    )
                )("column")
                |
                _COMMENT
            )
        )("columns")

    # Top-level expression: one or more comments / CREATE TABLE statements.
    _DDL_PARSE_EXPR = Forward()
    _DDL_PARSE_EXPR << OneOrMore(_COMMENT | _CREATE_TABLE_STATEMENT)

    def __init__(self, ddl=None, source_database=None):
        """Store the DDL text and create the table object that parse() fills.

        :param ddl: DDL script (may also be supplied later via ``ddl`` or
            ``parse``)
        :param source_database: passed to the base class and to the table
        """
        super().__init__(source_database)
        self._ddl = ddl
        self._table = DdlParseTable(source_database)

    @property
    def source_database(self):
        """
        Source database option

        :param source_database: enum DdlParse.DATABASE
        """
        return super().source_database

    @source_database.setter
    def source_database(self, source_database):
        # Name the class explicitly. super(self.__class__, self.__class__)
        # resolves back to this very setter when called on an instance of a
        # subclass, which recurses without end.
        super(DdlParse, DdlParse).source_database.__set__(self, source_database)
        # Keep the table's source database in step with the parser's.
        self._table.source_database = source_database

    @property
    def ddl(self):
        """DDL script"""
        return self._ddl

    @ddl.setter
    def ddl(self, ddl):
        self._ddl = ddl

    def parse(self, ddl=None, source_database=None):
        """
        Parse DDL script.

        :param ddl: DDL script; when given it replaces the stored script
        :param source_database: when given it replaces the stored option
        :return: DdlParseTable, Parsed table define info.
        :raises ValueError: if no DDL script has been supplied
        """

        if ddl is not None:
            self._ddl = ddl

        if source_database is not None:
            self.source_database = source_database

        if self._ddl is None:
            raise ValueError("DDL is not specified")

        ret = self._DDL_PARSE_EXPR.parseString(self._ddl)
        # print(ret.dump())

        if "schema" in ret:
            self._table.schema = ret["schema"]

        self._table.name = ret["table"]
        self._table.is_temp = "temp" in ret

        # NOTE(review): the table created in __init__ is reused here, so a
        # second parse() call on the same instance appends to the same
        # columns collection -- confirm whether that is intended.
        for ret_col in ret["columns"]:

            if ret_col.getName() == "column":
                # add column
                col = self._table.columns.append(
                    column_name=ret_col["name"],
                    data_type_array=ret_col["type"],
                    array_brackets=ret_col['array_brackets']
                    if "array_brackets" in ret_col else None,
                    constraint=ret_col['constraint']
                    if "constraint" in ret_col else None)

            elif ret_col.getName() == "constraint":
                # set column constraint
                for col_name in ret_col["constraint_columns"]:
                    col = self._table.columns[col_name]

                    if ret_col["type"] == "PRIMARY KEY":
                        col.not_null = True
                        col.primary_key = True
                    elif ret_col["type"] in ["UNIQUE", "UNIQUE KEY"]:
                        col.unique = True
                    elif ret_col["type"] == "NOT NULL":
                        col.not_null = True

        return self._table
# Punctuation tokens; Suppress matches the character but drops it from the
# parse results.
LPAREN = Suppress('(')
RPAREN = Suppress(')')
QUOTE = Suppress('"')
COMMA = Suppress(',')
AT = Suppress('@')
EQUALS = Suppress('=')
HASH = Suppress('#')


def bracketed(expr):
    """ Build a matcher for `expr` wrapped in parentheses or curly brackets.

    The parenthesised form is tried first, then the curly-bracket form.
    """
    in_parens = LPAREN + expr + RPAREN
    in_curlies = LCURLY + expr + RCURLY
    return in_parens | in_curlies


# Define parser components for strings (the hard bit)
# Whitespace is significant inside strings, so these character runs must not
# skip leading whitespace.
chars_no_curly = Regex(r"[^{}]+")
chars_no_curly.leaveWhitespace()
chars_no_quotecurly = Regex(r'[^"{}]+')
chars_no_quotecurly.leaveWhitespace()
# Curly string is some stuff without curlies, or nested curly sequences
# (declared as a Forward because the definition refers to itself; each nested
# curly sequence is kept as its own group)
curly_string = Forward()
curly_item = Group(curly_string) | chars_no_curly
curly_string << LCURLY + ZeroOrMore(curly_item) + RCURLY
# quoted string is either just stuff within quotes, or stuff within quotes, within
# which there is nested curliness
quoted_item = Group(curly_string) | chars_no_quotecurly
quoted_string = QUOTE + ZeroOrMore(quoted_item) + QUOTE

# Numbers can just be numbers. Only integers though.
# (the match is left as a string; no conversion is attached here)
number = Regex('[0-9]+')
Example #56
0
    elif val.startswith('`') and val.endswith('`'):
        val = "'" + val[1:-1].replace("``", "`") + "'"
    elif val.startswith("+"):
        val = val[1:]
    un = ast.literal_eval(val)
    return un


def to_string(instring, tokensStart, retTokens):
    """Parse action turning a quoted SQL string token into a literal dict.

    The first token's outer quote characters are dropped, every doubled
    single quote inside becomes a backslash-escaped quote, and the result is
    evaluated as a Python string literal.

    Returns a dict of the form ``{"literal": <str>}``.
    """
    token = retTokens[0]
    inner = token[1:-1]
    escaped = inner.replace("''", "\\'")
    return {"literal": ast.literal_eval("'" + escaped + "'")}


# NUMBERS
# realNum requires a decimal point (digits on at least one side) with an
# optional exponent; intNum is digits with an optional non-negative exponent.
# Both are converted by unquote.
realNum = Regex(
    r"[+-]?(\d+\.\d*|\.\d+)([eE][+-]?\d+)?").addParseAction(unquote)
intNum = Regex(r"[+-]?\d+([eE]\+?\d+)?").addParseAction(unquote)

# STRINGS, NUMBERS, VARIABLES
# Each quoted form allows its own quote character doubled, or any
# backslash-escaped character, inside the quotes.
sqlString = Regex(r"\'(\'\'|\\.|[^'])*\'").addParseAction(to_string)
identString = Regex(r'\"(\"\"|\\.|[^"])*\"').addParseAction(unquote)
mysqlidentString = Regex(r'\`(\`\`|\\.|[^`])*\`').addParseAction(unquote)
# Dotted identifier: each part is "*", a plain word, or a quoted identifier.
# The ~RESERVED lookahead rejects reserved words at the start.
ident = Combine(~RESERVED + (delimitedList(Literal("*") | Word(alphas + "_", alphanums + "_$")
                                           | identString | mysqlidentString, delim=".", combine=True))).setName("identifier")

# EXPRESSIONS
# Forward placeholder for the recursive expression grammar.
expr = Forward()

# CASE
case = (
    CASE +
Example #57
0
# [165] PN_CHARS_U ::= PN_CHARS_BASE | '_'
# (the *_re names are character-class fragments meant to be placed inside
# [...], not complete patterns)
PN_CHARS_U_re = "_" + PN_CHARS_BASE_re

# [167] PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]
PN_CHARS_re = u"\\-0-9\u00B7\u0300-\u036F\u203F-\u2040" + PN_CHARS_U_re
# PN_CHARS = Regex(u'[%s]'%PN_CHARS_re, flags=re.U)

# [168] PN_PREFIX ::= PN_CHARS_BASE ((PN_CHARS|'.')* PN_CHARS)?
PN_PREFIX = Regex(ur"[%s](?:[%s\.]*[%s])?" % (PN_CHARS_BASE_re, PN_CHARS_re, PN_CHARS_re), flags=re.U)

# [140] PNAME_NS ::= PN_PREFIX? ':'
# (leaveWhitespace: the ':' must follow the prefix with no whitespace between)
PNAME_NS = Optional(Param("prefix", PN_PREFIX)) + Suppress(":").leaveWhitespace()

# [173] PN_LOCAL_ESC ::= '\' ( '_' | '~' | '.' | '-' | '!' | '$' | '&' | "'" | '(' | ')' | '*' | '+' | ',' | ';' | '=' | '/' | '?' | '#' | '@' | '%' )

# NOTE(review): the character class below also accepts '"', which is not in
# the production quoted above -- confirm whether that is intended.
PN_LOCAL_ESC = Regex("\\\\[_~\\.\\-!$&\"'()*+,;=/?#@%]")
# Drop the leading backslash, keeping only the escaped character.
PN_LOCAL_ESC.setParseAction(lambda x: x[0][1:])

# [172] HEX ::= [0-9] | [A-F] | [a-f]
# HEX = Regex('[0-9A-Fa-f]') # not needed

# [171] PERCENT ::= '%' HEX HEX
PERCENT = Regex("%[0-9a-fA-F]{2}")
# Replace the token with the character whose code point the two hex digits
# encode.
PERCENT.setParseAction(lambda x: unichr(int(x[0][1:], 16)))

# [170] PLX ::= PERCENT | PN_LOCAL_ESC
PLX = PERCENT | PN_LOCAL_ESC

# [169] PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX ) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX) )?
PN_LOCAL = Combine(
    (Regex(u"[%s0-9:]" % PN_CHARS_U_re, flags=re.U) | PLX)
Example #58
0
def templateparser(monitor):
    """Return a Parser for templates, which also accepts empty input.

    A GFactory is created with Text.pa as the scalar parse action, no
    boundary characters and the given monitor. Its template grammar is tried
    first; the fallback matches only the empty string and is handled by
    Text.pa.
    """
    factory = GFactory(scalarpa = Text.pa, boundarychars = '', monitor = monitor)
    template_grammar = factory.create(factory.templatepa)
    empty_input = Regex('^$').setParseAction(Text.pa)
    return Parser(template_grammar | empty_input)
Example #59
0
  return PackageDirective(t[0])
# Parse actions with the (s, l, t) signature: each one wraps the parsed
# tokens t in the corresponding node class and ignores s and l.
def import_directive_fn(s,l,t):
  """Wrap the first token in an ImportDirective."""
  return ImportDirective(t[0])
def field_fn(s,l,t):
  """Build a Field from all tokens, passed as positional arguments."""
  return Field(*t)
def service_definition_fn(s,l,t):
  """Wrap the first token in a ServiceDefintion."""
  # NOTE(review): "ServiceDefintion" looks like a misspelling of
  # "ServiceDefinition" -- verify against the class declaration.
  return ServiceDefintion(t[0])
def top_level_statement_fn(s,l,t):
  """Wrap the first token in a TopLevelStatement."""
  return TopLevelStatement(t[0])
def parser_fn(s,l,t):
  """Wrap the first token in a Parser."""
  return Parser(t[0])

# An identifier starts with a letter or underscore, followed by letters,
# digits or underscores; identifier_fn post-processes the match.
identifier = Word(alphas+"_",alphanums+"_").setName("identifier")
identifier.setParseAction(identifier_fn)

# Optionally signed run of digits; integer_fn post-processes the match.
integer = Regex(r"[+-]?\d+")
integer.setParseAction(integer_fn)

# Punctuation tokens, all suppressed from the parse results.
LBRACE = Suppress('{')
RBRACE = Suppress('}')
LBRACK = Suppress('[')
RBRACK = Suppress(']')
LPAR = Suppress('(')
RPAR = Suppress(')')
EQ = Suppress('=');
SEMI = Suppress(';')

# Keyword tokens.
SYNTAX = Keyword('syntax')
IMPORT = Keyword('import')
PACKAGE = Keyword('package')
MESSAGE = Keyword('message')
Example #60
0
 def gettext(pa):
     """Return a text matcher with parse action `pa` attached.

     The matcher accepts a run of characters that are not '$', whitespace or
     one of the boundary characters, and does not skip leading whitespace.
     """
     excluded = re.escape(boundarychars)
     text_expr = Regex(r"[^$\s%s]+" % excluded)
     text_expr = text_expr.leaveWhitespace()
     return text_expr.setParseAction(pa)