Example #1
def indentedBlock(expr, indent_stack, indent=True):
    """Define space-delimited indentation blocks.

    Helper method for defining space-delimited indentation blocks, such as
    those used to define block statements in Python source code.

    There is also a version in pyparsing, but it doesn't seem to work well
    with JSONAlchemy cfg files.
    """
    def check_sub_indent(string, location, tokens):
        """Check the indentation."""
        cur_col = col(location, string)
        if cur_col > indent_stack[-1]:
            indent_stack.append(cur_col)
        else:
            raise ParseException(string, location, "not a subentry")

    def check_unindent(string, location, tokens):
        """Check the 'undentation'."""
        if location >= len(string):
            return
        cur_col = col(location, string)
        if not(cur_col < indent_stack[-1] and cur_col <= indent_stack[-2]):
            raise ParseException(string, location, "not an unindent")

    def do_unindent():
        """Unindent."""
        indent_stack.pop()

    indent = lineEnd.suppress() + empty + empty.copy()\
        .setParseAction(check_sub_indent)
    undent = FollowedBy(empty).setParseAction(check_unindent)
    undent.setParseAction(do_unindent)

    return indent + expr + undent
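
A minimal usage sketch (not part of the original snippet): it assumes the helper above sits in a module where pyparsing's lineEnd, empty, col, ParseException and FollowedBy are imported, and it parses one indented block under a heading.

from pyparsing import (FollowedBy, Group, OneOrMore, ParseException, Suppress,
                       Word, alphas, col, empty, lineEnd)

indent_stack = [1]
item = Word(alphas)
block = Group(Word(alphas) + Suppress(":") +
              indentedBlock(Group(OneOrMore(item)), indent_stack))

# The heading plus its indented children come back as nested groups,
# e.g. [['colors', ['red', 'green', 'blue']]].
print(block.parseString("colors:\n    red\n    green\n    blue\n"))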
Example #3
 def __defineDictGrammar(self):
     """Function defines the grammar for parsing a string(mainly) into:
     1. Value: Value could be any one of the following
         1. Simple types such as:
             a. numbers: all are floating point
             b. boolean: [true,false], [yes, no]
             c. Strings within double quotes
             d. alphanumerics
         2. Dictionary
         3. List
     2. Dictionary: Set of key value pairs. ':' delimits values from keys.
     ',' delimites different pairs. '{}' delimits a dictionary.
     3. List: Ordered list of values delimited by ','
     pyparsing parse actions are used to convert the tokens into pyton native
     datatype such 'float' for floating point, 'dict' for dictionary and 
     'list' for list. The parser supports arbitrary nesting of the above 
     tokens. Both the nesting and datastructure type integrity is preserved
     in the resulting python representation.
     Application: 
     One of the main use of the grammar is to scrap web pages and extract a
     combination of JSON and javascript-like HTML attributes into python
     data structures. Simpler use cases include extracting supported simple 
     data types from say, HTML tables.  
     """
     dictDefn = Forward()
     listDefn = Forward()
     key = (QuotedString('"') | Word(alphas)) + FollowedBy(Literal(":"))
     key.setName("key")
     self.value = MatchFirst([
         self.unknown, self.floatNumber, self.boolean,
         QuotedString('"'),
         Word(alphanums), dictDefn, listDefn
     ])
     self.value.setName("value")
     # dict_element = Group(key + self.KDELIM + self.value)
     dict_element = Group(key + self.KDELIM + self.value) + \
                    FollowedBy(Or([Literal(","), Literal("}")]))
     lde = Group(Dict(delimitedList(dict_element)))
     dictDefn << ((self.quoteit(lde, '{', '}')) | lde)
     self.dictDefn = dictDefn
     self.dictDefn.setName("Dictionary")
     listDefn << self.quoteit(Group(delimitedList(self.value)), '[', ']')
     self.listDefn = listDefn
     self.listDefn.setName("List")
     self.topElement = Or([self.dictDefn, self.listDefn, self.value])
     self.parseTypes[WebParser.PSTYPE_DEFAULT] = self.topElement
     self.parseTypes[WebParser.PSTYPE_DICT] = self.dictDefn
     return
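
A minimal standalone sketch of the same idea (not the WebParser grammar itself, and without its unknown-value and boolean handling): nested {} / [] text is converted into native Python dicts, lists and numbers through parse actions.

from pyparsing import (Forward, Group, QuotedString, Suppress, Word,
                       alphanums, delimitedList, pyparsing_common)

value = Forward()
key = QuotedString('"') | Word(alphanums)
pair = Group(key + Suppress(":") + value)

# A parse action turns the grouped pairs into a real dict.
dict_defn = Suppress("{") + Group(delimitedList(pair)) + Suppress("}")
dict_defn.setParseAction(lambda t: {k: v for k, v in t[0]})

# Likewise for lists.
list_defn = Suppress("[") + Group(delimitedList(value)) + Suppress("]")
list_defn.setParseAction(lambda t: [t[0].asList()])

value <<= (pyparsing_common.number | QuotedString('"') | dict_defn |
           list_defn | Word(alphanums))

print(value.parseString('{"price": 12.5, "tags": ["a", "b"], "meta": {"ok": yes}}')[0])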
Example #4
 def expr(self) -> ParserElement:
     return Combine(
         "{code" + Optional(
             ":" + Word(alphanums + "#+").setResultsName("lang") +
             FollowedBy(Literal("}") | Literal("|")), ) + ... + "}" +
         SkipTo("{code}").setResultsName("text") +
         "{code}", ).setParseAction(self.action)
Example #5
def _define_vs():
    KEY = Word(alphas + '_$', alphanums +
               '_$').setName('identifier').setResultsName('key')  # noqa
    VALUE = originalTextFor(_define_json()).setResultsName('value')
    # validator name, eg: int
    NAME = Optional(
        Optional(Suppress('?')) +
        pyparsing_common.identifier.setResultsName('name'))  # noqa
    # refers, eg: @xx@yy
    REFERS = Group(ZeroOrMore(Suppress('@') +
                              pyparsing_common.identifier)).setResultsName(
                                  'refers')  # noqa
    # args, eg: (), (1), (1,2,3), ([1,2], {"key":"value"}, "Any JSON")
    ARGS = Group(
        Optional(
            Suppress('(') + Optional(delimitedList(VALUE)) +
            Suppress(')'))).setResultsName('args')  # noqa
    # key-value, eg: key, key=True, key=[1,2,3]
    KW = Group(KEY + Optional(Suppress('=') + VALUE))
    # kwargs, eg: &key1&key2=True&key3=[1,2,3]
    KWARGS = Group(ZeroOrMore(Suppress('&') + KW)).setResultsName('kwargs')
    # a leading xxx is the key in: xxx@yyy, xxx?yyy, $self&abc
    # a leading xxx other than '$self' is the validator name in: xxx(1,2), xxx&abc, xxx
    SELF = Literal('$self').setResultsName('key')
    VS_KEY = Optional((KEY + FollowedBy(Word('@?'))) | SELF)
    VS_DEF = REFERS + NAME + ARGS + KWARGS
    return StringStart() + VS_KEY + VS_DEF + StringEnd()
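
A tiny standalone illustration (not the full grammar above) of the FollowedBy lookahead used in VS_KEY: a leading identifier is kept as the key only when '@' or '?' follows it, otherwise it is left to be read as the validator name.

from pyparsing import FollowedBy, Optional, StringEnd, Suppress, Word, alphas

ident = Word(alphas + '_')
lead_key = ident('key') + FollowedBy(Word('@?'))
vs = Optional(lead_key) + Optional(Suppress(Word('@?'))) + ident('name') + StringEnd()

print(vs.parseString('userid?int').asDict())  # both 'key' and 'name' are set
print(vs.parseString('int').asDict())         # only 'name' is set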
Example #6
def parse_connection_str(connstr):
    ## Grammar for connection syntax
    digits = "0123456789"
    othervalid = "_.@"
    identifier = Word(alphas + digits + othervalid)
    nodename = identifier.setResultsName('nodename')

    outputnames = delimitedList(identifier).setResultsName('outputnames')
    inputnames = delimitedList(identifier).setResultsName('inputnames')

    # middle nodes have both inputs and outputs
    middlenode = Group(nodename + Suppress('(') + inputnames +
                       Optional("|" + outputnames) +
                       Suppress(")")).setResultsName('middlenode')
    # first node has only outputs
    headnode = (nodename + Suppress("(") + outputnames +
                Suppress(")")).setResultsName('headnode')
    # last node has only inputs
    tailnode = (nodename + Suppress("(") + inputnames +
                Suppress(")")).setResultsName('tailnode')

    # connect head -> [middle ->] tail
    connect = Group(headnode +
                    Group(ZeroOrMore(Suppress("->") + middlenode +
                                     FollowedBy("->"))).setResultsName('middlenodes') +
                    Suppress("->") + tailnode).setResultsName('nodes')

    connectlist = Group(connect +
                        ZeroOrMore(Suppress(";") + connect)).setResultsName('connects')

    parsed = connectlist.parseString(connstr)
    check_numconnections(parsed)
    return parsed
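
A hedged usage sketch: it assumes the function above lives in a module with the pyparsing names it uses imported, and it stubs out the check_numconnections helper that the snippet does not show; the node and port names are invented.

from pyparsing import (FollowedBy, Group, Optional, Suppress, Word, ZeroOrMore,
                       alphas, delimitedList)

def check_numconnections(parsed):
    # Stand-in for the helper the original module defines elsewhere.
    pass

parsed = parse_connection_str("reader(raw) -> cleaner(raw|clean) -> writer(clean)")
print(parsed.dump())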
Example #7
    def __defineBasicTypes(self):
        self.KDELIM = Suppress(":")
        sign = Word("+-", max=1) + FollowedBy(Word(nums))
        crncy = Word(nums) + ZeroOrMore(Suppress(",") + Word(nums)) + \
                Optional(Literal(".") + Word(nums))
        baseUnknownValue = Keyword("?")
        self.unknown = self.completeType(baseUnknownValue, "UNKNOWN_VAL",
                                         lambda t: np.nan)

        floatNumberBasic = Combine(Optional(sign) + \
                                   Or([Word(nums),
                                       crncy,
                                       Regex(r'[0-9]+(\.\d*)?([eE]\d+)?')])) + \
                           Optional(Suppress("%"))
        self.floatNumber = self.completeType(floatNumberBasic, "float",
                                             lambda t: float(t[0]))

        baseBoolValue = Or([
            CaselessKeyword("false"),
            CaselessKeyword("true"),
            CaselessKeyword("yes"),
            CaselessKeyword("no")
        ])
        self.boolean = self.completeType(baseBoolValue, "bool",
                                         lambda t: WebParser.boolMaps[t[0]])

        ratingKeywords = [CaselessKeyword(k).setParseAction( \
            lambda t: Ratings.ratingMaps[t[0].lower()]) \
                          for k in Ratings.ratingMaps.keys()]
        ratingKeywords.append(Keyword("--").setParseAction(lambda t: np.nan))
        self.ratings = self.completeType(Or(ratingKeywords), "ratings")
        self.parseTypes[WebParser.PSTYPE_RATINGS] = self.ratings
Example #8
    def _parser_piece_text():
        """
        Return PyParsing element to the text of a markdown link.
        """
        # No double line breaks in markdown links
        double_line_break = (Word("\n\r", exact=1) + Optional(Word(" \t")) +
                             Word("\n\r", exact=1))

        # We will ignore escaped square brackets when match finding balanced
        # square brackets.
        ignore = Literal("\\[") | Literal("\\]")

        # The text parser will match text inside balanced brackets using the
        # nestedExpr helper function from PyParsing.
        #
        # Next we define the content that is allowed inside the brackets.
        content_character = ~FollowedBy(double_line_break) + CharsNotIn(
            "[]", exact=1)
        # Normally with nestedExpr, the content parser would be separately applied
        # to each whitespace-separated string within the nested expression.
        # However, since we set whitespaceChars to '', the content parser is
        # applied to characters one-at-a-time.
        #
        # If this ever changes, we would need to change content to something
        # like Combine(OneOrMore(~ignore + content_character))
        content = content_character
        text = originalTextFor(
            nestedExpr(
                opener="[",
                closer="]",
                content=content,
                ignoreExpr=ignore,
            )).setResultsName("text")
        text.addParseAction(lambda s, l, toks: toks[0][1:-1])
        return text
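
A standalone simplification of the balanced-bracket matching described in the comments above (it drops the double-line-break guard): nestedExpr matches the bracketed link text, escaped brackets are skipped via ignoreExpr, and the parse action strips the outer brackets.

from pyparsing import CharsNotIn, Literal, nestedExpr, originalTextFor

ignore = Literal("\\[") | Literal("\\]")
content = CharsNotIn("[]", exact=1)  # CharsNotIn does not skip whitespace
text = originalTextFor(nestedExpr(opener="[", closer="]",
                                  content=content, ignoreExpr=ignore))
text.addParseAction(lambda s, l, toks: toks[0][1:-1])

# Keeps nested and escaped brackets inside the matched text.
print(text.parseString("[outer [inner] \\] text](http://example.com)")[0])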
Example #9
    def __init__(self):
        if not ParserElement: return
        with warnings.catch_warnings():
            # In Python 2.6, pyparsing throws warnings on its own code.
            warnings.simplefilter("ignore")
            orOperator = Suppress(
                CaselessLiteral("OR")).setResultsName("OR_OPERATOR")
            quoteContents = Group(Word(ALLCHARS.replace("\"", "")))
            quoteContents.leaveWhitespace()
            quotedWord = Group(Suppress('"') + quoteContents +
                               Suppress('"')).setResultsName("QUOTES")
            plainWord = Group(
                NotAny(CaselessLiteral("OR")) +
                Word(WORDCHARS.replace("-", ""), WORDCHARS)).setResultsName(
                    "PLAINWORD")
            anyWord = Group(
                NotAny('(') + ~FollowedBy(')') +
                Word(ALLWORDCHARS)).setResultsName("ANYWORD")
            keyWord = Group(
                Combine(
                    Optional("-") + Word(string.ascii_letters) + Literal(":") +
                    (Word(WORDCHARS) | quotedWord))).setResultsName("KEYWORD")
            notExpr = Group(
                Suppress("-") + NotAny(string.whitespace) +
                (quotedWord | plainWord)).setResultsName("NOT")
            word = Group(keyWord | notExpr | quotedWord
                         | plainWord).setResultsName("WORD")

            grammar = Forward()
            parens = Forward()

            orOperand = Group(word | parens | notExpr
                              | anyWord).setResultsName("OR_OPERAND")
            orExpr = Group(
                FollowedBy(orOperand + orOperator + orOperand) +
                Group(orOperand + OneOrMore(orOperator + orOperand))
            ).setResultsName("OR_EXPRESSION")
            oneExpr = Group(orExpr | parens | word
                            | anyWord).setResultsName("ONE EXPRESSION")
            parens <<= Group(
                Group(Optional("-")).setResultsName("NOT_PARENTHESIS") +
                Suppress("(") + ZeroOrMore(parens | grammar) +
                Suppress(")")).setResultsName("PARENTHESIS")
            grammar <<= ((oneExpr + grammar)
                         | oneExpr).setResultsName("GRAMMAR")
            self._grammar = grammar
Example #10
    def _construct_parser(self):
        '''Construct and return parser.'''
        field = Word(alphanums + '_.')
        operator = oneOf(list(self._operators.keys()))
        value = Word(alphanums + '-_,./*@+')
        quoted_value = quotedString('quoted_value').setParseAction(removeQuotes)

        condition = Group(
            field + operator + (quoted_value | value)
        )('condition')

        not_ = Optional(Suppress(CaselessKeyword('not')))('not')
        and_ = Suppress(CaselessKeyword('and'))('and')
        or_ = Suppress(CaselessKeyword('or'))('or')

        expression = Forward()
        parenthesis = Suppress('(') + expression + Suppress(')')
        previous = condition | parenthesis

        for conjunction in (not_, and_, or_):
            current = Forward()

            if conjunction in (and_, or_):
                conjunction_expression = (
                    FollowedBy(previous + conjunction + previous)
                    + Group(
                        previous + OneOrMore(conjunction + previous)
                    )(conjunction.resultsName)
                )

            elif conjunction in (not_, ):
                conjunction_expression = (
                    FollowedBy(conjunction.expr + current)
                    + Group(conjunction + current)(conjunction.resultsName)
                )

            else:  # pragma: no cover
                raise ValueError('Unrecognised conjunction.')

            current <<= (conjunction_expression | previous)
            previous = current

        expression <<= previous
        return expression('expression')
Example #11
def __build_grammar():
    expr = Forward()

    k_select = CaselessLiteral("SELECT")
    k_from = CaselessLiteral("FROM")
    k_where = CaselessLiteral("WHERE")
    k_and = CaselessLiteral("AND")
    k_instances = CaselessLiteral("INSTANCES")
    qs = QuotedString("'", escQuote="''")

    identifier = Combine(
        Word(alphas + "_", exact=1) +
        Optional(Word(nums + alphas + "_")))("identifier")
    navigation = Group(identifier +
                       ZeroOrMore(Suppress(".") + identifier))("navigation")

    filter_predicate = Group(navigation + Suppress("=") +
                             (qs('value') | (Suppress('(') + expr('subquery') +
                                             Suppress(')'))))('predicate')
    where_clause = Group(
        Suppress(k_where) + filter_predicate +
        ZeroOrMore(Suppress(k_and) + filter_predicate))('where')

    # Pre filters
    impl = Optional(Suppress(CaselessLiteral("implementation"))) + qs('impl')
    cic = Suppress(CaselessLiteral("offer")) + qs('cic')
    lc = Suppress(CaselessLiteral("lc")) + qs('lc')
    envt = Suppress(CaselessLiteral("environment")) + qs('envt')
    pre_filter = Optional(envt) + Optional(lc) + Optional(cic) + Optional(
        impl) + FollowedBy(k_instances)

    # Dict query (only select some elements and navigate)
    nl_expr = Group(navigation + ZeroOrMore(Suppress(',') + navigation) +
                    FollowedBy(k_from))('selector')

    # The sum of all fears
    select = Group(
        Suppress(k_select) + Optional(nl_expr + Suppress(k_from)) +
        pre_filter + Suppress(k_instances) + Optional(where_clause) +
        Optional(CaselessLiteral('WITH COMPUTATIONS')('compute')))('select')

    expr << select
    return expr
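
A hedged usage sketch: it assumes __build_grammar sits in a module where the pyparsing names it uses are imported; the query text is made up to match the grammar's shape (selector list, quoted pre-filter, INSTANCES, WHERE predicate).

from pyparsing import (CaselessLiteral, Combine, Forward, FollowedBy, Group,
                       Optional, QuotedString, Suppress, Word, ZeroOrMore,
                       alphas, nums)

query = __build_grammar()
result = query.parseString(
    "SELECT name, owner.login FROM 'webserver' INSTANCES "
    "WHERE environment.name = 'prod'")
print(result.dump())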
Example #12
 def expr(self) -> ParserElement:
     MENTION = Combine(
         "[" + Optional(
             SkipTo("|", failOn="]") + Suppress("|"),
             default="",
         ) + "~" + Optional(CaselessLiteral("accountid:")) +
         Word(alphanums + ":-").setResultsName("accountid") + "]", )
     return ((StringStart()
              | Optional(PrecededBy(White(), retreat=1), default=" ")) +
             MENTION.setParseAction(self.action) +
             (StringEnd() | Optional(FollowedBy(
                 White() | Char(punctuation, excludeChars="[") | MENTION),
                                     default=" ")))
Example #13
    def expr(self) -> ParserElement:
        NON_ALPHANUMS = Regex(r"\W", flags=re.UNICODE)
        TOKEN = Suppress(self.TOKEN)
        IGNORE = White() + TOKEN | self.get_ignore_expr()
        ELEMENT = Combine(
            TOKEN + (~White() & ~Char(self.TOKEN)) +
            SkipTo(TOKEN, ignore=IGNORE, failOn="\n") + TOKEN +
            FollowedBy(NON_ALPHANUMS | StringEnd()), )

        return (StringStart()
                | PrecededBy(NON_ALPHANUMS, retreat=1)) + Combine(
                    ELEMENT.setParseAction(self.action) +
                    Optional(~ELEMENT, default=" "), )
Example #14
 def _define_grammar(self):
     g = {}
     label = Literal('Contents') | Literal('Caption title') | \
             Literal('Sub-caption') | Literal('Half-title') | \
             Literal('Footline') | Literal('Comments') | \
             Literal('Modificatons') | Literal('Errors') | \
             Literal('DMF') | Literal('ADF')
     copies_label = LineStart() + Literal('Copies')
     all_chars = u''.join(
         unichr(c) for c in xrange(65536)
         if unicodedata.category(unichr(c)).startswith('L'))
     section_separator = LineEnd() + FollowedBy(label | copies_label
                                                | StringEnd())
     section = SkipTo(section_separator)
     library = Combine(Word(all_chars) + Literal(u'-') + Word(all_chars))
     copy_separator = LineEnd() + FollowedBy(library) | \
                      LineEnd() + StringEnd() | StringEnd()
     copy = library + SkipTo(copy_separator) + Suppress(copy_separator)
     g['comments'] = Suppress('Comments') + SkipTo(section_separator)
     g['code'] = StringStart() + SkipTo(LineEnd()) + Suppress(LineEnd())
     g['title'] = Suppress(g['code']) + Suppress(LineEnd()) + section
     g['copies'] = Suppress(copies_label) + OneOrMore(Group(copy))
     return g
Example #15
    def parse_pabl(self, raw_pabl):
        INDENT = lineEnd.suppress() + empty + empty.copy().setParseAction(
            self.check_sub_indent)
        UNDENT = FollowedBy(empty).setParseAction(self.check_unindent)
        UNDENT.setParseAction(self.unindent)

        terminator = Literal(';').suppress()
        comment = Literal('#') + restOfLine
        item_name = Word(alphas, alphanums + '_')
        variable = Word(alphas, alphanums + '_.')
        variable_as = (variable + 'as' + item_name)

        stmt = Forward()
        suite = Group(
            OneOrMore(empty + stmt.setParseAction(self.check_peer_indent)))
        suite.ignore(comment)

        item_start = Literal('@item').suppress()
        item_end = Literal(':').suppress()
        permission_start = Literal('@permissions')

        item_decl = (item_start + item_name.setResultsName('item') + item_end)
        item_defn = Group(item_decl + INDENT + suite + UNDENT)

        permission_decl = (permission_start + Group(
            delimitedList(item_name).setResultsName('permissions')) + item_end)
        permission_defn = Group(permission_decl + INDENT + suite + UNDENT)

        fieldList = delimitedList(
            Group(variable_as) | variable
        ).setResultsName('fields') + terminator

        stmt << (item_defn | fieldList | Group(permission_defn))

        parseTree = suite.parseString(raw_pabl)

        return parseTree
Example #16
    def pythonVar(self):
        if not self._pythonVar:
            from pyparsing import (ParserElement, Word, alphas, alphanums,
                                   Literal, Suppress, FollowedBy)
            _ws = ' \t'
            ParserElement.setDefaultWhitespaceChars(_ws)
            ident = Word(alphas + "_", alphanums + "_")
            lparen = Literal("(")
            dot = Literal(".")
            dollar = Literal("$")

            self._pythonVar = Suppress(dollar) + ident + ~FollowedBy(
                (dot + ident) | lparen)
            self._pythonVar.setParseAction(self.onPythonVar)
        return self._pythonVar
Example #17
 def create_parser(self):
     LBRACKET = Suppress("[")
     RBRACKET = Suppress("]")
     EQ = Suppress("=")
     SLASH = Suppress("/")
     KEY = Word(alphanums + "-")
     VALUE = Word(alphanums + "-/.:_+") | QuotedString('"')
     FIND = LBRACKET + Group(
         Literal("find") + Literal("default-name") + EQ + VALUE) + RBRACKET
     KVP = Group(KEY + EQ + VALUE)
     BEGIN = LineStart() + SLASH + restOfLine.setParseAction(self.on_begin)
     ADD_OP = LineStart() + Literal("add") + ZeroOrMore(KVP).setParseAction(
         self.on_add)
     SET_OP = (LineStart() + Literal("set") +
               (Optional(FIND | KEY + ~FollowedBy(EQ) | QuotedString('"')) +
                ZeroOrMore(KVP)).setParseAction(self.on_set))
     CONFIG = ZeroOrMore(BEGIN | ADD_OP | SET_OP)
     return CONFIG
Example #18
def _make_arabic_parser():
    escapechar = "//"
    # wordchars = printables
    # for specialchar in '*?^():"{}[] ' + escapechar:
    #    wordchars = wordchars.replace(specialchar, "")
    # wordtext = Word(wordchars)
    alephba = u"""
                abcdefghijklmnopqrstuvwxyz_
                األآإـتنمكطدجحخهعغفقثصضشسيبئءؤرىةوزظذ
                """

    wordtext = CharsNotIn(u'//*؟^():"{}[]$><%~#،,\' +-|')
    escape = Suppress( escapechar ) \
             + ( Word( printables, exact = 1 ) | White( exact = 1 ) )
    wordtoken = Combine(OneOrMore(wordtext | escape))

    # A plain old word.
    plainWord = Group(wordtoken).setResultsName("Word")

    # A wildcard word containing * or ?.
    wildchars = Word(u"؟?*")
    # Start with word chars and then have wild chars mixed in
    wildmixed = wordtoken + OneOrMore(wildchars + Optional(wordtoken))
    # Or, start with wildchars, and then either a mixture of word and wild chars
    # , or the next token
    wildstart = wildchars + (OneOrMore(wordtoken + Optional(wildchars)) |
                             FollowedBy(White() | StringEnd()))
    wildcard = Group(Combine(wildmixed | wildstart)).setResultsName("Wildcard")

    # A range of terms
    startfence = Literal("[")
    endfence = Literal("]")
    rangeitem = QuotedString('"') | wordtoken
    to = (Keyword(u"الى") | Keyword(u"إلى") | Keyword("To") |
          Keyword("to") | Keyword("TO"))

    openstartrange = Group(Empty()) + Suppress(to + White()) + Group(rangeitem)

    openendrange = Group(rangeitem) + Suppress(White() + to) + Group(Empty())
    normalrange = (Group(rangeitem) + Suppress(White() + to + White()) +
                   Group(rangeitem))
    range = Group(startfence + (normalrange | openstartrange | openendrange) +
                  endfence).setResultsName("Range")

    # synonyms
    syn_symbol = Literal("~")
    synonym = Group(syn_symbol + wordtoken).setResultsName("Synonyms")

    # antonyms
    ant_symbol = Literal("#")
    antonym = Group(ant_symbol + wordtoken).setResultsName("Antonyms")

    # derivation level 1,2
    derive_symbole = Literal(u"<") | Literal(u">")
    derivation = Group(OneOrMore(derive_symbole) +
                       wordtoken).setResultsName("Derivation")

    # spellerrors
    # spellerrors=Group(QuotedString('\'')).setResultsName("Errors")
    spellerrors_symbole = Literal(u"%")
    spellerrors = Group(spellerrors_symbole +
                        wordtoken).setResultsName("SpellErrors")

    # shakl:must uplevel to boostable
    tashkil_symbol = Literal("'")
    tashkil = Group(tashkil_symbol + ZeroOrMore(wordtoken | White()) +
                    tashkil_symbol).setResultsName("Tashkil")

    # tuple search (root,pattern,type)
    starttuple = Literal("{")
    endtuple = Literal("}")
    bettuple = Literal(u"،") | Literal(",")
    wordtuple = Group(Optional(wordtoken))
    tuple = Group(starttuple + wordtuple + ZeroOrMore(bettuple + wordtuple) +
                  endtuple).setResultsName("Tuple")

    # A word-like thing
    generalWord = range | wildcard | plainWord | tuple | antonym | synonym | \
        derivation | tashkil | spellerrors

    # A quoted phrase
    quotedPhrase = Group(QuotedString('"')).setResultsName("Quotes")

    expression = Forward()

    # Parentheses can enclose (group) any expression
    parenthetical = Group(
        (Suppress("(") + expression + Suppress(")"))).setResultsName("Group")

    boostableUnit = generalWord | quotedPhrase
    boostedUnit = Group(boostableUnit + Suppress("^") +
                        Word("0123456789", ".0123456789")).setResultsName("Boost")

    # The user can flag that a parenthetical group, quoted phrase, or word
    # should be searched in a particular field by prepending 'fn:', where fn is
    # the name of the field.
    fieldableUnit = parenthetical | boostedUnit | boostableUnit
    fieldedUnit = Group((Word(alephba + "_") | Word(alphanums + "_")) +
                        Suppress(':') + fieldableUnit).setResultsName("Field")

    # Units of content
    unit = fieldedUnit | fieldableUnit

    # A unit may be "not"-ed.
    operatorNot = Group(Suppress(Keyword(u"ليس") | Keyword(u"NOT")) +
                        Suppress(White()) + unit).setResultsName("Not")
    generalUnit = operatorNot | unit

    andToken = Keyword(u"و") | Keyword(u"AND")
    orToken = Keyword(u"أو") | Keyword(u"او") | Keyword(u"OR")
    andNotToken = Keyword(u"وليس") | Keyword(u"ANDNOT")

    operatorAnd = Group(
        (generalUnit + Suppress(White()) + Suppress(andToken) +
         Suppress(White()) + expression) |
        (generalUnit + Suppress(Literal(u"+")) + expression)
    ).setResultsName("And")

    operatorOr = Group(
        (generalUnit + Suppress(White()) + Suppress(orToken) +
         Suppress(White()) + expression) |
        (generalUnit + Suppress(Literal(u"|")) + expression)
    ).setResultsName("Or")

    operatorAndNot = Group(
        (unit + Suppress(White()) + Suppress(andNotToken) +
         Suppress(White()) + expression) |
        (unit + Suppress(Literal(u"-")) + expression)
    ).setResultsName("AndNot")

    expression <<= (OneOrMore(operatorAnd | operatorOr | operatorAndNot |
                              generalUnit | Suppress(White())) | Empty())

    toplevel = Group(expression).setResultsName("Toplevel") + StringEnd()

    return toplevel.parseString
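
A hedged usage sketch: it assumes the function above is defined in a module where the pyparsing names it relies on are imported; the query text is invented and simply exercises a fielded quoted phrase, an AND, and a boosted word.

from pyparsing import (CharsNotIn, Combine, Empty, Forward, Group, Keyword,
                       Literal, OneOrMore, Optional, QuotedString, StringEnd,
                       Suppress, White, Word, alphanums, printables)

parse = _make_arabic_parser()
print(parse(u'title:"hello world" AND test^2').dump())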
Example #19
    Word,
    WordEnd,
    WordStart,
    ZeroOrMore,
)

from data_lists import tlds, schemes

alphanum_word_start = WordStart(wordChars=alphanums)
alphanum_word_end = WordEnd(wordChars=alphanums)

# the label definition ignores the fact that labels should not end in an hyphen
label = Word(initChars=alphanums, bodyChars=alphanums + '-', max=63)
domain_tld = Or(tlds)
domain_name = (alphanum_word_start + Combine(
    Combine(OneOrMore(label + ('.' + FollowedBy(Word(alphanums + '-')))))
    ('domain_labels') + domain_tld('tld')) +
               alphanum_word_end).setParseAction(downcaseTokens)

ipv4_section = (Word(
    nums, asKeyword=True,
    max=3).setParseAction(lambda x: str(int(x[0]))).addCondition(
        lambda tokens: int(tokens[0]) < 256))
# basically, the grammar below starts matching at words that begin with a '.' or a number; words that begin with a '.' are matched on purpose so that they fail later in the grammar, because we do not want to match anything that starts with a '.'
ipv4_address = (alphanum_word_start + WordStart('.' + nums) +
                Combine((ipv4_section + '.') * 3 + ipv4_section) +
                NotAny(Regex(r'\.\S')) + alphanum_word_end)

hexadectet = Word(hexnums, min=1, max=4)
ipv6_address_full = alphanum_word_start + Combine((hexadectet + ":") * 7 +
                                                  hexadectet)
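
A standalone, simplified sketch of the IPv4 piece above (the original additionally anchors on alphanumeric word boundaries and rejects a trailing dot followed by a non-space): each octet is range-checked and normalized, and Combine glues the dotted quad back together.

from pyparsing import Combine, Word, nums

ipv4_section = Word(nums, max=3).setParseAction(
    lambda t: str(int(t[0]))).addCondition(lambda t: int(t[0]) < 256)
ipv4_address = Combine((ipv4_section + ".") * 3 + ipv4_section)

print(ipv4_address.parseString("010.001.2.254")[0])  # -> 10.1.2.254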
yes = "Yes, You can!"
print(grammar.parseString(yes))
print("length : ", len(grammar.parseString(yes)))

# http://pythonhosted.org/pyparsing/pyparsing.OneOrMore-class.html

# Class OneOrMore
# Repetition of one or more of the given expression.

# Parameters:

# expr - expression that must match one or more times
# stopOn - (default=None) - expression for a terminating sentinel (only required if the sentinel would ordinarily match the repetition expression)

data_word = Word(alphas)
label = data_word + FollowedBy(':')
attr_expr = Group(label + Suppress(':') +
                  OneOrMore(data_word).setParseAction(' '.join))
text = "shape: SQUARE posn: upper left color: BLACK"
print(text)
# Fail! read 'posn' as data instead of next label -> [['shape', 'SQUARE posn']]
OneOrMore(attr_expr).parseString(text).pprint()

# use stopOn attribute for OneOrMore to avoid reading label string as part of the data
attr_expr = Group(label + Suppress(':') +
                  OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
# [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]
OneOrMore(attr_expr).parseString(text).pprint()

# could also be written as
(attr_expr * (1,)).parseString(text).pprint()
Example #21
    tokens = list(result)
    node = tokens[0]

    for token in tokens[1:]:
        if isinstance(token, Identifier):
            node = Get(node, token)
        elif isinstance(token, FunCall):
            assert isinstance(token.function, Identifier)
            node = FunCall(Get(node, token.function), token.arguments)

    return node


orphan_function_call_paren = Forward().setName("orphan_function_call_paren")
member_access_token = dot + (orphan_function_call_paren | identifier_keyword)
leading_member_access_token = member_access_token + FollowedBy(
    member_access_token)
member_access = (
    (orphan_function_call_paren + FollowedBy(member_access_token) | identifier)
    + pOptional((leading_member_access_token)[...] + dot + identifier_keyword)
).setParseAction(__build_recursive_member_access)


# Operator
def __build_unary_operator(expr, pos, result):
    tokens = result[0].asList()
    assert len(tokens) == 2
    operator_symbol = tokens[0]
    operand = tokens[1]

    tree = UnOp(operator_symbol, operand)
Example #22
    Optional(OneOrMore(STATEMENT), default=None)(PROP_BODY))

ELSE = Group(
    Suppress(SYN_ELSE) +
    Optional(OneOrMore(STATEMENT), default=None)(PROP_BODY))

CONDITIONAL = Group(
    Group(
        Group(
            Group(
                IF(TYPE_IF) + ZeroOrMore(ELSEIF)(TYPE_ELSEIF) +
                Optional(ELSE(TYPE_ELSE))
                # StringEnd() is because EOF can end a conditional.
                # SYN_CLOSE_BRACE is because a closed block can end a conditional.
                + (Literal(SYN_ENDIF)(TYPE_ENDIF) | StringEnd()
                   | FollowedBy(SYN_CLOSE_BRACE))))(PROP_BODY))(
                       TYPE_CONDITIONAL))

ANON_BLOCK = Group(
    Literal(SYN_OPEN_BRACE)(TYPE_ANON_BLOCK) +
    Optional(OneOrMore(ROOT), default=None)(PROP_BODY)
    # StringEnd() is because EOF can end a block.
    + (Suppress(SYN_CLOSE_BRACE) | StringEnd()))

NAMED_BLOCK = Group(
    SYN_BLOCKS(TYPE_BLOCK) + ~SYN_KEYWORDS +
    Optional(Word(TOKEN)(PROP_VALUE)) + Literal(SYN_OPEN_BRACE) +
    Optional(OneOrMore(STATEMENT), default=None)(PROP_BODY) +
    (Suppress(SYN_CLOSE_BRACE) | StringEnd()))

EMPTY_BLOCK = Group(
Example #23
from regparser.grammar import atomic
from regparser.grammar.utils import keep_pos, Marker, QuickSearchable

period_section = Suppress(".") + atomic.section
part_section = atomic.part + period_section
marker_part_section = (
    keep_pos(atomic.section_marker).setResultsName("marker") + part_section)

depth6_p = atomic.em_roman_p | atomic.plaintext_level6_p
depth5_p = ((atomic.em_digit_p | atomic.plaintext_level5_p) +
            Optional(depth6_p))
depth4_p = atomic.upper_p + Optional(depth5_p)
depth3_p = atomic.roman_p + Optional(depth4_p)
depth2_p = atomic.digit_p + Optional(depth3_p)
depth1_p = atomic.lower_p + ~FollowedBy(atomic.upper_p) + Optional(depth2_p)
any_depth_p = QuickSearchable(depth1_p | depth2_p | depth3_p | depth4_p
                              | depth5_p | depth6_p)

depth3_c = atomic.upper_c + Optional(atomic.em_digit_c)
depth2_c = atomic.roman_c + Optional(depth3_c)
depth1_c = atomic.digit_c + Optional(depth2_c)
any_a = atomic.upper_a | atomic.digit_a

section_comment = atomic.section + depth1_c

section_paragraph = QuickSearchable(atomic.section + depth1_p)

mps_paragraph = QuickSearchable(marker_part_section + Optional(depth1_p))
ps_paragraph = part_section + Optional(depth1_p)
part_section_paragraph = QuickSearchable(atomic.part + Suppress(".") +
Example #24
    def __init__(self):
        self.filename = ""
        self.basedir = ""

        #############
        # Constants #
        #############
        self.possibleKeywords = [
            "AGGREGALLOWED", "AUTOPEN", "AXIS-VERSION", "BASEPERIOD",
            "CELLNOTE", "CELLNOTEX", "CFPRICES", "CHARSET", "CODEPAGE",
            "CODES", "CONFIDENTIAL", "CONTACT", "CONTENTS", "CONTVARIABLE",
            "COPYRIGHT", "CREATION-DATE", "DATA", "DATABASE", "DATANOTECELL",
            "DATANOTESUM", "DATASYMBOL1", "DATASYMBOL2", "DATASYMBOL3",
            "DATASYMBOL4", "DATASYMBOL5", "DATASYMBOL6", "DATASYMBOLNIL",
            "DATASYMBOLSUM", "DAYADJ", "DECIMAL", "DEFAULT-GRAPH",
            "DESCRIPTION", "DESCRIPTIONDEFAULT", "DIRECTORY-PATH", "DOMAIN",
            "DOUBLECOLUMN", "ELIMINATION", "HEADING", "HIERARCHIES",
            "HIERARCHYLEVELS", "HIERARCHYLEVELSOPEN", "HIERARCHYNAMES", "INFO",
            "INFOFILE", "KEYS", "LANGUAGE", "LANGUAGES", "LAST-UPDATED",
            "LINK", "MAP", "MATRIX", "NEXT-UPDATE", "NOTE", "NOTEX",
            "PARTITIONED", "PRECISION", "PRESTEXT", "PX-SERVER", "REFPERIOD",
            "ROUNDING", "SEASADJ", "SHOWDECIMALS", "SOURCE", "STOCKFA", "STUB",
            "SUBJECT-AREA", "SUBJECT-CODE", "SURVEY", "SYNONYMS", "TABLEID",
            "TIMEVAL", "TITLE", "UNITS", "UPDATE-FREQUENCY", "VALUENOTE",
            "VALUENOTEX", "VALUES", "VARIABLE-TYPE"
        ]
        self.mandatoryKeywords = [
            "CONTENTS", "DATA", "DECIMAL", "HEADING", "MATRIX", "STUB",
            "SUBJECT-AREA", "SUBJECT-CODE", "TITLE", "UNITS", "VALUES"
        ]
        self.languageAllowedKeywords = [
            "BASEPERIOD", "CELLNOTE", "CELLNOTEX", "CFPRICES", "CODES",
            "CONTACT", "CONTENTS", "CONTVARIABLE", "DATABASE", "DATANOTECELL",
            "DATANOTESUM", "DATASYMBOL1", "DATASYMBOL2", "DATASYMBOL3",
            "DATASYMBOL4", "DATASYMBOL5", "DATASYMBOL6", "DATASYMBOLNIL",
            "DATASYMBOLSUM", "DAYADJ", "DESCRIPTION", "DOMAIN", "DOUBLECOLUMN",
            "ELIMINATION", "HEADING", "HIERARCHIES", "HIERARCHYLEVELS",
            "HIERARCHYLEVELSOPEN", "HIERARCHYNAMES", "INFO", "INFOFILE",
            "KEYS", "LAST-UPDATED", "LINK", "MAP", "NOTE", "NOTEX",
            "PARTITIONED", "PRECISION", "PRESTEXT", "REFPERIOD", "SEASADJ",
            "SOURCE", "STOCKFA", "STUB", "SUBJECT-AREA", "SURVEY", "TIMEVAL",
            "TITLE", "UNITS", "VALUENOTE", "VALUENOTEX", "VALUES",
            "VARIABLE-TYPE"
        ]

        ###########
        # Buffers #
        ###########
        self.buffers = {
            "foundKeywords": [],
            "currentKeyword": "",
            "validLanguageCodes": [],
            "languageCode": "",
            "timeFormat": "",
            "rounding": "",
            "results": {},
        }

        ###########
        # Grammar #
        ###########
        # Utilities
        self.grammar = {}
        self.grammar["EOL"] = LineEnd().suppress()
        self.grammar["quote"] = Suppress(Regex("\"|\'"))
        self.grammar["number"] = Word(nums).setParseAction(
            self.convertToNumber)
        self.grammar["quotedString"] = (QuotedString('"') | QuotedString("'"))\
          .setParseAction(lambda tokens: tokens[0].replace("#", "\n"))
        self.grammar["quotedNumber"] = self.grammar["quote"] + self.grammar[
            "number"] + self.grammar["quote"]
        self.grammar["lparen"], self.grammar["rparen"], self.grammar[
            "lbracket"], self.grammar["rbracket"] = map(Suppress, "()[]")
        self.grammar["dots"] = Regex("\.{1,6}")
        self.grammar["quotedDots"] = self.grammar["quote"] + self.grammar[
            "dots"] + self.grammar["quote"]
        self.grammar["dataNumber"] = Combine(Optional("-") + Word(nums) +\
          Optional(Literal(".") + Word(nums))).setParseAction(self.convertToNumber)

        # Keywords
        self.grammar["baseKeyword"] = Word(alphanums.upper() + "-")\
          .setParseAction(self.handleKeyword)("keyword")
        self.grammar["tableSpecificKeyword"] = self.grammar[
            "baseKeyword"] + FollowedBy("=")
        self.grammar["variableSpecificKeyword"] = self.grammar["baseKeyword"] +\
          self.grammar["lparen"] +\
          self.grammar["quotedString"]("variable") +\
          self.grammar["rparen"]  + FollowedBy("=")
        self.grammar["valueSpecificKeyword"] = self.grammar["baseKeyword"] +\
          self.grammar["lparen"] + Group(self.grammar["quotedString"]("variable") + Suppress(",") +\
          self.grammar["quotedString"]("value")) + self.grammar["rparen"]  + FollowedBy("=")
        self.grammar["languageSpecificKeyword"] = (self.grammar["baseKeyword"] +\
          self.grammar["lbracket"] + Word(alphas).setParseAction(self.isValidLanguageCode)("language") +\
          self.grammar["rbracket"] + FollowedBy("=")).setParseAction(self.isLanguageAllowedKeyword)
        self.grammar["keyword"] = self.grammar["tableSpecificKeyword"] |\
          self.grammar["variableSpecificKeyword"] |\
          self.grammar["valueSpecificKeyword"] |\
          self.grammar["languageSpecificKeyword"]

        # Keyword values
        self.grammar["keywordValue"] = OneOrMore(self.grammar["quotedString"])\
          .setParseAction(lambda tokens: " ".join(tokens))
        self.grammar["keywordValues"] = Group(delimitedList((self.grammar["number"] |\
          self.grammar["keywordValue"])\
          .setParseAction(self.handleKeywordValue)("keywordValue")))("keywordValues") +\
          FollowedBy(";")

        # Time list values
        self.grammar["timeFormat"] = Regex("[AHQMW]1").setParseAction(
            self.setTimeFormat)("timeFormat")
        self.grammar["timeValues"] = Group(
            delimitedList(self.grammar["quotedNumber"])("timeValue"))
        self.grammar["timeSpan"] = Group(
            delimitedList(self.grammar["quotedNumber"]("timeValue"),
                          delim="-"))
        self.grammar["TLIST"] = Literal("TLIST") + self.grammar["lparen"] +\
          (self.grammar["timeFormat"] + ((self.grammar["rparen"] + Suppress(",") +\
          self.grammar["timeValues"])|(Suppress(",") + self.grammar["timeSpan"] +\
          self.grammar["rparen"]))).setParseAction(self.convertTimeList) + FollowedBy(";")

        self.grammar["keywordLine"] = Group(self.grammar["keyword"] + Suppress("=") +\
          (self.grammar["keywordValues"] | self.grammar["TLIST"]) +\
          Suppress(";"))

        # Data values
        self.grammar["observation"] = (self.grammar["quotedDots"]
                                       | self.grammar["dataNumber"])
        self.grammar["observationSeparator"] = White(" \t").suppress()
        self.grammar["observationLine"] = Group(delimitedList(self.grammar["observation"],\
          delim=self.grammar["observationSeparator"]).leaveWhitespace())
        self.grammar["observationLines"] = OneOrMore(self.grammar["observationLine"] +\
          Optional(Suppress(";")))("keywordValues")
        self.grammar["data"] = Group(Literal("DATA")("keyword") + Suppress("=") +\
          Optional(OneOrMore(self.grammar["EOL"])) + self.grammar["observationLines"])

        # Whole file
        self.grammar["pcaxisFile"] = OneOrMore(self.grammar["keywordLine"]) +\
          OneOrMore(self.grammar["EOL"]) + self.grammar["data"]
Example #25
from rdflib.py3compat import bytestype

ParserElement.setDefaultWhitespaceChars(" \n")


String = STRING_LITERAL1 | STRING_LITERAL2

RDFLITERAL = Comp('literal', Param('string', String) + Optional(
    Param('lang', LANGTAG.leaveWhitespace()
          ) | Literal('^^').leaveWhitespace(
    ) + Param('datatype', IRIREF).leaveWhitespace()))

NONE_VALUE = object()

EMPTY = FollowedBy(LineEnd()) | FollowedBy("\t")
EMPTY.setParseAction(lambda x: NONE_VALUE)

TERM = RDFLITERAL | IRIREF | BLANK_NODE_LABEL | NumericLiteral | BooleanLiteral

ROW = (EMPTY | TERM) + ZeroOrMore(Suppress("\t") + (EMPTY | TERM))
ROW.parseWithTabs()

HEADER = Var + ZeroOrMore(Suppress("\t") + Var)
HEADER.parseWithTabs()


class TSVResultParser(ResultParser):
    def parse(self, source):

        if isinstance(source.read(0), bytestype):
Example #26
def checkSubIndent(s,l,t):
    curCol = col(l,s)
    if curCol > indentStack[-1]:
        indentStack.append( curCol )
    else:
        raise ParseException(s,l,"not a subentry")

def checkUnindent(s,l,t):
    if l >= len(s): return
    curCol = col(l,s)
    if not(curCol < indentStack[-1] and curCol <= indentStack[-2]):
        raise ParseException(s,l,"not an unindent")

def doUnindent():
    indentStack.pop()
    
INDENT = lineEnd.suppress() + empty + empty.copy().setParseAction(checkSubIndent)
UNDENT = FollowedBy(empty).setParseAction(checkUnindent)
UNDENT.setParseAction(doUnindent)

stmt = Forward()
suite = Group( OneOrMore( empty + stmt.setParseAction( checkPeerIndent ) )  )

identifier = Word(alphas, alphanums)
funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
funcDef = Group( funcDecl + INDENT + suite + UNDENT )

rvalue = Forward()
funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
rvalue << (funcCall | identifier | Word(nums))
assignment = Group(identifier + "=" + rvalue)
stmt << ( funcDef | assignment | identifier )
Example #27
    Keyword("archive") + qualified_identifier("name")
)

resource_type = Group(
    raw_data("raw_data") |
    vector("vector") |
    multivector("multivector") |
    archive_resource("archive") |
    single_object("object") 
)

def _combine_list(t):
    return "".join(t[0].asList())

explicit_field_reference_prefix = Group(
    OneOrMore((Optional(".") + identifier + ~FollowedBy(',')))
).setParseAction(_combine_list)

explicit_reference = Group(
    Keyword("@explicit_reference") -
    "(" +
    explicit_field_reference_prefix("source_type") +
    "." +
    identifier("source_field") + "," + qualified_identifier("destination") +
    ")"
)

bound_implicitly = Group(
    Keyword("@bound_implicitly") -
    "(" +
    identifier("name") + ":" +
Example #28
    def get_grammar(self):
        """
        Defines our grammar for mathematical expressions.

        Possibly helpful:
            - BNF form of context-free grammar https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form
            - Some pyparsing docs http://infohost.nmt.edu/~shipman/soft/pyparsing/web/index.html
        """

        # Define + and -
        plus = Literal("+")

        # Also accept unicode emdash
        emdash = Literal("\u2014")
        emdash.setParseAction(lambda: "-")

        minus = Literal("-") | emdash
        plus_minus = plus | minus

        # 1 or 1.0 or .1
        number_part = Word(nums)
        inner_number = Combine((number_part +
                                Optional("." + Optional(number_part)))
                               | ("." + number_part))
        # Combine() joins the matching parts together into a single token,
        # and requires that the matching parts be contiguous (no spaces)

        # Define our suffixes
        suffix = Word(alphas + '%')
        suffix.setParseAction(self.suffix_parse_action)

        # Construct number as a group consisting of a text string ("num") and an optional suffix.
        # num can include a decimal number and numerical exponent, and can be
        # converted to a number using float()
        # suffix may contain alphas or %
        # Spaces are ignored inside numbers
        # Group wraps everything up into its own ParseResults object when parsing
        number = Group(
            Combine(
                inner_number + Optional(
                    CaselessLiteral("E") + Optional(plus_minus) +
                    number_part), )("num") +
            Optional(suffix)("suffix"))("number")
        # Note that calling ("name") on the end of a parser is equivalent to calling
        # parser.setResultsName, which is used to pull that result out of a parsed
        # expression like a dictionary.

        # Construct variable and function names
        front = Word(alphas, alphanums)  # must start with alpha
        subscripts = Word(alphanums + '_') + ~FollowedBy('{')  # ~ = not
        lower_indices = Literal("_{") + Optional("-") + Word(
            alphanums) + Literal("}")
        upper_indices = Literal("^{") + Optional("-") + Word(
            alphanums) + Literal("}")
        # Construct an object name in either of two forms:
        #   1. front + subscripts + tail
        #   2. front + lower_indices + upper_indices + tail
        # where:
        #   front (required):
        #       starts with alpha, followed by alphanumeric
        #   subscripts (optional):
        #       any combination of alphanumeric and underscores
        #   lower_indices (optional):
        #       Of form "_{(-)<alphanumeric>}"
        #   upper_indices (optional):
        #       Of form "^{(-)<alphanumeric>}"
        #   tail (optional):
        #       any number of primes
        name = Combine(front +
                       Optional(subscripts | (Optional(lower_indices) +
                                              Optional(upper_indices))) +
                       ZeroOrMore("'"))
        # Define a variable as a pyparsing result that contains one object name
        variable = Group(name("varname"))("variable")
        variable.setParseAction(self.variable_parse_action)

        # initialize recursive grammar
        expression = Forward()

        # Construct functions as consisting of funcname and arguments as
        # funcname(arguments)
        # where arguments is a comma-separated list of arguments, returned as a list
        # Must have at least 1 argument
        function = Group(
            name("funcname") + Suppress("(") +
            Group(delimitedList(expression))("arguments") +
            Suppress(")"))("function")
        function.setParseAction(self.function_parse_action)

        # Define parentheses
        parentheses = Group(Suppress("(") + expression +
                            Suppress(")"))('parentheses')

        # Define arrays
        array = Group(
            Suppress("[") + delimitedList(expression) + Suppress("]"))("array")

        # atomic units evaluate directly to number or array without binary operations
        atom = number | function | variable | parentheses | array

        # Define operations in order of precedence
        # Define exponentiation, possibly including negative powers
        power = atom + ZeroOrMore(Suppress("^") + Optional(minus)("op") + atom)
        power.addParseAction(self.group_if_multiple('power'))

        # Define negation (e.g., in 5*-3 --> we need to evaluate the -3 first)
        # Negation in powers is handled separately
        # This has been arbitrarily assigned a higher precedence than parallel
        negation = Optional(minus)("op") + power
        negation.addParseAction(self.group_if_multiple('negation'))

        # Define the parallel operator 1 || 5 == 1/(1/1 + 1/5)
        pipes = Literal('|') + Literal('|')
        parallel = negation + ZeroOrMore(Suppress(pipes) + negation)
        parallel.addParseAction(self.group_if_multiple('parallel'))

        # Define multiplication and division
        product = parallel + ZeroOrMore((Literal('*') | Literal('/'))("op") +
                                        parallel)
        product.addParseAction(self.group_if_multiple('product'))

        # Define sums and differences
        # Note that leading - signs are treated by negation
        sumdiff = Optional(plus) + product + ZeroOrMore(
            plus_minus("op") + product)
        sumdiff.addParseAction(self.group_if_multiple('sum'))

        # Close the recursion
        expression << sumdiff

        return expression + stringEnd
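
A standalone sketch of the precedence-chaining pattern used above (numbers only, no parse actions or suffix handling): each layer wraps the previous one with ZeroOrMore of its operator, so ^ binds tightest and +/- loosest.

from pyparsing import Forward, Group, Literal, Suppress, Word, ZeroOrMore, nums

expression = Forward()
number = Word(nums)
parentheses = Group(Suppress("(") + expression + Suppress(")"))
atom = number | parentheses
power = atom + ZeroOrMore(Suppress("^") + atom)
product = power + ZeroOrMore((Literal("*") | Literal("/")) + power)
sumdiff = product + ZeroOrMore((Literal("+") | Literal("-")) + product)
expression <<= sumdiff

print(expression.parseString("2^3*(4+5)", parseAll=True))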
Example #29
def _create_config_parser():
    """
    Creates a parser using pyparsing that works with bibfield rule definitions

    BNF like grammar:

    rule ::= ([persistent_identifier] json_id ["[0]" | "[n]"] "," aliases ":" INDENT body UNDENT) | include
    include ::= "include(" PATH ")"
    body ::=  [inherit_from] (creator | derived | calculated) [checker] [documentation]
    aliases ::= json_id ["[0]" | "[n]"] ["," aliases]
    creator ::= "creator:" INDENT creator_body+ UNDENT
    creator_body ::= [parse_first] [legacy] source_format "," source_tag "," python_allowed_expr
    source_format ::= MASTER_FORMATS
    source_tag ::= QUOTED_STRING

    derived ::= "derived" INDENT derived_calculated_body UNDENT
    calculated ::= "calculated:" INDENT derived_calculated_body UNDENT
    derived_calculated_body ::= [parse_first] [depends_on] [only_if] [do_not_cache] "," python_allowed_expr


    persistent_identifier ::= @persistent_identifier( level )
    inherit_from ::= "@inherit_from()"
    legacy ::= "@legacy(" correspondences+ ")"
    do_not_cache ::= "@do_not_cache"
    correspondences ::= "(" source_tag [ "," tag_name ] "," json_id ")"
    parse_first ::= "@parse_first(" json_id+ ")"
    depends_on ::= "@depends_on(" json_id+ ")"
    only_if ::= "@only_if(" python_condition+ ")"

    python_allowed_expr ::= ident | list_def | dict_def | list_access | dict_access | function_call

    checker ::= "checker:" INDENT checker_function+ UNDENT

    documentation ::= INDENT doc_string subfield* UNDENT
    doc_string ::= QUOTED_STRING
    subfield ::= "@subfield" json_id["."json_id*] ":" docstring
    """

    indent_stack = [1]

    def check_sub_indent(str, location, tokens):
        cur_col = col(location, str)
        if cur_col > indent_stack[-1]:
            indent_stack.append(cur_col)
        else:
            raise ParseException(str, location, "not a subentry")

    def check_unindent(str, location, tokens):
        if location >= len(str):
            return
        cur_col = col(location, str)
        if not(cur_col < indent_stack[-1] and cur_col <= indent_stack[-2]):
            raise ParseException(str, location, "not an unindent")

    def do_unindent():
        indent_stack.pop()

    INDENT = lineEnd.suppress() + empty + empty.copy().setParseAction(check_sub_indent)
    UNDENT = FollowedBy(empty).setParseAction(check_unindent)
    UNDENT.setParseAction(do_unindent)

    json_id = (Word(alphanums + "_") + Optional(oneOf("[0] [n]")))\
              .setResultsName("json_id", listAllMatches=True)\
              .setParseAction(lambda tokens: "".join(tokens))
    aliases = delimitedList((Word(alphanums + "_") + Optional(oneOf("[0] [n]")))
                            .setParseAction(lambda tokens: "".join(tokens)))\
              .setResultsName("aliases")
    python_allowed_expr = Forward()
    ident = Word(alphas + "_", alphanums + "_")
    dict_def = originalTextFor(nestedExpr('{', '}'))
    list_def = originalTextFor(nestedExpr('[', ']'))
    dict_access = list_access = originalTextFor(ident + nestedExpr('[', ']'))
    function_call = originalTextFor(ZeroOrMore(ident + ".") + ident + nestedExpr('(', ')'))

    python_allowed_expr << (ident ^ dict_def ^ list_def ^ dict_access ^ list_access ^ function_call)\
                          .setResultsName("value", listAllMatches=True)

    persistent_identifier = (Suppress("@persistent_identifier") +  nestedExpr("(", ")"))\
                            .setResultsName("persistent_identifier")
    inherit_from = (Suppress("@inherit_from") + originalTextFor(nestedExpr("(", ")")))\
                    .setResultsName("inherit_from")
    legacy = (Suppress("@legacy") + originalTextFor(nestedExpr("(", ")")))\
             .setResultsName("legacy", listAllMatches=True)
    only_if = (Suppress("@only_if") + originalTextFor(nestedExpr("(", ")")))\
              .setResultsName("only_if")
    depends_on = (Suppress("@depends_on") + originalTextFor(nestedExpr("(", ")")))\
                 .setResultsName("depends_on")
    parse_first = (Suppress("@parse_first") + originalTextFor(nestedExpr("(", ")")))\
                  .setResultsName("parse_first")
    do_not_cache = (Suppress("@") + "do_not_cache")\
                   .setResultsName("do_not_cache")
    master_format = (Suppress("@master_format") + originalTextFor(nestedExpr("(", ")")))\
                    .setResultsName("master_format")

    derived_calculated_body = Optional(parse_first) + Optional(depends_on) + Optional(only_if) + Optional(do_not_cache) + python_allowed_expr

    derived = "derived" + Suppress(":") + INDENT + derived_calculated_body + UNDENT
    calculated = "calculated" + Suppress(":") + INDENT + derived_calculated_body + UNDENT

    source_tag = quotedString\
                .setParseAction(removeQuotes)\
                .setResultsName("source_tag", listAllMatches=True)
    source_format = oneOf(CFG_BIBFIELD_MASTER_FORMATS)\
                    .setResultsName("source_format", listAllMatches=True)
    creator_body = (Optional(parse_first) + Optional(depends_on) + Optional(only_if) +  Optional(legacy) + source_format + Suppress(",") + source_tag + Suppress(",") + python_allowed_expr)\
                                            .setResultsName("creator_def", listAllMatches=True)
    creator = "creator" + Suppress(":") + INDENT + OneOrMore(creator_body) + UNDENT

    checker_function = (Optional(master_format) + ZeroOrMore(ident + ".") + ident + originalTextFor(nestedExpr('(', ')')))\
                       .setResultsName("checker_function", listAllMatches=True)
    checker = ("checker" + Suppress(":") + INDENT + OneOrMore(checker_function) + UNDENT)

    doc_string = QuotedString(quoteChar='"""', multiline=True) | quotedString.setParseAction(removeQuotes)
    subfield = (Suppress("@subfield") + Word(alphanums + "_" + '.') + Suppress(":") + Optional(doc_string))\
                 .setResultsName("subfields", listAllMatches=True)
    documentation = ("documentation" + Suppress(":") + INDENT + Optional(doc_string).setResultsName("main_doc") + ZeroOrMore(subfield) + UNDENT)\
                     .setResultsName("documentation")

    field_def = (creator | derived | calculated)\
                .setResultsName("type_field", listAllMatches=True)

    body = Optional(inherit_from) + Optional(field_def) + Optional(checker) + Optional(documentation)
    comment = Literal("#") + restOfLine + LineEnd()
    include = (Suppress("include") + quotedString)\
              .setResultsName("includes", listAllMatches=True)
    rule = (Optional(persistent_identifier) + json_id + Optional(Suppress(",") + aliases) + Suppress(":") + INDENT + body + UNDENT)\
           .setResultsName("rules", listAllMatches=True)

    return OneOrMore(rule | include | comment.suppress())
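
A hypothetical rule in the format the BNF above describes, shown here only as a Python string (the field name, MARC tag and expression are invented for illustration, and 'marc' is assumed to be one of CFG_BIBFIELD_MASTER_FORMATS):

# Illustrative input only; not taken from an actual bibfield cfg file.
sample_rule = '''
abstract:
    creator:
        marc, "520__", {'summary': value['a']}
    documentation:
        "Abstract or summary of the record"
'''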
Example #30
def Verilog_BNF():
    global verilogbnf

    if verilogbnf is None:

        # compiler directives
        compilerDirective = Combine( "`" + \
            oneOf("define undef ifdef else endif default_nettype "
                  "include resetall timescale unconnected_drive "
                  "nounconnected_drive celldefine endcelldefine") + \
            restOfLine ).setName("compilerDirective")

        # primitives
        SEMI,COLON,LPAR,RPAR,LBRACE,RBRACE,LBRACK,RBRACK,DOT,COMMA,EQ = map(Literal,";:(){}[].,=")

        identLead = alphas+"$_"
        identBody = alphanums+"$_"
        identifier1 = Regex( r"\.?["+identLead+"]["+identBody+r"]*(\.["+identLead+"]["+identBody+"]*)*"
                            ).setName("baseIdent")
        identifier2 = Regex(r"\\\S+").setParseAction(lambda t:t[0][1:]).setName("escapedIdent")#.setDebug()
        identifier = identifier1 | identifier2
        assert(identifier2 == r'\abc')

        hexnums = nums + "abcdefABCDEF" + "_?"
        base = Regex("'[bBoOdDhH]").setName("base")
        basedNumber = Combine( Optional( Word(nums + "_") ) + base + Word(hexnums+"xXzZ"),
                               joinString=" ", adjacent=False ).setName("basedNumber")
        #~ number = ( basedNumber | Combine( Word( "+-"+spacedNums, spacedNums ) +
                           #~ Optional( DOT + Optional( Word( spacedNums ) ) ) +
                           #~ Optional( e + Word( "+-"+spacedNums, spacedNums ) ) ).setName("numeric") )
        number = ( basedNumber | \
                   Regex(r"[+-]?[0-9_]+(\.[0-9_]*)?([Ee][+-]?[0-9_]+)?") \
                  ).setName("numeric")
        #~ decnums = nums + "_"
        #~ octnums = "01234567" + "_"
        expr = Forward().setName("expr")
        concat = Group( LBRACE + delimitedList( expr ) + RBRACE )
        multiConcat = Group("{" + expr + concat + "}").setName("multiConcat")
        funcCall = Group(identifier + LPAR + Optional( delimitedList( expr ) ) + RPAR).setName("funcCall")

        subscrRef = Group(LBRACK + delimitedList( expr, COLON ) + RBRACK)
        subscrIdentifier = Group( identifier + Optional( subscrRef ) )
        #~ scalarConst = "0" | (( FollowedBy('1') + oneOf("1'b0 1'b1 1'bx 1'bX 1'B0 1'B1 1'Bx 1'BX 1") ))
        scalarConst = Regex("0|1('[Bb][01xX])?")
        mintypmaxExpr = Group( expr + COLON + expr + COLON + expr ).setName("mintypmax")
        primary = (
                  number |
                  (LPAR + mintypmaxExpr + RPAR ) |
                  ( LPAR + Group(expr) + RPAR ).setName("nestedExpr") |
                  multiConcat |
                  concat |
                  dblQuotedString |
                  funcCall |
                  subscrIdentifier
                  )

        unop  = oneOf( "+  -  !  ~  &  ~&  |  ~|  ^  ~^" ).setName("unop")
        binop = oneOf( "+  -  *  /  %  ==  !=  ===  !==  &&  "
                       "||  <  <=  >  >=  &  |  ^  ^~  >>  << ** <<< >>>" ).setName("binop")

        expr << (
                ( unop + expr ) |  # must be first!
                ( primary + "?" + expr + COLON + expr ) |
                ( primary + Optional( binop + expr ) )
                )

        lvalue = subscrIdentifier | concat

        # keywords
        if_        = Keyword("if")
        else_      = Keyword("else")
        edge       = Keyword("edge")
        posedge    = Keyword("posedge")
        negedge    = Keyword("negedge")
        specify    = Keyword("specify")
        endspecify = Keyword("endspecify")
        fork       = Keyword("fork")
        join       = Keyword("join")
        begin      = Keyword("begin")
        end        = Keyword("end")
        default    = Keyword("default")
        forever    = Keyword("forever")
        repeat     = Keyword("repeat")
        while_     = Keyword("while")
        for_       = Keyword("for")
        case       = oneOf( "case casez casex" )
        endcase    = Keyword("endcase")
        wait       = Keyword("wait")
        disable    = Keyword("disable")
        deassign   = Keyword("deassign")
        force      = Keyword("force")
        release    = Keyword("release")
        assign     = Keyword("assign")

        eventExpr = Forward()
        eventTerm = ( posedge + expr ) | ( negedge + expr ) | expr | ( LPAR + eventExpr + RPAR )
        eventExpr << (
            Group( delimitedList( eventTerm, Keyword("or") ) )
            )
        eventControl = Group( "@" + ( ( LPAR + eventExpr + RPAR ) | identifier | "*" ) ).setName("eventCtrl")

        delayArg = ( number |
                     Word(alphanums+"$_") | #identifier |
                     ( LPAR + Group( delimitedList( mintypmaxExpr | expr ) ) + RPAR )
                   ).setName("delayArg")#.setDebug()
        delay = Group( "#" + delayArg ).setName("delay")#.setDebug()
        delayOrEventControl = delay | eventControl

        assgnmt   = Group( lvalue + EQ + Optional( delayOrEventControl ) + expr ).setName( "assgnmt" )
        nbAssgnmt = Group(( lvalue + "<=" + Optional( delay ) + expr ) |
                     ( lvalue + "<=" + Optional( eventControl ) + expr )).setName( "nbassgnmt" )

        range = LBRACK + expr + COLON + expr + RBRACK

        paramAssgnmt = Group( identifier + EQ + expr ).setName("paramAssgnmt")
        parameterDecl = Group( "parameter" + Optional( range ) + delimitedList( paramAssgnmt ) + SEMI).setName("paramDecl")

        inputDecl = Group( "input" + Optional( range ) + delimitedList( identifier ) + SEMI )
        outputDecl = Group( "output" + Optional( range ) + delimitedList( identifier ) + SEMI )
        inoutDecl = Group( "inout" + Optional( range ) + delimitedList( identifier ) + SEMI )

        regIdentifier = Group( identifier + Optional( LBRACK + expr + COLON + expr + RBRACK ) )
        regDecl = Group( "reg" + Optional("signed") + Optional( range ) + delimitedList( regIdentifier ) + SEMI ).setName("regDecl")
        timeDecl = Group( "time" + delimitedList( regIdentifier ) + SEMI )
        integerDecl = Group( "integer" + delimitedList( regIdentifier ) + SEMI )

        strength0 = oneOf("supply0  strong0  pull0  weak0  highz0")
        strength1 = oneOf("supply1  strong1  pull1  weak1  highz1")
        driveStrength = Group( LPAR + ( ( strength0 + COMMA + strength1 ) |
                                       ( strength1 + COMMA + strength0 ) ) + RPAR ).setName("driveStrength")
        nettype = oneOf("wire  tri  tri1  supply0  wand  triand  tri0  supply1  wor  trior  trireg")
        expandRange = Optional( oneOf("scalared vectored") ) + range
        realDecl = Group( "real" + delimitedList( identifier ) + SEMI )

        eventDecl = Group( "event" + delimitedList( identifier ) + SEMI )

        blockDecl = (
            parameterDecl |
            regDecl |
            integerDecl |
            realDecl |
            timeDecl |
            eventDecl
            )

        stmt = Forward().setName("stmt")#.setDebug()
        stmtOrNull = stmt | SEMI
        caseItem = ( delimitedList( expr ) + COLON + stmtOrNull ) | \
                   ( default + Optional(":") + stmtOrNull )
        stmt << Group(
            ( begin + Group( ZeroOrMore( stmt ) ) + end ).setName("begin-end") |
            ( if_ + Group(LPAR + expr + RPAR) + stmtOrNull + Optional( else_ + stmtOrNull ) ).setName("if") |
            ( delayOrEventControl + stmtOrNull ) |
            ( case + LPAR + expr + RPAR + OneOrMore( caseItem ) + endcase ) |
            ( forever + stmt ) |
            ( repeat + LPAR + expr + RPAR + stmt ) |
            ( while_ + LPAR + expr + RPAR + stmt ) |
            ( for_ + LPAR + assgnmt + SEMI + Group( expr ) + SEMI + assgnmt + RPAR + stmt ) |
            ( fork + ZeroOrMore( stmt ) + join ) |
            ( fork + COLON + identifier + ZeroOrMore( blockDecl ) + ZeroOrMore( stmt ) + join ) |
            ( wait + LPAR + expr + RPAR + stmtOrNull ) |
            ( "->" + identifier + SEMI ) |
            ( disable + identifier + SEMI ) |
            ( assign + assgnmt + SEMI ) |
            ( deassign + lvalue + SEMI ) |
            ( force + assgnmt + SEMI ) |
            ( release + lvalue + SEMI ) |
            ( begin + COLON + identifier + ZeroOrMore( blockDecl ) + ZeroOrMore( stmt ) + end ).setName("begin:label-end") |
            # these  *have* to go at the end of the list!!!
            ( assgnmt + SEMI ) |
            ( nbAssgnmt + SEMI ) |
            ( Combine( Optional("$") + identifier ) + Optional( LPAR + delimitedList(expr|empty) + RPAR ) + SEMI )
            ).setName("stmtBody")
        """
        x::=<blocking_assignment> ;
        x||= <non_blocking_assignment> ;
        x||= if ( <expression> ) <statement_or_null>
        x||= if ( <expression> ) <statement_or_null> else <statement_or_null>
        x||= case ( <expression> ) <case_item>+ endcase
        x||= casez ( <expression> ) <case_item>+ endcase
        x||= casex ( <expression> ) <case_item>+ endcase
        x||= forever <statement>
        x||= repeat ( <expression> ) <statement>
        x||= while ( <expression> ) <statement>
        x||= for ( <assignment> ; <expression> ; <assignment> ) <statement>
        x||= <delay_or_event_control> <statement_or_null>
        x||= wait ( <expression> ) <statement_or_null>
        x||= -> <name_of_event> ;
        x||= <seq_block>
        x||= <par_block>
        x||= <task_enable>
        x||= <system_task_enable>
        x||= disable <name_of_task> ;
        x||= disable <name_of_block> ;
        x||= assign <assignment> ;
        x||= deassign <lvalue> ;
        x||= force <assignment> ;
        x||= release <lvalue> ;
        """
        alwaysStmt = Group( "always" + Optional(eventControl) + stmt ).setName("alwaysStmt")
        initialStmt = Group( "initial" + stmt ).setName("initialStmt")

        chargeStrength = Group( LPAR + oneOf( "small medium large" ) + RPAR ).setName("chargeStrength")

        continuousAssign = Group(
            assign + Optional( driveStrength ) + Optional( delay ) + delimitedList( assgnmt ) + SEMI
            ).setName("continuousAssign")


        tfDecl = (
            parameterDecl |
            inputDecl |
            outputDecl |
            inoutDecl |
            regDecl |
            timeDecl |
            integerDecl |
            realDecl
            )

        functionDecl = Group(
            "function" + Optional( range | "integer" | "real" ) + identifier + SEMI +
            Group( OneOrMore( tfDecl ) ) +
            Group( ZeroOrMore( stmt ) ) +
            "endfunction"
            )

        inputOutput = oneOf("input output")
        netDecl1Arg = ( nettype +
            Optional( expandRange ) +
            Optional( delay ) +
            Group( delimitedList( ~inputOutput + identifier ) ) )
        netDecl2Arg = ( "trireg" +
            Optional( chargeStrength ) +
            Optional( expandRange ) +
            Optional( delay ) +
            Group( delimitedList( ~inputOutput + identifier ) ) )
        netDecl3Arg = ( nettype +
            Optional( driveStrength ) +
            Optional( expandRange ) +
            Optional( delay ) +
            Group( delimitedList( assgnmt ) ) )
        netDecl1 = Group(netDecl1Arg + SEMI).setName("netDecl1")
        netDecl2 = Group(netDecl2Arg + SEMI).setName("netDecl2")
        netDecl3 = Group(netDecl3Arg + SEMI).setName("netDecl3")

        gateType = oneOf("and  nand  or  nor xor  xnor buf  bufif0 bufif1 "
                         "not  notif0 notif1  pulldown pullup nmos  rnmos "
                         "pmos rpmos cmos rcmos   tran rtran  tranif0  "
                         "rtranif0  tranif1 rtranif1"  )
        gateInstance = Optional( Group( identifier + Optional( range ) ) ) + \
                        LPAR + Group( delimitedList( expr ) ) + RPAR
        gateDecl = Group( gateType +
            Optional( driveStrength ) +
            Optional( delay ) +
            delimitedList( gateInstance) +
            SEMI )

        udpInstance = Group( Group( identifier + Optional(range | subscrRef) ) +
            LPAR + Group( delimitedList( expr ) ) + RPAR )
        udpInstantiation = Group( identifier -
            Optional( driveStrength ) +
            Optional( delay ) +
            delimitedList( udpInstance ) +
            SEMI ).setName("udpInstantiation")

        parameterValueAssignment = Group( Literal("#") + LPAR + Group( delimitedList( expr ) ) + RPAR )
        namedPortConnection = Group( DOT + identifier + LPAR + expr + RPAR ).setName("namedPortConnection")#.setDebug()
        assert(r'.\abc (abc )' == namedPortConnection)
        modulePortConnection = expr | empty
        #~ moduleInstance = Group( Group ( identifier + Optional(range) ) +
            #~ ( delimitedList( modulePortConnection ) |
              #~ delimitedList( namedPortConnection ) ) )
        inst_args = Group( LPAR + (delimitedList( namedPortConnection ) |
                    delimitedList( modulePortConnection )) + RPAR).setName("inst_args")
        moduleInstance = Group( Group ( identifier + Optional(range) ) + inst_args ).setName("moduleInstance")#.setDebug()

        moduleInstantiation = Group( identifier +
            Optional( parameterValueAssignment ) +
            delimitedList( moduleInstance ).setName("moduleInstanceList") +
            SEMI ).setName("moduleInstantiation")

        parameterOverride = Group( "defparam" + delimitedList( paramAssgnmt ) + SEMI )
        task = Group( "task" + identifier + SEMI +
            ZeroOrMore( tfDecl ) +
            stmtOrNull +
            "endtask" )

        specparamDecl = Group( "specparam" + delimitedList( paramAssgnmt ) + SEMI )

        pathDescr1 = Group( LPAR + subscrIdentifier + "=>" + subscrIdentifier + RPAR )
        pathDescr2 = Group( LPAR + Group( delimitedList( subscrIdentifier ) ) + "*>" +
                                  Group( delimitedList( subscrIdentifier ) ) + RPAR )
        pathDescr3 = Group( LPAR + Group( delimitedList( subscrIdentifier ) ) + "=>" +
                                  Group( delimitedList( subscrIdentifier ) ) + RPAR )
        pathDelayValue = Group( ( LPAR + Group( delimitedList( mintypmaxExpr | expr ) ) + RPAR ) |
                                 mintypmaxExpr |
                                 expr )
        pathDecl = Group( ( pathDescr1 | pathDescr2 | pathDescr3 ) + EQ + pathDelayValue + SEMI ).setName("pathDecl")

        portConditionExpr = Forward()
        portConditionTerm = Optional(unop) + subscrIdentifier
        portConditionExpr << portConditionTerm + Optional( binop + portConditionExpr )
        polarityOp = oneOf("+ -")
        levelSensitivePathDecl1 = Group(
            if_ + Group(LPAR + portConditionExpr + RPAR) +
            subscrIdentifier + Optional( polarityOp ) + "=>" + subscrIdentifier + EQ +
            pathDelayValue +
            SEMI )
        levelSensitivePathDecl2 = Group(
            if_ + Group(LPAR + portConditionExpr + RPAR) +
            LPAR + Group( delimitedList( subscrIdentifier ) ) + Optional( polarityOp ) + "*>" +
                Group( delimitedList( subscrIdentifier ) ) + RPAR + EQ +
            pathDelayValue +
            SEMI )
        levelSensitivePathDecl = levelSensitivePathDecl1 | levelSensitivePathDecl2

        edgeIdentifier = posedge | negedge
        edgeSensitivePathDecl1 = Group(
            Optional( if_ + Group(LPAR + expr + RPAR) ) +
            LPAR + Optional( edgeIdentifier ) +
            subscrIdentifier + "=>" +
            LPAR + subscrIdentifier + Optional( polarityOp ) + COLON + expr + RPAR + RPAR +
            EQ +
            pathDelayValue +
            SEMI )
        edgeSensitivePathDecl2 = Group(
            Optional( if_ + Group(LPAR + expr + RPAR) ) +
            LPAR + Optional( edgeIdentifier ) +
            subscrIdentifier + "*>" +
            LPAR + delimitedList( subscrIdentifier ) + Optional( polarityOp ) + COLON + expr + RPAR + RPAR +
            EQ +
            pathDelayValue +
            SEMI )
        edgeSensitivePathDecl = edgeSensitivePathDecl1 | edgeSensitivePathDecl2

        edgeDescr = oneOf("01 10 0x x1 1x x0").setName("edgeDescr")

        timCheckEventControl = Group( posedge | negedge | (edge + LBRACK + delimitedList( edgeDescr ) + RBRACK ))
        timCheckCond = Forward()
        timCondBinop = oneOf("== === != !==")
        timCheckCondTerm = ( expr + timCondBinop + scalarConst ) | ( Optional("~") + expr )
        timCheckCond << ( ( LPAR + timCheckCond + RPAR ) | timCheckCondTerm )
        timCheckEvent = Group( Optional( timCheckEventControl ) +
                                subscrIdentifier +
                                Optional( "&&&" + timCheckCond ) )
        timCheckLimit = expr
        controlledTimingCheckEvent = Group( timCheckEventControl + subscrIdentifier +
                                            Optional( "&&&" + timCheckCond ) )
        notifyRegister = identifier

        systemTimingCheck1 = Group( "$setup" +
            LPAR + timCheckEvent + COMMA + timCheckEvent + COMMA + timCheckLimit +
            Optional( COMMA + notifyRegister ) + RPAR +
            SEMI )
        systemTimingCheck2 = Group( "$hold" +
            LPAR + timCheckEvent + COMMA + timCheckEvent + COMMA + timCheckLimit +
            Optional( COMMA + notifyRegister ) + RPAR +
            SEMI )
        systemTimingCheck3 = Group( "$period" +
            LPAR + controlledTimingCheckEvent + COMMA + timCheckLimit +
            Optional( COMMA + notifyRegister ) + RPAR +
            SEMI )
        systemTimingCheck4 = Group( "$width" +
            LPAR + controlledTimingCheckEvent + COMMA + timCheckLimit +
            Optional( COMMA + expr + COMMA + notifyRegister ) + RPAR +
            SEMI )
        systemTimingCheck5 = Group( "$skew" +
            LPAR + timCheckEvent + COMMA + timCheckEvent + COMMA + timCheckLimit +
            Optional( COMMA + notifyRegister ) + RPAR +
            SEMI )
        systemTimingCheck6 = Group( "$recovery" +
            LPAR + controlledTimingCheckEvent + COMMA + timCheckEvent + COMMA + timCheckLimit +
            Optional( COMMA + notifyRegister ) + RPAR +
            SEMI )
        systemTimingCheck7 = Group( "$setuphold" +
            LPAR + timCheckEvent + COMMA + timCheckEvent + COMMA + timCheckLimit + COMMA + timCheckLimit +
            Optional( COMMA + notifyRegister ) + RPAR +
            SEMI )
        systemTimingCheck = (FollowedBy('$') + ( systemTimingCheck1 | systemTimingCheck2 | systemTimingCheck3 |
            systemTimingCheck4 | systemTimingCheck5 | systemTimingCheck6 | systemTimingCheck7 )).setName("systemTimingCheck")
        sdpd = if_ + Group(LPAR + expr + RPAR) + \
            ( pathDescr1 | pathDescr2 ) + EQ + pathDelayValue + SEMI

        specifyItem = ~Keyword("endspecify") +(
            specparamDecl |
            pathDecl |
            levelSensitivePathDecl |
            edgeSensitivePathDecl |
            systemTimingCheck |
            sdpd
            )
        """
        x::= <specparam_declaration>
        x||= <path_declaration>
        x||= <level_sensitive_path_declaration>
        x||= <edge_sensitive_path_declaration>
        x||= <system_timing_check>
        x||= <sdpd>
        """
        specifyBlock = Group( "specify" + ZeroOrMore( specifyItem ) + "endspecify" ).setName("specifyBlock")

        moduleItem = ~Keyword("endmodule") + (
            parameterDecl |
            inputDecl |
            outputDecl |
            inoutDecl |
            regDecl |
            netDecl3 |
            netDecl1 |
            netDecl2 |
            timeDecl |
            integerDecl |
            realDecl |
            eventDecl |
            gateDecl |
            parameterOverride |
            continuousAssign |
            specifyBlock |
            initialStmt |
            alwaysStmt |
            task |
            functionDecl |
            # these have to be at the end - they start with identifiers
            moduleInstantiation |
            udpInstantiation
            )
        """  All possible moduleItems, from Verilog grammar spec
        x::= <parameter_declaration>
        x||= <input_declaration>
        x||= <output_declaration>
        x||= <inout_declaration>
        ?||= <net_declaration>  (spec does not seem consistent for this item)
        x||= <reg_declaration>
        x||= <time_declaration>
        x||= <integer_declaration>
        x||= <real_declaration>
        x||= <event_declaration>
        x||= <gate_declaration>
        x||= <UDP_instantiation>
        x||= <module_instantiation>
        x||= <parameter_override>
        x||= <continuous_assign>
        x||= <specify_block>
        x||= <initial_statement>
        x||= <always_statement>
        x||= <task>
        x||= <function>
        """
        portRef = subscrIdentifier
        portExpr = portRef | Group( LBRACE + delimitedList( portRef ) + RBRACE )
        port = portExpr | Group( ( DOT + identifier + LPAR + portExpr + RPAR ) )

        moduleHdr = Group ( oneOf("module macromodule") + identifier +
                 Optional( LPAR + Group( Optional( delimitedList(
                                    Group(oneOf("input output") +
                                            (netDecl1Arg | netDecl2Arg | netDecl3Arg) ) |
                                    port ) ) ) +
                            RPAR ) + SEMI ).setName("moduleHdr")

        module = Group(  moduleHdr +
                 Group( ZeroOrMore( moduleItem ) ) +
                 "endmodule" ).setName("module")#.setDebug()

        udpDecl = outputDecl | inputDecl | regDecl
        #~ udpInitVal = oneOf("1'b0 1'b1 1'bx 1'bX 1'B0 1'B1 1'Bx 1'BX 1 0 x X")
        udpInitVal = (Regex("1'[bB][01xX]") | Regex("[01xX]")).setName("udpInitVal")
        udpInitialStmt = Group( "initial" +
            identifier + EQ + udpInitVal + SEMI ).setName("udpInitialStmt")

        levelSymbol = oneOf("0   1   x   X   ?   b   B")
        levelInputList = Group( OneOrMore( levelSymbol ).setName("levelInpList") )
        outputSymbol = oneOf("0   1   x   X")
        combEntry = Group( levelInputList + COLON + outputSymbol + SEMI )
        edgeSymbol = oneOf("r   R   f   F   p   P   n   N   *")
        edge = Group( LPAR + levelSymbol + levelSymbol + RPAR ) | \
               Group( edgeSymbol )
        edgeInputList = Group( ZeroOrMore( levelSymbol ) + edge + ZeroOrMore( levelSymbol ) )
        inputList = levelInputList | edgeInputList
        seqEntry = Group( inputList + COLON + levelSymbol + COLON + ( outputSymbol | "-" ) + SEMI ).setName("seqEntry")
        udpTableDefn = Group( "table" +
            OneOrMore( combEntry | seqEntry ) +
            "endtable" ).setName("table")

        """
        <UDP>
        ::= primitive <name_of_UDP> ( <name_of_variable> <,<name_of_variable>>* ) ;
                <UDP_declaration>+
                <UDP_initial_statement>?
                <table_definition>
                endprimitive
        """
        udp = Group( "primitive" + identifier +
            LPAR + Group( delimitedList( identifier ) ) + RPAR + SEMI +
            OneOrMore( udpDecl ) +
            Optional( udpInitialStmt ) +
            udpTableDefn +
            "endprimitive" )

        verilogbnf = OneOrMore( module | udp ) + StringEnd()

        verilogbnf.ignore( cppStyleComment )
        verilogbnf.ignore( compilerDirective )

    return verilogbnf
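
A minimal usage sketch (not from the source; it assumes the module-level `verilogbnf = None` initialisation and the pyparsing imports that live in the truncated header of this example):

src = """
module blinky(clk, led);
  input clk;
  output led;
  reg led;
  always @(posedge clk) led <= !led;
endmodule
"""
tokens = Verilog_BNF().parseString(src)
print(tokens.asList())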
Ejemplo n.º 31
0
expression = Forward()

# Literals

intNumber = Regex(r'-?\d+')('integer')

floatNumber = Regex(r'-?\d+\.\d+')('float')

sciNumber = Combine((floatNumber | intNumber) + CaselessLiteral('e') +
                    intNumber)('scientific')

aString = quotedString('string')

# Use lookahead to match only numbers in a list (can't remember why this is necessary)
afterNumber = FollowedBy(",") ^ FollowedBy(")") ^ FollowedBy(LineEnd())
number = Group((sciNumber + afterNumber) | (floatNumber + afterNumber)
               | (intNumber + afterNumber))('number')

boolean = Group(CaselessKeyword("true") | CaselessKeyword("false"))('boolean')

none = Group(CaselessKeyword('none'))('none')

argname = Word(alphas + '_', alphanums + '_')('argname')
funcname = Word(alphas + '_', alphanums + '_')('funcname')

## Symbols
leftParen = Literal('(').suppress()
rightParen = Literal(')').suppress()
comma = Literal(',').suppress()
equal = Literal('=').suppress()
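
A quick illustration of the `afterNumber` lookahead above (a sketch, assuming `ParseException` is imported from pyparsing alongside the other names in this snippet's truncated header): a number is only accepted when it is followed by ',', ')' or an end of line.

print(number.parseString("42)"))      # accepted: followed by ')'
try:
    number.parseString("42x")
except ParseException:
    print("'42x' rejected: 'x' is not a valid follower")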
Ejemplo n.º 32
0
from rdflib.py3compat import bytestype

ParserElement.setDefaultWhitespaceChars(" \n")

String = STRING_LITERAL1 | STRING_LITERAL2

RDFLITERAL = Comp(
    'literal',
    Param('string', String) + Optional(
        Param('lang', LANGTAG.leaveWhitespace())
        | Literal('^^').leaveWhitespace() +
        Param('datatype', IRIREF).leaveWhitespace()))

NONE_VALUE = object()

EMPTY = FollowedBy(LineEnd()) | FollowedBy("\t")
EMPTY.setParseAction(lambda x: NONE_VALUE)

TERM = RDFLITERAL | IRIREF | BLANK_NODE_LABEL | NumericLiteral | BooleanLiteral

ROW = (EMPTY | TERM) + ZeroOrMore(Suppress("\t") + (EMPTY | TERM))
ROW.parseWithTabs()

HEADER = Var + ZeroOrMore(Suppress("\t") + Var)
HEADER.parseWithTabs()


class TSVResultParser(ResultParser):
    def parse(self, source):

        if isinstance(source.read(0), bytestype):
Ejemplo n.º 33
0
import string

from pyparsing import FollowedBy, Literal, Word


def parenthesize(characters, name):
    return Literal("(") + Word(characters).setResultsName(name) + Literal(")")


def decimalize(characters, name):
    return (Word(characters).setResultsName(name) +
            Literal(".").leaveWhitespace())


#   Only used as the top of the appendix hierarchy
a1 = Word(string.digits).setResultsName("a1")
aI = Word("IVXLCDM").setResultsName("aI")

#   Catches the A in 12A but not in 12Awesome
markerless_upper = Word(string.ascii_uppercase).setResultsName(
    'markerless_upper') + ~FollowedBy(Word(string.ascii_lowercase))

paren_upper = parenthesize(string.ascii_uppercase, "paren_upper")
paren_lower = parenthesize(string.ascii_lowercase, "paren_lower")
paren_digit = parenthesize(string.digits, "paren_digit")

period_upper = decimalize(string.ascii_uppercase, "period_upper")
period_lower = decimalize(string.ascii_lowercase, "period_lower")
period_digit = decimalize(string.digits, "period_digit")

roman_upper = decimalize('IVXLCDM', "roman_upper")
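
A few illustrative parses of the markers defined above (a sketch using only the definitions in this snippet):

print(paren_upper.parseString("(A)").paren_upper)    # -> 'A'
print(period_digit.parseString("12.").period_digit)  # -> '12'
print(roman_upper.parseString("IV.").roman_upper)    # -> 'IV'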
Ejemplo n.º 34
0
ParserElement.setDefaultWhitespaceChars(" \t")
NL = LineEnd().suppress()

integer = Word(nums)
plan = '1..' + integer("ubound")

OK, NOT_OK = map(Literal, ['ok', 'not ok'])
testStatus = (OK | NOT_OK)

description = Regex("[^#\n]+")
description.setParseAction(lambda t: t[0].lstrip('- '))

TODO, SKIP = map(CaselessLiteral, 'TODO SKIP'.split())
directive = Group(
    Suppress('#') +
    (TODO + restOfLine | FollowedBy(SKIP) +
     restOfLine.copy().setParseAction(lambda t: ['SKIP', t[0]])))

commentLine = Suppress("#") + empty + restOfLine

testLine = Group(
    Optional(OneOrMore(commentLine + NL))("comments") + testStatus("passed") +
    Optional(integer)("testNumber") + Optional(description)("description") +
    Optional(directive)("directive"))
bailLine = Group(
    Literal("Bail out!")("BAIL") + empty + Optional(restOfLine)("reason"))

tapOutputParser = Optional(Group(plan)("plan") + NL) & \
            Group(OneOrMore((testLine|bailLine) + NL))("tests")
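
A minimal usage sketch (not from the source; assumes the pyparsing imports from this snippet's truncated header):

sample = """\
1..2
ok 1 - first test
not ok 2 - second test # TODO not implemented yet
"""
for test in tapOutputParser.parseString(sample).tests:
    print(test.passed, test.testNumber, test.description)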

Ejemplo n.º 35
0
# Factorial
fact_expr = Group( func_term + OneOrMore(factop) )
fact_expr.setParseAction(operators.PostfixSymbol.process)
fact_term = ( fact_expr | func_term )

# Exponent
exp_term = Forward()
# 'signop' in exponent is handled in process_expop().
exp_expr = Group( fact_term + expop + ZeroOrMore(signop) + exp_term )
exp_expr.setParseAction(operators.Exponent.process)
exp_term <<= ( exp_expr | fact_term )

# Sign.
sign_term = Forward()
_signop = Optional(signop) # "try to avoid LR" was the original comment, dunno!?
sign_expr = FollowedBy(_signop.expr + sign_term) + Group( _signop + sign_term )
sign_expr.setParseAction(operators.PrefixSymbol.process)
sign_term <<= ( sign_expr | exp_term )

# Multiplication without sign has precedence so that 2km / 3h means
# 2/3 km/h.  Multiplication without sign is possible if RHS is a
# variable/constant/unit.
sm_exp_expr = Group( variable + expop + ZeroOrMore(signop) + exp_term )
sm_exp_expr.setParseAction(operators.Exponent.process)
signless_mult_expr = Group( sign_term + OneOrMore( sm_exp_expr | variable ) )
signless_mult_expr.setParseAction(operators.InfixLeftSymbol.process)
signless_mult_term = ( signless_mult_expr | sign_term )

# Multiplication.
mult_expr = Group( signless_mult_term + OneOrMore(multop + signless_mult_term) )
mult_expr.setParseAction(operators.InfixLeftSymbol.process)
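
The same precedence-layering pattern in a self-contained form, for illustration only (a sketch that is not part of the original module; it handles just '+' and '*'): each level wraps the tighter-binding level and falls back to it when its own operator is absent.

from pyparsing import Group, Literal, OneOrMore, Word, nums

operand = Word(nums)
mul_term = Group(operand + OneOrMore(Literal('*') + operand)) | operand
add_term = Group(mul_term + OneOrMore(Literal('+') + mul_term)) | mul_term
print(add_term.parseString("1+2*3").asList())  # [['1', '+', ['2', '*', '3']]]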
Ejemplo n.º 36
0
    if n == 0:
        return Empty()
    else:
        return Group((Suppress(funOrbNumber(n)) +
                      funCoefficients(n)).setResultsName("lastCoeffs"))


# ====================> Basis File <==========================
comment = Literal("#") + restOfLine

parseAtomLabel = Word(srange("[A-Z]"), max=1) + Optional(
    Word(srange("[a-z]"), max=1))

parserBasisName = Word(alphanums + "-") + Suppress(restOfLine)

parserFormat = OneOrMore(natural + NotAny(FollowedBy(point)))

parserKey = (parseAtomLabel.setResultsName("atom") +
             parserBasisName.setResultsName("basisName") +
             Suppress(Literal("1")))

parserBasisData = OneOrMore(floatNumber)

parserBasis = (parserKey + parserFormat.setResultsName("format") +
               parserBasisData.setResultsName("coeffs"))

topParseBasis = OneOrMore(Suppress(comment)) + OneOrMore(
    Group(parserBasis + Suppress(Optional(OneOrMore(comment)))))

# ===============================<>====================================
# Parsing From File
Ejemplo n.º 37
0
def _create_field_parser():
    """
    Creates a parser using pyparsing that works with bibfield rule definitions

    BNF like grammar:

    rule ::= ([persistent_identifier] json_id ["[0]" | "[n]"] "," aliases ":" INDENT body UNDENT) | include | python_comment
    include ::= "include(" PATH ")"
    body ::=  [inherit_from] (creator | derived | calculated) [checker] [documentation] [producer]
    aliases ::= json_id ["[0]" | "[n]"] ["," aliases]

    creator ::= "creator:" INDENT creator_body+ UNDENT
    creator_body ::= [decorators] source_format "," source_tag "," python_allowed_expr
    source_format ::= MASTER_FORMATS
    source_tag ::= QUOTED_STRING

    derived ::= "derived" INDENT derived_calculated_body UNDENT
    calculated ::= "calculated:" INDENT derived_calculated_body UNDENT
    derived_calculated_body ::= [decorators] "," python_allowed_exp

    decorators ::= (persistent_identifier | legacy | do_not_cache | parse_first | depends_on | only_if | only_if_master_value)*
    persistent_identifier ::= "@persistent_identifier(" level ")"
    legacy ::= "@legacy(" correspondences+ ")"
    correspondences ::= "(" source_tag [ "," tag_name ] "," json_id ")"
    parse_first ::= "@parse_first(" jsonid+ ")"
    depends_on ::= "@depends_on(" json_id+ ")"
    only_if ::= "@only_if(" python_condition+ ")"
    only_if_master_value ::= "@only_if_master_value(" python_condition+  ")"

    inherit_from ::= "@inherit_from()"

    python_allowed_expr ::= ident | list_def | dict_def | list_access | dict_access | function_call

    checker ::= "checker:" INDENT checker_function+ UNDENT

    documentation ::= INDENT doc_string subfield* UNDENT
    doc_string ::= QUOTED_STRING
    subfield ::= "@subfield" json_id["."json_id*] ":" docstring

    producer ::= "producer:" INDENT producer_body UNDENT
    producer_body ::= producer_code "," python_dictionary
    producer_code ::= ident
    """

    indent_stack = [1]

    def check_sub_indent(string, location, tokens):
        cur_col = col(location, string)
        if cur_col > indent_stack[-1]:
            indent_stack.append(cur_col)
        else:
            raise ParseException(string, location, "not a subentry")

    def check_unindent(string, location, tokens):
        if location >= len(string):
            return
        cur_col = col(location, string)
        if not (cur_col < indent_stack[-1] and cur_col <= indent_stack[-2]):
            raise ParseException(string, location, "not an unindent")

    def do_unindent():
        indent_stack.pop()

    INDENT = lineEnd.suppress() + empty + empty.copy().setParseAction(check_sub_indent)
    UNDENT = FollowedBy(empty).setParseAction(check_unindent)
    UNDENT.setParseAction(do_unindent)

    json_id = (Word(alphas + "_", alphanums + "_") + Optional(oneOf("[0] [n]")))\
              .setResultsName("json_id", listAllMatches=True)\
              .setParseAction(lambda tokens: "".join(tokens))
    aliases = delimitedList((Word(alphanums + "_") + Optional(oneOf("[0] [n]")))
                            .setParseAction(lambda tokens: "".join(tokens)))\
              .setResultsName("aliases")
    ident = Word(alphas + "_", alphanums + "_")
    dict_def = originalTextFor(nestedExpr('{', '}'))
    list_def = originalTextFor(nestedExpr('[', ']'))
    dict_access = list_access = originalTextFor(ident + nestedExpr('[', ']'))
    function_call = originalTextFor(ZeroOrMore(ident + ".") + ident + nestedExpr('(', ')'))

    python_allowed_expr = (dict_def ^ list_def ^ dict_access ^ \
            list_access ^ function_call ^ restOfLine)\
            .setResultsName("value", listAllMatches=True)

    persistent_identifier = (Suppress("@persistent_identifier") + \
            nestedExpr("(", ")"))\
            .setResultsName("persistent_identifier")
    legacy = (Suppress("@legacy") + originalTextFor(nestedExpr("(", ")")))\
            .setResultsName("legacy", listAllMatches=True)
    only_if = (Suppress("@only_if") + originalTextFor(nestedExpr("(", ")")))\
            .setResultsName("only_if")
    only_if_master_value = (Suppress("@only_if_value") + \
            originalTextFor(nestedExpr("(", ")")))\
            .setResultsName("only_if_master_value")
    depends_on = (Suppress("@depends_on") + \
            originalTextFor(nestedExpr("(", ")")))\
            .setResultsName("depends_on")
    parse_first = (Suppress("@parse_first") + \
            originalTextFor(nestedExpr("(", ")")))\
            .setResultsName("parse_first")
    memoize = (Suppress("@memoize") + nestedExpr("(", ")"))\
            .setResultsName("memoize")
    field_decorator = parse_first ^ depends_on ^ only_if ^ \
            only_if_master_value ^ memoize ^ legacy

    #Independent decorators
    inherit_from = (Suppress("@inherit_from") + \
            originalTextFor(nestedExpr("(", ")")))\
            .setResultsName("inherit_from")
    override = (Suppress("@") + "override")\
            .setResultsName("override")
    extend = (Suppress("@") + "extend")\
            .setResultsName("extend")
    master_format = (Suppress("@master_format") + \
            originalTextFor(nestedExpr("(", ")")))\
            .setResultsName("master_format") \
            .setParseAction(lambda toks: toks[0])

    derived_calculated_body = (ZeroOrMore(field_decorator) + python_allowed_expr)\
            .setResultsName('derived_calculated_def')

    derived = "derived" + Suppress(":") + \
            INDENT + derived_calculated_body + UNDENT
    calculated = "calculated" + Suppress(":") + \
            INDENT + derived_calculated_body + UNDENT

    source_tag = quotedString\
            .setParseAction(removeQuotes)\
            .setResultsName("source_tag", listAllMatches=True)
    source_format = Word(alphas, alphanums + "_")\
                    .setResultsName("source_format", listAllMatches=True)
    creator_body = (ZeroOrMore(field_decorator) + source_format + \
            Suppress(",") + source_tag + Suppress(",") + python_allowed_expr)\
            .setResultsName("creator_def", listAllMatches=True)
    creator = "creator" + Suppress(":") + \
            INDENT + OneOrMore(creator_body) + UNDENT
    field_def = (creator | derived | calculated)\
                .setResultsName("type_field", listAllMatches=True)

    #JsonExtra
    json_dumps = (Suppress('dumps') + Suppress(',') + python_allowed_expr)\
        .setResultsName("dumps")\
        .setParseAction(lambda toks: toks.value[0])
    json_loads = (Suppress("loads") + Suppress(",") + python_allowed_expr)\
        .setResultsName("loads")\
        .setParseAction(lambda toks: toks.value[0])

    json_extra = (Suppress('json:') + \
            INDENT + Each((json_dumps, json_loads)) + UNDENT)\
            .setResultsName('json_ext')

    #Checker
    checker_function = (Optional(master_format) + ZeroOrMore(ident + ".") + ident + originalTextFor(nestedExpr('(', ')')))\
                       .setResultsName("checker", listAllMatches=True)
    checker = ("checker" + Suppress(":") + INDENT + OneOrMore(checker_function) + UNDENT)

    #Description/Documentation
    doc_double = QuotedString(quoteChar='"""', multiline=True)
    doc_single = QuotedString(quoteChar="'''", multiline=True)
    doc_string = INDENT + (doc_double | doc_single) + UNDENT
    description_body = (Suppress('description:') + doc_string).\
                setParseAction(lambda toks: toks[0][0])
    description = (description_body | doc_double | doc_single)\
            .setResultsName('description')

    #Producer
    producer_code = (Word(alphas, alphanums + "_")\
           + originalTextFor(nestedExpr("(", ")")))\
           .setResultsName('producer_code', listAllMatches=True)
    producer_body = (producer_code + Suppress(",") + python_allowed_expr)\
                    .setResultsName("producer_rule", listAllMatches=True)
    producer = Suppress("producer:") + INDENT + OneOrMore(producer_body) + UNDENT

    schema = (Suppress('schema:') + INDENT + dict_def + UNDENT)\
            .setParseAction(lambda toks: toks[0])\
            .setResultsName('schema')

    body = Optional(field_def) & Optional(checker) & Optional(json_extra) \
            & Optional(description) & Optional(producer) & Optional(schema)
    comment = Literal("#") + restOfLine + LineEnd()
    include = (Suppress("include") + quotedString)\
              .setResultsName("includes", listAllMatches=True)
    rule = (Optional(persistent_identifier) + Optional(inherit_from) + \
            Optional(override) + Optional(extend) +json_id + \
            Optional(Suppress(",") + aliases) + Suppress(":") + \
            INDENT + body + UNDENT)\
           .setResultsName("rules", listAllMatches=True)

    return OneOrMore(rule | include | comment.suppress())
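
A minimal sketch of driving the returned grammar (not from the source; `config_text` is a hypothetical string holding field definitions written in the DSL documented in the docstring, and the same pyparsing imports as the function itself are assumed):

field_parser = _create_field_parser()
for rule in field_parser.parseString(config_text).rules:
    print(rule.json_id)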