Beispiel #1
0
                                               NumericLiteral, BooleanLiteral,
                                               LANGTAG)
from rdflib_elasticstore.sparql.parserutils import Comp, Param, CompValue

from rdflib import Literal as RDFLiteral

from six import binary_type

# Tokens are separated only by spaces/newlines; tabs stay significant because
# they are the column separators of the TSV results format parsed below.
ParserElement.setDefaultWhitespaceChars(" \n")

# A string literal in either single- or double-quoted form.
String = STRING_LITERAL1 | STRING_LITERAL2

# An RDF literal: a string with an optional language tag, or an optional
# '^^'-prefixed datatype IRI.  leaveWhitespace() keeps the suffix glued
# directly to the string (no separator allowed in between).
RDFLITERAL = Comp(
    'literal',
    Param('string', String) + Optional(
        Param('lang', LANGTAG.leaveWhitespace())
        | Literal('^^').leaveWhitespace() +
        Param('datatype', IRIREF).leaveWhitespace()))

# Sentinel for empty (unbound) cells; a distinct object so a missing value
# can never be confused with any real parse result.
NONE_VALUE = object()

# An empty cell: nothing before the end of line or before the next tab.
EMPTY = FollowedBy(LineEnd()) | FollowedBy("\t")
EMPTY.setParseAction(lambda x: NONE_VALUE)

# Any RDF term that may appear in a results cell.
TERM = RDFLITERAL | IRIREF | BLANK_NODE_LABEL | NumericLiteral | BooleanLiteral

# One data row: tab-separated cells, each either empty or a term.
ROW = (EMPTY | TERM) + ZeroOrMore(Suppress("\t") + (EMPTY | TERM))
ROW.parseWithTabs()  # keep tabs -- they are the column separators

# Header row: tab-separated variable names.
HEADER = Var + ZeroOrMore(Suppress("\t") + Var)
HEADER.parseWithTabs()
# define pgn grammar (Portable Game Notation for chess games)
#

# A PGN tag pair, e.g. [Event "F/S Return Match"].
tag = Suppress("[") + Word(alphanums) + Combine(quotedString) + Suppress("]")
# An inline commentary block, e.g. {White castles}.
comment = Suppress("{") + Word(alphanums + " ") + Suppress("}")

dot = Literal(".")
piece = oneOf("K Q B N R")  # piece letters (pawn moves carry no letter)
file_coord = oneOf("a b c d e f g h")  # board files
rank_coord = oneOf("1 2 3 4 5 6 7 8")  # board ranks
capture = oneOf("x :")  # capture indicators
promote = Literal("=")  # pawn promotion marker
castle_queenside = oneOf("O-O-O 0-0-0 o-o-o")
castle_kingside = oneOf("O-O 0-0 o-o")

# A move number with optional preceding commentary, e.g. "12."
move_number = Optional(comment) + Word(nums) + dot
m1 = file_coord + rank_coord  # pawn move e.g. d4
m2 = file_coord + capture + file_coord + rank_coord  # pawn capture move e.g. dxe5
# NOTE(review): only promotion on rank 8 (white) is handled here.
m3 = file_coord + "8" + promote + piece  # pawn promotion e.g. e8=Q
m4 = piece + file_coord + rank_coord  # piece move e.g. Be6
m5 = piece + file_coord + file_coord + rank_coord  # piece move e.g. Nbd2
m6 = piece + rank_coord + file_coord + rank_coord  # piece move e.g. R4a7
m7 = piece + capture + file_coord + rank_coord  # piece capture move e.g. Bxh7
# Queenside first so "O-O-O" is not half-matched as the kingside "O-O".
m8 = castle_queenside | castle_kingside  # castling e.g. o-o

check = oneOf("+ ++")
mate = Literal("#")
annotation = Word("!?", max=2)  # move annotations: !, ?, !!, ??, !?, ?!
# Numeric annotation glyph, e.g. " $1".  NOTE(review): the leading space in
# " $" looks intentional (glyphs follow a move) but is worth confirming.
nag = " $" + Word(nums)
decoration = check | mate | annotation | nag
Beispiel #3
0
def _parse_folder_spec(spec, groups, sort_key):
    """Parse the folder specification into a nested list.

    Args:
        spec (str): folder specification
        groups (dict): map of group name to list of folders in group
        sort_key (callable): map of folder name to sortable object.

    Returns:
        list: list of parsed tokens

    Raises:
        ValueError: if `spec` cannot be parsed.
    """
    group_names = list(groups.keys())

    def convert_to_slice(parse_string, loc, tokens):
        """Convert SliceSpec tokens to slice instance."""
        # tokens look like ["[", <digits and colons ...>, "]"]; drop the
        # brackets and split on ':' to recover start/stop/step parts.
        parts = "".join(tokens[1:-1]).split(':')
        if len(parts) == 1:
            # Single index, e.g. "[2]": select exactly that one element.
            i = int(parts[0])
            if i == -1:
                # slice(-1, 0) would be empty, so leave the end open.
                return slice(i, None, None)
            else:
                return slice(i, i + 1, None)
        else:
            parts += [''] * (3 - len(parts))  # pad to length 3
            start, stop, step = (int(v) if len(v) > 0 else None for v in parts)
            return slice(start, stop, step)

    def convert_to_callable_filter(parse_string, loc, tokens):
        """Convert ConditionSpec to a callable filter.

        The returned filter takes a single argument `folder` and return True if
        the `folder` passes the filter.
        """
        op, arg = tokens[0], tokens[1]

        def _filter(folder, _op, _list):
            # Compare folder names as versions, not as plain strings.
            folder = parse_version(folder)
            _list = [parse_version(v) for v in _list]
            if _op == 'in':
                return folder in _list
            elif _op == 'not in':
                return folder not in _list
            elif _op == '<=':
                return all([folder <= v for v in _list])
            elif _op == '<':
                return all([folder < v for v in _list])
            elif _op == '==':
                return all([folder == v for v in _list])
            elif _op == '!=':
                return all([folder != v for v in _list])
            elif _op == '>=':
                return all([folder >= v for v in _list])
            elif _op == '>':
                return all([folder > v for v in _list])
            else:  # pragma: nocover
                raise ValueError("Unknown operator: %r" % _op)

        if isinstance(arg, str):
            # Right-hand side is a single folder name.
            _list = [arg]
        else:
            # Right-hand side is a group/list spec: resolve it to the
            # concrete folder names first.
            _list = _resolve_folder_spec([arg.asList()],
                                         groups,
                                         sort_key=sort_key)
        return partial(_filter, _op=op, _list=_list)

    # Grammar tokens.  Int allows a leading '-' for negative indices.
    Int = Word(nums + "-", nums)
    Colon = Literal(':')

    # A Python-like slice, e.g. "[1:-1]" or "[::2]".
    SliceSpec = ("[" + Optional(Int) + Optional(Colon + Optional(Int)) +
                 Optional(Colon + Optional(Int)) +
                 "]").setParseAction(convert_to_slice)

    # Longer operators are listed before their prefixes ('<=' before '<')
    # so alternation does not stop at the shorter match.
    LogicalOperator = (Literal('in')
                       | Literal('not in')
                       | Literal('<=')
                       | Literal('<')
                       | Literal('==')
                       | Literal('!=')
                       | Literal('>=')
                       | Literal('>'))

    GroupName = Group("<" + oneOf(group_names, caseless=True) + ">")
    FolderName = Word(alphanums, alphanums + ".-_+")

    # Forward declarations: lists may nest, and conditions may reference
    # parenthesized lists.
    ParenthesizedListSpec = Forward()
    ConditionSpec = Forward()

    ParenthesizedListSpec <<= Group("(" +
                                    delimitedList(GroupName | FolderName
                                                  | ParenthesizedListSpec) +
                                    ZeroOrMore(ConditionSpec) + ")" +
                                    Optional(SliceSpec))

    ConditionSpec <<= LogicalOperator + (FolderName | GroupName
                                         | ParenthesizedListSpec)
    ConditionSpec = ConditionSpec.setParseAction(convert_to_callable_filter)

    ListSpec = delimitedList(GroupName | FolderName | ParenthesizedListSpec)

    Spec = ListSpec | ParenthesizedListSpec

    # An empty/whitespace-only spec parses to an empty token list.
    if spec.strip() == '':
        return []
    try:
        return Spec.parseString(spec, parseAll=True).asList()
    except ParseException as exc:
        # Re-raise as ValueError with the offending position marked '*'.
        raise ValueError("Invalid specification (marked '*'): %r" %
                         exc.markInputline('*'))
Beispiel #4
0
def rc_statement():
    """
    Generate a RC statement parser that can be used to parse a RC file

    :rtype: pyparsing.ParserElement
    """

    # Comments: '//' line comments or C-style block comments.
    one_line_comment = '//' + restOfLine

    comments = cStyleComment ^ one_line_comment

    # Preprocessor directives such as '#include ...'.
    precompiler = Word('#', alphanums) + restOfLine

    # LANGUAGE statement, e.g. "LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US".
    language_definition = "LANGUAGE" + Word(alphas + '_').setResultsName(
        "language") + Optional(',' + Word(alphas +
                                          '_').setResultsName("sublanguage"))

    # Blocks may be delimited by '{'/'}' or by BEGIN/END keywords.
    block_start = (Keyword('{') | Keyword("BEGIN")).setName("block_start")
    block_end = (Keyword('}') | Keyword("END")).setName("block_end")

    reserved_words = block_start | block_end

    # An identifier that is not one of the reserved block delimiters.
    name_id = ~reserved_words + \
        Word(alphas, alphanums + '_').setName("name_id")

    numbers = Word(nums)

    # Decimal or hexadecimal ('0x...') integer constants.
    integerconstant = numbers ^ Combine('0x' + numbers)

    # Symbolic constant, optionally negated, e.g. "NOT WS_VISIBLE".
    constant = Combine(Optional(Keyword("NOT")) + (name_id | integerconstant),
                       adjacent=False,
                       joinString=' ')

    # Constants OR-ed together with '|', e.g. "WS_CHILD | WS_VISIBLE".
    combined_constants = delimitedList(constant, '|')

    # Everything before the block body, capturing an optional CAPTION string.
    block_options = Optional(
        SkipTo(Keyword("CAPTION"), failOn=block_start)("pre_caption") +
        Keyword("CAPTION") +
        quotedString("caption")) + SkipTo(block_start)("post_caption")

    # A control statement not modelled further: an id followed by its values.
    undefined_control = Group(
        name_id.setResultsName("id_control") +
        delimitedList(quotedString ^ constant ^ numbers
                      ^ Group(combined_constants)).setResultsName("values_"))

    block = block_start + \
        ZeroOrMore(undefined_control)("controls") + block_end

    # DIALOG / DIALOGEX resource (DIALOGEX tried first: longer keyword).
    dialog = name_id("block_id") + (Keyword("DIALOGEX") | Keyword("DIALOG")
                                    )("block_type") + block_options + block

    string_table = Keyword("STRINGTABLE")("block_type") + block_options + block

    # MENUITEM: either a value list or a SEPARATOR.
    menu_item = Keyword("MENUITEM")("block_type") + (
        commaSeparatedList("values_") | Keyword("SEPARATOR"))

    # POPUP blocks nest inside each other, hence the Forward declaration.
    popup_block = Forward()

    popup_block <<= Group(
        Keyword("POPUP")("block_type") + Optional(quotedString("caption")) +
        block_start + ZeroOrMore(Group(menu_item | popup_block))("elements") +
        block_end)("popups*")

    menu = name_id("block_id") + \
        Keyword("MENU")("block_type") + block_options + \
        block_start + ZeroOrMore(popup_block) + block_end

    # Any single RC statement.
    statem = comments ^ precompiler ^ language_definition ^ dialog ^ string_table ^ menu

    return statem
Beispiel #5
0
# '=*' presence filter, e.g. "(cn=*)".
present.setParseAction(lambda s, l, t: pureldap.LDAPFilter_present(t[0]))
# Substring filter components: initial*any*...*final (LDAP filter syntax).
initial = copy.copy(value)
initial.setParseAction(
    lambda s, l, t: pureldap.LDAPFilter_substrings_initial(t[0]))
initial.setName('initial')
any_value = value + Suppress(Literal("*"))
any_value.setParseAction(
    lambda s, l, t: pureldap.LDAPFilter_substrings_any(t[0]))
# NOTE(review): 'any' shadows the built-in of the same name at module level;
# left unchanged as other modules may import it under this name.
any = Suppress(Literal("*")) + ZeroOrMore(any_value)
any.setName('any')
final = copy.copy(value)
final.setName('final')
final.setParseAction(
    lambda s, l, t: pureldap.LDAPFilter_substrings_final(t[0]))
# Full substring filter: attr=initial*any*final.
substring = attr + Suppress(
    Literal("=")) + Group(Optional(initial) + any + Optional(final))
substring.setName('substring')


def _p_substring(s, l, t):
    # Wrap the attribute type and substring pieces into the filter object.
    attrtype, substrings = t
    return pureldap.LDAPFilter_substrings(type=attrtype, substrings=substrings)


substring.setParseAction(_p_substring)

# keystring / numericoid tokens of the LDAP attribute-description syntax.
keystring = Word(string.ascii_letters,
                 string.ascii_letters + string.digits + ';-')
keystring.setName('keystring')
numericoid = delimitedList(Word(string.digits), delim='.', combine=True)
numericoid.setName('numericoid')
Beispiel #6
0
from pyparsing import alphas, alphanums, Combine, delimitedList, Forward, Group, Literal, \
                      Keyword, nums, oneOf, Optional, ParserElement, Suppress, White, Word

# Packrat memoization speeds up the recursive grammar below.
ParserElement.enablePackrat()

LPAR, RPAR = map(Suppress, '()')
const = Literal('true') | Literal('false')

# Arithmetic operators; INTS_MODULUS_TOTAL (presumably an SMT solver's
# modulus operator -- confirm against the producing tool) becomes '%'.
AOps = oneOf('INTS_MODULUS_TOTAL * / + -').setParseAction(
    lambda s, l, t: ['%'] if t[0] == 'INTS_MODULUS_TOTAL' else t)
# Boolean keywords rewritten to symbolic form.
BOps = (Keyword('and').setParseAction(lambda s, l, t: ['&'])
        | Keyword('not').setParseAction(lambda s, l, t: ['!'])
        | Keyword('or').setParseAction(lambda s, l, t: ['|']))
ROps = oneOf('< > <= >= =')

# Integer literal with optional leading minus.
val = Combine(Optional('-') + Word(nums))
var = Word(alphas + '_:$', alphanums + '_:$')

term = val | var

let = Forward()
pred = Forward()
stmt = Forward()
expr = Forward()

# Prefix arithmetic expression converted to infix by interleaving the
# operator between the operands; a unary minus becomes a '0 -' subtraction.
expr << (term
         |
         (LPAR + AOps + Group(delimitedList(expr, delim=White(' '))) + RPAR
          ).setParseAction(lambda s, l, t: [list(joinit(t[1], t[0]))] if not (
              t[0] == '-' and len(t[1]) == 1) else [['0 -', t[1][0]]])
         | (LPAR + expr + RPAR))
Beispiel #7
0
# Arithmetic operators by precedence level; '%' is rewritten to 'mod'.
aop0 = oneOf('* /')
aop1 = oneOf('+ -')
aop2 = oneOf('%').setParseAction(lambda s, l, t: ['mod'])

# Boolean connectives rewritten to word form.
bop = oneOf('& |').setParseAction(lambda s, l, t: ['and']
                                  if t[0] == '&' else ['or'])
NOT = Literal('!')

# Relational operators; '!=' is rewritten to 'distinct'.
rop = oneOf('< > <= >= = !=').setParseAction(lambda s, l, t: ['distinct']
                                             if t[0] == '!=' else t)

# String builtin keywords.
GET, CAT, HAS, IND, LEN, REP, SUB, EQL = map(
    Literal, '#get #cat #has #ind #len #rep #sub #eql'.split())

var = Word(alphas + '_:$', alphanums + '_:$').setParseAction(addVar)
# Integer literal; negatives are rewritten to prefix form "(- n)".
ival = Combine(Optional('-') + Word(nums)).setParseAction(
    lambda s, l, t: ['(- %s)' % t[0][1:]] if t[0][0] == '-' else t)
# Coefficient-variable pair, e.g. "3x", rewritten as a multiplication.
ivar = (ival + var).setParseAction(lambda s, l, t: ['*', t[0], t[1]])

term = ivar | ival | var | QuotedString(quoteChar='"', unquoteResults=False)

# Forward declarations for the recursive statement/expression grammar.
stmt = Forward()
expr = Forward()
sexpr = Forward()

sexpr << (
    (GET + LPAR + expr + COMMA + expr +
     RPAR).setParseAction(lambda s, l, t: CharAtAction(t))
    | (CAT + LPAR + expr + COMMA + expr + RPAR).setParseAction(
        lambda s, l, t: [['Concat', chkString(t[1]),
                          chkString(t[2])]])
Beispiel #8
0
    def define_dot_parser(self):
        """Define dot grammar

        Based on the grammar http://www.graphviz.org/doc/info/lang.html
        """
        # punctuation -- some tokens are kept in the results (Literal),
        # others are dropped (Suppress)
        colon = Literal(":")
        lbrace = Suppress("{")
        rbrace = Suppress("}")
        lbrack = Suppress("[")
        rbrack = Suppress("]")
        lparen = Literal("(")
        rparen = Literal(")")
        equals = Suppress("=")
        comma = Literal(",")
        dot = Literal(".")
        slash = Literal("/")
        bslash = Literal("\\")
        star = Literal("*")
        semi = Suppress(";")
        at = Literal("@")
        minus = Literal("-")
        pluss = Suppress("+")

        # keywords
        strict_ = CaselessLiteral("strict")
        graph_ = CaselessLiteral("graph")
        digraph_ = CaselessLiteral("digraph")
        subgraph_ = CaselessLiteral("subgraph")
        node_ = CaselessLiteral("node")
        edge_ = CaselessLiteral("edge")

        # All punctuation except '_', plus whitespace: the characters that
        # may NOT appear in a bare (unquoted) string.
        punctuation_ = "".join([c for c in string.punctuation if c not in '_'
                                ]) + string.whitespace
        # token definitions

        identifier = Word(alphanums + "_").setName("identifier")

        #double_quoted_string = QuotedString('"', multiline=True,escChar='\\',
        #    unquoteResults=True) # dblQuotedString
        # Double-quoted string with backslash escapes; quotes are stripped.
        double_quoted_string = Regex(r'\"(?:\\\"|\\\\|[^"])*\"', re.MULTILINE)
        double_quoted_string.setParseAction(removeQuotes)
        # DOT allows quoted strings concatenated with '+'.
        quoted_string = Combine(
            double_quoted_string +
            Optional(OneOrMore(pluss + double_quoted_string)),
            adjacent=False)
        alphastring_ = OneOrMore(CharsNotIn(punctuation_))

        def parse_html(s, loc, toks):
            # Re-wrap the matched HTML label in '<<...>>'.
            return '<<%s>>' % ''.join(toks[0])

        opener = '<'
        closer = '>'
        # nestedExpr handles balanced <...> HTML-like labels; fall back to a
        # cruder pattern on pyparsing versions that lack it.
        try:
            html_text = pyparsing.nestedExpr(
                opener, closer,
                ((CharsNotIn(opener + closer).setParseAction(lambda t: t[0]))
                 )).setParseAction(parse_html)
        except:
            log.debug('nestedExpr not available.')
            log.warning('Old version of pyparsing detected. Version 1.4.8 or '
                        'later is recommended. Parsing of html labels may not '
                        'work properly.')
            html_text = Combine(Literal("<<") + OneOrMore(CharsNotIn(",]")))

        # Any DOT ID: bare string, HTML label, quoted string or identifier.
        ID = (
            alphastring_ | html_text | quoted_string
            |  #.setParseAction(strip_quotes) |
            identifier).setName("ID")

        float_number = Combine(Optional(minus) +
                               OneOrMore(Word(nums +
                                              "."))).setName("float_number")

        righthand_id = (float_number | ID).setName("righthand_id")

        # Node ports: ":port"/":port:compass" forms or "@angle".
        port_angle = (at + ID).setName("port_angle")

        port_location = ((OneOrMore(Group(colon + ID))
                          | Group(colon + lparen + ID + comma + ID +
                                  rparen))).setName("port_location")

        port = Combine(
            (Group(port_location + Optional(port_angle))
             | Group(port_angle + Optional(port_location)))).setName("port")

        node_id = (ID + Optional(port))
        # Attribute entries: "key" or "key=value", optionally comma separated.
        a_list = OneOrMore(ID + Optional(equals + righthand_id) +
                           Optional(comma.suppress())).setName("a_list")

        attr_list = OneOrMore(lbrack + Optional(a_list) + rbrack).setName(
            "attr_list").setResultsName('attrlist')

        # Default attributes: "graph [...]", "node [...]" or "edge [...]".
        attr_stmt = ((graph_ | node_ | edge_) + attr_list).setName("attr_stmt")

        edgeop = (Literal("--") | Literal("->")).setName("edgeop")

        stmt_list = Forward()
        graph_stmt = (lbrace + Optional(stmt_list) + rbrace +
                      Optional(semi)).setName("graph_stmt")

        edge_point = Forward()

        # Edge statements may chain: a -> b -> c [attrs].
        edgeRHS = OneOrMore(edgeop + edge_point)
        edge_stmt = edge_point + edgeRHS + Optional(attr_list)

        # Both the 'subgraph' keyword and the subgraph name are optional.
        subgraph = (
            Optional(subgraph_, '') + Optional(ID, '') +
            Group(graph_stmt)).setName("subgraph").setResultsName('ssubgraph')

        edge_point << (subgraph | graph_stmt | node_id)

        node_stmt = (node_id + Optional(attr_list) +
                     Optional(semi)).setName("node_stmt")

        # Statement alternatives, most specific forms attempted first.
        assignment = (ID + equals + righthand_id).setName("assignment")
        stmt = (assignment | edge_stmt | attr_stmt | subgraph | graph_stmt
                | node_stmt).setName("stmt")
        stmt_list << OneOrMore(stmt + Optional(semi))

        # Top level: [strict] (graph|digraph) [ID] { stmt_list }
        graphparser = ((Optional(strict_, 'notstrict') +
                        ((graph_ | digraph_)) + Optional(ID, '') + lbrace +
                        Group(Optional(stmt_list)) +
                        rbrace).setResultsName("graph"))

        singleLineComment = Group("//" + restOfLine) | Group("#" + restOfLine)

        # actions -- comments are skipped; each statement type is routed to
        # the corresponding self._proc_* callback to build the graph.
        graphparser.ignore(singleLineComment)
        graphparser.ignore(cStyleComment)
        node_id.setParseAction(self._proc_node_id)
        assignment.setParseAction(self._proc_attr_assignment)
        a_list.setParseAction(self._proc_attr_list)
        edge_stmt.setParseAction(self._proc_edge_stmt)
        node_stmt.setParseAction(self._proc_node_stmt)
        attr_stmt.setParseAction(self._proc_default_attr_stmt)
        attr_list.setParseAction(self._proc_attr_list_combine)
        subgraph.setParseAction(self._proc_subgraph_stmt)
        #graph_stmt.setParseAction(self._proc_graph_stmt)
        graphparser.setParseAction(self._main_graph_stmt)
        return graphparser
    (even binary content) inside the structure. This is done by pre-
    sizing the data with the NUMBER similar to Dan Bernstein's netstrings
    setup.
SPACE White space is basically ignored. This is interesting because,
    since Stackish is serialized consistently, you can use \n as the
    separation character and perform reasonable diffs on two structures.
"""

from pyparsing import Suppress,Word,nums,alphas,alphanums,Combine,oneOf,\
        Optional,QuotedString,Forward,Group,ZeroOrMore,srange

# One-character punctuation markers of the Stackish syntax, all suppressed.
MARK, UNMARK, AT, COLON, QUOTE = map(Suppress, "[]@:'")

NUMBER = Word(nums)
NUMBER.setParseAction(lambda t: int(t[0]))
# NOTE(review): FLOAT requires an explicit leading '+' or '-'; unsigned
# floats will not match -- confirm against the Stackish specification.
FLOAT = Combine(oneOf("+ -") + Word(nums) + "." + Optional(Word(nums)))
FLOAT.setParseAction(lambda t: float(t[0]))
STRING = QuotedString('"', multiline=True)
WORD = Word(alphas, alphanums + "_:")
ATTRIBUTE = Combine(AT + WORD)

# Forward-declared BLOB body; its exact length is patched in by
# setBodyLength() once the length prefix has been parsed.
strBody = Forward()


def setBodyLength(tokens):
    """Parse action for the BLOB length prefix.

    Re-binds ``strBody`` to match exactly the announced number of
    characters (netstring-style pre-sizing), then erases the length token
    from the results by returning an empty string.
    """
    length = int(tokens[0])
    strBody << Word(srange(r'[\0x00-\0xffff]'), exact=length)
    return ""


# A BLOB: 'length:data' wrapped in single quotes; the data length is fixed
# by the preceding NUMBER via setBodyLength (netstring-style pre-sizing).
BLOB = Combine(QUOTE + Word(nums).setParseAction(setBodyLength) + COLON +
               strBody + QUOTE)
Beispiel #10
0
    else:
        currentLogger.debug(message)


#####################################################
#
# Variants on individual files
#
#####################################################

from pyparsing import CharsNotIn, ZeroOrMore, Group, Suppress, OneOrMore, ParseResults, Optional, StringEnd

# Plain text: anything outside the option-list metacharacters.
phrase = CharsNotIn("[|]")
# A tag name: no metacharacters, commas or colons.
tag = CharsNotIn("[|]:,")
# Comma-separated tags terminated by ':'; a missing tag defaults to "base".
tags = Group(
    Optional(tag, default="base") + ZeroOrMore(Suppress(',') + tag) +
    Suppress(":"))
# One option: its tags plus the (possibly empty) phrase.
option = Group(tags + Optional(phrase, default=''))
# "[opt|opt|...]" -- a bracketed list of alternatives.
optionList = Group(
    Suppress("[") + option + ZeroOrMore(Suppress('|') + option) +
    Suppress("]"))
# A whole input: phrases and option lists through to the end of the string.
text = OneOrMore(phrase | optionList) + Suppress(StringEnd())


def clean(unicodeString):
    """Normalise the input string for parsing.

    For now the only cleanup performed (until a better policy is decided)
    is replacing non-breaking spaces (0xa0) with ordinary spaces.
    """
    nbsp = '\xa0'
    return unicodeString.replace(nbsp, ' ')

Beispiel #11
0
def sql_to_spec(query):
    """
    Convert an SQL query to a mongo spec.
    This only supports select statements. For now.
    :param query: String. A SQL query.
    :return: None or a dictionary containing a mongo spec.
    """
    @debug_print
    def fix_token_list(in_list):
        """
        Tokens as a list are sometimes deeply nested and hard to deal with.
        Improved parser grouping should remove the need for this.
        """
        # Unwrap single-element nested lists recursively, then shallow-copy.
        if isinstance(in_list, list) and len(in_list) == 1 and \
           isinstance(in_list[0], list):
            return fix_token_list(in_list[0])
        else:
            return [item for item in in_list]

    @debug_print
    def select_count_func(tokens=None):
        return full_select_func(tokens, 'count')

    @debug_print
    def select_distinct_func(tokens=None):
        return full_select_func(tokens, 'distinct')

    @debug_print
    def select_func(tokens=None):
        return full_select_func(tokens, 'select')

    def full_select_func(tokens=None, method='select'):
        """
        Take tokens and return a dictionary.
        """
        action = {'distinct': 'distinct', 'count': 'count'}.get(method, 'find')
        if tokens is None:
            return
        ret = {
            action: True,
            'fields': {item: 1
                       for item in fix_token_list(tokens.asList())}
        }
        if ret['fields'].get('id'):  # Use _id and not id
            # Drop _id from fields since mongo always return _id
            del ret['fields']['id']
        else:
            ret['fields']['_id'] = 0
        if "*" in ret['fields'].keys():
            # SELECT * -- no field projection at all.
            ret['fields'] = {}
        return ret

    @debug_print
    def where_func(tokens=None):
        """
        Take tokens and return a dictionary.
        """
        if tokens is None:
            return

        # Pad with None so indexing tokens[1]/tokens[2] is safe for short
        # token lists.
        tokens = fix_token_list(tokens.asList()) + [None, None, None]
        cond = {
            '!=': '$ne',
            '>': '$gt',
            '>=': '$gte',
            '<': '$lt',
            '<=': '$lte',
            'like': '$regex'
        }.get(tokens[1])

        find_value = tokens[2].strip('"').strip("'")
        if cond == '$regex':
            # Translate SQL LIKE wildcards: a missing leading/trailing '%'
            # anchors the regex at that end.
            if find_value[0] != '%':
                find_value = "^" + find_value
            if find_value[-1] != '%':
                find_value = find_value + "$"
            find_value = find_value.strip("%")

        if cond is None:
            # Plain equality ('=') maps to a direct value match.
            expr = {tokens[0]: find_value}
        else:
            expr = {tokens[0]: {cond: find_value}}

        return expr

    @debug_print
    def combine(tokens=None):
        # Merge "<lhs> AND/OR <rhs>" into a mongo $and/$or expression.
        if tokens:
            tokens = fix_token_list(tokens.asList())
            if len(tokens) == 1:
                return tokens
            else:
                return {'${}'.format(tokens[1]): [tokens[0], tokens[2]]}

    # TODO: Reduce list of imported functions.
    from pyparsing import (Word, alphas, CaselessKeyword, Group, Optional,
                           ZeroOrMore, Forward, Suppress, alphanums, OneOrMore,
                           quotedString, Combine, Keyword, Literal,
                           replaceWith, oneOf, nums, removeQuotes,
                           QuotedString, Dict)

    LPAREN, RPAREN = map(Suppress, "()")
    EXPLAIN = CaselessKeyword('EXPLAIN').setParseAction(
        lambda t: {'explain': True})
    SELECT = Suppress(CaselessKeyword('SELECT'))
    WHERE = Suppress(CaselessKeyword('WHERE'))
    FROM = Suppress(CaselessKeyword('FROM'))
    CONDITIONS = oneOf("= != < > <= >= like", caseless=True)
    #CONDITIONS = (Keyword("=") | Keyword("!=") |
    #              Keyword("<") | Keyword(">") |
    #              Keyword("<=") | Keyword(">="))
    AND = CaselessKeyword('and')
    OR = CaselessKeyword('or')

    # Column names (dotted allowed) or quoted literals.
    word_match = Word(alphanums + "._") | quotedString
    number = Word(nums)
    # One "field <op> value" condition.
    statement = Group(word_match + CONDITIONS +
                      word_match).setParseAction(where_func)
    select_fields = Group(
        SELECT + (word_match | Keyword("*")) +
        ZeroOrMore(Suppress(",") +
                   (word_match | Keyword("*")))).setParseAction(select_func)

    select_distinct = (SELECT + Suppress(CaselessKeyword('DISTINCT')) +
                       LPAREN + (word_match | Keyword("*")) +
                       ZeroOrMore(Suppress(",") +
                                  (word_match | Keyword("*"))) +
                       Suppress(RPAREN)).setParseAction(select_distinct_func)

    select_count = (SELECT + Suppress(CaselessKeyword('COUNT')) + LPAREN +
                    (word_match | Keyword("*")) +
                    ZeroOrMore(Suppress(",") + (word_match | Keyword("*"))) +
                    Suppress(RPAREN)).setParseAction(select_count_func)
    LIMIT = (Suppress(CaselessKeyword('LIMIT')) +
             word_match).setParseAction(lambda t: {'limit': t[0]})
    SKIP = (Suppress(CaselessKeyword('SKIP')) +
            word_match).setParseAction(lambda t: {'skip': t[0]})
    from_table = (
        FROM +
        word_match).setParseAction(lambda t: {'collection': t.asList()[0]})
    #word = ~(AND | OR) + word_match

    operation_term = (
        select_distinct | select_count | select_fields
    )  # place holder for other SQL statements. ALTER, UPDATE, INSERT
    expr = Forward()
    atom = statement | (LPAREN + expr + RPAREN)
    and_term = (OneOrMore(atom) +
                ZeroOrMore(AND + atom)).setParseAction(combine)
    or_term = (and_term + ZeroOrMore(OR + and_term)).setParseAction(combine)

    where_clause = (WHERE + or_term).setParseAction(lambda t: {'spec': t[0]})
    list_term = Optional(EXPLAIN) + operation_term + from_table + \
                Optional(where_clause) + Optional(LIMIT) + Optional(SKIP)
    expr << list_term

    ret = expr.parseString(query.strip())
    query_dict = {}
    # BUGFIX: ``map`` is lazy in Python 3, so the previous
    # ``_ = map(query_dict.update, ret)`` never executed the updates and the
    # function always returned an empty dict.  Merge each partial spec
    # explicitly instead.
    for partial_spec in ret:
        query_dict.update(partial_spec)
    return query_dict
Beispiel #12
0
# vim: set encoding=utf-8
"""Some common combinations"""
from pyparsing import (FollowedBy, LineEnd, Literal, OneOrMore, Optional,
                       Suppress, SkipTo, ZeroOrMore)

from regparser.grammar import atomic
from regparser.grammar.utils import keep_pos, Marker, QuickSearchable

# "part.section" citations; the marker variant also records the position of
# a leading section marker in the "marker" result.
period_section = Suppress(".") + atomic.section
part_section = atomic.part + period_section
marker_part_section = (
    keep_pos(atomic.section_marker).setResultsName("marker") + part_section)

# Paragraph hierarchy, defined deepest level first; each level optionally
# carries the next deeper one, e.g. (a)(1)(i)(A)(1)(i).
depth6_p = atomic.em_roman_p | atomic.plaintext_level6_p
depth5_p = ((atomic.em_digit_p | atomic.plaintext_level5_p) +
            Optional(depth6_p))
depth4_p = atomic.upper_p + Optional(depth5_p)
depth3_p = atomic.roman_p + Optional(depth4_p)
depth2_p = atomic.digit_p + Optional(depth3_p)
# A lower-case paragraph must not be immediately followed by an upper-case
# marker (negative lookahead), which would indicate a different citation.
depth1_p = atomic.lower_p + ~FollowedBy(atomic.upper_p) + Optional(depth2_p)
any_depth_p = QuickSearchable(depth1_p | depth2_p | depth3_p | depth4_p
                              | depth5_p | depth6_p)

# Comment (interpretation) hierarchy, analogous to the paragraph levels.
depth3_c = atomic.upper_c + Optional(atomic.em_digit_c)
depth2_c = atomic.roman_c + Optional(depth3_c)
depth1_c = atomic.digit_c + Optional(depth2_c)
any_a = atomic.upper_a | atomic.digit_a

# A section citation followed by comment levels.
section_comment = atomic.section + depth1_c

# A section citation followed by paragraph levels.
section_paragraph = QuickSearchable(atomic.section + depth1_p)
Beispiel #13
0
# Section-sign / word markers (singular and plural forms).
section_marker = Suppress(Regex(u"§|Section|section"))
sections_marker = Suppress(Regex(u"§§|Sections|sections"))

# Most of these markers could be SuffixMarkers (which arise due to errors in
# the regulation text). We'll wait until we see explicit examples before
# converting them though, to limit false matches
paragraph_marker = Marker("paragraph")
paragraphs_marker = SuffixMarker("paragraphs")

part_marker = Marker("part")
parts_marker = Marker("parts")

subpart_marker = Marker("subpart")

# The various ways commentary can be referenced, optionally followed by
# "of"/"to".
comment_marker = ((Marker("comment") | Marker("commentary") |
                   (Marker("official") + Marker("interpretations")) |
                   (Marker("supplement") + Suppress(WordBoundaries("I")))) +
                  Optional(Marker("of") | Marker("to")))
comments_marker = Marker("comments")

appendix_marker = Marker("appendix")
appendices_marker = Marker("appendices")

# Connectives between citations; a "through"/dash range sets the "through"
# result flag to True.
conj_phrases = (
    (Suppress(",") + Optional(Marker("and") | Marker("or"))) | Marker("and")
    | Marker("or") | (Marker("except") + Marker("for"))
    | Suppress(Marker("through") | "-"
               | u"–").setParseAction(lambda: True).setResultsName("through"))

# CFR title number, e.g. the "12" of "12 CFR 1026".
title = Word(string.digits).setResultsName("cfr_title")
Beispiel #14
0
# A backslash escape of a single printable or non-ASCII character
# (CSS-style character escaping).
escaped = (
    Literal("\\").suppress()
    +
    # chr(20)-chr(126) + chr(128)-unichr(sys.maxunicode)
    Regex("[\u0020-\u007e\u0080-\uffff]", re.IGNORECASE)
)


def convertToUnicode(t):
    """Parse action: turn a hex escape token into its Unicode character."""
    codepoint = int(t[0], 16)
    return chr(codepoint)


# "\XXXXXX" hex escape (1-6 hex digits), optionally terminated by a single
# whitespace character, converted to the referenced Unicode character.
hex_unicode = (
    Literal("\\").suppress()
    + Regex("[0-9a-f]{1,6}", re.IGNORECASE)
    + Optional(White(exact=1)).suppress()
).setParseAction(convertToUnicode)


# Hex escapes are tried first so "\41" is not mis-read as an escaped '4'.
escape = hex_unicode | escaped

# any unicode literal outside the 0-127 ascii range
nonascii = Regex("[^\u0000-\u007f]")

# single character for starting an identifier.
nmstart = Regex("[A-Z]", re.IGNORECASE) | nonascii | escape

# subsequent identifier characters additionally allow digits and '-'.
nmchar = Regex("[0-9A-Z-]", re.IGNORECASE) | nonascii | escape

# A full identifier: one start character then any number of name characters.
identifier = Combine(nmstart + ZeroOrMore(nmchar))
# SQL identifiers and dotted column/table names (upper-cased so that later
# comparisons are case-insensitive).
ident = Word(alphas, alphanums + "_$").setName("identifier")
columnName = Upcase(delimitedList(ident, ".", combine=True))
columnNameList = Group(delimitedList(columnName))
tableName = Upcase(delimitedList(ident, ".", combine=True))
tableNameList = Group(delimitedList(tableName))

# WHERE expressions are recursive, hence the Forward declaration.
whereExpression = Forward()
and_ = Keyword("and", caseless=True)
or_ = Keyword("or", caseless=True)
in_ = Keyword("in", caseless=True)

# Numeric literals with optional sign and exponent.
E = CaselessLiteral("E")
binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True)
arithSign = Word("+-", exact=1)
realNum = Combine(
    Optional(arithSign) +
    (Word(nums) + "." + Optional(Word(nums)) | ("." + Word(nums))) +
    Optional(E + Optional(arithSign) + Word(nums)))
intNum = Combine(
    Optional(arithSign) + Word(nums) +
    Optional(E + Optional("+") + Word(nums)))

columnRval = realNum | intNum | quotedString | columnName  # need to add support for alg expressions
# A single condition, an IN (...) test, or a parenthesized sub-expression.
whereCondition = Group((columnName + binop + columnRval)
                       | (columnName + in_ + "(" + delimitedList(columnRval) +
                          ")") | (columnName + in_ + "(" + selectStmt + ")")
                       | ("(" + whereExpression + ")"))
# Conditions chained with and/or (right-recursive).
whereExpression << whereCondition + ZeroOrMore((and_ | or_) + whereExpression)

# define the grammar
selectStmt << (selectToken + ('*' | columnNameList).setResultsName("columns") +
Beispiel #16
0

def pushFirst(s, loc, toks):
    """Parse action: push the first matched token onto the expression stack.

    The first parameter was renamed from ``str`` to ``s`` so it no longer
    shadows the built-in; pyparsing passes parse-action arguments
    positionally, so callers are unaffected.
    """
    exprStack.append(toks[0])


def assignVar(s, loc, toks):
    """Parse action: record the assignment target on the variable stack.

    The first parameter was renamed from ``str`` to ``s`` so it no longer
    shadows the built-in; pyparsing passes parse-action arguments
    positionally, so callers are unaffected.
    """
    varStack.append(toks[0])


# define grammar
point = Literal('.')
e = CaselessLiteral('E')
plusorminus = Literal('+') | Literal('-')
number = Word(nums)
# Signed integer and float literals, e.g. "-12", "3.14E-2".
integer = Combine(Optional(plusorminus) + number)
floatnumber = Combine(integer + Optional(point + Optional(number)) +
                      Optional(e + integer))

ident = Word(alphas, alphanums + '_')

# Operator tokens; parentheses are suppressed from the parse results.
plus = Literal("+")
minus = Literal("-")
mult = Literal("*")
div = Literal("/")
lpar = Literal("(").suppress()
rpar = Literal(")").suppress()
addop = plus | minus
multop = mult | div
expop = Literal("^")
assign = Literal("=")
Beispiel #17
0
def property_grammar():
    """Return the pyparsing grammar for one hwdb property assignment.

    The grammar matches ``NAME=VALUE`` for each of the known fixed
    property names, plus the pattern-named ``KEYBOARD_KEY_*`` and
    ``EVDEV_ABS_*`` properties, and requires end-of-line afterwards.
    """
    ParserElement.setDefaultWhitespaceChars(' ')

    def bool01():
        # A fresh '0'/'1' alternation for every property that needs one.
        return Or((Literal('0'), Literal('1')))

    # One MOUSE_DPI entry: optional '*' default marker, then <dpi>@<hz>.
    dpi_setting = Group(Optional('*')('DEFAULT') + INTEGER('DPI') + Suppress('@') + INTEGER('HZ'))('SETTINGS*')
    mount_matrix_row = SIGNED_REAL + ',' + SIGNED_REAL + ',' + SIGNED_REAL
    mount_matrix = Group(mount_matrix_row + ';' + mount_matrix_row + ';' + mount_matrix_row)('MOUNT_MATRIX')
    xkb_setting = Optional(Word(alphanums + '+-/@._'))

    # Although this set doesn't cover all of characters in database
    # entries, it's enough for test targets.
    name_literal = Word(printables + ' ')

    props = (
        ('MOUSE_DPI', Group(OneOrMore(dpi_setting))),
        ('MOUSE_WHEEL_CLICK_ANGLE', INTEGER),
        ('MOUSE_WHEEL_CLICK_ANGLE_HORIZONTAL', INTEGER),
        ('MOUSE_WHEEL_CLICK_COUNT', INTEGER),
        ('MOUSE_WHEEL_CLICK_COUNT_HORIZONTAL', INTEGER),
        ('ID_AUTOSUSPEND', bool01()),
        ('ID_AV_PRODUCTION_CONTROLLER', bool01()),
        ('ID_PERSIST', bool01()),
        ('ID_INPUT', bool01()),
        ('ID_INPUT_ACCELEROMETER', bool01()),
        ('ID_INPUT_JOYSTICK', bool01()),
        ('ID_INPUT_KEY', bool01()),
        ('ID_INPUT_KEYBOARD', bool01()),
        ('ID_INPUT_MOUSE', bool01()),
        ('ID_INPUT_POINTINGSTICK', bool01()),
        ('ID_INPUT_SWITCH', bool01()),
        ('ID_INPUT_TABLET', bool01()),
        ('ID_INPUT_TABLET_PAD', bool01()),
        ('ID_INPUT_TOUCHPAD', bool01()),
        ('ID_INPUT_TOUCHSCREEN', bool01()),
        ('ID_INPUT_TRACKBALL', bool01()),
        ('ID_SIGNAL_ANALYZER', bool01()),
        ('POINTINGSTICK_SENSITIVITY', INTEGER),
        ('POINTINGSTICK_CONST_ACCEL', REAL),
        ('ID_INPUT_JOYSTICK_INTEGRATION', Or(('internal', 'external'))),
        ('ID_INPUT_TOUCHPAD_INTEGRATION', Or(('internal', 'external'))),
        ('XKB_FIXED_LAYOUT', xkb_setting),
        ('XKB_FIXED_VARIANT', xkb_setting),
        ('XKB_FIXED_MODEL', xkb_setting),
        ('KEYBOARD_LED_NUMLOCK', Literal('0')),
        ('KEYBOARD_LED_CAPSLOCK', Literal('0')),
        ('ACCEL_MOUNT_MATRIX', mount_matrix),
        ('ACCEL_LOCATION', Or(('display', 'base'))),
        ('PROXIMITY_NEAR_LEVEL', INTEGER),
        ('IEEE1394_UNIT_FUNCTION_MIDI', bool01()),
        ('IEEE1394_UNIT_FUNCTION_AUDIO', bool01()),
        ('IEEE1394_UNIT_FUNCTION_VIDEO', bool01()),
        ('ID_VENDOR_FROM_DATABASE', name_literal),
        ('ID_MODEL_FROM_DATABASE', name_literal),
        ('ID_TAG_MASTER_OF_SEAT', Literal('1')),
        ('ID_INFRARED_CAMERA', bool01()),
        ('ID_CAMERA_DIRECTION', Or(('front', 'rear'))),
    )
    # NAME=VALUE for each fixed name ('-' makes failures past '=' fatal).
    fixed_props = [Literal(name)('NAME') - Suppress('=') - val('VALUE')
                   for name, val in props]
    # KEYBOARD_KEY_<hex scancode> = [!]keyname  (or a bare '!').
    kbd_props = [Regex(r'KEYBOARD_KEY_[0-9a-f]+')('NAME')
                 - Suppress('=') -
                 ('!' ^ (Optional('!') - Word(alphanums + '_')))('VALUE')
                ]
    # EVDEV_ABS_<two-hex-digit axis> = colon-separated numbers.
    abs_props = [Regex(r'EVDEV_ABS_[0-9a-f]{2}')('NAME')
                 - Suppress('=') -
                 Word(nums + ':')('VALUE')
                ]

    return Or(fixed_props + kbd_props + abs_props) + EOL
Beispiel #18
0
    def _net_decl(self):
        """
            delay3 ::= # delay_value
                      | # ( mintypmax_expression [ , mintypmax_expression [ , mintypmax_expression ] ] )
            delay2 ::= # delay_value
                      | # ( mintypmax_expression [ , mintypmax_expression ] )
            delay_value ::= unsigned_number
                           | real_number
                           | identifier
            net_declaration ::= net_type [ signed ] [ delay3 ] list_of_net_identifiers ;
                                | net_type [ drive_strength ] [ signed ]
                                    [ delay3 ] list_of_net_decl_assignments ;
                                | net_type [ vectored | scalared ] [ signed ]
                                    range [ delay3 ] list_of_net_identifiers ;
                                | net_type [ drive_strength ] [ vectored | scalared ] [ signed ]
                                    range [ delay3 ] list_of_net_decl_assignments ;
                                | trireg [ charge_strength ] [ signed ]
                                    [ delay3 ] list_of_net_identifiers ;
                                | trireg [ drive_strength ] [ signed ]
                                    [ delay3 ] list_of_net_decl_assignments ;
                                | trireg [ charge_strength ] [ vectored | scalared ] [ signed ]
                                    range [ delay3 ] list_of_net_identifiers ;
                                | trireg [ drive_strength ] [ vectored | scalared ] [ signed ]
                                    range [ delay3 ] list_of_net_decl_assignments ;
        Not Implemented Yet:
            [ charge_strength ]: Defined in this class but not being used here
            delay_value ::= real_number
        """
        # TODO: delay_val should use unsigned_number instead of number
        delay_val = number | identifier
        # delay3: '#' followed by either a single delay value or a
        # parenthesized list of one to three mintypmax expressions.
        self.delay3 = Group(SHARP + (
            (LPARENTH + self.expr.mintypmax_expression +
             Optional(COMMA + self.expr.mintypmax_expression +
                      Optional(COMMA + self.expr.mintypmax_expression)) +
             RPARENTH)
            | delay_val))
        # delay2: same shape, but at most two mintypmax expressions.
        self.delay2 = Group(SHARP + (
            (LPARENTH + self.expr.mintypmax_expression +
             Optional(COMMA + self.expr.mintypmax_expression) + RPARENTH)
            | delay_val))
        vectored_kw = Keyword('vectored')
        scalared_kw = Keyword('scalared')

        # TODO: Review the following defines, and trireg with drive_strength has not been tested
        # net_type [ signed ] [ delay3 ] list_of_net_identifiers ;
        net_decl_0 = Group(net_type + Optional(self.signed_kw) +
                           Optional(self.delay3) + self.l_net_idx)
        # trireg [ charge_strength ] [ signed ] [ delay3 ] list_of_net_identifiers ;
        trireg_decl_0 = Group(trireg + Optional(self.charge_strength) +
                              Optional(self.signed_kw) +
                              Optional(self.delay3) + self.l_net_idx)

        # net_type [ drive_strength ] [ signed ] [ delay3 ] list_of_net_decl_assignments ;
        net_decl_1 = Group(net_type + Optional(self.drive_strength) +
                           Optional(self.signed_kw) + Optional(self.delay3) +
                           self.l_net_decl_assign)
        # trireg [ drive_strength ] [ signed ] [ delay3 ] list_of_net_decl_assignments ;
        trireg_decl_1 = Group(trireg + Optional(self.drive_strength) +
                              Optional(self.signed_kw) +
                              Optional(self.delay3) + self.l_net_decl_assign)

        # net_type [ vectored | scalared ] [ signed ] range [ delay3 ] list_of_net_identifiers ;
        net_decl_2 = Group(net_type + Optional(vectored_kw | scalared_kw) +
                           Optional(self.signed_kw) + self.the_range +
                           Optional(self.delay3) + self.l_net_idx)
        # trireg [ charge_strength ] [ vectored | scalared ] [ signed ] range [ delay3 ] list_of_net_identifiers ;
        trireg_decl_2 = Group(trireg + Optional(self.charge_strength) +
                              Optional(vectored_kw | scalared_kw) +
                              Optional(self.signed_kw) + self.the_range +
                              Optional(self.delay3) + self.l_net_idx)

        # net_type [ drive_strength ] [ vectored | scalared ] [ signed ] range [ delay3 ] list_of_net_decl_assignments ;
        net_decl_3 = Group(net_type + Optional(self.drive_strength) +
                           Optional(vectored_kw | scalared_kw) +
                           Optional(self.signed_kw) + self.the_range +
                           Optional(self.delay3) + self.l_net_decl_assign)
        # trireg [ drive_strength ] [ vectored | scalared ] [ signed ] range [ delay3 ] list_of_net_decl_assignments ;
        trireg_decl_3 = Group(trireg + Optional(self.drive_strength) +
                              Optional(vectored_kw | scalared_kw) +
                              Optional(self.signed_kw) + self.the_range +
                              Optional(self.delay3) + self.l_net_decl_assign)

        # MatchFirst ordering is deliberate: trireg alternatives before the
        # generic net_type ones, and the longer forms before shorter ones.
        net_decl = (trireg_decl_3 | trireg_decl_2 | trireg_decl_1
                    | trireg_decl_0
                    | net_decl_3 | net_decl_2 | net_decl_1 | net_decl_0) + SEMI
        return net_decl
Beispiel #19
0
class BoolNot(object):
    """Logical-negation node built by infixNotation for '!' expressions.

    ``t[0]`` is the matched group ``['!', operand]``; only the operand is
    kept, and negation happens lazily when the node is evaluated in a
    boolean context.
    """

    def __init__(self, t):
        # t[0][0] is the '!' literal; t[0][1] is the operand expression.
        self.arg = t[0][1]

    def __bool__(self):
        return not bool(self.arg)

    def __str__(self):
        return "!%s" % (self.arg,)

    __repr__ = __str__
    # Python 2 truth-value protocol alias.
    __nonzero__ = __bool__


if PYPARSING_AVAILABLE:
    # Terminals for the postfix-field filter language (only built when
    # pyparsing could be imported).
    # Keyword drawn from the postfixfields list (defined elsewhere).
    PF_KEYWORD=oneOf(postfixfields)
    # Integer literal, converted to int by its parse action.
    intnum = Word(nums).setParseAction( lambda s,l,t: [ int(t[0]) ] )
    # Quoted string ('...' or "...") or /regex/ with optional i/m flags.
    charstring=QuotedString(quoteChar='"') | QuotedString(quoteChar="'") | (QuotedString(quoteChar='/') + Optional(Word("im")))
    AttOperand= charstring | intnum


def makeparser(values):
    SimpleExpression = PF_KEYWORD('pfvalue') + AttOperator('operator') + AttOperand('testvalue')
        
    booleanrule = infixNotation( SimpleExpression,
        [
        ("!", 1, opAssoc.RIGHT, BoolNot),
        ("&&", 2, opAssoc.LEFT,  BoolAnd),
        ("||",  2, opAssoc.LEFT,  BoolOr),
        ])
    
    def evalResult(loc,pos,tokens):
        modifiers=None
Beispiel #20
0
    def _task_decl(self):
        """
            task_declaration ::= task [ automatic ] task_identifier ;
                                    { task_item_declaration }
                                    statement_or_null
                                    endtask
                                | task [ automatic ] task_identifier ( [ task_port_list ] ) ;
                                    { block_item_declaration }
                                    statement_or_null
                                    endtask
            task_item_declaration ::= block_item_declaration
                                | { attribute_instance } tf_input_declaration ;
                                | { attribute_instance } tf_output_declaration ;
                                | { attribute_instance } tf_inout_declaration ;
            task_port_list ::= task_port_item { , task_port_item }
            task_port_item ::= { attribute_instance } tf_input_declaration
                            | { attribute_instance } tf_output_declaration
                            | { attribute_instance } tf_inout_declaration
            tf_input_declaration ::= input [ reg ] [ signed ] [ range ] list_of_port_identifiers
                            | input task_port_type list_of_port_identifiers
            tf_output_declaration ::= output [ reg ] [ signed ] [ range ] list_of_port_identifiers
                            | output task_port_type list_of_port_identifiers
            tf_inout_declaration ::= inout [ reg ] [ signed ] [ range ] list_of_port_identifiers
                            | inout task_port_type list_of_port_identifiers
            task_port_type ::= integer | real | realtime | time
        """
        self.auto_kw = Keyword('automatic')
        task_kw = Keyword('task')
        endtask_kw = Keyword('endtask')

        input_kw = Keyword('input')
        output_kw = Keyword('output')
        inout_kw = Keyword('inout')
        # task_port_type ::= integer | real | realtime | time
        task_port_type = self.integer_kw | self.real_kw | self.realtime_kw | self.time_kw

        # The ~Keyword('input') lookahead stops the identifier list from
        # swallowing the next port declaration's direction keyword inside
        # a comma-separated task_port_list.
        self.tf_input_decl = (
            (input_kw + Optional(self.reg_kw) + Optional(self.signed_kw) +
             Optional(self.the_range) +
             delimitedList(~Keyword('input') + identifier))
            | (input_kw + task_port_type + delimitedList(identifier)))

        # NOTE(review): the output/inout variants below also guard only
        # against 'input' -- this looks copy-pasted from tf_input_decl;
        # confirm whether 'output' and 'inout' should be excluded too.
        self.tf_output_decl = (
            (output_kw + Optional(self.reg_kw) + Optional(self.signed_kw) +
             Optional(self.the_range) +
             delimitedList(~Keyword('input') + identifier))
            | (output_kw + task_port_type + delimitedList(identifier)))

        self.tf_inout_decl = (
            (inout_kw + Optional(self.reg_kw) + Optional(self.signed_kw) +
             Optional(self.the_range) +
             delimitedList(~Keyword('input') + identifier))
            | (inout_kw + task_port_type + delimitedList(identifier)))

        _task_port_item = self.tf_input_decl | self.tf_output_decl | self.tf_inout_decl
        task_port_list = delimitedList(_task_port_item)

        # task_item_declaration: block items or port declarations + ';'.
        task_item_decl = (self.block_item_declaration
                          | (self.tf_input_decl + SEMI)
                          | (self.tf_output_decl + SEMI)
                          | (self.tf_inout_decl + SEMI))

        # Two top-level forms: header without a port list (ports declared
        # in the body), or with a parenthesized task_port_list.
        task_decl = ((task_kw + Optional(self.auto_kw) + identifier + SEMI +
                      ZeroOrMore(task_item_decl) +
                      self.stmt.statement_or_null + endtask_kw)
                     | (task_kw + Optional(self.auto_kw) + identifier +
                        LPARENTH + Optional(task_port_list) + RPARENTH + SEMI +
                        ZeroOrMore(self.block_item_declaration) +
                        self.stmt.statement_or_null + endtask_kw))

        return task_decl
Beispiel #21
0
 def __init__(self):
     """
     expop   :: '^'
     multop  :: 'x' | '/'
     addop   :: '+' | '-'
     integer :: ['+' | '-'] '0'..'9'+
     atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
     factor  :: atom [ expop factor ]*
     term    :: factor [ multop factor ]*
     expr    :: term [ addop term ]*

     Builds the expression grammar once and stores it in ``self.bnf``.
     Matched operands/operators are pushed via ``self.push_first`` /
     ``self.push_unary_minus`` for later stack-based evaluation with the
     ``self.opn`` and ``self.function`` tables.
     """
     point = Literal(".")
     exp = CaselessLiteral("E")
     # Signed float with optional fraction and exponent, e.g. "-1.5E+3".
     fnumber = Combine(
         Word("+-" + nums, nums) + Optional(point + Optional(Word(nums))) +
         Optional(exp + Word("+-" + nums, nums)))
     ident = Word(alphas, alphas + nums + "_$")
     plus = Literal("+")
     minus = Literal("-")
     mult = Literal("x")  # NB: multiplication is spelled 'x', not '*'
     div = Literal("/")
     lpar = Literal("(").suppress()
     rpar = Literal(")").suppress()
     addop = plus | minus
     multop = mult | div
     powop = Literal("^")
     pi = CaselessLiteral("PI")
     expr = Forward()
     atom = (
         (Optional(oneOf("- +")) +
          (pi | exp | fnumber | ident + lpar + expr + rpar).setParseAction(
              self.push_first))
         | Optional(oneOf("- +")) +
         Group(lpar + expr + rpar)).setParseAction(self.push_unary_minus)
     # by defining exponentiation as "atom [ ^ factor ]..." instead of
     # "atom [ ^ atom ]...", we get right-to-left exponents, instead of
     # left-to-right; that is, 2^3^2 = 2^(3^2), not (2^3)^2.
     factor = Forward()
     factor << atom + ZeroOrMore(
         (powop + factor).setParseAction(self.push_first))
     term = factor + ZeroOrMore(
         (multop + factor).setParseAction(self.push_first))
     expr << term + ZeroOrMore(
         (addop + term).setParseAction(self.push_first))
     self.bnf = expr
     # map operator symbols to corresponding arithmetic operations
     epsilon = 1e-12
     self.opn = {
         "+": operator.add,
         "-": operator.sub,
         "x": operator.mul,
         "/": operator.truediv,
         "^": operator.pow
     }
     self.function = {
         "sin": math.sin,
         "cos": math.cos,
         "tan": math.tan,
         "abs": abs,
         "trunc": int,
         "round": round,
         # BUGFIX: the original used the Python 2-only builtin cmp(),
         # which raises NameError on Python 3.  Return the sign of a,
         # treating |a| <= epsilon as zero, exactly as before.
         "sgn": lambda a: 0 if abs(a) <= epsilon else (1 if a > 0 else -1)
     }
Beispiel #22
0
        def parse_step1(morph):
            """Parse the morphology field of the qurany corpus.

            The raw tag string is first bracketed with sentinel markers
            ($ = start, £ = start of a POS segment, µ = start of a PRON
            suffix, # = end) and the &lt;/&gt; HTML entities are
            unescaped; the result is then split into prefixes, base POS
            segments and pronoun suffixes.
            """

            string = "$ " + str(morph).replace("POS:", "£ POS:").replace(
                "PRON:", "µ PRON:").replace("&lt;", "<").replace("&gt;",
                                                                 ">") + " #"
            # sentinel markers (all suppressed from the parse result)
            begin = Keyword('$').suppress()
            center = Keyword('£').suppress()
            last = Keyword('µ').suppress()
            end = Keyword('#').suppress()
            skip = SkipTo(end).suppress()

            # prefixes run up to the first POS (£) marker
            prefix = Word(alphas + "+" + ":")
            prefixes = Group(ZeroOrMore(~center + prefix))

            genderK = TagKeywords(["M", "F"])
            numberK = TagKeywords(["S", "D", "P"])
            personK = TagKeywords(["1", "2", "3"])

            genderL = TagLiterals(["M", "F"])
            numberL = TagLiterals(["S", "D", "P"])
            personL = TagLiterals(["1", "2", "3"])

            # person/gender/number combinations, most specific first
            person_ = personL + Optional(genderL) + Optional(numberL)
            gender_ = genderL + numberL

            gen = person_ | gender_ | numberK | genderK
            pos = "POS:" + Word(alphas)
            lem = "LEM:" + CharsNotIn(" ")
            root = "ROOT:" + CharsNotIn(" ")
            sp = "SP:" + CharsNotIn(" ")
            mood = "MOOD:" + CharsNotIn(" ")

            aspect = TagKeywords(["PERF", "IMPF", "IMPV"])

            voice = TagKeywords(["ACT", "PASS"])
            form = TagKeywords([
                "(I)", "(II)", "(III)", "(IV)", "(V)", "(VI)", "(VII)",
                "(VIII)", "(IX)", "(X)", "(XI)", "(XII)"
            ])
            verb = aspect | voice | form

            voc = Keyword("+voc").suppress()

            deriv = TagKeywords(["ACT", "PCPL", "PASS", "VN"])

            state = TagKeywords(["DEF", "INDEF"])
            case = TagKeywords(["NOM", "ACC", "GEN"])
            nom = case | state

            # 'skip' comes last: any unrecognised tag is silently discarded
            tag = lem | root | sp | mood | gen | verb | deriv | nom | voc | skip
            part = Group(center + pos +
                         ZeroOrMore(~center + ~last + ~end + tag))

            base = Group(OneOrMore(~end + ~last + part))

            pron = "PRON:" + Group(gen)
            suffixes = Group(ZeroOrMore(~end + last + pron))

            # whole entry: prefixes, POS segments, pronoun suffixes
            whole = begin + prefixes + base + suffixes + end

            parsed = whole.parseString(string)

            return parsed
Beispiel #23
0
from pyparsing import Literal, Word, alphas, Optional, OneOrMore, Forward, Group, ZeroOrMore, Literal, Empty, oneOf, nums, ParserElement
from pydash import flatten_deep

# Packrat memoization: this grammar is recursive and backtracks heavily.
ParserElement.enablePackrat()

# $ means words, % means numbers, & means punctuations
WildCards = oneOf("$ % &")
LeafWord = WildCards | Word(alphas)
# aaa+ aaa* aaa? aaa{0,3} aaa{2}
RangedQuantifiers = Literal("{") + Word(nums) + Optional(
    Literal(",") + Word(nums)) + Literal("}")
Quantifiers = oneOf("* + ?") | RangedQuantifiers
QuantifiedLeafWord = LeafWord + Quantifiers
# a sequence; quantified words must be tried before bare words
ConcatenatedSequence = OneOrMore(QuantifiedLeafWord | LeafWord)
# syntax root
Rule = Forward()
# ( xxx )
GroupStatement = Forward()
QuantifiedGroup = GroupStatement + Quantifiers
# (?<label> xxx)
# TODO: We don't need quantified capture group, so no QuantifiedCaptureGroup. And it is not orAble, can only be in the top level of AST, so it is easier to process
CaptureGroupStatement = Forward()
# xxx | yyy
orAbleStatement = QuantifiedGroup | GroupStatement | ConcatenatedSequence
# NOTE(review): only the alternatives after the first '|' are wrapped in
# Group(); the leading operand is left bare -- confirm the consumer of the
# parse tree expects this asymmetry.
OrStatement = Group(orAbleStatement +
                    OneOrMore(Literal("|") + Group(orAbleStatement)))

GroupStatement << Group(Literal("(") + Rule + Literal(")"))
CaptureGroupStatement << Group(
    Literal("(") + Literal("?") + Literal("<") + Word(alphas) + Literal(">") +
Beispiel #24
0
def interval(end=0, func=lambda t: t):
    """Return a pyparsing element matching "<lo>-<hi>" or "<lo>*<hi>".

    Either bound may be omitted: a missing lower bound defaults to 0 and
    a missing upper bound defaults to *end*.  *func* is installed as the
    element's parse action.
    """
    separator = Suppress('-') | Suppress('*')
    lower = Optional(Word(nums), default=0)
    upper = Optional(Word(nums), default=end)
    rng = lower + separator + upper
    rng.setParseAction(func)
    return rng
Beispiel #25
0
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from pyparsing import Keyword
from pyparsing import Optional

from undebt.pattern.common import COMMA
from undebt.pattern.common import INDENT
from undebt.pattern.python import ATOM
from undebt.pattern.util import tokens_as_list

# Matches a Python 2 "exec <code> in <globals>[, <locals>]" statement,
# capturing the leading indent and the two or three ATOM operands; the
# 'exec'/'in' keywords and the comma are suppressed.
grammar = (INDENT + Keyword("exec").suppress() + ATOM +
           Keyword("in").suppress() + ATOM + Optional(COMMA.suppress() + ATOM))


@tokens_as_list(assert_len_in=(3, 4))
def replace(tokens):
    """Rewrite a py2 ``exec ... in ...`` statement as a py3 ``exec()`` call.

    exec str in globals(), locals()
    ->
    exec(str, globals(), locals())

    ``tokens[0]`` is the captured indent; the remaining two or three
    tokens become the call arguments.
    """
    indent = tokens[0]
    args = ", ".join(tokens[1:])
    return "{0}exec({1})".format(indent, args)
Beispiel #26
0
def _tdb_grammar():  #pylint: disable=R0914
    """
    Convenience function for getting the pyparsing grammar of a TDB file.
    Returns a MatchFirst over one sub-grammar per supported TDB command.
    """
    int_number = Word(nums).setParseAction(lambda t: [int(t[0])])
    # matching float w/ regex is ugly but is recommended by pyparsing
    float_number = Regex(r'[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?') \
        .setParseAction(lambda t: [float(t[0])])
    # symbol name, e.g., phase name, function name
    symbol_name = Word(alphanums + '_:', min=1)
    # NOTE(review): this also REBINDS symbol_name (adding '(' and ')' to
    # its character set), making the line above dead -- confirm whether
    # only ref_phase_name was meant to get the wider character set.
    ref_phase_name = symbol_name = Word(alphanums + '_:()', min=1)
    # species name, e.g., CO2, AL, FE3+
    species_name = Word(alphanums + '+-*', min=1) + Optional(Suppress('%'))
    # constituent arrays are colon-delimited
    # each subarray can be comma- or space-delimited
    constituent_array = Group(
        delimitedList(Group(OneOrMore(Optional(Suppress(',')) + species_name)),
                      ':'))
    param_types = MatchFirst(
        [TCCommand(param_type) for param_type in TDB_PARAM_TYPES])
    # Let sympy do heavy arithmetic / algebra parsing for us
    # a convenience function will handle the piecewise details
    func_expr = Optional(float_number) + OneOrMore(SkipTo(';') \
        + Suppress(';') + ZeroOrMore(Suppress(',')) + Optional(float_number) + \
        Suppress(Word('YNyn', exact=1) | White()))
    # ELEMENT
    cmd_element = TCCommand('ELEMENT') + Word(alphas+'/-', min=1, max=2) + Optional(Suppress(ref_phase_name)) + \
        Optional(Suppress(OneOrMore(float_number))) + LineEnd()
    # TYPE_DEFINITION
    cmd_typedef = TCCommand('TYPE_DEFINITION') + \
        Suppress(White()) + CharsNotIn(' !', exact=1) + SkipTo(LineEnd())
    # FUNCTION
    cmd_function = TCCommand('FUNCTION') + symbol_name + \
        func_expr.setParseAction(_make_piecewise_ast)
    # ASSESSED_SYSTEMS
    cmd_ass_sys = TCCommand('ASSESSED_SYSTEMS') + SkipTo(LineEnd())
    # DEFINE_SYSTEM_DEFAULT
    cmd_defsysdef = TCCommand('DEFINE_SYSTEM_DEFAULT') + SkipTo(LineEnd())
    # DEFAULT_COMMAND
    cmd_defcmd = TCCommand('DEFAULT_COMMAND') + SkipTo(LineEnd())
    # LIST_OF_REFERENCES
    cmd_lor = TCCommand('LIST_OF_REFERENCES') + SkipTo(LineEnd())
    # PHASE
    cmd_phase = TCCommand('PHASE') + symbol_name + \
        Suppress(White()) + CharsNotIn(' !', min=1) + Suppress(White()) + \
        Suppress(int_number) + Group(OneOrMore(float_number)) + LineEnd()
    # CONSTITUENT
    cmd_constituent = TCCommand('CONSTITUENT') + symbol_name + \
        Suppress(White()) + Suppress(':') + constituent_array + \
        Suppress(':') + LineEnd()
    # PARAMETER
    cmd_parameter = TCCommand('PARAMETER') + param_types + \
        Suppress('(') + symbol_name + \
        Optional(Suppress('&') + Word(alphas+'/-', min=1, max=2), default=None) + \
        Suppress(',') + constituent_array + \
        Optional(Suppress(';') + int_number, default=0) + \
        Suppress(')') + func_expr.setParseAction(_make_piecewise_ast)
    # Now combine the grammar together
    all_commands = cmd_element | \
                    cmd_typedef | \
                    cmd_function | \
                    cmd_ass_sys | \
                    cmd_defsysdef | \
                    cmd_defcmd | \
                    cmd_lor | \
                    cmd_phase | \
                    cmd_constituent | \
                    cmd_parameter
    return all_commands
Beispiel #27
0
except ImportError:
    ecodes = None
    print('WARNING: evdev is not available')

try:
    from functools import lru_cache
except ImportError:
    # don't do caching on old python
    lru_cache = lambda: (lambda f: f)

# Common pyparsing building blocks for hwdb files.
EOL = LineEnd().suppress()
EMPTYLINE = LineEnd()
COMMENTLINE = pythonStyleComment + EOL
INTEGER = Word(nums)
STRING =  QuotedString('"')
# REAL: digits with optional fraction, or a bare ".digits" form.
REAL = Combine((INTEGER + Optional('.' + Optional(INTEGER))) ^ ('.' + INTEGER))
SIGNED_REAL = Combine(Optional(Word('-+')) + REAL)
# Udev property tag: upper-case first letter, then alphanumerics/underscore.
UDEV_TAG = Word(string.ascii_uppercase, alphanums + '_')

# Those patterns are used in type-specific matches
TYPES = {'mouse':    ('usb', 'bluetooth', 'ps2', '*'),
         'evdev':    ('name', 'atkbd', 'input'),
         # NOTE(review): ('modalias') is a plain string, not a 1-tuple like
         # the 'sensor' entry below -- confirm whether a trailing comma is
         # missing here.
         'id-input': ('modalias'),
         'touchpad': ('i8042', 'rmi', 'bluetooth', 'usb'),
         'joystick': ('i8042', 'rmi', 'bluetooth', 'usb'),
         'keyboard': ('name', ),
         'sensor':   ('modalias', ),
        }

# Patterns that are used to set general properties on a device
GENERAL_MATCHES = {'acpi',
Beispiel #28
0
def graph_definition():
    """Build (once) and return the pyparsing grammar for DOT graph text.

    The constructed parser is cached in the module-global ``graphparser``;
    the attached parse actions (push_*) assemble the graph object model
    as a side effect of parsing.
    """

    global graphparser

    if not graphparser:

        # punctuation
        colon  = Literal(":")
        lbrace = Literal("{")
        rbrace = Literal("}")
        lbrack = Literal("[")
        rbrack = Literal("]")
        lparen = Literal("(")
        rparen = Literal(")")
        equals = Literal("=")
        comma  = Literal(",")
        dot    = Literal(".")
        slash  = Literal("/")
        bslash = Literal("\\")
        star   = Literal("*")
        semi   = Literal(";")
        at     = Literal("@")
        minus  = Literal("-")

        # keywords
        strict_    = CaselessLiteral("strict")
        graph_     = CaselessLiteral("graph")
        digraph_   = CaselessLiteral("digraph")
        subgraph_  = CaselessLiteral("subgraph")
        node_      = CaselessLiteral("node")
        edge_      = CaselessLiteral("edge")


        # token definitions

        identifier = Word(alphanums + "_." ).setName("identifier")

        # quotes are kept (unquoteResults=False) so later stages can decide
        double_quoted_string = QuotedString('"', escChar="\\", multiline=True, unquoteResults=False) # dblQuotedString
        _noncomma = "".join( [ c for c in printables if c != "," ] )

        alphastring_ = OneOrMore(CharsNotIn(_noncomma + ' '))

        def parse_html(s, loc, toks):
            # re-wrap the nested content in its <...> delimiters
            return '<%s>' % ''.join(toks[0])


        opener = '<'
        closer = '>'
        html_text = nestedExpr( opener, closer, 
            ( CharsNotIn( opener + closer )  ) 
                ).setParseAction(parse_html).leaveWhitespace()

        ID = ( identifier | html_text | 
            double_quoted_string | #.setParseAction(strip_quotes) |
            alphastring_ ).setName("ID")


        float_number = Combine(Optional(minus) +
            OneOrMore(Word(nums + "."))).setName("float_number")

        righthand_id =  (float_number | ID ).setName("righthand_id")

        port_angle = (at + ID).setName("port_angle")

        port_location = (OneOrMore(Group(colon + ID)) |
            Group(colon + lparen + ID + comma + ID + rparen)).setName("port_location")

        port = (Group(port_location + Optional(port_angle)) |
            Group(port_angle + Optional(port_location))).setName("port")

        node_id = (ID + Optional(port))
        a_list = OneOrMore(ID + Optional(equals + righthand_id) +
            Optional(comma.suppress())).setName("a_list")

        attr_list = OneOrMore(lbrack.suppress() + Optional(a_list) +
            rbrack.suppress()).setName("attr_list")

        attr_stmt = (Group(graph_ | node_ | edge_) + attr_list).setName("attr_stmt")

        edgeop = (Literal("--") | Literal("->")).setName("edgeop")

        stmt_list = Forward()
        graph_stmt = Group(lbrace.suppress() + Optional(stmt_list) +
            rbrace.suppress() + Optional(semi.suppress()) ).setName("graph_stmt")


        edge_point = Forward()

        edgeRHS = OneOrMore(edgeop + edge_point)
        edge_stmt = edge_point + edgeRHS + Optional(attr_list)

        subgraph = Group(subgraph_ + Optional(ID) + graph_stmt).setName("subgraph")

        # resolved here because subgraph/graph_stmt are mutually recursive
        edge_point << Group( subgraph | graph_stmt | node_id ).setName('edge_point')

        node_stmt = (node_id + Optional(attr_list) + Optional(semi.suppress())).setName("node_stmt")

        assignment = (ID + equals + righthand_id).setName("assignment")
        # ordering matters: assignment must be tried before edge/node stmts
        stmt =  (assignment | edge_stmt | attr_stmt | subgraph | graph_stmt | node_stmt).setName("stmt")
        stmt_list << OneOrMore(stmt + Optional(semi.suppress()))

        graphparser = OneOrMore( (Optional(strict_) + Group((graph_ | digraph_)) +
            Optional(ID) + graph_stmt).setResultsName("graph") )

        singleLineComment = Group("//" + restOfLine) | Group("#" + restOfLine)


        # actions: comments are skipped; parse actions build the graph model

        graphparser.ignore(singleLineComment)
        graphparser.ignore(cStyleComment)

        assignment.setParseAction(push_attr_list)
        a_list.setParseAction(push_attr_list)
        edge_stmt.setParseAction(push_edge_stmt)
        node_stmt.setParseAction(push_node_stmt)
        attr_stmt.setParseAction(push_default_stmt)

        subgraph.setParseAction(push_subgraph_stmt)
        graph_stmt.setParseAction(push_graph_stmt)
        graphparser.setParseAction(push_top_graph_stmt)


    return graphparser
Beispiel #29
0
    PN_CHARS_BASE_re = u'A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF'

# [165] PN_CHARS_U ::= PN_CHARS_BASE | '_'
# Character-class fragments for the SPARQL/Turtle prefixed-name grammar;
# the [NNN] tags refer to the production numbers in the W3C grammar.
# [165] PN_CHARS_U ::= PN_CHARS_BASE | '_'
PN_CHARS_U_re = '_' + PN_CHARS_BASE_re

# [167] PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]
PN_CHARS_re = u'\\-0-9\u00B7\u0300-\u036F\u203F-\u2040' + PN_CHARS_U_re
# PN_CHARS = Regex(u'[%s]'%PN_CHARS_re, flags=re.U)

# [168] PN_PREFIX ::= PN_CHARS_BASE ((PN_CHARS|'.')* PN_CHARS)?
# i.e. dots are allowed inside a prefix name but not at its start or end.
PN_PREFIX = Regex(u'[%s](?:[%s\\.]*[%s])?' %
                  (PN_CHARS_BASE_re, PN_CHARS_re, PN_CHARS_re),
                  flags=re.U)

# [140] PNAME_NS ::= PN_PREFIX? ':'
# The ':' must follow the prefix immediately (hence leaveWhitespace).
PNAME_NS = Optional(Param('prefix',
                          PN_PREFIX)) + Suppress(':').leaveWhitespace()

# [173] PN_LOCAL_ESC ::= '\' ( '_' | '~' | '.' | '-' | '!' | '$' | '&' | "'" | '(' | ')' | '*' | '+' | ',' | ';' | '=' | '/' | '?' | '#' | '@' | '%' )

PN_LOCAL_ESC_re = '\\\\[_~\\.\\-!$&"\'()*+,;=/?#@%]'
# PN_LOCAL_ESC = Regex(PN_LOCAL_ESC_re) # regex'd
#PN_LOCAL_ESC.setParseAction(lambda x: x[0][1:])

# [172] HEX ::= [0-9] | [A-F] | [a-f]
# HEX = Regex('[0-9A-Fa-f]') # not needed

# [171] PERCENT ::= '%' HEX HEX
PERCENT_re = '%[0-9a-fA-F]{2}'
# PERCENT = Regex(PERCENT_re) # regex'd
#PERCENT.setParseAction(lambda x: unichr(int(x[0][1:], 16)))
Beispiel #30
0
def _parse(text: Text):
    """Parse a MySQL schema dump containing ``CREATE TABLE`` statements.

    Builds a pyparsing grammar for ``CREATE TABLE`` statements (and C-style
    ``/* ... */`` comments, which are discarded), runs it over *text* with
    ``parseAll=True``, and returns the ``'table'`` results list — one entry
    per parsed statement, each produced by the project-level ``Table``
    parse action.  Raises ``pyparsing.ParseException`` on unmatched input.
    """
    # C-style comment; Suppress drops it from the results entirely.
    comment = Suppress('/*' + Regex(r'([^*]|[*][^/])*') + '*/')

    # Backquoted identifier, e.g. `users`; the parse action unwraps the
    # single captured token so downstream names bind to a plain string.
    identifier = (Suppress('`') + Regex(r'[^`]+') +
                  Suppress('`')).setParseAction(lambda toks: toks[0])

    # Single-quoted string allowing backslash escapes, unwrapped the same way.
    string = (Suppress("'") + Regex(r"([^']|\\.)*") +
              Suppress("'")).setParseAction(lambda toks: toks[0])

    # MySQL referential actions (case-insensitive keywords).
    reference_option = (CaselessKeyword('RESTRICT')
                        | CaselessKeyword('CASCADE')
                        | CaselessKeyword('SET NULL')
                        | CaselessKeyword('NO ACTION')
                        | CaselessKeyword('SET DEFAULT'))

    # REFERENCES tbl (cols...) [ON DELETE opt] [ON UPDATE opt]
    # NOTE(review): multi-word CaselessKeywords like 'ON DELETE' match only
    # with a single space between the words — verify against real dumps.
    reference_definition = (
        Suppress(CaselessKeyword('REFERENCES')) +
        identifier('reference_tbl_name') + '(' +
        delimitedList(identifier)('tbl_column') + ')' +
        ZeroOrMore((Suppress(CaselessKeyword('ON DELETE')) +
                    reference_option('on_delete'))
                   | (Suppress(CaselessKeyword('ON UPDATE')) +
                      reference_option('on_update'))))

    # Index/key or FOREIGN KEY constraint; 'key_part*' accumulates the
    # key columns into a list result.  Fed to the project Constraint action.
    constraint_definition = (
        (((CaselessKeyword('PRIMARY KEY')('type')) |
          ((CaselessKeyword('FULLTEXT KEY') | CaselessKeyword('UNIQUE KEY')
            | CaselessKeyword('KEY'))('type') + identifier('index_name'))) +
         '(' + delimitedList(identifier('key_part*')) + ')') |
        (Suppress(CaselessKeyword('CONSTRAINT')) + identifier('symbol') +
         (CaselessKeyword('FOREIGN KEY')('type') + '(' +
          delimitedList(identifier('key_part*')) + ')' + reference_definition))
    ).setParseAction(Constraint)

    # Type name with optional parenthesized argument, e.g. VARCHAR(255);
    # an UNSIGNED modifier is accepted and dropped.
    column_type = (Word(alphanums) + Optional('(' + Regex('[^)]+') + ')') +
                   Optional(Suppress(CaselessKeyword('UNSIGNED'))))

    # Column line: name, type, then any number of modifiers in any order.
    # Fed to the project Column parse action.
    column_definition = (
        identifier('col_name') + column_type('col_type') + ZeroOrMore(
            (CaselessKeyword('NULL')
             | CaselessKeyword('NOT NULL'))('nullability')
            | (CaselessKeyword('AUTO_INCREMENT'))('auto_increment')
            | (Suppress(CaselessKeyword('COMMENT')) + string('comment'))
            | (Suppress(CaselessKeyword('DEFAULT')) +
               (Word(alphanums + '_')
                | string).setParseAction(lambda toks: toks[0])('default'))
            | (Suppress(CaselessKeyword('ON DELETE')) +
               (Word(alphanums + '_') | reference_option)('on_delete'))
            | (Suppress(CaselessKeyword('ON UPDATE')) +
               (Word(alphanums + '_') | reference_option)('on_update')))
    ).setParseAction(Column)

    # Each item between the table's parentheses is a column or a constraint;
    # 'column*' / 'constraint*' collect them into list results.
    create_definition = column_definition('column*') | constraint_definition(
        'constraint*')

    # Full CREATE TABLE; the trailing Regex('[^;]*') swallows table options
    # (ENGINE=..., CHARSET=..., etc.) up to the statement terminator.
    create_table_statement = (
        Suppress(CaselessKeyword('CREATE') + CaselessKeyword('TABLE')) +
        identifier('tbl_name') + Suppress('(') +
        delimitedList(create_definition) + Suppress(')') +
        Suppress(Regex('[^;]*'))).setParseAction(Table)

    # Statements are ';'-delimited; a lone trailing ';' is tolerated.
    parser = delimitedList(comment | create_table_statement('table*'),
                           delim=';') + Suppress(Optional(';'))

    return parser.parseString(text, parseAll=True)['table']