Esempio n. 1
0
def mathematical_expression() -> Token:
    """Build the grammar for mathematical expressions.

    Operands are numeric literals, attribute references, qualified
    identifiers, or concatenations (array aggregates and double-quoted
    strings joined with ``&``).  Operands are combined by three levels of
    left-associative binary operators, tightest binding first: ``**``,
    then ``*`` and ``/``, then ``+`` and ``-``.
    """
    # Array aggregate: "(" number {"," number} ")".  The parse actions on
    # the parentheses replace each token with the location pyparsing hands
    # to the action, so parse_array_aggregate sees positions, not "(" / ")".
    opening = Literal("(").setParseAction(lambda s, l, t: l)
    first_element = numeric_literal()
    more_elements = (comma() - numeric_literal()) * (0, )
    closing = Literal(")").setParseAction(lambda s, l, t: l)
    array_aggregate = (opening + first_element + more_elements +
                       closing).setParseAction(parse_array_aggregate)

    string = QuotedString('"').setParseAction(parse_string)

    # Aggregates and strings may be chained with "&"; the operator itself is
    # suppressed from the results.
    concat_levels = [
        (Suppress(Keyword("&")), 2, opAssoc.LEFT, parse_concatenation),
    ]
    concatenation = infixNotation(array_aggregate | string, concat_levels)
    concatenation.setName("Concatenation")

    term = (numeric_literal() | attribute_reference()
            | qualified_identifier() | concatenation)
    term.setParseAction(parse_term)

    power_op = Literal("**")
    multiplying_op = Literal("*") | Literal("/")
    adding_op = Literal("+") | Literal("-")

    # Every level is binary, left-associative and shares one parse action.
    operator_levels = [
        (operator, 2, opAssoc.LEFT, parse_mathematical_expression)
        for operator in (power_op, multiplying_op, adding_op)
    ]

    expression = infixNotation(term, operator_levels)
    return expression.setName("MathematicalExpression")
Esempio n. 2
0
def transform_human(text, main_window):
    """Transform user input into something Script can read.

    Two rewrites are applied to ``text``, in this order:

    1. every ``$name`` token (``name`` made of letters) whose lookup in
       ``main_window.dock_handler.variables`` yields a truthy value is
       replaced by that value; other ``$name`` tokens are left untouched;
    2. every double-quoted string literal is replaced by ``0x`` followed by
       the hexadecimal encoding of its contents.

    Main window is needed for tool integration.
    """
    import binascii

    # these are parseActions for pyparsing.
    def str_literal_to_hex(s, loc, toks):
        for i, t in enumerate(toks):
            # str.encode('hex') exists only on Python 2 and raises
            # LookupError on Python 3; hexlify works on both once the
            # token is a byte string.
            raw = t if isinstance(t, bytes) else t.encode('utf-8')
            digits = binascii.hexlify(raw)
            if not isinstance(digits, str):  # Python 3 returns bytes
                digits = digits.decode('ascii')
            toks[i] = '0x' + digits
        return toks

    def var_name_to_value(s, loc, toks):
        for i, t in enumerate(toks):
            val = main_window.dock_handler.variables.get_key(t.strip('$'))
            if val:
                toks[i] = val
        return toks
    # ^ parseActions for pyparsing end here.
    str_literal = QuotedString('"')
    str_literal.setParseAction(str_literal_to_hex)
    var_name = pyparsing.Combine(Word('$') + Word(pyparsing.alphas))
    var_name.setParseAction(var_name_to_value)

    # Variables are expanded first, so a substituted value that contains a
    # quoted literal is hex-encoded by the second pass as well.
    s = text
    s = var_name.transformString(s)
    s = str_literal.transformString(s)
    return s
Esempio n. 3
0
    def _create_parser() -> ParserElement:
        """Build the pyparsing grammar for boolean filter expressions.

        A filter term is a filter string, optionally prefixed with ``tag:``,
        ``ingr:`` or ``unit:``.  A filter string is a ``/regex/``, a quoted
        string (exact match) or a bare word (inexact match).  Terms are
        combined with not / and / xor / or (keyword or symbol form); two
        adjacent terms are combined by the implicit-and action.
        """
        # (operator, arity, associativity, parse action) tuples in the format
        # expected by infixNotation.  A None operator means that the two
        # operands simply follow each other.
        operators = [
            (None, 2, opAssoc.LEFT, BooleanAndOperation._create_from_implicit_tokens),
            (CaselessKeyword('not') | "~" | "!", 1, opAssoc.RIGHT, BooleanNotOperation._create_from_tokens),
            (CaselessKeyword('and') | "&", 2, opAssoc.LEFT, BooleanAndOperation._create_from_tokens),
            (CaselessKeyword('xor') | "^", 2, opAssoc.LEFT, BooleanXorOperation._create_from_tokens),
            (CaselessKeyword('or') | "|", 2, opAssoc.LEFT, BooleanOrOperation._create_from_tokens),
        ]

        # (prefix keyword, parse action) pairs for the atoms combined by the
        # boolean operators.  A None prefix denotes an unprefixed term.
        prefixed_terms = [
            (CaselessKeyword('tag'), TagFilterTerm._create_from_tokens),
            (CaselessKeyword('ingr'), IngredientFilterTerm._create_from_tokens),
            (CaselessKeyword('unit'), UnitFilterTerm._create_from_tokens),
            (None, AnyFilterTerm._create_from_tokens),
        ]

        # Operator and prefix expressions that a bare word must not match.
        reserved = [entry[0] for entry in operators if entry[0] is not None]
        reserved += [prefix for prefix, _ in prefixed_terms if prefix is not None]

        # A quoted string indicates an exact match.
        double_quoted = QuotedString('"', escChar='\\')
        single_quoted = QuotedString("'", escChar='\\')
        quoted = (double_quoted | single_quoted).setResultsName('string')
        quoted.setName("quoted_filter_string")
        quoted.setParseAction(ExactFilterString._create_from_tokens)

        # An unquoted string is an inexact match; it cannot contain
        # whitespace or parentheses and must not start with a reserved
        # expression.
        not_reserved = ~MatchFirst(reserved)
        bare_word = Regex(r'[^\s\(\)]+', flags=re.U).setResultsName('string')
        unquoted = (not_reserved + bare_word).setName("unquoted_filter_string")
        unquoted.setParseAction(FuzzyFilterString._create_from_tokens)

        # Regular expressions are not parsed by the grammar; the text between
        # the slashes is handed to the parse action.
        regex = QuotedString('/', escChar='\\').setName("regex_filter_string")
        regex.setParseAction(RegexFilterString._create_from_tokens)

        # The unquoted alternative must come last so that leading quotes and
        # slashes are handled by the alternatives before it.
        filter_string = regex | quoted | unquoted
        filter_string.setParseAction(lambda toks: toks[0])

        alternatives = []
        for prefix, action in prefixed_terms:
            named_string = filter_string.setResultsName("filter_string")
            if prefix is None:
                candidate = named_string
                candidate.setName("filter_term_None")
            else:
                candidate = Combine(prefix + ':' + named_string)
                candidate.setName("filter_term_" + str(prefix.match))
            candidate.addParseAction(action)
            alternatives.append(candidate)

        return infixNotation(MatchFirst(alternatives), operators)
Esempio n. 4
0
def _define_grammar():
    """
    Build a fresh pyparsing grammar for selector expressions and return it.

    Kept inside a function so that the intermediate parser elements do not
    end up in the module namespace.
    """
    expr = Forward()

    # Atoms: a label name and a single- or double-quoted string literal.
    label_name = Word(LABEL_CHARS).setParseAction(LabelNode)
    string_literal = (QuotedString('"') | QuotedString("'")).setParseAction(
        LiteralNode)

    # Set literal: { "a", 'b', ... } with the braces dropped.
    set_member = QuotedString('"') | QuotedString("'")
    set_literal = (Suppress("{") + delimitedList(set_member, ",") +
                   Suppress("}")).setParseAction(SetLiteralNode)

    # label == "x"  /  label != "x"
    eq_comparison = (label_name + Suppress("==") +
                     string_literal).setParseAction(LabelToLiteralEqualityNode)
    not_eq_comparison = (label_name + Suppress("!=") +
                         string_literal).setParseAction(InequalityNode)

    # label in {...}  /  label not in {...}
    in_comparison = (label_name + Suppress(Keyword("in")) +
                     set_literal).setParseAction(LabelInSetLiteralNode)
    not_in = Suppress(Keyword("not") + Keyword("in"))
    not_in_comparison = (label_name + not_in +
                         set_literal).setParseAction(NotInNode)

    # has(label)
    has_check = (Suppress("has(") + Word(LABEL_CHARS) +
                 Suppress(")")).setParseAction(HasNode)

    # all() is accepted inside an expression for completeness, e.g.
    # "! all()".  The trivial selectors "" and "all()" are special-cased
    # outside this grammar for efficiency.
    all_op = Suppress("all()").setParseAction(AllNode)

    comparison = (eq_comparison | not_eq_comparison | in_comparison
                  | not_in_comparison | has_check | all_op)

    paren_expr = Suppress("(") + expr + Suppress(")")

    # Any number of "!" prefixes in front of a comparison or a
    # parenthesised sub-expression.
    value = (ZeroOrMore("!") +
             (comparison | paren_expr)).setParseAction(simplify_negation_node)

    # "&&" chains are formed first and are then joined by "||".
    and_expr = (value + ZeroOrMore(Suppress("&&") + value)).setParseAction(
        simplify_and_node)
    or_expr = (and_expr +
               ZeroOrMore(Suppress("||") + and_expr)).setParseAction(
                   simplify_or_node)

    expr << or_expr

    # The grammar must consume the entire input string.
    return expr + StringEnd()
Esempio n. 5
0
    def __init__(self):
        """Build the expression grammar and store it in ``self.parser``.

        Operands are literals (booleans, None, numbers, strings, ``/regex/``),
        result keywords, dotted identifiers, and ``[...]`` / ``{...}``
        collections of those.  Each operand and operator level carries a
        parse action that replaces the matched tokens with an ``Eval*``
        object.
        """
        # speed up infixNotation considerably at the price of some cache memory
        # NOTE(review): enablePackrat is called on the ParserElement class, so
        # it presumably affects every pyparsing grammar in the process.
        ParserElement.enablePackrat()

        # Atomic operands.
        boolean = Keyword('True') | Keyword('False')
        none = Keyword('None')
        integer = Word(nums)
        real = Combine(Word(nums) + "." + Word(nums))
        string = (QuotedString('"', escChar='\\')
                  | QuotedString("'", escChar='\\'))
        regex = QuotedString('/', escChar='\\')
        identifier = Word(alphas, alphanums + '_')
        # Dotted access such as a.b.c, built as a left-associative binary '.'.
        dereference = infixNotation(identifier, [
            (Literal('.'), 2, opAssoc.LEFT, EvalArith),
        ])
        result = (Keyword('bad') | Keyword('fail') | Keyword('good')
                  | Keyword('ignore') | Keyword('unknown'))
        # Alternatives are tried in order: 'real' precedes 'integer' so the
        # integer part of a real is not consumed on its own, and the keyword
        # alternatives precede 'dereference' so they are not read as
        # identifiers.
        rval = boolean | none | real | integer | string | regex | result | dereference
        # [a, b, ...] and {a, b, ...}; both may be empty.
        rvallist = Group(
            Suppress('[') + Optional(delimitedList(rval)) + Suppress(']'))
        rvalset = Group(
            Suppress('{') + Optional(delimitedList(rval)) + Suppress('}'))
        operand = rval | rvallist | rvalset

        # parse actions replace the parsed tokens with an instantiated object
        # which we can later call into for evaluation of its content
        # (they are attached to the same element objects that were already
        # composed into rval / operand above)
        boolean.setParseAction(EvalBoolean)
        none.setParseAction(EvalNone)
        integer.setParseAction(EvalInteger)
        real.setParseAction(EvalReal)
        string.setParseAction(EvalString)
        regex.setParseAction(EvalRegex)
        identifier.setParseAction(EvalIdentifier)
        result.setParseAction(EvalResult)
        rvallist.setParseAction(EvalList)
        rvalset.setParseAction(EvalSet)

        # 'is' (only when not followed by 'not'), or 'is' + 'not' merged into
        # the single token 'is not'.
        identity_test = Keyword('is') + ~Keyword('not') | Combine(
            Keyword('is') + Keyword('not'), adjacent=False, joinString=' ')
        # 'in', or 'not' + 'in' merged into the single token 'not in'.
        membership_test = Keyword('in') | Combine(
            Keyword('not') + Keyword('in'), adjacent=False, joinString=' ')
        comparison_op = oneOf('< <= > >= != == isdisjoint')
        comparison = identity_test | membership_test | comparison_op

        # Operator table for infixNotation: (operator, arity, associativity,
        # parse action), listed from the tightest-binding level to the loosest.
        # NOTE(review): '->' is declared as a Keyword, which presumably only
        # matches when it is not directly adjacent to identifier characters
        # (i.e. "a -> b", not "a->b") - confirm this is intended.
        self.parser = infixNotation(operand, [
            (Literal('**'), 2, opAssoc.LEFT, EvalPower),
            (oneOf('+ - ~'), 1, opAssoc.RIGHT, EvalModifier),
            (oneOf('* / // %'), 2, opAssoc.LEFT, EvalArith),
            (oneOf('+ -'), 2, opAssoc.LEFT, EvalArith),
            (oneOf('<< >>'), 2, opAssoc.LEFT, EvalArith),
            (Literal('&'), 2, opAssoc.LEFT, EvalArith),
            (Literal('^'), 2, opAssoc.LEFT, EvalArith),
            (Literal('|'), 2, opAssoc.LEFT, EvalArith),
            (comparison, 2, opAssoc.LEFT, EvalLogic),
            (Keyword('not'), 1, opAssoc.RIGHT, EvalModifier),
            (Keyword('and'), 2, opAssoc.LEFT, EvalLogic),
            (Keyword('or'), 2, opAssoc.LEFT, EvalLogic),
            (Keyword('->'), 2, opAssoc.LEFT, EvalArith),
        ])
Esempio n. 6
0
 def create_type_query_syntax(self):
     """Build the grammar for a ``CREATE TYPE`` query.

     The returned expression matches, in order: the case-insensitive
     literal ``CREATE TYPE``, the new type name, the parent type name
     (both made of letters only) and a single- or double-quoted
     description that may span several lines.  Each part reports to the
     corresponding method of ``self`` through its parse action.
     """
     keyword = CaselessLiteral("CREATE TYPE")
     keyword.setParseAction(self.create_new_type_query_obj)

     type_name = Word(alphas)
     type_name.setParseAction(self.set_type)

     parent_name = Word(alphas)
     parent_name.setParseAction(self.set_parent_type)

     single_quoted = QuotedString("'", multiline=True)
     double_quoted = QuotedString('"', multiline=True)
     description = (single_quoted ^ double_quoted).setParseAction(
         self.set_description)

     return keyword + type_name + parent_name + description
Esempio n. 7
0
def _define_grammar():
    """
    Build a fresh pyparsing grammar for selector expressions and return it.

    Kept inside a function so that the intermediate parser elements do not
    end up in the module namespace.
    """
    expr = Forward()

    # Atoms: a label name and a single- or double-quoted string literal.
    label_name = Word(LABEL_CHARS).setParseAction(LabelNode)
    string_literal = (QuotedString('"') | QuotedString("'")).setParseAction(
        LiteralNode)

    # Set literal: { "a", 'b', ... } with the braces dropped.
    set_member = QuotedString('"') | QuotedString("'")
    set_literal = (Suppress("{") + delimitedList(set_member, ",") +
                   Suppress("}")).setParseAction(SetLiteralNode)

    # label == "x"  /  label != "x"
    eq_comparison = (label_name + Suppress("==") +
                     string_literal).setParseAction(LabelToLiteralEqualityNode)
    not_eq_comparison = (label_name + Suppress("!=") +
                         string_literal).setParseAction(InequalityNode)

    # label in {...}  /  label not in {...}
    in_comparison = (label_name + Suppress(Keyword("in")) +
                     set_literal).setParseAction(LabelInSetLiteralNode)
    not_in = Suppress(Keyword("not") + Keyword("in"))
    not_in_comparison = (label_name + not_in +
                         set_literal).setParseAction(NotInNode)

    # has(label)
    has_check = (Suppress("has(") + Word(LABEL_CHARS) +
                 Suppress(")")).setParseAction(HasNode)

    comparison = (eq_comparison | not_eq_comparison | in_comparison
                  | not_in_comparison | has_check)

    paren_expr = Suppress("(") + expr + Suppress(")")

    value = comparison | paren_expr

    # "&&" chains are formed first and are then joined by "||".
    and_expr = (value + ZeroOrMore(Suppress("&&") + value)).setParseAction(
        simplify_and_node)
    or_expr = (and_expr +
               ZeroOrMore(Suppress("||") + and_expr)).setParseAction(
                   simplify_or_node)

    expr << or_expr

    # The grammar must consume the entire input string.
    return expr + StringEnd()
    def _getPattern(self):
        """Build the pyparsing grammar for formula expressions.

        The grammar has three layers, each built with ``operatorPrecedence``
        on top of the previous one: arithmetic (``^``, then ``* /``, then
        ``+ -``), comparison (``> < >= <= != ==``) and logic (``AND``, then
        ``OR``).  Every operator is binary and left-associative.

        Operands are tried in this order: a call to one of
        ``self.FUNCTIONS``, a ``{...}`` aggregate column, a ``[...]`` single
        column, a numeric constant, a quoted string, the current-value
        marker ``.`` and finally a bare identifier.

        Returns:
            The logic-level expression followed by ``StringEnd()``, so the
            pattern only matches when the whole input is consumed.
        """
        arith_expr = Forward()
        comp_expr = Forward()
        logic_expr = Forward()
        LPAR, RPAR = map(Suppress, "()")
        identifier = Word(alphas + "_", alphanums + "_")
        multop = oneOf('* /')
        plusop = oneOf('+ -')
        expop = Literal("^")
        compop = oneOf('> < >= <= != ==')
        andop = Literal("AND")
        orop = Literal("OR")
        current_value = Literal(".")
        function = oneOf(' '.join(self.FUNCTIONS))
        function_call = Group(
            function.setResultsName('fn') + LPAR +
            Optional(delimitedList(arith_expr)) + RPAR)
        aggregate_column = QuotedString(quoteChar='{', endQuoteChar='}')
        single_column = QuotedString(quoteChar='[', endQuoteChar=']')
        integer = Regex(r"-?\d+")
        real = Regex(r"-?\d+\.\d*")

        # quotedString is a module-level pyparsing object shared by every
        # grammar in the process.  Attach the parse actions to a private
        # copy so that building this pattern does not alter it for others.
        quoted_string = quotedString.copy()
        quoted_string.setParseAction(self.__evalString)
        quoted_string.addParseAction(removeQuotes)

        # 'real' is tried before 'integer' so the integer part of a real is
        # not consumed on its own; 'identifier' is the final fallback.
        operand = (
            function_call.setParseAction(self.__evalFunction)
            | aggregate_column.setParseAction(self.__evalAggregateColumn)
            | single_column.setParseAction(self.__evalSingleColumn)
            | (real | integer).setParseAction(self.__evalConstant)
            | quoted_string
            | current_value.setParseAction(self.__evalCurrentValue)
            | identifier.setParseAction(self.__evalString)
        )

        arith_expr << operatorPrecedence(operand, [
            (expop, 2, opAssoc.LEFT, self.__expOp),
            (multop, 2, opAssoc.LEFT, self.__multOp),
            (plusop, 2, opAssoc.LEFT, self.__addOp),
        ])

        comp_expr << operatorPrecedence(arith_expr, [
            (compop, 2, opAssoc.LEFT, self.__evalComparisonOp),
        ])

        logic_expr << operatorPrecedence(comp_expr, [
            (andop, 2, opAssoc.LEFT, self.__evalLogicOp),
            (orop, 2, opAssoc.LEFT, self.__evalLogicOp),
        ])

        pattern = logic_expr + StringEnd()
        return pattern
Esempio n. 9
0
def _define_grammar():
    """
    Build a fresh pyparsing grammar for selector expressions and return it.

    Kept inside a function so that the intermediate parser elements do not
    end up in the module namespace.
    """
    expr = Forward()

    # Atoms: a label name and a single- or double-quoted string literal.
    label_name = Word(LABEL_CHARS).setParseAction(LabelNode)
    string_literal = (QuotedString('"') | QuotedString("'")).setParseAction(
        LiteralNode)

    # Set literal: { "a", 'b', ... } with the braces dropped.
    set_member = QuotedString('"') | QuotedString("'")
    set_literal = (Suppress("{") + delimitedList(set_member, ",") +
                   Suppress("}")).setParseAction(SetLiteralNode)

    # label == "x"  /  label != "x"
    eq_comparison = (label_name + Suppress("==") +
                     string_literal).setParseAction(LabelToLiteralEqualityNode)
    not_eq_comparison = (label_name + Suppress("!=") +
                         string_literal).setParseAction(InequalityNode)

    # label in {...}  /  label not in {...}
    in_comparison = (label_name + Suppress(Keyword("in")) +
                     set_literal).setParseAction(LabelInSetLiteralNode)
    not_in = Suppress(Keyword("not") + Keyword("in"))
    not_in_comparison = (label_name + not_in +
                         set_literal).setParseAction(NotInNode)

    # has(label)
    has_check = (Suppress("has(") + Word(LABEL_CHARS) +
                 Suppress(")")).setParseAction(HasNode)

    comparison = (eq_comparison | not_eq_comparison | in_comparison
                  | not_in_comparison | has_check)

    paren_expr = Suppress("(") + expr + Suppress(")")

    value = comparison | paren_expr

    # "&&" chains are formed first and are then joined by "||".
    and_expr = (value + ZeroOrMore(Suppress("&&") + value)).setParseAction(
        simplify_and_node)
    or_expr = (and_expr +
               ZeroOrMore(Suppress("||") + and_expr)).setParseAction(
                   simplify_or_node)

    expr << or_expr

    # The grammar must consume the entire input string.
    return expr + StringEnd()
Esempio n. 10
0
    def _getPattern(self):
        """Build the pyparsing grammar for formula expressions.

        The grammar has three layers, each built with ``operatorPrecedence``
        on top of the previous one: arithmetic (``^``, then ``* /``, then
        ``+ -``), comparison (``> < >= <= != ==``) and logic (``AND``, then
        ``OR``).  Every operator is binary and left-associative.

        Operands are tried in this order: a call to one of
        ``self.FUNCTIONS``, a ``{...}`` aggregate column, a ``[...]`` single
        column, a numeric constant, a quoted string, the current-value
        marker ``.`` and finally a bare identifier.

        Returns:
            The logic-level expression followed by ``StringEnd()``, so the
            pattern only matches when the whole input is consumed.
        """
        arith_expr = Forward()
        comp_expr = Forward()
        logic_expr = Forward()
        LPAR, RPAR = map(Suppress, "()")
        identifier = Word(alphas + "_", alphanums + "_")
        multop = oneOf('* /')
        plusop = oneOf('+ -')
        expop = Literal("^")
        compop = oneOf('> < >= <= != ==')
        andop = Literal("AND")
        orop = Literal("OR")
        current_value = Literal(".")
        function = oneOf(' '.join(self.FUNCTIONS))
        function_call = Group(
            function.setResultsName('fn') + LPAR +
            Optional(delimitedList(arith_expr)) + RPAR)
        aggregate_column = QuotedString(quoteChar='{', endQuoteChar='}')
        single_column = QuotedString(quoteChar='[', endQuoteChar=']')
        integer = Regex(r"-?\d+")
        real = Regex(r"-?\d+\.\d*")

        # quotedString is a module-level pyparsing object shared by every
        # grammar in the process.  Attach the parse actions to a private
        # copy so that building this pattern does not alter it for others.
        quoted_string = quotedString.copy()
        quoted_string.setParseAction(self.__evalString)
        quoted_string.addParseAction(removeQuotes)

        # 'real' is tried before 'integer' so the integer part of a real is
        # not consumed on its own; 'identifier' is the final fallback.
        operand = (
            function_call.setParseAction(self.__evalFunction)
            | aggregate_column.setParseAction(self.__evalAggregateColumn)
            | single_column.setParseAction(self.__evalSingleColumn)
            | (real | integer).setParseAction(self.__evalConstant)
            | quoted_string
            | current_value.setParseAction(self.__evalCurrentValue)
            | identifier.setParseAction(self.__evalString)
        )

        arith_expr << operatorPrecedence(operand, [
            (expop, 2, opAssoc.LEFT, self.__expOp),
            (multop, 2, opAssoc.LEFT, self.__multOp),
            (plusop, 2, opAssoc.LEFT, self.__addOp),
        ])

        comp_expr << operatorPrecedence(arith_expr, [
            (compop, 2, opAssoc.LEFT, self.__evalComparisonOp),
        ])

        logic_expr << operatorPrecedence(comp_expr, [
            (andop, 2, opAssoc.LEFT, self.__evalLogicOp),
            (orop, 2, opAssoc.LEFT, self.__evalLogicOp),
        ])

        pattern = logic_expr + StringEnd()
        return pattern
Esempio n. 11
0
def transform_human(text):
    """Transform user input into something Script can read.

    Every double-quoted string literal in ``text`` is replaced by ``0x``
    followed by the hexadecimal encoding of its contents; all other text is
    returned unchanged.
    """
    import binascii

    # these are parseActions for pyparsing.
    def str_literal_to_hex(s, loc, toks):
        for i, t in enumerate(toks):
            # str.encode('hex') exists only on Python 2 and raises
            # LookupError on Python 3; hexlify works on both once the
            # token is a byte string.
            raw = t if isinstance(t, bytes) else t.encode('utf-8')
            digits = binascii.hexlify(raw)
            if not isinstance(digits, str):  # Python 3 returns bytes
                digits = digits.decode('ascii')
            toks[i] = '0x' + digits
        return toks
    # ^ parseActions for pyparsing end here.
    str_literal = QuotedString('"')
    str_literal.setParseAction(str_literal_to_hex)

    transformer = ZeroOrMore(str_literal)
    return transformer.transformString(text)
Esempio n. 12
0
def transform_human(text):
    """Transform user input into something Script can read.

    Every double-quoted string literal in ``text`` is replaced by ``0x``
    followed by the hexadecimal encoding of its contents; all other text is
    returned unchanged.
    """
    import binascii

    # these are parseActions for pyparsing.
    def str_literal_to_hex(s, loc, toks):
        for i, t in enumerate(toks):
            # str.encode('hex') exists only on Python 2 and raises
            # LookupError on Python 3; hexlify works on both once the
            # token is a byte string.
            raw = t if isinstance(t, bytes) else t.encode('utf-8')
            digits = binascii.hexlify(raw)
            if not isinstance(digits, str):  # Python 3 returns bytes
                digits = digits.decode('ascii')
            toks[i] = '0x' + digits
        return toks

    # ^ parseActions for pyparsing end here.
    str_literal = QuotedString('"')
    str_literal.setParseAction(str_literal_to_hex)

    transformer = ZeroOrMore(str_literal)
    return transformer.transformString(text)
Esempio n. 13
0
    def ParseExpression(cls, source):
        """Parse ``source`` as an s-expression and wrap the result in ``cls``.

        Atoms are integers, the booleans ``#t`` / ``#f``, symbols and
        double-quoted (possibly multi-line) strings.  ``( ... )`` builds a
        list and ``#( ... )`` builds a vector; both may nest.  The first
        parsed token is flattened with ``SchemeExpression._flatten`` and
        passed to ``cls``.
        """
        # Parse actions turning raw tokens into SchemeExpression values.
        def to_boolean(s, loc, toks):
            return SchemeExpression.make_boolean(toks[0] == '#t').expression

        def to_symbol(s, loc, toks):
            return SchemeExpression.make_symbol(toks[0]).expression

        def to_integer(s, loc, toks):
            return SchemeExpression.make_integer(toks[0]).expression

        def to_string(s, loc, toks):
            return SchemeExpression.make_string(toks[0]).expression

        def to_list(s, loc, toks):
            return SchemeExpression.make_list(toks[0])

        def to_vector(s, loc, toks):
            return SchemeExpression.make_vector(toks[0])

        # atoms
        boolean = (Keyword('#f') | Keyword('#t')).setParseAction(to_boolean)
        symbol = Word(alphanums + '-_').setParseAction(to_symbol)
        integer = Word(nums).setParseAction(to_integer)
        string = QuotedString('"', multiline=True).setParseAction(to_string)

        element = integer | boolean | symbol | string

        # compound expressions refer to each other, hence the Forwards
        lexpr = Forward()
        vexpr = Forward()

        lparen = Literal('(').suppress()
        rparen = Literal(')').suppress()
        hashsym = Literal('#').suppress()

        def members():
            # Zero or more atoms, lists or vectors; a fresh element per use.
            return ZeroOrMore(element ^ lexpr ^ vexpr)

        # lists: ( ... )
        lexpr << Group(lparen + members() + rparen)
        lexpr.setParseAction(to_list)

        # vectors: #( ... )
        vexpr << Group(hashsym + lparen + members() + rparen)
        vexpr.setParseAction(to_vector)

        # final...
        sexpr = element | vexpr | lexpr

        sexpr.keepTabs = True  # this seems to be necessary to fix a problem with pyparsing
        parsed = sexpr.parseString(source)
        return cls(SchemeExpression._flatten(parsed[0]))
Esempio n. 14
0
    def _build_grammar(self):
        """Build and return the expression grammar.

        An expression is one of, tried in this order: a parenthesised binary
        arithmetic expression ``(expr op expr)``, an assignment
        ``tag = expr``, a tag name, a number, a single-quoted string, or
        ``?`` (print tags).  Parse actions convert the tokens through
        ``self.to_literal``, ``self.to_arith``, ``self.to_assign`` and
        ``self.to_print_tags``.
        """
        expr = Forward()

        # Numbers: the float alternative is tried before the int one.
        float_lit = Combine(Word(nums) + '.' + Word(nums)).setName('float')
        float_lit.setParseAction(lambda x: self.to_literal(float(x[0])))

        int_lit = Word(nums).setName('int')
        int_lit.setParseAction(lambda x: self.to_literal(int(x[0])))

        num = (float_lit | int_lit).setParseAction(lambda x: x[0])

        tag_name = Word(alphas + "_", alphanums + "_").setName('tag_name')
        tag_name.setParseAction(lambda t: tag_reference.TagReference(t[0]))

        quoted_string = QuotedString("'").setParseAction(
            lambda s: self.to_literal(s[0]))

        oper = oneOf('+ * / -').setParseAction(lambda o: o[0])

        lpar = Literal("(").suppress()
        rpar = Literal(")").suppress()

        # (left op right): the group holds exactly the three parts handed to
        # to_arith.
        arith = Group(lpar + expr + oper + expr + rpar)
        arith.setParseAction(
            lambda t: self.to_arith(t[0][0], t[0][1], t[0][2]))

        # tag = expr: token 1 is the '=' sign and is skipped.
        assign = (tag_name + '=' + expr).setName('assign')
        assign.setParseAction(lambda x: self.to_assign(x[0], x[2]))

        print_tags = Literal('?').setParseAction(
            lambda x: self.to_print_tags())

        expr << (arith | assign | tag_name | num | quoted_string | print_tags)
        expr.setParseAction(lambda x: x[0])
        return expr
Esempio n. 15
0
    def build_jimple_parser(self):
        # Literals
        op_add     =   Literal("+")
        op_sub     =   Literal("-")
        op_mul     =   Literal("*")
        op_div     =   Literal("/")
        op_xor     =   Literal("^")
        op_lt      =   Literal("<")
        op_gt      =   Literal(">")
        op_eq      =   Literal("==")
        op_neq     =   Literal("!=")
        op_lte     =   Literal("<=")
        op_gte     =   Literal(">=")
        op_sls     =   Literal("<<")
        op_srs     =   Literal(">>")
        op_urs     =   Literal(">>>")
        op_mod     =   Literal("%")
        op_rem     =   Literal("rem")
        op_bwa     =   Literal("&")
        op_bwo     =   Literal("|")
        op_cmp     =   Literal("cmp")
        op_cmpg    =   Literal("cmpg")
        op_cmpl    =   Literal("cmpl")
        lit_lcb    =   Literal("{").suppress()
        lit_rcb    =   Literal("}").suppress()
        lit_lp     =   Literal("(").suppress()
        lit_rp     =   Literal(")").suppress()
        lit_dot    =   Literal(".").suppress()
        lit_asgn   =   Literal("=").suppress()
        lit_ident  =   Literal(":=").suppress()
        lit_strm   =   Literal(";").suppress()
        lit_cln    =   Literal(":").suppress()
        lit_lsb    =   Literal("[").suppress()
        lit_rsb    =   Literal("]").suppress()
        
        binop =   op_add ^ op_sub ^ op_mul ^ op_div ^ op_xor \
                ^ op_bwa ^ op_mod ^ op_rem ^ op_urs ^ op_lte \
                ^ op_gte ^ op_sls ^ op_srs ^ op_lt  ^ op_gt  \
                ^ op_eq  ^ op_neq ^ op_bwo ^ op_cmp ^ op_cmpg \
                ^ op_cmpl \
                
        cond_op =  op_gte ^ op_lte ^ op_lt ^ op_gt ^ op_eq ^ op_neq 
        
        # Keywords
        kw_specialinvoke   = Keyword("specialinvoke")
        kw_interfaceinvoke = Keyword("interfaceinvoke")
        kw_virtualinvoke   = Keyword("virtualinvoke")
        kw_staticinvoke    = Keyword("staticinvoke")
        kw_instanceof      = Keyword("instanceof")
        kw_new             = Keyword("new")
        kw_newarray        = Keyword("newarray")
        kw_newmultiarray   = Keyword("newmultiarray")
        kw_length          = Keyword("lengthof")
        kw_neg             = Keyword("neg")
        kw_goto            = Keyword("goto")
        kw_if              = Keyword("if")
        kw_this            = Keyword("@this")
        kw_caughtexception = Keyword("@caughtexception")
        kw_lookupswitch    = Keyword("lookupswitch")
        kw_case            = Keyword("case")
        kw_default         = Keyword("default")
        kw_return          = Keyword("return")
        kw_entermonitor    = Keyword("entermonitor")
        kw_exitmonitor     = Keyword("exitmonitor")
        kw_throw           = Keyword("throw")
        kw_throws          = Keyword("throws")
        kw_catch           = Keyword("catch")
        kw_transient       = Keyword("transient")
        kw_from            = Keyword("from")
        kw_to              = Keyword("to")
        kw_with            = Keyword("with")
        kw_breakpoint      = Keyword("breakpoint")
        kw_nop             = Keyword("nop")
        kw_public          = Keyword("public")
        kw_protected       = Keyword("protected")
        kw_private         = Keyword("private")
        kw_volatile        = Keyword("volatile")
        kw_static          = Keyword("static")
        kw_annotation      = Keyword("annotation")
        kw_final           = Keyword("final")
        kw_class           = Keyword("class")
        kw_enum            = Keyword("enum")
        kw_interface       = Keyword("interface")
        kw_abstract        = Keyword("abstract")
        kw_extends         = Keyword("extends")
        kw_implements      = Keyword("implements")
        kw_null            = Keyword("null")
        
        modifier = \
                kw_public | kw_protected | kw_private \
            |   kw_static | kw_abstract | kw_final \
            |   kw_volatile | kw_enum | kw_transient \
            |   kw_annotation
        
        #Identifiers
        id_local  = Combine(Optional(Literal("$")) + Word(alphas) + Word(nums))
        id_java = Word(alphas + "'$_", alphanums + "'$_")
        id_class_comp = Word(alphas + "_", alphanums + "$_")
        id_type = Combine(id_class_comp + ZeroOrMore(Combine(Literal(".") + (id_class_comp))) + Optional(Word("[]")))
        id_method_name = id_java | Word("<clinit>") | Word("<init>") 
        id_label = Combine(Literal("label") + Word(nums)) 
        id_parameter = Combine(Literal("@parameter") + Word(nums)) 
        
        # Field
        field_specifier = \
                Suppress(Literal("<")) \
            +   id_type + lit_cln + id_type + id_java \
            +   Suppress(Literal(">"))
        field_specifier.setParseAction(self.field_specifier_parse_action)
        
        # Method
        method_param_list = delimitedList(id_type, delim=",")
        id_method = \
                Suppress(Literal("<")) \
            +   id_type + lit_cln + id_type + id_method_name \
            +   lit_lp + Group(Optional(method_param_list)) + lit_rp \
            +   Suppress(Literal(">"))
        
        number_suffix = Optional(Literal("F") | Literal("L"))
        
        # Numeric constant
        expr_number = \
                Combine( 
                    Word("+-" + nums, nums) 
                +   Optional(Literal(".") + Optional(Word(nums))) 
                +   Optional(Literal("E") + Optional(Word("+-")) + Word(nums)) 
                +   number_suffix) \
            |   Combine(Literal("#Infinity") + number_suffix) \
            |   Combine(Literal("#-Infinity") + number_suffix) \
            |   Combine(Literal("#NaN") + number_suffix)
                
        expr_number.setParseAction(self.expr_numeric_const_parse_action)
        expr_str = QuotedString(quoteChar='"', escChar="\\")
        expr_str.setParseAction(self.expr_str_const_parse_action)
        
        # Null constant
        expr_null = kw_null
        expr_null.setParseAction(self.expr_null_parse_action)
       
        # Group all constants
        expr_constant = \
                expr_str \
            ^   expr_number \
            ^   expr_null 

        # A 'class' expression (class + classname)
        expr_class = kw_class + QuotedString(quoteChar='"')
        expr_class.setParseAction(self.expr_class_parse_action)
        
        # A local variable expression
        expr_local = id_local
        expr_local.setParseAction(self.expr_local_parse_action)
       
        # Group together all "immediate" values
        expr_imm =  expr_local ^ expr_constant ^ expr_class 
        expr_imm.setParseAction(self.expr_imm_parse_action)
         
        # Conditional expression
        expr_cond = expr_imm + cond_op + expr_imm
        expr_cond.setParseAction(self.expr_cond_parse_action)
        
        # Array index
        array_idx = lit_lsb + expr_imm + lit_rsb
        empty_array_idx = lit_lsb + lit_rsb
        
        expr_binop = expr_imm + binop + expr_imm
        expr_binop.setParseAction(self.expr_binop_parse_action)
        
        expr_cast = lit_lp + id_type + lit_rp + expr_imm
        expr_cast.setParseAction(self.expr_cast_parse_action)
        
        expr_instanceof = expr_imm + kw_instanceof + id_type
        expr_instanceof.setParseAction(self.expr_instanceof_parse_action)
        
        expr_new = Suppress(kw_new) + id_type
        expr_new.setParseAction(self.expr_new_parse_action)
        
        expr_newarray = kw_newarray + lit_lp + id_type + lit_rp + array_idx
        expr_newarray.setParseAction(self.expr_newarray_parse_action)
                
        expr_newmultiarray = kw_newmultiarray + lit_lp + id_type + lit_rp + OneOrMore(array_idx | empty_array_idx)
        expr_newmultiarray.setParseAction(self.expr_newmultiarray_parse_action)
        
        expr_lengthof = kw_length + expr_imm 
        expr_lengthof.setParseAction(self.expr_lengthof_parse_action)
        
        expr_neg = kw_neg + expr_imm
        expr_neg.setParseAction(self.expr_neg_parse_action)
        
        # Invoke Expressions
        method_arg_list = delimitedList(expr_imm, delim=",")
        expr_invoke = \
                kw_specialinvoke \
                    + id_local + lit_dot + id_method \
                    + lit_lp + Group(Optional(method_arg_list)) + lit_rp \
            |   kw_interfaceinvoke \
                    + id_local + lit_dot + id_method \
                    + lit_lp + Group(Optional(method_arg_list)) + lit_rp \
            |   kw_virtualinvoke \
                    + id_local + lit_dot + id_method \
                    + lit_lp + Group(Optional(method_arg_list)) + lit_rp \
            |   kw_staticinvoke + id_method \
                    + lit_lp + Group(Optional(method_arg_list)) + lit_rp 
        expr_invoke.setParseAction(self.expr_invoke_parse_action)
                    
        expr = \
                expr_binop \
            ^   expr_cast \
            ^   expr_instanceof \
            ^   expr_invoke \
            ^   expr_new \
            ^   expr_newarray \
            ^   expr_newmultiarray \
            ^   expr_lengthof \
            ^   expr_neg 
        expr.setParseAction(self.expr_parse_action)
                    
        # Concrete Reference Expression
        expr_field_ref =  Group(Optional(id_local + lit_dot)) + field_specifier 
        expr_field_ref.setParseAction(self.expr_field_ref_parse_action)
        
        expr_array_ref = id_local + array_idx
        expr_array_ref.setParseAction(self.expr_array_ref_parse_action)
        
        # L and R values
        expr_lvalue = \
                id_local \
            ^   expr_field_ref \
            ^   expr_array_ref
            
        expr_lvalue.setParseAction(self.expr_lvalue_parse_action)
        expr_rvalue = \
                expr \
            ^   expr_field_ref \
            ^   expr_array_ref \
            ^   expr_imm 
        
        # Declaration
        stmt_decl = \
                id_type \
            +   Group(delimitedList(id_local, delim=",")) \
            +   lit_strm
        stmt_decl.setParseAction(self.stmt_decl_parse_action)
        
        # Statements 
        stmt_assign = \
                expr_lvalue + lit_asgn + expr_rvalue + lit_strm
        '''
                id_local + lit_asgn + expr_rvalue + lit_strm \
            ^   field_specifier + lit_asgn + expr_imm + lit_strm \
            ^   id_local + lit_dot + field_specifier + lit_asgn + expr_imm + lit_strm \
            ^   id_local + lit_lsb + expr_imm + lit_rsb + lit_asgn + expr_imm + lit_strm
        '''
        
        stmt_assign.setParseAction(self.stmt_assign_parse_action)
        
        stmt_identity = \
                id_local + lit_ident + kw_this + lit_cln + id_type + lit_strm \
            ^   id_local + lit_ident + id_parameter + lit_cln + id_type + lit_strm \
            ^   id_local + lit_ident + kw_caughtexception + lit_strm
            
        stmt_identity.setParseAction(self.stmt_identity_parse_action)
        
        stmt_goto = kw_goto + id_label + lit_strm
        stmt_goto.setParseAction(self.stmt_goto_parse_action)
        
        stmt_if = Suppress(kw_if) + expr_cond + Suppress(kw_goto) + id_label + lit_strm
        stmt_if.setParseAction(self.stmt_if_parse_action)
        
        stmt_invoke = expr_invoke + lit_strm
        stmt_invoke.setParseAction(self.stmt_invoke_parse_action)
        
        switch_case = kw_case + expr_number + lit_cln + kw_goto + id_label + lit_strm
        switch_default = kw_default + lit_cln + kw_goto + id_label + lit_strm
        switch_body = ZeroOrMore(switch_case) + Optional(switch_default)
        stmt_switch = kw_lookupswitch + lit_lp + expr_imm + lit_rp + lit_lcb + switch_body + lit_rcb + lit_strm
        stmt_switch.setParseAction(self.stmt_switch_parse_action)
        
        stmt_enter_monitor = kw_entermonitor + expr_imm + lit_strm
        stmt_enter_monitor.setParseAction(self.stmt_enter_monitor_parse_action)
        
        stmt_exit_monitor = kw_exitmonitor + expr_imm + lit_strm
        stmt_exit_monitor.setParseAction(self.stmt_exit_monitor_parse_action)
        
        stmt_return = Suppress(kw_return) + expr_imm + lit_strm | Suppress(kw_return) + lit_strm
        stmt_return.setParseAction(self.stmt_return_parse_action)
        
        stmt_throw = kw_throw + expr_imm + lit_strm
        stmt_throw.setParseAction(self.stmt_throw_parse_action)
        
        stmt_catch = kw_catch + id_type \
                +   kw_from + id_label \
                +   kw_to + id_label \
                +   kw_with + id_label + lit_strm
        stmt_catch.setParseAction(self.stmt_catch_parse_action)
        
        stmt_breakpoint = kw_breakpoint + lit_strm
        stmt_breakpoint.setParseAction(self.stmt_breakpoint_parse_action)
        
        stmt_nop = kw_nop + lit_strm
        stmt_nop.setParseAction(self.stmt_nop_parse_action)
        
        jimple_stmt = \
                stmt_decl           \
            ^   stmt_assign         \
            ^   stmt_identity       \
            ^   stmt_goto           \
            ^   stmt_if             \
            ^   stmt_invoke         \
            ^   stmt_switch         \
            ^   stmt_enter_monitor  \
            ^   stmt_exit_monitor   \
            ^   stmt_return         \
            ^   stmt_throw          \
            ^   stmt_catch          \
            ^   stmt_breakpoint     \
            ^   stmt_nop
        jimple_stmt.setParseAction(self.stmt_parse_action)
        
        throws_clause = kw_throws + delimitedList(id_type, delim=",")
        
        method_sig = \
                    Group(ZeroOrMore(modifier)) \
                +   id_type + id_method_name \
                +   lit_lp + Group(Optional(method_param_list)) + lit_rp \
                +   Group(Optional(throws_clause))
        
        method_decl = method_sig + lit_strm
        
        field_decl = ZeroOrMore(modifier) + id_type + id_java + lit_strm
        field_decl.setParseAction(self.field_decl_parse_action)
                    
        class_decl = \
                    Group(ZeroOrMore(modifier)) + Suppress(kw_class) + id_type \
                +   Optional(kw_extends + delimitedList(id_type, delim=",")) \
                +   Optional(kw_implements + delimitedList(id_type, delim=","))

        interface_decl = \
                    Group(ZeroOrMore(modifier)) + Suppress(kw_interface) + id_type \
                +   Optional(kw_extends + delimitedList(id_type, delim=",")) \
                +   Optional(kw_implements + delimitedList(id_type, delim=","))

        
        jimple_method_item = \
                jimple_stmt \
            |   Combine(id_label + lit_cln).setParseAction(self.label_parse_action)
            
        jimple_method_body = ZeroOrMore(jimple_method_item)
        jimple_method = \
                Group(method_sig) + lit_lcb \
            +   Group(jimple_method_body) \
            +   lit_rcb
        jimple_method.setParseAction(self.method_defn_parse_action)

        jimple_class_item = field_decl | method_decl | jimple_method
        jimple_class_body = ZeroOrMore(jimple_class_item)
        jimple_class = Group(class_decl | interface_decl) + lit_lcb + Group(jimple_class_body) + lit_rcb
        jimple_class.setParseAction(self.class_defn_parse_action)
        return jimple_class
Esempio n. 16
0
class Int(int):
    """Plain ``int`` subclass; adds no behaviour of its own."""


################ Parsing: parse, tokenize, and read_from_tokens
# Integer tokens are converted to Int instances.
lisp_integer = Word(nums)
lisp_integer.setParseAction(lambda s, l, t: Int(t[0]))

# Floats need digits on both sides of the dot; Combine forbids inner spaces.
lisp_float = Combine(Word(nums) + '.' + Word(nums))
lisp_float.setParseAction(lambda s, l, t: float(t[0]))

# The float alternative has to be tried first: "|" is an ordered choice that
# commits to the first alternative that matches, so with the integer in front
# "3.14" would be consumed as Int(3) and the float branch would never fire.
lisp_number = lisp_float | lisp_integer

lisp_string = QuotedString(quoteChar='"', escChar='\\', multiline=True)
lisp_string.setParseAction(lambda s, l, t: String(t[0]))

special = "_-+*/^><=:'"
# Ruby-style symbols: a leading letter or special character, an optional run
# of letters/digits/special characters, and an optional trailing '?' or '!'.
lisp_symbol = Combine(
    Char(alphas + special) + Optional(Word(alphas + nums + special)) +
    Optional(Char('?!')))

lisp_symbol.setParseAction(lambda s, l, t: Symbol(t[0]))

# Symbols are tried before strings and numbers.
lisp_atom = lisp_symbol | lisp_string | lisp_number

lisp_list = nestedExpr(opener='(',
                       closer=')',
                       content=lisp_atom,
Esempio n. 17
0
def transform_human(text, variables=None):
    """Transform user input with given context.

    Args:
        text (str): User input.
        variables (dict): Variables for purposes of substitution.

    Returns:
        A 2-tuple of: (A human-readable script that Script can parse,
            A list of contextual information for tooltips, etc.)
        Each tip is a tuple ``(start, end, value, match_type)``.
    """
    import binascii

    if variables is None:
        variables = {}  # No mutable default value.

    # these are parseActions for pyparsing.
    def str_literal_to_hex(s, loc, toks):
        """Replace each string literal with '0x' + the hex of its bytes."""
        for i, t in enumerate(toks):
            # str.encode('hex') only exists on Python 2; hexlify works on
            # both major versions once the token is a byte string.
            raw = t if isinstance(t, bytes) else t.encode('utf-8')
            hexed = binascii.hexlify(raw)
            if not isinstance(hexed, str):
                hexed = hexed.decode('ascii')
            toks[i] = ''.join(['0x', hexed])
        return toks

    def var_name_to_value(s, loc, toks):
        """Substitute '$name' tokens that have a truthy value in variables."""
        for i, t in enumerate(toks):
            val = variables.get(t[1:])  # drop the leading '$'
            if val:
                toks[i] = val
        return toks

    def implicit_opcode_to_explicit(s, loc, toks):
        """Add "OP_" prefix to an opcode."""
        for i, t in enumerate(toks):
            toks[i] = '_'.join(['OP', t])
        return toks

    def hex_to_formatted_hex(s, loc, toks):
        """Add "0x" prefix and ensure even length."""
        for i, t in enumerate(toks):
            new_tok = t
            # Add '0x' prefix
            if not t.startswith('0x'):
                if t.startswith('x'):
                    new_tok = ''.join(['0', t])
                else:
                    new_tok = ''.join(['0x', t])
            # Even-length string
            if len(new_tok) % 2 != 0:
                new_tok = ''.join([new_tok[0:2], '0', new_tok[2:]])
            toks[i] = new_tok
        return toks

    # ^ parseActions for pyparsing end here.
    str_literal = QuotedString('"')
    str_literal.setParseAction(str_literal_to_hex)
    var_name = Combine(Word('$') + Word(pyparsing.alphas))
    var_name.setParseAction(var_name_to_value)

    # Here we populate the list of contextual tips.

    # Explicit opcode names
    op_names = [str(name) for name in OPCODE_NAMES.keys()]
    op_names_explicit = ' '.join(op_names)

    def is_small_int(op):
        """True if the text after the first three characters is an integer.

        Presumably op always starts with "OP_" (e.g. OP_1 ... OP_16) --
        verify against OPCODE_NAMES.
        """
        try:
            int(op[3:])
        except ValueError:
            return False
        return True

    op_names_implicit = ' '.join(
        [name[3:] for name in op_names if not is_small_int(name)])

    # Hex, implicit (e.g. 'a') and explicit (e.g. '0x0a')
    explicit_hex = Combine(
        Word('0x') + Word(pyparsing.hexnums) + pyparsing.WordEnd())
    implicit_hex = Combine(pyparsing.WordStart() +
                           OneOrMore(Word(pyparsing.hexnums)) +
                           pyparsing.WordEnd())
    explicit_hex.setParseAction(hex_to_formatted_hex)
    implicit_hex.setParseAction(hex_to_formatted_hex)

    # Opcodes, implicit (e.g. 'ADD') and explicit (e.g. 'OP_ADD')
    explicit_op = pyparsing.oneOf(op_names_explicit)
    implicit_op = Combine(pyparsing.WordStart() +
                          pyparsing.oneOf(op_names_implicit))
    implicit_op.setParseAction(implicit_opcode_to_explicit)

    contexts = pyparsing.Optional(
        var_name('Variable') | str_literal('String literal')
        | explicit_op('Opcode') | implicit_op('Opcode') | explicit_hex('Hex')
        | implicit_hex('Hex'))
    context_tips = []
    for tokens, start, end in contexts.scanString(text):
        named = tokens.asDict()
        # The Optional wrapper also yields matches with no named result.
        if not named:
            continue
        # dict.items() is not indexable on Python 3; take the first pair.
        match_type, value = next(iter(named.items()))
        context_tips.append((start, end, value, match_type))

    # Now we do the actual transformation.

    s = text
    s = var_name.transformString(s)
    s = str_literal.transformString(s)
    s = implicit_op.transformString(s)
    s = implicit_hex.transformString(s)
    s = explicit_hex.transformString(s)
    return s, context_tips
Esempio n. 18
0
def transform_human(text, variables=None):
    """Transform user input with given context.

    Args:
        text (str): User input.
        variables (dict): Variables for purposes of substitution.

    Returns:
        A 2-tuple of: (A human-readable script that Script can parse,
            A list of contextual information for tooltips, etc.)
        Each tip is a tuple ``(start, end, value, match_type)``.
    """
    import binascii

    if variables is None:
        variables = {} # No mutable default value.

    # these are parseActions for pyparsing.
    def str_literal_to_hex(s, loc, toks):
        """Replace each string literal with '0x' + the hex of its bytes."""
        for i, t in enumerate(toks):
            # str.encode('hex') only exists on Python 2; hexlify works on
            # both major versions once the token is a byte string.
            raw = t if isinstance(t, bytes) else t.encode('utf-8')
            hexed = binascii.hexlify(raw)
            if not isinstance(hexed, str):
                hexed = hexed.decode('ascii')
            toks[i] = ''.join(['0x', hexed])
        return toks
    def var_name_to_value(s, loc, toks):
        """Substitute '$name' tokens that have a truthy value in variables."""
        for i, t in enumerate(toks):
            val = variables.get(t[1:])  # drop the leading '$'
            if val:
                toks[i] = val
        return toks
    def implicit_opcode_to_explicit(s, loc, toks):
        """Add "OP_" prefix to an opcode."""
        for i, t in enumerate(toks):
            toks[i] = '_'.join(['OP', t])
        return toks
    def hex_to_formatted_hex(s, loc, toks):
        """Add "0x" prefix and ensure even length."""
        for i, t in enumerate(toks):
            new_tok = t
            # Add '0x' prefix
            if not t.startswith('0x'):
                if t.startswith('x'):
                    new_tok = ''.join(['0', t])
                else:
                    new_tok = ''.join(['0x', t])
            # Even-length string
            if len(new_tok) % 2 != 0:
                new_tok = ''.join([new_tok[0:2], '0', new_tok[2:]])
            toks[i] = new_tok
        return toks
    # ^ parseActions for pyparsing end here.
    str_literal = QuotedString('"')
    str_literal.setParseAction(str_literal_to_hex)
    var_name = Combine(Word('$') + Word(pyparsing.alphas))
    var_name.setParseAction(var_name_to_value)

    # Here we populate the list of contextual tips.

    # Explicit opcode names
    op_names = [str(name) for name in OPCODE_NAMES.keys()]
    op_names_explicit = ' '.join(op_names)
    def is_small_int(op):
        """True if the text after the first three characters is an integer.

        Presumably op always starts with "OP_" (e.g. OP_1 ... OP_16) --
        verify against OPCODE_NAMES.
        """
        try:
            int(op[3:])
        except ValueError:
            return False
        return True
    op_names_implicit = ' '.join([name[3:] for name in op_names if not is_small_int(name)])

    # Hex, implicit (e.g. 'a') and explicit (e.g. '0x0a')
    explicit_hex = Combine(Word('0x') + Word(pyparsing.hexnums) + pyparsing.WordEnd())
    implicit_hex = Combine(pyparsing.WordStart() + OneOrMore(Word(pyparsing.hexnums)) + pyparsing.WordEnd())
    explicit_hex.setParseAction(hex_to_formatted_hex)
    implicit_hex.setParseAction(hex_to_formatted_hex)

    # Opcodes, implicit (e.g. 'ADD') and explicit (e.g. 'OP_ADD')
    explicit_op = pyparsing.oneOf(op_names_explicit)
    implicit_op = Combine(pyparsing.WordStart() + pyparsing.oneOf(op_names_implicit))
    implicit_op.setParseAction(implicit_opcode_to_explicit)

    contexts = pyparsing.Optional(var_name('Variable') |
                                  str_literal('String literal') |
                                  explicit_op('Opcode') |
                                  implicit_op('Opcode') |
                                  explicit_hex('Hex') |
                                  implicit_hex('Hex'))
    context_tips = []
    for tokens, start, end in contexts.scanString(text):
        named = tokens.asDict()
        # The Optional wrapper also yields matches with no named result.
        if not named:
            continue
        # dict.items() is not indexable on Python 3; take the first pair.
        match_type, value = next(iter(named.items()))
        context_tips.append((start, end, value, match_type))

    # Now we do the actual transformation.

    s = text
    s = var_name.transformString(s)
    s = str_literal.transformString(s)
    s = implicit_op.transformString(s)
    s = implicit_hex.transformString(s)
    s = explicit_hex.transformString(s)
    return s, context_tips
Esempio n. 19
0
class SearchRestrictionParser(object):
    """ 
    Defines the grammar for simple search restriction expressions. 
    
    The parsers of the different terms of these restriction expressions are provided by this class.
    """
    def __init__(self):
        """ Constructor. Builds all sub-expressions of the grammar. """

        self.__literalExpression = None
        self.__keywordExpression = None
        self.__propertyNameExpression = None
        self.__comparisonExpression = None
        self.__conditionExpression = None
        self.__conjunctionExpression = None
        self.__restrictionExpression = None
        self.__dateExpression = None
        self.__numberExpression = None
        self.__conjunctionTokens = None
        self.__comparisonTokens = None
        self.__andKeyword = None
        self.__orKeyword = None
        self.__notKeyword = None
        self.__quotedStringCharacters = ["\"", "'"]

        self.__initSearchRestrictionParser()

    def __initSearchRestrictionParser(self):
        """ Initializes the parser expressions for the search restrictions (stored on the instance). """

        unicodeUmlaut = "".join(unicodedata.lookup(characterName) for characterName in (
            "LATIN CAPITAL LETTER A WITH DIAERESIS",
            "LATIN SMALL LETTER A WITH DIAERESIS",
            "LATIN CAPITAL LETTER O WITH DIAERESIS",
            "LATIN SMALL LETTER O WITH DIAERESIS",
            "LATIN CAPITAL LETTER U WITH DIAERESIS",
            "LATIN SMALL LETTER U WITH DIAERESIS",
            "LATIN SMALL LETTER SHARP S"))
        # define property name
        firstPropertyNameCharacter = alphas + unicodeUmlaut + "_"
        propertyCharacter = firstPropertyNameCharacter + nums + ".-"
        self.__propertyNameExpression = Word(firstPropertyNameCharacter,
                                             propertyCharacter)

        # define literal
        # Raw strings keep the regex escapes (\d, \.) from being read as
        # (invalid) Python string escapes; the patterns are unchanged.
        day = Regex(r"(0[1-9]|[12][0-9]|3[01])")
        month = Regex(r"(0[1-9]|1[012])")
        year = Regex(r"((?:19|20)\d\d)")
        hour = Regex(r"([01][0-9]|2[0-3])")
        minute = Regex(r"([0-5][0-9])")
        second = minute
        self.__dateExpression = Combine(day + "." + month + "." + year +
                                        White() + hour + ":" + minute + ":" +
                                        second)
        self.__numberExpression = Regex(
            r"[-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?")
        self.__literalExpression = QuotedString(
            self.__quotedStringCharacters[0])
        for quotedStringCharacter in self.__quotedStringCharacters[1:]:
            self.__literalExpression |= QuotedString(quotedStringCharacter)
        self.__literalExpression.setParseAction(self.__handleLiteral)

        # define keywords
        notKeyword = Keyword(NOT_OPERATOR, caseless=True)
        andKeyword = Keyword(AND_OPERATOR, caseless=True)
        orKeyword = Keyword(OR_OPERATOR, caseless=True)

        gteKeyword = Keyword(GTE_OPERATOR)
        lteKeyword = Keyword(LTE_OPERATOR)
        equalKeyword = Keyword(EQUAL_OPERATOR)
        gtKeyword = Keyword(GT_OPERATOR)
        ltKeyword = Keyword(LT_OPERATOR)
        likeKeyword = Keyword(LIKE_OPERATOR, caseless=True)
        comparisonKeyword = gteKeyword | lteKeyword | equalKeyword | gtKeyword | ltKeyword | likeKeyword

        existsKeyword = Keyword(EXISTS_OPERATOR, caseless=True)
        contentContainsKeyword = Keyword(CONTENT_CONTAINS_OPERATOR,
                                         caseless=True)
        isCollectionKeyword = Keyword(IS_COLLECTION_OPERATOR, caseless=True)

        self.__keywordExpression = notKeyword | andKeyword | orKeyword | comparisonKeyword | existsKeyword | \
                                   contentContainsKeyword | isCollectionKeyword | "(" | ")"

        # definition of condition terms
        comparisonCondition = Group(self.__propertyNameExpression +
                                    comparisonKeyword +
                                    self.__literalExpression)
        existsCondition = Group(existsKeyword + self.__propertyNameExpression)
        contentContainsCondition = Group(contentContainsKeyword +
                                         self.__literalExpression)
        isCollectionCondition = isCollectionKeyword
        self.__conditionExpression = comparisonCondition | existsCondition | contentContainsCondition | isCollectionCondition
        self.__conditionExpression.setParseAction(self.__handleConditionTerm)

        # definition of restriction expressions (operators to combine the condition terms)
        self.__restrictionExpression = operatorPrecedence(
            self.__conditionExpression,
            [(notKeyword, 1, opAssoc.RIGHT), (andKeyword, 2, opAssoc.LEFT),
             (orKeyword, 2, opAssoc.LEFT)]) + StringEnd()

        # definition of comparison expression
        self.__comparisonExpression = comparisonKeyword
        self.__andKeyword = andKeyword
        self.__orKeyword = orKeyword
        self.__notKeyword = notKeyword

        # definition of conjunction expression
        self.__conjunctionExpression = andKeyword | orKeyword

    def registerPropertyParseAction(self, parseAction):
        """ Sets the parsing action used when matching a property expression (replaces any previous one). """

        self.__propertyNameExpression.setParseAction(parseAction)

    def registerLiteralParseAction(self, parseAction):
        """ 
        Sets the parsing action used when matching a literal.
        
        This replaces the literal evaluation that is installed when the parser is initialized.
        """

        self.__literalExpression.setParseAction(parseAction)

    def registerConjunctionParseAction(self, parseAction):
        """ Sets the parsing action used when matching the and, or and not keywords (replaces any previous one). """

        self.__andKeyword.setParseAction(parseAction)
        self.__orKeyword.setParseAction(parseAction)
        self.__notKeyword.setParseAction(parseAction)

    def registerComparisonParseAction(self, parseAction):
        """ Sets the parsing action used when matching a comparison keyword (replaces any previous one). """

        self.__comparisonExpression.setParseAction(parseAction)

    def __handleLiteral(self, _, __, tokenList):
        """ 
        Evaluates the content of the quoted string. 
        
        @return: C{time.struct_time} if the content holds exactly one date, an C{int} or C{float}
                 if it holds no date and exactly one number, otherwise C{None}.
        """

        unquotedString = tokenList[0]
        result = list(self.__dateExpression.scanString(unquotedString))
        if len(result) == 1:
            return time.strptime(str(result[0][0][0]), "%d.%m.%Y %H:%M:%S")
        # The number matches are added to the date matches on purpose: a number
        # is only accepted when no date at all was found.
        result.extend(self.__numberExpression.scanString(unquotedString))
        if len(result) == 1:
            return self._convertNumber(str(result[0][0][0]))
        return None

    @staticmethod
    def _convertNumber(numberString):
        """ 
        Converts a matched numeric literal to C{int} if possible, otherwise to C{float}.
        
        Used instead of C{eval} so that user input is never executed and
        numbers with leading zeros (e.g. "010") are read as decimal values.
        """

        try:
            return int(numberString)
        except ValueError:
            return float(numberString)

    def parseString(self, inputString):
        """ 
        Parses the string and returns the result. 
        
        @param inputString: String to parse.
        @type inputString: C{unicode}
        
        @raise ParseException: Signals an error parsing the given string.
        """

        return self.__restrictionExpression.parseString(inputString)

    @staticmethod
    def __handleConditionTerm(_, __, tokens):
        """ 
        Extracts operator, literal and property name from the parsed condition term.
        
        @return: Tuple of property name, operator and literal. Parts that are 
                 not present in the condition term are C{None}.
        """

        operator = propertyName = literal = None
        # Grouped terms arrive as a nested token list. A bare keyword arrives
        # as a string, which is split into characters here and normally ends
        # up in the final else branch.
        tokenList = list(list(tokens)[0])

        if len(tokenList) == 3:
            operator = tokenList[1]
            propertyName = tokenList[0]
            literal = tokenList[2]
        elif len(tokenList) == 2:
            operator = tokenList[0]
            if operator == EXISTS_OPERATOR:
                propertyName = tokenList[1]
            else:
                literal = tokenList[1]
        else:
            operator = tokens[0]
        return (propertyName, operator, literal)

    def matchKeyword(self, inputString):
        """ 
        Returns all matches of keywords. Keywords in literals are ignored.
        
        @param inputString: String to parse.
        @type inputString: C{unicode}
        
        @return: List of matched expression tuples that consist of matched expression, start index, end index.
        @rtype: C{list} of C{tuple} of C{unicode}, C{int}, C{int}
        """

        return self._matchWrapper(inputString, self.__keywordExpression)

    def matchPropertyName(self, inputString):
        """ 
        Returns all matches of property names. Keywords and property names in literals are ignored.
        
        @param inputString: String to parse.
        @type inputString: C{unicode}
        
        @return: List of matched expression tuples that consist of matched expression, start index, end index.
        @rtype: C{list} of C{tuple} of C{unicode}, C{int}, C{int}
        """

        return self._matchWrapper(inputString, self.__propertyNameExpression)

    def matchLiteral(self, inputString):
        """ 
        Returns all matches of literals.
        
        @param inputString: String to parse.
        @type inputString: C{unicode}
        
        @return: List of matched expression tuples that consist of matched expression, start index, end index.
        @rtype: C{list} of C{tuple} of (C{unicode} or C{time.struct_time} or C{int} or C{float}, C{int}, C{int})
        """

        return self._matchWrapper(inputString, self.__literalExpression)

    def matchComparison(self, inputString):
        """ 
        Returns all matches of comparison operators.
        
        @param inputString: String to parse.
        @type inputString: C{unicode}
        
        @return: List of matched expression tuples that consist of matched expression, start index, end index.
        @rtype: C{list} of C{tuple} of (C{unicode} or C{time.struct_time} or C{int} or C{float}, C{int}, C{int})
        """

        return self._matchWrapper(inputString, self.__comparisonExpression)

    def matchConjunction(self, inputString):
        """ 
        Returns all matches of conjunction operators.
        
        @param inputString: String to parse.
        @type inputString: C{unicode}
        
        @return: List of matched expression tuples that consist of matched expression, start index, end index.
        @rtype: C{list} of C{tuple} of (C{unicode} or C{time.struct_time} or C{int} or C{float}, C{int}, C{int})
        """

        return self._matchWrapper(inputString, self.__conjunctionExpression)

    def matchConditionTerm(self, inputString):
        """ 
        Returns all matches of condition terms. Condition terms in literals are ignored.
        
        @param inputString: String to parse.
        @type inputString: C{unicode}
        
        @return: List of matched expression tuples that consist of matched expression, start index, end index.
        @rtype: C{list} of C{tuple} of C{unicode}, C{int}, C{int}
        """

        return self._matchWrapper(inputString, self.__conditionExpression)

    @property
    def comparisonTokens(self):
        """
        Returns a list of strings representing the comparison operators.
        
        The list is computed on first access and cached afterwards.
        """

        if self.__comparisonTokens is None:
            self.__comparisonTokens = self._walkKeywordTree(
                self.__comparisonExpression)
        return self.__comparisonTokens

    @property
    def conjunctionTokens(self):
        """
        Returns a list of strings representing the conjunction keywords.
        
        The list is computed on first access and cached afterwards.
        """

        if self.__conjunctionTokens is None:
            self.__conjunctionTokens = self._walkKeywordTree(
                self.__conjunctionExpression)
        return self.__conjunctionTokens

    @property
    def quotedStringCharacters(self):
        """
        Returns a list of strings representing the quoted string characters.
        """

        return self.__quotedStringCharacters

    def _walkKeywordTree(self, rootNode):
        """
        Walks through a MatchFirst object and returns possible matches as a string list.
        
        Every alternative is visited, so the result is the same whether the 
        alternatives are nested pairwise (as built by chained C{|}) or held 
        in one flat list.
        
        @param rootNode: Expression with an C{exprs} list of alternatives, or a leaf with a C{match} string.
        
        @return: Match strings of all leaves, left to right.
        @rtype: C{list} of C{unicode}
        """

        try:
            alternatives = rootNode.exprs
        except AttributeError:
            # Leaf node: a single keyword.
            return [rootNode.match]
        result = list()
        for alternative in alternatives:
            result.extend(self._walkKeywordTree(alternative))
        return result

    @staticmethod
    def _matchWrapper(inputString, expression):
        """ 
        Calls scanString with given input, parse expression and returns the result. 
        
        @return: List of tuples of first matched token, start index, end index.
        @rtype: C{list} of C{tuple}
        """

        return [(tokens[0], startIndex, endIndex)
                for tokens, startIndex, endIndex in expression.scanString(inputString)]
Esempio n. 20
0
def _define_grammar():
    """
    Build a fresh selector grammar and return it.

    Everything is created inside this function so that none of the
    intermediate parser elements end up in the module namespace.
    """
    def quoted():
        # A new single- or double-quoted string element on every call.
        return QuotedString('"') | QuotedString("'")

    def braces(inner):
        # "{ ... }" with the braces dropped from the output.
        return Suppress("{") + inner + Suppress("}")

    selector = Forward()

    label = Word(LABEL_CHARS).setParseAction(LabelNode)
    literal = quoted().setParseAction(LiteralNode)
    literal_set = braces(delimitedList(quoted(), ","))
    literal_set.setParseAction(SetLiteralNode)

    # label == "x" / label != "x"
    equals = (label + Suppress("==") + literal)
    equals.setParseAction(LabelToLiteralEqualityNode)
    differs = (label + Suppress("!=") + literal)
    differs.setParseAction(InequalityNode)

    # label in {...} / label not in {...}
    member = (label + Suppress(Keyword("in")) + literal_set)
    member.setParseAction(LabelInSetLiteralNode)
    not_member = (label +
                  Suppress(Keyword("not") + Keyword("in")) +
                  literal_set)
    not_member.setParseAction(NotInNode)

    # has(label)
    has = (Suppress("has(") + Word(LABEL_CHARS) + Suppress(")"))
    has.setParseAction(HasNode)

    # all() is accepted inside expressions too (e.g. "! all()"); the
    # trivial selectors "" and "all()" are special-cased elsewhere.
    match_all = Suppress("all()")
    match_all.setParseAction(AllNode)

    # Order matters: alternatives are tried left to right.
    atom = (equals | differs | member | not_member | has | match_all)
    grouped = Suppress("(") + selector + Suppress(")")

    negatable = ZeroOrMore("!") + (atom | grouped)
    negatable.setParseAction(simplify_negation_node)

    conjunction = negatable + ZeroOrMore(Suppress("&&") + negatable)
    conjunction.setParseAction(simplify_and_node)

    disjunction = conjunction + ZeroOrMore(Suppress("||") + conjunction)
    disjunction.setParseAction(simplify_or_node)

    selector << disjunction

    # The whole input has to be consumed.
    return selector + StringEnd()
Esempio n. 21
0
    "platform.version": "platform_version",
    "platform.machine": "platform_machine",
    "platform.python_implementation": "platform_python_implementation",
    "python_implementation": "platform_python_implementation",
}
# Map aliased variable names to their ALIASES entry before wrapping them.
VARIABLE.setParseAction(
    lambda _src, _loc, toks: Variable(ALIASES.get(toks[0], toks[0]))
)

# Alternatives are tried left to right, so "===" has to precede "==".
VERSION_CMP = (
    L("===")
    | L("==")
    | L(">=")
    | L("<=")
    | L("!=")
    | L("~=")
    | L(">")
    | L("<")
)

MARKER_OP = VERSION_CMP | L("not in") | L("in")
MARKER_OP.setParseAction(lambda _src, _loc, toks: Op(toks[0]))

MARKER_VALUE = QuotedString("'") | QuotedString('"')
MARKER_VALUE.setParseAction(lambda _src, _loc, toks: Value(toks[0]))

BOOLOP = L("and") | L("or")

MARKER_VAR = VARIABLE | MARKER_VALUE

# One comparison; the grouped tokens are turned into a tuple.
MARKER_ITEM = Group(MARKER_VAR + MARKER_OP + MARKER_VAR)
MARKER_ITEM.setParseAction(lambda _src, _loc, toks: tuple(toks[0]))

LPAREN = L("(").suppress()
RPAREN = L(")").suppress()

# Recursive part: an atom followed by any number of "BOOLOP expr" tails.
MARKER_EXPR = Forward()
MARKER_ATOM = MARKER_ITEM | Group(LPAREN + MARKER_EXPR + RPAREN)
MARKER_EXPR << (MARKER_ATOM + ZeroOrMore(BOOLOP + MARKER_EXPR))
Esempio n. 22
0
def RawOutputSpecParser():
    '''Build the parser for the OUTPUT statement (and nothing else).

    The grammar is assembled while ``DEFAULT_WHITESPACE_CHARS`` is installed
    through ``PyParsingDefaultWhitespaceChars``.

    Returns:
        The pyparsing element matching ``OUTPUT { name = expr; ... }``.  Its
        parse action hands the parsed ``(name, expr)`` entries to
        ``o.OutputSpec``.
    '''
    with PyParsingDefaultWhitespaceChars(DEFAULT_WHITESPACE_CHARS):
        # Keywords are matched case-insensitively and dropped from the results.
        OUTPUT = CaselessKeyword('OUTPUT').suppress()
        QUERY = CaselessKeyword('query').suppress()
        INDEX = CaselessKeyword('index').suppress()
        KEY = CaselessKeyword('key').suppress()
        CONTENT = CaselessKeyword('content').suppress()
        SET = CaselessKeyword('set').suppress()
        SEQUENCE = CaselessKeyword('sequence').suppress()
        DICTIONARY = CaselessKeyword('dictionary').suppress()
        NOT = CaselessKeyword('not').suppress()

        constant = integer | QuotedString('"', escChar='\\')
        constant.setParseAction(
            lambda t: o.Constant(t[0])
        )  # not strictly necessary to wrap this, but it simplifies working with the syntax tree

        asp_variable_name = Word(alphas_uppercase, alphanums + '_')
        asp_variable_anonymous = Keyword('_')
        asp_variable = asp_variable_anonymous | asp_variable_name
        # The copy is taken *before* any parse action is attached, so the same
        # token shape can produce an asp.Variable inside queries and an
        # o.Variable inside output expressions.
        asp_variable_expr = asp_variable_name.copy()
        #
        asp_variable_name.setParseAction(lambda t: asp.Variable(t[0]))
        asp_variable_anonymous.setParseAction(
            lambda t: asp.AnonymousVariable())
        asp_variable_expr.setParseAction(lambda t: o.Variable(t[0]))

        # TODO:
        # Instead of explicitly marking references with '&', we might just define a convention as follows:
        #   * Output names start with lowercase characters
        #   * ASP variables start with uppercase characters (as they do in actual ASP code)
        reference = amp + py_identifier
        reference.setParseAction(lambda t: o.Reference(t[0])
                                 )  # to distinguish from literal string values

        # Note: must be able to distinguish between unquoted and quoted constants
        asp_constant_symbol = Word(alphas_lowercase, alphanums + '_')
        asp_quoted_string = QuotedString('"', escChar='\\')
        asp_quoted_string.setParseAction(lambda t: asp.QuotedConstant(t[0]))
        # Every matched term is collected under the 'terms' results name.
        term = (asp_constant_symbol | asp_quoted_string | asp_variable
                | positive_integer).setResultsName('terms',
                                                   listAllMatches=True)
        terms = Optional(term + ZeroOrMore(comma + term))
        classical_atom = predicate_name('predicate') + Optional(lpar + terms +
                                                                rpar)
        # Builtin atoms
        # '|' takes the first alternative that matches, so every operator must
        # be listed before any operator that is a prefix of it ('==' before
        # '=', '<>' and '<=' before '<', '>=' before '>').  Otherwise the
        # shorter operator matches first and the longer one can never parse.
        builtin_op_binary = (Literal('==') | '=' | '!=' | '<>' | '<=' | '<'
                             | '>=' | '>'
                             | '#succ').setResultsName('predicate')
        builtin_atom_binary = term + builtin_op_binary + term
        builtin_atom_binary_prefix = builtin_op_binary + lpar + term + comma + term + rpar
        builtin_atom = builtin_atom_binary | builtin_atom_binary_prefix
        #
        body_atom = classical_atom | builtin_atom
        # Positive and negated literals share the atom syntax and differ only
        # in the last argument passed to asp.Literal.
        pos_body_atom = body_atom.copy()
        neg_body_atom = NOT + body_atom
        pos_body_atom.setParseAction(
            lambda t: asp.Literal(t.predicate, tuple(t.terms), False))
        neg_body_atom.setParseAction(
            lambda t: asp.Literal(t.predicate, tuple(t.terms), True))
        # The negated form is tried first.
        body_literal = neg_body_atom | pos_body_atom
        #
        asp_query = Group(body_literal + ZeroOrMore(comma + body_literal))
        asp_query.setParseAction(lambda t: asp.Query(tuple(t[0])))

        # Output expressions are recursive (collections contain expressions),
        # hence the forward declaration; it is filled in further down.
        expr = Forward()

        # TODO: Instead of semicolon, we could use (semicolon | FollowedBy(rbrace)) to make the last semicolon optional (but how would that work with asp_query...)
        query_clause = QUERY + colon + asp_query('query') + semicolon
        content_clause = CONTENT + colon + expr('content') + semicolon
        index_clause = INDEX + colon + asp_variable_expr('index') + semicolon
        key_clause = KEY + colon + expr('key') + semicolon
        #
        simple_set_spec = SET + lbrace + predicate_name(
            'predicate') + slash + positive_integer('arity') + Optional(
                rightarrow + py_qualified_identifier('constructor')) + rbrace
        # '&' (pyparsing's Each) accepts the clauses in any order.
        set_spec = SET + lbrace + (query_clause & content_clause) + rbrace
        # TODO: add clause like "at_missing_index: skip;", "at_missing_index: 0;", "at_missing_index: None;"
        sequence_spec = SEQUENCE + lbrace + (query_clause & content_clause
                                             & index_clause) + rbrace
        dictionary_spec = DICTIONARY + lbrace + (query_clause & content_clause
                                                 & key_clause) + rbrace
        expr_collection = set_spec | simple_set_spec | sequence_spec | dictionary_spec
        #
        simple_set_spec.setParseAction(lambda t: o.ExprSimpleSet(
            t.predicate, t.arity, t.get('constructor')))
        set_spec.setParseAction(lambda t: o.ExprSet(t.query, t.content))
        sequence_spec.setParseAction(
            lambda t: o.ExprSequence(t.query, t.content, t.index))
        dictionary_spec.setParseAction(
            lambda t: o.ExprDictionary(t.query, t.content, t.key))

        # Argument list of an object expression; a trailing comma is allowed.
        expr_obj_args = Group(
            Optional(expr + ZeroOrMore(comma + expr) + Optional(comma)))
        # The constructor name is optional and defaults to None.
        expr_obj = Optional(
            py_qualified_identifier,
            default=None)('constructor') + lpar + expr_obj_args('args') + rpar
        #
        expr_obj.setParseAction(lambda t: o.ExprObject(t.constructor, t.args))

        # Note: "|" always takes the first match, that's why we have to parse variable names after obj (otherwise "variable name" might consume the identifier of expr_obj)
        expr << (constant | expr_collection | expr_obj | reference
                 | asp_variable_expr)

        named_output_spec = py_identifier('name') + equals + expr(
            'expr') + semicolon
        output_statement = OUTPUT + lbrace + ZeroOrMore(
            named_output_spec) + rbrace
        #
        named_output_spec.setParseAction(lambda t: (t.name, t.expr))
        output_statement.setParseAction(lambda t: o.OutputSpec(t))
        return output_statement
Esempio n. 23
0
# SQL keywords used by the statement grammars below.
useToken = Keyword("USE")
defaultToken = Keyword("DEFAULT")
unsignedToken = Keyword("UNSIGNED")
autoincrementToken = Keyword("AUTO_INCREMENT")
# The matched AUTO_INCREMENT keyword is replaced by this text in the results.
autoincrementToken.setParseAction(lambda toks: ["PRIMARY KEY AUTOINCREMENT"])
keyToken = Keyword("KEY")
primaryToken = Keyword("PRIMARY")
uniqueToken = Keyword("UNIQUE")
insertToken = Keyword("INSERT")
intoToken = Keyword("INTO")
valuesToken = Keyword("VALUES")

# An identifier is a bare word or a double-quoted / backtick-quoted string
# ('^' is pyparsing's longest-match alternation).  Whatever form was matched,
# the parse action re-emits it wrapped in double quotes.
ident = Word(alphas, alphanums + "_$" ) ^ QuotedString('"') ^ QuotedString("`")
ident.setParseAction(lambda toks: ['"%s"' % toks[0]])
# Single-quoted string literal (may span lines); the parse action puts the
# single quotes back around the unquoted text.
string = QuotedString("'",multiline=True)
string.setParseAction(lambda toks: ["'%s'" % toks[0]])

# Dotted names: identifiers joined by "." and combined into one token.
columnName = delimitedList( ident, ".",combine=True)
tableName = delimitedList( ident, ".",combine=True)
# A column type: a type word, an optional parenthesised argument (digits or a
# comma-separated list of strings) and any number of modifiers.  UNSIGNED is
# matched but dropped from the results.  nnToken and nullToken are defined
# outside this excerpt.
dataType = Word(alphas) + Combine(Optional(Literal("(") + (Word(nums) ^ delimitedList(string,combine=True)) + Literal(")"))) + ZeroOrMore(nnToken ^ autoincrementToken ^ (defaultToken + (string ^ nullToken)) ^ unsignedToken.suppress() )
dataType.setParseAction(convert_datatypes)

columnDescription = Group(ident + dataType)
# [PRIMARY | UNIQUE] KEY [name] (col [(length)], ...)
keyDescription = Optional(primaryToken ^ uniqueToken) + keyToken + Optional(ident) + Literal("(") + delimitedList(ident + Optional(Literal("(") + Word(nums) + Literal(")"))) + Literal(")")

# CREATE TABLE statement: key descriptions and a trailing
# "AUTO_INCREMENT = <n>" are matched but suppressed; the remaining tokens are
# passed to rebuild_createtable.  createToken, tableToken and ifneToken are
# defined outside this excerpt (ifneToken is presumably IF NOT EXISTS --
# TODO confirm).
createTableStmt = Group(createToken + tableToken + ifneToken + ident + Literal("(")) + delimitedList(columnDescription ^ keyDescription.suppress()) + Group(Literal(")")) + Optional(autoincrementToken + Literal("=") + Word(nums)).suppress()
createTableStmt.setParseAction(rebuild_createtable)


# CREATE DATABASE statement; databaseToken, dcsToken and collateToken are
# defined outside this excerpt.
createDataBaseStmt = Group(createToken + databaseToken + ident +  dcsToken + Word(alphanums)+ collateToken + ident)
Esempio n. 24
0
# Bytes can be represented in binary, hex, char, or a number (0-255 or -128-127)
# and may include embedded arithmetic
#  OPCODE 0b00001100
#  OPCODE 0x0b
#  OPCODE 'a'
#  OPCODE 254-0x0a
#  OPCODE 'a'&0b00001111
# Each literal form below has a parse action that replaces the matched text
# with its integer value.
binbyte = Combine(Literal('0b') + Char('01') * 8)
binbyte.setName('binbyte')
binbyte.setParseAction(lambda t: [int(t[0], 2)])
hexbyte = Combine(Literal('0x') + Char(srange("[0-9a-fA-F]")) * 2)
hexbyte.setName('hexbyte')
hexbyte.setParseAction(lambda t: [int(t[0], 16)])
chrbyte = QuotedString(quoteChar="'", unquoteResults=True)
chrbyte.setName('char')
chrbyte.setParseAction(lambda t: [ord(t[0])])
# A decimal number is digits with an optional *leading* minus sign.  The sign
# must not be part of a digits-and-'-' character class: that would swallow the
# operator of "254-0x0a" or "254-10" as part of the number and int() would
# then raise ValueError instead of letting the arithmetic rule below see it.
number = Combine(Literal('-') + Word(nums)) | Word(nums)
number.setName('number')
number.setParseAction(lambda t: [int(t[0])])
allbytes = binbyte | hexbyte | chrbyte | number
# An operator immediately followed by another byte literal (no whitespace).
mathtoken = Combine(oneOf('+ - & |') + allbytes)
bytemathexpression = Combine(allbytes + OneOrMore(mathtoken))
# NOTE(review): eval() is applied to the combined text, which is assembled
# from the integers produced by the parse actions above and the operators
# + - & |.  Keep it that way: never let unparsed source text reach this call.
bytemathexpression.setParseAction(lambda t: [eval(t[0])])
# The arithmetic form is tried first so that a plain literal does not consume
# only the first operand of an expression.
byte = bytemathexpression | allbytes
byte.setName('byte')
# Words can be represented in binary, hex, label, or number (0-65535 or -32768-32767)
#  OPCODE 0b0000111100001111
#  OPCODE 0x2911
#  OPCODE .label
#  OPCODE .label+4
#  OPCODE 2490
Esempio n. 25
0
# Statement forms.  '+' binds tighter than '|', so each definition reads as
# "<lhs> = <free text>"  OR  "<lhs> <comparison> <rhs>".  attribute, variable,
# comparison, freeformText and startContext are defined outside this excerpt.
setAttributeValue     = attribute + Literal("=") + freeformText | attribute + comparison + variable 
setVariableFromVar    = variable + Literal("=") + freeformText | variable + comparison + variable 
setVariableFromAttr   = variable + comparison + attribute 
# One argument word with an optional trailing comma.
argument              = Word(alphanums) + Optional(",")
# name(arg[,] arg ...) -- at least one argument is required.
funcDef               = Word(alphanums) + "(" + OneOrMore(argument) + ")"
# C-style /* ... */ comment.
comment               = QuotedString('/*', endQuoteChar='*/')

# One statement.  Every form except a comment ends with ";".
# NOTE(review): the second alternative is
#   setAttributeValue ";" setVariableFromAttr ";"
# i.e. the two statements are concatenated with '+', so setVariableFromAttr is
# only accepted directly after a setAttributeValue.  This looks like a missing
# '|' between them -- confirm against the intended language before changing.
line                  = startContext + ";" | setAttributeValue + ";"  + setVariableFromAttr + ";" | setVariableFromVar + ";" | funcDef + ";" | comment
# grammar to be exported
grammar               = OneOrMore(line)


def parse_knowledge_tree(s):
    """Parse ``s`` with the module grammar and return the knowledge tree.

    The value returned by ``grammar.parseString`` is discarded; the function
    returns the module-level ``kt`` object instead.
    """
    # Run the parse purely for its effect; the result tokens are not used.
    grammar.parseString(s)
    return kt



# Parse actions: attach each handler to its grammar element, in this order.
for _element, _handler in (
    (setAttributeValue, update_attribute),
    (setVariableFromVar, update_variable),
    (setVariableFromAttr, update_variable_from_attr),
    (startContext, start_context),
    (funcDef, set_function),
    (comment, print_comment),
    (freeformText, get_freeform),
):
    _element.setParseAction(_handler)
# Do not leave the loop variables behind in the module namespace.
del _element, _handler



Esempio n. 26
0
# --- operator tokens -------------------------------------------------------
signop = oneOf('+ -')
multop = oneOf('* / // % bitand bitor')
filterop = oneOf('|')
plusop = oneOf('+ -')
# "not" followed by a word-end check built from word_characters.
notop = Literal('not') + WordEnd(word_characters)

rangeop = Literal('..')
exclusiverangeop = Literal('...')
# Not a parser element: just the two symbols of the ternary operator.
ternaryop = ('?', ':')

# --- operand evaluators ----------------------------------------------------
# Attach the matching Eval* callable to every operand element, in this order.
for _operand, _evaluator in (
    (current_scope_operand, EvalCurrentScope),
    (variable_operand, EvalVariable),
    (explicit_variable_operand, EvalExplicitVariable),
    (integer_operand, EvalInteger),
    (real_operand, EvalReal),
    (triple_string, EvalTripleString),
    (string_operand, EvalString),
    (constant, EvalConstant),
    (regexp, EvalRegExp),
    (timespan, EvalTimespan),
):
    _operand.setParseAction(_evaluator)
# Do not leave the loop variables behind in the module namespace.
del _operand, _evaluator

# An identifier immediately followed by a colon.
modifier = Regex(r'([a-zA-Z][a-zA-Z0-9_]*)\:')

# --- list operands ---------------------------------------------------------
# Bare comma-separated expressions, grouped into one result.
simple_list_operand = Group(delimitedList(expr))
simple_list_operand.setParseAction(EvalSimpleList)

# Bracketed list; the brackets themselves are dropped.
list_operand = Suppress('[') + delimitedList(expr) + Suppress(']')
list_operand.setParseAction(EvalList)

# The literal text "[]".
empty_list_operand = Literal('[]')
empty_list_operand.setParseAction(EvalEmptyList)
Esempio n. 27
0
import re
import jinja2
import pyparsing
import bleach
from .attachments import THUMB_PATTERN
from pyparsing import QuotedString, ParserElement, LineStart, LineEnd, SkipTo, OneOrMore, restOfLine
from .util import mime2thumb_ext

# Only spaces and tabs count as skippable whitespace from here on.
ParserElement.setDefaultWhitespaceChars(" \t")

EOL = LineEnd()
SOL = LineStart()

# Each inline style below is a quoted span whose parse action wraps the text
# between the delimiters in the corresponding HTML tag.

# Bold: **text** or [b]text[/b].
strong = QuotedString("**") | QuotedString(
    quoteChar="[b]", endQuoteChar="[/b]"
)
strong.setParseAction(lambda toks: "<strong>%s</strong>" % toks[0])

# Italic: *text* (backslash is the escape character) or [i]text[/i].
italic = QuotedString("*", escChar="\\") | QuotedString(
    quoteChar="[i]", endQuoteChar="[/i]"
)
italic.setParseAction(lambda toks: "<i>%s</i>" % toks[0])

# Underline: __text__ or [u]text[/u].
underline = QuotedString("__") | QuotedString(
    quoteChar="[u]", endQuoteChar="[/u]"
)
underline.setParseAction(lambda toks: "<u>%s</u>" % toks[0])

# Strike-through: [s]text[/s].
strike = QuotedString(quoteChar="[s]", endQuoteChar="[/s]")
strike.setParseAction(lambda toks: "<s>%s</s>" % toks[0])

# Superscript: [sup]text[/sup].
sup = QuotedString(quoteChar="[sup]", endQuoteChar="[/sup]")
sup.setParseAction(lambda toks: "<sup>%s</sup>" % toks[0])

# Subscript: [sub]text[/sub].
sub = QuotedString(quoteChar="[sub]", endQuoteChar="[/sub]")
sub.setParseAction(lambda toks: "<sub>%s</sub>" % toks[0])
Esempio n. 28
0
# Comparison operators.  Longer spellings are listed ahead of their own
# prefixes ("===" before "==", ">=" before ">").
VERSION_CMP = (
    L("===") | L("==") | L(">=") | L("<=")
    | L("!=") | L("~=") | L(">") | L("<")
)

# Any marker operator: a comparison or one of the two membership tests.
MARKER_OP = VERSION_CMP | L("not in") | L("in")

# A quoted literal, in either quote style, becomes a Value node.
MARKER_VALUE = QuotedString("'") | QuotedString('"')
MARKER_VALUE.setParseAction(lambda _text, _loc, toks: Value(toks[0]))

BOOLOP = L("and") | L("or")

# Either side of a comparison may be a variable or a literal value.
MARKER_VAR = VARIABLE | MARKER_VALUE

# One "<lhs> <op> <rhs>" comparison, delivered as a plain 3-tuple.
MARKER_ITEM = Group(MARKER_VAR + MARKER_OP + MARKER_VAR)
MARKER_ITEM.setParseAction(lambda _text, _loc, toks: tuple(toks[0]))

# Parentheses delimit sub-expressions but are dropped from the results.
LPAREN = L("(").suppress()
RPAREN = L(")").suppress()

# Recursive definition: an atom, optionally followed by boolean operators
# joining further expressions.  Parenthesised sub-expressions are grouped.
MARKER_EXPR = Forward()
MARKER_ATOM = MARKER_ITEM | Group(LPAREN + MARKER_EXPR + RPAREN)
MARKER_EXPR << (MARKER_ATOM + ZeroOrMore(BOOLOP + MARKER_EXPR))
Esempio n. 29
0
def make_grammar_2():
    """
    Build and return the pyparsing grammar for a BBDB file.

    The grammar follows the specification in grammar.ebnf.  The returned
    element matches zero or more records, ignores ``;`` comments, and its
    parse actions turn every record into a plain dict.
    """

    # --- delimiters (matched, never kept in the results) -------------------
    LP, RP, LB, RB = (Suppress(ch) for ch in "()[]")

    def Paren(arg):
        # ( arg ) with the contents grouped.
        return LP + Group(arg) + RP

    def Bracket(arg):
        # [ arg ] with the contents grouped.
        return LB + Group(arg) + RB

    # --- result builders (used as parse actions) ---------------------------
    def make_list(toks):
        return toks.asList()

    def make_dict(toks):
        # toks[0] is either a group of (key, value) pairs or None (from nil).
        mapping = {}
        for key, value in toks[0] or []:
            mapping[key] = value
        return mapping

    def make_address_entry(toks):
        entry = toks[0]
        tag = entry.tag
        details = {
            "location": list(entry.location or []),
            "city": entry.city or "",
            "state": entry.state or "",
            "zipcode": entry.zipcode or "",
            "country": entry.country or ""
        }
        return tag, details

    def make_record(toks):
        rec = toks[0]
        return {
            "firstname": rec.firstname,
            "lastname": rec.lastname,
            "aka": rec.aka or [],
            "company": rec.company or "",
            "phone": rec.phone or {},
            "address": rec.address or {},
            "net": rec.net or [],
            "fields": rec.fields or {}
        }

    def make_string(toks):
        # The quotes are still present (unquoteResults=False): strip them by
        # hand and turn escaped quotes back into plain ones.
        quoted = toks[0]
        return quoted[1:-1].replace(r'\"', '"')

    # --- low-level entities -------------------------------------------------
    string = QuotedString(quoteChar='"', escChar='\\', unquoteResults=False)
    string.setParseAction(make_string)

    nil = Keyword("nil")
    nil.setParseAction(lambda toks: [None])

    def OrNil(arg):
        # Either the given element or the nil keyword.
        return Or([arg, nil])

    atom = Word(alphanums + '-')
    dot = Suppress(Keyword("."))

    integer = Word(nums)
    integer.setParseAction(lambda toks: int(toks[0]))

    # --- phone ---------------------------------------------------------------
    phone_usa = Group(OneOrMore(integer))
    phone_nonusa = string
    phone_entry = Bracket(string("tag") + Or([phone_usa, phone_nonusa]))
    phone = OrNil(Paren(OneOrMore(phone_entry)))("phone")
    phone.setParseAction(make_dict)

    # --- address -------------------------------------------------------------
    location = Paren(OneOrMore(string))("location")
    location.setParseAction(make_list)

    address_entry = Bracket(
        string("tag") + location + string("city") + string("state") +
        string("zipcode") + string("country"))
    address_entry.setParseAction(make_address_entry)
    address = OrNil(Paren(OneOrMore(address_entry)))("address")
    address.setParseAction(make_dict)

    # --- user-defined fields -------------------------------------------------
    field = Paren(atom + dot + string)
    fields = OrNil(Paren(OneOrMore(field)))("fields")
    fields.setParseAction(make_dict)

    # --- remaining parts of an entry -----------------------------------------
    name = string("firstname") + OrNil(string("lastname"))
    company = OrNil(string)("company")

    aka = OrNil(Paren(OneOrMore(string)))("aka")
    aka.setParseAction(make_list)

    net = OrNil(Paren(OneOrMore(string)))("net")
    net.setParseAction(make_list)

    cache = nil("cache")

    # --- one record, then the whole file --------------------------------------
    record = Bracket(name + aka + company + phone + address + net + fields +
                     cache)
    record.setParseAction(make_record)

    bbdb = ZeroOrMore(record)
    bbdb.setParseAction(make_list)

    # Anything from ";" to the end of the line is a comment.
    comment = Regex(r";.*")
    bbdb.ignore(comment)

    return bbdb
Esempio n. 30
0
# --- operator tokens -------------------------------------------------------
filterop = oneOf("|")
plusop = oneOf("+ -")
# "not" followed by a word-end check built from word_characters.
notop = Literal("not") + WordEnd(word_characters)

rangeop = Literal("..")
exclusiverangeop = Literal("...")
# Not a parser element: just the two symbols of the ternary operator.
ternaryop = ("?", ":")

# --- operand evaluators ----------------------------------------------------
# Attach the matching Eval* callable to every operand element, in this order.
for _operand, _evaluator in (
    (current_scope_operand, EvalCurrentScope),
    (variable_operand, EvalVariable),
    (explicit_variable_operand, EvalExplicitVariable),
    (integer_operand, EvalInteger),
    (real_operand, EvalReal),
    (string_operand, EvalString),
    (constant, EvalConstant),
    (regexp, EvalRegExp),
    (timespan, EvalTimespan),
):
    _operand.setParseAction(_evaluator)
# Do not leave the loop variables behind in the module namespace.
del _operand, _evaluator

# An identifier immediately followed by a colon.
modifier = Regex(r"([a-zA-Z][a-zA-Z0-9_]*)\:")

# --- list and dict operands ------------------------------------------------
# Bare comma-separated expressions, grouped into one result.
simple_list_operand = Group(delimitedList(expr))
simple_list_operand.setParseAction(EvalSimpleList)

# Bracketed list; the brackets themselves are dropped.
list_operand = Suppress("[") + delimitedList(expr) + Suppress("]")
list_operand.setParseAction(EvalList)

# The literal text "[]".
empty_list_operand = Literal("[]")
empty_list_operand.setParseAction(EvalEmptyList)

# key: value pairs inside braces; the colon and the braces are dropped.
dict_item = Group(expr + Suppress(Literal(":")) + expr)
dict_operand = Group(Suppress("{") + delimitedList(dict_item) + Suppress("}"))
Esempio n. 31
0
# Operator tokens.
filterop = oneOf("|")
plusop = oneOf("+ -")
# "not" followed by a word-end check built from word_characters.
notop = Literal("not") + WordEnd(word_characters)

rangeop = Literal("..")
exclusiverangeop = Literal("...")
# A plain tuple holding the two symbols of the ternary operator.
ternaryop = ("?", ":")

# Operand -> evaluator wiring, applied in the listed order.
for _operand, _evaluator in (
    (current_scope_operand, EvalCurrentScope),
    (variable_operand, EvalVariable),
    (explicit_variable_operand, EvalExplicitVariable),
    (integer_operand, EvalInteger),
    (real_operand, EvalReal),
    (string_operand, EvalString),
    (constant, EvalConstant),
    (regexp, EvalRegExp),
    (timespan, EvalTimespan),
):
    _operand.setParseAction(_evaluator)
# Remove the loop variables so the module namespace is unchanged.
del _operand, _evaluator

# An identifier immediately followed by a colon.
modifier = Regex(r"([a-zA-Z][a-zA-Z0-9_]*)\:")

# Comma-separated expressions without brackets, grouped into one result.
simple_list_operand = Group(delimitedList(expr))
simple_list_operand.setParseAction(EvalSimpleList)

# Comma-separated expressions inside [ ]; the brackets are dropped.
list_operand = Suppress("[") + delimitedList(expr) + Suppress("]")
list_operand.setParseAction(EvalList)

# The literal text "[]".
empty_list_operand = Literal("[]")
empty_list_operand.setParseAction(EvalEmptyList)

# "expr : expr" pairs inside { }; the colon and the braces are dropped.
dict_item = Group(expr + Suppress(Literal(":")) + expr)
dict_operand = Group(Suppress("{") + delimitedList(dict_item) + Suppress("}"))
Esempio n. 32
0
# A variable name: letters, digits, dots and underscores.
variable = Regex(r'([a-zA-Z0-9\._]+)')

# String literal in double or single quotes; backslash is the escape character.
string = QuotedString('"', escChar="\\") | QuotedString('\'', escChar="\\")
# Alternatives are tried left to right; `variable` comes last.
# model_reference, real, integer and constant are defined outside this
# excerpt.  The parse actions attached further down still apply here because
# `operand` refers to these same element objects.
operand = model_reference | real | integer | constant | string | variable

# Arithmetic and grouping operator tokens.
plusop = oneOf('+ -')
multop = oneOf('* / // %')
groupop = Literal(',')

# Forward declaration; the expression grammar is assigned to it later with <<.
expr = Forward()

# A run of letters/digits followed by ':' (no whitespace in between),
# combined into a single token.
modifier = Combine(Word(alphas + nums) + ':')

# Attach the evaluator for each operand kind.
integer.setParseAction(EvalInteger)
real.setParseAction(EvalReal)
string.setParseAction(EvalString)
constant.setParseAction(EvalConstant)
variable.setParseAction(EvalVariable)
model_reference.setParseAction(EvalModelReference)

# Comparison operators: symbolic forms first, then the word forms, each of
# which is followed by a WordEnd() check.
comparisonop = (oneOf("< <= > >= != == ~= ^= $=") |
                (Literal('not in') + WordEnd()) |
                (oneOf("in lt lte gt gte matches contains icontains like") + WordEnd()))


# Logical operators, each followed by a WordEnd() check.
logicopOR = Literal('or') + WordEnd()
logicopAND = Literal('and') + WordEnd()

expr << operatorPrecedence(operand, [
    (modifier, 1, opAssoc.RIGHT, EvalModifierOp),
    (multop, 2, opAssoc.LEFT, EvalMultOp),
Esempio n. 33
0
def parse(string=None, filename=None, token=None, lang=None):
    """
    Parse a feature from a string or a file, or raise a LettuceSyntaxError.

    This function includes the parser grammar.

    :param string: text to parse; used when truthy, otherwise `filename`
        is read instead.
    :param filename: path of the file to parse; it is also passed to
        `guess_language` and reported in syntax errors.
    :param token: name of one of the grammar elements defined in this
        function (for example ``'STATEMENT'``) to use as the start rule;
        defaults to the complete ``FEATURE`` rule. An unknown name raises
        KeyError.
    :param lang: object providing the keyword elements (``STATEMENT``,
        ``BACKGROUND``, ``SCENARIO``, ``EXAMPLES``, ``FEATURE``); obtained
        from `guess_language` when not given.
    :returns: the pyparsing results of the selected rule.
    :raises LettuceSyntaxError: when the input does not match the grammar.
    :raises RuntimeError: when neither `string` nor `filename` is given.
    """

    if not lang:
        lang = guess_language(string, filename)

    #
    # End of Line
    #
    EOL = Suppress(lineEnd)
    UTFWORD = Word(unicodePrintables)

    #
    # @tag
    #
    TAG = Suppress('@') + UTFWORD

    #
    # A table
    #
    # A table is made up of rows of cells, e.g.
    #
    #   | column 1 | column 2 |
    #
    # Table cells need to be able to handle escaped tokens such as \| and \n
    #
    def handle_esc_char(tokens):
        """Translate a two-character escape sequence into its character."""
        token = tokens[0]

        if token == r'\|':
            return u'|'
        elif token == r'\n':
            return u'\n'
        elif token == r'\\':
            return u'\\'

        raise NotImplementedError(u"Unknown token: %s" % token)

    # A backslash followed by exactly one printable character.
    ESC_CHAR = Word(initChars=r'\\', bodyChars=unicodePrintables, exact=2)
    ESC_CHAR.setParseAction(handle_esc_char)

    #
    # A cell can contain anything except a cell marker, new line or the
    # beginning of a cell marker, we then handle escape characters separately
    # and recombine the cell afterwards
    #
    CELL = OneOrMore(CharsNotIn('|\n\\') + Optional(ESC_CHAR))
    CELL.setParseAction(lambda tokens: u''.join(tokens))

    TABLE_ROW = Suppress('|') + OneOrMore(CELL + Suppress('|')) + EOL
    TABLE_ROW.setParseAction(lambda tokens: [v.strip() for v in tokens])
    TABLE = Group(OneOrMore(Group(TABLE_ROW)))

    #
    # Multiline string
    #
    def clean_multiline_string(s, loc, tokens):
        """
        Clean a multiline string

        The indent level of a multiline string is the indent level of the
        triple-". We have to derive this by walking backwards from the
        location of the quoted string token to the newline before it.

        We also want to remove the leading and trailing newline if they exist.

        FIXME: assumes UNIX newlines
        """

        def remove_indent(multiline, indent):
            """
            Generate the lines removing the indent
            """

            for line in multiline.splitlines():
                if line and not line[:indent].isspace():
                    warn("%s: %s: under-indented multiline string "
                         "truncated: '%s'" %
                         (lineno(loc, s), col(loc, s), line),
                         LettuceSyntaxWarning)

                # for those who are surprised by this, slicing a string
                # shorter than indent will yield empty string, not IndexError
                yield line[indent:]

        # determine the indentation offset
        indent = loc - s.rfind('\n', 0, loc) - 1

        multiline = '\n'.join(remove_indent(tokens[0], indent))

        # remove leading and trailing newlines
        # startswith/endswith are used instead of indexing because the string
        # may be empty here (an empty block, or one holding just a newline),
        # in which case multiline[0] / multiline[-1] would raise IndexError.
        if multiline.startswith('\n'):
            multiline = multiline[1:]

        if multiline.endswith('\n'):
            multiline = multiline[:-1]

        return multiline

    MULTILINE = QuotedString('"""', multiline=True)
    MULTILINE.setParseAction(clean_multiline_string)

    # A Step
    #
    # Steps begin with a keyword such as Given, When, Then or And. They can
    # contain an optional inline comment, although it's possible to encapsulate
    # it in a string. Finally they can contain a table or a multiline 'Python'
    # string.
    #
    # <variables> are not parsed as part of the grammar as it's not easy to
    # distinguish between a variable and XML. Instead scenarios will replace
    # instances in the steps based on the outline keys.
    #
    # NOTE(review): setWhitespaceChars() is called on UTFWORD and on
    # pyparsing's shared quotedString element themselves rather than on
    # copies -- confirm that changing those shared elements is intended.
    STATEMENT_SENTENCE = Group(
        lang.STATEMENT +  # Given, When, Then, And
        OneOrMore(UTFWORD.setWhitespaceChars(' \t') |
                  quotedString.setWhitespaceChars(' \t')) +
        EOL
    )

    STATEMENT = Group(
        STATEMENT_SENTENCE('sentence') +
        Optional(TABLE('table') | MULTILINE('multiline'))
    )
    STATEMENT.setParseAction(Step)

    STATEMENTS = Group(ZeroOrMore(STATEMENT))

    #
    # Background:
    #
    BACKGROUND_DEFN = \
        lang.BACKGROUND('keyword') + Suppress(':') + EOL
    BACKGROUND_DEFN.setParseAction(Background)

    BACKGROUND = Group(
        BACKGROUND_DEFN('node') +
        STATEMENTS('statements')
    )
    BACKGROUND.setParseAction(Background.add_statements)

    #
    # Scenario: description
    #
    SCENARIO_DEFN = Group(
        Group(ZeroOrMore(TAG))('tags') +
        lang.SCENARIO('keyword') + Suppress(':') +
        restOfLine('name') +
        EOL
    )
    SCENARIO_DEFN.setParseAction(Scenario)

    SCENARIO = Group(
        SCENARIO_DEFN('node') +
        STATEMENTS('statements') +
        Group(ZeroOrMore(
            Suppress(lang.EXAMPLES + ':') + EOL + TABLE
        ))('outlines')
    )
    SCENARIO.setParseAction(Scenario.add_statements)

    #
    # Feature: description
    #
    FEATURE_DEFN = Group(
        Group(ZeroOrMore(TAG))('tags') +
        lang.FEATURE('keyword') + Suppress(':') +
        restOfLine('name') +
        EOL
    )
    FEATURE_DEFN.setParseAction(Feature)

    #
    # A description composed of zero or more lines, before the
    # Background/Scenario block
    #
    DESCRIPTION_LINE = Group(
        ~BACKGROUND_DEFN + ~SCENARIO_DEFN +
        OneOrMore(UTFWORD).setWhitespaceChars(' \t') +
        EOL
    )
    DESCRIPTION = Group(ZeroOrMore(DESCRIPTION_LINE | EOL))
    DESCRIPTION.setParseAction(Description)

    #
    # Complete feature file definition
    #
    FEATURE = Group(
        FEATURE_DEFN('node') +
        DESCRIPTION('description') +
        Optional(BACKGROUND('background')) +
        Group(OneOrMore(SCENARIO))('scenarios') +
        stringEnd)
    FEATURE.ignore(pythonStyleComment)
    FEATURE.setParseAction(Feature.add_blocks)

    #
    # Try parsing the string
    #

    if not token:
        token = FEATURE
    else:
        # Look the start rule up by name among the elements defined above.
        token = locals()[token]

    try:
        if string:
            tokens = token.parseString(string)
        elif filename:
            # The encoding is passed by keyword so that the call is valid
            # whether `open` is the builtin, io.open or codecs.open; as a
            # third positional argument it is only accepted by codecs.open.
            with open(filename, 'r', encoding='utf-8') as fp:
                tokens = token.parseFile(fp)
        else:
            raise RuntimeError("Must pass string or filename")

        return tokens
    except ParseException as e:
        # Failing on the end-of-input element means there was extra content
        # after a complete feature.
        if e.parserElement == stringEnd:
            msg = "Expected EOF (max one feature per file)"
        else:
            msg = e.msg

        raise LettuceSyntaxError(
            filename,
            u"{lineno}:{col} Syntax Error: {msg}\n{line}\n{space}^".format(
                msg=msg,
                lineno=e.lineno,
                col=e.col,
                line=e.line,
                space=' ' * (e.col - 1)))
    except LettuceSyntaxError as e:
        # reraise the exception with the filename
        raise LettuceSyntaxError(filename, e.string)