Example #1
	def getkw_bnf(self):
		sect_begin   = Literal("{").suppress()
		sect_end   = Literal("}").suppress()
		array_begin   = Literal("[").suppress()
		array_end   = Literal("]").suppress()
		tag_begin   = Literal("<").suppress()
		tag_end   = Literal(">").suppress()
		eql   = Literal("=").suppress()
		dmark = Literal('$').suppress()
		end_data=Literal('$end').suppress()
		prtable = alphanums+r'!$%&*+-./<>?@^_|~'
		ival=Regex('[-]?\d+')
		dval=Regex('-?\d+\.\d*([eE]?[+-]?\d+)?')
		lval=Regex('([Yy]es|[Nn]o|[Tt]rue|[Ff]alse|[Oo]n|[Oo]ff)')
	
		# Helper definitions

		kstr= quotedString.setParseAction(removeQuotes) ^ \
				dval ^ ival ^ lval ^ Word(prtable)
		name = Word(alphas+"_",alphanums+"_")
		vec=array_begin+delimitedList(dval ^ ival ^ lval ^ Word(prtable) ^ \
				Literal("\n").suppress() ^ \
				quotedString.setParseAction(removeQuotes))+array_end
		sect=name+sect_begin
		tag_sect=name+Group(tag_begin+name+tag_end)+sect_begin

		# Grammar
		keyword = name + eql + kstr
		vector = name + eql + vec
		data=Combine(dmark+name)+SkipTo(end_data)+end_data
		section=Forward()
		sect_def=(sect | tag_sect ) #| vec_sect)
		input=section | data | vector | keyword 
		section << sect_def+ZeroOrMore(input) + sect_end

		# Parsing actions	
		ival.setParseAction(self.conv_ival)
		dval.setParseAction(self.conv_dval)
		lval.setParseAction(self.conv_lval)
		keyword.setParseAction(self.store_key)
		vector.setParseAction(self.store_vector)
		data.setParseAction(self.store_data)
		sect.setParseAction(self.add_sect)
		tag_sect.setParseAction(self.add_sect)
		sect_end.setParseAction(self.pop_sect)

		bnf=ZeroOrMore(input) + StringEnd().setFailAction(parse_error)
		bnf.ignore(pythonStyleComment)
		return bnf
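All of these getkw grammars lean on the same idiom: quotedString.setParseAction(removeQuotes) strips the surrounding quotes, and the ^ (Or, longest-match) operator lets quoted and bare tokens share one slot. A minimal standalone sketch of just that idiom (not taken from the project above):

from pyparsing import Word, alphanums, quotedString, removeQuotes

kstr = quotedString.setParseAction(removeQuotes) ^ Word(alphanums + "_")
print(kstr.parseString('"quoted value"'))   # ['quoted value']
print(kstr.parseString('bare_value'))       # ['bare_value']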
Example #3
class SDKConfig:
    """
    Encapsulates an sdkconfig file. Defines grammar of a configuration entry, and enables
    evaluation of logical expressions involving those entries.
    """

    # A configuration entry is in the form CONFIG=VALUE. Definitions of components of that grammar
    IDENTIFIER = Word(printables.upper())

    HEX = Combine("0x" + Word(hexnums)).setParseAction(lambda t: int(t[0], 16))
    DECIMAL = Combine(Optional(Literal("+") | Literal("-")) +
                      Word(nums)).setParseAction(lambda t: int(t[0]))
    LITERAL = Word(printables)
    QUOTED_LITERAL = quotedString.setParseAction(removeQuotes)

    VALUE = HEX | DECIMAL | LITERAL | QUOTED_LITERAL

    # Operators supported by the expression evaluation
    OPERATOR = oneOf(["=", "!=", ">", "<", "<=", ">="])

    def __init__(self, kconfig_file, sdkconfig_file, env=[]):
        env = [(name, value)
               for (name, value) in (e.split("=", 1) for e in env)]

        for name, value in env:
            value = " ".join(value.split())
            os.environ[name] = value

        self.config = kconfiglib.Kconfig(kconfig_file.name)
        self.config.load_config(sdkconfig_file.name)

    def evaluate_expression(self, expression):
        result = self.config.eval_string(expression)

        if result == 0:  # n
            return False
        elif result == 2:  # y
            return True
        else:  # m
            raise Exception("Unsupported config expression result.")

    @staticmethod
    def get_expression_grammar():
        identifier = SDKConfig.IDENTIFIER.setResultsName("identifier")
        operator = SDKConfig.OPERATOR.setResultsName("operator")
        value = SDKConfig.VALUE.setResultsName("value")

        test_binary = identifier + operator + value
        test_single = identifier

        test = test_binary | test_single

        condition = Group(
            Optional("(").suppress() + test + Optional(")").suppress())

        grammar = infixNotation(condition, [("!", 1, opAssoc.RIGHT),
                                            ("&&", 2, opAssoc.LEFT),
                                            ("||", 2, opAssoc.LEFT)])

        return grammar
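A minimal standalone sketch of the infixNotation pattern used above; the CONFIG names and the simplified condition rule here are illustrative only, not part of the SDKConfig API:

from pyparsing import (Word, alphanums, Group, Optional, oneOf,
                       quotedString, removeQuotes, infixNotation, opAssoc)

ident = Word(alphanums.upper() + "_")
value = quotedString.setParseAction(removeQuotes) | Word(alphanums)
test = Group(ident + Optional(oneOf("= != > < <= >=") + value))
expr = infixNotation(test, [("!", 1, opAssoc.RIGHT),
                            ("&&", 2, opAssoc.LEFT),
                            ("||", 2, opAssoc.LEFT)])
print(expr.parseString('CONFIG_FOO = "bar" && !CONFIG_BAZ'))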
Example #4
def get_standard_type_defs(word):
    tuple_str = Forward()
    list_str = Forward()
    dict_str = Forward()
    cvt_tuple = lambda toks: tuple(toks.asList())
    cvt_dict = lambda toks: dict(toks.asList())

    list_item = (none | boolean | real | integer | list_str | tuple_str
                 | dict_str
                 | quotedString.setParseAction(removeQuotes)
                 | word)
    list_item2 = list_item | Empty().setParseAction(lambda: [None])

    tuple_str.inner = list_of(list_item)
    tuple_str.inner.setParseAction(cvt_tuple)
    tuple_str << (lparen + tuple_str.inner + rparen)

    list_str.inner = tuple_str.inner.copy()
    list_str.inner.setParseAction(lambda toks: list(toks))
    list_str << (lbrack + list_str.inner + rbrack)

    dict_entry = Group(list_item + colon + list_item2)
    dict_str.inner = list_of(dict_entry)
    dict_str.inner.setParseAction(cvt_dict)
    dict_str << (lbrace + Optional(dict_str.inner) + rbrace)

    return {
        'tuple': tuple_str,
        'list': list_str,
        'dict': dict_str,
        'list_item': list_item
    }
Example #5
def _query_expression():
    operand = quotedString.setParseAction(removeQuotes)
    return operatorPrecedence(operand, [
        (CaselessLiteral('not'), 1, opAssoc.RIGHT, _not_expression),
        (CaselessLiteral('and'), 2, opAssoc.LEFT, _and_expression),
        (CaselessLiteral('or'), 2, opAssoc.LEFT, _or_expression)
    ])
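operatorPrecedence is the older name for what newer pyparsing releases call infixNotation; _not_expression and friends are parse actions defined elsewhere in that module. A standalone sketch of the same grammar with no actions attached (illustrative only):

from pyparsing import CaselessLiteral, quotedString, removeQuotes, operatorPrecedence, opAssoc

operand = quotedString.setParseAction(removeQuotes)
query = operatorPrecedence(operand, [
    (CaselessLiteral('not'), 1, opAssoc.RIGHT),
    (CaselessLiteral('and'), 2, opAssoc.LEFT),
    (CaselessLiteral('or'), 2, opAssoc.LEFT),
])
print(query.parseString('"cats" and not "dogs"'))   # [['cats', 'and', ['not', 'dogs']]]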
Example #6
def transform(txt):

    idx1 = txt.find('[')
    idx2 = txt.find('{')
    if idx1 < idx2 and idx1 > 0:
        txt = txt[idx1:txt.rfind(']') + 1]
    elif idx2 < idx1 and idx2 > 0:
        txt = txt[idx2:txt.rfind('}') + 1]

    try:
        json.loads(txt)
    except:
        # parse dict-like syntax

        LBRACK, RBRACK, LBRACE, RBRACE, COLON, COMMA = map(Suppress, "[]{}:,")
        integer = Regex(r"[+-]?\d+").setParseAction(lambda t: int(t[0]))
        real = Regex(r"[+-]?\d+\.\d*").setParseAction(lambda t: float(t[0]))
        string_ = Word(alphas, alphanums +
                       "_") | quotedString.setParseAction(removeQuotes)
        bool_ = oneOf("true false").setParseAction(lambda t: t[0] == "true")
        item = Forward()

        key = string_
        dict_ = LBRACE - Optional(dictOf(key + COLON,
                                         item + Optional(COMMA))) + RBRACE
        list_ = LBRACK - Optional(delimitedList(item)) + RBRACK
        item << (real | integer | string_ | bool_ | Group(list_ | dict_))

        result = item.parseString(txt, parseAll=True)[0]
        print result
        txt = result

    return txt
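The dictOf helper used above pairs a key expression with a value expression and builds a pyparsing Dict, so parsed entries become addressable by key. A small self-contained illustration (made-up input, no relation to the function above):

from pyparsing import Suppress, Word, alphas, nums, Optional, dictOf

LBRACE, RBRACE, COLON, COMMA = map(Suppress, "{}:,")
key = Word(alphas)
value = Word(nums)
obj = LBRACE + Optional(dictOf(key + COLON, value + Optional(COMMA))) + RBRACE
print(obj.parseString("{a: 1, b: 2}").asDict())   # {'a': '1', 'b': '2'}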
Example #7
def get_standard_type_defs(word):
    tuple_str = Forward()
    list_str = Forward()
    dict_str = Forward()
    cvt_tuple = lambda toks: tuple(toks.asList())
    cvt_dict = lambda toks: dict(toks.asList())

    list_item = (
        none
        | boolean
        | real
        | integer
        | list_str
        | tuple_str
        | dict_str
        | quotedString.setParseAction(removeQuotes)
        | word
    )
    list_item2 = list_item | Empty().setParseAction(lambda: [None])

    tuple_str.inner = list_of(list_item)
    tuple_str.inner.setParseAction(cvt_tuple)
    tuple_str << (lparen + tuple_str.inner + rparen)

    list_str.inner = tuple_str.inner.copy()
    list_str.inner.setParseAction(lambda toks: list(toks))
    list_str << (lbrack + list_str.inner + rbrack)

    dict_entry = Group(list_item + colon + list_item2)
    dict_str.inner = list_of(dict_entry)
    dict_str.inner.setParseAction(cvt_dict)
    dict_str << (lbrace + Optional(dict_str.inner) + rbrace)

    return {"tuple": tuple_str, "list": list_str, "dict": dict_str, "list_item": list_item}
Example #8
def getEbnfParser(symbols):
    """ Returns an EBNF parser for the command language. """
    identifier = Word(alphas + '_', alphanums + '_')
    string = quotedString.setParseAction(
        lambda t: symbols.append((t[0][1:-1], TokenType.StrLit))
    )
    integer = Word(nums).setParseAction(
        lambda t: symbols.append((int(t[0]), TokenType.NumLit))
    )
    var = Suppress("$") + identifier
    var.setParseAction(
        lambda t: symbols.append((t[0], TokenType.Var))
    )
    literal = var | string | integer
    fnid = Suppress(Optional(".")) + identifier
    fnid.setParseAction(
        lambda t: symbols.append((t[0], TokenType.Call))
    )
    call = Forward()
    callb = fnid + ZeroOrMore(call | literal)
    call << ((Suppress("(") + callb + Suppress(")")) | callb)
    fndef_head = Suppress("let") + identifier
    fndef_head.setParseAction(
        lambda t: symbols.append((t[0], TokenType.Def))
    )
    definition = fndef_head + ZeroOrMore(var) + Suppress("=") + call
    cmd = OneOrMore((definition | call) + Word(";").setParseAction(
        lambda t: symbols.append((t[0], TokenType.End))
    ))
    msg = OneOrMore(cmd)
    return msg
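The parser above does all of its work through side effects: every parse action appends a (token, TokenType) pair to the symbols list passed in. A tiny standalone version of that pattern, using plain string tags instead of the TokenType enum the example assumes:

from pyparsing import Word, alphas, nums, OneOrMore

symbols = []
word = Word(alphas).setParseAction(lambda t: symbols.append(("word", t[0])))
number = Word(nums).setParseAction(lambda t: symbols.append(("num", int(t[0]))))
OneOrMore(word | number).parseString("add 1 2")
print(symbols)   # [('word', 'add'), ('num', 1), ('num', 2)]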
Example #9
    def __grammar(self):
        """
        Pyparsing grammar to parse the filter string.
        """

        float_ = Combine(Word(nums) + Literal(".") +
                         Word(nums)).setParseAction(
                             lambda x, y, z: float(z[0]))
        sci = Combine(
            Word(nums) + Optional(".") + Optional(Word(nums)) + oneOf("e E") +
            Optional("-") +
            Word(nums)).setParseAction(lambda x, y, z: float(z[0]))
        int_ = Word(nums).setParseAction(lambda x, y, z: int(z[0]))

        comp = oneOf("in eq gt lt ge le < > = like",
                     caseless=True).setResultsName("comp")
        op = oneOf("and or", caseless=True).setResultsName("op")

        lhs = Word(alphanums + "_").setResultsName("lhs")
        element = sci | float_ | int_ | quotedString.setParseAction(
            removeQuotes) | Word(alphanums)
        rhs = (element +
               ZeroOrMore(Suppress(",") + element)).setResultsName("rhs")

        stmt = Group(lhs + comp + rhs).setResultsName("statement")

        expr = stmt + ZeroOrMore(op + stmt)

        self.grammar = expr
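setResultsName (here written with the expr("name") shorthand) is what makes the parsed filter addressable by field afterwards. A condensed, standalone version of the statement rule with made-up input:

from pyparsing import Word, alphanums, nums, oneOf, quotedString, removeQuotes, Group, ZeroOrMore, Suppress

lhs = Word(alphanums + "_")("lhs")
comp = oneOf("in eq gt lt ge le < > = like", caseless=True)("comp")
element = Word(nums) | quotedString.setParseAction(removeQuotes)
rhs = (element + ZeroOrMore(Suppress(",") + element))("rhs")
stmt = Group(lhs + comp + rhs)("statement")
print(stmt.parseString('name like "foo%"').dump())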
Example #10
class NginxParser(object):
    '''A class for parsing nginx config files.

    Based on https://github.com/fatiherikli/nginxparser'''

    # constants
    left_bracket = Literal("{").setParseAction(_nodeify('punctuation'))
    right_bracket = Literal("}").setParseAction(_nodeify('punctuation'))
    semicolon = Literal(";").setParseAction(_nodeify('punctuation'))
    space = White().suppress()
    key = Word(alphanums + "_/").setParseAction(_nodeify('key'))
    value = CharsNotIn("{};, ").setParseAction(_nodeify('value'))
    value2 = CharsNotIn(";" + string.whitespace).setParseAction(
        _nodeify('value'))
    quotedstring = quotedString.setParseAction(_nodeify('value'))
    location = CharsNotIn("{};," + string.whitespace).setParseAction(
        _nodeify('location'))
    ifword = Literal("if").setParseAction(_nodeify('keyword'))
    setword = Literal("set").setParseAction(_nodeify('keyword'))

    # modifier for location uri [ = | ~ | ~* | ^~ ]
    modifier = (Literal("=") | Literal("~*") | Literal("~")
                | Literal("^~")).setParseAction(_nodeify('modifier'))

    # rules
    directive = (key + ZeroOrMore(space + Or([value, quotedstring])) +
                 semicolon).setParseAction(_nodeifydirective)
    setblock = (setword + OneOrMore(space + value2) +
                semicolon).setParseAction(_nodeifyset)
    block = Forward()
    ifblock = Forward()
    subblock = Group(ZeroOrMore(setblock | directive | block | ifblock))
    # TODO: parse if condition
    ifblock = (ifword + SkipTo('{') + left_bracket + subblock +
               right_bracket).setParseAction(_nodeifyif)

    block << Group(
        Group(key + Optional(space + modifier) + Optional(space + location)) +
        left_bracket +
        Group(ZeroOrMore(directive | block | ifblock | setblock)) +
        right_bracket).setParseAction(_nodeifycontext)

    script = OneOrMore(directive | block).ignore(
        pythonStyleComment).setParseAction(_nodeifymain).parseWithTabs()

    def __init__(self, source):
        self.source = source

    def parse(self):
        """
        Returns the parsed tree.
        """
        return self.script.parseString(self.source)

    def as_list(self):
        """
        Returns the list of tree.
        """
        return self.parse().asList()
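A simplified, standalone sketch of the directive rule above, without the _nodeify parse actions and with the explicit whitespace handling replaced by ordinary token classes (the sample config line is invented):

from pyparsing import Word, alphanums, printables, Suppress, ZeroOrMore, Group, quotedString, removeQuotes

key = Word(alphanums + "_/")
value = quotedString.setParseAction(removeQuotes) | Word(printables, excludeChars="{};,\"'")
directive = Group(key + ZeroOrMore(value) + Suppress(";"))
print(directive.parseString('error_log "/var/log/nginx error.log" warn;'))
# e.g. [['error_log', '/var/log/nginx error.log', 'warn']]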
Example #11
def pyparse_gml():
    """A pyparsing tokenizer for GML graph format.

    This is not intended to be called directly.

    See Also
    --------
    write_gml, read_gml, parse_gml

    Notes
    -----
    This doesn't implement the complete GML specification for
    nested attributes for graphs, edges, and nodes. 

    """  
    global graph
    
    try:
        from pyparsing import \
             Literal, CaselessLiteral, Word, Forward,\
             ZeroOrMore, Group, Dict, Optional, Combine,\
             ParseException, restOfLine, White, alphas, alphanums, nums,\
             OneOrMore,quotedString,removeQuotes,dblQuotedString
    except ImportError:
        raise ImportError, \
          "Import Error: not able to import pyparsing: http://pyparsing.wikispaces.com/"

    if not graph:
        lbrack = Literal("[").suppress()
        rbrack = Literal("]").suppress()
        pound = ("#")
        comment = pound + Optional( restOfLine )
        white = White(" \t\n")
        point = Literal(".")
        e = CaselessLiteral("E")
        integer = Word(nums).setParseAction(lambda s,l,t:[ int(t[0])])
        real = Combine( Word("+-"+nums, nums )+ 
                        Optional(point+Optional(Word(nums)))+
                        Optional(e+Word("+-"+nums, nums))).setParseAction(
                                        lambda s,l,t:[ float(t[0]) ])
        key = Word(alphas,alphanums+'_')
        value_atom = integer^real^Word(alphanums)^quotedString.setParseAction(removeQuotes)

        value = Forward()   # to be defined later with << operator
        keyvalue = Group(key+value)
        value << (value_atom | Group( lbrack + ZeroOrMore(keyvalue) + rbrack ))
        node = Group(Literal("node") + lbrack + Group(OneOrMore(keyvalue)) + rbrack)
        edge = Group(Literal("edge") + lbrack + Group(OneOrMore(keyvalue)) + rbrack)

        creator = Group(Literal("Creator")+ Optional( restOfLine ))
        version = Group(Literal("Version")+ Optional( restOfLine ))
        graphkey = Literal("graph").suppress()

        graph = Optional(creator)+Optional(version)+\
            graphkey + lbrack + ZeroOrMore( (node|edge|keyvalue) ) + rbrack
        graph.ignore(comment)
        
    return graph
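A condensed standalone sketch of the node rule above, showing how the Forward-declared value lets key/value lists nest inside brackets (the sample GML fragment is invented):

from pyparsing import (Literal, Word, Forward, Group, ZeroOrMore, OneOrMore,
                       alphas, alphanums, nums, quotedString, removeQuotes)

lbrack = Literal("[").suppress()
rbrack = Literal("]").suppress()
key = Word(alphas, alphanums + "_")
value = Forward()
keyvalue = Group(key + value)
value << (Word(nums) | quotedString.setParseAction(removeQuotes)
          | Group(lbrack + ZeroOrMore(keyvalue) + rbrack))
node = Group(Literal("node") + lbrack + Group(OneOrMore(keyvalue)) + rbrack)
print(node.parseString('node [ id 1 label "alpha" ]'))
# e.g. [['node', [['id', '1'], ['label', 'alpha']]]]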
Example #12
    def getkw_bnf(self):
        lcb = Literal("{").suppress()
        rcb = Literal("}").suppress()
        lsb = Literal("[").suppress()
        rsb = Literal("]").suppress()
        lps = Literal("(").suppress()
        rps = Literal(")").suppress()
        eql = Literal("=").suppress()
        dmark = Literal('$').suppress()
        end_sect = rcb
        end_data = Literal('$end').suppress()
        prtable = srange("[0-9a-zA-Z]") + '!$%&*+-./<>?@^_|~:'

        kstr = Word(prtable) ^ quotedString.setParseAction(removeQuotes)

        name = Word(alphas + "_", alphanums + "_")

        vec=lsb+delimitedList(Word(prtable) ^ Literal("\n").suppress() ^\
          quotedString.setParseAction(removeQuotes))+rsb
        key = kstr ^ vec
        keyword = name + eql + kstr
        vector = name + eql + vec
        data = Combine(dmark + name) + SkipTo(end_data) + end_data
        data.setParseAction(self.store_data)
        sect = name + lcb
        sect.setParseAction(self.add_sect)
        key_sect = name + Group(lps + kstr + rps) + lcb
        key_sect.setParseAction(self.add_sect)
        vec_sect = name + Group(lps + vec + rps) + lcb
        vec_sect.setParseAction(self.add_vecsect)
        end_sect.setParseAction(self.pop_sect)

        keyword.setParseAction(self.store_key)
        vector.setParseAction(self.store_vector)

        section = Forward()
        input = section ^ data ^ keyword ^ vector

        sectdef = sect ^ key_sect ^ vec_sect
        section << sectdef + ZeroOrMore(input) + rcb

        bnf = ZeroOrMore(input)
        bnf.ignore(pythonStyleComment)

        return bnf
Example #13
    def string_literal(self):
        """
        string_literal ::= "'" string "'" | "\"" string "\""

        Any successful match is converted to a single quoted string to simplify
        post-parsed operations.
        """
        return quotedString.setParseAction(
            lambda s, l, t: "'{string}'".format(string=removeQuotes(s, l, t)))
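For reference, the normalization above can be exercised directly; both quoting styles come out single-quoted:

from pyparsing import quotedString, removeQuotes

string_literal = quotedString.setParseAction(
    lambda s, l, t: "'{0}'".format(removeQuotes(s, l, t)))
print(string_literal.parseString('"hello"'))   # ["'hello'"]
print(string_literal.parseString("'world'"))   # ["'world'"]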
Example #15
	def getkw_bnf(self):
		sect_begin   = Literal("{").suppress()
		sect_end   = Literal("}").suppress()
		array_begin   = Literal("[").suppress()
		array_end   = Literal("]").suppress()
		arg_begin   = Literal("(").suppress()
		arg_end   = Literal(")").suppress()
		eql   = Literal("=").suppress()
		dmark = Literal('$').suppress()
		end_data=Literal('$end').suppress()
		prtable = alphanums+r'!$%&*+-./<>?@^_|~'

	
		# Helper definitions
		kstr=Word(prtable) ^ quotedString.setParseAction(removeQuotes)
		name = Word(alphas+"_",alphanums+"_")
		vec=array_begin+delimitedList(Word(prtable) ^ \
				Literal("\n").suppress() ^ \
				quotedString.setParseAction(removeQuotes))+array_end
		sect=name+sect_begin
		key_sect=name+Group(arg_begin+kstr+arg_end)+sect_begin
		vec_sect=name+Group(arg_begin+vec+ arg_end)+sect_begin

		# Grammar
		keyword = name + eql + kstr
		vector = name + eql + vec
		data=Combine(dmark+name)+SkipTo(end_data)+end_data
		section=Forward()
		sect_def=(sect | key_sect | vec_sect)
		input=section | data | vector | keyword 
		section << sect_def+ZeroOrMore(input) + sect_end

		# Parsing actions	
		keyword.setParseAction(self.store_key)
		vector.setParseAction(self.store_vector)
		data.setParseAction(self.store_data)
		sect.setParseAction(self.add_sect)
		key_sect.setParseAction(self.add_sect)
		vec_sect.setParseAction(self.add_vecsect)
		sect_end.setParseAction(self.pop_sect)

		bnf=ZeroOrMore(input) + StringEnd().setFailAction(parse_error)
		bnf.ignore(pythonStyleComment)
		return bnf
Example #16
    def _get_bnf(self):
        """
        Returns the `Backus–Naur Form` for the parser
        """
        if not self.bnf:
            # Operators
            exponent_operator = Literal("^")
            # negate_operator = Literal("!")  # TODO: Implement this so we can write `!True`
            multiply_operator = oneOf("* / %")
            add_operator = oneOf("+ -")
            comparison_operator = oneOf("== != < <= > >= & |") ^ Keyword("in")

            # Functions
            e = CaselessLiteral("E")
            pi = CaselessLiteral("PI")

            lparen, rparen, lbrack, rbrack = map(Suppress, "()[]")
            ident = Word(alphas, alphas + nums + "_$")
            variable = Combine(Literal("$") + Word(alphanums + "_"))
            boolean = Keyword("True") ^ Keyword("False")
            string = quotedString.setParseAction(removeQuotes)
            numeric = Combine(
                Word("+-" + nums, nums) +
                Optional(Literal(".") + Optional(Word(nums))) +
                Optional(e + Word("+-" + nums, nums)))
            none = Keyword("None")

            expression = Forward()

            lists = Forward()
            lists << (lbrack + Optional(
                delimitedList(numeric ^ variable ^ boolean ^ string)) + rbrack)

            atom = (Optional("-") +
                    (pi | e | numeric | ident + lparen + expression +
                     rparen).setParseAction(self.push_stack)
                    | (variable | none | boolean | string
                       | Group(lists)).setParseAction(self.push_stack)
                    |
                    (lparen + expression.suppress() + rparen)).setParseAction(
                        self.push_unary_stack)

            # By defining exponentiation as "atom [^factor]" instead of "atom [^atom]",
            # we get right-to-left exponents: 2^3^2 = 2^(3^2), not (2^3)^2.
            factor = Forward()
            factor << atom + ZeroOrMore(
                (exponent_operator + factor).setParseAction(self.push_stack))

            boolean = factor + ZeroOrMore(
                (comparison_operator + factor).setParseAction(self.push_stack))
            term = boolean + ZeroOrMore(
                (multiply_operator + boolean).setParseAction(self.push_stack))
            self.bnf = expression << term + ZeroOrMore(
                (add_operator + term).setParseAction(self.push_stack))

        return self.bnf
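The list literal in the grammar above combines delimitedList with the Or (^) operator so numbers, booleans and quoted strings can all appear as elements. A stripped-down standalone sketch:

from pyparsing import Suppress, Word, nums, Keyword, quotedString, removeQuotes, delimitedList, Optional, Group

lbrack, rbrack = map(Suppress, "[]")
string = quotedString.setParseAction(removeQuotes)
boolean = Keyword("True") ^ Keyword("False")
numeric = Word(nums)
lists = Group(lbrack + Optional(delimitedList(numeric ^ boolean ^ string)) + rbrack)
print(lists.parseString('[1, "two", True]'))   # [['1', 'two', 'True']]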
Example #17
def get_standard_type_defs(word=word_free):
    """
    Return dict of the pyparsing base lexical elements.

    The compound types (tuple, list, dict) can contain compound types or simple
    types such as integers, floats and words.

    Parameters
    ----------
    word : lexical element
        A custom lexical element for word.

    Returns
    -------
    defs : dict
        The dictionary with the following items:

        - tuple: (..., ..., ...)
        - list: [..., ...., ...]
        - dict: {...:..., ...:..., ....} or {...=..., ...=..., ....}
        - list_item: any of preceding compound types or simple types
    """
    tuple_str = Forward()
    list_str = Forward()
    dict_str = Forward()
    cvt_tuple = lambda toks: tuple(toks.asList())
    cvt_dict = lambda toks: dict(toks.asList())

    list_item = (none | boolean | real | integer | list_str | tuple_str
                 | dict_str
                 | quotedString.setParseAction(removeQuotes)
                 | word)
    list_item2 = list_item | Empty().setParseAction(lambda: [None])

    tuple_str.inner = list_of(list_item)
    tuple_str.inner.setParseAction(cvt_tuple)
    tuple_str << (lparen + tuple_str.inner + rparen)

    list_str.inner = tuple_str.inner.copy()
    list_str.inner.setParseAction(lambda toks: [list(toks)])
    list_str << (lbrack + list_str.inner + rbrack)

    dict_entry = Group(list_item + (colon | equal_sign) + list_item2)
    dict_str.inner = list_of(dict_entry)
    dict_str.inner.setParseAction(cvt_dict)
    dict_str << (lbrace + Optional(dict_str.inner) + rbrace)

    defs = {
        'tuple': tuple_str,
        'list': list_str,
        'dict': dict_str,
        'list_item': list_item
    }

    return defs
Example #18
    def _getPattern(self):
        arith_expr = Forward()
        comp_expr = Forward()
        logic_expr = Forward()
        LPAR, RPAR, SEMI = map(Suppress, "();")
        identifier = Word(alphas+"_", alphanums+"_")
        multop = oneOf('* /')
        plusop = oneOf('+ -')
        expop = Literal( "^" )
        compop = oneOf('> < >= <= != ==')
        andop = Literal("AND")
        orop = Literal("OR")
        current_value = Literal( "." )
        assign = Literal( "=" )
        # notop = Literal('NOT')
        function = oneOf(' '.join(self.FUNCTIONS))
        function_call = Group(function.setResultsName('fn') + LPAR + Optional(delimitedList(arith_expr)) + RPAR)
        aggregate_column = QuotedString(quoteChar='{', endQuoteChar='}')
        single_column = QuotedString(quoteChar='[', endQuoteChar=']')
        integer = Regex(r"-?\d+")
        real = Regex(r"-?\d+\.\d*")

        # quotedString enables strings without quotes to pass

        operand = \
            function_call.setParseAction(self.__evalFunction) | \
            aggregate_column.setParseAction(self.__evalAggregateColumn) | \
            single_column.setParseAction(self.__evalSingleColumn) | \
            ((real | integer).setParseAction(self.__evalConstant)) | \
            quotedString.setParseAction(self.__evalString).addParseAction(removeQuotes) | \
            current_value.setParseAction(self.__evalCurrentValue) | \
            identifier.setParseAction(self.__evalString)

        arith_expr << operatorPrecedence(operand,
            [
             (expop, 2, opAssoc.LEFT, self.__expOp),
             (multop, 2, opAssoc.LEFT, self.__multOp),
             (plusop, 2, opAssoc.LEFT, self.__addOp),
            ])

        # comp_expr = Group(arith_expr + compop + arith_expr)
        comp_expr << operatorPrecedence(arith_expr,
            [
                (compop, 2, opAssoc.LEFT, self.__evalComparisonOp),
            ])

        logic_expr << operatorPrecedence(comp_expr,
            [
                (andop, 2, opAssoc.LEFT, self.__evalLogicOp),
                (orop, 2, opAssoc.LEFT, self.__evalLogicOp)
            ])

        pattern = logic_expr + StringEnd()
        return pattern
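QuotedString with custom quote characters is what lets the {aggregate} and [single] column references above come back with their braces already stripped; a quick standalone check (column names invented):

from pyparsing import QuotedString

aggregate_column = QuotedString(quoteChar='{', endQuoteChar='}')
single_column = QuotedString(quoteChar='[', endQuoteChar=']')
print(aggregate_column.parseString('{total_sales}'))   # ['total_sales']
print(single_column.parseString('[unit_price]'))       # ['unit_price']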
Example #19
def pyparse_gml():
    """A pyparsing tokenizer for GML graph format.

    This is not intended to be called directly.

    See Also
    --------
    write_gml, read_gml, parse_gml

    Notes
    -----
    This doesn't implement the complete GML specification for
    nested attributes for graphs, edges, and nodes. 

    """
    global graph

    try:
        from pyparsing import \
             Literal, CaselessLiteral,Word,\
             ZeroOrMore, Group, Dict, Optional, Combine,\
             ParseException, restOfLine, White, alphanums, nums,\
             OneOrMore,quotedString,removeQuotes,dblQuotedString
    except ImportError:
        raise ImportError, \
          "Import Error: not able to import pyparsing: http://pyparsing.wikispaces.com/"

    if not graph:
        creator = Literal("Creator") + Optional(restOfLine)
        graphkey = Literal("graph").suppress()
        lbrack = Literal("[").suppress()
        rbrack = Literal("]").suppress()
        pound = ("#")
        comment = pound + Optional(restOfLine)
        white = White(" \t\n")
        point = Literal(".")
        e = CaselessLiteral("E")
        integer = Word(nums).setParseAction(lambda s, l, t: [int(t[0])])
        real = Combine(
            Word("+-" + nums, nums) + Optional(point + Optional(Word(nums))) +
            Optional(e + Word("+-" + nums, nums))).setParseAction(
                lambda s, l, t: [float(t[0])])
        key = Word(alphanums)
        value = integer ^ real ^ Word(alphanums) ^ quotedString.setParseAction(
            removeQuotes)
        keyvalue = Dict(Group(key+OneOrMore(white).suppress()\
                   +value+OneOrMore(white).suppress()))
        node = Group(Literal("node") + lbrack + OneOrMore(keyvalue) + rbrack)
        edge = Group(Literal("edge") + lbrack + OneOrMore(keyvalue) + rbrack)
        graph = Optional(creator)+\
            graphkey + lbrack + ZeroOrMore(edge|node|keyvalue) + rbrack
        graph.ignore(comment)

    return graph
Example #20
class SDKConfig:
    """
    Evaluates conditional expressions based on the build's sdkconfig and Kconfig files.
    This also defines the grammar of conditional expressions.
    """

    # A configuration entry is in the form CONFIG=VALUE. Definitions of components of that grammar
    IDENTIFIER = Word(alphanums.upper() + '_')

    HEX = Combine('0x' + Word(hexnums)).setParseAction(lambda t: int(t[0], 16))
    DECIMAL = Combine(Optional(Literal('+') | Literal('-')) +
                      Word(nums)).setParseAction(lambda t: int(t[0]))
    LITERAL = Word(printables.replace(':', ''))
    QUOTED_LITERAL = quotedString.setParseAction(removeQuotes)

    VALUE = HEX | DECIMAL | LITERAL | QUOTED_LITERAL

    # Operators supported by the expression evaluation
    OPERATOR = oneOf(['=', '!=', '>', '<', '<=', '>='])

    def __init__(self, kconfig_file, sdkconfig_file):
        self.config = kconfiglib.Kconfig(kconfig_file)
        self.config.load_config(sdkconfig_file)

    def evaluate_expression(self, expression):
        result = self.config.eval_string(expression)

        if result == 0:  # n
            return False
        elif result == 2:  # y
            return True
        else:  # m
            raise Exception('unsupported config expression result')

    @staticmethod
    def get_expression_grammar():
        identifier = SDKConfig.IDENTIFIER.setResultsName('identifier')
        operator = SDKConfig.OPERATOR.setResultsName('operator')
        value = SDKConfig.VALUE.setResultsName('value')

        test_binary = identifier + operator + value
        test_single = identifier

        test = test_binary | test_single

        condition = Group(
            Optional('(').suppress() + test + Optional(')').suppress())

        grammar = infixNotation(condition, [('!', 1, opAssoc.RIGHT),
                                            ('&&', 2, opAssoc.LEFT),
                                            ('||', 2, opAssoc.LEFT)])

        return grammar
Example #21
def get_standard_type_defs(word=word_free):
    """
    Return dict of the pyparsing base lexical elements.

    The compound types (tuple, list, dict) can contain compound types or simple
    types such as integers, floats and words.

    Parameters
    ----------
    word : lexical element
        A custom lexical element for word.

    Returns
    -------
    defs : dict
        The dictionary with the following items:

        - tuple: (..., ..., ...)
        - list: [..., ...., ...]
        - dict: {...:..., ...:..., ....} or {...=..., ...=..., ....}
        - list_item: any of preceding compound types or simple types
    """
    tuple_str = Forward()
    list_str = Forward()
    dict_str = Forward()
    cvt_tuple = lambda toks : tuple(toks.asList())
    cvt_dict = lambda toks: dict(toks.asList())

    list_item = (none | boolean | cmplx | real | integer | list_str | tuple_str
                 | dict_str
                 | quotedString.setParseAction(removeQuotes)
                 | word)
    list_item2 = list_item | Empty().setParseAction(lambda: [None])

    tuple_str.inner = list_of(list_item)
    tuple_str.inner.setParseAction(cvt_tuple)
    tuple_str << (lparen + tuple_str.inner + rparen)

    list_str.inner = tuple_str.inner.copy()
    list_str.inner.setParseAction(lambda toks: [list(toks)])
    list_str << (lbrack + list_str.inner + rbrack)

    dict_entry = Group(list_item + (colon | equal_sign) + list_item2)
    dict_str.inner = list_of(dict_entry)
    dict_str.inner.setParseAction(cvt_dict)
    dict_str << (lbrace + Optional(dict_str.inner) + rbrace)

    defs = {'tuple' : tuple_str,
            'list' : list_str,
            'dict' : dict_str,
            'list_item' : list_item}

    return defs
Example #22
 def define_string(self):
     """
     Return the syntax definition for a string.
     
     **Do not override this method**, it's not necessary: it already
     supports unicode strings. If you want to override the delimiters,
     check :attr:`T_QUOTES`.
     
     """
     string = quotedString.setParseAction(removeQuotes, self.make_string)
     string.setName("string")
     return string
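setParseAction accepts several callables and runs them in order, which is how removeQuotes and make_string are chained above. A standalone sketch with an upper-casing stand-in for make_string:

from pyparsing import quotedString, removeQuotes

def to_upper(tokens):
    # stand-in for make_string, only to show the chaining
    return tokens[0].upper()

string = quotedString.setParseAction(removeQuotes, to_upper)
print(string.parseString('"hello"'))   # ['HELLO']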
Example #24
    def _getPattern(self):
        arith_expr = Forward()
        comp_expr = Forward()
        logic_expr = Forward()
        LPAR, RPAR, SEMI = map(Suppress, "();")
        identifier = Word(alphas + "_", alphanums + "_")
        multop = oneOf('* /')
        plusop = oneOf('+ -')
        expop = Literal("^")
        compop = oneOf('> < >= <= != ==')
        andop = Literal("AND")
        orop = Literal("OR")
        current_value = Literal(".")
        assign = Literal("=")
        # notop = Literal('NOT')
        function = oneOf(' '.join(self.FUNCTIONS))
        function_call = Group(
            function.setResultsName('fn') + LPAR +
            Optional(delimitedList(arith_expr)) + RPAR)
        aggregate_column = QuotedString(quoteChar='{', endQuoteChar='}')
        single_column = QuotedString(quoteChar='[', endQuoteChar=']')
        integer = Regex(r"-?\d+")
        real = Regex(r"-?\d+\.\d*")

        # quotedString enables strings without quotes to pass

        operand = \
            function_call.setParseAction(self.__evalFunction) | \
            aggregate_column.setParseAction(self.__evalAggregateColumn) | \
            single_column.setParseAction(self.__evalSingleColumn) | \
            ((real | integer).setParseAction(self.__evalConstant)) | \
            quotedString.setParseAction(self.__evalString).addParseAction(removeQuotes) | \
            current_value.setParseAction(self.__evalCurrentValue) | \
            identifier.setParseAction(self.__evalString)

        arith_expr << operatorPrecedence(operand, [
            (expop, 2, opAssoc.LEFT, self.__expOp),
            (multop, 2, opAssoc.LEFT, self.__multOp),
            (plusop, 2, opAssoc.LEFT, self.__addOp),
        ])

        # comp_expr = Group(arith_expr + compop + arith_expr)
        comp_expr << operatorPrecedence(arith_expr, [
            (compop, 2, opAssoc.LEFT, self.__evalComparisonOp),
        ])

        logic_expr << operatorPrecedence(
            comp_expr, [(andop, 2, opAssoc.LEFT, self.__evalLogicOp),
                        (orop, 2, opAssoc.LEFT, self.__evalLogicOp)])

        pattern = logic_expr + StringEnd()
        return pattern
Example #25
    def parse_poi_file(self):

        floatNumber = Regex(r'-?\d+(\.\d*)?([eE][\+-]\d+)?').setParseAction(lambda s, l, t: [float(t[0])])
        integer = Word(nums).setParseAction(lambda s, l, t: [long(t[0])])
        numericValue = floatNumber | integer

        poiline = numericValue + numericValue + quotedString.setParseAction(removeQuotes)
        try:
            for a in poiline.searchString(file(self.poi_file).read()):
                self.latlon.append(a.asList())
                print a.asList()
        except TypeError as e:
            print "failed to open poi file"
            raise
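searchString scans a whole text for every match of the expression, which is what lets parse_poi_file pull the coordinate lines out of the file. A self-contained sketch with made-up data:

from pyparsing import Regex, quotedString, removeQuotes

number = Regex(r'-?\d+(\.\d*)?([eE][+-]?\d+)?').setParseAction(lambda t: float(t[0]))
poiline = number + number + quotedString.setParseAction(removeQuotes)
text = '48.858 2.294 "Eiffel Tower"\n51.501 -0.142 "Buckingham Palace"\n'
for match in poiline.searchString(text):
    print(match.asList())   # [48.858, 2.294, 'Eiffel Tower'] ...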
Example #26
 def __init__(self):
     dash = Word("-",max=2)
     operator = oneOf(": =")
     
     argValueType1 = quotedString.setParseAction(removeQuotes)
     argValueType2 = Regex("[a-zA-Z0-9_\./]+")
     
     positionalArgument = (argValueType1 | argValueType2)
     regularArgument = Combine(dash + Word(alphas) + operator + (argValueType1 | argValueType2))
     novalueArgument = Combine(dash + Word(alphas))
     
     arguments = ZeroOrMore(positionalArgument | regularArgument | novalueArgument)
     
     self.parser = Group(Word(alphas) + arguments).setResultsName("command")
Example #27
    def parse_poi_file(self):

        floatNumber = Regex(r'-?\d+(\.\d*)?([eE][\+-]\d+)?').setParseAction(
            lambda s, l, t: [float(t[0])])
        integer = Word(nums).setParseAction(lambda s, l, t: [long(t[0])])
        numericValue = floatNumber | integer

        poiline = numericValue + numericValue + quotedString.setParseAction(
            removeQuotes)
        try:
            for a in poiline.searchString(file(self.poi_file).read()):
                self.latlon.append(a.asList())
                print a.asList()
        except TypeError as e:
            print "failed to open poi file"
            raise
Example #28
def pyparse_gml():
    """pyparser tokenizer for GML graph format

    This doesn't implement the complete GML specification for
    nested attributes for graphs, edges, and nodes. 

    """  
    global graph
    
    try:
        from pyparsing import \
             Literal, CaselessLiteral,Word,\
             ZeroOrMore, Group, Dict, Optional, Combine,\
             ParseException, restOfLine, White, alphanums, nums,\
             OneOrMore,quotedString,removeQuotes,dblQuotedString
    except ImportError:
        raise ImportError, \
          "Import Error: not able to import pyparsing: http://pyparsing.wikispaces.com/"

    if not graph:
        creator = Literal("Creator")+ Optional( restOfLine )
        graphkey = Literal("graph").suppress()
        lbrack = Literal("[").suppress()
        rbrack = Literal("]").suppress()
        pound = ("#")
        comment = pound + Optional( restOfLine )
        white = White(" \t\n")
        point = Literal(".")
        e = CaselessLiteral("E")
        integer = Word(nums).setParseAction(lambda s,l,t:[ int(t[0])])
        real = Combine( Word("+-"+nums, nums )+ 
                        Optional(point+Optional(Word(nums)))+
                        Optional(e+Word("+-"+nums, nums))).setParseAction(
                                        lambda s,l,t:[ float(t[0]) ])
        key=Word(alphanums)
        value=integer^real^Word(alphanums)^quotedString.setParseAction(removeQuotes)
        keyvalue = Dict(Group(key+OneOrMore(white).suppress()\
                   +value+OneOrMore(white).suppress()))
        node = Group(Literal("node") + lbrack + OneOrMore(keyvalue) + rbrack)
        edge = Group(Literal("edge") + lbrack + OneOrMore(keyvalue) + rbrack)
        graph = Optional(creator)+\
            graphkey + lbrack + OneOrMore(edge|node|keyvalue) + rbrack
        graph.ignore(comment)
        
    return graph
Example #29
def load_js_obj_literal(j):
    """Terrible hack."""
    j = j[j.index('{'):]
    j = j.replace('\n', '').replace('\t', '')
    j = j.replace(';', '')
    j = re.sub(r'//.*?{', r'{', j)
    LBRACK, RBRACK, LBRACE, RBRACE, COLON, COMMA = map(Suppress,"[]{}:,")
    integer = Regex(r"[+-]?\d+").setParseAction(lambda t:int(t[0]))
    real = Regex(r"[+-]?\d+\.\d*").setParseAction(lambda t:float(t[0]))
    string_ = Word(alphas,alphanums+"_") | quotedString.setParseAction(removeQuotes)
    bool_ = oneOf("true false").setParseAction(lambda t: t[0]=="true")
    item = Forward()
    key = string_
    dict_ = LBRACE - Optional(dictOf(key+COLON, item+Optional(COMMA))) + RBRACE
    list_ = LBRACK - Optional(delimitedList(item)) + RBRACK
    item << (real | integer | string_ | bool_ | Group(list_ | dict_ ))
    result = item.parseString(j,parseAll=True)[0]
    return result
Example #30
def create_bnf():
    cvt_int = lambda toks: int(toks[0])
    cvt_real = lambda toks: float(toks[0])
    cvt_tuple = lambda toks : tuple(toks.asList())
    cvt_dict = lambda toks: dict(toks.asList())

    # define punctuation as suppressed literals
    (lparen, rparen, lbrack, rbrack,
     lbrace, rbrace, colon) = map(Suppress,"()[]{}:")

    integer = Combine(Optional(oneOf("+ -")) + Word(nums)).setName("integer")
    integer.setParseAction(cvt_int)

    real = Combine(Optional(oneOf("+ -"))+ Word(nums)
                   + "." + Optional(Word(nums))
                   + Optional("e" + Optional(oneOf("+ -"))
                              + Word(nums))).setName("real")
    real.setParseAction(cvt_real)

    tuple_str = Forward()
    list_str = Forward()
    dict_str = Forward()

    list_item = (real | integer | Group(list_str) | tuple_str | dict_str
                 | quotedString.setParseAction(removeQuotes)
                 | Word(alphas8bit + alphas, alphas8bit + alphanums + "_"))
    list_item2 = list_item | Empty().setParseAction(lambda: [None])

    tuple_str << (Suppress("(") + Optional(delimitedList(list_item)) +
                  Optional(Suppress(",")) + Suppress(")"))
    tuple_str.setParseAction(cvt_tuple)

    list_str << (lbrack + Optional(delimitedList(list_item) +
                                   Optional(Suppress(","))) + rbrack)

    dict_entry = Group(list_item + colon + list_item2)
    dict_inner = delimitedList(dict_entry) + Optional(Suppress(","))
    dict_inner.setParseAction(cvt_dict)

    dict_str << (lbrace + Optional(dict_inner) + rbrace)

    return dict_inner
Example #31
rpar = Literal(")").suppress()
backtick = Literal("`").suppress()
singlequote = Literal("'").suppress()
doublequote = Literal("\"").suppress()

identifier = Combine(Word(alphas + "_", alphanums + "_$"))
columnName = identifier | (backtick + identifier + backtick)
tableName = identifier | (backtick + identifier + backtick)

arithSign = Word("+-", exact=1)
intNum = Combine(Optional(arithSign) +
                 Word(nums)).setParseAction(lambda toks: int(toks[0]))
realNum = Combine(Optional(arithSign) + Word(nums) + "." +
                  Word(nums)).setParseAction(lambda toks: float(toks[0]))
# TODO Support scientific notation with a mantissa? e.g. 1e10
columnRval = intNum | realNum | quotedString.setParseAction(
    lambda toks: toks[0][1:-1])  # Removes quotes

eq_ = Literal("=").setParseAction(replaceWith('eq_'))
neq_ = oneOf("!= <>").setParseAction(replaceWith('neq_'))
gt_ = oneOf(">").setParseAction(replaceWith('gt_'))
ge_ = oneOf(">=").setParseAction(replaceWith('ge_'))
lt_ = oneOf("<").setParseAction(replaceWith('lt_'))
le_ = oneOf("<=").setParseAction(replaceWith('le_'))
in_ = CaselessKeyword("in").setParseAction(replaceWith('in_'))
nin_ = CaselessKeyword("not in").setParseAction(replaceWith('nin_'))
# TODO Add support for LIKE

and_ = CaselessKeyword(AND_KEYWORD)
or_ = CaselessKeyword(OR_KEYWORD)
# TODO Add support for NOT
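replaceWith substitutes a fixed token for whatever the expression matched, which is how the comparison operators above are normalized to names like 'neq_'. A quick standalone check:

from pyparsing import CaselessKeyword, oneOf, replaceWith

neq_ = oneOf("!= <>").setParseAction(replaceWith('neq_'))
in_ = CaselessKeyword("in").setParseAction(replaceWith('in_'))
print(neq_.parseString("<>"))   # ['neq_']
print(in_.parseString("IN"))    # ['in_']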
Example #32
 def parse_element(cls, indent_stack):
     return (Keyword("@parse_first").suppress() + Literal('(').suppress() +
             delimitedList(quotedString.setParseAction(removeQuotes)) +
             Literal(')').suppress()).setResultsName("parse_first")
Example #33
def _create_field_parser():
    """
    Creates a parser using pyparsing that works with bibfield rule definitions

    BNF like grammar:

    rule ::= ([persistent_identifier] json_id ["[0]" | "[n]"] "," aliases":" INDENT body UNDENT) | include | python_comment
    include ::= "include(" PATH ")"
    body ::=  [inherit_from] (creator | derived | calculated) [checker] [documentation] [producer]
    aliases ::= json_id ["[0]" | "[n]"] ["," aliases]

    creator ::= "creator:" INDENT creator_body+ UNDENT
    creator_body ::= [decorators] source_format "," source_tag "," python_allowed_expr
    source_format ::= MASTER_FORMATS
    source_tag ::= QUOTED_STRING

    derived ::= "derived" INDENT derived_calculated_body UNDENT
    calculated ::= "calculated:" INDENT derived_calculated_body UNDENT
    derived_calculated_body ::= [decorators] "," python_allowed_exp

    decorators ::= (persistent_identifier | legacy | do_not_cache | parse_first | depends_on | only_if | only_if_master_value)*
    persistent_identifier ::= @persistent_identifier( level )
    legacy ::= "@legacy(" correspondences+ ")"
    correspondences ::= "(" source_tag [ "," tag_name ] "," json_id ")"
    parse_first ::= "@parse_first(" jsonid+ ")"
    depends_on ::= "@depends_on(" json_id+ ")"
    only_if ::= "@only_if(" python_condition+ ")"
    only_if_master_value ::= "@only_if_master_value(" python_condition+  ")"

    inherit_from ::= "@inherit_from()"

    python_allowed_exp ::= ident | list_def | dict_def | list_access | dict_access | function_call

    checker ::= "checker:" INDENT checker_function+ UNDENT

    documentation ::= INDENT doc_string subfield* UNDENT
    doc_string ::= QUOTED_STRING
    subfield ::= "@subfield" json_id["."json_id*] ":" docstring

    producer ::= "producer:" INDENT producer_body UNDENT
    producer_body ::= producer_code "," python_dictionary
    producer_code ::= ident
    """

    indent_stack = [1]

    def check_sub_indent(str, location, tokens):
        cur_col = col(location, str)
        if cur_col > indent_stack[-1]:
            indent_stack.append(cur_col)
        else:
            raise ParseException(str, location, "not a subentry")

    def check_unindent(str, location, tokens):
        if location >= len(str):
            return
        cur_col = col(location, str)
        if not (cur_col < indent_stack[-1] and cur_col <= indent_stack[-2]):
            raise ParseException(str, location, "not an unindent")

    def do_unindent():
        indent_stack.pop()

    INDENT = lineEnd.suppress() + empty + empty.copy().setParseAction(check_sub_indent)
    UNDENT = FollowedBy(empty).setParseAction(check_unindent)
    UNDENT.setParseAction(do_unindent)

    json_id = (
        (Word(alphas + "_", alphanums + "_") + Optional(oneOf("[0] [n]")))
        .setResultsName("json_id", listAllMatches=True)
        .setParseAction(lambda tokens: "".join(tokens))
    )
    aliases = delimitedList(
        (Word(alphanums + "_") + Optional(oneOf("[0] [n]"))).setParseAction(lambda tokens: "".join(tokens))
    ).setResultsName("aliases")
    ident = Word(alphas + "_", alphanums + "_")
    dict_def = originalTextFor(nestedExpr("{", "}"))
    list_def = originalTextFor(nestedExpr("[", "]"))
    dict_access = list_access = originalTextFor(ident + nestedExpr("[", "]"))
    function_call = originalTextFor(ZeroOrMore(ident + ".") + ident + nestedExpr("(", ")"))

    python_allowed_expr = (dict_def ^ list_def ^ dict_access ^ list_access ^ function_call ^ restOfLine).setResultsName(
        "value", listAllMatches=True
    )

    persistent_identifier = (Suppress("@persistent_identifier") + nestedExpr("(", ")")).setResultsName(
        "persistent_identifier"
    )
    legacy = (Suppress("@legacy") + originalTextFor(nestedExpr("(", ")"))).setResultsName("legacy", listAllMatches=True)
    only_if = (Suppress("@only_if") + originalTextFor(nestedExpr("(", ")"))).setResultsName("only_if")
    only_if_master_value = (Suppress("@only_if_value") + originalTextFor(nestedExpr("(", ")"))).setResultsName(
        "only_if_master_value"
    )
    depends_on = (Suppress("@depends_on") + originalTextFor(nestedExpr("(", ")"))).setResultsName("depends_on")
    parse_first = (Suppress("@parse_first") + originalTextFor(nestedExpr("(", ")"))).setResultsName("parse_first")
    memoize = (Suppress("@memoize") + nestedExpr("(", ")")).setResultsName("memoize")
    field_decorator = parse_first ^ depends_on ^ only_if ^ only_if_master_value ^ memoize ^ legacy

    # Independent decorators
    inherit_from = (Suppress("@inherit_from") + originalTextFor(nestedExpr("(", ")"))).setResultsName("inherit_from")
    override = (Suppress("@") + "override").setResultsName("override")
    extend = (Suppress("@") + "extend").setResultsName("extend")
    master_format = (
        (Suppress("@master_format") + originalTextFor(nestedExpr("(", ")")))
        .setResultsName("master_format")
        .setParseAction(lambda toks: toks[0])
    )

    derived_calculated_body = (ZeroOrMore(field_decorator) + python_allowed_expr).setResultsName(
        "derived_calculated_def"
    )

    derived = "derived" + Suppress(":") + INDENT + derived_calculated_body + UNDENT
    calculated = "calculated" + Suppress(":") + INDENT + derived_calculated_body + UNDENT

    source_tag = quotedString.setParseAction(removeQuotes).setResultsName("source_tag", listAllMatches=True)
    source_format = Word(alphas, alphanums + "_").setResultsName("source_format", listAllMatches=True)
    creator_body = (
        ZeroOrMore(field_decorator) + source_format + Suppress(",") + source_tag + Suppress(",") + python_allowed_expr
    ).setResultsName("creator_def", listAllMatches=True)
    creator = "creator" + Suppress(":") + INDENT + OneOrMore(creator_body) + UNDENT
    field_def = (creator | derived | calculated).setResultsName("type_field", listAllMatches=True)

    # JsonExtra
    json_dumps = (
        (Suppress("dumps") + Suppress(",") + python_allowed_expr)
        .setResultsName("dumps")
        .setParseAction(lambda toks: toks.value[0])
    )
    json_loads = (
        (Suppress("loads") + Suppress(",") + python_allowed_expr)
        .setResultsName("loads")
        .setParseAction(lambda toks: toks.value[0])
    )

    json_extra = (Suppress("json:") + INDENT + Each((json_dumps, json_loads)) + UNDENT).setResultsName("json_ext")

    # Checker
    checker_function = (
        Optional(master_format) + ZeroOrMore(ident + ".") + ident + originalTextFor(nestedExpr("(", ")"))
    ).setResultsName("checker", listAllMatches=True)
    checker = "checker" + Suppress(":") + INDENT + OneOrMore(checker_function) + UNDENT

    # Description/Documentation
    doc_double = QuotedString(quoteChar='"""', multiline=True)
    doc_single = QuotedString(quoteChar="'''", multiline=True)
    doc_string = INDENT + (doc_double | doc_single) + UNDENT
    description_body = (Suppress("description:") + doc_string).setParseAction(lambda toks: toks[0][0])
    description = (description_body | doc_double | doc_single).setResultsName("description")

    # Producer
    producer_code = (Word(alphas, alphanums + "_") + originalTextFor(nestedExpr("(", ")"))).setResultsName(
        "producer_code", listAllMatches=True
    )
    producer_body = (producer_code + Suppress(",") + python_allowed_expr).setResultsName(
        "producer_rule", listAllMatches=True
    )
    producer = Suppress("producer:") + INDENT + OneOrMore(producer_body) + UNDENT

    schema = (
        (Suppress("schema:") + INDENT + dict_def + UNDENT).setParseAction(lambda toks: toks[0]).setResultsName("schema")
    )

    body = (
        Optional(field_def)
        & Optional(checker)
        & Optional(json_extra)
        & Optional(description)
        & Optional(producer)
        & Optional(schema)
    )
    comment = Literal("#") + restOfLine + LineEnd()
    include = (Suppress("include") + quotedString).setResultsName("includes", listAllMatches=True)
    rule = (
        Optional(persistent_identifier)
        + Optional(inherit_from)
        + Optional(override)
        + Optional(extend)
        + json_id
        + Optional(Suppress(",") + aliases)
        + Suppress(":")
        + INDENT
        + body
        + UNDENT
    ).setResultsName("rules", listAllMatches=True)

    return OneOrMore(rule | include | comment.suppress())
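Most of the decorator rules above rely on originalTextFor(nestedExpr(...)) to capture a balanced-parenthesis argument list as raw text instead of a token tree. A minimal standalone illustration (the decorator arguments are invented):

from pyparsing import Suppress, nestedExpr, originalTextFor

legacy = Suppress("@legacy") + originalTextFor(nestedExpr("(", ")"))
print(legacy.parseString('@legacy(("100__a", "title"))'))
# ['(("100__a", "title"))']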
Example #34
from utils import choose_one, error_exit


REGEX_SPECIAL_CHARS = r'([\.\*\+\?\|\(\)\{\}\[\]])'
REGEX_LOG_FORMAT_VARIABLE = r'\$([a-z0-9\_]+)'
LOG_FORMAT_COMBINED = '$remote_addr - $remote_user [$time_local] ' \
                      '"$request" $status $body_bytes_sent ' \
                      '"$http_referer" "$http_user_agent"'

# common parser element
semicolon = Literal(';').suppress()
# nginx string parameter can contain any character except: { ; " '
parameter = Word(''.join(c for c in printables if c not in set('{;"\'')))
# which can also be quoted
parameter = parameter | quotedString.setParseAction(removeQuotes)


def detect_config_path():
    """
    Get nginx configuration file path based on `nginx -V` output
    :return: detected nginx configuration file path
    """
    try:
        proc = subprocess.Popen(['nginx', '-V'], stderr=subprocess.PIPE)
    except OSError:
        error_exit('Access log file or format was not set and nginx config file cannot be detected. ' +
                   'Perhaps nginx is not in your PATH?')

    stdout, stderr = proc.communicate()
    version_output = stderr.decode('utf-8')
Example #35
    symbol=t[0][0]
    d={}
    addToTree(symbol,d)
    treestack.append(curtree)
    curtree=d

def endBlock(s,l,t):
    global curtree,treestack
    debugToks("endBlock",s,l,t)
    curtree=treestack.pop()

symbol=Word(alphas+'_',alphanums+'_')
hexval=Combine(Literal('0x')+Word(nums+'abcdefABCDEF')).setParseAction(convertHex)
decval=Word(nums).setParseAction(convertDec)
enumval=Word(alphas+'_',alphanums+'_').setParseAction(convertEnum)
stringval=quotedString.setParseAction(convertStr)
value=hexval|decval|quotedString|enumval
assignment=Group(symbol+'='+value).setParseAction(addAssignment)
block=Forward()
statement=assignment|block
block<<Group(symbol+'{').setParseAction(startBlock)+ZeroOrMore(statement)+Literal('}').setParseAction(endBlock)
comment=cStyleComment|(Literal('//')+restOfLine)
config=ZeroOrMore(statement).ignore(comment)

def parseFile(s):
    global config, tree, curtree
    tree={}
    curtree=tree
    treestack=[]
    config.parseFile(s)
    return tree
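The hexval/decval conversions above follow the usual Combine-then-convert pattern; here is the hex case in isolation (convertHex in the example is assumed to do something equivalent):

from pyparsing import Combine, Literal, Word, hexnums

hexval = Combine(Literal('0x') + Word(hexnums)).setParseAction(lambda t: int(t[0], 16))
print(hexval.parseString('0x1A'))   # [26]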
Example #36
# Also, I think there is a bug in IRIRef.* in that they assume that the
# IRIref will be a URIRef, but it could also be a QName.
DatasetClause = (
    FROM +
    (IRIref.copy().setParseAction(refer_component(components.RemoteGraph))
     | NAMED +
     IRIref.copy().setParseAction(refer_component(components.NamedGraph))))
if DEBUG:
    DatasetClause.setName('DatasetClause')

# String:
#
# TODO: flesh this out to include multiline strings, and also
# investigate a possible bug with Expression.ParsedString; it
# doesn't look like it is properly expanding escaped characters.
String = quotedString.setParseAction(
    composition2([removeQuotes, components.ParsedString]))
if DEBUG:
    String.setName('String')

# RDFLiteral
AT = Suppress('@')
LANGTAG = AT + Regex(PN_CHARS_BASE_re + '+' + regex_group('-[a-zA-Z0-9]+') +
                     '*')

DOUBLE_HAT = Suppress('^^')

unescape_dict = (
    (r'\t', '\t'), (r'\n', '\n'), (r'\r', '\r'), (r'\b', '\b'), (r'\f', '\f'),
    (r'\"', '"'), (r"\'", "'"), (r'\\', '\\'))  # must be done last!

Example #37
ident = Word(alphas, alphanums + "_:")
columnName = (ident | quotedString())("columnName")

whereExpression = Forward()
and_ = Keyword("and", caseless=True)('and')
or_ = Keyword("or", caseless=True)('or')
in_ = Keyword("in", caseless=True)("in")
isnotnull = Keyword("is not null", caseless=True)('notnull')
binop = oneOf("= != < > >= <=", caseless=True)('binop')
intNum = Word(nums)

columnRval = (intNum
              | quotedString.setParseAction(lambda x: x[0][1:-1]))('rval*')
whereCondition = Group((columnName + isnotnull)
                       | (columnName + binop + columnRval)
                       | (columnName + in_ + "(" + delimitedList(columnRval) +
                          ")") | ("(" + whereExpression + ")"))('condition')
whereExpression << Group(whereCondition + ZeroOrMore(
    (and_ | or_) + whereExpression))('expression')


class SQLValidator(object):
    """ Parses a subset of SQL to define feature selections.
        This validates the SQL to make sure the user can't do anything dangerous."""
    def __init__(self, s):
        self._s = s
        self._errors = []
        self._parse_result = None
Example #38
realNum = Combine(Optional(arithSign) +
                  (Word(nums) + "." + Optional(Word(nums)) | ("." + Word(nums)))  # noqa
                  + Optional(E + Optional(arithSign) + Word(nums)))
realNum.setParseAction(lambda x: expression.NumericLiteral(float(x[0])))

intNum = Combine(Optional(arithSign) + Word(nums) +
                 Optional(E + Optional("+") + Word(nums)))
intNum.setParseAction(lambda x: expression.NumericLiteral(int(x[0])))

number = realNum | intNum

variable = ident.copy()
variable.setParseAction(lambda x: model.Var(x[0]))

quotedString.setParseAction(lambda x: expression.StringLiteral(x[0][1:-1]))

literal = quotedString | number

valueref = variable | literal


def mkterm(x):
    return model.Term(x)

term = (predicate
        + drop("(")
        + Group(delimitedList(valueref, ","))
        + drop(")")).setParseAction(mkterm)

Example #39
def _create_field_parser():
    """Create a parser that can handle field definitions.

    BNF like grammar::

        rule       ::= [pid | extend | override]
                       json_id ["," aliases]":"
                           body
        json_id    ::= (letter|"_") (letter|digit|_)*
        aliases    ::= json_id ["," aliases]

        pid        ::= @persistent_identifier( level )
        extend     ::= @extend
        override   ::= @override
        hidden     ::= @hidden

        body       ::= (creator* | derived | calculated) (extensions)*

        creator    ::= [decorators] format "," tag "," expr
        derived    ::= [decorators] expr
        calculated ::= [decorators] expr

    To check the syntax of the parser extensions or decorators, see
    :mod:`invenio.modules.jsonalchemy.jsonext.parsers`
    """
    indent_stack = [1]

    # Independent/special decorators
    persistent_identifier = (
        Keyword('@persistent_identifier').suppress() + nestedExpr()
    ).setResultsName('pid').setParseAction(lambda toks: int(toks[0][0]))
    override = Keyword('@override').suppress()\
        .setResultsName('override')\
        .setParseAction(lambda toks: True)
    extend = Keyword('@extend').suppress()\
        .setResultsName('extend')\
        .setParseAction(lambda toks: True)
    hidden = Keyword('@hidden').suppress()\
        .setResultsName('hidden')\
        .setParseAction(lambda toks: True)
    rule_decorators = (Optional(persistent_identifier) &
                       Optional(override) &
                       Optional(extend) &
                       Optional(hidden))

    # Field definition decorators
    field_decorators = Each(
        [Optional(p.parser.parse_element(indent_stack))
         for p in parsers if issubclass(p.parser,
                                        DecoratorBaseExtensionParser)])

    # Creator rules
    creator_body = (
        Optional(field_decorators).setResultsName('decorators') +
        Word(alphas, alphanums + '_') +
        Literal(',').suppress() +
        quotedString.setParseAction(removeQuotes) +
        Literal(',').suppress() +
        PYTHON_ALLOWED_EXPR
    ).setParseAction(lambda toks: {
        'source_format': toks[-3],
        'source_tags': toks[-2].split(' '),
        'function': compile(toks[-1].strip(), '', 'eval'),
        'type': 'creator',
        'decorators': toks.decorators.asDict()}
    ).setResultsName('creator_def', listAllMatches=True)
    creator = (Keyword('creator:').suppress() +
               indentedBlock(OneOrMore(creator_body), indent_stack))

    # Derived and calculated rules
    der_calc_body = (Optional(field_decorators).setResultsName('decorators') +
                     PYTHON_ALLOWED_EXPR)
    derived = (
        Keyword('derived:').suppress() +
        indentedBlock(der_calc_body, indent_stack)
    ).setParseAction(lambda toks: {
        'source_format': 'derived',
        'source_tags': None,
        'function': compile(toks[-1].strip(), '', 'eval'),
        'type': 'derived',
        'decorators': toks.decorators.asDict()}).setResultsName('derived_def')
    calculated = (
        Keyword('calculated:').suppress() +
        indentedBlock(der_calc_body, indent_stack)
    ).setParseAction(lambda toks: {
        'source_format': 'calculated',
        'source_tags': None,
        'function': compile(toks[-1].strip(), '', 'eval'),
        'type': 'calculated',
        'decorators': toks.decorators.asDict()
    }).setResultsName('calculated_def')

    rule_sections = [Optional(creator | derived | calculated), ]
    rule_sections.extend([Optional(p.parser.parse_element(indent_stack))
                          for p in parsers
                          if issubclass(p.parser, FieldBaseExtensionParser)])

    json_id = (IDENT +
               Optional(Suppress(',') +
                        delimitedList(Word(alphanums + '_'))) +
               Suppress(':')
               ).setResultsName('field')\
        .setParseAction(lambda toks: {'json_id': toks[0],
                                      'aliases': toks[1:]})

    rule = Group(Optional(rule_decorators) +
                 json_id +
                 indentedBlock(Each(rule_sections), indent_stack)
                 )

    return OneOrMore(COMMENT.suppress() | rule)
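
The json_id rule above, an identifier plus optional comma-separated aliases followed by a colon, is the entry point of every field definition. A self-contained sketch of just that rule, with IDENT approximated by a plain identifier Word:

from pyparsing import Optional, Suppress, Word, alphanums, alphas, delimitedList

IDENT = Word(alphas + '_', alphanums + '_')
json_id = (IDENT +
           Optional(Suppress(',') + delimitedList(Word(alphanums + '_'))) +
           Suppress(':')
           ).setParseAction(lambda toks: {'json_id': toks[0],
                                          'aliases': list(toks[1:])})

print(json_id.parseString('title, main_title, t:')[0])
# -> {'json_id': 'title', 'aliases': ['main_title', 't']}
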
Example #40
0
    def __init__(self, network):
        self.network = network
        self.g_business_relationship = nx.DiGraph()
        self.user_defined_sets = {}
        self.user_library_calls = []
        self.user_defined_functions = {}

        # Grammars
#TODO: tidy this up
        attribute_unnamed = Word(alphanums+'_'+".")
        attribute = attribute_unnamed.setResultsName("attribute")
        self.attribute = attribute

        lt = Literal("<").setResultsName("<")
        le = Literal("<=").setResultsName("<=")
        eq = Literal("=").setResultsName("=")
        ne = Literal("!=").setResultsName("!=")
        ge = Literal(">=").setResultsName(">=")
        gt = Literal(">").setResultsName(">")
        wildcard = Literal("*").setResultsName("wildcard")
        self.wildcard = wildcard

        self.prefix_lists = {}
        self.tags_to_allocate = set()
        self.allocated_tags = {}

        self._opn = {
                '<': operator.lt,
                '<=': operator.le,
                '=': operator.eq,
                '!=': operator.ne,
                '>=': operator.ge,
                '>': operator.gt,
                '&': set.intersection,
                '|': set.union,
                }

        # map alphanum chars to alphanum equivalents for use in tags
        self._opn_to_tag = {
                '<': "lt",
                '<=': "le",
                '=': "eq",
                '!=': "ne",
                '>=': "ge",
                '>': "gt",
                '&': "and",
                '|': "or",
                }

# Both are named "comparison" so they can be accessed in the same manner when evaluating
        comparison = (le | lt | ge | gt | eq | ne).setResultsName("comparison")  # longest operators first, so "<=" is not read as "<"
        stringComparison = (eq | ne).setResultsName("comparison")
#
#quoted string is already present
        float_string = Word(nums).setResultsName("value").setParseAction(lambda t: float(t[0]))
        integer_string = Word(nums).setResultsName("value").setParseAction(lambda t: int(t[0]))
#TODO: use numString, and make integer if full stop

#TODO: allow parentheses? - should be ok as pass to the python parser
        ipField = Word(nums, max=3)
        ipAddress = Combine( ipField + "." + ipField + "." + ipField + "." + ipField ).setResultsName("ipAddress")

        boolean_and = Literal("&").setResultsName("&")
        boolean_or = Literal("|").setResultsName("|")
        boolean = (boolean_and | boolean_or).setResultsName("boolean")
        self._boolean = boolean # need to use in checking

#TODO fix this matching 2a.ab when that should match a string
        numericQuery = Group(attribute + comparison + float_string).setResultsName( "numericQuery")


        stringValues = (attribute_unnamed | quotedString.setParseAction(removeQuotes)
                ).setResultsName("value")

        stringQuery =  Group(attribute + stringComparison + stringValues).setResultsName( "stringQuery")
        wildcardQuery = wildcard.setResultsName("wildcardQuery")

        singleQuery = numericQuery | stringQuery | wildcardQuery
        singleQuery.setFailAction(parse_fail_action)
        self.nodeQuery = singleQuery + ZeroOrMore(boolean + singleQuery)

        self.u_egress = Literal("egress->").setResultsName("u_egress") 
        self.v_ingress = Literal("->ingress").setResultsName("v_ingress")
        self.u_ingress = Literal("ingress<-").setResultsName("u_ingress")
        self.v_egress = Literal("<-egress").setResultsName("v_egress") 
        edgeType = ( self.u_egress | self.u_ingress | self.v_egress
                | self.v_ingress).setResultsName("edgeType").setFailAction(parse_fail_action)
        self.edgeQuery = ("(" + self.nodeQuery.setResultsName("query_a") + ")"
                + edgeType
                + "(" + self.nodeQuery.setResultsName("query_b")
                + ")").setFailAction(parse_fail_action)

#start of BGP queries
        originQuery = (Literal("Origin").setResultsName("attribute") + 
                #this is a workaround for the match, comparison, value 3-tuple in processing
                Literal("(").setResultsName("comparison") +  
                Group(self.nodeQuery).setResultsName("value") + Suppress(")")).setResultsName("originQuery")
        transitQuery = (Literal("Transit").setResultsName("attribute") +
                #this is a workaround for the match, comparison, value 3-tuple in processing
                Literal("(").setResultsName("comparison") +  
                Group(self.nodeQuery).setResultsName("value") + Suppress(")")).setResultsName("transitQuery")

        prefixList = Literal("prefix_list")
        matchPl = (prefixList.setResultsName("attribute")
                + comparison
                + attribute.setResultsName("value"))

        matchTag = (Literal("tag").setResultsName("attribute")
                + comparison
                + attribute.setResultsName("value"))


        #tags contain -> tag = aaa
        inTags = (Literal("tags").setResultsName("attribute").setParseAction(lambda x: "tag")
                + Literal("contain").setResultsName("comparison").setParseAction(lambda x: "=")
                + attribute_unnamed.setResultsName("value")
                )

        bgpMatchQuery = Group(matchPl | matchTag | inTags | originQuery | transitQuery ).setResultsName("bgpMatchQuery").setFailAction(parse_fail_action)
        self.bgpMatchQuery = bgpMatchQuery

        setLP = (Literal("setLP").setResultsName("attribute") 
                + integer_string.setResultsName("value")).setResultsName("setLP")
        setMED = (Literal("setMED").setResultsName("attribute") 
                + integer_string.setResultsName("value")).setResultsName("setMED")

        addTag = (Literal("addTag").setResultsName("attribute") 
                + attribute.setResultsName("value")).setResultsName("addTag")
        removeTag = (Literal("removeTag").setResultsName("attribute") 
                + attribute.setResultsName("value")).setResultsName("removeTag")
        #TODO: need to set blank value
        reject = Literal("reject")
#TODO: remove once move quagga output inside module
        self.reject = reject
        rejectAction = (reject.setResultsName("attribute") +
                Literal("route").setResultsName("value")).setResultsName("reject")
        setNextHop = (Literal("setNextHop").setResultsName("attribute") + ipAddress.setResultsName("value")).setResultsName("setNextHop")

        setOriginAttribute = (Literal("setOriginAttribute").setResultsName("attribute") 
                + (oneOf("IGP BGP None").setResultsName("value"))).setResultsName("setOriginAttribute")

        bgpAction = Group(addTag | setLP | setMED | removeTag |
                setNextHop | setOriginAttribute | rejectAction).setResultsName("bgpAction")

        # The Clauses
        ifClause = Group(Suppress("if") + bgpMatchQuery 
                + ZeroOrMore(Suppress(boolean_and)
                    + bgpMatchQuery)).setResultsName("if_clause")

        actionClause = bgpAction + ZeroOrMore(Suppress(boolean_and) + bgpAction)
        thenClause = Group(Suppress("then") + actionClause).setResultsName("then_clause")
        ifThenClause = Group(Suppress("(") + 
                ifClause + thenClause + Suppress(")")).setResultsName("ifThenClause")
        elseActionClause = Group(Suppress("(") + actionClause 
                + Suppress(")")).setResultsName("else_clause")
# Support actions without a condition (ie no "if")
        unconditionalAction =  Group(Suppress("(")
            + Group(actionClause).setResultsName("unconditionalActionClause")
            + Suppress(")")).setResultsName("bgpSessionQuery")

# Query may contain itself (nested)
        bgpSessionQuery = Forward()
        bgpSessionQuery << ( ifThenClause +
                Optional( Suppress("else") + (elseActionClause | bgpSessionQuery))
                ).setResultsName("bgpSessionQuery")
        bgpSessionQuery =  bgpSessionQuery | unconditionalAction
        self.bgpSessionQuery = bgpSessionQuery

        self.bgpApplicationQuery = self.edgeQuery + Suppress(":") + self.bgpSessionQuery

# Library stuff
        set_values = Suppress("{") + delimitedList( attribute, delim=',').setResultsName("set_values") + Suppress("}")
#Set to empty set, rather than empty list as empty list is processed differently somewhere in parser
        empty_set = Literal("{}").setResultsName("set_values").setParseAction(lambda x: set())
        self.set_definition = attribute.setResultsName("set_name") + Suppress("=") + (empty_set | set_values)

        library_params = attribute | Group(set_values) | empty_set
        library_function = attribute.setResultsName("def_name") + Suppress("(") + delimitedList( library_params, delim=',').setResultsName("def_params") + Suppress(")")
        library_function.setFailAction(parse_fail_action)

        self.library_def = Suppress("define") + library_function

        self.library_call = Suppress("apply") + library_function
        self.library_def.setFailAction(parse_fail_action)
        self.library_edge_query = (self.attribute.setResultsName("query_a")
                + edgeType + self.attribute.setResultsName("query_b"))
        self.library_edge_query.setFailAction(parse_fail_action)
        library_edge_definition = self.library_edge_query + Suppress(":") + self.bgpSessionQuery
        library_global_definition = "global tags = {" + delimitedList( attribute, delim=',').setResultsName("tags") + "}"
        self.library_entry = library_global_definition.setResultsName("global_tags") | library_edge_definition.setResultsName("library_edge")
        self.library_entry.setFailAction(parse_fail_action)

        self.bgpPolicyLine = (
                self.bgpApplicationQuery.setResultsName("bgpApplicationQuery")
                | self.library_call.setResultsName("library_call")
                | self.set_definition.setResultsName("set_definition")
                )
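
At its core, the node-query grammar above is a series of attribute/comparison/value triples joined by "&" or "|". A minimal self-contained sketch of that pattern (the names are illustrative and not taken from the class):

from pyparsing import Group, Literal, Word, ZeroOrMore, alphanums, nums, quotedString, removeQuotes

attribute = Word(alphanums + "_.")("attribute")
comparison = (Literal("<=") | Literal(">=") | Literal("!=") |
              Literal("<") | Literal(">") | Literal("="))("comparison")
number = Word(nums)("value").setParseAction(lambda t: float(t[0]))
string_value = (Word(alphanums + "_.") | quotedString.setParseAction(removeQuotes))("value")
single = Group(attribute + comparison + (number | string_value))
query = single + ZeroOrMore((Literal("&") | Literal("|"))("boolean") + single)

print(query.parseString("asn = 65000 & country = 'UK'").asList())
# -> [['asn', '=', 65000.0], '&', ['country', '=', 'UK']]
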
Example #41
0
def restscrape(resturl, filenamersc, filenamerevsc):

    time.sleep(randint(2,8))
    # Read the url
    response = urllib2.urlopen(resturl)
    soup = BeautifulSoup(response.read())
    response.close()


    # Check if it is rated
    if soup.find(itemprop="ratingValue") == None:
        return

    # Anomaly
    if soup.find(class_="container no-reviews") != None:
        return

    # If this is the alternate page version, retry
    if soup.find(id="mapbox") != None:
        print "alt version"
        restscrape(resturl, filenamersc, filenamerevsc)
        return

    # If the review data is missing (another alternate version), retry
    if soup.find(class_="friend-count miniOrange") == None:
        print "alt version rev"
        restscrape(resturl, filenamersc, filenamerevsc)
        return

#### ##    ## ########  #######  
 ##  ###   ## ##       ##     ## 
 ##  ####  ## ##       ##     ## 
 ##  ## ## ## ######   ##     ## 
 ##  ##  #### ##       ##     ## 
 ##  ##   ### ##       ##     ## 
#### ##    ## ##        #######  

    # Key Yelp information
    title = soup.find(property="og:title").get("content").encode('utf-8')
    latitude = soup.find(property="place:location:latitude").get("content")
    longitude = soup.find(property="place:location:longitude").get("content")
    rating = soup.find(itemprop="ratingValue").get("content")
    reviewCount = soup.find(itemprop="reviewCount").get_text()

    if soup.find(id="cat_display") != None:
        categories = soup.find(id="cat_display").get_text().strip()
        categories = ' '.join(categories.split())
    else:
        categories = "None"

    if soup.find(class_="photo-box-img")['src'] != "http://s3-media1.ak.yelpcdn.com/assets/2/www/img/5f69f303f17c/default_avatars/business_medium_square.png":
        photos = "Has photos"
    else:
        photos = "None"

    if soup.find(id="bizUrl") != None:
        URL = soup.find(id="bizUrl").get_text().strip().encode('utf-8')
    else:
        URL = "None"

    # Get Neighborhoods
    # Particularly special code because it has to be stripped from javascript script
    # Automatically strip quotes from quoted strings
    # quotedString matches single or double quotes
    neighborhood = ""
    quotedString.setParseAction(removeQuotes)

    # Define a pattern to extract the neighborhoods: entry
    neighborhoodsSpec = Literal('\"neighborhoods\":') + '[' + delimitedList(quotedString)('neighborhoods') + ']'

    for hoods in neighborhoodsSpec.searchString(soup):
        neighborhood = str(hoods.neighborhoods)


    # Yelp Interaction/Information
    if soup.find(class_="yelp-menu") != None:
        menu = "Has menu"
    else:
        menu = "None"

    if soup.find(id="opentable-reservation-actions") != None:
        reservable = "Reservable"
    else:
        reservable = "None"

    if soup.find(class_="media-story offer-detail") != None:
        deal = "Has deal"
    else:
        deal = "None"
        
    if soup.find(id="delivery-address-form") != None:
        yelpDelivery = "Delivery system"
    else:
        yelpDelivery = "None"        

    if soup.find(id="bizSlide") != None:
        slides = "Has slides"
    else:
        slides = "None"


    # Restaurant status
    if soup.find(id="bizSupporter") != None:
        sponsor = "Sponsors"
    else:
        sponsor = "None"

    if soup.find(id="bizClaim") != None:
        claim = "Unclaimed"
    else:
        claim = "None"

    if soup.find(style="color:#999999;") == None:
        eliteReviews = "Has Elites"
    else:
        eliteReviews = "None"


    # Restaurant attributes from attributes section
    # Attributes self-explanatory
    if soup.find(class_="attr-transit") != None:
        transit = soup.find(class_="attr-transit").get_text().strip()
    else:
        transit = "None"

    if soup.find(class_="attr-BusinessHours") != None:
        hours = soup.find('dd', class_="attr-BusinessHours").get_text()
    else:
        hours = "None"

    if soup.find(class_="attr-RestaurantsAttire") != None:
        attire = soup.find('dd', class_="attr-RestaurantsAttire").get_text()
    else:
        attire = "None"

    if soup.find(class_="attr-BusinessAcceptsCreditCards") != None:
        creditCards = soup.find('dd', class_="attr-BusinessAcceptsCreditCards").get_text()
    else:
        creditCards = "None"

    if soup.find(class_="attr-BusinessParking") != None:
        parking = soup.find('dd', class_="attr-BusinessParking").get_text()
    else:
        parking = "None"

    if soup.find(class_="attr-RestaurantsPriceRange2") != None:
        price = soup.find('dd', class_="attr-RestaurantsPriceRange2").get_text().strip()
    else:
        price = "None"

    if soup.find(class_="attr-RestaurantsGoodForGroups") != None:
        groups = soup.find('dd', class_="attr-RestaurantsGoodForGroups").get_text()
    else:
        groups = "None"

    if soup.find(class_="attr-GoodForKids") != None:
        kids = soup.find('dd', class_="attr-GoodForKids").get_text()
    else:
        kids = "None"

    if soup.find(class_="attr-RestaurantsReservations") != None:
        reservations = soup.find('dd', class_="attr-RestaurantsReservations").get_text()
    else:
        reservations = "None"

    if soup.find(class_="attr-RestaurantsDelivery") != None:
        delivery = soup.find('dd', class_="attr-RestaurantsDelivery").get_text()
    else:
        delivery = "None"

    if soup.find(class_="attr-RestaurantsTakeOut") != None:
        takeout = soup.find('dd', class_="attr-RestaurantsTakeOut").get_text()
    else:
        takeout = "None"

    if soup.find(class_="attr-RestaurantsTableService") != None:
        service = soup.find('dd', class_="attr-RestaurantsTableService").get_text()
    else:
        service = "None"

    if soup.find(class_="attr-OutdoorSeating") != None:
        outdoorSeating = soup.find('dd', class_="attr-OutdoorSeating").get_text()
    else:
        outdoorSeating = "None"

    if soup.find(class_="attr-WiFi") != None:
        wifi = soup.find('dd', class_="attr-WiFi").get_text()
    else:
        wifi = "None"

    if soup.find(class_="attr-GoodForMeal") != None:
        meals = soup.find('dd', class_="attr-GoodForMeal").get_text()
    else:
        meals = "None"

    if soup.find(class_="attr-BestNights") != None:
        bestNights = soup.find('dd', class_="attr-BestNights").get_text()
    else:
        bestNights = "None"

    if soup.find(class_="attr-HappyHour") != None:
        happyHour = soup.find('dd', class_="attr-HappyHour").get_text()
    else:
        happyHour = "None"

    if soup.find(class_="attr-Alcohol") != None:
        alcohol = soup.find('dd', class_="attr-Alcohol").get_text()
    else:
        alcohol = "None"

    if soup.find(class_="attr-Smoking") != None:
        smoking = soup.find('dd', class_="attr-Smoking").get_text()
    else:
        smoking = "None"

    if soup.find(class_="attr-CoatCheck") != None:
        coatCheck = soup.find('dd', class_="attr-CoatCheck").get_text()
    else:
        coatCheck = "None"        

    if soup.find(class_="attr-NoiseLevel") != None:
        noise = soup.find('dd', class_="attr-NoiseLevel").get_text()
    else:
        noise = "None"

    if soup.find(class_="attr-GoodForDancing") != None:
        goodForDancing = soup.find('dd', class_="attr-GoodForDancing").get_text()
    else:
        goodForDancing = "None"

    if soup.find(class_="attr-Ambience") != None:
        ambience = soup.find('dd', class_="attr-Ambience").get_text()
    else:
        ambience = "None"

    if soup.find(class_="attr-HasTV") != None:
        tv = soup.find('dd', class_="attr-HasTV").get_text()
    else:
        tv = "None"

    if soup.find(class_="attr-Caters") != None:
        caters = soup.find('dd', class_="attr-Caters").get_text()
    else:
        caters = "None"

    if soup.find(class_="attr-WheelchairAccessible") != None:
        wheelchairAccessible = soup.find('dd', class_="attr-WheelchairAccessible").get_text()
    else:
        wheelchairAccessible = "None"

    if soup.find(class_="attr-DogsAllowed") != None:
        dogsAllowed = soup.find('dd', class_="attr-DogsAllowed").get_text()
    else:
        dogsAllowed = "None"


    with open(filenamersc, "ab") as filer:
        fr = csv.writer(filer)
        # Writing to CSV
        fr.writerow([resturl, title, latitude, longitude, rating, reviewCount, categories, photos, URL, neighborhood, menu, reservable, yelpDelivery, slides, sponsor, claim, eliteReviews, transit, hours, attire, creditCards, parking, price, groups, kids, reservations, deal, delivery, takeout, service, outdoorSeating, wifi, meals, bestNights, happyHour, alcohol, smoking, coatCheck, noise, goodForDancing, ambience, tv, caters, wheelchairAccessible])

########  ######## ##     ## #### ######## ##      ##  ######  
##     ## ##       ##     ##  ##  ##       ##  ##  ## ##    ## 
##     ## ##       ##     ##  ##  ##       ##  ##  ## ##       
########  ######   ##     ##  ##  ######   ##  ##  ##  ######  
##   ##   ##        ##   ##   ##  ##       ##  ##  ##       ## 
##    ##  ##         ## ##    ##  ##       ##  ##  ## ##    ## 
##     ## ########    ###    #### ########  ###  ###   ######  

    # Parsing top 40 Reviews
    reviews = soup.findAll(itemprop="review")
    for review in reviews:
        
        # Get user data
        if review.find(title="User is Elite") != None:
            eliteStatus = "Elite"
        else:
            eliteStatus = "None"

        friendCount = review.find(class_="friend-count miniOrange").get_text()[:-8].strip()
        reviewCount = review.find(class_="review-count miniOrange").get_text()[:-8].strip()

        if review.find(class_="photo-box-img")['src'] != "http://s3-media4.ak.yelpcdn.com/assets/2/www/img/78074914700f/default_avatars/user_small_square.png":
            userPhoto = "Has photo"
        else:
            userPhoto = "None"

        reviewInfo = review.find(class_="reviewer_info").get_text().encode('utf-8')


        # Get review data
        reviewRating = review.find(itemprop="ratingValue").get("content")
        publish = review.find(itemprop="datePublished").get("content")
        description = review.find(itemprop="description").get_text().encode('utf-8')


        # Get review attributes
        if review.find(class_="i-wrap ig-wrap-common i-camera-common-wrap badge photo-count") != None:
            reviewPix = review.find(class_="i-wrap ig-wrap-common i-camera-common-wrap badge photo-count").get_text()[:-6].strip()
        else:
            reviewPix = "None"

        if review.find(class_="i-wrap ig-wrap-common i-opentable-badge-common-wrap badge opentable-badge-marker") != None:
            reviewSeated = "Seated"
        else:
            reviewSeated = "None"

        if review.find(class_="i ig-common i-deal-price-tag-common") != None:
            reviewDeal = "Purchased Deal"
        else:
            reviewDeal = "None"

        if review.find(class_="i-wrap ig-wrap-common i-checkin-burst-blue-small-common-wrap badge checkin checkin-irregular") != None:
            reviewCheckIn = review.find(class_="i-wrap ig-wrap-common i-checkin-burst-blue-small-common-wrap badge checkin checkin-irregular").get_text()[:-14].strip()
        else:
            reviewCheckIn = "None"


        # Special Qype users lack stats
        if review.find(class_="count"):
            usefulfunnycool = review.findAll(class_="count")
            # Get useful, funny, cool statistics
            if usefulfunnycool[0].get_text() != "":
                useful = usefulfunnycool[0].get_text()
            else:
                useful = 0

            if usefulfunnycool[1].get_text() != "":
                funny = usefulfunnycool[1].get_text()
            else:
                funny = 0

            if usefulfunnycool[2].get_text() != "":
                cool = usefulfunnycool[2].get_text()
            else:
                cool = 0
        else:
            useful = 0
            funny = 0
            cool = 0

        with open(filenamerevsc, "ab") as filerev:
            frev = csv.writer(filerev)
            # Writing to CSV
            frev.writerow([resturl, eliteStatus, friendCount, reviewCount, userPhoto, reviewInfo, reviewRating, publish, description, reviewPix, reviewSeated, reviewDeal, reviewCheckIn, useful, funny, cool])
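
The neighborhoods lookup above uses pyparsing's searchString to pull a quoted list out of embedded JavaScript. A self-contained sketch of the same pattern on a made-up page fragment:

from pyparsing import Literal, delimitedList, quotedString, removeQuotes

quotedString.setParseAction(removeQuotes)
neighborhoodsSpec = Literal('"neighborhoods":') + '[' + delimitedList(quotedString)('neighborhoods') + ']'

page = 'var biz = {"neighborhoods": ["Mission", "SoMa"], "rating": 4.0};'
for hoods in neighborhoodsSpec.searchString(page):
    print(list(hoods.neighborhoods))
# -> ['Mission', 'SoMa']
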
Example #42
0
def braces_parser(text, opener=BLOB_OPENER, closer=BLOB_CLOSER):
    cvtTuple = lambda toks: tuple(toks.asList())  # @IgnorePep8
    cvtRaw = lambda toks: RawString(' '.join(map(str, toks.asList()))
                                    )  # @IgnorePep8
    cvtDict = lambda toks: GlobDict(toks.asList())  # @IgnorePep8
    extractText = lambda s, l, t: RawString(s[t._original_start:t._original_end
                                              ])  # @IgnorePep8

    def pythonize(toks):
        s = toks[0]
        if s == 'true':
            return True
        elif s == 'false':
            return False
        elif s == 'none':
            return [None]
        elif s.isdigit():
            return int(s)
        elif re.match('(?i)^-?(\d+\.?e\d+|\d+\.\d*|\.\d+)$', s):
            return float(s)
        return toks[0]

    def noneDefault(s, loc, t):
        return t if len(t) else [RawEOL]

    # define punctuation as suppressed literals
    lbrace, rbrace = map(Suppress, "{}")

    identifier = Word(printables, excludeChars='{}"\'')
    quotedStr = QuotedString('"', escChar='\\', multiline=True) | \
        QuotedString('\'', escChar='\\', multiline=True)
    quotedIdentifier = QuotedString('"', escChar='\\', unquoteResults=False) | \
        QuotedString('\'', escChar='\\', unquoteResults=False)
    dictStr = Forward()
    setStr = Forward()
    objStr = Forward()

    oddIdentifier = identifier + quotedIdentifier
    dictKey = quotedIdentifier | \
        Combine(oddIdentifier).setParseAction(cvtRaw)
    dictKey.setParseAction(cvtRaw)

    dictValue = quotedStr | dictStr | setStr | \
        Combine(oddIdentifier).setParseAction(cvtRaw)

    if OLD_STYLE_KEYS:
        dictKey |= Combine(identifier + ZeroOrMore(
            White(' ') +
            (identifier + ~FollowedBy(Optional(White(' ')) + LineEnd()))))
        dictValue |= identifier.setParseAction(pythonize)
    else:
        dictKey |= identifier
        dictValue |= Or([
            delimitedList(identifier | quotedIdentifier,
                          delim=White(' '),
                          combine=True),
            Combine(
                delimitedList(identifier | quotedIdentifier,
                              delim=White(' '),
                              combine=True) +
                Optional(
                    White(' ') + originalTextFor(nestedExpr('{', '}')).
                    setParseAction(extractText))).setParseAction(cvtRaw)
        ])

    ParserElement.setDefaultWhitespaceChars(' \t')
    dictEntry = Group(dictKey +
                      Optional(White(' ').suppress() +
                               dictValue).setParseAction(noneDefault) +
                      Optional(White(' ').suppress()) + LineEnd().suppress())
    dictStr << (lbrace + ZeroOrMore(dictEntry) + rbrace)
    dictStr.setParseAction(cvtDict)
    ParserElement.setDefaultWhitespaceChars(' \t\r\n')

    setEntry = identifier.setParseAction(
        pythonize) | quotedString.setParseAction(removeQuotes) | dictStr
    setStr << (lbrace + delimitedList(setEntry, delim=White()) + rbrace)
    setStr.setParseAction(cvtTuple)

    objEntry = dictStr.ignore(pythonStyleComment)
    objStr << delimitedList(objEntry, delim=LineEnd())

    return objStr.parseString(text)[0]
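
One technique worth noting in braces_parser is capturing a nested-brace region verbatim with originalTextFor(nestedExpr(...)). A self-contained sketch of just that trick:

from pyparsing import Word, alphas, nestedExpr, originalTextFor

key = Word(alphas)
raw_block = originalTextFor(nestedExpr('{', '}'))
entry = key + raw_block

print(entry.parseString('options { a { b 1 } c 2 }').asList())
# -> roughly ['options', '{ a { b 1 } c 2 }']
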
Example #43
0
def create_bnf(allow_tuple=False, free_word=False):
    cvt_int = lambda toks: int(toks[0])
    cvt_real = lambda toks: float(toks[0])
    cvt_bool =  lambda toks: toks[0].lower() == 'true'
    cvt_none =  lambda toks: [None]
    cvt_tuple = lambda toks : tuple(toks.asList())
    cvt_dict = lambda toks: dict(toks.asList())


    # define punctuation as suppressed literals
    (lparen, rparen, lbrack, rbrack,
     lbrace, rbrace, colon) = map(Suppress,"()[]{}:")

    integer = Combine(Optional(oneOf("+ -")) + Word(nums)).setName("integer")
    integer.setParseAction(cvt_int)

    boolean = Keyword("True", caseless = True) | Keyword("False", caseless = True)
    boolean.setParseAction(cvt_bool)

    none = Keyword("None", caseless = True)
    none.setParseAction(cvt_none)

    real = Combine(Optional(oneOf("+ -"))+ Word(nums)
                   + "." + Optional(Word(nums))
                   + Optional("e" + Optional(oneOf("+ -"))
                              + Word(nums))).setName("real")
    real.setParseAction(cvt_real)

    tuple_str = Forward()
    list_str = Forward()
    dict_str = Forward()

    if free_word:
        string = Word(alphas8bit + "_-/.+**" + alphanums)

    else:
        string = Word(alphas8bit + alphas, alphas8bit + alphanums + "_" )

    list_item = (none | boolean | real | integer | list_str | tuple_str
                 | dict_str
                 | quotedString.setParseAction(removeQuotes)
                 | string )
    list_item2 = list_item | Empty().setParseAction(lambda: [None])

    tuple_inner = Optional(delimitedList(list_item)) + Optional(Suppress(","))
    tuple_inner.setParseAction(cvt_tuple)
    tuple_str << (Suppress("(") + tuple_inner  + Suppress(")"))

    list_inner = Optional(delimitedList(list_item) + Optional(Suppress(",")))
    list_inner.setParseAction(lambda toks: list(toks))
    list_str << (lbrack + list_inner + rbrack)

    dict_entry = Group(list_item + colon + list_item2)
    dict_inner = delimitedList(dict_entry) + Optional(Suppress(","))
    dict_inner.setParseAction(cvt_dict)
    dict_str << (lbrace + Optional(dict_inner) + rbrace)

    dict_or_tuple =  dict_inner | tuple_inner

    if allow_tuple:
        return dict_or_tuple

    else:
        return dict_inner
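
# Hedged usage sketch (illustrative, not from the original module): with allow_tuple
# left False, the returned element parses a flat "key : value" list into a dict, e.g.
#   create_bnf().parseString("a : 1, b : 'text', c : 3.5")[0]
#   -> roughly {'a': 1, 'b': 'text', 'c': 3.5}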


# define punctuation as suppressed literals
lparen, rparen, lbrack, rbrack, lbrace, rbrace, colon = map(
    Suppress, "()[]{}:")
identifier = Regex(r"[a-zA-Z_][\w]+")
integer = Regex(r"[+-]?\d+").setName("integer").setParseAction(cvtInt)
real = Regex(r"[+-]?\d+\.\d*([Ee][+-]?\d+)?").setName("real").setParseAction(
    cvtReal)
tupleStr = Forward()
listStr = Forward()
dictStr = Forward()

unicodeString.setParseAction(lambda t: t[0][2:-1].decode('unicode-escape'))
quotedString.setParseAction(lambda t: t[0][1:-1])
boolLiteral = oneOf("True False").setParseAction(cvtBool)
noneLiteral = Literal("None").setParseAction(replaceWith(None))

listItem = real | integer | quotedString | unicodeString | boolLiteral | noneLiteral | Group(
    listStr) | tupleStr | dictStr

tupleStr << (Suppress("(") + Optional(delimitedList(listItem)) +
             Optional(Suppress(",")) + Suppress(")"))
tupleStr.setParseAction(cvtTuple)

listStr << (lbrack +
            Optional(delimitedList(listItem) + Optional(Suppress(","))) +
            rbrack)

dictEntry = Group(listItem + colon + listItem)
Example #45
0
            elif t == '-':  # Next token needs to be negated
                negation = True
            else:  # Append to query the token
                if negation:
                    t = ~t
                if operation == 'or':
                    query |= t
                else:
                    query &= t
    return query


NO_BRTS = printables.replace('(', '').replace(')', '')
SINGLE = Word(NO_BRTS.replace('*', ''))
WILDCARDS = Optional('*') + SINGLE + Optional('*') + WordEnd(wordChars=NO_BRTS)
QUOTED = quotedString.setParseAction(removeQuotes)

OPER_AND = CaselessLiteral('and')
OPER_OR = CaselessLiteral('or')
OPER_NOT = '-'

TERM = Combine(Optional(Word(alphas).setResultsName('meta') + ':') +
               (QUOTED.setResultsName('query') |
                WILDCARDS.setResultsName('query')))
TERM.setParseAction(createQ)

EXPRESSION = operatorPrecedence(TERM, [
    (OPER_NOT, 1, opAssoc.RIGHT),
    (OPER_OR, 2, opAssoc.LEFT),
    (Optional(OPER_AND, default='and'), 2, opAssoc.LEFT)])
EXPRESSION.setParseAction(unionQ)
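
Stripped of the application-specific createQ/unionQ actions, the operator-precedence search grammar above can be exercised on its own. A self-contained sketch (the printed parse tree is approximate):

from pyparsing import (CaselessLiteral, Combine, Optional, Word, WordEnd,
                       alphas, opAssoc, operatorPrecedence, printables,
                       quotedString, removeQuotes)

NO_BRTS = printables.replace('(', '').replace(')', '')
SINGLE = Word(NO_BRTS.replace('*', ''))
WILDCARDS = Optional('*') + SINGLE + Optional('*') + WordEnd(wordChars=NO_BRTS)
QUOTED = quotedString.setParseAction(removeQuotes)

TERM = Combine(Optional(Word(alphas) + ':') + (QUOTED | WILDCARDS))
EXPRESSION = operatorPrecedence(TERM, [
    ('-', 1, opAssoc.RIGHT),
    (CaselessLiteral('or'), 2, opAssoc.LEFT),
    (Optional(CaselessLiteral('and'), default='and'), 2, opAssoc.LEFT)])

print(EXPRESSION.parseString('title:foo and -bar').asList())
# -> roughly [['title:foo', 'and', ['-', 'bar']]]
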
Example #46
0
REGEX_SPECIAL_CHARS = r'([\.\*\+\?\|\(\)\{\}\[\]])'
REGEX_LOG_FORMAT_VARIABLE = r'\$([a-zA-Z0-9\_]+)'
REGEX_CONFIG_INCLUDES = r'include (.+);'
LOG_FORMAT_COMBINED = '$remote_addr - $remote_user [$time_local] ' \
                      '"$request" $status $body_bytes_sent ' \
                      '"$http_referer" "$http_user_agent"'
LOG_FORMAT_COMMON   = '$remote_addr - $remote_user [$time_local] ' \
                      '"$request" $status $body_bytes_sent ' \
                      '"$http_x_forwarded_for"'

# common parser element
semicolon = Literal(';').suppress()
# nginx string parameter can contain any character except: { ; " '
parameter = Word(''.join(c for c in printables if c not in set('{;"\'')))
# which can also be quoted
parameter = parameter | quotedString.setParseAction(removeQuotes)
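# Hedged sanity check of the parameter element above (illustrative, not from the source):
#   parameter.parseString("combined")[0]   -> 'combined'
#   parameter.parseString("'a b;c'")[0]    -> 'a b;c'   (quotes stripped)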


def detect_config_path():
    """
    Get nginx configuration file path based on `nginx -V` output
    :return: detected nginx configuration file path
    """
    try:
        proc = subprocess.Popen(['nginx', '-V'], stderr=subprocess.PIPE)
    except OSError:
        error_exit(
            'Access log file or format was not set and nginx config file cannot be detected. '
            + 'Perhaps nginx is not in your PATH?')

    stdout, stderr = proc.communicate()
    def __repr__(self):
        return repr(self.query)

    def express(self, env):
        return self.query.express(env)

def domain_expression_action(*args):
    print 'd:', args, [type(i) for i in args]

def value_list_action(*args):
    print 'v:', args, [type(i) for i in args]

integer_value = Regex(r'[-]?\d+').setParseAction(NumericIntegerAction)
float_value = Regex(r'[-]?\d+(\.\d*)?([eE]\d+)?').setParseAction(NumericFloatAction)
value_chars = Word(alphas + alphas8bit, alphanums + alphas8bit + '%.-_*;:')
string_value = (value_chars | quotedString.setParseAction(removeQuotes)).setParseAction(StringAction)
# value can contain any string once it's quoted

value = string_value | integer_value | float_value
value_list = (string_value ^ delimitedList(string_value) ^ OneOrMore(string_value))

binop = oneOf('= == != <> < <= > >= not like contains has ilike '
              'icontains ihas is').setName('binop')
domain = Word(alphas, alphanums).setName('domain')
domain_values = Group(value_list.copy())
domain_expression = (domain + Literal('=') + Literal('*') + stringEnd) \
                    | (domain + binop + domain_values + stringEnd)
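# Illustrative strings the domain_expression above is meant to accept (hypothetical values):
#   "genus = *"               (the wildcard form)
#   "accession like '2003%'"  (domain, binop, quoted value)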

AND_ = CaselessLiteral("and")
OR_  = CaselessLiteral("or")
NOT_ = CaselessLiteral("not") | Literal('!')
    def __init__(self):


        self.json_query = {'query':{}, 'and': [], 'or': []}
        self.tokens = None
        #--------------------------------------------------------------------------------------
        # <integer> ::= 0-9
        # <double>  ::= 0-9 ('.' 0-9)
        # <number>  ::= <integer> | <double>
        #--------------------------------------------------------------------------------------
        integer = Regex(r'-?[0-9]+') # Word matches space for some reason
        double = Regex(r'-?[0-9]+\.?[0-9]*')
        number = double | integer

        #--------------------------------------------------------------------------------------
        # <python-string>   ::= (String surrounded by double-quotes)
        # <wildcard-string> ::= <python-string>
        # <limited-string>  ::= '"' a..z A..Z 9..9 _ . '"' (alpha nums and ._ surrounded by double quotes)
        # <field>           ::= <limited-string> | "*"
        # <coords>          ::= "LAT" <number> "LON" <number>
        # <units>           ::= ('km' | 'mi' | 'nm')
        # <distance>        ::= REGEX(([0-9]*\.?[0-9]*)(km|mi|nm)?)
        #--------------------------------------------------------------------------------------
        python_string = quotedString.setParseAction(removeQuotes)
        wildcard_string = python_string
        limited_string = Regex(r'("(?:[a-zA-Z0-9_\.])*"|\'(?:[a-zA-Z0-9_\.]*)\')').setParseAction(removeQuotes)
        field = limited_string ^ CaselessLiteral('"*"').setParseAction(removeQuotes)
        coords = CaselessLiteral("LAT") + number + CaselessLiteral("LON") + number
        units = CaselessLiteral('km') | CaselessLiteral('mi')
        distance = number + units
        distance.setParseAction( lambda x : self.frame.update({'dist' : float(x[0]), 'units' : x[1]}))


        #--------------------------------------------------------------------------------------
        # Date
        #--------------------------------------------------------------------------------------
        date = python_string
        
        #--------------------------------------------------------------------------------------
        # <query-filter> ::= "FILTER" <python-string>
        # <index-name>   ::= <python-string>
        # <resource-id>  ::= '"' a..z A..Z 0..9 $ _ -'"' (alpha nums surrounded by double quotes)
        # <collection-id> ::= <resource-id>
        #--------------------------------------------------------------------------------------
        query_filter = CaselessLiteral("FILTER") + python_string
        # Add the filter to the frame object
        query_filter.setParseAction(lambda x : self.frame.update({'filter' : x[1]}))
        index_name = MatchFirst(python_string)
        # Add the index to the frame object
        index_name.setParseAction(lambda x : self.frame.update({'index' : x[0]}))
        resource_id = Regex(r'("(?:[a-zA-Z0-9\$_-])*"|\'(?:[a-zA-Z0-9\$_-]*)\')').setParseAction(removeQuotes)
        collection_id = resource_id


        #--------------------------------------------------------------------------------------
        # <from-statement> ::= "FROM" <number> 
        # <to-statement>   ::= "TO" <number>
        #--------------------------------------------------------------------------------------
        from_statement = CaselessLiteral("FROM") + number
        from_statement.setParseAction(lambda x : self.frame.update({'from' : x[1]}))
        to_statement = CaselessLiteral("TO") + number
        to_statement.setParseAction(lambda x : self.frame.update({'to' : x[1]}))


        #--------------------------------------------------------------------------------------
        # <date-from-statement> ::= "FROM" <date> 
        # <date-to-statement>   ::= "TO" <date>
        #--------------------------------------------------------------------------------------
        date_from_statement = CaselessLiteral("FROM") + date
        date_from_statement.setParseAction(lambda x : self.frame.update({'from' : x[1]}))
        date_to_statement = CaselessLiteral("TO") + date
        date_to_statement.setParseAction(lambda x : self.frame.update({'to' : x[1]}))


        #--------------------------------------------------------------------------------------
        # <time-query> ::= "TIME FROM" <date> "TO" <date>
        #--------------------------------------------------------------------------------------
        time_query = CaselessLiteral("TIME") + Optional(date_from_statement) + Optional(date_to_statement)
        time_query.setParseAction(lambda x : self.time_frame())
           # time.mktime(dateutil.parser.parse(x[2])), 'to':time.mktime(dateutil.parser.parse(x[4]))}}))

        #--------------------------------------------------------------------------------------
        # <time-bounds> ::= "TIMEBOUNDS" <from-statement> <to-statement>
        #--------------------------------------------------------------------------------------
        time_bounds = CaselessLiteral("TIMEBOUNDS") + date_from_statement + date_to_statement
        time_bounds.setParseAction(lambda x : self.time_bounds_frame())

        #--------------------------------------------------------------------------------------
        # <vertical-bounds> ::= "VERTICAL" <from-statement> <to-statement>        
        #--------------------------------------------------------------------------------------
        vertical_bounds = CaselessLiteral("VERTICAL") + from_statement + to_statement
        vertical_bounds.setParseAction(lambda x : self.vertical_bounds_frame())
        
        #--------------------------------------------------------------------------------------
        # <range-query>  ::= "VALUES" [<from-statement>] [<to-statement>]
        #--------------------------------------------------------------------------------------
        range_query = CaselessLiteral("VALUES") + Optional(from_statement) + Optional(to_statement)
        # Add the range to the frame object
        range_query.setParseAction(lambda x : self.range_frame())

        #--------------------------------------------------------------------------------------
        # <geo-distance> ::= "DISTANCE" <distance> "FROM" <coords>
        # <geo-bbox>     ::= "BOX" "TOP-LEFT" <coords> "BOTTOM-RIGHT" <coords>
        #--------------------------------------------------------------------------------------
        geo_distance = CaselessLiteral("DISTANCE") + distance + CaselessLiteral("FROM") + coords
        geo_distance.setParseAction(lambda x : self.frame.update({'lat': float(x[5]), 'lon':float(x[7])}))
        geo_bbox = CaselessLiteral("BOX") + CaselessLiteral("TOP-LEFT") + coords + CaselessLiteral("BOTTOM-RIGHT") + coords
        geo_bbox.setParseAction(lambda x : self.frame.update({'top_left':[float(x[5]),float(x[3])], 'bottom_right':[float(x[10]),float(x[8])]}))

        #--------------------------------------------------------------------------------------
        # <field-query>  ::= <wildcard-string>
        # <term-query>   ::= "IS" <field-query>
        # <fuzzy-query>  ::= "LIKE" <field-query>
        # <match-query>  ::= "MATCH" <field-query>
        # <geo-query>    ::= "GEO" ( <geo-distance> | <geo-bbox> )
        #--------------------------------------------------------------------------------------
        field_query = wildcard_string
        term_query = CaselessLiteral("IS") + field_query
        term_query.setParseAction(lambda x : self.frame.update({'value':x[1]}))
        
        geo_query = CaselessLiteral("GEO") + ( geo_distance | geo_bbox )

        fuzzy_query = CaselessLiteral("LIKE") + field_query
        fuzzy_query.setParseAction(lambda x : self.frame.update({'fuzzy':x[1]}))
        match_query = CaselessLiteral("MATCH") + field_query
        match_query.setParseAction(lambda x : self.frame.update({'match':x[1]}))

        #--------------------------------------------------------------------------------------
        # <limit-parameter>  ::= "LIMIT" <integer>
        # <depth-parameter>  ::= "DEPTH" <integer>
        # <order-parameter>  ::= "ORDER" "BY" <limited-string>
        # <offset-parameter> ::= "SKIP" <integer>
        # <query-parameter>  ::= <order-paramater> | <limit-parameter>
        #--------------------------------------------------------------------------------------
        limit_parameter = CaselessLiteral("LIMIT") + integer
        limit_parameter.setParseAction(lambda x: self.json_query.update({'limit' : int(x[1])}))
        depth_parameter = CaselessLiteral("DEPTH") + integer
        depth_parameter.setParseAction(lambda x: self.frame.update({'depth' : int(x[1])}))
        order_parameter = CaselessLiteral("ORDER") + CaselessLiteral("BY") + limited_string
        order_parameter.setParseAction(lambda x: self.json_query.update({'order' : {x[2] : 'asc'}}))
        offset_parameter = CaselessLiteral("SKIP") + integer
        offset_parameter.setParseAction(lambda x : self.json_query.update({'skip' : int(x[1])}))
        query_parameter = limit_parameter | order_parameter | offset_parameter

        #--------------------------------------------------------------------------------------
        # <search-query>      ::= "SEARCH" <field> (<range-query> | <term-query> | <fuzzy-query> | <match-query> | <time-query> | <time-bounds> | <vertical-bounds> | <geo-query>) "FROM" <index-name> [<query-parameter>]*
        # <collection-query>  ::= "IN <collection-id>"
        # <association-query> ::= "BELONGS TO" <resource-id> [ <depth-parameter> ]
        # <owner-query>       ::= "HAS" <resource-id> [ <depth-parameter> ]
        # <query>             ::= <search-query> | <association-query> | <collection-query> | <owner-query>
        #--------------------------------------------------------------------------------------
        search_query = CaselessLiteral("SEARCH") + field + (range_query | term_query | fuzzy_query | match_query | vertical_bounds | time_bounds | time_query | geo_query) + CaselessLiteral("FROM") + index_name
        # Add the field to the frame object
        search_query.setParseAction(lambda x : self.frame.update({'field' : x[1]}))
        collection_query = CaselessLiteral("IN") + collection_id
        collection_query.setParseAction(lambda x : self.frame.update({'collection': x[1]}))
        association_query = CaselessLiteral("BELONGS") + CaselessLiteral("TO") + resource_id + Optional(depth_parameter)
        # Add the association to the frame object
        association_query.setParseAction(lambda x : self.frame.update({'association':x[2]}))
        owner_query = CaselessLiteral("HAS") + resource_id + Optional(depth_parameter)
        owner_query.setParseAction(lambda x : self.frame.update({'owner':x[1]}))
        query = search_query | association_query | collection_query | owner_query
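        # Illustrative query strings this rule is meant to accept (hypothetical field and index names):
        #   SEARCH "model" IS "abc*" FROM "models"
        #   SEARCH "temperature" VALUES FROM 10 TO 20 FROM "sensor_index"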

        #--------------------------------------------------------------------------------------
        # <primary-query>  ::= <query> [<query-filter>]
        # <atom>           ::= <query>
        # <intersection>   ::= "AND" <atom>
        # <union>          ::= "OR" <atom>
        # <sentence>       ::= <primary-query> [<intersection>]* [<union>]*
        #--------------------------------------------------------------------------------------
        primary_query = query + Optional(query_filter)
        # Set the primary query on the json_query to the frame and clear the frame
        primary_query.setParseAction(lambda x : self.push_frame())
        atom = query
        intersection = CaselessLiteral("AND") + atom
        # Add an AND operation to the json_query and clear the frame
        intersection.setParseAction(lambda x : self.and_frame())
        union = CaselessLiteral("OR") + atom
        # Add an OR operation to the json_query and clear the frame
        union.setParseAction(lambda x : self.or_frame())

        self.sentence = primary_query + (intersection ^ union)*(0,None) + query_parameter*(0,None)
Example #49
0
def _create_field_parser(metadata):
    """Create a parser that can handle field definitions.

    BNF-like grammar::

        rule       ::= [pid | extend | override]
                       json_id ["," aliases]":"
                           body
        json_id    ::= (letter|"_") (letter|digit|_)*
        aliases    ::= json_id ["," aliases]

        pid        ::= @persistent_identifier( level )
        extend     ::= @extend
        override   ::= @override
        hidden     ::= @hidden

        body       ::= (creator* | derived | calculated) (extensions)*

        creator    ::= [decorators] format "," tag "," expr
        derived    ::= [decorators] expr
        calculated ::= [decorators] expr

    To check the syntax of the parser extensions or decorators, see
    :mod:`jsonalchemy.jsonext.parsers`
    """
    indent_stack = [1]

    # Independent/special decorators
    persistent_identifier = (
        Keyword('@persistent_identifier').suppress() + nestedExpr()
    ).setResultsName('pid').setParseAction(lambda toks: int(toks[0][0]))
    override = Keyword('@override').suppress()\
        .setResultsName('override')\
        .setParseAction(lambda toks: True)
    extend = Keyword('@extend').suppress()\
        .setResultsName('extend')\
        .setParseAction(lambda toks: True)
    hidden = Keyword('@hidden').suppress()\
        .setResultsName('hidden')\
        .setParseAction(lambda toks: True)
    rule_decorators = (Optional(persistent_identifier) & Optional(override)
                       & Optional(extend) & Optional(hidden))

    # Field definition decorators
    field_decorators = Each([
        Optional(parser.parse_element(indent_stack))
        for parser in six.itervalues(metadata.parsers)
        if issubclass(parser, DecoratorBaseExtensionParser)
    ])

    # Creator rules
    creator_body = (Optional(field_decorators).setResultsName('decorators') +
                    Word(alphas, alphanums + '_') + Literal(',').suppress() +
                    quotedString.setParseAction(removeQuotes) +
                    Literal(',').suppress() +
                    PYTHON_ALLOWED_EXPR).setParseAction(
                        lambda toks: {
                            'source_format': toks[-3],
                            'source_tags': toks[-2].split(' '),
                            'function': compile(toks[-1].strip(), '', 'eval'),
                            'type': 'creator',
                            'decorators': toks.decorators.asDict()
                        }).setResultsName('creator_def', listAllMatches=True)
    creator = (Keyword('creator:').suppress() +
               indentedBlock(OneOrMore(creator_body), indent_stack))

    # Derived and calculated rules
    der_calc_body = (Optional(field_decorators).setResultsName('decorators') +
                     PYTHON_ALLOWED_EXPR)
    derived = (Keyword('derived:').suppress() +
               indentedBlock(der_calc_body, indent_stack)).setParseAction(
                   lambda toks: {
                       'source_format': 'derived',
                       'source_tags': None,
                       'function': compile(toks[-1].strip(), '', 'eval'),
                       'type': 'derived',
                       'decorators': toks.decorators.asDict()
                   }).setResultsName('derived_def')
    calculated = (Keyword('calculated:').suppress() +
                  indentedBlock(der_calc_body, indent_stack)).setParseAction(
                      lambda toks: {
                          'source_format': 'calculated',
                          'source_tags': None,
                          'function': compile(toks[-1].strip(), '', 'eval'),
                          'type': 'calculated',
                          'decorators': toks.decorators.asDict()
                      }).setResultsName('calculated_def')

    rule_sections = [
        Optional(creator | derived | calculated),
    ]
    rule_sections.extend([
        Optional(parser.parse_element(indent_stack))
        for parser in six.itervalues(metadata.parsers)
        if issubclass(parser, FieldBaseExtensionParser)
    ])

    json_id = (IDENT +
               Optional(Suppress(',') +
                        delimitedList(Word(alphanums + '_'))) +
               Suppress(':')
               ).setResultsName('field')\
        .setParseAction(lambda toks: {'json_id': toks[0],
                                      'aliases': toks[1:]})

    rule = Group(
        Optional(rule_decorators) + json_id +
        indentedBlock(Each(rule_sections), indent_stack))

    return OneOrMore(COMMENT.suppress() | rule)
Example #50
0
 def parse_element(cls, indent_stack):
     return (Keyword("@depends_on").suppress() +
             Literal('(').suppress() +
             delimitedList(quotedString.setParseAction(removeQuotes)) +
             Literal(')').suppress()
             ).setResultsName("depends_on")
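
A hedged usage sketch of the element built above (indent_stack is not used by this particular decorator); the grammar is rebuilt standalone here for illustration:

from pyparsing import Keyword, Literal, delimitedList, quotedString, removeQuotes

depends_on = (Keyword("@depends_on").suppress() +
              Literal('(').suppress() +
              delimitedList(quotedString.setParseAction(removeQuotes)) +
              Literal(')').suppress()).setResultsName("depends_on")

print(depends_on.parseString('@depends_on("title", "authors")').asList())
# -> ['title', 'authors']
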
Example #51
0
    def __init__(self):

        self.json_query = {'query': {}, 'and': [], 'or': []}
        self.tokens = None
        #--------------------------------------------------------------------------------------
        # <integer> ::= 0-9
        # <double>  ::= 0-9 ('.' 0-9)
        # <number>  ::= <integer> | <double>
        #--------------------------------------------------------------------------------------
        integer = Regex(r'-?[0-9]+')  # Word matches space for some reason
        double = Regex(r'-?[0-9]+\.?[0-9]*')
        number = double | integer

        #--------------------------------------------------------------------------------------
        # <python-string>   ::= (String surrounded by double-quotes)
        # <wildcard-string> ::= <python-string>
        # <limited-string>  ::= '"' a..z A..Z 9..9 _ . '"' (alpha nums and ._ surrounded by double quotes)
        # <field>           ::= <limited-string> | "*"
        # <coords>          ::= "LAT" <number> "LON" <number>
        # <units>           ::= ('km' | 'mi' | 'nm')
        # <distance>        ::= REGEX(([0-9]*\.?[0-9]*)(km|mi|nm)?)
        #--------------------------------------------------------------------------------------
        python_string = quotedString.setParseAction(removeQuotes)
        wildcard_string = python_string
        limited_string = Regex(
            r'("(?:[a-zA-Z0-9_\.])*"|\'(?:[a-zA-Z0-9_\.]*)\')').setParseAction(
                removeQuotes)
        field = limited_string ^ CaselessLiteral('"*"').setParseAction(
            removeQuotes)
        coords = CaselessLiteral("LAT") + number + CaselessLiteral(
            "LON") + number
        units = CaselessLiteral('km') | CaselessLiteral('mi')
        distance = number + units
        distance.setParseAction(lambda x: self.frame.update({
            'dist': float(x[0]),
            'units': x[1]
        }))

        #--------------------------------------------------------------------------------------
        # Date
        #--------------------------------------------------------------------------------------
        date = python_string

        #--------------------------------------------------------------------------------------
        # <query-filter> ::= "FILTER" <python-string>
        # <index-name>   ::= <python-string>
        # <resource-id>  ::= '"' a..z A..Z 0..9 $ _ -'"' (alpha nums surrounded by double quotes)
        # <collection-id> ::= <resource-id>
        #--------------------------------------------------------------------------------------
        query_filter = CaselessLiteral("FILTER") + python_string
        # Add the filter to the frame object
        query_filter.setParseAction(
            lambda x: self.frame.update({'filter': x[1]}))
        index_name = MatchFirst(python_string)
        # Add the index to the frame object
        index_name.setParseAction(lambda x: self.frame.update({'index': x[0]}))
        resource_id = Regex(
            r'("(?:[a-zA-Z0-9\$_-])*"|\'(?:[a-zA-Z0-9\$_-]*)\')'
        ).setParseAction(removeQuotes)
        collection_id = resource_id

        #--------------------------------------------------------------------------------------
        # <from-statement> ::= "FROM" <number>
        # <to-statement>   ::= "TO" <number>
        #--------------------------------------------------------------------------------------
        from_statement = CaselessLiteral("FROM") + number
        from_statement.setParseAction(
            lambda x: self.frame.update({'from': x[1]}))
        to_statement = CaselessLiteral("TO") + number
        to_statement.setParseAction(lambda x: self.frame.update({'to': x[1]}))

        #--------------------------------------------------------------------------------------
        # <date-from-statement> ::= "FROM" <date>
        # <date-to-statement>   ::= "TO" <date>
        #--------------------------------------------------------------------------------------
        date_from_statement = CaselessLiteral("FROM") + date
        date_from_statement.setParseAction(
            lambda x: self.frame.update({'from': x[1]}))
        date_to_statement = CaselessLiteral("TO") + date
        date_to_statement.setParseAction(
            lambda x: self.frame.update({'to': x[1]}))

        #--------------------------------------------------------------------------------------
        # <time-query> ::= "TIME" [<date-from-statement>] [<date-to-statement>]
        #--------------------------------------------------------------------------------------
        time_query = CaselessLiteral("TIME") + Optional(
            date_from_statement) + Optional(date_to_statement)
        time_query.setParseAction(lambda x: self.time_frame())
        # time.mktime(dateutil.parser.parse(x[2])), 'to':time.mktime(dateutil.parser.parse(x[4]))}}))

        #--------------------------------------------------------------------------------------
        # <time-bounds> ::= "TIMEBOUNDS" <from-statement> <to-statement>
        #--------------------------------------------------------------------------------------
        time_bounds = CaselessLiteral(
            "TIMEBOUNDS") + date_from_statement + date_to_statement
        time_bounds.setParseAction(lambda x: self.time_bounds_frame())

        #--------------------------------------------------------------------------------------
        # <vertical-bounds> ::= "VERTICAL" <from-statement> <to-statement>
        #--------------------------------------------------------------------------------------
        vertical_bounds = CaselessLiteral(
            "VERTICAL") + from_statement + to_statement
        vertical_bounds.setParseAction(lambda x: self.vertical_bounds_frame())

        #--------------------------------------------------------------------------------------
        # <range-query>  ::= "VALUES" [<from-statement>] [<to-statement>]
        #--------------------------------------------------------------------------------------
        range_query = CaselessLiteral("VALUES") + Optional(
            from_statement) + Optional(to_statement)
        # Add the range to the frame object
        range_query.setParseAction(lambda x: self.range_frame())

        #--------------------------------------------------------------------------------------
        # <geo-distance> ::= "DISTANCE" <distance> "FROM" <coords>
        # <geo-bbox>     ::= "BOX" "TOP-LEFT" <coords> "BOTTOM-RIGHT" <coords>
        #--------------------------------------------------------------------------------------
        geo_distance = CaselessLiteral(
            "DISTANCE") + distance + CaselessLiteral("FROM") + coords
        geo_distance.setParseAction(
            lambda x: self.frame.update({
                'lat': float(x[5]),
                'lon': float(x[7])
            }))
        geo_bbox = CaselessLiteral("BOX") + CaselessLiteral(
            "TOP-LEFT") + coords + CaselessLiteral("BOTTOM-RIGHT") + coords
        geo_bbox.setParseAction(lambda x: self.frame.update({
            'top_left': [float(x[5]), float(x[3])],
            'bottom_right': [float(x[10]), float(x[8])]
        }))

        #--------------------------------------------------------------------------------------
        # <field-query>  ::= <wildcard-string>
        # <term-query>   ::= "IS" <field-query>
        # <fuzzy-query>  ::= "LIKE" <field-query>
        # <match-query>  ::= "MATCH" <field-query>
        # <geo-query>    ::= "GEO" ( <geo-distance> | <geo-bbox> )
        #--------------------------------------------------------------------------------------
        field_query = wildcard_string
        term_query = CaselessLiteral("IS") + field_query
        term_query.setParseAction(lambda x: self.frame.update({'value': x[1]}))

        geo_query = CaselessLiteral("GEO") + (geo_distance | geo_bbox)

        fuzzy_query = CaselessLiteral("LIKE") + field_query
        fuzzy_query.setParseAction(
            lambda x: self.frame.update({'fuzzy': x[1]}))
        match_query = CaselessLiteral("MATCH") + field_query
        match_query.setParseAction(
            lambda x: self.frame.update({'match': x[1]}))

        #--------------------------------------------------------------------------------------
        # <limit-parameter>  ::= "LIMIT" <integer>
        # <depth-parameter>  ::= "DEPTH" <integer>
        # <order-parameter>  ::= "ORDER" "BY" <limited-string>
        # <offset-parameter> ::= "SKIP" <integer>
        # <query-parameter>  ::= <order-parameter> | <limit-parameter> | <offset-parameter>
        #--------------------------------------------------------------------------------------
        limit_parameter = CaselessLiteral("LIMIT") + integer
        limit_parameter.setParseAction(
            lambda x: self.json_query.update({'limit': int(x[1])}))
        depth_parameter = CaselessLiteral("DEPTH") + integer
        depth_parameter.setParseAction(
            lambda x: self.frame.update({'depth': int(x[1])}))
        order_parameter = CaselessLiteral("ORDER") + CaselessLiteral(
            "BY") + limited_string
        order_parameter.setParseAction(
            lambda x: self.json_query.update({'order': {
                x[2]: 'asc'
            }}))
        offset_parameter = CaselessLiteral("SKIP") + integer
        offset_parameter.setParseAction(
            lambda x: self.json_query.update({'skip': int(x[1])}))
        query_parameter = limit_parameter | order_parameter | offset_parameter

        #--------------------------------------------------------------------------------------
        # <search-query>      ::= "SEARCH" <field> (<range-query> | <term-query> | <fuzzy-query> | <match-query> | <time-query> | <time-bounds> | <vertical-bounds> | <geo-query>) "FROM" <index-name> [<query-parameter>]*
        # <collection-query>  ::= "IN" <collection-id>
        # <association-query> ::= "BELONGS TO" <resource-id> [ <depth-parameter> ]
        # <owner-query>       ::= "HAS" <resource-id> [ <depth-parameter> ]
        # <query>             ::= <search-query> | <association-query> | <collection-query> | <owner-query>
        #--------------------------------------------------------------------------------------
        search_query = CaselessLiteral("SEARCH") + field + (
            range_query | term_query | fuzzy_query | match_query
            | vertical_bounds | time_bounds | time_query
            | geo_query) + CaselessLiteral("FROM") + index_name
        # Add the field to the frame object
        search_query.setParseAction(
            lambda x: self.frame.update({'field': x[1]}))
        collection_query = CaselessLiteral("IN") + collection_id
        collection_query.setParseAction(
            lambda x: self.frame.update({'collection': x[1]}))
        association_query = CaselessLiteral("BELONGS") + CaselessLiteral(
            "TO") + resource_id + Optional(depth_parameter)
        # Add the association to the frame object
        association_query.setParseAction(
            lambda x: self.frame.update({'association': x[2]}))
        owner_query = CaselessLiteral("HAS") + resource_id + Optional(
            depth_parameter)
        owner_query.setParseAction(
            lambda x: self.frame.update({'owner': x[1]}))
        query = search_query | association_query | collection_query | owner_query

        #--------------------------------------------------------------------------------------
        # <primary-query>  ::= <query> [<query-filter>]
        # <atom>           ::= <query>
        # <intersection>   ::= "AND" <atom>
        # <union>          ::= "OR" <atom>
        # <sentence>       ::= <primary-query> [<intersection>]* [<union>]*
        #--------------------------------------------------------------------------------------
        primary_query = query + Optional(query_filter)
        # Set the primary query on the json_query to the frame and clear the frame
        primary_query.setParseAction(lambda x: self.push_frame())
        atom = query
        intersection = CaselessLiteral("AND") + atom
        # Add an AND operation to the json_query and clear the frame
        intersection.setParseAction(lambda x: self.and_frame())
        union = CaselessLiteral("OR") + atom
        # Add an OR operation to the json_query and clear the frame
        union.setParseAction(lambda x: self.or_frame())

        self.sentence = primary_query + (intersection ^ union) * (
            0, None) + query_parameter * (0, None)
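# A minimal, self-contained sketch (illustrative only; the names below are
# hypothetical) of the pattern used throughout the grammar above: parse
# actions that mutate a shared dict as the query string is consumed.
from pyparsing import CaselessLiteral, Regex

frame = {}
integer = Regex(r'-?[0-9]+')
limit_parameter = CaselessLiteral("LIMIT") + integer
limit_parameter.setParseAction(lambda x: frame.update({'limit': int(x[1])}))
limit_parameter.parseString("LIMIT 25")
print(frame)  # {'limit': 25}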
Example #52
0
from HTMLParser import HTMLParser  # Python 2 stdlib (html.parser.HTMLParser in Python 3)
from urllib import unquote         # Python 2 stdlib (urllib.parse.unquote in Python 3)
# parse dict-like syntax    
from pyparsing import (Suppress, Regex, quotedString, Word, alphas, Group, alphanums, oneOf, Forward, Optional, dictOf, delimitedList, removeQuotes)

LBRACK,RBRACK,LBRACE,RBRACE,COLON,COMMA = map(Suppress,"[]{}:,")
integer = Regex(r"[+-]?\d+").setParseAction(lambda t:int(t[0]))
real = Regex(r"[+-]?\d+\.\d*").setParseAction(lambda t:float(t[0]))
string_ = Word(alphas,alphanums+"_") | quotedString.setParseAction(removeQuotes)
bool_ = oneOf("true false").setParseAction(lambda t: t[0]=="true")
jsParser = Forward()

key = string_
dict_ = LBRACE - Optional(dictOf(key+COLON, jsParser+Optional(COMMA))) + RBRACE
list_ = LBRACK - Optional(delimitedList(jsParser)) + RBRACK
# bool_ must be tried before string_, otherwise bare true/false are swallowed
# by Word(alphas, ...) and never converted to booleans
jsParser << (real | integer | bool_ | string_ | Group(list_ | dict_))
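
# Hedged usage sketch (illustrative, not from the original source): the
# grammar above parses small JS/JSON-style literals into nested ParseResults,
# with numbers already converted by the parse actions on `real` and `integer`.
print(jsParser.parseString('{a: 1, b: "two", c: [2.5, 3]}'))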

class WebParser(HTMLParser):
	def __init__(self):
		HTMLParser.__init__(self)
		self.stk = []
		self.result = {}

	def handle_starttag(self, tag, attrs):
		self.stk.append(tag)
	def handle_endtag(self, tag):
		self.stk.pop()
	def handle_data(self, data):
		b = 'var hClientFlashVars ='
		if self.stk and self.stk[-1].lower() != 'script':
			return
Example #53
0
 def __init__(self):
     self.queryparts = {}
     
     stringValue = quotedString.setParseAction(removeQuotes)
     intValue = Word(nums).setParseAction(lambda t:int(t[0]))
     floatValue = Combine(Word(nums) + "." + Word(nums)).setParseAction(lambda t:float(t[0]))
     value = stringValue | floatValue | intValue
     valueList = delimitedList(value)
     openparenthesis = Suppress(Literal("("))
     closeparenthesis = Suppress(Literal(")"))
     opensquarebracket = Suppress(Literal("["))
     closesquarebracket = Suppress(Literal("]"))
     comma = Suppress(Literal(","))
     field = quotedString.setParseAction(removeQuotes)
     fieldList = delimitedList(field)
     
     BQLType = CaselessKeyword("$double") | CaselessKeyword("$string") | \
               CaselessKeyword("$object") | CaselessKeyword("$array") | \
               CaselessKeyword("$binary") | CaselessKeyword("$bool") | \
               CaselessKeyword("$date") | CaselessKeyword("$null") | \
               CaselessKeyword("$int32") | CaselessKeyword("$int") | \
               CaselessKeyword("$int64") | CaselessKeyword("$timestamp") | \
               CaselessKeyword("$exists") | CaselessKeyword("$nexists")
     
     BQLCompareOperator = CaselessKeyword("$lt") | CaselessKeyword("$lte") | \
                          CaselessKeyword("$gt") | CaselessKeyword("$gte") | \
                          CaselessKeyword("$eq") | CaselessKeyword("$neq") | \
                          CaselessKeyword("$regex")
                          
     
     BQLIncludeOperator = CaselessKeyword("$in") | CaselessKeyword("$nin")
     
     
     typecheck = BQLType + openparenthesis + Optional(fieldList) + closeparenthesis
     typecheck.setParseAction(self.parseType)
     
     comparison = BQLCompareOperator + openparenthesis + field + comma + value + closeparenthesis
     comparison.setParseAction(self.parseComparison)
     
     inclusion = BQLIncludeOperator + openparenthesis + field + comma + \
                 opensquarebracket + Optional(valueList) + closesquarebracket + closeparenthesis
     inclusion.setParseAction(self.parseInclusion)
     
     BQLTokens = ZeroOrMore(typecheck | comparison | inclusion)
     
     #-----------------------------------------------------------------------
     # Query Select Statement Parsing
     #-----------------------------------------------------------------------
     
     SELECT = (Suppress(CaselessKeyword("Select")) + openparenthesis + \
              Optional(fieldList) + closeparenthesis)
     SELECT.setParseAction(self.parseSelect)
     
     #-----------------------------------------------------------------------
     # Query From Statement Parsing
     #-----------------------------------------------------------------------
     
     FROM = (Suppress(CaselessKeyword("From")) + openparenthesis + \
            Optional(fieldList) + closeparenthesis)
     FROM.setParseAction(self.parseFrom)
     
     #-----------------------------------------------------------------------
     # Query Where Statement Parsing
     #-----------------------------------------------------------------------
     
     AND = (Suppress(CaselessKeyword("And")) + openparenthesis + \
                 BQLTokens + closeparenthesis).setParseAction(self.parseAnd)
     
     OR = (Suppress(CaselessKeyword("Or")) + openparenthesis + \
                BQLTokens + closeparenthesis).setParseAction(self.parseOr)
     
     WHERE = (Suppress(CaselessKeyword("Where")) + BQLTokens).setParseAction(self.parseWhere)\
             + ZeroOrMore(OR | AND)
     
     #-----------------------------------------------------------------------
     # Resultset Management Statement Parsing
     #-----------------------------------------------------------------------
     
     # distinct
     DISTINCT = (Suppress(CaselessKeyword("Distinct")) + openparenthesis + \
                fieldList + closeparenthesis).setParseAction(self.parseDistinct)
     
     # limit
     LIMIT = (Suppress(CaselessKeyword("Limit")) + openparenthesis + \
                   intValue + closeparenthesis).setParseAction(self.parseLimit)
     
     # sort ascending / descending
     SORT = (CaselessKeyword("Asc") | CaselessKeyword("Desc")) + openparenthesis + \
               fieldList + closeparenthesis
     SORT.setParseAction(self.parseSort)
     
     CURSOR_CONTROL = ZeroOrMore(DISTINCT | LIMIT | SORT)
     
     #-----------------------------------------------------------------------
     # Query parser build up
     #-----------------------------------------------------------------------
     self.parser = SELECT + FROM + Optional(WHERE) + CURSOR_CONTROL + StringEnd()
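
# A minimal, self-contained sketch of the Select(...) parsing above
# (illustrative only; the original class and its parse* callbacks are not
# shown here, so a module-level dict stands in for self.queryparts):
from pyparsing import CaselessKeyword, Literal, Suppress, delimitedList, quotedString, removeQuotes

queryparts = {}
openparenthesis = Suppress(Literal("("))
closeparenthesis = Suppress(Literal(")"))
fieldList = delimitedList(quotedString.setParseAction(removeQuotes))

SELECT = Suppress(CaselessKeyword("Select")) + openparenthesis + fieldList + closeparenthesis
SELECT.setParseAction(lambda t: queryparts.update({'select': list(t)}))
SELECT.parseString('Select("name", "age")')
print(queryparts)  # {'select': ['name', 'age']}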
Example #54
0
        Literal("sfixed64") | 
        Literal("bool")     | 
        Literal("string")   | 
        Literal("bytes")  
).setResultsName("fieldType")

messageRecursive = Forward() 

msgName = Word(alphas).setResultsName("messageName")

fieldName = Word(alphas).setResultsName("fieldName")

default =   Literal("[")        + \
            Literal("default")  + \
            quotedString.setParseAction(
                removeQuotes
            ).setResultsName("defaultValue") +  \
            Literal("]")

fieldUsage = (
    Literal("required")     | \
    Literal("optional")
).setResultsName("fieldUsage")

tag = Word(nums).setResultsName("tag")

field = Group(
    fieldUsage              + \
    fieldType               + \
    fieldName               + \
    Literal("=")            + \
Example #55
0
            elif t == '-':  # Next tokens needs to be negated
                negation = True
            else:  # Append to query the token
                if negation:
                    t = ~t
                if operation == 'or':
                    query |= t
                else:
                    query &= t
    return query


NO_BRTS = printables.replace('(', '').replace(')', '')
SINGLE = Word(NO_BRTS.replace('*', ''))
WILDCARDS = Optional('*') + SINGLE + Optional('*') + WordEnd(wordChars=NO_BRTS)
QUOTED = quotedString.setParseAction(removeQuotes)

OPER_AND = CaselessLiteral('and')
OPER_OR = CaselessLiteral('or')
OPER_NOT = '-'

TERM = Combine(
    Optional(Word(alphas).setResultsName('meta') + ':') +
    (QUOTED.setResultsName('query') | WILDCARDS.setResultsName('query')))
TERM.setParseAction(createQ)

EXPRESSION = operatorPrecedence(
    TERM, [(OPER_NOT, 1, opAssoc.RIGHT), (OPER_OR, 2, opAssoc.LEFT),
           (Optional(OPER_AND, default='and'), 2, opAssoc.LEFT)])
EXPRESSION.setParseAction(unionQ)
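
# Self-contained sketch of the operator-precedence idea above (illustrative
# only; operatorPrecedence is the older name for infixNotation, and the
# simplified operand used here is hypothetical, with no Q-object actions):
from pyparsing import CaselessLiteral, Word, alphanums, infixNotation, opAssoc

TERM_DEMO = Word(alphanums)
EXPR_DEMO = infixNotation(TERM_DEMO, [
    ('-', 1, opAssoc.RIGHT),
    (CaselessLiteral('or'), 2, opAssoc.LEFT),
    (CaselessLiteral('and'), 2, opAssoc.LEFT),
])
print(EXPR_DEMO.parseString('foo and -bar or baz'))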
Example #56
0
# Projection
columnName = delimitedList(identifier, DOT, combine=True).setParseAction(Identifier)(
    "column"
)  # TODO: x AS y, x y, x `y`, x 'y', `x`, 'x'
columnNameList = Group(delimitedList(STAR | columnName)).setParseAction(ListValue)
tableName = delimitedList(identifier, DOT, combine=True).setParseAction(Identifier)("table")
tableNameList = Group(delimitedList(tableName)).setParseAction(ListValue)

whereExpr = Forward()  # WHERE

# TODO: indirect comparisons (e.g. "table1.field1.xyz = 3" becomes "table1.any(field1.xyz == 3)")
# TODO: math expression grammar (for both lval and rval)
equalityOp = OP_VAL_NULLSAFE_EQUAL ^ OP_EQUAL ^ OP_NOTEQUAL ^ OP_LT ^ OP_GT ^ OP_GTE ^ OP_LTE
likeOp = Optional(LOGOP_NOT) + OP_LIKE
betweenOp = Optional(LOGOP_NOT) + OP_BETWEEN  # [ NOT ] BETWEEN
stringValue = quotedString.setParseAction(StringValue)
realNumber = (
    Combine(
        Optional(sign)
        + (
            # decimal present
            ((Word(nums) + DOT + Optional(Word(nums)) | (DOT + Word(nums))) + Optional(E + Optional(sign) + Word(nums)))
            |
            # negative exp
            (Word(nums) + Optional(E + Optional(MINUS) + Word(nums)))
        )
    ).setParseAction(RealValue)
).setName(
    "real"
)  # .1, 1.2, 1.2e3, -1.2e+3, 1.2e-3
intNumber = (
Example #57
0
class SearchParser(object):
    """The parser for bauble.search.MapperSearch
    """

    numeric_value = Regex(r'[-]?\d+(\.\d*)?([eE]\d+)?').setParseAction(
        NumericToken)('number')
    unquoted_string = Word(alphanums + alphas8bit + '%.-_*;:')
    string_value = (quotedString.setParseAction(removeQuotes)
                    | unquoted_string).setParseAction(StringToken)('string')

    none_token = Literal('None').setParseAction(NoneToken)
    empty_token = Literal('Empty').setParseAction(EmptyToken)

    value_list = Forward()
    typed_value = (Literal("|") + unquoted_string + Literal("|") + value_list +
                   Literal("|")).setParseAction(TypedValueToken)

    value = (typed_value | numeric_value | none_token | empty_token
             | string_value).setParseAction(ValueToken)('value')
    value_list << Group(OneOrMore(value) ^ delimitedList(value)
                        ).setParseAction(ValueListAction)('value_list')

    domain = Word(alphas, alphanums)
    binop = oneOf('= == != <> < <= > >= not like contains has ilike '
                  'icontains ihas is')
    equals = Literal('=')
    star_value = Literal('*')
    domain_values = (value_list.copy())('domain_values')
    domain_expression = (
        (domain + equals + star_value + stringEnd)
        | (domain + binop + domain_values + stringEnd)
    ).setParseAction(DomainExpressionAction)('domain_expression')

    AND_ = WordStart() + (CaselessLiteral("AND") | Literal("&&")) + WordEnd()
    OR_ = WordStart() + (CaselessLiteral("OR") | Literal("||")) + WordEnd()
    NOT_ = WordStart() + (CaselessLiteral("NOT") | Literal('!')) + WordEnd()
    BETWEEN_ = WordStart() + CaselessLiteral("BETWEEN") + WordEnd()

    query_expression = Forward()('filter')
    identifier = Group(delimitedList(Word(alphas + '_', alphanums + '_'),
                                     '.')).setParseAction(IdentifierToken)
    ident_expression = (
        Group(identifier + binop + value).setParseAction(IdentExpressionToken)
        | (Literal('(') + query_expression +
           Literal(')')).setParseAction(ParenthesisedQuery))
    between_expression = Group(identifier + BETWEEN_ + value + AND_ +
                               value).setParseAction(BetweenExpressionAction)
    query_expression << infixNotation(
        (ident_expression | between_expression),
        [(NOT_, 1, opAssoc.RIGHT, SearchNotAction),
         (AND_, 2, opAssoc.LEFT, SearchAndAction),
         (OR_, 2, opAssoc.LEFT, SearchOrAction)])
    query = (domain + Keyword('where', caseless=True).suppress() +
             Group(query_expression) + stringEnd).setParseAction(QueryAction)

    statement = (query('query')
                 | domain_expression('domain')
                 | value_list('value_list')
                 ).setParseAction(StatementAction)('statement')

    def parse_string(self, text):
        '''Ask the pyparsing object to parse `text`.

        `text` can be a query, a domain expression, or a list of values.
        The `self.statement` pyparsing object parses the input text and
        returns a pyparsing.ParseResults object that represents the input.
        '''

        return self.statement.parseString(text)
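
# Hedged usage sketch (illustrative; the domain and column names below are
# hypothetical, and the *Action/*Token classes referenced by the grammar must
# be defined elsewhere in bauble's search module for this to run):
#
#   parser = SearchParser()
#   results = parser.parse_string('species where genus.epithet = "Maxillaria"')
#   print(results)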