class QueryParser(collections.OrderedDict):
    """Ordered mapping of parsed search-query tokens.

    Parses query strings made of whitespace-separated tokens such as
    ``sort:date``, ``tag:foo:bar``, ``from:2020/01``, ``type:image`` and
    stores the parse results as an OrderedDict (keyed by the leading
    keyword of each token, per ``pp.Dict``).
    """

    # tag:<identifier>
    TAG_TOKEN = (pp.Keyword("tag") + pp.Suppress(":") +
                 pp.Word(pp.alphas, pp.alphanums + "_"))
    # sort:<name|date|tag:...>[:<s|n|d>] [order:<asc|desc>]
    # defaults: comparison kind "s", order "desc"
    SORT_TOKEN = (pp.Keyword("sort") + pp.Suppress(":") +
                  (pp.Keyword("name") | pp.Keyword("date") | TAG_TOKEN) +
                  pp.Optional(
                      pp.Suppress(":") + pp.oneOf("s n d"),
                      default="s",
                  ) +
                  pp.Optional(
                      pp.Suppress(pp.Keyword("order")) + pp.Suppress(":") +
                      (pp.Keyword("asc") | pp.Keyword("desc")),
                      default="desc",
                  ))
    # name:/date:/tag: search terms; values may be bare words or quoted
    SEARCH_TOKEN = (
        ((pp.Keyword("name") | pp.Keyword("date")) + pp.Suppress(":") +
         (pp.Word(pp.printables) |
          pp.dblQuotedString().setParseAction(pp.removeQuotes) |
          pp.sglQuotedString().setParseAction(pp.removeQuotes))) |
        (TAG_TOKEN + pp.Optional(
            pp.Suppress(":") +
            (pp.Word(pp.printables) |
             pp.dblQuotedString().setParseAction(pp.removeQuotes) |
             pp.sglQuotedString().setParseAction(pp.removeQuotes)))))
    # TODO: date, from, to should be able to grab dates in EXIF tags
    # TODO: support quoted %c datetimes
    # TODO: support quoted datetimes with hour/minute/second individually
    DATETIME = (Date("%Y/%m/%d",
                     DateHints.YEAR | DateHints.MONTH | DateHints.DAY) |
                Date("%Y/%m", DateHints.YEAR | DateHints.MONTH) |
                Date("%Y", DateHints.YEAR))
    FROM_TOKEN = (pp.Keyword("from") + pp.Suppress(":") + DATETIME)
    TO_TOKEN = (pp.Keyword("to") + pp.Suppress(":") + DATETIME)
    TYPE_TOKEN = (pp.Keyword("type") + pp.Suppress(":") +
                  (pp.Keyword("image") | pp.Keyword("video")))
    QUERY_TOKEN = pp.Group(SORT_TOKEN | SEARCH_TOKEN | FROM_TOKEN |
                           TO_TOKEN | TYPE_TOKEN)
    GRAMMAR = pp.Dict(pp.OneOrMore(QUERY_TOKEN))

    def __init__(self, s, grammar=GRAMMAR):
        """Parse *s* with *grammar* and populate self with the results.

        Raises QueryError (chained to the underlying parse error) when the
        query cannot be parsed.
        """
        try:
            r = grammar.setDebug(logging.getLogger().isEnabledFor(
                logging.DEBUG)).parseString(s, parseAll=True)
            logging.debug("search query parse results: %s", r)
            self.update(collections.OrderedDict(r))
        except Exception as e:
            # `Exception` already covers every pyparsing error class the old
            # tuple listed individually; chain the cause for debuggability.
            raise QueryError("unable to parse query: %s" % e) from e
def _parse(mystr):
    """Parse a ``field = value`` configuration string into pyparsing results.

    Supports scalar values (quoted strings, dates ``YYYY-MM-DD``, times
    ``HH:MM:SS.mmm``, numbers), nested ``{ ... }`` objects, and
    ``field[] = { ... }`` lists.  Returns the raw ParseResults.
    """
    LBRACE, RBRACE, EQUAL = map(pp.Suppress, "{}=")
    # field names may contain spaces but not brackets or '='
    field = pp.Word(pp.printables + ' ', excludeChars='[]=')
    field.addParseAction(pp.tokenMap(str.rstrip))  # drop padding before '='
    string = pp.dblQuotedString().setParseAction(pp.removeQuotes)
    number = pp.pyparsing_common.number()
    date_expr = pp.Regex(r'\d\d\d\d-\d\d-\d\d')
    time_expr = pp.Regex(r'\d\d:\d\d:\d\d\.\d\d\d')
    scalar_value = (string | date_expr | time_expr | number)
    list_marker = pp.Suppress("[]")
    value_list = pp.Forward()
    jobject = pp.Forward()
    memberDef1 = pp.Group(field + EQUAL + scalar_value)
    memberDef2 = pp.Group(field + EQUAL + jobject)
    memberDef3 = pp.Group(field + list_marker + EQUAL + LBRACE +
                          value_list + RBRACE)
    memberDef = memberDef1 | memberDef2 | memberDef3
    value_list <<= (pp.delimitedList(scalar_value, ",") |
                    pp.ZeroOrMore(pp.Group(pp.Dict(memberDef2))))
    value_list.setParseAction(lambda t: [pp.ParseResults(t[:])])
    jobject <<= pp.Dict(LBRACE + pp.ZeroOrMore(memberDef) + RBRACE)
    # force empty jobject to be a dict
    jobject.setParseAction(lambda t: t or {})
    # NOTE: the old `parser = members` assignment was dead code -- it was
    # immediately overwritten by the OneOrMore below; removed.
    parser = pp.OneOrMore(pp.Group(pp.Dict(memberDef)))
    return parser.parseString(mystr)
def _parse(mystr):
    """Parse a ``field = value`` configuration string into pyparsing results.

    Variant of the sibling parser that additionally accepts the bare
    keyword ``nan`` as a scalar value.  Supports quoted strings, dates
    ``YYYY-MM-DD``, times ``HH:MM:SS.mmm``, numbers, nested ``{ ... }``
    objects, and ``field[] = { ... }`` lists.  Returns the raw ParseResults.
    """
    LBRACE, RBRACE, EQUAL = map(pp.Suppress, "{}=")
    # field names may contain spaces but not brackets or '='
    field = pp.Word(pp.printables + ' ', excludeChars='[]=')
    field.addParseAction(pp.tokenMap(str.rstrip))  # drop padding before '='
    string = pp.dblQuotedString().setParseAction(pp.removeQuotes)
    number = pp.pyparsing_common.number()
    date_expr = pp.Regex(r'\d\d\d\d-\d\d-\d\d')
    time_expr = pp.Regex(r'\d\d:\d\d:\d\d\.\d\d\d')
    nan = pp.Keyword('nan')
    scalar_value = (string | date_expr | time_expr | number | nan)
    list_marker = pp.Suppress("[]")
    value_list = pp.Forward()
    jobject = pp.Forward()
    memberDef1 = pp.Group(field + EQUAL + scalar_value)
    memberDef2 = pp.Group(field + EQUAL + jobject)
    memberDef3 = pp.Group(field + list_marker + EQUAL + LBRACE +
                          value_list + RBRACE)
    memberDef = memberDef1 | memberDef2 | memberDef3
    value_list <<= (pp.delimitedList(scalar_value, ",") |
                    pp.ZeroOrMore(pp.Group(pp.Dict(memberDef2))))
    value_list.setParseAction(lambda t: [pp.ParseResults(t[:])])
    jobject <<= pp.Dict(LBRACE + pp.ZeroOrMore(memberDef) + RBRACE)
    # force empty jobject to be a dict
    jobject.setParseAction(lambda t: t or {})
    # NOTE: the old `parser = members` assignment was dead code -- it was
    # immediately overwritten by the OneOrMore below; removed.
    parser = pp.OneOrMore(pp.Group(pp.Dict(memberDef)))
    return parser.parseString(mystr)
def __init__(self):
    """Build the directive grammar.

    Accepts input of the form ``name(arg, ..., key=value, ...); name2(...)``:
    one or more directives separated by ';' (optional trailing ';'), each a
    parenthesized argument list where positional arguments precede keyword
    arguments.
    """
    # punctuation, suppressed from parse results
    lparen = pp.Literal('(').suppress()
    rparen = pp.Literal(')').suppress()
    comma = pp.Literal(',').suppress()
    semicolon = pp.Literal(';').suppress()
    equalTok = pp.Literal('=').suppress()
    # optionally signed decimal ("-3", "+1.", "2.75"); joined and cast to float
    self.floatTok = (pp.Optional((pp.Literal('-')) | pp.Literal('+')) +
                     pp.Word(pp.nums) +
                     pp.Optional(pp.Literal('.') +
                                 pp.Optional(pp.Word(pp.nums))))
    self.floatTok.addParseAction(lambda toks: float("".join(toks)))
    # single- or double-quoted string; surrounding quotes stripped
    self.stringTok = pp.Group(pp.dblQuotedString() ^ pp.sglQuotedString())
    self.stringTok.addParseAction(
        lambda toks: "".join(toks[0]).strip('"').strip("'"))
    self.trueTok = pp.Keyword("true")
    self.trueTok.addParseAction(lambda _: True)
    self.falseTok = pp.Keyword("false")
    self.falseTok.addParseAction(lambda _: False)
    self.boolTok = self.trueTok | self.falseTok
    self.identifierTok = pp.Word(pp.alphas + '_', pp.alphanums + '_')('identifier')
    # position keyword (e.g. "tl", "br"): any identifier is accepted and kept
    # as a plain string; validation is left to the consumer.  (The previous
    # commented-out closed enumeration of position keywords was removed.)
    self.posKeywordTok = \
        pp.Word(pp.alphas + '_', pp.alphanums + '_')
    self.posKeywordTok.addParseAction(lambda toks: str(toks[0]))
    self.positionalArgTok = self.floatTok | self.stringTok | self.boolTok
    self.keywordArgTok = pp.Group(self.identifierTok + equalTok +
                                  (self.positionalArgTok | self.posKeywordTok))
    self.keywordArgTok.addParseAction(lambda toks: [x for x in toks])
    # positional args (if any) must come before keyword args
    self.argsTok = pp.Optional(
        (self.positionalArgTok +
         pp.ZeroOrMore(comma + self.positionalArgTok) +
         pp.ZeroOrMore(comma + self.keywordArgTok)) |
        (self.keywordArgTok + pp.ZeroOrMore(comma + self.keywordArgTok))
    )('args')
    self.argsTok.addParseAction(lambda toks: DirectiveArgs(toks))
    self.directiveTok = pp.Group(self.identifierTok + lparen +
                                 self.argsTok + rparen)
    self.mainTok = (self.directiveTok +
                    pp.ZeroOrMore(semicolon + self.directiveTok) +
                    pp.Optional(semicolon))
def parse_list_vms(stdout, stderr):
    """Parse ``list vms``-style output of ``"name" {uuid}`` pairs.

    Parameters
    ----------
    stdout : str
        Command standard output; one ``"vm name" {uuid}`` entry per VM.
    stderr : str
        Command standard error; unused, kept for interface compatibility.

    Returns
    -------
    list of dict
        One ``{'name': ..., 'uuid': ...}`` dict per VM, with the
        surrounding quotes stripped from the name.
    """
    # The old `dblQuotedString(alphas)` call only set a results name that was
    # immediately overridden by setResultsName('name'); the argument is gone.
    id_vm_name = dblQuotedString().setResultsName('name')
    # raw string: '\-' in a plain literal is an invalid escape sequence
    # (SyntaxWarning on modern Python)
    id_vm_uuid = Word(srange(r"[a-zA-Z0-9_\-]")).setResultsName('uuid')
    left_brace = Suppress("{")
    right_brace = Suppress("}")
    vm_group = Group(id_vm_name + left_brace + id_vm_uuid + right_brace)
    vm_list = OneOrMore(vm_group)
    token_lists = vm_list.parseString(stdout, parseAll=True)
    return [{'name': token_list.name.replace('\"', ''),
             'uuid': token_list.uuid} for token_list in token_lists]
def BNF(decorate):
    """Build a pyparsing grammar for an EBNF-like notation.

    Productions look like ``name = alternation | alternation ;`` with
    elements that may be parenthesized groups, character ranges ``[a-z]``,
    double-quoted literals, or nonterminal references, each optionally
    quantified with ``?`` or ``*``.  ``decorate(tag, fn)`` wraps every parse
    action -- presumably for tracing/instrumentation; confirm with callers.
    Returns the top-level grammar element, whose parse result is a dict
    mapping production names to their right-hand sides.
    """
    def eltFunc(s, l, toks):
        # Apply a trailing quantifier if present: '?' -> Opt, '*' -> Kleene;
        # otherwise return the bare element.
        last = toks[-1]
        subj = toks[0]
        if last == '?':
            return Opt(toks[0])
        elif last == '*':
            return Kleene(toks[0])
        else:
            return subj
    nonterm = pp.Word(pp.alphanums + "'_$").setParseAction(
        decorate('nonterm', lambda s, loc, toks: NonTerminal(toks[0])))
    # a nonterminal used on a right-hand side becomes a Reference
    reference = nonterm().setParseAction(
        decorate('reference', lambda s, l, toks: Reference(toks[0])))
    char = pp.Word(pp.alphanums, exact=1)
    # character range, e.g. [a-z]
    charrange = (pp.Literal('[').suppress() + char +
                 pp.Literal('-').suppress() + char +
                 pp.Literal(']').suppress()) \
        .setParseAction(decorate('charrange',
                                 lambda s, l, toks: CharRange(toks[0], toks[1])))
    literal = pp.dblQuotedString().setParseAction(
        decorate('literal', lambda s, l, toks: Literal(toks[0])))
    # quantifier markers pass their token through untouched
    optional = pp.Literal('?').setParseAction(decorate('?', lambda s, l, t: t))
    star = pp.Literal('*').setParseAction(decorate('*', lambda s, l, t: t))
    quant = optional | star
    alternations = pp.Forward()
    element = (((pp.Literal('(').suppress() + alternations +
                 pp.Literal(')').suppress()).setParseAction(
        decorate('parens', lambda s, l, t: t)) |
        charrange | literal | reference) +
        pp.Optional(quant)).setParseAction(decorate('element', eltFunc))
    # adjacent elements concatenate; single element passes through unwrapped
    alternation = (element + pp.ZeroOrMore(element)).setParseAction(
        decorate(
            'alternation',
            lambda s, l, toks: toks[0] if len(toks) == 1 else Concatenation(toks)))
    alternations << alternation + pp.ZeroOrMore(
        pp.Literal('|').suppress() + alternation)
    alternations.setParseAction(
        decorate(
            'alternations',
            lambda s, l, toks: toks[0] if len(toks) == 1 else Alternation(toks)))
    # production result is a (name, rhs-tokens) pair
    production = (nonterm + pp.Literal('=').suppress() + alternations +
                  pp.Literal(';').suppress()).setParseAction(
        decorate('production', lambda s, l, toks: (toks[0], toks[1:])))
    grammar = (production + pp.ZeroOrMore(production)).setParseAction(
        decorate('grammar', lambda s, l, toks: {t[0]: t[1] for t in toks}))
    return grammar
def __init__(self):
    """Initialize parse state and build the ```pragma protect`` line grammar."""
    # State mutated by the parse-action callbacks (assignment_action, begin,
    # finish, begin_key_block, begin_data_block, base64_action).
    self.session_keys = {}
    self.encrypted_data = None
    self.info = {}
    self.base64_buf = []       # accumulates base64 payload lines
    self.key_block = False     # presumably toggled by begin_key_block -- confirm
    self.data_block = False    # presumably toggled by begin_data_block -- confirm
    self.p1735 = False
    # every protect directive starts with this prefix (suppressed)
    protect_kw = pp.Keyword('`pragma protect').suppress()
    identifier = pp.Word(pp.alphas, pp.alphanums + "_")
    number = pp.Word(pp.nums).setParseAction(lambda t: int(t[0]))
    string = pp.dblQuotedString().setParseAction(pp.removeQuotes)
    equals = pp.Suppress("=")
    lbrace = pp.Suppress('(')
    rbrace = pp.Suppress(')')
    # name = <number|string>, recorded via assignment_action
    simpleAssignment = (identifier + equals +
                        (number | string)).setParseAction(
        self.assignment_action)
    multiAssignment = simpleAssignment + pp.ZeroOrMore(',' + simpleAssignment)
    # name = (a = 1, b = "x")
    tupleAssignment = identifier + equals + lbrace + multiAssignment + rbrace
    assignment = protect_kw + (multiAssignment | tupleAssignment)
    PSTART = (protect_kw +
              pp.CaselessLiteral('begin_protected')).setParseAction(
        self.begin)
    PFINISH = (protect_kw +
               pp.CaselessLiteral('end_protected')).setParseAction(
        self.finish)
    key_block = (protect_kw +
                 pp.CaselessLiteral('key_block')).setParseAction(
        self.begin_key_block)
    data_block = (protect_kw +
                  pp.CaselessLiteral('data_block')).setParseAction(
        self.begin_data_block)
    # bare base64 payload line
    base64_string = pp.Word(pp.alphanums + "+-/=").setParseAction(
        self.base64_action)
    emptyLine = (pp.LineStart() + pp.LineEnd()).suppress()
    # one alternative matches per input line
    self.parser = (PSTART | assignment | key_block | data_block |
                   base64_string | emptyLine | PFINISH)
def create_grammar():
    """Construct a JSON-style value grammar and return its root element.

    Strings keep their surrounding quotes, and the ``true``/``false``/
    ``null`` keywords parse to their literal text (no Python conversion).
    """
    true_kw = Keyword('true')
    false_kw = Keyword('false')
    null_kw = Keyword('null')
    lbrack, rbrack, lbrace, rbrace, colon = map(Suppress, '[]{}:')

    scalar_string = dblQuotedString()
    scalar_number = pyparsing_common.number()

    obj = Forward()
    val = Forward()

    # array: bracketed, comma-separated values; empty array -> []
    arr = Group(lbrack + Optional(delimitedList(val), []) + rbrack)
    val <<= (scalar_string | scalar_number | Group(obj) | arr |
             true_kw | false_kw | null_kw)

    # object: braced, comma-separated "key": value pairs
    pair = Group(scalar_string + colon + val)
    obj <<= Dict(lbrace + Optional(delimitedList(pair)) + rbrace)
    return val
def _define_json():
    """Build a JSON grammar and return the root value element.

    Adapted from pyparsing's jsonParser.py example:
    https://pyparsing.wikispaces.com/file/view/jsonParser.py
    """
    true_ = _make_keyword('true', True)
    false_ = _make_keyword('false', False)
    null_ = _make_keyword('null', None)
    lbrack, rbrack, lbrace, rbrace, colon = map(Suppress, '[]{}:')

    j_string = dblQuotedString().setParseAction(removeQuotes)
    j_number = pyparsing_common.number()

    j_object = Forward()
    j_value = Forward()

    # array: bracketed, comma-separated values; empty array -> []
    j_array = Group(lbrack + Optional(delimitedList(j_value), []) + rbrack)
    j_value << (j_string | j_number | Group(j_object) | j_array |
                true_ | false_ | null_)  # noqa

    # object: braced, comma-separated "key": value pairs
    j_member = Group(j_string + colon + j_value)
    j_object << Dict(lbrace + Optional(delimitedList(j_member)) + rbrace)
    return j_value
def get_parser():
    """A Parser for the dumped ParamFile attribute written by FastPM.

    The attribute is produced by the lua dump module and is a lua table,
    e.g. ``{ a = 3, b = {0, 1, 2,} }`` (modified from the jsonParser.py
    example shipped with pyparsing).

    Before using the parser, convert the attribute from an array of U1
    to a string.
    """
    import pyparsing as pp
    from pyparsing import pyparsing_common as ppc

    def _const(text, result):
        # lua literal keyword that parses to a fixed Python value
        return pp.Keyword(text).setParseAction(pp.replaceWith(result))

    true_kw = _const("true", True)
    false_kw = _const("false", False)
    nil_kw = _const("nil", None)

    lbrace, rbrace, assign, comma = map(pp.Suppress, "{}=,")

    lua_name = pp.Word(pp.alphas + "_", pp.alphanums + "_")
    lua_string = pp.dblQuotedString().setParseAction(pp.removeQuotes)
    lua_number = ppc.number()

    lua_table = pp.Forward()
    lua_value = pp.Forward()

    # lua allows a trailing comma after the last element / member
    lua_elements = pp.delimitedList(lua_value) + pp.Optional(comma)
    lua_array = pp.Group(lbrace + pp.Optional(lua_elements, []) + rbrace)
    lua_value << (lua_string | lua_number | pp.Group(lua_table) |
                  lua_array | true_kw | false_kw | nil_kw)

    lua_member = pp.Group(lua_name + assign + lua_value)
    lua_members = pp.delimitedList(lua_member) + pp.Optional(comma)
    lua_table << pp.Dict(lbrace + pp.Optional(lua_members) + rbrace)
    return lua_table
def _parse_study_search_string(self, searchstr,
                               only_with_processed_data=False):
    """parses string into SQL query for study search

    Parameters
    ----------
    searchstr : str
        The string to parse
    only_with_processed_data : bool
        Whether or not to return studies with processed data.

    Returns
    -------
    study_sql : str
        SQL query for selecting studies with the required metadata columns
    sample_sql : str
        SQL query for each study to get the sample ids that match the query
    meta_headers : list-like
        metadata categories in the query string in alphabetical order
        (a dict keys view)

    Notes
    -----
    All searches are case-sensitive

    References
    ----------
    .. [1] McGuire P (2007) Getting started with pyparsing.
    """
    # build the parse grammar
    category = Word(alphas + nums + "_")
    seperator = oneOf("> < = >= <= !=") | CaselessLiteral("includes") | \
        CaselessLiteral("startswith")
    value = Word(alphas + nums + "_" + ":" + ".") | \
        dblQuotedString().setParseAction(removeQuotes)
    criterion = Group(category + seperator + value)
    criterion.setParseAction(SearchTerm)
    and_ = CaselessLiteral("and")
    or_ = CaselessLiteral("or")
    not_ = CaselessLiteral("not")
    optional_seps = Optional(and_ | or_ | not_)
    # create the grammar for parsing operators AND, OR, NOT
    search_expr = operatorPrecedence(
        criterion, [(not_, 1, opAssoc.RIGHT, SearchNot),
                    (and_, 2, opAssoc.LEFT, SearchAnd),
                    (or_, 2, opAssoc.LEFT, SearchOr)])
    # parse the search string to get out the SQL WHERE formatted query
    eval_stack = (search_expr + stringEnd).parseString(searchstr)[0]
    sql_where = eval_stack.generate_sql()
    # this lookup will be used to select only studies with columns
    # of the correct type
    type_lookup = {int: 'integer', float: 'float8', str: 'varchar'}
    # parse out all metadata headers we need to have in a study, and
    # their corresponding types
    all_headers = [
        c[0][0].term[0]
        for c in (criterion + optional_seps).scanString(searchstr)
    ]
    meta_headers = set(all_headers)
    all_types = [
        c[0][0].term[2]
        for c in (criterion + optional_seps).scanString(searchstr)
    ]
    # map each searched value to the postgres type of its python conversion
    all_types = [
        type_lookup[type(qdb.util.convert_type(s))] for s in all_types
    ]
    # sort headers and types so they return in same order every time.
    # Should be a relatively short list so very quick
    # argsort implementation taken from
    # http://stackoverflow.com/questions/3382352/
    # equivalent-of-numpy-argsort-in-basic-python
    sort_order = sorted(range(len(all_headers)),
                        key=all_headers.__getitem__)
    all_types = [all_types[x] for x in sort_order]
    all_headers.sort()
    # At this point it is possible that a metadata header has been
    # reference more than once in the query. If the types agree, then we
    # do not need to do anything. If the types do not agree (specifically,
    # if it appears to be numerical in one case and string in another),
    # then we need to give varchar the precedence.
    meta_header_type_lookup = dict()
    for header, header_type in zip(all_headers, all_types):
        if header not in meta_header_type_lookup:
            meta_header_type_lookup[header] = header_type
        else:
            if header_type == 'varchar' or \
                    meta_header_type_lookup[header] == 'varchar':
                meta_header_type_lookup[header] = 'varchar'
    # create the study finding SQL
    # remove metadata headers that are in study table
    meta_headers.discard('sample_id')
    meta_headers = tuple(meta_headers.difference(self.study_cols))
    # get all study ids that contain all metadata categories searched for
    sql = []
    if meta_headers:
        # have study-specific metadata, so need to find specific studies
        for meta in meta_headers:
            if meta_header_type_lookup[meta] in ('integer', 'float8'):
                # numeric columns also satisfy numeric searches
                allowable_types = "('integer', 'float8')"
            else:
                allowable_types = "('varchar')"
            sql.append("SELECT study_id FROM qiita.study_sample_columns "
                       "WHERE lower(column_name) = lower('%s') and "
                       "column_type in %s" % (qdb.util.scrub_data(meta),
                                              allowable_types))
    else:
        # no study-specific metadata, so need all studies
        sql.append("SELECT study_id FROM qiita.study_sample_columns")
    # combine the query
    if only_with_processed_data:
        sql.append("SELECT DISTINCT study_id "
                   "FROM qiita.study_artifact "
                   "JOIN qiita.artifact USING (artifact_id) "
                   "JOIN qiita.artifact_type USING (artifact_type_id) "
                   "WHERE artifact_type = 'BIOM'")
    # restrict to studies in portal
    sql.append("SELECT study_id from qiita.study_portal "
               "JOIN qiita.portal_type USING (portal_type_id) "
               "WHERE portal = '%s'" % qiita_config.portal)
    study_sql = ' INTERSECT '.join(sql)
    # create the sample finding SQL, getting both sample id and values
    # build the sql formatted list of metadata headers
    header_info = []
    for meta in meta_header_type_lookup:
        if meta in self.study_cols:
            header_info.append("st.%s" % meta)
        else:
            header_info.append("sa.%s" % meta)
    # build the SQL query; the {0} placeholder is filled in later with
    # each study id by the caller
    sample_sql = ("SELECT ss.sample_id,%s "
                  "FROM qiita.study_sample ss "
                  "JOIN qiita.sample_{0} sa ON ss.sample_id = sa.sample_id"
                  " JOIN qiita.study st ON st.study_id = ss.study_id "
                  "WHERE %s" % (','.join(header_info), sql_where))
    return study_sql, sample_sql, meta_header_type_lookup.keys()
tokenstr = "\n".join(tokens) mod_string = "comment{:d} comment\n(\n{:s}\n)".format(locn, tokenstr) mod_def = pp.Dict( pp.Group( identifier.setResultsName("_name") + identifier.setResultsName("_type") + LPAREN + pp.Group(pp.OneOrMore(pp.dblSlashComment)).setResultsName("text") + RPAREN)) return mod_def.parseString(mod_string) tor_comment.setParseAction(comment_handler) tor_comment.setResultsName("_name") tor_comment.setResultsName("_type") tor_string = pp.dblQuotedString() | pp.Word(pp.alphas, pp.alphanums + "_-.") number = pp.pyparsing_common.number() tor_members = pp.Forward() tor_value = pp.Forward() tor_struct = pp.Literal("struct").setResultsName("_type") + LPAREN + pp.Dict( tor_members) + RPAREN tor_sequence = pp.Literal("sequence").setResultsName( "_type") + LPAREN + pp.delimitedList(tor_value) + RPAREN tor_ref = pp.Literal("ref").setResultsName( "_type") + LPAREN + identifier + RPAREN tor_value << (tor_sequence | tor_ref | tor_struct | tor_string | pp.Group(number + identifier) | number) member_def = pp.Dict(pp.Group(identifier + COLON + tor_value))
# `number` is defined earlier in this module; presumably builds a parser for
# an N-bit (signed?) numeric literal -- TODO confirm.
s15 = number(15, True)
s16 = number(16, True)


def string_format(s, l, t):
    # Parse action: expand escaped \n, \t, \r sequences inside a parsed
    # string literal into their real control characters.
    x = t[0].replace("\\n", "\n")
    x = x.replace("\\t", "\t")
    return x.replace("\\r", "\r")


# label definition, e.g. "name:"
label = label_name + colon
# general-purpose register: "$<octal digit(s)>", converted to int
reg = pp.Suppress("$") + pp.Word("01234567").setParseAction(to_int)
# register or special immediate (spec_imm defined earlier)
ireg = reg | spec_imm
# control register: "$cr<0-3>", converted to int
creg = pp.Suppress("$cr") + pp.Word("0123").setParseAction(to_int)
# condition codes for conditional instructions
cond = pp.oneOf("eq ne gt gte lt lte ult ulte")("cond")
# quoted string literal: strip quotes, then expand escapes
string = pp.dblQuotedString()
string.addParseAction(pp.removeQuotes)
string.addParseAction(string_format)
# line comment starting with ';'
comment = ";" + pp.restOfLine
# actions: wrap each raw parse into its token class via _build
creg.addParseAction(_build(tokens.ControlRegister))
reg.addParseAction(_build(tokens.Register))
cond.addParseAction(_build(tokens.Condition))
ireg.addParseAction(_build(tokens.ImmRegister))
label_name.addParseAction(_build(tokens.Label))
iden = label_name.copy()  # used in macros
return True except ValueError: return False W = Suppress(ZeroOrMore(White())) C = Suppress(',') WCW = W + C + W LPF, RPF = map(Suppress, '()') LP = Suppress('(') + W RP = W + Suppress(')') word = Word(alphanums) ns = Word(alphanums + '_-.') identifier = Word(alphanums + '_') quote = dblQuotedString().setParseAction(removeQuotes) qid = quote | identifier delimited_quoted_list = And([Suppress('{'), delimitedList(quote), Suppress('}')]) delimited_unquoted_list = And([Suppress('{'), delimitedList(identifier), Suppress('}')]) def nest(*content): """Define a delimited list by enumerating each element of the list.""" if len(content) == 0: raise ValueError('no arguments supplied') return And([LPF, content[0]] + list(itt.chain.from_iterable(zip(itt.repeat(C), content[1:]))) + [RPF]) def one_of_tags( tags: List[str], canonical_tag: str,
def _parse_study_search_string(self, searchstr,
                               only_with_processed_data=False):
    """parses string into SQL query for study search

    Parameters
    ----------
    searchstr : str
        The string to parse
    only_with_processed_data : bool
        Whether or not to return studies with processed data.

    Returns
    -------
    study_sql : str
        SQL query for selecting studies with the required metadata columns
    sample_sql : str
        SQL query for each study to get the sample ids that match the query
    meta_headers : list-like
        metadata categories in the query string in alphabetical order
        (a dict keys view)

    Notes
    -----
    All searches are case-sensitive

    References
    ----------
    .. [1] McGuire P (2007) Getting started with pyparsing.
    """
    # build the parse grammar
    category = Word(alphas + nums + "_")
    seperator = oneOf("> < = >= <= !=") | CaselessLiteral("includes") | CaselessLiteral("startswith")
    value = Word(alphas + nums + "_" + ":" + ".") | dblQuotedString().setParseAction(removeQuotes)
    criterion = Group(category + seperator + value)
    criterion.setParseAction(SearchTerm)
    and_ = CaselessLiteral("and")
    or_ = CaselessLiteral("or")
    not_ = CaselessLiteral("not")
    optional_seps = Optional(and_ | or_ | not_)
    # create the grammar for parsing operators AND, OR, NOT
    search_expr = operatorPrecedence(
        criterion,
        [(not_, 1, opAssoc.RIGHT, SearchNot),
         (and_, 2, opAssoc.LEFT, SearchAnd),
         (or_, 2, opAssoc.LEFT, SearchOr)],
    )
    # parse the search string to get out the SQL WHERE formatted query
    eval_stack = (search_expr + stringEnd).parseString(searchstr)[0]
    sql_where = eval_stack.generate_sql()
    # parse out all metadata headers we need to have in a study, and
    # their corresponding types
    all_headers = [c[0][0].term[0] for c in (criterion + optional_seps).scanString(searchstr)]
    meta_headers = set(all_headers)
    # NOTE(review): all_types holds the raw searched *values* (term[2]),
    # not postgres type names, so the "varchar" comparison below can only
    # match a literal search value "varchar" -- confirm this is intended.
    all_types = [c[0][0].term[2] for c in (criterion + optional_seps).scanString(searchstr)]
    # sort headers and types so they return in same order every time.
    # Should be a relatively short list so very quick
    # argsort implementation taken from
    # http://stackoverflow.com/questions/3382352/
    # equivalent-of-numpy-argsort-in-basic-python
    sort_order = sorted(range(len(all_headers)), key=all_headers.__getitem__)
    all_types = [all_types[x] for x in sort_order]
    all_headers.sort()
    # At this point it is possible that a metadata header has been
    # reference more than once in the query. If the types agree, then we
    # do not need to do anything. If the types do not agree (specifically,
    # if it appears to be numerical in one case and string in another),
    # then we need to give varchar the precedence.
    meta_header_type_lookup = dict()
    for header, header_type in zip(all_headers, all_types):
        if header not in meta_header_type_lookup:
            meta_header_type_lookup[header] = header_type
        else:
            if header_type == "varchar" or meta_header_type_lookup[header] == "varchar":
                meta_header_type_lookup[header] = "varchar"
    # create the study finding SQL
    # remove metadata headers that are in study table
    meta_headers.discard("sample_id")
    meta_headers = tuple(meta_headers.difference(self.study_cols))
    # get all study ids that contain all metadata categories searched for
    sql = []
    if meta_headers:
        # have study-specific metadata, so need to find specific studies
        for meta in meta_headers:
            sql.append(
                "SELECT DISTINCT table_name FROM "
                "information_schema.columns WHERE "
                "lower(column_name) = lower('{0}')".format(qdb.util.scrub_data(meta))
            )
    else:
        # no study-specific metadata, so need all studies
        sql.append("SELECT DISTINCT table_name " "FROM information_schema.columns")
    # combine the query
    if only_with_processed_data:
        # NOTE(review): no space between "VARCHAR)" and "FROM" in the
        # concatenated SQL; postgres tokenizes ')' as a delimiter so this
        # still parses, but confirm against the live query.
        sql.append(
            "SELECT DISTINCT 'sample_' || CAST(study_id AS VARCHAR)"
            "FROM qiita.study_artifact "
            "JOIN qiita.artifact USING (artifact_id) "
            "JOIN qiita.artifact_type USING (artifact_type_id) "
            "WHERE artifact_type = 'BIOM'"
        )
    # restrict to studies in portal
    sql.append(
        "SELECT 'sample_' || CAST(study_id AS VARCHAR) "
        "FROM qiita.study_portal "
        "JOIN qiita.portal_type USING (portal_type_id) "
        "WHERE portal = '%s'" % qiita_config.portal
    )
    study_sql = " INTERSECT ".join(sql)
    # create the sample finding SQL, getting both sample id and values
    # build the sql formatted list of metadata headers
    header_info = []
    for meta in meta_header_type_lookup:
        if meta in self.study_cols:
            header_info.append("st.%s" % meta)
        else:
            header_info.append("sa.%s" % meta)
    # build the SQL query; the {0} placeholder is filled in later with
    # each study id by the caller
    sample_sql = (
        "SELECT ss.sample_id, %s "
        "FROM qiita.study_sample ss "
        "JOIN qiita.sample_{0} sa ON ss.sample_id = sa.sample_id"
        " JOIN qiita.study st ON st.study_id = ss.study_id "
        "WHERE %s" % (",".join(header_info), sql_where)
    )
    return study_sql, sample_sql, meta_header_type_lookup.keys()
def _parse(self, content):
    """Parse Gazebo material-script text in *content* into the item tree.

    Builds a pyparsing grammar for import headers plus nested
    ``type [name] { ... }`` blocks, scans *content* with it, converts each
    matched block into GazeboMaterialItem children of self._root, and sets
    self._parsed = True.  C/C++-style comments are ignored.
    """
    # newlines are significant in this grammar, so only space/tab are skipped
    gz.ParserElement.setDefaultWhitespaceChars(' \t')
    singleline_comment = "//" + gz.restOfLine
    multiline_comment = gz.cStyleComment
    comments = gz.MatchFirst([singleline_comment, multiline_comment])
    # signed decimal with mandatory '.' (e.g. "-1.5", ".5")
    real = gz.Combine(
        gz.Optional(gz.oneOf("+ -")) + gz.Optional(gz.Word(gz.nums)) +
        "." + gz.Word(gz.nums)).setName("real")
    integer = gz.Combine(gz.Optional(gz.oneOf("+ -")) +
                         gz.Word(gz.nums)).setName("integer")
    nums = real | integer
    words = gz.Word(gz.alphas)
    string = gz.dblQuotedString()
    item_type = gz.Word(gz.alphas + "_")
    # recognized resource file extensions
    extensions = (gz.oneOf(["frag", "glsl", "jpeg", "jpg", "png", "vert"]))
    _filename = gz.ZeroOrMore(gz.Word(gz.alphanums + "_.") + '.') + \
        gz.Word(gz.alphanums + "_")
    _filepath = gz.ZeroOrMore(gz.Word(gz.alphanums + "_.") + "/")
    filename = gz.Combine(_filename + '.' + extensions)
    filepath = gz.Combine(gz.Optional("/") + _filepath)
    fileany = gz.Combine(filepath + filename)
    # "import * from X" or "import a,b from X"
    importall = gz.Literal("*")
    importheader = gz.Literal("import").setResultsName('itemtype') + \
        (importall | gz.Combine(gz.delimitedList(item_type, delim=",", combine=True))).setResultsName('imports') + \
        gz.Literal("from").suppress() + (string | words).setResultsName('from')
    lineend = gz.OneOrMore(gz.LineEnd()).suppress()
    oplineend = gz.Optional(lineend)
    blockstart, blockend = gz.Literal("{").suppress(), gz.Literal(
        "}").suppress()
    # block name, optionally namespaced: "group/name"
    blockname = gz.Combine(
        gz.Optional(gz.Word(gz.alphas + "_") + gz.Literal("/")) +
        gz.Word(gz.alphanums + "_"))
    # single-line "key value..." option inside a block
    blockoption = item_type.setResultsName('itemtype') + (gz.OneOrMore(
        fileany | nums | item_type)).setResultsName('arguments') + lineend
    # ": parent" inheritance clause
    blockinherit = gz.Literal(":").suppress() + blockname
    blockheader = item_type.setResultsName('itemtype') + gz.Optional(blockname).setResultsName('blockname') + \
        gz.Group(gz.Optional(gz.OneOrMore(item_type))).setResultsName('arguments') + \
        gz.Group(gz.Optional(blockinherit)).setResultsName('inheritance')
    # recursive definition: blocks may nest arbitrarily
    blockinner = gz.Forward()
    blockinner << gz.Group(item_type.setResultsName('itemtype') +
                           gz.Optional(blockname).setResultsName('blockname') +
                           gz.ZeroOrMore(blockname).setResultsName('arguments') + oplineend + \
                           blockstart + oplineend + \
                           gz.ZeroOrMore(blockinner ^ gz.Group(blockoption)).setResultsName('blockbody') + \
                           oplineend + blockend) + oplineend
    block = gz.Group(blockheader + oplineend + \
                     blockstart + oplineend + \
                     gz.ZeroOrMore(blockinner ^ gz.Group(blockoption)).setResultsName('blockbody') + \
                     oplineend + blockend) + oplineend
    allitems = gz.ZeroOrMore(gz.Group(importheader) + lineend) + \
        gz.ZeroOrMore(block) + oplineend
    allitems.ignore(comments)

    def makeBlock(token, level=0):
        # Recursively convert one parsed block token into a
        # GazeboMaterialItem subtree.
        tkeys = token.keys()
        if 'itemtype' in tkeys:
            item = GazeboMaterialItem(token['itemtype'])
        else:
            raise Exception("Cannot found itemtype in {0}".format(token))
        if 'blockname' in tkeys:
            item._setName(token['blockname'])
            #item.addArgument(token['blockname'])
        if 'arguments' in tkeys:
            for xarg in token['arguments']:
                item.addArgument(xarg)
        if 'inheritance' in tkeys:
            for xarg in token['inheritance']:
                item.addInheritance(xarg)
        if 'blockbody' in tkeys:
            for child in token['blockbody']:
                # plain strings in the body indicate a parse problem
                if type(child) != str:
                    item.addChild(makeBlock(child, level=level + 1))
                else:
                    raise Exception("Failured while parsing blockbody", child)
        return item

    # scan (rather than parseString) so multiple top-level groups are handled
    for tokens, start, end in allitems.scanString(content):
        for t in tokens:
            self._root.addChild(makeBlock(t))
    self._parsed = True
import pyparsing as pp
from pyparsing import pyparsing_common as ppc


# JSON grammar (adapted from pyparsing's jsonParser.py example), extended
# below to tolerate C/C++-style comments in the input.
def make_keyword(kwd_str, kwd_value):
    # keyword literal that parses to a fixed Python value
    return pp.Keyword(kwd_str).setParseAction(pp.replaceWith(kwd_value))


TRUE = make_keyword("true", True)
FALSE = make_keyword("false", False)
NULL = make_keyword("null", None)

# structural punctuation, suppressed from parse results
LBRACK, RBRACK, LBRACE, RBRACE, COLON = map(pp.Suppress, "[]{}:")

jsonString = pp.dblQuotedString().setParseAction(pp.removeQuotes)
jsonNumber = ppc.number()

jsonObject = pp.Forward()
jsonValue = pp.Forward()
jsonElements = pp.delimitedList(jsonValue)
# empty array parses to []
jsonArray = pp.Group(LBRACK + pp.Optional(jsonElements, []) + RBRACK)
jsonValue << (jsonString | jsonNumber | pp.Group(jsonObject) | jsonArray |
              TRUE | FALSE | NULL)
memberDef = pp.Group(jsonString + COLON + jsonValue)
jsonMembers = pp.delimitedList(memberDef)
jsonObject << pp.Dict(LBRACE + pp.Optional(jsonMembers) + RBRACE)

# allow // and /* */ comments anywhere in the parsed document
jsonComment = pp.cppStyleComment
jsonObject.ignore(jsonComment)
def build_MEASURE(self):
    """Construct the pyparsing grammar for the MEASURE language.

    The language has three sections in fixed order --
    ``measurements { ... }``, ``zones { ... }`` and ``actions { ... }`` --
    and the combined parser is stored in ``self.MEASURE``.  Known function
    signatures for each section are recorded in ``self.measureFunctions``,
    ``self.zoneFunctions`` and ``self.actionFunctions``.  Per-production
    debug tracing is controlled by the ``self.dbg*`` flags.
    """
    ## Grammar definition
    # literals
    self.var_list = dict()
    period = Literal(".")
    # identifier-style names; may contain dots, underscores and dashes
    variable = Word(alphas, alphanums + "." + "_" + "-").setName("variable").setDebug(self.dbgLiterals)
    number = Word(nums+".").setName("number").setDebug(self.dbgLiterals)
    integer = Word(nums).setName("integer").setDebug(self.dbgLiterals)
    # NOTE(review): 'float' shadows the builtin within this method
    float = Combine(integer + "." + integer).setName("float").setDebug(self.dbgLiterals)
    ipAddress = Combine(integer + ('.' + integer)*3).setName("ipAddress").setDebug(self.dbgLiterals)
    # either quote character, dropped from the results
    quote = (Literal("\"").suppress()|Literal("'").suppress()).setName("quote").setDebug(self.dbgLiterals)
    string = (quote + Regex(r'(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*') + quote).setName("string").setDebug(self.dbgLiterals)
    # special characters (all suppressed except the '->' arrow)
    oparen = Literal("(").suppress().setName("opening parenthesis").setDebug(self.dbgLiterals)
    eparen = Literal(")").suppress().setName("closing parenthesis").setDebug(self.dbgLiterals)
    semicolon = Literal(";").suppress().setName("semicolon").setDebug(self.dbgLiterals)
    comma = Literal(",").suppress().setName("comma").setDebug(self.dbgLiterals)
    obrace = Literal("{").suppress().setName("opening brace").setDebug(self.dbgLiterals)
    ebrace = Literal("}").suppress().setName("closing brace").setDebug(self.dbgLiterals)
    to = Literal("->").setName("right-arrow").setDebug(self.dbgLiterals)
    # section literals
    measurements = Literal("measurements").suppress().setDebug(self.dbgLiterals)
    zoneTok = Literal("zones").suppress().setDebug(self.dbgLiterals)
    actionTok = Literal("actions").suppress().setDebug(self.dbgLiterals)
    # arithmetic literals
    eq = Literal("=").setName("equal sign").setDebug(self.dbgLiterals)
    geq = Literal(">=").setName("greater or equal sign").setDebug(self.dbgLiterals)
    leq = Literal("<=").setName("less or equal sign").setDebug(self.dbgLiterals)
    gt = Literal(">").setName("greater than sign").setDebug(self.dbgLiterals)
    lt = Literal("<").setName("less than sign").setDebug(self.dbgLiterals)
    minus = Literal("-").setName("minus sign").setDebug(self.dbgLiterals)
    plus = Literal("+").setName("plus sign").setDebug(self.dbgLiterals)
    _and = (Literal("&&")|Literal("and")).setName("and sign").setDebug(self.dbgLiterals)
    _or = (Literal("||")|Literal("or")).setName("or sign").setDebug(self.dbgLiterals)
    _not = (Literal("!")|Literal("not")).setName("not sign").setDebug(self.dbgLiterals)

    # Productions for measurement definitions
    # paramExpr = Group(Optional(((variable)("pname") + eq.suppress() + (number|variable|dblQuotedString)("pval")) + ZeroOrMore(comma + (number|variable|dblQuotedString)("p"))))
    # a single 'name = value' parameter (value: ip address, float, int or variable)
    namedParam = Group((variable)("pname") + eq.suppress() + (ipAddress("pipaddr")|float("pfloat")|integer("pint")|variable("pvar")) + Optional(comma))("param").setDebug(self.dbgMeasurement)
    paramExpr = Group(ZeroOrMore(namedParam))("params").setDebug(self.dbgMeasurement)
    # fname(name=value, ...)
    functionExpr = Group(variable("fname") + oparen + paramExpr + eparen )("function").setDebug(self.dbgMeasurement)
    # mvar = fname(...);
    measurementExpr = Group(variable("mvar") + eq.suppress() + (functionExpr) + semicolon)("measure").setDebug(self.dbgMeasurement)
    measurementList = OneOrMore(measurementExpr).setDebug(self.dbgMeasurement)
    measure = Group(measurements + obrace + measurementList + ebrace)("measurements").setDebug(self.dbgMeasurement)

    # Productions for zone definitions
    # NOTE(review): this first arithParamExpr binding is dead -- it is
    # rebound two statements below before ever being used.
    arithParamExpr = Group(Optional((number|variable|string)("param") + ZeroOrMore(comma + (number|variable|string)("param")))).setDebug(self.dbgZones)
    arithNamedParam = Group((variable)("pname") + eq.suppress() + (ipAddress("pipaddr")|float("pfloat")|integer("pint")|variable("pvar")|dblQuotedString("pstr")) + Optional(comma))("param").setDebug(self.dbgZones)
    arithParamExpr = Group(ZeroOrMore(arithNamedParam))("params").setDebug(self.dbgZones)
    arithFuncExpr = Group(variable("fname") + oparen + arithParamExpr("params") + eparen + Optional(comma))("function").setDebug(self.dbgZones)
    # one or more function calls used as the argument list of an outer call
    arithNestFuncExpr = Group(OneOrMore(arithFuncExpr))("params").setDebug(self.dbgZones)
    arithFuncExpr2 = Group(variable("fname") + oparen + arithNestFuncExpr + eparen)("function").setDebug(self.dbgZones)
    arithTok = (arithFuncExpr|arithFuncExpr2|number("num")|variable("var")).setDebug(self.dbgZones)
    opExpr = (eq|geq|leq|gt|lt|minus|plus|_and|_or).setDebug(self.dbgZones)
    # fully parenthesized binary expressions: '(' l op r ')', recursively
    arithExpr = Forward().setDebug(self.dbgZones)
    arithExpr << Group(oparen + Group((arithTok|arithExpr))("l") + opExpr("op") + Group((arithTok|arithExpr))("r") + eparen)("expression").setDebug(self.dbgZones)
    # zname = <expression>;
    zoneExpr = Group(variable("zname") + eq.suppress() + arithExpr + semicolon)("zone").setName("ZoneExpr").setDebug(self.dbgZones)
    zones = Group(zoneTok + obrace + OneOrMore(zoneExpr) + ebrace)("zones").setName("Zones").setDebug(self.dbgZones)

    # Productions for action definitions
    actNamedParam = Group((variable)("pname") + eq.suppress() + (ipAddress("pipaddr")|float("pfloat")|integer("pint")|variable("pvar")|dblQuotedString("pstr")) + Optional(comma))("param").setDebug(self.dbgActions)
    actParamExpr = Group(ZeroOrMore(actNamedParam))("params").setDebug(self.dbgActions)
    actFunExpr = Group(variable("fname") + oparen + actParamExpr + eparen + semicolon)("function").setDebug(self.dbgActions)
    # statevariable doesn't allow "-", because its confused with "->"
    statevariable = Word(alphas, alphanums + "." + "_").setName("statevariable")
    state = statevariable("state").setDebug(self.dbgActions)
    # state-machine triggers: 'a -> b' (transition), '-> b' (enter), 'a ->' (leave)
    statetrans = Group(statevariable("from") + to.suppress() + statevariable("to"))("trans").setDebug(self.dbgActions)
    stateenter = Group(to.suppress() + statevariable("enter"))("edge").setDebug(self.dbgActions)
    stateleave = Group(statevariable("leave") + to.suppress())("edge").setDebug(self.dbgActions)
    fsm = (statetrans|stateleave | stateenter|state).setDebug(self.dbgActions)
    # <trigger> = fn(...); fn(...); ...
    action = Group(fsm + eq.suppress() + Group(OneOrMore(actFunExpr))("functions"))("action").setDebug(self.dbgActions)
    actions = Group(actionTok + obrace + OneOrMore(action) + ebrace)("actions").setDebug(self.dbgActions)

    # complete grammar: the three sections in fixed order
    self.MEASURE = measure + zones + actions

    # Known action functions and their expected parameter types.
    self.actionFunctions = [
        {"fname":"Publish",
         "parameters": [
            {"pname":"topic","type":"pstr"},
            {"pname":"message","type":"pstr"},
         ]},
        {"fname":"Notify",
         "parameters": [
            {"pname":"target","type":"pstr"},
            {"pname":"message","type":"pstr"},
         ]}
    ]
    # Known zone functions and their expected parameter types.
    self.zoneFunctions = [
        {"fname":"AVG",
         "parameters": [
            {"pname":"val","type":"pvar"},
            {"pname":"max_age","type":"pstr"},
         ]}
    ]
    # Known measurement functions and their expected parameter types.
    self.measureFunctions = [
        {"fname":"delay.twoway.icmp.us.mean",
         "parameters": [
            {"pname":"source.ipv4","type":"pipaddr"},
            {"pname":"destination.ipv4","type":"pipaddr"},
            {"pname":"count","type":"pint"}
         ]},
        {"fname":"overload.risk.rx",
         "parameters": [
            {"pname":"interface","type":"pvar"}
         ]}
    ]
IntegerTok = Optional(Literal('-')) + Word(pyparsing.nums) IntegerTok.addParseAction(lambda toks: int("".join(toks))) UnsignedIntTok = Word(pyparsing.nums) UnsignedIntTok.addParseAction(lambda toks: int(toks[0])) FloatTok = Optional(Literal('-')) + Word( pyparsing.nums) + Optional(Literal('.') + Optional(Word(pyparsing.nums))) FloatTok.addParseAction(lambda toks: float("".join(toks))) HexStringTok = Word(pyparsing.hexnums) HexStringTok.addParseAction(lambda toks: int(toks[0], base=16)) UnquotedStringTok = ZeroOrMore( White()).suppress() + CharsNotIn("()\"\'" + " \r\n") UnquotedStringTok.addParseAction(lambda toks: "".join(toks).strip()) QuotedStringTok = Group(dblQuotedString() ^ sglQuotedString()) QuotedStringTok.addParseAction(lambda toks: "".join(toks[0]).strip('"')) AnystringTok = QuotedStringTok ^ UnquotedStringTok LeftParenTok = Literal('(').suppress() RightParenTok = Literal(')').suppress() BoolTrueTok = Keyword("yes", caseless=True) | Keyword("true", caseless=True) BoolTrueTok.addParseAction(lambda: True) BoolFalseTok = Keyword("no", caseless=True) | Keyword("false", caseless=True) BoolFalseTok.addParseAction(lambda: False) BooleanTok = BoolTrueTok | BoolFalseTok def _paren_stmt(keyword, *values, store=True): """
import pyparsing as pp
from pydbml.definitions.generic import (expression, name, string_literal,
                                        boolean_literal, number_literal,
                                        expression_literal)
from pydbml.definitions.common import _, _c, c, n, note, pk, unique
from pydbml.definitions.reference import ref_inline
from pydbml.classes import Column

# Skip spaces, tabs and CR but NOT '\n' -- newlines are significant tokens
# in this grammar (see the imported `n` element).
pp.ParserElement.setDefaultWhitespaceChars(' \t\r')

# Optional type arguments, e.g. the "(255)" in varchar(255); the raw text
# between the parentheses is captured under the 'args' result name.
type_args = ("(" + pp.originalTextFor(expression)('args') + ")")
type_name = (pp.Word(pp.alphanums + '_') | pp.dblQuotedString())('name')
# A column type is a name optionally followed by an argument list
# ([0, 1] is pyparsing's "repeat zero or one time" notation).
column_type = (type_name + type_args[0, 1])


def parse_column_type(s, l, t):
    '''
    int or "mytype" or varchar(255)
    '''
    # Reassemble the type as a plain "name" or "name(args)" string,
    # dropping the parentheses when there are no arguments.
    result = t['name']
    args = t.get('args')
    result += '(' + args + ')' if args else ''
    return result


column_type.setParseAction(parse_column_type)

# 'default:' value -- a quoted string, a backtick expression, a boolean
# (mapped to a Python value), or a number.  (Statement continues past the
# end of this chunk.)
default = pp.CaselessLiteral('default:').suppress() + _ - (
    string_literal
    | expression_literal
    | boolean_literal.setParseAction(lambda s, l, t: {
        'true': True,
def _parse_study_search_string(self, searchstr,
                               only_with_processed_data=False):
    """parses string into SQL query for study search

    Parameters
    ----------
    searchstr : str
        The string to parse
    only_with_processed_data : bool
        Whether or not to return studies with processed data.

    Returns
    -------
    study_sql : str
        SQL query for selecting studies with the required metadata columns
    sample_sql : str
        SQL query for each study to get the sample ids that match the query
    meta_headers : list
        metadata categories in the query string in alphabetical order

    Notes
    -----
    All searches are case-sensitive

    References
    ----------
    .. [1] McGuire P (2007) Getting started with pyparsing.
    """
    # build the parse grammar
    category = Word(alphas + nums + "_")
    separator = oneOf("> < = >= <= !=") | CaselessLiteral("includes") | \
        CaselessLiteral("startswith")
    value = Word(alphas + nums + "_" + ":" + ".") | \
        dblQuotedString().setParseAction(removeQuotes)
    criterion = Group(category + separator + value)
    criterion.setParseAction(SearchTerm)
    and_ = CaselessLiteral("and")
    or_ = CaselessLiteral("or")
    not_ = CaselessLiteral("not")
    optional_seps = Optional(and_ | or_ | not_)

    # create the grammar for parsing operators AND, OR, NOT
    search_expr = operatorPrecedence(
        criterion, [
            (not_, 1, opAssoc.RIGHT, SearchNot),
            (and_, 2, opAssoc.LEFT, SearchAnd),
            (or_, 2, opAssoc.LEFT, SearchOr)])

    # parse the search string to get out the SQL WHERE formatted query
    eval_stack = (search_expr + stringEnd).parseString(searchstr)[0]
    sql_where = eval_stack.generate_sql()

    # this lookup will be used to select only studies with columns
    # of the correct type
    type_lookup = {int: 'integer', float: 'float8', str: 'varchar'}

    # parse out all metadata headers we need to have in a study, and
    # their corresponding types, in a single scan of the search string
    # (previously the string was scanned twice, once per field)
    all_headers = []
    all_types = []
    for res in (criterion + optional_seps).scanString(searchstr):
        all_headers.append(res[0][0].term[0])
        all_types.append(res[0][0].term[2])
    meta_headers = set(all_headers)
    all_types = [type_lookup[type(typecast_string(s))] for s in all_types]

    # sort headers and types so they return in same order every time.
    # Should be a relatively short list so very quick
    # argsort implementation taken from
    # http://stackoverflow.com/questions/3382352/
    # equivalent-of-numpy-argsort-in-basic-python
    sort_order = sorted(range(len(all_headers)),
                        key=all_headers.__getitem__)
    all_types = [all_types[x] for x in sort_order]
    all_headers.sort()

    # At this point it is possible that a metadata header has been
    # referenced more than once in the query. If the types agree, then we
    # do not need to do anything. If the types do not agree (specifically,
    # if it appears to be numerical in one case and string in another),
    # then we need to give varchar the precedence.
    meta_header_type_lookup = dict()
    for header, header_type in zip(all_headers, all_types):
        if header not in meta_header_type_lookup:
            meta_header_type_lookup[header] = header_type
        elif header_type == 'varchar' or \
                meta_header_type_lookup[header] == 'varchar':
            meta_header_type_lookup[header] = 'varchar'

    # create the study finding SQL
    # remove metadata headers that are in required_sample_info table
    meta_headers = meta_headers.difference(self.required_cols).difference(
        self.study_cols)

    # get all study ids that contain all metadata categories searched for
    sql = []
    if meta_headers:
        # have study-specific metadata, so need to find specific studies
        for meta in meta_headers:
            if meta_header_type_lookup[meta] in ('integer', 'float8'):
                allowable_types = "('integer', 'float8')"
            else:
                allowable_types = "('varchar')"
            # NOTE(review): meta is interpolated into SQL after
            # scrub_data(); confirm scrub_data escapes properly.
            sql.append("SELECT study_id FROM qiita.study_sample_columns "
                       "WHERE lower(column_name) = lower('%s') and "
                       "column_type in %s" %
                       (scrub_data(meta), allowable_types))
    else:
        # no study-specific metadata, so need all studies
        sql.append("SELECT study_id FROM qiita.study_sample_columns")

    # combine the query
    if only_with_processed_data:
        sql.append('SELECT study_id FROM qiita.study_processed_data')
    study_sql = ' INTERSECT '.join(sql)

    # create the sample finding SQL, getting both sample id and values
    # build the sql formatted list of metadata headers
    header_info = []
    for meta in meta_header_type_lookup:
        if meta in self.required_cols:
            header_info.append("r.%s" % meta)
        elif meta in self.study_cols:
            header_info.append("st.%s" % meta)
        else:
            header_info.append("sa.%s" % meta)
    # build the SQL query
    sample_sql = ("SELECT r.sample_id,%s FROM qiita.required_sample_info "
                  "r JOIN qiita.sample_{0} sa ON sa.sample_id = "
                  "r.sample_id JOIN qiita.study st ON st.study_id = "
                  "r.study_id WHERE %s" %
                  (','.join(header_info), sql_where))

    # BUG FIX: return a concrete list instead of a dict-keys view so the
    # return value matches the documented 'list' contract
    return study_sql, sample_sql, list(meta_header_type_lookup.keys())