Esempio n. 1
0
class QueryParser(collections.OrderedDict):
    """Parse a search query string into an ordered token mapping.

    The query is a sequence of ``key:value`` tokens (sort, name, date,
    tag, from, to, type).  Parse results are stored in ``self`` as an
    ordered dict.  Raises :class:`QueryError` on any parse failure.
    """

    # tag:<identifier> -- user-defined tag reference
    TAG_TOKEN = (pp.Keyword("tag") + pp.Suppress(":") +
                 pp.Word(pp.alphas, pp.alphanums + "_"))

    # sort:<field>[:<s|n|d>] [order:<asc|desc>] -- sort key with optional
    # comparison mode (default "s") and direction (default "desc")
    SORT_TOKEN = (pp.Keyword("sort") + pp.Suppress(":") +
                  (pp.Keyword("name") | pp.Keyword("date") | TAG_TOKEN) +
                  pp.Optional(
                      pp.Suppress(":") + pp.oneOf("s n d"),
                      default="s",
                  ) + pp.Optional(
                      pp.Suppress(pp.Keyword("order")) + pp.Suppress(":") +
                      (pp.Keyword("asc") | pp.Keyword("desc")),
                      default="desc",
                  ))

    # name:<value>, date:<value>, or tag:<name>[:<value>]; values may be
    # bare words or quoted strings (quotes stripped)
    SEARCH_TOKEN = (
        ((pp.Keyword("name") | pp.Keyword("date")) + pp.Suppress(":") +
         (pp.Word(pp.printables)
          | pp.dblQuotedString().setParseAction(pp.removeQuotes)
          | pp.sglQuotedString().setParseAction(pp.removeQuotes))) |
        (TAG_TOKEN + pp.Optional(
            pp.Suppress(":") +
            (pp.Word(pp.printables)
             | pp.dblQuotedString().setParseAction(pp.removeQuotes)
             | pp.sglQuotedString().setParseAction(pp.removeQuotes)))))

    # TODO: date, from, to should be able to grab dates in EXIF tags

    # TODO: support quoted %c datetimes
    # TODO: support quoted datetimes with hour/minute/second individually
    DATETIME = (Date("%Y/%m/%d",
                     DateHints.YEAR | DateHints.MONTH | DateHints.DAY)
                | Date("%Y/%m", DateHints.YEAR | DateHints.MONTH)
                | Date("%Y", DateHints.YEAR))
    FROM_TOKEN = (pp.Keyword("from") + pp.Suppress(":") + DATETIME)
    TO_TOKEN = (pp.Keyword("to") + pp.Suppress(":") + DATETIME)

    TYPE_TOKEN = (pp.Keyword("type") + pp.Suppress(":") +
                  (pp.Keyword("image") | pp.Keyword("video")))

    QUERY_TOKEN = pp.Group(SORT_TOKEN | SEARCH_TOKEN | FROM_TOKEN | TO_TOKEN
                           | TYPE_TOKEN)

    GRAMMAR = pp.Dict(pp.OneOrMore(QUERY_TOKEN))

    def __init__(self, s, grammar=GRAMMAR):
        """Parse *s* with *grammar* and populate the dict.

        Raises QueryError (with the original parse error chained as the
        cause) when *s* does not match the grammar.
        """
        try:
            r = grammar.setDebug(logging.getLogger().isEnabledFor(
                logging.DEBUG)).parseString(s, parseAll=True)

            logging.debug("search query parse results: %s", r)

            self.update(collections.OrderedDict(r))
        except Exception as e:
            # `Exception` already covers every pyparsing error class, so
            # listing ParseException etc. individually was redundant.
            # Chain the cause so the original parse error stays visible.
            raise QueryError("unable to parse query: %s" % e) from e
Esempio n. 2
0
def _parse(mystr):
    """Parse a ``field = value`` block syntax into pyparsing results.

    Supports scalar values (quoted strings, dates, times, numbers),
    nested ``{ ... }`` objects, and ``field[] = { ... }`` value lists.
    Returns the ParseResults for one or more top-level members.
    """
    LBRACE, RBRACE, EQUAL = map(pp.Suppress, "{}=")
    # field names may contain spaces; trailing blanks are trimmed
    field = pp.Word(pp.printables + ' ', excludeChars='[]=')
    field.addParseAction(pp.tokenMap(str.rstrip))
    string = pp.dblQuotedString().setParseAction(pp.removeQuotes)
    number = pp.pyparsing_common.number()
    date_expr = pp.Regex(r'\d\d\d\d-\d\d-\d\d')
    time_expr = pp.Regex(r'\d\d:\d\d:\d\d\.\d\d\d')
    scalar_value = (string | date_expr | time_expr | number)

    list_marker = pp.Suppress("[]")
    value_list = pp.Forward()
    jobject = pp.Forward()

    memberDef1 = pp.Group(field + EQUAL + scalar_value)
    memberDef2 = pp.Group(field + EQUAL + jobject)
    memberDef3 = pp.Group(field + list_marker + EQUAL + LBRACE + value_list +
                          RBRACE)
    memberDef = memberDef1 | memberDef2 | memberDef3

    value_list <<= (pp.delimitedList(scalar_value, ",")
                    | pp.ZeroOrMore(pp.Group(pp.Dict(memberDef2))))
    value_list.setParseAction(lambda t: [pp.ParseResults(t[:])])

    jobject <<= pp.Dict(LBRACE + pp.ZeroOrMore(memberDef) + RBRACE)
    # force empty jobject to be a dict
    jobject.setParseAction(lambda t: t or {})

    # (a dead `members = pp.OneOrMore(memberDef)` parser that was built
    # and immediately discarded has been removed)
    parser = pp.OneOrMore(pp.Group(pp.Dict(memberDef)))

    return parser.parseString(mystr)
Esempio n. 3
0
def _parse(mystr):
    """Parse a ``field = value`` block syntax into pyparsing results.

    Like the sibling parser, but additionally accepts the bare keyword
    ``nan`` as a scalar value.  Returns the ParseResults for one or
    more top-level members.
    """
    LBRACE, RBRACE, EQUAL = map(pp.Suppress, "{}=")
    # field names may contain spaces; trailing blanks are trimmed
    field = pp.Word(pp.printables + ' ', excludeChars='[]=')
    field.addParseAction(pp.tokenMap(str.rstrip))
    string = pp.dblQuotedString().setParseAction(pp.removeQuotes)
    number = pp.pyparsing_common.number()
    date_expr = pp.Regex(r'\d\d\d\d-\d\d-\d\d')
    time_expr = pp.Regex(r'\d\d:\d\d:\d\d\.\d\d\d')
    nan = pp.Keyword('nan')
    scalar_value = (string | date_expr | time_expr | number | nan)

    list_marker = pp.Suppress("[]")
    value_list = pp.Forward()
    jobject = pp.Forward()

    memberDef1 = pp.Group(field + EQUAL + scalar_value)
    memberDef2 = pp.Group(field + EQUAL + jobject)
    memberDef3 = pp.Group(field + list_marker + EQUAL + LBRACE + value_list +
                          RBRACE)
    memberDef = memberDef1 | memberDef2 | memberDef3

    value_list <<= (pp.delimitedList(scalar_value, ",") |
                    pp.ZeroOrMore(pp.Group(pp.Dict(memberDef2))))
    value_list.setParseAction(lambda t: [pp.ParseResults(t[:])])

    jobject <<= pp.Dict(LBRACE + pp.ZeroOrMore(memberDef) + RBRACE)
    # force empty jobject to be a dict
    jobject.setParseAction(lambda t: t or {})

    # (a dead `members = pp.OneOrMore(memberDef)` parser that was built
    # and immediately discarded has been removed)
    parser = pp.OneOrMore(pp.Group(pp.Dict(memberDef)))

    return parser.parseString(mystr)
Esempio n. 4
0
    def __init__(self):
        """Build the pyparsing grammar for directive strings.

        A directive looks like ``name(arg, arg, kw=value)``; several
        directives may be chained with ``;``.  All token expressions are
        kept on ``self`` so they can be reused; ``self.mainTok`` is the
        grammar entry point.
        """
        # directive grammar

        lparen = pp.Literal('(').suppress()
        rparen = pp.Literal(')').suppress()
        comma = pp.Literal(',').suppress()
        semicolon = pp.Literal(';').suppress()
        equalTok = pp.Literal('=').suppress()

        # signed decimal number; the parse action joins the pieces and
        # converts to float
        self.floatTok = pp.Optional((pp.Literal('-'))|pp.Literal('+')) + pp.Word(pp.nums) + pp.Optional(pp.Literal('.') + pp.Optional(pp.Word(pp.nums)))
        self.floatTok.addParseAction(lambda toks: float("".join(toks)))

        # single- or double-quoted string; surrounding quotes stripped
        self.stringTok = pp.Group(pp.dblQuotedString() ^ pp.sglQuotedString())
        self.stringTok.addParseAction(lambda toks: "".join(toks[0]).strip('"').strip("'"))

        # boolean keywords parse to real Python bools
        self.trueTok = pp.Keyword("true")
        self.trueTok.addParseAction(lambda _: True)

        self.falseTok = pp.Keyword("false")
        self.falseTok.addParseAction(lambda _: False)

        self.boolTok = self.trueTok | self.falseTok

        self.identifierTok = pp.Word(pp.alphas + '_', pp.alphanums + '_')('identifier')

        # self.posKeywordTok = \
        #     pp.Keyword("tl") | \
        #     pp.Keyword("tc") | \
        #     pp.Keyword("tr") | \
        #     pp.Keyword("cl") | \
        #     pp.Keyword("cc") | \
        #     pp.Keyword("cr") | \
        #     pp.Keyword("bl") | \
        #     pp.Keyword("bc") | \
        #     pp.Keyword("br")
        # position keyword: currently any identifier-like word rather
        # than the closed set commented out above
        self.posKeywordTok = \
            pp.Word(pp.alphas + '_', pp.alphanums + '_')
        self.posKeywordTok.addParseAction(lambda toks: str(toks[0]))

        # arguments: positionals first (floats/strings/bools), then
        # keyword args; or keyword args only; or no arguments at all
        self.positionalArgTok = self.floatTok | self.stringTok | self.boolTok
        self.keywordArgTok = pp.Group(self.identifierTok + equalTok + (self.positionalArgTok | self.posKeywordTok))
        self.keywordArgTok.addParseAction(lambda toks: [x for x in toks])

        self.argsTok = pp.Optional(
            (self.positionalArgTok + pp.ZeroOrMore(comma + self.positionalArgTok) + pp.ZeroOrMore(comma + self.keywordArgTok)) |
            (self.keywordArgTok + pp.ZeroOrMore(comma + self.keywordArgTok))
        )('args')
        # self.argsTok.addParseAction(lambda toks: [toks])
        self.argsTok.addParseAction(lambda toks: DirectiveArgs(toks))

        # one directive is identifier(args); a full input is one or more
        # semicolon-separated directives with an optional trailing ';'
        self.directiveTok = pp.Group(self.identifierTok + lparen + self.argsTok + rparen)
        self.mainTok = self.directiveTok + pp.ZeroOrMore(semicolon + self.directiveTok) + pp.Optional(semicolon)
Esempio n. 5
0
def parse_list_vms(stdout, stderr):
    """Parse ``VBoxManage list vms``-style output into name/uuid dicts.

    Each line looks like ``"vm name" {uuid}``.  *stderr* is accepted for
    interface compatibility but is not used.  Returns a list of
    ``{'name': ..., 'uuid': ...}`` dicts, with quotes stripped from the
    name.
    """
    # NOTE: previously `alphas` was passed to dblQuotedString(), which
    # only set a results name that the following .setResultsName('name')
    # immediately overrode -- removed as a no-op.
    id_vm_name = dblQuotedString().setResultsName('name')
    # raw string so `\-` is a literal backslash-dash for srange (the
    # non-raw form is an invalid escape sequence in modern Python)
    id_vm_uuid = Word(srange(r"[a-zA-Z0-9_\-]")).setResultsName('uuid')
    left_brace = Suppress("{")
    right_brace = Suppress("}")
    vm_group = Group(id_vm_name + left_brace + id_vm_uuid + right_brace)
    vm_list = OneOrMore(vm_group)

    token_lists = vm_list.parseString(stdout, parseAll=True)
    return [{'name': token_list.name.replace('\"', ''),
             'uuid': token_list.uuid} for token_list in token_lists]
Esempio n. 6
0
def BNF(decorate):
    """Build a pyparsing grammar for a BNF-like notation.

    *decorate* wraps every parse action (a hook for tracing or
    instrumentation); it receives a label and the action callable.  The
    returned grammar parses productions of the form
    ``nonterm = alternation | alternation ... ;`` and produces a dict
    mapping each production's non-terminal to its right-hand side.
    """
    def eltFunc(s, l, toks):
        # wrap an element in Opt/Kleene when it is followed by '?' / '*'
        last = toks[-1]
        subj = toks[0]
        if last == '?':
            return Opt(toks[0])
        elif last == '*':
            return Kleene(toks[0])
        else:
            return subj

    nonterm = pp.Word(pp.alphanums + "'_$").setParseAction(
        decorate('nonterm', lambda s, loc, toks: NonTerminal(toks[0])))
    # nonterm() makes a copy, so setting a new parse action here leaves
    # the original `nonterm` (used on production left-hand sides) intact
    reference = nonterm().setParseAction(
        decorate('reference', lambda s, l, toks: Reference(toks[0])))
    char = pp.Word(pp.alphanums, exact=1)
    # character ranges like [a-z]
    charrange = (pp.Literal('[').suppress() + char + pp.Literal('-').suppress() + char + pp.Literal(']').suppress()) \
        .setParseAction(decorate('charrange', lambda s, l, toks: CharRange(toks[0], toks[1])))
    literal = pp.dblQuotedString().setParseAction(
        decorate('literal', lambda s, l, toks: Literal(toks[0])))
    optional = pp.Literal('?').setParseAction(decorate('?', lambda s, l, t: t))
    star = pp.Literal('*').setParseAction(decorate('*', lambda s, l, t: t))
    quant = optional | star
    alternations = pp.Forward()
    # element: parenthesized alternations, a char range, a literal, or a
    # reference -- each optionally quantified with ? or *
    element = (((pp.Literal('(').suppress() + alternations +
                 pp.Literal(')').suppress()).setParseAction(
                     decorate('parens', lambda s, l, t: t))
                | charrange | literal | reference) +
               pp.Optional(quant)).setParseAction(decorate('element', eltFunc))
    # one alternation is a concatenation of elements (collapsed to the
    # single element when there is only one)
    alternation = (element + pp.ZeroOrMore(element)).setParseAction(
        decorate(
            'alternation', lambda s, l, toks: toks[0]
            if len(toks) == 1 else Concatenation(toks)))
    alternations << alternation + pp.ZeroOrMore(
        pp.Literal('|').suppress() + alternation)
    alternations.setParseAction(
        decorate(
            'alternations', lambda s, l, toks: toks[0]
            if len(toks) == 1 else Alternation(toks)))
    # production: nonterm = alternations ;
    production = (nonterm + pp.Literal('=').suppress() + alternations +
                  pp.Literal(';').suppress()).setParseAction(
                      decorate('production', lambda s, l, toks:
                               (toks[0], toks[1:])))
    grammar = (production + pp.ZeroOrMore(production)).setParseAction(
        decorate('grammar', lambda s, l, toks: {t[0]: t[1]
                                                for t in toks}))
    return grammar
Esempio n. 7
0
    def __init__(self):
        """Initialize parser state and the `pragma protect line grammar.

        The parser recognizes one protected-source directive per input
        line: begin/end markers, key/data block markers, (possibly
        parenthesized) assignments, and bare base64 payload lines.
        Parse actions dispatch to the ``self.*`` handler methods, which
        update the state attributes below.
        """
        self.session_keys = {}
        self.encrypted_data = None
        self.info = {}
        self.base64_buf = []     # accumulates base64 lines (see base64_action)
        self.key_block = False
        self.data_block = False
        self.p1735 = False       # presumably: IEEE 1735 envelope seen -- TODO confirm

        # every directive line starts with `pragma protect
        protect_kw = pp.Keyword('`pragma protect').suppress()
        identifier = pp.Word(pp.alphas, pp.alphanums + "_")
        number = pp.Word(pp.nums).setParseAction(lambda t: int(t[0]))
        string = pp.dblQuotedString().setParseAction(pp.removeQuotes)
        equals = pp.Suppress("=")
        lbrace = pp.Suppress('(')
        rbrace = pp.Suppress(')')
        # key = value assignments; each one triggers assignment_action
        simpleAssignment = (identifier + equals +
                            (number | string)).setParseAction(
                                self.assignment_action)
        multiAssignment = simpleAssignment + pp.ZeroOrMore(',' +
                                                           simpleAssignment)
        tupleAssignment = identifier + equals + lbrace + multiAssignment + rbrace
        assignment = protect_kw + (multiAssignment | tupleAssignment)

        PSTART = (protect_kw +
                  pp.CaselessLiteral('begin_protected')).setParseAction(
                      self.begin)
        PFINISH = (protect_kw +
                   pp.CaselessLiteral('end_protected')).setParseAction(
                       self.finish)

        key_block = (protect_kw +
                     pp.CaselessLiteral('key_block')).setParseAction(
                         self.begin_key_block)
        data_block = (protect_kw +
                      pp.CaselessLiteral('data_block')).setParseAction(
                          self.begin_data_block)

        # a bare base64 payload line (standard alphabet plus '-')
        base64_string = pp.Word(pp.alphanums + "+-/=").setParseAction(
            self.base64_action)
        emptyLine = (pp.LineStart() + pp.LineEnd()).suppress()

        self.parser = (PSTART | assignment | key_block | data_block
                       | base64_string | emptyLine | PFINISH)
Esempio n. 8
0
def create_grammar():
    """Construct and return a pyparsing grammar for JSON-style values.

    The returned expression matches a single JSON value: a quoted
    string, a number, an object, an array, or one of the keywords
    true/false/null.  String quotes are kept as-is.
    """
    true_lit = Keyword('true')
    false_lit = Keyword('false')
    null_lit = Keyword('null')

    lbrack, rbrack, lbrace, rbrace, colon = (Suppress(ch) for ch in '[]{}:')

    json_string = dblQuotedString()
    json_number = pyparsing_common.number()

    obj = Forward()
    value = Forward()

    # an array is a bracketed, comma-separated list (possibly empty)
    array = Group(lbrack + Optional(delimitedList(value), []) + rbrack)
    value <<= (json_string | json_number | Group(obj) | array
               | true_lit | false_lit | null_lit)

    # an object is a braced list of "key": value members
    member = Group(json_string + colon + value)
    obj <<= Dict(lbrace + Optional(delimitedList(member)) + rbrace)

    return value
Esempio n. 9
0
def _define_json():
    """Build and return a pyparsing grammar for JSON values.

    Adapted from the pyparsing jsonParser.py example.  String quotes
    are stripped; true/false/null parse to their Python equivalents via
    ``_make_keyword``.
    """
    # https://pyparsing.wikispaces.com/file/view/jsonParser.py
    TRUE = _make_keyword('true', True)
    FALSE = _make_keyword('false', False)
    NULL = _make_keyword('null', None)

    LBRACK, RBRACK, LBRACE, RBRACE, COLON = map(Suppress, '[]{}:')

    jsonString = dblQuotedString().setParseAction(removeQuotes)
    jsonNumber = pyparsing_common.number()

    jsonObject = Forward()
    jsonValue = Forward()
    jsonElements = delimitedList(jsonValue)
    jsonArray = Group(LBRACK + Optional(jsonElements, []) + RBRACK)
    # <<= is the operator form pyparsing recommends for filling in a
    # Forward; plain << binds more loosely than | and is easy to misuse
    jsonValue <<= (jsonString | jsonNumber | Group(jsonObject) | jsonArray
                   | TRUE | FALSE | NULL)
    memberDef = Group(jsonString + COLON + jsonValue)
    jsonMembers = delimitedList(memberDef)
    jsonObject <<= Dict(LBRACE + Optional(jsonMembers) + RBRACE)
    return jsonValue
Esempio n. 10
0
    def get_parser():
        """A Parser that parses the dumped ParamFile attribute by FastPM.

        This must be a result produced by the lua dump module. Must be
        a lua table. e.g.
        { a = 3, b = {0, 1, 2,} }

        (modified from jsonParser.py example under pyparsing)

        When using the parser, convert the attribute from an array of U1 to a string first.
        """
        import pyparsing as pp
        from pyparsing import pyparsing_common as ppc

        def make_keyword(kwd_str, kwd_value):
            # keyword that parses to a fixed Python value
            return pp.Keyword(kwd_str).setParseAction(
                pp.replaceWith(kwd_value))

        TRUE = make_keyword("true", True)
        FALSE = make_keyword("false", False)
        NULL = make_keyword("nil", None)

        LBRACE, RBRACE, ASSIGN, COMMA = map(pp.Suppress, "{}=,")

        luaName = pp.Word(pp.alphas + "_", pp.alphanums + "_")
        luaString = pp.dblQuotedString().setParseAction(pp.removeQuotes)
        luaNumber = ppc.number()

        luaObject = pp.Forward()
        luaValue = pp.Forward()
        # lua tables allow a trailing comma after the last element
        luaElements = pp.delimitedList(luaValue) + pp.Optional(COMMA)
        luaArray = pp.Group(LBRACE + pp.Optional(luaElements, []) + RBRACE)
        # <<= is the operator form pyparsing recommends for filling in a
        # Forward; plain << binds more loosely than | and is easy to misuse
        luaValue <<= (luaString | luaNumber | pp.Group(luaObject) | luaArray
                      | TRUE | FALSE | NULL)
        memberDef = pp.Group(luaName + ASSIGN + luaValue)
        luaMembers = pp.delimitedList(memberDef) + pp.Optional(COMMA)
        luaObject <<= pp.Dict(LBRACE + pp.Optional(luaMembers) + RBRACE)
        return luaObject
Esempio n. 11
0
    def _parse_study_search_string(self,
                                   searchstr,
                                   only_with_processed_data=False):
        """parses string into SQL query for study search

        Parameters
        ----------
        searchstr : str
            The string to parse
        only_with_processed_data : bool
            Whether or not to return studies with processed data.

        Returns
        -------
        study_sql : str
            SQL query for selecting studies with the required metadata columns
        sample_sql : str
            SQL query for each study to get the sample ids that mach the query
        meta_headers : list
            metadata categories in the query string in alphabetical order

        Notes
        -----
        All searches are case-sensitive

        References
        ----------
        .. [1] McGuire P (2007) Getting started with pyparsing.
        """
        # build the parse grammar
        category = Word(alphas + nums + "_")
        seperator = oneOf("> < = >= <= !=") | CaselessLiteral("includes") | \
            CaselessLiteral("startswith")
        value = Word(alphas + nums + "_" + ":" + ".") | \
            dblQuotedString().setParseAction(removeQuotes)
        criterion = Group(category + seperator + value)
        criterion.setParseAction(SearchTerm)
        and_ = CaselessLiteral("and")
        or_ = CaselessLiteral("or")
        not_ = CaselessLiteral("not")
        optional_seps = Optional(and_ | or_ | not_)

        # create the grammar for parsing operators AND, OR, NOT
        search_expr = operatorPrecedence(criterion,
                                         [(not_, 1, opAssoc.RIGHT, SearchNot),
                                          (and_, 2, opAssoc.LEFT, SearchAnd),
                                          (or_, 2, opAssoc.LEFT, SearchOr)])

        # parse the search string to get out the SQL WHERE formatted query
        eval_stack = (search_expr + stringEnd).parseString(searchstr)[0]
        sql_where = eval_stack.generate_sql()

        # this lookup will be used to select only studies with columns
        # of the correct type
        type_lookup = {int: 'integer', float: 'float8', str: 'varchar'}

        # parse out all metadata headers we need to have in a study, and
        # their corresponding types.  Each term is (category, separator,
        # value); scan the search string once instead of twice as before.
        all_terms = [
            c[0][0].term
            for c in (criterion + optional_seps).scanString(searchstr)
        ]
        all_headers = [term[0] for term in all_terms]
        meta_headers = set(all_headers)
        all_types = [
            type_lookup[type(qdb.util.convert_type(term[2]))]
            for term in all_terms
        ]

        # sort headers and types so they return in same order every time.
        # Should be a relatively short list so very quick
        # argsort implementation taken from
        # http://stackoverflow.com/questions/3382352/
        # equivalent-of-numpy-argsort-in-basic-python
        sort_order = sorted(range(len(all_headers)),
                            key=all_headers.__getitem__)
        all_types = [all_types[x] for x in sort_order]
        all_headers.sort()

        # At this point it is possible that a metadata header has been
        # reference more than once in the query. If the types agree, then we
        # do not need to do anything. If the types do not agree (specifically,
        # if it appears to be numerical in one case and string in another),
        # then we need to give varchar the precedence.
        meta_header_type_lookup = dict()
        for header, header_type in zip(all_headers, all_types):
            if header not in meta_header_type_lookup:
                meta_header_type_lookup[header] = header_type
            else:
                if header_type == 'varchar' or \
                        meta_header_type_lookup[header] == 'varchar':
                    meta_header_type_lookup[header] = 'varchar'

        # create the study finding SQL
        # remove metadata headers that are in study table
        meta_headers.discard('sample_id')
        meta_headers = tuple(meta_headers.difference(self.study_cols))

        # get all study ids that contain all metadata categories searched for
        sql = []
        if meta_headers:
            # have study-specific metadata, so need to find specific studies
            for meta in meta_headers:
                if meta_header_type_lookup[meta] in ('integer', 'float8'):
                    allowable_types = "('integer', 'float8')"
                else:
                    allowable_types = "('varchar')"

                sql.append("SELECT study_id FROM qiita.study_sample_columns "
                           "WHERE lower(column_name) = lower('%s') and "
                           "column_type in %s" %
                           (qdb.util.scrub_data(meta), allowable_types))
        else:
            # no study-specific metadata, so need all studies
            sql.append("SELECT study_id FROM qiita.study_sample_columns")

        # combine the query
        if only_with_processed_data:
            sql.append("SELECT DISTINCT study_id "
                       "FROM qiita.study_artifact "
                       "JOIN qiita.artifact USING (artifact_id) "
                       "JOIN qiita.artifact_type USING (artifact_type_id) "
                       "WHERE artifact_type = 'BIOM'")

        # restrict to studies in portal
        sql.append("SELECT study_id from qiita.study_portal "
                   "JOIN qiita.portal_type USING (portal_type_id) "
                   "WHERE portal = '%s'" % qiita_config.portal)
        study_sql = ' INTERSECT '.join(sql)

        # create  the sample finding SQL, getting both sample id and values
        # build the sql formatted list of metadata headers
        header_info = []
        for meta in meta_header_type_lookup:
            if meta in self.study_cols:
                header_info.append("st.%s" % meta)
            else:
                header_info.append("sa.%s" % meta)
        # build the SQL query

        sample_sql = ("SELECT ss.sample_id,%s "
                      "FROM qiita.study_sample ss "
                      "JOIN qiita.sample_{0} sa ON ss.sample_id = sa.sample_id"
                      " JOIN qiita.study st ON st.study_id = ss.study_id "
                      "WHERE %s" % (','.join(header_info), sql_where))
        return study_sql, sample_sql, meta_header_type_lookup.keys()
Esempio n. 12
0
    tokenstr = "\n".join(tokens)
    mod_string = "comment{:d} comment\n(\n{:s}\n)".format(locn, tokenstr)
    mod_def = pp.Dict(
        pp.Group(
            identifier.setResultsName("_name") +
            identifier.setResultsName("_type") + LPAREN +
            pp.Group(pp.OneOrMore(pp.dblSlashComment)).setResultsName("text") +
            RPAREN))
    return mod_def.parseString(mod_string)


# install the comment handler (setParseAction mutates in place)
tor_comment.setParseAction(comment_handler)
# NOTE(review): setResultsName() returns a *named copy* and leaves the
# original expression unchanged, so the two calls below discard their
# results and have no effect -- presumably the intent was to rebind
# tor_comment; confirm against the rest of the module.
tor_comment.setResultsName("_name")
tor_comment.setResultsName("_type")

# a string value: quoted, or a bare identifier-like word
tor_string = pp.dblQuotedString() | pp.Word(pp.alphas, pp.alphanums + "_-.")
number = pp.pyparsing_common.number()

tor_members = pp.Forward()
tor_value = pp.Forward()

# composite values: struct(members), sequence(values), ref(identifier)
tor_struct = pp.Literal("struct").setResultsName("_type") + LPAREN + pp.Dict(
    tor_members) + RPAREN
tor_sequence = pp.Literal("sequence").setResultsName(
    "_type") + LPAREN + pp.delimitedList(tor_value) + RPAREN
tor_ref = pp.Literal("ref").setResultsName(
    "_type") + LPAREN + identifier + RPAREN
tor_value << (tor_sequence | tor_ref | tor_struct | tor_string
              | pp.Group(number + identifier) | number)

# one `name: value` member
member_def = pp.Dict(pp.Group(identifier + COLON + tor_value))
Esempio n. 13
0
# 15- and 16-bit immediate operand parsers; `number` is a factory
# defined elsewhere in this module -- presumably (bit_width, signed);
# TODO confirm against its definition
s15 = number(15, True)
s16 = number(16, True)


def string_format(s, l, t):
    """Parse action: expand literal ``\\n``, ``\\t`` and ``\\r`` escape
    sequences in the first token into their control characters."""
    escapes = (("\\n", "\n"), ("\\t", "\t"), ("\\r", "\r"))
    text = t[0]
    for raw, actual in escapes:
        text = text.replace(raw, actual)
    return text


# a label definition is an identifier followed by ':'
label = label_name + colon
# general-purpose register: $<digits 0-7>, converted to int
reg = pp.Suppress("$") + pp.Word("01234567").setParseAction(to_int)
ireg = reg | spec_imm
# control register: $cr<digits 0-3>, converted to int
creg = pp.Suppress("$cr") + pp.Word("0123").setParseAction(to_int)
cond = pp.oneOf("eq ne gt gte lt lte ult ulte")("cond")
# quoted string literal: quotes removed, then \n/\t/\r escapes expanded
string = pp.dblQuotedString()
string.addParseAction(pp.removeQuotes)
string.addParseAction(string_format)

# comments run from ';' to end of line
comment = ";" + pp.restOfLine

# actions

# wrap raw parse results in the token classes from the `tokens` module
creg.addParseAction(_build(tokens.ControlRegister))
reg.addParseAction(_build(tokens.Register))
cond.addParseAction(_build(tokens.Condition))
ireg.addParseAction(_build(tokens.ImmRegister))
label_name.addParseAction(_build(tokens.Label))
iden = label_name.copy() # used in macros

Esempio n. 14
0
        return True
    except ValueError:
        return False


# whitespace/punctuation helpers shared by the grammar definitions below
W = Suppress(ZeroOrMore(White()))  # optional whitespace, suppressed
C = Suppress(',')
WCW = W + C + W  # comma with optional surrounding whitespace
LPF, RPF = map(Suppress, '()')
LP = Suppress('(') + W
RP = W + Suppress(')')

word = Word(alphanums)
ns = Word(alphanums + '_-.')  # namespace-style token (dots/dashes allowed)
identifier = Word(alphanums + '_')
quote = dblQuotedString().setParseAction(removeQuotes)  # quotes stripped
qid = quote | identifier  # quoted string or bare identifier
# brace-delimited lists: { "a", "b" } and { a, b }
delimited_quoted_list = And([Suppress('{'), delimitedList(quote), Suppress('}')])
delimited_unquoted_list = And([Suppress('{'), delimitedList(identifier), Suppress('}')])


def nest(*content):
    """Define a delimited list by enumerating each element of the list."""
    if not content:
        raise ValueError('no arguments supplied')
    # parenthesized, comma-separated sequence of the given expressions
    elements = [LPF, content[0]]
    for expr in content[1:]:
        elements.append(C)
        elements.append(expr)
    elements.append(RPF)
    return And(elements)


def one_of_tags(
    tags: List[str],
    canonical_tag: str,
Esempio n. 15
0
    def _parse_study_search_string(self, searchstr, only_with_processed_data=False):
        """parses string into SQL query for study search

        Parameters
        ----------
        searchstr : str
            The string to parse
        only_with_processed_data : bool
            Whether or not to return studies with processed data.

        Returns
        -------
        study_sql : str
            SQL query for selecting studies with the required metadata columns
        sample_sql : str
            SQL query for each study to get the sample ids that mach the query
        meta_headers : list
            metadata categories in the query string in alphabetical order

        Notes
        -----
        All searches are case-sensitive

        References
        ----------
        .. [1] McGuire P (2007) Getting started with pyparsing.
        """
        # build the parse grammar
        category = Word(alphas + nums + "_")
        seperator = oneOf("> < = >= <= !=") | CaselessLiteral("includes") | CaselessLiteral("startswith")
        value = Word(alphas + nums + "_" + ":" + ".") | dblQuotedString().setParseAction(removeQuotes)
        criterion = Group(category + seperator + value)
        criterion.setParseAction(SearchTerm)
        and_ = CaselessLiteral("and")
        or_ = CaselessLiteral("or")
        not_ = CaselessLiteral("not")
        optional_seps = Optional(and_ | or_ | not_)

        # create the grammar for parsing operators AND, OR, NOT
        search_expr = operatorPrecedence(
            criterion,
            [(not_, 1, opAssoc.RIGHT, SearchNot), (and_, 2, opAssoc.LEFT, SearchAnd), (or_, 2, opAssoc.LEFT, SearchOr)],
        )

        # parse the search string to get out the SQL WHERE formatted query
        eval_stack = (search_expr + stringEnd).parseString(searchstr)[0]
        sql_where = eval_stack.generate_sql()

        # parse out all metadata headers we need to have in a study, and
        # their corresponding types.  Each term is (category, separator,
        # value); scan the search string once instead of twice as before.
        all_terms = [c[0][0].term for c in (criterion + optional_seps).scanString(searchstr)]
        all_headers = [term[0] for term in all_terms]
        meta_headers = set(all_headers)
        all_types = [term[2] for term in all_terms]

        # sort headers and types so they return in same order every time.
        # Should be a relatively short list so very quick
        # argsort implementation taken from
        # http://stackoverflow.com/questions/3382352/
        # equivalent-of-numpy-argsort-in-basic-python
        sort_order = sorted(range(len(all_headers)), key=all_headers.__getitem__)
        all_types = [all_types[x] for x in sort_order]
        all_headers.sort()

        # At this point it is possible that a metadata header has been
        # reference more than once in the query. If the types agree, then we
        # do not need to do anything. If the types do not agree (specifically,
        # if it appears to be numerical in one case and string in another),
        # then we need to give varchar the precedence.
        meta_header_type_lookup = dict()
        for header, header_type in zip(all_headers, all_types):
            if header not in meta_header_type_lookup:
                meta_header_type_lookup[header] = header_type
            else:
                if header_type == "varchar" or meta_header_type_lookup[header] == "varchar":
                    meta_header_type_lookup[header] = "varchar"

        # create the study finding SQL
        # remove metadata headers that are in study table
        meta_headers.discard("sample_id")
        meta_headers = tuple(meta_headers.difference(self.study_cols))

        # get all study ids that contain all metadata categories searched for
        sql = []
        if meta_headers:
            # have study-specific metadata, so need to find specific studies
            for meta in meta_headers:
                sql.append(
                    "SELECT DISTINCT table_name FROM "
                    "information_schema.columns WHERE "
                    "lower(column_name) = lower('{0}')".format(qdb.util.scrub_data(meta))
                )
        else:
            # no study-specific metadata, so need all studies
            sql.append("SELECT DISTINCT table_name " "FROM information_schema.columns")

        # combine the query
        if only_with_processed_data:
            sql.append(
                "SELECT DISTINCT 'sample_' || CAST(study_id AS VARCHAR)"
                "FROM qiita.study_artifact "
                "JOIN qiita.artifact USING (artifact_id) "
                "JOIN qiita.artifact_type USING (artifact_type_id) "
                "WHERE artifact_type = 'BIOM'"
            )

        # restrict to studies in portal
        sql.append(
            "SELECT 'sample_' || CAST(study_id AS VARCHAR) "
            "FROM qiita.study_portal "
            "JOIN qiita.portal_type USING (portal_type_id) "
            "WHERE portal = '%s'" % qiita_config.portal
        )
        study_sql = " INTERSECT ".join(sql)

        # create  the sample finding SQL, getting both sample id and values
        # build the sql formatted list of metadata headers
        header_info = []
        for meta in meta_header_type_lookup:
            if meta in self.study_cols:
                header_info.append("st.%s" % meta)
            else:
                header_info.append("sa.%s" % meta)
        # build the SQL query

        sample_sql = (
            "SELECT ss.sample_id, %s "
            "FROM qiita.study_sample ss "
            "JOIN qiita.sample_{0} sa ON ss.sample_id = sa.sample_id"
            " JOIN qiita.study st ON st.study_id = ss.study_id "
            "WHERE %s" % (",".join(header_info), sql_where)
        )

        return study_sql, sample_sql, meta_header_type_lookup.keys()
Esempio n. 16
0
    def _parse(self, content):
        """Parse block-structured material-script *content* into a tree.

        Builds a pyparsing grammar for files made of import headers and
        nested named blocks with option lines, then scans *content* and
        attaches one GazeboMaterialItem per top-level token to
        ``self._root``.  Sets ``self._parsed`` when done.
        """
        # newlines are significant in this grammar, so only spaces and
        # tabs are treated as default whitespace
        gz.ParserElement.setDefaultWhitespaceChars(' \t')
        singleline_comment = "//" + gz.restOfLine
        multiline_comment = gz.cStyleComment
        comments = gz.MatchFirst([singleline_comment, multiline_comment])

        real = gz.Combine(
            gz.Optional(gz.oneOf("+ -")) + gz.Optional(gz.Word(gz.nums)) +
            "." + gz.Word(gz.nums)).setName("real")
        integer = gz.Combine(gz.Optional(gz.oneOf("+ -")) +
                             gz.Word(gz.nums)).setName("integer")
        nums = real | integer

        words = gz.Word(gz.alphas)
        string = gz.dblQuotedString()
        item_type = gz.Word(gz.alphas + "_")

        # file names/paths with one of the known extensions
        extensions = (gz.oneOf(["frag", "glsl", "jpeg", "jpg", "png", "vert"]))
        _filename = gz.ZeroOrMore(gz.Word(gz.alphanums + "_.") +
                                  '.') + gz.Word(gz.alphanums + "_")
        _filepath = gz.ZeroOrMore(gz.Word(gz.alphanums + "_.") + "/")
        filename = gz.Combine(_filename + '.' + extensions)
        filepath = gz.Combine(gz.Optional("/") + _filepath)
        fileany = gz.Combine(filepath + filename)

        # `import * from "file"` or `import a,b from file`
        importall = gz.Literal("*")
        importheader = gz.Literal("import").setResultsName('itemtype') + \
          (importall | gz.Combine(gz.delimitedList(item_type, delim=",", combine=True))).setResultsName('imports') + \
          gz.Literal("from").suppress() + (string | words).setResultsName('from')

        lineend = gz.OneOrMore(gz.LineEnd()).suppress()
        oplineend = gz.Optional(lineend)

        blockstart, blockend = gz.Literal("{").suppress(), gz.Literal(
            "}").suppress()

        # block names may be namespaced: `parent/child_name`
        blockname = gz.Combine(
            gz.Optional(gz.Word(gz.alphas + "_") + gz.Literal("/")) +
            gz.Word(gz.alphanums + "_"))
        blockoption = item_type.setResultsName('itemtype') + (gz.OneOrMore(
            fileany | nums | item_type)).setResultsName('arguments') + lineend
        blockinherit = gz.Literal(":").suppress() + blockname
        blockheader  = item_type.setResultsName('itemtype') + gz.Optional(blockname).setResultsName('blockname') + \
           gz.Group(gz.Optional(gz.OneOrMore(item_type))).setResultsName('arguments') + \
           gz.Group(gz.Optional(blockinherit)).setResultsName('inheritance')

        # blocks nest recursively: a body holds inner blocks and options
        blockinner = gz.Forward()
        blockinner << gz.Group(item_type.setResultsName('itemtype') + gz.Optional(blockname).setResultsName('blockname') + gz.ZeroOrMore(blockname).setResultsName('arguments') + oplineend + \
           blockstart + oplineend + \
           gz.ZeroOrMore(blockinner ^ gz.Group(blockoption)).setResultsName('blockbody') + \
           oplineend + blockend) + oplineend

        block = gz.Group(blockheader + oplineend + \
           blockstart + oplineend + \
           gz.ZeroOrMore(blockinner ^ gz.Group(blockoption)).setResultsName('blockbody') + \
           oplineend + blockend) + oplineend

        # a whole file: imports first, then top-level blocks
        allitems = gz.ZeroOrMore(gz.Group(importheader) +
                                 lineend) + gz.ZeroOrMore(block) + oplineend
        allitems.ignore(comments)

        def makeBlock(token, level=0):
            # recursively convert a parse-result group into a
            # GazeboMaterialItem tree
            tkeys = token.keys()
            if 'itemtype' in tkeys:
                item = GazeboMaterialItem(token['itemtype'])
            else:
                raise Exception("Cannot found itemtype in {0}".format(token))

            if 'blockname' in tkeys:
                item._setName(token['blockname'])
                #item.addArgument(token['blockname'])
            if 'arguments' in tkeys:
                for xarg in token['arguments']:
                    item.addArgument(xarg)
            if 'inheritance' in tkeys:
                for xarg in token['inheritance']:
                    item.addInheritance(xarg)

            if 'blockbody' in tkeys:
                for child in token['blockbody']:
                    if type(child) != str:
                        item.addChild(makeBlock(child, level=level + 1))
                    else:
                        raise Exception("Failured while parsing blockbody",
                                        child)

            return item

        for tokens, start, end in allitems.scanString(content):
            for t in tokens:
                self._root.addChild(makeBlock(t))
        self._parsed = True
Esempio n. 17
0
import pyparsing as pp
from pyparsing import pyparsing_common as ppc


def make_keyword(kwd_str, kwd_value):
    """Return a Keyword parser for *kwd_str* whose parse result is *kwd_value*."""
    kw = pp.Keyword(kwd_str)
    # setParseAction returns the element itself, so this is equivalent to
    # the chained one-liner form.
    kw.setParseAction(pp.replaceWith(kwd_value))
    return kw


# Literal keywords that parse to their Python equivalents.
TRUE = make_keyword("true", True)
FALSE = make_keyword("false", False)
NULL = make_keyword("null", None)

# Structural punctuation; suppressed so it never appears in parse results.
LBRACK, RBRACK, LBRACE, RBRACE, COLON = map(pp.Suppress, "[]{}:")

# Scalars: double-quoted strings (quotes removed) and int/float numbers.
jsonString = pp.dblQuotedString().setParseAction(pp.removeQuotes)
jsonNumber = ppc.number()

# Objects and values are mutually recursive, so both are forward-declared.
jsonObject = pp.Forward()
jsonValue = pp.Forward()
jsonElements = pp.delimitedList(jsonValue)
# Array: bracketed, comma-separated value list; defaults to [] when empty.
jsonArray = pp.Group(LBRACK + pp.Optional(jsonElements, []) + RBRACK)
jsonValue << (jsonString | jsonNumber | pp.Group(jsonObject) | jsonArray | TRUE
              | FALSE | NULL)
# "key": value pairs, collected into a Dict for keyed access to members.
memberDef = pp.Group(jsonString + COLON + jsonValue)
jsonMembers = pp.delimitedList(memberDef)
jsonObject << pp.Dict(LBRACE + pp.Optional(jsonMembers) + RBRACE)

# C/C++-style comments are ignored anywhere inside an object.
jsonComment = pp.cppStyleComment
jsonObject.ignore(jsonComment)
Esempio n. 18
0
    def build_MEASURE(self):
        """Build the pyparsing grammar for the MEASURE configuration language.

        The language has three sections -- ``measurements``, ``zones`` and
        ``actions``.  The combined grammar is stored in ``self.MEASURE``, and
        the known function signatures for each section are recorded in
        ``self.actionFunctions``, ``self.zoneFunctions`` and
        ``self.measureFunctions``.
        """
        ## Grammar definition
        # literals
        self.var_list = dict()

        variable = Word(alphas, alphanums + "." + "_" + "-").setName("variable").setDebug(self.dbgLiterals)
        number = Word(nums+".").setName("number").setDebug(self.dbgLiterals)
        integer = Word(nums).setName("integer").setDebug(self.dbgLiterals)
        # renamed from `float`, which shadowed the builtin
        float_num = Combine(integer + "." + integer).setName("float").setDebug(self.dbgLiterals)
        ipAddress = Combine(integer + ('.' + integer)*3).setName("ipAddress").setDebug(self.dbgLiterals)

        # special characters
        oparen = Literal("(").suppress().setName("opening parenthesis").setDebug(self.dbgLiterals)
        eparen = Literal(")").suppress().setName("closing parenthesis").setDebug(self.dbgLiterals)
        semicolon = Literal(";").suppress().setName("semicolon").setDebug(self.dbgLiterals)
        comma = Literal(",").suppress().setName("comma").setDebug(self.dbgLiterals)
        obrace = Literal("{").suppress().setName("opening brace").setDebug(self.dbgLiterals)
        ebrace = Literal("}").suppress().setName("closing brace").setDebug(self.dbgLiterals)
        to = Literal("->").setName("right-arrow").setDebug(self.dbgLiterals)

        # section literals
        measurements = Literal("measurements").suppress().setDebug(self.dbgLiterals)
        zoneTok = Literal("zones").suppress().setDebug(self.dbgLiterals)
        actionTok = Literal("actions").suppress().setDebug(self.dbgLiterals)

        # arithmetic literals
        eq = Literal("=").setName("equal sign").setDebug(self.dbgLiterals)
        geq = Literal(">=").setName("greater or equal sign").setDebug(self.dbgLiterals)
        leq = Literal("<=").setName("less or equal sign").setDebug(self.dbgLiterals)
        gt = Literal(">").setName("greater than sign").setDebug(self.dbgLiterals)
        lt = Literal("<").setName("less than sign").setDebug(self.dbgLiterals)
        minus = Literal("-").setName("minus sign").setDebug(self.dbgLiterals)
        plus = Literal("+").setName("plus sign").setDebug(self.dbgLiterals)
        _and = (Literal("&&")|Literal("and")).setName("and sign").setDebug(self.dbgLiterals)
        _or = (Literal("||")|Literal("or")).setName("or sign").setDebug(self.dbgLiterals)
        _not = (Literal("!")|Literal("not")).setName("not sign").setDebug(self.dbgLiterals)

        # Productions for measurement definitions: var = function(name=value, ...);
        namedParam = Group((variable)("pname") + eq.suppress() + (ipAddress("pipaddr")|float_num("pfloat")|integer("pint")|variable("pvar")) + Optional(comma))("param").setDebug(self.dbgMeasurement)
        paramExpr =  Group(ZeroOrMore(namedParam))("params").setDebug(self.dbgMeasurement)
        functionExpr = Group(variable("fname") + oparen + paramExpr + eparen )("function").setDebug(self.dbgMeasurement)

        measurementExpr = Group(variable("mvar") + eq.suppress() + (functionExpr) + semicolon)("measure").setDebug(self.dbgMeasurement)
        measurementList = OneOrMore(measurementExpr).setDebug(self.dbgMeasurement)
        measure = Group(measurements + obrace + measurementList + ebrace)("measurements").setDebug(self.dbgMeasurement)

        # Productions for zone definitions.
        # NOTE: a dead first assignment to arithParamExpr (overwritten below
        # before any use) and the then-unused quote/string literals it
        # referenced were removed.
        arithNamedParam = Group((variable)("pname") + eq.suppress() +
                              (ipAddress("pipaddr")|float_num("pfloat")|integer("pint")|variable("pvar")|dblQuotedString("pstr"))
                              + Optional(comma))("param").setDebug(self.dbgZones)
        arithParamExpr =  Group(ZeroOrMore(arithNamedParam))("params").setDebug(self.dbgZones)

        arithFuncExpr = Group(variable("fname") + oparen + arithParamExpr("params") + eparen + Optional(comma))("function").setDebug(self.dbgZones)

        arithNestFuncExpr = Group(OneOrMore(arithFuncExpr))("params").setDebug(self.dbgZones)
        arithFuncExpr2 = Group(variable("fname") + oparen + arithNestFuncExpr + eparen)("function").setDebug(self.dbgZones)

        arithTok = (arithFuncExpr|arithFuncExpr2|number("num")|variable("var")).setDebug(self.dbgZones)
        opExpr = (eq|geq|leq|gt|lt|minus|plus|_and|_or).setDebug(self.dbgZones)
        arithExpr = Forward().setDebug(self.dbgZones)
        # Fully parenthesized binary expressions: (l op r), possibly nested.
        arithExpr << Group(oparen + Group((arithTok|arithExpr))("l") + opExpr("op") + Group((arithTok|arithExpr))("r") + eparen)("expression").setDebug(self.dbgZones)

        zoneExpr = Group(variable("zname") + eq.suppress() + arithExpr + semicolon)("zone").setName("ZoneExpr").setDebug(self.dbgZones)
        zones = Group(zoneTok + obrace + OneOrMore(zoneExpr) + ebrace)("zones").setName("Zones").setDebug(self.dbgZones)

        # Productions for action definitions
        actNamedParam = Group((variable)("pname") + eq.suppress() +
                              (ipAddress("pipaddr")|float_num("pfloat")|integer("pint")|variable("pvar")|dblQuotedString("pstr"))
                              + Optional(comma))("param").setDebug(self.dbgActions)
        actParamExpr =  Group(ZeroOrMore(actNamedParam))("params").setDebug(self.dbgActions)

        actFunExpr = Group(variable("fname") + oparen + actParamExpr + eparen + semicolon)("function").setDebug(self.dbgActions)

        # statevariable doesn't allow "-", because its confused with "->"
        statevariable = Word(alphas, alphanums + "." + "_").setName("statevariable")

        # FSM triggers: plain state, transition (a->b), enter (->a), leave (a->).
        state = statevariable("state").setDebug(self.dbgActions)
        statetrans = Group(statevariable("from") + to.suppress() + statevariable("to"))("trans").setDebug(self.dbgActions)
        stateenter = Group(to.suppress() + statevariable("enter"))("edge").setDebug(self.dbgActions)
        stateleave = Group(statevariable("leave") + to.suppress())("edge").setDebug(self.dbgActions)
        fsm = (statetrans|stateleave | stateenter|state).setDebug(self.dbgActions)

        action = Group(fsm + eq.suppress() + Group(OneOrMore(actFunExpr))("functions"))("action").setDebug(self.dbgActions)
        actions = Group(actionTok + obrace + OneOrMore(action) + ebrace)("actions").setDebug(self.dbgActions)

        self.MEASURE = measure + zones + actions

        # Known function signatures per section (presumably consumed by a
        # validator elsewhere -- TODO confirm).
        self.actionFunctions = [
            {"fname":"Publish",
             "parameters": [
                 {"pname":"topic","type":"pstr"},
                 {"pname":"message","type":"pstr"},
             ]},
            {"fname":"Notify",
             "parameters": [
                 {"pname":"target","type":"pstr"},
                 {"pname":"message","type":"pstr"},
             ]}
        ]
        self.zoneFunctions = [
            {"fname":"AVG",
             "parameters": [
                 {"pname":"val","type":"pvar"},
                 {"pname":"max_age","type":"pstr"},
             ]}
        ]

        self.measureFunctions = [
            {"fname":"delay.twoway.icmp.us.mean",
             "parameters": [
                 {"pname":"source.ipv4","type":"pipaddr"},
                 {"pname":"destination.ipv4","type":"pipaddr"},
                 {"pname":"count","type":"pint"}
             ]},
            {"fname":"overload.risk.rx",
             "parameters": [
                 {"pname":"interface","type":"pvar"}
             ]}
        ]
Esempio n. 19
0
# Signed integer: optional '-' followed by digits, converted to int.
IntegerTok = Optional(Literal('-')) + Word(pyparsing.nums)
IntegerTok.addParseAction(lambda toks: int("".join(toks)))

UnsignedIntTok = Word(pyparsing.nums)
UnsignedIntTok.addParseAction(lambda toks: int(toks[0]))
# Float: optional sign, integer part, optional fractional part.
FloatTok = Optional(Literal('-')) + Word(
    pyparsing.nums) + Optional(Literal('.') + Optional(Word(pyparsing.nums)))
FloatTok.addParseAction(lambda toks: float("".join(toks)))
HexStringTok = Word(pyparsing.hexnums)
HexStringTok.addParseAction(lambda toks: int(toks[0], base=16))

# Unquoted string: skip leading whitespace, then any run of characters that
# are not parentheses, quotes or whitespace.
UnquotedStringTok = ZeroOrMore(
    White()).suppress() + CharsNotIn("()\"\'" + " \r\n")
UnquotedStringTok.addParseAction(lambda toks: "".join(toks).strip())

QuotedStringTok = Group(dblQuotedString() ^ sglQuotedString())
# Bug fix: the grammar accepts single- OR double-quoted strings, so both
# quote characters must be stripped; previously only '"' was removed,
# leaving single-quoted values wrapped in their quotes.
QuotedStringTok.addParseAction(lambda toks: "".join(toks[0]).strip('"\''))

AnystringTok = QuotedStringTok ^ UnquotedStringTok
LeftParenTok = Literal('(').suppress()
RightParenTok = Literal(')').suppress()

# Boolean literals: yes/true and no/false (case-insensitive), replaced by
# the corresponding Python bool via the zero-argument parse actions.
BoolTrueTok = Keyword("yes", caseless=True) | Keyword("true", caseless=True)
BoolTrueTok.addParseAction(lambda: True)
BoolFalseTok = Keyword("no", caseless=True) | Keyword("false", caseless=True)
BoolFalseTok.addParseAction(lambda: False)
BooleanTok = BoolTrueTok | BoolFalseTok


def _paren_stmt(keyword, *values, store=True):
    """
Esempio n. 20
0
import pyparsing as pp
from pydbml.definitions.generic import (expression, name, string_literal,
                                        boolean_literal, number_literal,
                                        expression_literal)
from pydbml.definitions.common import _, _c, c, n, note, pk, unique
from pydbml.definitions.reference import ref_inline
from pydbml.classes import Column

# Newlines are significant in DBML, so exclude '\n' from skippable whitespace.
pp.ParserElement.setDefaultWhitespaceChars(' \t\r')

# Optional parenthesized type arguments, e.g. the "255" in varchar(255),
# captured as raw source text under the 'args' results name.
type_args = ("(" + pp.originalTextFor(expression)('args') + ")")
# A type name is a bare identifier or a double-quoted name.
type_name = (pp.Word(pp.alphanums + '_') | pp.dblQuotedString())('name')
# Full column type: name plus at most one argument group ([0, 1] == optional).
column_type = (type_name + type_args[0, 1])


def parse_column_type(s, l, t):
    '''
    Normalize a parsed column type into a plain string:
    int or "mytype" or varchar(255)
    '''
    args = t.get('args')
    if args:
        return '{0}({1})'.format(t['name'], args)
    return t['name']


# Register the normalizer so column_type yields a string like "varchar(255)".
column_type.setParseAction(parse_column_type)

default = pp.CaselessLiteral('default:').suppress() + _ - (
    string_literal | expression_literal
    | boolean_literal.setParseAction(lambda s, l, t: {
        'true': True,
Esempio n. 21
0
    def _parse_study_search_string(self, searchstr,
                                   only_with_processed_data=False):
        """parses string into SQL query for study search

        Parameters
        ----------
        searchstr : str
            The string to parse
        only_with_processed_data : bool
            Whether or not to return studies with processed data.

        Returns
        -------
        study_sql : str
            SQL query for selecting studies with the required metadata columns
        sample_sql : str
            SQL query for each study to get the sample ids that mach the query
        meta_headers : list
            metadata categories in the query string in alphabetical order

        Notes
        -----
        All searches are case-sensitive

        References
        ----------
        .. [1] McGuire P (2007) Getting started with pyparsing.
        """
        # build the parse grammar
        category = Word(alphas + nums + "_")
        seperator = oneOf("> < = >= <= !=") | CaselessLiteral("includes") | \
            CaselessLiteral("startswith")
        value = Word(alphas + nums + "_" + ":" + ".") | \
            dblQuotedString().setParseAction(removeQuotes)
        criterion = Group(category + seperator + value)
        criterion.setParseAction(SearchTerm)
        and_ = CaselessLiteral("and")
        or_ = CaselessLiteral("or")
        not_ = CaselessLiteral("not")
        optional_seps = Optional(and_ | or_ | not_)

        # create the grammar for parsing operators AND, OR, NOT
        search_expr = operatorPrecedence(
            criterion, [
                (not_, 1, opAssoc.RIGHT, SearchNot),
                (and_, 2, opAssoc.LEFT, SearchAnd),
                (or_, 2, opAssoc.LEFT, SearchOr)])

        # parse the search string to get out the SQL WHERE formatted query
        eval_stack = (search_expr + stringEnd).parseString(searchstr)[0]
        sql_where = eval_stack.generate_sql()

        # this lookup will be used to select only studies with columns
        # of the correct type
        type_lookup = {int: 'integer', float: 'float8', str: 'varchar'}

        # parse out all metadata headers we need to have in a study, and
        # their corresponding types.  A single scanString pass collects the
        # header (term[0]) and raw value (term[2]) of every criterion,
        # instead of re-parsing the search string twice as before.
        all_headers = []
        all_types = []
        for tokens, _start, _end in (criterion + optional_seps).scanString(
                searchstr):
            all_headers.append(tokens[0].term[0])
            all_types.append(tokens[0].term[2])
        meta_headers = set(all_headers)
        all_types = [type_lookup[type(typecast_string(s))] for s in all_types]

        # sort headers and types so they return in same order every time.
        # Should be a relatively short list so very quick
        # argsort implementation taken from
        # http://stackoverflow.com/questions/3382352/
        # equivalent-of-numpy-argsort-in-basic-python
        sort_order = sorted(range(len(all_headers)),
                            key=all_headers.__getitem__)
        all_types = [all_types[x] for x in sort_order]
        all_headers.sort()

        # At this point it is possible that a metadata header has been
        # reference more than once in the query. If the types agree, then we
        # do not need to do anything. If the types do not agree (specifically,
        # if it appears to be numerical in one case and string in another),
        # then we need to give varchar the precedence.
        meta_header_type_lookup = dict()
        for header, header_type in zip(all_headers, all_types):
            if header not in meta_header_type_lookup:
                meta_header_type_lookup[header] = header_type
            else:
                if header_type == 'varchar' or \
                        meta_header_type_lookup[header] == 'varchar':
                    meta_header_type_lookup[header] = 'varchar'

        # create the study finding SQL
        # remove metadata headers that are in required_sample_info table
        meta_headers = meta_headers.difference(self.required_cols).difference(
            self.study_cols)

        # get all study ids that contain all metadata categories searched for
        sql = []
        if meta_headers:
            # have study-specific metadata, so need to find specific studies
            for meta in meta_headers:
                if meta_header_type_lookup[meta] in ('integer', 'float8'):
                    allowable_types = "('integer', 'float8')"
                else:
                    allowable_types = "('varchar')"

                sql.append("SELECT study_id FROM qiita.study_sample_columns "
                           "WHERE lower(column_name) = lower('%s') and "
                           "column_type in %s" %
                           (scrub_data(meta), allowable_types))
        else:
            # no study-specific metadata, so need all studies
            sql.append("SELECT study_id FROM qiita.study_sample_columns")

        # combine the query
        if only_with_processed_data:
            sql.append('SELECT study_id FROM qiita.study_processed_data')
        study_sql = ' INTERSECT '.join(sql)

        # create  the sample finding SQL, getting both sample id and values
        # build the sql formatted list of metadata headers
        header_info = []
        for meta in meta_header_type_lookup:
            if meta in self.required_cols:
                header_info.append("r.%s" % meta)
            elif meta in self.study_cols:
                header_info.append("st.%s" % meta)
            else:
                header_info.append("sa.%s" % meta)
        # build the SQL query; the {0} placeholder is deliberately left for a
        # later .format() with the study id.
        sample_sql = ("SELECT r.sample_id,%s FROM qiita.required_sample_info "
                      "r JOIN qiita.sample_{0} sa ON sa.sample_id = "
                      "r.sample_id JOIN qiita.study st ON st.study_id = "
                      "r.study_id WHERE %s" %
                      (','.join(header_info), sql_where))
        return study_sql, sample_sql, meta_header_type_lookup.keys()
Esempio n. 22
0
# Pre-built numeric operand parsers (presumably 15- and 16-bit signed
# immediates -- TODO confirm against the `number` helper defined earlier).
s15 = number(15, True)
s16 = number(16, True)


def string_format(s, l, t):
    """Parse action: decode literal \\n, \\t and \\r escape sequences."""
    text = t[0]
    for escape, actual in (("\\n", "\n"), ("\\t", "\t"), ("\\r", "\r")):
        text = text.replace(escape, actual)
    return text


# Grammar tokens for the assembler source language.

# A label definition is a name followed by ':'.
label = label_name + colon
# General-purpose register: $0..$7, parsed to its integer index.
reg = pp.Suppress("$") + pp.Word("01234567").setParseAction(to_int)
# Operand that may be a register or a special immediate.
ireg = reg | spec_imm
# Control register: $cr0..$cr3, parsed to its integer index.
creg = pp.Suppress("$cr") + pp.Word("0123").setParseAction(to_int)
# Condition-code mnemonics.
cond = pp.oneOf("eq ne gt gte lt lte ult ulte")("cond")
# Double-quoted string literal: quotes removed, then escapes decoded.
string = pp.dblQuotedString()
string.addParseAction(pp.removeQuotes)
string.addParseAction(string_format)

# Comments run from ';' to end of line.
comment = ";" + pp.restOfLine

# actions

# Wrap the raw matches into typed token objects.
creg.addParseAction(_build(tokens.ControlRegister))
reg.addParseAction(_build(tokens.Register))
cond.addParseAction(_build(tokens.Condition))
ireg.addParseAction(_build(tokens.ImmRegister))
label_name.addParseAction(_build(tokens.Label))
iden = label_name.copy() # used in macros