예제 #1
0
def mkSetQuery(q: SetQuery):
    if isinstance(q.from_, SetQuery):
        ss, r = mkSetQuery(q.from_)
        _, arr = mkPathStm(column(ss[-1]).name, r, True)
        src = func.jsonb_array_elements(arr).alias()
    else:
        ss = []
        _, arr = mkPathStm(eavs.c.data, q.from_, True)
        src = func.jsonb_array_elements(arr).alias()

    _, r = mkPathStm(column(src.name), q.path)

    return (ss + [src]), r
예제 #2
0
    def query(self, view_kwargs):
        query_ = self.session.query(Substance)
        if request.args.get("identifier") is not None:
            search_term = getInchikey(request.args.get("identifier"))

            # This allows reference to the aliased results from synonym select_from jsonb_array_elements
            val = db.column("value", type_=JSONB)
            # Construct synonym subquery.
            synonym_subquery = (self.session.query(Substance.id).select_from(
                Substance,
                func.jsonb_array_elements(Substance.identifiers["synonyms"]),
            ).filter(
                val["identifier"].astext.ilike(f"{search_term}")).subquery())

            query_ = self.session.query(Substance, ).filter(
                or_(
                    Substance.identifiers["preferred_name"].astext.ilike(
                        f"{search_term}"),
                    Substance.identifiers["inchikey"].astext.ilike(
                        f"{search_term}"),
                    Substance.identifiers["compound_id"].astext.ilike(
                        search_term),
                    Substance.id.ilike(search_term),
                    Substance.identifiers["casrn"].astext.ilike(
                        f"{search_term}"),
                    Substance.identifiers["display_name"].astext.ilike(
                        f"{search_term}"),
                    Substance.id.in_(synonym_subquery),
                ))
        return query_
예제 #3
0
def pg(element: function, compiler: SQLCompiler, **kw: Dict[str, Any]) -> str:

    args = iter(element.clauses)  # type: ignore
    json_field = next(args)  # type: ignore
    type_bind_param = next(args)  # type: ignore
    type_: TypeEngine = type_bind_param.value  # type: ignore

    assert isinstance(type_, TypeEngine) or issubclass(type_, TypeEngine)

    select_from = sqla_func.jsonb_array_elements(cast(
        json_field, JSONB)).table_valued("value")
    statement = select([sqla_func.array_agg(cast(select_from.column, type_))])

    return compiler.process(statement, **kw)
예제 #4
0
def filter_to_playlist_mood(session, mood, query, correlation):
    """
    Takes a session that is querying for playlists and filters the playlists
    to only those with the dominant mood provided.
    Dominant mood means that *most* of its tracks are of the specified mood.

    This method takes a query inserts a filter clause on it and returns the same query.
    We filter down those playlists to dominant mood by running an "exists" clause
    on a dominant mood subquery.

    Args:
        session: SQLALchemy session.
        mood: (string) The mood to query against.
        query: The base query to filter on
        correlation: An optional correlation / subquery to correlate against.

    Returns: A modified version of `query` with an extra filter clause.
    """
    if not mood:
        return query

    tracks_subquery = (session.query(
        func.jsonb_array_elements(
            correlation.c.playlist_contents['track_ids']).op('->>')(
                'track').cast(Integer)))

    if correlation is not None:
        # If this query runs against a nested subquery, it might need to
        # be manually correlated to that subquery so it doesn't pull in all
        # playlists here.
        tracks_subquery = tracks_subquery.correlate(correlation)

    # Query for the most common mood in a playlist
    dominant_mood_subquery = (session.query(
        Track.mood.label('mood'),
        func.max(Track.track_id).label('latest'),
        func.count(Track.mood).label('cnt')).filter(
            Track.is_current == True, Track.is_delete == False,
            Track.track_id.in_(tracks_subquery)).group_by(Track.mood).order_by(
                desc('cnt'), desc('latest')).limit(1).subquery())

    # Match the provided mood against the dominant mood for playlists
    mood_exists_query = (session.query(dominant_mood_subquery.c.mood).filter(
        func.lower(dominant_mood_subquery.c.mood) == func.lower(mood)))

    # Filter playlist query to those that have the most common mood checking that
    # there `exists` such a playlist with the dominant mood
    return query.filter(mood_exists_query.exists())
예제 #5
0
def mkGroupQuery(source, qg: GroupQuery):
    if (qg.operator == BoolOp.andOp):
        res = list(map(basicOrGroupQuery(source), qg.children))
        subExprs = [e for _, e in res if e is not None]
        sources = [s for src, _ in res for s in src]
        return sources, and_(*subExprs)
    elif (qg.operator == BoolOp.orOp):
        res = list(map(basicOrGroupQuery(source), qg.children))
        subExprs = [e for _, e in res if e is not None]
        sources = [s for src, _ in res for s in src]
        return sources, or_(*subExprs)
    elif (qg.operator == Quantifier.exists):
        _, arr = mkPathStm(source, qg.from_, True)
        tmp = func.jsonb_array_elements(arr).alias()
        res_ls, subExpr = basicOrGroupQuery(column(tmp.name))(qg.children[0])
        return ([tmp] + res_ls), subExpr
예제 #6
0
class Base(DeclarativeBase):
    pass


class Test(Base):
    __tablename__ = "test_table_json"

    id = mapped_column(Integer, primary_key=True)
    data: Mapped[Dict[str, Any]] = mapped_column(JSONB)

    ident: Mapped[_py_uuid] = mapped_column(UUID())

    ident_str: Mapped[str] = mapped_column(UUID(as_uuid=False))


elem = func.jsonb_array_elements(Test.data, type_=JSONB).column_valued("elem")

stmt = select(Test).where(
    or_(
        cast("example code", ARRAY(Text)).contained_by(
            array([select(elem["code"].astext).scalar_subquery()])
        ),
        cast("stefan", ARRAY(Text)).contained_by(
            array([select(elem["code"]["new_value"].astext).scalar_subquery()])
        ),
    )
)
print(stmt)


t1 = Test()
예제 #7
0
 def topvalues(self,
               field,
               flt=None,
               topnbr=10,
               sort=None,
               limit=None,
               skip=None,
               least=False):
     """
     This method makes use of the aggregation framework to produce
     top values for a given field or pseudo-field. Pseudo-fields are:
       - category / label / asnum / country / net[:mask]
       - port
       - port:open / :closed / :filtered / :<servicename>
       - portlist:open / :closed / :filtered
       - countports:open / :closed / :filtered
       - service / service:<portnbr>
       - product / product:<portnbr>
       - cpe / cpe.<part> / cpe:<cpe_spec> / cpe.<part>:<cpe_spec>
       - devicetype / devicetype:<portnbr>
       - script:<scriptid> / script:<port>:<scriptid>
         / script:host:<scriptid>
       - cert.* / smb.* / sshkey.*
       - httphdr / httphdr.{name,value} / httphdr:<name>
       - modbus.* / s7.* / enip.*
       - mongo.dbs.*
       - vulns.*
       - screenwords
       - file.* / file.*:scriptid
       - hop
     """
     if flt is None:
         flt = self.flt_empty
     base = flt.query(
         select([self.tables.scan.id
                 ]).select_from(flt.select_from)).cte("base")
     order = "count" if least else desc("count")
     outputproc = None
     if field == "port":
         field = self._topstructure(
             self.tables.port,
             [self.tables.port.protocol, self.tables.port.port],
             self.tables.port.state == "open")
     elif field == "ttl":
         field = self._topstructure(
             self.tables.port,
             [self.tables.port.state_reason_ttl],
             self.tables.port.state_reason_ttl != None,
             # noqa: E711 (BinaryExpression)
         )
     elif field == "ttlinit":
         field = self._topstructure(
             self.tables.port,
             [
                 func.least(
                     255,
                     func.power(
                         2,
                         func.ceil(
                             func.log(2,
                                      self.tables.port.state_reason_ttl))))
             ],
             self.tables.port.state_reason_ttl != None,
             # noqa: E711 (BinaryExpression)
         )
         outputproc = int
     elif field.startswith('port:'):
         info = field[5:]
         field = self._topstructure(
             self.tables.port,
             [self.tables.port.protocol, self.tables.port.port],
             (self.tables.port.state == info)
             if info in ['open', 'filtered', 'closed', 'open|filtered'] else
             (self.tables.port.service_name == info),
         )
     elif field.startswith('countports:'):
         info = field[11:]
         return (
             {"count": result[0], "_id": result[1]}
             for result in self.db.execute(
                 select([func.count().label("count"),
                         column('cnt')])
                 .select_from(
                     select([func.count().label('cnt')])
                     .select_from(self.tables.port)
                     .where(and_(
                         self.tables.port.state == info,
                         # self.tables.port.scan.in_(base),
                         exists(
                             select([1])\
                             .select_from(base)\
                             .where(
                                 self.tables.port.scan == base.c.id
                             )
                         ),
                     ))\
                     .group_by(self.tables.port.scan)\
                     .alias('cnt')
                 ).group_by('cnt').order_by(order).limit(topnbr)
             )
         )
     elif field.startswith('portlist:'):
         ### Deux options pour filtrer:
         ###   -1- self.tables.port.scan.in_(base),
         ###   -2- exists(select([1])\
         ###       .select_from(base)\
         ###       .where(
         ###         self.tables.port.scan == base.c.id
         ###       )),
         ###
         ### D'après quelques tests, l'option -1- est plus beaucoup
         ### rapide quand (base) est pas ou peu sélectif, l'option
         ### -2- un peu plus rapide quand (base) est très sélectif
         ###
         ### TODO: vérifier si c'est pareil pour:
         ###  - countports:open
         ###  - tous les autres
         info = field[9:]
         return (
             {
                 "count":
                 result[0],
                 "_id": [(proto, int(port)) for proto, port in (
                     elt.split(',')
                     for elt in result[1][3:-3].split(')","('))]
             } for result in self.db.execute(
                 select([func.count().label("count"),
                         column('ports')]).
                 select_from(
                     select([
                         func.array_agg(
                             postgresql.aggregate_order_by(
                                 tuple_(self.tables.port.protocol, self.
                                        tables.port.port).label('a'),
                                 tuple_(
                                     self.tables.port.protocol, self.tables.
                                     port.port).label('a'))).label('ports'),
                     ]).where(
                         and_(
                             self.tables.port.state == info,
                             self.tables.port.scan.in_(
                                 base),
                             # exists(select([1])\
                             #        .select_from(base)\
                             #        .where(
                             #            self.tables.port.scan == base.c.id
                             #        )),
                         )).group_by(self.tables.port.scan).alias('ports')
                 ).group_by('ports').order_by(order).limit(topnbr)))
     elif field == "service":
         field = self._topstructure(self.tables.port,
                                    [self.tables.port.service_name],
                                    self.tables.port.state == "open")
     elif field.startswith("service:"):
         info = field[8:]
         if '/' in info:
             info = info.split('/', 1)
             field = self._topstructure(
                 self.tables.port,
                 [self.tables.port.service_name],
                 and_(self.tables.port.protocol == info[0],
                      self.tables.port.port == int(info[1])),
             )
         else:
             field = self._topstructure(self.tables.port,
                                        [self.tables.port.service_name],
                                        self.tables.port.port == int(info))
     elif field == "product":
         field = self._topstructure(
             self.tables.port,
             [
                 self.tables.port.service_name,
                 self.tables.port.service_product
             ],
             self.tables.port.state == "open",
         )
     elif field.startswith("product:"):
         info = field[8:]
         if info.isdigit():
             info = int(info)
             flt = self.flt_and(flt, self.searchport(info))
             field = self._topstructure(
                 self.tables.port,
                 [
                     self.tables.port.service_name,
                     self.tables.port.service_product
                 ],
                 and_(self.tables.port.state == "open",
                      self.tables.port.port == info),
             )
         elif info.startswith('tcp/') or info.startswith('udp/'):
             info = (info[:3], int(info[4:]))
             flt = self.flt_and(flt,
                                self.searchport(info[1], protocol=info[0]))
             field = self._topstructure(
                 self.tables.port,
                 [
                     self.tables.port.service_name,
                     self.tables.port.service_product
                 ],
                 and_(self.tables.port.state == "open",
                      self.tables.port.port == info[1],
                      self.tables.port.protocol == info[0]),
             )
         else:
             flt = self.flt_and(flt, self.searchservice(info))
             field = self._topstructure(
                 self.tables.port,
                 [
                     self.tables.port.service_name,
                     self.tables.port.service_product
                 ],
                 and_(self.tables.port.state == "open",
                      self.tables.port.service_name == info),
             )
     elif field == "devicetype":
         field = self._topstructure(self.tables.port,
                                    [self.tables.port.service_devicetype],
                                    self.tables.port.state == "open")
     elif field.startswith("devicetype:"):
         info = field[11:]
         if info.isdigit():
             info = int(info)
             flt = self.flt_and(flt, self.searchport(info))
             field = self._topstructure(
                 self.tables.port, [self.tables.port.service_devicetype],
                 and_(self.tables.port.state == "open",
                      self.tables.port.port == info))
         elif info.startswith('tcp/') or info.startswith('udp/'):
             info = (info[:3], int(info[4:]))
             flt = self.flt_and(flt,
                                self.searchport(info[1], protocol=info[0]))
             field = self._topstructure(
                 self.tables.port, [self.tables.port.service_devicetype],
                 and_(self.tables.port.state == "open",
                      self.tables.port.port == info[1],
                      self.tables.port.protocol == info[0]))
         else:
             flt = self.flt_and(flt, self.searchservice(info))
             field = self._topstructure(
                 self.tables.port, [self.tables.port.service_devicetype],
                 and_(self.tables.port.state == "open",
                      self.tables.port.service_name == info))
     elif field == "version":
         field = self._topstructure(
             self.tables.port,
             [
                 self.tables.port.service_name,
                 self.tables.port.service_product,
                 self.tables.port.service_version
             ],
             self.tables.port.state == "open",
         )
     elif field.startswith("version:"):
         info = field[8:]
         if info.isdigit():
             info = int(info)
             flt = self.flt_and(flt, self.searchport(info))
             field = self._topstructure(
                 self.tables.port,
                 [
                     self.tables.port.service_name,
                     self.tables.port.service_product,
                     self.tables.port.service_version
                 ],
                 and_(self.tables.port.state == "open",
                      self.tables.port.port == info),
             )
         elif info.startswith('tcp/') or info.startswith('udp/'):
             info = (info[:3], int(info[4:]))
             flt = self.flt_and(flt,
                                self.searchport(info[1], protocol=info[0]))
             field = self._topstructure(
                 self.tables.port,
                 [
                     self.tables.port.service_name,
                     self.tables.port.service_product,
                     self.tables.port.service_version
                 ],
                 and_(self.tables.port.state == "open",
                      self.tables.port.port == info[1],
                      self.tables.port.protocol == info[0]),
             )
         elif ':' in info:
             info = info.split(':', 1)
             flt = self.flt_and(
                 flt, self.searchproduct(info[1], service=info[0]))
             field = self._topstructure(
                 self.tables.port,
                 [
                     self.tables.port.service_name,
                     self.tables.port.service_product,
                     self.tables.port.service_version
                 ],
                 and_(self.tables.port.state == "open",
                      self.tables.port.service_name == info[0],
                      self.tables.port.service_product == info[1]),
             )
         else:
             flt = self.flt_and(flt, self.searchservice(info))
             field = self._topstructure(
                 self.tables.port,
                 [
                     self.tables.port.service_name,
                     self.tables.port.service_product,
                     self.tables.port.service_version
                 ],
                 and_(self.tables.port.state == "open",
                      self.tables.port.service_name == info),
             )
     elif field == "asnum":
         field = self._topstructure(self.tables.scan,
                                    [self.tables.scan.info["as_num"]])
     elif field == "as":
         field = self._topstructure(self.tables.scan, [
             self.tables.scan.info["as_num"],
             self.tables.scan.info["as_name"]
         ])
     elif field == "country":
         field = self._topstructure(self.tables.scan, [
             self.tables.scan.info["country_code"],
             self.tables.scan.info["country_name"]
         ])
     elif field == "city":
         field = self._topstructure(self.tables.scan, [
             self.tables.scan.info["country_code"],
             self.tables.scan.info["city"]
         ])
     elif field == "net" or field.startswith("net:"):
         info = field[4:]
         info = int(info) if info else 24
         field = self._topstructure(
             self.tables.scan,
             [func.set_masklen(text("scan.addr::cidr"), info)],
         )
     elif field == "script" or field.startswith("script:"):
         info = field[7:]
         if info:
             field = self._topstructure(self.tables.script,
                                        [self.tables.script.output],
                                        self.tables.script.name == info)
         else:
             field = self._topstructure(self.tables.script,
                                        [self.tables.script.name])
     elif field in ["category", "categories"]:
         field = self._topstructure(self.tables.category,
                                    [self.tables.category.name])
     elif field.startswith('cert.'):
         subfield = field[5:]
         field = self._topstructure(
             self.tables.script,
             [self.tables.script.data['ssl-cert'][subfield]],
             and_(self.tables.script.name == 'ssl-cert',
                  self.tables.script.data['ssl-cert'].has_key(
                      subfield)))  # noqa: W601 (BinaryExpression)
     elif field == "source":
         field = self._topstructure(self.tables.scan,
                                    [self.tables.scan.source])
     elif field == "domains":
         field = self._topstructure(
             self.tables.hostname,
             [func.unnest(self.tables.hostname.domains)])
     elif field.startswith("domains:"):
         level = int(field[8:]) - 1
         base1 = (select([
             func.unnest(self.tables.hostname.domains).label("domains")
         ]).where(
             exists(
                 select([1]).select_from(base).where(
                     self.tables.hostname.scan == base.c.id))).cte("base1"))
         return ({
             "count": result[1],
             "_id": result[0]
         } for result in self.db.execute(
             select([base1.c.domains,
                     func.count().label("count")]).where(
                         base1.c.domains.op('~')
                         ('^([^\\.]+\\.){%d}[^\\.]+$' %
                          level)).group_by(base1.c.domains).order_by(
                              order).limit(topnbr)))
     elif field == "hop":
         field = self._topstructure(self.tables.hop,
                                    [self.tables.hop.ipaddr])
     elif field.startswith('hop') and field[3] in ':>':
         ttl = int(field[4:])
         field = self._topstructure(
             self.tables.hop,
             [self.tables.hop.ipaddr],
             (self.tables.hop.ttl > ttl) if field[3] == '>' else
             (self.tables.hop.ttl == ttl),
         )
     elif field == 'file' or (field.startswith('file')
                              and field[4] in '.:'):
         if field.startswith('file:'):
             scripts = field[5:]
             if '.' in scripts:
                 scripts, field = scripts.split('.', 1)
             else:
                 field = 'filename'
             scripts = scripts.split(',')
             flt = (self.tables.script.name == scripts[0] if len(scripts)
                    == 1 else self.tables.script.name.in_(scripts))
         else:
             field = field[5:] or 'filename'
             flt = True
         field = self._topstructure(
             self.tables.script,
             [
                 func.jsonb_array_elements(
                     func.jsonb_array_elements(
                         self.tables.script.data['ls']['volumes']).op('->')
                     ('files')).op('->>')(field).label(field)
             ],
             and_(
                 flt,
                 self.tables.script.data.op('@>')(
                     '{"ls": {"volumes": [{"files": []}]}}'),
             ),
         )
     elif field.startswith('modbus.'):
         subfield = field[7:]
         field = self._topstructure(
             self.tables.script,
             [self.tables.script.data['modbus-discover'][subfield]],
             and_(
                 self.tables.script.name == 'modbus-discover',
                 self.tables.script.data['modbus-discover'].has_key(
                     subfield)),
             # noqa: W601 (BinaryExpression)
         )
     elif field.startswith('s7.'):
         subfield = field[3:]
         field = self._topstructure(
             self.tables.script,
             [self.tables.script.data['s7-info'][subfield]],
             and_(self.tables.script.name == 's7-info',
                  self.tables.script.data['s7-info'].has_key(subfield)),
             # noqa: W601 (BinaryExpression)
         )
     elif field == 'httphdr':
         flt = self.flt_and(flt, self.searchscript(name="http-headers"))
         field = self._topstructure(
             self.tables.script,
             [
                 column("hdr").op('->>')('name').label("name"),
                 column("hdr").op('->>')('value').label("value")
             ],
             self.tables.script.name == 'http-headers',
             [column("name"), column("value")],
             func.jsonb_array_elements(
                 self.tables.script.data['http-headers']).alias('hdr'),
         )
     elif field.startswith('httphdr.'):
         flt = self.flt_and(flt, self.searchscript(name="http-headers"))
         field = self._topstructure(
             self.tables.script,
             [column("hdr").op('->>')(field[8:]).label("topvalue")],
             self.tables.script.name == 'http-headers',
             [column("topvalue")],
             func.jsonb_array_elements(
                 self.tables.script.data['http-headers']).alias('hdr'),
         )
     elif field.startswith('httphdr:'):
         flt = self.flt_and(flt, self.searchhttphdr(name=field[8:].lower()))
         field = self._topstructure(
             self.tables.script,
             [column("hdr").op('->>')("value").label("value")],
             and_(self.tables.script.name == 'http-headers',
                  column("hdr").op('->>')("name") == field[8:].lower()),
             [column("value")],
             func.jsonb_array_elements(
                 self.tables.script.data['http-headers']).alias('hdr'),
         )
     else:
         raise NotImplementedError()
     s_from = {
         self.tables.script:
         join(self.tables.script, self.tables.port),
         self.tables.port:
         self.tables.port,
         self.tables.category:
         join(self.tables.association_scan_category, self.tables.category),
         self.tables.hostname:
         self.tables.hostname,
         self.tables.hop:
         join(self.tables.trace, self.tables.hop),
     }
     where_clause = {
         self.tables.script:
         self.tables.port.scan == base.c.id,
         self.tables.port:
         self.tables.port.scan == base.c.id,
         self.tables.category:
         self.tables.association_scan_category.scan == base.c.id,
         self.tables.hostname:
         self.tables.hostname.scan == base.c.id,
         self.tables.hop:
         self.tables.trace.scan == base.c.id
     }
     if field.base == self.tables.scan:
         req = flt.query(
             select([func.count().label("count")] +
                    field.fields).select_from(
                        self.tables.scan).group_by(*field.fields))
     else:
         req = (select([func.count().label("count")] +
                       field.fields).select_from(s_from[field.base]))
         if field.extraselectfrom is not None:
             req = req.select_from(field.extraselectfrom)
         req = (req.group_by(
             *(field.fields if field.group_by is None else field.group_by
               )).where(
                   exists(
                       select([1]).select_from(base).where(
                           where_clause[field.base]))))
     if field.where is not None:
         req = req.where(field.where)
     if outputproc is None:
         return ({
             "count": result[0],
             "_id": result[1:] if len(result) > 2 else result[1]
         } for result in self.db.execute(req.order_by(order).limit(topnbr)))
     else:
         return ({
             "count":
             result[0],
             "_id":
             outputproc(result[1:] if len(result) > 2 else result[1])
         } for result in self.db.execute(req.order_by(order).limit(topnbr)))