def mkSetQuery(q: SetQuery):
    """Translate a (possibly nested) set query into sources plus a path expression.

    The ``from_`` attribute of ``q`` is either another ``SetQuery`` (handled
    recursively) or a path that is resolved against ``eavs.c.data``. In both
    cases the resolved array expression is unnested with
    ``jsonb_array_elements`` and aliased; ``q.path`` is then resolved against
    that alias.

    Args:
        q: The set query to translate.

    Returns:
        A 2-tuple ``(sources, expr)`` where ``sources`` is the list of aliased
        ``jsonb_array_elements`` calls (outermost first, the one created for
        ``q`` last) and ``expr`` is the second item returned by
        ``mkPathStm`` for ``q.path``.
    """
    parent = q.from_
    if isinstance(parent, SetQuery):
        # Nested set query: resolve the parent first, then walk from the
        # innermost source it produced.
        sources, parent_expr = mkSetQuery(parent)
        # NOTE(review): every other call site passes ``column(<alias>.name)``;
        # here ``.name`` is taken from the ``column(...)`` object instead.
        # Kept as-is -- confirm against mkPathStm whether this is intended.
        origin = column(sources[-1]).name
        _, array_expr = mkPathStm(origin, parent_expr, True)
    else:
        # Plain path: start from the table's JSON data column.
        sources = []
        _, array_expr = mkPathStm(eavs.c.data, parent, True)

    unnested = func.jsonb_array_elements(array_expr).alias()
    _, path_expr = mkPathStm(column(unnested.name), q.path)
    return sources + [unnested], path_expr
def query(self, view_kwargs):
    """Return the ``Substance`` query, narrowed by the ``identifier`` request arg.

    Without an ``identifier`` query-string argument the unfiltered
    ``Substance`` query is returned. Otherwise the argument is passed through
    ``getInchikey`` and substances are kept when the resulting term matches
    (case-insensitively, via ``ILIKE``) any of: preferred name, InChIKey,
    compound id, primary key, CASRN, display name, or the ``identifier`` of
    one of the entries in ``identifiers["synonyms"]``.

    Args:
        view_kwargs: Not used by this method.

    Returns:
        A query over ``Substance``.
    """
    unfiltered = self.session.query(Substance)

    raw_identifier = request.args.get("identifier")
    if raw_identifier is None:
        return unfiltered

    search_term = getInchikey(raw_identifier)
    # Some comparisons use the stringified term and others the raw value;
    # the distinction is preserved exactly.
    term_text = f"{search_term}"

    # Column handle for the "value" output of jsonb_array_elements so that the
    # unnested synonym entries can be referenced in the filter below.
    synonym_entry = db.column("value", type_=JSONB)

    # Ids of substances having at least one synonym whose identifier matches.
    synonym_subquery = (
        self.session.query(Substance.id)
        .select_from(
            Substance,
            func.jsonb_array_elements(Substance.identifiers["synonyms"]),
        )
        .filter(synonym_entry["identifier"].astext.ilike(term_text))
        .subquery()
    )

    criteria = [
        Substance.identifiers["preferred_name"].astext.ilike(term_text),
        Substance.identifiers["inchikey"].astext.ilike(term_text),
        Substance.identifiers["compound_id"].astext.ilike(search_term),
        Substance.id.ilike(search_term),
        Substance.identifiers["casrn"].astext.ilike(term_text),
        Substance.identifiers["display_name"].astext.ilike(term_text),
        Substance.id.in_(synonym_subquery),
    ]
    return self.session.query(Substance).filter(or_(*criteria))
def pg(element: function, compiler: SQLCompiler, **kw: Dict[str, Any]) -> str:
    """Compile ``element`` into an ``array_agg`` over unnested JSONB elements.

    The first clause of ``element`` is the JSON expression and the second is
    a bind parameter whose ``value`` is the target SQL type. The emitted
    statement selects ``array_agg(CAST(value AS <type>))`` from
    ``jsonb_array_elements(CAST(<json> AS JSONB))``.

    Args:
        element: Function element carrying the two clauses described above.
        compiler: SQL compiler used to render the generated statement.
        **kw: Forwarded unchanged to ``compiler.process``.

    Returns:
        The SQL string produced by ``compiler.process``.

    Raises:
        StopIteration: If ``element`` has fewer than two clauses.
        AssertionError: If the second clause's value is neither a
            ``TypeEngine`` instance nor a ``TypeEngine`` subclass.
    """
    clauses = iter(element.clauses)  # type: ignore
    json_expr = next(clauses)  # type: ignore
    target_type: TypeEngine = next(clauses).value  # type: ignore
    assert isinstance(target_type, TypeEngine) or issubclass(target_type, TypeEngine)

    unnested = sqla_func.jsonb_array_elements(
        cast(json_expr, JSONB)
    ).table_valued("value")
    aggregated = sqla_func.array_agg(cast(unnested.column, target_type))
    return compiler.process(select([aggregated]), **kw)
def filter_to_playlist_mood(session, mood, query, correlation):
    """Restrict a playlist query to playlists whose dominant mood is ``mood``.

    The dominant mood is the most common ``Track.mood`` among a playlist's
    current, non-deleted tracks; ties are broken by the highest track id.
    The restriction is applied as an ``EXISTS`` clause over a subquery that
    computes that dominant mood and compares it case-insensitively to
    ``mood``.

    Args:
        session: SQLAlchemy session used to build the subqueries.
        mood: (string) The mood to match. If falsy, ``query`` is returned
            untouched.
        query: The base playlist query to filter.
        correlation: The selectable exposing ``c.playlist_contents`` that the
            track-id subquery reads from and is correlated against, so the
            subquery is evaluated per playlist instead of pulling in all
            playlists. Required whenever ``mood`` is truthy.

    Returns:
        ``query`` itself when ``mood`` is falsy, otherwise the result of
        ``query.filter(...)`` with the extra ``EXISTS`` clause.

    Raises:
        ValueError: If ``mood`` is truthy and ``correlation`` is ``None``.
    """
    if not mood:
        return query

    # The track ids are read from ``correlation.c.playlist_contents``, so a
    # missing correlation cannot work. Fail with a clear message instead of an
    # AttributeError on ``None``.
    if correlation is None:
        raise ValueError(
            "correlation is required to filter playlists by mood")

    # Track ids contained in the (correlated) playlist's JSON contents.
    track_id_expr = func.jsonb_array_elements(
        correlation.c.playlist_contents['track_ids']
    ).op('->>')('track').cast(Integer)
    tracks_subquery = session.query(track_id_expr).correlate(correlation)

    # Query for the most common mood in a playlist.
    dominant_mood_subquery = (
        session.query(
            Track.mood.label('mood'),
            func.max(Track.track_id).label('latest'),
            func.count(Track.mood).label('cnt'),
        )
        .filter(
            Track.is_current == True,  # noqa: E712 (SQL expression)
            Track.is_delete == False,  # noqa: E712 (SQL expression)
            Track.track_id.in_(tracks_subquery),
        )
        .group_by(Track.mood)
        .order_by(desc('cnt'), desc('latest'))
        .limit(1)
        .subquery()
    )

    # Match the provided mood against the dominant mood for playlists.
    mood_exists_query = session.query(dominant_mood_subquery.c.mood).filter(
        func.lower(dominant_mood_subquery.c.mood) == func.lower(mood))

    # Keep only playlists for which such a dominant-mood row exists.
    return query.filter(mood_exists_query.exists())
def mkGroupQuery(source, qg: GroupQuery):
    """Translate a group query into extra sources and a boolean expression.

    * ``BoolOp.andOp`` / ``BoolOp.orOp``: every child is translated with
      ``basicOrGroupQuery(source)``; the non-``None`` child expressions are
      combined with ``and_`` / ``or_`` and the children's sources are
      concatenated in order.
    * ``Quantifier.exists``: ``qg.from_`` is resolved against ``source``,
      unnested with ``jsonb_array_elements`` and aliased; the first child is
      then translated against that alias.

    Args:
        source: Expression the group's paths are resolved against.
        qg: The group query to translate.

    Returns:
        A 2-tuple ``(sources, expression)``, or ``None`` when
        ``qg.operator`` matches none of the operators above.
    """
    if qg.operator == BoolOp.andOp:
        combine = and_
    elif qg.operator == BoolOp.orOp:
        combine = or_
    else:
        combine = None

    if combine is not None:
        translate = basicOrGroupQuery(source)
        translated = [translate(child) for child in qg.children]
        expressions = [expr for _, expr in translated if expr is not None]
        sources = [s for child_sources, _ in translated for s in child_sources]
        return sources, combine(*expressions)

    if qg.operator == Quantifier.exists:
        _, array_expr = mkPathStm(source, qg.from_, True)
        unnested = func.jsonb_array_elements(array_expr).alias()
        inner_sources, inner_expr = basicOrGroupQuery(
            column(unnested.name))(qg.children[0])
        return [unnested] + inner_sources, inner_expr

    # Unknown operator: same implicit result as falling off the end.
    return None
class Base(DeclarativeBase):
    """Declarative base for the models in this script."""


class Test(Base):
    """Row of ``test_table_json``: a JSONB payload plus two UUID columns."""

    __tablename__ = "test_table_json"

    id = mapped_column(Integer, primary_key=True)
    data: Mapped[Dict[str, Any]] = mapped_column(JSONB)
    ident: Mapped[_py_uuid] = mapped_column(UUID())
    ident_str: Mapped[str] = mapped_column(UUID(as_uuid=False))


# One JSONB value per element of ``Test.data``, exposed as column "elem".
elem = func.jsonb_array_elements(Test.data, type_=JSONB).column_valued("elem")

# Each array wraps a scalar subquery over the unnested elements. The two
# expressions are built independently so no bind parameter is shared.
_code_texts = array([select(elem["code"].astext).scalar_subquery()])
_new_value_texts = array(
    [select(elem["code"]["new_value"].astext).scalar_subquery()]
)

stmt = select(Test).where(
    or_(
        cast("example code", ARRAY(Text)).contained_by(_code_texts),
        cast("stefan", ARRAY(Text)).contained_by(_new_value_texts),
    )
)
print(stmt)

t1 = Test()
def topvalues(self, field, flt=None, topnbr=10, sort=None, limit=None,
              skip=None, least=False):
    """Build and run the SQL query producing the top values for `field`.

    The scans taken into account are those selected by `flt` (all of
    them when `flt` is None, through `self.flt_empty`). Results are
    ordered by decreasing count, or by increasing count when `least`
    is true, and limited to `topnbr` rows.

    Fields and pseudo-fields handled by this implementation:

      - port / port:<state> / port:<servicename>
        (<state> is one of open, filtered, closed, open|filtered)
      - ttl / ttlinit
      - countports:<state>
      - portlist:<state>
      - service / service:<portnbr> / service:<proto>/<portnbr>
      - product / product:<portnbr> / product:<tcp|udp>/<portnbr> /
        product:<servicename>
      - devicetype / devicetype:<portnbr> /
        devicetype:<tcp|udp>/<portnbr> / devicetype:<servicename>
      - version / version:<portnbr> / version:<tcp|udp>/<portnbr> /
        version:<servicename> / version:<servicename>:<product>
      - asnum / as / country / city / net / net:<mask> (mask
        defaults to 24)
      - script / script:<scriptid>
      - category / categories
      - cert.<subfield>
      - source
      - domains / domains:<level>
      - hop / hop:<ttl> / hop><ttl>
      - file / file.<subfield> / file:<scriptid>[,<scriptid>...] /
        file:<scriptid>[,...].<subfield> (subfield defaults to
        "filename")
      - modbus.<subfield> / s7.<subfield>
      - httphdr / httphdr.<key> / httphdr:<name>

    Any other value raises NotImplementedError.

    `sort`, `limit` and `skip` are accepted but are not referenced in
    this method's body.

    Returns a generator of dicts with a "count" key and an "_id" key
    holding the value (or the sequence of values) being counted.

    """
    if flt is None:
        flt = self.flt_empty
    # CTE with the ids of the scans selected by the filter.
    # NOTE(review): `base` is built here, before some branches below
    # extend `flt` with flt_and(); those extensions therefore do not
    # reach `base` -- confirm this is intended.
    base = flt.query(
        select([self.tables.scan.id
                ]).select_from(flt.select_from)).cte("base")
    order = "count" if least else desc("count")
    # Optional post-processing applied to "_id" in the generic path.
    outputproc = None
    # From here on, each branch either returns its own generator
    # (countports:, portlist:, domains:) or rebinds `field` from the
    # requested string to the structure built by
    # self._topstructure(), consumed by the generic code at the end.
    if field == "port":
        field = self._topstructure(
            self.tables.port,
            [self.tables.port.protocol, self.tables.port.port],
            self.tables.port.state == "open")
    elif field == "ttl":
        field = self._topstructure(
            self.tables.port,
            [self.tables.port.state_reason_ttl],
            self.tables.port.state_reason_ttl != None,  # noqa: E711 (BinaryExpression)
        )
    elif field == "ttlinit":
        # Round the observed TTL up to the next power of two, capped
        # at 255.
        field = self._topstructure(
            self.tables.port,
            [
                func.least(
                    255,
                    func.power(
                        2,
                        func.ceil(
                            func.log(2,
                                     self.tables.port.state_reason_ttl))))
            ],
            self.tables.port.state_reason_ttl != None,  # noqa: E711 (BinaryExpression)
        )
        outputproc = int
    elif field.startswith('port:'):
        # The suffix is either a port state or a service name.
        info = field[5:]
        field = self._topstructure(
            self.tables.port,
            [self.tables.port.protocol, self.tables.port.port],
            (self.tables.port.state == info)
            if info in ['open', 'filtered', 'closed', 'open|filtered'] else
            (self.tables.port.service_name == info),
        )
    elif field.startswith('countports:'):
        # Inner query: number of ports in state `info` per scan;
        # outer query: number of scans for each such port count.
        info = field[11:]
        return (
            {"count": result[0], "_id": result[1]}
            for result in self.db.execute(
                select([func.count().label("count"), column('cnt')])
                .select_from(
                    select([func.count().label('cnt')])
                    .select_from(self.tables.port)
                    .where(and_(
                        self.tables.port.state == info,
                        # Alternative filter, currently disabled:
                        # self.tables.port.scan.in_(base),
                        exists(
                            select([1])\
                            .select_from(base)\
                            .where(
                                self.tables.port.scan == base.c.id
                            )
                        ),
                    ))\
                    .group_by(self.tables.port.scan)\
                    .alias('cnt')
                ).group_by('cnt').order_by(order).limit(topnbr)
            )
        )
    elif field.startswith('portlist:'):
        # Two options for filtering:
        #   -1- self.tables.port.scan.in_(base),
        #   -2- exists(select([1])\
        #           .select_from(base)\
        #           .where(
        #               self.tables.port.scan == base.c.id
        #           )),
        #
        # According to a few tests, option -1- is much faster when
        # (base) is not, or only slightly, selective; option -2- is
        # slightly faster when (base) is very selective.
        #
        # TODO: check whether the same holds for:
        #   - countports:open
        #   - all the others
        info = field[9:]
        # result[1] is parsed as text: the three leading and three
        # trailing characters are dropped, then the value is split on
        # ')","(' and each piece on ','.
        # NOTE(review): presumably the driver returns the aggregated
        # array of (protocol, port) tuples in its textual form --
        # confirm.
        return (
            {
                "count": result[0],
                "_id": [(proto, int(port)) for proto, port in (
                    elt.split(',')
                    for elt in result[1][3:-3].split(')","('))]
            }
            for result in self.db.execute(
                select([func.count().label("count"), column('ports')]).
                select_from(
                    select([
                        func.array_agg(
                            postgresql.aggregate_order_by(
                                tuple_(self.tables.port.protocol,
                                       self.tables.port.port).label('a'),
                                tuple_(
                                    self.tables.port.protocol,
                                    self.tables.port.port).label('a'))
                        ).label('ports'),
                    ]).where(
                        and_(
                            self.tables.port.state == info,
                            self.tables.port.scan.in_(
                                base),
                            # Alternative filter, currently disabled:
                            # exists(select([1])\
                            #        .select_from(base)\
                            #        .where(
                            #            self.tables.port.scan == base.c.id
                            #        )),
                        )).group_by(self.tables.port.scan).alias('ports')
                ).group_by('ports').order_by(order).limit(topnbr)))
    elif field == "service":
        field = self._topstructure(self.tables.port,
                                   [self.tables.port.service_name],
                                   self.tables.port.state == "open")
    elif field.startswith("service:"):
        info = field[8:]
        if '/' in info:
            # "<protocol>/<port>"
            info = info.split('/', 1)
            field = self._topstructure(
                self.tables.port,
                [self.tables.port.service_name],
                and_(self.tables.port.protocol == info[0],
                     self.tables.port.port == int(info[1])),
            )
        else:
            field = self._topstructure(self.tables.port,
                                       [self.tables.port.service_name],
                                       self.tables.port.port == int(info))
    elif field == "product":
        field = self._topstructure(
            self.tables.port,
            [
                self.tables.port.service_name,
                self.tables.port.service_product
            ],
            self.tables.port.state == "open",
        )
    elif field.startswith("product:"):
        # The suffix is a port number, "tcp/<port>" / "udp/<port>",
        # or a service name.
        info = field[8:]
        if info.isdigit():
            info = int(info)
            flt = self.flt_and(flt, self.searchport(info))
            field = self._topstructure(
                self.tables.port,
                [
                    self.tables.port.service_name,
                    self.tables.port.service_product
                ],
                and_(self.tables.port.state == "open",
                     self.tables.port.port == info),
            )
        elif info.startswith('tcp/') or info.startswith('udp/'):
            info = (info[:3], int(info[4:]))
            flt = self.flt_and(flt,
                               self.searchport(info[1], protocol=info[0]))
            field = self._topstructure(
                self.tables.port,
                [
                    self.tables.port.service_name,
                    self.tables.port.service_product
                ],
                and_(self.tables.port.state == "open",
                     self.tables.port.port == info[1],
                     self.tables.port.protocol == info[0]),
            )
        else:
            flt = self.flt_and(flt, self.searchservice(info))
            field = self._topstructure(
                self.tables.port,
                [
                    self.tables.port.service_name,
                    self.tables.port.service_product
                ],
                and_(self.tables.port.state == "open",
                     self.tables.port.service_name == info),
            )
    elif field == "devicetype":
        field = self._topstructure(self.tables.port,
                                   [self.tables.port.service_devicetype],
                                   self.tables.port.state == "open")
    elif field.startswith("devicetype:"):
        # Same suffix forms as "product:".
        info = field[11:]
        if info.isdigit():
            info = int(info)
            flt = self.flt_and(flt, self.searchport(info))
            field = self._topstructure(
                self.tables.port,
                [self.tables.port.service_devicetype],
                and_(self.tables.port.state == "open",
                     self.tables.port.port == info))
        elif info.startswith('tcp/') or info.startswith('udp/'):
            info = (info[:3], int(info[4:]))
            flt = self.flt_and(flt,
                               self.searchport(info[1], protocol=info[0]))
            field = self._topstructure(
                self.tables.port,
                [self.tables.port.service_devicetype],
                and_(self.tables.port.state == "open",
                     self.tables.port.port == info[1],
                     self.tables.port.protocol == info[0]))
        else:
            flt = self.flt_and(flt, self.searchservice(info))
            field = self._topstructure(
                self.tables.port,
                [self.tables.port.service_devicetype],
                and_(self.tables.port.state == "open",
                     self.tables.port.service_name == info))
    elif field == "version":
        field = self._topstructure(
            self.tables.port,
            [
                self.tables.port.service_name,
                self.tables.port.service_product,
                self.tables.port.service_version
            ],
            self.tables.port.state == "open",
        )
    elif field.startswith("version:"):
        # Same suffix forms as "product:", plus
        # "<servicename>:<product>".
        info = field[8:]
        if info.isdigit():
            info = int(info)
            flt = self.flt_and(flt, self.searchport(info))
            field = self._topstructure(
                self.tables.port,
                [
                    self.tables.port.service_name,
                    self.tables.port.service_product,
                    self.tables.port.service_version
                ],
                and_(self.tables.port.state == "open",
                     self.tables.port.port == info),
            )
        elif info.startswith('tcp/') or info.startswith('udp/'):
            info = (info[:3], int(info[4:]))
            flt = self.flt_and(flt,
                               self.searchport(info[1], protocol=info[0]))
            field = self._topstructure(
                self.tables.port,
                [
                    self.tables.port.service_name,
                    self.tables.port.service_product,
                    self.tables.port.service_version
                ],
                and_(self.tables.port.state == "open",
                     self.tables.port.port == info[1],
                     self.tables.port.protocol == info[0]),
            )
        elif ':' in info:
            info = info.split(':', 1)
            flt = self.flt_and(
                flt, self.searchproduct(info[1], service=info[0]))
            field = self._topstructure(
                self.tables.port,
                [
                    self.tables.port.service_name,
                    self.tables.port.service_product,
                    self.tables.port.service_version
                ],
                and_(self.tables.port.state == "open",
                     self.tables.port.service_name == info[0],
                     self.tables.port.service_product == info[1]),
            )
        else:
            flt = self.flt_and(flt, self.searchservice(info))
            field = self._topstructure(
                self.tables.port,
                [
                    self.tables.port.service_name,
                    self.tables.port.service_product,
                    self.tables.port.service_version
                ],
                and_(self.tables.port.state == "open",
                     self.tables.port.service_name == info),
            )
    elif field == "asnum":
        field = self._topstructure(self.tables.scan,
                                   [self.tables.scan.info["as_num"]])
    elif field == "as":
        field = self._topstructure(self.tables.scan, [
            self.tables.scan.info["as_num"],
            self.tables.scan.info["as_name"]
        ])
    elif field == "country":
        field = self._topstructure(self.tables.scan, [
            self.tables.scan.info["country_code"],
            self.tables.scan.info["country_name"]
        ])
    elif field == "city":
        field = self._topstructure(self.tables.scan, [
            self.tables.scan.info["country_code"],
            self.tables.scan.info["city"]
        ])
    elif field == "net" or field.startswith("net:"):
        # Mask length taken from the suffix; 24 when absent.
        info = field[4:]
        info = int(info) if info else 24
        field = self._topstructure(
            self.tables.scan,
            [func.set_masklen(text("scan.addr::cidr"), info)],
        )
    elif field == "script" or field.startswith("script:"):
        # With a script name: top outputs of that script; without:
        # top script names.
        info = field[7:]
        if info:
            field = self._topstructure(self.tables.script,
                                       [self.tables.script.output],
                                       self.tables.script.name == info)
        else:
            field = self._topstructure(self.tables.script,
                                       [self.tables.script.name])
    elif field in ["category", "categories"]:
        field = self._topstructure(self.tables.category,
                                   [self.tables.category.name])
    elif field.startswith('cert.'):
        subfield = field[5:]
        field = self._topstructure(
            self.tables.script,
            [self.tables.script.data['ssl-cert'][subfield]],
            and_(self.tables.script.name == 'ssl-cert',
                 self.tables.script.data['ssl-cert'].has_key(
                     subfield)))  # noqa: W601 (BinaryExpression)
    elif field == "source":
        field = self._topstructure(self.tables.scan,
                                   [self.tables.scan.source])
    elif field == "domains":
        field = self._topstructure(
            self.tables.hostname,
            [func.unnest(self.tables.hostname.domains)])
    elif field.startswith("domains:"):
        # Keep only the unnested domains made of exactly
        # int(<suffix>) dot-separated labels (see the regexp below).
        level = int(field[8:]) - 1
        base1 = (select([
            func.unnest(self.tables.hostname.domains).label("domains")
        ]).where(
            exists(
                select([1]).select_from(base).where(
                    self.tables.hostname.scan == base.c.id))).cte("base1"))
        return ({
            "count": result[1],
            "_id": result[0]
        } for result in self.db.execute(
            select([base1.c.domains,
                    func.count().label("count")]).where(
                        base1.c.domains.op('~')
                        ('^([^\\.]+\\.){%d}[^\\.]+$' %
                         level)).group_by(base1.c.domains).order_by(
                             order).limit(topnbr)))
    elif field == "hop":
        field = self._topstructure(self.tables.hop,
                                   [self.tables.hop.ipaddr])
    elif field.startswith('hop') and field[3] in ':>':
        # "hop:<ttl>" matches hops at exactly that TTL, "hop><ttl>"
        # hops with a greater TTL.
        ttl = int(field[4:])
        field = self._topstructure(
            self.tables.hop,
            [self.tables.hop.ipaddr],
            (self.tables.hop.ttl > ttl) if field[3] == '>' else
            (self.tables.hop.ttl == ttl),
        )
    elif field == 'file' or (field.startswith('file')
                             and field[4] in '.:'):
        # In this branch `flt` is rebound to a plain SQL condition (or
        # True): `base` has already been built from the original
        # filter, and this branch's base table is not the scan table,
        # so flt.query() is not called on it afterwards.
        if field.startswith('file:'):
            # "file:<script>[,<script>...][.<subfield>]"
            scripts = field[5:]
            if '.' in scripts:
                scripts, field = scripts.split('.', 1)
            else:
                field = 'filename'
            scripts = scripts.split(',')
            flt = (self.tables.script.name == scripts[0]
                   if len(scripts) == 1 else
                   self.tables.script.name.in_(scripts))
        else:
            # "file" or "file.<subfield>"
            field = field[5:] or 'filename'
            flt = True
        field = self._topstructure(
            self.tables.script,
            [
                # Unnest data['ls']['volumes'], then each volume's
                # 'files', and extract the requested key as text.
                func.jsonb_array_elements(
                    func.jsonb_array_elements(
                        self.tables.script.data['ls']['volumes']).op('->')
                    ('files')).op('->>')(field).label(field)
            ],
            and_(
                flt,
                self.tables.script.data.op('@>')(
                    '{"ls": {"volumes": [{"files": []}]}}'),
            ),
        )
    elif field.startswith('modbus.'):
        subfield = field[7:]
        field = self._topstructure(
            self.tables.script,
            [self.tables.script.data['modbus-discover'][subfield]],
            and_(
                self.tables.script.name == 'modbus-discover',
                self.tables.script.data['modbus-discover'].has_key(
                    subfield)),  # noqa: W601 (BinaryExpression)
        )
    elif field.startswith('s7.'):
        subfield = field[3:]
        field = self._topstructure(
            self.tables.script,
            [self.tables.script.data['s7-info'][subfield]],
            and_(self.tables.script.name == 's7-info',
                 self.tables.script.data['s7-info'].has_key(subfield)),  # noqa: W601 (BinaryExpression)
        )
    elif field == 'httphdr':
        # The headers array is unnested as "hdr" (extra FROM element,
        # last argument); the name / value pairs are counted.
        flt = self.flt_and(flt, self.searchscript(name="http-headers"))
        field = self._topstructure(
            self.tables.script,
            [
                column("hdr").op('->>')('name').label("name"),
                column("hdr").op('->>')('value').label("value")
            ],
            self.tables.script.name == 'http-headers',
            [column("name"), column("value")],
            func.jsonb_array_elements(
                self.tables.script.data['http-headers']).alias('hdr'),
        )
    elif field.startswith('httphdr.'):
        # Count the values of one key of the header objects.
        flt = self.flt_and(flt, self.searchscript(name="http-headers"))
        field = self._topstructure(
            self.tables.script,
            [column("hdr").op('->>')(field[8:]).label("topvalue")],
            self.tables.script.name == 'http-headers',
            [column("topvalue")],
            func.jsonb_array_elements(
                self.tables.script.data['http-headers']).alias('hdr'),
        )
    elif field.startswith('httphdr:'):
        # Count the values of the header whose (lowercased) name is
        # given as suffix.
        flt = self.flt_and(flt, self.searchhttphdr(name=field[8:].lower()))
        field = self._topstructure(
            self.tables.script,
            [column("hdr").op('->>')("value").label("value")],
            and_(self.tables.script.name == 'http-headers',
                 column("hdr").op('->>')("name") == field[8:].lower()),
            [column("value")],
            func.jsonb_array_elements(
                self.tables.script.data['http-headers']).alias('hdr'),
        )
    else:
        raise NotImplementedError()
    # Generic path. For each base table other than scan: the FROM
    # clause to use, and the condition linking its rows to the scans
    # of `base`.
    s_from = {
        self.tables.script: join(self.tables.script, self.tables.port),
        self.tables.port: self.tables.port,
        self.tables.category: join(self.tables.association_scan_category,
                                   self.tables.category),
        self.tables.hostname: self.tables.hostname,
        self.tables.hop: join(self.tables.trace, self.tables.hop),
    }
    where_clause = {
        self.tables.script: self.tables.port.scan == base.c.id,
        self.tables.port: self.tables.port.scan == base.c.id,
        self.tables.category:
        self.tables.association_scan_category.scan == base.c.id,
        self.tables.hostname: self.tables.hostname.scan == base.c.id,
        self.tables.hop: self.tables.trace.scan == base.c.id
    }
    if field.base == self.tables.scan:
        # Values stored in the scan table: apply the filter directly.
        req = flt.query(
            select([func.count().label("count")] +
                   field.fields).select_from(
                       self.tables.scan).group_by(*field.fields))
    else:
        # Values stored in another table: restrict to rows attached
        # to a scan from `base`.
        req = (select([func.count().label("count")] +
                      field.fields).select_from(s_from[field.base]))
        if field.extraselectfrom is not None:
            req = req.select_from(field.extraselectfrom)
        req = (req.group_by(
            *(field.fields if field.group_by is None else field.group_by
              )).where(
                  exists(
                      select([1]).select_from(base).where(
                          where_clause[field.base]))))
    if field.where is not None:
        req = req.where(field.where)
    # "_id" is the single value when only one column is selected
    # besides the count, the remaining columns otherwise.
    if outputproc is None:
        return ({
            "count": result[0],
            "_id": result[1:] if len(result) > 2 else result[1]
        } for result in self.db.execute(req.order_by(order).limit(topnbr)))
    else:
        return ({
            "count":
            result[0],
            "_id":
            outputproc(result[1:] if len(result) > 2 else result[1])
        } for result in self.db.execute(req.order_by(order).limit(topnbr)))