def map_csv(session):
    """Return the MapStat rows rendered as CSV text.

    Selects ``lat``, ``lon`` and ``ceil(log10(value))`` cast to Integer for
    every MapStat row whose value is at least 2, executes the query on
    *session*, and writes a ``lat,lon,value`` header followed by one line
    per row. ``lat`` and ``lon`` are divided by 1000.0 before being written.
    """
    # Use a logarithmic scale to give lesser used regions a chance.
    scaled_value = func.cast(func.ceil(func.log10(MapStat.value)), Integer)
    stmt = select(
        columns=(MapStat.lat, MapStat.lon, scaled_value),
        whereclause=MapStat.value >= 2)
    records = session.execute(stmt).fetchall()

    buffer = StringIO()
    writer = csv.writer(buffer)
    writer.writerow(('lat', 'lon', 'value'))
    writer.writerows(
        (lat / 1000.0, lon / 1000.0, value) for lat, lon, value in records)
    return buffer.getvalue()
def _calculate_seq_stats(self, sample_id, min_cdr3, max_cdr3,
                         include_outliers, only_full_reads):
    """Feed every matching sequence of a sample into the per-context stats.

    One ``SeqContextStats`` is created per entry of ``_seq_contexts``. The
    sequences of ``sample_id`` are then queried, optionally restricted to
    the ``[min_cdr3, max_cdr3]`` CDR3 length range (only when outliers are
    excluded and ``min_cdr3`` is not None) and to rows with
    ``partial == 0`` (when ``only_full_reads`` is true). Each row is offered
    to every stats object via ``add_if_match``; the collected stats are
    finally handed to ``self._add_stat``.
    """
    context_stats = {
        context: SeqContextStats(self._session, **params)
        for context, params in _seq_contexts.items()
    }

    # TODO: This should be automatically generated from _dist_fields
    v_length_with_gaps = (Sequence.v_length + Sequence.num_gaps).label(
        'v_length')
    v_identity = func.ceil(
        100 * Sequence.v_match / Sequence.v_length
    ).label('v_identity')
    selected = (
        Sequence.quality,
        Sequence.sample_id,
        Sequence.v_match,
        Sequence.j_match,
        Sequence.j_length,
        Sequence.v_gene,
        Sequence.j_gene,
        Sequence.in_frame,
        Sequence.stop,
        Sequence.functional,
        Sequence.copy_number,
        v_length_with_gaps,
        v_identity,
        Sequence.cdr3_num_nts.label('cdr3_length'),
    )
    query = self._session.query(*selected).filter(
        Sequence.sample_id == sample_id)

    restrict_cdr3 = not include_outliers and min_cdr3 is not None
    if restrict_cdr3:
        query = query.filter(Sequence.cdr3_num_nts >= min_cdr3,
                             Sequence.cdr3_num_nts <= max_cdr3)
    if only_full_reads:
        query = query.filter(Sequence.partial == 0)

    for seq in query:
        for collector in context_stats.values():
            collector.add_if_match(seq)

    self._add_stat(context_stats, sample_id, include_outliers,
                   only_full_reads)
def topvalues(self, field, flt=None, topnbr=10, sort=None,
              limit=None, skip=None, least=False):
    """
    Produce the top values for a given field or pseudo-field by
    building a grouped ``count`` query and executing it with
    ``self.db.execute``.

    Pseudo-fields listed by the original documentation (not every
    entry has a branch in this body; a name without a branch
    raises NotImplementedError):
      - category / label / asnum / country / net[:mask]
      - port
      - port:open / :closed / :filtered / :<servicename>
      - portlist:open / :closed / :filtered
      - countports:open / :closed / :filtered
      - service / service:<portnbr>
      - product / product:<portnbr>
      - cpe / cpe.<part> / cpe:<cpe_spec> / cpe.<part>:<cpe_spec>
      - devicetype / devicetype:<portnbr>
      - script:<scriptid> / script:<port>:<scriptid> / script:host:<scriptid>
      - cert.* / smb.* / sshkey.*
      - httphdr / httphdr.{name,value} / httphdr:<name>
      - modbus.* / s7.* / enip.*
      - mongo.dbs.*
      - vulns.*
      - screenwords
      - file.* / file.*:scriptid
      - hop

    Parameters:
      - field: name of the field or pseudo-field (string).
      - flt: filter object; defaults to ``self.flt_empty`` when None.
      - topnbr: maximum number of rows requested (``LIMIT``).
      - sort, limit, skip: accepted but not referenced in this body.
      - least: when true, order by ascending count instead of
        descending.

    Returns a generator of dicts with the keys ``"count"`` and
    ``"_id"``.

    Raises NotImplementedError when ``field`` matches no branch.
    """
    if flt is None:
        flt = self.flt_empty
    # CTE "base": ids of the scan rows selected by the filter.
    base = flt.query(
        select([self.tables.scan.id
                ]).select_from(flt.select_from)).cte("base")
    # Ascending count for `least`, descending otherwise.
    order = "count" if least else desc("count")
    # Optional post-processing applied to "_id" in the final generator.
    outputproc = None
    # Each branch below either returns its own generator directly or
    # rebinds `field` to the object returned by self._topstructure(),
    # which the common tail of the method turns into a query.
    if field == "port":
        field = self._topstructure(
            self.tables.port,
            [self.tables.port.protocol, self.tables.port.port],
            self.tables.port.state == "open")
    elif field == "ttl":
        field = self._topstructure(
            self.tables.port,
            [self.tables.port.state_reason_ttl],
            self.tables.port.state_reason_ttl != None,  # noqa: E711 (BinaryExpression)
        )
    elif field == "ttlinit":
        # least(255, 2 ** ceil(log2(ttl))): the observed TTL rounded up
        # to a power of two and capped at 255.
        field = self._topstructure(
            self.tables.port,
            [
                func.least(
                    255,
                    func.power(
                        2,
                        func.ceil(
                            func.log(2,
                                     self.tables.port.state_reason_ttl))))
            ],
            self.tables.port.state_reason_ttl != None,  # noqa: E711 (BinaryExpression)
        )
        outputproc = int
    elif field.startswith('port:'):
        # The suffix is either a port state or, failing that, a
        # service name.
        info = field[5:]
        field = self._topstructure(
            self.tables.port,
            [self.tables.port.protocol, self.tables.port.port],
            (self.tables.port.state == info)
            if info in ['open', 'filtered', 'closed', 'open|filtered'] else
            (self.tables.port.service_name == info),
        )
    elif field.startswith('countports:'):
        # Inner query: number of ports in state `info` per scan;
        # outer query: how many scans share each such number.
        info = field[11:]
        return (
            {"count": result[0], "_id": result[1]}
            for result in self.db.execute(
                select([func.count().label("count"), column('cnt')])
                .select_from(
                    select([func.count().label('cnt')])
                    .select_from(self.tables.port)
                    .where(and_(
                        self.tables.port.state == info,
                        # Alternative filter, kept for reference:
                        # self.tables.port.scan.in_(base),
                        exists(
                            select([1])\
                            .select_from(base)\
                            .where(
                                self.tables.port.scan == base.c.id
                            )
                        ),
                    ))\
                    .group_by(self.tables.port.scan)\
                    .alias('cnt')
                ).group_by('cnt').order_by(order).limit(topnbr)
            )
        )
    elif field.startswith('portlist:'):
        # Two options for filtering:
        #   -1- self.tables.port.scan.in_(base),
        #   -2- exists(select([1])
        #       .select_from(base)
        #       .where(
        #         self.tables.port.scan == base.c.id
        #       )),
        #
        # According to a few tests, option -1- is much faster when
        # (base) is not or only slightly selective, option -2- is a
        # bit faster when (base) is very selective.
        #
        # TODO: check whether the same holds for:
        #  - countports:open
        #  - all the others
        info = field[9:]
        # result[1] is sliced with [3:-3] and split on ')","(' to get
        # "proto,port" items -- presumably the aggregated array comes
        # back as text like {"(tcp,80)","(tcp,443)"}; TODO confirm.
        return (
            {
                "count": result[0],
                "_id": [(proto, int(port)) for proto, port in (
                    elt.split(',')
                    for elt in result[1][3:-3].split(')","('))]
            }
            for result in self.db.execute(
                select([func.count().label("count"), column('ports')]).
                select_from(
                    select([
                        func.array_agg(
                            postgresql.aggregate_order_by(
                                tuple_(self.tables.port.protocol,
                                       self.
                                       tables.port.port).label('a'),
                                tuple_(
                                    self.tables.port.protocol,
                                    self.tables.
                                    port.port).label('a'))).label('ports'),
                    ]).where(
                        and_(
                            self.tables.port.state == info,
                            self.tables.port.scan.in_(
                                base),
                            # Alternative filter, kept for reference:
                            # exists(select([1])
                            #        .select_from(base)
                            #        .where(
                            #            self.tables.port.scan == base.c.id
                            #        )),
                        )).group_by(self.tables.port.scan).alias('ports')
                ).group_by('ports').order_by(order).limit(topnbr)))
    elif field == "service":
        field = self._topstructure(self.tables.port,
                                   [self.tables.port.service_name],
                                   self.tables.port.state == "open")
    elif field.startswith("service:"):
        # Suffix is either "<protocol>/<port>" or "<port>".
        info = field[8:]
        if '/' in info:
            info = info.split('/', 1)
            field = self._topstructure(
                self.tables.port,
                [self.tables.port.service_name],
                and_(self.tables.port.protocol == info[0],
                     self.tables.port.port == int(info[1])),
            )
        else:
            field = self._topstructure(self.tables.port,
                                       [self.tables.port.service_name],
                                       self.tables.port.port == int(info))
    elif field == "product":
        field = self._topstructure(
            self.tables.port,
            [
                self.tables.port.service_name,
                self.tables.port.service_product
            ],
            self.tables.port.state == "open",
        )
    elif field.startswith("product:"):
        # Suffix is a port number, "tcp/<port>" / "udp/<port>", or a
        # service name.
        # NOTE(review): `flt` is rebound in the sub-branches below, but
        # `base` was already built from the previous `flt` and the
        # common tail only calls flt.query() for scan-based fields --
        # confirm the reassignment has the intended effect.
        info = field[8:]
        if info.isdigit():
            info = int(info)
            flt = self.flt_and(flt, self.searchport(info))
            field = self._topstructure(
                self.tables.port,
                [
                    self.tables.port.service_name,
                    self.tables.port.service_product
                ],
                and_(self.tables.port.state == "open",
                     self.tables.port.port == info),
            )
        elif info.startswith('tcp/') or info.startswith('udp/'):
            info = (info[:3], int(info[4:]))
            flt = self.flt_and(flt,
                               self.searchport(info[1], protocol=info[0]))
            field = self._topstructure(
                self.tables.port,
                [
                    self.tables.port.service_name,
                    self.tables.port.service_product
                ],
                and_(self.tables.port.state == "open",
                     self.tables.port.port == info[1],
                     self.tables.port.protocol == info[0]),
            )
        else:
            flt = self.flt_and(flt, self.searchservice(info))
            field = self._topstructure(
                self.tables.port,
                [
                    self.tables.port.service_name,
                    self.tables.port.service_product
                ],
                and_(self.tables.port.state == "open",
                     self.tables.port.service_name == info),
            )
    elif field == "devicetype":
        field = self._topstructure(self.tables.port,
                                   [self.tables.port.service_devicetype],
                                   self.tables.port.state == "open")
    elif field.startswith("devicetype:"):
        # Same suffix forms as "product:" (port, proto/port, service).
        # NOTE(review): `flt` is rebound after `base` was built; see
        # the note in the "product:" branch.
        info = field[11:]
        if info.isdigit():
            info = int(info)
            flt = self.flt_and(flt, self.searchport(info))
            field = self._topstructure(
                self.tables.port,
                [self.tables.port.service_devicetype],
                and_(self.tables.port.state == "open",
                     self.tables.port.port == info))
        elif info.startswith('tcp/') or info.startswith('udp/'):
            info = (info[:3], int(info[4:]))
            flt = self.flt_and(flt,
                               self.searchport(info[1], protocol=info[0]))
            field = self._topstructure(
                self.tables.port,
                [self.tables.port.service_devicetype],
                and_(self.tables.port.state == "open",
                     self.tables.port.port == info[1],
                     self.tables.port.protocol == info[0]))
        else:
            flt = self.flt_and(flt, self.searchservice(info))
            field = self._topstructure(
                self.tables.port,
                [self.tables.port.service_devicetype],
                and_(self.tables.port.state == "open",
                     self.tables.port.service_name == info))
    elif field == "version":
        field = self._topstructure(
            self.tables.port,
            [
                self.tables.port.service_name,
                self.tables.port.service_product,
                self.tables.port.service_version
            ],
            self.tables.port.state == "open",
        )
    elif field.startswith("version:"):
        # Suffix is a port number, "tcp/<port>" / "udp/<port>",
        # "<service>:<product>", or a service name.
        # NOTE(review): `flt` is rebound after `base` was built; see
        # the note in the "product:" branch.
        info = field[8:]
        if info.isdigit():
            info = int(info)
            flt = self.flt_and(flt, self.searchport(info))
            field = self._topstructure(
                self.tables.port,
                [
                    self.tables.port.service_name,
                    self.tables.port.service_product,
                    self.tables.port.service_version
                ],
                and_(self.tables.port.state == "open",
                     self.tables.port.port == info),
            )
        elif info.startswith('tcp/') or info.startswith('udp/'):
            info = (info[:3], int(info[4:]))
            flt = self.flt_and(flt,
                               self.searchport(info[1], protocol=info[0]))
            field = self._topstructure(
                self.tables.port,
                [
                    self.tables.port.service_name,
                    self.tables.port.service_product,
                    self.tables.port.service_version
                ],
                and_(self.tables.port.state == "open",
                     self.tables.port.port == info[1],
                     self.tables.port.protocol == info[0]),
            )
        elif ':' in info:
            info = info.split(':', 1)
            flt = self.flt_and(
                flt, self.searchproduct(info[1], service=info[0]))
            field = self._topstructure(
                self.tables.port,
                [
                    self.tables.port.service_name,
                    self.tables.port.service_product,
                    self.tables.port.service_version
                ],
                and_(self.tables.port.state == "open",
                     self.tables.port.service_name == info[0],
                     self.tables.port.service_product == info[1]),
            )
        else:
            flt = self.flt_and(flt, self.searchservice(info))
            field = self._topstructure(
                self.tables.port,
                [
                    self.tables.port.service_name,
                    self.tables.port.service_product,
                    self.tables.port.service_version
                ],
                and_(self.tables.port.state == "open",
                     self.tables.port.service_name == info),
            )
    elif field == "asnum":
        field = self._topstructure(self.tables.scan,
                                   [self.tables.scan.info["as_num"]])
    elif field == "as":
        field = self._topstructure(self.tables.scan, [
            self.tables.scan.info["as_num"],
            self.tables.scan.info["as_name"]
        ])
    elif field == "country":
        field = self._topstructure(self.tables.scan, [
            self.tables.scan.info["country_code"],
            self.tables.scan.info["country_name"]
        ])
    elif field == "city":
        field = self._topstructure(self.tables.scan, [
            self.tables.scan.info["country_code"],
            self.tables.scan.info["city"]
        ])
    elif field == "net" or field.startswith("net:"):
        # Mask length comes from the suffix; 24 when there is none.
        info = field[4:]
        info = int(info) if info else 24
        field = self._topstructure(
            self.tables.scan,
            [func.set_masklen(text("scan.addr::cidr"), info)],
        )
    elif field == "script" or field.startswith("script:"):
        # With a script name: top outputs of that script; without:
        # top script names.
        info = field[7:]
        if info:
            field = self._topstructure(self.tables.script,
                                       [self.tables.script.output],
                                       self.tables.script.name == info)
        else:
            field = self._topstructure(self.tables.script,
                                       [self.tables.script.name])
    elif field in ["category", "categories"]:
        field = self._topstructure(self.tables.category,
                                   [self.tables.category.name])
    elif field.startswith('cert.'):
        subfield = field[5:]
        field = self._topstructure(
            self.tables.script,
            [self.tables.script.data['ssl-cert'][subfield]],
            and_(self.tables.script.name == 'ssl-cert',
                 self.tables.script.data['ssl-cert'].has_key(
                     subfield)))  # noqa: W601 (BinaryExpression)
    elif field == "source":
        field = self._topstructure(self.tables.scan,
                                   [self.tables.scan.source])
    elif field == "domains":
        field = self._topstructure(
            self.tables.hostname,
            [func.unnest(self.tables.hostname.domains)])
    elif field.startswith("domains:"):
        # Keep only domains made of exactly `level` dot-terminated
        # labels followed by a final label (regex match with '~').
        level = int(field[8:]) - 1
        base1 = (select([
            func.unnest(self.tables.hostname.domains).label("domains")
        ]).where(
            exists(
                select([1]).select_from(base).where(
                    self.tables.hostname.scan == base.c.id))).cte("base1"))
        # Column order here is (domain, count), hence the swapped
        # indexes compared with the other branches.
        return ({
            "count": result[1],
            "_id": result[0]
        } for result in self.db.execute(
            select([base1.c.domains,
                    func.count().label("count")]).where(
                        base1.c.domains.op('~')
                        ('^([^\\.]+\\.){%d}[^\\.]+$' %
                         level)).group_by(base1.c.domains).order_by(
                             order).limit(topnbr)))
    elif field == "hop":
        field = self._topstructure(self.tables.hop,
                                   [self.tables.hop.ipaddr])
    elif field.startswith('hop') and field[3] in ':>':
        # "hop:<ttl>" selects hops at that TTL, "hop><ttl>" hops
        # beyond it.
        ttl = int(field[4:])
        field = self._topstructure(
            self.tables.hop,
            [self.tables.hop.ipaddr],
            (self.tables.hop.ttl > ttl) if field[3] == '>' else
            (self.tables.hop.ttl == ttl),
        )
    elif field == 'file' or (field.startswith('file')
                             and field[4] in '.:'):
        # "file:<script>[,<script>...][.<key>]" restricts to the given
        # script names; "file[.<key>]" applies no script restriction.
        # The key defaults to 'filename'.
        # NOTE: from here on `flt` no longer holds the filter object;
        # it is rebound to a SQL condition (or True) used in and_().
        if field.startswith('file:'):
            scripts = field[5:]
            if '.' in scripts:
                scripts, field = scripts.split('.', 1)
            else:
                field = 'filename'
            scripts = scripts.split(',')
            flt = (self.tables.script.name == scripts[0]
                   if len(scripts) == 1 else
                   self.tables.script.name.in_(scripts))
        else:
            field = field[5:] or 'filename'
            flt = True
        field = self._topstructure(
            self.tables.script,
            [
                func.jsonb_array_elements(
                    func.jsonb_array_elements(
                        self.tables.script.data['ls']['volumes']).op('->')
                    ('files')).op('->>')(field).label(field)
            ],
            and_(
                flt,
                self.tables.script.data.op('@>')(
                    '{"ls": {"volumes": [{"files": []}]}}'),
            ),
        )
    elif field.startswith('modbus.'):
        subfield = field[7:]
        field = self._topstructure(
            self.tables.script,
            [self.tables.script.data['modbus-discover'][subfield]],
            and_(
                self.tables.script.name == 'modbus-discover',
                self.tables.script.data['modbus-discover'].has_key(
                    subfield)),  # noqa: W601 (BinaryExpression)
        )
    elif field.startswith('s7.'):
        subfield = field[3:]
        field = self._topstructure(
            self.tables.script,
            [self.tables.script.data['s7-info'][subfield]],
            and_(self.tables.script.name == 's7-info',
                 self.tables.script.data['s7-info'].has_key(subfield)),  # noqa: W601 (BinaryExpression)
        )
    elif field == 'httphdr':
        # The http-headers data is expanded with jsonb_array_elements
        # (aliased 'hdr') and grouped on (name, value).
        # NOTE(review): `flt` is rebound after `base` was built; see
        # the note in the "product:" branch.
        flt = self.flt_and(flt, self.searchscript(name="http-headers"))
        field = self._topstructure(
            self.tables.script,
            [
                column("hdr").op('->>')('name').label("name"),
                column("hdr").op('->>')('value').label("value")
            ],
            self.tables.script.name == 'http-headers',
            [column("name"), column("value")],
            func.jsonb_array_elements(
                self.tables.script.data['http-headers']).alias('hdr'),
        )
    elif field.startswith('httphdr.'):
        # The suffix is used as the JSON key extracted from each header.
        flt = self.flt_and(flt, self.searchscript(name="http-headers"))
        field = self._topstructure(
            self.tables.script,
            [column("hdr").op('->>')(field[8:]).label("topvalue")],
            self.tables.script.name == 'http-headers',
            [column("topvalue")],
            func.jsonb_array_elements(
                self.tables.script.data['http-headers']).alias('hdr'),
        )
    elif field.startswith('httphdr:'):
        # Top values of one header; the header name is lower-cased
        # before comparison.
        flt = self.flt_and(flt, self.searchhttphdr(name=field[8:].lower()))
        field = self._topstructure(
            self.tables.script,
            [column("hdr").op('->>')("value").label("value")],
            and_(self.tables.script.name == 'http-headers',
                 column("hdr").op('->>')("name") == field[8:].lower()),
            [column("value")],
            func.jsonb_array_elements(
                self.tables.script.data['http-headers']).alias('hdr'),
        )
    else:
        raise NotImplementedError()
    # FROM clause to use for each non-scan base table.
    s_from = {
        self.tables.script: join(self.tables.script, self.tables.port),
        self.tables.port: self.tables.port,
        self.tables.category: join(self.tables.association_scan_category,
                                   self.tables.category),
        self.tables.hostname: self.tables.hostname,
        self.tables.hop: join(self.tables.trace, self.tables.hop),
    }
    # Condition tying each non-scan base table back to the "base" CTE.
    where_clause = {
        self.tables.script: self.tables.port.scan == base.c.id,
        self.tables.port: self.tables.port.scan == base.c.id,
        self.tables.category:
        self.tables.association_scan_category.scan == base.c.id,
        self.tables.hostname: self.tables.hostname.scan == base.c.id,
        self.tables.hop: self.tables.trace.scan == base.c.id
    }
    if field.base == self.tables.scan:
        # Scan-based fields are filtered directly through flt.query().
        req = flt.query(
            select([func.count().label("count")] +
                   field.fields).select_from(
                       self.tables.scan).group_by(*field.fields))
    else:
        # Other tables are restricted to scans present in "base"
        # through an EXISTS sub-query.
        req = (select([func.count().label("count")] +
                      field.fields).select_from(s_from[field.base]))
        if field.extraselectfrom is not None:
            req = req.select_from(field.extraselectfrom)
        req = (req.group_by(
            *(field.fields if field.group_by is None else field.group_by
              )).where(
                  exists(
                      select([1]).select_from(base).where(
                          where_clause[field.base]))))
    if field.where is not None:
        req = req.where(field.where)
    # "_id" is a single value when the row holds one grouped column,
    # otherwise the slice of all grouped columns.
    if outputproc is None:
        return ({
            "count": result[0],
            "_id": result[1:] if len(result) > 2 else result[1]
        } for result in self.db.execute(req.order_by(order).limit(topnbr)))
    else:
        return ({
            "count": result[0],
            "_id": outputproc(result[1:] if len(result) > 2 else result[1])
        } for result in self.db.execute(req.order_by(order).limit(topnbr)))
def grouped_eventpos(cls):
    """Build ``func.ceil(ChIPPeakData.eventpos / 400) * 400``.

    ``cls`` is not used; the expression is always built from
    ``ChIPPeakData.eventpos``.
    """
    bin_width = 400
    bin_index = func.ceil(ChIPPeakData.eventpos / bin_width)
    return bin_index * bin_width
def grouped_eventpos(cls):
    """Return the event position expression grouped in steps of 400.

    The result is ``func.ceil(ChIPPeakData.eventpos / 400) * 400``; the
    ``cls`` argument is not referenced.
    """
    step = 400
    scaled_position = ChIPPeakData.eventpos / step
    return func.ceil(scaled_position) * step