예제 #1
0
파일: stats.py 프로젝트: v1ka5/ichnaea
def map_csv(session):
    # use a logarithmic scale to give lesser used regions a chance
    query = select(
        columns=(MapStat.lat, MapStat.lon,
                 func.cast(func.ceil(func.log10(MapStat.value)), Integer)),
        whereclause=MapStat.value >= 2)
    result = session.execute(query).fetchall()
    rows = StringIO()
    csvwriter = csv.writer(rows)
    csvwriter.writerow(('lat', 'lon', 'value'))
    for lat, lon, value in result:
        csvwriter.writerow((lat / 1000.0, lon / 1000.0, value))
    return rows.getvalue()
예제 #2
0
def map_csv(session):
    # use a logarithmic scale to give lesser used regions a chance
    query = select(
        columns=(MapStat.lat, MapStat.lon,
                 func.cast(func.ceil(func.log10(MapStat.value)), Integer)),
        whereclause=MapStat.value >= 2)
    result = session.execute(query).fetchall()
    rows = StringIO()
    csvwriter = csv.writer(rows)
    csvwriter.writerow(('lat', 'lon', 'value'))
    for lat, lon, value in result:
        csvwriter.writerow((lat / 1000.0, lon / 1000.0, value))
    return rows.getvalue()
    def _calculate_seq_stats(self, sample_id, min_cdr3, max_cdr3,
                             include_outliers, only_full_reads):
        seq_statistics = {}
        for name, stat in _seq_contexts.items():
            seq_statistics[name] = SeqContextStats(self._session, **stat)

        # TODO: This should be automatically generated from _dist_fields
        query = self._session.query(
            Sequence.quality,
            Sequence.sample_id,
            Sequence.v_match,
            Sequence.j_match,
            Sequence.j_length,
            Sequence.v_gene,
            Sequence.j_gene,
            Sequence.in_frame,
            Sequence.stop,
            Sequence.functional,
            Sequence.copy_number,
            (Sequence.v_length + Sequence.num_gaps).label('v_length'),
            (
                func.ceil(100 * Sequence.v_match / Sequence.v_length)
            ).label('v_identity'),
            Sequence.cdr3_num_nts.label('cdr3_length'),
        ).filter(
            Sequence.sample_id == sample_id
        )

        if not include_outliers and min_cdr3 is not None:
            query = query.filter(Sequence.cdr3_num_nts >= min_cdr3,
                                 Sequence.cdr3_num_nts <= max_cdr3)
        if only_full_reads:
            query = query.filter(Sequence.partial == 0)

        for seq in query:
            for name, stat in seq_statistics.items():
                stat.add_if_match(seq)

        self._add_stat(seq_statistics, sample_id, include_outliers,
                       only_full_reads)
예제 #4
0
    def _calculate_seq_stats(self, sample_id, min_cdr3, max_cdr3,
                             include_outliers, only_full_reads):
        seq_statistics = {}
        for name, stat in _seq_contexts.items():
            seq_statistics[name] = SeqContextStats(self._session, **stat)

        # TODO: This should be automatically generated from _dist_fields
        query = self._session.query(
            Sequence.quality,
            Sequence.sample_id,
            Sequence.v_match,
            Sequence.j_match,
            Sequence.j_length,
            Sequence.v_gene,
            Sequence.j_gene,
            Sequence.in_frame,
            Sequence.stop,
            Sequence.functional,
            Sequence.copy_number,
            (Sequence.v_length + Sequence.num_gaps).label('v_length'),
            (
                func.ceil(100 * Sequence.v_match / Sequence.v_length)
            ).label('v_identity'),
            Sequence.cdr3_num_nts.label('cdr3_length'),
        ).filter(
            Sequence.sample_id == sample_id
        )

        if not include_outliers and min_cdr3 is not None:
            query = query.filter(Sequence.cdr3_num_nts >= min_cdr3,
                                 Sequence.cdr3_num_nts <= max_cdr3)
        if only_full_reads:
            query = query.filter(Sequence.partial == 0)

        for seq in query:
            for name, stat in seq_statistics.items():
                stat.add_if_match(seq)

        self._add_stat(seq_statistics, sample_id, include_outliers,
                       only_full_reads)
예제 #5
0
 def topvalues(self,
               field,
               flt=None,
               topnbr=10,
               sort=None,
               limit=None,
               skip=None,
               least=False):
     """
     This method makes use of the aggregation framework to produce
     top values for a given field or pseudo-field. Pseudo-fields are:
       - category / label / asnum / country / net[:mask]
       - port
       - port:open / :closed / :filtered / :<servicename>
       - portlist:open / :closed / :filtered
       - countports:open / :closed / :filtered
       - service / service:<portnbr>
       - product / product:<portnbr>
       - cpe / cpe.<part> / cpe:<cpe_spec> / cpe.<part>:<cpe_spec>
       - devicetype / devicetype:<portnbr>
       - script:<scriptid> / script:<port>:<scriptid>
         / script:host:<scriptid>
       - cert.* / smb.* / sshkey.*
       - httphdr / httphdr.{name,value} / httphdr:<name>
       - modbus.* / s7.* / enip.*
       - mongo.dbs.*
       - vulns.*
       - screenwords
       - file.* / file.*:scriptid
       - hop
     """
     if flt is None:
         flt = self.flt_empty
     base = flt.query(
         select([self.tables.scan.id
                 ]).select_from(flt.select_from)).cte("base")
     order = "count" if least else desc("count")
     outputproc = None
     if field == "port":
         field = self._topstructure(
             self.tables.port,
             [self.tables.port.protocol, self.tables.port.port],
             self.tables.port.state == "open")
     elif field == "ttl":
         field = self._topstructure(
             self.tables.port,
             [self.tables.port.state_reason_ttl],
             self.tables.port.state_reason_ttl != None,
             # noqa: E711 (BinaryExpression)
         )
     elif field == "ttlinit":
         field = self._topstructure(
             self.tables.port,
             [
                 func.least(
                     255,
                     func.power(
                         2,
                         func.ceil(
                             func.log(2,
                                      self.tables.port.state_reason_ttl))))
             ],
             self.tables.port.state_reason_ttl != None,
             # noqa: E711 (BinaryExpression)
         )
         outputproc = int
     elif field.startswith('port:'):
         info = field[5:]
         field = self._topstructure(
             self.tables.port,
             [self.tables.port.protocol, self.tables.port.port],
             (self.tables.port.state == info)
             if info in ['open', 'filtered', 'closed', 'open|filtered'] else
             (self.tables.port.service_name == info),
         )
     elif field.startswith('countports:'):
         info = field[11:]
         return (
             {"count": result[0], "_id": result[1]}
             for result in self.db.execute(
                 select([func.count().label("count"),
                         column('cnt')])
                 .select_from(
                     select([func.count().label('cnt')])
                     .select_from(self.tables.port)
                     .where(and_(
                         self.tables.port.state == info,
                         # self.tables.port.scan.in_(base),
                         exists(
                             select([1])\
                             .select_from(base)\
                             .where(
                                 self.tables.port.scan == base.c.id
                             )
                         ),
                     ))\
                     .group_by(self.tables.port.scan)\
                     .alias('cnt')
                 ).group_by('cnt').order_by(order).limit(topnbr)
             )
         )
     elif field.startswith('portlist:'):
         ### Deux options pour filtrer:
         ###   -1- self.tables.port.scan.in_(base),
         ###   -2- exists(select([1])\
         ###       .select_from(base)\
         ###       .where(
         ###         self.tables.port.scan == base.c.id
         ###       )),
         ###
         ### D'après quelques tests, l'option -1- est plus beaucoup
         ### rapide quand (base) est pas ou peu sélectif, l'option
         ### -2- un peu plus rapide quand (base) est très sélectif
         ###
         ### TODO: vérifier si c'est pareil pour:
         ###  - countports:open
         ###  - tous les autres
         info = field[9:]
         return (
             {
                 "count":
                 result[0],
                 "_id": [(proto, int(port)) for proto, port in (
                     elt.split(',')
                     for elt in result[1][3:-3].split(')","('))]
             } for result in self.db.execute(
                 select([func.count().label("count"),
                         column('ports')]).
                 select_from(
                     select([
                         func.array_agg(
                             postgresql.aggregate_order_by(
                                 tuple_(self.tables.port.protocol, self.
                                        tables.port.port).label('a'),
                                 tuple_(
                                     self.tables.port.protocol, self.tables.
                                     port.port).label('a'))).label('ports'),
                     ]).where(
                         and_(
                             self.tables.port.state == info,
                             self.tables.port.scan.in_(
                                 base),
                             # exists(select([1])\
                             #        .select_from(base)\
                             #        .where(
                             #            self.tables.port.scan == base.c.id
                             #        )),
                         )).group_by(self.tables.port.scan).alias('ports')
                 ).group_by('ports').order_by(order).limit(topnbr)))
     elif field == "service":
         field = self._topstructure(self.tables.port,
                                    [self.tables.port.service_name],
                                    self.tables.port.state == "open")
     elif field.startswith("service:"):
         info = field[8:]
         if '/' in info:
             info = info.split('/', 1)
             field = self._topstructure(
                 self.tables.port,
                 [self.tables.port.service_name],
                 and_(self.tables.port.protocol == info[0],
                      self.tables.port.port == int(info[1])),
             )
         else:
             field = self._topstructure(self.tables.port,
                                        [self.tables.port.service_name],
                                        self.tables.port.port == int(info))
     elif field == "product":
         field = self._topstructure(
             self.tables.port,
             [
                 self.tables.port.service_name,
                 self.tables.port.service_product
             ],
             self.tables.port.state == "open",
         )
     elif field.startswith("product:"):
         info = field[8:]
         if info.isdigit():
             info = int(info)
             flt = self.flt_and(flt, self.searchport(info))
             field = self._topstructure(
                 self.tables.port,
                 [
                     self.tables.port.service_name,
                     self.tables.port.service_product
                 ],
                 and_(self.tables.port.state == "open",
                      self.tables.port.port == info),
             )
         elif info.startswith('tcp/') or info.startswith('udp/'):
             info = (info[:3], int(info[4:]))
             flt = self.flt_and(flt,
                                self.searchport(info[1], protocol=info[0]))
             field = self._topstructure(
                 self.tables.port,
                 [
                     self.tables.port.service_name,
                     self.tables.port.service_product
                 ],
                 and_(self.tables.port.state == "open",
                      self.tables.port.port == info[1],
                      self.tables.port.protocol == info[0]),
             )
         else:
             flt = self.flt_and(flt, self.searchservice(info))
             field = self._topstructure(
                 self.tables.port,
                 [
                     self.tables.port.service_name,
                     self.tables.port.service_product
                 ],
                 and_(self.tables.port.state == "open",
                      self.tables.port.service_name == info),
             )
     elif field == "devicetype":
         field = self._topstructure(self.tables.port,
                                    [self.tables.port.service_devicetype],
                                    self.tables.port.state == "open")
     elif field.startswith("devicetype:"):
         info = field[11:]
         if info.isdigit():
             info = int(info)
             flt = self.flt_and(flt, self.searchport(info))
             field = self._topstructure(
                 self.tables.port, [self.tables.port.service_devicetype],
                 and_(self.tables.port.state == "open",
                      self.tables.port.port == info))
         elif info.startswith('tcp/') or info.startswith('udp/'):
             info = (info[:3], int(info[4:]))
             flt = self.flt_and(flt,
                                self.searchport(info[1], protocol=info[0]))
             field = self._topstructure(
                 self.tables.port, [self.tables.port.service_devicetype],
                 and_(self.tables.port.state == "open",
                      self.tables.port.port == info[1],
                      self.tables.port.protocol == info[0]))
         else:
             flt = self.flt_and(flt, self.searchservice(info))
             field = self._topstructure(
                 self.tables.port, [self.tables.port.service_devicetype],
                 and_(self.tables.port.state == "open",
                      self.tables.port.service_name == info))
     elif field == "version":
         field = self._topstructure(
             self.tables.port,
             [
                 self.tables.port.service_name,
                 self.tables.port.service_product,
                 self.tables.port.service_version
             ],
             self.tables.port.state == "open",
         )
     elif field.startswith("version:"):
         info = field[8:]
         if info.isdigit():
             info = int(info)
             flt = self.flt_and(flt, self.searchport(info))
             field = self._topstructure(
                 self.tables.port,
                 [
                     self.tables.port.service_name,
                     self.tables.port.service_product,
                     self.tables.port.service_version
                 ],
                 and_(self.tables.port.state == "open",
                      self.tables.port.port == info),
             )
         elif info.startswith('tcp/') or info.startswith('udp/'):
             info = (info[:3], int(info[4:]))
             flt = self.flt_and(flt,
                                self.searchport(info[1], protocol=info[0]))
             field = self._topstructure(
                 self.tables.port,
                 [
                     self.tables.port.service_name,
                     self.tables.port.service_product,
                     self.tables.port.service_version
                 ],
                 and_(self.tables.port.state == "open",
                      self.tables.port.port == info[1],
                      self.tables.port.protocol == info[0]),
             )
         elif ':' in info:
             info = info.split(':', 1)
             flt = self.flt_and(
                 flt, self.searchproduct(info[1], service=info[0]))
             field = self._topstructure(
                 self.tables.port,
                 [
                     self.tables.port.service_name,
                     self.tables.port.service_product,
                     self.tables.port.service_version
                 ],
                 and_(self.tables.port.state == "open",
                      self.tables.port.service_name == info[0],
                      self.tables.port.service_product == info[1]),
             )
         else:
             flt = self.flt_and(flt, self.searchservice(info))
             field = self._topstructure(
                 self.tables.port,
                 [
                     self.tables.port.service_name,
                     self.tables.port.service_product,
                     self.tables.port.service_version
                 ],
                 and_(self.tables.port.state == "open",
                      self.tables.port.service_name == info),
             )
     elif field == "asnum":
         field = self._topstructure(self.tables.scan,
                                    [self.tables.scan.info["as_num"]])
     elif field == "as":
         field = self._topstructure(self.tables.scan, [
             self.tables.scan.info["as_num"],
             self.tables.scan.info["as_name"]
         ])
     elif field == "country":
         field = self._topstructure(self.tables.scan, [
             self.tables.scan.info["country_code"],
             self.tables.scan.info["country_name"]
         ])
     elif field == "city":
         field = self._topstructure(self.tables.scan, [
             self.tables.scan.info["country_code"],
             self.tables.scan.info["city"]
         ])
     elif field == "net" or field.startswith("net:"):
         info = field[4:]
         info = int(info) if info else 24
         field = self._topstructure(
             self.tables.scan,
             [func.set_masklen(text("scan.addr::cidr"), info)],
         )
     elif field == "script" or field.startswith("script:"):
         info = field[7:]
         if info:
             field = self._topstructure(self.tables.script,
                                        [self.tables.script.output],
                                        self.tables.script.name == info)
         else:
             field = self._topstructure(self.tables.script,
                                        [self.tables.script.name])
     elif field in ["category", "categories"]:
         field = self._topstructure(self.tables.category,
                                    [self.tables.category.name])
     elif field.startswith('cert.'):
         subfield = field[5:]
         field = self._topstructure(
             self.tables.script,
             [self.tables.script.data['ssl-cert'][subfield]],
             and_(self.tables.script.name == 'ssl-cert',
                  self.tables.script.data['ssl-cert'].has_key(
                      subfield)))  # noqa: W601 (BinaryExpression)
     elif field == "source":
         field = self._topstructure(self.tables.scan,
                                    [self.tables.scan.source])
     elif field == "domains":
         field = self._topstructure(
             self.tables.hostname,
             [func.unnest(self.tables.hostname.domains)])
     elif field.startswith("domains:"):
         level = int(field[8:]) - 1
         base1 = (select([
             func.unnest(self.tables.hostname.domains).label("domains")
         ]).where(
             exists(
                 select([1]).select_from(base).where(
                     self.tables.hostname.scan == base.c.id))).cte("base1"))
         return ({
             "count": result[1],
             "_id": result[0]
         } for result in self.db.execute(
             select([base1.c.domains,
                     func.count().label("count")]).where(
                         base1.c.domains.op('~')
                         ('^([^\\.]+\\.){%d}[^\\.]+$' %
                          level)).group_by(base1.c.domains).order_by(
                              order).limit(topnbr)))
     elif field == "hop":
         field = self._topstructure(self.tables.hop,
                                    [self.tables.hop.ipaddr])
     elif field.startswith('hop') and field[3] in ':>':
         ttl = int(field[4:])
         field = self._topstructure(
             self.tables.hop,
             [self.tables.hop.ipaddr],
             (self.tables.hop.ttl > ttl) if field[3] == '>' else
             (self.tables.hop.ttl == ttl),
         )
     elif field == 'file' or (field.startswith('file')
                              and field[4] in '.:'):
         if field.startswith('file:'):
             scripts = field[5:]
             if '.' in scripts:
                 scripts, field = scripts.split('.', 1)
             else:
                 field = 'filename'
             scripts = scripts.split(',')
             flt = (self.tables.script.name == scripts[0] if len(scripts)
                    == 1 else self.tables.script.name.in_(scripts))
         else:
             field = field[5:] or 'filename'
             flt = True
         field = self._topstructure(
             self.tables.script,
             [
                 func.jsonb_array_elements(
                     func.jsonb_array_elements(
                         self.tables.script.data['ls']['volumes']).op('->')
                     ('files')).op('->>')(field).label(field)
             ],
             and_(
                 flt,
                 self.tables.script.data.op('@>')(
                     '{"ls": {"volumes": [{"files": []}]}}'),
             ),
         )
     elif field.startswith('modbus.'):
         subfield = field[7:]
         field = self._topstructure(
             self.tables.script,
             [self.tables.script.data['modbus-discover'][subfield]],
             and_(
                 self.tables.script.name == 'modbus-discover',
                 self.tables.script.data['modbus-discover'].has_key(
                     subfield)),
             # noqa: W601 (BinaryExpression)
         )
     elif field.startswith('s7.'):
         subfield = field[3:]
         field = self._topstructure(
             self.tables.script,
             [self.tables.script.data['s7-info'][subfield]],
             and_(self.tables.script.name == 's7-info',
                  self.tables.script.data['s7-info'].has_key(subfield)),
             # noqa: W601 (BinaryExpression)
         )
     elif field == 'httphdr':
         flt = self.flt_and(flt, self.searchscript(name="http-headers"))
         field = self._topstructure(
             self.tables.script,
             [
                 column("hdr").op('->>')('name').label("name"),
                 column("hdr").op('->>')('value').label("value")
             ],
             self.tables.script.name == 'http-headers',
             [column("name"), column("value")],
             func.jsonb_array_elements(
                 self.tables.script.data['http-headers']).alias('hdr'),
         )
     elif field.startswith('httphdr.'):
         flt = self.flt_and(flt, self.searchscript(name="http-headers"))
         field = self._topstructure(
             self.tables.script,
             [column("hdr").op('->>')(field[8:]).label("topvalue")],
             self.tables.script.name == 'http-headers',
             [column("topvalue")],
             func.jsonb_array_elements(
                 self.tables.script.data['http-headers']).alias('hdr'),
         )
     elif field.startswith('httphdr:'):
         flt = self.flt_and(flt, self.searchhttphdr(name=field[8:].lower()))
         field = self._topstructure(
             self.tables.script,
             [column("hdr").op('->>')("value").label("value")],
             and_(self.tables.script.name == 'http-headers',
                  column("hdr").op('->>')("name") == field[8:].lower()),
             [column("value")],
             func.jsonb_array_elements(
                 self.tables.script.data['http-headers']).alias('hdr'),
         )
     else:
         raise NotImplementedError()
     s_from = {
         self.tables.script:
         join(self.tables.script, self.tables.port),
         self.tables.port:
         self.tables.port,
         self.tables.category:
         join(self.tables.association_scan_category, self.tables.category),
         self.tables.hostname:
         self.tables.hostname,
         self.tables.hop:
         join(self.tables.trace, self.tables.hop),
     }
     where_clause = {
         self.tables.script:
         self.tables.port.scan == base.c.id,
         self.tables.port:
         self.tables.port.scan == base.c.id,
         self.tables.category:
         self.tables.association_scan_category.scan == base.c.id,
         self.tables.hostname:
         self.tables.hostname.scan == base.c.id,
         self.tables.hop:
         self.tables.trace.scan == base.c.id
     }
     if field.base == self.tables.scan:
         req = flt.query(
             select([func.count().label("count")] +
                    field.fields).select_from(
                        self.tables.scan).group_by(*field.fields))
     else:
         req = (select([func.count().label("count")] +
                       field.fields).select_from(s_from[field.base]))
         if field.extraselectfrom is not None:
             req = req.select_from(field.extraselectfrom)
         req = (req.group_by(
             *(field.fields if field.group_by is None else field.group_by
               )).where(
                   exists(
                       select([1]).select_from(base).where(
                           where_clause[field.base]))))
     if field.where is not None:
         req = req.where(field.where)
     if outputproc is None:
         return ({
             "count": result[0],
             "_id": result[1:] if len(result) > 2 else result[1]
         } for result in self.db.execute(req.order_by(order).limit(topnbr)))
     else:
         return ({
             "count":
             result[0],
             "_id":
             outputproc(result[1:] if len(result) > 2 else result[1])
         } for result in self.db.execute(req.order_by(order).limit(topnbr)))
예제 #6
0
파일: data.py 프로젝트: steve-federowicz/om
 def grouped_eventpos(cls):
     return func.ceil(ChIPPeakData.eventpos/400) * 400
예제 #7
0
파일: datasets.py 프로젝트: justintanwk/ome
 def grouped_eventpos(cls):
     return func.ceil(ChIPPeakData.eventpos / 400) * 400