예제 #1
0
    def run(self):
        """Run an AppResponse report for this table and return its data.

        Resolves the data source from the job criteria (a capture job or a
        file when the table source is ``'packets'``, otherwise a named
        source), builds one Key/Value extractor per non-synthetic column,
        runs the report on the device and post-processes the resulting
        dataframe (column renaming, numeric clean-up, optional sorting).

        Returns:
            QueryComplete wrapping the resulting dataframe.
        """
        criteria = self.job.criteria

        ar = DeviceManager.get_device(criteria.appresponse_device)

        def strip_prefix(name, prefix):
            # str.lstrip() removes a *set of characters*, not a prefix, so
            # it can also swallow leading characters of the id itself.
            # Slice the prefix off explicitly instead.
            if name.startswith(prefix):
                return name[len(prefix):]
            return name

        if self.table.options.source == 'packets':
            source_name = criteria.appresponse_source

            if source_name.startswith(SourceProxy.JOB_PREFIX):
                job_id = strip_prefix(source_name, SourceProxy.JOB_PREFIX)
                source = SourceProxy(ar.capture.get_job_by_id(job_id))
            else:
                file_id = strip_prefix(source_name, SourceProxy.FILE_PREFIX)
                source = SourceProxy(ar.fs.get_file_by_id(file_id))
        else:
            source = SourceProxy(name=self.table.options.source)

        columns = list(self.table.get_columns(synthetic=False))

        # Maps extractor name -> table column name, used to rename the
        # dataframe columns once the report has been fetched.
        col_names = {}
        col_extractors = []

        for col in columns:
            extractor = col.options.extractor
            col_names[extractor] = col.name

            if col.iskey:
                col_extractors.append(Key(extractor))
            else:
                col_extractors.append(Value(extractor))

        # If the data source is of file type and entire PCAP
        # is set True, then set start end times to None
        # NOTE(review): ``source`` is always built as a SourceProxy above, so
        # this isinstance check can only succeed if SourceProxy derives from
        # File -- confirm that the entire-PCAP path is actually reachable.
        if isinstance(source, File) and criteria.entire_pcap:
            start = None
            end = None
        else:
            start = datetime_to_seconds(criteria.starttime)
            end = datetime_to_seconds(criteria.endtime)

        granularity = criteria.granularity.total_seconds()

        data_def = DataDef(source=source,
                           columns=col_extractors,
                           granularity=str(granularity),
                           start=start,
                           end=end)

        report = Report(ar)
        report.add(data_def)
        report.run()

        df = report.get_dataframe()
        # Build a real list: map() is lazy on Python 3.
        df.columns = [col_names[x] for x in df.columns]

        # Both helpers return the value unchanged (no cast) when it looks
        # numeric, and None otherwise.  str.isdigit() is False for signed
        # or exponent forms, so those are mapped to None as well.
        def to_int(x):
            return x if str(x).isdigit() else None

        def to_float(x):
            return x if str(x).replace('.', '', 1).isdigit() else None

        # Numerical columns can be returned as '#N/D' when not available
        # Thus convert them to None to help sorting
        for col in columns:
            if col.datatype == Column.DATATYPE_FLOAT:
                df[col.name] = df[col.name].apply(to_float)
            elif col.datatype == Column.DATATYPE_INTEGER:
                df[col.name] = df[col.name].apply(to_int)
            elif col.datatype == Column.DATATYPE_TIME:
                if granularity < 1:
                    # The fractional epoch time values are in string
                    # Thus needs to be converted to float
                    df[col.name] = df[col.name].apply(float)

        sort_col = self.table.options.sort_col_name
        if sort_col:
            ascending = self.table.options.ascending
            # DataFrame.sort() was replaced by sort_values() in newer pandas
            # releases; prefer it when present, keep the old call otherwise.
            if hasattr(df, 'sort_values'):
                df.sort_values(by=sort_col, ascending=ascending,
                               inplace=True)
            else:
                df.sort(columns=sort_col, ascending=ascending,
                        inplace=True)
        return QueryComplete(df)
예제 #2
0
    def run(self):
        """Run an AppResponse report for this table and return its data.

        Resolves the data source from the job criteria (a capture job or a
        file when the table source is ``'packets'``, otherwise a named
        source), builds Key/Value extractors for every non-synthetic column
        (plus an extra Value extractor for each column alias), optionally
        attaches a steelfilter traffic filter, runs the report, deletes it
        from the device and post-processes the resulting dataframe (alias
        substitution, column renaming, numeric clean-up, optional sorting).

        Returns:
            QueryComplete wrapping the resulting dataframe.
        """
        criteria = self.job.criteria

        ar = DeviceManager.get_device(criteria.appresponse_device)

        def strip_prefix(name, prefix):
            # str.lstrip() removes a *set of characters*, not a prefix, so
            # it can also swallow leading characters of the id itself.
            # Slice the prefix off explicitly instead.
            if name.startswith(prefix):
                return name[len(prefix):]
            return name

        is_packets = self.table.options.source == 'packets'

        if is_packets:
            source_name = criteria.appresponse_source

            if source_name.startswith(SourceProxy.JOB_PREFIX):
                job_id = strip_prefix(source_name, SourceProxy.JOB_PREFIX)
                source = SourceProxy(ar.capture.get_job_by_id(job_id))
            else:
                file_id = strip_prefix(source_name, SourceProxy.FILE_PREFIX)
                source = SourceProxy(ar.fs.get_file_by_id(file_id))
        else:
            source = SourceProxy(name=self.table.options.source)

        columns = list(self.table.get_columns(synthetic=False))

        col_extractors = []
        # Maps extractor name -> table column name, used to rename the
        # dataframe columns once the report has been fetched.
        col_names = {}
        # Maps extractor name -> alias extractor whose values replace it.
        aliases = {}

        for col in columns:
            extractor = col.options.extractor
            col_names[extractor] = col.name

            if col.iskey:
                col_extractors.append(Key(extractor))
            else:
                col_extractors.append(Value(extractor))

            if col.options.alias:
                aliases[extractor] = col.options.alias
                col_extractors.append(Value(col.options.alias))

        # If the data source is of file type and entire PCAP
        # is set True, then set start end times to None
        if (is_packets and
                source.path.startswith(SourceProxy.FILE_PREFIX) and
                criteria.entire_pcap):
            start = None
            end = None
        else:
            start = datetime_to_seconds(criteria.starttime)
            end = datetime_to_seconds(criteria.endtime)

        granularity = criteria.granularity.total_seconds()

        resolution = None

        # temp fix for https://bugzilla.nbttech.com/show_bug.cgi?id=305478
        # if we aren't asking for a timeseries, make sure the data gets
        # aggregated by making resolution greater than the report duration.
        # When the whole PCAP is requested start/end are None and there is
        # no duration to compute, so resolution is left unset rather than
        # raising a TypeError on ``None - None``.
        if (is_packets and
                start is not None and end is not None and
                'start_time' not in col_names and
                'end_time' not in col_names):
            resolution = end - start + granularity

        data_def = DataDef(
            source=source,
            columns=col_extractors,
            granularity=granularity,
            resolution=resolution,
            start=start,
            end=end)

        if hasattr(criteria, 'appresponse_steelfilter'):
            logger.debug('calculating steelfilter expression ...')
            filterexpr = self.job.combine_filterexprs(
                exprs=criteria.appresponse_steelfilter
            )
            if filterexpr:
                logger.debug('applying steelfilter expression: %s',
                             filterexpr)
                data_def.add_filter(TrafficFilter(type_='steelfilter',
                                                  value=filterexpr))

        report = Report(ar)
        report.add(data_def)
        report.run()

        df = report.get_dataframe()

        report.delete()

        # overwrite columns with their alias values, then drop 'em
        # (items() works on both Python 2 and 3; axis is passed by keyword
        # because the positional form is not accepted by newer pandas)
        for extractor, alias in aliases.items():
            df[extractor] = df[alias]
            df.drop(alias, axis=1, inplace=True)

        # Build a real list: map() is lazy on Python 3.
        df.columns = [col_names[x] for x in df.columns]

        # Both helpers return the value unchanged (no cast) when it looks
        # numeric, and None otherwise.  str.isdigit() is False for signed
        # or exponent forms, so those are mapped to None as well.
        def to_int(x):
            return x if str(x).isdigit() else None

        def to_float(x):
            return x if str(x).replace('.', '', 1).isdigit() else None

        # Numerical columns can be returned as '#N/D' when not available
        # Thus convert them to None to help sorting
        for col in columns:
            if col.datatype == Column.DATATYPE_FLOAT:
                df[col.name] = df[col.name].apply(to_float)
            elif col.datatype == Column.DATATYPE_INTEGER:
                df[col.name] = df[col.name].apply(to_int)
            elif col.datatype == Column.DATATYPE_TIME:
                if granularity < 1:
                    # The fractional epoch time values are in string
                    # Thus needs to be converted to float
                    df[col.name] = df[col.name].apply(float)

        sort_col = self.table.options.sort_col_name
        if sort_col:
            ascending = self.table.options.ascending
            # DataFrame.sort() was replaced by sort_values() in newer pandas
            # releases; prefer it when present, keep the old call otherwise.
            if hasattr(df, 'sort_values'):
                df.sort_values(by=sort_col, ascending=ascending,
                               inplace=True)
            else:
                df.sort(columns=sort_col, ascending=ascending,
                        inplace=True)
        return QueryComplete(df)