Example #1
    def analyze(self, jobs):
        criteria = self.job.criteria

        ar_query_table = Table.from_ref(
            self.table.options.related_tables['basetable']
        )

        depjobs = {}

        # For every (ar, job), we spin off a new job to grab the data, then
        # merge everything into one dataframe at the end.
        for s in Device.objects.filter(module='appresponse', enabled=True):
            ar = DeviceManager.get_device(s.id)

            for job in ar.capture.get_jobs():
                # Start with criteria from the primary table -- this gives us
                # endtime, duration and filterexpr.
                bytes_criteria = copy.copy(criteria)
                bytes_criteria.appresponse_device = s.id
                bytes_criteria.appresponse_source = 'jobs/' + job.id
                bytes_criteria.granularity = datetime.timedelta(0, 1)

                newjob = Job.create(table=ar_query_table,
                                    criteria=bytes_criteria)

                depjobs[newjob.id] = newjob

        return QueryContinue(self.collect, depjobs)
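
The collect continuation named in the QueryContinue above is not shown. Below is a minimal sketch of what it might look like, assuming each dependent job exposes its result as a pandas DataFrame via job.data() (as in the other examples on this page) and that the merged frame is handed back with QueryComplete; the signature and body are assumptions, not part of this example.

    def collect(self, jobs=None):
        # Hypothetical continuation: gather each per-job dataframe and
        # concatenate them into the single frame mentioned in the comment
        # above.  Jobs that produced no rows are skipped.
        import pandas

        frames = [j.data() for j in jobs.values()]
        frames = [f for f in frames if f is not None]

        if not frames:
            return QueryComplete(None)

        return QueryComplete(pandas.concat(frames, ignore_index=True))
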
Example #2
    def analyze(self, jobs):

        df = jobs['overall'].data()

        # First clear all the dynamic columns that were associated with the
        # table the last time the report was run, but keep the time column.
        for col in self.table.get_columns():
            if col.name == 'time':
                continue
            col.delete()

        # Take the top N pivot values, ranked by the value column
        val_col = self.table.options.value_column_name
        pivot_col = self.table.options.pivot_column_name
        n = self.table.options.n

        pivots = list(
            df.sort_values(val_col, ascending=False).head(n)[pivot_col])

        for pivot in pivots:
            # Add pivot column to the table
            AppResponseColumn.create(self.table, pivot, pivot)

        # Create an AppResponseTimeSeries Job
        self.job.criteria.pivot_column_names = ','.join(pivots)
        ts_table_ref = self.table.options.related_tables['ts']
        table = Table.from_ref(ts_table_ref)

        job = Job.create(table=table,
                         criteria=self.job.criteria,
                         update_progress=False,
                         parent=self.job)

        return QueryContinue(self.collect, jobs={'ts': job})
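
The pivot selection above is plain pandas: sort by the value column, keep the first n rows, and read off the pivot column as a list. A small self-contained illustration of that idiom, with made-up column names and data:

import pandas

df = pandas.DataFrame({'host': ['a', 'b', 'c', 'd'],
                       'bytes': [10, 40, 30, 20]})

# Top 2 hosts ranked by bytes -> ['b', 'c']
pivots = list(df.sort_values('bytes', ascending=False).head(2)['host'])
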
Example #3
    def analyze(self, jobs=None):

        download_table = Table.from_ref(
            self.table.options.related_tables['download_table'])

        # Create source and destination download jobs
        depjobs = {}

        c = self.job.criteria
        sharks = [
            ('1-source', c.netshark_device_src, c.netshark_source_name_src),
            ('2-dest', c.netshark_device_dst, c.netshark_source_name_dst)
        ]

        for shark in sharks:
            sc = copy.copy(c)
            name, device, source = shark
            sc.netshark_device = device
            sc.netshark_source_name = source
            sc.segment = name

            job = Job.create(table=download_table,
                             criteria=sc,
                             update_progress=True,
                             parent=self.job)
            logger.debug("Created %s: %s download job with criteria %s" %
                         (job, name, sc))
            depjobs[job.id] = job

        return QueryContinue(self.collect, depjobs)
Example #4
    def analyze(self, jobs):
        criteria = self.job.criteria

        sharks_query_table = Table.from_ref(
            self.table.options.related_tables['basetable'])

        depjobs = {}

        # For every (shark, job), we spin off a new job to grab the data, then
        # merge everything into one dataframe at the end.
        for s in Device.objects.filter(module='netshark', enabled=True):
            shark = DeviceManager.get_device(s.id)

            for capjob in shark.get_capture_jobs():
                # Start with criteria from the primary table -- this gives us
                # endtime, duration and netshark_filterexpr.
                bytes_criteria = copy.copy(criteria)
                bytes_criteria.netshark_device = s.id
                bytes_criteria.netshark_source_name = 'jobs/' + capjob.name
                bytes_criteria.resolution = datetime.timedelta(0, 1)
                bytes_criteria.aggregated = True

                job = Job.create(table=sharks_query_table,
                                 criteria=bytes_criteria)

                depjobs[job.id] = job

        return QueryContinue(self.collect, depjobs)
Example #5
    def analyze(self, jobs=None):

        filtered_list = ExistingIntervals.objects.filter(
            table_handle=self.handle)

        existing_intervals = None

        if filtered_list:
            existing_intervals = filtered_list[0].intervals

            if self.query_interval in existing_intervals:
                # Search DB for the queried data
                return QueryComplete(
                    self.query(self.query_interval.start,
                               self.query_interval.end))

        intervals_to_call = self._check_intervals(self.query_interval -
                                                  existing_intervals)

        dep_jobs = {}
        for interval in intervals_to_call:
            criteria = copy.copy(self.job.criteria)
            # Overwrite the two time-related fields for this interval
            criteria.starttime = interval.start
            criteria.endtime = interval.end
            job = Job.create(table=self.ds_table,
                             criteria=criteria,
                             update_progress=False,
                             parent=self.job)
            dep_jobs[job.id] = job

        return QueryContinue(self.collect, jobs=dep_jobs)
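
This example relies on an interval type that supports containment (in) and subtraction (-); note that, as written, existing_intervals can still be None at the subtraction when nothing has been cached yet. The class below is a hypothetical sketch of those semantics for a single interval, purely to make the two operations concrete; it is not the framework's actual implementation.

class Interval(object):
    """Hypothetical interval with the operations the example relies on."""

    def __init__(self, start, end):
        self.start, self.end = start, end

    def __contains__(self, other):
        # 'other in self' -> other is fully covered by this interval
        return self.start <= other.start and other.end <= self.end

    def __sub__(self, existing):
        # Pieces of self that 'existing' does not cover; with nothing
        # cached at all, the whole query interval must be fetched.
        if existing is None:
            return [Interval(self.start, self.end)]
        pieces = []
        if self.start < existing.start:
            pieces.append(Interval(self.start, min(self.end, existing.start)))
        if existing.end < self.end:
            pieces.append(Interval(max(self.start, existing.end), self.end))
        return pieces
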
Example #6
    def run(self):
        # Collect all dependent tables
        tables = self.table.options.tables
        if not tables:
            return QueryContinue(self._analyze, {})

        logger.debug("%s: dependent tables: %s" % (self, tables))
        jobs = {}

        for (name, ref) in tables.items():
            table = Table.from_ref(ref)
            job = Job.create(table, self.job.criteria,
                             update_progress=self.job.update_progress,
                             parent=self.job)

            logger.debug("%s: dependent job %s" % (self, job))
            jobs[name] = job

        return QueryContinue(self._analyze, jobs)
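
Here the dependent jobs are keyed by related-table name rather than by job id, so the _analyze continuation can look each result up by name. A minimal sketch of such a continuation, with 'throughput' as a made-up table name; the body is an assumption, not part of this example.

    def _analyze(self, jobs=None):
        # Hypothetical continuation: 'jobs' carries the same keys as
        # self.table.options.tables, so each result is fetched by name.
        df = jobs['throughput'].data()
        if df is None:
            return QueryError('Dependent table returned no data')
        return QueryComplete(df)
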
Example #7
    def analyze(self, jobs):
        logger.debug('%s analyze - received jobs: %s' % (self, jobs))

        basetable = Table.from_ref(
            self.table.options['related_tables']['template']
        )
        data = jobs['source'].data()
        if data is None:
            return QueryError('No data available to analyze')

        # Find the column whose min/max deviates most from its mean, then
        # take the row of that column where the min/max occurs.
        if self.table.options['max']:
            idx = (data.max() / data.mean()).idxmax()
            frow = data.loc[data[idx].idxmax()]
        else:
            idx = (data.min() / data.mean()).idxmin()
            frow = data.loc[data[idx].idxmin()]

        # get time value from extracted row to calculate new start/end times
        ftime = frow['time']
        duration = parse_timedelta(self.table.options['zoom_duration'])
        resolution = parse_timedelta(self.table.options['zoom_resolution'])
        stime = ftime - (duration / 2)
        etime = ftime + (duration / 2)

        criteria = self.job.criteria

        if 'resolution' in criteria:
            criteria['resolution'] = resolution
        else:
            criteria['granularity'] = resolution

        criteria['duration'] = duration
        criteria['_orig_duration'] = duration
        criteria['starttime'] = stime
        criteria['_orig_starttime'] = stime
        criteria['endtime'] = etime
        criteria['_orig_endtime'] = etime

        logger.debug('Creating FocusedAnalysis job with updated criteria %s'
                     % criteria)

        job = Job.create(basetable, criteria, self.job.update_progress)
        return QueryContinue(self.finish, {'job': job})
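
The zoomed window is centred on the time of the extracted row, with half of zoom_duration on each side. With concrete, made-up values the arithmetic works out as follows:

from datetime import datetime, timedelta

ftime = datetime(2024, 1, 1, 12, 0, 0)   # time of the extreme row (made up)
duration = timedelta(minutes=1)          # a zoom_duration of one minute
stime = ftime - (duration / 2)           # 2024-01-01 11:59:30
etime = ftime + (duration / 2)           # 2024-01-01 12:00:30
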
Example #8
    def analyze(self, jobs=None):
        logger.debug('TimeSeriesTable analysis with jobs %s' % jobs)

        filtered_list = ExistingIntervals.objects.filter(
            table_handle=self.handle, criteria=self.no_time_criteria)

        existing_intervals = None

        if filtered_list:
            existing_intervals = filtered_list[0].intervals
            logger.debug('Found existing intervals for handle %s: %s' %
                         (self.handle, existing_intervals))

            if self.query_interval in existing_intervals:
                logger.debug('Query interval totally covered by DB, returning '
                             'DB query.')
                # Search DB for the queried data
                data = self.query(self.query_interval.start,
                                  self.query_interval.end)
                return QueryComplete(data)

            logger.debug('Query interval only partially covered by DB ...')

        intervals_to_call = self._check_intervals(self.query_interval -
                                                  existing_intervals)

        logger.debug('Setting up %d jobs to cover missing data '
                     'for these intervals: %s' %
                     (len(intervals_to_call), intervals_to_call))
        dep_jobs = {}
        for interval in intervals_to_call:
            criteria = copy.copy(self.job.criteria)
            # Overwrite the two time-related fields for this interval
            criteria.starttime = interval.start
            criteria.endtime = interval.end
            job = Job.create(table=self.ds_table,
                             criteria=criteria,
                             update_progress=False,
                             parent=self.job)
            dep_jobs[job.id] = job

        return QueryContinue(self.collect, jobs=dep_jobs)
Example #9
    def analyze(self, jobs):

        tag = Tag.objects.get(id=self.job.criteria.tag).name

        cmd_table = Table.from_ref(
            self.table.options.related_tables['base'])

        dep_jobs = {}

        for sh_db in Device.objects.filter_by_tag(tag, module='steelhead',
                                                  enabled=True):
            criteria = copy.copy(self.job.criteria)
            criteria.dev = sh_db
            job = Job.create(table=cmd_table, criteria=criteria,
                             parent=self.job)
            dep_jobs[job.id] = job

        if not dep_jobs:
            return QueryError("No enabled steelhead "
                              "devices found with tag '{}'".format(tag))

        return QueryContinue(self.collect, jobs=dep_jobs)
Example #10
    def analyze(self, jobs=None):

        criteria = self.job.criteria

        if jobs:
            job = list(jobs.values())[0]
            if job.status == Job.ERROR:
                raise AnalysisException("%s for getting pcap file failed: %s" %
                                        (job, job.message))
            criteria.entire_pcap = True
            self.filename = job.data()['filename'][0]
        else:
            self.filename = criteria.pcapfilename

        pcap = PcapFile(self.filename)

        try:
            pcap_info = pcap.info()
        except ValueError:
            raise AnalysisException("No packets in %s" % self.filename)

        logger.debug("%s: File info %s" % (self.__class__.__name__, pcap_info))

        self.pkt_num = int(pcap_info['Number of packets'])

        min_pkt_num = self.table.options.split_threshold

        wt = Table.from_ref(self.table.options.related_tables['wireshark'])

        depjobs = {}
        if self.pkt_num < min_pkt_num:
            # No need to split the pcap file
            criteria.pcapfilename = self.filename
            criteria.entire_pcap = True
            job = Job.create(table=wt,
                             criteria=criteria,
                             update_progress=False,
                             parent=self.job)

            depjobs[job.id] = job

            logger.debug("%s starting single job" % self.__class__.__name__)
            return QueryContinue(self.collect, depjobs)

        self.output_dir = os.path.join(SPLIT_DIR, self.file_handle)
        self.split_pcap()

        split_files = os.listdir(self.output_dir)

        if not split_files:
            raise AnalysisException('No pcap file found after splitting %s' %
                                    self.filename)

        for split in split_files:
            # use wireshark table
            ws_criteria = copy.copy(criteria)
            ws_criteria.pcapfilename = os.path.join(self.output_dir, split)

            # record the output directory so the collect function can
            # remove the split directory afterwards
            ws_criteria.output_dir = self.output_dir

            job = Job.create(table=wt,
                             criteria=ws_criteria,
                             update_progress=False,
                             parent=self.job)

            depjobs[job.id] = job

        logger.debug("%s starting multiple jobs" % self.__class__.__name__)

        return QueryContinue(self.collect, jobs=depjobs)
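
The output_dir stashed in the criteria above exists so the collect continuation can clean up after itself. Below is a sketch of just that cleanup step, assuming the continuation runs on the same query object and that the attribute is only set on the multi-job path; the merge of the per-split dataframes is omitted here.

    def collect(self, jobs=None):
        # Hypothetical sketch of the cleanup step only: remove the split
        # directory once every per-split job has been gathered.
        import os
        import shutil

        out_dir = getattr(self, 'output_dir', None)
        if out_dir and os.path.isdir(out_dir):
            shutil.rmtree(out_dir)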