def analyze(self, jobs):
    criteria = self.job.criteria

    ar_query_table = Table.from_ref(
        self.table.options.related_tables['basetable']
    )

    depjobs = {}

    # For every (ar, job), we spin off a new job to grab the data, then
    # merge everything into one dataframe at the end.
    for s in Device.objects.filter(module='appresponse', enabled=True):
        ar = DeviceManager.get_device(s.id)

        for job in ar.capture.get_jobs():
            # Start with criteria from the primary table -- this gives us
            # endtime, duration and filterexpr.
            bytes_criteria = copy.copy(criteria)
            bytes_criteria.appresponse_device = s.id
            bytes_criteria.appresponse_source = 'jobs/' + job.id
            bytes_criteria.granularity = datetime.timedelta(0, 1)

            newjob = Job.create(table=ar_query_table,
                                criteria=bytes_criteria)

            depjobs[newjob.id] = newjob

    return QueryContinue(self.collect, depjobs)
def analyze(self, jobs):
    df = jobs['overall'].data()

    # First clear all the dynamic columns that were associated with the
    # table the last time the report was run; do not delete the time column.
    for col in self.table.get_columns():
        if col.name == 'time':
            continue
        col.delete()

    # Get the top N values of the value column
    val_col = self.table.options.value_column_name
    pivot_col = self.table.options.pivot_column_name
    n = self.table.options.n

    pivots = list(
        df.sort_values(val_col, ascending=False).head(n)[pivot_col])

    for pivot in pivots:
        # Add pivot column to the table
        AppResponseColumn.create(self.table, pivot, pivot)

    # Create an AppResponseTimeSeries job
    self.job.criteria.pivot_column_names = ','.join(pivots)
    ts_table_ref = self.table.options.related_tables['ts']
    table = Table.from_ref(ts_table_ref)

    job = Job.create(table=table,
                     criteria=self.job.criteria,
                     update_progress=False,
                     parent=self.job)

    return QueryContinue(self.collect, jobs={'ts': job})
def analyze(self, jobs=None):
    download_table = Table.from_ref(
        self.table.options.related_tables['download_table'])

    # Create source and destination download jobs
    depjobs = {}

    c = self.job.criteria
    sharks = [
        ('1-source', c.netshark_device_src, c.netshark_source_name_src),
        ('2-dest', c.netshark_device_dst, c.netshark_source_name_dst)
    ]

    for shark in sharks:
        sc = copy.copy(c)

        name, device, source = shark
        sc.netshark_device = device
        sc.netshark_source_name = source
        sc.segment = name

        job = Job.create(table=download_table, criteria=sc,
                         update_progress=True, parent=self.job)
        logger.debug("Created %s: %s download job with criteria %s"
                     % (job, name, sc))
        depjobs[job.id] = job

    return QueryContinue(self.collect, depjobs)
def analyze(self, jobs):
    criteria = self.job.criteria

    sharks_query_table = Table.from_ref(
        self.table.options.related_tables['basetable'])

    depjobs = {}

    # For every (shark, job), we spin off a new job to grab the data, then
    # merge everything into one dataframe at the end.
    for s in Device.objects.filter(module='netshark', enabled=True):
        shark = DeviceManager.get_device(s.id)

        for capjob in shark.get_capture_jobs():
            # Start with criteria from the primary table -- this gives us
            # endtime, duration and netshark_filterexpr.
            bytes_criteria = copy.copy(criteria)
            bytes_criteria.netshark_device = s.id
            bytes_criteria.netshark_source_name = 'jobs/' + capjob.name
            bytes_criteria.resolution = datetime.timedelta(0, 1)
            bytes_criteria.aggregated = True

            job = Job.create(table=sharks_query_table,
                             criteria=bytes_criteria)

            depjobs[job.id] = job

    return QueryContinue(self.collect, depjobs)
def analyze(self, jobs=None):
    filtered_list = ExistingIntervals.objects.filter(
        table_handle=self.handle)

    existing_intervals = None

    if filtered_list:
        existing_intervals = filtered_list[0].intervals

        if self.query_interval in existing_intervals:
            # Search DB for the queried data
            return QueryComplete(
                self.query(self.query_interval.start,
                           self.query_interval.end))

    intervals_to_call = self._check_intervals(self.query_interval -
                                              existing_intervals)

    dep_jobs = {}
    for interval in intervals_to_call:
        criteria = copy.copy(self.job.criteria)
        # Use the two time related fields
        criteria.starttime = interval.start
        criteria.endtime = interval.end
        job = Job.create(table=self.ds_table, criteria=criteria,
                         update_progress=False, parent=self.job)
        dep_jobs[job.id] = job

    return QueryContinue(self.collect, jobs=dep_jobs)
def run(self):
    # Collect all dependent tables
    tables = self.table.options.tables
    if not tables:
        return QueryContinue(self._analyze, {})

    logger.debug("%s: dependent tables: %s" % (self, tables))
    jobs = {}

    for (name, ref) in tables.items():
        table = Table.from_ref(ref)
        job = Job.create(table, self.job.criteria,
                         update_progress=self.job.update_progress,
                         parent=self.job)

        logger.debug("%s: dependent job %s" % (self, job))
        jobs[name] = job

    return QueryContinue(self._analyze, jobs)
def analyze(self, jobs):
    logger.debug('%s analyze - received jobs: %s' % (self, jobs))

    basetable = Table.from_ref(
        self.table.options['related_tables']['template']
    )
    data = jobs['source'].data()
    if data is None:
        return QueryError('No data available to analyze')

    # find column whose min/max is largest deviation from mean
    # then take row from that column where min/max occurs
    if self.table.options['max']:
        idx = (data.max() / data.mean()).idxmax()
        frow = data.loc[data[idx].idxmax()]
    else:
        idx = (data.min() / data.mean()).idxmin()
        frow = data.loc[data[idx].idxmin()]

    # get time value from extracted row to calculate new start/end times
    ftime = frow['time']
    duration = parse_timedelta(self.table.options['zoom_duration'])
    resolution = parse_timedelta(self.table.options['zoom_resolution'])

    stime = ftime - (duration / 2)
    etime = ftime + (duration / 2)

    criteria = self.job.criteria

    if 'resolution' in criteria:
        criteria['resolution'] = resolution
    else:
        criteria['granularity'] = resolution

    criteria['duration'] = duration
    criteria['_orig_duration'] = duration
    criteria['starttime'] = stime
    criteria['_orig_starttime'] = stime
    criteria['endtime'] = etime
    criteria['_orig_endtime'] = etime

    logger.debug('Creating FocusedAnalysis job with updated criteria %s'
                 % criteria)

    job = Job.create(basetable, criteria, self.job.update_progress)
    return QueryContinue(self.finish, {'job': job})
def analyze(self, jobs=None):
    logger.debug('TimeSeriesTable analysis with jobs %s' % jobs)

    filtered_list = ExistingIntervals.objects.filter(
        table_handle=self.handle,
        criteria=self.no_time_criteria)

    existing_intervals = None

    if filtered_list:
        existing_intervals = filtered_list[0].intervals
        logger.debug('Found existing intervals for handle %s: %s' %
                     (self.handle, existing_intervals))

        if self.query_interval in existing_intervals:
            logger.debug('Query interval totally covered by DB, returning '
                         'DB query.')
            # Search DB for the queried data
            data = self.query(self.query_interval.start,
                              self.query_interval.end)
            return QueryComplete(data)

        logger.debug('Query interval only partially covered by DB ...')

    intervals_to_call = self._check_intervals(self.query_interval -
                                              existing_intervals)

    logger.debug('Setting up %d jobs to cover missing data '
                 'for these intervals: %s' %
                 (len(intervals_to_call), intervals_to_call))

    dep_jobs = {}
    for interval in intervals_to_call:
        criteria = copy.copy(self.job.criteria)
        # Use the two time related fields
        criteria.starttime = interval.start
        criteria.endtime = interval.end
        job = Job.create(table=self.ds_table, criteria=criteria,
                         update_progress=False, parent=self.job)
        dep_jobs[job.id] = job

    return QueryContinue(self.collect, jobs=dep_jobs)
def analyze(self, jobs):
    tag = Tag.objects.get(id=self.job.criteria.tag).name

    cmd_table = Table.from_ref(
        self.table.options.related_tables['base'])

    dep_jobs = {}
    for sh_db in Device.objects.filter_by_tag(tag, module='steelhead',
                                              enabled=True):
        criteria = copy.copy(self.job.criteria)
        criteria.dev = sh_db
        job = Job.create(table=cmd_table, criteria=criteria,
                         parent=self.job)
        dep_jobs[job.id] = job

    if not dep_jobs:
        return QueryError("No enabled steelhead "
                          "devices found with tag '{}'".format(tag))

    return QueryContinue(self.collect, jobs=dep_jobs)
def analyze(self, jobs=None):
    criteria = self.job.criteria

    if jobs:
        job = list(jobs.values())[0]
        if job.status == Job.ERROR:
            raise AnalysisException("%s for getting pcap file failed: %s"
                                    % (job, job.message))
        criteria.entire_pcap = True
        self.filename = job.data()['filename'][0]
    else:
        self.filename = criteria.pcapfilename

    pcap = PcapFile(self.filename)

    try:
        pcap_info = pcap.info()
    except ValueError:
        raise AnalysisException("No packets in %s" % self.filename)

    logger.debug("%s: File info %s" % (self.__class__.__name__, pcap_info))

    self.pkt_num = int(pcap_info['Number of packets'])

    min_pkt_num = self.table.options.split_threshold

    wt = Table.from_ref(self.table.options.related_tables['wireshark'])

    depjobs = {}

    if self.pkt_num < min_pkt_num:
        # No need to split the pcap file
        criteria.pcapfilename = self.filename
        criteria.entire_pcap = True

        job = Job.create(table=wt, criteria=criteria,
                         update_progress=False, parent=self.job)

        depjobs[job.id] = job

        logger.debug("%s starting single job" % self.__class__.__name__)
        return QueryContinue(self.collect, depjobs)

    self.output_dir = os.path.join(SPLIT_DIR, self.file_handle)
    self.split_pcap()

    split_files = os.listdir(self.output_dir)

    if not split_files:
        raise AnalysisException('No pcap file found after splitting %s'
                                % self.filename)

    for split in split_files:
        # use wireshark table
        ws_criteria = copy.copy(criteria)
        ws_criteria.pcapfilename = os.path.join(self.output_dir, split)

        # for ease of removing the split directory in collect func
        ws_criteria.output_dir = self.output_dir

        job = Job.create(table=wt, criteria=ws_criteria,
                         update_progress=False, parent=self.job)

        depjobs[job.id] = job

    logger.debug("%s starting multiple jobs" % self.__class__.__name__)

    return QueryContinue(self.collect, jobs=depjobs)