def post(self, request, pk):
    """Create new Job for the specified table using POSTed criteria.

    Returns a 201 response with the serialized Job on success, or a
    400 response carrying the form errors if the criteria are invalid.
    Raises JobCreationError if job creation or startup fails.
    """
    table = Table.objects.get(pk=pk)
    all_fields = dict((f.keyword, f) for f in table.fields.all())

    # data needs to be not-None or form will be created as unbound
    data = self.request.POST or {}
    form = TableFieldForm(all_fields, use_widgets=False,
                          include_hidden=True, data=data)

    if form.is_valid(check_unknown=True):
        criteria = form.criteria()
    else:
        return Response(form.errors,
                        status=status.HTTP_400_BAD_REQUEST)

    try:
        job = Job.create(table, criteria)
        job.start()
        serializer = JobSerializer(job, many=False)
        return Response(serializer.data, status=status.HTTP_201_CREATED,
                        headers=self.get_success_headers(job))
    except Exception as e:
        # Bug fix: e.message is deprecated since Python 2.6 and removed
        # in Python 3; str(e) works for every exception type.
        msg = 'Error processing Job: %s' % str(e)
        raise JobCreationError(msg)
def analyze(self, jobs):
    """Spawn one dependent job per (AppResponse device, capture job).

    Each dependent job queries data via the related 'basetable';
    results from all jobs are merged later in self.collect.
    """
    base_criteria = self.job.criteria
    ar_query_table = Table.from_ref(
        self.table.options.related_tables['basetable']
    )

    depjobs = {}

    # Enumerate every enabled AppResponse device and each of its
    # capture jobs, creating one data-fetch job per capture job.
    for device in Device.objects.filter(module='appresponse',
                                        enabled=True):
        ar = DeviceManager.get_device(device.id)

        for capture_job in ar.capture.get_jobs():
            # Clone the primary table's criteria -- this carries
            # endtime, duration and filterexpr -- then point it at
            # this specific device and capture job.
            crit = copy.copy(base_criteria)
            crit.appresponse_device = device.id
            crit.appresponse_source = 'jobs/' + capture_job.id
            crit.granularity = datetime.timedelta(0, 1)

            newjob = Job.create(table=ar_query_table, criteria=crit)
            depjobs[newjob.id] = newjob

    return QueryContinue(self.collect, depjobs)
def analyze(self, jobs):
    """Create one basetable sub-job per business-hours interval.

    Reads (starttime, endtime) rows from the 'times' dependent job,
    localizes them to the configured business-hours timezone, and
    launches a query for each interval.
    """
    criteria = self.job.criteria
    tz = pytz.timezone(criteria.business_hours_tzname)

    times = jobs['times'].data()
    if times is None or len(times) == 0:
        # No business-hours intervals -> nothing to compute.
        return QueryComplete(None)

    basetable = Table.from_ref(
        self.table.options.related_tables['basetable']
    )

    # Create all the dependent jobs, one per interval row.
    depjobs = {}
    for _, row in times.iterrows():
        start, end = row['starttime'], row['endtime']

        sub_criteria = copy.copy(criteria)
        sub_criteria.starttime = start.astimezone(tz)
        sub_criteria.endtime = end.astimezone(tz)

        job = Job.create(table=basetable, criteria=sub_criteria,
                         update_progress=False, parent=self.job)
        logger.debug("Created %s: %s - %s" % (job, start, end))
        depjobs[job.id] = job

    return QueryContinue(self.collect, depjobs)
def analyze(self, jobs=None):
    """Kick off the source-side and destination-side download jobs."""
    download_table = Table.from_ref(
        self.table.options.related_tables['download_table'])

    base = self.job.criteria

    # (segment label, netshark device, capture source) per direction.
    endpoints = [
        ('1-source', base.netshark_device_src,
         base.netshark_source_name_src),
        ('2-dest', base.netshark_device_dst,
         base.netshark_source_name_dst),
    ]

    depjobs = {}
    for segment, device, source in endpoints:
        sc = copy.copy(base)
        sc.netshark_device = device
        sc.netshark_source_name = source
        sc.segment = segment

        job = Job.create(table=download_table, criteria=sc,
                         update_progress=True, parent=self.job)
        logger.debug("Created %s: %s download job with criteria %s"
                     % (job, segment, sc))
        depjobs[job.id] = job

    return QueryContinue(self.collect, depjobs)
def analyze(self, jobs):
    """Spawn a data-fetch job for every capture job on every enabled
    NetShark; the results are merged into one dataframe in
    self.collect."""
    base_criteria = self.job.criteria

    sharks_query_table = Table.from_ref(
        self.table.options.related_tables['basetable'])

    depjobs = {}
    for device in Device.objects.filter(module='netshark', enabled=True):
        shark = DeviceManager.get_device(device.id)

        for capjob in shark.get_capture_jobs():
            # The primary table's criteria supply endtime, duration
            # and netshark_filterexpr; override source per capture job.
            crit = copy.copy(base_criteria)
            crit.netshark_device = device.id
            crit.netshark_source_name = 'jobs/' + capjob.name
            crit.resolution = datetime.timedelta(0, 1)
            crit.aggregated = True

            job = Job.create(table=sharks_query_table, criteria=crit)
            depjobs[job.id] = job

    return QueryContinue(self.collect, depjobs)
def analyze(self, jobs):
    """Rebuild the table's dynamic pivot columns from the top-N
    overall values, then launch a time-series job over those pivots."""
    df = jobs['overall'].data()

    # Drop dynamic columns left over from the last run of this report;
    # the 'time' column is permanent and must survive.
    for col in self.table.get_columns():
        if col.name == 'time':
            continue
        col.delete()

    # Rank rows by the value column and keep the top-N pivot values.
    opts = self.table.options
    ranked = df.sort_values(opts.value_column_name, ascending=False)
    pivots = list(ranked.head(opts.n)[opts.pivot_column_name])

    # Re-create one dynamic column per pivot value.
    for pivot in pivots:
        AppResponseColumn.create(self.table, pivot, pivot)

    # Launch the AppResponseTimeSeries job carrying the chosen pivots.
    self.job.criteria.pivot_column_names = ','.join(pivots)
    ts_table = Table.from_ref(self.table.options.related_tables['ts'])
    job = Job.create(table=ts_table, criteria=self.job.criteria,
                     update_progress=False, parent=self.job)

    return QueryContinue(self.collect, jobs={'ts': job})
def analyze(self, jobs=None):
    """Create the source- and destination-side pcap download jobs."""
    download_table = Table.from_ref(
        self.table.options.related_tables['download_table']
    )

    c = self.job.criteria

    # (segment label, device, capture source) for each direction.
    directions = (
        ('1-source', c.netshark_device_src, c.netshark_source_name_src),
        ('2-dest', c.netshark_device_dst, c.netshark_source_name_dst),
    )

    depjobs = {}
    for name, device, source in directions:
        crit = copy.copy(c)
        crit.netshark_device = device
        crit.netshark_source_name = source
        crit.segment = name

        job = Job.create(table=download_table, criteria=crit,
                         update_progress=True, parent=self.job)
        logger.debug("Created %s: %s download job with criteria %s"
                     % (job, name, crit))
        depjobs[job.id] = job

    return QueryContinue(self.collect, depjobs)
def analyze(self, jobs=None):
    """Serve the query from cached DB intervals where possible,
    creating dependent jobs only for the missing time ranges.

    If the DB already covers the whole query interval, the stored data
    is returned directly; otherwise one job is created per missing
    interval and the results are merged in self.collect.
    """
    filtered_list = ExistingIntervals.objects.filter(
        table_handle=self.handle)

    existing_intervals = None
    if filtered_list:
        existing_intervals = filtered_list[0].intervals

    if existing_intervals is not None:
        if self.query_interval in existing_intervals:
            # Fully covered -- search DB for the queried data.
            return QueryComplete(
                self.query(self.query_interval.start,
                           self.query_interval.end))
        missing = self.query_interval - existing_intervals
    else:
        # Bug fix: previously `self.query_interval in None` raised a
        # TypeError when nothing had been cached yet.  With no cached
        # intervals the entire query interval is missing.
        # (assumes _check_intervals accepts the bare query interval in
        # this case -- TODO confirm against the interval type's API)
        missing = self.query_interval

    intervals_to_call = self._check_intervals(missing)

    dep_jobs = {}
    for interval in intervals_to_call:
        criteria = copy.copy(self.job.criteria)
        # Only the two time-related fields change per interval.
        criteria.starttime = interval.start
        criteria.endtime = interval.end

        job = Job.create(table=self.ds_table, criteria=criteria,
                         update_progress=False, parent=self.job)
        dep_jobs[job.id] = job

    return QueryContinue(self.collect, jobs=dep_jobs)
def analyze(self, jobs):
    """Zoom in on the most anomalous point of the source data.

    Finds the column whose min or max deviates most from its mean,
    takes the row where that extreme occurs, then re-runs the template
    table with start/end times centered on that row's timestamp.
    """
    logger.debug('%s analyze - received jobs: %s' % (self, jobs))

    basetable = Table.from_ref(
        self.table.options['related_tables']['template']
    )
    data = jobs['source'].data()
    if data is None:
        return QueryError('No data available to analyze')

    # find column whose min/max is largest deviation from mean
    # then take row from that column where min/max occurs.
    # (.loc replaces DataFrame.ix, which was deprecated in pandas 0.20
    # and removed in 1.0; idxmax/idxmin return index labels, so the
    # label-based .loc accessor is the correct substitute)
    if self.table.options['max']:
        idx = (data.max() / data.mean()).idxmax()
        frow = data.loc[data[idx].idxmax()]
    else:
        idx = (data.min() / data.mean()).idxmin()
        frow = data.loc[data[idx].idxmin()]

    # get time value from extracted row to calculate new start/end times
    ftime = frow['time']
    duration = parse_timedelta(self.table.options['zoom_duration'])
    resolution = parse_timedelta(self.table.options['zoom_resolution'])

    stime = ftime - (duration / 2)
    etime = ftime + (duration / 2)

    criteria = self.job.criteria

    # Different datasources name the sampling-interval field
    # differently; update whichever one is present.
    if 'resolution' in criteria:
        criteria['resolution'] = resolution
    else:
        criteria['granularity'] = resolution

    criteria['duration'] = duration
    criteria['_orig_duration'] = duration
    criteria['starttime'] = stime
    criteria['_orig_starttime'] = stime
    criteria['endtime'] = etime
    criteria['_orig_endtime'] = etime

    # Consistency fix: was logging.debug, which writes to the root
    # logger and bypasses this module's configured logger.
    logger.debug('Creating FocusedAnalysis job with updated criteria %s'
                 % criteria)

    job = Job.create(basetable, criteria, self.job.update_progress)
    return QueryContinue(self.finish, {'job': job})
def run(self):
    """Launch one dependent job per configured related table, then
    continue into self._analyze once they complete."""
    # Collect all dependent tables.
    tables = self.table.options.tables
    if not tables:
        # No dependencies -- go straight to analysis.
        return QueryContinue(self._analyze, {})

    logger.debug("%s: dependent tables: %s" % (self, tables))

    jobs = {}
    for name, ref in tables.items():
        depjob = Job.create(Table.from_ref(ref), self.job.criteria,
                            update_progress=self.job.update_progress,
                            parent=self.job)
        logger.debug("%s: dependent job %s" % (self, depjob))
        jobs[name] = depjob

    return QueryContinue(self._analyze, jobs)
def analyze(self, jobs=None):
    """Serve a time-series query from cached DB intervals where
    possible, creating dependent jobs only for uncovered time ranges.
    """
    logger.debug('TimeSeriesTable analysis with jobs %s' % jobs)

    filtered_list = ExistingIntervals.objects.filter(
        table_handle=self.handle, criteria=self.no_time_criteria)

    existing_intervals = None
    if filtered_list:
        existing_intervals = filtered_list[0].intervals
        logger.debug('Found existing intervals for handle %s: %s' %
                     (self.handle, existing_intervals))

        if self.query_interval in existing_intervals:
            logger.debug('Query interval totally covered by DB, '
                         'returning DB query.')
            # Search DB for the queried data
            data = self.query(self.query_interval.start,
                              self.query_interval.end)
            return QueryComplete(data)

    logger.debug('Query interval only partially covered by DB ...')

    # Bug fix: with no cached intervals the old code evaluated
    # `self.query_interval in None`, raising TypeError.  Treat the
    # whole query interval as missing in that case.
    # (assumes _check_intervals accepts the bare query interval when
    # nothing is cached -- TODO confirm against the interval type)
    if existing_intervals is not None:
        missing = self.query_interval - existing_intervals
    else:
        missing = self.query_interval

    intervals_to_call = self._check_intervals(missing)

    logger.debug('Setting up %d jobs to cover missing data '
                 'for these intervals: %s'
                 % (len(intervals_to_call), intervals_to_call))

    dep_jobs = {}
    for interval in intervals_to_call:
        criteria = copy.copy(self.job.criteria)
        # Use the two time related fields
        criteria.starttime = interval.start
        criteria.endtime = interval.end
        job = Job.create(table=self.ds_table, criteria=criteria,
                         update_progress=False, parent=self.job)
        dep_jobs[job.id] = job

    return QueryContinue(self.collect, jobs=dep_jobs)
def analyze(self, jobs=None):
    """Serve a time-series query from cached DB intervals where
    possible, creating dependent jobs only for uncovered time ranges.
    """
    logger.debug('TimeSeriesTable analysis with jobs %s' % jobs)

    filtered_list = ExistingIntervals.objects.filter(
        table_handle=self.handle, criteria=self.no_time_criteria
    )

    existing_intervals = None
    if filtered_list:
        existing_intervals = filtered_list[0].intervals
        logger.debug('Found existing intervals for handle %s: %s' %
                     (self.handle, existing_intervals))

        if self.query_interval in existing_intervals:
            logger.debug('Query interval totally covered by DB, '
                         'returning DB query.')
            # Search DB for the queried data
            data = self.query(self.query_interval.start,
                              self.query_interval.end)
            return QueryComplete(data)

    logger.debug('Query interval only partially covered by DB ...')

    # Bug fix: with no cached intervals the old code evaluated
    # `self.query_interval in None`, raising TypeError.  Treat the
    # whole query interval as missing in that case.
    # (assumes _check_intervals accepts the bare query interval when
    # nothing is cached -- TODO confirm against the interval type)
    if existing_intervals is not None:
        missing = self.query_interval - existing_intervals
    else:
        missing = self.query_interval

    intervals_to_call = self._check_intervals(missing)

    logger.debug('Setting up %d jobs to cover missing data '
                 'for these intervals: %s'
                 % (len(intervals_to_call), intervals_to_call))

    dep_jobs = {}
    for interval in intervals_to_call:
        criteria = copy.copy(self.job.criteria)
        # Use the two time related fields
        criteria.starttime = interval.start
        criteria.endtime = interval.end
        job = Job.create(table=self.ds_table, criteria=criteria,
                         update_progress=False, parent=self.job)
        dep_jobs[job.id] = job

    return QueryContinue(self.collect, jobs=dep_jobs)
def analyze(self, jobs):
    """Run the base command table against every enabled SteelHead
    carrying the criteria's tag; error if no devices match."""
    tag = Tag.objects.get(id=self.job.criteria.tag).name
    cmd_table = Table.from_ref(self.table.options.related_tables['base'])

    tagged_devices = Device.objects.filter_by_tag(tag, module='steelhead',
                                                  enabled=True)

    dep_jobs = {}
    for device in tagged_devices:
        # Each device gets its own criteria copy with the target
        # device attached.
        dev_criteria = copy.copy(self.job.criteria)
        dev_criteria.dev = device
        dep_job = Job.create(table=cmd_table, criteria=dev_criteria,
                             parent=self.job)
        dep_jobs[dep_job.id] = dep_job

    if not dep_jobs:
        return QueryError("No enabled steelhead "
                          "devices found with tag '{}'".format(tag))

    return QueryContinue(self.collect, jobs=dep_jobs)
def post(self, request, pk):
    """Create new Job for the specified table using POSTed criteria.

    Returns a 201 response with the serialized Job on success, or a
    400 response carrying the form errors if the criteria are invalid.
    Raises JobCreationError if job creation or startup fails.
    """
    table = Table.objects.get(pk=pk)
    all_fields = dict((f.keyword, f) for f in table.fields.all())

    # data needs to be not-None or form will be created as unbound
    data = self.request.POST or {}
    form = TableFieldForm(all_fields, use_widgets=False, data=data)

    if form.is_valid(check_unknown=True):
        criteria = form.criteria()
    else:
        return Response(form.errors,
                        status=status.HTTP_400_BAD_REQUEST)

    try:
        job = Job.create(table, criteria)
        job.start()
        serializer = JobSerializer(job, many=False)
        return Response(serializer.data, status=status.HTTP_201_CREATED,
                        headers=self.get_success_headers(job))
    except Exception as e:
        # Bug fix: e.message is deprecated since Python 2.6 and removed
        # in Python 3; str(e) works for every exception type.
        msg = 'Error processing Job: %s' % str(e)
        raise JobCreationError(msg)
def handle(self, *args, **options):
    """ Main command handler.

    Depending on the options either lists tables (flat, or grouped by
    report/widget), lists a table's criteria fields, or runs a table:
    builds criteria from ``key:value`` options, creates and starts a
    Job, waits for completion, and prints the results as a table or
    CSV.
    """
    self.options = options

    if options['table_list']:
        # print out the id's instead of processing anything
        output = []
        for t in Table.objects.all():
            output.append([t.id, t.namespace, t.queryclassname,
                           t.name, t])
        Formatter.print_table(output, ['ID', 'Namespace', 'QueryClass',
                                       'Name', 'Table'])
    elif options['table_list_by_report']:
        # or print them out organized by report/widget/table
        output = []
        reports = Report.objects.all()
        for report in reports:
            for table in report.tables():
                for widget in table.widget_set.all():
                    line = [table.id, report.title, widget.title, table]
                    output.append(line)
        Formatter.print_table(output, ['ID', 'Report', 'Widget', 'Table'])
    elif options['criteria_list']:
        # Resolve the target table by id or by name.
        if 'table_id' in options and options['table_id'] is not None:
            table = Table.objects.get(id=options['table_id'])
        elif 'table_name' in options and options['table_name'] is not None:
            table = Table.objects.get(name=options['table_name'])
        else:
            raise ValueError("Must specify either --table-id or "
                             "--table-name to run a table")

        form = self.get_form(table)

        # Only show criteria options that were included in report
        # and given a label, other ones are for internal table use.
        # criteria like ignore_cache can still be passed in, they
        # just won't be shown in this list
        output = [(k, v.label)
                  for k, v in form.fields.iteritems() if v.label]
        Formatter.print_table(output, ['Keyword', 'Label'])
    else:
        # Run the table.  Resolve it by id or by name first.
        if 'table_id' in options and options['table_id'] is not None:
            table = Table.objects.get(id=options['table_id'])
        elif 'table_name' in options and options['table_name'] is not None:
            table = Table.objects.get(name=options['table_name'])
        else:
            raise ValueError("Must specify either --table-id or "
                             "--table-name to run a table")

        # Django gives us a nice error if we can't find the table
        self.console('Table %s found.' % table)

        # Parse criteria options of the form key:value.
        criteria_options = {}
        if 'criteria' in options and options['criteria'] is not None:
            for s in options['criteria']:
                (k, v) = s.split(':', 1)
                criteria_options[k] = v

        form = self.get_form(table, data=criteria_options)

        if not form.is_valid(check_unknown=True):
            # Report every invalid criterion, then bail out.
            self.console('Invalid criteria:')
            logger.error('Invalid criteria: %s' %
                         ','.join('%s:%s' % (k, v)
                                  for k, v in form.errors.iteritems()))
            for k, v in form.errors.iteritems():
                self.console(' %s: %s' % (k, ','.join(v)))
            sys.exit(1)

        criteria = form.criteria()

        columns = [c.name for c in table.get_columns()]

        if options['only_columns']:
            print columns
            return

        job = Job.create(table=table, criteria=criteria,
                         update_progress=False)
        job.save()

        self.console('Job created: %s' % job)
        self.console('Criteria: %s' % criteria.print_details())

        start_time = datetime.datetime.now()
        job.start()
        self.console('Job running . . ', ending='')

        # wait for results
        while not job.done():
            # self.console('. ', ending='')
            # self.stdout.flush()
            time.sleep(1)

        end_time = datetime.datetime.now()
        delta = end_time - start_time
        # Total elapsed seconds, including the sub-second part.
        seconds = float(delta.microseconds +
                        (delta.seconds + delta.days*24*3600)*10**6)/10**6

        self.console('Done!! (elapsed time: %.2f seconds)' % seconds)
        self.console('')

        # Need to refresh the column list in case the job changed them
        # (ephemeral cols)
        columns = [c.name for c in table.get_columns()]

        if job.status == job.COMPLETE:
            if options['as_csv']:
                if options['output_file']:
                    # Write CSV rows to the requested file.
                    with open(options['output_file'], 'w') as f:
                        for line in Formatter.get_csv(job.values(),
                                                      columns):
                            f.write(line)
                            f.write('\n')
                else:
                    Formatter.print_csv(job.values(), columns)
            else:
                Formatter.print_table(job.values(), columns)
        else:
            self.console("Job completed with an error:")
            self.console(job.message)
            sys.exit(1)
def analyze(self, jobs=None):
    """Decide whether the pcap can be analyzed by a single wireshark
    job or must first be split into chunks, then create the dependent
    job(s) and continue into self.collect.

    Raises AnalysisException if the upstream pcap-fetch job failed,
    the file contains no packets, or splitting produced no files.
    """
    criteria = self.job.criteria

    if jobs:
        # A dependent job fetched the pcap; take its output filename.
        job = list(jobs.values())[0]
        if job.status == Job.ERROR:
            raise AnalysisException("%s for getting pcap file failed: %s"
                                    % (job, job.message))
        criteria.entire_pcap = True
        self.filename = job.data()['filename'][0]
    else:
        # No fetch job -- the criteria name the pcap file directly.
        self.filename = criteria.pcapfilename

    pcap = PcapFile(self.filename)

    try:
        pcap_info = pcap.info()
    except ValueError:
        raise AnalysisException("No packets in %s" % self.filename)

    logger.debug("%s: File info %s"
                 % (self.__class__.__name__, pcap_info))

    self.pkt_num = int(pcap_info['Number of packets'])

    # Below this packet count the file is analyzed in one piece.
    min_pkt_num = self.table.options.split_threshold

    wt = Table.from_ref(self.table.options.related_tables['wireshark'])

    depjobs = {}
    if self.pkt_num < min_pkt_num:
        # No need to split the pcap file
        criteria.pcapfilename = self.filename
        criteria.entire_pcap = True
        job = Job.create(table=wt, criteria=criteria,
                         update_progress=False, parent=self.job)

        depjobs[job.id] = job

        logger.debug("%s starting single job" % self.__class__.__name__)
        return QueryContinue(self.collect, depjobs)

    # Split the pcap into chunk files under a per-handle directory.
    self.output_dir = os.path.join(SPLIT_DIR, self.file_handle)
    self.split_pcap()

    split_files = os.listdir(self.output_dir)

    if not split_files:
        raise AnalysisException('No pcap file found after splitting %s'
                                % self.filename)

    # One wireshark job per chunk file.
    for split in split_files:
        # use wireshark table
        ws_criteria = copy.copy(criteria)
        ws_criteria.pcapfilename = os.path.join(self.output_dir, split)

        # for ease of removing the split directory in collect func
        ws_criteria.output_dir = self.output_dir

        job = Job.create(table=wt, criteria=ws_criteria,
                         update_progress=False, parent=self.job)

        depjobs[job.id] = job

    logger.debug("%s starting multiple jobs" % self.__class__.__name__)

    return QueryContinue(self.collect, jobs=depjobs)
def handle(self, *args, **options):
    """ Main command handler.

    Depending on the options either lists tables (flat, or grouped by
    report/widget), lists a table's criteria fields, or runs a table:
    builds criteria from ``key:value`` options, creates and starts a
    Job, waits for completion, and prints the results as a table or
    CSV.
    """
    self.options = options

    if options['table_list']:
        # print out the id's instead of processing anything
        output = []
        for t in Table.objects.all():
            output.append([t.id, t.namespace, t.queryclassname,
                           t.name, t])
        Formatter.print_table(
            output,
            ['ID', 'Namespace', 'QueryClass', 'Name', 'Table'])
    elif options['table_list_by_report']:
        # or print them out organized by report/widget/table
        output = []
        reports = Report.objects.all()
        for report in reports:
            for table in report.tables():
                for widget in table.widget_set.all():
                    line = [table.id, report.title, widget.title, table]
                    output.append(line)
        Formatter.print_table(output, ['ID', 'Report', 'Widget', 'Table'])
    elif options['criteria_list']:
        # Resolve the target table by id or by name.
        if 'table_id' in options and options['table_id'] is not None:
            table = Table.objects.get(id=options['table_id'])
        elif 'table_name' in options and options['table_name'] is not None:
            table = Table.objects.get(name=options['table_name'])
        else:
            raise ValueError("Must specify either --table-id or "
                             "--table-name to run a table")

        form = self.get_form(table)

        # Only show criteria options that were included in report
        # and given a label, other ones are for internal table use.
        # criteria like ignore_cache can still be passed in, they
        # just won't be shown in this list
        output = [(k, v.label)
                  for k, v in form.fields.iteritems() if v.label]
        Formatter.print_table(output, ['Keyword', 'Label'])
    else:
        # Run the table.  Resolve it by id or by name first.
        if 'table_id' in options and options['table_id'] is not None:
            table = Table.objects.get(id=options['table_id'])
        elif 'table_name' in options and options['table_name'] is not None:
            table = Table.objects.get(name=options['table_name'])
        else:
            raise ValueError("Must specify either --table-id or "
                             "--table-name to run a table")

        # Django gives us a nice error if we can't find the table
        self.console('Table %s found.' % table)

        # Parse criteria options of the form key:value.
        criteria_options = {}
        if 'criteria' in options and options['criteria'] is not None:
            for s in options['criteria']:
                (k, v) = s.split(':', 1)
                criteria_options[k] = v

        form = self.get_form(table, data=criteria_options)

        if not form.is_valid(check_unknown=True):
            # Report every invalid criterion, then bail out.
            self.console('Invalid criteria:')
            logger.error('Invalid criteria: %s' %
                         ','.join('%s:%s' % (k, v)
                                  for k, v in form.errors.iteritems()))
            for k, v in form.errors.iteritems():
                self.console(' %s: %s' % (k, ','.join(v)))
            sys.exit(1)

        criteria = form.criteria()

        columns = [c.name for c in table.get_columns()]

        if options['only_columns']:
            print columns
            return

        job = Job.create(table=table, criteria=criteria,
                         update_progress=False)
        job.save()

        self.console('Job created: %s' % job)
        self.console('Criteria: %s' % criteria.print_details())

        start_time = datetime.datetime.now()
        job.start()
        self.console('Job running . . ', ending='')

        # wait for results
        while not job.done():
            # self.console('. ', ending='')
            # self.stdout.flush()
            time.sleep(1)

        end_time = datetime.datetime.now()
        delta = end_time - start_time
        # Total elapsed seconds, including the sub-second part.
        seconds = float(delta.microseconds +
                        (delta.seconds +
                         delta.days * 24 * 3600) * 10**6) / 10**6

        self.console('Done!! (elapsed time: %.2f seconds)' % seconds)
        self.console('')

        # Need to refresh the column list in case the job changed them
        # (ephemeral cols)
        columns = [c.name for c in table.get_columns()]

        if job.status == job.COMPLETE:
            if options['as_csv']:
                if options['output_file']:
                    # Write CSV rows to the requested file.
                    with open(options['output_file'], 'w') as f:
                        for line in Formatter.get_csv(
                                job.values(), columns):
                            f.write(line)
                            f.write('\n')
                else:
                    Formatter.print_csv(job.values(), columns)
            else:
                Formatter.print_table(job.values(), columns)
        else:
            self.console("Job completed with an error:")
            self.console(job.message)
            sys.exit(1)
def post(self, request, namespace, report_slug, widget_slug, format=None):
    """Create and start a Job for a widget from POSTed criteria.

    Returns a Response carrying the URL where the widget job can be
    polled, or a 400 JsonResponse if the job could not be started.
    Raises ValueError if the reconstructed criteria form is invalid.
    """
    logger.debug("Received POST for report %s, widget %s: %s" %
                 (report_slug, widget_slug, request.POST))

    report = get_object_or_404(Report, namespace=namespace,
                               slug=report_slug)

    widget = get_object_or_404(
        Widget,
        slug=widget_slug,
        section__in=Section.objects.filter(report=report)
    )

    req_json = json.loads(request.POST['criteria'])

    fields = widget.collect_fields()
    form = TableFieldForm(fields, use_widgets=False,
                          hidden_fields=report.hidden_fields,
                          include_hidden=True,
                          data=req_json, files=request.FILES)

    # An invalid form means the client sent criteria inconsistent
    # with the widget definition -- a programming error, not bad
    # user input, hence the exception.  (The original also had a
    # redundant second is_valid() check and a dead `else` branch
    # that dropped into an IPython debugger; both removed.)
    if not form.is_valid():
        raise ValueError("Widget internal criteria form is invalid:\n%s"
                         % (form.errors.as_text()))

    logger.debug('Form passed validation: %s' % form)
    formdata = form.cleaned_data
    logger.debug('Form cleaned data: %s' % formdata)

    # parse time and localize to user profile timezone
    timezone = get_timezone(request)
    form.apply_timezone(timezone)

    try:
        form_criteria = form.criteria()
        logger.debug('Form_criteria: %s' % form_criteria)
        job = Job.create(table=widget.table(),
                         criteria=form_criteria)
        job.start()

        wjob = WidgetJob(widget=widget, job=job)
        wjob.save()

        logger.debug("Created WidgetJob %s for report %s (handle %s)"
                     % (str(wjob), report_slug, job.handle))

        return Response({"joburl": reverse('report-job-detail',
                                           args=[namespace, report_slug,
                                                 widget_slug, wjob.id])})
    except Exception:
        logger.exception("Failed to start job, an exception occurred")
        resp = {}
        # Bug fix: the original assignment ended with a trailing
        # comma, storing a 1-tuple instead of a string.
        resp['message'] = "".join(
            traceback.format_exception_only(*sys.exc_info()[0:2]))
        resp['exception'] = "".join(
            traceback.format_exception(*sys.exc_info()))

        return JsonResponse(resp, status=400)
def post(self, request, namespace, report_slug, widget_slug, format=None):
    """Create and start a Job for a widget from POSTed criteria.

    Returns a Response carrying the URL where the widget job can be
    polled, or a 400 JsonResponse if the job could not be started.
    Raises ValueError if the reconstructed criteria form is invalid.
    """
    logger.debug("Received POST for report %s, widget %s: %s" %
                 (report_slug, widget_slug, request.POST))

    report = get_object_or_404(Report, namespace=namespace,
                               slug=report_slug)

    widget = get_object_or_404(
        Widget,
        slug=widget_slug,
        section__in=Section.objects.filter(report=report))

    req_json = json.loads(request.POST['criteria'])

    fields = widget.collect_fields()
    form = TableFieldForm(fields, use_widgets=False,
                          hidden_fields=report.hidden_fields,
                          include_hidden=True,
                          data=req_json, files=request.FILES)

    # An invalid form means the client sent criteria inconsistent
    # with the widget definition -- a programming error, not bad
    # user input, hence the exception.  (The original also had a
    # redundant second is_valid() check and a dead `else` branch
    # that dropped into an IPython debugger; both removed.)
    if not form.is_valid():
        raise ValueError("Widget internal criteria form is invalid:\n%s"
                         % (form.errors.as_text()))

    logger.debug('Form passed validation: %s' % form)
    formdata = form.cleaned_data
    logger.debug('Form cleaned data: %s' % formdata)

    # parse time and localize to user profile timezone
    timezone = pytz.timezone(request.user.timezone)
    form.apply_timezone(timezone)

    try:
        form_criteria = form.criteria()
        logger.debug('Form_criteria: %s' % form_criteria)
        job = Job.create(table=widget.table(),
                         criteria=form_criteria)
        job.start()

        wjob = WidgetJob(widget=widget, job=job)
        wjob.save()

        logger.debug("Created WidgetJob %s for report %s (handle %s)"
                     % (str(wjob), report_slug, job.handle))

        return Response({
            "joburl": reverse(
                'report-job-detail',
                args=[namespace, report_slug, widget_slug, wjob.id])
        })
    except Exception:
        logger.exception("Failed to start job, an exception occurred")
        resp = {}
        # Bug fix: the original assignment ended with a trailing
        # comma, storing a 1-tuple instead of a string.
        resp['message'] = "".join(
            traceback.format_exception_only(*sys.exc_info()[0:2]))
        resp['exception'] = "".join(
            traceback.format_exception(*sys.exc_info()))

        return JsonResponse(resp, status=400)