def analyze(self, jobs): """ Pivot data results from jobs """ df = jobs.values()[0].data() if (self.table.options.pivot_column is None or self.table.options.pivot_value is None): msg = ('Both "pivot_column" and "pivot_value" options need ' 'to be specified for PivotTables.') logger.error(msg) return QueryError(msg) pivot = df.pivot(index=self.table.options.pivot_index, columns=self.table.options.pivot_column, values=self.table.options.pivot_value).reset_index() # since numeric values may now be columns, change them to strings # for proper pattern matching downstream pivot.rename(columns=lambda x: str(x), inplace=True) col_names = [x for x in pivot.columns] cur_cols = [c.name for c in self.job.get_columns(synthetic=False)] for c in col_names: if c not in cur_cols: label = self.table.options.pivot_column_prefix + c Column.create(self.job.table, name=c, label=label, ephemeral=self.job, datatype=self.table.options.pivot_datatype) return QueryComplete(pivot)
def run(self):
    # Collect all dependent tables
    options = self.table.options

    model = get_schema_map()[options.schema]
    df = model.objects.get_dataframe()

    if df.empty:
        return QueryError(
            'No metrics defined for schema "%s". Add new metrics '
            'using the <a href="%s">admin interface</a>.'
            % (options.schema,
               reverse('admin:metrics_plugin_%s_changelist'
                       % model.__name__.lower()))
        )

    # Add some default columns as needed.  New ones are created as normal
    # columns rather than ephemeral - the table schema will not be dynamic,
    # any changes will be done via code changes and/or a report reload.
    # We check to see if some have already been defined to allow for
    # customization of the actual labels or column display.
    keys = list(df.keys())
    for k in keys:
        try:
            Column.objects.get(table=self.job.table, name=k)
        except ObjectDoesNotExist:
            Column.create(self.job.table, k, k.title(), datatype='string')

    logger.debug("%s: completed successfully" % self)
    return QueryComplete(df)
def post_run(self): """Execute any Functions saved to Table. In most cases, this function will be simply overridden by a subclass which will implement its own detailed processing. This method provides a shortcut to support passing a Function directly to the create method. """ options = self.table.options if options.function is None: return QueryError("Table %s has no analysis function defined" % self.table) try: df = options.function(self, options.tables, self.job.criteria) except Exception as e: return QueryError("Analysis function %s failed" % options.function, e) logger.debug("%s: completed successfully" % self) return QueryComplete(df)
def analyze(self, jobs):
    logger.debug('%s analyze - received jobs: %s' % (self, jobs))

    basetable = Table.from_ref(
        self.table.options['related_tables']['template']
    )
    data = jobs['source'].data()
    if data is None:
        return QueryError('No data available to analyze')

    # find column whose min/max is largest deviation from mean,
    # then take the row from that column where the min/max occurs
    if self.table.options['max']:
        idx = (data.max() / data.mean()).idxmax()
        frow = data.ix[data[idx].idxmax()]
    else:
        idx = (data.min() / data.mean()).idxmin()
        frow = data.ix[data[idx].idxmin()]

    # get time value from extracted row to calculate new start/end times
    ftime = frow['time']
    duration = parse_timedelta(self.table.options['zoom_duration'])
    resolution = parse_timedelta(self.table.options['zoom_resolution'])

    stime = ftime - (duration / 2)
    etime = ftime + (duration / 2)

    criteria = self.job.criteria

    if 'resolution' in criteria:
        criteria['resolution'] = resolution
    else:
        criteria['granularity'] = resolution

    criteria['duration'] = duration
    criteria['_orig_duration'] = duration
    criteria['starttime'] = stime
    criteria['_orig_starttime'] = stime
    criteria['endtime'] = etime
    criteria['_orig_endtime'] = etime

    logger.debug('Creating FocusedAnalysis job with updated criteria %s'
                 % criteria)

    job = Job.create(basetable, criteria, self.job.update_progress)
    return QueryContinue(self.finish, {'job': job})
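# Illustrative sketch (assumed values, not part of the class above): how the
# zoom window is centred on the timestamp of the extracted row.  datetime and
# timedelta stand in for the values normally produced by parse_timedelta().
from datetime import datetime, timedelta

ftime = datetime(2017, 1, 1, 12, 0, 0)      # hypothetical peak timestamp
duration = timedelta(minutes=10)            # hypothetical zoom_duration

stime = ftime - (duration / 2)              # 2017-01-01 11:55:00
etime = ftime + (duration / 2)              # 2017-01-01 12:05:00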
def analyze(self, jobs):
    tag = Tag.objects.get(id=self.job.criteria.tag).name
    cmd_table = Table.from_ref(
        self.table.options.related_tables['base'])

    dep_jobs = {}
    for sh_db in Device.objects.filter_by_tag(tag, module='steelhead',
                                              enabled=True):
        criteria = copy.copy(self.job.criteria)
        criteria.dev = sh_db
        job = Job.create(table=cmd_table, criteria=criteria,
                         parent=self.job)
        dep_jobs[job.id] = job

    if not dep_jobs:
        return QueryError("No enabled steelhead "
                          "devices found with tag '{}'".format(tag))

    return QueryContinue(self.collect, jobs=dep_jobs)
def _analyze(self, jobs=None):
    logger.debug("%s: all dependent jobs complete" % str(self))

    if jobs:
        for (name, job) in jobs.items():
            if job.status == job.ERROR:
                return QueryError("Dependent Job '%s' failed: %s"
                                  % (name, job.message))

    if hasattr(self, 'analyze'):
        return self.analyze(jobs)
    else:
        # Compatibility mode - old code uses def post_run() and expects
        # self.tables to be set
        tables = {}
        if jobs:
            for (name, job) in jobs.items():
                f = job.data()
                tables[name] = f
                logger.debug("%s: Table[%s] - %d rows"
                             % (self, name, len(f) if f is not None else 0))

        self.tables = tables
        return self.post_run()
def run(self): """ Main execution method """ args = self._prepare_report_args() with lock: report = SingleQueryReport(args.profiler) report.run( realm=self.table.options.realm, groupby=args.profiler.groupbys[self.table.options.groupby], centricity=args.centricity, columns=args.columns, timefilter=args.timefilter, trafficexpr=args.trafficexpr, data_filter=args.datafilter, resolution=args.resolution, sort_col=self.table.options.sort_col, sync=False, limit=args.limit ) data = self._wait_for_data(report) if not data: msg = 'Report %s returned no data' % self.job logger.error(msg) return QueryError(msg) def tonumber(s): # return an int if the string represents an integer, # a float if it represents a float # None otherwise. # check the int first since float() captures both try: return int(s) except ValueError: try: return float(s) except: return None others = [] totals = [] for i, col in enumerate(args.columns): if i == 0: others.append(u'Others') totals.append(u'Total') elif tonumber(data[0][i]): others.append(0) totals.append(0) else: others.append(u'') totals.append(u'') for i, row in enumerate(data): for j, col in enumerate(args.columns): val = tonumber(row[j]) if val: row[j] = val totals[j] += row[j] if i > self.table.rows: others[j] += row[j] # Clip the table at the row limit, then add two more # for other and total if self.table.rows > 0: data = data[:self.table.rows] self.table.rows += 2 data.append(others) data.append(totals) # Formatting: # - Add percents of total to numeric columns # - Strip "ByLocation|" from the groups if it exists # - Parse dns for row in data: for j, col in enumerate(args.columns): if isinstance(row[j], float): row[j] = "%.2f (%.0f%%)" % \ (row[j], 100 * row[j] / totals[j]) elif isinstance(row[j], int): row[j] = "%d (%.0f%%)" % \ (row[j], 100 * row[j] / totals[j]) elif isinstance(row[j], str): if row[j].startswith('ByLocation|'): row[j] = row[j][11:] elif ((col == 'cli_host_dns' or col == 'srv_host_dns') and ('|' in row[j])): # If we're using dns columns, they are ip|name # We should use the name if it's non-empty, # ip otherwise ip, name = row[j].split('|') if name: row[j] = name else: row[j] = ip logger.info("Report %s returned %s rows" % (self.job, len(data))) return QueryComplete(data)
def run(self):
    args = self._prepare_report_args()

    base_table = Table.from_ref(self.table.options.base)
    base_col = base_table.get_columns()[0]

    # only calculate other when we aren't filtering data
    include_other = self.table.options.include_other
    if self.job.criteria.netprofiler_filterexpr:
        include_other = False

    if self.table.options.groupby not in self.CONFIG:
        raise ValueError('not supported for groupby=%s' %
                         self.table.options.groupby)

    config = self.CONFIG[self.table.options.groupby]

    # num_reports / cur_report are used to compute min/max pct
    num_reports = (1 +
                   (1 if self.table.options.top_n else 0) +
                   (1 if include_other else 0))
    cur_report = 0

    if self.table.options.top_n:
        # Run a top-n report to drive the criteria for each column
        query_column_defs = self.run_top_n(config, args, base_col,
                                           minpct=0,
                                           maxpct=(100 / num_reports))
        cur_report += 1
    else:
        query_column_defs = self.job.criteria.query_columns
        if isinstance(query_column_defs, types.StringTypes):
            query_column_defs = json.loads(query_column_defs)

    query_columns = [col['json'] for col in query_column_defs]

    if not query_columns:
        msg = 'Unable to compute query columns for job %s' % self.job
        logger.error(msg)
        return QueryError(msg)

    with lock:
        report = TrafficTimeSeriesReport(args.profiler)
        columns = [args.columns[0], base_col.name]
        logger.info("Query Columns: %s" % str(query_columns))

        if self.table.options.groupby == 'host_group':
            host_group_type = 'ByLocation'
        else:
            host_group_type = None

        report.run(
            centricity=args.centricity,
            columns=columns,
            timefilter=args.timefilter,
            trafficexpr=args.trafficexpr,
            resolution=args.resolution,
            sync=False,
            host_group_type=host_group_type,
            query_columns_groupby=config.groupby,
            query_columns=query_columns
        )

    data = self._wait_for_data(report,
                               minpct=cur_report * (100 / num_reports),
                               maxpct=(cur_report + 1) * (100 / num_reports))
    cur_report += 1

    df = pandas.DataFrame(data,
                          columns=(['time'] + [col['name'] for
                                               col in query_column_defs]))

    # Create ephemeral columns for all the data based
    # on the related base table
    for col in query_column_defs:
        Column.create(self.job.table, col['name'], col['label'],
                      ephemeral=self.job,
                      datatype=base_col.datatype,
                      formatter=base_col.formatter)

    if include_other:
        # Run a separate timeseries query with no column filters
        # to get "totals", then use that to compute an "other" column
        with lock:
            report = SingleQueryReport(args.profiler)
            report.run(
                realm='traffic_overall_time_series',
                centricity=args.centricity,
                groupby=args.profiler.groupbys['time'],
                columns=columns,
                timefilter=args.timefilter,
                trafficexpr=args.trafficexpr,
                resolution=args.resolution,
                sync=False
            )

        totals = self._wait_for_data(
            report,
            minpct=cur_report * (100 / num_reports),
            maxpct=(cur_report + 1) * (100 / num_reports))

        df = df.set_index('time')
        df['subtotal'] = df.sum(axis=1)
        totals_df = (pandas.DataFrame(totals, columns=['time', 'total'])
                     .set_index('time'))

        df = df.merge(totals_df, left_index=True, right_index=True)
        df['other'] = df['total'] - df['subtotal']

        colnames = (['time'] + [col['name'] for col in query_column_defs]
                    + ['other'])

        # Drop the extraneous total and subtotal columns
        df = (df.reset_index().ix[:, colnames])

        Column.create(self.job.table, 'other', 'Other',
                      ephemeral=self.job,
                      datatype=base_col.datatype,
                      formatter=base_col.formatter)

    logger.info("Report %s returned %s rows" % (self.job, len(df)))
    return QueryComplete(df)
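# Illustrative sketch (made-up numbers): how the 'other' column above is derived
# when include_other is set.  The per-row subtotal of the query columns is
# subtracted from the overall totals report, leaving the traffic that is not
# attributed to any of the top-n columns.
import pandas

df = pandas.DataFrame({'time': [1, 2],
                       'app_a': [5, 6],
                       'app_b': [3, 1]}).set_index('time')
df['subtotal'] = df.sum(axis=1)                       # 8 and 7

totals_df = pandas.DataFrame({'time': [1, 2],
                              'total': [10, 9]}).set_index('time')

df = df.merge(totals_df, left_index=True, right_index=True)
df['other'] = df['total'] - df['subtotal']            # 2 and 2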