def analyze(self, jobs):
    """Pivot the data of the first job in ``jobs``.

    Requires the table options ``pivot_column`` and ``pivot_value``;
    ``pivot_index`` names the index column.  Any columns produced by the
    pivot that do not already exist on the job's table are created as
    ephemeral columns so downstream processing can see them.

    Returns QueryComplete with the pivoted dataframe, or QueryError when
    the required options are missing.
    """
    # Validate options *before* fetching the job data -- the original
    # retrieved the (potentially expensive) dataframe first and then
    # discarded it on a configuration error.
    if (self.table.options.pivot_column is None or
            self.table.options.pivot_value is None):
        msg = ('Both "pivot_column" and "pivot_value" options need '
               'to be specified for PivotTables.')
        logger.error(msg)
        return QueryError(msg)

    df = jobs.values()[0].data()

    pivot = df.pivot(index=self.table.options.pivot_index,
                     columns=self.table.options.pivot_column,
                     values=self.table.options.pivot_value).reset_index()

    # since numeric values may now be columns, change them to strings
    # for proper pattern matching downstream
    pivot.rename(columns=lambda x: str(x), inplace=True)

    col_names = list(pivot.columns)
    cur_cols = [c.name for c in self.job.get_columns(synthetic=False)]

    for c in col_names:
        if c not in cur_cols:
            label = self.table.options.pivot_column_prefix + c
            Column.create(self.job.table, name=c, label=label,
                          ephemeral=self.job,
                          datatype=self.table.options.pivot_datatype)

    return QueryComplete(pivot)
def analyze(self, jobs):
    """Resample the data of the first job in ``jobs``.

    The resample interval is taken from the job criteria when it parses
    as an integer number of seconds, falling back to the table option
    ``resample_interval`` otherwise.  Columns present on the source job
    but not on this job's table are created as ephemeral columns.
    """
    job = jobs.values()[0]

    interval = self.table.options.resample_interval
    try:
        interval = '{0}s'.format(int(job.criteria.resample_interval))
    except ValueError:
        logger.warning("{0}: resample_interval ({2}) not set or valid in "
                       "job criteria {1}".format(self, job.criteria,
                                                 interval))

    # Normalize the criteria value back to a bare number of seconds.
    job.criteria.resample_interval = u'{0}'.format(interval.split('s')[0])

    frame = job.data()
    resampled = resample(frame,
                         self.table.options.resample_column,
                         interval,
                         self.table.options.resample_operation)

    existing = [c.name for c in self.job.get_columns(synthetic=False)]
    for col in job.get_columns(synthetic=False):
        if col.name not in existing:
            # Default data type is float.
            Column.create(self.job.table, name=col.name, label=col.name,
                          ephemeral=self.job)

    return QueryComplete(resampled)
def run(self):
    """Build a dataframe of all metrics defined for the table's schema.

    Returns QueryError when no metrics exist yet, otherwise ensures a
    string column exists for every dataframe key and returns
    QueryComplete with the dataframe.
    """
    # Collect all dependent tables
    options = self.table.options
    model = get_schema_map()[options.schema]

    df = model.objects.get_dataframe()
    if df.empty:
        admin_url = reverse('admin:metrics_plugin_%s_changelist'
                            % model.__name__.lower())
        return QueryError(
            'No metrics defined for schema "%s". Add new metrics '
            'using the <a href="%s">admin interface</a>.'
            % (options.schema, admin_url)
        )

    # Add some default columns as needed
    # new ones are created as normal columns vs ephemeral - the table
    # schema will not be dynamic, any changes will be done via code
    # changes and/or a report reload.
    # We check to see if some have already been defined to allow for
    # customization of the actual labels or column display
    for name in list(df.keys()):
        try:
            Column.objects.get(table=self.job.table, name=name)
        except ObjectDoesNotExist:
            Column.create(self.job.table, name, name.title(),
                          datatype='string')

    logger.debug("%s: completed successfully" % self)
    return QueryComplete(df)
def analyze(self, jobs):
    """Resample the data of the first job in ``jobs``.

    Prefers the interval from the job criteria (as integer seconds),
    falling back to the ``resample_interval`` table option; creates an
    ephemeral column for each source-job column missing on this table.
    """
    job = jobs.values()[0]

    rs = self.table.options.resample_interval
    try:
        rs = '{0}s'.format(int(job.criteria.resample_interval))
    except ValueError:
        logger.warning(
            "{0}: resample_interval ({2}) not set or valid in "
            "job criteria {1}".format(self, job.criteria, rs))

    # Store the plain seconds value back into the criteria.
    job.criteria.resample_interval = u'{0}'.format(rs.split('s')[0])

    rs_df = resample(job.data(),
                     self.table.options.resample_column,
                     rs,
                     self.table.options.resample_operation)

    current_names = [c.name for c in self.job.get_columns(synthetic=False)]
    missing = [c.name for c in job.get_columns(synthetic=False)
               if c.name not in current_names]
    for name in missing:
        # Default data type is float.
        Column.create(self.job.table, name=name, label=name,
                      ephemeral=self.job)

    return QueryComplete(rs_df)
def _refresh_columns(self, profiler, report, query):
    """Rebuild this table's columns from the given query/report.

    All existing columns are deleted, then recreated from the query's
    ephemeral columns (or the widget's report columns when the query has
    none), mapping each column's JSON type to a local datatype.
    """
    # Delete columns
    for existing in self.table.get_columns():
        existing.delete()

    cols = [c for c in query.columns if c.id >= EPHEMERAL_COLID]
    if not cols:
        cols = report.get_columns(self.table.options.widget_id)

    if query.is_time_series:
        # 98 is the column id for 'time'
        cols = [profiler.columns[98]] + cols

    for col in cols:
        # NOTE: keep the evaluation order -- col.json['rate'] is only
        # consulted when the type is neither 'float' nor 'reltime'.
        if (col.json['type'] == 'float' or
                col.json['type'] == 'reltime' or
                col.json['rate'] == 'opt'):
            data_type = 'float'
        elif col.json['type'] == 'time':
            data_type = 'time'
        elif col.json['type'] == 'int':
            data_type = 'integer'
        else:
            data_type = 'string'

        col_name = col.label if col.ephemeral else col.key
        Column.create(self.table, col_name, col.label,
                      datatype=data_type, iskey=col.iskey)
def run(self):
    """ Main execution method """
    criteria = self.job.criteria

    # A NetProfiler device must be selected in the criteria before we
    # can run anything.
    if criteria.netprofiler_device == '':
        logger.debug('%s: No netprofiler device selected' % self.table)
        self.job.mark_error("No NetProfiler Device Selected")
        return False

    profiler = DeviceManager.get_device(criteria.netprofiler_device)
    report = ServiceLocationReport(profiler)

    tf = TimeFilter(start=criteria.starttime, end=criteria.endtime)

    logger.info(
        'Running NetProfilerServiceByLocTable %d report for timeframe %s'
        % (self.table.id, str(tf)))

    # All interaction with the shared profiler connection is serialized
    # via the module-level lock.
    with lock:
        report.run(timefilter=tf, sync=False)

    # Poll the asynchronous report until completion, surfacing progress
    # to the job as we go.
    done = False
    logger.info("Waiting for report to complete")
    while not done:
        time.sleep(0.5)
        with lock:
            s = report.status()

        self.job.mark_progress(progress=int(s['percent']))
        done = (s['status'] == 'completed')

    # Retrieve the data
    with lock:
        data = report.get_data()
        query = report.get_query_by_index(0)

    tz = criteria.starttime.tzinfo

    # Update criteria
    # Rewrite start/end with the timeframe the profiler actually used,
    # preserving the original timezone.
    criteria.starttime = (datetime.datetime
                          .utcfromtimestamp(query.actual_t0)
                          .replace(tzinfo=tz))
    criteria.endtime = (datetime.datetime
                        .utcfromtimestamp(query.actual_t1)
                        .replace(tzinfo=tz))

    self.job.safe_update(actual_criteria=criteria)

    if len(data) == 0:
        return QueryComplete(None)

    # Add ephemeral columns for everything
    Column.create(self.job.table, 'location', 'Location',
                  ephemeral=self.job, datatype='string')
    # One health-formatted column per service key found in the data
    # (assumes every row shares the keys of the first row -- TODO confirm).
    for k in data[0].keys():
        if k == 'location':
            continue

        Column.create(self.job.table, k, k,
                      ephemeral=self.job, datatype='string',
                      formatter='rvbd.formatHealth')

    df = pandas.DataFrame(data)

    if self.job.table.options.rgb:
        # Map service states to display colors when the 'rgb' table
        # option is set.
        state_map = {Service.SVC_NOT_AVAILABLE: 'gray',
                     Service.SVC_DISABLED: 'gray',
                     Service.SVC_INIT: 'gray',
                     Service.SVC_NORMAL: 'green',
                     Service.SVC_LOW: 'yellow',
                     Service.SVC_MED: 'yellow',
                     Service.SVC_HIGH: 'red',
                     Service.SVC_NODATA: 'gray'}

        # NOTE(review): relies on keys()/values() returning matching
        # orders (true on Python 2 and 3.7+ dicts).
        df = df.replace(state_map.keys(),
                        state_map.values())

    return QueryComplete(df)
def run(self):
    """Run a traffic time-series report with one column per top-n entry.

    Returns QueryComplete with a dataframe containing a 'time' column,
    one column per query column definition and -- when the table option
    ``include_other`` applies -- an 'other' column holding the overall
    totals minus the per-column subtotal.  Returns QueryError when no
    query columns can be determined.
    """
    args = self._prepare_report_args()

    base_table = Table.from_ref(self.table.options.base)
    base_col = base_table.get_columns()[0]

    # only calculate other when we aren't filtering data
    include_other = self.table.options.include_other
    if self.job.criteria.netprofiler_filterexpr:
        include_other = False

    if self.table.options.groupby not in self.CONFIG:
        raise ValueError('not supported for groupby=%s'
                         % self.table.options.groupby)

    config = self.CONFIG[self.table.options.groupby]

    # num_reports / cur_report are used to compute min/max pct
    num_reports = (1 +
                   (1 if self.table.options.top_n else 0) +
                   (1 if include_other else 0))
    cur_report = 0

    if self.table.options.top_n:
        # Run a top-n report to drive the criteria for each column
        query_column_defs = self.run_top_n(config, args, base_col,
                                           minpct=0,
                                           maxpct=(100/num_reports))
        cur_report += 1
    else:
        query_column_defs = self.job.criteria.query_columns
        if isinstance(query_column_defs, types.StringTypes):
            query_column_defs = json.loads(query_column_defs)

    query_columns = [col['json'] for col in query_column_defs]

    # Guard against an empty column set (consistent with the sibling
    # implementation) instead of running a meaningless report.
    if not query_columns:
        msg = 'Unable to compute query columns for job %s' % self.job
        logger.error(msg)
        return QueryError(msg)

    with lock:
        report = TrafficTimeSeriesReport(args.profiler)
        columns = [args.columns[0], base_col.name]
        logger.info("Query Columns: %s" % str(query_columns))

        if self.table.options.groupby == 'host_group':
            host_group_type = 'ByLocation'
        else:
            host_group_type = None

        report.run(
            centricity=args.centricity,
            columns=columns,
            timefilter=args.timefilter,
            trafficexpr=args.trafficexpr,
            resolution=args.resolution,
            sync=False,
            host_group_type=host_group_type,
            query_columns_groupby=config.groupby,
            query_columns=query_columns
        )

    data = self._wait_for_data(report,
                               minpct=cur_report * (100/num_reports),
                               maxpct=(cur_report + 1) * (100/num_reports))
    cur_report += 1

    df = pandas.DataFrame(data,
                          columns=(['time'] +
                                   [col['name']
                                    for col in query_column_defs]))

    # Create ephemeral columns for all the data based
    # on the related base table
    for col in query_column_defs:
        Column.create(self.job.table,
                      col['name'], col['label'],
                      ephemeral=self.job,
                      datatype=base_col.datatype,
                      formatter=base_col.formatter)

    if include_other:
        # Run a separate timeseries query with no column filters
        # to get "totals" then use that to compute an "other" column
        with lock:
            report = SingleQueryReport(args.profiler)
            report.run(
                realm='traffic_overall_time_series',
                groupby=args.profiler.groupbys['time'],
                columns=columns,
                timefilter=args.timefilter,
                trafficexpr=args.trafficexpr,
                resolution=args.resolution,
                sync=False
            )

        totals = self._wait_for_data(
            report,
            minpct=cur_report * (100/num_reports),
            maxpct=(cur_report + 1) * (100/num_reports))

        df = df.set_index('time')
        df['subtotal'] = df.sum(axis=1)
        totals_df = (pandas.DataFrame(totals, columns=['time', 'total'])
                     .set_index('time'))
        df = df.merge(totals_df, left_index=True, right_index=True)

        # BUG FIX: 'other' is the overall total minus the sum of the
        # individually queried columns.  The original chained assignment
        # (df['other'] = df['total'] = df['subtotal']) copied the
        # subtotal into both columns instead of subtracting.
        df['other'] = df['total'] - df['subtotal']

        colnames = (['time'] +
                    [col['name'] for col in query_column_defs] +
                    ['other'])

        # Drop the extraneous total and subtotal columns
        df = (df.reset_index().ix[:, colnames])

        Column.create(self.job.table, 'other', 'Other',
                      ephemeral=self.job,
                      datatype=base_col.datatype,
                      formatter=base_col.formatter)

    logger.info("Report %s returned %s rows" % (self.job, len(df)))

    return QueryComplete(df)
def run(self):
    """Run a traffic time-series report with one column per top-n entry.

    Returns QueryComplete with a dataframe containing a 'time' column,
    one column per query column definition and -- when the table option
    ``include_other`` applies -- an 'other' column holding the overall
    totals minus the per-column subtotal.  Returns QueryError when no
    query columns can be determined.
    """
    args = self._prepare_report_args()

    base_table = Table.from_ref(self.table.options.base)
    base_col = base_table.get_columns()[0]

    # only calculate other when we aren't filtering data
    include_other = self.table.options.include_other
    if self.job.criteria.netprofiler_filterexpr:
        include_other = False

    if self.table.options.groupby not in self.CONFIG:
        raise ValueError('not supported for groupby=%s'
                         % self.table.options.groupby)

    config = self.CONFIG[self.table.options.groupby]

    # num_reports / cur_report are used to compute min/max pct
    num_reports = (1 +
                   (1 if self.table.options.top_n else 0) +
                   (1 if include_other else 0))
    cur_report = 0

    if self.table.options.top_n:
        # Run a top-n report to drive the criteria for each column
        query_column_defs = self.run_top_n(config, args, base_col,
                                           minpct=0,
                                           maxpct=(100/num_reports))
        cur_report += 1
    else:
        query_column_defs = self.job.criteria.query_columns
        if isinstance(query_column_defs, types.StringTypes):
            query_column_defs = json.loads(query_column_defs)

    query_columns = [col['json'] for col in query_column_defs]

    if not query_columns:
        # Typo fix: 'colums' -> 'columns' in the error message.
        msg = 'Unable to compute query columns for job %s' % self.job
        logger.error(msg)
        return QueryError(msg)

    with lock:
        report = TrafficTimeSeriesReport(args.profiler)
        columns = [args.columns[0], base_col.name]
        logger.info("Query Columns: %s" % str(query_columns))

        if self.table.options.groupby == 'host_group':
            host_group_type = 'ByLocation'
        else:
            host_group_type = None

        report.run(
            centricity=args.centricity,
            columns=columns,
            timefilter=args.timefilter,
            trafficexpr=args.trafficexpr,
            resolution=args.resolution,
            sync=False,
            host_group_type=host_group_type,
            query_columns_groupby=config.groupby,
            query_columns=query_columns
        )

    data = self._wait_for_data(report,
                               minpct=cur_report * (100/num_reports),
                               maxpct=(cur_report + 1) * (100/num_reports))
    cur_report += 1

    df = pandas.DataFrame(data,
                          columns=(['time'] +
                                   [col['name']
                                    for col in query_column_defs]))

    # Create ephemeral columns for all the data based
    # on the related base table
    for col in query_column_defs:
        Column.create(self.job.table,
                      col['name'], col['label'],
                      ephemeral=self.job,
                      datatype=base_col.datatype,
                      formatter=base_col.formatter)

    if include_other:
        # Run a separate timeseries query with no column filters
        # to get "totals" then use that to compute an "other" column
        with lock:
            report = SingleQueryReport(args.profiler)
            report.run(
                realm='traffic_overall_time_series',
                centricity=args.centricity,
                groupby=args.profiler.groupbys['time'],
                columns=columns,
                timefilter=args.timefilter,
                trafficexpr=args.trafficexpr,
                resolution=args.resolution,
                sync=False
            )

        totals = self._wait_for_data(
            report,
            minpct=cur_report * (100/num_reports),
            maxpct=(cur_report + 1) * (100/num_reports))

        df = df.set_index('time')
        df['subtotal'] = df.sum(axis=1)
        totals_df = (pandas.DataFrame(totals, columns=['time', 'total'])
                     .set_index('time'))
        df = df.merge(totals_df, left_index=True, right_index=True)

        # BUG FIX: 'other' is the overall total minus the sum of the
        # individually queried columns.  The original chained assignment
        # (df['other'] = df['total'] = df['subtotal']) copied the
        # subtotal into both columns instead of subtracting.
        df['other'] = df['total'] - df['subtotal']

        colnames = (['time'] +
                    [col['name'] for col in query_column_defs] +
                    ['other'])

        # Drop the extraneous total and subtotal columns
        df = (df.reset_index().ix[:, colnames])

        Column.create(self.job.table, 'other', 'Other',
                      ephemeral=self.job,
                      datatype=base_col.datatype,
                      formatter=base_col.formatter)

    logger.info("Report %s returned %s rows" % (self.job, len(df)))

    return QueryComplete(df)