def run(self):
    """Launch a single-query NetProfiler report and return its rows.

    Builds the report criteria from the job's prepared arguments, starts
    the report asynchronously under the module lock, blocks until data is
    available, and clips the result to the table's row limit.
    """
    args = self._prepare_report_args()

    # Assemble the full set of report criteria up front.
    criteria = dict(
        realm=self.table.options.realm,
        groupby=args.profiler.groupbys[self.table.options.groupby],
        centricity=args.centricity,
        columns=args.columns,
        timefilter=args.timefilter,
        trafficexpr=args.trafficexpr,
        data_filter=args.datafilter,
        resolution=args.resolution,
        sort_col=args.sortcol,
        sync=False,
        limit=args.limit,
    )

    # Report creation/launch is serialized via the module-level lock.
    with lock:
        report = SingleQueryReport(args.profiler)
        report.run(**criteria)

    data = self._wait_for_data(report)

    # A positive row count on the table acts as a display limit.
    row_limit = self.table.rows
    data = data[:row_limit] if row_limit > 0 else data

    logger.info("Report %s returned %s rows" % (self.job, len(data)))
    return QueryComplete(data)
def run_top_n(self, config, args, base_col, minpct, maxpct):
    """Run a top-N traffic_summary report and parse rows into column defs.

    Returns a list of parsed column definitions, or an empty list when the
    report produced no rows.
    """
    report_columns = config.columns + [base_col.name]

    # Launch the summary report sorted on the base column.
    with lock:
        report = SingleQueryReport(args.profiler)
        report.run(realm='traffic_summary',
                   centricity=args.centricity,
                   groupby=args.profiler.groupbys[self.table.options.groupby],
                   columns=report_columns,
                   timefilter=args.timefilter,
                   trafficexpr=args.trafficexpr,
                   resolution=args.resolution,
                   sort_col=base_col.name,
                   sync=False)

    rows = self._wait_for_data(report, minpct=minpct, maxpct=maxpct)

    # Guard: nothing came back, so there are no columns to build.
    if not rows:
        msg = ('Error computing top-n columns for TimeSeries report, '
               'no columns were found.')
        logger.error(msg)
        return []

    parse = getattr(self, config.parser)
    top_n = int(self.table.options.top_n)
    return [parse(row) for row in rows[:top_n]]
def run_top_n(self, config, args, base_col, minpct, maxpct):
    """Run a top-N traffic_summary report and parse rows into column defs.

    :param config: per-groupby config providing `columns` and `parser`
    :param args: prepared report arguments
    :param base_col: column used as the sort/value column
    :param minpct: progress floor forwarded to _wait_for_data
    :param maxpct: progress ceiling forwarded to _wait_for_data
    :return: list of parsed column definitions (possibly empty)
    """
    columns = config.columns + [base_col.name]

    with lock:
        report = SingleQueryReport(args.profiler)
        report.run(
            realm='traffic_summary',
            groupby=args.profiler.groupbys[self.table.options.groupby],
            columns=columns,
            timefilter=args.timefilter,
            trafficexpr=args.trafficexpr,
            resolution=args.resolution,
            sort_col=base_col.name,
            sync=False)

    rows = self._wait_for_data(report, minpct=minpct, maxpct=maxpct)

    # Robustness fix: an empty or None result previously fell straight
    # through to the slice below (slicing None raises TypeError).  Log
    # and return an empty list instead.
    if not rows:
        msg = ('Error computing top-n columns for TimeSeries report, '
               'no columns were found.')
        logger.error(msg)
        return []

    defs = []
    parser = getattr(self, config.parser)
    for row in rows[:int(self.table.options.top_n)]:
        defs.append(parser(row))

    return defs
def run_top_n(self, config, args, base_col, minpct, maxpct):
    """Run a top-N traffic_summary report and parse rows into column defs.

    :param config: per-groupby config providing `columns` and `parser`
    :param args: prepared report arguments
    :param base_col: column used as the sort/value column
    :param minpct: progress floor forwarded to _wait_for_data
    :param maxpct: progress ceiling forwarded to _wait_for_data
    :return: list of parsed column definitions (possibly empty)
    """
    columns = config.columns + [base_col.name]

    with lock:
        report = SingleQueryReport(args.profiler)
        report.run(
            realm='traffic_summary',
            groupby=args.profiler.groupbys[self.table.options.groupby],
            columns=columns,
            timefilter=args.timefilter,
            trafficexpr=args.trafficexpr,
            resolution=args.resolution,
            sort_col=base_col.name,
            sync=False
        )

    rows = self._wait_for_data(report, minpct=minpct, maxpct=maxpct)

    # Robustness fix: an empty or None result previously fell straight
    # through to the slice below (slicing None raises TypeError).  Log
    # and return an empty list instead.
    if not rows:
        msg = ('Error computing top-n columns for TimeSeries report, '
               'no columns were found.')
        logger.error(msg)
        return []

    defs = []
    parser = getattr(self, config.parser)
    for row in rows[:int(self.table.options.top_n)]:
        defs.append(parser(row))

    return defs
def run(self):
    """Run a single-query report and append Others/Total summary rows.

    Launches the report asynchronously, converts numeric cells, computes
    an "Others" row (values hidden by the row limit) and a "Total" row,
    clips the data to the row limit, and formats numeric cells with a
    percent-of-total suffix.  (Python 2 dialect: uses `unicode`.)
    """
    args = self._prepare_report_args()

    with lock:
        report = SingleQueryReport(args.profiler)
        report.run(
            realm=self.table.options.realm,
            groupby=args.profiler.groupbys[self.table.options.groupby],
            centricity=args.centricity,
            columns=args.columns,
            timefilter=args.timefilter,
            trafficexpr=args.trafficexpr,
            data_filter=args.datafilter,
            resolution=args.resolution,
            sort_col=self.table.options.sort_col,
            sync=False,
            limit=args.limit
        )

    data = self._wait_for_data(report)

    if not data:
        msg = 'Report %s returned no data' % self.job
        logger.error(msg)
        return QueryError(msg)

    def tonumber(s):
        # Return an int if the string represents an integer, a float if
        # it represents a float, None otherwise.  Check int first since
        # float() also accepts integer strings.
        try:
            return int(s)
        except ValueError:
            try:
                return float(s)
            # Fix: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit; only ValueError is expected.
            except ValueError:
                return None

    others = []
    totals = []
    for i, col in enumerate(args.columns):
        if i == 0:
            others.append(u'Others')
            totals.append(u'Total')
        # NOTE(review): a zero in the first data row makes the column
        # look non-numeric (tonumber's 0 is falsy) -- confirm acceptable.
        elif tonumber(data[0][i]):
            others.append(0)
            totals.append(0)
        else:
            others.append(u'')
            totals.append(u'')

    for i, row in enumerate(data):
        for j, col in enumerate(args.columns):
            val = tonumber(row[j])
            if val:
                row[j] = val
                totals[j] += row[j]
                # Off-by-one fix: the clip below keeps indices
                # 0..rows-1, so the row at index == self.table.rows is
                # hidden and must be folded into "Others" too
                # (previously `>` silently dropped it from both).
                if i >= self.table.rows:
                    others[j] += row[j]

    # Clip the table at the row limit, then add two more
    # for other and total
    if self.table.rows > 0:
        data = data[:self.table.rows]
        self.table.rows += 2

    data.append(others)
    data.append(totals)

    # Formatting:
    #  - Add percents of total to numeric columns
    #  - Strip "ByLocation|" from the groups if it exists
    #  - Parse dns
    for row in data:
        for j, col in enumerate(args.columns):
            if isinstance(row[j], float):
                row[j] = u"%.2f (%.0f%%)" % \
                         (row[j], 100 * row[j] / totals[j])
            elif isinstance(row[j], int):
                row[j] = u"%d (%.0f%%)" % \
                         (row[j], 100 * row[j] / totals[j])
            elif isinstance(row[j], unicode):
                if row[j].startswith(u'ByLocation|'):
                    row[j] = row[j][11:]
                elif (col == u'cli_host_dns' or col == u'srv_host_dns') \
                        and (u'|' in row[j]):
                    # If we're using dns columns, they are ip|name.
                    # We should use the name if it's non-empty,
                    # ip otherwise.
                    ip, name = row[j].split(u'|')
                    if name:
                        row[j] = name
                    else:
                        row[j] = ip

    logger.info("Report %s returned %s rows" % (self.job, len(data)))
    return QueryComplete(data)
def run(self):
    """Run a single-query report and append Others/Total summary rows.

    Launches the report asynchronously, converts numeric cells, computes
    an "Others" row (values hidden by the row limit) and a "Total" row,
    clips the data to the row limit, and formats numeric cells with a
    percent-of-total suffix.
    """
    args = self._prepare_report_args()

    with lock:
        report = SingleQueryReport(args.profiler)
        report.run(
            realm=self.table.options.realm,
            groupby=args.profiler.groupbys[self.table.options.groupby],
            centricity=args.centricity,
            columns=args.columns,
            timefilter=args.timefilter,
            trafficexpr=args.trafficexpr,
            data_filter=args.datafilter,
            resolution=args.resolution,
            sort_col=self.table.options.sort_col,
            sync=False,
            limit=args.limit
        )

    data = self._wait_for_data(report)

    if not data:
        msg = 'Report %s returned no data' % self.job
        logger.error(msg)
        return QueryError(msg)

    def tonumber(s):
        # Return an int if the string represents an integer, a float if
        # it represents a float, None otherwise.  Check int first since
        # float() also accepts integer strings.
        try:
            return int(s)
        except ValueError:
            try:
                return float(s)
            # Fix: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit; only ValueError is expected.
            except ValueError:
                return None

    others = []
    totals = []
    for i, col in enumerate(args.columns):
        if i == 0:
            others.append(u'Others')
            totals.append(u'Total')
        # NOTE(review): a zero in the first data row makes the column
        # look non-numeric (tonumber's 0 is falsy) -- confirm acceptable.
        elif tonumber(data[0][i]):
            others.append(0)
            totals.append(0)
        else:
            others.append(u'')
            totals.append(u'')

    for i, row in enumerate(data):
        for j, col in enumerate(args.columns):
            val = tonumber(row[j])
            if val:
                row[j] = val
                totals[j] += row[j]
                # Off-by-one fix: the clip below keeps indices
                # 0..rows-1, so the row at index == self.table.rows is
                # hidden and must be folded into "Others" too
                # (previously `>` silently dropped it from both).
                if i >= self.table.rows:
                    others[j] += row[j]

    # Clip the table at the row limit, then add two more
    # for other and total
    if self.table.rows > 0:
        data = data[:self.table.rows]
        self.table.rows += 2

    data.append(others)
    data.append(totals)

    # Formatting:
    #  - Add percents of total to numeric columns
    #  - Strip "ByLocation|" from the groups if it exists
    #  - Parse dns
    for row in data:
        for j, col in enumerate(args.columns):
            if isinstance(row[j], float):
                row[j] = "%.2f (%.0f%%)" % \
                         (row[j], 100 * row[j] / totals[j])
            elif isinstance(row[j], int):
                row[j] = "%d (%.0f%%)" % \
                         (row[j], 100 * row[j] / totals[j])
            elif isinstance(row[j], str):
                if row[j].startswith('ByLocation|'):
                    row[j] = row[j][11:]
                elif ((col == 'cli_host_dns' or col == 'srv_host_dns')
                        and ('|' in row[j])):
                    # If we're using dns columns, they are ip|name.
                    # We should use the name if it's non-empty,
                    # ip otherwise.
                    ip, name = row[j].split('|')
                    if name:
                        row[j] = name
                    else:
                        row[j] = ip

    logger.info("Report %s returned %s rows" % (self.job, len(data)))
    return QueryComplete(data)
def run(self):
    """Run a traffic time-series report, one series per top-N column value.

    Optionally runs a preliminary top-N report to pick the columns, then a
    TrafficTimeSeriesReport for those columns, and (when not filtering) an
    overall time-series report whose totals are used to derive an "other"
    series for traffic not captured by the top-N columns.
    """
    args = self._prepare_report_args()
    base_table = Table.from_ref(self.table.options.base)
    base_col = base_table.get_columns()[0]

    # Only calculate other when we aren't filtering data
    include_other = self.table.options.include_other
    if self.job.criteria.netprofiler_filterexpr:
        include_other = False

    if self.table.options.groupby not in self.CONFIG:
        raise ValueError('not supported for groupby=%s' %
                         self.table.options.groupby)

    config = self.CONFIG[self.table.options.groupby]

    # num_reports / cur_report are used to compute min/max pct
    num_reports = (1 +
                   (1 if self.table.options.top_n else 0) +
                   (1 if include_other else 0))
    cur_report = 0

    if self.table.options.top_n:
        # Run a top-n report to drive the criteria for each column
        query_column_defs = self.run_top_n(config, args, base_col,
                                           minpct=0,
                                           maxpct=(100 / num_reports))
        cur_report += 1
    else:
        query_column_defs = self.job.criteria.query_columns
        # NOTE(review): types.StringTypes is Python-2 only -- confirm
        # this module still targets Python 2.
        if isinstance(query_column_defs, types.StringTypes):
            query_column_defs = json.loads(query_column_defs)

    query_columns = [col['json'] for col in query_column_defs]

    if not query_columns:
        # Fix: corrected typo in error message ("colums" -> "columns")
        msg = 'Unable to compute query columns for job %s' % self.job
        logger.error(msg)
        return QueryError(msg)

    with lock:
        report = TrafficTimeSeriesReport(args.profiler)
        columns = [args.columns[0], base_col.name]
        logger.info("Query Columns: %s" % str(query_columns))

        if self.table.options.groupby == 'host_group':
            host_group_type = 'ByLocation'
        else:
            host_group_type = None

        report.run(
            centricity=args.centricity,
            columns=columns,
            timefilter=args.timefilter,
            trafficexpr=args.trafficexpr,
            resolution=args.resolution,
            sync=False,
            host_group_type=host_group_type,
            query_columns_groupby=config.groupby,
            query_columns=query_columns
        )

    data = self._wait_for_data(report,
                               minpct=cur_report * (100 / num_reports),
                               maxpct=(cur_report + 1) * (100 / num_reports))
    cur_report += 1

    df = pandas.DataFrame(data,
                          columns=(['time'] +
                                   [col['name']
                                    for col in query_column_defs]))

    # Create ephemeral columns for all the data based
    # on the related base table
    for col in query_column_defs:
        Column.create(self.job.table, col['name'], col['label'],
                      ephemeral=self.job, datatype=base_col.datatype,
                      formatter=base_col.formatter)

    if include_other:
        # Run a separate timeseries query with no column filters
        # to get "totals" then use that to compute an "other" column
        with lock:
            report = SingleQueryReport(args.profiler)
            report.run(
                realm='traffic_overall_time_series',
                centricity=args.centricity,
                groupby=args.profiler.groupbys['time'],
                columns=columns,
                timefilter=args.timefilter,
                trafficexpr=args.trafficexpr,
                resolution=args.resolution,
                sync=False
            )

        totals = self._wait_for_data(
            report,
            minpct=cur_report * (100 / num_reports),
            maxpct=(cur_report + 1) * (100 / num_reports))

        df = df.set_index('time')
        df['subtotal'] = df.sum(axis=1)
        totals_df = (pandas.DataFrame(totals, columns=['time', 'total'])
                     .set_index('time'))

        df = df.merge(totals_df, left_index=True, right_index=True)

        # Fix: "other" is the traffic NOT captured by the top-N series,
        # i.e. total minus subtotal.  The original chained assignment
        # (`df['other'] = df['total'] = df['subtotal']`) overwrote the
        # totals with the subtotal instead of computing the difference.
        df['other'] = df['total'] - df['subtotal']

        colnames = (['time'] +
                    [col['name'] for col in query_column_defs] +
                    ['other'])

        # Drop the extraneous total and subtotal columns.
        # (.loc replaces the long-deprecated .ix indexer; with a list of
        # column labels the two behave identically.)
        df = df.reset_index().loc[:, colnames]

        Column.create(self.job.table, 'other', 'Other',
                      ephemeral=self.job, datatype=base_col.datatype,
                      formatter=base_col.formatter)

    logger.info("Report %s returned %s rows" % (self.job, len(df)))
    return QueryComplete(df)