Example #1
    def analyze(self, jobs):
        """ Pivot data results from jobs """

        # jobs maps name -> Job; with a single dependent job, take its frame
        df = list(jobs.values())[0].data()

        if (self.table.options.pivot_column is None
                or self.table.options.pivot_value is None):
            msg = ('Both "pivot_column" and "pivot_value" options need '
                   'to be specified for PivotTables.')
            logger.error(msg)
            return QueryError(msg)

        pivot = df.pivot(index=self.table.options.pivot_index,
                         columns=self.table.options.pivot_column,
                         values=self.table.options.pivot_value).reset_index()

        # since numeric values may now be columns, change them to strings
        # for proper pattern matching downstream
        pivot.rename(columns=str, inplace=True)

        col_names = list(pivot.columns)
        cur_cols = [c.name for c in self.job.get_columns(synthetic=False)]

        for c in col_names:
            if c not in cur_cols:
                label = self.table.options.pivot_column_prefix + c
                Column.create(self.job.table,
                              name=c,
                              label=label,
                              ephemeral=self.job,
                              datatype=self.table.options.pivot_datatype)

        return QueryComplete(pivot)
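
The pivot-then-stringify step above is plain pandas and is easy to test in isolation. A minimal standalone sketch of the same reshaping, with made-up column names:

    import pandas as pd

    # Hypothetical long-format data: one row per (time, interface) pair.
    df = pd.DataFrame({'time': [1, 1, 2, 2],
                       'interface': [10, 20, 10, 20],
                       'bytes': [100, 200, 150, 250]})

    # Each interface becomes its own column; reset_index() flattens it back.
    pivot = df.pivot(index='time', columns='interface',
                     values='bytes').reset_index()

    # The new column labels are numeric interface ids; cast them to strings
    # so downstream name matching works uniformly.
    pivot.rename(columns=str, inplace=True)
    print(list(pivot.columns))   # ['time', '10', '20']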
Example #2
    def run(self):
        # Collect all dependent tables
        options = self.table.options

        model = get_schema_map()[options.schema]
        df = model.objects.get_dataframe()

        if df.empty:
            return QueryError(
                'No metrics defined for schema "%s".  Add new metrics '
                'using the <a href="%s">admin interface</a>.'
                % (options.schema,
                   reverse('admin:metrics_plugin_%s_changelist'
                           % model.__name__.lower()))
            )

        # Add some default columns as needed.
        # New ones are created as normal columns rather than ephemeral ones:
        # the table schema will not be dynamic, and any changes will be made
        # via code changes and/or a report reload.

        # We check whether each column has already been defined, to allow
        # customization of its label or display.
        keys = list(df.keys())

        for k in keys:
            try:
                Column.objects.get(table=self.job.table, name=k)
            except ObjectDoesNotExist:
                Column.create(self.job.table, k, k.title(), datatype='string')

        logger.debug("%s: completed successfully" % self)
        return QueryComplete(df)
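
The reverse() call above leans on Django's predictable admin URL names, which follow the pattern admin:<app_label>_<model_name>_changelist. A sketch of the same lookup for a hypothetical Metric model in the metrics_plugin app:

    from django.urls import reverse
    # (use django.core.urlresolvers.reverse on Django < 1.10)

    # For a model class named Metric, __name__.lower() is 'metric', so this
    # resolves the admin changelist page for that model.
    url = reverse('admin:metrics_plugin_metric_changelist')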
Example #3
    def post_run(self):
        """Execute any Functions saved to Table.

        In most cases, this function will be simply overridden by a
        subclass which will implement its own detailed processing.  This
        method provides a shortcut to support passing a Function
        directly to the create method.
        """
        options = self.table.options
        if options.function is None:
            return QueryError("Table %s has no analysis function defined" %
                              self.table)

        try:
            df = options.function(self, options.tables, self.job.criteria)

        except Exception as e:
            return QueryError("Analysis function %s failed" % options.function,
                              e)

        logger.debug("%s: completed successfully" % self)
        return QueryComplete(df)
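
The call above fixes the contract for options.function: it receives the query object, the dependent table references, and the run criteria, and must return a DataFrame. A minimal sketch of such a function (the name and returned columns are hypothetical):

    import pandas as pd

    def sample_analysis(query, tables, criteria):
        # Hypothetical analysis function: echo the requested time range
        # back as a one-row frame.
        return pd.DataFrame([{'starttime': criteria.starttime,
                              'endtime': criteria.endtime}])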
Example #4
    def analyze(self, jobs):
        logger.debug('%s analyze - received jobs: %s' % (self, jobs))

        basetable = Table.from_ref(
            self.table.options['related_tables']['template']
        )
        data = jobs['source'].data()
        if data is None:
            return QueryError('No data available to analyze')

        # find column whose min/max is largest deviation from mean
        # then take row from that column where min/max occurs
        if self.table.options['max']:
            idx = (data.max() / data.mean()).idxmax()
            frow = data.loc[data[idx].idxmax()]
        else:
            idx = (data.min() / data.mean()).idxmin()
            frow = data.loc[data[idx].idxmin()]

        # get time value from extracted row to calculate new start/end times
        ftime = frow['time']
        duration = parse_timedelta(self.table.options['zoom_duration'])
        resolution = parse_timedelta(self.table.options['zoom_resolution'])
        stime = ftime - (duration / 2)
        etime = ftime + (duration / 2)

        criteria = self.job.criteria

        if 'resolution' in criteria:
            criteria['resolution'] = resolution
        else:
            criteria['granularity'] = resolution

        criteria['duration'] = duration
        criteria['_orig_duration'] = duration
        criteria['starttime'] = stime
        criteria['_orig_starttime'] = stime
        criteria['endtime'] = etime
        criteria['_orig_endtime'] = etime

        logger.debug('Creating FocusedAnalysis job with updated criteria %s'
                     % criteria)

        job = Job.create(basetable, criteria, self.job.update_progress)
        return QueryContinue(self.finish, {'job': job})
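
The two comments in the middle of analyze() compress a pandas idiom worth unpacking: (data.max() / data.mean()).idxmax() names the column whose peak deviates most from its own mean, and the inner idxmax() locates the row where that peak occurs. A standalone sketch with made-up data:

    import pandas as pd

    data = pd.DataFrame({'time': [0, 1, 2, 3],
                         'cpu':  [10.0, 12.0, 11.0, 13.0],
                         'disk': [5.0, 5.0, 50.0, 5.0]})   # one big spike

    metrics = data[['cpu', 'disk']]
    idx = (metrics.max() / metrics.mean()).idxmax()   # -> 'disk'
    frow = data.loc[metrics[idx].idxmax()]            # row of the spike
    print(idx, frow['time'])                          # disk 2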
Example #5
    def analyze(self, jobs):

        tag = Tag.objects.get(id=self.job.criteria.tag).name

        cmd_table = Table.from_ref(
            self.table.options.related_tables['base'])

        dep_jobs = {}

        for sh_db in Device.objects.filter_by_tag(tag, module='steelhead',
                                                  enabled=True):
            criteria = copy.copy(self.job.criteria)
            criteria.dev = sh_db
            job = Job.create(table=cmd_table, criteria=criteria,
                             parent=self.job)
            dep_jobs[job.id] = job

        if not dep_jobs:
            return QueryError("No enabled steelhead "
                              "devices found with tag '{}'".format(tag))

        return QueryContinue(self.collect, jobs=dep_jobs)
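
QueryContinue hands the dependent-job map to the named callback once every job finishes. A collect() for this fan-out pattern typically concatenates the per-device frames; a sketch, assuming each completed job's data() returns a DataFrame or None:

    import pandas as pd

    def collect(self, jobs=None):
        frames = [job.data() for job in jobs.values()]
        frames = [f for f in frames if f is not None]
        if not frames:
            return QueryError('No data returned from any steelhead device')
        return QueryComplete(pd.concat(frames, ignore_index=True))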
Example #6
    def _analyze(self, jobs=None):
        logger.debug("%s: all dependent jobs complete" % str(self))

        if jobs:
            for (name, job) in jobs.items():
                if job.status == job.ERROR:
                    return QueryError("Dependent Job '%s' failed: %s" %
                                      (name, job.message))

        if hasattr(self, 'analyze'):
            return self.analyze(jobs)
        else:
            # Compatibility mode - old code uses def post_run() and expects
            # self.tables to be set
            tables = {}
            if jobs:
                for (name, job) in jobs.items():
                    f = job.data()
                    tables[name] = f
                    logger.debug("%s: Table[%s] - %d rows" %
                                 (self, name, len(f) if f is not None else 0))

            self.tables = tables
            return self.post_run()
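
The dispatch above supports two subclass styles. A sketch of each, assuming the surrounding base class is the framework's AnalysisQuery (class names here are hypothetical):

    class NewStyleQuery(AnalysisQuery):
        def analyze(self, jobs):
            # jobs: name -> completed Job
            return QueryComplete(jobs['base'].data())

    class OldStyleQuery(AnalysisQuery):
        def post_run(self):
            # self.tables: name -> DataFrame, populated by _analyze() above
            return QueryComplete(self.tables['base'])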
Example #7
    def run(self):
        """ Main execution method
        """
        args = self._prepare_report_args()

        with lock:
            report = SingleQueryReport(args.profiler)
            report.run(
                realm=self.table.options.realm,
                groupby=args.profiler.groupbys[self.table.options.groupby],
                centricity=args.centricity,
                columns=args.columns,
                timefilter=args.timefilter,
                trafficexpr=args.trafficexpr,
                data_filter=args.datafilter,
                resolution=args.resolution,
                sort_col=self.table.options.sort_col,
                sync=False,
                limit=args.limit
            )

        data = self._wait_for_data(report)

        if not data:
            msg = 'Report %s returned no data' % self.job
            logger.error(msg)
            return QueryError(msg)

        def tonumber(s):
            # Return an int if the string represents an integer, a float
            # if it represents a float, and None otherwise.  Try int first,
            # since float() also accepts integer strings.
            try:
                return int(s)
            except ValueError:
                try:
                    return float(s)
                except ValueError:
                    return None

        others = []
        totals = []
        for i, col in enumerate(args.columns):
            if i == 0:
                others.append(u'Others')
                totals.append(u'Total')
            elif tonumber(data[0][i]) is not None:
                others.append(0)
                totals.append(0)
            else:
                others.append(u'')
                totals.append(u'')

        for i, row in enumerate(data):
            for j, col in enumerate(args.columns):
                val = tonumber(row[j])
                if val is not None:
                    row[j] = val
                    totals[j] += row[j]
                    if i >= self.table.rows:
                        others[j] += row[j]

        # Clip the table at the row limit, then add two more
        # for other and total
        if self.table.rows > 0:
            data = data[:self.table.rows]
        self.table.rows += 2

        data.append(others)
        data.append(totals)

        # Formatting:
        #  - Add percents of total to numeric columns
        #  - Strip "ByLocation|" from the groups if it exists
        #  - Parse dns
        for row in data:
            for j, col in enumerate(args.columns):
                if isinstance(row[j], float):
                    row[j] = "%.2f  (%.0f%%)" % \
                            (row[j], 100 * row[j] / totals[j])
                elif isinstance(row[j], int):
                    row[j] = "%d  (%.0f%%)" % \
                            (row[j], 100 * row[j] / totals[j])
                elif isinstance(row[j], str):
                    if row[j].startswith('ByLocation|'):
                        row[j] = row[j][11:]
                    elif ((col == 'cli_host_dns' or col == 'srv_host_dns')
                          and ('|' in row[j])):
                        # If we're using dns columns, they are ip|name
                        # We should use the name if it's non-empty,
                        # ip otherwise
                        ip, name = row[j].split('|')
                        if name:
                            row[j] = name
                        else:
                            row[j] = ip
        logger.info("Report %s returned %s rows" % (self.job, len(data)))
        return QueryComplete(data)
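
For reference, the post-processing above converts raw strings to numbers, formats each numeric cell as "value (pct%)", and splits "ip|name" dns values. A compressed sketch of those two transforms on made-up values:

    total = 200
    cell = 50
    print('%d  (%.0f%%)' % (cell, 100 * cell / total))   # '50  (25%)'

    # dns columns arrive as 'ip|name'; prefer the name when it is non-empty.
    ip, name = '10.0.0.1|server.example.com'.split('|')
    print(name or ip)                                    # 'server.example.com'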
Example #8
    def run(self):
        args = self._prepare_report_args()
        base_table = Table.from_ref(self.table.options.base)
        base_col = base_table.get_columns()[0]

        # only calculate other when we aren't filtering data
        include_other = self.table.options.include_other
        if self.job.criteria.netprofiler_filterexpr:
            include_other = False

        if self.table.options.groupby not in self.CONFIG:
            raise ValueError('not supported for groupby=%s' %
                             self.table.options.groupby)

        config = self.CONFIG[self.table.options.groupby]

        # num_reports / cur_report are used to compute min/max pct
        num_reports = (1 +
                       (1 if self.table.options.top_n else 0) +
                       (1 if include_other else 0))
        cur_report = 0

        if self.table.options.top_n:
            # Run a top-n report to drive the criteria for each column
            query_column_defs = self.run_top_n(config, args, base_col,
                                               minpct=0,
                                               maxpct=(100/num_reports))
            cur_report += 1
        else:
            query_column_defs = self.job.criteria.query_columns
            if isinstance(query_column_defs, str):
                query_column_defs = json.loads(query_column_defs)

        query_columns = [col['json'] for col in query_column_defs]

        if not query_columns:
            msg = 'Unable to compute query columns for job %s' % self.job
            logger.error(msg)
            return QueryError(msg)

        with lock:
            report = TrafficTimeSeriesReport(args.profiler)
            columns = [args.columns[0], base_col.name]
            logger.info("Query Columns: %s" % str(query_columns))

            if self.table.options.groupby == 'host_group':
                host_group_type = 'ByLocation'
            else:
                host_group_type = None

            report.run(
                centricity=args.centricity,
                columns=columns,
                timefilter=args.timefilter,
                trafficexpr=args.trafficexpr,
                resolution=args.resolution,
                sync=False,
                host_group_type=host_group_type,
                query_columns_groupby=config.groupby,
                query_columns=query_columns
            )

        data = self._wait_for_data(report,
                                   minpct=cur_report * (100/num_reports),
                                   maxpct=(cur_report + 1) * (100/num_reports))
        cur_report += 1

        df = pandas.DataFrame(data,
                              columns=(['time'] + [col['name'] for
                                                   col in query_column_defs]))

        # Create ephemeral columns for all the data based
        # on the related base table
        for col in query_column_defs:
            Column.create(self.job.table, col['name'], col['label'],
                          ephemeral=self.job, datatype=base_col.datatype,
                          formatter=base_col.formatter)

        if include_other:
            # Run a separate timeseries query with no column filters
            # to get "totals" then use that to compute an "other" column

            with lock:
                report = SingleQueryReport(args.profiler)
                report.run(
                    realm='traffic_overall_time_series',
                    centricity=args.centricity,
                    groupby=args.profiler.groupbys['time'],
                    columns=columns,
                    timefilter=args.timefilter,
                    trafficexpr=args.trafficexpr,
                    resolution=args.resolution,
                    sync=False
                )

            totals = self._wait_for_data(report,
                                         minpct=cur_report * (100/num_reports),
                                         maxpct=(cur_report + 1) * (100/num_reports))

            df = df.set_index('time')
            df['subtotal'] = df.sum(axis=1)
            totals_df = (pandas.DataFrame(totals, columns=['time', 'total'])
                         .set_index('time'))

            df = df.merge(totals_df, left_index=True, right_index=True)
            df['other'] = df['total'] - df['subtotal']
            colnames = (['time'] +
                        [col['name'] for col in query_column_defs] +
                        ['other'])

            # Drop the extraneous total and subtotal columns
            df = df.reset_index().loc[:, colnames]

            Column.create(self.job.table, 'other', 'Other',
                          ephemeral=self.job, datatype=base_col.datatype,
                          formatter=base_col.formatter)

        logger.info("Report %s returned %s rows" % (self.job, len(df)))
        return QueryComplete(df)
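
The include_other branch merges a totals series and derives the residual traffic; the arithmetic stands alone (column names made up):

    import pandas as pd

    df = pd.DataFrame({'time': [1, 2],
                       'web':  [60, 70],
                       'mail': [20, 10]}).set_index('time')
    df['subtotal'] = df.sum(axis=1)

    totals = pd.DataFrame({'time': [1, 2],
                           'total': [100, 100]}).set_index('time')
    df = df.merge(totals, left_index=True, right_index=True)

    # 'other' is whatever traffic the queried columns did not account for.
    df['other'] = df['total'] - df['subtotal']
    df = df.reset_index()[['time', 'web', 'mail', 'other']]
    print(df['other'].tolist())   # [20, 20]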