Example #1
0
    def analyze(self, jobs):
        """ Pivot data results from jobs """

        df = jobs.values()[0].data()

        if (self.table.options.pivot_column is None
                or self.table.options.pivot_value is None):
            msg = ('Both "pivot_column" and "pivot_value" options need '
                   'to be specified for PivotTables.')
            logger.error(msg)
            return QueryError(msg)

        pivot = df.pivot(index=self.table.options.pivot_index,
                         columns=self.table.options.pivot_column,
                         values=self.table.options.pivot_value).reset_index()

        # since numeric values may now be columns, change them to strings
        # for proper pattern matching downstream
        pivot.rename(columns=str, inplace=True)

        col_names = list(pivot.columns)
        cur_cols = [c.name for c in self.job.get_columns(synthetic=False)]

        for c in col_names:
            if c not in cur_cols:
                label = self.table.options.pivot_column_prefix + c
                Column.create(self.job.table,
                              name=c,
                              label=label,
                              ephemeral=self.job,
                              datatype=self.table.options.pivot_datatype)

        return QueryComplete(pivot)
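
A standalone sketch of the pivot step above on a toy DataFrame (the column
names 'time', 'host', 'bytes' are made up for illustration):

    import pandas as pd

    df = pd.DataFrame({'time': [1, 1, 2, 2],
                       'host': ['a', 'b', 'a', 'b'],
                       'bytes': [10, 20, 30, 40]})

    # 'time' stays as the row key; each distinct 'host' becomes a column
    pivot = df.pivot(index='time', columns='host',
                     values='bytes').reset_index()

    # normalize column labels to strings, as the example does
    pivot.rename(columns=str, inplace=True)
    print(pivot)  # columns: time, a, b
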
Example #2
0
    def analyze(self, jobs):
        """ Pivot data results from jobs """
        job = jobs.values()[0]

        rs = self.table.options.resample_interval
        try:
            rs = '{0}s'.format(int(job.criteria.resample_interval))
        except ValueError:
            logger.warning("{0}: resample_interval ({2}) not set or valid in "
                           "job criteria {1}".format(self, job.criteria, rs))

            job.criteria.resample_interval = u'{0}'.format(rs.split('s')[0])

        df = job.data()
        rs_df = resample(df, self.table.options.resample_column, rs,
                         self.table.options.resample_operation)

        curcols = [c.name for c in self.job.get_columns(synthetic=False)]
        jcols = [c.name for c in job.get_columns(synthetic=False)]
        for c in jcols:
            if c not in curcols:
                # Default data type is float.
                Column.create(self.job.table,
                              name=c,
                              label=c,
                              ephemeral=self.job)

        return QueryComplete(rs_df)
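
The resample() helper is defined elsewhere; a plausible minimal
implementation (an assumption, not the actual library code) would index on
the time column and apply a pandas resample with the requested aggregation:

    import pandas as pd

    def resample(df, timecol, interval, operation='sum'):
        # assumes df[timecol] holds datetimes; interval is e.g. '60s'
        rs_df = df.set_index(timecol).resample(interval).agg(operation)
        return rs_df.reset_index()
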
Example #3
0
    def run(self):
        # Collect all dependent tables
        options = self.table.options

        model = get_schema_map()[options.schema]
        df = model.objects.get_dataframe()

        if df.empty:
            return QueryError(
                'No metrics defined for schema "%s".  Add new metrics '
                'using the <a href="%s">admin interface</a>.'
                % (options.schema,
                   reverse('admin:metrics_plugin_%s_changelist'
                           % model.__name__.lower()))
            )

        # Add some default columns as needed
        # new ones are created as normal columns vs ephemeral - the table
        # schema will not be dynamic, any changes will be done via code
        # changes and/or a report reload.

        # We check to see if some have already been defined to allow for
        # customization of the actual labels or column display
        keys = list(df.keys())

        for k in keys:
            try:
                Column.objects.get(table=self.job.table, name=k)
            except ObjectDoesNotExist:
                Column.create(self.job.table, k, k.title(), datatype='string')

        logger.debug("%s: completed successfully" % self)
        return QueryComplete(df)
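
The try/except around Column.objects.get() is the standard Django
look-before-create idiom. If Column.create were only a thin wrapper over
model creation (an assumption that may not hold in this codebase), the loop
could be collapsed with get_or_create:

    for k in keys:
        col, created = Column.objects.get_or_create(
            table=self.job.table, name=k,
            defaults={'label': k.title(), 'datatype': 'string'})
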
Example #4
0
    def analyze(self, jobs):
        """ Pivot data results from jobs """
        job = jobs.values()[0]

        rs = self.table.options.resample_interval
        try:
            rs = '{0}s'.format(int(job.criteria.resample_interval))
        except ValueError:
            logger.warning(
                "{0}: resample_interval ({1}) not set or valid in "
                "job criteria {2}".format(self, rs, job.criteria))

            job.criteria.resample_interval = u'{0}'.format(rs.split('s')[0])

        df = job.data()
        rs_df = resample(df,
                         self.table.options.resample_column,
                         rs,
                         self.table.options.resample_operation)

        curcols = [c.name for c in self.job.get_columns(synthetic=False)]
        jcols = [c.name for c in job.get_columns(synthetic=False)]
        for c in jcols:
            if c not in curcols:
                # Default data type is float.
                Column.create(self.job.table,
                              name=c,
                              label=c,
                              ephemeral=self.job)

        return QueryComplete(rs_df)
Example #5
0
    def analyze(self, jobs):
        """ Pivot data results from jobs """

        df = jobs.values()[0].data()

        if (self.table.options.pivot_column is None or
                self.table.options.pivot_value is None):
            msg = ('Both "pivot_column" and "pivot_value" options need '
                   'to be specified for PivotTables.')
            logger.error(msg)
            return QueryError(msg)

        pivot = df.pivot(index=self.table.options.pivot_index,
                         columns=self.table.options.pivot_column,
                         values=self.table.options.pivot_value).reset_index()

        # since numeric values may now be columns, change them to strings
        # for proper pattern matching downstream
        pivot.rename(columns=str, inplace=True)

        col_names = list(pivot.columns)
        cur_cols = [c.name for c in self.job.get_columns(synthetic=False)]

        for c in col_names:
            if c not in cur_cols:
                label = self.table.options.pivot_column_prefix + c
                Column.create(self.job.table, name=c, label=label,
                              ephemeral=self.job,
                              datatype=self.table.options.pivot_datatype)

        return QueryComplete(pivot)
Example #6
0
    def _refresh_columns(self, profiler, report, query):

        # Delete columns
        for col in self.table.get_columns():
            col.delete()

        cols = []
        for col in query.columns:
            if col.id >= EPHEMERAL_COLID:
                cols.append(col)

        if not cols:
            cols = report.get_columns(self.table.options.widget_id)

        if query.is_time_series:
            # 98 is the column id for 'time'
            cols = [profiler.columns[98]] + cols

        for col in cols:
            if (col.json['type'] == 'float' or col.json['type'] == 'reltime'
                    or col.json['rate'] == 'opt'):

                data_type = 'float'

            elif col.json['type'] == 'time':
                data_type = 'time'

            elif col.json['type'] == 'int':
                data_type = 'integer'

            else:
                data_type = 'string'

            col_name = col.label if col.ephemeral else col.key
            Column.create(self.table,
                          col_name,
                          col.label,
                          datatype=data_type,
                          iskey=col.iskey)
Example #7
0
    def _refresh_columns(self, profiler, report, query):

        # Delete columns
        for col in self.table.get_columns():
            col.delete()

        cols = []
        for col in query.columns:
            if col.id >= EPHEMERAL_COLID:
                cols.append(col)

        if not cols:
            cols = report.get_columns(self.table.options.widget_id)

        if query.is_time_series:
            # 98 is the column id for 'time'
            cols = [profiler.columns[98]] + cols

        for col in cols:
            if (col.json['type'] == 'float' or
                    col.json['type'] == 'reltime' or
                    col.json['rate'] == 'opt'):

                data_type = 'float'

            elif col.json['type'] == 'time':
                data_type = 'time'

            elif col.json['type'] == 'int':
                data_type = 'integer'

            else:
                data_type = 'string'

            col_name = col.label if col.ephemeral else col.key
            Column.create(self.table, col_name, col.label,
                          datatype=data_type, iskey=col.iskey)
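
The datatype if/elif chain above can also be written as a lookup table; a
behavior-preserving sketch over the same col.json fields:

    TYPE_MAP = {'float': 'float', 'reltime': 'float',
                'time': 'time', 'int': 'integer'}

    def datatype_for(col):
        # rate == 'opt' forces float regardless of the declared type
        if col.json['rate'] == 'opt':
            return 'float'
        return TYPE_MAP.get(col.json['type'], 'string')
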
Example #8
0
    def run(self):
        """ Main execution method
        """
        criteria = self.job.criteria

        if criteria.netprofiler_device == '':
            logger.debug('%s: No netprofiler device selected' % self.table)
            self.job.mark_error("No NetProfiler Device Selected")
            return False

        profiler = DeviceManager.get_device(criteria.netprofiler_device)
        report = ServiceLocationReport(profiler)

        tf = TimeFilter(start=criteria.starttime,
                        end=criteria.endtime)

        logger.info(
            'Running NetProfilerServiceByLocTable %d report for timeframe %s' %
            (self.table.id, str(tf)))

        with lock:
            report.run(timefilter=tf, sync=False)

        done = False
        logger.info("Waiting for report to complete")
        while not done:
            time.sleep(0.5)
            with lock:
                s = report.status()

            self.job.mark_progress(progress=int(s['percent']))
            done = (s['status'] == 'completed')

        # Retrieve the data
        with lock:
            data = report.get_data()
            query = report.get_query_by_index(0)

            tz = criteria.starttime.tzinfo
            # Update criteria
            criteria.starttime = (datetime.datetime
                                  .utcfromtimestamp(query.actual_t0)
                                  .replace(tzinfo=tz))
            criteria.endtime = (datetime.datetime
                                .utcfromtimestamp(query.actual_t1)
                                .replace(tzinfo=tz))

        self.job.safe_update(actual_criteria=criteria)

        if len(data) == 0:
            return QueryComplete(None)

        # Add ephemeral columns for everything
        Column.create(self.job.table, 'location', 'Location',
                      ephemeral=self.job, datatype='string')
        for k in data[0].keys():
            if k == 'location':
                continue

            Column.create(self.job.table, k, k,
                          ephemeral=self.job, datatype='string',
                          formatter='rvbd.formatHealth')

        df = pandas.DataFrame(data)

        if self.job.table.options.rgb:
            state_map = {Service.SVC_NOT_AVAILABLE: 'gray',
                         Service.SVC_DISABLED: 'gray',
                         Service.SVC_INIT: 'gray',
                         Service.SVC_NORMAL: 'green',
                         Service.SVC_LOW: 'yellow',
                         Service.SVC_MED: 'yellow',
                         Service.SVC_HIGH: 'red',
                         Service.SVC_NODATA: 'gray'}

            df = df.replace(state_map.keys(),
                            state_map.values())

        return QueryComplete(df)
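
The state_map replace at the end converts numeric service states to color
names cell by cell. A toy illustration of the same pandas call (the values
here are made up):

    import pandas as pd

    df = pd.DataFrame({'svc': [0, 2, 4]})
    state_map = {0: 'gray', 2: 'green', 4: 'red'}
    df = df.replace(list(state_map.keys()), list(state_map.values()))
    # df['svc'] is now ['gray', 'green', 'red']; passing the dict
    # directly, df.replace(state_map), is equivalent
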
Example #9
0
    def run(self):
        args = self._prepare_report_args()
        base_table = Table.from_ref(self.table.options.base)
        base_col = base_table.get_columns()[0]

        # only calculate other when we aren't filtering data
        include_other = self.table.options.include_other
        if self.job.criteria.netprofiler_filterexpr:
            include_other = False

        if self.table.options.groupby not in self.CONFIG:
            raise ValueError('not supported for groupby=%s' %
                             self.table.options.groupby)

        config = self.CONFIG[self.table.options.groupby]

        # num_reports / cur_report are used to compute min/max pct
        num_reports = (1 +
                       (1 if self.table.options.top_n else 0) +
                       (1 if include_other else 0))
        cur_report = 0

        if self.table.options.top_n:
            # Run a top-n report to drive the criteria for each column
            query_column_defs = self.run_top_n(config, args, base_col,
                                               minpct=0,
                                               maxpct=(100/num_reports))
            cur_report += 1
        else:
            query_column_defs = self.job.criteria.query_columns
            if isinstance(query_column_defs, types.StringTypes):
                query_column_defs = json.loads(query_column_defs)

        query_columns = [col['json'] for col in query_column_defs]

        with lock:
            report = TrafficTimeSeriesReport(args.profiler)
            columns = [args.columns[0], base_col.name]
            logger.info("Query Columns: %s" % str(query_columns))

            if self.table.options.groupby == 'host_group':
                host_group_type = 'ByLocation'
            else:
                host_group_type = None

            report.run(
                centricity=args.centricity,
                columns=columns,
                timefilter=args.timefilter,
                trafficexpr=args.trafficexpr,
                resolution=args.resolution,
                sync=False,
                host_group_type=host_group_type,
                query_columns_groupby=config.groupby,
                query_columns=query_columns
            )

        data = self._wait_for_data(report,
                                   minpct=cur_report * (100/num_reports),
                                   maxpct=(cur_report + 1) * (100/num_reports))
        cur_report += 1

        df = pandas.DataFrame(data,
                              columns=(['time'] + [col['name'] for
                                                   col in query_column_defs]))

        # Create ephemeral columns for all the data based
        # on the related base table
        for col in query_column_defs:
            Column.create(self.job.table, col['name'], col['label'],
                          ephemeral=self.job, datatype=base_col.datatype,
                          formatter=base_col.formatter)

        if include_other:
            # Run a separate timeseries query with no column filters
            # to get "totals" then use that to compute an "other" column

            with lock:
                report = SingleQueryReport(args.profiler)
                report.run(
                    realm='traffic_overall_time_series',
                    groupby=args.profiler.groupbys['time'],
                    columns=columns,
                    timefilter=args.timefilter,
                    trafficexpr=args.trafficexpr,
                    resolution=args.resolution,
                    sync=False
                )

            totals = self._wait_for_data(report,
                                         minpct=cur_report * (100/num_reports),
                                         maxpct=(cur_report + 1) * (100/num_reports))

            df = df.set_index('time')
            df['subtotal'] = df.sum(axis=1)
            totals_df = (pandas.DataFrame(totals, columns=['time', 'total'])
                         .set_index('time'))

            df = df.merge(totals_df, left_index=True, right_index=True)
            df['other'] = df['total'] - df['subtotal']
            colnames = (['time'] + [col['name'] for col in query_column_defs]
                        + ['other'])

            # Drop the extraneous total and subtotal columns
            df = df.reset_index()[colnames]

            Column.create(self.job.table, 'other', 'Other',
                          ephemeral=self.job, datatype=base_col.datatype,
                          formatter=base_col.formatter)

        logger.info("Report %s returned %s rows" % (self.job, len(df)))
        return QueryComplete(df)
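
The "other" column above is the unfiltered total minus the sum of the
queried columns. The arithmetic on a toy frame (numbers are made up):

    import pandas as pd

    df = pd.DataFrame({'time': [1, 2], 'a': [5, 6],
                       'b': [1, 2]}).set_index('time')
    df['subtotal'] = df.sum(axis=1)             # 6, 8
    totals_df = pd.DataFrame({'time': [1, 2],
                              'total': [10, 10]}).set_index('time')
    df = df.merge(totals_df, left_index=True, right_index=True)
    df['other'] = df['total'] - df['subtotal']  # 4, 2
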
Example #10
0
    def run(self):
        """ Main execution method
        """
        criteria = self.job.criteria

        if criteria.netprofiler_device == '':
            logger.debug('%s: No netprofiler device selected' % self.table)
            self.job.mark_error("No NetProfiler Device Selected")
            return False

        profiler = DeviceManager.get_device(criteria.netprofiler_device)
        report = ServiceLocationReport(profiler)

        tf = TimeFilter(start=criteria.starttime,
                        end=criteria.endtime)

        logger.info(
            'Running NetProfilerServiceByLocTable %d report for timeframe %s' %
            (self.table.id, str(tf)))

        with lock:
            report.run(timefilter=tf, sync=False)

        done = False
        logger.info("Waiting for report to complete")
        while not done:
            time.sleep(0.5)
            with lock:
                s = report.status()

            self.job.mark_progress(progress=int(s['percent']))
            done = (s['status'] == 'completed')

        # Retrieve the data
        with lock:
            data = report.get_data()
            query = report.get_query_by_index(0)

            tz = criteria.starttime.tzinfo
            # Update criteria
            criteria.starttime = (datetime.datetime
                                  .utcfromtimestamp(query.actual_t0)
                                  .replace(tzinfo=tz))
            criteria.endtime = (datetime.datetime
                                .utcfromtimestamp(query.actual_t1)
                                .replace(tzinfo=tz))

        self.job.safe_update(actual_criteria=criteria)

        if len(data) == 0:
            return QueryComplete(None)

        # Add ephemeral columns for everything
        Column.create(self.job.table, 'location', 'Location',
                      ephemeral=self.job, datatype='string')
        for k in data[0].keys():
            if k == 'location':
                continue

            Column.create(self.job.table, k, k,
                          ephemeral=self.job, datatype='string',
                          formatter='rvbd.formatHealth')

        df = pandas.DataFrame(data)

        if self.job.table.options.rgb:
            state_map = {Service.SVC_NOT_AVAILABLE: 'gray',
                         Service.SVC_DISABLED: 'gray',
                         Service.SVC_INIT: 'gray',
                         Service.SVC_NORMAL: 'green',
                         Service.SVC_LOW: 'yellow',
                         Service.SVC_MED: 'yellow',
                         Service.SVC_HIGH: 'red',
                         Service.SVC_NODATA: 'gray'}

            df = df.replace(state_map.keys(),
                            state_map.values())

        return QueryComplete(df)
Example #11
0
    def run(self):
        args = self._prepare_report_args()
        base_table = Table.from_ref(self.table.options.base)
        base_col = base_table.get_columns()[0]

        # only calculate other when we aren't filtering data
        include_other = self.table.options.include_other
        if self.job.criteria.netprofiler_filterexpr:
            include_other = False

        if self.table.options.groupby not in self.CONFIG:
            raise ValueError('not supported for groupby=%s' %
                             self.table.options.groupby)

        config = self.CONFIG[self.table.options.groupby]

        # num_reports / cur_report are used to compute min/max pct
        num_reports = (1 +
                       (1 if self.table.options.top_n else 0) +
                       (1 if include_other else 0))
        cur_report = 0

        if self.table.options.top_n:
            # Run a top-n report to drive the criteria for each column
            query_column_defs = self.run_top_n(config, args, base_col,
                                               minpct=0,
                                               maxpct=(100/num_reports))
            cur_report += 1
        else:
            query_column_defs = self.job.criteria.query_columns
            if isinstance(query_column_defs, types.StringTypes):
                query_column_defs = json.loads(query_column_defs)

        query_columns = [col['json'] for col in query_column_defs]

        if not query_columns:
            msg = 'Unable to compute query columns for job %s' % self.job
            logger.error(msg)
            return QueryError(msg)

        with lock:
            report = TrafficTimeSeriesReport(args.profiler)
            columns = [args.columns[0], base_col.name]
            logger.info("Query Columns: %s" % str(query_columns))

            if self.table.options.groupby == 'host_group':
                host_group_type = 'ByLocation'
            else:
                host_group_type = None

            report.run(
                centricity=args.centricity,
                columns=columns,
                timefilter=args.timefilter,
                trafficexpr=args.trafficexpr,
                resolution=args.resolution,
                sync=False,
                host_group_type=host_group_type,
                query_columns_groupby=config.groupby,
                query_columns=query_columns
            )

        data = self._wait_for_data(report,
                                   minpct=cur_report * (100/num_reports),
                                   maxpct=(cur_report + 1) * (100/num_reports))
        cur_report += 1

        df = pandas.DataFrame(data,
                              columns=(['time'] + [col['name'] for
                                                   col in query_column_defs]))

        # Create ephemeral columns for all the data based
        # on the related base table
        for col in query_column_defs:
            Column.create(self.job.table, col['name'], col['label'],
                          ephemeral=self.job, datatype=base_col.datatype,
                          formatter=base_col.formatter)

        if include_other:
            # Run a separate timeseries query with no column filters
            # to get "totals" then use that to compute an "other" column

            with lock:
                report = SingleQueryReport(args.profiler)
                report.run(
                    realm='traffic_overall_time_series',
                    centricity=args.centricity,
                    groupby=args.profiler.groupbys['time'],
                    columns=columns,
                    timefilter=args.timefilter,
                    trafficexpr=args.trafficexpr,
                    resolution=args.resolution,
                    sync=False
                )

            totals = self._wait_for_data(report,
                                         minpct=cur_report * (100/num_reports),
                                         maxpct=(cur_report + 1) * (100/num_reports))

            df = df.set_index('time')
            df['subtotal'] = df.sum(axis=1)
            totals_df = (pandas.DataFrame(totals, columns=['time', 'total'])
                         .set_index('time'))

            df = df.merge(totals_df, left_index=True, right_index=True)
            df['other'] = df['total'] - df['subtotal']
            colnames = (['time'] + [col['name'] for col in query_column_defs]
                        + ['other'])

            # Drop the extraneous total and subtotal columns
            df = df.reset_index()[colnames]

            Column.create(self.job.table, 'other', 'Other',
                          ephemeral=self.job, datatype=base_col.datatype,
                          formatter=base_col.formatter)

        logger.info("Report %s returned %s rows" % (self.job, len(df)))
        return QueryComplete(df)