Example #1
0
    def __init__(self):
        """Prepare the URL list, the two worker pools and the seriesly
        handler."""
        # Filled in later; the only attribute the caller (view) needs
        self.urls = []

        # Pool used for seriesly requests
        self.eventlet_pool = GreenPool()

        # Pool used for plotting, sized by cpu_count()
        workers = cpu_count()
        self.mp_pool = Pool(workers)

        self.seriesly = SerieslyHandler()
Example #2
0
def seriesly_proxy(request):
    """Return raw seriesly data for the metric described by the query string.

    The database name is built from the ``cluster`` (required), ``server``,
    ``bucket`` and ``collector`` GET parameters; ``name`` (required) selects
    the metric. A missing required parameter makes the ``request.GET[...]``
    lookup raise.

    The queried data is serialized with ``json.dumps`` and wrapped in an
    ``HttpResponse``.
    """
    handler = SerieslyHandler()
    params = request.GET

    db_name = handler.build_dbname(
        cluster=params["cluster"],
        server=params.get("server"),
        bucket=params.get("bucket"),
        collector=params.get("collector"),
    )
    raw = handler.query_raw_data(db_name, name=params["name"])

    return HttpResponse(json.dumps(raw))
Example #3
0
def seriesly_proxy(request):
    """Return raw seriesly data for the metric described by the query string.

    The database name is built from the ``cluster`` (required), ``server``,
    ``bucket``, ``index`` and ``collector`` GET parameters; ``name``
    (required) selects the metric. A missing required parameter makes the
    ``request.GET[...]`` lookup raise.

    The queried data is serialized with ``json.dumps`` and wrapped in an
    ``HttpResponse``.
    """
    handler = SerieslyHandler()
    params = request.GET

    db_name = handler.build_dbname(
        cluster=params["cluster"],
        server=params.get("server"),
        bucket=params.get("bucket"),
        index=params.get("index"),
        collector=params.get("collector"),
    )
    raw = handler.query_raw_data(db_name, name=params["name"])

    return HttpResponse(json.dumps(raw))
Example #4
0
class Analyzer(object):
    """Builds a correlation matrix from the time series of a set of
    snapshots."""

    def __init__(self):
        self.seriesly = SerieslyHandler()

    def get_time_series(self, observables):
        """Yield ``(title, series)`` pairs for every usable observable.

        ``observables`` is iterated as a sequence of sequences. Falsy
        observables, observables without data and series holding a single
        unique value are skipped. Each remaining series is smoothed with a
        rolling median (window of 3) before being yielded.
        """
        for group in observables:
            for observable in group:
                if not observable:
                    continue
                raw_data = self.seriesly.query_data(observable)
                if not raw_data:
                    continue
                series = pd.Series(raw_data)
                # A series with one distinct value is skipped
                if len(series.unique()) == 1:
                    continue
                smoothed = pd.rolling_median(series, window=3)
                yield Plotter.generate_title(observable), smoothed

    def create_data_frame(self, observables):
        """Return a data frame with one column per yielded time series,
        named after the series title."""
        frame = pd.DataFrame()
        for title, series in self.get_time_series(observables):
            column = pd.DataFrame(data=series, columns=(title, ))
            frame = pd.concat((frame, column), axis=1)
        return frame

    def corr(self, snapshots):
        """Return ``(column titles, correlation matrix)`` for ``snapshots``.

        Both values are plain lists; NaN correlations are replaced by 0.
        """
        frame = self.create_data_frame(Report(snapshots)())
        titles = frame.columns.values.tolist()
        matrix = frame.corr().fillna(0).values.tolist()
        return titles, matrix
Example #5
0
class Analyzer(object):
    """Builds a correlation matrix from the time series of a set of
    snapshots."""

    def __init__(self):
        self.seriesly = SerieslyHandler()

    def get_time_series(self, observables):
        """Yield ``(title, series)`` pairs for every usable observable.

        ``observables`` is iterated as a sequence of sequences. Falsy
        observables, observables without data and series holding a single
        unique value are skipped. Each remaining series is smoothed with a
        rolling median (window of 3) before being yielded.
        """
        for group in observables:
            for observable in group:
                if not observable:
                    continue
                raw_data = self.seriesly.query_data(observable)
                if not raw_data:
                    continue
                series = pd.Series(raw_data)
                # A series with one distinct value is skipped
                if len(series.unique()) == 1:
                    continue
                smoothed = pd.rolling_median(series, window=3)
                yield Plotter.generate_title(observable), smoothed

    def create_data_frame(self, observables):
        """Return a data frame with one column per yielded time series,
        named after the series title."""
        frame = pd.DataFrame()
        for title, series in self.get_time_series(observables):
            column = pd.DataFrame(data=series, columns=(title, ))
            frame = pd.concat((frame, column), axis=1)
        return frame

    def corr(self, snapshots):
        """Return ``(column titles, correlation matrix)`` for ``snapshots``.

        Both values are plain lists; NaN correlations are replaced by 0.
        """
        frame = self.create_data_frame(Report(snapshots)())
        titles = frame.columns.values.tolist()
        matrix = frame.corr().fillna(0).values.tolist()
        return titles, matrix
Example #6
0
    def __init__(self):
        """Prepare the URL list, the two worker pools and the seriesly
        handler."""
        # Filled in later; the only attribute the caller (view) needs
        self.urls = []

        # Pool used for seriesly requests
        self.eventlet_pool = GreenPool()

        # Pool used for plotting, sized by cpu_count()
        workers = cpu_count()
        self.mp_pool = Pool(workers)

        self.seriesly = SerieslyHandler()
Example #7
0
 def __init__(self):
     """Create a SerieslyHandler and keep it on ``self.seriesly``."""
     self.seriesly = SerieslyHandler()
Example #8
0
class Plotter(object):
    """Plotter helper that reads data from seriesly database and generates
    handy charts with url/filesystem meta information."""

    def __init__(self):
        self.urls = list()  # The only thing that caller (view) needs

        self.eventlet_pool = GreenPool()  # for seriesly requests
        self.mp_pool = Pool(cpu_count())  # for plotting

        self.seriesly = SerieslyHandler()

    def __del__(self):
        """Close the plotting pool when the plotter is destroyed."""
        # __init__ may have failed before the pool was created; in that case
        # there is nothing to close.
        mp_pool = getattr(self, "mp_pool", None)
        if mp_pool is not None:
            mp_pool.close()

    @staticmethod
    def generate_title(observable):
        """Return the chart title for an observable.

        The metric name (with "/" replaced by "_") is prefixed with the
        bucket, else the server, else the two dot-separated parts of the
        index; without any of those the bare metric name is returned.
        """
        metric = observable.name.replace("/", "_")
        if observable.bucket:
            return "[{}] {}".format(observable.bucket, metric)
        elif observable.server:
            return "[{}] {}".format(observable.server, metric)
        elif observable.index and "." in observable.index:
            name = observable.index.split(".")
            return "[{}] [{}] {}".format(name[0], name[1], metric)
        else:
            return metric

    def generate_png_meta(self, snapshot, cluster, title):
        """Generate output filenames and URLs based on object attributes.

        Returns ``(media_url, media_path)``. Both still contain a
        ``{suffix}`` placeholder that the caller fills in per chart type.
        """
        filename = "".join((snapshot, cluster, title))
        # Strip brackets, separators, spaces and similar special characters
        filename = re.sub(r"[\[\]/\\:\*\?\"<>\|& ]", "", filename)
        filename += "{suffix}.png"

        media_url = settings.MEDIA_URL + filename
        media_path = os.path.join(settings.MEDIA_ROOT, filename)
        return media_url, media_path

    def get_series(self, metric, data):
        """Convert raw data to Pandas time series.

        The index labels are parsed as timestamps and rebased so that the
        earliest sample sits at 0 seconds.

        Returns None when no samples are left after dropping NaN values, or
        when ``metric`` is listed in ``constants.NON_ZERO_VALUES`` and every
        sample is zero.
        """
        series = pd.Series(data)
        # dropna() returns a new object, so the result has to be kept
        series = series.dropna()  # otherwise it may break kde
        if len(series) == 0:
            return None
        if metric in constants.NON_ZERO_VALUES and (series == 0).all():
            return None
        series.rename(lambda x: dateutil.parser.parse(x), inplace=True)
        series.rename(lambda x: int(x.strftime('%s')), inplace=True)
        # Compute the offset once instead of once per index label
        start = series.index.values.min()
        series.rename(lambda x: x - start, inplace=True)
        return series

    def extract(self, observables, skip_df=False):
        """Top-level abstraction for data and metadata extraction.

        Returns ``(series, labels, colors, title, fname, url)`` where the
        first three are parallel lists with one entry per observable that
        produced a usable series. ``skip_df`` is accepted for compatibility
        and is not used.
        """
        merge = defaultdict(list)
        title = ""
        colors = Colors()
        for observable in observables:
            # A color is consumed for every position, including skipped ones
            color = colors.next()
            if observable:
                data = self.seriesly.query_data(observable)
                if data:
                    series = self.get_series(metric=observable.name, data=data)
                    if series is not None:
                        merge["series"].append(series)
                        merge["labels"].append(observable.snapshot.name)
                        merge["colors"].append(color)
                        merge["clusters"].append(
                            observable.snapshot.cluster.name)
                        merge["snapshots"].append(observable.snapshot.name)
                        title = self.generate_title(observable)

        url, fname = self.generate_png_meta("".join(merge["snapshots"]),
                                            "".join(merge["clusters"]), title)

        return merge["series"], merge["labels"], merge[
            "colors"], title, fname, url

    def detect_rebalance(self, observables):
        """Check first observable object which is expected to be rebalance
        progress characteristic.

        Returns a list of ``(start, end)`` index labels, one per series,
        spanning the samples with a positive value. The list is empty when
        there are no observables, when the first one is not
        "rebalance_progress", or when any series is all zeros.
        """
        rebalances = []
        if not observables:
            return rebalances
        if observables[0] and observables[0].name == "rebalance_progress":
            series, _, _, _, _, _ = self.extract(observables, skip_df=True)
            for s in series:
                s = s.dropna()
                if (s == 0).all():
                    return []
                rebalance = s[s > 0]
                rebalances.append((rebalance.index[0], rebalance.index[-1]))
        return rebalances

    def plot(self, snapshots):
        """End-point method that orchestrates concurrent extraction and
        plotting.

        Appends one ``[title, url]`` entry to ``self.urls`` per chart and
        waits for all scheduled plotting jobs before returning.
        """
        observables = Report(snapshots)()

        rebalances = self.detect_rebalance(observables[0])

        # Asynchronously extract data
        apply_results = list()
        for data in self.eventlet_pool.imap(self.extract, observables):
            series, labels, colors, title, filename, url = data
            if series:
                # The metric name is the last whitespace-separated token
                metric = title.split()[-1]
                ylabel = constants.LABELS.get(metric, metric)

                chart_ids = [""]
                if metric in constants.HISTOGRAMS:
                    chart_ids += ["_histo"]
                if metric in constants.ZOOM_HISTOGRAMS:
                    chart_ids += ["_lt90", "_gt80"]
                if metric in constants.KDE:
                    chart_ids += ["_kde"]
                if metric in constants.SMOOTH_SUBPLOTS:
                    chart_ids[0] = "_subplot"
                    chart_ids += ["_score"]

                for chart_id in chart_ids:
                    fname = filename.format(suffix=chart_id)
                    # Only schedule plotting for files that do not exist yet
                    if not os.path.exists(fname):
                        apply_results.append(
                            self.mp_pool.apply_async(
                                plot_as_png,
                                args=(fname, series, labels, colors, ylabel,
                                      chart_id, rebalances)))
                    self.urls.append([title, url.format(suffix=chart_id)])
        # Plot all charts in parallel
        for result in apply_results:
            result.get()
Example #9
0
 def __init__(self):
     """Create a SerieslyHandler and keep it on ``self.seriesly``."""
     self.seriesly = SerieslyHandler()
Example #10
0
class Plotter(object):

    """Plotter helper that reads data from seriesly database and generates
    handy charts with url/filesystem meta information."""

    def __init__(self):
        self.urls = list()  # The only thing that caller (view) needs

        self.eventlet_pool = GreenPool()  # for seriesly requests
        self.mp_pool = Pool(cpu_count())  # for plotting

        self.seriesly = SerieslyHandler()

    def __del__(self):
        """Close the plotting pool when the plotter is destroyed."""
        # __init__ may have failed before the pool was created; in that case
        # there is nothing to close.
        mp_pool = getattr(self, "mp_pool", None)
        if mp_pool is not None:
            mp_pool.close()

    @staticmethod
    def generate_title(observable):
        """Return the chart title for an observable.

        The metric name (with "/" replaced by "_") is prefixed with the
        bucket, else the server; without either the bare metric name is
        returned.
        """
        metric = observable.name.replace("/", "_")
        if observable.bucket:
            return "[{}] {}".format(observable.bucket, metric)
        elif observable.server:
            return "[{}] {}".format(observable.server, metric)
        else:
            return metric

    def generate_png_meta(self, snapshot, cluster, title):
        """Generate output filenames and URLs based on object attributes.

        Returns ``(media_url, media_path)``. Both still contain a
        ``{suffix}`` placeholder that the caller fills in per chart type.
        """
        filename = "".join((snapshot, cluster, title))
        # Strip brackets, separators, spaces and similar special characters
        filename = re.sub(r"[\[\]/\\:\*\?\"<>\|& ]", "", filename)
        filename += "{suffix}.png"

        media_url = settings.MEDIA_URL + filename
        media_path = os.path.join(settings.MEDIA_ROOT, filename)
        return media_url, media_path

    def get_series(self, metric, data):
        """Convert raw data to Pandas time series.

        The index is cast to unsigned integers, rebased so that the earliest
        sample sits at 0 and then divided by 1000 (ms -> s).

        Returns None when no samples are left after dropping NaN values, or
        when ``metric`` is listed in ``constants.NON_ZERO_VALUES`` and every
        sample is zero.
        """
        series = pd.Series(data)
        # dropna() returns a new object, so the result has to be kept
        series = series.dropna()  # otherwise it may break kde
        if len(series) == 0:
            return None
        if metric in constants.NON_ZERO_VALUES and (series == 0).all():
            return None
        series.index = series.index.astype("uint64")
        # Compute the offset once instead of once per index label
        start = series.index.values.min()
        series.rename(lambda x: x - start, inplace=True)
        series.rename(lambda x: x / 1000, inplace=True)  # ms -> s
        return series

    def extract(self, observables, skip_df=False):
        """Top-level abstraction for data and metadata extraction.

        Returns ``(series, labels, colors, title, fname, url)`` where the
        first three are parallel lists with one entry per observable that
        produced a usable series. ``skip_df`` is accepted for compatibility
        and is not used.
        """
        merge = defaultdict(list)
        title = ""
        colors = Colors()
        for observable in observables:
            # A color is consumed for every position, including skipped ones
            color = colors.next()
            if observable:
                data = self.seriesly.query_data(observable)
                if data:
                    series = self.get_series(metric=observable.name, data=data)
                    if series is not None:
                        merge["series"].append(series)
                        merge["labels"].append(observable.snapshot.name)
                        merge["colors"].append(color)
                        merge["clusters"].append(observable.snapshot.cluster.name)
                        merge["snapshots"].append(observable.snapshot.name)
                        title = self.generate_title(observable)

        url, fname = self.generate_png_meta("".join(merge["snapshots"]),
                                            "".join(merge["clusters"]),
                                            title)

        return merge["series"], merge["labels"], merge["colors"], title, fname, url

    def detect_rebalance(self, observables):
        """Check first observable object which is expected to be rebalance
        progress characteristic.

        Returns a list of ``(start, end)`` index labels, one per series,
        spanning the samples with a positive value. The list is empty when
        there are no observables, when the first one is not
        "rebalance_progress", or when any series is all zeros.
        """
        rebalances = []
        if not observables:
            return rebalances
        if observables[0] and observables[0].name == "rebalance_progress":
            series, _, _, _, _, _ = self.extract(observables, skip_df=True)
            for s in series:
                s = s.dropna()
                if (s == 0).all():
                    return []
                rebalance = s[s > 0]
                rebalances.append((rebalance.index[0], rebalance.index[-1]))
        return rebalances

    def plot(self, snapshots):
        """End-point method that orchestrates concurrent extraction and
        plotting.

        Appends one ``[title, url]`` entry to ``self.urls`` per chart and
        waits for all scheduled plotting jobs before returning.
        """
        observables = Report(snapshots)()
        rebalances = self.detect_rebalance(observables[0])

        # Asynchronously extract data
        apply_results = list()
        for data in self.eventlet_pool.imap(self.extract, observables):
            series, labels, colors, title, filename, url = data
            if series:
                # The metric name is the last whitespace-separated token
                metric = title.split()[-1]
                ylabel = constants.LABELS.get(metric, metric)

                chart_ids = [""]
                if metric in constants.HISTOGRAMS:
                    chart_ids += ["_histo"]
                if metric in constants.ZOOM_HISTOGRAMS:
                    chart_ids += ["_lt90", "_gt80"]
                if metric in constants.KDE:
                    chart_ids += ["_kde"]
                if metric in constants.SMOOTH_SUBPLOTS:
                    chart_ids[0] = "_subplot"
                    chart_ids += ["_score"]

                for chart_id in chart_ids:
                    fname = filename.format(suffix=chart_id)
                    # Only schedule plotting for files that do not exist yet
                    if not os.path.exists(fname):
                        apply_results.append(self.mp_pool.apply_async(
                            plot_as_png,
                            args=(fname,
                                  series, labels, colors, ylabel, chart_id,
                                  rebalances)
                        ))
                    self.urls.append([title, url.format(suffix=chart_id)])
        # Plot all charts in parallel
        for result in apply_results:
            result.get()