def __init__(self):
    """Set up the URL list, both worker pools and the seriesly handler."""
    # The only thing that caller (view) needs
    self.urls = []
    # for seriesly requests
    self.eventlet_pool = GreenPool()
    # for plotting
    self.mp_pool = Pool(cpu_count())
    self.seriesly = SerieslyHandler()
def seriesly_proxy(request):
    """Return raw seriesly data for one metric as a JSON-encoded response.

    The database name is built from the ``cluster``, ``server``, ``bucket``
    and ``collector`` query parameters, and the metric is selected by
    ``name``.  ``cluster`` and ``name`` are read by subscript, so a request
    without them raises; the other parameters default to ``None``.
    """
    handler = SerieslyHandler()
    params = request.GET
    db_name = handler.build_dbname(
        cluster=params["cluster"],
        server=params.get("server"),
        bucket=params.get("bucket"),
        collector=params.get("collector"),
    )
    raw_data = handler.query_raw_data(db_name, name=params["name"])
    return HttpResponse(json.dumps(raw_data))
def seriesly_proxy(request):
    """Return raw seriesly data for one metric as a JSON-encoded response.

    The database name is built from the ``cluster``, ``server``, ``bucket``,
    ``index`` and ``collector`` query parameters, and the metric is selected
    by ``name``.  ``cluster`` and ``name`` are read by subscript, so a request
    without them raises; the other parameters default to ``None``.
    """
    handler = SerieslyHandler()
    params = request.GET
    db_name = handler.build_dbname(
        cluster=params["cluster"],
        server=params.get("server"),
        bucket=params.get("bucket"),
        index=params.get("index"),
        collector=params.get("collector"),
    )
    raw_data = handler.query_raw_data(db_name, name=params["name"])
    return HttpResponse(json.dumps(raw_data))
class Analyzer(object):
    """Build a correlation matrix from the time series behind snapshots."""

    def __init__(self):
        self.seriesly = SerieslyHandler()

    @staticmethod
    def _rolling_median(series, window=3):
        """Return the rolling median of *series* over *window* samples.

        ``pd.rolling_median`` was deprecated and later removed from pandas;
        ``Series.rolling(...).median()`` is its replacement.  The legacy
        function is kept as a fallback for installations that predate the
        ``rolling`` method.
        """
        if hasattr(series, "rolling"):
            return series.rolling(window=window).median()
        return pd.rolling_median(series, window=window)

    def get_time_series(self, observables):
        """Yield ``(title, smoothed_series)`` for every usable observable.

        *observables* is an iterable of iterables of observable objects.
        Falsy observables, observables without data and series that contain
        a single distinct value are skipped.
        """
        for group in observables:
            for observable in group:
                if not observable:
                    continue
                raw_data = self.seriesly.query_data(observable)
                if not raw_data:
                    continue
                series = pd.Series(raw_data)
                if len(series.unique()) == 1:
                    # Constant series are skipped (presumably because they
                    # carry no information for a correlation).
                    continue
                series = self._rolling_median(series)
                title = Plotter.generate_title(observable)
                yield title, series

    def create_data_frame(self, observables):
        """Return a DataFrame with one column per yielded time series."""
        df = pd.DataFrame()
        for title, series in self.get_time_series(observables):
            new_df = pd.DataFrame(data=series, columns=(title, ))
            df = pd.concat((df, new_df), axis=1)
        return df

    def corr(self, snapshots):
        """Return ``(column_titles, correlation_matrix)`` for *snapshots*.

        Both values are plain lists; NaN entries of the correlation matrix
        are replaced with 0.
        """
        observables = Report(snapshots)()
        df = self.create_data_frame(observables)
        return df.columns.values.tolist(), df.corr().fillna(0).values.tolist()
def __init__(self):
    """Attach a fresh ``SerieslyHandler`` to the instance as ``seriesly``."""
    handler = SerieslyHandler()
    self.seriesly = handler
class Plotter(object):
    """Plotter helper that reads data from seriesly database and generates
    handy charts with url/filesystem meta information.

    After :meth:`plot` has run, ``self.urls`` holds one ``[title, url]`` pair
    per chart.
    """

    def __init__(self):
        self.urls = list()  # The only thing that caller (view) needs

        self.eventlet_pool = GreenPool()  # for seriesly requests
        self.mp_pool = Pool(cpu_count())  # for plotting
        self.seriesly = SerieslyHandler()

    def __del__(self):
        # __init__ may have raised before the pool was assigned; avoid an
        # AttributeError from the finalizer in that case.
        pool = getattr(self, "mp_pool", None)
        if pool is not None:
            pool.close()

    @staticmethod
    def generate_title(observable):
        """Return the chart title for *observable*.

        The metric name (with ``/`` replaced by ``_``) is prefixed with the
        bucket, else the server, else the two dot-separated leading parts of
        the index; without any of those the bare metric name is returned.
        """
        metric = observable.name.replace("/", "_")
        if observable.bucket:
            return "[{}] {}".format(observable.bucket, metric)
        if observable.server:
            return "[{}] {}".format(observable.server, metric)
        if observable.index and "." in observable.index:
            name = observable.index.split(".")
            return "[{}] [{}] {}".format(name[0], name[1], metric)
        return metric

    def generate_png_meta(self, snapshot, cluster, title):
        """Generate output filenames and URLs based on object attributes.

        Returns ``(media_url, media_path)``.  Both still contain a
        ``{suffix}`` placeholder that the caller fills in per chart type.
        """
        filename = "".join((snapshot, cluster, title))
        # Strip characters that are unsafe in file names and URLs.
        filename = re.sub(r"[\[\]/\\:\*\?\"<>\|& ]", "", filename)
        filename += "{suffix}.png"

        media_url = settings.MEDIA_URL + filename
        media_path = os.path.join(settings.MEDIA_ROOT, filename)
        return media_url, media_path

    def get_series(self, metric, data):
        """Convert raw data to Pandas time series.

        Returns ``None`` when *metric* is listed in
        ``constants.NON_ZERO_VALUES`` and every sample is zero.  Otherwise
        returns the series with its index converted from timestamp strings
        to integer seconds relative to the earliest sample.
        """
        # dropna() returns a new object, so the result must be kept;
        # otherwise NaN samples survive and may break kde.
        series = pd.Series(data).dropna()
        if metric in constants.NON_ZERO_VALUES and (series == 0).all():
            return None
        series.rename(lambda x: dateutil.parser.parse(x), inplace=True)
        series.rename(lambda x: int(x.strftime('%s')), inplace=True)
        if not series.empty:
            # Compute the origin once instead of once per index label.
            start = series.index.values.min()
            series.rename(lambda x: x - start, inplace=True)
        return series

    def extract(self, observables, skip_df=False):
        """Top-level abstraction for data and metadata extraction.

        Returns ``(series, labels, colors, title, filename, url)`` where the
        first three are parallel lists with one entry per observable that
        produced a series.  *skip_df* is accepted for backward compatibility
        and is not used.
        """
        merge = defaultdict(list)
        title = ""
        colors = Colors()
        for observable in observables:
            # A color is consumed for every position, even skipped ones.
            color = colors.next()
            if not observable:
                continue
            data = self.seriesly.query_data(observable)
            if not data:
                continue
            series = self.get_series(metric=observable.name, data=data)
            if series is None:
                continue
            merge["series"].append(series)
            merge["labels"].append(observable.snapshot.name)
            merge["colors"].append(color)
            merge["clusters"].append(observable.snapshot.cluster.name)
            merge["snapshots"].append(observable.snapshot.name)
            title = self.generate_title(observable)

        url, fname = self.generate_png_meta("".join(merge["snapshots"]),
                                            "".join(merge["clusters"]),
                                            title)
        return (merge["series"], merge["labels"], merge["colors"],
                title, fname, url)

    def detect_rebalance(self, observables):
        """Check first observable object which is expected to be rebalance
        progress characteristic.

        Returns a list of ``(first_index, last_index)`` tuples covering the
        positive samples of each series, or ``[]`` when the first observable
        is not ``rebalance_progress`` or any series is all zeros.
        """
        rebalances = []
        if observables[0] and observables[0].name == "rebalance_progress":
            series, _, _, _, _, _ = self.extract(observables, skip_df=True)
            for s in series:
                s = s.dropna()
                if (s == 0).all():
                    return []
                rebalance = s[s > 0]
                rebalances.append((rebalance.index[0], rebalance.index[-1]))
        return rebalances

    def plot(self, snapshots):
        """End-point method that orchestrates concurrent extraction and
        plotting.

        Charts whose output file already exists are not re-rendered, but
        their URLs are still appended to ``self.urls``.
        """
        observables = Report(snapshots)()
        rebalances = self.detect_rebalance(observables[0])

        # Asynchronously extract data
        apply_results = list()
        for data in self.eventlet_pool.imap(self.extract, observables):
            series, labels, colors, title, filename, url = data
            if not series:
                continue
            metric = title.split()[-1]
            ylabel = constants.LABELS.get(metric, metric)

            chart_ids = [""]
            if metric in constants.HISTOGRAMS:
                chart_ids += ["_histo"]
            if metric in constants.ZOOM_HISTOGRAMS:
                chart_ids += ["_lt90", "_gt80"]
            if metric in constants.KDE:
                chart_ids += ["_kde"]
            if metric in constants.SMOOTH_SUBPLOTS:
                chart_ids[0] = "_subplot"
                chart_ids += ["_score"]

            for chart_id in chart_ids:
                fname = filename.format(suffix=chart_id)
                if not os.path.exists(fname):
                    apply_results.append(
                        self.mp_pool.apply_async(
                            plot_as_png,
                            args=(fname, series, labels, colors, ylabel,
                                  chart_id, rebalances)))
                self.urls.append([title, url.format(suffix=chart_id)])

        # Plot all charts in parallel
        for result in apply_results:
            result.get()
class Plotter(object):
    """Plotter helper that reads data from seriesly database and generates
    handy charts with url/filesystem meta information.

    After :meth:`plot` has run, ``self.urls`` holds one ``[title, url]`` pair
    per chart.
    """

    def __init__(self):
        self.urls = list()  # The only thing that caller (view) needs

        self.eventlet_pool = GreenPool()  # for seriesly requests
        self.mp_pool = Pool(cpu_count())  # for plotting
        self.seriesly = SerieslyHandler()

    def __del__(self):
        # __init__ may have raised before the pool was assigned; avoid an
        # AttributeError from the finalizer in that case.
        pool = getattr(self, "mp_pool", None)
        if pool is not None:
            pool.close()

    @staticmethod
    def generate_title(observable):
        """Return the chart title for *observable*.

        The metric name (with ``/`` replaced by ``_``) is prefixed with the
        bucket, else the server; without either the bare metric name is
        returned.
        """
        metric = observable.name.replace("/", "_")
        if observable.bucket:
            return "[{}] {}".format(observable.bucket, metric)
        if observable.server:
            return "[{}] {}".format(observable.server, metric)
        return metric

    def generate_png_meta(self, snapshot, cluster, title):
        """Generate output filenames and URLs based on object attributes.

        Returns ``(media_url, media_path)``.  Both still contain a
        ``{suffix}`` placeholder that the caller fills in per chart type.
        """
        filename = "".join((snapshot, cluster, title))
        # Strip characters that are unsafe in file names and URLs.
        filename = re.sub(r"[\[\]/\\:\*\?\"<>\|& ]", "", filename)
        filename += "{suffix}.png"

        media_url = settings.MEDIA_URL + filename
        media_path = os.path.join(settings.MEDIA_ROOT, filename)
        return media_url, media_path

    def get_series(self, metric, data):
        """Convert raw data to Pandas time series.

        Returns ``None`` when *metric* is listed in
        ``constants.NON_ZERO_VALUES`` and every sample is zero.  Otherwise
        returns the series with its index cast to ``uint64``, shifted so the
        earliest sample is at zero, and divided by 1000 (ms -> s).
        """
        # dropna() returns a new object, so the result must be kept;
        # otherwise NaN samples survive and may break kde.
        series = pd.Series(data).dropna()
        if metric in constants.NON_ZERO_VALUES and (series == 0).all():
            return None
        series.index = series.index.astype("uint64")
        if not series.empty:
            # Compute the origin once instead of once per index label.
            start = series.index.values.min()
            series.rename(lambda x: x - start, inplace=True)
        series.rename(lambda x: x / 1000, inplace=True)  # ms -> s
        return series

    def extract(self, observables, skip_df=False):
        """Top-level abstraction for data and metadata extraction.

        Returns ``(series, labels, colors, title, filename, url)`` where the
        first three are parallel lists with one entry per observable that
        produced a series.  *skip_df* is accepted for backward compatibility
        and is not used.
        """
        merge = defaultdict(list)
        title = ""
        colors = Colors()
        for observable in observables:
            # A color is consumed for every position, even skipped ones.
            color = colors.next()
            if not observable:
                continue
            data = self.seriesly.query_data(observable)
            if not data:
                continue
            series = self.get_series(metric=observable.name, data=data)
            if series is None:
                continue
            merge["series"].append(series)
            merge["labels"].append(observable.snapshot.name)
            merge["colors"].append(color)
            merge["clusters"].append(observable.snapshot.cluster.name)
            merge["snapshots"].append(observable.snapshot.name)
            title = self.generate_title(observable)

        url, fname = self.generate_png_meta("".join(merge["snapshots"]),
                                            "".join(merge["clusters"]),
                                            title)
        return (merge["series"], merge["labels"], merge["colors"],
                title, fname, url)

    def detect_rebalance(self, observables):
        """Check first observable object which is expected to be rebalance
        progress characteristic.

        Returns a list of ``(first_index, last_index)`` tuples covering the
        positive samples of each series, or ``[]`` when the first observable
        is not ``rebalance_progress`` or any series is all zeros.
        """
        rebalances = []
        if observables[0] and observables[0].name == "rebalance_progress":
            series, _, _, _, _, _ = self.extract(observables, skip_df=True)
            for s in series:
                s = s.dropna()
                if (s == 0).all():
                    return []
                rebalance = s[s > 0]
                rebalances.append((rebalance.index[0], rebalance.index[-1]))
        return rebalances

    def plot(self, snapshots):
        """End-point method that orchestrates concurrent extraction and
        plotting.

        Charts whose output file already exists are not re-rendered, but
        their URLs are still appended to ``self.urls``.
        """
        observables = Report(snapshots)()
        rebalances = self.detect_rebalance(observables[0])

        # Asynchronously extract data
        apply_results = list()
        for data in self.eventlet_pool.imap(self.extract, observables):
            series, labels, colors, title, filename, url = data
            if not series:
                continue
            metric = title.split()[-1]
            ylabel = constants.LABELS.get(metric, metric)

            chart_ids = [""]
            if metric in constants.HISTOGRAMS:
                chart_ids += ["_histo"]
            if metric in constants.ZOOM_HISTOGRAMS:
                chart_ids += ["_lt90", "_gt80"]
            if metric in constants.KDE:
                chart_ids += ["_kde"]
            if metric in constants.SMOOTH_SUBPLOTS:
                chart_ids[0] = "_subplot"
                chart_ids += ["_score"]

            for chart_id in chart_ids:
                fname = filename.format(suffix=chart_id)
                if not os.path.exists(fname):
                    apply_results.append(
                        self.mp_pool.apply_async(
                            plot_as_png,
                            args=(fname, series, labels, colors, ylabel,
                                  chart_id, rebalances)))
                self.urls.append([title, url.format(suffix=chart_id)])

        # Plot all charts in parallel
        for result in apply_results:
            result.get()