def get_failures(self, node_id, threshold_secs=90, upper_failure_threshold=0.2,
                 after=_AFTER_DEFAULT, before=_BEFORE_DEFAULT):
    """Find websites which have experienced two or more consecutive failures.

    A failure is a ``metrics`` row whose ``reason`` is non-zero.  Two
    failures of the same website count as consecutive when the later one
    ended no more than ``threshold_secs`` seconds after the previous one.

    Args:
        node_id: value matched against the ``node_id`` column.
        threshold_secs: maximum gap, in seconds, between the end times of
            two failures for them to be treated as consecutive.
        upper_failure_threshold: a website is dropped from the result when
            its number of failures in the window divided by
            ``self.get_probes_count(node_id)`` is greater than this ratio.
        after: inclusive lower bound on ``end_time``.
        before: inclusive upper bound on ``end_time``.

    Returns:
        A dict mapping each retained website to the list of end times of
        all its failures in the window, in ascending order.

    Raises:
        ZeroDivisionError: if ``self.get_probes_count(node_id)`` returns 0
            while at least one failure row matched the query.
    """
    probes_count = self.get_probes_count(node_id)
    max_gap = datetime.timedelta(seconds=threshold_secs)

    c = self.db.cursor()
    try:
        # Each fragment ends with a space so the concatenated statement
        # never fuses a placeholder with the following keyword.
        c.execute(
            'select website, end_time from metrics '
            'where reason != 0 and end_time >= %s and end_time <= %s '
            'and node_id = %s order by end_time',
            (datetime_to_mysql_date(after), datetime_to_mysql_date(before), node_id))
        # fetchall() does not depend on the driver reporting a usable rowcount.
        rows = c.fetchall()
    finally:
        # Release the cursor even when the query or the fetch fails.
        c.close()

    failed_websites = set()
    website2failure_time = defaultdict(list)
    # Rows arrive ordered by end_time, so the last recorded time of a
    # website is always its most recent earlier failure.
    for website, end_time in rows:
        failure_times = website2failure_time[website]
        if failure_times and end_time <= failure_times[-1] + max_gap:
            failed_websites.add(website)
        failure_times.append(end_time)

    for website, failure_times in website2failure_time.items():
        # I don't think it makes sense to take seriously websites which fail
        # more than certain rate
        if 1.0 * len(failure_times) / probes_count > upper_failure_threshold:
            # discard(): a website can exceed the rate without ever having
            # had two consecutive failures, in which case it is not in the set.
            failed_websites.discard(website)

    return {w: website2failure_time[w] for w in failed_websites}
def _metric_to_mysql_tuple(self, m):
    """Flatten metric ``m`` into a positional tuple of row values.

    The tuple holds, in order: node id, website, state, start date, end
    date, duration in seconds, HTTP code (0 when ``m.http_code`` is None),
    the ``end_1min`` and ``end_5min`` dates, and a failure flag that is 0
    when ``m.state`` equals ``Metric.STATE_OK`` and 1 otherwise.  All dates
    are converted with ``datetime_to_mysql_date``.
    """
    node_id = m.node_id
    website = m.website
    state = m.state
    started_at = datetime_to_mysql_date(m.start)
    ended_at = datetime_to_mysql_date(m.end)

    # duration in seconds
    elapsed = m.end - m.start
    duration_secs = elapsed.total_seconds()

    if m.http_code is None:
        http_code = 0
    else:
        http_code = m.http_code

    # normalized data
    ended_at_1min = datetime_to_mysql_date(m.end_1min)
    ended_at_5min = datetime_to_mysql_date(m.end_5min)

    if m.state == Metric.STATE_OK:
        failure_flag = 0
    else:
        failure_flag = 1

    return (
        node_id,
        website,
        state,
        started_at,
        ended_at,
        duration_secs,
        http_code,
        ended_at_1min,
        ended_at_5min,
        failure_flag,
    )
def plot_duration_and_failure(self, website, after=_AFTER_DEFAULT, before=_BEFORE_DEFAULT):
    """Plot the failure count and average duration of ``website`` over time.

    Rows of ``metrics`` for ``website`` whose ``end_time`` lies in
    ``[after, before]`` are grouped by ``end_time_1min``; groups with fewer
    than two rows are skipped.  For every remaining group the summed
    ``failure`` and the averaged ``duration`` are collected.

    Args:
        website: value matched against the ``website`` column; also used as
            the plot title.
        after: inclusive lower bound on ``end_time``.
        before: inclusive upper bound on ``end_time``.

    Returns:
        Whatever ``DataFrame.plot`` returns for a frame with columns
        ``time``, ``sum(failure)`` and ``avg(duration)``, using ``time`` as
        the x axis and ``sum(failure)`` on the secondary y axis.
    """
    c = self.db.cursor()
    try:
        c.execute('select sum(failure), avg(duration), end_time_1min from metrics '
                  'where website=%s '
                  'and end_time >= %s and end_time <= %s '
                  'group by end_time_1min '
                  'having count(end_time_1min) >= 2',
                  (website, datetime_to_mysql_date(after), datetime_to_mysql_date(before)))
        # fetchall() does not depend on the driver reporting a usable rowcount.
        rows = c.fetchall()
    finally:
        # Release the cursor even when the query or the fetch fails.
        c.close()

    result = {'time': [], 'sum(failure)': [], 'avg(duration)': []}
    for failure, duration, bucket_time in rows:
        result['time'].append(bucket_time)
        # Coerce the aggregates to plain int/float before handing them to pandas.
        result['sum(failure)'].append(int(failure))
        result['avg(duration)'].append(float(duration))

    df = pd.DataFrame(result)
    return df.plot(x='time', title=website, secondary_y='sum(failure)', figsize=(16, 12))