Example #1
0
    def get_failures(self, node_id, threshold_secs=90, upper_failure_threshold=0.2,
                     after=_AFTER_DEFAULT,
                     before=_BEFORE_DEFAULT):
        """Find websites with two or more failures close together in time.

        Only failed probes (``reason != 0``) of ``node_id`` whose ``end_time``
        lies in ``[after, before]`` are considered.  A website is reported when
        two successive failures of it are at most ``threshold_secs`` apart.
        Because only failure rows are queried, "consecutive" means consecutive
        among the failures; successful probes in between are not looked at.

        :param node_id: value matched against the ``node_id`` column.
        :param threshold_secs: maximum gap, in seconds, between two successive
            failures of the same website for them to count as consecutive.
        :param upper_failure_threshold: websites whose ratio of failures to
            ``get_probes_count(node_id)`` exceeds this value are left out.
        :param after: inclusive lower bound on ``end_time``.
        :param before: inclusive upper bound on ``end_time``.
        :return: dict mapping each reported website to the list of all its
            failure end times in the window, in ascending order.
        :raises ZeroDivisionError: if failures qualify but
            ``get_probes_count(node_id)`` returns 0.
        """
        # NOTE(review): the probe count is not restricted to [after, before]
        # here -- confirm that get_probes_count() uses a comparable window.
        probes_count = self.get_probes_count(node_id)

        c = self.db.cursor()
        try:
            c.execute('select website, end_time from metrics '
                      'where reason != 0 and end_time >= %s and end_time <= %s '
                      'and node_id = %s order by end_time',
                      (datetime_to_mysql_date(after), datetime_to_mysql_date(before), node_id))
            # fetchall() instead of looping over rowcount: DB-API drivers may
            # report rowcount as -1 for SELECT statements.
            rows = c.fetchall()
        finally:
            c.close()

        max_gap = datetime.timedelta(seconds=threshold_secs)
        failed_websites = set()
        website2failure_time = defaultdict(list)
        for website, end_time in rows:
            failure_times = website2failure_time[website]
            # Rows arrive ordered by end_time, so the last stored entry is the
            # previous failure of this website.
            if failure_times and end_time <= failure_times[-1] + max_gap:
                failed_websites.add(website)
            failure_times.append(end_time)

        result = {}
        for website in failed_websites:
            failure_times = website2failure_time[website]
            # I don't think it makes sense to take seriously websites which fail
            # more than certain rate
            if float(len(failure_times)) / probes_count > upper_failure_threshold:
                continue
            result[website] = failure_times
        return result
Example #2
0
 def _metric_to_mysql_tuple(self, m):
     """Flatten metric ``m`` into a tuple of plain values.

     Field order: node id, website, state, start, end, duration in seconds,
     HTTP code (0 when it is ``None``), the normalized ``end_1min`` and
     ``end_5min`` timestamps, and a failure flag (0 when the state equals
     ``Metric.STATE_OK``, otherwise 1).  Presumably this matches the column
     order of the insert into the metrics table -- verify against the caller.
     """
     node_id = m.node_id
     website = m.website
     state = m.state

     started = datetime_to_mysql_date(m.start)
     ended = datetime_to_mysql_date(m.end)
     duration_secs = (m.end - m.start).total_seconds()

     if m.http_code is None:
         http_code = 0
     else:
         http_code = m.http_code

     # normalized data
     ended_1min = datetime_to_mysql_date(m.end_1min)
     ended_5min = datetime_to_mysql_date(m.end_5min)
     if m.state == Metric.STATE_OK:
         failure = 0
     else:
         failure = 1

     return (node_id, website, state, started, ended, duration_secs,
             http_code, ended_1min, ended_5min, failure)
Example #3
0
    def plot_duration_and_failure(self, website,
                                  after=_AFTER_DEFAULT,
                                  before=_BEFORE_DEFAULT):
        """Plot failure count and average duration of a website over time.

        Metrics of ``website`` with ``end_time`` in ``[after, before]`` are
        grouped by ``end_time_1min``; groups with fewer than two rows are
        dropped by the query.  For every remaining group the summed
        ``failure`` column and the average ``duration`` are plotted against
        the group's ``end_time_1min``, with the failure sum on the secondary
        y axis.

        :param website: value matched against the ``website`` column; also
            used as the plot title.
        :param after: inclusive lower bound on ``end_time``.
        :param before: inclusive upper bound on ``end_time``.
        :return: whatever ``DataFrame.plot`` returns for the assembled frame.
        """
        c = self.db.cursor()
        try:
            c.execute('select sum(failure), avg(duration), end_time_1min from metrics '
                      'where website=%s '
                      'and end_time >= %s and end_time <= %s '
                      'group by end_time_1min '
                      'having count(end_time_1min) >= 2',
                      (website, datetime_to_mysql_date(after), datetime_to_mysql_date(before)))
            # fetchall() instead of looping over rowcount: DB-API drivers may
            # report rowcount as -1 for SELECT statements.
            rows = c.fetchall()
        finally:
            # Release the cursor even when the query or the fetch fails.
            c.close()

        result = {'time': [], 'sum(failure)': [], 'avg(duration)': []}
        for failure, duration, minute in rows:
            result['time'].append(minute)
            # Cast the aggregates to plain int/float before handing them to pandas.
            result['sum(failure)'].append(int(failure))
            result['avg(duration)'].append(float(duration))

        df = pd.DataFrame(result)
        return df.plot(x='time', title=website, secondary_y='sum(failure)', figsize=(16, 12))