Example #1
0
    def __init__(self, log_path, hit_threshold=2000):
        """
        Set up a monitor for one HTTP access log
        :param log_path: location of the w3c-formatted HTTP access log to follow
        :param hit_threshold: hit count over the last 2 mins above which an alert is raised
        """
        # Alerting configuration and state
        self.hit_threshold = hit_threshold
        self.hits_alert_active = False
        self.alerts = []

        # Nothing has been read or timed yet
        self.records = None
        self.process_time = None

        # Source of new log records, parsed as w3c entries
        parser = W3cParser()
        self.log_tailer = LogfileTailer(log_path=log_path, parser=parser)
Example #2
0
class Monitor(object):
    """
    Monitor HTTP traffic by consuming an actively written-to w3c-formatted HTTP access log

    Records are accumulated in ``self.records``; per-field hit counts and
    bandwidths, overall totals and the number of hits inside the most recent
    ``hit_threshold_window`` are recomputed on every update and printed to the
    console. An alert entry is appended whenever the recent hit count crosses
    ``hit_threshold`` in either direction.
    """

    # Which fields to count the bandwidth and number of unique hits for
    fields_to_count = ('section', 'host', 'page', 'status')
    # Monitor latest traffic up to this long ago for hit threshold breaches
    hit_threshold_window = timedelta(seconds=120)
    # How many records to display for each field of interest
    top_n_to_display = 5

    def __init__(self, log_path, hit_threshold=2000):
        """
        Construct an HTTP access log monitor
        :param log_path: path to the w3c-formatted HTTP access log file
        :param hit_threshold: alert when hits in the last 2 mins exceeds this
        """
        self.hit_threshold = hit_threshold

        self.log_tailer = LogfileTailer(log_path=log_path, parser=W3cParser())

        self.records = None
        self.process_time = None
        self.hits_alert_active = False
        self.alerts = []

        # Start with empty statistics so every method is usable before the
        # first batch of records has been processed.
        self.recent_hits = 0
        self.reset_stats()

    def reset_stats(self):
        """
        Discard all accumulated statistics
        """
        # field name -> Counter mapping each observed value to its hit count
        self.counts = defaultdict(Counter)
        # field name -> Counter mapping each observed value to its byte total
        self.bandwidths = defaultdict(Counter)
        # overall figures: 'requests', 'bytes' and one unique-count per field
        self.totals = defaultdict(int)

    def update_stats(self, records):
        """
        Fold a batch of records into the accumulated statistics
        :param records: sized iterable of records exposing ``bytes`` and every
                        attribute named in ``fields_to_count``
        """
        # Get counts for every field
        for record in records:
            for field in self.fields_to_count:
                value = getattr(record, field)
                self.counts[field][value] += 1
                self.bandwidths[field][value] += record.bytes
            self.totals['bytes'] += record.bytes

        self.totals['requests'] += len(records)

        # Get totals of unique requests. Every tracked field gets an entry,
        # even when nothing has been seen yet, because output_to_console
        # formats these keys by name and would otherwise fail on an empty log.
        # .get() avoids creating empty Counters in self.counts as a side effect.
        for field in self.fields_to_count:
            self.totals[field] = len(self.counts.get(field, ()))

    def update_recent_stats(self):
        """
        Recount the hits that fall inside the most recent hit_threshold_window

        Walks the stored records from the newest backwards and stops at the
        first one outside the window, so it relies on ``self.records`` being
        ordered oldest to newest.
        """
        # Update stats based on a recent timeframe. Requires considering all records,
        # not just the latest batch (though only those in the most recent window)
        cutoff = datetime.now() - self.hit_threshold_window
        recent_hits = 0
        for record in reversed(self.records or ()):
            if record.date <= cutoff:
                break
            recent_hits += 1
        self.recent_hits = recent_hits

    def check_alerts(self):
        """
        Record an alert entry when the recent hit count crosses the threshold

        An entry is appended both when the count rises above ``hit_threshold``
        and when it falls back to or below it; nothing is added while the
        state is unchanged.
        """
        breached = self.recent_hits > self.hit_threshold
        if breached != self.hits_alert_active:
            self.alerts.append(Alert(self.hit_threshold, self.recent_hits))
            self.hits_alert_active = breached

    def output_to_console(self):
        """
        Print the current statistics, alerts and per-field top hits
        """
        # Every print() call below takes exactly one argument, which produces
        # the same output under both Python 2 and Python 3.
        print('#################################')
        print('Processed in {0:.3}s at {1}'.format(self.process_time,
                                                   datetime.now().strftime(DATE_FORMAT)))
        if self.records:
            print('Period: {0} - {1}'.format(
                self.records[0].date.strftime(DATE_FORMAT),
                self.records[-1].date.strftime(DATE_FORMAT)
            ))
        print('')

        self.totals['pretty_bytes'] = pretty_bytes(self.totals['bytes'])
        print('Hits: {requests}\tUsers: {host}\tData: {pretty_bytes}'.format(**self.totals))
        print('Sections: {section}\tPages: {page}\t'.format(**self.totals))
        # total_seconds() covers the whole window; timedelta.seconds alone
        # would drop whole days from a window of a day or longer.
        window_seconds = int(self.hit_threshold_window.total_seconds())
        print('Hits in last {0}s: {1}'.format(window_seconds, self.recent_hits))
        print('')

        print('No alerts to display' if not self.alerts else 'Alerts:')
        # Most recent alert first
        for alert in reversed(self.alerts):
            print(alert)
        print('')

        # TODO: use a table module to do this neater and more compactly
        for field, top_n_for_field in self.counts.items():
            print('Most hits by {0}'.format(field))
            for item, count in top_n_for_field.most_common(self.top_n_to_display):
                print('{0} ({1} hits, {2})'.format(
                    item, count, pretty_bytes(self.bandwidths[field][item])
                ))
            print('')

        print('#################################')

    def process_new_records(self, records, reset):
        """
        Take in a new batch of records and refresh all statistics and alerts
        :param records: list of newly read records (may be empty)
        :param reset: discard everything gathered so far and start again from
                      this batch
        """
        # With no records stored yet there is nothing to extend, so the first
        # batch is always handled as a fresh start.
        if reset or self.records is None:
            print('Generating initial stats from w3c log')
            self.reset_stats()
            # Copy so that later extend() calls never modify the caller's list
            self.records = list(records)
        else:
            print('Updating stats with {0} new requests'.format(len(records)))
            self.records.extend(records)

        self.update_stats(records)
        self.update_recent_stats()
        self.check_alerts()

    def run(self, tail=True, frequency=10):
        """
        Start the Monitor process, which runs continuously unless tail is False

        An IOError raised while reading the log is printed and ends the run.
        :param tail: loop continuously
        :param frequency: how often to get new records (only if tail is True)
        """
        print('Parsing initial log file')
        while True:
            start = time.time()
            try:
                new_records, file_state = self.log_tailer.get_latest()
            except IOError as e:
                print(e)
                return

            parse_time = time.time()
            print('Parsed in {0:.3}s'.format(parse_time - start))

            # Even if we have no new records, issue an update,
            #  since the time-dependent stats will have changed
            self.process_new_records(new_records, reset=(file_state == LogfileTailer.FILE_RESET))
            self.process_time = time.time() - parse_time
            self.output_to_console()
            if not tail:
                return
            time.sleep(frequency)