Example #1
def filter_logs(log_file):
    """Yield the IP of every valid Firefox download found in *log_file*.

    A line is skipped when it is malformed, has the wrong HTTP status,
    does not match the Firefox request pattern, or its IP has already
    hit the per-file rate limit.
    """
    log.debug('Filtering logs in {}'.format(log_file))
    with log_file.open() as fh:
        downloads_per_ip = Counter()
        for raw_line in fh:
            fields = raw_line.strip().split()
            if len(fields) != 5:
                # wrong format
                continue
            _, timestamp, ip, status_code, request = fields

            if status_code != conf.LOG_HTTP_STATUS:
                log.debug(
                    'Not the status code we want: {}'.format(status_code))
                continue

            if firefox_re.search(request) is None:
                log.debug(
                    'Not the Firefox we\'re looking for: {}'.format(request))
                continue

            if downloads_per_ip[ip] >= conf.IP_RATE_LIMIT_MAX:
                log.info('Skipped {} due to rate limit'.format(ip))
                statsd.incr('bart.ratelimit')
                continue

            downloads_per_ip[ip] += 1
            yield ip
Example #2
def filter_logs(log_file):
    """Generator of IPs for valid downloads parsed out of *log_file*.

    Skips malformed lines, wrong status codes, non-Firefox requests,
    and IPs that exceed the configured rate limit.
    """
    log.debug('Filtering logs in {}'.format(log_file))
    hits = Counter()
    with log_file.open() as fh:
        for entry in fh:
            try:
                _, timestamp, ip, status_code, request = entry.strip().split()
            except ValueError:
                continue  # wrong format

            if status_code != conf.LOG_HTTP_STATUS:
                log.debug('Not the status code we want: {}'.format(status_code))
            elif not firefox_re.search(request):
                log.debug('Not the Firefox we\'re looking for: {}'.format(request))
            elif hits[ip] >= conf.IP_RATE_LIMIT_MAX:
                log.info('Skipped {} due to rate limit'.format(ip))
                statsd.incr('bart.ratelimit')
            else:
                hits[ip] += 1
                yield ip
Example #3
def rate_limit_ip(ip):
    """Return True if *ip* is rate limited, else record the call.

    Reads the current call count for *ip* from the shared rate_limiter
    store; when the count has reached conf.IP_RATE_LIMIT_MAX the IP is
    reported as limited, otherwise the count is incremented.
    """
    calls = rate_limiter.get(ip, 0)
    if calls and calls >= conf.IP_RATE_LIMIT_MAX:
        log.debug('Rate limited {}'.format(ip))
        # BUG FIX: 0.5 was being passed positionally as the *count*
        # argument of statsd.incr (incrementing by half), while every
        # other sampled metric here uses the rate= keyword -- sample
        # this metric at 50% as clearly intended.
        statsd.incr('lisa.ratelimit', rate=0.5)
        return True

    rate_limiter.put(ip, calls + 1)
    return False
Example #4
def main():
    """Consume IP log entries from the Redis queue and process each one.

    Loops forever: pops '<type>,<ip>' strings from the rkeys.IPLOGS
    list, rate-limits, geolocates, and updates map/share stats.
    Returns 0 on clean shutdown (KILLED set) or 1 on a Redis error.
    """
    counter = 0
    timer = statsd.timer('lisa.process_ip', rate=0.01)  # 1% sample rate

    while True:
        # KILLED is a module-level flag, presumably flipped by a signal
        # handler elsewhere in the file -- TODO confirm.
        if KILLED:
            log.info('Shutdown successful')
            return 0

        try:
            # brpop blocks until an item is available and returns a
            # (key, value) pair; value is read via ip_info[1] below.
            ip_info = redis.brpop(rkeys.IPLOGS)
        except RedisError as e:
            log.error('Error with Redis: {}'.format(e))
            return 1

        # don't start above redis call as it will block to wait
        timer.start()

        log.debug('Got log data: ' + ip_info[1])
        try:
            # queue entries are '<record type>,<ip address>' strings
            rtype, ip = ip_info[1].split(',')
        except ValueError:
            # malformed entry -- drop it and keep consuming
            continue

        timestamp = get_epoch_minute()

        # NOTE(review): rate_limit_ip is called with (ip, timestamp)
        # here; confirm the in-scope definition takes two arguments.
        if rate_limit_ip(ip, timestamp):
            continue

        record = geo.get(ip)
        if record:
            # everything goes for total count and map
            process_map(record, timestamp)
            # only shares get more processing
            if rtype != data_types.DOWNLOAD:
                process_share(record, rtype)

        timer.stop()
        statsd.incr('lisa.process_ip', rate=0.01)  # 1% sample rate

        if args.verbose:
            sys.stdout.write('.')
            sys.stdout.flush()

        # using a counter and if statement here instead of the
        # `rate` param on the gauge to avoid getting the length
        # of the Redis list every time.
        counter += 1
        if counter >= 1000:
            counter = 0
            statsd.gauge('queue.geoip', redis.llen(rkeys.IPLOGS))
Example #5
def throw_at_lisa(log_file):
    """Push the valid download IPs from *log_file* onto Lisa's redis queue.

    Raises IOError when the log file does not exist.
    """
    log.debug('Throwing {} at Lisa'.format(log_file))
    if not log_file.exists():
        raise IOError('Log file not found: {}'.format(log_file))

    total = 0
    pipe = redis.pipeline()
    # enumerate keeps the running total in lockstep with the pushes
    for total, addr in enumerate(filter_logs(log_file), start=1):
        pipe.lpush(rkeys.IPLOGS, '0,' + addr)

    pipe.execute()
    statsd.incr('bart.ips_processed', total)
Example #6
def throw_at_lisa(log_file):
    """Queue every valid download IP from *log_file* in redis for Lisa.

    Raises IOError when *log_file* is missing.
    """
    log.debug('Throwing {} at Lisa'.format(log_file))
    if not log_file.exists():
        raise IOError('Log file not found: {}'.format(log_file))

    ips = list(filter_logs(log_file))
    pipe = redis.pipeline()
    for ip in ips:
        pipe.lpush(rkeys.IPLOGS, '0,' + ip)

    pipe.execute()
    statsd.incr('bart.ips_processed', len(ips))
Example #7
def rate_limit_ip(ip, timestamp):
    """Return True when *ip* has hit the limit for this *timestamp* window.

    The count lives in a per-IP, per-minute redis key that expires on
    its own after 60 seconds.
    """
    key = 'ratelimit:{}:{}'.format(ip, timestamp)
    stored = redis.get(key)
    current = int(stored) if stored else 0
    if current >= conf.IP_RATE_LIMIT_MAX:
        log.warning('Rate limited {}'.format(ip))
        statsd.incr('lisa.ratelimit')
        return True

    # bump the counter and (re)arm the TTL atomically via a pipeline
    pipe = redis.pipeline()
    pipe.incr(key)
    pipe.expire(key, 60)
    pipe.execute()

    return False
Example #8
def main():
    """Consume IP log entries from the Redis queue and process each one.

    Pops '<type>,<ip>' strings from rkeys.IPLOGS (blocking normally,
    non-blocking in --benchmark mode), rate-limits, geolocates, and
    batches result writes through a redis pipeline.  Returns 0 on clean
    shutdown or benchmark completion, 1 on a Redis error; the pipeline
    is flushed on every exit path.
    """
    # counter is shared module state (flushing cadence survives restarts
    # of this function) -- TODO confirm why it must be global.
    global counter
    timer = statsd.timer('lisa.process_ip', rate=0.01)  # 1% sample rate
    pipe = redis.pipeline()

    while True:
        # KILLED is a module-level flag, presumably set by a signal
        # handler elsewhere in the file -- TODO confirm.
        if KILLED:
            pipe.execute()
            log.info('Shutdown successful')
            return 0

        try:
            if args.benchmark:
                # non-blocking pop: returns None when the queue drains,
                # which ends the benchmark run below
                ip_info = redis.rpop(rkeys.IPLOGS)
            else:
                # blocking pop returns a (key, value) pair; keep value
                ip_info = redis.brpop(rkeys.IPLOGS)[1]
        except RedisError as e:
            log.error('Error with Redis: {}'.format(e))
            pipe.execute()
            return 1

        if ip_info is None:
            # benchmark run is over
            pipe.execute()
            return 0

        # don't start above redis call as it will block to wait
        timer.start()

        log.debug('Got log data: ' + ip_info)
        try:
            # queue entries are '<record type>,<ip address>' strings
            rtype, ip = ip_info.split(',')
        except ValueError:
            # malformed entry -- drop it and keep consuming
            continue

        timestamp = get_epoch_minute()

        if rate_limit_ip(ip):
            continue

        record = geo.get(ip)
        if record:
            # everything goes for total count and map
            process_map(record, timestamp, pipe)
            # only shares get more processing
            if rtype != data_types.DOWNLOAD:
                process_share(record, rtype, pipe)

        timer.stop()
        statsd.incr('lisa.process_ip', rate=0.01)  # 1% sample rate

        if args.verbose:
            sys.stdout.write('.')
            sys.stdout.flush()

        # using a counter and if statement here instead of the
        # `rate` param on the gauge to avoid getting the length
        # of the Redis list every time.
        counter += 1
        if args.benchmark:
            # flush the write pipeline every 1000 items; counter is
            # deliberately not reset in benchmark mode
            if not counter % 1000:
                pipe.execute()
        else:
            if counter >= 1000:
                pipe.execute()
                counter = 0
                statsd.gauge('queue.geoip', redis.llen(rkeys.IPLOGS))