Example #1
0
def throw_at_lisa(log_file):
    """Queue every IP found in ``log_file`` on the Redis list read by Lisa.

    Each IP yielded by ``filter_logs(log_file)`` is pushed onto
    ``rkeys.IPLOGS`` as the string ``'0,<ip>'``.  All pushes are buffered
    on one pipeline and sent with a single ``execute()``; afterwards the
    number of queued IPs is reported to statsd as ``bart.ips_processed``.

    Raises:
        IOError: if ``log_file.exists()`` is false.
    """
    log.debug('Throwing {} at Lisa'.format(log_file))
    if not log_file.exists():
        raise IOError('Log file not found: {}'.format(log_file))

    batch = redis.pipeline()
    # Stays 0 when the log yields nothing; otherwise enumerate leaves the
    # 1-based index of the last IP, i.e. the total number pushed.
    pushed = 0
    for pushed, address in enumerate(filter_logs(log_file), 1):
        batch.lpush(rkeys.IPLOGS, '0,' + address)

    batch.execute()
    statsd.incr('bart.ips_processed', pushed)
Example #2
0
def rate_limit_ip(ip, timestamp):
    """Tell whether ``ip`` has used up its quota for ``timestamp``.

    The hit counter lives in Redis under ``ratelimit:<ip>:<timestamp>``.
    If the stored count has already reached ``conf.IP_RATE_LIMIT_MAX`` the
    hit is logged, counted in statsd as ``lisa.ratelimit`` and ``True`` is
    returned.  Otherwise the counter is incremented, its expiry is set to
    60 seconds, and ``False`` is returned.
    """
    bucket = 'ratelimit:{}:{}'.format(ip, timestamp)
    # A missing key comes back falsy and is treated as zero hits.
    hits = int(redis.get(bucket) or 0)

    if hits < conf.IP_RATE_LIMIT_MAX:
        # Still under the limit: record this hit and (re)arm the expiry,
        # both sent in one pipeline execution.
        txn = redis.pipeline()
        txn.incr(bucket)
        txn.expire(bucket, 60)
        txn.execute()
        return False

    log.warning('Rate limited {}'.format(ip))
    statsd.incr('lisa.ratelimit')
    return True
Example #3
0
def throw_at_lisa(log_file):
    """Push the IPs extracted from ``log_file`` onto Lisa's Redis queue.

    The file must exist, otherwise ``IOError`` is raised.  Every IP that
    ``filter_logs`` yields is prefixed with ``'0,'`` and left-pushed onto
    ``rkeys.IPLOGS`` through a pipeline that is executed once at the end.
    The total is then sent to statsd under ``bart.ips_processed``.
    """
    log.debug('Throwing {} at Lisa'.format(log_file))
    if not log_file.exists():
        raise IOError('Log file not found: {}'.format(log_file))

    total = 0
    pipeline = redis.pipeline()
    # Lazily build the queue entries; nothing is materialised up front.
    entries = ('0,' + ip for ip in filter_logs(log_file))
    for entry in entries:
        pipeline.lpush(rkeys.IPLOGS, entry)
        total += 1

    pipeline.execute()
    statsd.incr('bart.ips_processed', total)
Example #4
0
def main():
    """Consume IP log entries from the Redis queue until told to stop.

    Entries are popped from ``rkeys.IPLOGS`` and are expected to look like
    ``'<type>,<ip>'``.  Malformed and rate-limited entries are skipped.
    For the rest the IP is looked up in ``geo`` and, when a record is
    found, the resulting writes are queued on a shared Redis pipeline via
    ``process_map`` / ``process_share``.  The pipeline is flushed every
    1000 entries that reach the end of the loop, and before every return.

    Uses the module-level ``counter``, ``KILLED`` and ``args``.

    Returns:
        0 when ``KILLED`` is set or a benchmark run drains the queue,
        1 when popping from Redis raises ``RedisError``.
    """
    global counter
    timer = statsd.timer('lisa.process_ip', rate=0.01)  # 1% sample rate
    pipe = redis.pipeline()

    while True:
        if KILLED:
            # flush whatever is still buffered before shutting down
            pipe.execute()
            log.info('Shutdown successful')
            return 0

        try:
            if args.benchmark:
                # non-blocking pop: None means the queue is empty
                ip_info = redis.rpop(rkeys.IPLOGS)
            else:
                # blocking pop returns a pair; element 1 is kept as the entry
                ip_info = redis.brpop(rkeys.IPLOGS)[1]
        except RedisError as e:
            log.error('Error with Redis: {}'.format(e))
            pipe.execute()
            return 1

        if ip_info is None:
            # benchmark run is over
            pipe.execute()
            return 0

        # don't start above redis call as it will block to wait
        timer.start()

        log.debug('Got log data: ' + ip_info)
        try:
            rtype, ip = ip_info.split(',')
        except ValueError:
            # not exactly two comma-separated fields: drop the entry
            continue

        timestamp = get_epoch_minute()

        # rate_limit_ip() keys its counter on (ip, timestamp), so the
        # timestamp computed above has to be passed along; calling it
        # with the IP alone raises TypeError.
        if rate_limit_ip(ip, timestamp):
            continue

        record = geo.get(ip)
        if record:
            # everything goes for total count and map
            process_map(record, timestamp, pipe)
            # only shares get more processing
            if rtype != data_types.DOWNLOAD:
                process_share(record, rtype, pipe)

        timer.stop()
        statsd.incr('lisa.process_ip', rate=0.01)  # 1% sample rate

        if args.verbose:
            sys.stdout.write('.')
            sys.stdout.flush()

        # using a counter and if statement here instead of the
        # `rate` param on the gauge to avoid getting the length
        # of the Redis list every time.
        counter += 1
        if args.benchmark:
            # benchmark mode never resets the counter and emits no gauge;
            # it only flushes on every 1000th entry
            if not counter % 1000:
                pipe.execute()
        else:
            if counter >= 1000:
                pipe.execute()
                counter = 0
                statsd.gauge('queue.geoip', redis.llen(rkeys.IPLOGS))