def throw_at_lisa(log_file):
    """Queue the IPs found in ``log_file`` in redis for Lisa to process.

    Every IP yielded by ``filter_logs(log_file)`` is pushed onto the
    ``rkeys.IPLOGS`` list as the string ``'0,<ip>'``. All pushes go
    through one pipeline that is executed once at the end, after which
    the number of queued IPs is reported to statsd under
    ``bart.ips_processed``.

    Raises:
        IOError: if ``log_file.exists()`` is false.
    """
    log.debug('Throwing {} at Lisa'.format(log_file))
    if not log_file.exists():
        raise IOError('Log file not found: {}'.format(log_file))

    # Stays 0 when the log yields no IPs at all.
    queued = 0
    batch = redis.pipeline()
    for queued, address in enumerate(filter_logs(log_file), start=1):
        batch.lpush(rkeys.IPLOGS, '0,' + address)
    batch.execute()

    statsd.incr('bart.ips_processed', queued)
def rate_limit_ip(ip, timestamp):
    """Report whether ``ip`` is rate limited for the given ``timestamp``.

    Hits are counted in redis under ``ratelimit:<ip>:<timestamp>``. When
    the stored count has reached ``conf.IP_RATE_LIMIT_MAX`` the call is
    logged, the ``lisa.ratelimit`` stat is incremented and ``True`` is
    returned. Otherwise the counter is incremented, its expiry is set to
    60 seconds, and ``False`` is returned.
    """
    bucket = 'ratelimit:{}:{}'.format(ip, timestamp)
    # A missing key counts as zero hits so far.
    hits = int(redis.get(bucket) or 0)

    if hits < conf.IP_RATE_LIMIT_MAX:
        # Under the limit: record this hit and refresh the key's expiry
        # in a single pipelined round trip.
        redis.pipeline().incr(bucket).expire(bucket, 60).execute()
        return False

    log.warning('Rate limited {}'.format(ip))
    statsd.incr('lisa.ratelimit')
    return True
def main():
    """Pop IP records off the redis queue and process them in a loop.

    Each queue entry is expected to look like ``'<rtype>,<ip>'``. For
    every entry the IP is rate-limit checked, looked up with ``geo.get``
    and, when a record is found, handed to ``process_map`` (and to
    ``process_share`` unless ``rtype`` is ``data_types.DOWNLOAD``). Work
    is accumulated on a shared redis pipeline that is flushed every
    1000 processed entries and before every return.

    Returns:
        0 when ``KILLED`` is set or a benchmark run drains the queue,
        1 when popping from redis raises ``RedisError``.
    """
    global counter
    timer = statsd.timer('lisa.process_ip', rate=0.01)  # 1% sample rate
    pipe = redis.pipeline()

    while True:
        if KILLED:
            # Flush whatever is still pending before exiting.
            pipe.execute()
            log.info('Shutdown successful')
            return 0

        try:
            if args.benchmark:
                # Non-blocking pop: returns None once the queue is empty.
                ip_info = redis.rpop(rkeys.IPLOGS)
            else:
                # Blocking pop returns a (key, value) pair; keep the value.
                ip_info = redis.brpop(rkeys.IPLOGS)[1]
        except RedisError as e:
            log.error('Error with Redis: {}'.format(e))
            pipe.execute()
            return 1

        if ip_info is None:
            # benchmark run is over
            pipe.execute()
            return 0

        # don't start above redis call as it will block to wait
        timer.start()

        log.debug('Got log data: ' + ip_info)
        try:
            rtype, ip = ip_info.split(',')
        except ValueError:
            # Entry does not split into exactly two fields; skip it.
            continue

        timestamp = get_epoch_minute()

        # rate_limit_ip requires the timestamp as well as the IP; the
        # previous one-argument call raised TypeError on every record.
        if rate_limit_ip(ip, timestamp):
            continue

        record = geo.get(ip)
        if record:
            # everything goes for total count and map
            process_map(record, timestamp, pipe)
            # only shares get more processing
            if rtype != data_types.DOWNLOAD:
                process_share(record, rtype, pipe)

        timer.stop()
        statsd.incr('lisa.process_ip', rate=0.01)  # 1% sample rate

        if args.verbose:
            sys.stdout.write('.')
            sys.stdout.flush()

        # using a counter and if statement here instead of the
        # `rate` param on the gauge to avoid getting the length
        # of the Redis list every time.
        counter += 1
        if args.benchmark:
            # Benchmark mode keeps the running total and only flushes.
            if not counter % 1000:
                pipe.execute()
        else:
            if counter >= 1000:
                pipe.execute()
                counter = 0
                statsd.gauge('queue.geoip', redis.llen(rkeys.IPLOGS))