def main():
    """Consume IP log lines from the Redis queue and process each one.

    Blocks on the queue forever; exits 0 on a clean shutdown signal and
    1 on a Redis failure.
    """
    timer = statsd.timer('lisa.process_ip', rate=0.01)  # 1% sample rate
    processed = 0
    while not KILLED:
        try:
            ip_info = redis.brpop(rkeys.IPLOGS)
        except RedisError as e:
            log.error('Error with Redis: {}'.format(e))
            return 1
        # don't start above redis call as it will block to wait
        timer.start()
        log.debug('Got log data: ' + ip_info[1])
        try:
            rtype, ip = ip_info[1].split(',')
        except ValueError:
            # malformed entry; drop it and move on
            continue
        timestamp = get_epoch_minute()
        if rate_limit_ip(ip, timestamp):
            continue
        record = geo.get(ip)
        if record:
            # everything goes for total count and map
            process_map(record, timestamp)
            # only shares get more processing
            if rtype != data_types.DOWNLOAD:
                process_share(record, rtype)
        timer.stop()
        statsd.incr('lisa.process_ip', rate=0.01)  # 1% sample rate
        if args.verbose:
            sys.stdout.write('.')
            sys.stdout.flush()
        # Counter-gated gauge rather than the `rate` param so we only
        # query the Redis list length once per 1000 processed entries.
        processed += 1
        if processed >= 1000:
            processed = 0
            statsd.gauge('queue.geoip', redis.llen(rkeys.IPLOGS))
    log.info('Shutdown successful')
    return 0
def _aggregate_issue_shares(totals_key, issues_key_tmpl, region_field,
                            region_issues, issue_regions, min_count=0):
    """Tally per-region issue share percentages from Redis hashes.

    Reads the region -> total-count hash at *totals_key*; for each region,
    reads the issue -> count hash at ``issues_key_tmpl.format(region)``.

    Fills *region_issues* (region -> {issue name: percent}) in place and
    appends ``{region_field: region, 'count': percent}`` dicts to the
    per-issue lists in *issue_regions*.  Regions whose total is below
    *min_count* are skipped entirely (the default of 0 skips nothing for
    the non-negative counts stored in Redis).
    """
    totals = redis.hgetall(totals_key)
    for region, count in totals.iteritems():
        count = int(count)
        if count < min_count:
            continue
        issues = redis.hgetall(issues_key_tmpl.format(region))
        region_issues[region] = {}
        for issue, issue_count in issues.iteritems():
            issue_count = int(issue_count)
            # translate the raw issue code into its canonical name
            issue = data_types.types_map[issue]
            percent = get_percent(issue_count, count)
            region_issues[region][issue] = percent
            issue_regions[issue].append({
                region_field: region,
                'count': percent,
            })


def get_data_for_timestamp(timestamp):
    """ Return aggregate map and share data dict for a timestamp. """
    issue_continents = get_issue_dict()
    issue_countries = get_issue_dict()
    data = {
        'map_total': int(redis.get(rkeys.MAP_TOTAL) or 0),
        'map_previous_total': int(redis.get(rkeys.MAP_TOTAL_SNAPSHOT) or 0),
        'map_geo': [],
        'share_total': int(redis.get(rkeys.SHARE_TOTAL) or 0),
        'continent_issues': {},
        'issue_continents': issue_continents,
        'country_issues': {},
        'issue_countries': issue_countries,
    }
    statsd.gauge('milhouse.map_total', data['map_total'])
    # snapshot the running total so the next run can report a delta
    redis.set(rkeys.MAP_TOTAL_SNAPSHOT, data['map_total'])
    map_geo_key = rkeys.MAP_GEO.format(timestamp)
    geo_data = redis.hgetall(map_geo_key)
    for latlon, count in geo_data.iteritems():
        lat, lon = latlon.split(':')
        data['map_geo'].append({
            'lat': float(lat),
            'lon': float(lon),
            'count': int(count),
        })

    # CONTINENTS #
    _aggregate_issue_shares(rkeys.SHARE_CONTINENTS,
                            rkeys.SHARE_CONTINENT_ISSUES,
                            'continent',
                            data['continent_issues'],
                            issue_continents)

    # COUNTRIES # (only those meeting the minimum share threshold)
    _aggregate_issue_shares(rkeys.SHARE_COUNTRIES,
                            rkeys.SHARE_COUNTRY_ISSUES,
                            'country',
                            data['country_issues'],
                            issue_countries,
                            min_count=conf.COUNTRY_MIN_SHARE)

    # GLOBAL #
    share_issues = redis.hgetall(rkeys.SHARE_ISSUES)
    share_total = data['share_total']
    global_issues = data['country_issues']['GLOBAL'] = {}
    for issue, count in share_issues.iteritems():
        count = int(count)
        issue = data_types.types_map[issue]
        global_issues[issue] = get_percent(count, share_total)
    return data
def main():
    """Consume IP log lines from the Redis queue, batching writes on a pipeline.

    In benchmark mode (``args.benchmark``) entries are drained non-blockingly
    with ``rpop`` and the run ends when the queue is empty; otherwise ``brpop``
    blocks indefinitely.  Returns 0 on clean exit, 1 on a Redis failure; the
    pipeline is flushed on every exit path so no buffered writes are lost.
    """
    # module-level counter so totals persist across calls — e.g. benchmark
    # batching below relies on it never being reset in that mode
    global counter
    timer = statsd.timer('lisa.process_ip', rate=0.01)  # 1% sample rate
    # all process_map/process_share writes buffer here; flushed in batches
    pipe = redis.pipeline()
    while True:
        if KILLED:
            pipe.execute()
            log.info('Shutdown successful')
            return 0
        try:
            if args.benchmark:
                # non-blocking pop; returns None when the queue is drained
                ip_info = redis.rpop(rkeys.IPLOGS)
            else:
                # brpop returns a (key, value) tuple — keep only the value
                ip_info = redis.brpop(rkeys.IPLOGS)[1]
        except RedisError as e:
            log.error('Error with Redis: {}'.format(e))
            pipe.execute()
            return 1
        if ip_info is None:
            # benchmark run is over
            pipe.execute()
            return 0
        # don't start above redis call as it will block to wait
        timer.start()
        log.debug('Got log data: ' + ip_info)
        try:
            rtype, ip = ip_info.split(',')
        except ValueError:
            # NOTE(review): the started timer is never stopped on this path
            # (nor on the rate-limit one below) — presumably harmless for
            # this statsd client, but worth confirming
            continue
        timestamp = get_epoch_minute()
        if rate_limit_ip(ip):
            continue
        record = geo.get(ip)
        if record:
            # everything goes for total count and map
            process_map(record, timestamp, pipe)
            # only shares get more processing
            if rtype != data_types.DOWNLOAD:
                process_share(record, rtype, pipe)
        timer.stop()
        statsd.incr('lisa.process_ip', rate=0.01)  # 1% sample rate
        if args.verbose:
            sys.stdout.write('.')
            sys.stdout.flush()
        # using a counter and if statement here instead of the
        # `rate` param on the gauge to avoid getting the length
        # of the Redis list every time.
        counter += 1
        if args.benchmark:
            # flush every 1000 entries; counter keeps growing so the
            # modulo keeps firing across the whole run
            if not counter % 1000:
                pipe.execute()
        else:
            if counter >= 1000:
                pipe.execute()
                counter = 0
                statsd.gauge('queue.geoip', redis.llen(rkeys.IPLOGS))