# These task functions assume the usual module-level context of the tasks module:
# the stdlib imports below, plus project-internal modules (config, util, api,
# blockfeed, assets_trading), helpers such as start_task() (sketched illustratively
# after start_api() below), and the *_PERIOD / *_MAX_RETRY constants defined elsewhere.
import calendar
import datetime
import logging
import urllib.parse

import pymongo

logger = logging.getLogger(__name__)


def task_expire_stale_prefs():
    """
    Every day, clear out preferences objects that haven't been touched in > 30 days,
    in order to reduce abuse risk/space consumed
    """
    min_last_updated = calendar.timegm(
        (datetime.datetime.utcnow() - datetime.timedelta(days=30)).timetuple())
    num_stale_records = config.mongo_db.preferences.find(
        {'last_touched': {'$lt': min_last_updated}}).count()
    config.mongo_db.preferences.remove({'last_touched': {'$lt': min_last_updated}})
    if num_stale_records:
        logger.warn("REMOVED %i stale preferences objects" % num_stale_records)

    start_task(task_expire_stale_prefs, delay=86400)  # call again in 1 day
def start_api():
    logger.info("Starting up RPC API handler...")
    group = start_task(api.serve_api)
    group.join()  # block forever
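# Illustrative sketch only (not this project's actual implementation): start_task is
# assumed to be a thin wrapper around gevent that spawns the given callable either
# immediately or after `delay` seconds and returns the greenlet, so callers such as
# start_api() above can join() on it. The _task_group name is hypothetical.
import gevent
import gevent.pool

_task_group = gevent.pool.Group()  # hypothetical module-level greenlet group


def start_task(func, *args, delay=0, **kwargs):
    if delay:
        # schedule the callable to run once `delay` seconds from now
        greenlet = gevent.spawn_later(delay, func, *args, **kwargs)
    else:
        # run the callable right away in its own greenlet
        greenlet = _task_group.spawn(func, *args, **kwargs)
    return greenlet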
def start_cp_blockfeed():
    logger.info("Starting up aspire block feed poller...")
    start_task(blockfeed.process_cp_blockfeed)
def start_tasks():
    start_task(task_compile_asset_pair_market_info)
    start_task(task_compile_asset_market_info)
def task_compile_asset_market_info():
    assets_trading.compile_asset_market_info()
    # all done for this run...call again in a bit
    start_task(task_compile_asset_market_info, delay=COMPILE_ASSET_MARKET_INFO_PERIOD)
def start_tasks():
    start_task(task_expire_stale_prefs)
    start_task(task_generate_wallet_stats)
def task_generate_wallet_stats():
    """
    Every 30 minutes, from the login history, update and generate wallet stats
    """
    def gen_stats_for_network(network):
        assert network in ('mainnet', 'testnet')
        # get the latest date in the stats table present
        now = datetime.datetime.utcnow()
        latest_stat = config.mongo_db.wallet_stats.find(
            {'network': network}).sort('when', pymongo.DESCENDING).limit(1)
        latest_stat = latest_stat[0] if latest_stat.count() else None
        new_entries = {}

        # the queries below work with data that happened on or after the date of the latest stat present
        # aggregate over the same period for new logins, adding the referrers to a set
        match_criteria = {'when': {"$gte": latest_stat['when']}, 'network': network, 'action': 'create'} \
            if latest_stat else {'when': {"$lte": now}, 'network': network, 'action': 'create'}
        new_wallets = config.mongo_db.login_history.aggregate([
            {"$match": match_criteria},
            {"$project": {
                "year": {"$year": "$when"},
                "month": {"$month": "$when"},
                "day": {"$dayOfMonth": "$when"}
            }},
            {"$group": {
                "_id": {"year": "$year", "month": "$month", "day": "$day"},
                "new_count": {"$sum": 1}
            }}
        ])
        for e in new_wallets:
            ts = calendar.timegm(datetime.datetime(
                e['_id']['year'], e['_id']['month'], e['_id']['day']).timetuple())
            new_entries[ts] = {  # a future wallet_stats entry
                'when': datetime.datetime(e['_id']['year'], e['_id']['month'], e['_id']['day']),
                'network': network,
                'new_count': e['new_count'],
            }

        referer_counts = config.mongo_db.login_history.aggregate([
            {"$match": match_criteria},
            {"$project": {
                "year": {"$year": "$when"},
                "month": {"$month": "$when"},
                "day": {"$dayOfMonth": "$when"},
                "referer": 1
            }},
            {"$group": {
                "_id": {"year": "$year", "month": "$month", "day": "$day", "referer": "$referer"},
                #"uniqueReferers": {"$addToSet": "$_id"},
                "count": {"$sum": 1}
            }}
        ])
        for e in referer_counts:
            ts = calendar.timegm(datetime.datetime(
                e['_id']['year'], e['_id']['month'], e['_id']['day']).timetuple())
            assert ts in new_entries
            if e['_id']['referer'] is None:
                continue
            referer_key = urllib.parse.quote(e['_id']['referer']).replace('.', '%2E')
            if 'referers' not in new_entries[ts]:
                new_entries[ts]['referers'] = {}
            if referer_key not in new_entries[ts]['referers']:
                new_entries[ts]['referers'][referer_key] = 0
            new_entries[ts]['referers'][referer_key] += 1

        # logins (not new wallets) - generate stats
        match_criteria = {'when': {"$gte": latest_stat['when']}, 'network': network, 'action': 'login'} \
            if latest_stat else {'when': {"$lte": now}, 'network': network, 'action': 'login'}
        logins = config.mongo_db.login_history.aggregate([
            {"$match": match_criteria},
            {"$project": {
                "year": {"$year": "$when"},
                "month": {"$month": "$when"},
                "day": {"$dayOfMonth": "$when"},
                "wallet_id": 1
            }},
            {"$group": {
                "_id": {"year": "$year", "month": "$month", "day": "$day"},
                "login_count": {"$sum": 1},
                "distinct_wallets": {"$addToSet": "$wallet_id"},
            }}
        ])
        for e in logins:
            ts = calendar.timegm(datetime.datetime(
                e['_id']['year'], e['_id']['month'], e['_id']['day']).timetuple())
            if ts not in new_entries:
                new_entries[ts] = {  # a future wallet_stats entry
                    'when': datetime.datetime(e['_id']['year'], e['_id']['month'], e['_id']['day']),
                    'network': network,
                    'new_count': 0,
                    'referers': {}
                }
            new_entries[ts]['login_count'] = e['login_count']
            new_entries[ts]['distinct_login_count'] = len(e['distinct_wallets'])

        # add/replace the wallet_stats data
        if latest_stat:
            updated_entry_ts = calendar.timegm(datetime.datetime(
                latest_stat['when'].year, latest_stat['when'].month, latest_stat['when'].day).timetuple())
            if updated_entry_ts in new_entries:
                updated_entry = new_entries[updated_entry_ts]
                del new_entries[updated_entry_ts]
                assert updated_entry['when'] == latest_stat['when']
                del updated_entry['when']  # not required for the upsert
                logger.info(
                    "Revised wallet statistics for partial day %s-%s-%s: %s" % (
                        latest_stat['when'].year, latest_stat['when'].month,
                        latest_stat['when'].day, updated_entry))
                config.mongo_db.wallet_stats.update(
                    {'when': latest_stat['when']}, {"$set": updated_entry}, upsert=True)

        if new_entries:  # insert the rest
            #logger.info("Stats, new entries: %s" % new_entries.values())
            config.mongo_db.wallet_stats.insert(list(new_entries.values()))
            logger.info("Added wallet statistics for %i full days" % len(list(new_entries.values())))

    gen_stats_for_network('mainnet')
    gen_stats_for_network('testnet')
    start_task(task_generate_wallet_stats, delay=30 * 60)  # call again in 30 minutes
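# For reference, a single wallet_stats document produced by gen_stats_for_network()
# above covers one day's aggregates and looks roughly like this (field names follow
# the code; the values shown are illustrative, not real data):
#
#   {
#       'when': datetime.datetime(2015, 6, 1),   # midnight UTC of the day covered
#       'network': 'mainnet',
#       'new_count': 12,                         # wallets created that day
#       'referers': {'example%2Ecom': 1},        # keyed by percent-encoded referer ('.' -> '%2E')
#       'login_count': 57,                       # total logins that day
#       'distinct_login_count': 34,              # unique wallet_ids that logged in
#   }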
def start_tasks():
    start_task(task_compile_extended_feed_info)
def task_compile_extended_feed_info():
    feeds = list(config.mongo_db.feeds.find({'info_status': 'needfetch'}))
    feed_info_urls = []

    def inc_fetch_retry(feed, max_retry=FEED_MAX_RETRY, new_status='error', errors=[]):
        feed['fetch_info_retry'] += 1
        feed['errors'] = errors
        if feed['fetch_info_retry'] == max_retry:
            feed['info_status'] = new_status
        config.mongo_db.feeds.save(feed)

    def process_feed_info(feed, info_data):
        # sanity check
        assert feed['info_status'] == 'needfetch'
        assert 'info_url' in feed
        assert util.is_valid_url(feed['info_url'], allow_no_protocol=True)  # already validated in the fetch

        errors = util.is_valid_json(info_data, config.FEED_SCHEMA)
        if not isinstance(info_data, dict) or 'address' not in info_data:
            errors.append('Invalid data format')
        elif feed['source'] != info_data['address']:
            errors.append('Invalid address')
        if len(errors) > 0:
            inc_fetch_retry(feed, new_status='invalid', errors=errors)
            return (False, errors)

        feed['info_status'] = 'valid'

        # fetch any associated images...
        # TODO: parallelize this 2nd level feed image fetching ... (e.g. just compose a list here, and process it later on)
        if 'image' in info_data:
            info_data['valid_image'] = util.fetch_image(
                info_data['image'], config.SUBDIR_FEED_IMAGES, feed['source'] + '_topic',
                fetch_timeout=5)
        if 'operator' in info_data and 'image' in info_data['operator']:
            info_data['operator']['valid_image'] = util.fetch_image(
                info_data['operator']['image'], config.SUBDIR_FEED_IMAGES, feed['source'] + '_owner',
                fetch_timeout=5)
        if 'targets' in info_data:
            for i in range(len(info_data['targets'])):
                if 'image' in info_data['targets'][i]:
                    image_name = feed['source'] + '_tv_' + str(info_data['targets'][i]['value'])
                    info_data['targets'][i]['valid_image'] = util.fetch_image(
                        info_data['targets'][i]['image'], config.SUBDIR_FEED_IMAGES, image_name,
                        fetch_timeout=5)

        feed['info_data'] = sanitize_json_data(info_data)
        config.mongo_db.feeds.save(feed)
        return (True, None)

    def feed_fetch_complete_hook(urls_data):
        logger.info(
            "Enhanced feed info fetching complete. %s unique URLs fetched. Processing..." % len(urls_data))
        feeds = config.mongo_db.feeds.find({'info_status': 'needfetch'})
        for feed in feeds:
            # logger.debug("Looking at feed %s: %s" % (feed, feed['info_url']))
            if feed['info_url']:
                info_url = ('http://' + feed['info_url']) \
                    if not feed['info_url'].startswith('http://') and not feed['info_url'].startswith('https://') else feed['info_url']
                if info_url not in urls_data:
                    logger.warn(
                        "URL %s not properly fetched (not one of %i entries in urls_data), skipping..." % (
                            info_url, len(urls_data)))
                    continue
                assert info_url in urls_data
                if not urls_data[info_url][0]:  # request was not successful
                    inc_fetch_retry(feed, max_retry=FEED_MAX_RETRY, errors=[urls_data[info_url][1]])
                    logger.warn(
                        "Fetch for feed at %s not successful: %s (try %i of %i)" % (
                            info_url, urls_data[info_url][1], feed['fetch_info_retry'], FEED_MAX_RETRY))
                else:
                    result = process_feed_info(feed, urls_data[info_url][1])
                    if not result[0]:
                        logger.info("Processing for feed at %s not successful: %s" % (info_url, result[1]))
                    else:
                        logger.info("Processing for feed at %s successful" % info_url)

    # compose and fetch all info URLs in all feeds with them
    for feed in feeds:
        assert feed['info_url']
        feed_info_urls.append(
            ('http://' + feed['info_url'])
            if not feed['info_url'].startswith('http://') and not feed['info_url'].startswith('https://')
            else feed['info_url'])
    feed_info_urls_str = ', '.join(feed_info_urls)
    feed_info_urls_str = (
        feed_info_urls_str[:2000] + ' ...') if len(feed_info_urls_str) > 2000 else feed_info_urls_str  # truncate if necessary
    if len(feed_info_urls):
        logger.info('Fetching enhanced feed info for %i feeds: %s' % (len(feed_info_urls), feed_info_urls_str))
        util.stream_fetch(
            feed_info_urls, feed_fetch_complete_hook,
            fetch_timeout=10, max_fetch_size=4 * 1024, urls_group_size=20, urls_group_time_spacing=20,
            per_request_complete_callback=lambda url, data: logger.debug(
                "Feed at %s retrieved, result: %s" % (url, data)))

    start_task(task_compile_extended_feed_info, delay=60 * 5)  # call again in 5 minutes
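# Note on the stream_fetch contract assumed by the hooks above and below: both
# feed_fetch_complete_hook and asset_fetch_complete_hook index urls_data as a mapping of
#   fetched_url -> (success_flag, payload_or_error)
# where urls_data[url][0] is truthy on success and urls_data[url][1] is either the
# fetched body (handed to process_feed_info / process_asset_info) or an error string.
# This is inferred from the call sites here, not from util.stream_fetch's own documentation.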
def start_tasks():
    start_task(task_compile_extended_asset_info)
def task_compile_extended_asset_info():
    assets = list(config.mongo_db.asset_extended_info.find({'info_status': 'needfetch'}))
    asset_info_urls = []

    def asset_fetch_complete_hook(urls_data):
        logger.info(
            "Enhanced asset info fetching complete. %s unique URLs fetched. Processing..." % len(urls_data))
        for asset in assets:
            logger.debug("Looking at asset %s: %s" % (asset, asset['info_url']))
            if asset['info_url']:
                info_url = ('http://' + asset['info_url']) \
                    if not asset['info_url'].startswith('http://') and not asset['info_url'].startswith('https://') else asset['info_url']
                assert info_url in urls_data
                if not urls_data[info_url][0]:  # request was not successful
                    inc_fetch_retry(asset, max_retry=ASSET_MAX_RETRY, errors=[urls_data[info_url][1]])
                    logger.warn(
                        "Fetch for asset at %s not successful: %s (try %i of %i)" % (
                            info_url, urls_data[info_url][1], asset['fetch_info_retry'], ASSET_MAX_RETRY))
                else:
                    result = process_asset_info(asset, urls_data[info_url][1])
                    if not result[0]:
                        logger.info(
                            "Processing for asset %s at %s not successful: %s" % (
                                asset['asset'], info_url, result[1]))
                    else:
                        logger.debug(
                            "Processing for asset %s at %s successful" % (asset['asset'], info_url))

    # compose and fetch all info URLs in all assets with them
    for asset in assets:
        if not asset['info_url']:
            continue
        if asset.get('disabled', False):
            logger.info("ExtendedAssetInfo: Skipping disabled asset %s" % asset['asset'])
            continue
        # may or may not end with .json. may or may not start with http:// or https://
        asset_info_urls.append(
            ('http://' + asset['info_url'])
            if not asset['info_url'].startswith('http://') and not asset['info_url'].startswith('https://')
            else asset['info_url'])
    asset_info_urls_str = ', '.join(asset_info_urls)
    asset_info_urls_str = (
        (asset_info_urls_str[:2000] + ' ...') if len(asset_info_urls_str) > 2000 else asset_info_urls_str)  # truncate if necessary
    if len(asset_info_urls):
        logger.info('Fetching enhanced asset info for %i assets: %s' % (len(asset_info_urls), asset_info_urls_str))
        util.stream_fetch(
            asset_info_urls, asset_fetch_complete_hook,
            fetch_timeout=10, max_fetch_size=4 * 1024, urls_group_size=20, urls_group_time_spacing=20,
            per_request_complete_callback=lambda url, data: logger.debug(
                "Asset info URL %s retrieved, result: %s" % (url, data)))

    start_task(task_compile_extended_asset_info, delay=60 * 60)  # call again in 60 minutes
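# inc_fetch_retry and process_asset_info are referenced above but not defined in this
# excerpt. A minimal sketch of what they might look like, mirroring the feed-side helpers
# defined inside task_compile_extended_feed_info (field names, the validation rules, and
# the use of sanitize_json_data here are assumptions, not the project's actual code):

def inc_fetch_retry(asset, max_retry=ASSET_MAX_RETRY, new_status='error', errors=[]):
    # bump the retry counter and park the asset in an error state once the limit is hit
    asset['fetch_info_retry'] += 1
    asset['errors'] = errors
    if asset['fetch_info_retry'] == max_retry:
        asset['info_status'] = new_status
    config.mongo_db.asset_extended_info.save(asset)


def process_asset_info(asset, info_data):
    # validate the fetched JSON and mark the asset's extended info valid or invalid,
    # returning a (success, errors) tuple as the hook above expects
    if not isinstance(info_data, dict) or 'asset' not in info_data:
        errors = ['Invalid data format']
    elif asset['asset'] != info_data['asset']:
        errors = ['Asset mismatch']
    else:
        errors = []
    if errors:
        inc_fetch_retry(asset, new_status='invalid', errors=errors)
        return (False, errors)
    asset['info_status'] = 'valid'
    asset['info_data'] = sanitize_json_data(info_data)
    config.mongo_db.asset_extended_info.save(asset)
    return (True, None)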