@gen.coroutine
def init(http_client, quickstats):
    """ Get all the data needed for the most popular articles """
    logging.info("Popular init()")
    endpoint = "%s/live/toppages/v3/?limit=50&apikey=%s&host=" \
        % (options.chartbeat_url, options.API)
    engage_sites = [''.join([endpoint, site]) for site in options.sites]

    # Fetch every site's toppages feed concurrently; yielding the list
    # of futures waits for all responses without blocking the IOLoop.
    try:
        responses = yield [http_client.fetch(site) for site in engage_sites]
    except HTTPError as err:
        logging.info("HTTP Error: %s" % err)
        return

    data = []
    for response in responses:
        body = json.loads(response.body.decode('utf-8'))
        for item in body['pages']:
            # remove non-articles from popular
            if is_section_page(item['path']):
                continue
            try:
                article = {
                    "path": item['path'],
                    "title": item['title'],
                    "visits": item['stats']['visits'],
                }
            except KeyError as err:
                # skip items with missing keys instead of re-appending
                # the previous article
                logging.info("KeyError: %s" % err)
                continue
            data.append(article)

    data = sorted(data, key=lambda page: page['visits'], reverse=True)
    for conn in connections:
        conn.write_message({
            "popular": data[:40]
        })
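
# All three feeds filter through is_section_page(), which is defined
# elsewhere in the project. A minimal sketch of what such a path-based
# check might look like; SECTION_PATHS and the trailing-slash rule are
# assumptions for illustration, not the project's actual logic:
SECTION_PATHS = ("/", "/news/", "/sports/", "/opinion/")  # hypothetical

def is_section_page(path):
    """Heuristic: treat bare section roots as non-articles."""
    normalized = path if path.endswith("/") else path + "/"
    return normalized in SECTION_PATHS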
@gen.coroutine
def init(http_client, quickstats):
    """
    The magic of async: request the data without blocking the process
    while waiting for the response
    """
    logging.info("GeoPoint init()")
    endpoint = "%s/live/recent/v3/?limit=50&apikey=%s&host=" \
        % (options.chartbeat_url, options.API)
    random_site = get_random_site()
    api_site = endpoint + random_site
    logging.info(api_site)

    geo_data = {}
    try:
        response = yield http_client.fetch(api_site)
    except Exception as err:
        response = None
        logging.info("HTTP request failed: %s" % err)

    try:
        if response:
            people = json.loads(response.body.decode('utf-8'))
            num_people = len(people)

            # Grab the first valid article (i.e. not a section page)
            # that is far enough away from the last loaded article.
            count = 0
            while count < num_people:
                # Bounded iterations ensure this doesn't loop forever
                count += 1
                random_index = random.randint(0, num_people - 1)
                person = people[random_index]
                if is_section_page(person['path']):
                    continue
                if person['title'] in ("", " "):
                    continue
                geo_data = {
                    "lat": person['lat'],
                    "lng": person['lng'],
                    "platform": person['platform'],
                    "domain": person['domain'],
                    "host": person['host'],
                    "path": person['path'],
                    "title": person['title'],
                    "user_agent": person['user_agent'],
                    "country": person['country'],
                }
                distance = get_distance(
                    last_lat_lng["lat"], last_lat_lng["lng"],
                    person["lat"], person["lng"])
                logging.info("DELTA: {0}".format(distance))
                # Accept the point once it is far enough from the last one
                if distance > 5.0:
                    break
            # Remember the accepted point for the next round
            if geo_data:
                last_lat_lng["lat"] = geo_data["lat"]
                last_lat_lng["lng"] = geo_data["lng"]
    except Exception as err:
        logging.exception(err)

    for conn in connections:
        conn.write_message({
            "users": len(connections),
            "geo_point": geo_data
        })
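
# get_distance() is defined elsewhere; the 5.0 threshold above implies
# it returns a distance on roughly a kilometre or coordinate scale. A
# haversine sketch, assuming great-circle distance in kilometres (the
# real helper may compute a simpler coordinate delta, as the "DELTA"
# log label suggests):
import math

def get_distance(lat1, lng1, lat2, lng2):
    """Great-circle distance in km between two lat/lng points."""
    radius = 6371.0  # mean Earth radius, km
    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    dphi = math.radians(lat2 - lat1)
    dlmb = math.radians(lng2 - lng1)
    a = (math.sin(dphi / 2) ** 2
         + math.cos(phi1) * math.cos(phi2) * math.sin(dlmb / 2) ** 2)
    return 2 * radius * math.asin(math.sqrt(a))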
@gr_coroutine_placeholder
def init(http_client, quickstats):
    """ Use toppages to determine author popularity right now """
    logging.info("Authors init()")
    endpoint = "%s/live/toppages/v3/?limit=100&apikey=%s&sort_by=visits&host=" \
        % (options.chartbeat_url, options.API)
    sites = [''.join([endpoint, site]) for site in options.sites]

    try:
        responses = yield [http_client.fetch(site) for site in sites]
    except HTTPError as err:
        logging.info("HTTP Error: %s" % err)
        return

    authors = []
    # loop through all chartbeat sites
    for response in responses:
        body = json.loads(response.body.decode('utf-8'))
        # loop over the dictionaries within pages
        for item in body['pages']:
            try:
                # don't bother with objects with missing keys
                keys = ("path", "title", "authors", "stats")
                if any(key not in item for key in keys):
                    continue
                if "visits" not in item['stats']:
                    continue
                # empty titles are useless, why do they exist at all?
                if item["title"] in ("", " "):
                    continue
                # remove non-articles from popular
                if is_section_page(item['path']):
                    continue

                prepd_authors = []
                # pre-processing to remove erroneous prefixes and to
                # split combined authors into single names
                for auth in item['authors']:
                    # strip leading "by"/"the"/"and" only as prefixes;
                    # a bare .replace() would mangle names like "Colby"
                    auth = auth.strip()
                    for prefix in ("by ", "the ", "and "):
                        if auth.lower().startswith(prefix):
                            auth = auth[len(prefix):].strip()
                    # break up authors who include multiple names
                    if " and " in auth:
                        prepd_authors.extend(auth.split(" and "))
                    else:
                        prepd_authors.append(auth)

                new_article = Article(item['title'], item['path'],
                                      item['stats']['visits'],
                                      item['authors'])

                # accumulate all authors, merging articles for authors
                # we have already seen
                for auth in prepd_authors:
                    auth = auth.strip()
                    if not auth:
                        continue
                    new_author = Author(auth.title())
                    if new_article not in new_author.articles:
                        new_author.articles.append(new_article)
                    found_author = False
                    for author in authors:
                        if author == new_author:
                            if new_article not in author.articles:
                                author.articles.append(new_article)
                            found_author = True
                            break
                    if not found_author:
                        authors.append(new_author)
            except Exception as err:
                logging.exception(err)

    authors.sort(key=lambda author: author.total_visits, reverse=True)
    for conn in connections:
        conn.write_message({
            "authors": {
                "name": "authors",
                "children": [author.json for author in authors][:50]
            }
        })
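
# Article and Author are defined elsewhere in the project. The code
# above depends on Author equality, a total_visits sort key, and a
# .json property shaped for a d3-style {"name": ..., "children": [...]}
# tree. A minimal sketch consistent with that usage; any field beyond
# the ones referenced above (e.g. the "size" key) is an assumption:
class Article(object):
    def __init__(self, title, path, visits, authors):
        self.title = title
        self.path = path
        self.visits = visits
        self.authors = authors

    def __eq__(self, other):
        # two Articles are the same page if their paths match
        return self.path == other.path

class Author(object):
    def __init__(self, name):
        self.name = name
        self.articles = []

    def __eq__(self, other):
        # authors are deduplicated by normalized name
        return self.name == other.name

    @property
    def total_visits(self):
        return sum(article.visits for article in self.articles)

    @property
    def json(self):
        return {
            "name": self.name,
            "children": [{"name": a.title, "size": a.visits}
                         for a in self.articles],
        }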