Example #1
@gen.coroutine
def init(http_client, quickstats):
    """ Get all the data needed for the most popular articles """
    logging.info("Popular init()")
    endpoint = "%s/live/toppages/v3/?limit=50&apikey=%s&host=" \
        % (options.chartbeat_url, options.API)
    engage_sites = [''.join([endpoint, site]) for site in options.sites]
    try:
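        # yielding a list of futures runs all the fetches concurrently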
        responses = yield [http_client.fetch(site) for site in engage_sites]
    except HTTPError as err:
        logging.info("HTTP Error: %s" % err)
        return  # responses is undefined on failure; bail out early
    data = []
    for response in responses:
        body = json.loads(response.body.decode('utf-8'))
        for item in body['pages']:

            # remove non-articles from popular
            if is_section_page(item['path']):
                continue

            try:
                article = {
                    "path": item['path'],
                    "title": item['title'],
                    "visits": item['stats']['visits'],
                }
            except KeyError as err:
                logging.info("KeyError: %s" % err)
                continue  # don't append a stale or partial article
            data.append(article)
    data = sorted(data, key=lambda page: page['visits'], reverse=True)

    for conn in connections:
        conn.write_message({ "popular": data[:40] })
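
All three examples are Tornado coroutines excerpted from a larger application, so names such as options, connections, is_section_page, and the imports are defined elsewhere. A minimal sketch of that assumed surrounding module, with placeholder values standing in for whatever the real app defines:

import json
import logging
import random
import re

from tornado import gen
from tornado.httpclient import HTTPError
from tornado.options import define, options

# Placeholder configuration; the real values live elsewhere in the app.
define("chartbeat_url", default="http://api.chartbeat.com")
define("API", default="YOUR_API_KEY")
define("sites", default=["example.com"], multiple=True)

# Open WebSocket handlers that each init() broadcasts to.
connections = []

def is_section_page(path):
    """ Hypothetical filter: treat shallow paths such as "/" or
    "/sports/" as section fronts rather than articles """
    return path.rstrip("/").count("/") < 2
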
Example #2
@gen.coroutine
def init(http_client, quickstats):
    """ The magic of async: issue the HTTP request and yield until the
    response arrives, without blocking the process while we wait """
    logging.info("GeoPoint init()")
    endpoint = "%s/live/recent/v3/?limit=50&apikey=%s&host=" % (options.chartbeat_url, options.API)

    random_site = get_random_site()

    api_site = endpoint + random_site
    logging.info(api_site)
    geo_data = {}
    try:
        response = yield http_client.fetch(api_site)
    except Exception as err:
        response = None
        logging.info("HTTP request failed: %s" % err)

    try:
        if response:
            people = json.loads(response.body.decode('utf-8'))
            num_people = len(people)

            # Grab the first valid article (i.e. not a section page) that is
            # far enough in latitude/longitude from the last loaded article
            count = 0
            while count < num_people:  # bound the search so it can't loop forever
                count += 1
                random_index = random.randint(0, num_people - 1)
                person = people[random_index]

                if is_section_page(person['path']):
                    continue
                if person['title'] == "" or person['title'] == " ":
                    continue

                data = {
                    "lat": person['lat'],
                    "lng": person['lng'],
                    "platform": person['platform'],
                    "domain": person['domain'],
                    "host": person['host'],
                    "path": person['path'],
                    "title": person['title'],
                    "user_agent": person['user_agent'],
                    "country": person['country'],
                }
                geo_data = data

                latitude_delta = get_distance(
                                    last_lat_lng["lat"],
                                    last_lat_lng["lng"],
                                    person["lat"],
                                    person["lng"])

                logging.info("DELTA: {0}".format(latitude_delta))

                if latitude_delta > 5.0:
                    break

            last_lat_lng["lat"] = geo_data["lat"]
            last_lat_lng["lng"] = geo_data["lng"]

    except Exception as err:
        logging.exception(err)

    for conn in connections:
        conn.write_message({ "users": len(connections), "geo_point": geo_data })
Example #3
@gen.coroutine
def init(http_client, quickstats):
    """ Use toppages to determine author popularity right now """
    logging.info("Authors init()")
    endpoint = "%s/live/toppages/v3/?limit=100&apikey=%s&sort_by=visits&host=" \
        % (options.chartbeat_url, options.API)
    sites = [''.join([endpoint, site]) for site in options.sites]

    try:
        responses = yield [http_client.fetch(site) for site in sites]
    except HTTPError as err:
        logging.info("HTTP Error: %s" % err)
        return  # responses is undefined on failure; bail out early
    authors = []
    # loop through all chartbeat sites
    for response in responses:
        body = json.loads(response.body.decode('utf-8'))
        # loop over the page dictionaries
        for item in body['pages']:
            try:
                # skip objects that are missing any required key
                keys = ["path", "title", "authors", "stats"]
                if any(key not in item for key in keys):
                    continue
                if "visits" not in item['stats']:
                    continue
                # empty titles are useless, why do they exist at all?
                if item["title"] == "" or item["title"] == " ":
                    continue
                # remove non-articles from popular
                if is_section_page(item['path']):
                    continue

                prepd_authors = []
                # pre-processing to drop filler words from bylines and to
                # split multiple authors combined into a single string
                for auth in item['authors']:
                    # strip "by"/"the" on word boundaries so names like
                    # "Bobby" or "Matthew" are not mangled
                    auth = re.sub(r"\b(by|the)\b", "", auth).strip()
                    if auth[:4] == "and ":
                        auth = auth[4:]

                    # break up authors who include multiple names
                    if " and " in auth:
                        prepd_authors.extend(auth.split(" and "))
                    else:
                        prepd_authors.append(auth)

                new_article = Article(item['title'], item['path'],
                                      item['stats']['visits'], item['authors'])

                # accumulate authors into a list of Author objects, merging duplicates
                for auth in prepd_authors:
                    if auth == "" or auth == " ":
                        continue
                    auth = auth.strip()

                    new_author = Author(auth.title())
                    if new_article not in new_author.articles:
                        new_author.articles.append(new_article)

                    found_author = False
                    for author in authors:
                        if author == new_author:
                            if new_article not in author.articles:
                                author.articles.append(new_article)
                            found_author = True
                            break
                    if not found_author:
                        authors.append(new_author)
            except Exception as err:
                logging.exception(err)

    authors.sort(key=lambda author: author.total_visits, reverse=True)

    for conn in connections:
        conn.write_message({
            "authors": {
                "name": "authors",
                "children": [author.json for author in authors][:50]
            }
        })
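
Article and Author are likewise defined elsewhere. From their usage above (the constructor arguments, equality-based deduplication, sorting on total_visits, and the json property fed to the client) a minimal compatible sketch might look like this; every detail beyond what the example exercises is a guess:

class Article(object):
    """ Hypothetical model matching the constructor call above """
    def __init__(self, title, path, visits, authors):
        self.title = title
        self.path = path
        self.visits = visits
        self.authors = authors

    def __eq__(self, other):
        # two Article objects refer to the same story if the paths match
        return isinstance(other, Article) and self.path == other.path

class Author(object):
    """ Hypothetical model; equality by name makes the dedup loop work """
    def __init__(self, name):
        self.name = name
        self.articles = []

    def __eq__(self, other):
        return isinstance(other, Author) and self.name == other.name

    @property
    def total_visits(self):
        return sum(article.visits for article in self.articles)

    @property
    def json(self):
        # shaped to fit the {"name": ..., "children": [...]} payload the
        # handler sends, e.g. for a d3-style hierarchy visualization
        return {
            "name": self.name,
            "children": [{"name": a.title, "size": a.visits}
                         for a in self.articles],
        }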