Example #1
0
def identify_popular_links(directory=RAW_TWEET_DIR, write_mode="w", **kwargs):
    """Write the five most frequently occurring tweet links to a CSV file.

    Queries the ``tweets`` table through the ``mysql_default`` connection,
    counts how often each URL appears across the ``urls`` values of the
    returned rows, and writes the five most common ones to
    ``<directory>/latest_links.txt`` under a ``url,count`` header row.

    Args:
        directory: Folder that receives ``latest_links.txt``.
        write_mode: Mode string handed to ``open`` for the output file.
            Must be a text mode, since ``csv`` writes ``str`` rows.
        **kwargs: Accepted and ignored (presumably the Airflow task
            context -- confirm against the DAG definition).
    """
    hook = MySqlHook(mysql_conn_id="mysql_default")

    # NOTE(review): date('now', '-1 days') is SQLite date syntax; MySQL would
    # normally need something like `NOW() - INTERVAL 1 DAY`. Left unchanged
    # here because the schema/backend is not visible -- confirm and adjust.
    query = """select * from tweets where
    created > date('now', '-1 days') and urls is not null
    order by favorite_count"""

    # The hook itself is not a DB-API connection; obtain one explicitly and
    # make sure it is released even if the query fails.
    conn = hook.get_conn()
    try:
        df = pd.read_sql_query(query, conn)
    finally:
        conn.close()

    # Each `urls` cell is stored as the text of a Python literal (presumably
    # a list of URL strings -- verify against the writer of this table), so
    # parse it back before flattening all rows into a single URL stream.
    df["urls"] = df["urls"].map(ast.literal_eval)
    url_counts = Counter(itertools.chain.from_iterable(df["urls"].values))

    # newline="" lets the csv module control line endings itself, avoiding
    # blank lines between rows on platforms that translate "\n".
    out_path = "{}/latest_links.txt".format(directory)
    with open(out_path, write_mode, newline="") as latest:
        wrtr = writer(latest)
        wrtr.writerow(["url", "count"])
        wrtr.writerows(url_counts.most_common(5))