def identify_popular_links(directory=RAW_TWEET_DIR, write_mode="w", **kwargs):
    """Write the five most frequently shared links from recent tweets to CSV.

    Selects rows from the ``tweets`` table whose ``urls`` column is not null
    and whose ``created`` value passes the query's one-day cutoff, parses each
    ``urls`` value as a Python literal, counts every URL across all rows, and
    writes the five most common ones to ``<directory>/latest_links.txt`` with
    a ``url,count`` header row.

    Args:
        directory: Directory the ``latest_links.txt`` file is written into.
        write_mode: Mode passed to ``open`` for the output file.
        **kwargs: Accepted and ignored (presumably the Airflow task context
            -- confirm against the operator that calls this).
    """
    hook = MySqlHook(mysql_conn_id="mysql_default")
    # NOTE(review): ``date('now', '-1 days')`` is SQLite syntax, while the
    # connection comes from a MySQL hook. The query text is left unchanged
    # here; confirm the target backend and adjust the date filter if needed
    # (MySQL would use e.g. ``NOW() - INTERVAL 1 DAY``).
    query = (
        "select * from tweets where created > date('now', '-1 days') "
        "and urls is not null order by favorite_count"
    )

    # The hook itself is not a DB-API connection; pandas needs the real one.
    conn = hook.get_conn()
    try:
        df = pd.read_sql_query(query, conn)
    finally:
        conn.close()

    # Each ``urls`` cell is parsed with literal_eval into a Python value that
    # is then iterated as a collection of URLs.
    # presumably a stringified list of URL strings -- TODO confirm schema
    url_lists = df["urls"].map(ast.literal_eval)
    url_counts = Counter(itertools.chain.from_iterable(url_lists.values))

    # newline="" lets the csv writer control line endings itself, avoiding
    # blank rows between records on platforms that translate "\n".
    output_path = "{}/latest_links.txt".format(directory)
    with open(output_path, write_mode, newline="") as latest:
        csv_writer = writer(latest)
        csv_writer.writerow(["url", "count"])
        csv_writer.writerows(url_counts.most_common(5))