def __init__(self):
    """Load credentials, open API/DB handles and preload ranking and notes data.

    Steps, in order:

    1. Read ``secrets.json`` and build a pytumblr client from its
       ``tumblr_tokens`` entry.
    2. Create the ETL controller via ``app.etl_controller()``; its
       ``target_blogs`` and ``mysql_connection`` attributes are used below.
    3. Query ``tb_reblog_graphs`` for the average ``ClosenessCentrality``
       per blog (rows with ``end_date`` later than three days ago) into
       ``self.influencer_df`` and add ``pdf`` / ``cdf`` columns.
    4. Store the 200 most common tag rows of this blog's posts in
       ``self.most_common_tags``.
    5. Fetch every post of the blog (advancing the offset by 20 per
       request), flatten their notes into ``self.notes`` and
       ``self.notes_df``, and keep the 50 newest notes from the last four
       hours in ``self.todays_notes``.

    Errors from file access, JSON parsing, the Tumblr client or the
    database are not caught here and propagate to the caller.
    """
    # Context manager: the handle is released even if JSON parsing fails.
    with open('secrets.json', 'rb') as secrets_file:
        secrets = json.load(secrets_file)

    self.blog_name = "wheredidmypostgo"

    # Build an Authorized Tumblr Client
    self.tb_client = pytumblr.TumblrRestClient(**secrets['tumblr_tokens'])
    self.etl_controller = app.etl_controller()

    max_end_date = date.today() - timedelta(days=3)

    # Quoted, comma-separated blog names, used for both IN (...) lists.
    # NOTE(review): the names are interpolated straight into the SQL text,
    # so target_blogs must come from a trusted source.
    quoted_targets = (
        "'" + "','".join(self.etl_controller.target_blogs) + "'")
    influencer_sql = (
        " select blog_name, avg(ClosenessCentrality) as 'ClosenessCentrality' "
        "from tb_reblog_graphs "
        "where reblogged_root_name in (%s) "
        "and end_date > '%s' "
        "and blog_name not in ('wheredidmypostgo', %s) "
        "group by blog_name "
        "order by avg(ClosenessCentrality) DESC "
    ) % (quoted_targets, max_end_date.isoformat(), quoted_targets)

    # NOTE(review): psql.read_frame and DataFrame.sort(column=...) are the
    # legacy pandas spellings this file was written against; confirm the
    # pinned pandas version before modernising them.
    self.influencer_df = psql.read_frame(
        influencer_sql, self.etl_controller.mysql_connection)
    centrality = self.influencer_df.ClosenessCentrality
    # Normalise centrality so the column sums to 1, then accumulate it in
    # descending order; the assignment aligns the result back by index.
    self.influencer_df['pdf'] = centrality / centrality.sum()
    self.influencer_df['cdf'] = self.influencer_df.sort(
        column='pdf', ascending=False).pdf.cumsum()

    tag_sql = (
        " select tag from tb_posts "
        "inner join tb_posttag_level "
        "on tb_posttag_level.`post_id` = tb_posts.id "
        "where tb_posts.blog_name = 'wheredidmypostgo' "
    )
    curs = self.etl_controller.mysql_connection.cursor()
    try:
        curs.execute(tag_sql)
        all_tags = curs.fetchall()
    finally:
        # Close the cursor even when the query fails.
        curs.close()
    # NOTE(review): Counter is fed whole result rows, so with a tuple-row
    # cursor each entry kept here is the row itself (e.g. ``(tag,)``), not
    # the bare tag string - confirm that is what callers expect.
    self.most_common_tags = [
        row for row, _count in Counter(all_tags).most_common(n=200)
    ]

    # The first request also reports total_posts, which bounds the paging.
    response = self.tb_client.posts(self.blog_name, notes_info='true')
    self.posts = response['posts']
    for offset in range(20, response['total_posts'], 20):
        response = self.tb_client.posts(
            self.blog_name, notes_info='true', offset=offset)
        self.posts.extend(response['posts'])

    self.notes = []
    for post in self.posts:
        if post['note_count'] > 0:
            self.notes.extend(post['notes'])

    # NOTE(review): if no post has any notes the frame has no ``timestamp``
    # column and the next statement raises AttributeError - unchanged from
    # the original behaviour.
    self.notes_df = pd.DataFrame(self.notes)
    self.notes_df['date'] = self.notes_df.timestamp.apply(float).apply(
        datetime.fromtimestamp)

    # Despite the attribute name, the window is the last four hours.
    cutoff = datetime.now() - timedelta(hours=4)
    recent_notes = self.notes_df[self.notes_df.date >= cutoff]
    self.todays_notes = recent_notes.sort(
        column='date', ascending=False).head(50)
def __init__(self):
    """Load credentials, open API/DB handles and preload ranking and notes data.

    Steps, in order:

    1. Read ``secrets.json`` and build a pytumblr client from its
       ``tumblr_tokens`` entry.
    2. Create the ETL controller via ``app.etl_controller()``; its
       ``target_blogs`` and ``mysql_connection`` attributes are used below.
    3. Query ``tb_reblog_graphs`` for the average ``ClosenessCentrality``
       per blog (rows with ``end_date`` later than three days ago) into
       ``self.influencer_df`` and add ``pdf`` / ``cdf`` columns.
    4. Store the 200 most common tag rows of this blog's posts in
       ``self.most_common_tags``.
    5. Fetch every post of the blog (advancing the offset by 20 per
       request), flatten their notes into ``self.notes`` and
       ``self.notes_df``, and keep the 50 newest notes from the last four
       hours in ``self.todays_notes``.

    Errors from file access, JSON parsing, the Tumblr client or the
    database are not caught here and propagate to the caller.
    """
    # Context manager: the handle is released even if JSON parsing fails.
    with open('secrets.json', 'rb') as secrets_file:
        secrets = json.load(secrets_file)

    self.blog_name = "wheredidmypostgo"

    # Build an Authorized Tumblr Client
    self.tb_client = pytumblr.TumblrRestClient(**secrets['tumblr_tokens'])
    self.etl_controller = app.etl_controller()

    max_end_date = date.today() - timedelta(days=3)

    # Quoted, comma-separated blog names, used for both IN (...) lists.
    # NOTE(review): the names are interpolated straight into the SQL text,
    # so target_blogs must come from a trusted source.
    quoted_targets = (
        "'" + "','".join(self.etl_controller.target_blogs) + "'")
    influencer_sql = (
        " select blog_name, avg(ClosenessCentrality) as 'ClosenessCentrality' "
        "from tb_reblog_graphs "
        "where reblogged_root_name in (%s) "
        "and end_date > '%s' "
        "and blog_name not in ('wheredidmypostgo', %s) "
        "group by blog_name "
        "order by avg(ClosenessCentrality) DESC "
    ) % (quoted_targets, max_end_date.isoformat(), quoted_targets)

    # NOTE(review): psql.read_frame and DataFrame.sort(column=...) are the
    # legacy pandas spellings this file was written against; confirm the
    # pinned pandas version before modernising them.
    self.influencer_df = psql.read_frame(
        influencer_sql, self.etl_controller.mysql_connection)
    centrality = self.influencer_df.ClosenessCentrality
    # Normalise centrality so the column sums to 1, then accumulate it in
    # descending order; the assignment aligns the result back by index.
    self.influencer_df['pdf'] = centrality / centrality.sum()
    self.influencer_df['cdf'] = self.influencer_df.sort(
        column='pdf', ascending=False).pdf.cumsum()

    tag_sql = (
        " select tag from tb_posts "
        "inner join tb_posttag_level "
        "on tb_posttag_level.`post_id` = tb_posts.id "
        "where tb_posts.blog_name = 'wheredidmypostgo' "
    )
    curs = self.etl_controller.mysql_connection.cursor()
    try:
        curs.execute(tag_sql)
        all_tags = curs.fetchall()
    finally:
        # Close the cursor even when the query fails.
        curs.close()
    # NOTE(review): Counter is fed whole result rows, so with a tuple-row
    # cursor each entry kept here is the row itself (e.g. ``(tag,)``), not
    # the bare tag string - confirm that is what callers expect.
    self.most_common_tags = [
        row for row, _count in Counter(all_tags).most_common(n=200)
    ]

    # The first request also reports total_posts, which bounds the paging.
    response = self.tb_client.posts(self.blog_name, notes_info='true')
    self.posts = response['posts']
    for offset in range(20, response['total_posts'], 20):
        response = self.tb_client.posts(
            self.blog_name, notes_info='true', offset=offset)
        self.posts.extend(response['posts'])

    self.notes = []
    for post in self.posts:
        if post['note_count'] > 0:
            self.notes.extend(post['notes'])

    # NOTE(review): if no post has any notes the frame has no ``timestamp``
    # column and the next statement raises AttributeError - unchanged from
    # the original behaviour.
    self.notes_df = pd.DataFrame(self.notes)
    self.notes_df['date'] = self.notes_df.timestamp.apply(float).apply(
        datetime.fromtimestamp)

    # Despite the attribute name, the window is the last four hours.
    cutoff = datetime.now() - timedelta(hours=4)
    recent_notes = self.notes_df[self.notes_df.date >= cutoff]
    self.todays_notes = recent_notes.sort(
        column='date', ascending=False).head(50)
# Script: build the project's ETL controller and run two of its jobs.
import app

# Module-level handle on the controller returned by app.etl_controller().
etl_controller = app.etl_controller()

# Run order is kept as written: check_submissions() first, then
# tb_reblog_tree_etl_active_posts().
etl_controller.check_submissions()
etl_controller.tb_reblog_tree_etl_active_posts()