def create_all_day_story_stats(self) -> str:
    """Create one per-story day-stats view for every row of ``stories``.

    For each value in the ``"story id"`` column of the ``stories`` table a
    view named ``story_<cleaned id>_day_stats`` is created through
    ``self.create_view``.  Each view right-joins the level-100
    ``time_to_100_day_stats`` view (prefixed with ``self.event_table_alias``
    and filtered to that story) onto ``first_seen_from_events`` so that every
    ``distinct_id`` gets a row, with missing aggregates coalesced to 0.

    Returns:
        The values returned by ``self.create_view`` concatenated into a
        single string (they are assumed to be strings).

    NOTE(review): the view name passed to ``create_view`` carries no
    ``event_table_alias`` prefix, while ``get_all_story_stats_tables`` looks
    the views up with that prefix -- presumably ``create_view`` adds it;
    confirm.
    """
    engine = create_engine(conn_str)
    try:
        df_stories = pd.read_sql("SELECT * FROM stories", engine)
    finally:
        # The engine is created per call; release its connection pool instead
        # of leaving the connections open until garbage collection.
        engine.dispose()

    # Only one level is materialised: 200 stories * 5 levels would give
    # ~1000 features, which is too many.
    level = "100"
    level_view_name = "time_to_$level$_day_stats".replace("$level$", level)
    source_view = self.event_table_alias + level_view_name

    # Everything except the story id is loop-invariant, so build it once.
    sql_template = (
        " SELECT f.distinct_id,"
        " COALESCE(agg_day_time_total,0) AS agg_day_time_total,"
        " COALESCE(agg_day_time_avg,0) AS agg_day_time_avg,"
        " COALESCE(agg_day_count,0) AS agg_day_count,"
        " COALESCE(agg_day_count_avg,0) AS agg_day_count_avg"
        " FROM (SELECT * FROM " + source_view
        + " WHERE \"story id\"='$story_id$' ) t"
        " RIGHT JOIN first_seen_from_events f"
        " ON f.distinct_id = t.distinct_id; "
    )

    queries = []
    for story_id in df_stories["story id"].values:
        sql_story_id = helpers.clean_string_for_column_name(story_id)
        view_name = "story_" + sql_story_id + "_day_stats"
        # A view definition cannot take bind parameters, so the id has to be
        # inlined; double any embedded single quote (SQL-standard escaping)
        # so the value cannot terminate the string literal early.
        story_literal = story_id.replace("'", "''")
        sql = sql_template.replace("$story_id$", story_literal)
        queries.append(self.create_view(view_name, sql))
    return "".join(queries)
def get_all_day_event_stats_tables(self, first, last) -> dict:
    """Map day-stats view names to their column aliases for a slice of event types.

    The ``event_types`` table is read ordered by ``event_type`` and the rows
    ``[first:last]`` of that result are kept.  For each remaining event type
    the entry
    ``<event_table_alias><cleaned event type>_day_stats`` maps to a dict that
    renames ``day_avg``, ``total`` and ``day_active`` to
    ``<cleaned event type>_<column>``.

    Args:
        first: Start of the row slice applied to the ordered event types.
        last: End (exclusive) of the row slice.

    Returns:
        ``{view_name: {original_column: aliased_column}}``.
    """
    stat_columns = ("day_avg", "total", "day_active")

    engine = create_engine(conn_str)
    try:
        df_events = pd.read_sql(
            "SELECT * FROM event_types ORDER BY event_type ", engine)
    finally:
        # The engine is created per call; release its connection pool instead
        # of leaving the connections open until garbage collection.
        engine.dispose()

    tables = {}
    for event_type in df_events[first:last]["event_type"].values:
        sql_event_type = helpers.clean_string_for_column_name(event_type)
        view_name = self.event_table_alias + sql_event_type + "_day_stats"
        tables[view_name] = {
            col: sql_event_type + "_" + col for col in stat_columns
        }
    return tables
def create_all_day_event_stats(self) -> str:
    """Create one per-event-type day-stats view for every row of ``event_types``.

    For each ``event_type`` a view named ``<cleaned event type>_day_stats`` is
    created through ``self.create_view``.  The view aggregates
    ``<event_table_alias>day_totals`` per ``distinct_id`` for that event type
    into ``day_avg`` (average of ``day_total``), ``total`` (sum of
    ``day_total``) and ``day_active`` (number of rows with a non-zero
    ``day_total``).

    Returns:
        The values returned by ``self.create_view`` concatenated into a
        single string (they are assumed to be strings).
    """
    engine = create_engine(conn_str)
    try:
        df_events = pd.read_sql("SELECT * FROM event_types", engine)
    finally:
        # The engine is created per call; release its connection pool instead
        # of leaving the connections open until garbage collection.
        engine.dispose()

    # Everything except the event type is loop-invariant, so build it once.
    sql_template = (
        "SELECT distinct_id,"
        " AVG(day_total::real) as day_avg,"
        " SUM(day_total) as total,"
        " SUM(CASE WHEN day_total = 0 THEN 0 ELSE 1 END) as day_active"
        " FROM " + self.event_table_alias
        + "day_totals WHERE event_type='$event_type$'"
        " GROUP BY distinct_id; "
    )

    queries = []
    for event_type in df_events["event_type"].values:
        sql_event_type = helpers.clean_string_for_column_name(event_type)
        view_name = sql_event_type + "_day_stats"
        # A view definition cannot take bind parameters, so the value has to
        # be inlined; double any embedded single quote (SQL-standard
        # escaping) so it cannot terminate the string literal early.
        event_literal = event_type.replace("'", "''")
        sql = sql_template.replace("$event_type$", event_literal)
        queries.append(self.create_view(view_name, sql))
    return "".join(queries)
def get_all_story_stats_tables(self, first, last) -> dict:
    """Map per-story day-stats view names to their column aliases.

    The ``stories`` table is read ordered by ``"story id"`` and the rows
    ``[first:last]`` of that result are kept.  For each remaining story the
    entry ``<event_table_alias>story_<cleaned id>_day_stats`` maps to a dict
    that renames the exported column to ``story_<cleaned id>_<column>``.

    Args:
        first: Start of the row slice applied to the ordered stories.
        last: End (exclusive) of the row slice.

    Returns:
        ``{view_name: {original_column: aliased_column}}``.
    """
    # Only agg_day_count_avg is exported; agg_day_time_avg and agg_day_count
    # used to be listed alongside it but are disabled.
    stat_columns = ("agg_day_count_avg",)

    engine = create_engine(conn_str)
    try:
        df_stories = pd.read_sql(
            """SELECT * FROM stories ORDER BY "story id" """, engine)
    finally:
        # The engine is created per call; release its connection pool instead
        # of leaving the connections open until garbage collection.
        engine.dispose()

    tables = {}
    for story_id in df_stories[first:last]["story id"].values:
        sql_story_id = helpers.clean_string_for_column_name(story_id)
        view_name = (
            self.event_table_alias + "story_" + sql_story_id + "_day_stats"
        )
        tables[view_name] = {
            col: "story_" + sql_story_id + "_" + col for col in stat_columns
        }
    return tables