예제 #1
0
    def create_all_day_story_stats(self):
        """Create one per-user day-stats view for every story.

        Reads all rows of the ``stories`` table and, for each ``"story id"``,
        passes a view name (``story_<cleaned id>_day_stats``) and a SELECT
        statement to ``self.create_view``. The statement right-joins that
        story's rows of the level-100 ``time_to_100_day_stats`` source
        (prefixed with ``self.event_table_alias``) onto
        ``first_seen_from_events``, so every ``distinct_id`` found there gets
        a row; aggregates missing for a user are reported as 0 via COALESCE.

        Returns:
            str: the strings returned by ``self.create_view``, concatenated
            in the order the stories were read.
        """
        engine = create_engine(conn_str)
        df_stories = pd.read_sql("SELECT * FROM stories", engine)

        # Only one level is used: 200 stories * 5 levels would mean 1000
        # features, which is too many.
        level = "100"
        level_view_name = "time_to_$level$_day_stats".replace('$level$', level)
        source_view = self.event_table_alias + level_view_name

        # The statement is identical for every story apart from the id, so
        # it is assembled once and only the placeholder is filled per story.
        sql_template = """
                    SELECT f.distinct_id,
                    COALESCE(agg_day_time_total,0) AS agg_day_time_total,
                    COALESCE(agg_day_time_avg,0) AS agg_day_time_avg,
                    COALESCE(agg_day_count,0) AS agg_day_count,
                    COALESCE(agg_day_count_avg,0) AS agg_day_count_avg
                    FROM
                     (SELECT * FROM """ + source_view + """
                    WHERE "story id"='$story_id$' ) t
                    RIGHT JOIN first_seen_from_events f
                    ON f.distinct_id = t.distinct_id;
                    """

        queries = []
        for story_id in df_stories["story id"].values:
            sql_story_id = helpers.clean_string_for_column_name(story_id)
            view_name = "story_" + sql_story_id + "_day_stats"
            # The id lands inside a single-quoted SQL literal; doubling any
            # embedded single quote keeps that literal well-formed.
            sql_literal = story_id.replace("'", "''")
            sql = sql_template.replace("$story_id$", sql_literal)
            queries.append(self.create_view(view_name, sql))

        return ''.join(queries)
예제 #2
0
    def get_all_day_event_stats_tables(self, first, last):
        """Describe the day-stats views for a slice of the event types.

        The ``event_types`` table is read ordered by ``event_type`` and the
        resulting frame is sliced with ``[first:last]``; only the event types
        in that slice are described.

        Args:
            first: lower bound of the slice applied to the ordered rows.
            last: upper bound of the slice applied to the ordered rows.

        Returns:
            dict: maps each view name
            (``self.event_table_alias + <cleaned event type> + "_day_stats"``)
            to a dict that maps the columns ``day_avg``, ``total`` and
            ``day_active`` to ``<cleaned event type>_<column>``.
        """
        stat_columns = ("day_avg", "total", "day_active")

        db = create_engine(conn_str)
        ordered_types = pd.read_sql(
            "SELECT * FROM event_types ORDER BY event_type ",
            db)
        selected = ordered_types[first:last]

        result = {}
        for raw_type in selected.event_type.values:
            cleaned = helpers.clean_string_for_column_name(raw_type)
            key = self.event_table_alias + cleaned + "_day_stats"
            # Prefix every statistic with the cleaned event type.
            result[key] = {name: cleaned + "_" + name for name in stat_columns}
        return result
예제 #3
0
    def create_all_day_event_stats(self):
        """Create one per-user day-stats view for every event type.

        Reads all rows of the ``event_types`` table and, for each
        ``event_type``, passes a view name (``<cleaned type>_day_stats``) and
        a SELECT statement to ``self.create_view``. The statement groups the
        ``day_totals`` source (prefixed with ``self.event_table_alias``) by
        ``distinct_id`` and computes:

        * ``day_avg``    - average of ``day_total`` (cast to real),
        * ``total``      - sum of ``day_total``,
        * ``day_active`` - number of rows whose ``day_total`` is not 0.

        Returns:
            str: the strings returned by ``self.create_view``, concatenated
            in the order the event types were read.
        """
        engine = create_engine(conn_str)
        df_events = pd.read_sql("SELECT * FROM event_types", engine)

        # The statement only differs by event type, so it is assembled once
        # and the placeholder is filled inside the loop.
        sql_template = """SELECT distinct_id,
                            AVG(day_total::real) as day_avg,
                            SUM(day_total) as total,
                            SUM(CASE WHEN day_total = 0 THEN 0 ELSE 1 END)
                                     as day_active
                     FROM """ + self.event_table_alias + """day_totals
                     WHERE event_type='$event_type$'
                     GROUP BY distinct_id;
                     """

        queries = []
        for event_type in df_events.event_type.values:
            sql_event_type = helpers.clean_string_for_column_name(event_type)
            view_name = sql_event_type + "_day_stats"
            # The event type lands inside a single-quoted SQL literal;
            # doubling any embedded single quote keeps it well-formed.
            sql_literal = event_type.replace("'", "''")
            sql = sql_template.replace("$event_type$", sql_literal)
            queries.append(self.create_view(view_name, sql))

        return ''.join(queries)
예제 #4
0
    def get_all_story_stats_tables(self, first, last):
        """Describe the day-stats views for a slice of the stories.

        The ``stories`` table is read ordered by ``"story id"`` and the
        resulting frame is sliced with ``[first:last]``; only the stories in
        that slice are described.

        Args:
            first: lower bound of the slice applied to the ordered rows.
            last: upper bound of the slice applied to the ordered rows.

        Returns:
            dict: maps each view name
            (``self.event_table_alias + "story_" + <cleaned id> + "_day_stats"``)
            to a dict that maps ``agg_day_count_avg`` to
            ``story_<cleaned id>_agg_day_count_avg``.
        """
        # Only this one column is aliased.
        stat_columns = ("agg_day_count_avg",)

        db = create_engine(conn_str)
        ordered_stories = pd.read_sql(
            """SELECT * FROM stories ORDER BY "story id" """,
            db)
        selected = ordered_stories[first:last]

        result = {}
        for raw_id in selected["story id"].values:
            cleaned = helpers.clean_string_for_column_name(raw_id)
            story_prefix = "story_" + cleaned
            key = self.event_table_alias + story_prefix + "_day_stats"
            result[key] = {
                name: story_prefix + "_" + name for name in stat_columns
            }
        return result