from src.common.sql_plot import SqlPlot tag_group = ('#thelastofus', '#thelastofus2', '#tlou', '#tlou2') SqlPlot().show( sql_queries=[{ 'query': f""" select date_trunc('week', posts.created) as time_window, count(distinct posts.id) from posts join post_tags on posts.id = post_tags.post_id and posts.type = 1 and posts.created > '2019-01-01' and post_tags.value in {tag_group} group by time_window order by time_window """, 'label': tag_group }], title="Количество новых постов за неделю с тегами", x_label="Время", y_label="Новые посты за неделю" )
from src.common.config_loader import ConfigLoader from src.common.data_base_wrapper import DataBaseWrapper from src.common.sql_plot import SqlPlot subsite_ids = [64954] config = ConfigLoader.load() db = DataBaseWrapper(config["db"]) queries = [] for subsite_id in subsite_ids: subsite_name = db.execute_select_one( "select name from subsites where id = %s", (subsite_id, ))[0] queries.append({ 'query': f""" select date_trunc('week', created) as time_window, count(*) from posts where type = 1 and subsite_id = {subsite_id} and created between '2018-06-01' and '2020-07-20' group by time_window order by time_window """, 'label': subsite_name }) SqlPlot().show(sql_queries=queries, title="Количество новых постов за неделю", x_label="Время", y_label="Новые посты за неделю")
from src.common.sql_plot import SqlPlot post_id = 220958 SqlPlot().show(sql_queries=[{ 'query': f""" select request_time, comments from post_history where post_id = {post_id} order by id """ }], title="Комментарии", x_label="Время", y_label="Комментарии")
from src.common.sql_plot import SqlPlot SqlPlot().show( sql_queries=[ { 'query': """ select date_trunc('week', created) as time_window, count(*) from posts where type = 1 and created < '2020-07-20' group by time_window order by time_window """, 'label': 'С мемами' }, { 'query': """ select date_trunc('week', created) as time_window, count(*) from posts where type = 1 and subsite_id != 64966 and created < '2020-07-20' group by time_window order by time_window """, 'label': 'Без мемов' } ], title="Количество новых постов за неделю (без репостов и вакансий)", x_label="Время", y_label="Новые посты за неделю" )
from src.common.sql_plot import SqlPlot SqlPlot().show(sql_queries=[{ 'query': """ select date_trunc('week', created) as time_window, avg(comments_count) from posts where type = 1 and created between '2016-06-01' and '2020-07-19' group by time_window order by time_window """ }], title="Среднее количество комментариев на пост", x_label="Время", y_label="Среднее количество комментариев на пост")
from src.common.sql_plot import SqlPlot SqlPlot().show(sql_queries=[{ 'query': """ with data as ( select date_trunc('week', created) as time_window, count(*) as new_posts from posts group by time_window ) select time_window, sum(new_posts) over ( order by time_window rows between unbounded preceding and current row ) as total_posts from data """ }], title="Количество постов", x_label="Время", y_label="Посты")
from src.common.sql_plot import SqlPlot post_id = 220958 SqlPlot().show(sql_queries=[{ 'query': f""" select request_time, hits from post_history where post_id = {post_id} order by id """ }], title="Просмотры", x_label="Время", y_label="Просмотры")
queries.append({ 'query': f""" with length_data as ( select posts.id as post_id, posts.created as created, sum(coalesce(blocks.text_length, 0)) as text_length from posts join post_blocks blocks on posts.id = blocks.post_id and posts.type = 1 group by posts.id ) select date_trunc('week', created) as time_window, percentile_disc({percentile}) within group (order by text_length) as percentile from length_data where created between '2016-06-01' and '2020-07-19' group by time_window order by time_window """, 'label': str(int(percentile * 100)) + '-процентиль' }) SqlPlot().show( sql_queries=queries, title="Длина постов", x_label="Время", y_label="Процентили" )
from src.common.sql_plot import SqlPlot SqlPlot().show(sql_queries=[{ 'query': """ select date_trunc('week', created) as time_window, sum(comments_count) from posts where type = 1 and created between '2016-06-01' and '2020-07-19' group by time_window order by time_window """ }], title="Количество новых комментариев", x_label="Время", y_label="Новые комментарии за неделю")
SqlPlot().show( sql_queries=[ { 'query': """ with length_data as ( select posts.id as post_id, sum(coalesce(blocks.text_length, 0)) as text_length from posts join post_blocks blocks on posts.id = blocks.post_id and posts.type = 1 group by posts.id ), percentiles as ( select generate_series as value from generate_series(0, 0.95, 0.01) ) select percentile_disc(percentiles.value) within group (order by text_length), percentiles.value * 100 from percentiles cross join length_data where text_length < 341961 group by percentiles.value """ } ], title="Распределение длины постов", x_label="Количество текстовых символов в посте", y_label="Количество постов, %" )
post_id = 220958 SqlPlot().show(sql_queries=[{ 'query': f""" with data as ( select date_trunc('hour', request_time) as hours, min(request_time) as request_time, min(hits) as hits, min(rating) as rating from post_history group by hours order by hours ), delta_data as ( select request_time, (hits - coalesce(lag(hits, 1) over (order by hours), 0))::float as d_hits, (rating - coalesce(lag(rating, 1) over (order by hours), 0))::float as d_rating, extract(epoch from request_time - lag(request_time, 1) over (order by hours)) / 3600 as d_time from data ) select request_time, case when d_hits != 0 then d_rating / d_hits / d_time else 0 end as rate from delta_data where request_time < '2020-10-03' """ }], title="Конверсия просмотров в рейтинг", x_label="Время", y_label="Коэффициент")
from src.common.sql_plot import SqlPlot post_id = 220958 SqlPlot().show(sql_queries=[{ 'query': f""" select request_time, rating from post_history where post_id = {post_id} order by id """ }], title="Рейтинг", x_label="Время", y_label="Рейтинг")
from src.common.sql_plot import SqlPlot SqlPlot().show( sql_queries=[ { 'query': """ with all_ids as ( select generate_series as user_id from generate_series(1, (select max(id) from users)) ) select round(all_ids.user_id, -3) user_id_group, count(user_errors.id) from all_ids left join user_errors on all_ids.user_id = user_errors.user_id group by user_id_group order by user_id_group """, 'label': 'Ошибки' } ], title="Количество ошибок API", x_label="ID пользователя", y_label="Количество ошибок на 1000 пользователей" )
from src.common.sql_plot import SqlPlot post_id = 220958 SqlPlot().show(sql_queries=[{ 'query': f""" with data as ( select date_trunc('hour', request_time) as hours, min(request_time) as request_time, min(hits) as hits from post_history group by hours order by hours ) select request_time, coalesce((hits - coalesce(lag(hits, 1) over (order by hours), 0)) / (extract(epoch from request_time - lag(request_time, 1) over (order by hours)) / 3600), 0) from data """ }], title="Просмотры в час", x_label="Время", y_label="Просмотры в час")
SqlPlot().show( sql_queries=[ { 'query': """ select date_trunc('week', created) as time_window, count(distinct posts.id) from posts join post_tags tags on posts.id = tags.post_id and posts.type = 1 and posts.created between '2016-06-01' and '2020-07-20' and tags.value in ('#лонг', '#лонгрид', '#longread') group by time_window order by time_window """, 'label': "#лонг, #лонгрид, #longread" }, { 'query': """ select date_trunc('week', created) as time_window, count(distinct posts.id) from posts join post_tags tags on posts.id = tags.post_id and posts.type = 1 and posts.created between '2016-06-01' and '2020-07-20' and tags.value in ('#лонг', '#лонгрид', '#long', '#longread') group by time_window order by time_window """, 'label': "#long" } ], title="Новые лонги", x_label="Время", y_label="Новые лонги за неделю" )
from src.common.sql_plot import SqlPlot SqlPlot().show(sql_queries=[{ 'query': """ select date_trunc('week', created) as time_window, count(*) from users group by time_window """, 'label': 'Новые пользователи' }], title="Количество новых пользователей за неделю", x_label="Время", y_label="Новые пользователи за неделю")
SqlPlot().show(sql_queries=[ { 'query': """ select date_trunc('week', created) as time_window, count(distinct posts.id) from posts join post_tags tags on posts.id = tags.post_id and posts.type = 1 and posts.created between '2019-02-01' and '2020-07-20' and posts.is_editorial = true and tags.value in ('#лонг', '#лонгрид', '#long', '#longread') group by time_window order by time_window """, 'label': "Лонги (редакция)" }, { 'query': """ select date_trunc('week', created) as time_window, count(distinct posts.id) from posts join post_tags tags on posts.id = tags.post_id and posts.type = 1 and posts.created between '2019-02-01' and '2020-07-20' and posts.is_editorial = false and tags.value in ('#лонг', '#лонгрид', '#long', '#longread') group by time_window order by time_window """, 'label': "Лонги (UGC)" }, ], title="Новые лонги", x_label="Время", y_label="Новые лонги за неделю")
from src.common.sql_plot import SqlPlot SqlPlot().show(sql_queries=[{ 'query': """ select date_trunc('day', estimated_creation_time) as time_window, count(*) from post_errors where status_code = 403 and estimated_creation_time between '2016-01-01 00:00:00.000000' and '2021-01-01 00:00:00.000000' group by time_window order by time_window """ }], title="Посты в скрытых подсайтах", x_label="Время", y_label="Количество ошибок 403 за день")
from src.common.sql_plot import SqlPlot post_id = 220958 SqlPlot().show( sql_queries=[ { 'query': f""" select request_time, favorites from post_history where post_id = {post_id} order by id """ } ], title="Закладки", x_label="Время", y_label="Закладки" )