import datetime
import os

# Project-local modules, assumed from usage below; Group, Site, PathHelper,
# and the load/sort helpers are defined elsewhere in this repo.
import data_loader
import util


def group_by_week(video_data):
    # Everything published before start_date goes into a single
    # "Previous Videos" bucket; dummy_start_date just needs to predate the
    # oldest video.
    dummy_start_date = datetime.date(2017, 7, 1)
    start_date = datetime.date(2019, 7, 1)
    tmr = datetime.date.today() + datetime.timedelta(days=1)  # consider timezone
    week = datetime.timedelta(weeks=1)
    groups = []
    prev_videos = Group(
        "Previous Videos",
        filter_video_by_date(video_data, dummy_start_date, start_date),
    )
    if len(prev_videos.ids) > 0:
        groups += [prev_videos]
    while start_date <= tmr:
        end_date = start_date + week
        title = "Week %s" % start_date
        ids = filter_video_by_date(video_data, start_date, end_date)
        group = Group(title, ids)
        if len(group.ids) > 0:
            groups.insert(0, group)  # newest week first
        start_date = end_date
    return groups
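# For context, minimal sketches of the two helpers group_by_week leans on.
# These are assumptions inferred from usage, not the repo's actual
# definitions: a Group is a title plus a list of YouTube ids (with a slug
# filled in later), and filter_video_by_date selects ids whose publish date
# falls in the half-open range [start_date, end_date). The published_at
# attribute name is likewise an assumption.

class Group:
    def __init__(self, title, ids):
        self.title = title
        self.ids = ids
        self.slug = None


def filter_video_by_date(video_data, start_date, end_date):
    # video_data is assumed to map YouTube id -> metadata object with a
    # published_at datetime.
    return [
        vid for vid, v in video_data.items()
        if start_date <= v.published_at.date() < end_date
    ]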
def process_groups(groups, video_data, merge_small_groups=True):
    # Merge groups that belong to the same channel, then move groups with
    # 2 or fewer videos into an "Others" bucket.
    result = []
    others = []
    group_map = {}
    for group in groups:
        ids = remove_invalid_ids(group.ids, video_data)
        if len(ids) == 0:
            continue
        # Key each group by the channel of its first valid video, so
        # groups from the same channel are merged.
        g_id = video_data[ids[0]].channel_id
        if g_id in group_map:
            group_map[g_id].ids += group.ids
        else:
            group_map[g_id] = group
    groups = group_map.values()
    for group in groups:
        if merge_small_groups and len(group.ids) <= 2:
            others += group.ids
        else:
            result.append(group)
    result = sorted(result, key=lambda g: g.title.upper())
    if len(others) > 0:
        result.append(Group('Others', others))
    # Sort each group by publish date; the slug comes from the channel of
    # the group's newest video.
    for group in result:
        group.ids = sort_video_ids_by_time(group.ids, video_data)
        if len(group.ids) > 0:
            group.slug = video_data[group.ids[0]].channel_id
    return result
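# Sketches of the two cleanup helpers used above, inferred from how
# process_groups calls them; the repo's real definitions live elsewhere.
# remove_invalid_ids drops ids with no cached metadata, and
# sort_video_ids_by_time orders ids newest-first (an assumption consistent
# with the "newest first" grouping elsewhere in this module).

def remove_invalid_ids(ids, video_data):
    return [i for i in ids if i in video_data]


def sort_video_ids_by_time(ids, video_data):
    # Unknown ids are skipped so raw merged id lists stay safe to sort.
    return sorted(
        (i for i in ids if i in video_data),
        key=lambda i: video_data[i].published_at,
        reverse=True,
    )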
def get_message(site):
    # Assemble a plain-text debug digest of the newest and most-viewed
    # videos (NUM entries each).
    video_data = site.video_data
    (most_viewed, latest) = data_loader.sort_videos(video_data)
    NUM = 18
    debug_groups = [
        Group("Latest", latest[:NUM]),
        Group("Most Viewed", most_viewed[:NUM]),
    ]
    dump_video_list = ["# Latest"]
    dump_video_list += util.dump_video_list(latest[:NUM], video_data)
    dump_video_list += ["", "# Most viewed"]
    dump_video_list += util.dump_video_list(most_viewed[:NUM], video_data)
    debug_text = '\n'.join(dump_video_list)
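# get_message leans on util.dump_video_list; a hedged sketch of its assumed
# shape (one readable line per id). The real helper lives in util, and the
# title attribute and URL format here are assumptions for illustration.

def _sketch_dump_video_list(ids, video_data):
    return [
        "%s (https://youtu.be/%s)" % (video_data[i].title, i)
        for i in ids
    ]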
def group_by_day(video_data):
    start_date = datetime.date(2020, 2, 17)
    tmr = datetime.date.today() + datetime.timedelta(days=1)  # consider timezone
    delta = datetime.timedelta(days=1)
    groups = []
    while start_date <= tmr:
        end_date = start_date + delta
        title = "%s" % start_date
        ids = filter_video_by_date(video_data, start_date, end_date)
        group = Group(title, ids)
        if len(group.ids) > 0:
            groups.insert(0, group)
        start_date = end_date
    return groups
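# group_by_day repeats the scan in group_by_week with a one-day window and
# no "Previous Videos" bucket. One possible consolidation, not in the
# original, is a shared helper parameterized by interval and title format:

def _group_by_interval(video_data, start_date, delta, title_fmt):
    tmr = datetime.date.today() + datetime.timedelta(days=1)
    groups = []
    while start_date <= tmr:
        end_date = start_date + delta
        ids = filter_video_by_date(video_data, start_date, end_date)
        if ids:
            groups.insert(0, Group(title_fmt % start_date, ids))
        start_date = end_date
    return groups

# group_by_day(video_data) would then reduce to:
#   _group_by_interval(video_data, datetime.date(2020, 2, 17),
#                      datetime.timedelta(days=1), "%s")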
def parse(lines):
    # Line-based variant of the parser: "# Title" starts a group, every
    # other non-blank line is a video id or URL. Returns a dict keyed by
    # group title.
    group_map = {}
    current_group = None
    for s in lines:
        s = s.strip()
        if not s:
            continue
        if s.startswith("#"):
            if current_group:
                group_map[current_group.title] = current_group
            current_group = Group(title=s[1:].strip(), ids=[])
            continue
        if current_group is None:
            continue  # skip ids that appear before any "# Title" header
        current_group.ids.append(util.extract_youtube_id(s))
    if current_group:
        group_map[current_group.title] = current_group
    return group_map
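# A small usage sketch of the line-based parser with made-up input;
# util.extract_youtube_id is assumed to pull the 11-character id out of a
# bare id or a full YouTube URL. The demo helper below is hypothetical.

def _demo_parse_lines():
    lines = [
        "# Talks",
        "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
        "",
        "# Music",
        "9bZkp7q19f0",
    ]
    group_map = parse(lines)
    # -> {"Talks": <Group with 1 id>, "Music": <Group with 1 id>}
    return group_map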
def load_site_data(config, path, video_cache, merge_small_groups=True):
    api_key = config.api_key
    ph = PathHelper(path)
    groups = parse(ph.get_data_file())
    most_viewed_data = parse(ph.get_most_viewed_data_file())
    latest_data = parse(ph.get_latest_data_file())
    if len(most_viewed_data) > 0:
        most_viewed = most_viewed_data[0].ids
    else:
        most_viewed = []
    if len(latest_data) > 0:
        latest = latest_data[0].ids
    else:
        latest = []
    all_youtube_ids = [id for g in groups for id in g.ids]
    print("Num of videos: %s" % len(all_youtube_ids))
    video_data = load_video_data(all_youtube_ids, video_cache, api_key)
    videos_by_time = Group(
        "All videos", sort_video_ids_by_time(all_youtube_ids, video_data)
    )
    # merge and sort
    groups_by_num_videos = sort_by_num_videos(groups)
    groups = process_groups(groups, video_data, merge_small_groups)
    site = Site()
    site.groups = groups
    site.video_data = video_data
    site.most_viewed = most_viewed
    site.latest = latest
    site.groups_by_week = group_by_week(video_data)
    site.groups_by_day = group_by_day(video_data)
    site.groups_by_num_videos = groups_by_num_videos
    site.videos_by_time = videos_by_time
    site.num_videos = len(all_youtube_ids)
    return site
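# load_site_data also calls sort_by_num_videos, load_video_data, Site, and
# PathHelper, none of which appear in this excerpt. As one hedged example,
# sort_by_num_videos presumably ranks groups by size; a minimal sketch:

def sort_by_num_videos(groups):
    # Largest groups first (assumed ordering).
    return sorted(groups, key=lambda g: len(g.ids), reverse=True)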
def parse(file_path):
    # File-based variant of the parser (presumably from a different module
    # than the line-based parse above); returns Groups in file order.
    if not os.path.exists(file_path):
        return []
    print("Parsing %s" % file_path)
    groups = []
    current_group = None
    with open(file_path) as f:  # close the file even on errors
        for s in f:
            s = s.strip()
            if not s:
                continue
            if s.startswith("#"):
                if current_group:
                    groups.append(current_group)
                current_group = Group(title=s[1:].strip(), ids=[])
                continue
            if current_group is None:
                continue  # skip ids before the first "# Title" header
            current_group.ids.append(util.extract_youtube_id(s))
    if current_group:
        groups.append(current_group)
    return groups
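# The data-file format, inferred from the parser: "# Title" lines start a
# group and each following non-blank line is a YouTube id or URL. A
# hypothetical round-trip demo (file name and contents are made up):

def _demo_parse_file(tmp_path="/tmp/example_videos.txt"):
    with open(tmp_path, "w") as f:
        f.write(
            "# Talks\n"
            "https://www.youtube.com/watch?v=dQw4w9WgXcQ\n"
            "\n"
            "# Music\n"
            "9bZkp7q19f0\n"
        )
    for g in parse(tmp_path):
        print(g.title, g.ids)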