import csv
import datetime
import hashlib

from dateutil.parser import parse

# Project-local modules (assumed layout): settings holds OUTPUT_DIR,
# channel_module handles the channel list and per-channel JSON files.
import channel_module
import settings


def _convert_posting_interval(channel_list):
    header = [
        "Channel",
        "ViewCount",
        "SubscriberCount",
        "VideoCount",
        "PostingInterval",
    ]
    data = [header]
    for channel_id in channel_list:
        channel_data = channel_module.load_channel_from_file(channel_id)
        # Videos are stored newest first: [0] is the latest upload, [-1] the oldest.
        latest_video = channel_data["videos"][0]
        first_video = channel_data["videos"][-1]
        to_date = datetime.datetime.strptime(
            latest_video["published_at"], "%Y-%m-%dT%H:%M:%SZ")
        from_date = datetime.datetime.strptime(
            first_video["published_at"], "%Y-%m-%dT%H:%M:%SZ")
        days = (to_date - from_date).days
        # Average number of days between uploads.
        interval = days / int(channel_data["video_count"])
        row = [
            channel_data["name"],
            channel_data["view_count"],
            channel_data["subscriber_count"],
            channel_data["video_count"],
            interval,
        ]
        data.append(row)
    file_path = f"{settings.OUTPUT_DIR}/posting_interval.csv"
    with open(file_path, "w") as f:
        writer = csv.writer(f)
        writer.writerows(data)
def _convert_video_data(channel_list):
    header = [
        "channel_name",
        "video_title",
        "published_at",
        "thumbnail_url",
        "view_count",
        "like_count",
        "dislike_count",
        "favorite_count",
        "comment_count",
    ]
    csv_data = [header]
    for channel_id in channel_list:
        channel_data = channel_module.load_channel_from_file(channel_id)
        for video in channel_data["videos"]:
            row = [
                channel_data["name"],
                video["title"],
                video["published_at"],
                video["thumbnail_url"],
                video["view_count"],
                video["like_count"],
                video["dislike_count"],
                video["favorite_count"],
                video["comment_count"],
            ]
            csv_data.append(row)
    file_path = f"{settings.OUTPUT_DIR}/video.csv"
    with open(file_path, "w") as f:
        writer = csv.writer(f)
        writer.writerows(csv_data)
def _convert_date_series_video_view_data(channel_list):
    header = ["Date"]
    data = {}
    for channel_id in channel_list:
        channel_data = channel_module.load_channel_from_file(channel_id)
        # Derive a stable per-channel column key from the MD5 hash of the channel ID.
        hex_channel_id = hashlib.md5(channel_id.encode("utf-8")).hexdigest()
        header.append(hex_channel_id)
        header.append(f"{hex_channel_id}_title")
        for video in channel_data["videos"]:
            date_obj = parse(video["published_at"])
            date_key = date_obj.strftime("%Y-%m-%d")
            if date_key not in data:
                data[date_key] = {}
            # If a channel posts more than one video on the same date, the last
            # one processed wins.
            data[date_key][channel_id] = {
                "view": video["view_count"],
                "title": video["title"],
            }
    csv_data = [header]
    for date_key, dic in data.items():
        row = [date_key]
        for channel_id in channel_list:
            view = dic[channel_id]["view"] if channel_id in dic else 0
            row.append(view)
            title = dic[channel_id]["title"] if channel_id in dic else ""
            row.append(title)
        csv_data.append(row)
    file_path = f"{settings.OUTPUT_DIR}/video_view_per_date.csv"
    with open(file_path, "w") as f:
        writer = csv.writer(f)
        writer.writerows(csv_data)
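# Illustration only: the converters above assume each JSON file returned by
# channel_module.load_channel_from_file looks roughly like this. The field
# names are taken from the code above; the values are made-up placeholders,
# and the videos list is ordered newest first.
EXAMPLE_CHANNEL = {
    "name": "Example Channel",
    "published_at": "2015-04-01T00:00:00Z",
    "view_count": "1234567",
    "subscriber_count": "8910",
    "video_count": "200",
    "videos": [
        {
            "video_id": "abc123xyz",
            "title": "Example video",
            "published_at": "2021-06-01T12:00:00Z",
            "thumbnail_url": "https://example.com/thumb.jpg",
            "view_count": "1000",
            "like_count": "10",
            "dislike_count": "1",
            "favorite_count": "0",
            "comment_count": "5",
        },
    ],
}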
def main(*args):
    channel_list_file_path = args[0]
    channel_list = channel_module.get_channel_list(channel_list_file_path)
    for channel_id in channel_list:
        print(f"Download video list for channel ID: {channel_id}.")
        channel_data = channel_module.load_channel_from_file(channel_id)
        channel_data["videos"] = _fetch_all_videos_by_channel_id(channel_id)
        file_path = f"{settings.OUTPUT_DIR}/{channel_id}.json"
        _save_to_file(file_path, channel_data)
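# _save_to_file and _fetch_all_videos_by_channel_id are not part of this
# excerpt. A minimal sketch of what _save_to_file is assumed to do (serialize
# the channel dict to the given JSON path):
import json


def _save_to_file(file_path, data):
    # Write the channel data as pretty-printed UTF-8 JSON.
    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)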
def main(*args):
    channel_list_file_path = args[0]
    channel_list = channel_module.get_channel_list(channel_list_file_path)
    for channel_id in channel_list:
        print(f"Download video statistics for channel ID: {channel_id}.")
        channel_detail = channel_module.load_channel_from_file(channel_id)
        video_list = _fetch_videos_by_channel_id(channel_id)
        if not video_list:
            # Skip channels whose video list could not be loaded.
            continue
        videos = [_build_data(video) for video in video_list]
        channel_detail["videos"] = videos
        file_path = f"{settings.OUTPUT_DIR}/{channel_id}.json"
        _save_to_file(file_path, channel_detail)
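# _build_data is not shown either. Judging from the fields read by the CSV
# converters and the "statistics" dict attached in _fetch_videos_by_channel_id
# below, it presumably flattens each video entry plus its YouTube statistics
# into one dict. A hypothetical sketch:
def _build_data(video):
    statistics = video.get("statistics", {})
    return {
        "video_id": video["video_id"],
        "title": video["title"],
        "published_at": video["published_at"],
        "thumbnail_url": video["thumbnail_url"],
        "view_count": statistics.get("viewCount", 0),
        "like_count": statistics.get("likeCount", 0),
        "dislike_count": statistics.get("dislikeCount", 0),
        "favorite_count": statistics.get("favoriteCount", 0),
        "comment_count": statistics.get("commentCount", 0),
    }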
from typing import List, Literal, Union


def _fetch_videos_by_channel_id(
        channel_id: str) -> Union[List, Literal[False]]:
    channel = channel_module.load_channel_from_file(channel_id)
    if not channel:
        return False
    # The statistics are fetched in chunks because the API only accepts a
    # limited number of video IDs per request.
    chunked_video_id_list = _chunk_video_id_list(channel)
    for video_id_list in chunked_video_id_list:
        print("*", end="", flush=True)  # simple progress indicator
        video_statistics = _fetch_video_statistics(",".join(video_id_list))
        if not video_statistics["items"]:
            continue
        for item in video_statistics["items"]:
            video_id = item["id"]
            index = _find_index(channel["videos"], "video_id", video_id)
            channel["videos"][index]["statistics"] = item["statistics"]
    print()
    return channel["videos"]
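# _chunk_video_id_list, _find_index and _fetch_video_statistics are referenced
# above but not part of this excerpt. Minimal sketches, under the assumption
# that statistics come from the YouTube Data API v3 videos endpoint, which
# accepts up to 50 video IDs per request; the raw-requests call and the
# YOUTUBE_API_KEY environment variable are assumptions:
import os

import requests


def _chunk_video_id_list(channel, chunk_size=50):
    # Split the channel's video IDs into lists of at most chunk_size IDs.
    video_ids = [video["video_id"] for video in channel["videos"]]
    return [video_ids[i:i + chunk_size]
            for i in range(0, len(video_ids), chunk_size)]


def _find_index(items, key, value):
    # Index of the first dict in items whose `key` equals `value`.
    return next(i for i, item in enumerate(items) if item[key] == value)


def _fetch_video_statistics(video_id_csv):
    # Hypothetical API call; the real project may use google-api-python-client
    # instead of raw requests.
    response = requests.get(
        "https://www.googleapis.com/youtube/v3/videos",
        params={
            "part": "statistics",
            "id": video_id_csv,
            "key": os.environ["YOUTUBE_API_KEY"],
        },
    )
    response.raise_for_status()
    return response.json()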
def _convert_channel_data(channel_list):
    header = [
        "name",
        "published_at",
        "view_count",
        "subscriber_count",
        "video_count",
    ]
    csv_data = [header]
    for channel_id in channel_list:
        channel_data = channel_module.load_channel_from_file(channel_id)
        row = [
            channel_data["name"],
            channel_data["published_at"],
            channel_data["view_count"],
            channel_data["subscriber_count"],
            channel_data["video_count"],
        ]
        csv_data.append(row)
    file_path = f"{settings.OUTPUT_DIR}/channel.csv"
    with open(file_path, "w") as f:
        writer = csv.writer(f)
        writer.writerows(csv_data)
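# The converter functions above are presumably driven by a main() of the same
# shape as the download scripts. A hypothetical sketch:
def main(*args):
    channel_list_file_path = args[0]
    channel_list = channel_module.get_channel_list(channel_list_file_path)
    _convert_channel_data(channel_list)
    _convert_video_data(channel_list)
    _convert_posting_interval(channel_list)
    _convert_date_series_video_view_data(channel_list)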
import hashlib
import itertools

import pandas as pd
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.palettes import Dark2_5 as palette  # assumed; any categorical palette works
from bokeh.plotting import figure, output_file, show

import channel_module

# channel_list is assumed to be loaded beforehand (see the sketch after this script).

output_file("plot.html")

df = pd.read_csv("./output/video_view_per_date.csv",
                 index_col="Date",
                 parse_dates=True)
df = df.sort_values(by=["Date"], ascending=True)
source = ColumnDataSource(df)

# plot_width/plot_height are the Bokeh 2.x names (width/height in Bokeh 3.x).
plot = figure(
    plot_width=1200,
    plot_height=600,
    x_axis_label="Date",
    y_axis_label="View",
    x_axis_type="datetime",
)

colors = itertools.cycle(palette)
for channel_id, color in zip(channel_list, colors):
    channel_data = channel_module.load_channel_from_file(channel_id)
    hex_channel_id = hashlib.md5(channel_id.encode("utf-8")).hexdigest()
    print(hex_channel_id)
    plot.line(
        x="Date",
        y=hex_channel_id,
        source=source,
        color=color,
        line_width=2,
        legend_label=channel_data["name"],
        name=hex_channel_id,
    )
    hover = HoverTool(
        tooltips=[
            ("View", f"@{hex_channel_id}"),
            ("Channel", channel_data["name"]),
        ],
        # Assumed completion from here on: scope the hover to this channel's
        # line via the glyph name set above, then render the page.
        names=[hex_channel_id],
    )
    plot.add_tools(hover)

show(plot)
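# The plotting script above assumes channel_list is already defined. Mirroring
# the main() functions earlier, it would typically be loaded near the top of
# the script from a channel-list file passed on the command line (the argument
# position is an assumption):
#
#     import sys
#     channel_list = channel_module.get_channel_list(sys.argv[1])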