예제 #1
0
def _convert_posting_interval(channel_list):
    """Write per-channel average posting intervals to ``posting_interval.csv``.

    For every channel ID in *channel_list* the stored channel data is loaded
    with ``channel_module.load_channel_from_file`` and one CSV row is
    produced: channel name, view count, subscriber count, video count and
    the posting interval. The interval is the number of whole days between
    the ``published_at`` of the first and the last entry of ``videos``,
    divided by ``video_count``.

    Channels without any videos (empty ``videos`` list or a ``video_count``
    of zero) get an empty interval cell instead of aborting the export.

    Args:
        channel_list: Iterable of channel IDs.

    The file is written below ``settings.OUTPUT_DIR``; nothing is returned.
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"
    # NOTE(review): "VideCount" looks like a typo for "VideoCount". It is
    # kept unchanged because consumers of the CSV may rely on the column name.
    header = [
        "Channel", "ViewCount", "SubscriberCount", "VideCount",
        "PostingInterval"
    ]
    data = [header]
    for channel_id in channel_list:
        channel_data = channel_module.load_channel_from_file(channel_id)
        videos = channel_data["videos"]
        video_count = int(channel_data["video_count"])

        if videos and video_count:
            # presumably ``videos`` is ordered newest first, so index 0 is
            # the latest upload and index -1 the oldest -- TODO confirm.
            to_date = datetime.datetime.strptime(
                videos[0]["published_at"], timestamp_format)
            from_date = datetime.datetime.strptime(
                videos[-1]["published_at"], timestamp_format)
            interval = (to_date - from_date).days / video_count
        else:
            # No uploads: the interval is undefined, leave the cell empty
            # rather than raising IndexError / ZeroDivisionError.
            interval = ""

        data.append([
            channel_data["name"],
            channel_data["view_count"],
            channel_data["subscriber_count"],
            channel_data["video_count"],
            interval,
        ])

    file_path = f"{settings.OUTPUT_DIR}/posting_interval.csv"
    # newline="" is required by the csv module so that no extra blank lines
    # are emitted on Windows; UTF-8 keeps non-ASCII channel names portable.
    with open(file_path, "w", newline="", encoding="utf-8") as f:
        csv.writer(f).writerows(data)
예제 #2
0
def _convert_video_data(channel_list):
    """Write one CSV row per stored video to ``video.csv``.

    For every channel ID in *channel_list* the stored channel data is loaded
    with ``channel_module.load_channel_from_file``; each entry of its
    ``videos`` list becomes a row made of the channel name followed by the
    video's title, publication timestamp, thumbnail URL and its view, like,
    dislike, favorite and comment counts.

    Args:
        channel_list: Iterable of channel IDs.

    The file is written below ``settings.OUTPUT_DIR``; nothing is returned.
    """
    # Keys read from every video dict, in column order.
    video_fields = (
        "title",
        "published_at",
        "thumbnail_url",
        "view_count",
        "like_count",
        "dislike_count",
        "favorite_count",
        "comment_count",
    )
    header = [
        "channel_name",
        "video_title",
        "published_at",
        "thumbnail_url",
        "view_count",
        "like_count",
        "dislike_count",
        "favorite_count",
        "comment_count",
    ]

    csv_data = [header]
    for channel_id in channel_list:
        channel_data = channel_module.load_channel_from_file(channel_id)
        csv_data.extend(
            [channel_data["name"], *(video[field] for field in video_fields)]
            for video in channel_data["videos"]
        )

    file_path = f"{settings.OUTPUT_DIR}/video.csv"
    # newline="" is required by the csv module so that no extra blank lines
    # are emitted on Windows; UTF-8 keeps non-ASCII titles portable.
    with open(file_path, "w", newline="", encoding="utf-8") as f:
        csv.writer(f).writerows(csv_data)
예제 #3
0
def _convert_date_series_video_view_data(channel_list):
    """Write a date-by-channel table of video views to a CSV file.

    The output ``video_view_per_date.csv`` has a ``Date`` column followed by
    two columns per channel: the MD5 hex digest of the channel ID (holding
    the view count) and ``<digest>_title`` (holding the video title). Each
    row is one publication date (``YYYY-MM-DD``); channels without a video
    on that date get ``0`` and an empty title.

    Args:
        channel_list: Sequence of channel IDs. It is iterated twice, so it
            must not be a one-shot iterator.

    The file is written below ``settings.OUTPUT_DIR``; nothing is returned.
    """
    header = ["Date"]
    # date string -> {channel_id -> {"view": ..., "title": ...}}
    data = {}
    for channel_id in channel_list:
        channel_data = channel_module.load_channel_from_file(channel_id)
        hex_channel_id = hashlib.md5(channel_id.encode("utf-8")).hexdigest()
        header.extend([hex_channel_id, f"{hex_channel_id}_title"])
        for video in channel_data["videos"]:
            date_key = parse(video["published_at"]).strftime("%Y-%m-%d")
            # NOTE(review): a second video of the same channel on the same
            # date replaces the earlier entry -- confirm this is intended.
            data.setdefault(date_key, {})[channel_id] = {
                "view": video["view_count"],
                "title": video["title"],
            }

    csv_data = [header]
    for date_key, per_channel in data.items():
        row = [date_key]
        for channel_id in channel_list:
            entry = per_channel.get(channel_id)
            if entry is None:
                row.extend([0, ""])
            else:
                row.extend([entry["view"], entry["title"]])
        csv_data.append(row)

    file_path = f"{settings.OUTPUT_DIR}/video_view_per_date.csv"
    # newline="" is required by the csv module so that no extra blank lines
    # are emitted on Windows; UTF-8 keeps non-ASCII titles portable.
    with open(file_path, "w", newline="", encoding="utf-8") as f:
        csv.writer(f).writerows(csv_data)
예제 #4
0
def main(*args):
    """Fetch the video list of every listed channel and store it as JSON.

    ``args[0]`` is the path handed to ``channel_module.get_channel_list``.
    For each returned channel ID the stored channel data is loaded, its
    ``"videos"`` entry is replaced by the result of
    ``_fetch_all_videos_by_channel_id`` and the record is saved to
    ``<settings.OUTPUT_DIR>/<channel_id>.json``.
    """
    list_path = args[0]
    for channel_id in channel_module.get_channel_list(list_path):
        print(f"Download video list for channel ID: {channel_id} .")
        record = channel_module.load_channel_from_file(channel_id)
        record["videos"] = _fetch_all_videos_by_channel_id(channel_id)
        target_path = f"{settings.OUTPUT_DIR}/{channel_id}.json"
        _save_to_file(target_path, record)
def main(*args):
    """Fetch video statistics of every listed channel and store them as JSON.

    ``args[0]`` is the path handed to ``channel_module.get_channel_list``.
    For each returned channel ID the stored channel data is loaded and the
    videos are fetched with ``_fetch_videos_by_channel_id``. Channels for
    which that call returns a falsy value are skipped; otherwise every video
    is passed through ``_build_data`` and the resulting list is saved as the
    channel's ``"videos"`` entry in
    ``<settings.OUTPUT_DIR>/<channel_id>.json``.
    """
    list_path = args[0]
    for channel_id in channel_module.get_channel_list(list_path):
        print(f"Download video statistics for channel ID: {channel_id} .")
        record = channel_module.load_channel_from_file(channel_id)
        fetched = _fetch_videos_by_channel_id(channel_id)
        if fetched:
            record["videos"] = [_build_data(video) for video in fetched]
            target_path = f"{settings.OUTPUT_DIR}/{channel_id}.json"
            _save_to_file(target_path, record)
def _fetch_videos_by_channel_id(
        channel_id: str) -> Union[List, Literal[False]]:
    """Attach fetched statistics to the stored videos of a channel.

    The channel is loaded with ``channel_module.load_channel_from_file``.
    Its video IDs are processed in the chunks produced by
    ``_chunk_video_id_list``; each chunk is joined with commas and passed to
    ``_fetch_video_statistics``. Every returned item is matched to a stored
    video through ``_find_index`` on the ``"video_id"`` key and its
    ``"statistics"`` are copied onto that video. One ``*`` is printed per
    chunk as a progress marker, followed by a final line break.

    Returns:
        The channel's ``"videos"`` list (updated in place), or ``False``
        when the loaded channel is falsy.
    """
    channel = channel_module.load_channel_from_file(channel_id)
    if not channel:
        return False

    for id_chunk in _chunk_video_id_list(channel):
        print("*", end="", flush=True)
        response = _fetch_video_statistics(",".join(id_chunk))
        # A missing/empty item list simply contributes nothing.
        for item in response["items"] or ():
            video_id = item["id"]
            position = _find_index(channel["videos"], "video_id", video_id)
            channel["videos"][position]["statistics"] = item["statistics"]

    print("")
    return channel["videos"]
예제 #7
0
def _convert_channel_data(channel_list):
    """Write one CSV row of summary data per channel to ``channel.csv``.

    For every channel ID in *channel_list* the stored channel data is loaded
    with ``channel_module.load_channel_from_file`` and its name, publication
    timestamp, view count, subscriber count and video count are written as
    one row.

    Args:
        channel_list: Iterable of channel IDs.

    The file is written below ``settings.OUTPUT_DIR``; nothing is returned.
    """
    # The column names double as the keys read from each channel dict.
    header = [
        "name",
        "published_at",
        "view_count",
        "subscriber_count",
        "video_count",
    ]
    csv_data = [header]
    for channel_id in channel_list:
        channel_data = channel_module.load_channel_from_file(channel_id)
        csv_data.append([channel_data[key] for key in header])

    file_path = f"{settings.OUTPUT_DIR}/channel.csv"
    # newline="" is required by the csv module so that no extra blank lines
    # are emitted on Windows; UTF-8 keeps non-ASCII channel names portable.
    with open(file_path, "w", newline="", encoding="utf-8") as f:
        csv.writer(f).writerows(csv_data)
예제 #8
0
# Plot setup: load the per-date view table and prepare an empty figure.
# NOTE(review): output_file / ColumnDataSource / figure look like Bokeh's
# plotting API -- confirm against the file's imports.
output_file("plot.html")
# The "Date" column becomes the (parsed) index of the frame.
df = pd.read_csv("./output/video_view_per_date.csv",
                 index_col="Date",
                 parse_dates=True)
# Order rows chronologically; "Date" here refers to the index level name.
df = df.sort_values(by=["Date"], ascending=True)
source = ColumnDataSource(df)
# NOTE(review): if this is Bokeh's figure(), plot_width / plot_height were
# replaced by width / height in Bokeh 3.0 -- verify the pinned version.
plot = figure(
    plot_width=1200,
    plot_height=600,
    x_axis_label="Date",
    y_axis_label="View",
    x_axis_type="datetime",
)
# Endless colour iterator so any number of channels can be paired with a
# colour.
colors = itertools.cycle(palette)
for channel_id, color in zip(channel_list, colors):
    channel_data = channel_module.load_channel_from_file(channel_id)
    hex_channel_id = hashlib.md5(channel_id.encode("utf-8")).hexdigest()
    print(hex_channel_id)
    plot.line(
        x="Date",
        y=hex_channel_id,
        source=source,
        color=color,
        line_width=2,
        legend_label=channel_data["name"],
        name=hex_channel_id,
    )
    hover = HoverTool(
        tooltips=[
            ("View", f"@{hex_channel_id}"),
            ("Channel", channel_data["name"]),