コード例 #1
0
ファイル: channel.py プロジェクト: Anjan-12/youtubedownloader
    def _extract_videos(raw_json: str) -> Tuple[List[str], Optional[str]]:
        """Pull the video watch paths out of a raw channel json payload.

        Three json layouts are tried in order: the tree found when the json
        was extracted from html, a continuation response wrapped in a list
        with a ``response`` key, and a continuation response without that
        wrapper.

        :param str raw_json: Json text extracted from the page or taken from
            the last server response
        :rtype: Tuple[List[str], Optional[str]]
        :returns: ``(watch_paths, continuation)``; ``watch_paths`` holds the
            de-duplicated ``/watch?v=<id>`` strings and ``continuation`` is
            the token for the next page or ``None``. ``([], None)`` is
            returned when none of the known layouts matches.
        """
        payload = json.loads(raw_json)

        def descend(node, path):
            # Apply one subscript per step; lookup errors propagate as-is.
            for step in path:
                node = node[step]
            return node

        candidate_paths = (
            # tree structure when the json was extracted from html
            ("contents", "twoColumnBrowseResultsRenderer", "tabs", 1,
             "tabRenderer", "content", "sectionListRenderer", "contents", 0,
             "itemSectionRenderer", "contents", 0, "gridRenderer", "items"),
            # tree structure when the json was sent directly by the server
            # in a continuation response
            (1, "response", "onResponseReceivedActions", 0,
             "appendContinuationItemsAction", "continuationItems"),
            # continuation response that is no longer a list and no longer
            # has the "response" key
            ("onResponseReceivedActions", 0,
             "appendContinuationItemsAction", "continuationItems"),
        )

        last_error = None
        for path in candidate_paths:
            try:
                videos = descend(payload, path)
            except (KeyError, IndexError, TypeError) as err:
                # keep the error around: only the last one gets logged
                last_error = err
            else:
                break
        else:
            # every known layout failed
            logger.info(last_error)
            return [], None

        try:
            continuation = descend(
                videos,
                (-1, "continuationItemRenderer", "continuationEndpoint",
                 "continuationCommand", "token"),
            )
            # the trailing item carried the token, not a video
            videos = videos[:-1]
        except (KeyError, IndexError):
            # a failed lookup means no continuation is available
            continuation = None

        # keep only the video id of each entry, then drop duplicates
        watch_paths = [
            f"/watch?v={item['gridVideoRenderer']['videoId']}"
            for item in videos
        ]
        return uniqueify(watch_paths), continuation
コード例 #2
0
    def _extract_videos(raw_json: str) -> Tuple[List[str], Optional[str]]:
        """Pull the video watch paths out of a raw playlist json payload.

        Two json layouts are tried in order: the tree found when the json
        was extracted from html, then the one sent directly by the server in
        a continuation response.

        :param str raw_json: Json text extracted from the page or taken from
            the last server response
        :rtype: Tuple[List[str], Optional[str]]
        :returns: ``(watch_paths, continuation)``; ``watch_paths`` holds the
            de-duplicated ``/watch?v=<id>`` strings and ``continuation`` is
            the token for the next page or ``None``. ``([], None)`` is
            returned when neither layout matches.
        """
        parsed = json.loads(raw_json)

        def follow(node, route):
            # Apply one subscript per hop; lookup errors propagate as-is.
            for hop in route:
                node = node[hop]
            return node

        routes = (
            # tree structure when the json was extracted from html
            ("contents", "twoColumnBrowseResultsRenderer", "tabs", 0,
             "tabRenderer", "content", "sectionListRenderer", "contents", 0,
             "itemSectionRenderer", "contents", 0,
             "playlistVideoListRenderer"),
            # tree structure when the json was sent directly by the server
            # in a continuation response
            (1, "response", "continuationContents",
             "playlistVideoListContinuation"),
        )

        failure = None
        for route in routes:
            try:
                important_content = follow(parsed, route)
            except (KeyError, IndexError, TypeError) as exc:
                # keep the error around: only the last one gets printed
                failure = exc
            else:
                break
        else:
            print(failure)
            return [], None

        videos = important_content["contents"]
        try:
            continuation = follow(
                important_content,
                ("continuations", 0, "nextContinuationData", "continuation"),
            )
        except (KeyError, IndexError):
            # a failed lookup means no continuation is available
            continuation = None

        # keep only the video id of each entry, then drop duplicates
        watch_paths = [
            f"/watch?v={entry['playlistVideoRenderer']['videoId']}"
            for entry in videos
        ]
        return uniqueify(watch_paths), continuation
コード例 #3
0
def get_url_list_from_file(file, retry):
    """Expand the entries stored in a list file into de-duplicated URLs.

    The file is read line by line and its de-duplicated entries are written
    back. Each entry is then expanded through ``get_url_by_item`` and the
    file is overwritten a second time with the de-duplicated result.

    :param file: path of the list file; ``defaultIni`` is used when falsy
    :param retry: passed through unchanged to ``get_url_by_item``
    :returns: the de-duplicated expanded URLs, or an empty list when the
        file does not exist
    """
    file = file or defaultIni
    if not file or not os.path.exists(file):
        return []

    with open(file, "r") as source:
        entries = [line.strip('\n') for line in source]

    # first pass: persist the de-duplicated raw entries
    entries = uniqueify(entries)
    with open(file, "w") as target:
        target.writelines(f"{entry}\n" for entry in entries)

    expanded = []
    for entry in entries:
        expanded.extend(get_url_by_item(entry, retry))

    # second pass: replace the file content with the expanded URLs
    expanded = uniqueify(expanded)
    with open(file, "w") as target:
        target.writelines(f"{url}\n" for url in expanded)

    return expanded
コード例 #4
0
    def _extract_videos(raw_json: str) -> Tuple[
        List[Tuple[str, str]], Optional[str]]:
        """Pull ``(watch path, title)`` pairs out of a raw playlist payload.

        The tree found in the html page is tried first, then the one found
        in a continuation response.

        @returns: Tuple[List[Tuple[endpoint, title]], Continuation[Optional]];
            ``([], None)`` when neither layout matches
        """
        def locate(data):
            # Prefer the html layout; fall back to the continuation layout.
            # An error raised by the fallback lookup reaches the caller.
            try:
                return data["contents"]["twoColumnBrowseResultsRenderer"][
                    "tabs"][0]["tabRenderer"]["content"][
                    "sectionListRenderer"]["contents"][0][
                    "itemSectionRenderer"]["contents"][0][
                    "playlistVideoListRenderer"]
            except (KeyError, IndexError, TypeError):
                return data[1]["response"]["continuationContents"][
                    "playlistVideoListContinuation"]

        parsed = json.loads(raw_json)
        try:
            important_content = locate(parsed)
        except (KeyError, IndexError, TypeError) as err:
            print(err)
            return [], None

        videos = important_content["contents"]

        try:
            first_continuation = important_content["continuations"][0]
            continuation = first_continuation["nextContinuationData"][
                "continuation"]
        except (KeyError, IndexError):
            # a failed lookup means no continuation is available
            continuation = None

        pairs = []
        for entry in videos:
            endpoint = f"/watch?v={entry['playlistVideoRenderer']['videoId']}"
            # the title falls back to "" when "simpleText" is missing
            title = entry["playlistVideoRenderer"]["title"].get(
                "simpleText", "")
            pairs.append((endpoint, title))

        return uniqueify(pairs), continuation
コード例 #5
0
def get_correct_videos_from_playlist(url, retry):
    """Fetch a playlist's videos, re-fetching until the result is usable.

    ``get_videos_from_playlist`` is called repeatedly until it yields a
    non-empty video collection together with a title, or until the attempt
    budget of ``retry + 100`` calls is used up.

    :param url: playlist URL handed to ``get_videos_from_playlist``
    :param retry: extra attempts on top of the fixed budget of 100
    :returns: ``uniqueify`` applied to the videos of the last attempt; this
        may be empty when every attempt failed
    """
    videos = []
    title = None
    attempt = 1
    # ``is None`` is the identity check for the singleton; truthiness is the
    # emptiness check for the video collection.
    while not videos or title is None:
        videos, title = get_videos_from_playlist(url)

        logger.debug(f"{attempt} retry in get_correct_videos_from_playlist()")
        attempt += 1
        if attempt > retry + 100:
            # budget exhausted: continue with whatever the last call gave
            break

    logger.info(f'Playlist = {url}')
    logger.info(f'Title = {title}')
    logger.info(f'{len(videos)} Videos found from playlist')

    return uniqueify(videos)
コード例 #6
0
def get_videos_from_channel(url):
    """Collect the absolute watch URLs linked from a channel's videos page.

    :param url: channel URL, or just the channel id
    :returns: list of ``https://www.youtube.com/watch?v=...`` URLs built
        from the de-duplicated ``href`` matches found in the fetched page
    """
    try:
        channel_id: str = regex_search(
            r"(?:channel|\/)([0-9A-Za-z_-]{24}).*", url, group=1)
    except IndexError:
        # assume that url is just the id
        channel_id = url

    page = request.get(f"https://www.youtube.com/channel/{channel_id}/videos")

    # relative "/watch?v=..." links, duplicates removed
    watch_paths = uniqueify(re.findall(r"href=\"(/watch\?v=[\w-]*)", page))

    return [f"https://www.youtube.com{path}" for path in watch_paths]
コード例 #7
0
def test_uniqueify():
    """Check that uniqueify collapses the repeated 3 and keeps the order."""
    assert uniqueify([1, 2, 3, 3, 4, 5]) == [1, 2, 3, 4, 5]
コード例 #8
0
 def _extract_videos(self, html: str) -> List[str]:
     """Return the de-duplicated ``self._video_regex`` matches in ``html``.

     :param str html: page source to scan
     :rtype: List[str]
     :returns: result of ``uniqueify`` applied to all regex matches
     """
     matches = self._video_regex.findall(html)
     return uniqueify(matches)
コード例 #9
0
 def _extract_videos_old(self, html: str) -> List[Tuple[str, str]]:
     """Return de-duplicated pairs built from ``self._video_regex_2`` matches.

     Each match contributes ``(self._video_url(match[0]), match[1])``.

     :param str html: page source to scan
     :rtype: List[Tuple[str, str]]
     :returns: result of ``uniqueify`` applied to the list of pairs
     """
     pairs: List[Tuple[str, str]] = [
         (self._video_url(match[0]), match[1])
         for match in self._video_regex_2.findall(html)
     ]
     return uniqueify(pairs)