Beispiel #1
0
def test_metadata_from_initial_data(stream_dict):
    """Check that metadata built from the page's initial data is populated.

    The initial data extracted from ``stream_dict`` is JSON-decoded and fed to
    ``extract.metadata``; both the raw and the parsed metadata must be
    non-empty and carry the expected keys in their first entry.
    """
    raw_json = extract.initial_data(stream_dict)
    parsed = json.loads(raw_json)
    ytmd = extract.metadata(parsed)

    # Raw metadata: at least one entry, and it exposes 'contents'.
    assert len(ytmd.raw_metadata) > 0
    assert 'contents' in ytmd.raw_metadata[0]

    # Parsed metadata: at least one entry, and it exposes 'Song'.
    assert len(ytmd.metadata) > 0
    assert 'Song' in ytmd.metadata[0]
Beispiel #2
0
    def prefetch(self) -> None:
        """Eagerly download all necessary data.

        Performs every required network request up front so that later
        operations do not need to make long-blocking calls outside of the
        interpreter.

        :rtype: None
        """
        self.watch_html = request.get(url=self.watch_url)
        self.check_availability()
        self.age_restricted = extract.is_age_restricted(self.watch_html)

        if not self.age_restricted:
            self.vid_info_url = extract.video_info_url(
                video_id=self.video_id, watch_url=self.watch_url
            )
        else:
            # The embed page is only downloaded when it is not already set.
            if not self.embed_html:
                self.embed_html = request.get(url=self.embed_url)
            # NOTE(review): embed_html is fetched just above, yet watch_url
            # is what gets passed on here -- confirm this is intended.
            self.vid_info_url = extract.video_info_url_age_restricted(
                self.video_id, self.watch_url
            )

        self.initial_data = extract.initial_data(self.watch_html)
        self.vid_info_raw = request.get(self.vid_info_url)

        # The player JavaScript is only fetched for unrestricted videos.
        if self.age_restricted:
            return
        self.js_url = extract.js_url(self.watch_html)
        self.js = request.get(self.js_url)
Beispiel #3
0
    def _paginate(
        self, until_watch_id: Optional[str] = None
    ) -> Iterable[List[str]]:
        """Yield the playlist's video links one page at a time.

        Each yielded list holds the ``/watch?v=`` parts of the video links
        found on one page. The first page comes from the playlist page
        source; further pages are requested for as long as a continuation
        is returned.

        :param Optional[str] until_watch_id: YouTube video watch id until
            which the playlist should be read. When it is found on a page,
            only the links before it are yielded and iteration stops.

        :rtype: Iterable[List[str]]
        :returns: Iterable of lists of YouTube watch ids
        """
        # Built once: the link at which pagination must stop (if any).
        stop_link = f"/watch?v={until_watch_id}" if until_watch_id else None

        # The first page is taken from the playlist html itself.
        page = json.dumps(extract.initial_data(self.html))

        while True:
            # Extraction from a playlist only returns 100 videos at a time;
            # a continuation is returned when more videos are available.
            videos_urls, continuation = self._extract_videos(page)

            if stop_link is not None:
                # Only the lookup is guarded, so a ValueError raised while
                # the generator is suspended at the yield is not mistaken
                # for "watch id not on this page".
                try:
                    trim_index = videos_urls.index(stop_link)
                except ValueError:
                    pass
                else:
                    yield videos_urls[:trim_index]
                    return
            yield videos_urls

            if not continuation:
                return
            load_more_url, headers = self._build_continuation_url(
                continuation
            )
            if not (load_more_url and headers):
                return

            logger.debug("load more url: %s", load_more_url)
            # Request the next page of videos with the url generated from
            # the page that was just processed.
            page = request.get(load_more_url, extra_headers=headers)
    def initial_data(self):
        """Return the initial data extracted from the playlist page html.

        The value is stored on ``self._initial_data``; extraction from
        ``self.html`` only happens while that attribute is falsy.

        :rtype: dict
        """
        if not self._initial_data:
            self._initial_data = extract.initial_data(self.html)
        return self._initial_data
Beispiel #5
0
    def prefetch(self) -> None:
        """Eagerly download all necessary data.

        Performs every required network request up front so that later
        operations do not need to make long-blocking calls outside of the
        interpreter.

        :rtype: None
        :raises VideoUnavailable: if no watch page html was returned.
        :raises VideoPrivate: if the watch page is detected as private.
        :raises RecordingUnavailable: if no recording is available.
        """
        html = request.get(url=self.watch_url)
        self.watch_html = html
        if html is None:
            raise VideoUnavailable(video_id=self.video_id)
        self.age_restricted = extract.is_age_restricted(html)

        # Availability checks on the downloaded watch page.
        if extract.is_private(html):
            raise VideoPrivate(video_id=self.video_id)
        if not extract.recording_available(html):
            raise RecordingUnavailable(video_id=self.video_id)

        if not self.age_restricted:
            self.vid_info_url = extract.video_info_url(
                video_id=self.video_id, watch_url=self.watch_url)
        else:
            # The embed page is only downloaded when it is not already set.
            if not self.embed_html:
                self.embed_html = request.get(url=self.embed_url)
            self.vid_info_url = extract.video_info_url_age_restricted(
                self.video_id, self.watch_url)

        # Keep both the raw JSON text and its decoded form.
        raw_initial_data = extract.initial_data(html)
        self.initial_data_raw = raw_initial_data
        self.initial_data = json.loads(raw_initial_data)

        self.vid_info_raw = request.get(self.vid_info_url)

        # The player JavaScript is only fetched for unrestricted videos.
        if self.age_restricted:
            return
        self.js_url = extract.js_url(html)
        self.js = request.get(self.js_url)
Beispiel #6
0
 def initial_data(self):
     """Return the initial data extracted from the watch page html.

     The value is stored on ``self._initial_data``; extraction from
     ``self.watch_html`` only happens while that attribute is falsy.
     """
     if not self._initial_data:
         self._initial_data = extract.initial_data(self.watch_html)
     return self._initial_data
def test_initial_data(stream_dict):
    """The initial data extracted from ``stream_dict`` contains 'contents'."""
    extracted = extract.initial_data(stream_dict)
    assert 'contents' in extracted
def test_initial_data_missing():
    """Extracting initial data from an empty page raises RegexMatchError."""
    empty_html = ''
    with pytest.raises(RegexMatchError):
        extract.initial_data(empty_html)
Beispiel #9
0
def test_initial_data_missing():
    """Extracting initial data from an empty page yields the string "{}"."""
    empty_html = ''
    assert extract.initial_data(empty_html) == "{}"
Beispiel #10
0
 async def initial_data(self):
     """Return the initial data extracted from the awaited page html.

     The value is stored on ``self._initial_data``; ``self.html`` is only
     awaited, and extraction only performed, while that attribute is falsy.
     """
     if not self._initial_data:
         self._initial_data = extract.initial_data(await self.html)
     return self._initial_data