def prefetch(self) -> None:
    """Eagerly download all necessary data.

    Performs every required network request up front so that later
    operations never need to block on slow calls outside of the
    interpreter.

    :rtype: None
    """
    self.watch_html = request.get(
        url=self.watch_url, request_headers=self.request_headers
    )
    if self.watch_html is None:
        raise VideoUnavailable(video_id=self.video_id)

    self.age_restricted = extract.is_age_restricted(self.watch_html)
    if not self.age_restricted and "This video is private" in self.watch_html:
        raise VideoUnavailable(video_id=self.video_id)

    if self.age_restricted:
        # Age-gated videos need the embed page and a different
        # video-info endpoint.
        if not self.embed_html:
            self.embed_html = request.get(
                url=self.embed_url, request_headers=self.request_headers
            )
        self.vid_info_url = extract.video_info_url_age_restricted(
            self.video_id, self.watch_url
        )
    else:
        self.vid_info_url = extract.video_info_url(
            video_id=self.video_id, watch_url=self.watch_url
        )

    self.vid_info_raw = request.get(
        self.vid_info_url, request_headers=self.request_headers
    )

    if not self.age_restricted:
        # The player javascript (signature-descrambling code) is only
        # fetched here for non-age-gated videos.
        self.js_url = extract.js_url(self.watch_html)
        self.js = request.get(self.js_url, request_headers=self.request_headers)
def _paginate(self, until_watch_id: Optional[str] = None) -> Iterable[List[str]]:
    """Parse the video links from the page source, yields the /watch?v=
    part from video link
    """

    def split_at_watch_id(urls: List[str]):
        # Return (urls, done): when *until_watch_id* is present in the
        # list, trim everything from it onward and signal completion.
        if until_watch_id:
            try:
                cut = urls.index(f"/watch?v={until_watch_id}")
            except ValueError:
                pass
            else:
                return urls[:cut], True
        return urls, False

    page = self.html
    urls, done = split_at_watch_id(self._extract_videos(page))
    yield urls
    if done:
        return

    # The first page only yields 100 or fewer links; simulate a browser
    # request for each successive "load more" link.
    load_more_url = self._find_load_more_url(page)
    while load_more_url:  # there is an url found
        logger.debug("load more url: %s", load_more_url)
        page = request.get(load_more_url)
        load_more = json.loads(page)
        try:
            html = load_more["content_html"]
        except KeyError:
            logger.debug("Could not find content_html")
            return
        urls, done = split_at_watch_id(self._extract_videos(html))
        yield urls
        if done:
            return
        load_more_url = self._find_load_more_url(
            load_more["load_more_widget_html"],
        )
    return
def __init__(self, url: str, proxies: Optional[Dict[str, str]] = None):
    """Construct a playlist object from a playlist URL or bare id.

    :param str url: a full playlist URL (anything carrying a ``list=``
        query parameter) or a bare playlist id.
    :param dict proxies: (optional) mapping installed globally via
        :func:`install_proxy` before any request is made.
    """
    if proxies:
        install_proxy(proxies)

    try:
        self.playlist_id: str = parse_qs(url.split("?")[1])["list"][0]
    except (IndexError, KeyError):
        # IndexError: the url has no "?" (no query string at all).
        # KeyError: a query string exists but carries no "list" param
        # (e.g. a plain /watch?v=... url) — previously this KeyError
        # escaped uncaught. Either way, assume *url* is a bare id.
        self.playlist_id = url

    self.playlist_url = f"https://www.youtube.com/playlist?list={self.playlist_id}"
    self.html = request.get(self.playlist_url)

    # Needs testing with non-English
    self.last_update: Optional[date] = None
    date_match = re.search(
        r"<li>Last updated on (\w{3}) (\d{1,2}), (\d{4})</li>", self.html
    )
    if date_match:
        month, day, year = date_match.groups()
        # day is zero-padded to width 2 so "%d" parses single digits.
        self.last_update = datetime.strptime(
            f"{month} {day:0>2} {year}", "%b %d %Y"
        ).date()

    self._video_regex = re.compile(r"href=\"(/watch\?v=[\w-]*)")
def descramble(self) -> None:
    """Descramble the stream data and build Stream instances.

    The initialization process takes advantage of Python's
    "call-by-reference evaluation," which allows dictionary transforms
    to be applied in-place, instead of holding references to mutations
    at each interstitial step.

    :rtype: None
    """
    logger.info("init started")

    # The raw video-info payload is urlencoded key/value pairs.
    self.vid_info = dict(parse_qsl(self.vid_info_raw))
    if self.age_restricted:
        # For age-gated videos the video-info payload doubles as the
        # player config (no usable watch-page config is available).
        self.player_config_args = self.vid_info
    else:
        assert self.watch_html is not None
        self.player_config_args = get_ytplayer_config(
            self.watch_html)["args"]

        # Fix for KeyError: 'title' issue #434
        if "title" not in self.player_config_args:  # type: ignore
            # Fall back to scraping the page <title>: search on the
            # lowercased html, but slice the original to keep case.
            i_start = self.watch_html.lower().index("<title>") + len(
                "<title>")
            i_end = self.watch_html.lower().index("</title>")
            title = self.watch_html[i_start:i_end].strip()
            # Drop the trailing " - YouTube" suffix when present.
            index = title.lower().rfind(" - youtube")
            title = title[:index] if index > 0 else title
            self.player_config_args["title"] = unescape(title)

    # https://github.com/nficano/youpy/issues/165
    stream_maps = ["url_encoded_fmt_stream_map"]
    if "adaptive_fmts" in self.player_config_args:
        stream_maps.append("adaptive_fmts")

    # unscramble the progressive and adaptive stream manifests.
    for fmt in stream_maps:
        if not self.age_restricted and fmt in self.vid_info:
            apply_descrambler(self.vid_info, fmt)
        apply_descrambler(self.player_config_args, fmt)

        # The player javascript is required to decode signatures;
        # fetch it lazily (via the embed page) if prefetch didn't.
        if not self.js:
            if not self.embed_html:
                self.embed_html = request.get(url=self.embed_url)
            self.js_url = extract.js_url(self.embed_html)
            self.js = request.get(self.js_url)
        apply_signature(self.player_config_args, fmt, self.js)

        # build instances of :class:`Stream <Stream>`
        self.initialize_stream_objects(fmt)

    # load the player_response object (contains subtitle information)
    self.player_response = json.loads(
        self.player_config_args["player_response"])
    del self.player_config_args["player_response"]

    self.stream_progress_state.title = self.title
    self.stream_progress_state.duration = self.length
    logger.info("init finished successfully")
def test_get_non_http():
    """request.get raises ValueError for non-http(s) schemes (e.g. file://)."""
    with pytest.raises(ValueError):
        request.get("file://bad", None)
def test_get(mock_urlopen):
    """request.get returns the decoded body of the (mocked) response."""
    fake_response = mock.Mock()
    fake_response.read.return_value = "<html></html>".encode("utf-8")
    mock_urlopen.return_value = fake_response
    body = request.get("http://fakeassurl.gov", None)
    assert body == "<html></html>"
def xml_captions(self) -> str:
    """Download the xml caption tracks.

    Fetches ``self.url`` with the stored request headers and returns
    the response body (expected to be the caption track's XML).

    :rtype: str
    """
    return request.get(self.url, request_headers=self.request_headers)