def init(self):
    """Descramble the stream manifests and build Stream/Caption objects.

    Works directly on ``self.vid_info`` and ``self.player_config_args``:
    each helper call below receives the mapping itself and is expected to
    update it in place, so no intermediate copies are kept between steps.

    :rtype: None
    """
    logger.info('init started')

    # ``self.vid_info`` starts out as a raw query string; replace it with
    # the parsed key/value mapping.
    self.vid_info = dict(parse_qsl(self.vid_info))

    if self.age_restricted:
        self.player_config_args = self.vid_info
    else:
        ytplayer_config = extract.get_ytplayer_config(self.watch_html)
        self.player_config_args = ytplayer_config['args']

        # Fix for KeyError: 'title' issue #434 -- when the player config
        # carries no title, recover it from the page's <title> element.
        if 'title' not in self.player_config_args:
            open_tag = '<title>'
            lowered_html = self.watch_html.lower()
            title_start = lowered_html.index(open_tag) + len(open_tag)
            title_end = lowered_html.index('</title>')
            page_title = self.watch_html[title_start:title_end].strip()

            # Cut the title at the last ' - youtube' (case-insensitive),
            # unless that marker sits at the very start or is absent.
            suffix_at = page_title.lower().rfind(' - youtube')
            if suffix_at > 0:
                page_title = page_title[:suffix_at]
            self.player_config_args['title'] = page_title

    self.vid_descr = extract.get_vid_descr(self.watch_html)

    # https://github.com/nficano/pytube/issues/165
    stream_maps = ['url_encoded_fmt_stream_map']
    if 'adaptive_fmts' in self.player_config_args:
        stream_maps += ['adaptive_fmts']

    # unscramble the progressive and adaptive stream manifests.
    for fmt in stream_maps:
        descramble_vid_info = not self.age_restricted and fmt in self.vid_info
        if descramble_vid_info:
            mixins.apply_descrambler(self.vid_info, fmt)
        mixins.apply_descrambler(self.player_config_args, fmt)

        try:
            mixins.apply_signature(self.player_config_args, fmt, self.js)
        except TypeError:
            # The first attempt failed with the current ``self.js``; fetch
            # the player JavaScript via the embed page and try once more.
            self.js_url = extract.js_url(self.embed_html, self.age_restricted)
            self.js = request.get(self.js_url)
            mixins.apply_signature(self.player_config_args, fmt, self.js)

        # build instances of :class:`Stream <Stream>`
        self.initialize_stream_objects(fmt)

    # load the player_response object (contains subtitle information)
    apply_mixin(self.player_config_args, 'player_response', json.loads)

    self.initialize_caption_objects()
    logger.info('init finished successfully')
def init(self):
    """Descramble the stream manifests and build Stream/Caption objects.

    ``self.vid_info`` and ``self.player_config_args`` are handed to the
    helper calls below as-is, which are expected to update them in place.

    When the player config has no ``title``, the title is recovered from
    the watch page. BeautifulSoup (with the ``lxml`` parser) is used when
    it is usable; if ``bs4`` cannot be imported, or the ``lxml`` parser is
    not installed, a plain string search for the ``<title>`` element is
    used instead.

    :rtype: None
    """
    logger.info('init started')

    self.vid_info = {k: v for k, v in parse_qsl(self.vid_info)}
    if self.age_restricted:
        self.player_config_args = self.vid_info
    else:
        self.player_config_args = extract.get_ytplayer_config(
            self.watch_html,
        )['args']

        if 'title' not in self.player_config_args:
            # Prefer a real HTML parser for reliability, but never let an
            # optional dependency (bs4 itself, or its lxml backend) turn a
            # recoverable situation into a crash.
            try:
                from bs4 import BeautifulSoup, FeatureNotFound
            except ImportError:
                title = None
            else:
                try:
                    soup = BeautifulSoup(self.watch_html, 'lxml')
                except FeatureNotFound:
                    # bs4 is installed but the requested parser is not.
                    title = None
                else:
                    title = soup.title.get_text().strip()

            if title is None:
                # The markup needed here is simple enough to locate with
                # index(); search a lowercased copy so tag case is ignored.
                lowered_html = self.watch_html.lower()
                i_start = lowered_html.index('<title>') + len('<title>')
                i_end = lowered_html.index('</title>')
                title = self.watch_html[i_start:i_end].strip()

            # remove the ' - youtube' part that is added to the browser
            # tab's title
            index = title.lower().rfind(' - youtube')
            title = title[:index] if index > 0 else title
            self.player_config_args['title'] = title

    self.vid_descr = extract.get_vid_descr(self.watch_html)

    # https://github.com/nficano/pytube/issues/165
    stream_maps = ['url_encoded_fmt_stream_map']
    if 'adaptive_fmts' in self.player_config_args:
        stream_maps.append('adaptive_fmts')

    # unscramble the progressive and adaptive stream manifests.
    for fmt in stream_maps:
        if not self.age_restricted and fmt in self.vid_info:
            mixins.apply_descrambler(self.vid_info, fmt)
        mixins.apply_descrambler(self.player_config_args, fmt)

        try:
            mixins.apply_signature(self.player_config_args, fmt, self.js)
        except TypeError:
            # The first attempt failed with the current ``self.js``; fetch
            # the player JavaScript via the embed page and try once more.
            self.js_url = extract.js_url(
                self.embed_html, self.age_restricted,
            )
            self.js = request.get(self.js_url)
            mixins.apply_signature(self.player_config_args, fmt, self.js)

        # build instances of :class:`Stream <Stream>`
        self.initialize_stream_objects(fmt)

    # load the player_response object (contains subtitle information)
    apply_mixin(self.player_config_args, 'player_response', json.loads)

    self.initialize_caption_objects()
    logger.info('init finished successfully')
def descramble(self) -> None:
    """Descramble the stream manifests and build the Stream instances.

    ``self.vid_info`` and ``self.player_config_args`` are passed straight
    to the helper calls below, which are expected to update them in place.
    Afterwards the decoded ``player_response`` is stored on the instance
    and the title and duration are copied onto ``self.stream_monostate``.

    :rtype: None
    """
    logger.info("init started")

    self.vid_info = {key: value for key, value in parse_qsl(self.vid_info_raw)}

    if self.age_restricted:
        self.player_config_args = self.vid_info
    else:
        assert self.watch_html is not None
        ytplayer_config = get_ytplayer_config(self.watch_html)
        self.player_config_args = ytplayer_config["args"]

        # Fix for KeyError: 'title' issue #434 -- when the player config
        # carries no title, recover it from the page's <title> element.
        if "title" not in self.player_config_args:  # type: ignore
            open_tag = "<title>"
            lowered_html = self.watch_html.lower()
            title_start = lowered_html.index(open_tag) + len(open_tag)
            title_end = lowered_html.index("</title>")
            page_title = self.watch_html[title_start:title_end].strip()

            # Cut the title at the last " - youtube" (case-insensitive),
            # unless that marker sits at the very start or is absent.
            suffix_at = page_title.lower().rfind(" - youtube")
            if suffix_at > 0:
                page_title = page_title[:suffix_at]
            self.player_config_args["title"] = unescape(page_title)

    # https://github.com/nficano/pytube/issues/165
    stream_maps = ["url_encoded_fmt_stream_map"]
    if "adaptive_fmts" in self.player_config_args:
        stream_maps += ["adaptive_fmts"]

    # unscramble the progressive and adaptive stream manifests.
    for fmt in stream_maps:
        descramble_vid_info = not self.age_restricted and fmt in self.vid_info
        if descramble_vid_info:
            apply_descrambler(self.vid_info, fmt)
        apply_descrambler(self.player_config_args, fmt)

        # Fetch the player JavaScript only when it is not already loaded,
        # pulling the embed page first if that is missing as well.
        if not self.js:
            if not self.embed_html:
                self.embed_html = request.get(url=self.embed_url)
            self.js_url = extract.js_url(self.embed_html)
            self.js = request.get(self.js_url)

        apply_signature(self.player_config_args, fmt, self.js)

        # build instances of :class:`Stream <Stream>`
        self.initialize_stream_objects(fmt)

    # load the player_response object (contains subtitle information)
    raw_player_response = self.player_config_args["player_response"]
    self.player_response = json.loads(raw_player_response)
    del self.player_config_args["player_response"]

    monostate = self.stream_monostate
    monostate.title = self.title
    monostate.duration = self.length
    logger.info("init finished successfully")
def descramble(self) -> None:
    """Descramble the stream manifests and build the Stream instances.

    ``self.vid_info`` doubles as ``self.player_config_args`` here; both
    names refer to the mapping parsed from ``self.vid_info_raw``, and the
    helper calls below are expected to update it in place. Afterwards the
    ``player_response`` entry is moved onto the instance and the title and
    duration are copied onto ``self.stream_monostate``.

    :rtype: None
    """
    self.vid_info = {key: value for key, value in parse_qsl(self.vid_info_raw)}
    self.player_config_args = self.vid_info
    self.player_response = json.loads(self.vid_info['player_response'])

    # On pre-signed videos, we need to use get_ytplayer_config to fix
    # the player_response item
    if 'streamingData' not in self.player_config_args['player_response']:
        config_response = get_ytplayer_config(self.watch_html)
        # Use the nested ``args`` entry when present, otherwise the whole
        # config object stands in for the player response.
        if 'args' in config_response:
            replacement = config_response['args']['player_response']
        else:
            replacement = config_response
        self.player_config_args['player_response'] = replacement

    # https://github.com/nficano/pytube/issues/165
    stream_maps = ["url_encoded_fmt_stream_map"]
    if "adaptive_fmts" in self.player_config_args:
        stream_maps += ["adaptive_fmts"]

    # unscramble the progressive and adaptive stream manifests.
    for fmt in stream_maps:
        descramble_vid_info = not self.age_restricted and fmt in self.vid_info
        if descramble_vid_info:
            apply_descrambler(self.vid_info, fmt)
        apply_descrambler(self.player_config_args, fmt)

        # Fetch the player JavaScript only when it is not already loaded,
        # pulling the embed page first if that is missing as well.
        if not self.js:
            if not self.embed_html:
                self.embed_html = request.get(url=self.embed_url)
            self.js_url = extract.js_url(self.embed_html)
            self.js = request.get(self.js_url)

        apply_signature(self.player_config_args, fmt, self.js)

        # build instances of :class:`Stream <Stream>`
        self.initialize_stream_objects(fmt)

    # load the player_response object (contains subtitle information);
    # it may still be a JSON string or an already-decoded object.
    raw_player_response = self.player_config_args["player_response"]
    if isinstance(raw_player_response, str):
        self.player_response = json.loads(raw_player_response)
    else:
        self.player_response = raw_player_response
    del self.player_config_args["player_response"]

    monostate = self.stream_monostate
    monostate.title = self.title
    monostate.duration = self.length
def player_config_args(self):
    """Return the player config arguments, caching them on first use.

    A truthy ``self._player_config_args`` is returned as-is. Otherwise the
    cache is seeded with ``self.vid_info`` (the same object, not a copy)
    and, when its ``player_response`` entry does not contain
    ``'streamingData'``, that entry is replaced using the config obtained
    from the watch page.

    NOTE(review): presumably exposed as a property on the enclosing class;
    the decorator is not visible here -- confirm.

    :returns: the cached player config arguments mapping.
    :raises KeyError: if ``self.vid_info`` has no ``'player_response'`` key.
    """
    if self._player_config_args:
        return self._player_config_args

    # Work on the mapping directly instead of going back through
    # ``self.player_config_args``: re-entering this accessor recursed
    # without bound whenever the freshly cached value was falsy (for
    # example an empty ``vid_info``).
    config_args = self.vid_info
    self._player_config_args = config_args

    # On pre-signed videos, we need to use get_ytplayer_config to fix
    # the player_response item
    if 'streamingData' not in config_args['player_response']:
        config_response = extract.get_ytplayer_config(self.watch_html)
        if 'args' in config_response:
            config_args['player_response'] = config_response['args']['player_response']  # noqa: E501
        else:
            config_args['player_response'] = config_response

    return config_args
def init(self):
    """Descramble the stream manifests and build Stream/Caption objects.

    ``self.vid_info`` and ``self.player_config_args`` are handed to the
    helper calls below as-is, which are expected to update them in place,
    so no intermediate copies are kept between steps.

    :rtype: None
    """
    logger.info('init started')

    # ``self.vid_info`` starts out as a raw query string; replace it with
    # the parsed key/value mapping.
    self.vid_info = dict(parse_qsl(self.vid_info))

    if self.age_restricted:
        self.player_config_args = self.vid_info
    else:
        ytplayer_config = extract.get_ytplayer_config(self.watch_html)
        self.player_config_args = ytplayer_config['args']

    # https://github.com/nficano/pytube/issues/165
    stream_maps = ['url_encoded_fmt_stream_map']
    if 'adaptive_fmts' in self.player_config_args:
        stream_maps += ['adaptive_fmts']

    # unscramble the progressive and adaptive stream manifests.
    for fmt in stream_maps:
        descramble_vid_info = not self.age_restricted and fmt in self.vid_info
        if descramble_vid_info:
            mixins.apply_descrambler(self.vid_info, fmt)
        mixins.apply_descrambler(self.player_config_args, fmt)

        # apply the signature to the download url.
        mixins.apply_signature(self.player_config_args, fmt, self.js)

        # build instances of :class:`Stream <Stream>`
        self.initialize_stream_objects(fmt)

    # load the player_response object (contains subtitle information)
    apply_mixin(self.player_config_args, 'player_response', json.loads)

    self.initialize_caption_objects()
    logger.info('init finished successfully')
def init(self):
    """Descramble the stream manifests and build Stream/Caption objects.

    Both the progressive and the adaptive manifest are processed
    unconditionally. ``self.vid_info`` and the player config ``args``
    mapping are handed to the helper calls as-is, which are expected to
    update them in place.

    :rtype: None
    """
    logger.info('init started')

    self.vid_info = dict(parse_qsl(self.vid_info))
    self.player_config = extract.get_ytplayer_config(self.watch_html)
    config_args = self.player_config['args']

    # Progressive manifest first, then adaptive -- the order is kept for
    # every step below.
    stream_maps = ('url_encoded_fmt_stream_map', 'adaptive_fmts')

    # unscramble the progressive and adaptive stream manifests.
    for manifest_holder in (self.vid_info, config_args):
        for fmt in stream_maps:
            mixins.apply_descrambler(manifest_holder, fmt)

    # apply the signature to the download url.
    for fmt in stream_maps:
        mixins.apply_signature(config_args, fmt, self.js)

    # load the player_response object (contains subtitle information)
    apply_mixin(config_args, 'player_response', json.loads)

    # build instances of :class:`Stream <Stream>`
    for fmt in stream_maps:
        self.initialize_stream_objects(fmt)

    self.initialize_caption_objects()
    logger.info('init finished successfully')
def test_signature_cipher_does_not_error(stream_dict):
    """Descrambling the progressive manifest leaves an ``s`` key on the first stream."""
    fmt = "url_encoded_fmt_stream_map"
    player_config = extract.get_ytplayer_config(stream_dict)
    config_args = player_config['args']

    extract.apply_descrambler(config_args, fmt)

    first_stream = config_args[fmt][0]
    assert "s" in first_stream.keys()
def test_get_ytplayer_config_with_no_match_should_error():
    """An empty page must make ``get_ytplayer_config`` raise ``RegexMatchError``."""
    empty_html = ""
    with pytest.raises(RegexMatchError):
        extract.get_ytplayer_config(empty_html)