def init(self):
    """Descramble the stream data and build Stream instances.

    The transforms used here mutate ``self.vid_info`` and
    ``self.player_config_args`` in place, so no intermediate copies are
    kept between the individual steps.

    :raises ValueError:
        If the player config has no ``title`` entry and the watch page
        contains no ``<title>`` element to recover one from.
    :rtype: None

    """
    import re  # local import: only needed by the title fallback below

    logger.info('init started')

    self.vid_info = dict(parse_qsl(self.vid_info))
    if self.age_restricted:
        self.player_config_args = self.vid_info
    else:
        self.player_config_args = extract.get_ytplayer_config(
            self.watch_html,
        )['args']

    # Fix for KeyError: 'title' issue #434
    if 'title' not in self.player_config_args:
        # Match case-insensitively on the *original* text. Computing
        # offsets on ``watch_html.lower()`` and slicing ``watch_html`` is
        # wrong whenever lower-casing changes the string length (for
        # example U+0130 lower-cases to two code points).
        found = re.search(
            r'<title>(.*?)</title>',
            self.watch_html,
            re.IGNORECASE | re.DOTALL,
        )
        if found is None:
            raise ValueError('no <title> element found in watch_html')
        title = found.group(1).strip()

        # Drop the trailing ' - YouTube' marker: cut at the last
        # occurrence, unless the title starts with it.
        suffix_starts = [
            hit.start()
            for hit in re.finditer(' - youtube', title, re.IGNORECASE)
        ]
        if suffix_starts and suffix_starts[-1] > 0:
            title = title[:suffix_starts[-1]]
        self.player_config_args['title'] = title

    self.vid_descr = extract.get_vid_descr(self.watch_html)

    # https://github.com/nficano/pytube/issues/165
    stream_maps = ['url_encoded_fmt_stream_map']
    if 'adaptive_fmts' in self.player_config_args:
        stream_maps.append('adaptive_fmts')

    # unscramble the progressive and adaptive stream manifests.
    for fmt in stream_maps:
        # When age restricted, player_config_args *is* vid_info, so the
        # manifest must only be descrambled once.
        if not self.age_restricted and fmt in self.vid_info:
            mixins.apply_descrambler(self.vid_info, fmt)
        mixins.apply_descrambler(self.player_config_args, fmt)

        try:
            mixins.apply_signature(self.player_config_args, fmt, self.js)
        except TypeError:
            # Signing failed with the current player JS: fetch it from
            # the embed page and retry once.
            self.js_url = extract.js_url(
                self.embed_html, self.age_restricted,
            )
            self.js = request.get(self.js_url)
            mixins.apply_signature(self.player_config_args, fmt, self.js)

        # build instances of :class:`Stream <Stream>`
        self.initialize_stream_objects(fmt)

    # load the player_response object (contains subtitle information)
    apply_mixin(self.player_config_args, 'player_response', json.loads)

    self.initialize_caption_objects()
    logger.info('init finished successfully')
def apply_descrambler(stream_data, key):
    """Descramble one comma-separated stream manifest in place.

    The value stored under ``key`` is split on commas; every piece is
    parsed as a query string and turned into a ``dict`` whose values are
    unquoted. The resulting ``list`` of dictionaries replaces the original
    string in ``stream_data``.

    :param dict stream_data:
        Dictionary containing query string encoded values.
    :param str key:
        Name of the key in dictionary.

    **Example**:

    >>> d = {'foo': 'bar=1&var=test,em=5&t=url%20encoded'}
    >>> apply_descrambler(d, 'foo')
    >>> print(d)
    {'foo': [{'bar': '1', 'var': 'test'}, {'em': '5', 't': 'url encoded'}]}

    """
    descrambled = []
    for segment in stream_data[key].split(','):
        fields = {}
        for name, value in parse_qsl(segment):
            fields[name] = unquote(value)
        descrambled.append(fields)
    stream_data[key] = descrambled

    logger.debug(
        'applying descrambler\n%s',
        pprint.pformat(stream_data[key], indent=2),
    )
def apply_descrambler(stream_data, key):
    """Replace a query-string encoded manifest with a list of dicts.

    ``stream_data[key]`` is expected to hold comma-separated query
    strings. Each one is parsed into a ``dict`` (values are unquoted) and
    the list of those dicts is written back to ``stream_data[key]``.

    :param dict stream_data:
        Dictionary containing query string encoded values.
    :param str key:
        Name of the key in dictionary.

    **Example**:

    >>> d = {'foo': 'bar=1&var=test,em=5&t=url%20encoded'}
    >>> apply_descrambler(d, 'foo')
    >>> print(d)
    {'foo': [{'bar': '1', 'var': 'test'}, {'em': '5', 't': 'url encoded'}]}

    """
    def _decode(entry):
        # One manifest entry: query string -> dict with unquoted values.
        return dict(
            (name, unquote(value)) for name, value in parse_qsl(entry)
        )

    raw_manifest = stream_data[key]
    stream_data[key] = list(map(_decode, raw_manifest.split(',')))

    pretty = pprint.pformat(stream_data[key], indent=2)
    logger.debug('applying descrambler\n%s', pretty)
def init(self):
    """Descramble the stream data and build Stream instances.

    The transforms used here mutate ``self.vid_info`` and
    ``self.player_config_args`` in place.

    When the player config carries no ``title`` entry, the title is
    recovered from the watch page's ``<title>`` element: with
    BeautifulSoup when ``bs4`` can be imported, otherwise with a plain
    pattern search.

    :raises ValueError:
        If ``bs4`` is unavailable and the watch page contains no
        ``<title>`` element.
    :rtype: None

    """
    import re  # local import: only needed by the title fallback below

    logger.info('init started')

    self.vid_info = dict(parse_qsl(self.vid_info))
    if self.age_restricted:
        self.player_config_args = self.vid_info
    else:
        self.player_config_args = extract.get_ytplayer_config(
            self.watch_html,
        )['args']

    if 'title' not in self.player_config_args:
        try:
            # Optional dependency; only the import itself is guarded.
            from bs4 import BeautifulSoup
        except ImportError:
            # ImportError also covers its subclass ModuleNotFoundError and
            # exists on interpreters that predate the subclass.
            # Search the original text case-insensitively: offsets taken
            # from ``watch_html.lower()`` do not line up with
            # ``watch_html`` when lower-casing changes the length.
            found = re.search(
                r'<title>(.*?)</title>',
                self.watch_html,
                re.IGNORECASE | re.DOTALL,
            )
            if found is None:
                raise ValueError('no <title> element found in watch_html')
            title = found.group(1).strip()
        else:
            soup = BeautifulSoup(self.watch_html, 'lxml')
            title = soup.title.get_text().strip()

        # remove the ' - youtube' part that is added to the browser tab's
        # title: cut at its last occurrence unless the title starts with it
        suffix_starts = [
            hit.start()
            for hit in re.finditer(' - youtube', title, re.IGNORECASE)
        ]
        if suffix_starts and suffix_starts[-1] > 0:
            title = title[:suffix_starts[-1]]
        self.player_config_args['title'] = title

    self.vid_descr = extract.get_vid_descr(self.watch_html)

    # https://github.com/nficano/pytube/issues/165
    stream_maps = ['url_encoded_fmt_stream_map']
    if 'adaptive_fmts' in self.player_config_args:
        stream_maps.append('adaptive_fmts')

    # unscramble the progressive and adaptive stream manifests.
    for fmt in stream_maps:
        # When age restricted, player_config_args *is* vid_info, so the
        # manifest must only be descrambled once.
        if not self.age_restricted and fmt in self.vid_info:
            mixins.apply_descrambler(self.vid_info, fmt)
        mixins.apply_descrambler(self.player_config_args, fmt)

        try:
            mixins.apply_signature(self.player_config_args, fmt, self.js)
        except TypeError:
            # Signing failed with the current player JS: fetch it from
            # the embed page and retry once.
            self.js_url = extract.js_url(
                self.embed_html, self.age_restricted,
            )
            self.js = request.get(self.js_url)
            mixins.apply_signature(self.player_config_args, fmt, self.js)

        # build instances of :class:`Stream <Stream>`
        self.initialize_stream_objects(fmt)

    # load the player_response object (contains subtitle information)
    apply_mixin(self.player_config_args, 'player_response', json.loads)

    self.initialize_caption_objects()
    logger.info('init finished successfully')
def apply_descrambler(stream_data, key):
    """Apply various in-place transforms to YouTube's media stream data.

    Normally the value under ``key`` is a comma-separated string of query
    strings; it is replaced by a ``list`` of dictionaries, one per item,
    with every value unquoted.

    When ``key`` is ``'url_encoded_fmt_stream_map'`` and that entry is
    missing or empty, the list is built instead from the JSON document in
    ``stream_data['player_response']``: the ``formats`` (optional) and
    ``adaptiveFormats`` lists found under ``streamingData``. Each resulting
    entry has ``url``, ``type``, ``quality`` and ``itag``; a format without
    a direct ``url`` takes ``url`` and ``s`` from its ``cipher`` query
    string. Direct and ciphered formats may be mixed freely.

    :param dict stream_data:
        Dictionary containing query string encoded values.
    :param str key:
        Name of the key in dictionary.
    :raises KeyError:
        If a required field is missing from ``player_response`` or from
        one of its formats.

    **Example**:

    >>> d = {'foo': 'bar=1&var=test,em=5&t=url%20encoded'}
    >>> apply_descrambler(d, 'foo')
    >>> print(d)
    {'foo': [{'bar': '1', 'var': 'test'}, {'em': '5', 't': 'url encoded'}]}

    """
    if key == 'url_encoded_fmt_stream_map' and not stream_data.get(key):
        # Parse the (potentially large) JSON document only once.
        streaming_data = json.loads(
            stream_data['player_response'],
        )['streamingData']
        formats = list(streaming_data.get('formats', []))
        formats.extend(streaming_data['adaptiveFormats'])

        entries = []
        for format_item in formats:
            entry = {
                u'type': format_item[u'mimeType'],
                u'quality': format_item[u'quality'],
                u'itag': format_item[u'itag'],
            }
            if u'url' in format_item:
                entry[u'url'] = format_item[u'url']
            else:
                # Decide per format, so one ciphered entry does not force
                # every other entry through the cipher path.
                cipher = parse_qs(format_item[u'cipher'])
                entry[u'url'] = cipher[u'url'][0]
                entry[u's'] = cipher[u's'][0]
            entries.append(entry)
        stream_data[key] = entries
    else:
        stream_data[key] = [
            {k: unquote(v) for k, v in parse_qsl(i)}
            for i in stream_data[key].split(',')
        ]

    logger.debug(
        'applying descrambler\n%s',
        pprint.pformat(stream_data[key], indent=2),
    )
def init(self):
    """Descramble the stream manifests and construct the Stream objects.

    Each transform mutates its dictionary argument in place, so no
    intermediate copies are held between the individual steps.

    :rtype: None

    """
    logger.info('init started')

    self.vid_info = dict(parse_qsl(self.vid_info))

    if not self.age_restricted:
        player_config = extract.get_ytplayer_config(self.watch_html)
        self.player_config_args = player_config['args']
    else:
        self.player_config_args = self.vid_info

    # https://github.com/nficano/pytube/issues/165
    if 'adaptive_fmts' in self.player_config_args:
        stream_maps = ['url_encoded_fmt_stream_map', 'adaptive_fmts']
    else:
        stream_maps = ['url_encoded_fmt_stream_map']

    for fmt in stream_maps:
        # Unscramble the manifest. vid_info is handled separately only
        # when it is a different dict from player_config_args.
        if not self.age_restricted:
            if fmt in self.vid_info:
                mixins.apply_descrambler(self.vid_info, fmt)
        mixins.apply_descrambler(self.player_config_args, fmt)

        # Apply the signature to the download url.
        mixins.apply_signature(self.player_config_args, fmt, self.js)

        # Build instances of :class:`Stream <Stream>`.
        self.initialize_stream_objects(fmt)

    # Load the player_response object (contains subtitle information).
    apply_mixin(self.player_config_args, 'player_response', json.loads)

    self.initialize_caption_objects()
    logger.info('init finished successfully')
def init(self):
    """Prepare stream and caption objects from the fetched page data.

    The video info query string is parsed into a dict, the progressive
    (and, when present, adaptive) manifests are descrambled and signed in
    place, and Stream and Caption instances are then initialized.

    :rtype: None

    """
    logger.info('init started')

    parsed_info = {}
    for name, value in parse_qsl(self.vid_info):
        parsed_info[name] = value
    self.vid_info = parsed_info

    if self.age_restricted:
        self.player_config_args = self.vid_info
    else:
        self.player_config_args = extract.get_ytplayer_config(
            self.watch_html,
        )['args']
    config_args = self.player_config_args

    # https://github.com/nficano/pytube/issues/165
    manifests = ['url_encoded_fmt_stream_map']
    manifests.extend(
        name for name in ('adaptive_fmts',) if name in config_args
    )

    for fmt in manifests:
        # unscramble the progressive and adaptive stream manifests.
        needs_separate_pass = (
            not self.age_restricted and fmt in self.vid_info
        )
        if needs_separate_pass:
            mixins.apply_descrambler(self.vid_info, fmt)
        mixins.apply_descrambler(config_args, fmt)

        # apply the signature to the download url.
        mixins.apply_signature(config_args, fmt, self.js)

        # build instances of :class:`Stream <Stream>`
        self.initialize_stream_objects(fmt)

    # load the player_response object (contains subtitle information)
    apply_mixin(config_args, 'player_response', json.loads)

    self.initialize_caption_objects()
    logger.info('init finished successfully')
def init(self):
    """Descramble the stream data and build Stream instances.

    The transforms used here mutate ``self.vid_info`` and the player
    config ``args`` dictionary in place.

    The adaptive manifest (``adaptive_fmts``) is optional: it is only
    descrambled, signed and turned into streams when the player config
    actually contains it, and ``self.vid_info`` is only descrambled for
    the manifests it contains.

    :rtype: None

    """
    logger.info('init started')

    self.vid_info = dict(parse_qsl(self.vid_info))
    self.player_config = extract.get_ytplayer_config(self.watch_html)
    config_args = self.player_config['args']

    progressive_fmts = 'url_encoded_fmt_stream_map'
    adaptive_fmts = 'adaptive_fmts'

    # https://github.com/nficano/pytube/issues/165
    # Not every video ships an adaptive manifest; indexing it
    # unconditionally raised KeyError.
    stream_maps = [progressive_fmts]
    if adaptive_fmts in config_args:
        stream_maps.append(adaptive_fmts)

    # unscramble the progressive and adaptive stream manifests.
    for fmt in (progressive_fmts, adaptive_fmts):
        if fmt in self.vid_info:
            mixins.apply_descrambler(self.vid_info, fmt)
    for fmt in stream_maps:
        mixins.apply_descrambler(config_args, fmt)

    # apply the signature to the download url.
    for fmt in stream_maps:
        mixins.apply_signature(config_args, fmt, self.js)

    # load the player_response object (contains subtitle information)
    apply_mixin(config_args, 'player_response', json.loads)

    # build instances of :class:`Stream <Stream>`
    for fmt in stream_maps:
        self.initialize_stream_objects(fmt)

    self.initialize_caption_objects()
    logger.info('init finished successfully')
def apply_descrambler(stream_data, key):
    """Apply various in-place transforms to YouTube's media stream data.

    Normally the value under ``key`` is a comma-separated string of query
    strings; it is replaced by a ``list`` of dictionaries, one per item,
    with every value unquoted.

    When ``key`` is ``"url_encoded_fmt_stream_map"`` and that entry is
    missing or empty, the list is built instead from the JSON document in
    ``stream_data["player_response"]``: the ``formats`` (optional) and
    ``adaptiveFormats`` lists found under ``streamingData``. Each resulting
    entry has ``url``, ``type``, ``quality`` and ``itag``. A format without
    a direct ``url`` takes ``url``, ``s`` and (when present) ``sp`` from
    its ``cipher`` query string.

    :param dict stream_data:
        Dictionary containing query string encoded values.
    :param str key:
        Name of the key in dictionary.
    :raises KeyError:
        If a required field is missing from ``player_response``, from one
        of its formats, or from a format's ``cipher``.

    **Example**:

    >>> d = {'foo': 'bar=1&var=test,em=5&t=url%20encoded'}
    >>> apply_descrambler(d, 'foo')
    >>> print(d)
    {'foo': [{'bar': '1', 'var': 'test'}, {'em': '5', 't': 'url encoded'}]}

    """
    if key == "url_encoded_fmt_stream_map" and not stream_data.get(key):
        # Parse the (potentially large) JSON document only once.
        streaming_data = json.loads(
            stream_data["player_response"],
        )["streamingData"]
        formats = list(streaming_data.get("formats", []))
        formats.extend(streaming_data["adaptiveFormats"])

        entries = []
        for format_item in formats:
            entry = {
                u"type": format_item[u"mimeType"],
                u"quality": format_item[u"quality"],
                u"itag": format_item[u"itag"],
            }
            if u"url" in format_item:
                entry[u"url"] = format_item[u"url"]
            else:
                # Split the cipher into exact name/value pairs. Matching
                # on substrings such as "s=" would also hit unrelated
                # names ending in "s", and splitting the pair on every
                # "=" would truncate values that contain one.
                cipher = {}
                for pair in format_item[u"cipher"].split("&"):
                    name, _, value = pair.partition("=")
                    # First occurrence wins.
                    cipher.setdefault(name, unquote(value))
                entry[u"url"] = cipher[u"url"]
                entry[u"s"] = cipher[u"s"]
                if u"sp" in cipher:
                    entry[u"sp"] = cipher[u"sp"]
            entries.append(entry)
        stream_data[key] = entries
    else:
        stream_data[key] = [
            {k: unquote(v) for k, v in parse_qsl(i)}
            for i in stream_data[key].split(",")
        ]

    logger.debug(
        "applying descrambler\n%s",
        pprint.pformat(stream_data[key], indent=2),
    )
:param dict dct: Dictionary containing query string encoded values. :param str key: Name of the key in dictionary. **Example**: >>> d = {'foo': 'bar=1&var=test,em=5&t=url%20encoded'} >>> apply_descrambler(d, 'foo') >>> print(d) {'foo': [{'bar': '1', 'var': 'test'}, {'em': '5', 't': 'url encoded'}]} """ if key == 'url_encoded_fmt_stream_map' and not stream_data.get('url_encoded_fmt_stream_map'): formats = json.loads(stream_data['player_response'])['streamingData']['formats'] formats.extend(json.loads(stream_data['player_response'])['streamingData']['adaptiveFormats']) stream_data[key] = [{u'url': format_item[u'url'], u'type': format_item[u'mimeType'], u'quality': format_item[u'quality'], u'itag': format_item[u'itag']} for format_item in formats] else: stream_data[key] = [ {k: unquote(v) for k, v in parse_qsl(i)} for i in stream_data[key].split(',') ] logger.debug( 'applying descrambler\n%s', pprint.pformat(stream_data[key], indent=2), )