def init(self):
    """Descramble the stream data and build Stream instances.

    The initialization process takes advantage of Python's
    "call-by-reference evaluation," which allows dictionary transforms to
    be applied in-place, instead of holding references to mutations at each
    interstitial step.

    Steps, in order:

    1. Parse the URL-encoded ``self.vid_info`` string into a dict.
    2. Pick ``self.player_config_args``: the parsed ``vid_info`` for
       age-restricted videos, otherwise the ``args`` entry of the player
       config extracted from ``self.watch_html``.
    3. If the player config carries no ``title``, recover it from the
       page's ``<title>`` tag (HTML entities decoded, trailing
       `` - YouTube`` removed).
    4. Store the video description in ``self.vid_descr``.
    5. For each stream map, apply the descrambler and the signature, then
       build the stream objects.
    6. Decode the ``player_response`` JSON and build the caption objects.

    :raises ValueError:
        If the title fallback is needed but the watch page has no
        ``<title>``/``</title>`` pair.
    :rtype: None
    """
    # Local import: keeps the entity decoding available without relying on
    # a module-level import being present.
    from html import unescape

    logger.info('init started')

    # For a repeated key the last value wins, exactly as with the previous
    # ``{k: v for k, v in ...}`` comprehension.
    self.vid_info = dict(parse_qsl(self.vid_info))

    if self.age_restricted:
        self.player_config_args = self.vid_info
    else:
        self.player_config_args = extract.get_ytplayer_config(
            self.watch_html,
        )['args']

        # Fix for KeyError: 'title' issue #434
        if 'title' not in self.player_config_args:
            open_tag = '<title>'
            # Lower-case the page once so the tag search ignores case.
            # NOTE(review): offsets found in the lower-cased copy are used
            # on the original text; this assumes lower() does not change
            # the length of the markup that precedes the title.
            html_lower = self.watch_html.lower()
            i_start = html_lower.index(open_tag) + len(open_tag)
            i_end = html_lower.index('</title>')
            title = self.watch_html[i_start:i_end].strip()

            # Drop the " - YouTube" suffix shown in the browser tab; an
            # index of 0 or -1 leaves the title untouched.
            index = title.lower().rfind(' - youtube')
            if index > 0:
                title = title[:index]

            # The text comes straight from HTML markup, so entities such
            # as ``&amp;`` or ``&#39;`` must be decoded before use.
            self.player_config_args['title'] = unescape(title)

    self.vid_descr = extract.get_vid_descr(self.watch_html)

    # https://github.com/nficano/pytube/issues/165
    stream_maps = ['url_encoded_fmt_stream_map']
    if 'adaptive_fmts' in self.player_config_args:
        stream_maps.append('adaptive_fmts')

    # unscramble the progressive and adaptive stream manifests.
    for fmt in stream_maps:
        # For age-restricted videos ``player_config_args`` *is*
        # ``vid_info``, so the second call below already covers it.
        if not self.age_restricted and fmt in self.vid_info:
            mixins.apply_descrambler(self.vid_info, fmt)
        mixins.apply_descrambler(self.player_config_args, fmt)

        try:
            mixins.apply_signature(self.player_config_args, fmt, self.js)
        except TypeError:
            # Presumably ``self.js`` was not usable (e.g. not fetched
            # yet): resolve the player JS from the embed page and retry.
            self.js_url = extract.js_url(
                self.embed_html, self.age_restricted,
            )
            self.js = request.get(self.js_url)
            mixins.apply_signature(self.player_config_args, fmt, self.js)

        # build instances of :class:`Stream <Stream>`
        self.initialize_stream_objects(fmt)

    # load the player_response object (contains subtitle information)
    apply_mixin(self.player_config_args, 'player_response', json.loads)

    self.initialize_caption_objects()
    logger.info('init finished successfully')
def init(self):
    """Descramble the stream data and build Stream instances.

    Steps, in order:

    1. Parse the URL-encoded ``self.vid_info`` string into a dict.
    2. Pick ``self.player_config_args``: the parsed ``vid_info`` for
       age-restricted videos, otherwise the ``args`` entry of the player
       config extracted from ``self.watch_html``.
    3. If the player config carries no ``title``, recover it from the
       watch page: with BeautifulSoup when it is usable, otherwise by a
       plain substring search for the ``<title>`` tag.
    4. Store the video description in ``self.vid_descr``.
    5. For each stream map, apply the descrambler and the signature, then
       build the stream objects.
    6. Decode the ``player_response`` JSON and build the caption objects.

    :raises ValueError:
        If the manual title fallback is needed but the watch page has no
        ``<title>``/``</title>`` pair.
    :rtype: None
    """
    # Local import: used only by the manual title fallback below.
    from html import unescape

    logger.info('init started')

    self.vid_info = dict(parse_qsl(self.vid_info))

    if self.age_restricted:
        self.player_config_args = self.vid_info
    else:
        self.player_config_args = extract.get_ytplayer_config(
            self.watch_html,
        )['args']

        if 'title' not in self.player_config_args:
            title = None

            # For more reliability when parsing, prefer a real HTML
            # parser. bs4 is optional, so any import failure simply
            # selects the manual path.
            try:
                from bs4 import BeautifulSoup
            except ImportError:
                BeautifulSoup = None

            if BeautifulSoup is not None:
                try:
                    soup = BeautifulSoup(self.watch_html, 'lxml')
                    title = soup.title.get_text().strip()
                except (ValueError, AttributeError):
                    # ValueError: bs4 reports a missing 'lxml' backend
                    # with FeatureNotFound, a ValueError subclass.
                    # AttributeError: the page has no <title>, so
                    # ``soup.title`` is None.
                    # Either way, fall back to the manual search.
                    title = None

            if title is None:
                # The parsing is simple enough to do with index().
                open_tag = '<title>'
                html_lower = self.watch_html.lower()
                i_start = html_lower.index(open_tag) + len(open_tag)
                i_end = html_lower.index('</title>')
                # Decode entities here so both paths hand plain text to
                # the suffix stripping below (bs4 already decodes them).
                title = unescape(self.watch_html[i_start:i_end].strip())

            # remove the ' - youtube' part that is added to the browser
            # tab's title
            index = title.lower().rfind(' - youtube')
            if index > 0:
                title = title[:index]
            self.player_config_args['title'] = title

    self.vid_descr = extract.get_vid_descr(self.watch_html)

    # https://github.com/nficano/pytube/issues/165
    stream_maps = ['url_encoded_fmt_stream_map']
    if 'adaptive_fmts' in self.player_config_args:
        stream_maps.append('adaptive_fmts')

    # unscramble the progressive and adaptive stream manifests.
    for fmt in stream_maps:
        # For age-restricted videos ``player_config_args`` *is*
        # ``vid_info``, so the second call below already covers it.
        if not self.age_restricted and fmt in self.vid_info:
            mixins.apply_descrambler(self.vid_info, fmt)
        mixins.apply_descrambler(self.player_config_args, fmt)

        try:
            mixins.apply_signature(self.player_config_args, fmt, self.js)
        except TypeError:
            # Presumably ``self.js`` was not usable (e.g. not fetched
            # yet): resolve the player JS from the embed page and retry.
            self.js_url = extract.js_url(
                self.embed_html, self.age_restricted,
            )
            self.js = request.get(self.js_url)
            mixins.apply_signature(self.player_config_args, fmt, self.js)

        # build instances of :class:`Stream <Stream>`
        self.initialize_stream_objects(fmt)

    # load the player_response object (contains subtitle information)
    apply_mixin(self.player_config_args, 'player_response', json.loads)

    self.initialize_caption_objects()
    logger.info('init finished successfully')
def test_get_vid_desc(cipher_signature):
    """Check the description extracted from the watch page.

    :param cipher_signature:
        Object exposing the watch page markup as ``watch_html``
        (presumably a pytest fixture defined elsewhere).
    """
    # One entry per line of the expected description, joined with "\n".
    description_lines = [
        "PSY - ‘I LUV IT’ M/V @ https://youtu.be/Xvjnoagk6GU",
        "PSY - ‘New Face’ M/V @https://youtu.be/OwJPPaEyqhI",
        "PSY - 8TH ALBUM '4X2=8' on iTunes @",
        "https://smarturl.it/PSY_8thAlbum",
        "PSY - GANGNAM STYLE(강남스타일) on iTunes @ http://smarturl.it/PsyGangnam",
        "#PSY #싸이 #GANGNAMSTYLE #강남스타일",
        "More about PSY@",
        "http://www.youtube.com/officialpsy",
        "http://www.facebook.com/officialpsy",
        "http://twitter.com/psy_oppa",
        "https://www.instagram.com/42psy42",
        "http://iTunes.com/PSY",
        "http://sptfy.com/PSY",
        "http://weibo.com/psyoppa",
    ]
    expected = "\n".join(description_lines)

    actual = extract.get_vid_descr(cipher_signature.watch_html)
    assert actual == expected
def descramble(self) -> None:
    """Descramble the stream data and build the Stream and caption objects.

    The dictionaries involved are transformed in place, so no intermediate
    copies are kept between steps. In order, the method:

    1. Parses ``self.vid_info_raw`` (a URL-encoded string) into
       ``self.vid_info``.
    2. Selects ``self.player_config_args``: the parsed ``vid_info`` for
       age-restricted videos, otherwise the ``"args"`` entry of the player
       config extracted from ``self.watch_html``.
    3. Fills in a missing ``"title"`` from the page's ``<title>`` tag.
    4. Stores the video description when a watch page is available.
    5. Descrambles and signs every stream map, then builds its streams.
    6. Decodes the ``"player_response"`` JSON and builds the captions.

    :rtype: None
    """
    logger.info("init started")

    self.vid_info = dict(parse_qsl(self.vid_info_raw))

    if not self.age_restricted:
        assert self.watch_html is not None
        self.player_config_args = extract.get_ytplayer_config(
            self.watch_html,
        )["args"]

        # Fix for KeyError: 'title' issue #434
        if "title" not in self.player_config_args:  # type: ignore
            opening_tag = "<title>"
            # Search a lower-cased copy so the tag match ignores case.
            page_lower = self.watch_html.lower()
            begin = page_lower.index(opening_tag) + len(opening_tag)
            finish = page_lower.index("</title>")
            page_title = self.watch_html[begin:finish].strip()

            # Cut the trailing " - YouTube" suffix when it is present and
            # not at the very start of the title.
            suffix_at = page_title.lower().rfind(" - youtube")
            if suffix_at > 0:
                page_title = page_title[:suffix_at]
            self.player_config_args["title"] = unescape(page_title)
    else:
        self.player_config_args = self.vid_info

    if self.watch_html:
        self.vid_descr = extract.get_vid_descr(self.watch_html)

    # https://github.com/nficano/pytube/issues/165
    stream_maps = ["url_encoded_fmt_stream_map"]
    if "adaptive_fmts" in self.player_config_args:
        stream_maps.append("adaptive_fmts")

    # unscramble the progressive and adaptive stream manifests.
    for fmt in stream_maps:
        if not self.age_restricted:
            if fmt in self.vid_info:
                mixins.apply_descrambler(self.vid_info, fmt)
        mixins.apply_descrambler(self.player_config_args, fmt)

        try:
            mixins.apply_signature(
                self.player_config_args, fmt, self.js  # type: ignore
            )
        except TypeError:
            # Signing failed with the current ``self.js``: look up the
            # player JS via the embed page, download it, and sign again.
            assert self.embed_html is not None
            self.js_url = extract.js_url(
                self.embed_html, self.age_restricted
            )
            self.js = request.get(self.js_url)
            assert self.js is not None
            mixins.apply_signature(self.player_config_args, fmt, self.js)

        # build instances of :class:`Stream <Stream>`
        self.initialize_stream_objects(fmt)

    # load the player_response object (contains subtitle information)
    raw_player_response = self.player_config_args["player_response"]
    self.player_config_args["player_response"] = json.loads(
        raw_player_response
    )

    self.initialize_caption_objects()
    logger.info("init finished successfully")
def test_get_vid_desc(cipher_signature):
    """Check the description extracted from the watch page.

    :param cipher_signature:
        Object exposing the watch page markup as ``watch_html``
        (presumably a pytest fixture defined elsewhere).
    """
    # Adjacent literals are concatenated at compile time into the single
    # expected description string.
    expected = (
        "PSY - DADDY(feat. CL of 2NE1) M/V @ https://youtu.be/FrG4TEcSuRg\n"
        "PSY - 나팔바지(NAPAL BAJI) M/V @ https://youtu.be/tF27TNC_4pc\n"
        "PSY - 7TH ALBUM '칠집싸이다' on iTunes @ "
        "http://smarturl.it/PSY_7THALBUM\n"
        "PSY - GANGNAM STYLE(강남스타일) on iTunes @ "
        "http://smarturl.it/PsyGangnam\n"
        "#PSY #싸이 #GANGNAMSTYLE #강남스타일\n"
        "More about PSY@\n"
        "http://www.psypark.com/\n"
        "http://www.youtube.com/officialpsy\n"
        "http://www.facebook.com/officialpsy\n"
        "http://twitter.com/psy_oppa\n"
        "https://www.instagram.com/42psy42\n"
        "http://iTunes.com/PSY\n"
        "http://sptfy.com/PSY\n"
        "http://weibo.com/psyoppa\n"
        "http://twitter.com/ygent_official"
    )

    description = extract.get_vid_descr(cipher_signature.watch_html)
    assert description == expected
def test_get_vid_desc(cipher_signature):
    """Check the description extracted from the watch page.

    :param cipher_signature:
        Object exposing the watch page markup as ``watch_html``
        (presumably a pytest fixture defined elsewhere).
    """
    # One entry per line of the expected description, joined with "\n".
    expected_lines = (
        "PSY - DADDY(feat. CL of 2NE1) M/V @ https://youtu.be/FrG4TEcSuRg",
        "PSY - 나팔바지(NAPAL BAJI) M/V @ https://youtu.be/tF27TNC_4pc",
        "PSY - 7TH ALBUM '칠집싸이다' on iTunes @ http://smarturl.it/PSY_7THALBUM",
        "PSY - GANGNAM STYLE(강남스타일) on iTunes @ http://smarturl.it/PsyGangnam",
        "#PSY #싸이 #GANGNAMSTYLE #강남스타일",
        "More about PSY@",
        "http://www.psypark.com/",
        "http://www.youtube.com/officialpsy",
        "http://www.facebook.com/officialpsy",
        "http://twitter.com/psy_oppa",
        "https://www.instagram.com/42psy42",
        "http://iTunes.com/PSY",
        "http://sptfy.com/PSY",
        "http://weibo.com/psyoppa",
        "http://twitter.com/ygent_official",
    )
    expected = "\n".join(expected_lines)

    extracted = extract.get_vid_descr(cipher_signature.watch_html)
    assert extracted == expected