Example #1
0
    def js_url(self):
        """Return the player JavaScript URL, resolving it on first use.

        A previously resolved URL stored in ``_js_url`` is returned as-is.
        Otherwise the URL is extracted from the embed page for age-restricted
        videos, or from the watch page for everything else, and cached.
        """
        if not self._js_url:
            if self.age_restricted:
                source_html = self.embed_html
            else:
                source_html = self.watch_html
            self._js_url = extract.js_url(source_html)

        return self._js_url
Example #2
0
    def init(self):
        """Descramble the stream data and build Stream instances.

        Parses the raw video-info query string into a dict, picks the player
        config args (the video info itself for age-restricted videos,
        otherwise the ``args`` entry of the player config extracted from the
        watch page), back-fills a missing ``title`` from the page's
        ``<title>`` tag, then descrambles and signs each stream manifest
        before building the stream and caption objects.

        :rtype: None
        :raises ValueError: if ``title`` has to be recovered from the watch
            page and the page contains no ``<title>``/``</title>`` pair.
        """
        logger.info('init started')

        self.vid_info = dict(parse_qsl(self.vid_info))
        if self.age_restricted:
            self.player_config_args = self.vid_info
        else:
            self.player_config_args = extract.get_ytplayer_config(
                self.watch_html, )['args']

            if 'title' not in self.player_config_args:
                title = None
                # BeautifulSoup is an optional dependency: prefer it when it
                # is usable, but never let it be the reason init() fails.
                try:
                    from bs4 import BeautifulSoup
                except ImportError:
                    BeautifulSoup = None
                if BeautifulSoup is not None:
                    try:
                        soup = BeautifulSoup(self.watch_html, 'lxml')
                        title = soup.title.get_text().strip()
                    except (AttributeError, ValueError):
                        # ValueError: bs4 has no 'lxml' builder available
                        # (FeatureNotFound). AttributeError: the parsed page
                        # has no <title> element, so soup.title is None.
                        title = None
                if title is None:
                    # The markup is simple enough to slice out by hand.
                    lowered_html = self.watch_html.lower()
                    i_start = lowered_html.index('<title>') + len('<title>')
                    i_end = lowered_html.index('</title>')
                    title = self.watch_html[i_start:i_end].strip()
                # remove the ' - youtube' part that is added to the browser
                # tab's title
                index = title.lower().rfind(' - youtube')
                title = title[:index] if index > 0 else title
                self.player_config_args['title'] = title

        self.vid_descr = extract.get_vid_descr(self.watch_html)
        # https://github.com/nficano/pytube/issues/165
        stream_maps = ['url_encoded_fmt_stream_map']
        if 'adaptive_fmts' in self.player_config_args:
            stream_maps.append('adaptive_fmts')

        # unscramble the progressive and adaptive stream manifests.
        for fmt in stream_maps:
            if not self.age_restricted and fmt in self.vid_info:
                mixins.apply_descrambler(self.vid_info, fmt)
            mixins.apply_descrambler(self.player_config_args, fmt)

            try:
                mixins.apply_signature(self.player_config_args, fmt, self.js)
            except TypeError:
                # Retry once with the player JavaScript resolved from the
                # embed page.
                self.js_url = extract.js_url(
                    self.embed_html,
                    self.age_restricted,
                )
                self.js = request.get(self.js_url)
                mixins.apply_signature(self.player_config_args, fmt, self.js)

            # build instances of :class:`Stream <Stream>`
            self.initialize_stream_objects(fmt)

        # load the player_response object (contains subtitle information)
        apply_mixin(self.player_config_args, 'player_response', json.loads)

        self.initialize_caption_objects()
        logger.info('init finished successfully')
Example #3
0
def test_js_url(cipher_signature):
    """The extracted player URL follows the ``/s/player/<id>/...`` layout."""
    pattern = (
        r"https://youtube.com/s/player/([\w\d]+)/player_ias.vflset/en_US/base.js"
    )
    js_url = extract.js_url(cipher_signature.watch_html)
    assert re.search(pattern, js_url) is not None
Example #4
0
    def prefetch(self) -> None:
        """Eagerly download all necessary data.

        Performs every network request up front so that later operations do
        not need to make calls outside of the interpreter, which block for
        long periods of time.

        :rtype: None
        :raises VideoUnavailable: if the watch page could not be fetched, or
            the video is not age restricted and the page says it is private.
        """
        self.watch_html = request.get(url=self.watch_url)
        if self.watch_html is None:
            raise VideoUnavailable(video_id=self.video_id)

        self.age_restricted = extract.is_age_restricted(self.watch_html)
        if self.age_restricted:
            # The embed page is only downloaded when it is not already set.
            if not self.embed_html:
                self.embed_html = request.get(url=self.embed_url)
            self.vid_info_url = extract.video_info_url_age_restricted(
                self.video_id, self.watch_url
            )
            self.vid_info_raw = request.get(self.vid_info_url)
            return

        if "This video is private" in self.watch_html:
            raise VideoUnavailable(video_id=self.video_id)

        self.vid_info_url = extract.video_info_url(
            video_id=self.video_id, watch_url=self.watch_url
        )
        self.vid_info_raw = request.get(self.vid_info_url)
        # The player JavaScript is only fetched here for unrestricted videos.
        self.js_url = extract.js_url(self.watch_html)
        self.js = request.get(self.js_url)
Example #5
0
    def prefetch(self) -> None:
        """Eagerly download all necessary data.

        Performs every network request up front so that later operations do
        not need to make calls outside of the interpreter, which block for
        long periods of time.

        :rtype: None
        :raises VideoUnavailable: if the watch page could not be fetched or
            does not contain the expected ``icon meh`` image markup.
        """
        self.watch_html = request.get(url=self.watch_url)
        page_missing = self.watch_html is None
        if page_missing or (
            '<img class="icon meh" src="/yts/img' not in self.watch_html
        ):
            raise VideoUnavailable(video_id=self.video_id)

        self.embed_html = request.get(url=self.embed_url)
        self.age_restricted = extract.is_age_restricted(self.watch_html)
        info_url_args = dict(
            video_id=self.video_id,
            watch_url=self.watch_url,
            embed_html=self.embed_html,
            age_restricted=self.age_restricted,
        )
        self.vid_info_url = extract.video_info_url(**info_url_args)
        self.vid_info_raw = request.get(self.vid_info_url)

        if self.age_restricted:
            return
        # The player JavaScript is only fetched here for unrestricted videos.
        self.js_url = extract.js_url(self.watch_html, self.age_restricted)
        self.js = request.get(self.js_url)
Example #6
0
	def prefetch(self):
		"""Eagerly download all necessary data.

		Performs every network request up front so that later operations do
		not need to make calls outside of the interpreter, which block for
		long periods of time.

		:rtype: None
		"""
		self.watch_html = request.get(url=self.watch_url)
		# Debugging aid (disabled): dump self.watch_html to /tmp/watch_html.
		#
		# 2020-07-30: the '<img class="icon meh" src="/yts/img' availability
		# check that used to raise VideoUnavailable here is disabled; see
		# github.com/nficano/pytube/issues/499 and
		# github.com/nficano/pytube/issues/337.
		self.embed_html = request.get(url=self.embed_url)
		self.age_restricted = extract.is_age_restricted(self.watch_html)
		info_url_args = dict(
			video_id=self.video_id,
			watch_url=self.watch_url,
			watch_html=self.watch_html,
			embed_html=self.embed_html,
			age_restricted=self.age_restricted,
		)
		self.vid_info_url = extract.video_info_url(**info_url_args)
		self.vid_info = request.get(self.vid_info_url)
		if self.age_restricted:
			return
		# The player JavaScript is only fetched here for unrestricted videos.
		self.js_url = extract.js_url(self.watch_html, self.age_restricted)
		self.js = request.get(self.js_url)
Example #7
0
    def init(self):
        """Descramble the stream data and build Stream instances.

        Dictionaries are passed around by reference, so each transform below
        mutates the config in place rather than returning a fresh copy at
        every intermediate step.

        :rtype: None

        """
        logger.info('init started')

        self.vid_info = dict(parse_qsl(self.vid_info))
        if self.age_restricted:
            self.player_config_args = self.vid_info
        else:
            player_config = extract.get_ytplayer_config(self.watch_html)
            self.player_config_args = player_config['args']

            # Fix for KeyError: 'title' issue #434 -- recover the title from
            # the page's <title> tag when the config does not carry one.
            if 'title' not in self.player_config_args:
                open_tag = '<title>'
                lowered_html = self.watch_html.lower()
                start = lowered_html.index(open_tag) + len(open_tag)
                stop = lowered_html.index('</title>')
                page_title = self.watch_html[start:stop].strip()
                # Drop the trailing ' - youtube' suffix when there is one.
                suffix_at = page_title.lower().rfind(' - youtube')
                if suffix_at > 0:
                    page_title = page_title[:suffix_at]
                self.player_config_args['title'] = page_title

        self.vid_descr = extract.get_vid_descr(self.watch_html)
        # https://github.com/nficano/pytube/issues/165
        manifests = ['url_encoded_fmt_stream_map']
        if 'adaptive_fmts' in self.player_config_args:
            manifests += ['adaptive_fmts']

        # unscramble the progressive and adaptive stream manifests.
        for manifest in manifests:
            if not self.age_restricted and manifest in self.vid_info:
                mixins.apply_descrambler(self.vid_info, manifest)
            mixins.apply_descrambler(self.player_config_args, manifest)

            try:
                mixins.apply_signature(
                    self.player_config_args, manifest, self.js,
                )
            except TypeError:
                # Retry once with the player JavaScript resolved from the
                # embed page.
                self.js_url = extract.js_url(
                    self.embed_html, self.age_restricted,
                )
                self.js = request.get(self.js_url)
                mixins.apply_signature(
                    self.player_config_args, manifest, self.js,
                )

            # build instances of :class:`Stream <Stream>`
            self.initialize_stream_objects(manifest)

        # load the player_response object (contains subtitle information)
        apply_mixin(self.player_config_args, 'player_response', json.loads)

        self.initialize_caption_objects()
        logger.info('init finished successfully')
Example #8
0
    def prefetch(self):
        """Eagerly download all necessary data.

        Performs every network request up front so that later operations do
        not need to make calls outside of the interpreter, which block for
        long periods of time.

        :rtype: None
        :raises VideoUnavailable: if the watch page carries the
            ``player-unavailable`` element id.
        """
        self.watch_html = request.get(url=self.watch_url)
        unavailable_marker = 'id="player-unavailable"'
        if unavailable_marker in self.watch_html:
            raise VideoUnavailable('This video is not available.')

        self.embed_html = request.get(url=self.embed_url)
        self.age_restricted = extract.is_age_restricted(self.watch_html)
        info_url_args = dict(
            video_id=self.video_id,
            watch_url=self.watch_url,
            watch_html=self.watch_html,
            embed_html=self.embed_html,
            age_restricted=self.age_restricted,
        )
        self.vid_info_url = extract.video_info_url(**info_url_args)
        self.vid_info = request.get(self.vid_info_url)

        if self.age_restricted:
            return
        # The player JavaScript is only fetched here for unrestricted videos.
        self.js_url = extract.js_url(self.watch_html, self.age_restricted)
        self.js = request.get(self.js_url)
Example #9
0
 def getJS(self) -> None:
     """Resolve the player JavaScript URL and load the script source.

     The watch page is downloaded to find the script URL; when that page is
     age restricted the embed page is used instead. The script itself is
     only downloaded when its URL differs from ``pytube.__js_url__``, in
     which case the module-level cache (``pytube.__js__`` /
     ``pytube.__js_url__``) is refreshed; otherwise the cached source is
     reused.

     Every HTTP response is opened in a ``with`` block so the underlying
     connection is released even if reading or decoding fails.

     :rtype: None
     """
     with urlopen("https://youtube.com/watch", timeout=None) as response:
         watch_html = response.read().decode('utf_8')

     if extract.is_age_restricted(watch_html):
         with urlopen("https://www.youtube.com/embed", timeout=None) as response:
             embed_html = response.read().decode('utf_8')
         self.js_url = extract.js_url(embed_html)
     else:
         self.js_url = extract.js_url(watch_html)

     if pytube.__js_url__ != self.js_url:
         # Cache miss: download the script and refresh the module cache.
         with urlopen(self.js_url, timeout=None) as response:
             self.js = response.read().decode('utf_8')
         pytube.__js__ = self.js
         pytube.__js_url__ = self.js_url
     else:
         self.js = pytube.__js__
Example #10
0
    def descramble(self) -> None:
        """Descramble the stream data and build Stream instances.

        Dictionaries are passed around by reference, so each transform below
        mutates the config in place rather than returning a fresh copy at
        every intermediate step.

        :rtype: None

        """
        logger.info("init started")

        self.vid_info = dict(parse_qsl(self.vid_info_raw))
        if self.age_restricted:
            self.player_config_args = self.vid_info
        else:
            assert self.watch_html is not None
            player_config = get_ytplayer_config(self.watch_html)
            self.player_config_args = player_config["args"]

            # Fix for KeyError: 'title' issue #434 -- recover the title from
            # the page's <title> tag when the config does not carry one.
            if "title" not in self.player_config_args:  # type: ignore
                open_tag = "<title>"
                lowered_html = self.watch_html.lower()
                start = lowered_html.index(open_tag) + len(open_tag)
                stop = lowered_html.index("</title>")
                page_title = self.watch_html[start:stop].strip()
                # Drop the trailing " - youtube" suffix when there is one.
                suffix_at = page_title.lower().rfind(" - youtube")
                if suffix_at > 0:
                    page_title = page_title[:suffix_at]
                self.player_config_args["title"] = unescape(page_title)

        # https://github.com/nficano/pytube/issues/165
        manifests = ["url_encoded_fmt_stream_map"]
        if "adaptive_fmts" in self.player_config_args:
            manifests += ["adaptive_fmts"]

        # unscramble the progressive and adaptive stream manifests.
        for manifest in manifests:
            if not self.age_restricted and manifest in self.vid_info:
                apply_descrambler(self.vid_info, manifest)
            apply_descrambler(self.player_config_args, manifest)

            if not self.js:
                # No player JavaScript yet: resolve it via the embed page,
                # downloading that page first when it is not set.
                if not self.embed_html:
                    self.embed_html = request.get(url=self.embed_url)
                self.js_url = extract.js_url(self.embed_html)
                self.js = request.get(self.js_url)

            apply_signature(self.player_config_args, manifest, self.js)

            # build instances of :class:`Stream <Stream>`
            self.initialize_stream_objects(manifest)

        # load the player_response object (contains subtitle information)
        self.player_response = json.loads(
            self.player_config_args["player_response"]
        )
        del self.player_config_args["player_response"]
        self.stream_monostate.title = self.title
        self.stream_monostate.duration = self.length

        logger.info("init finished successfully")
Example #11
0
    def descramble(self) -> None:
        """Descramble the stream data and build Stream instances.

        The initialization process takes advantage of Python's
        "call-by-reference evaluation," which allows dictionary transforms to
        be applied in-place, instead of holding references to mutations at each
        interstitial step.

        Steps, in order: parse ``vid_info_raw`` into ``vid_info``; decode its
        ``player_response``; replace ``player_response`` from the watch-page
        player config when it lacks ``streamingData``; descramble and sign
        every stream manifest; finally store the decoded ``player_response``
        on the instance and remove it from ``player_config_args``.

        :rtype: None
        :raises KeyError: if the parsed video info has no ``player_response``
            entry.

        """
        self.vid_info = dict(parse_qsl(self.vid_info_raw))
        # NOTE: this is an alias, not a copy -- player_config_args and
        # vid_info are the same dict object from here on.
        self.player_config_args = self.vid_info
        self.player_response = json.loads(self.vid_info['player_response'])

        # On pre-signed videos, we need to use get_ytplayer_config to fix
        #  the player_response item
        # The value tested here is the still-undecoded entry produced by
        # parse_qsl, so `in` is presumably a substring test on the raw JSON
        # text rather than a key lookup -- assumes vid_info_raw is text.
        if 'streamingData' not in self.player_config_args['player_response']:
            config_response = get_ytplayer_config(self.watch_html)
            if 'args' in config_response:
                self.player_config_args['player_response'] = config_response['args']['player_response']  # noqa: E501
            else:
                # No 'args' wrapper: the whole config object is stored, so
                # player_response may now be a non-string (handled below).
                self.player_config_args['player_response'] = config_response

        # https://github.com/nficano/pytube/issues/165
        stream_maps = ["url_encoded_fmt_stream_map"]
        if "adaptive_fmts" in self.player_config_args:
            stream_maps.append("adaptive_fmts")

        # unscramble the progressive and adaptive stream manifests.
        for fmt in stream_maps:
            # NOTE(review): because of the aliasing above, both calls below
            # receive the same dict when the first condition holds -- confirm
            # apply_descrambler tolerates being applied twice.
            if not self.age_restricted and fmt in self.vid_info:
                apply_descrambler(self.vid_info, fmt)
            apply_descrambler(self.player_config_args, fmt)

            # Lazily resolve the player JavaScript through the embed page,
            # downloading that page first when it is not set.
            if not self.js:
                if not self.embed_html:
                    self.embed_html = request.get(url=self.embed_url)
                self.js_url = extract.js_url(self.embed_html)
                self.js = request.get(self.js_url)

            apply_signature(self.player_config_args, fmt, self.js)

            # build instances of :class:`Stream <Stream>`
            self.initialize_stream_objects(fmt)

        # load the player_response object (contains subtitle information)
        # It is a JSON string unless it was replaced by a config object
        # above, in which case it is used as-is.
        if isinstance(self.player_config_args["player_response"], str):
            self.player_response = json.loads(
                self.player_config_args["player_response"]
            )
        else:
            self.player_response = self.player_config_args["player_response"]
        del self.player_config_args["player_response"]
        self.stream_monostate.title = self.title
        self.stream_monostate.duration = self.length
Example #12
0
 async def getJavaScript(self) -> None:
     '''Fetch the player JavaScript from YouTube; avoid calling more than once.

     The script URL is taken from the watch page, or from the embed page
     when the watch page is age restricted. The script source is downloaded
     only when that URL differs from the module-level ``js_url``.
     '''
     global js_url
     async with httpx.AsyncClient() as session:
         watch_reply = await session.get('https://youtube.com/watch',
                                         timeout=None)
     page_html = watch_reply.text

     if extract.is_age_restricted(page_html):
         async with httpx.AsyncClient() as session:
             embed_reply = await session.get('https://www.youtube.com/embed',
                                             timeout=None)
         page_html = embed_reply.text
     self.js_url = extract.js_url(page_html)

     # NOTE(review): when the URL matches the module-level value, self.js
     # is left untouched -- confirm callers set it elsewhere.
     if js_url == self.js_url:
         return
     async with httpx.AsyncClient() as session:
         script_reply = await session.get(self.js_url, timeout=None)
     self.js = script_reply.text
Example #13
0
    def prefetch(self, multithread=True):
        """Eagerly download all necessary data.

        Performs every network request up front so that later operations do
        not need to make calls outside of the interpreter, which block for
        long periods of time.

        With ``multithread`` enabled the watch and embed pages are fetched
        concurrently, and afterwards the video info and (for videos that are
        not age restricted) the player JavaScript are fetched concurrently.
        Each worker runs ``self.do_get(url, results, index)``, which is
        presumably expected to store the response at ``results[index]``.

        For age-restricted videos the player JavaScript is not fetched: in
        threaded mode ``self.js`` is set to ``None``, in sequential mode it
        is left untouched.

        :param bool multithread:
            Run independent downloads in parallel threads (default) instead
            of sequentially.
        :rtype: None
        :raises VideoUnavailable: if the watch page does not contain the
            expected ``icon meh`` image markup.
        """
        if multithread:
            results = [None] * 2
            threads = [
                Thread(target=self.do_get, args=(url, results, i))
                for i, url in enumerate([self.watch_url, self.embed_url])
            ]
            for thread in threads:
                thread.start()
            for thread in threads:
                thread.join()
            self.watch_html, self.embed_html = results
        else:
            self.watch_html = request.get(url=self.watch_url)
            self.embed_html = request.get(url=self.embed_url)

        if '<img class="icon meh" src="/yts/img' not in self.watch_html:
            raise VideoUnavailable('This video is unavailable.')
        self.age_restricted = extract.is_age_restricted(self.watch_html)
        self.vid_info_url = extract.video_info_url(
            video_id=self.video_id,
            watch_url=self.watch_url,
            watch_html=self.watch_html,
            embed_html=self.embed_html,
            age_restricted=self.age_restricted,
        )

        if not multithread:
            # Sequential path: no thread objects exist here, so nothing may
            # be joined (the previous code joined an unbound `threads` for
            # age-restricted videos and crashed).
            self.vid_info = request.get(self.vid_info_url)
            if not self.age_restricted:
                self.js_url = extract.js_url(
                    self.watch_html, self.age_restricted)
                self.js = request.get(self.js_url)
            return

        results = [None] * 2
        threads = [
            Thread(target=self.do_get, args=(self.vid_info_url, results, 0)),
        ]
        threads[0].start()
        if not self.age_restricted:
            self.js_url = extract.js_url(self.watch_html, self.age_restricted)
            js_thread = Thread(
                target=self.do_get, args=(self.js_url, results, 1))
            js_thread.start()
            threads.append(js_thread)
        for thread in threads:
            thread.join()
        # results[1] stays None when no JavaScript download was started.
        self.vid_info, self.js = results
    def prefetch(self) -> None:
        """Eagerly download all necessary data.

        Performs every network request up front so that later operations do
        not need to make calls outside of the interpreter, which block for
        long periods of time. The player JavaScript is shared through the
        module-level ``pytube.__js__`` / ``pytube.__js_url__`` cache.

        :rtype: None
        """
        self.watch_html = request.get(url=self.watch_url)
        self.check_availability()
        self.age_restricted = extract.is_age_restricted(self.watch_html)

        if not self.age_restricted:
            self.vid_info_url = extract.video_info_url(
                video_id=self.video_id, watch_url=self.watch_url)
            self.js_url = extract.js_url(self.watch_html)
        else:
            # Restricted videos resolve the script from the embed page,
            # which is only downloaded when it is not already set.
            if not self.embed_html:
                self.embed_html = request.get(url=self.embed_url)
            self.vid_info_url = extract.video_info_url_age_restricted(
                self.video_id, self.watch_url)
            self.js_url = extract.js_url(self.embed_html)

        self.initial_data = extract.initial_data(self.watch_html)

        self.vid_info_raw = request.get(self.vid_info_url)

        # Reuse the cached script when its URL is unchanged; otherwise
        # download the new script and refresh the cache.
        cache_is_current = pytube.__js_url__ == self.js_url
        if cache_is_current:
            self.js = pytube.__js__
        else:
            self.js = request.get(self.js_url)
            pytube.__js__ = self.js
            pytube.__js_url__ = self.js_url
Example #15
0
 def _getJS(self) -> None:
     """Resolve the player JavaScript URL and load the script source.

     The script URL is extracted from the watch page. The script itself is
     only downloaded when its URL differs from ``pytube.__js_url__``, in
     which case the module-level cache (``pytube.__js__`` /
     ``pytube.__js_url__``) is refreshed; otherwise the cached source is
     reused.

     :rtype: None
     :raises Exception: with the message ``'ERROR: Could not make
         request.'`` when any step fails; the original error is chained as
         ``__cause__``. ``KeyboardInterrupt`` and ``SystemExit`` propagate
         unchanged.
     """
     try:
         # `with` releases the connection even if read/decode fails.
         with urlopen('https://youtube.com/watch', timeout=None) as response:
             watch_html = response.read().decode('utf_8')
         self._js_url = extract.js_url(watch_html)
         if pytube.__js_url__ != self._js_url:
             with urlopen(self._js_url, timeout=None) as response:
                 self._js = response.read().decode('utf_8')
             pytube.__js__ = self._js
             pytube.__js_url__ = self._js_url
         else:
             self._js = pytube.__js__
     except Exception as err:
         # Same exception type and message as before for existing callers,
         # but the root cause is preserved for debugging.
         raise Exception('ERROR: Could not make request.') from err
 async def getJS(self) -> None:
     """Resolve the player JavaScript URL and load the script source.

     The script URL is extracted from the YouTube landing page. The script
     is downloaded only when its URL differs from ``pytube.__js_url__``, in
     which case the module-level cache is refreshed; otherwise the cached
     source in ``pytube.__js__`` is reused.
     """
     # The `v` query parameter was removed from this request; it is unclear
     # why PyTube sent it in the first place.
     async with httpx.AsyncClient() as session:
         landing_reply = await session.get("https://www.youtube.com/",
                                           timeout=None)
         landing_html = landing_reply.text
     self.js_url = extract.js_url(landing_html)

     if pytube.__js_url__ == self.js_url:
         self.js = pytube.__js__
         return

     async with httpx.AsyncClient() as session:
         script_reply = await session.get(self.js_url, timeout=None)
         self.js = script_reply.text
     pytube.__js__ = self.js
     pytube.__js_url__ = self.js_url
Example #17
0
    def prefetch(self):
        """Eagerly download all necessary data.

        Performs every network request up front so that later operations do
        not need to make calls outside of the interpreter, which block for
        long periods of time.

        :rtype: None
        :raises AgeRestrictionError: if the watch page is age restricted.
        """
        self.watch_html = request.get(url=self.watch_url)
        restricted = extract.is_age_restricted(self.watch_html)
        if restricted:
            raise AgeRestrictionError('Content is age restricted')

        info_url_args = dict(
            video_id=self.video_id,
            watch_url=self.watch_url,
            watch_html=self.watch_html,
        )
        self.vid_info_url = extract.video_info_url(**info_url_args)
        self.js_url = extract.js_url(self.watch_html)
        # The script is downloaded before the video info.
        self.js = request.get(self.js_url)
        self.vid_info = request.get(self.vid_info_url)
Example #18
0
    async def prefetch(self) -> None:
        """Eagerly download all necessary data.

        Awaits every network request up front so that later operations do
        not need to make calls outside of the interpreter, which block for
        long periods of time.

        :rtype: None
        :raises VideoUnavailable: if the watch page could not be fetched, or
            the video is not age restricted and the page contains one of the
            known "unavailable" messages.
        """
        self.watch_html = await request.get(url=self.watch_url)
        if self.watch_html is None:
            raise VideoUnavailable(video_id=self.video_id)
        self.age_restricted = extract.is_age_restricted(self.watch_html)

        # Page texts that mark an unrestricted video as unavailable.
        unavailable_markers = (
            "This video is private",
            "This video is no longer available because the YouTube account "
            "associated with this video has been terminated.",
            "This video is only available to Music Premium members",
            "This video is no longer available due to a copyright claim by",
        )
        if not self.age_restricted and any(
            marker in self.watch_html for marker in unavailable_markers
        ):
            raise VideoUnavailable(video_id=self.video_id)

        if self.age_restricted:
            # The embed page is only downloaded when it is not already set.
            if not self.embed_html:
                self.embed_html = await request.get(url=self.embed_url)
            self.vid_info_url = extract.video_info_url_age_restricted(
                self.video_id, self.watch_url)
            self.vid_info_raw = await request.get(self.vid_info_url)
            return

        self.vid_info_url = extract.video_info_url(
            video_id=self.video_id, watch_url=self.watch_url)
        self.vid_info_raw = await request.get(self.vid_info_url)
        # The player JavaScript is only fetched here for unrestricted videos.
        self.js_url = extract.js_url(self.watch_html)
        self.js = await request.get(self.js_url)
Example #19
0
    def prefetch(self):
        """Eagerly download all necessary data.

        Performs every network request up front so that later operations do
        not need to make calls outside of the interpreter, which block for
        long periods of time.

        :rtype: None
        """
        self.watch_html = request.get(url=self.watch_url)
        self.embed_html = request.get(url=self.embed_url)
        self.age_restricted = extract.is_age_restricted(self.watch_html)

        info_url_args = dict(
            video_id=self.video_id,
            watch_url=self.watch_url,
            watch_html=self.watch_html,
            embed_html=self.embed_html,
            age_restricted=self.age_restricted,
        )
        self.vid_info_url = extract.video_info_url(**info_url_args)
        self.vid_info = request.get(self.vid_info_url)

        if self.age_restricted:
            return
        # The player JavaScript is only fetched here for unrestricted videos.
        self.js_url = extract.js_url(self.watch_html)
        self.js = request.get(self.js_url)
Example #20
0
def test_js_url(cipher_signature):
    """The player URL extracted from the fixture's watch page is pinned."""
    js_url = extract.js_url(cipher_signature.watch_html)
    assert js_url == (
        'https://youtube.com/yts/jsbin/player-vflOdyxa4/en_US/base.js'
    )
Example #21
0
def test_js_url(cipher_signature):
    """The player URL extracted from the fixture's watch page is pinned."""
    js_url = extract.js_url(cipher_signature.watch_html)
    assert js_url == (
        "https://youtube.com/s/player/9b65e980/player_ias.vflset/en_US/base.js"
    )
Example #22
0
 def _get_cipher(self, videoId):
     """Build the signature cipher for ``videoId``.

     Downloads the video's embed page, resolves the player JavaScript URL
     from it, downloads that script into ``self._js`` and constructs
     ``self._cipher`` from it.
     """
     embed_page = request.get(url="https://www.youtube.com/embed/" + videoId)
     script_url = extract.js_url(embed_page)
     self._js = request.get(script_url)
     self._cipher = Cipher(js=self._js)
Example #23
0
def test_js_url(cipher_signature):
    """The player URL extracted from the fixture's watch page is pinned."""
    js_url = extract.js_url(cipher_signature.watch_html)
    assert js_url == (
        "https://youtube.com/yts/jsbin/player_ias-vflWQEEag/en_US/base.js"
    )