Beispiel #1
0
    def init(self):
        """Prepare stream and caption objects from the fetched video data.

        The raw ``vid_info`` query string is parsed into a dictionary, the
        player config arguments are selected, and every stream manifest is
        descrambled and signed before the stream objects are built. The
        dictionaries are handed to the helpers and transformed in place, so
        no intermediate copies are kept around.

        :rtype: None

        """
        logger.info('init started')

        self.vid_info = dict(parse_qsl(self.vid_info))

        if not self.age_restricted:
            player_config = extract.get_ytplayer_config(self.watch_html)
            self.player_config_args = player_config['args']

            # Fix for KeyError: 'title' issue #434
            if 'title' not in self.player_config_args:
                # Search a lowercased copy so the tag match ignores case,
                # but slice the original page to keep the title's casing.
                html_lower = self.watch_html.lower()
                open_tag = '<title>'
                start = html_lower.index(open_tag) + len(open_tag)
                end = html_lower.index('</title>')
                page_title = self.watch_html[start:end].strip()

                # Drop the trailing ' - youtube' marker when one is present.
                suffix_at = page_title.lower().rfind(' - youtube')
                if suffix_at > 0:
                    page_title = page_title[:suffix_at]
                self.player_config_args['title'] = page_title
        else:
            self.player_config_args = self.vid_info

        self.vid_descr = extract.get_vid_descr(self.watch_html)

        # https://github.com/nficano/pytube/issues/165
        manifest_keys = ['url_encoded_fmt_stream_map']
        if 'adaptive_fmts' in self.player_config_args:
            manifest_keys += ['adaptive_fmts']

        # unscramble the progressive and adaptive stream manifests.
        for manifest_key in manifest_keys:
            if not self.age_restricted:
                if manifest_key in self.vid_info:
                    mixins.apply_descrambler(self.vid_info, manifest_key)
            mixins.apply_descrambler(self.player_config_args, manifest_key)

            try:
                mixins.apply_signature(
                    self.player_config_args, manifest_key, self.js,
                )
            except TypeError:
                # Signing failed with the current player JS: fetch it again
                # via the embed page and retry once.
                self.js_url = extract.js_url(
                    self.embed_html,
                    self.age_restricted,
                )
                self.js = request.get(self.js_url)
                mixins.apply_signature(
                    self.player_config_args, manifest_key, self.js,
                )

            # build instances of :class:`Stream <Stream>`
            self.initialize_stream_objects(manifest_key)

        # load the player_response object (contains subtitle information)
        apply_mixin(self.player_config_args, 'player_response', json.loads)

        self.initialize_caption_objects()
        logger.info('init finished successfully')
Beispiel #2
0
    def init(self):
        """Descramble the stream data and build Stream instances.

        When the player config carries no ``title``, the title is recovered
        from the watch page's ``<title>`` element. BeautifulSoup is used when
        it can be imported, its ``lxml`` parser is available and the parsed
        page has a title; in every other case the tag is located with plain
        string searches. A trailing ``' - youtube'`` marker (matched
        case-insensitively) is then removed.

        :rtype: None
        :raises ValueError: if the string-search fallback is used and the
            watch page has no ``<title>``/``</title>`` pair.

        """
        logger.info('init started')

        self.vid_info = dict(parse_qsl(self.vid_info))
        if self.age_restricted:
            self.player_config_args = self.vid_info
        else:
            self.player_config_args = extract.get_ytplayer_config(
                self.watch_html, )['args']

            if 'title' not in self.player_config_args:
                title = None

                # bs4 is an optional dependency: any import failure simply
                # means the string-search path below is used instead.
                try:
                    from bs4 import BeautifulSoup, FeatureNotFound
                except ImportError:
                    BeautifulSoup = None

                if BeautifulSoup is not None:
                    try:
                        soup = BeautifulSoup(self.watch_html, 'lxml')
                    except FeatureNotFound:
                        # bs4 is installed but the lxml parser is not.
                        soup = None
                    # soup.title is None when the page has no <title> tag.
                    if soup is not None and soup.title is not None:
                        title = soup.title.get_text().strip()

                if title is None:
                    # Search a lowercased copy so the match ignores case,
                    # but slice the original page to keep the title's casing.
                    lowered_html = self.watch_html.lower()
                    i_start = lowered_html.index('<title>') + len('<title>')
                    i_end = lowered_html.index('</title>')
                    title = self.watch_html[i_start:i_end].strip()

                # remove the ' - youtube' part that is added to the browser
                # tab's title
                index = title.lower().rfind(' - youtube')
                title = title[:index] if index > 0 else title
                self.player_config_args['title'] = title

        self.vid_descr = extract.get_vid_descr(self.watch_html)
        # https://github.com/nficano/pytube/issues/165
        stream_maps = ['url_encoded_fmt_stream_map']
        if 'adaptive_fmts' in self.player_config_args:
            stream_maps.append('adaptive_fmts')

        # unscramble the progressive and adaptive stream manifests.
        for fmt in stream_maps:
            if not self.age_restricted and fmt in self.vid_info:
                mixins.apply_descrambler(self.vid_info, fmt)
            mixins.apply_descrambler(self.player_config_args, fmt)

            try:
                mixins.apply_signature(self.player_config_args, fmt, self.js)
            except TypeError:
                # Signing failed with the current player JS: fetch it again
                # via the embed page and retry once.
                self.js_url = extract.js_url(
                    self.embed_html,
                    self.age_restricted,
                )
                self.js = request.get(self.js_url)
                mixins.apply_signature(self.player_config_args, fmt, self.js)

            # build instances of :class:`Stream <Stream>`
            self.initialize_stream_objects(fmt)

        # load the player_response object (contains subtitle information)
        apply_mixin(self.player_config_args, 'player_response', json.loads)

        self.initialize_caption_objects()
        logger.info('init finished successfully')
Beispiel #3
0
    def descramble(self) -> None:
        """Decode the stream manifests and build the Stream objects.

        The raw ``vid_info`` query string is parsed, the player config
        arguments are selected, and each stream manifest is descrambled and
        signed before stream objects are created for it. The dictionaries
        are passed to the helpers and transformed in place rather than
        copied at each step.

        :rtype: None

        """
        logger.info("init started")

        self.vid_info = dict(parse_qsl(self.vid_info_raw))

        if not self.age_restricted:
            assert self.watch_html is not None
            ytplayer_config = get_ytplayer_config(self.watch_html)
            self.player_config_args = ytplayer_config["args"]

            # Fix for KeyError: 'title' issue #434
            if "title" not in self.player_config_args:  # type: ignore
                # Search a lowercased copy so the tag match ignores case,
                # but slice the original page to keep the title's casing.
                html_lower = self.watch_html.lower()
                open_tag = "<title>"
                start = html_lower.index(open_tag) + len(open_tag)
                end = html_lower.index("</title>")
                page_title = self.watch_html[start:end].strip()

                # Drop the trailing " - youtube" marker when one is present.
                suffix_at = page_title.lower().rfind(" - youtube")
                if suffix_at > 0:
                    page_title = page_title[:suffix_at]
                self.player_config_args["title"] = unescape(page_title)
        else:
            self.player_config_args = self.vid_info

        # https://github.com/nficano/pytube/issues/165
        manifest_keys = ["url_encoded_fmt_stream_map"]
        if "adaptive_fmts" in self.player_config_args:
            manifest_keys += ["adaptive_fmts"]

        # unscramble the progressive and adaptive stream manifests.
        for manifest_key in manifest_keys:
            if not self.age_restricted:
                if manifest_key in self.vid_info:
                    apply_descrambler(self.vid_info, manifest_key)
            apply_descrambler(self.player_config_args, manifest_key)

            # Fetch the player JS (and the embed page it is located from)
            # only if it has not been loaded yet.
            if not self.js:
                if not self.embed_html:
                    self.embed_html = request.get(url=self.embed_url)
                self.js_url = extract.js_url(self.embed_html)
                self.js = request.get(self.js_url)

            apply_signature(self.player_config_args, manifest_key, self.js)

            # build instances of :class:`Stream <Stream>`
            self.initialize_stream_objects(manifest_key)

        # load the player_response object (contains subtitle information)
        raw_response = self.player_config_args["player_response"]
        self.player_response = json.loads(raw_response)
        del self.player_config_args["player_response"]

        monostate = self.stream_monostate
        monostate.title = self.title
        monostate.duration = self.length

        logger.info("init finished successfully")
Beispiel #4
0
    def descramble(self) -> None:
        """Decode the stream manifests and build the Stream objects.

        ``vid_info`` is parsed from the raw query string and doubles as the
        player config arguments. Each stream manifest is descrambled and
        signed before stream objects are created for it; the dictionaries
        are passed to the helpers and transformed in place rather than
        copied at each step.

        :rtype: None

        """
        self.vid_info = dict(parse_qsl(self.vid_info_raw))
        self.player_config_args = self.vid_info
        self.player_response = json.loads(self.vid_info['player_response'])

        # On pre-signed videos, we need to use get_ytplayer_config to fix
        #  the player_response item
        if 'streamingData' not in self.player_config_args['player_response']:
            ytplayer_config = get_ytplayer_config(self.watch_html)
            if 'args' not in ytplayer_config:
                replacement = ytplayer_config
            else:
                replacement = ytplayer_config['args']['player_response']
            self.player_config_args['player_response'] = replacement

        # https://github.com/nficano/pytube/issues/165
        manifest_keys = ["url_encoded_fmt_stream_map"]
        if "adaptive_fmts" in self.player_config_args:
            manifest_keys += ["adaptive_fmts"]

        # unscramble the progressive and adaptive stream manifests.
        for manifest_key in manifest_keys:
            if not self.age_restricted:
                if manifest_key in self.vid_info:
                    apply_descrambler(self.vid_info, manifest_key)
            apply_descrambler(self.player_config_args, manifest_key)

            # Fetch the player JS (and the embed page it is located from)
            # only if it has not been loaded yet.
            if not self.js:
                if not self.embed_html:
                    self.embed_html = request.get(url=self.embed_url)
                self.js_url = extract.js_url(self.embed_html)
                self.js = request.get(self.js_url)

            apply_signature(self.player_config_args, manifest_key, self.js)

            # build instances of :class:`Stream <Stream>`
            self.initialize_stream_objects(manifest_key)

        # load the player_response object (contains subtitle information);
        # it is either still a JSON string or an already-built object.
        response = self.player_config_args["player_response"]
        if isinstance(response, str):
            response = json.loads(response)
        self.player_response = response
        del self.player_config_args["player_response"]

        monostate = self.stream_monostate
        monostate.title = self.title
        monostate.duration = self.length
Beispiel #5
0
    def player_config_args(self):
        """Return the player config arguments, building them on first use.

        A truthy cached ``_player_config_args`` is returned as-is. Otherwise
        ``vid_info`` becomes the cached value, and when its
        ``player_response`` entry does not contain ``'streamingData'`` that
        entry is replaced with data from ``extract.get_ytplayer_config``.

        The dictionary is manipulated directly rather than by reading
        ``self.player_config_args`` again, so building the value never
        re-enters this accessor.
        NOTE(review): presumably exposed as a ``@property`` (the decorator
        is not visible here) -- confirm.

        :returns: the cached config arguments (the same object as
            ``vid_info`` when freshly built).
        :raises KeyError: if ``vid_info`` has no ``'player_response'`` entry.
        """
        if self._player_config_args:
            return self._player_config_args

        config_args = self.vid_info
        self._player_config_args = config_args

        # On pre-signed videos, we need to use get_ytplayer_config to fix
        #  the player_response item
        if 'streamingData' not in config_args['player_response']:
            config_response = extract.get_ytplayer_config(self.watch_html)
            if 'args' in config_response:
                replacement = config_response['args']['player_response']
            else:
                replacement = config_response
            config_args['player_response'] = replacement

        return config_args
Beispiel #6
0
    def init(self):
        """Prepare stream and caption objects from the fetched video data.

        The raw ``vid_info`` query string is parsed into a dictionary, the
        player config arguments are selected, and every stream manifest is
        descrambled and signed before the stream objects are built. The
        dictionaries are handed to the helpers and transformed in place, so
        no intermediate copies are kept around.

        :rtype: None

        """
        logger.info('init started')

        self.vid_info = dict(parse_qsl(self.vid_info))

        # Age-restricted videos take their arguments from vid_info; all
        # others read them from the player config in the watch page.
        if not self.age_restricted:
            player_config = extract.get_ytplayer_config(self.watch_html)
            self.player_config_args = player_config['args']
        else:
            self.player_config_args = self.vid_info

        # https://github.com/nficano/pytube/issues/165
        manifest_keys = ['url_encoded_fmt_stream_map']
        if 'adaptive_fmts' in self.player_config_args:
            manifest_keys += ['adaptive_fmts']

        # unscramble the progressive and adaptive stream manifests.
        for manifest_key in manifest_keys:
            if not self.age_restricted:
                if manifest_key in self.vid_info:
                    mixins.apply_descrambler(self.vid_info, manifest_key)
            mixins.apply_descrambler(self.player_config_args, manifest_key)

            # apply the signature to the download url.
            mixins.apply_signature(
                self.player_config_args, manifest_key, self.js,
            )

            # build instances of :class:`Stream <Stream>`
            self.initialize_stream_objects(manifest_key)

        # load the player_response object (contains subtitle information)
        apply_mixin(self.player_config_args, 'player_response', json.loads)

        self.initialize_caption_objects()
        logger.info('init finished successfully')
Beispiel #7
0
    def init(self):
        """Set up the stream and caption objects for this video.

        Parses the raw ``vid_info`` query string, picks the source of the
        player config arguments, then descrambles and signs each stream
        manifest before creating stream objects from it. The helpers
        transform the dictionaries in place, so no references to
        intermediate results need to be held.

        :rtype: None

        """
        logger.info('init started')

        parsed_pairs = parse_qsl(self.vid_info)
        self.vid_info = dict(parsed_pairs)

        self.player_config_args = (
            self.vid_info
            if self.age_restricted
            else extract.get_ytplayer_config(self.watch_html)['args']
        )

        # https://github.com/nficano/pytube/issues/165
        has_adaptive = 'adaptive_fmts' in self.player_config_args
        stream_keys = ['url_encoded_fmt_stream_map']
        if has_adaptive:
            stream_keys.append('adaptive_fmts')

        # unscramble the progressive and adaptive stream manifests.
        for stream_key in stream_keys:
            if not self.age_restricted and stream_key in self.vid_info:
                mixins.apply_descrambler(self.vid_info, stream_key)
            mixins.apply_descrambler(self.player_config_args, stream_key)

            # apply the signature to the download url.
            mixins.apply_signature(
                self.player_config_args, stream_key, self.js,
            )

            # build instances of :class:`Stream <Stream>`
            self.initialize_stream_objects(stream_key)

        # load the player_response object (contains subtitle information)
        apply_mixin(self.player_config_args, 'player_response', json.loads)

        self.initialize_caption_objects()
        logger.info('init finished successfully')
Beispiel #8
0
    def init(self):
        """Descramble the stream data and build Stream instances.

        The raw ``vid_info`` query string is parsed, the player config is
        extracted from the watch page, and each available stream manifest is
        descrambled and signed in place before the stream and caption
        objects are built.

        The adaptive manifest is only processed when the player config
        actually contains it, and ``vid_info`` is only descrambled for the
        manifests it contains (see
        https://github.com/nficano/pytube/issues/165).

        :rtype: None

        """
        logger.info('init started')

        self.vid_info = dict(parse_qsl(self.vid_info))
        self.player_config = extract.get_ytplayer_config(self.watch_html)
        config_args = self.player_config['args']

        # The progressive manifest is always processed; the adaptive one is
        # optional and skipped when the player config does not provide it.
        stream_maps = ['url_encoded_fmt_stream_map']
        if 'adaptive_fmts' in config_args:
            stream_maps.append('adaptive_fmts')

        for fmt in stream_maps:
            # unscramble the progressive and adaptive stream manifests.
            if fmt in self.vid_info:
                mixins.apply_descrambler(self.vid_info, fmt)
            mixins.apply_descrambler(config_args, fmt)

            # apply the signature to the download url.
            mixins.apply_signature(config_args, fmt, self.js)

        # load the player_response object (contains subtitle information)
        apply_mixin(config_args, 'player_response', json.loads)

        # build instances of :class:`Stream <Stream>`
        for fmt in stream_maps:
            self.initialize_stream_objects(fmt)

        self.initialize_caption_objects()
        logger.info('init finished successfully')
Beispiel #9
0
def test_signature_cipher_does_not_error(stream_dict):
    """Descrambling the progressive manifest leaves an ``s`` key on its first entry."""
    manifest_key = "url_encoded_fmt_stream_map"
    args = extract.get_ytplayer_config(stream_dict)['args']
    extract.apply_descrambler(args, manifest_key)
    first_stream = args[manifest_key][0]
    assert "s" in first_stream.keys()
Beispiel #10
0
def test_get_ytplayer_config_with_no_match_should_error():
    """An empty watch page must make ``get_ytplayer_config`` raise RegexMatchError."""
    empty_html = ""
    with pytest.raises(RegexMatchError):
        extract.get_ytplayer_config(empty_html)