Example 1
def parse_all_links(playlist):

    url = playlist.construct_playlist_url()
    req = request.get(url)

    # split the page source by line and process each line
    content = [
        x for x in req.split('\n')
        if 'yt-uix-sessionlink yt-uix-tile-link' in x
    ]
    link_list = [x.split('href="', 1)[1].split('&', 1)[0] for x in content]

    # The above only returns 100 or fewer links
    # Simulating a browser request for the load more link
    load_more_url = playlist._load_more_url(req)
    while len(load_more_url):  # there is a url found
        req = request.get(load_more_url)
        load_more = json.loads(req)
        videos = re.findall(
            r'href=\"(/watch\?v=[\w-]*)',
            load_more['content_html'],
        )
        # remove duplicates
        link_list.extend(list(OrderedDict.fromkeys(videos)))
        load_more_url = playlist._load_more_url(
            load_more['load_more_widget_html'], )

    return link_list
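
A minimal usage sketch for the helper above, assuming the older pytube Playlist class that exposes construct_playlist_url() and _load_more_url() (both assumptions taken from the snippet, not a documented API); the playlist id is a placeholder.

from pytube import Playlist  # assumption: old pytube API with these internals

playlist = Playlist('https://www.youtube.com/playlist?list=PLxxxxxxxxxxxx')
watch_paths = parse_all_links(playlist)  # e.g. ['/watch?v=...', ...]
video_urls = ['https://www.youtube.com' + p for p in watch_paths]
print(len(video_urls), 'videos found')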
Example 2
    def prefetch(self) -> None:
        """Eagerly download all necessary data.

        Eagerly executes all necessary network requests so all other
        operations don't need to make calls outside of the interpreter
        which blocks for long periods of time.

        :rtype: None

        """
        self.watch_html = request.get(url=self.watch_url)
        if (self.watch_html is None
                or '<img class="icon meh" src="/yts/img'  # noqa: W503
                not in self.watch_html  # noqa: W503
            ):
            raise VideoUnavailable(video_id=self.video_id)

        self.embed_html = request.get(url=self.embed_url)
        self.age_restricted = extract.is_age_restricted(self.watch_html)
        self.vid_info_url = extract.video_info_url(
            video_id=self.video_id,
            watch_url=self.watch_url,
            embed_html=self.embed_html,
            age_restricted=self.age_restricted,
        )
        self.vid_info_raw = request.get(self.vid_info_url)
        if not self.age_restricted:
            self.js_url = extract.js_url(self.watch_html, self.age_restricted)
            self.js = request.get(self.js_url)
Example 3
    def parse_links(self):
        """Parse the video links from the page source, extracts and
        returns the /watch?v= part from video link href
        It's an alternative for BeautifulSoup
        """

        url = self.construct_playlist_url()
        req = request.get(url)

        # split the page source by line and process each line
        content = [x for x in req.split('\n') if 'pl-video-title-link' in x]
        link_list = [x.split('href="', 1)[1].split('&', 1)[0] for x in content]

        # The above only returns 100 or fewer links
        # Simulating a browser request for the load more link
        load_more_url = self._load_more_url(req)
        while len(load_more_url):  # there is a url found
            logger.debug('load more url: %s' % load_more_url)
            req = request.get(load_more_url)
            load_more = json.loads(req)
            videos = re.findall(
                r'href=\"(/watch\?v=[\w-]*)',
                load_more['content_html'],
            )
            # remove duplicates
            link_list.extend(list(OrderedDict.fromkeys(videos)))
            load_more_url = self._load_more_url(
                load_more['load_more_widget_html'], )

        return link_list
Example 4
    def prefetch(self) -> None:
        """Eagerly download all necessary data.

        Eagerly executes all necessary network requests so all other
        operations don't need to make calls outside of the interpreter
        which blocks for long periods of time.

        :rtype: None
        """
        self.watch_html = request.get(url=self.watch_url)
        if self.watch_html is None:
            raise VideoUnavailable(video_id=self.video_id)
        self.age_restricted = extract.is_age_restricted(self.watch_html)

        if not self.age_restricted and "This video is private" in self.watch_html:
            raise VideoUnavailable(video_id=self.video_id)

        if self.age_restricted:
            if not self.embed_html:
                self.embed_html = request.get(url=self.embed_url)
            self.vid_info_url = extract.video_info_url_age_restricted(
                self.video_id, self.watch_url
            )
        else:
            self.vid_info_url = extract.video_info_url(
                video_id=self.video_id, watch_url=self.watch_url
            )

        self.vid_info_raw = request.get(self.vid_info_url)
        if not self.age_restricted:
            self.js_url = extract.js_url(self.watch_html)
            self.js = request.get(self.js_url)
Example 5
	def prefetch(self):
		"""Eagerly download all necessary data.

		Eagerly executes all necessary network requests so all other
		operations don't need to make calls outside of the interpreter
		which blocks for long periods of time.

		:rtype: None

		"""
		self.watch_html = request.get(url=self.watch_url)
		#with open("/tmp/watch_html",'w') as f:					# Debug
		#	f.write(self.watch_html)	
		
		# 30.07.2020 see github.com/nficano/pytube/issues/499 +
		#	github.com/nficano/pytube/issues/337:
		#if '<img class="icon meh" src="/yts/img' not in self.watch_html:
		#	raise VideoUnavailable('This video is unavailable.')
		self.embed_html = request.get(url=self.embed_url)
		self.age_restricted = extract.is_age_restricted(self.watch_html)
		self.vid_info_url = extract.video_info_url(
			video_id=self.video_id,
			watch_url=self.watch_url,
			watch_html=self.watch_html,
			embed_html=self.embed_html,
			age_restricted=self.age_restricted,
		)
		self.vid_info = request.get(self.vid_info_url)
		if not self.age_restricted:
			self.js_url = extract.js_url(self.watch_html, self.age_restricted)
			self.js = request.get(self.js_url)
Example 6
    def prefetch(self):
        """Eagerly download all necessary data.

        Eagerly executes all necessary network requests so all other
        operations don't need to make calls outside of the interpreter
        which blocks for long periods of time.

        :rtype: None

        """
        self.watch_html = request.get(url=self.watch_url)
        if 'id="player-unavailable"' in self.watch_html:
            raise VideoUnavailable('This video is not available.')
        self.embed_html = request.get(url=self.embed_url)
        self.age_restricted = extract.is_age_restricted(self.watch_html)
        self.vid_info_url = extract.video_info_url(
            video_id=self.video_id,
            watch_url=self.watch_url,
            watch_html=self.watch_html,
            embed_html=self.embed_html,
            age_restricted=self.age_restricted,
        )
        self.vid_info = request.get(self.vid_info_url)
        if not self.age_restricted:
            self.js_url = extract.js_url(self.watch_html, self.age_restricted)
            self.js = request.get(self.js_url)
Example 7
    def descramble(self) -> None:
        """Descramble the stream data and build Stream instances.

        The initialization process takes advantage of Python's
        "call-by-reference evaluation," which allows dictionary transforms to
        be applied in-place, instead of holding references to mutations at each
        interstitial step.

        :rtype: None

        """
        logger.info("init started")

        self.vid_info = dict(parse_qsl(self.vid_info_raw))
        if self.age_restricted:
            self.player_config_args = self.vid_info
        else:
            assert self.watch_html is not None
            self.player_config_args = get_ytplayer_config(self.watch_html)["args"]

            # Fix for KeyError: 'title' issue #434
            if "title" not in self.player_config_args:  # type: ignore
                i_start = self.watch_html.lower().index("<title>") + len("<title>")
                i_end = self.watch_html.lower().index("</title>")
                title = self.watch_html[i_start:i_end].strip()
                index = title.lower().rfind(" - youtube")
                title = title[:index] if index > 0 else title
                self.player_config_args["title"] = unescape(title)

        # https://github.com/nficano/pytube/issues/165
        stream_maps = ["url_encoded_fmt_stream_map"]
        if "adaptive_fmts" in self.player_config_args:
            stream_maps.append("adaptive_fmts")

        # unscramble the progressive and adaptive stream manifests.
        for fmt in stream_maps:
            if not self.age_restricted and fmt in self.vid_info:
                apply_descrambler(self.vid_info, fmt)
            apply_descrambler(self.player_config_args, fmt)

            if not self.js:
                if not self.embed_html:
                    self.embed_html = request.get(url=self.embed_url)
                self.js_url = extract.js_url(self.embed_html)
                self.js = request.get(self.js_url)

            apply_signature(self.player_config_args, fmt, self.js)

            # build instances of :class:`Stream <Stream>`
            self.initialize_stream_objects(fmt)

        # load the player_response object (contains subtitle information)
        self.player_response = json.loads(self.player_config_args["player_response"])
        del self.player_config_args["player_response"]
        self.stream_monostate.title = self.title
        self.stream_monostate.duration = self.length

        logger.info("init finished successfully")
Example 8
    def descramble(self) -> None:
        """Descramble the stream data and build Stream instances.

        The initialization process takes advantage of Python's
        "call-by-reference evaluation," which allows dictionary transforms to
        be applied in-place, instead of holding references to mutations at each
        interstitial step.

        :rtype: None

        """
        self.vid_info = dict(parse_qsl(self.vid_info_raw))
        self.player_config_args = self.vid_info
        self.player_response = json.loads(self.vid_info['player_response'])

        # On pre-signed videos, we need to use get_ytplayer_config to fix
        #  the player_response item
        if 'streamingData' not in self.player_config_args['player_response']:
            config_response = get_ytplayer_config(self.watch_html)
            if 'args' in config_response:
                self.player_config_args['player_response'] = config_response['args']['player_response']  # noqa: E501
            else:
                self.player_config_args['player_response'] = config_response

        # https://github.com/nficano/pytube/issues/165
        stream_maps = ["url_encoded_fmt_stream_map"]
        if "adaptive_fmts" in self.player_config_args:
            stream_maps.append("adaptive_fmts")

        # unscramble the progressive and adaptive stream manifests.
        for fmt in stream_maps:
            if not self.age_restricted and fmt in self.vid_info:
                apply_descrambler(self.vid_info, fmt)
            apply_descrambler(self.player_config_args, fmt)

            if not self.js:
                if not self.embed_html:
                    self.embed_html = request.get(url=self.embed_url)
                self.js_url = extract.js_url(self.embed_html)
                self.js = request.get(self.js_url)

            apply_signature(self.player_config_args, fmt, self.js)

            # build instances of :class:`Stream <Stream>`
            self.initialize_stream_objects(fmt)

        # load the player_response object (contains subtitle information)
        if isinstance(self.player_config_args["player_response"], str):
            self.player_response = json.loads(
                self.player_config_args["player_response"]
            )
        else:
            self.player_response = self.player_config_args["player_response"]
        del self.player_config_args["player_response"]
        self.stream_monostate.title = self.title
        self.stream_monostate.duration = self.length
Example 9
    def prefetch(self, multithread=True):
        """Eagerly download all necessary data.

        Eagerly executes all necessary network requests so all other
        operations don't need to make calls outside of the interpreter
        which blocks for long periods of time.

        :rtype: None

        """
        if multithread:
            threads, results = [None] * 2, [None] * 2
            for i, url in enumerate([self.watch_url, self.embed_url]):
                threads[i] = Thread(target=self.do_get, args=(url, results, i))
                threads[i].start()
            for i in range(len(threads)):
                threads[i].join()
            self.watch_html, self.embed_html = results
        else:
            self.watch_html = request.get(url=self.watch_url)
            self.embed_html = request.get(url=self.embed_url)
        if '<img class="icon meh" src="/yts/img' not in self.watch_html:
            raise VideoUnavailable('This video is unavailable.')
        self.age_restricted = extract.is_age_restricted(self.watch_html)
        self.vid_info_url = extract.video_info_url(
            video_id=self.video_id,
            watch_url=self.watch_url,
            watch_html=self.watch_html,
            embed_html=self.embed_html,
            age_restricted=self.age_restricted,
        )
        if multithread:
            threads, results = [None] * 2, [None] * 2
            threads[0] = Thread(target=self.do_get, args=(self.vid_info_url, results, 0))
            threads[0].start()
        else:
            self.vid_info = request.get(self.vid_info_url)
        if not self.age_restricted:
            self.js_url = extract.js_url(self.watch_html, self.age_restricted)
            if multithread:
                threads[1] = Thread(target=self.do_get, args=(self.js_url, results, 1))
                threads[1].start()
                threads[0].join()
                threads[1].join()
            else:
                self.js = request.get(self.js_url)
        elif multithread:
            threads[0].join()
        if multithread:
            self.vid_info, self.js = results
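
The multithreaded branch above depends on a do_get helper that is not shown. A minimal sketch of what it is assumed to do: fetch one URL and drop the body into a shared results list so several threads can fill it concurrently.

    def do_get(self, url, results, index):
        # assumed helper: store the response body at results[index] so that
        # Thread(target=self.do_get, args=(url, results, index)) can be
        # joined later and the bodies unpacked in order.
        results[index] = request.get(url=url)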
Example 10
    def download(self, output_path=None, filename=None):
        """Write the media stream to disk.

        :param output_path:
            (optional) Output path for writing media file. If one is not
            specified, defaults to the current working directory.
        :type output_path: str or None
        :param filename:
            (optional) Output filename (stem only) for writing media file.
            If one is not specified, the default filename is used.
        :type filename: str or None

        :rtype: None

        """
        output_path = output_path or os.getcwd()
        if filename:
            safe = safe_filename(filename)
            filename = '{filename}.{s.subtype}'.format(filename=safe, s=self)
        filename = filename or self.default_filename

        # file path
        fp = os.path.join(output_path, filename)
        bytes_remaining = self.filesize
        logger.debug(
            'downloading (%s total bytes) file to %s',
            self.filesize,
            fp,
        )

        tmpRangefp = None
        isTimeOut = False
        with open(fp, 'wb') as fh:
            while True:
                for chunk in request.get(self.url,
                                         streaming=True,
                                         conRangefp=tmpRangefp):

                    if not chunk:
                        # empty chunk: the stream timed out, flush and retry
                        print('streams time out, sleep 10s')
                        fh.flush()
                        time.sleep(10)
                        nfize = os.path.getsize(fp)
                        print(fp, nfize)
                        if nfize < self.filesize:
                            tmpRangefp = fp
                            isTimeOut = True
                            break
                    else:
                        tmpRangefp = None
                    # reduce the (bytes) remainder by the length of the chunk.
                    bytes_remaining -= len(chunk)
                    # send to the on_progress callback.
                    self.on_progress(chunk, fh, bytes_remaining)
                if isTimeOut:
                    isTimeOut = False
                    print(tmpRangefp)
                else:
                    self.on_complete(fh)
                    break
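
The conRangefp keyword passed to request.get above is a custom extension of this fork rather than standard pytube, so its behaviour is assumed. The underlying idea is an HTTP Range request that resumes from the bytes already written to disk; a standalone sketch of that idea with urllib, independent of pytube's request module (assumes the server honours Range headers):

import os
import urllib.request


def resume_download(url, fp, chunk_size=8192):
    """Append the missing tail of `url` to the partial file at `fp`."""
    start = os.path.getsize(fp) if os.path.exists(fp) else 0
    req = urllib.request.Request(url, headers={'Range': 'bytes=%d-' % start})
    with urllib.request.urlopen(req) as resp, open(fp, 'ab') as fh:
        while True:
            chunk = resp.read(chunk_size)
            if not chunk:
                break
            fh.write(chunk)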
Example 11
    def download(self, output_path=None):
        """Write the media stream to disk.

        :param output_path:
            (optional) Output path for writing media file. If one is not
            specified, defaults to the current working directory.
        :type output_path: str or None
        :rtype: None

        """
        # TODO(nficano): allow a filename to specified.
        output_path = output_path or os.getcwd()

        # file path
        fp = os.path.join(output_path, self.default_filename)
        bytes_remaining = self.filesize
        logger.debug(
            'downloading (%s total bytes) file to %s',
            self.filesize,
            fp,
        )

        with open(fp, 'wb') as fh:
            for chunk in request.get(self.url, streaming=True):
                # reduce the (bytes) remainder by the length of the chunk.
                bytes_remaining -= len(chunk)
                # send to the on_progress callback.
                self.on_progress(chunk, fh, bytes_remaining)
            self.on_complete(fh)
Example 12
    def download(self,
                 output_path=None,
                 filename=None,
                 filename_prefix=None,
                 only_url=None):
        """Write the media stream to disk.

        :param output_path:
            (optional) Output path for writing media file. If one is not
            specified, defaults to the current working directory.
        :type output_path: str or None
        :param filename:
            (optional) Output filename (stem only) for writing media file.
            If one is not specified, the default filename is used.
        :type filename: str or None
        :param filename_prefix:
            (optional) A string that will be prepended to the filename.
            For example a number in a playlist or the name of a series.
            If one is not specified, nothing will be prepended.
            This is separate from filename so you can use the default
            filename but still add a prefix.
        :type filename_prefix: str or None
        :param only_url:
            (optional) If truthy, skip the download and return the stream
            url instead of writing anything to disk.
        :type only_url: bool or None

        :rtype: str

        """

        if only_url:
            return self.url

        output_path = output_path or os.getcwd()
        if filename:
            safe = safe_filename(filename)
            filename = '{filename}.{s.subtype}'.format(filename=safe, s=self)
        filename = filename or self.default_filename

        if filename_prefix:
            filename = '{prefix}{filename}'.format(
                prefix=safe_filename(filename_prefix),
                filename=filename,
            )

        # file path
        fp = os.path.join(output_path, filename)
        bytes_remaining = self.filesize
        PLog(
            'streams: downloading (%s total bytes) file to %s',
            self.filesize,
            fp,
        )

        with open(fp, 'wb') as fh:
            for chunk in request.get(self.url, streaming=True):
                # reduce the (bytes) remainder by the length of the chunk.
                bytes_remaining -= len(chunk)
                # send to the on_progress callback.
                self.on_progress(chunk, fh, bytes_remaining)
        self.on_complete(fh)
        return fp
Example 13
    def __init__(self, url: str, proxies: Optional[Dict[str, str]] = None):
        if proxies:
            install_proxy(proxies)

        try:
            self.playlist_id: str = parse_qs(url.split("?")[1])["list"][0]
        except IndexError:  # assume that url is just the id
            self.playlist_id = url

        self.playlist_url = (
            f"https://www.youtube.com/playlist?list={self.playlist_id}")
        self.html = request.get(self.playlist_url)

        # Needs testing with non-English
        self.last_update: Optional[date] = None
        date_match = re.search(
            r"<li>Last updated on (\w{3}) (\d{1,2}), (\d{4})</li>", self.html)
        if date_match:
            month, day, year = date_match.groups()
            self.last_update = datetime.strptime(f"{month} {day:0>2} {year}",
                                                 "%b %d %Y").date()

        self._js_regex = re.compile(r"window\[\"ytInitialData\"] = ([^\n]+)")

        self._video_regex = re.compile(r"href=\"(/watch\?v=[\w-]*)")
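
A short usage sketch for the constructor above; the playlist URL is a placeholder and the proxies mapping is only an assumption about what install_proxy accepts.

pl = Playlist('https://www.youtube.com/playlist?list=PLxxxxxxxxxxxx')
print(pl.playlist_id)   # the extracted list id
print(pl.last_update)   # a datetime.date, or None if the pattern did not match

# Optionally route requests through a proxy (mapping format is an assumption):
pl = Playlist(
    'https://www.youtube.com/playlist?list=PLxxxxxxxxxxxx',
    proxies={'https': 'https://user:pass@proxy.example.com:8080'},
)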
Example 14
    def init(self):
        """Descramble the stream data and build Stream instances.

        The initialization process takes advantage of Python's
        "call-by-reference evaluation," which allows dictionary transforms to
        be applied in-place, instead of holding references to mutations at each
        interstitial step.

        :rtype: None

        """
        logger.info('init started')

        self.vid_info = {k: v for k, v in parse_qsl(self.vid_info)}
        if self.age_restricted:
            self.player_config_args = self.vid_info
        else:
            self.player_config_args = extract.get_ytplayer_config(
                self.watch_html, )['args']

            # Fix for KeyError: 'title' issue #434
            if 'title' not in self.player_config_args:
                i_start = (self.watch_html.lower().index('<title>') +
                           len('<title>'))
                i_end = self.watch_html.lower().index('</title>')
                title = self.watch_html[i_start:i_end].strip()
                index = title.lower().rfind(' - youtube')
                title = title[:index] if index > 0 else title
                self.player_config_args['title'] = title

        self.vid_descr = extract.get_vid_descr(self.watch_html)
        # https://github.com/nficano/pytube/issues/165
        stream_maps = ['url_encoded_fmt_stream_map']
        if 'adaptive_fmts' in self.player_config_args:
            stream_maps.append('adaptive_fmts')

        # unscramble the progressive and adaptive stream manifests.
        for fmt in stream_maps:
            if not self.age_restricted and fmt in self.vid_info:
                mixins.apply_descrambler(self.vid_info, fmt)
            mixins.apply_descrambler(self.player_config_args, fmt)

            try:
                mixins.apply_signature(self.player_config_args, fmt, self.js)
            except TypeError:
                self.js_url = extract.js_url(
                    self.embed_html,
                    self.age_restricted,
                )
                self.js = request.get(self.js_url)
                mixins.apply_signature(self.player_config_args, fmt, self.js)

            # build instances of :class:`Stream <Stream>`
            self.initialize_stream_objects(fmt)

        # load the player_response object (contains subtitle information)
        apply_mixin(self.player_config_args, 'player_response', json.loads)

        self.initialize_caption_objects()
        logger.info('init finished successfully')
Example 15
    def init(self):
        logger.info('init started')

        self.vid_info = {k: v for k, v in parse_qsl(self.vid_info)}
        if self.age_restricted:
            self.player_config_args = self.vid_info
        else:
            self.player_config_args = extract.get_ytplayer_config(
                self.watch_html, )['args']

            # ---> ADD THIS PART <---
            if 'title' not in self.player_config_args:
                # for more reliable parsing, use a dedicated HTML parser if available
                try:
                    from bs4 import BeautifulSoup
                    soup = BeautifulSoup(self.watch_html, 'lxml')
                    title = soup.title.get_text().strip()
                except ModuleNotFoundError:
                    # since this parsing is actually pretty simple, we may just
                    # parse it using index()
                    i_start = self.watch_html.lower().index('<title>') + len(
                        '<title>')
                    i_end = self.watch_html.lower().index('</title>')
                    title = self.watch_html[i_start:i_end].strip()
                # remove the ' - youtube' part that is added to the browser tab's title
                index = title.lower().rfind(' - youtube')
                title = title[:index] if index > 0 else title
                self.player_config_args['title'] = title
            # / ---> ADD THIS PART <---

        self.vid_descr = extract.get_vid_descr(self.watch_html)
        # https://github.com/nficano/pytube/issues/165
        stream_maps = ['url_encoded_fmt_stream_map']
        if 'adaptive_fmts' in self.player_config_args:
            stream_maps.append('adaptive_fmts')

        # unscramble the progressive and adaptive stream manifests.
        for fmt in stream_maps:
            if not self.age_restricted and fmt in self.vid_info:
                mixins.apply_descrambler(self.vid_info, fmt)
            mixins.apply_descrambler(self.player_config_args, fmt)

            try:
                mixins.apply_signature(self.player_config_args, fmt, self.js)
            except TypeError:
                self.js_url = extract.js_url(
                    self.embed_html,
                    self.age_restricted,
                )
                self.js = request.get(self.js_url)
                mixins.apply_signature(self.player_config_args, fmt, self.js)

            # build instances of :class:`Stream <Stream>`
            self.initialize_stream_objects(fmt)

        # load the player_response object (contains subtitle information)
        apply_mixin(self.player_config_args, 'player_response', json.loads)

        self.initialize_caption_objects()
        logger.info('init finished successfully')
Example 16
    def featured_channels_html(self):
        if self._featured_channels_html:
            return self._featured_channels_html
        else:
            self._featured_channels_html = request.get(
                self.featured_channels_url)
            return self._featured_channels_html
Example 17
    def _paginate(
        self, until_watch_id: Optional[str] = None
    ) -> Iterable[List[str]]:
        """Parse the video links from the page source, yields the /watch?v=
        part from video link

        :param until_watch_id Optional[str]: YouTube Video watch id until
            which the playlist should be read.

        :rtype: Iterable[List[str]]
        :returns: Iterable of lists of YouTube watch ids
        """
        req = self.html
        videos_urls, continuation = self._extract_videos(
            # extract the json located inside the window["ytInitialData"] js
            # variable of the playlist html page
            self._extract_json(req)
        )
        if until_watch_id:
            try:
                trim_index = videos_urls.index(f"/watch?v={until_watch_id}")
                yield videos_urls[:trim_index]
                return
            except ValueError:
                pass
        yield videos_urls

        # Extraction from a playlist only returns 100 videos at a time
        # if self._extract_videos returns a continuation there are more
        # than 100 songs inside a playlist, so we need to add further requests
        # to gather all of them
        if continuation:
            load_more_url, headers = self._build_continuation_url(continuation)
        else:
            load_more_url, headers = None, None

        while load_more_url and headers:  # there is a url found
            logger.debug("load more url: %s", load_more_url)
            # requesting the next page of videos with the url generated from the
            # previous page
            req = request.get(load_more_url, extra_headers=headers)
            # extract up to 100 songs from the page loaded
            # returns another continuation if more videos are available
            videos_urls, continuation = self._extract_videos(req)
            if until_watch_id:
                try:
                    trim_index = videos_urls.index(f"/watch?v={until_watch_id}")
                    yield videos_urls[:trim_index]
                    return
                except ValueError:
                    pass
            yield videos_urls

            if continuation:
                load_more_url, headers = self._build_continuation_url(
                    continuation
                )
            else:
                load_more_url, headers = None, None
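
A hedged sketch of how the generator above might be consumed; the playlist object and the watch id are placeholders, while the method names come straight from the snippet.

video_urls = []
for page in playlist._paginate():
    # each `page` is a list of "/watch?v=..." paths, at most ~100 per request
    video_urls.extend('https://www.youtube.com' + path for path in page)

# Or stop as soon as a known video id is reached (placeholder id):
head = []
for page in playlist._paginate(until_watch_id='XXXXXXXXXXX'):
    head.extend(page)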
Example 18
    def __init__(self, url: str, proxies: Optional[Dict[str, str]] = None):
        if proxies:
            install_proxy(proxies)

        self.playlist_id = extract.playlist_id(url)

        self.playlist_url = (
            f"https://www.youtube.com/playlist?list={self.playlist_id}")
        self.html = request.get(self.playlist_url)
Example 19
    def filesize(self):
        """File size of the media stream in bytes.

        :rtype: int
        :returns:
            Filesize (in bytes) of the stream.
        """
        headers = request.get(self.url, headers=True)
        return int(headers['content-length'])
Example 20
    def html(self):
        """Get the html for the /videos page.

        :rtype: str
        """
        if self._html:
            return self._html
        self._html = request.get(self.videos_url)
        return self._html
Example 21
    def parse_links(self):
        url = self.construct_playlist_url()
        req = request.get(url)

        # split the page source by line and process each line
        content = [x for x in req.split('\n') if 'pl-video-title-link' in x]
        link_list = [x.split('href="', 1)[1].split('&', 1)[0] for x in content]

        return link_list
Example 22
    def filesize(self):
        """File size of the media stream in bytes.

        :rtype: int
        :returns:
            Filesize (in bytes) of the stream.
        """
        headers = request.get(self.url, headers=True)
        return int(headers['content-length'])
Example 23
    def html(self):
        """Get the playlist page html.

        :rtype: str
        """
        if self._html:
            return self._html
        self._html = request.get(self.playlist_url)
        return self._html
Example 24
    def filesize(self) -> int:
        """File size of the media stream in bytes.

        :rtype: int
        :returns:
            Filesize (in bytes) of the stream.
        """
        if self._filesize is None:
            headers = request.get(self.url, headers=True)
            self._filesize = int(headers["content-length"])
        return self._filesize
Example 25
    def about_html(self):
        """Get the html for the /about page.

        Currently unused for any functionality.

        :rtype: str
        """
        if self._about_html:
            return self._about_html
        else:
            self._about_html = request.get(self.about_url)
            return self._about_html
Example 26
def getPlaylistTitle(url):
    req = request.get(url)
    open_tag = "<title>"
    end_tag = "</title>"
    matchresult = re.compile(open_tag + "(.+?)" + end_tag)
    matchresult = matchresult.search(req).group()
    matchresult = matchresult.replace(open_tag, "")
    matchresult = matchresult.replace(end_tag, "")
    matchresult = matchresult.replace("- YouTube", "")
    matchresult = matchresult.strip()

    return matchresult
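
Usage of the helper above is straightforward; the playlist URL is a placeholder.

title = getPlaylistTitle(
    'https://www.youtube.com/playlist?list=PLxxxxxxxxxxxx')
print(title)  # playlist title with the trailing "- YouTube" suffix stripped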
Example 27
    def community_html(self):
        """Get the html for the /community page.

        Currently unused for any functionality.

        :rtype: str
        """
        if self._community_html:
            return self._community_html
        else:
            self._community_html = request.get(self.community_url)
            return self._community_html
Example 28
    def prefetch(self):
        """Eagerly download all necessary data.

        Eagerly executes all necessary network requests so all other
        operations don't need to make calls outside of the interpreter
        which blocks for long periods of time.

        :rtype: None

        """
        self.watch_html = request.get(url=self.watch_url)
        if extract.is_age_restricted(self.watch_html):
            raise AgeRestrictionError('Content is age restricted')
        self.vid_info_url = extract.video_info_url(
            video_id=self.video_id,
            watch_url=self.watch_url,
            watch_html=self.watch_html,
        )
        self.js_url = extract.js_url(self.watch_html)
        self.js = request.get(self.js_url)
        self.vid_info = request.get(self.vid_info_url)
Example 29
    def playlists_html(self):
        """Get the html for the /playlists page.

        Currently unused for any functionality.

        :rtype: str
        """
        if self._playlists_html:
            return self._playlists_html
        else:
            self._playlists_html = request.get(self.playlists_url)
            return self._playlists_html
Example 30
    def featured_channels_html(self):
        """Get the html for the /channels page.

        Currently unused for any functionality.

        :rtype: str
        """
        if self._featured_channels_html:
            return self._featured_channels_html
        else:
            self._featured_channels_html = request.get(
                self.featured_channels_url)
            return self._featured_channels_html
Example 31
    def js(self):
        if self._js:
            return self._js

        # If the js_url doesn't match the cached url, fetch the new js and update
        #  the cache; otherwise, load the cache.
        if pytube.__js_url__ != self.js_url:
            self._js = request.get(self.js_url)
            pytube.__js__ = self._js
            pytube.__js_url__ = self.js_url
        else:
            self._js = pytube.__js__

        return self._js
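
The property above reads and updates module-level cache attributes on the pytube package (pytube.__js__ and pytube.__js_url__). A minimal sketch of how such a cache could be declared, shown as an assumption about the package layout rather than the library's actual source:

from typing import Optional

# hypothetical module-level cache (e.g. in pytube/__init__.py) shared by all
# YouTube instances so base.js is only downloaded once per player version
__js__: Optional[str] = None      # cached player javascript source
__js_url__: Optional[str] = None  # url the cached source was fetched from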
Example 32
    def prefetch(self):
        """Eagerly download all necessary data.

        Eagerly executes all necessary network requests so all other
        operations don't need to make calls outside of the interpreter
        which blocks for long periods of time.

        :rtype: None

        """
        self.watch_html = request.get(url=self.watch_url)
        self.embed_html = request.get(url=self.embed_url)
        self.age_restricted = extract.is_age_restricted(self.watch_html)
        self.vid_info_url = extract.video_info_url(
            video_id=self.video_id,
            watch_url=self.watch_url,
            watch_html=self.watch_html,
            embed_html=self.embed_html,
            age_restricted=self.age_restricted,
        )
        self.vid_info = request.get(self.vid_info_url)
        if not self.age_restricted:
            self.js_url = extract.js_url(self.watch_html)
            self.js = request.get(self.js_url)
Example 33
    def parse_links(self):
        """Parse the video links from the page source, extracts and
        returns the /watch?v= part from video link href
        It's an alternative for BeautifulSoup

        :return: list
        """

        url = self.construct_playlist_url()
        req = request.get(url)

        # split the page source by line and process each line
        content = [x for x in req.split('\n') if 'pl-video-title-link' in x]
        link_list = [x.split('href="', 1)[1].split('&', 1)[0] for x in content]

        return link_list
Example 34
    def download(self, output_path=None, filename=None):
        """Write the media stream to disk.

        :param output_path:
            (optional) Output path for writing media file. If one is not
            specified, defaults to the current working directory.
        :type output_path: str or None
        :param filename:
            (optional) Output filename (stem only) for writing media file.
            If one is not specified, the default filename is used.
        :type filename: str or None

        :rtype: None

        """
        output_path = output_path or os.getcwd()
        if filename:
            safe = safe_filename(filename)
            filename = '{filename}.{s.subtype}'.format(filename=safe, s=self)
        filename = filename or self.default_filename

        # file path
        fp = os.path.join(output_path, filename)
        bytes_remaining = self.filesize
        logger.debug(
            'downloading (%s total bytes) file to %s',
            self.filesize, fp,
        )

        with open(fp, 'wb') as fh:
            for chunk in request.get(self.url, streaming=True):
                # reduce the (bytes) remainder by the length of the chunk.
                bytes_remaining -= len(chunk)
                # send to the on_progress callback.
                self.on_progress(chunk, fh, bytes_remaining)
            self.on_complete(fh)
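
A short usage sketch matching the signature above; the stream object, output directory and filename stem are placeholders.

# download to ./downloads as "my_video.<subtype>" (names are placeholders)
stream.download(output_path='./downloads', filename='my_video')

# or fall back to the default filename in the current working directory
stream.download()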
Example 35
    def xml_captions(self):
        """Download the xml caption tracks."""
        return request.get(self.url)