Ejemplo n.º 1
0
    def __init__(
        self,
        url: str,
        defer_prefetch_init: bool = False,
        on_progress_callback: Optional[OnProgress] = None,
        on_complete_callback: Optional[OnComplete] = None,
        proxies: Optional[Dict[str, str]] = None,
    ):
        """Construct a :class:`YouTube <YouTube>`.

        :param str url:
            A valid YouTube watch URL.
        :param bool defer_prefetch_init:
            When true, skip the ``prefetch()`` / ``descramble()`` calls that
            otherwise run at the end of construction.
        :param func on_progress_callback:
            (Optional) User defined callback function for stream download
            progress events.
        :param func on_complete_callback:
            (Optional) User defined callback function for stream download
            complete events.
        :param dict proxies:
            (Optional) Mapping handed to ``install_proxy`` when it is
            non-empty.

        """

        self.js: Optional[str] = None  # js fetched by js_url
        self.js_url: Optional[str] = None  # the url to the js, parsed from watch html

        # note: vid_info may eventually be removed. It sounds like it once had
        # additional formats, but that doesn't appear to still be the case.

        # the url to vid info, parsed from watch html
        self.vid_info_url: Optional[str] = None
        self.vid_info_raw: Optional[str] = None  # content fetched by vid_info_url
        self.vid_info: Optional[Dict] = None  # parsed content of vid_info_raw

        self.watch_html: Optional[str] = None  # the html of /watch?v=<video_id>
        self.embed_html: Optional[str] = None
        # inline js in the html containing streams
        self.player_config_args: Dict = {}
        self.player_response: Dict = {}
        self.age_restricted: Optional[bool] = None

        self.fmt_streams: List[Stream] = []

        # video_id part of /watch?v=<video_id>
        self.video_id = extract.video_id(url)

        # Both URLs are rebuilt from the extracted id rather than reusing the
        # caller's ``url`` verbatim.
        self.watch_url = f"https://youtube.com/watch?v={self.video_id}"
        self.embed_url = f"https://www.youtube.com/embed/{self.video_id}"

        # Shared between all instances of `Stream` (Borg pattern).
        self.stream_monostate = Monostate(
            on_progress=on_progress_callback, on_complete=on_complete_callback
        )

        if proxies:
            install_proxy(proxies)

        if not defer_prefetch_init:
            self.prefetch()
            self.descramble()
Ejemplo n.º 2
0
    def __init__(
        self,
        url=None,
        defer_prefetch_init=False,
        on_progress_callback=None,
        on_complete_callback=None,
        proxies=None,
    ):
        """Construct a :class:`YouTube <YouTube>`.

        :param str url:
            A valid YouTube watch URL. NOTE(review): the default of ``None``
            is passed straight to ``extract.video_id`` -- confirm that call
            tolerates ``None``.
        :param bool defer_prefetch_init:
            When true, skip the ``prefetch_init()`` call that otherwise runs
            at the end of construction.
        :param func on_progress_callback:
            (Optional) User defined callback function for stream download
            progress events.
        :param func on_complete_callback:
            (Optional) User defined callback function for stream download
            complete events.
        :param dict proxies:
            (Optional) Value handed to ``install_proxy`` when it is truthy.

        """
        self.js = None  # js fetched by js_url
        self.js_url = None  # the url to the js, parsed from watch html

        # note: vid_info may eventually be removed. It sounds like it once had
        # additional formats, but that doesn't appear to still be the case.

        self.vid_info = None  # content fetched by vid_info_url
        self.vid_info_url = None  # the url to vid info, parsed from watch html

        self.watch_html = None  # the html of /watch?v=<video_id>
        self.embed_html = None
        # inline js in the html containing streams
        self.player_config_args = None
        self.age_restricted = None

        self.fmt_streams = []  # list of :class:`Stream <Stream>` instances
        self.caption_tracks = []

        # video_id part of /watch?v=<video_id>
        self.video_id = extract.video_id(url)

        # https://www.youtube.com/watch?v=<video_id>
        self.watch_url = extract.watch_url(self.video_id)

        self.embed_url = extract.embed_url(self.video_id)
        # A dictionary shared between all instances of :class:`Stream <Stream>`
        # (Borg pattern).
        self.stream_monostate = {
            # user defined callback functions.
            'on_progress': on_progress_callback,
            'on_complete': on_complete_callback,
        }

        if proxies:
            install_proxy(proxies)

        if not defer_prefetch_init:
            self.prefetch_init()
Ejemplo n.º 3
0
 def set_video(self, vurl):
     """Switch the current video to ``vurl`` unless it is already loaded.

     The video is considered already loaded when ``self.video`` is truthy
     and its ``video_id`` equals the id extracted from ``vurl``. Otherwise
     a new ``Youtube`` object is created and this object's progress and
     completion handlers are registered on it.
     """
     already_loaded = (
         bool(self.video)
         and self.video.video_id == extract.video_id(vurl)
     )
     if not already_loaded:
         self.curr_url = vurl
         self.video = Youtube(vurl)
         self.video.register_on_progress_callback(self.update_progress)
         self.video.register_on_complete_callback(self.on_download_completed)
Ejemplo n.º 4
0
    def __init__(self,
                 url: str,
                 on_progress_callback: Optional[Callable[[Any, bytes, int],
                                                         None]] = None,
                 on_complete_callback: Optional[Callable[[Any, Optional[str]],
                                                         None]] = None,
                 proxies: Optional[Dict[str, str]] = None,
                 use_oauth: bool = False,
                 allow_oauth_cache: bool = True):
        """Construct a :class:`YouTube <YouTube>`.

        The constructor only extracts the video id and initialises state;
        it makes no ``prefetch``/``descramble`` style call itself.

        :param str url:
            A valid YouTube watch URL.
        :param func on_progress_callback:
            (Optional) User defined callback function for stream download
            progress events.
        :param func on_complete_callback:
            (Optional) User defined callback function for stream download
            complete events.
        :param dict proxies:
            (Optional) Mapping handed to ``install_proxy`` when it is
            non-empty.
        :param bool use_oauth:
            Stored on the instance as ``self.use_oauth``; it is not used
            further inside this constructor.
        :param bool allow_oauth_cache:
            Stored on the instance as ``self.allow_oauth_cache``; it is not
            used further inside this constructor.

        """
        self._js: Optional[str] = None  # js fetched by js_url
        # the url to the js, parsed from watch html
        self._js_url: Optional[str] = None

        # content fetched from innertube/player
        self._vid_info: Optional[Dict] = None

        # the html of /watch?v=<video_id>
        self._watch_html: Optional[str] = None
        self._embed_html: Optional[str] = None
        # inline js in the html containing streams
        self._player_config_args: Optional[Dict] = None
        self._age_restricted: Optional[bool] = None

        self._fmt_streams: Optional[List[Stream]] = None

        self._initial_data = None
        self._metadata: Optional[YouTubeMetadata] = None

        # video_id part of /watch?v=<video_id>
        self.video_id = extract.video_id(url)

        # Both URLs are rebuilt from the extracted id rather than reusing the
        # caller's ``url`` verbatim.
        self.watch_url = f"https://youtube.com/watch?v={self.video_id}"
        self.embed_url = f"https://www.youtube.com/embed/{self.video_id}"

        # Shared between all instances of `Stream` (Borg pattern).
        self.stream_monostate = Monostate(on_progress=on_progress_callback,
                                          on_complete=on_complete_callback)

        if proxies:
            install_proxy(proxies)

        self._author = None
        self._title = None
        self._publish_date = None

        self.use_oauth = use_oauth
        self.allow_oauth_cache = allow_oauth_cache
Ejemplo n.º 5
0
    def __init__(
        self,
        url: str,
        on_progress_callback: Optional[Callable[[Any, bytes, int], None]] = None,
        on_complete_callback: Optional[Callable[[Any, Optional[str]], None]] = None,
        proxies: Optional[Dict[str, str]] = None,
    ):
        """Construct a :class:`YouTube <YouTube>`.

        The constructor only extracts the video id and initialises state;
        it makes no ``prefetch``/``descramble`` style call itself.

        :param str url:
            A valid YouTube watch URL.
        :param func on_progress_callback:
            (Optional) User defined callback function for stream download
            progress events.
        :param func on_complete_callback:
            (Optional) User defined callback function for stream download
            complete events.
        :param dict proxies:
            (Optional) Mapping handed to ``install_proxy`` when it is
            non-empty.

        """
        self._js: Optional[str] = None  # js fetched by js_url
        self._js_url: Optional[str] = None  # the url to the js, parsed from watch html

        # note: vid_info may eventually be removed. It sounds like it once had
        # additional formats, but that doesn't appear to still be the case.

        # the url to vid info, parsed from watch html
        self._vid_info_url: Optional[str] = None
        self._vid_info_raw: Optional[str] = None  # content fetched by vid_info_url
        self._vid_info: Optional[Dict] = None  # parsed content of vid_info_raw

        self._watch_html: Optional[str] = None  # the html of /watch?v=<video_id>
        self._embed_html: Optional[str] = None
        # inline js in the html containing streams
        self._player_config_args: Optional[Dict] = None
        self._player_response: Optional[Dict] = None
        self._age_restricted: Optional[bool] = None

        self._fmt_streams: Optional[List[Stream]] = None

        self._initial_data = None
        self._metadata: Optional[YouTubeMetadata] = None

        # video_id part of /watch?v=<video_id>
        self.video_id = extract.video_id(url)

        # Both URLs are rebuilt from the extracted id rather than reusing the
        # caller's ``url`` verbatim.
        self.watch_url = f"https://youtube.com/watch?v={self.video_id}"
        self.embed_url = f"https://www.youtube.com/embed/{self.video_id}"

        # Shared between all instances of `Stream` (Borg pattern).
        self.stream_monostate = Monostate(
            on_progress=on_progress_callback, on_complete=on_complete_callback
        )

        if proxies:
            install_proxy(proxies)

        self._author = None
        self._title = None
        self._publish_date = None
Ejemplo n.º 6
0
def get_youtube_id(url):
    """Return the video id extracted from ``url``, or ``False`` on no match.

    ``False`` is returned only when ``extract.video_id`` raises
    ``exceptions.RegexMatchError``; any other exception propagates.
    """
    try:
        # Pull out the ID portion of the URL.
        return extract.video_id(url)
    except exceptions.RegexMatchError:
        return False
Ejemplo n.º 7
0
def ppytube_download(url: str, mime_type: str):
    """Download one stream of a YouTube video, exiting the process on failure.

    Steps, each wrapped so that any error terminates via ``sys.exit`` with
    a descriptive message:

    1. Validate ``url`` with ``pytube.extract.video_id``.
    2. Build a ``YouTube`` object and run ``prefetch()`` / ``descramble()``.
    3. Keep the streams whose ``mime_type`` starts with ``mime_type``; if
       more than one remains, list them and prompt for an itag (an empty
       answer keeps the first stream).
    4. Download the chosen stream.

    :param str url: URL of the video to download.
    :param str mime_type: Prefix that a stream's ``mime_type`` must start
        with to be a candidate.
    """
    try:
        from pytube import extract
        extract.video_id(url)
    except Exception as e:
        sys.exit("Unsupported URL -- {}\n{}".format(url, e))

    try:
        #object creation using YouTube which was imported in the beginning
        yt = YouTube(url, defer_prefetch_init=True)
        yt.prefetch()
        yt.descramble()
    except Exception as e:
        sys.exit("YouTube Init Error -- {}\n{}".format(url, e))

    if len(yt.streams) == 0:
        sys.exit("No streams")

    try:
        print('{}'.format(yt.title))
        streams = [
            st for st in yt.streams if st.mime_type.startswith(mime_type)
        ]
        # An empty candidate list raises IndexError here, which the handler
        # below reports as "No stream specified".
        stream = streams[0]
        if len(streams) > 1:
            from pprint import pprint
            for st in streams:
                pprint(st)
            itag = input("Select itag (default:{}): ".format(stream.itag))
            if itag:
                # Compare by value. The previous `is` tested object identity,
                # which only happens to work for ints inside CPython's
                # small-integer cache and silently fails for larger itags.
                stream = [st for st in streams if st.itag == int(itag)][0]
    except Exception as e:
        sys.exit("No stream specified\n{}".format(e))

    try:
        print('From {}'.format(stream.url))
        stream.download()
    except Exception as e:
        sys.exit("Download Failed! -- {}\n{}".format(url, e))

    print('Download Completed!')
Ejemplo n.º 8
0
    def handle(self, **kwargs):
        """Download and analyse every URL in ``kwargs['url']`` not yet stored.

        For each URL a ``Video i/N`` progress line is printed. URLs whose
        extracted id already has a ``Video`` row are skipped; the rest are
        downloaded, turned into images, and run through object detection.
        """
        videos = kwargs['url']
        for position, video in enumerate(videos, start=1):
            print(f'Video {position}/{len(videos)}')
            vid_id = extract.video_id(video)  # if not a yt url, will fail here
            if Video.objects.filter(video_id=vid_id).exists():
                continue
            emd.download_video(video)
            images_path = od.extract_images(vid_id)
            od.detect_objects(vid_id, images_path)
Ejemplo n.º 9
0
    def __init__(
        self, url=None, defer_prefetch_init=False, on_progress_callback=None,
        on_complete_callback=None,
    ):
        """Construct a :class:`YouTube <YouTube>`.

        :param str url:
            A valid YouTube watch URL. NOTE(review): the default of ``None``
            is passed straight to ``extract.video_id`` -- confirm that call
            tolerates ``None``.
        :param bool defer_prefetch_init:
            When true, skip the ``prefetch_init()`` call that otherwise runs
            at the end of construction.
        :param func on_progress_callback:
            (Optional) User defined callback function for stream download
            progress events.
        :param func on_complete_callback:
            (Optional) User defined callback function for stream download
            complete events.

        """
        self.js = None      # js fetched by js_url
        self.js_url = None  # the url to the js, parsed from watch html

        # note: vid_info may eventually be removed. It sounds like it once had
        # additional formats, but that doesn't appear to still be the case.

        self.vid_info = None      # content fetched by vid_info_url
        self.vid_info_url = None  # the url to vid info, parsed from watch html

        self.watch_html = None     # the html of /watch?v=<video_id>
        # inline js in the html containing streams
        self.player_config_args = None
        self.age_restricted = None

        self.fmt_streams = []  # list of :class:`Stream <Stream>` instances
        self.caption_tracks = []

        # video_id part of /watch?v=<video_id>
        self.video_id = extract.video_id(url)

        # https://www.youtube.com/watch?v=<video_id>
        self.watch_url = extract.watch_url(self.video_id)

        self.embed_url = extract.embed_url(self.video_id)
        # A dictionary shared between all instances of :class:`Stream <Stream>`
        # (Borg pattern).
        self.stream_monostate = {
            # user defined callback functions.
            'on_progress': on_progress_callback,
            'on_complete': on_complete_callback,
        }

        if not defer_prefetch_init:
            self.prefetch_init()
Ejemplo n.º 10
0
def addVideo(request):
	"""Validate a submitted video URL and queue it for analysis.

	On POST, reads ``request.POST['url']`` and returns a ``JsonResponse``
	with two flags:

	* ``video_exists`` -- a ``Video`` row with the extracted id already exists.
	* ``is_valid`` -- the URL was new and a ``YouTube`` object could be built
	  for it; only then is ``analyzevideo.delay(url)`` called.

	Any ordinary error during validation leaves the flags at the values they
	had when the error occurred instead of failing the request.

	NOTE(review): for non-POST requests the function falls through and
	returns ``None`` -- confirm whether the caller/router guarantees POST.
	"""
	if request.method == "POST":
		url = request.POST['url']
		is_valid = False
		video_exists = False
		try:
			vid_id = extract.video_id(url)
			video_exists = Video.objects.filter(video_id=vid_id).exists()
			if not video_exists:
				# Constructed only for validation; the object is not needed.
				YouTube(url)
				is_valid = True
				analyzevideo.delay(url)
		except Exception:
			# Deliberate best-effort: an invalid or unreachable URL is
			# reported through the flags. `Exception` (not a bare except)
			# lets SystemExit/KeyboardInterrupt propagate.
			pass
		return JsonResponse({'is_valid': is_valid, 'video_exists': video_exists})
def details():
    """Render the details page for the video named by the ``url`` query arg.

    The ``YouTube`` object is looked up in ``pytube_cache`` by video id and
    created and cached on a miss. An empty ``url`` flashes an error message
    instead.
    """
    url = request.args['url']

    if not url:
        flash('Video link is required.')
        # NOTE(review): no response is produced on this path (the function
        # returns None) -- confirm this is what the framework expects.
        return None

    video_id = extract.video_id(url)
    video = pytube_cache.get(video_id)
    if video is None:
        video = YouTube(url)
        pytube_cache.set(video_id, video)

    return render_template('youtube_downloader/details.html', video=video)
Ejemplo n.º 12
0
def get_youtube_id(url):
    """Extract the YouTube video id from a user-supplied URL.

    Args:
        url (string): user input url

    Returns:
        The video id found in ``url``, or ``False`` when
        ``extract.video_id`` raises ``exceptions.RegexMatchError``.

    """
    try:
        video_id = extract.video_id(url)
    except exceptions.RegexMatchError:
        return False
    else:
        return video_id
Ejemplo n.º 13
0
    def __init__(
        self,
        url,
        on_progress_callback: Optional[OnProgress] = None,
        on_complete_callback: Optional[OnComplete] = None,
    ):
        """Don't construct the YouTube class directly. Use create().

        This constructor only extracts the video id from ``url`` and
        initialises instance state; it does not call any fetch method.

        :param url:
            Value passed to ``extract.video_id`` to obtain the video id.
        :param func on_progress_callback:
            (Optional) Callback forwarded to ``Monostate`` as ``on_progress``.
        :param func on_complete_callback:
            (Optional) Callback forwarded to ``Monostate`` as ``on_complete``.
        """

        self.js: Optional[str] = None  # js fetched by js_url
        self.js_url: Optional[
            str] = None  # the url to the js, parsed from watch html

        # note: vid_info may eventually be removed. It sounds like it once had
        # additional formats, but that doesn't appear to still be the case.

        # the url to vid info, parsed from watch html
        self.vid_info_url: Optional[str] = None
        self.vid_info_raw: Optional[
            str] = None  # content fetched by vid_info_url
        self.vid_info: Optional[Dict] = None  # parsed content of vid_info_raw

        self.watch_html: Optional[
            str] = None  # the html of /watch?v=<video_id>
        self.embed_html: Optional[str] = None
        # inline js in the html containing streams
        self.player_config_args: Dict = {}
        self.player_response: Dict = {}
        self.age_restricted: Optional[bool] = None

        self.fmt_streams: List[Stream] = []

        # video_id part of /watch?v=<video_id>
        self.video_id = extract.video_id(url)

        # Both URLs are rebuilt from the extracted id rather than reusing the
        # caller's ``url`` verbatim.
        self.watch_url = f"https://youtube.com/watch?v={self.video_id}"
        self.embed_url = f"https://www.youtube.com/embed/{self.video_id}"

        # Shared between all instances of `Stream` (Borg pattern).
        self.stream_monostate = Monostate(on_progress=on_progress_callback,
                                          on_complete=on_complete_callback)
Ejemplo n.º 14
0
 async def descarga(self, ctx, url=None):
     '''
     Introduce una url de un video de YT y se te redirigirá
     a otra pagina para descargar tal video en .mp3 o .mp4...
     '''
     # NOTE(review): the docstring above is deliberately left untranslated --
     # presumably it is surfaced to users as command help text; confirm.
     #
     # Sends a link to an external converter page for the given YouTube URL.
     # Without a URL, a failure notice is sent instead (deleted after 60 s).
     if url is None:
         await typing_sleep(ctx)
         await ctx.send("No se pudo convertir con exito el video...",
                        delete_after=60.0)
         print(
             f"cmdDescarga||            {ctx.author.name} no pudo descargar un video..."
         )
         return

     # Named `video_id` so the builtin `id` is not shadowed.
     video_id = extract.video_id(url)
     downl_url = f"https://www.y2mate.com/es/convert-youtube/{video_id}"
     await typing_sleep(ctx)
     await ctx.message.delete()
     await ctx.send(
         f"Aqui esta el video listo para ser descargado: {downl_url}")
     print(
         f"cmdDescarga||            {ctx.author.name} descargo un video..."
     )
Ejemplo n.º 15
0
def download_video(url):
    """Download a video as mp4 and store its metadata in a ``Video`` row.

    English captions (``'en'``, falling back to ``'a.en'``) are converted
    with ``extract_captions``; when neither track exists an empty string
    is converted instead. The mp4 stream that sorts first by descending
    resolution is saved under ``<two directories above this file>/videos``
    using the video id as filename.
    """
    # TODO: wrap in try -- construction fails if the video does not
    # exist / is not valid / has been deleted.
    video = YouTube(url)

    xml_captions = ''
    for code in ('en', 'a.en'):
        if code in video.captions:
            xml_captions = video.captions[code].xml_captions
            break

    str_captions = extract_captions(xml_captions)
    vid_id = extract.video_id(url)

    print('Downloading video...')
    videos_path = Path(__file__).resolve().parents[1] / 'videos'
    mp4_streams = video.streams.filter(file_extension='mp4')
    best_stream = mp4_streams.order_by('resolution').desc().first()
    best_stream.download(videos_path, vid_id)

    fields = {
        'title': video.title,
        'description': video.description,
        'upload_date': extract_upload_date(video),
        'user': video.author,
        'captions': str_captions,
        'thumbnail_url': video.thumbnail_url,
        'video_url': url,
        'length': video.length,
        'views': video.views,
        'video_id': vid_id,
    }
    Video(**fields).save()

    print('Video saved to database')
Ejemplo n.º 16
0
def test_extract_video_id():
    """``extract.video_id`` returns the ``v`` value of a watch URL."""
    expected = "2lAe1cqCOXo"
    watch_url = f"https://www.youtube.com/watch?v={expected}"
    assert extract.video_id(watch_url) == expected
Ejemplo n.º 17
0
from pytube import extract
import pandas as pd
from pathlib import Path

# Module-level accumulators; filled only when the file is run as a script.
url_list = []
video_id = []
if __name__ == "__main__":
    # Read one URL per line from links.txt in the current working directory.
    urls = Path('./links.txt').read_text(encoding='utf-8')
    # splitlines() plus the blank-line filter keeps a trailing newline or an
    # empty line from reaching extract.video_id as an empty string, which
    # split("\n") previously allowed.
    url_list += [line.strip() for line in urls.splitlines() if line.strip()]
    for url in url_list:
        video_id.append(extract.video_id(url))
    print(video_id)
    print(len(video_id))
Ejemplo n.º 18
0
    def load_streams(self):
        """Load the video(s) behind the entered URL and fill the stream tree.

        Phases, in order:

        1. Spin until the download manager reports at most one thread.
        2. Reset the manager's ``videos``, ``streams``, ``stream_tree`` and
           ``streams_to_download``.
        3. Try the URL as a single video. On ``RegexMatchError`` treat it as
           a playlist if the URL text contains ``'playlist'``; otherwise
           emit an error and stop.
        4. For every loaded video, build a tree item with one child per
           stream (audio-only streams grouped under an "Audio Only" node).

        Overall progress (``sig_progress_total``) uses 0-50 for playlist
        video loading and 50-100 for stream loading. ``self.__abort`` is
        polled between steps; when set, the method emits 'Aborted!' and
        ``sig_done`` and returns early. ``sig_done`` is emitted on every
        exit path written in this method.
        """
        # Busy-wait (no sleep) until other threads have finished; a step
        # signal is emitted on every iteration.
        while self.__download_manager.thread_count > 1:
            self.sig_step.emit(self.id, 'Waiting for threads to clear...')
        thread_name = QThread.currentThread().objectName()
        thread_id = int(QThread.currentThreadId())
        self.sig_step.emit(self.id, f'{thread_id}: {thread_name} thread starting...')
        self.__download_manager.videos = []
        self.__download_manager.streams = []
        proxies = self.__download_manager.get_proxies()
        top_level_item_count = self.__download_manager.stream_tree.topLevelItemCount()
        # NOTE(review): taking index i while earlier items were already
        # removed presumably skips entries; the clear() call right after
        # empties the tree regardless -- confirm the loop is still needed.
        for i in range(top_level_item_count):
            self.__download_manager.stream_tree.takeTopLevelItem(i)
        self.__download_manager.stream_tree.clear()
        self.__download_manager.streams_to_download = {}
        try:
            # extract.video_id is called so that a non-video URL raises
            # RegexMatchError and is routed to the playlist branch below.
            print('get video id')
            print(extract.video_id(self.__download_manager.url.text()))
            self.sig_step.emit(self.id, f'Loading video')
            loaded_url = YouTube(self.__download_manager.url.text(), proxies=proxies)
            self.sig_step.emit(self.id, f'Loaded video: {loaded_url.title}')
            self.sig_msg.emit(f'Found {loaded_url.title}')
            if self.__abort:
                # NOTE(review): sig_progress_status receives a str here but
                # ints elsewhere in this method -- confirm the signal's
                # declared signature accepts both.
                self.sig_progress_status.emit(f'Aborted!')
                self.sig_done.emit(self.id)
                return
            self.__download_manager.videos.append(loaded_url)

        except RegexMatchError:
            # Exceptions raised inside this handler are NOT caught by the
            # `except Exception` clause below; they propagate to the caller.
            print('playlist')
            if 'playlist' in self.__download_manager.url.text():
                # Return value is discarded; the call only matters for the
                # exception it may raise. NOTE(review): presumably a
                # validity check on the list id -- confirm intent.
                regex_search(r'(?:list=|\/)([0-9A-Za-z_-]{11}).*', self.__download_manager.url.text(), group=1)
                loaded_url = Playlist(self.__download_manager.url.text())
                self.sig_msg.emit(f'Loaded playlist. Discovering videos...')
                loaded_url.populate_video_urls()
                i = 0
                self.sig_progress_status.emit(0)

                for video_url in loaded_url.video_urls:
                    self.sig_step.emit(self.id, f'Loading video {i}')
                    if self.__abort:
                        self.sig_progress_status.emit(f'Aborted!')
                        self.sig_done.emit(self.id)
                        return
                    # Video loading covers the first half (0-50) of the
                    # overall progress, hence the `* 2` in the divisor.
                    self.sig_progress_total.emit(int((i / (len(loaded_url.video_urls) * 2)) * 100))
                    vid = YouTube(video_url, proxies=proxies)
                    self.sig_step.emit(self.id, f'Loaded video: {vid.title}')
                    if self.__abort:
                        self.sig_progress_status.emit(f'Aborted!')
                        self.sig_done.emit(self.id)
                        return
                    self.sig_msg.emit(f'Found {vid.title}')

                    self.__download_manager.videos.append(vid)
                    self.sig_progress_status.emit(int((i / len(loaded_url.video_urls)) * 100))
                    i += 1
                self.sig_progress_total.emit(50)
            else:
                self.sig_error.emit('Could not determine Video '
                                    'or Playlist ID from provided URL!\n'
                                    'Please check input!')
                self.sig_done.emit(self.id)
                return
        except Exception as e:
            # Any other failure while loading the single video is reported
            # through sig_error and ends the run.
            self.sig_error.emit(str(e))
            self.sig_done.emit(self.id)
            return

        self.sig_msg.emit(f'Loading Streams..')
        print('loading streams')
        i = 0
        for video in self.__download_manager.videos:
            self.sig_progress_status.emit(0)
            self.sig_step.emit(self.id, f'Loading streams for video {i}')
            if self.__abort:
                self.sig_progress_status.emit(f'Aborted!')
                self.sig_done.emit(self.id)
                return
            audio_streams = QTreeWidgetItem(['Audio Only'])
            tree_item = StreamTreeWidgetItem([video.title], f'video_{i}',
                                             self.__download_manager, video, None)
            # Overwritten for each video, so after the loop the manager's
            # `streams` holds only the last video's streams.
            self.__download_manager.streams = video.streams.all()
            x = 0
            for stream in self.__download_manager.streams:
                self.sig_step.emit(self.id, f'Loading stream {x}')
                if self.__abort:
                    self.sig_progress_status.emit(f'Aborted!')
                    self.sig_done.emit(self.id)
                    return
                self.sig_msg.emit(f'Video {i + 1}/{len(self.__download_manager.videos)}: '
                                  f'Loading Stream ITAG ID: {stream.itag}')
                # A stream without a video codec is treated as audio-only
                # and grouped under the "Audio Only" node.
                if stream.video_codec is None:
                    stream_item = StreamTreeWidgetItem([
                        f'Codec: {stream.audio_codec}, '
                        f'ABR: {stream.abr}, '
                        f'File Type: {stream.mime_type.split("/")[1]}, '
                        f'Size: {stream.filesize // 1024} KB'
                    ], f'video_{i}_stream{x}',
                       self.__download_manager, video, stream)
                    self.sig_step.emit(self.id, f'Loaded stream {x}')
                    if self.__abort:
                        self.sig_progress_status.emit(f'Aborted!')
                        self.sig_done.emit(self.id)
                        return
                    audio_streams.addChild(stream_item)
                else:
                    stream_item = StreamTreeWidgetItem([
                        f'Res: {stream.resolution}, FPS: {stream.fps}, '
                        f' Video Codec: {stream.video_codec}, Audio Codec: {stream.audio_codec}, '
                        f'File Type: {stream.mime_type.split("/")[1]}, '
                        f'Size: {stream.filesize // 1024} KB'
                    ], f'video_{i}_stream{x}',
                       self.__download_manager, video, stream)
                    self.sig_step.emit(self.id, f'Loaded stream {x}')
                    if self.__abort:
                        self.sig_progress_status.emit(f'Aborted!')
                        self.sig_done.emit(self.id)
                        return
                    tree_item.addChild(stream_item)
                stream_item.setCheckState(0, Qt.Unchecked)
                x += 1
                self.sig_progress_status.emit(int((x / len(self.__download_manager.streams)) * 100))
            # The audio group is appended after all video-stream children.
            tree_item.addChild(audio_streams)
            self.sig_step.emit(self.id, f'Adding video {i} to tree')
            if self.__abort:
                self.sig_progress_status.emit(f'Aborted!')
                self.sig_done.emit(self.id)
                return
            self.__download_manager.stream_tree.addTopLevelItem(tree_item)
            i += 1
            self.sig_progress_status.emit(100)
            # Stream loading covers the second half (50-100) of the overall
            # progress.
            self.sig_progress_total.emit(int((i / (len(self.__download_manager.videos) * 2)) * 100) + 50)
        self.sig_msg.emit(f'Streams Loaded!')
        self.sig_done.emit(self.id)
Ejemplo n.º 19
0
def download_func():
    """Redirect a POSTed video URL to the ``downloading_func`` endpoint.

    The submitted ``url`` form field is reduced to its video id, which is
    passed on as the ``link`` argument. Non-POST requests produce no
    response (``None``).
    """
    if request.method != "POST":
        return None
    link = video_id(request.form["url"])
    target = url_for('downloading_func', link=link)
    return redirect(target)
Ejemplo n.º 20
0
def test_extract_video_id():
    """The id after ``v=`` in a watch URL is what ``extract.video_id`` returns."""
    expected_id = "9bZkp7q19f0"
    extracted = extract.video_id("https://www.youtube.com/watch?v=" + expected_id)
    assert extracted == expected_id
Ejemplo n.º 21
0
def test_extract_video_id():
    """Check that a standard watch URL yields its 11-character video id."""
    assert (
        extract.video_id('https://www.youtube.com/watch?v=9bZkp7q19f0')
        == '9bZkp7q19f0'
    )