def __init__(
    self,
    url: str,
    defer_prefetch_init: bool = False,
    on_progress_callback: Optional[OnProgress] = None,
    on_complete_callback: Optional[OnComplete] = None,
    proxies: Optional[Dict[str, str]] = None,
):
    """Construct a :class:`YouTube <YouTube>`.

    :param str url:
        A valid YouTube watch URL.
    :param bool defer_prefetch_init:
        When true, the ``prefetch()`` / ``descramble()`` calls at the end
        of construction are skipped.
    :param func on_progress_callback:
        (Optional) User defined callback function for stream download
        progress events.
    :param func on_complete_callback:
        (Optional) User defined callback function for stream download
        complete events.
    :param dict proxies:
        (Optional) Passed to ``install_proxy`` when non-empty.
    """
    self.js: Optional[str] = None  # js fetched by js_url
    self.js_url: Optional[str] = None  # the url to the js, parsed from watch html

    # note: vid_info may eventually be removed. It sounds like it once had
    # additional formats, but that doesn't appear to still be the case.

    # the url to vid info, parsed from watch html
    self.vid_info_url: Optional[str] = None
    self.vid_info_raw: Optional[str] = None  # content fetched by vid_info_url
    self.vid_info: Optional[Dict] = None  # parsed content of vid_info_raw

    self.watch_html: Optional[str] = None  # the html of /watch?v=<video_id>
    self.embed_html: Optional[str] = None
    # inline js in the html containing streams
    self.player_config_args: Dict = {}
    self.player_response: Dict = {}
    self.age_restricted: Optional[bool] = None

    self.fmt_streams: List[Stream] = []

    # video_id part of /watch?v=<video_id>
    self.video_id = extract.video_id(url)

    self.watch_url = f"https://youtube.com/watch?v={self.video_id}"
    self.embed_url = f"https://www.youtube.com/embed/{self.video_id}"

    # Shared between all instances of `Stream` (Borg pattern).
    self.stream_monostate = Monostate(
        on_progress=on_progress_callback, on_complete=on_complete_callback
    )

    if proxies:
        install_proxy(proxies)

    if not defer_prefetch_init:
        self.prefetch()
        self.descramble()
def __init__(
    self,
    url=None,
    defer_prefetch_init=False,
    on_progress_callback=None,
    on_complete_callback=None,
    proxies=None,
):
    """Set up a :class:`YouTube <YouTube>` object for one watch URL.

    :param str url:
        A valid YouTube watch URL.
    :param bool defer_prefetch_init:
        When true, ``prefetch_init()`` is not called by the constructor.
    :param func on_progress_callback:
        (Optional) User defined callback function for stream download
        progress events.
    :param func on_complete_callback:
        (Optional) User defined callback function for stream download
        complete events.
    :param proxies:
        (Optional) Handed to ``install_proxy`` when truthy.
    """
    # js      -- js fetched by js_url
    # js_url  -- the url to the js, parsed from watch html
    self.js = self.js_url = None

    # note: vid_info may eventually be removed. It sounds like it once had
    # additional formats, but that doesn't appear to still be the case.
    # vid_info      -- content fetched by vid_info_url
    # vid_info_url  -- the url to vid info, parsed from watch html
    self.vid_info = self.vid_info_url = None

    # watch_html          -- the html of /watch?v=<video_id>
    # player_config_args  -- inline js in the html containing streams
    self.watch_html = self.embed_html = None
    self.player_config_args = self.age_restricted = None

    self.fmt_streams = []  # list of :class:`Stream <Stream>` instances
    self.caption_tracks = []

    # video_id part of /watch?v=<video_id>
    vid = extract.video_id(url)
    self.video_id = vid

    # https://www.youtube.com/watch?v=<video_id>
    self.watch_url = extract.watch_url(vid)
    self.embed_url = extract.embed_url(vid)

    # A dictionary shared between all instances of :class:`Stream <Stream>`
    # (Borg pattern); it carries the user defined callback functions.
    self.stream_monostate = dict(
        on_progress=on_progress_callback,
        on_complete=on_complete_callback,
    )

    if proxies:
        install_proxy(proxies)

    if defer_prefetch_init:
        return
    self.prefetch_init()
def set_video(self, vurl):
    """Switch to the video at *vurl* unless it is already the current one.

    When a video is already set and its ``video_id`` equals the id
    extracted from *vurl*, nothing happens. Otherwise the URL is stored,
    a new ``Youtube`` object is created and the progress / completion
    callbacks are registered on it.
    """
    if not self.video or self.video.video_id != extract.video_id(vurl):
        self.curr_url = vurl
        self.video = Youtube(vurl)
        self.video.register_on_progress_callback(self.update_progress)
        self.video.register_on_complete_callback(self.on_download_completed)
def __init__(self,
             url: str,
             on_progress_callback: Optional[Callable[[Any, bytes, int],
                                                     None]] = None,
             on_complete_callback: Optional[Callable[[Any, Optional[str]],
                                                     None]] = None,
             proxies: Optional[Dict[str, str]] = None,
             use_oauth: bool = False,
             allow_oauth_cache: bool = True):
    """Construct a :class:`YouTube <YouTube>`.

    :param str url:
        A valid YouTube watch URL.
    :param func on_progress_callback:
        (Optional) User defined callback function for stream download
        progress events.
    :param func on_complete_callback:
        (Optional) User defined callback function for stream download
        complete events.
    :param dict proxies:
        (Optional) Passed to ``install_proxy`` when non-empty.
    :param bool use_oauth:
        (Optional) Stored on the instance as ``use_oauth``.
    :param bool allow_oauth_cache:
        (Optional) Stored on the instance as ``allow_oauth_cache``.
    """
    self._js: Optional[str] = None  # js fetched by js_url
    # the url to the js, parsed from watch html
    self._js_url: Optional[str] = None

    # content fetched from innertube/player
    self._vid_info: Optional[Dict] = None

    # the html of /watch?v=<video_id>
    self._watch_html: Optional[str] = None
    self._embed_html: Optional[str] = None
    # inline js in the html containing
    self._player_config_args: Optional[Dict] = None
    self._age_restricted: Optional[bool] = None

    self._fmt_streams: Optional[List[Stream]] = None

    self._initial_data = None
    self._metadata: Optional[YouTubeMetadata] = None

    # video_id part of /watch?v=<video_id>
    self.video_id = extract.video_id(url)

    self.watch_url = f"https://youtube.com/watch?v={self.video_id}"
    self.embed_url = f"https://www.youtube.com/embed/{self.video_id}"

    # Shared between all instances of `Stream` (Borg pattern).
    self.stream_monostate = Monostate(on_progress=on_progress_callback,
                                      on_complete=on_complete_callback)

    if proxies:
        install_proxy(proxies)

    self._author = None
    self._title = None
    self._publish_date = None

    self.use_oauth = use_oauth
    self.allow_oauth_cache = allow_oauth_cache
def __init__(
    self,
    url: str,
    on_progress_callback: Optional[Callable[[Any, bytes, int], None]] = None,
    on_complete_callback: Optional[Callable[[Any, Optional[str]], None]] = None,
    proxies: Optional[Dict[str, str]] = None,
):
    """Construct a :class:`YouTube <YouTube>`.

    :param str url:
        A valid YouTube watch URL.
    :param func on_progress_callback:
        (Optional) User defined callback function for stream download
        progress events.
    :param func on_complete_callback:
        (Optional) User defined callback function for stream download
        complete events.
    :param dict proxies:
        (Optional) Passed to ``install_proxy`` when non-empty.
    """
    self._js: Optional[str] = None  # js fetched by js_url
    self._js_url: Optional[str] = None  # the url to the js, parsed from watch html

    # note: vid_info may eventually be removed. It sounds like it once had
    # additional formats, but that doesn't appear to still be the case.

    # the url to vid info, parsed from watch html
    self._vid_info_url: Optional[str] = None
    self._vid_info_raw: Optional[str] = None  # content fetched by vid_info_url
    self._vid_info: Optional[Dict] = None  # parsed content of vid_info_raw

    self._watch_html: Optional[str] = None  # the html of /watch?v=<video_id>
    self._embed_html: Optional[str] = None
    # inline js in the html containing streams
    self._player_config_args: Optional[Dict] = None
    self._player_response: Optional[Dict] = None
    self._age_restricted: Optional[bool] = None

    self._fmt_streams: Optional[List[Stream]] = None

    self._initial_data = None
    self._metadata: Optional[YouTubeMetadata] = None

    # video_id part of /watch?v=<video_id>
    self.video_id = extract.video_id(url)

    self.watch_url = f"https://youtube.com/watch?v={self.video_id}"
    self.embed_url = f"https://www.youtube.com/embed/{self.video_id}"

    # Shared between all instances of `Stream` (Borg pattern).
    self.stream_monostate = Monostate(
        on_progress=on_progress_callback, on_complete=on_complete_callback
    )

    if proxies:
        install_proxy(proxies)

    self._author = None
    self._title = None
    self._publish_date = None
def get_youtube_id(url):
    # Extract the video-id portion of the URL; False means no id was found.
    try:
        return extract.video_id(url)
    except exceptions.RegexMatchError:
        return False
def ppytube_download(url: str, mime_type: str):
    """Download one stream of the video at *url*, prompting when ambiguous.

    The URL is validated with ``extract.video_id``, a ``YouTube`` object is
    built and initialised, and the streams whose MIME type starts with
    *mime_type* are collected. The first match is the default; when several
    match they are printed and the user may type an itag to pick another.

    Every failure ends the process through ``sys.exit`` with a message.

    :param url: Video URL to download from.
    :param mime_type: Prefix the chosen stream's ``mime_type`` must have.
    """
    try:
        from pytube import extract
        extract.video_id(url)
    except Exception as e:
        sys.exit("Unsupported URL -- {}\n{}".format(url, e))

    try:
        # object creation using YouTube which was imported in the beginning
        yt = YouTube(url, defer_prefetch_init=True)
        yt.prefetch()
        yt.descramble()
    except Exception as e:
        sys.exit("YouTube Init Error -- {}\n{}".format(url, e))

    if len(yt.streams) == 0:
        sys.exit("No streams")

    try:
        print('{}'.format(yt.title))
        streams = [
            st for st in yt.streams if st.mime_type.startswith(mime_type)
        ]
        # An empty match list raises IndexError here and is reported by the
        # handler below.
        stream = streams[0]
        if len(streams) > 1:
            from pprint import pprint
            for st in streams:
                pprint(st)
            itag = input("Select itag (default:{}): ".format(stream.itag))
            if itag:
                # Compare by value: `is` tests object identity, which is
                # not guaranteed to hold for two equal integers.
                stream = [st for st in streams if st.itag == int(itag)][0]
    except Exception as e:
        sys.exit("No stream specified\n{}".format(e))

    try:
        print('From {}'.format(stream.url))
        stream.download()
    except Exception as e:
        sys.exit("Download Failed! -- {}\n{}".format(url, e))

    print('Download Completed!')
def handle(self, **kwargs):
    """Process every URL in ``kwargs['url']`` that is not yet stored.

    For each URL a ``Video i/N`` progress line is printed. URLs whose video
    id already has a ``Video`` row are skipped; the rest are downloaded,
    have images extracted and are run through object detection.
    """
    videos = kwargs['url']
    for position, video in enumerate(videos, start=1):
        print(f'Video {position}/{len(videos)}')
        # if not a yt url, will fail here
        vid_id = extract.video_id(video)
        if Video.objects.filter(video_id=vid_id).exists():
            continue
        emd.download_video(video)
        screencap_path = od.extract_images(vid_id)
        od.detect_objects(vid_id, screencap_path)
def __init__(
    self,
    url=None,
    defer_prefetch_init=False,
    on_progress_callback=None,
    on_complete_callback=None,
):
    """Set up a :class:`YouTube <YouTube>` object for one watch URL.

    :param str url:
        A valid YouTube watch URL.
    :param bool defer_prefetch_init:
        When true, ``prefetch_init()`` is not called by the constructor.
    :param func on_progress_callback:
        (Optional) User defined callback function for stream download
        progress events.
    :param func on_complete_callback:
        (Optional) User defined callback function for stream download
        complete events.
    """
    # js      -- js fetched by js_url
    # js_url  -- the url to the js, parsed from watch html
    self.js = self.js_url = None

    # note: vid_info may eventually be removed. It sounds like it once had
    # additional formats, but that doesn't appear to still be the case.
    # vid_info      -- content fetched by vid_info_url
    # vid_info_url  -- the url to vid info, parsed from watch html
    self.vid_info = self.vid_info_url = None

    # watch_html          -- the html of /watch?v=<video_id>
    # player_config_args  -- inline js in the html containing streams
    self.watch_html = self.player_config_args = self.age_restricted = None

    self.fmt_streams = []  # list of :class:`Stream <Stream>` instances
    self.caption_tracks = []

    # video_id part of /watch?v=<video_id>
    vid = extract.video_id(url)
    self.video_id = vid

    # https://www.youtube.com/watch?v=<video_id>
    self.watch_url = extract.watch_url(vid)
    self.embed_url = extract.embed_url(vid)

    # A dictionary shared between all instances of :class:`Stream <Stream>`
    # (Borg pattern); it carries the user defined callback functions.
    self.stream_monostate = dict(
        on_progress=on_progress_callback,
        on_complete=on_complete_callback,
    )

    if defer_prefetch_init:
        return
    self.prefetch_init()
def addVideo(request):
    """Validate a submitted video URL and queue it for analysis.

    For a POST request the ``url`` field is read, its video id extracted,
    and the ``Video`` table checked for that id. When the video is not
    stored yet, a ``YouTube`` object is constructed and the URL is handed
    to ``analyzevideo.delay``.

    Returns a ``JsonResponse`` with the boolean keys ``is_valid`` and
    ``video_exists``. Non-POST requests get the same payload (both false)
    with status 405 instead of falling through without a response.
    """
    if request.method != "POST":
        return JsonResponse(
            {'is_valid': False, 'video_exists': False}, status=405
        )

    url = request.POST['url']
    is_valid = False
    video_exists = False
    try:
        vid_id = extract.video_id(url)
        video_exists = Video.objects.filter(video_id=vid_id).exists()
        if not video_exists:
            # Constructed only for its side effect of raising on a bad URL.
            YouTube(url)
            is_valid = True
            analyzevideo.delay(url)
    except Exception:
        # Best-effort validation: any failure is reported to the client as
        # is_valid=False. `Exception` (not a bare except) lets SystemExit
        # and KeyboardInterrupt propagate.
        pass
    return JsonResponse({'is_valid': is_valid, 'video_exists': video_exists})
def details():
    """Render the details page for the video named by the ``url`` query arg.

    A non-empty URL is resolved to a video id and looked up in
    ``pytube_cache``; on a miss a ``YouTube`` object is created and cached
    under that id. An empty URL flashes an error message instead.

    The template is rendered in both cases. ``video`` is ``None`` on the
    error path, so the function always returns a response and never reads
    an unbound local.
    """
    url = request.args['url']
    video = None

    if not url:
        flash('Video link is required.')
    else:
        video_id = extract.video_id(url)
        video = pytube_cache.get(video_id)
        if video is None:
            video = YouTube(url)
            pytube_cache.set(video_id, video)

    # NOTE(review): assumes the template tolerates video=None - confirm.
    return render_template('youtube_downloader/details.html', video=video)
def get_youtube_id(url):
    """Return the YouTube video id contained in *url*.

    Args:
        url (string): user input url

    Returns:
        The id reported by ``extract.video_id``, or ``False`` when that
        call raises ``RegexMatchError``.
    """
    try:
        youtube_id = extract.video_id(url)
    except exceptions.RegexMatchError:
        return False
    return youtube_id
def __init__(
    self,
    url,
    on_progress_callback: Optional[OnProgress] = None,
    on_complete_callback: Optional[OnComplete] = None,
):
    """Do not instantiate the YouTube class directly; use create()."""
    # --- player javascript -------------------------------------------
    # js fetched by js_url
    self.js: Optional[str] = None
    # the url to the js, parsed from watch html
    self.js_url: Optional[str] = None

    # --- video info ---------------------------------------------------
    # note: vid_info may eventually be removed. It sounds like it once had
    # additional formats, but that doesn't appear to still be the case.
    # the url to vid info, parsed from watch html
    self.vid_info_url: Optional[str] = None
    # content fetched by vid_info_url
    self.vid_info_raw: Optional[str] = None
    # parsed content of vid_info_raw
    self.vid_info: Optional[Dict] = None

    # --- page content -------------------------------------------------
    # the html of /watch?v=<video_id>
    self.watch_html: Optional[str] = None
    self.embed_html: Optional[str] = None
    # inline js in the html containing streams
    self.player_config_args: Dict = {}
    self.player_response: Dict = {}
    self.age_restricted: Optional[bool] = None

    self.fmt_streams: List[Stream] = []

    # video_id part of /watch?v=<video_id>
    vid = extract.video_id(url)
    self.video_id = vid
    self.watch_url = f"https://youtube.com/watch?v={vid}"
    self.embed_url = f"https://www.youtube.com/embed/{vid}"

    # Shared between all instances of `Stream` (Borg pattern).
    callbacks = {
        "on_progress": on_progress_callback,
        "on_complete": on_complete_callback,
    }
    self.stream_monostate = Monostate(**callbacks)
async def descarga(self, ctx, url=None):
    ''' Introduce una url de un video de YT y se te redirigirá a otra pagina para descargar tal video en .mp3 o .mp4... '''
    # NOTE(review): the docstring above is kept in its original language
    # because it presumably doubles as the command's user-facing help text
    # - confirm. In English: "Enter the URL of a YT video and you will be
    # redirected to another page to download it as .mp3 or .mp4".
    #
    # With a URL: build a y2mate conversion link from the video id, delete
    # the invoking message and post the link. Without one: post a failure
    # notice that is deleted after 60 seconds.
    if url is None:
        await typing_sleep(ctx)
        await ctx.send("No se pudo convertir con exito el video...",
                       delete_after=60.0)
        print(
            f"cmdDescarga|| {ctx.author.name} no pudo descargar un video..."
        )
        return

    # Named video_id rather than `id` so the builtin is not shadowed.
    video_id = extract.video_id(url)
    downl_url = f"https://www.y2mate.com/es/convert-youtube/{video_id}"
    await typing_sleep(ctx)
    await ctx.message.delete()
    await ctx.send(
        f"Aqui esta el video listo para ser descargado: {downl_url}")
    print(
        f"cmdDescarga|| {ctx.author.name} descargo un video..."
    )
def download_video(url):
    """Download the video at *url* and store a ``Video`` row for it.

    English captions are taken from the ``en`` track, else the ``a.en``
    track, else left empty. The first mp4 stream in descending resolution
    order is downloaded into the ``videos`` directory two levels above this
    file, named after the video id. The metadata is then saved.
    """
    # Original author's note: put a try here, it will fail if video does
    # not exist/not valid/been deleted.
    video = YouTube(url)

    for caption_code in ('en', 'a.en'):
        if caption_code in video.captions:
            xml_captions = video.captions[caption_code].xml_captions
            break
    else:
        xml_captions = ''
    str_captions = extract_captions(xml_captions)

    vid_id = extract.video_id(url)

    print('Downloading video...')
    videos_path = Path(__file__).resolve().parent.parent.joinpath('videos')
    mp4_streams = video.streams.filter(file_extension='mp4')
    best_stream = mp4_streams.order_by('resolution').desc().first()
    best_stream.download(videos_path, vid_id)

    record = {
        'title': video.title,
        'description': video.description,
        'upload_date': extract_upload_date(video),
        'user': video.author,
        'captions': str_captions,
        'thumbnail_url': video.thumbnail_url,
        'video_url': url,
        'length': video.length,
        'views': video.views,
        'video_id': vid_id,
    }
    entry = Video(**record)
    entry.save()
    print('Video saved to database')
def test_extract_video_id():
    """``extract.video_id`` yields the ``v`` value of a watch URL."""
    assert (
        extract.video_id("https://www.youtube.com/watch?v=2lAe1cqCOXo")
        == "2lAe1cqCOXo"
    )
from pytube import extract
import pandas as pd
from pathlib import Path

# Module-level accumulators: the URLs read from links.txt and the video id
# extracted from each of them, in the same order.
url_list = []
video_id = []

if __name__ == "__main__":
    urls = Path('./links.txt').read_text(encoding='utf-8')
    # Drop blank lines (a trailing newline produces one) and surrounding
    # whitespace so that only real URLs reach extract.video_id.
    url_list += [line.strip() for line in urls.split("\n") if line.strip()]

    video_id.extend(extract.video_id(url) for url in url_list)

    print(video_id)
    print(len(video_id))
def load_streams(self):
    """Load the video(s) for the entered URL and fill the stream tree.

    The URL is read from the download manager's ``url`` widget. It is first
    tried as a single video; when ``extract.video_id`` raises
    ``RegexMatchError`` and the URL contains ``'playlist'`` it is loaded as
    a playlist instead. Every stream of every loaded video is then added to
    the download manager's ``stream_tree``; streams without a video codec
    go under an 'Audio Only' child item.

    Progress, messages and errors are reported through the ``sig_*``
    signals. ``self.__abort`` is checked between steps; when it is set the
    method emits 'Aborted!' and ``sig_done`` and returns early.
    """
    # Spin until thread_count is at most 1, emitting a status step on every
    # pass.
    while self.__download_manager.thread_count > 1:
        self.sig_step.emit(self.id, 'Waiting for threads to clear...')

    thread_name = QThread.currentThread().objectName()
    thread_id = int(QThread.currentThreadId())
    self.sig_step.emit(self.id, f'{thread_id}: {thread_name} thread starting...')

    # Reset the download manager's state from any previous load.
    self.__download_manager.videos = []
    self.__download_manager.streams = []
    proxies = self.__download_manager.get_proxies()

    # NOTE(review): items are taken by increasing index while the tree
    # presumably shrinks, so this loop may not visit every item; clear()
    # below is called regardless - confirm the loop is still needed.
    top_level_item_count = self.__download_manager.stream_tree.topLevelItemCount()
    for i in range(top_level_item_count):
        self.__download_manager.stream_tree.takeTopLevelItem(i)
    self.__download_manager.stream_tree.clear()
    self.__download_manager.streams_to_download = {}

    try:
        # Single-video path: video_id() raising RegexMatchError sends
        # control to the playlist handling below.
        print('get video id')
        print(extract.video_id(self.__download_manager.url.text()))
        self.sig_step.emit(self.id, f'Loading video')
        loaded_url = YouTube(self.__download_manager.url.text(), proxies=proxies)
        self.sig_step.emit(self.id, f'Loaded video: {loaded_url.title}')
        self.sig_msg.emit(f'Found {loaded_url.title}')
        if self.__abort:
            self.sig_progress_status.emit(f'Aborted!')
            self.sig_done.emit(self.id)
            return
        self.__download_manager.videos.append(loaded_url)

    except RegexMatchError:
        print('playlist')
        if 'playlist' in self.__download_manager.url.text():
            # NOTE(review): the return value is discarded; presumably this
            # is called only so that it raises when the URL carries no
            # list id - confirm.
            regex_search(r'(?:list=|\/)([0-9A-Za-z_-]{11}).*', self.__download_manager.url.text(), group=1)
            loaded_url = Playlist(self.__download_manager.url.text())
            self.sig_msg.emit(f'Loaded playlist. Discovering videos...')
            loaded_url.populate_video_urls()
            i = 0
            self.sig_progress_status.emit(0)
            for video_url in loaded_url.video_urls:
                self.sig_step.emit(self.id, f'Loading video {i}')
                if self.__abort:
                    self.sig_progress_status.emit(f'Aborted!')
                    self.sig_done.emit(self.id)
                    return
                # Video loading is reported on the 0-50 part of the total
                # progress scale.
                self.sig_progress_total.emit(int((i / (len(loaded_url.video_urls) * 2)) * 100))
                vid = YouTube(video_url, proxies=proxies)
                self.sig_step.emit(self.id, f'Loaded video: {vid.title}')
                if self.__abort:
                    self.sig_progress_status.emit(f'Aborted!')
                    self.sig_done.emit(self.id)
                    return
                self.sig_msg.emit(f'Found {vid.title}')

                self.__download_manager.videos.append(vid)
                self.sig_progress_status.emit(int((i / len(loaded_url.video_urls)) * 100))
                i += 1
            self.sig_progress_total.emit(50)
        else:
            self.sig_error.emit('Could not determine Video '
                                'or Playlist ID from provided URL!\n'
                                'Please check input!')
            self.sig_done.emit(self.id)
            return
    except Exception as e:
        # Any other failure of the single-video path is reported as-is.
        self.sig_error.emit(str(e))
        self.sig_done.emit(self.id)
        return

    self.sig_msg.emit(f'Loading Streams..')
    print('loading streams')
    i = 0
    for video in self.__download_manager.videos:
        self.sig_progress_status.emit(0)
        self.sig_step.emit(self.id, f'Loading streams for video {i}')
        if self.__abort:
            self.sig_progress_status.emit(f'Aborted!')
            self.sig_done.emit(self.id)
            return
        # One tree item per video; audio-only streams are grouped under a
        # separate child item that is attached after the stream loop.
        audio_streams = QTreeWidgetItem(['Audio Only'])
        tree_item = StreamTreeWidgetItem([video.title], f'video_{i}',
                                         self.__download_manager, video, None)
        self.__download_manager.streams = video.streams.all()
        x = 0
        for stream in self.__download_manager.streams:
            self.sig_step.emit(self.id, f'Loading stream {x}')
            if self.__abort:
                self.sig_progress_status.emit(f'Aborted!')
                self.sig_done.emit(self.id)
                return
            self.sig_msg.emit(f'Video {i + 1}/{len(self.__download_manager.videos)}: '
                              f'Loading Stream ITAG ID: {stream.itag}')
            if stream.video_codec is None:
                # No video codec: list the stream under 'Audio Only'.
                stream_item = StreamTreeWidgetItem([
                    f'Codec: {stream.audio_codec}, '
                    f'ABR: {stream.abr}, '
                    f'File Type: {stream.mime_type.split("/")[1]}, '
                    f'Size: {stream.filesize // 1024} KB'
                ], f'video_{i}_stream{x}',
                    self.__download_manager, video, stream)
                self.sig_step.emit(self.id, f'Loaded stream {x}')
                if self.__abort:
                    self.sig_progress_status.emit(f'Aborted!')
                    self.sig_done.emit(self.id)
                    return
                audio_streams.addChild(stream_item)
            else:
                stream_item = StreamTreeWidgetItem([
                    f'Res: {stream.resolution}, FPS: {stream.fps}, '
                    f' Video Codec: {stream.video_codec}, Audio Codec: {stream.audio_codec}, '
                    f'File Type: {stream.mime_type.split("/")[1]}, '
                    f'Size: {stream.filesize // 1024} KB'
                ], f'video_{i}_stream{x}',
                    self.__download_manager, video, stream)
                self.sig_step.emit(self.id, f'Loaded stream {x}')
                if self.__abort:
                    self.sig_progress_status.emit(f'Aborted!')
                    self.sig_done.emit(self.id)
                    return
                tree_item.addChild(stream_item)
            stream_item.setCheckState(0, Qt.Unchecked)
            x += 1
            self.sig_progress_status.emit(int((x / len(self.__download_manager.streams)) * 100))
        tree_item.addChild(audio_streams)
        self.sig_step.emit(self.id, f'Adding video {i} to tree')
        if self.__abort:
            self.sig_progress_status.emit(f'Aborted!')
            self.sig_done.emit(self.id)
            return
        self.__download_manager.stream_tree.addTopLevelItem(tree_item)
        i += 1
        self.sig_progress_status.emit(100)
        # Stream loading is reported on the 50-100 part of the total
        # progress scale.
        self.sig_progress_total.emit(int((i / (len(self.__download_manager.videos) * 2)) * 100) + 50)

    self.sig_msg.emit(f'Streams Loaded!')
    self.sig_done.emit(self.id)
def download_func():
    """Redirect a POSTed URL to ``downloading_func`` with its video id.

    For a POST request the ``url`` form field is converted with
    ``video_id`` and passed as the ``link`` argument of the redirect
    target. Any other request method falls through and returns ``None``.
    """
    if request.method != "POST":
        return None
    target = url_for('downloading_func', link=video_id(request.form["url"]))
    return redirect(target)
def test_extract_video_id():
    """The id extracted from a watch URL equals its ``v`` query value."""
    extracted = extract.video_id("https://www.youtube.com/watch?v=9bZkp7q19f0")
    expected = "9bZkp7q19f0"
    assert extracted == expected
def test_extract_video_id():
    """``extract.video_id`` returns the ``v`` parameter of a watch URL."""
    assert (
        extract.video_id('https://www.youtube.com/watch?v=9bZkp7q19f0')
        == '9bZkp7q19f0'
    )