def main():
    """Save the chat data of every video in a playlist as numbered JSON files.

    Prompts for a playlist URL and a starting number, then writes one
    ``<NNN>.<sanitized title>_chat.json`` file per video, using a path
    relative to the working directory.
    """
    playlist_url = input("Enter YouTube Playlist URL : ")
    playlist = Playlist(playlist_url)
    playlist.populate_video_urls()
    print(playlist.video_urls)
    number_offset = int(input("Enter first NUM : "))

    for position, video_url in enumerate(playlist.video_urls):
        # Retry until the video metadata (its title) can be read.
        while True:
            try:
                video = YouTube(video_url)
                print(video.title)
            except Exception:
                time.sleep(1)
                print('retry')
            else:
                break

        chat_records = get_chat(video_url)

        # Replace characters that are not valid in file names.
        safe_title = re.sub(r'[\\/:*?"<>|]+', '_', video.title)
        out_name = str(position + number_offset).zfill(3) + '.' + safe_title + "_chat.json"

        serialized = json.dumps(chat_records, ensure_ascii=False)
        with open(out_name, mode='w', encoding="utf-8") as out_file:
            out_file.write(serialized)
def main(args):
    """Download playlist videos missing from a collection and convert them to audio.

    Args:
        args: Argument vector. ``args[1]`` is the collection directory,
            ``args[2]`` the playlist link and ``args[3]`` the quality value
            handed to ``convert_to_audio``.

    Errors raised while handling a single video are printed and the loop
    moves on to the next video.
    """
    collection_dir = args[1]
    playlist_link = args[2]
    quality = args[3]

    collection = remove_extensions(os.listdir(collection_dir))

    playlist = Playlist(playlist_link)
    playlist.populate_video_urls()

    # Characters that are not allowed in Windows filenames.
    forbidden = {ord(c): None for c in '\\\"/<>?:*|'}

    for link in playlist.video_urls:
        try:
            video = YouTube(link)
            video_title = video.title.translate(forbidden)
            if is_video_in_collection(collection, video_title):
                continue

            video_stream = video.streams.filter(progressive=True).first()
            if video_stream is None:
                print('No progressive stream available for ' + link)
                continue

            video_stream.download()
            try:
                convert_to_audio(video_stream.default_filename, video_title,
                                 collection_dir, quality)
            finally:
                # Always delete the downloaded video, even if conversion failed.
                os.remove(video_stream.default_filename)
        except Exception as ex:
            print(ex)
def download():
    """Download the entered video (type 1) or playlist (type 2) and show a status label."""

    def announce(video_url):
        # Show "<title> downloaded !" for the video that was just fetched.
        status = tk.Label(frame,
                          text=YouTube(video_url).title + " downloaded !",
                          bg="#041824",
                          fg="#ffffff",
                          font=("Courier", 10),
                          wraplength=400)
        status.place(y=370, relwidth=1, relheight=0.2)

    selection = videotype.get()
    if selection == 1:
        downloadYouTube(str(urlInput.get()), path.get())
        announce(urlInput.get())
    elif selection == 2:
        playlist = Playlist(urlInput.get())
        playlist.populate_video_urls()
        for relative_link in playlist.parse_links():
            videolink = 'https://www.youtube.com' + relative_link
            downloadYouTube(videolink, path.get())
            announce(videolink)
def download_playlist(url, list_path):
    """Write the URL of every video in the playlist to ``list_path``, one per line."""
    logger.info(f'trying to download youtube playlist {url}')
    playlist = Playlist(url)
    playlist.populate_video_urls()
    with open(list_path, 'w') as listing:
        listing.writelines(entry + '\n' for entry in playlist.video_urls)
def enter():
    """Show information about the entered URL.

    For a single video (type 1) the title and length are displayed; for a
    playlist (type 2) the playlist name and the number of videos.
    """
    label_style = dict(bg="#041824", fg="#ffffff", font=("Courier", 10),
                       wraplength=400)

    if videotype.get() == 1:
        yt1 = YouTube(urlInput.get())
        title_label = tk.Label(frame, text=yt1.title, **label_style)
        title_label.place(y=240, relwidth=1, relheight=0.2)
        # str() keeps the concatenation valid when `length` is a number
        # rather than text.
        length_label = tk.Label(frame,
                                text=str(yt1.length) + " seconds long",
                                **label_style)
        length_label.place(y=330, relwidth=1, relheight=0.05)
    elif videotype.get() == 2:
        pl = Playlist(urlInput.get())
        pl.populate_video_urls()
        summary = ('Playlist Name: ' + pl.title() + '\n' +
                   'Number of videos in playlist: ' + str(len(pl.video_urls)))
        summary_label = tk.Label(frame, text=summary, **label_style)
        summary_label.place(y=260, relwidth=1, relheight=0.2)
def get_urls_from_entry_text(url_entry_text):
    """Return the video URLs described by the text of the URL entry.

    Args:
        url_entry_text: Text typed by the user.

    Returns:
        The playlist's video URLs when the text contains ``'list'``;
        otherwise a one-element list holding the text itself.
    """
    if 'list' not in url_entry_text:
        return [url_entry_text]

    playlist = Playlist(construct_playlist_url(url_entry_text))
    playlist.populate_video_urls()
    return playlist.video_urls
def get_playlist_info(URL):
    """Build an info dictionary for a playlist.

    The dictionary starts from ``get_video_details`` of the first video and
    is extended with the playlist ``'title'`` and ``'linkCount'`` (number of
    videos). For an empty playlist only those two keys are present.

    Args:
        URL: Playlist URL.

    Returns:
        The info dictionary.
    """
    playlist = Playlist(URL)
    playlist.populate_video_urls()
    urls = playlist.video_urls

    # An empty playlist has no first video to take details from.
    playlistInfo = get_video_details(urls[0]) if urls else {}

    # `title` is called as a method elsewhere in this code base; resolve it
    # here so the dictionary holds the text, not a bound method.
    title = playlist.title
    if callable(title):
        title = title()
    playlistInfo['title'] = title
    playlistInfo['linkCount'] = len(urls)
    return playlistInfo
def downloadList(url, maxCount=None, start=None, end=None):
    """Download (a slice of) a playlist, spreading the videos over worker buckets.

    Args:
        url: Playlist URL.
        maxCount: If truthy, only the first ``maxCount`` videos are taken;
            takes precedence over ``start``/``end``.
        start: 1-based index of the first video to take.
        end: 1-based index of the last video to take (inclusive).

    The download is re-dispatched until ``hasToDownloadTask()`` reports that
    nothing is left.
    """
    print("download Youtube playlist:%s, maxCount:%s" % (url, str(maxCount)))
    taskCount = DOWNLOAD_TASK_CUNT

    pl = Playlist(url)
    pl.populate_video_urls()
    videoUrls = pl.video_urls
    if maxCount:
        videoUrls = videoUrls[0:maxCount:1]
    elif start and end:
        videoUrls = videoUrls[start - 1:end]
    elif start and end is None:
        videoUrls = videoUrls[start - 1::]
    elif start is None and end:
        videoUrls = videoUrls[:end:]

    # NOTE(review): prefixes are drawn from the start of the generator even
    # when the URL list was sliced above -- confirm this is intended.
    prefix_gen = pl._path_num_prefix_generator()
    playlistTitle = getPlaylistTitle(pl.construct_playlist_url())

    # One bucket of (link, prefix, title) tuples per worker, filled round-robin.
    argsArrayList = [[] for _ in range(taskCount)]
    for position, link in enumerate(videoUrls):
        prefix = next(prefix_gen)
        argsArrayList[position % taskCount].append((link, prefix, playlistTitle))
        s_linkStatusDic[link] = False

    downloadListMultipleThread(argsArrayList)

    times = 1
    while hasToDownloadTask():
        times += 1
        toDownloadFileDic = {
            k: v
            for k, v in s_linkStatusDic.items() if v == False
        }
        print("=>try %d times, file to download count: %d" %
              (times, len(toDownloadFileDic)))
        # Format the pending links into the message instead of passing the
        # dictionary as a second print argument.
        print(" %s" % str(toDownloadFileDic))
        downloadListMultipleThread(argsArrayList)
    print("all download task done.")
def generate_playlist(url, filename='playlist.csv'):
    """Write a CSV listing the playlist's video URLs.

    The file has the header ``url,start,end``; ``start`` and ``end`` are
    left empty for every video.

    Args:
        url: Playlist URL.
        filename: Path of the CSV file to create.

    Returns:
        ``filename``.
    """
    playlist = Playlist(url)
    playlist.populate_video_urls()
    # newline='' lets the csv module control line endings, which avoids
    # blank rows between records on Windows.
    with open(filename, 'w', newline='') as fp:
        wrt = csv.writer(fp)
        wrt.writerow(['url', 'start', 'end'])
        wrt.writerows([video_url, '', ''] for video_url in playlist.video_urls)
    return filename
def test_populate():
    """The short test playlist must populate to its three known video URLs."""
    playlist = Playlist(short_test_pl)
    playlist.populate_video_urls()

    known_urls = [
        'https://www.youtube.com/watch?v=m5q2GCsteQs',
        'https://www.youtube.com/watch?v=5YK63cXyJ2Q',
        'https://www.youtube.com/watch?v=Rzt4rUPFYD4',
    ]
    assert playlist.video_urls == known_urls
def load_playlist_urls(self, playlists):
    """Return the video URLs of all given playlists as one flat list."""
    collected = []
    for playlist_url in playlists:
        playlist = Playlist(playlist_url)
        playlist.populate_video_urls()
        collected += playlist.video_urls
    return collected
def test_populate():
    """A watch URL carrying a ``list=`` parameter must populate to the playlist's videos."""
    watch_url_with_list = ('https://www.youtube.com/watch?v=m5q2GCsteQs&list='
                           'PL525f8ds9RvsXDl44X6Wwh9t3fCzFNApw')
    playlist = Playlist(watch_url_with_list)
    playlist.populate_video_urls()

    known_urls = [
        'https://www.youtube.com/watch?v=m5q2GCsteQs',
        'https://www.youtube.com/watch?v=5YK63cXyJ2Q',
        'https://www.youtube.com/watch?v=Rzt4rUPFYD4',
    ]
    assert playlist.video_urls == known_urls
def download_playlist(url, target, codec=None):
    """Download every video of a playlist concurrently, one worker per video.

    Args:
        url: Playlist URL.
        target: Passed through to ``download`` for each video.
        codec: Passed through to ``download`` as the ``codec`` keyword.

    Any exception raised by a download is re-raised here when its result is
    collected.
    """
    from concurrent.futures import ThreadPoolExecutor

    pl = Playlist(url)
    pl.populate_video_urls()
    urls = pl.video_urls

    # ThreadPoolExecutor rejects max_workers=0, so skip the pool entirely
    # for an empty playlist.
    if urls:
        with ThreadPoolExecutor(max_workers=len(urls)) as works:
            futures = [
                works.submit(download, item, target, codec=codec)
                for item in urls
            ]
            for future in futures:
                future.result()
    print("All %d jobs Done." % len(urls))
def download_youtube_list(pl_url, folder="."):
    """Download every video of a playlist into ``folder``.

    Args:
        pl_url: Playlist URL.
        folder: Destination folder handed to ``download_youtube``.

    Returns:
        A list of ``(video_hash, title)`` tuples, one per video, in
        playlist order.
    """
    pl = Playlist(pl_url)
    pl.populate_video_urls()
    # Parenthesised single-argument print works on Python 2 and Python 3.
    print("List size is %s:" % len(pl.video_urls))

    videos = []
    for url in pl.video_urls:
        title, video_hash = download_youtube(url, folder=folder)
        videos.append((video_hash, title))
    return videos
def test_numbering():
    """Check the numeric prefixes generated for short and long playlists in both orders."""

    def populated(playlist_url):
        # Build a fresh playlist with its video URLs loaded.
        fresh = Playlist(playlist_url)
        fresh.populate_video_urls()
        return fresh

    # Short playlist, ascending numbering.
    playlist = populated(short_test_pl)
    prefixes = playlist._path_num_prefix_generator(reverse=False)
    assert '1' in next(prefixes)
    assert '2' in next(prefixes)

    # Short playlist, descending numbering.
    playlist = populated(short_test_pl)
    prefixes = playlist._path_num_prefix_generator(reverse=True)
    assert str(len(playlist.video_urls)) in next(prefixes)
    assert str(len(playlist.video_urls) - 1) in next(prefixes)

    # Long playlist, ascending numbering: prefixes are longer than one character.
    playlist = populated(long_test_pl)
    prefixes = playlist._path_num_prefix_generator(reverse=False)
    first = next(prefixes)
    assert len(first) > 1
    assert '1' in first
    second = next(prefixes)
    assert len(second) > 1
    assert '2' in second

    # Long playlist, descending numbering.
    playlist = populated(long_test_pl)
    prefixes = playlist._path_num_prefix_generator(reverse=True)
    assert str(len(playlist.video_urls)) in next(prefixes)
    assert str(len(playlist.video_urls) - 1) in next(prefixes)
def from_playlist_url(url):
    """Describe a playlist as a plain dictionary.

    Args:
        url: Playlist URL.

    Returns:
        A dict with the keys ``'type'`` (always ``'playlist'``), ``'title'``,
        ``'url'`` (the constructed playlist URL), ``'playlist_id'``,
        ``'video_urls'`` (the links returned by ``parse_links``) and
        ``'video_ids'`` (the text after the first ``=`` of each link).
    """
    pli = Playlist(url)
    # The links are parsed once, below; the original called parse_links()
    # an extra time here and threw the result away.
    pli.populate_video_urls()

    output = dict()
    output['type'] = 'playlist'
    output['title'] = pli.title()
    playlist_url = pli.construct_playlist_url()
    output['url'] = playlist_url
    output['playlist_id'] = _get_playlist_id(playlist_url)
    video_urls = pli.parse_links()
    output['video_urls'] = video_urls
    output['video_ids'] = [v.split('=')[1] for v in video_urls]
    return output
def __init__(self: VideoPlaylist, url: str, sort_by: VideoPlaylist.SortBy, custom_filter: Optional[str] = None):
    """Load the playlist's video URLs and prepare the optional text filter.

    Args:
        url: Playlist URL.
        sort_by: When ``SortBy.NEWEST``, the URL list is reversed.
        custom_filter: Optional text; when truthy it is embedded in a
            ``"simpleText":"..."`` filter string, otherwise no filter is set.
    """
    source = Playlist(url)
    source.populate_video_urls()

    self._urls: List[str] = source.video_urls
    if sort_by is VideoPlaylist.SortBy.NEWEST:
        self._urls.reverse()

    self._filter: Optional[str] = (
        f'"simpleText":"{custom_filter}"' if custom_filter else None
    )
def youtube_playlist_download(playlist_url):
    """List a playlist's videos and, after confirmation, download them all.

    Anything other than ``y`` at the prompt aborts. Exceptions raised by the
    download are printed rather than propagated.
    """
    print("---Youtube Playlist Download---")
    playlist = Playlist(playlist_url)
    playlist.populate_video_urls()
    print("Number of Videos in Playlist: ", len(playlist.video_urls))
    print(playlist.video_urls)

    answer = input("Continue? (y/n): ")
    if answer != "y":
        print("Aborted")
        return

    try:
        multiprocessing_download(playlist.video_urls, True)
    except Exception as error:
        print(error)
def main():
    """Download videos from the source selected by ``args.source``.

    * ``'list'``: ``args.path`` is a text file with one link per line.
    * ``'link'``: ``args.path`` is a single link.
    * ``'playlist'``: ``args.path`` is a playlist URL; its video URLs are
      also saved to ``urls.txt`` inside the output directory.

    The output directory is created first if it does not exist.
    """
    pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)

    if args.source == 'list':
        with open(args.path, 'r') as f:
            # Strip each line so links do not carry their trailing newline;
            # blank lines are skipped.
            links = [line.strip() for line in f if line.strip()]
        download_list(links)
    elif args.source == 'link':
        download_link(args.path)
    elif args.source == 'playlist':
        pl = Playlist(args.path)
        pl.populate_video_urls()
        with open(os.path.join(output_dir, 'urls.txt'), 'w') as f:
            for line in pl.video_urls:
                print(line, file=f)
        download_list(pl.video_urls)
def download_playlist(url_link):
    """Download each video of a playlist, reporting progress and the success count.

    A ``KeyError`` raised for a video is reported and that video is skipped;
    other exceptions propagate.
    """
    pl = Playlist(url_link)
    pl.populate_video_urls()
    urls = pl.video_urls
    length = len(urls)

    succes = 0
    for n, url in enumerate(urls, start=1):
        print('------------------------------------')
        print(f'Video {str(n)} out of {str(length)}')
        try:
            download_video(url)
        except KeyError as e:
            print('I got a KeyError - reason "%s"' % str(e))
        else:
            succes += 1

    print(f'Finished downloading {str(succes)} of {str(length)} videos.')
def YouTube(session):
    """Import the videos of all configured playlists into the database.

    For every video an ``Example`` row is added and flushed, the video-only
    MP4 stream that sorts first by descending resolution is downloaded into
    ``VIDEO_DIR`` under the example's id, and a ``Video`` row recording the
    original URL is added and committed.
    """
    license_id = _create_or_use_youtube_license(session)
    dataset_id = _create_or_use_youtube_dataset(session)

    for playlist_uri in PLAYLISTS:
        playlist = Playlist(playlist_uri)
        playlist.populate_video_urls()

        for video_uri in playlist.video_urls:
            source_video = PyYouTube(video_uri)

            example = Example(dataset_id=dataset_id,
                              type=Type.MP4,
                              lifecycle=Lifecycle.FLEXIBLE,
                              license_id=license_id)
            session.add(example)
            # Flush before the download, which uses example.id as the file name.
            session.flush()

            mp4_streams = source_video.streams.filter(file_extension='mp4',
                                                      only_video=True)
            chosen_stream = mp4_streams.order_by('resolution').desc().first()
            chosen_stream.download(output_path=str(VIDEO_DIR),
                                   filename=str(example.id))

            session.add(Video(id=example.id, orig_url=video_uri))
            session.commit()
def test_populate_video_urls(request_get, playlist_html):
    """populate_video_urls() must extract all twelve URLs from the fixture HTML."""
    request_get.return_value = playlist_html

    playlist = Playlist("https://www.fakeurl.com/playlist?list=whatever")
    # Report no "load more" URL so only the fixture page is used.
    playlist._find_load_more_url = MagicMock(return_value=None)
    playlist.populate_video_urls()

    request_get.assert_called()

    expected_urls = [
        "https://www.youtube.com/watch?v=ujTCoH21GlA",
        "https://www.youtube.com/watch?v=45ryDIPHdGg",
        "https://www.youtube.com/watch?v=1BYu65vLKdA",
        "https://www.youtube.com/watch?v=3AQ_74xrch8",
        "https://www.youtube.com/watch?v=ddqQUz9mZaM",
        "https://www.youtube.com/watch?v=vwLT6bZrHEE",
        "https://www.youtube.com/watch?v=TQKI0KE-JYY",
        "https://www.youtube.com/watch?v=dNBvQ38MlT8",
        "https://www.youtube.com/watch?v=JHxyrMgOUWI",
        "https://www.youtube.com/watch?v=l2I8NycJMCY",
        "https://www.youtube.com/watch?v=g1Zbuk1gAfk",
        "https://www.youtube.com/watch?v=zixd-si9Q-o",
    ]
    assert playlist.video_urls == expected_urls
def getPlaylistLinks(self, url):
    """Gets the video links of a youtube playlist from a url.

    Args:
        url (str): The youtube url.

    Returns:
        output (list[str]): a list of urls of individual videos from the
            playlist if success

    Raises:
        IOError: if unable to retrieve the links from the playlist
    """
    playlist = Playlist(url)
    try:
        playlist.populate_video_urls()
    except Exception as err:
        # Catch Exception rather than everything so KeyboardInterrupt and
        # SystemExit pass through, and keep the original error as the cause.
        raise IOError(
            "unable to retrieve links from playlist: %s" % url) from err
    return playlist.video_urls
def download_playlist():
    """Prompt for a playlist URL and download each of its videos as MP4.

    Prints the playlist title and size, then downloads the first
    ``video/mp4`` stream of every video to ``download_destination``.
    """
    youtube_playlist = input("Enter Youtube Playlist URL : ")
    playlist = Playlist(youtube_playlist)
    playlist.populate_video_urls()
    print('Title of the playlist: %s' % playlist.title())
    print('Number of videos in playlist: %s' % len(playlist.video_urls))

    print("Downloading Playlist Videos \n")
    for position, video_url in enumerate(playlist.video_urls):
        print(str(position) + " >> " + video_url)
        mp4_streams = YouTube(video_url).streams.filter(mime_type="video/mp4")
        chosen_stream = mp4_streams.first()
        chosen_stream.download(download_destination,
                               "PhysicsSession" + str(position),
                               "unAcademy-ClassXI-JEE")
def main():
    """Save the comments of every video in a playlist as numbered JSON files.

    Prompts for a playlist URL and a starting number, pages through
    ``get_video_comment`` for each video and writes one
    ``<NNN>.<sanitized title>_comment.json`` file per video.
    """
    args = get_parser().parse_args()

    playlist_url = input("Enter YouTube Playlist URL : ")
    playlist = Playlist(playlist_url)
    playlist.populate_video_urls()
    print(playlist.video_urls)
    number_offset = int(input("Enter first NUM : "))

    for position, video_url in enumerate(playlist.video_urls):
        # Retry until the video metadata (its title) can be read.
        while True:
            try:
                video = YouTube(video_url)
                print(video.title)
            except Exception:
                time.sleep(1)
                print('retry')
            else:
                break

        video_id = video_url.split('=')[1]

        # 'HEAD' requests the first page; 'NONE' marks that no page is left.
        collected = []
        page_token = 'HEAD'
        while page_token != 'NONE':
            page = get_video_comment(video_id, args.n, page_token)
            collected += page['items']
            page_token = page['nextPageToken'] if 'nextPageToken' in page.keys() else 'NONE'

        # Replace characters that are not valid in file names.
        safe_title = re.sub(r'[\\/:*?"<>|]+', '_', video.title)
        out_name = str(position + number_offset).zfill(3) + '.' + safe_title + "_comment.json"

        serialized = json.dumps(collected, ensure_ascii=False)
        with open(out_name, mode='w', encoding="utf-8") as out_file:
            out_file.write(serialized)
def get_resolution():
    """Print the top progressive MP4 resolution of the first video of each playlist.

    Playlist ids are read from ``playlists.txt``, one per line. For each
    playlist one line ``<index> <playlist id> <resolution>`` is printed.
    """
    with open('playlists.txt') as fin:
        playlists = [line.strip() for line in fin]

    for idx, list_id in enumerate(playlists):
        playlist_url = "https://www.youtube.com/playlist?list=%s" % list_id
        pl = Playlist(playlist_url)
        pl.populate_video_urls()
        for url in pl.video_urls:
            yt = YouTube(url)
            streams = yt.streams.filter(progressive=True, file_extension='mp4')
            stream = streams.order_by('resolution').desc().first()
            # One formatted argument prints the same space-separated line on
            # Python 2 and Python 3.
            print("%s %s %s" % (idx, list_id, stream.resolution))
            # Only the first video of each playlist is inspected.
            break
def downloadYouTube(videourl, path):
    """Download a video's progressive MP4 stream into ``path``.

    The stream that sorts first by descending resolution is chosen.

    Args:
        videourl: URL of the video.
        path: Destination directory; created if it does not exist.
    """
    yt = YouTube(videourl)
    yt = yt.streams.filter(
        progressive=True,
        file_extension='mp4').order_by('resolution').desc().first()
    if not os.path.exists(path):
        os.makedirs(path)
    yt.download(path)


# Example:
#   downloadYouTube(url, '/home/botao/share/youtube_videos/yangyi_lee/')

pl = Playlist(url)
pl.populate_video_urls()
l = pl.video_urls
print('len %d' % len(l))
#pl.download_all('/home/botao/share/youtube_videos/yangyi_lee')
i = 0
while i < len(l):
    print("downloading %d/154, u %s" % (i, l[i]))
    try:
        downloadYouTube(l[i], '/home/botao/share/youtube_videos/yangyi_lee/')
    except Exception:
        # The same video is retried until it succeeds. Catching Exception
        # (not everything) keeps Ctrl-C able to stop the loop.
        print('failed at %d, retry' % i)
        continue
    i = i + 1
from os import path
from os import remove
from moviepy.audio.io.ffmpeg_audiowriter import ffmpeg_audiowrite
from moviepy.audio.io.AudioFileClip import AudioFileClip
from pathlib import Path
from datetime import datetime

errorList = []

print('Playlist to MP4 v3.0')
print('Program started: ' + datetime.now().strftime("%Y-%m-%d %H:%M:%S") + '\n')

file = open('downloadplaylist.txt', 'r')
for line in file:
    # Each line of the file names a playlist; walk over all of its videos.
    newPlaylist = Playlist(Playlist(line).construct_playlist_url())
    newPlaylist.populate_video_urls()
    for url in newPlaylist.video_urls:
        # Fetch the video and pick its MP4 audio-only streams, falling back
        # to any MP4 stream when there is none; download the first one.
        ogVid = YouTube(url)
        yt = ogVid.streams.filter(only_audio=True, subtype='mp4')
        if yt.count() == 0:
            yt = ogVid.streams.filter(subtype='mp4')
        yt.first().download(getcwd() + '/.trash')

        # Drop , ' " . ; from the title so it matches the name of the file
        # created by the YouTube downloader.
        vidTitle = ogVid.title
        vidTitle = vidTitle.translate(str.maketrans('', '', ',\'\".;'))
def download_clips(selected_season=None):
    """Cut caption-aligned clips out of the episodes of each season playlist.

    Recreates the ``clips`` directory, reads the season playlists from the
    BobRossInc channel page and, for every season (or only the one whose
    zero-based index equals ``selected_season``), downloads each episode,
    writes one clip per English caption whose text contains ``(`` and then
    deletes the downloaded episode file.

    Args:
        selected_season: Zero-based index of the only season to process, or
            ``None`` to process every season.
    """
    # Best effort: the directory may not exist yet.
    try:
        shutil.rmtree("clips")
    except Exception:
        pass
    time.sleep(3)
    os.mkdir("clips")

    with requests.Session() as session:
        url = "https://www.youtube.com/user/BobRossInc/playlists?shelf_id=7&view=50&sort=dd"
        # NOTE(review): the page is requested twice and only the second
        # response is used -- presumably the first call primes the session;
        # confirm before removing it.
        session.get(url)
        sauce = session.get(url)
        soup = bs(sauce.content, 'html')

        for season, playlist in enumerate(
                tqdm(soup.find_all('h3', {'class': 'yt-lockup-title'}))):
            if not (season == selected_season or selected_season is None):
                continue

            season_url = "https://www.youtube.com/{}".format(
                playlist.find('a').get('href'))
            pl = Playlist(season_url)
            pl.populate_video_urls()

            for episode, episode_url in enumerate(tqdm(pl.video_urls)):
                clip_titles = []

                # Download the episode, with one retry after a short pause.
                try:
                    vid = YouTube(episode_url)
                    vid.streams.filter(subtype='mp4').first().download()
                except Exception:
                    try:
                        time.sleep(3)
                        vid = YouTube(episode_url)
                        vid.streams.filter(subtype='mp4').first().download()
                    except Exception:
                        print(
                            "Couldn't download season {}, skipping season."
                            .format(season + 1))
                        # Despite the message, this moves on to the next
                        # episode of the same season.
                        continue

                episode_filename = "{}.mp4".format(
                    vid.title.replace(".", "").replace("'", ""))

                caption = vid.captions.get_by_language_code('en')
                if caption is None:
                    # Without English captions there is nothing to cut.
                    print("No English captions for {}, no clips written."
                          .format(episode_filename))
                    cues = []
                else:
                    cues = ET.fromstring(caption.xml_captions)

                for child in cues:
                    start = child.attrib["start"]
                    dur = child.attrib["dur"]
                    # An element without text yields None; treat it as empty.
                    text = child.text or ""
                    if "(" not in text:
                        continue

                    text = text.replace("(", "").replace(")", "").replace(
                        " ", "_")
                    try:
                        clip = VideoFileClip(episode_filename)
                        time_stamp = time.strftime(
                            "%M_%S", time.gmtime(float(start)))
                        clip.subclip(
                            float(start),
                            float(start) + float(dur)).write_videofile(
                                "clips/S{}E{}-{}-{}-{}.mp4".format(
                                    season + 1, episode + 1, time_stamp,
                                    text, clip_titles.count(text)),
                                fps=30,
                                codec='mpeg4')
                        clip_titles.append(text)
                        clip.reader.close()
                        clip.audio.reader.close_proc()
                    except Exception:
                        print("Could not get clip {}.".format(text))

                time.sleep(3)  # give clip audio/reader enough time to close
                try:
                    os.remove(episode_filename)  # delete episode file
                except Exception:
                    print(
                        "Couldn't delete episode file {}, please do so manually."
                        .format(episode_filename))
print(FILE_PATH) except: FILE_PATH = '/home/oscar/Tec/' pass for INDEX in range(len(DICTIONARY_YT)): YT_NAME = list(DICTIONARY_YT.keys())[INDEX] YT_PLAYLIST = list(DICTIONARY_YT.values())[INDEX] PATH_DONWLOAD = FILE_PATH + '/YT/' + YT_NAME + '/' if not os.path.exists(PATH_DONWLOAD): os.makedirs(PATH_DONWLOAD, exist_ok=True) print('BUSCANDO URLS EN PLAYLIST') playlist = Playlist(YT_PLAYLIST) playlist.populate_video_urls() VIDEOS_PLAYLIST = playlist.video_urls try: with open(PATH_DONWLOAD + 'URLS_CHECKED.txt', 'r') as file: URLS_CHECKED = file.read().splitlines() except: URLS_CHECKED = [] print('TODAVÍA NO EXISTE URLS_CHECKED.txt') NEW_URLS = [item for item in VIDEOS_PLAYLIST if item not in URLS_CHECKED] BUSCAR_LINKS_BANDCAMP = True if BUSCAR_LINKS_BANDCAMP: LINKS_bandcamp = [] VIDEOS_WITHOUT_BANDCAMP = []