def test_dfxp2srt(self):
    """Check that dfxp2srt() turns a small TTML document into SRT cues.

    The input holds three ``<p>`` paragraphs with ``begin``/``end``
    attributes; two of them contain ``<br/>`` elements and one wraps its
    text in a ``<span>``.  The test asserts that the converter's output
    equals three numbered SRT cues.
    """
    dfxp_data = '''<?xml version="1.0" encoding="UTF-8"?>
        <tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
        <body>
            <div xml:lang="en">
                <p begin="0" end="1">The following line contains Chinese characters and special symbols</p>
                <p begin="1" end="2">第二行<br/>♪♪</p>
                <p begin="2" end="3"><span>Third<br/>Line</span></p>
            </div>
        </body>
        </tt>'''
    # SRT is line-oriented: the cue number, the timing line and every text
    # line sit on their own line, and each cue is closed by an empty line.
    # Explicit "\n" escapes keep that structure visible and immune to
    # whitespace mangling of the source file.
    srt_data = (
        '1\n'
        '00:00:00,000 --> 00:00:01,000\n'
        'The following line contains Chinese characters and special symbols\n'
        '\n'
        '2\n'
        '00:00:01,000 --> 00:00:02,000\n'
        '第二行\n'
        '♪♪\n'
        '\n'
        '3\n'
        '00:00:02,000 --> 00:00:03,000\n'
        'Third\n'
        'Line\n'
        '\n'
    )
    self.assertEqual(dfxp2srt(dfxp_data), srt_data)
def test_dfxp2srt(self):
    """Check dfxp2srt() against two small TTML documents.

    The first document mixes ``end`` and ``dur`` timing attributes and
    ends with three paragraphs whose timestamps are negative; the
    expected output contains only the first three cues, i.e. the
    paragraphs with negative values must not show up.

    The second document declares no default XML namespace on ``<tt>``
    and must still yield its single cue.
    """
    dfxp_data = '''<?xml version="1.0" encoding="UTF-8"?>
        <tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
        <body>
            <div xml:lang="en">
                <p begin="0" end="1">The following line contains Chinese characters and special symbols</p>
                <p begin="1" end="2">第二行<br/>♪♪</p>
                <p begin="2" dur="1"><span>Third<br/>Line</span></p>
                <p begin="3" end="-1">Lines with invalid timestamps are ignored</p>
                <p begin="-1" end="-1">Ignore, two</p>
                <p begin="3" dur="-1">Ignored, three</p>
            </div>
        </body>
        </tt>'''
    # SRT is line-oriented: the cue number, the timing line and every text
    # line sit on their own line, and each cue is closed by an empty line.
    # Explicit "\n" escapes keep that structure visible and immune to
    # whitespace mangling of the source file.
    srt_data = (
        '1\n'
        '00:00:00,000 --> 00:00:01,000\n'
        'The following line contains Chinese characters and special symbols\n'
        '\n'
        '2\n'
        '00:00:01,000 --> 00:00:02,000\n'
        '第二行\n'
        '♪♪\n'
        '\n'
        '3\n'
        '00:00:02,000 --> 00:00:03,000\n'
        'Third\n'
        'Line\n'
        '\n'
    )
    self.assertEqual(dfxp2srt(dfxp_data), srt_data)

    # Same conversion, but the root element carries no default namespace.
    dfxp_data_no_default_namespace = '''<?xml version="1.0" encoding="UTF-8"?>
        <tt xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
        <body>
            <div xml:lang="en">
                <p begin="0" end="1">The first line</p>
            </div>
        </body>
        </tt>'''
    srt_data = (
        '1\n'
        '00:00:00,000 --> 00:00:01,000\n'
        'The first line\n'
        '\n'
    )
    self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data)
def run(self, info):
    """Convert the requested subtitle tracks to ``self.format``.

    Reads ``info['requested_subtitles']`` (a mapping of language to a dict
    with at least an ``'ext'`` key) and ``info['filepath']``.  Tracks that
    already have the target extension are left alone.  dfxp/ttml/tt tracks
    are first converted to SRT with ``dfxp2srt``; everything else (and SRT
    when the target is not ``'srt'``) goes through ``self.run_ffmpeg``.
    Each converted track's entry in the subtitles mapping is replaced by
    ``{'ext': ..., 'data': ...}`` holding the converted text.

    Returns:
        A ``(filenames, info)`` tuple.  ``filenames`` lists the source
        subtitle files that were converted plus any intermediate ``.srt``
        files; presumably the caller treats them as disposable - confirm
        against the post-processor framework.
    """
    requested = info.get('requested_subtitles')
    media_path = info['filepath']
    target_ext = self.format
    # The 'vtt' extension is passed to ffmpeg's -f option as 'webvtt'.
    ffmpeg_format = 'webvtt' if target_ext == 'vtt' else target_ext

    if requested is None:
        self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to convert')
        return [], info

    self._downloader.to_screen('[ffmpeg] Converting subtitles')
    consumed_files = []
    for lang, track in requested.items():
        source_ext = track['ext']
        if source_ext == target_ext:
            self._downloader.to_screen(
                '[ffmpeg] Subtitle file for %s is already in the requested format' % target_ext)
            continue

        source_path = subtitles_filename(media_path, lang, source_ext)
        consumed_files.append(source_path)
        target_path = subtitles_filename(media_path, lang, target_ext)

        if source_ext in ('dfxp', 'ttml', 'tt'):
            self._downloader.report_warning(
                'You have requested to convert dfxp (TTML) subtitles into another format, '
                'which results in style information loss')

            # Convert to SRT in-process, then let ffmpeg work on the SRT file.
            srt_path = subtitles_filename(media_path, lang, 'srt')
            with open(source_path, 'rb') as dfxp_stream:
                srt_text = dfxp2srt(dfxp_stream.read())
            with io.open(srt_path, 'wt', encoding='utf-8') as srt_stream:
                srt_stream.write(srt_text)
            source_path = srt_path

            requested[lang] = {
                'ext': 'srt',
                'data': srt_text
            }

            # SRT was the goal: nothing left for ffmpeg to do.
            if target_ext == 'srt':
                continue
            # Otherwise the SRT file is only an intermediate product.
            consumed_files.append(srt_path)

        self.run_ffmpeg(source_path, target_path, ['-f', ffmpeg_format])

        with io.open(target_path, 'rt', encoding='utf-8') as converted:
            requested[lang] = {
                'ext': target_ext,
                'data': converted.read(),
            }

    return consumed_files, info
def __init__(self, handler):
    """Run the complete download job described by *handler*.

    All work happens in the constructor:

    1. Build ``self.ydl_opts``.
    2. Probe the URL from ``handler.get_url()`` with ``skip_download``
       set, once as a playlist and (if needed) once with ``noplaylist``,
       to find out whether the handler has to be asked via
       ``on_playlist_request()``.
    3. For every collected entry: test which subtitles can be converted,
       convert the thumbnail, rewrite the info JSON, skip the entry if the
       output already exists, otherwise download into a locked
       ``<title>.part`` directory and move the result into the folder
       from ``handler.get_download_dir()``.

    Progress and errors are reported through *handler*
    (``on_progress_start``, ``on_progress_end``, ``on_error``).

    Args:
        handler: Object providing the ``get_*`` accessors and ``on_*``
            callbacks used below.  Its concrete type is not visible in
            this block.

    Raises:
        RuntimeError: If ``self._on_info_dict_json`` is still set after
            ``self._load_video`` returned ('info_dict not received').
        SystemExit: ``sys.exit(1)`` is called after reporting when the
            download folder cannot be created or locked, when
            ``FFMPEG_EXE`` is not found, or when the finished file cannot
            be moved.
    """
    self._handler = handler
    # See ydl_opts['forcejson']
    self._on_info_dict_json = None
    self._allow_authentication_request = True
    self._skip_authentication = False
    self._skipped_count = 0
    self.ydl_opts = {
        'logger': self,
        'logtostderr': True,
        'no_color': True,
        'progress_hooks': [self._on_progress],
        'fixup': 'detect_or_warn',
        'ignoreerrors': True,  # handled via logger error callback
        'retries': 10,
        'fragment_retries': 10,
        'writesubtitles': True,
        'allsubtitles': True,
        'subtitlesformat': 'vtt/best',
        'keepvideo': True,
        'postprocessors': [
            {'key': 'FFmpegMetadata'},
            {'key': 'FFmpegSubtitlesConvertor', 'format': 'vtt'},
            {'key': 'FFmpegEmbedSubtitle'},
            {'key': 'XAttrMetadata'}]}
    url = self._handler.get_url()
    download_dir = os.path.abspath(self._handler.get_download_dir())
    # Cookies and all probe results live in a temporary directory that is
    # removed when the job is finished.
    with tempfile.TemporaryDirectory() as temp_dir:
        self.ydl_opts['cookiefile'] = os.path.join(temp_dir, 'cookies')
        # Collect info without downloading videos
        testplaylist_dir = os.path.join(temp_dir, 'testplaylist')
        noplaylist_dir = os.path.join(temp_dir, 'noplaylist')
        fullplaylist_dir = os.path.join(temp_dir, 'fullplaylist')
        for path in [testplaylist_dir, noplaylist_dir, fullplaylist_dir]:
            os.mkdir(path)
        # Options that are only wanted while probing; they are deleted
        # again before the real download starts.
        self.ydl_opts['writeinfojson'] = True
        self.ydl_opts['writethumbnail'] = True
        self.ydl_opts['skip_download'] = True
        self.ydl_opts['playlistend'] = 2
        self.ydl_opts['outtmpl'] = '%(autonumber)s.%(ext)s'
        # Test playlist
        # _load_playlist returns a pair: a sized collection of entries and
        # a count of skipped ones (both are added together below).
        info_testplaylist, skipped_testplaylist = self._load_playlist(
            testplaylist_dir, url)
        self.ydl_opts['noplaylist'] = True
        # Only probe a second time with 'noplaylist' when the first probe
        # saw more than one entry.
        if len(info_testplaylist) + skipped_testplaylist > 1:
            info_noplaylist, skipped_noplaylist = self._load_playlist(
                noplaylist_dir, url)
        else:
            info_noplaylist = info_testplaylist
            skipped_noplaylist = skipped_testplaylist
        del self.ydl_opts['noplaylist']
        del self.ydl_opts['playlistend']
        if (len(info_testplaylist) + skipped_testplaylist >
                len(info_noplaylist) + skipped_noplaylist):
            # 'noplaylist' made a difference, so the handler decides
            # whether the whole playlist is wanted.
            self.ydl_opts['noplaylist'] = (
                not self._handler.on_playlist_request())
            if not self.ydl_opts['noplaylist']:
                info_playlist, _ = self._load_playlist(
                    fullplaylist_dir, url)
            else:
                info_playlist = info_noplaylist
        elif len(info_testplaylist) + skipped_testplaylist > 1:
            # More than one entry and 'noplaylist' changed nothing: load
            # everything without the 'playlistend' limit.
            info_playlist, _ = self._load_playlist(fullplaylist_dir, url)
        else:
            info_playlist = info_testplaylist
        # Download videos
        self._allow_authentication_request = False
        del self.ydl_opts['writeinfojson']
        del self.ydl_opts['writethumbnail']
        del self.ydl_opts['skip_download']
        # Include id and format_id in outtmpl to prevent youtube-dl
        # from continuing wrong file
        self.ydl_opts['outtmpl'] = '%(id)s.%(format_id)s.%(ext)s'
        # Output info_dict as JSON handled via logger debug callback
        self.ydl_opts['forcejson'] = True
        mode = self._handler.get_mode()
        if mode == 'audio':
            resolution = MAX_RESOLUTION
            prefer_mpeg = False
            self.ydl_opts['format'] = 'bestaudio/best'
            # Audio extraction and thumbnail embedding are put in front of
            # the post-processors configured above.
            self.ydl_opts['postprocessors'].insert(0, {
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
                'preferredquality': '192'})
            self.ydl_opts['postprocessors'].insert(1, {
                'key': 'EmbedThumbnail',
                'already_have_thumbnail': True})
        else:
            resolution = self._handler.get_resolution()
            prefer_mpeg = self._handler.get_prefer_mpeg()
        try:
            os.makedirs(download_dir, exist_ok=True)
        except OSError as e:
            traceback.print_exc(file=sys.stderr)
            sys.stderr.flush()
            self._handler.on_error(
                'ERROR: Failed to create download folder: %s' % e)
            sys.exit(1)
        # Each entry is a 3-tuple: path of the info JSON, a sequence of
        # thumbnail paths and an iterable of (path, language, extension)
        # subtitle tuples.
        for i, (info_path, thumbnail_paths, subtitles) in enumerate(
                info_playlist):
            with open(info_path) as f:
                info = json.load(f)
            title = info.get('title') or info.get('id') or 'video'
            output_title = self._get_output_title(title)
            # Test subtitles
            # youtube-dl fails for subtitles that it can't convert or
            # are unsupported by ffmpeg
            supported_subtitles = []
            for sub_path, sub_lang, sub_ext in subtitles:
                print('[youtube_dl_slave] Testing subtitle (%r, %r)' %
                      (sub_lang, sub_ext), file=sys.stderr, flush=True)
                if sub_ext in ['dfxp', 'ttml', 'tt']:
                    # Try to use youtube-dl's internal dfxp2srt converter
                    with open(sub_path, 'rb') as f:
                        sub_data = f.read()
                    try:
                        sub_data = dfxp2srt(sub_data)
                    except Exception:
                        # Any conversion failure just drops this subtitle.
                        traceback.print_exc(file=sys.stderr)
                        sys.stderr.flush()
                        continue
                    ff_sub_path = sub_path + '-converted.srt'
                    with open(ff_sub_path, 'w', encoding='utf-8') as f:
                        f.write(sub_data)
                else:
                    ff_sub_path = sub_path
                # Try to read and convert subtitles with ffmpeg
                # Output goes to '-' with stdout discarded; only the exit
                # status (check=True) is of interest.
                try:
                    subprocess.run(
                        [FFMPEG_EXE, '-i', os.path.abspath(ff_sub_path),
                         '-f', 'webvtt', '-'],
                        check=True, stdin=subprocess.DEVNULL,
                        stdout=subprocess.DEVNULL)
                except FileNotFoundError:
                    traceback.print_exc(file=sys.stderr)
                    sys.stderr.flush()
                    self._handler.on_error(
                        'ERROR: %r not found' % FFMPEG_EXE)
                    sys.exit(1)
                except subprocess.CalledProcessError:
                    traceback.print_exc(file=sys.stderr)
                    sys.stderr.flush()
                    continue
                supported_subtitles.append((sub_lang, sub_ext))
            # Choose supported subtitles
            # Keep only entries whose (language, ext) pair passed the test
            # above; languages without any remaining entry are dropped.
            new_info_subtitles = {}
            for sub_lang, subs in (info.get('subtitles') or {}).items():
                new_subs = []
                for sub in subs or []:
                    if (sub_lang, sub.get('ext')) in supported_subtitles:
                        new_subs.append(sub)
                if new_subs:
                    new_info_subtitles[sub_lang] = new_subs
            info['subtitles'] = new_info_subtitles
            # Only the first thumbnail is used; '' means "no thumbnail".
            thumbnail_path = thumbnail_paths[0] if thumbnail_paths else ''
            if thumbnail_path:
                # Convert thumbnail to JPEG and limit resolution
                print('[youtube_dl_slave] Converting thumbnail',
                      file=sys.stderr, flush=True)
                new_thumbnail_path = thumbnail_path + '-converted.jpg'
                try:
                    subprocess.run(
                        [FFMPEG_EXE, '-i', os.path.abspath(thumbnail_path),
                         '-vf', ('scale=\'min({0},iw):min({0},ih):'
                                 'force_original_aspect_ratio=decrease\''
                                 ).format(MAX_THUMBNAIL_RESOLUTION),
                         os.path.abspath(new_thumbnail_path)],
                        check=True, stdin=subprocess.DEVNULL,
                        stdout=subprocess.DEVNULL)
                except FileNotFoundError:
                    traceback.print_exc(file=sys.stderr)
                    sys.stderr.flush()
                    self._handler.on_error(
                        'ERROR: %r not found' % FFMPEG_EXE)
                    sys.exit(1)
                except subprocess.CalledProcessError:
                    # Conversion failed: continue without a thumbnail.
                    traceback.print_exc(file=sys.stderr)
                    sys.stderr.flush()
                    new_thumbnail_path = ''
                # No longer needed
                os.remove(thumbnail_path)
                thumbnail_path = new_thumbnail_path
            self._handler.on_progress_start(i, len(info_playlist), title,
                                            thumbnail_path)
            # Point every thumbnail entry at the converted file (or '').
            for thumbnail in info.get('thumbnails') or []:
                thumbnail['filename'] = thumbnail_path
            # Remove description, because long comments cause problems when
            # displayed in Nautilus and other applications.
            with contextlib.suppress(KeyError):
                del info['description']
            sort_formats(info.get('formats') or [], resolution,
                         prefer_mpeg)
            # Write the modified info back; the download below is started
            # from this file.
            with open(info_path, 'w') as f:
                json.dump(info, f)
            # Check if we already got the file
            existing_filename = self._find_existing_download(
                download_dir, output_title, mode)
            if existing_filename is not None:
                self._handler.on_progress_end(existing_filename)
                continue
            # Download into separate directory because youtube-dl generates
            # many temporary files
            temp_download_dir = os.path.join(
                download_dir, output_title + '.part')
            # Lock download directory to prevent other processes from
            # writing to the same files
            # The lock is entered through an ExitStack so that an OSError
            # while acquiring it can be reported separately from errors
            # raised inside the locked section.
            temp_download_dir_cm = contextlib.ExitStack()
            try:
                temp_download_dir_cm.enter_context(
                    self._create_and_lock_dir(temp_download_dir))
            except OSError as e:
                traceback.print_exc(file=sys.stderr)
                sys.stderr.flush()
                self._handler.on_error(
                    'ERROR: Failed to lock download folder: %s' % e)
                sys.exit(1)
            with temp_download_dir_cm:
                # Check if the file got downloaded in the meantime
                existing_filename = self._find_existing_download(
                    download_dir, output_title, mode)
                if existing_filename is not None:
                    filename = existing_filename
                else:
                    info_dict = None
                    # See ydl_opts['forcejson']

                    def on_info_dict_json(info_dict_):
                        nonlocal info_dict
                        info_dict = info_dict_
                    self._on_info_dict_json = on_info_dict_json
                    self._load_video(temp_download_dir, info_path)
                    # The callback attribute is expected to be falsy again
                    # here; presumably the logger callback clears it after
                    # delivering the info dict - confirm in that callback.
                    if self._on_info_dict_json:
                        raise RuntimeError('info_dict not received')
                    # Find the temporary filename
                    temp_filename_root, temp_filename_ext = (
                        os.path.splitext(info_dict['_filename']))
                    if mode == 'audio':
                        temp_filename_ext = '.mp3'
                    else:
                        # youtube-dl changes extension for incompatible
                        # formats to .mkv
                        for ext in [temp_filename_ext, '.mkv']:
                            if os.path.exists(temp_filename_root + ext):
                                temp_filename_ext = ext
                                break
                    temp_filename = temp_filename_root + temp_filename_ext
                    filename = output_title + temp_filename_ext
                    # Move finished download from download to target dir
                    # NOTE(review): "Falied" in the message below is a typo
                    # for "Failed"; left untouched because it is runtime
                    # text.
                    try:
                        os.replace(
                            os.path.join(temp_download_dir, temp_filename),
                            os.path.join(download_dir, filename))
                    except OSError as e:
                        traceback.print_exc(file=sys.stderr)
                        sys.stderr.flush()
                        self._handler.on_error((
                            'ERROR: Falied to move finished download to '
                            'download folder: %s') % e)
                        sys.exit(1)
                # Delete download directory
                with contextlib.suppress(OSError):
                    shutil.rmtree(temp_download_dir)
            self._handler.on_progress_end(filename)