def test_is_unicode(self):
    """Check that rendering an item holding encoded (non-unicode) text fails.

    For every listed encoding, ``'non_unicode\\n'`` is encoded with
    ``cencode``, wrapped in a ``WebVTTItem`` and ``WebVTTItem.__str__`` is
    expected to raise ``NotImplementedError``. The test is skipped (returns
    immediately) when ``is_py3`` is true.
    """
    # This test makes no sense for Py3
    if is_py3:
        return

    # Non-unicode encodings as listed in
    # https://docs.python.org/2/library/codecs.html#standard-encodings
    # (each encoding appears exactly once; the former duplicate
    # 'iso8859_13' entry only repeated an identical check).
    non_unicode_encodings = [
        'big5', 'big5hkscs',
        'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
        'cp1256', 'cp1257', 'cp1258',
        'euc_jp', 'euc_jis_2004', 'euc_jisx0213', 'euc_kr',
        'gb2312', 'gbk', 'gb18030', 'hz',
        'iso2022_jp', 'iso2022_jp_1', 'iso2022_jp_2', 'iso2022_jp_2004',
        'iso2022_jp_3', 'iso2022_jp_ext', 'iso2022_kr',
        'latin_1',
        'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
        'iso8859_7', 'iso8859_8', 'iso8859_9', 'iso8859_10', 'iso8859_11',
        'iso8859_13', 'iso8859_14', 'iso8859_15', 'iso8859_16',
        'johab', 'koi8_r', 'koi8_u',
        'mac_cyrillic', 'mac_greek', 'mac_iceland', 'mac_latin2',
        'mac_roman', 'mac_turkish',
        'ptcp154',
        'shift_jis', 'shift_jis_2004', 'shift_jisx0213',
    ]

    for enc in non_unicode_encodings:
        non_unicode = cencode('non_unicode\n', enc)
        # Kept on the instance, as before, so the last item built stays
        # reachable from the test case.
        self.no_unicode_item = WebVTTItem(1, text=non_unicode)
        self.assertRaises(NotImplementedError, WebVTTItem.__str__,
                          self.no_unicode_item)
def setUp(self):
    """Build the shared item and the raw cue strings used by the tests."""
    # Item with index 1, shifted by one minute, whose end is then pushed a
    # further 20 seconds.
    item = WebVTTItem(1, text="Hello world !")
    item.shift(minutes=1)
    item.end.shift(seconds=20)
    self.item = item

    # Cue with a numeric index and dot-separated milliseconds.
    self.string = '1\n00:01:00.000 --> 00:01:20.000\nHello world !\n'
    self.dots = '1\n00:01:00.000 --> 00:01:20.000\nHello world !\n'

    # Cue whose index line is the word "foo".
    self.string_index = 'foo\n00:01:00.000 --> 00:01:20.000\nHello !\n'

    # Text that is not a cue at all.
    self.bad_string = 'foobar'

    # Cues carrying extra settings after the end timestamp.
    self.coordinates = (
        '1\n00:01:00.000 --> 00:01:20.000 '
        'X1:000 X2:000 Y1:050 Y2:100\n'
        'Hello world !\n'
    )
    self.vtt = (
        '00:01:00.000 --> 00:01:20.000 '
        'D:vertical A:start L:12%\n'
        'Hello world !\n'
    )

    # Comma-separated milliseconds: one without an index line, one with a
    # stray "?" right after the end timestamp.
    self.no_index = '00:01:00,000 --> 00:01:20,000\nHello world !\n'
    self.junk_after_timestamp = (
        '1\n00:01:00,000 --> 00:01:20,000?\n'
        'Hello world !\n'
    )
def merge_subtitle(sub_a, sub_b, delta):
    """Merge two subtitle tracks into a single ``WebVTTFile``.

    Every start and end ordinal of both tracks is collected and sorted; each
    pair of consecutive boundaries is a candidate interval. For intervals
    longer than ``delta`` the text of each track is looked up with
    ``find_subtitle``, combined with ``join_lines`` and, when the result is
    non-empty, appended as a new item covering that interval.

    Args:
        sub_a: First iterable of items exposing ``start.ordinal`` and
            ``end.ordinal``.
        sub_b: Second iterable of such items.
        delta: Minimum duration (compared against ``end - start``) an
            interval must exceed to be considered.

    Returns:
        A ``WebVTTFile`` with the merged items, re-indexed through
        ``clean_indexes()``.
    """
    out = WebVTTFile()

    boundaries = sorted(
        ordinal
        for track in (sub_a, sub_b)
        for item in track
        for ordinal in (item.start.ordinal, item.end.ordinal)
    )

    # Cursors handed to and returned by find_subtitle for each track.
    j = k = 0
    # zip over consecutive boundaries replaces the Python-2-only xrange index
    # loop and behaves the same on Python 2 and 3.
    for lower, upper in zip(boundaries, boundaries[1:]):
        start = WebVTTTime.from_ordinal(lower)
        end = WebVTTTime.from_ordinal(upper)
        if (end - start) > delta:
            text_a, j = find_subtitle(sub_a, start, end, j)
            text_b, k = find_subtitle(sub_b, start, end, k)
            text = join_lines(text_a, text_b)
            if len(text) > 0:
                out.append(WebVTTItem(0, start, end, text))

    out.clean_indexes()
    return out
def main(options):
    """Download a video, extract its audio and write DeepSpeech captions.

    Steps: verify ffmpeg is callable, download ``options.vid_url`` into a
    temporary file, convert its audio to a 16 kHz mono 16-bit WAV with
    ffmpeg, run DeepSpeech over fixed-size segments of the audio and write
    the recognised text as cues to ``options.output``.

    Args:
        options: Object providing ``vid_url``, ``output`` and
            ``deepspeech_model_dir`` attributes.

    Calls ``exit(1)`` when ffmpeg is unavailable, when the conversion fails,
    or when the resulting WAV is not sampled at 16000 Hz.
    """
    # Function-scope import: only needed to delete the temporary WAV file.
    from os import remove

    # Ensure ffmpeg is around
    if not run_ffmpeg(['-version']):
        log.error(
            "ffmpeg needs to be available to strip audio from the video file.")
        exit(1)

    with NamedTemporaryFile(delete=True) as vid_file:
        log.info("Downloading %s - this might take a while." % options.vid_url)
        response = get(options.vid_url, stream=True)
        total_length = response.headers.get("content-length")

        if total_length is None:
            # No content-length header: download without a progress bar.
            log.info("Unknown length - can't predict how long this will take.")
            download_bar = None
        else:
            download_bar = ProgressBar(max_value=int(total_length))

        # Both cases stream chunk by chunk into vid_file. The old
        # unknown-length branch wrote to an undefined name ``f``.
        downloaded = 0
        for data in response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
            downloaded += len(data)
            vid_file.write(data)
            if download_bar is not None:
                download_bar.update(downloaded)
        # ffmpeg opens the file by name, so buffered data must hit the disk.
        vid_file.flush()

        log.info("Download done. Stripping audio.")
        (wav_file, wav_file_name) = mkstemp('.wav')
        try:
            result = run_ffmpeg([
                "-y", "-i", vid_file.name, "-vn", "-acodec", "pcm_s16le",
                "-ar", "16000", "-ac", "1", wav_file_name
            ])
            if not result:
                log.error("ffmpeg failed. Bailing.")
                exit(1)
            # NOTE(review): assumes wav.read loads the samples eagerly, so
            # the file can be deleted right after -- confirm for the wav
            # module in use.
            fs, audio = wav.read(wav_file_name)
        finally:
            # mkstemp never cleans up after itself: close the descriptor
            # and delete the file on success and on failure alike.
            close(wav_file)
            remove(wav_file_name)

    log.info("Will write VTT to %s" % options.output)

    # Make sure the WAV is to code...
    log.info("Loading up WAV file...")
    if fs != 16000:
        log.error("Only 16000hz WAV files are usable.")
        exit(1)

    total_samples = len(audio)
    duration_hours, duration_minutes, duration_seconds = sample_index_to_time(
        total_samples)
    log.info("Approximate duration: %d:%02d:%02d" %
             (duration_hours, duration_minutes, duration_seconds))

    # Let's load up DeepSpeech and get it ready.
    log.info("Loading pre-trained DeepSpeech model...")
    root_model_dir = path.join(options.deepspeech_model_dir, MODEL_DIR)
    model = path.join(root_model_dir, MODEL_FILE)
    alphabet = path.join(root_model_dir, MODEL_ALPHABET)
    lang_model = path.join(root_model_dir, MODEL_LANG_MODEL)
    trie = path.join(root_model_dir, MODEL_TRIE)

    deepspeech = Model(model, N_FEATURES, N_CONTEXT, alphabet, BEAM_WIDTH)
    log.info("Done loading model.")

    log.info("Loading language model...")
    deepspeech.enableDecoderWithLM(alphabet, lang_model, trie, LM_WEIGHT,
                                   WORD_COUNT_WEIGHT, VALID_WORD_COUNT_WEIGHT)
    log.info("Done loading model.")

    playhead = 0
    out = WebVTTFile()
    progress = ProgressBar(max_value=total_samples)

    while playhead < (total_samples - 1):
        end_point = min(playhead + AUDIO_SEGMENT_SAMPLES, (total_samples - 1))
        segment = audio[playhead:end_point]
        inference = deepspeech.stt(segment, fs)
        log.debug("Inferred: %s" % inference)

        start_hours, start_minutes, start_seconds = sample_index_to_time(
            playhead)
        playhead = end_point
        end_hours, end_minutes, end_seconds = sample_index_to_time(playhead)

        # Advance the bar before the skip below so that segments without
        # usable text still count as progress.
        progress.update(playhead)

        if not inference or inference == "ah":
            continue

        # .items() works on Python 2 and 3 alike (iteritems is 2-only).
        for search, replace in INFERENCE_REPLACEMENTS.items():
            inference = sub(r"\b" + search + r"\b", replace, inference)

        inference = fill(inference, width=MAX_CAPTION_WIDTH)

        start = WebVTTTime(start_hours, start_minutes, start_seconds)
        end = WebVTTTime(end_hours, end_minutes, end_seconds)
        out.append(WebVTTItem(0, start, end, inference))

        # Rewrite the output after every caption so the file on disk always
        # holds what has been transcribed so far.
        out.save(options.output, encoding="utf-8")

    out.clean_indexes()
    out.save(options.output, encoding="utf-8")
def setUp(self):
    """Create ``self.item``: index 1, text "Hello world !", then shifted.

    The whole item is shifted by one minute and its end by a further
    20 seconds.
    """
    item = WebVTTItem(1, text="Hello world !")
    item.shift(minutes=1)
    item.end.shift(seconds=20)
    self.item = item
def setUp(self):
    """Provide a default-constructed ``WebVTTItem`` as ``self.item``."""
    default_item = WebVTTItem()
    self.item = default_item
def test_multiple_item(self):
    """``WebVTTFile.text`` joins the text of several items with a newline."""
    vtt_file = WebVTTFile([
        WebVTTItem(1, {'seconds': 0}, {'seconds': 3}, 'Hello'),
        WebVTTItem(1, {'seconds': 1}, {'seconds': 2}, 'World !'),
    ])
    # assertEqual instead of the deprecated assertEquals alias, which was
    # removed in Python 3.12.
    self.assertEqual(vtt_file.text, 'Hello\nWorld !')
def test_single_item(self):
    """``WebVTTFile.text`` of a one-item file is that item's text."""
    vtt_file = WebVTTFile(
        [WebVTTItem(1, {'seconds': 1}, {'seconds': 2}, 'Hello')])
    # assertEqual instead of the deprecated assertEquals alias, which was
    # removed in Python 3.12.
    self.assertEqual(vtt_file.text, 'Hello')
def test_shift(self):
    """Shifting a file shifts its items, by offset and then by ratio."""
    subtitles = WebVTTFile([WebVTTItem()])

    # Positional offset shift applied to a default item.
    subtitles.shift(1, 1, 1, 1)
    self.assertEqual(subtitles[0].end, (1, 1, 1, 1))

    # Ratio shift applied on top of the previous result.
    subtitles.shift(ratio=2)
    self.assertEqual(subtitles[0].end, (2, 2, 2, 2))