Beispiel #1
0
 def test_from_time(self):
     # A WebVTTTime must compare against a ``time`` object both directly and
     # after conversion through ``to_time()``.
     # NOTE(review): ``time`` is presumably ``datetime.time``, whose fourth
     # positional argument is microseconds (4000 us == 4 ms) -- confirm
     # against the module imports.
     time_obj = time(1, 2, 3, 4000)

     # Dedicated comparison assertions report both operands on failure,
     # unlike assertTrue(<comparison>), which only reports "False is not true".
     self.assertEqual(WebVTTTime(1, 2, 3, 4), time_obj)
     self.assertGreaterEqual(WebVTTTime(1, 2, 3, 5), time_obj)
     self.assertLessEqual(WebVTTTime(1, 2, 3, 3), time_obj)
     self.assertNotEqual(WebVTTTime(1, 2, 3, 0), time_obj)

     self.assertEqual(WebVTTTime(1, 2, 3, 4).to_time(), time_obj)
     self.assertGreaterEqual(WebVTTTime(1, 2, 3, 5).to_time(), time_obj)
     self.assertLessEqual(WebVTTTime(1, 2, 3, 3).to_time(), time_obj)
     self.assertNotEqual(WebVTTTime(1, 2, 3, 0).to_time(), time_obj)
Beispiel #2
0
class TestSimpleTime(unittest.TestCase):
    """Component-level checks on a default-constructed ``WebVTTTime``."""

    def setUp(self):
        # Each test method works on its own default-constructed time.
        self.time = WebVTTTime()

    def test_default_value(self):
        # A time built without arguments is expected to have ordinal 0.
        ordinal = self.time.ordinal
        self.assertEqual(ordinal, 0)

    def test_micro_seconds(self):
        # Despite its name this exercises the ``milliseconds`` component:
        # it must survive an hour change, and 1000 extra ms must show up
        # as one second.
        stamp = self.time
        stamp.milliseconds = 1
        self.assertEqual(stamp.milliseconds, 1)
        stamp.hours += 42
        self.assertEqual(stamp.milliseconds, 1)
        stamp.milliseconds += 1000
        self.assertEqual(stamp.seconds, 1)

    def test_seconds(self):
        # Seconds survive an hour change; 60 extra seconds carry into minutes.
        stamp = self.time
        stamp.seconds = 1
        self.assertEqual(stamp.seconds, 1)
        stamp.hours += 42
        self.assertEqual(stamp.seconds, 1)
        stamp.seconds += 60
        self.assertEqual(stamp.minutes, 1)

    def test_minutes(self):
        # Minutes survive an hour change; 60 extra minutes carry into hours
        # (42 + 1 == 43).
        stamp = self.time
        stamp.minutes = 1
        self.assertEqual(stamp.minutes, 1)
        stamp.hours += 42
        self.assertEqual(stamp.minutes, 1)
        stamp.minutes += 60
        self.assertEqual(stamp.hours, 43)

    def test_hours(self):
        # Adding fewer than 60 minutes must leave the hour component alone.
        stamp = self.time
        stamp.hours = 1
        self.assertEqual(stamp.hours, 1)
        stamp.minutes += 42
        self.assertEqual(stamp.hours, 1)

    def test_shifting(self):
        # Shifting the default time by one unit per component must compare
        # equal to the matching 4-tuple.
        offset = (1, 1, 1, 1)
        self.time.shift(*offset)
        self.assertEqual(self.time, offset)

    def test_descriptor_from_class(self):
        # Reading the component on the class itself (no instance) is
        # expected to raise AttributeError.
        self.assertRaises(AttributeError, getattr, WebVTTTime, 'hours')
Beispiel #3
0
class TestSimpleTime(unittest.TestCase):
    """Checks each time component of ``WebVTTTime`` starting from defaults."""

    def setUp(self):
        # Start every test from a default-constructed time.
        self.time = WebVTTTime()

    def test_default_value(self):
        # The default time is expected to report an ordinal of 0.
        current_ordinal = self.time.ordinal
        self.assertEqual(current_ordinal, 0)

    def test_micro_seconds(self):
        # Exercises ``milliseconds`` (the method name is historical): the
        # value is kept across an hour change and 1000 ms becomes 1 second.
        moment = self.time
        moment.milliseconds = 1
        self.assertEqual(moment.milliseconds, 1)
        moment.hours += 42
        self.assertEqual(moment.milliseconds, 1)
        moment.milliseconds += 1000
        self.assertEqual(moment.seconds, 1)

    def test_seconds(self):
        # The seconds value is kept across an hour change and 60 s becomes
        # 1 minute.
        moment = self.time
        moment.seconds = 1
        self.assertEqual(moment.seconds, 1)
        moment.hours += 42
        self.assertEqual(moment.seconds, 1)
        moment.seconds += 60
        self.assertEqual(moment.minutes, 1)

    def test_minutes(self):
        # The minutes value is kept across an hour change and 60 min bumps
        # the hours from 42 to 43.
        moment = self.time
        moment.minutes = 1
        self.assertEqual(moment.minutes, 1)
        moment.hours += 42
        self.assertEqual(moment.minutes, 1)
        moment.minutes += 60
        self.assertEqual(moment.hours, 43)

    def test_hours(self):
        # 42 extra minutes are not enough to change the hour component.
        moment = self.time
        moment.hours = 1
        self.assertEqual(moment.hours, 1)
        moment.minutes += 42
        self.assertEqual(moment.hours, 1)

    def test_shifting(self):
        # After shifting every component by one, the time must equal the
        # same 4-tuple.
        delta = (1, 1, 1, 1)
        self.time.shift(*delta)
        self.assertEqual(self.time, delta)

    def test_descriptor_from_class(self):
        # Class-level access to the component is expected to raise
        # AttributeError.
        self.assertRaises(AttributeError, getattr, WebVTTTime, 'hours')
Beispiel #4
0
 def test_from_tuple(self):
     # A (hours, minutes, seconds, milliseconds) tuple on the left-hand side
     # must compare equal to the WebVTTTime built from the same components.
     pairs = (
         ((0, 0, 0, 0), WebVTTTime()),
         ((0, 0, 0, 1), WebVTTTime(milliseconds=1)),
         ((0, 0, 2, 0), WebVTTTime(seconds=2)),
         ((0, 3, 0, 0), WebVTTTime(minutes=3)),
         ((4, 0, 0, 0), WebVTTTime(hours=4)),
         ((1, 2, 3, 4), WebVTTTime(1, 2, 3, 4)),
     )
     for as_tuple, as_time in pairs:
         self.assertEqual(as_tuple, as_time)
Beispiel #5
0
 def test_from_dict(self):
     # A dict keyed by component name on the left-hand side must compare
     # equal to the WebVTTTime built from the same components; missing keys
     # correspond to constructor defaults.
     pairs = (
         ({}, WebVTTTime()),
         ({'milliseconds': 1}, WebVTTTime(milliseconds=1)),
         ({'seconds': 2}, WebVTTTime(seconds=2)),
         ({'minutes': 3}, WebVTTTime(minutes=3)),
         ({'hours': 4}, WebVTTTime(hours=4)),
         ({'hours': 1, 'minutes': 2, 'seconds': 3, 'milliseconds': 4},
          WebVTTTime(1, 2, 3, 4)),
     )
     for as_dict, as_time in pairs:
         self.assertEqual(as_dict, as_time)
Beispiel #6
0
class TestCoercing(TestCase):
    """Check that ``WebVTTTime`` compares equal to other time representations.

    Covers tuples, dicts, ``time`` objects, integer ordinals and the
    ``repr`` text.
    """

    def test_from_tuple(self):
        # (hours, minutes, seconds, milliseconds) tuples must equal the
        # WebVTTTime built from the same components.
        self.assertEqual((0, 0, 0, 0), WebVTTTime())
        self.assertEqual((0, 0, 0, 1), WebVTTTime(milliseconds=1))
        self.assertEqual((0, 0, 2, 0), WebVTTTime(seconds=2))
        self.assertEqual((0, 3, 0, 0), WebVTTTime(minutes=3))
        self.assertEqual((4, 0, 0, 0), WebVTTTime(hours=4))
        self.assertEqual((1, 2, 3, 4), WebVTTTime(1, 2, 3, 4))

    def test_from_dict(self):
        # Dicts keyed by component name must equal the matching WebVTTTime;
        # missing keys correspond to constructor defaults.
        self.assertEqual(dict(), WebVTTTime())
        self.assertEqual(dict(milliseconds=1), WebVTTTime(milliseconds=1))
        self.assertEqual(dict(seconds=2), WebVTTTime(seconds=2))
        self.assertEqual(dict(minutes=3), WebVTTTime(minutes=3))
        self.assertEqual(dict(hours=4), WebVTTTime(hours=4))
        self.assertEqual(dict(hours=1, minutes=2, seconds=3, milliseconds=4),
                         WebVTTTime(1, 2, 3, 4))

    def test_from_time(self):
        # NOTE(review): ``time`` is presumably ``datetime.time``, whose
        # fourth positional argument is microseconds (4000 us == 4 ms) --
        # confirm against the module imports.
        time_obj = time(1, 2, 3, 4000)

        # Dedicated comparison assertions show both operands on failure,
        # which assertTrue(<comparison>) does not.
        self.assertEqual(WebVTTTime(1, 2, 3, 4), time_obj)
        self.assertGreaterEqual(WebVTTTime(1, 2, 3, 5), time_obj)
        self.assertLessEqual(WebVTTTime(1, 2, 3, 3), time_obj)
        self.assertNotEqual(WebVTTTime(1, 2, 3, 0), time_obj)

        self.assertEqual(WebVTTTime(1, 2, 3, 4).to_time(), time_obj)
        self.assertGreaterEqual(WebVTTTime(1, 2, 3, 5).to_time(), time_obj)
        self.assertLessEqual(WebVTTTime(1, 2, 3, 3).to_time(), time_obj)
        self.assertNotEqual(WebVTTTime(1, 2, 3, 0).to_time(), time_obj)

    def test_from_ordinal(self):
        # 3600000 is one hour expressed in milliseconds.
        self.assertEqual(WebVTTTime.from_ordinal(3600000), {'hours': 1})
        self.assertEqual(WebVTTTime(1), 3600000)

    def test_from_repr(self):
        # Local variables instead of attributes on the test case: no other
        # method in this class reads them.  ``repr()`` is the idiomatic way
        # to obtain the representation.
        default_time = WebVTTTime()
        self.assertEqual('WebVTTTime(0, 0, 0, 0)', repr(default_time))
        explicit_time = WebVTTTime(1, 1, 1, 1)
        self.assertEqual('WebVTTTime(1, 1, 1, 1)', repr(explicit_time))
Beispiel #7
0
def merge_subtitle(sub_a, sub_b, delta):
    """Merge two subtitle sequences into a single ``WebVTTFile``.

    Every start and end ordinal of both inputs becomes a boundary.  For each
    pair of neighbouring boundaries that lies more than ``delta`` apart, the
    text found in ``sub_a`` and ``sub_b`` for that span is joined and, when
    non-empty, appended as a new item.

    :param sub_a: iterable of items exposing ``start.ordinal`` and
        ``end.ordinal``; must be iterable more than once.
    :param sub_b: second iterable of the same kind.
    :param delta: minimum span length; compared with ``end - start``.
    :returns: the merged ``WebVTTFile`` after ``clean_indexes()``.
    """
    out = WebVTTFile()

    intervals = []
    for subtitles in (sub_a, sub_b):
        intervals.extend(item.start.ordinal for item in subtitles)
        intervals.extend(item.end.ordinal for item in subtitles)
    intervals.sort()

    # Search cursors handed back by find_subtitle for each input.
    j = k = 0
    # zip() pairs each boundary with its successor; unlike the former
    # ``xrange`` index loop it runs on both Python 2 and Python 3.
    for lower, upper in zip(intervals, intervals[1:]):
        start = WebVTTTime.from_ordinal(lower)
        end = WebVTTTime.from_ordinal(upper)

        if (end - start) > delta:
            text_a, j = find_subtitle(sub_a, start, end, j)
            text_b, k = find_subtitle(sub_b, start, end, k)

            text = join_lines(text_a, text_b)
            if text:
                # Index 0 is a placeholder; clean_indexes() is called below.
                out.append(WebVTTItem(0, start, end, text))

    out.clean_indexes()
    return out
Beispiel #8
0
def merge_subtitle(sub_a, sub_b, delta):
    """Combine the cues of ``sub_a`` and ``sub_b`` into one ``WebVTTFile``.

    All start/end ordinals of both inputs are sorted into one boundary list.
    Each span between two consecutive boundaries that is longer than
    ``delta`` is looked up in both inputs; the joined text, if any, becomes
    an item of the result.

    :param sub_a: iterable of items exposing ``start.ordinal`` and
        ``end.ordinal``; it is traversed more than once.
    :param sub_b: second iterable of the same kind.
    :param delta: threshold compared against ``end - start`` of each span.
    :returns: the resulting ``WebVTTFile`` after ``clean_indexes()``.
    """
    out = WebVTTFile()

    intervals = sorted(
        [item.start.ordinal for item in sub_a]
        + [item.end.ordinal for item in sub_a]
        + [item.start.ordinal for item in sub_b]
        + [item.end.ordinal for item in sub_b]
    )

    # Positions returned by find_subtitle, fed back in on the next call.
    j = k = 0
    # Pair each boundary with the next one.  This replaces an ``xrange``
    # index loop, which raises NameError on Python 3.
    for previous, current in zip(intervals, intervals[1:]):
        start = WebVTTTime.from_ordinal(previous)
        end = WebVTTTime.from_ordinal(current)

        if (end - start) > delta:
            text_a, j = find_subtitle(sub_a, start, end, j)
            text_b, k = find_subtitle(sub_b, start, end, k)

            text = join_lines(text_a, text_b)
            if text:
                # The index argument is a placeholder (0) for every item;
                # clean_indexes() runs once all items are appended.
                item = WebVTTItem(0, start, end, text)
                out.append(item)

    out.clean_indexes()
    return out
Beispiel #9
0
class TestSimpleTime(TestCase):
    """Component and shifting checks on a default-constructed ``WebVTTTime``."""

    def setUp(self):
        # Each test method gets its own default-constructed time.
        self.time = WebVTTTime()

    def test_default_value(self):
        # The default time is expected to have ordinal 0.
        ordinal = self.time.ordinal
        self.assertEqual(ordinal, 0)

    def test_micro_seconds(self):
        # Exercises ``milliseconds`` (name kept for test discovery): the
        # value survives an hour change and 1000 ms carries into 1 second.
        stamp = self.time
        stamp.milliseconds = 1
        self.assertEqual(stamp.milliseconds, 1)
        stamp.hours += 42
        self.assertEqual(stamp.milliseconds, 1)
        stamp.milliseconds += 1000
        self.assertEqual(stamp.seconds, 1)

    def test_seconds(self):
        # Seconds survive an hour change; 60 s carries into 1 minute.
        stamp = self.time
        stamp.seconds = 1
        self.assertEqual(stamp.seconds, 1)
        stamp.hours += 42
        self.assertEqual(stamp.seconds, 1)
        stamp.seconds += 60
        self.assertEqual(stamp.minutes, 1)

    def test_minutes(self):
        # Minutes survive an hour change; 60 min carries into the hours
        # (42 + 1 == 43).
        stamp = self.time
        stamp.minutes = 1
        self.assertEqual(stamp.minutes, 1)
        stamp.hours += 42
        self.assertEqual(stamp.minutes, 1)
        stamp.minutes += 60
        self.assertEqual(stamp.hours, 43)

    def test_hours(self):
        # 42 extra minutes must not alter the hour component.
        stamp = self.time
        stamp.hours = 1
        self.assertEqual(stamp.hours, 1)
        stamp.minutes += 42
        self.assertEqual(stamp.hours, 1)

    def test_shifting_forward(self):
        # Shifting the default time by +1 per component equals (1, 1, 1, 1).
        step = (1, 1, 1, 1)
        self.time.shift(*step)
        self.assertEqual(self.time, step)

    def test_shifting_backwards(self):
        # From the default time, -1 per component is -3661001 ms, expected
        # to normalise to (-2, 58, 58, 999).
        step = (-1, -1, -1, -1)
        self.time.shift(*step)
        self.assertEqual(self.time, (-2, 58, 58, 999))
        # From a positive time the same shift just decrements each component.
        self.time = WebVTTTime(1, 2, 3, 4)
        self.time.shift(*step)
        self.assertEqual(self.time, (0, 1, 2, 3))

    def test_descriptor_from_class(self):
        # Reading the component on the class itself is expected to raise
        # AttributeError.
        self.assertRaises(AttributeError, getattr, WebVTTTime, 'hours')
Beispiel #10
0
 def test_negative_serialization(self):
     # A time with a negative hour component is expected to serialise as
     # the zero timestamp.
     negative_time = WebVTTTime(-1, 2, 3, 4)
     rendered = str(negative_time)
     self.assertEqual('00:00:00.000', rendered)
Beispiel #11
0
 def test_serialization(self):
     # Each KNOWN_VALUES entry pairs a timestamp string with the constructor
     # arguments that must serialise to it.
     for expected_text, components in self.KNOWN_VALUES:
         rendered = str(WebVTTTime(*components))
         self.assertEqual(expected_text, rendered)
Beispiel #12
0
 def test_parsing(self):
     # Each KNOWN_VALUES timestamp string must compare equal to the time
     # built from the paired constructor arguments.
     # NOTE(review): presumably the string is parsed inside WebVTTTime's
     # equality handling -- confirm against the class.
     for timestamp_text, components in self.KNOWN_VALUES:
         expected_time = WebVTTTime(*components)
         self.assertEqual(timestamp_text, expected_time)
Beispiel #13
0
 def setUp(self):
     # Test fixture: store a default-constructed WebVTTTime on the test case
     # for the test methods to use.
     self.time = WebVTTTime()
Beispiel #14
0
 def test_mul(self):
     # NOTE(review): the expected values imply self.time is
     # WebVTTTime(1, 2, 3, 4) -- confirm against this class's setUp.
     doubled = self.time * 2
     self.assertEqual(doubled, WebVTTTime(2, 4, 6, 8))
     # Half of 1:02:03.004 (3723004 ms) is 1861502 ms == 0:31:01.502.
     halved = self.time * 0.5
     self.assertEqual(halved, (0, 31, 1, 502))
Beispiel #15
0
 def test_from_repr(self):
     # The representation must spell out the constructor call with all four
     # components, for the default time and for an explicit one.
     cases = (
         ((), 'WebVTTTime(0, 0, 0, 0)'),
         ((1, 1, 1, 1), 'WebVTTTime(1, 1, 1, 1)'),
     )
     for components, expected_repr in cases:
         self.time = WebVTTTime(*components)
         self.assertEqual(expected_repr, repr(self.time))
Beispiel #16
0
 def test_invalid_int(self):
     # Checks which exception WebVTTTime.parse_int raises for unsuitable
     # input.
     #
     # assertRaises must receive the callable and its arguments separately.
     # The earlier form ``assertRaises(TypeError, WebVTTTime.parse_int(x))``
     # ran parse_int before assertRaises and passed it the *result*, so the
     # TypeError expectations were never checked against parse_int itself.
     # NOTE(review): because those expectations were never really exercised,
     # confirm each one against parse_int's actual contract.

     # choice() indexes the range directly; no need to materialise a
     # ten-million-element list first.
     random_long = choice(range(0, 10000000))
     # String
     self.assertRaises(ValueError, WebVTTTime.parse_int, 'test')
     # Binary
     self.assertRaises(ValueError, WebVTTTime.parse_int, bin(42))
     # Char
     self.assertRaises(ValueError, WebVTTTime.parse_int, 't')
     # No argument at all
     self.assertRaises(TypeError, WebVTTTime.parse_int)
     # None
     self.assertRaises(TypeError, WebVTTTime.parse_int, None)
     # List
     self.assertRaises(TypeError, WebVTTTime.parse_int, list(range(10)))
     # Tuple
     self.assertRaises(TypeError, WebVTTTime.parse_int, (1, 1))
     # Boolean
     self.assertRaises(TypeError, WebVTTTime.parse_int, True)
     # Float
     self.assertRaises(TypeError, WebVTTTime.parse_int, uniform(1, 100))
     # Complex
     self.assertRaises(TypeError, WebVTTTime.parse_int, 1j)
     # Long
     self.assertRaises(TypeError, WebVTTTime.parse_int, random_long)
     # Dictionary
     self.assertRaises(TypeError,
                       WebVTTTime.parse_int,
                       {
                           'Test1': 1,
                           'Test0': 0
                       })
Beispiel #17
0
 def test_from_ordinal(self):
     # One hour expressed in milliseconds must round-trip: from_ordinal
     # yields a one-hour time, and a one-hour time equals the integer.
     one_hour_ms = 3600000
     from_ms = WebVTTTime.from_ordinal(one_hour_ms)
     self.assertEqual(from_ms, {'hours': 1})
     one_hour = WebVTTTime(1)
     self.assertEqual(one_hour, one_hour_ms)
Beispiel #18
0
 def test_shifting_backwards(self):
     # Shift by -1 in every component, first from the fixture time and then
     # from an explicit positive time.
     # NOTE(review): the first expectation implies the fixture starts at
     # zero (-3661001 ms normalises to (-2, 58, 58, 999)) -- confirm setUp.
     step = (-1, -1, -1, -1)
     self.time.shift(*step)
     self.assertEqual(self.time, (-2, 58, 58, 999))
     self.time = WebVTTTime(1, 2, 3, 4)
     self.time.shift(*step)
     self.assertEqual(self.time, (0, 1, 2, 3))
def main(options):
    """Download a video, transcribe its audio with DeepSpeech, save WebVTT.

    Reads three attributes from ``options``:

    * ``vid_url`` -- URL of the video to download.
    * ``output`` -- path of the VTT file to write.
    * ``deepspeech_model_dir`` -- directory that contains ``MODEL_DIR``.

    The process exits with status 1 when ffmpeg is unavailable, when the
    audio extraction fails, or when the extracted WAV is not sampled at
    16000 Hz.
    """
    # Local import: only this function deletes the temporary WAV file.
    from os import remove

    # Ensure ffmpeg is around
    if not run_ffmpeg(['-version']):
        log.error(
            "ffmpeg needs to be available to strip audio from the video file.")
        exit(1)

    with NamedTemporaryFile(delete=True) as vid_file:
        log.info("Downloading %s - this might take a while." % options.vid_url)
        response = get(options.vid_url, stream=True)
        total_length = response.headers.get("content-length")
        if total_length is None:  # no content length header
            log.info("Unknown length - can't predict how long this will take.")
            # Write to the temporary video file (the old code referenced an
            # undefined name ``f`` here and raised NameError).
            vid_file.write(response.content)
            # ffmpeg opens the file by name, so buffered data must be
            # flushed before it runs.
            vid_file.flush()
        else:
            bar = ProgressBar(max_value=int(total_length))
            dl = 0
            for data in response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
                dl += len(data)
                vid_file.write(data)
                vid_file.flush()
                bar.update(dl)

        log.info("Download done. Stripping audio.")
        (wav_file, wav_file_name) = mkstemp('.wav')
        try:
            result = run_ffmpeg([
                "-y", "-i", vid_file.name, "-vn", "-acodec", "pcm_s16le",
                "-ar", "16000", "-ac", "1", wav_file_name
            ])
            if not result:
                log.error("ffmpeg failed. Bailing.")
                exit(1)

            fs, audio = wav.read(wav_file_name)
        finally:
            # Runs on success, on failure and on exit(): release the
            # descriptor from mkstemp and delete the temporary WAV, which
            # was previously left behind.
            close(wav_file)
            remove(wav_file_name)

    log.info("Will write VTT to %s" % options.output)
    # Make sure the WAV is to code...
    log.info("Loading up WAV file...")

    if fs != 16000:
        log.error("Only 16000hz WAV files are usable.")
        exit(1)

    total_samples = len(audio)
    duration_hours, duration_minutes, duration_seconds = sample_index_to_time(
        total_samples)
    log.info("Approximate duration: %d:%02d:%02d" %
             (duration_hours, duration_minutes, duration_seconds))

    # Let's load up DeepSpeech and get it ready.
    log.info("Loading pre-trained DeepSpeech model...")
    root_model_dir = path.join(options.deepspeech_model_dir, MODEL_DIR)

    model = path.join(root_model_dir, MODEL_FILE)
    alphabet = path.join(root_model_dir, MODEL_ALPHABET)
    lang_model = path.join(root_model_dir, MODEL_LANG_MODEL)
    trie = path.join(root_model_dir, MODEL_TRIE)

    deepspeech = Model(model, N_FEATURES, N_CONTEXT, alphabet, BEAM_WIDTH)
    log.info("Done loading model.")

    log.info("Loading language model...")
    deepspeech.enableDecoderWithLM(alphabet, lang_model, trie, LM_WEIGHT,
                                   WORD_COUNT_WEIGHT, VALID_WORD_COUNT_WEIGHT)
    log.info("Done loading model.")

    playhead = 0

    out = WebVTTFile()

    bar = ProgressBar(max_value=total_samples)
    while playhead < (total_samples - 1):
        # Transcribe the audio one fixed-size segment at a time.
        end_point = min(playhead + AUDIO_SEGMENT_SAMPLES, (total_samples - 1))
        segment = audio[playhead:end_point]
        inference = deepspeech.stt(segment, fs)
        log.debug("Inferred: %s" % inference)

        start_hours, start_minutes, start_seconds = sample_index_to_time(
            playhead)
        playhead = end_point
        end_hours, end_minutes, end_seconds = sample_index_to_time(playhead)
        # Report progress for every segment, including the ones dropped
        # below.
        bar.update(playhead)

        # Drop segments with no transcription or only the filler "ah".
        if not inference or inference == "ah":
            continue

        # .items() works on Python 2 and 3; .iteritems() is Python 2 only.
        for search, replace in INFERENCE_REPLACEMENTS.items():
            inference = sub(r"\b" + search + r"\b", replace, inference)

        inference = fill(inference, width=MAX_CAPTION_WIDTH)

        start = WebVTTTime(start_hours, start_minutes, start_seconds)
        end = WebVTTTime(end_hours, end_minutes, end_seconds)

        # Index 0 is a placeholder; clean_indexes() is called after the loop.
        item = WebVTTItem(0, start, end, inference)
        out.append(item)

        # Saved after every caption -- presumably so that partial output
        # survives an interrupted run.
        out.save(options.output, encoding="utf-8")

    out.clean_indexes()
    out.save(options.output, encoding="utf-8")
Beispiel #20
0
 def test_negative_serialization(self):
     # A negative value in any single component is expected to serialise as
     # the zero timestamp.
     zero_stamp = '00:00:00.000'
     negative_inputs = (
         (-1, 2, 3, 4),
         (-maxsize, 2, 3, 4),
         (0, -2, 3, 4),
         (0, 0, -3, 4),
         (0, 0, 0, -4),
     )
     for components in negative_inputs:
         self.assertEqual(zero_stamp, str(WebVTTTime(*components)))
Beispiel #21
0
    print "  --delta=<milliseconds>    default: 500"
    print "  -e <encoding>             Encoding of input and output files."
    print "  --encoding=<encoding>     default: utf_8"


def main():
    """Parse the command line into a ``delta`` and an ``encoding`` setting.

    Recognised options: ``-d``/``--delta`` (milliseconds, default 500),
    ``-e``/``--encoding`` (default ``utf_8``) and ``-h``/``--help``.
    Exactly three positional arguments are required.  On an unknown option
    or a wrong argument count, usage is printed and the process exits with
    status 2.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hd:e:',
                                   ["help", "encoding=", "delta="])
    # ``except X as err`` and the parenthesised print are valid on both
    # Python 2.6+ and Python 3; the old comma form is Python 2 only.
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(2)

    # Default settings
    delta = WebVTTTime(milliseconds=500)
    encoding = "utf_8"

    # ``!=`` replaces the ``<>`` operator, which Python 3 removed.
    if len(args) != 3:
        usage()
        sys.exit(2)

    for o, a in opts:
        if o in ("-d", "--delta"):
            delta = WebVTTTime(milliseconds=int(a))
        elif o in ("-e", "--encoding"):
            encoding = a
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
Beispiel #22
0
 def setUp(self):
     # Test fixture: keep a default-constructed WebVTTTime on the test case
     # so the test methods can use it.
     self.time = WebVTTTime()
Beispiel #23
0
 def setUp(self):
     # Test fixture: a WebVTTTime built from the four positional components
     # 1, 2, 3, 4, stored on the test case for the test methods to use.
     self.time = WebVTTTime(1, 2, 3, 4)
Beispiel #24
0
 def test_max_values(self):
     # In-range maxima serialise unchanged; out-of-range minutes and seconds
     # are expected to carry upwards (99h 99m 99s -> 100:40:39).
     cases = (
         ('99:59:59.999', (99, 59, 59, 999)),
         ('100:40:39.999', (99, 99, 99, 999)),
     )
     for expected_text, components in cases:
         self.assertEqual(expected_text, str(WebVTTTime(*components)))
Beispiel #25
0
 def test_from_ordinal(self):
     # 3600000 ms is one hour: from_ordinal must give a one-hour time, and
     # a one-hour time must equal the plain integer.
     hour_in_ms = 3600000
     built = WebVTTTime.from_ordinal(hour_in_ms)
     self.assertEqual(built, {'hours': 1})
     single_hour = WebVTTTime(1)
     self.assertEqual(single_hour, hour_in_ms)