def test_sort_and_reindex_not_in_place_matches(input_subs, start_index):
    """Check that copying and in-place ``sort_and_reindex`` agree.

    Both modes must produce equal subtitles. The copying mode must return
    objects that are not the ones passed in, while the in-place mode must
    return the very objects it was given.
    """

    def clone_inputs():
        # Every call builds an independent set of Subtitle objects, so the
        # two sort_and_reindex runs can't affect each other.
        return [srt.Subtitle(**vars(original)) for original in input_subs]

    copy_mode_input = clone_inputs()
    in_place_input = clone_inputs()

    copy_mode_ids = [id(item) for item in copy_mode_input]
    in_place_ids = [id(item) for item in in_place_input]

    copy_mode_result = list(
        srt.sort_and_reindex(copy_mode_input, start_index=start_index)
    )
    in_place_result = list(
        srt.sort_and_reindex(
            in_place_input, start_index=start_index, in_place=True
        )
    )

    # The results in each case should be the same
    subs_eq(copy_mode_result, in_place_result)

    # Copy mode must have created new subtitle objects
    assert_false(any(id(item) in copy_mode_ids for item in copy_mode_result))

    # In-place mode must be handing back the same subtitle objects
    assert_true(all(id(item) in in_place_ids for item in in_place_result))
def generate_subtitles(self):
    """Pack the words of ``self.render_list`` into SRT captions.

    Words (``item.word``) are concatenated until the caption is at least
    32 characters long; the caption is then emitted as one subtitle that
    spans the playback time of the words it contains. The timeline is the
    running sum of ``item.get_duration()``, rounded to two decimals at
    every step. Words left over after the last full caption are emitted
    as a final, shorter subtitle.

    The result is also stored on ``self.srt_list``.

    Returns:
        list of ``srt.Subtitle`` in playback order, indexed from 1. The
        list is empty when ``self.render_list`` is empty.
    """
    max_caption_len = 32
    srt_list = []
    caption = ''
    start = 0
    end = 0
    counter = 1

    for include in tqdm(self.render_list):
        caption += include.word
        # Round both the increment and the running total to keep the
        # timeline on a 10 ms grid.
        end = round(end + round(include.get_duration(), 2), 2)
        if len(caption) >= max_caption_len:
            srt_list.append(
                srt.Subtitle(index=counter,
                             start=timedelta(seconds=start),
                             end=timedelta(seconds=end),
                             content=caption))
            counter += 1
            start = end
            caption = ''

    # Flush the words that did not fill a whole caption. Skipping the
    # flush when nothing is pending avoids an empty, zero-length subtitle
    # (and a NameError when render_list is empty).
    if caption:
        srt_list.append(
            srt.Subtitle(index=counter,
                         start=timedelta(seconds=start),
                         end=timedelta(seconds=end),
                         content=caption))

    self.srt_list = srt_list
    return srt_list
def merge_close_subtitles(subs: List[srt.Subtitle], dist=0.05) -> List[srt.Subtitle]:
    """
    Usually one long phrase is broken down into a multitude of subtitles
    having some fixed time distance between them. This function
    reconstructs the phrase from the separate pieces.

    A subtitle is merged into the previous output subtitle when the gap
    between the previous subtitle's end and its own start, rounded to whole
    seconds, is at most ``dist``. Merged content is joined with a newline
    and keeps the index, start and proprietary data of the first piece.
    Subtitles that start a new group are renumbered by their position in
    the output; the very first subtitle is passed through unchanged.

    :param subs: List of subtitles
    :param dist: Distance between two subtitles in order to be considered
                 as part of one
    :return: List of subtitles where subtitles whose intervals overlap
             with regard to dist are merged into one; an empty list when
             ``subs`` is empty
    """
    if not subs:
        # Nothing to merge; the seeding with subs[0] below needs one item.
        return []

    starts = np.array([sub.start.total_seconds() for sub in subs[1:]])
    ends = np.array([sub.end.total_seconds() for sub in subs[:-1]])
    # NOTE(review): np.round without a decimals argument rounds the gap to
    # whole seconds, so any gap up to about half a second compares as 0 and
    # is merged regardless of a small ``dist`` - confirm this is intended.
    to_merge = np.round(starts - ends) <= dist

    merged_subs = [subs[0]]
    for i, close_enough in enumerate(to_merge, 1):
        current = subs[i]
        if close_enough:
            head = merged_subs[-1]
            merged_subs[-1] = srt.Subtitle(head.index, head.start, end=current.end,
                                           content=f"{head.content}\n{current.content}",
                                           proprietary=head.proprietary)
        else:
            merged_subs.append(
                srt.Subtitle(len(merged_subs) + 1, current.start, end=current.end,
                             content=current.content,
                             proprietary=current.proprietary))
    return merged_subs
def add(subs, start, end, content="", adjust=False):
    """
    Insert one new subtitle into a stream of subtitles at its ordered position.

    The new subtitle is emitted in front of the first existing subtitle that
    starts later than it, or that starts at the same time but ends later.
    When no such subtitle exists it is emitted last. All emitted subtitles
    are numbered consecutively from 1.

    :param subs: :py:class:`Subtitle` objects
    :param datetime.timedelta start: The timestamp the subtitle starts at.
    :param datetime.timedelta end: The timestamp the subtitle ends at.
    :param str content: The text of the new subtitle.
    :param boolean adjust: Whether to shift the subtitles that follow the
                           new one later by the new subtitle's duration.
    :rtype: :term:`generator` of :py:class:`Subtitle` objects
    :raises ValueError: If ``end`` is not after ``start``. Because this is a
                        generator, the error surfaces on first iteration.
    """
    if end <= start:
        raise ValueError(
            "The end timestamp can't occur before or at the start timestamp.")

    # Wrap plain iterables (e.g. lists) so they can be consumed with tryNext.
    if not isinstance(subs, GeneratorType):
        subs = (x for x in subs)

    shift = datetime.timedelta(0)
    pending = True  # the new subtitle has not been emitted yet
    position = 1

    current = _utils.tryNext(subs)
    while current is not None:
        current_start = current.start
        if pending and ((start == current_start and end < current.end)
                        or start < current_start):
            yield srt.Subtitle(position, start, end, content)
            position += 1
            if adjust:
                shift = end - start
            pending = False

        yield srt.Subtitle(
            position,
            current_start + shift,
            current.end + shift,
            current.content,
        )
        position += 1
        current = _utils.tryNext(subs)

    # No existing subtitle came after the new one: it goes at the end.
    if pending:
        yield srt.Subtitle(position, start, end, content)
def generate_srt(ctx, result, **kwargs):
    """Record the current frame's detections (and caption) as subtitle cues.

    Reads ``output_fps``, ``frame_num`` and ``duration`` from
    ``kwargs['metadata']``. A summary of ``result['detection_classes']``
    ("name: count, ...") is added to the module-level ``object_srt`` list;
    when ``ctx.build_caption`` is set and ``result['captions']`` is
    non-empty, its first caption is added to ``caption_srt`` the same way.
    If the text equals that of the track's last cue, that cue is extended
    instead of appending a new one.

    Once the frame time is within 2 of ``duration``, both tracks are
    written to the files named by ``PARAMS['objects_srt_file']`` and
    ``PARAMS['captions_srt_file']``.
    """
    metadata = kwargs['metadata']
    fps = metadata['output_fps']
    current_time = float(metadata['frame_num']) / fps
    step = datetime.timedelta(milliseconds=1. / fps * 1000)
    duration = metadata['duration']

    global object_srt
    global caption_srt

    def record(track, text):
        # The first cue of a track starts at zero; later cues cover the one
        # frame interval that ends at the current frame time.
        if track:
            begin = datetime.timedelta(seconds=current_time) - step
        else:
            begin = datetime.timedelta(milliseconds=0)
        finish = begin + step

        if track and track[-1].content == text:
            # Same text as the previous cue: stretch it rather than repeat.
            track[-1].end = finish
        else:
            track.append(srt.Subtitle(index=len(track) + 1, start=begin,
                                      end=finish, content=text))

    class_counts = collections.Counter(result['detection_classes'])
    record(object_srt, ', '.join(
        [f'{name}: {count}' for name, count in class_counts.items()]))

    if ctx.build_caption:
        captions = result['captions']
        if len(captions) > 0:
            record(caption_srt, captions[0])

    if current_time + 2 >= duration:
        for path_key, track in (('objects_srt_file', object_srt),
                                ('captions_srt_file', caption_srt)):
            with open(PARAMS[path_key], 'w') as sw:
                sw.write(srt.compose(track))
def transcribe():
    """Turn the recognizer's output for the audio stream into subtitles.

    Audio is read from ``process.stdout`` in reads of size 4000 and fed to
    ``rec``; each completed result plus the final result is parsed as JSON.
    Results that carry a ``'result'`` word list are cut into groups of
    ``WORDS_PER_LINE`` words, and each group becomes one ``srt.Subtitle``
    spanning from its first word's start to its last word's end.

    Returns:
        list of ``srt.Subtitle``; each index is the subtitle's position in
        the list, starting from 0.
    """
    raw_results = []
    while True:
        chunk = process.stdout.read(4000)
        if len(chunk) == 0:
            break
        if rec.AcceptWaveform(chunk):
            raw_results.append(rec.Result())
    raw_results.append(rec.FinalResult())

    subs = []
    for raw in raw_results:
        parsed = json.loads(raw)
        if 'result' not in parsed:
            continue
        words = parsed['result']
        for offset in range(0, len(words), WORDS_PER_LINE):
            group = words[offset:offset + WORDS_PER_LINE]
            subs.append(srt.Subtitle(
                index=len(subs),
                content=" ".join([entry['word'] for entry in group]),
                start=datetime.timedelta(seconds=group[0]['start']),
                end=datetime.timedelta(seconds=group[-1]['end'])))
    return subs
def create_subtitles(caption: Groups) -> List[srt.Subtitle]:
    """
    Build one srt.Subtitle per caption group.

    The words of a group are joined with single spaces; a space directly in
    front of ``,``, ``.``, ``?`` or ``!`` and a space directly after a
    newline are removed. The subtitle starts at the first element's start
    and ends at the last element's end (both taken as seconds).

    Args:
        caption: The caption groups, a list of our custom Caption-list
                 dataformats.

    Returns:
        List of srt.Subtitle instances, indexed by group position
        starting at 0.
    """
    space_before_punctuation = re.compile(r' ([,.?!])')
    space_after_newline = re.compile(r'\n ')

    def to_subtitle(position, group):
        text = ' '.join(word.text for word in group)
        text = space_before_punctuation.sub(r'\g<1>', text)
        text = space_after_newline.sub(r'\n', text)
        return srt.Subtitle(position,
                            timedelta(seconds=group[0].start),
                            timedelta(seconds=group[-1].end),
                            text)

    return [to_subtitle(position, group)
            for position, group in enumerate(caption)]
async def run_test(uri):
    """Stream the WAV file named by ``sys.argv[1]`` to ``uri`` and print SRT.

    Over the websocket this sends a JSON config message carrying the file's
    sample rate, then the audio in chunks of 0.2 seconds, reading one reply
    per chunk, and finally an ``eof`` message whose reply is read as well.
    Every reply containing a ``'result'`` word list is cut into groups of
    ``WORDS_PER_LINE`` words; each group becomes one subtitle. The composed
    SRT text is printed to stdout.

    :param uri: websocket address of the recognition server
    """
    async with websockets.connect(uri) as websocket:
        results = []

        # The context manager closes the WAV file as soon as streaming is
        # finished, including when sending or receiving fails.
        with wave.open(sys.argv[1], "rb") as wf:
            sample_rate = wf.getframerate()
            await websocket.send(
                '{ "config" : { "sample_rate" : %d } }' % sample_rate)
            buffer_size = int(sample_rate * 0.2)  # 0.2 seconds of audio
            while True:
                data = wf.readframes(buffer_size)
                if len(data) == 0:
                    break
                await websocket.send(data)
                results.append(await websocket.recv())

        await websocket.send('{"eof" : 1}')
        results.append(await websocket.recv())

        subs = []
        for res in results:
            jres = json.loads(res)
            if 'result' not in jres:
                continue
            words = jres['result']
            for j in range(0, len(words), WORDS_PER_LINE):
                line = words[j:j + WORDS_PER_LINE]
                subs.append(srt.Subtitle(
                    index=len(subs),
                    content=" ".join([entry['word'] for entry in line]),
                    start=datetime.timedelta(seconds=line[0]['start']),
                    end=datetime.timedelta(seconds=line[-1]['end'])))

        print(srt.compose(subs))
def run(self) -> None:
    """
    Create the translated srt file.

    Every subtitle of the source file is translated from the configured
    source language to the target language, echoed to stdout, and the
    result is written under the destination directory with the source
    file's name.

    :return: None
    """
    with self.__src_file.open('rt') as fp:
        source_text = fp.read()

    source_name = self.__src_file.name
    translator = google_translator()

    translated_subs = []
    for original in srt.parse(source_text):
        translated_text = translator.translate(
            original.content,
            lang_src=self.__lang_src,
            lang_tgt=self.__lang_tgt)
        translated = srt.Subtitle(index=original.index,
                                  start=original.start,
                                  end=original.end,
                                  content=translated_text,
                                  proprietary=original.proprietary)
        sys.stdout.write('[{0}][{1}]: {2}\n'.format(
            source_name, translated.index, translated.content))
        translated_subs.append(translated)

    # Write the translated srt file
    target = self.__dst_dirpath / self.__src_file.name
    with target.open('wt') as fp:
        fp.write(srt.compose(translated_subs))
def break_sentences(args, subs, alternative):
    """Split the words of ``alternative`` into subtitles appended to ``subs``.

    A subtitle is closed after a word that contains ``.``, ``!`` or ``?``,
    after a word that pushes the character count past ``args.max_chars``,
    or after a word containing ``,`` that is not the first word of the
    subtitle. Words remaining after the last break are emitted as one
    final subtitle rather than being discarded.

    :param args: object providing ``max_chars``
    :param subs: list of subtitles to extend; new indexes continue from
                 ``len(subs) + 1``
    :param alternative: object whose ``words`` each provide ``word``,
                        ``start_time`` and ``end_time`` (the times offering
                        ``ToTimedelta()``)
    :return: the same ``subs`` list, extended in place
    """
    firstword = True
    charcount = 0
    idx = len(subs) + 1
    content = ""
    start = None
    last_word = None

    for w in alternative.words:
        if firstword:
            # first word in sentence, record start time
            start = w.start_time.ToTimedelta()

        charcount += len(w.word)
        content += " " + w.word.strip()
        last_word = w

        ends_sentence = any(mark in w.word for mark in ".!?")
        if (ends_sentence or charcount > args.max_chars
                or ("," in w.word and not firstword)):
            # break sentence at: . ! ? or line length exceeded
            # also break if , and not first word
            subs.append(
                srt.Subtitle(index=idx,
                             start=start,
                             end=w.end_time.ToTimedelta(),
                             content=srt.make_legal_content(content)))
            firstword = True
            idx += 1
            content = ""
            charcount = 0
        else:
            firstword = False

    # Words after the last break point would otherwise be lost.
    if content:
        subs.append(
            srt.Subtitle(index=idx,
                         start=start,
                         end=last_word.end_time.ToTimedelta(),
                         content=srt.make_legal_content(content)))
    return subs
def add_subtitle(evt):
    """Event callback: append a subtitle for the previously received text.

    The event's JSON payload (``evt.result.json``) is printed and parsed.
    A subtitle carrying ``last_text`` is appended to ``transcript``,
    running from the previous end value (``last_duration``) to the
    payload's ``Duration`` plus ``last_start``, both scaled by
    ``slow_factor`` and converted with ``convertduration``. The
    module-level state (``index``, ``last_duration``, ``last_text``,
    ``last_offset`` and, on an offset change, ``last_start``) is then
    updated for the next event.

    NOTE(review): only the ``last_start`` update is treated as conditional
    on the offset change - confirm against the intended nesting.
    """
    global index
    global last_duration
    global last_start
    global last_text
    global last_offset

    print(evt.result.json)
    payload = json.loads((evt.result.json))

    offset = payload["Offset"]
    if last_offset != offset:
        last_start += last_duration

    current_duration = payload["Duration"] + last_start
    begin_s, begin_ms = convertduration(last_duration * slow_factor)
    finish_s, finish_ms = convertduration(current_duration * slow_factor)

    cue = srt.Subtitle(
        index,
        datetime.timedelta(seconds=begin_s, milliseconds=begin_ms),
        datetime.timedelta(seconds=finish_s, milliseconds=finish_ms),
        last_text)
    transcript.append(cue)

    index += 1
    last_duration = current_duration
    last_text = payload["Text"]
    last_offset = offset
def buildVTT(bookInfoFileName, outputDir):
    """Write one WebVTT subtitle file per chapter described in a JSON file.

    The JSON document must provide ``chapter`` (entries with ``start``,
    ``end``, ``index`` and ``title``) and ``split`` (entries with
    ``start``, ``end`` and ``texc``). For each chapter, the splits from
    ``start`` to ``end`` inclusive become subtitles whose times are taken
    as milliseconds relative to the chapter's first split. The subtitles
    are composed as SRT, converted with ``srt2vtt`` and written to
    ``<outputDir>/<NN title>.vtt``.

    :param bookInfoFileName: path of the JSON file with the segment info
    :param outputDir: directory the .vtt files are written to
    """
    # Read the segment information from the JSON file.
    with open(bookInfoFileName, 'r', encoding='UTF-8') as f:
        info = json.load(f)
    print(info['chapter'])

    for chapter in info['chapter']:
        # Times inside a chapter are relative to its first split.
        offset = info['split'][chapter['start']]['start']

        subs = []
        split_positions = range(chapter['start'], chapter['end'] + 1)
        for index, position in enumerate(split_positions):
            split = info['split'][position]
            subs.append(srt.Subtitle(
                index,
                timedelta(milliseconds=(split['start'] - offset)),
                timedelta(milliseconds=(split['end'] - offset)),
                split['texc']))

        # Save the vtt subtitle file.
        chapter_name = validateTitle(
            '%02d %s' % (chapter['index'] + 1, chapter['title']))
        vttfilename = '%s/%s.vtt' % (outputDir, chapter_name)
        with open(vttfilename, 'w', encoding='UTF-8') as f:
            # Convert the srt text to WebVTT format.
            strVTT = srt2vtt(srt.compose(subs))
            print(strVTT)
            f.write(strVTT)
        print('%s done.' % vttfilename)
    return
def srt_offset(subs, td_seconds):
    """Yield copies of ``subs`` with start and end shifted by ``td_seconds``.

    :param subs: iterable of subtitles
    :param td_seconds: the shift, either a ``timedelta`` or a number of
                       seconds
    :return: generator of new ``srt.Subtitle`` objects; only index, start,
             end and content are carried over from the originals
    """
    if isinstance(td_seconds, timedelta):
        shift = td_seconds
    else:
        shift = timedelta(seconds=td_seconds)

    for original in subs:
        shifted = srt.Subtitle(index=original.index,
                               start=original.start + shift,
                               end=original.end + shift,
                               content=original.content)
        yield shifted
def fit(self, subs, *_):
    """Store a copy of ``subs`` with all timestamps scaled.

    Start and end of every subtitle are multiplied by
    ``self.scale_factor``; index and content are kept. The scaled
    subtitles are wrapped in ``SrtSubtitles`` with the encoding of
    ``subs`` and stored on ``self.subs_``.

    :param subs: iterable of subtitles that also provides ``encoding``
    :return: ``self``
    """
    scaled_subs = [
        srt.Subtitle(index=original.index,
                     start=original.start * self.scale_factor,
                     end=original.end * self.scale_factor,
                     content=original.content)
        for original in subs
    ]
    self.subs_ = SrtSubtitles(scaled_subs, encoding=subs.encoding)
    return self
def fit(self, subs, *_):
    """Store a copy of ``subs`` with all timestamps shifted.

    ``self.td_seconds`` is added to the start and end of every subtitle;
    index and content are kept. The shifted subtitles are wrapped in
    ``SrtSubtitles`` with the encoding of ``subs`` and stored on
    ``self.subs_``.

    :param subs: iterable of subtitles that also provides ``encoding``
    :return: ``self``
    """
    offset_subs = [
        srt.Subtitle(index=original.index,
                     start=original.start + self.td_seconds,
                     end=original.end + self.td_seconds,
                     content=original.content)
        for original in subs
    ]
    self.subs_ = SrtSubtitles(offset_subs, encoding=subs.encoding)
    return self
def gen_sub_data(sub_id, idx, length, single_dur):
    """Generate the UTF-8 bytes of an SRT document with ``length`` entries.

    Entry ``i`` (1-based) starts at ``i * single_dur`` seconds, ends at
    ``(i + 1) * single_dur / 2`` seconds and contains the text
    ``ID=<sub_id>, IDX=<idx>, Sentence #<i>``.

    NOTE(review): for a positive ``single_dur`` the end is not after the
    start for every entry (equal for i = 1, earlier from i = 2 on) -
    confirm whether that is intended for this generated data.

    :return: the concatenated SRT blocks encoded as UTF-8; ``b''`` when
             ``length`` is less than 1
    """
    blocks = [
        srt.Subtitle(
            index=number,
            start=timedelta(seconds=number * single_dur),
            end=timedelta(seconds=(number + 1) * single_dur / 2),
            content='ID={}, IDX={}, Sentence #{}'.format(sub_id, idx, number),
        ).to_srt()
        for number in range(1, length + 1)
    ]
    return ''.join(blocks).encode('utf-8')
def generate_subtitles(sentences, durations_in_seconds):
    """Lay sentences out back to back on a timeline starting at zero.

    Sentence ``k`` is shown for ``durations_in_seconds[k]`` seconds and
    starts exactly where the previous one ended. The inputs are paired
    with ``zip``, so surplus items of the longer one are ignored.

    :return: list of ``srt.Subtitle`` indexed from 1
    """
    cues = []
    cursor = timedelta(seconds=0)
    pairs = zip(sentences, durations_in_seconds)
    for number, (text, seconds) in enumerate(pairs, start=1):
        finish = cursor + timedelta(seconds=seconds)
        cues.append(
            srt.Subtitle(index=number, start=cursor, end=finish, content=text))
        cursor = finish
    return cues
def gen_subs(filename):
    """Group the words of ``<filename>-out.json`` into ``<filename>.srt``.

    Each word entry provides ``word``, ``duration`` and ``"start_time "``
    (the key ends with a space); its ``end_time`` is computed and stored
    back on the entry. Words whose duration is not below ``MAX_WORD_TIME``
    are ignored. A word is appended to the running subtitle while its end
    stays within ``MAX_SUB_TIME`` of the subtitle's start; otherwise the
    running subtitle is emitted and the word starts a new one. Every
    subtitle lasts at least ``MIN_SUB_TIME``. The file is composed with
    reindexing from 1.

    NOTE(review): the start-new-subtitle step is treated as part of the
    duration check - confirm against the intended nesting.
    """
    with open(filename + "-out.json", "r") as in_file:
        words = json.loads(in_file.read())["words"]
    print(words)

    index = 0
    subtitle = ""
    start_time = 0
    end_time = 0
    subtitles = []

    for word in words:
        print(word)
        word["end_time"] = word["start_time "] + word["duration"]
        if not word["duration"] < MAX_WORD_TIME:
            continue

        if subtitle:
            if start_time + MAX_SUB_TIME >= word["end_time"]:
                # Still fits: extend the running subtitle.
                subtitle += " " + word["word"]
                end_time = max(word['end_time'], start_time + MIN_SUB_TIME)
            else:
                # Does not fit: close the running subtitle.
                index += 1
                subtitles.append(
                    srt.Subtitle(index, timedelta(seconds=start_time),
                                 timedelta(seconds=end_time), subtitle))
                subtitle = ""

        if not subtitle:
            # Nothing running (first word, or one was just closed).
            start_time = word['start_time ']
            subtitle += word['word']
            end_time = max(word['end_time'], start_time + MIN_SUB_TIME)

    if subtitle:
        # The last index is reused here; compose() below renumbers anyway.
        subtitles.append(
            srt.Subtitle(index, timedelta(seconds=start_time),
                         timedelta(seconds=end_time), subtitle))

    with open(filename + ".srt", "w") as f:
        f.write(
            srt.compose(subtitles, reindex=True, start_index=1, strict=True))
def make_subtitles(self):
    """Append one subtitle per entry of ``self.times_list`` to ``self.srt_list``.

    Subtitle ``k`` (numbered from 1) starts at the k-th time, lasts
    ``self.interval_len`` and shows the k-th item of
    ``self.ascii_content_str``. An ``IndexError`` (for example when there
    are fewer content items than times) is logged and that entry skipped.
    """
    for number, time_start in enumerate(self.times_list, start=1):
        time_end = time_start + self.interval_len
        try:
            self.srt_list.append(
                srt.Subtitle(index=number,
                             start=time_start,
                             end=time_end,
                             content=self.ascii_content_str[number - 1]))
        except IndexError as e:
            logging.error(e)
def display_sc(start, end, sc_list):
    """Build one subtitle showing the entries of ``sc_list`` as lines.

    Entries are ordered by descending ``float(entry[0])``, ties broken by
    descending ``int(entry[2])``; ``entry[3]`` is the text shown. Text of
    100 characters or more is cut to its first 98 characters followed by
    an ellipsis.

    :param start: start time in seconds
    :param end: end time in seconds
    :return: an ``srt.Subtitle`` with index 0
    """
    LIMIT = 100

    def priority(sc):
        return (-float(sc[0]), -int(sc[2]))

    ordered = sorted(sc_list, key=priority)
    content = "\n".join([sc[3] for sc in ordered])
    if len(content) >= LIMIT:
        content = content[:LIMIT - 2] + "…"

    return srt.Subtitle(index=0,
                        start=timedelta(seconds=start),
                        end=timedelta(seconds=end),
                        content=content)
def create_srt_file(srt_chunks):
    """Compose SRT text from ``srt_chunks`` and hand it to ``write_srt_file``.

    Each chunk provides ``start_time``, ``end_time`` (seconds) and
    ``text``. Every subtitle except the last ends where the next chunk
    starts; the last one uses its own ``end_time``. Text is stripped of
    surrounding whitespace and subtitles are indexed from 1.

    :return: the composed SRT text, which is also passed to
             ``write_srt_file`` together with ``"en"``
    """
    last_position = len(srt_chunks) - 1
    subtitles = []
    for position, chunk in enumerate(srt_chunks):
        own_end = chunk['end_time']
        if position == last_position:
            end_time = own_end
        else:
            # Stretch up to the start of the following chunk.
            end_time = srt_chunks[position + 1]['start_time']
        subtitles.append(
            srt.Subtitle(position + 1,
                         timedelta(seconds=chunk['start_time']),
                         timedelta(seconds=end_time),
                         chunk['text'].strip()))

    srt_content = srt.compose(subtitles)
    write_srt_file(srt_content, "en")
    return srt_content
def create_subtitles_and_transcript(speech_to_text_response):
    """Group recognized words into 3-second subtitles and write them out.

    Each item provides ``Word`` and ``Offset``. Words are accumulated into
    a running transcript. While the elapsed offset difference (divided by
    10,000,000 - presumably 100-nanosecond ticks, confirm) is below the
    window, the start time is refreshed from the item's offset; otherwise
    the transcript is emitted as a subtitle lasting one window and the
    accumulation restarts. Whatever is accumulated at the end is emitted
    as a final subtitle with the last index used. The composed SRT text is
    written to the module-level ``subtitle_output`` path.
    """
    window = 3.0  # seconds covered by one subtitle
    elapsed = 0
    transcriptions = []
    transcript = ""
    index, prev = 0, 0
    wordstartsec, wordstartmicrosec = 0, 0

    for item in speech_to_text_response:
        # Forms the sentence until the window size condition is met
        transcript = transcript + " " + item["Word"]

        # Checks whether the elapsed duration is less than the window size
        if (int((elapsed / 10000000)) < window):
            wordstartsec, wordstartmicrosec = convertduration(item["Offset"])
            elapsed = elapsed + item["Offset"] - prev
            prev = item["Offset"]
        else:
            index = index + 1
            transcriptions.append(
                srt.Subtitle(
                    index,
                    datetime.timedelta(0, wordstartsec, wordstartmicrosec),
                    datetime.timedelta(0, wordstartsec + window, 0),
                    transcript))
            elapsed = 0
            transcript = ""

    # Emit what has been accumulated since the last full window.
    transcriptions.append(
        srt.Subtitle(index,
                     datetime.timedelta(0, wordstartsec, wordstartmicrosec),
                     datetime.timedelta(0, wordstartsec + window, 0),
                     transcript))

    subtitles = srt.compose(transcriptions)
    logger.info("Outputting subtitles to " + subtitle_output)
    with open(subtitle_output, "w") as f:
        f.write(subtitles)
def get_chunk_subtitles(
    speaker_map_path, audio_in_path, start_index=1, offset=0, lag_correction=-0.001
):
    """Transcribe the speaker segments of one audio chunk into subtitles.

    The speaker map JSON lists transitions (``timestamp_start``,
    ``timestamp_end`` in milliseconds, and ``result`` naming the speaker)
    for channel "0". The audio of each transition is read in order from
    the audio file; transitions other than ``"no_speech"`` are sent to
    Google speech recognition (``fr-FR``) and, when something was
    recognized, the first alternative becomes a subtitle
    ``"<speaker>: <text>"`` shifted by ``offset`` milliseconds.

    NOTE(review): ``offset`` is taken to change by ``lag_correction`` once
    per transition - confirm against the intended nesting.
    NOTE(review): ``entry`` is only bound by the loop, so the return
    statement fails when there are no transitions.

    :return: tuple of (next free subtitle index, ``timestamp_end`` of the
             last transition, list of ``srt.Subtitle``)
    """
    with open(speaker_map_path, "r") as in_json:
        data = json.load(in_json)

    results = data["channels"]["0"]
    # Not used below; the lookup is kept as-is.
    n_speakers = int(results["summary"]["speaker-map"]["speaker_count"])
    raw_data = results["transitions"]["speaker-map"]

    audio_data = sr.AudioFile(audio_in_path)
    chunk_srt = []
    with audio_data as source:
        for entry in raw_data:
            shift = timedelta(milliseconds=offset)
            start_time = timedelta(milliseconds=(entry["timestamp_start"])) + shift
            end_time = timedelta(milliseconds=(entry["timestamp_end"])) + shift
            speaker = entry["result"]
            duration = entry["timestamp_end"] / 1000 - entry["timestamp_start"] / 1000

            # Every transition is read, so the stream stays aligned with
            # the list of transitions.
            audio = r.record(source, duration=duration)

            # We only deal with speakers
            if speaker != "no_speech":
                transcript = r.recognize_google(audio, language="fr-FR", show_all=True)
                # We take the first entry
                if len(transcript) != 0:
                    text = "{}: {}".format(
                        speaker, transcript["alternative"][0]["transcript"]
                    )
                    chunk_srt.append(
                        srt.Subtitle(
                            index=start_index,
                            start=start_time,
                            end=end_time,
                            content=text,
                        )
                    )
                    start_index += 1
            offset += lag_correction

    return start_index, entry["timestamp_end"], chunk_srt
def flush(self, time):
    """Emit the pending text as a subtitle ending at ``time``.

    Does nothing when there is no pending text (``self.string`` is empty)
    or no recorded start. Otherwise the subtitle counter is advanced, and
    the new subtitle is passed to the notifier, appended to ``self.subs``
    and written to ``self.file`` (followed by a flush) before the pending
    text and start are cleared.

    :param time: end timestamp of the emitted subtitle
    """
    nothing_pending = self.string == '' or self.start is None
    if nothing_pending:
        return

    self._index += 1
    sub = srt.Subtitle(self._index, self.start, time, self.string)

    self.notifier.notify_subtitle(sub)
    self.subs.append(sub)

    self.file.write(sub.to_srt())
    self.file.flush()

    # Ready for the next subtitle.
    self.string = ''
    self.start = None
def find_by_timestamp(
    subs,
    timestamp_one=datetime.timedelta(0),
    timestamp_two=datetime.timedelta(0),
    adjust=False,
):
    """
    Finds subtitles from subtitles by timestamp.

    Subtitles are first split at both timestamps. When timestamp one <
    timestamp two, the subtitles starting in between are found. When
    timestamp one > timestamp two, subtitles up to timestamp two and
    subtitles after timestamp one will be found. When both timestamps are
    equal, every subtitle is found. Found subtitles are numbered
    consecutively from 1.

    :param subs: :py:class:`Subtitle` objects
    :param datetime.timedelta timestamp_one: The timestamp to find from.
    :param datetime.timedelta timestamp_two: The timestamp to find to.
    :param boolean adjust: Whether to adjust the timestamps of found
                           subtitles (timestamp one is subtracted).
    :rtype: :term:`generator` of :py:class:`Subtitle` objects
    """
    # Wrap plain iterables (e.g. lists) in a generator.
    if not isinstance(subs, GeneratorType):
        subs = (x for x in subs)

    # Split the subtitles at the start and end of the block(s).
    for boundary in (timestamp_one, timestamp_two):
        subs = srt.tools.split.split(subs, boundary)

    # Edge cases: no subtitles at all, or the block ends before they begin.
    current = _utils.tryNext(subs)
    sequential = timestamp_one < timestamp_two
    if current is None:
        return
    if sequential and timestamp_two <= current.start:
        return

    shift = timestamp_one if adjust else datetime.timedelta(0)
    same_timestamp = timestamp_one == timestamp_two
    position = 1
    while current is not None:
        begin = current.start
        if same_timestamp:
            selected = True
        elif sequential:
            selected = timestamp_one <= begin and begin < timestamp_two
        else:
            selected = begin < timestamp_two or timestamp_one <= begin

        if selected:
            yield srt.Subtitle(
                position,
                current.start - shift,
                current.end - shift,
                current.content,
            )
            position += 1

        current = _utils.tryNext(subs)
def po_to_srt_converter(src_fp, dest_fp):
    """Convert a PO catalog read from ``src_fp`` into SRT written to ``dest_fp``.

    Both file objects are binary. For each unit, the translation
    (``msgstr``, or ``msgid`` when the translation is empty) has every
    ``NEW_LINE_TAG`` replaced by a newline. The unit's comment must hold
    the subtitle index on its first line and ``<start> --> <end>``
    timecodes after it. Each cue is written as UTF-8 encoded SRT.
    """
    catalog = polib.pofile(src_fp.read().decode("utf-8-sig"))
    for unit in catalog:
        text = unit.msgstr or unit.msgid
        content = "\n".join(text.split(NEW_LINE_TAG))

        index, timecodes = unit.comment.split("\n", maxsplit=1)
        raw_start, raw_end = timecodes.split(" --> ")

        cue = srt.Subtitle(index=index,
                           start=srt.srt_timestamp_to_timedelta(raw_start),
                           end=srt.srt_timestamp_to_timedelta(raw_end),
                           content=content)
        dest_fp.write(cue.to_srt().encode("utf-8"))
def _merge_short_segments(segments, min_length=3):
    """Join each segment shorter than ``min_length`` seconds with its successor."""
    merged = []
    absorbed = set()
    for idx, (start, stop) in enumerate(segments):
        if idx in absorbed:
            continue
        if stop - start < min_length and idx + 1 < len(segments):
            merged.append((start, segments[idx + 1][1]))
            absorbed.add(idx + 1)
        else:
            merged.append((start, stop))
    return merged


def Subtitle(videoFile, duration, thresh, language1):
    """Create subtitles for a video by recognizing its non-silent parts.

    The audio is extracted with ``videoToWav`` (read back from
    ``Extracted.wav``), non-silent ranges are detected and truncated to
    whole seconds, and ranges shorter than 3 seconds are joined with the
    range that follows them. The audio up to the end of each range is then
    sent to Google speech recognition; every recognized text becomes one
    subtitle covering its range. Recognition failures are printed and the
    range is skipped.

    :param videoFile: video passed to ``videoToWav``
    :param duration: minimum silence length (ms) for the silence detection
    :param thresh: silence threshold for the silence detection
    :param language1: language code passed to the recognizer
    :return: list of ``srt.Subtitle``; the index is the range's 1-based
             position, so failed ranges leave gaps in the numbering
    """
    videoToWav(videoFile)
    r = sr.Recognizer()
    myaudio = pydub.AudioSegment.from_wav('Extracted.wav')
    speak = pydub.silence.detect_nonsilent(myaudio,
                                           min_silence_len=int(duration),
                                           silence_thresh=int(thresh))
    speak = [(int(start / 1000), int(stop / 1000)) for start, stop in speak]

    # Previously this step passed two arguments to list.append; the
    # resulting TypeError was swallowed by a bare except, so short ranges
    # were never actually merged.
    speakClear = _merge_short_segments(speak)

    subs = []
    timer = 0
    with sr.AudioFile("Extracted.wav") as source:
        for i, v in enumerate(speakClear):
            audioText = r.record(source, duration=v[1] - timer)
            # The stream has advanced to v[1] whether or not recognition
            # succeeds, so the position must be tracked unconditionally.
            timer = v[1]
            try:
                word = r.recognize_google(audioText, language=language1)
                subs.append(
                    srt.Subtitle(index=(i + 1),
                                 start=dt.timedelta(seconds=v[0]),
                                 end=dt.timedelta(seconds=v[1]),
                                 content=word))
                print(v[1])
            except sr.UnknownValueError:
                print("Something Wrong")
            except Exception as e:
                print(e)

    print("Process Finished.")
    return subs
def generate_new_subtitle(self):
    """Turn the collected caption lines into the next subtitle.

    The lines of ``self.caption`` are joined with newlines and cleaned
    with ``srt.make_legal_content``. The subtitle starts at
    ``self.time_now``, lasts ``self.time_delta`` and is numbered after the
    subtitles already stored. Afterwards ``self.time_now`` moves to the
    subtitle's end and the caption lines are cleared.
    """
    text = srt.make_legal_content('\n'.join(self.caption))

    begin = self.time_now
    finish = begin + self.time_delta
    number = len(self.subtitles) + 1
    self.subtitles.append(srt.Subtitle(number, begin, finish, text))

    self.time_now = finish
    self.caption.clear()
def write_srt_file(transcriptions, id):
    """Write transcriptions to ``data/subtitles/subtitles_<id>.srt``.

    :param transcriptions: iterable of mappings with ``index``, ``start``,
                           ``end`` (SRT timestamp strings) and ``content``
    :param id: value used in the file name
    :return: the file name (without directory) that was written
    """
    v_transcriptions = [
        srt.Subtitle(trans['index'],
                     srt.srt_timestamp_to_timedelta(trans['start']),
                     srt.srt_timestamp_to_timedelta(trans['end']),
                     trans['content'])
        for trans in transcriptions
    ]

    new_filename = f'subtitles_{id}.srt'
    subtitles = srt.compose(v_transcriptions)

    # The with-statement closes the file on exit.
    with open(f'data/subtitles/{new_filename}', "w") as f:
        f.write(subtitles)

    return new_filename
def make_videos_from_words(word, srt_file_location, video_file="s.mp4"):
    """Cut one short clip per subtitle that contains ``word``.

    For every subtitle whose text contains ``word`` (case-insensitively),
    the moment the word is spoken is estimated from its character position
    within the text. A clip is cut from 0.2 seconds before that estimate
    (never before the start of the video) and lasts the word's estimated
    share of the subtitle plus 0.5 seconds. Each clip ``<word><n>.mp4``
    gets a one-entry subtitle file ``<word><n>.srt`` showing the word for
    the whole clip; ``n`` counts the matches from 0.

    :param word: the word to search for
    :param srt_file_location: path of the subtitle file to search
    :param video_file: video the clips are cut from
    """
    # Parse everything up front so the file is closed before the slow
    # clip extraction starts, and also when parsing fails.
    with open(srt_file_location) as subtitle_file:
        subs = list(srt.parse(subtitle_file))

    needle = word.lower()
    index = 0
    for sub in subs:
        text = sub.content.lower()
        if not text:
            # Nothing to locate; also avoids dividing by zero below.
            continue
        word_start = text.find(needle)
        if word_start < 0:
            continue

        # Share of the subtitle taken by the word, and where it begins.
        word_share_in_content = len(word) / len(text)
        starting_percentage = word_start / len(text)

        # total_seconds() also accounts for whole days.
        sub_start = sub.start.total_seconds()
        time = sub.end.total_seconds() - sub_start

        # Start slightly before the estimate, but not before the video.
        start = max(0.0, sub_start + time * starting_percentage - .2)
        end = start + time * word_share_in_content + .5

        # Subtitle shown for the whole output clip.
        sub_for_video = srt.Subtitle(1,
                                     start=timedelta(seconds=0),
                                     end=timedelta(seconds=end - start),
                                     content=word)
        with open(f"{word}{index}.srt", "w") as final:
            final.write(srt.compose([sub_for_video]))

        ffmpeg_extract_subclip(video_file, start, end,
                               targetname=f"{word}{index}.mp4")
        index += 1