def parseFile(filename):
    """Return the cleaned text of every cue in the SubRip file *filename*.

    The file is opened without an explicit encoding first; if that raises
    ``UnicodeDecodeError`` it is reopened as ISO-8859-1. For each cue, every
    match of ``fontpattern`` and then of ``npattern`` (both defined outside
    this function) is removed from the cue text.

    Returns:
        A list with one cleaned string per cue, in file order.
    """
    try:
        subtitles = SubRipFile.open(filename)
    except UnicodeDecodeError:
        # Retry with a single-byte encoding that accepts any byte sequence.
        subtitles = SubRipFile.open(filename, encoding='iso-8859-1')

    return [
        re.sub(npattern, '', re.sub(fontpattern, '', item.text))
        for item in subtitles
    ]
def parseString(string):
    """Return the cleaned text of every cue parsed from SubRip data *string*.

    The input is parsed with ``SubRipFile.from_string``. For each cue, every
    match of ``fontpattern`` and then of ``npattern`` (both defined outside
    this function) is removed from the cue text.

    Returns:
        A list with one cleaned string per cue, in parse order.
    """
    parsed = SubRipFile.from_string(string)
    cleaned = []
    for item in parsed:
        without_font = re.sub(fontpattern, '', item.text)
        cleaned.append(re.sub(npattern, '', without_font))
    return cleaned
def create_srt(split_df, cris_stt_df):
    """Write ``stt_converted.srt`` from a split CSV and a transcript workbook.

    The CSV's ``wav_filename`` column is renamed to ``wav_name`` and the two
    tables are inner-joined on ``wav_name``. Each joined row becomes one
    subtitle cue, numbered from 1 in row order.

    Args:
        split_df: Path to the CSV file (read with ``pd.read_csv``). The
            output file is written into this file's directory.
        cris_stt_df: Path to the Excel file (read with ``pd.read_excel``).

    The joined table must provide ``transcripts``, ``start`` and ``end``
    columns; ``start`` and ``end`` are passed to ``SubRipTime`` as
    milliseconds. Missing transcripts produce a cue with empty text, and a
    single trailing ``.`` is dropped from each transcript.
    """
    out_dir = os.path.dirname(split_df)

    splits = pd.read_csv(split_df).rename(columns={'wav_filename': 'wav_name'})
    transcripts = pd.read_excel(cris_stt_df)

    # The merged frame holds everything needed to build the cues.
    merged = pd.merge(splits, transcripts, how='inner', on='wav_name')

    print("Creating the srt:")
    new_srt = SubRipFile()
    for index, row in merged.iterrows():
        transcript = row['transcripts']
        # pd.isna detects missing cells (NaN/None/NaT) directly. Checking
        # ``type(...) != float`` also discarded genuine numeric transcripts
        # and let non-builtin NaN values through as the text "nan".
        text = "" if pd.isna(transcript) else str(transcript)
        if text.endswith("."):
            text = text[:-1]
        new_srt.append(
            SubRipItem(index=index + 1,
                       start=SubRipTime(milliseconds=row['start']),
                       end=SubRipTime(milliseconds=row['end']),
                       text=text))

    new_srt.save(os.path.join(out_dir, "stt_converted.srt"))
    print("successfully written")
help="name of output file without extention (default: output)") parser.add_argument('--eol', type=str, default="###", help="End of line marker (default: ###)") args=parser.parse_args() subs = pysrt.open(args.file) after_subs = [] indx = 1 iterator = iter(subs) while True: try: sub = next(iterator) except StopIteration: break if sub.text[-len(args.eol):] != args.eol: after_subs.append(SubRipItem(indx, sub.start, sub.end, sub.text)) else: line2_sub = next(iterator) text = sub.text[:-len(args.eol)] + '\n' + line2_sub.text combined_sub = SubRipItem(indx, sub.start, line2_sub.end, text=text) after_subs.append(combined_sub) indx += 1 after = SubRipFile(items=after_subs) after.save(args.output + '.srt', encoding='utf-8')