Esempio n. 1
0
def parseFile(filename):
    """Load a SubRip file and return the cleaned text of each subtitle.

    The file is first opened with SubRipFile's default encoding; if that
    raises ``UnicodeDecodeError`` it is reopened as ISO-8859-1.

    Every match of the module-level ``fontpattern`` and then ``npattern``
    regexes is removed from each subtitle's text (presumably font tags and
    line breaks -- the patterns are defined outside this function).

    Args:
        filename: Path of the ``.srt`` file to read.

    Returns:
        A list with one cleaned string per subtitle, in file order.
    """
    try:
        entries = SubRipFile.open(filename)
    except UnicodeDecodeError:
        # Fall back to Latin-1 for files the default decoding rejects.
        entries = SubRipFile.open(filename, encoding='iso-8859-1')

    return [
        re.sub(npattern, '', re.sub(fontpattern, '', entry.text))
        for entry in entries
    ]
Esempio n. 2
0
def parseString(string):
    """Parse SubRip-formatted text and return each subtitle's cleaned text.

    Every match of the module-level ``fontpattern`` and then ``npattern``
    regexes is removed from each subtitle's text (presumably font tags and
    line breaks -- the patterns are defined outside this function).

    Args:
        string: Subtitle data in SubRip format, handed to
            ``SubRipFile.from_string``.

    Returns:
        A list with one cleaned string per subtitle, in source order.
    """
    return [
        re.sub(npattern, '', re.sub(fontpattern, '', entry.text))
        for entry in SubRipFile.from_string(string)
    ]
Esempio n. 3
0
def create_srt(split_df, cris_stt_df):
    """Build ``stt_converted.srt`` from a split CSV and a transcript sheet.

    The CSV at ``split_df`` and the Excel file at ``cris_stt_df`` are
    inner-joined on ``wav_name`` (the CSV's ``wav_filename`` column is
    renamed to match). Each joined row becomes one subtitle whose timing
    comes from the ``start`` / ``end`` columns, passed to ``SubRipTime`` as
    milliseconds, and whose text comes from ``transcripts``.

    Args:
        split_df: Path to the CSV file; the output is written next to it.
        cris_stt_df: Path to the Excel file that is merged with the CSV.

    Returns:
        None. The subtitle file is saved as ``stt_converted.srt`` in the
        directory of ``split_df``.
    """
    out_path = os.path.join(os.path.dirname(split_df), "stt_converted.srt")

    splits = pd.read_csv(split_df)
    transcripts = pd.read_excel(cris_stt_df)
    splits = splits.rename(columns={'wav_filename': 'wav_name'})

    # The merged frame carries everything needed to build the subtitles.
    merged = splits.merge(transcripts, how='inner', on='wav_name')

    print("Creating the srt:")
    subtitles = SubRipFile()
    for position, record in merged.iterrows():
        raw = record['transcripts']
        # A value whose type is exactly float (e.g. NaN for an empty cell)
        # is treated as "no transcript".
        caption = "" if type(raw) is float else str(raw)
        # Drop a single trailing period from the caption.
        if caption.endswith("."):
            caption = caption[:-1]

        item = SubRipItem(
            index=position + 1,
            start=SubRipTime(milliseconds=record['start']),
            end=SubRipTime(milliseconds=record['end']),
            text=caption,
        )
        subtitles.append(item)

    subtitles.save(out_path)
    print("successfully written")
Esempio n. 4
0
                 help="name of output file without extention (default: output)")

    parser.add_argument('--eol', type=str, default="###",
                 help="End of line marker (default: ###)")
    
    args=parser.parse_args()

    subs = pysrt.open(args.file)
    after_subs = []

    indx = 1
    iterator = iter(subs)

    while True:
        try:
            sub = next(iterator)
        except StopIteration:
            break

        if sub.text[-len(args.eol):] != args.eol:
            after_subs.append(SubRipItem(indx, sub.start, sub.end, sub.text))
        else:
            line2_sub = next(iterator)
            text = sub.text[:-len(args.eol)] + '\n' + line2_sub.text
            combined_sub = SubRipItem(indx, sub.start, line2_sub.end, text=text)
            after_subs.append(combined_sub)
        indx += 1

    after = SubRipFile(items=after_subs)
    after.save(args.output + '.srt', encoding='utf-8')