예제 #1
0
        # Load this episode's processed subtitle data from its JSON caches, or
        # build it from the raw .srt file and write the caches for next time.
        sub_stamps_path = DIR_SUBS[show]+'/'+episode_name+'_proc_sub_st.json'
        sub_text_path = DIR_SUBS[show]+'/'+episode_name+'_proc_sub_tx.json'
        sub_unttext_path = DIR_SUBS[show]+'/'+episode_name+'_proc_sub_untx.json'
        temp_path = DIR_SUBS[show]+'/'+'temp.srt'
        try:
            # Read all three caches into locals first, so a failure part-way
            # through cannot leave the result dicts half-updated.
            with open(sub_stamps_path, 'r') as fp1, open(sub_text_path, 'r') as fp2, open(sub_unttext_path, 'r') as fp3:
                episode_stamps = json.load(fp1)
                episode_text = json.load(fp2)
                episode_raw_text = json.load(fp3)
        except (IOError, ValueError):
            # IOError: a cache file could not be opened.
            # ValueError: a cache file exists but does not hold valid JSON
            # (e.g. an earlier run was interrupted while writing it); rebuild
            # instead of crashing.
            # NOTE(review): the original author questioned whether this
            # temp-file step is needed at all if the preprocessor already
            # cleans the text -- confirm before removing it.
            # The temp file is opened in binary mode because .encode() yields
            # bytes; writing bytes to a text-mode file fails on Python 3.
            with io.open(DIR_SUBS[show]+'/'+episode_name+'.srt', 'r', encoding='utf-8') as sub, open(temp_path, 'wb') as temp_fp:
                # NFKD-decompose, then drop whatever is still non-ASCII, so
                # accented characters are replaced by their closest equivalents.
                temp_fp.write(unicodedata.normalize('NFKD', sub.read()).encode('ascii', 'ignore'))
            result = preprocessor.fetch_subtitle_data(temp_path)
            episode_stamps = result['sub_stamps']
            episode_text = result['sub_text']
            episode_raw_text = result['raw_sub_text']
            # Write the caches only after processing has succeeded.
            with open(sub_stamps_path, 'w') as fp1, open(sub_text_path, 'w') as fp2, open(sub_unttext_path, 'w') as fp3:
                json.dump(episode_stamps, fp1)
                json.dump(episode_text, fp2)
                json.dump(episode_raw_text, fp3)
        sub_stamps[show][episode_name] = episode_stamps
        sub_text[show][episode_name] = episode_text
        raw_sub_text[show][episode_name] = episode_raw_text

# Containers for the plot-to-subtitle mapping and the idf / tf-idf data,
# each keyed by show.
plot_to_sub = {}
idf = {}
tf_idf = {}
for show in list_of_shows:
    # every show starts with an empty dict in each container
    for per_show in (plot_to_sub, idf, tf_idf):
        per_show[show] = {}
예제 #2
0
    # Load this subtitle's processed data from its JSON caches, or build it
    # from the raw .srt file and write the caches for next time.
    temp_path = DIR_SUBS + "/" + "temp.srt"
    try:
        # Read all three caches into locals before appending anything, so the
        # three result lists always grow together.
        with open(sub_stamps_path, "r") as fp1, open(sub_text_path, "r") as fp2, open(sub_unttext_path, "r") as fp3:
            stamps = json.load(fp1)
            text = json.load(fp2)
            untouched_text = json.load(fp3)
    except (IOError, ValueError):
        # IOError: a cache file could not be opened.
        # ValueError: a cache file exists but does not hold valid JSON (e.g.
        # an earlier run failed while writing it); rebuild instead of crashing.
        # The temp file is opened in binary mode because .encode() yields
        # bytes; writing bytes to a text-mode file fails on Python 3.
        with io.open(DIR_SUBS + "/" + sub_file + ".srt", "r", encoding="utf-8") as sub, open(
            temp_path, "wb"
        ) as temp_fp:
            # temporary file containing the semi-preprocessed subtitle:
            # NFKD-decompose, then drop whatever is still non-ASCII, so
            # accented characters are replaced by their closest equivalents
            temp_fp.write(
                unicodedata.normalize("NFKD", sub.read()).encode("ascii", "ignore")
            )
        stamps, text, untouched_text = fetch_subtitle_data(temp_path)
        # Open the cache files for writing only after processing succeeded;
        # opening them earlier would leave empty files behind on failure and
        # break the cache read on the next run.
        with open(sub_stamps_path, "w") as fp1, open(sub_text_path, "w") as fp2, open(sub_unttext_path, "w") as fp3:
            json.dump(stamps, fp1)
            json.dump(text, fp2)
            json.dump(untouched_text, fp3)
    sub_stamps.append(stamps)
    sub_text.append(text)
    untouched_sub_text.append(untouched_text)

# Holders for the plot-to-subtitle and subtitle-to-shot data: one slot per
# episode, each starting out as None.
# NOTE(review): the original comment said "will work for the first time" --
# meaning unclear from here; confirm.
plot_to_sub = [None] * no_episodes
idf = [None] * no_episodes
tf_idf = [None] * no_episodes
for index, vid_file in enumerate(file_names):
    plot_to_sub_path = DIR_PLTSUB + "/" + vid_file + "_proc_pltsub.json"
    idf_path = DIR_PLTSUB + "/" + vid_file + "_idf.json"