Esempio n. 1
0
                json.dump(scene_stamps[show][episode_name], fp2)

# Load the processed plot of every episode of every show.
# Each result is cached on disk as '<episode>_proc_plot.json'; when that
# cache cannot be read, the plot is rebuilt from '<episode>_plot.txt' via
# preprocessor.fetch_plot_data and the cache file is written.
plot_sentences = {show: {} for show in list_of_shows}

for show in list_of_shows:
    for episode_name in video_file_names[show]:
        # common prefix of the raw plot file and of its json cache
        episode_prefix = DIR_PLOTS[show] + '/' + episode_name
        file_path = episode_prefix + '_proc_plot.json'
        try:
            with open(file_path, 'r') as fp:
                plot_sentences[show][episode_name] = json.load(fp)
        except IOError:
            # cache miss: process the raw plot, then store it for next time
            fetched_plot = preprocessor.fetch_plot_data(episode_prefix + '_plot.txt')
            plot_sentences[show][episode_name] = fetched_plot
            with open(file_path, 'w') as fp:
                json.dump(fetched_plot, fp)

# Run this only once (the first time)!
# Preprocesses the .srt files, converting them to UTF-8.
# Destructive: each file is overwritten in place.
# for show in list_of_shows:
#     for f in video_file_names[show]:
#         file_path = DIR_SUBS[show]+'/'+f+'.srt'
#         if chardet.detect(file_path)['encoding'] not in ['utf-8', 'ascii']:
#             data = open(file_path).read()
#             with open(file_path, 'w') as fp:
#                 fp.write(data.decode(char.detect(file_path)['encoding']).encode('utf-8'))
# alternatively for the last line -> instead of windows use detected value
Esempio n. 2
0
            t1, t2 = get_scene_stamps(DIR_VIDS + "/" + vid_file + ".mp4")
            time_stamps.append(t1)
            scene_stamps.append(t2)
            json.dump(time_stamps[-1], fp1)
            json.dump(scene_stamps[-1], fp2)

# Load the processed plot for every entry of file_names, in order.
# The processed data is cached as "<name>_proc_plot.json"; when that cache
# cannot be read, it is rebuilt from "<name>_plot.txt" via fetch_plot_data
# and the cache file is written.
plot_sentences = []
for plot in file_names:
    file_path = DIR_PLOTS + "/" + plot + "_proc_plot.json"
    try:
        with open(file_path, "r") as fp:
            plot_sentences.append(json.load(fp))
    except IOError:
        # Build the data BEFORE opening the cache for writing: open(..., "w")
        # creates/truncates the file immediately, so a failure inside
        # fetch_plot_data would otherwise leave an empty cache file behind
        # and the next run would fail in json.load instead of rebuilding.
        plot_sentences.append(fetch_plot_data(DIR_PLOTS + "/" + plot + "_plot.txt"))
        with open(file_path, "w") as fp:
            json.dump(plot_sentences[-1], fp)

# Run this only once (the first time)!
# Preprocesses the .srt files, converting them to UTF-8.
# Meant to be non-destructive, but as written it overwrites each file in place.
# for f in file_names:
#     file_path = DIR_SUBS+"/"+f+".srt"
#     if chardet.detect("file_path")["encoding"] != "utf-8"
#         data = open(file_path).read()
#         with open(file_path, "w") as fp:
#             fp.write(data.decode('Windows-1252').encode('utf-8'))
#             fp.write(data.decode(char.detect("file_path")["encoding"]).encode("utf-8"))
# alternatively for the last line -> instead of windows use detected value

# Three independent, initially empty lists
# (presumably filled from the subtitle files further down -- confirm).
sub_stamps = []
sub_text = []
untouched_sub_text = []