Exemple #1
0
def make_highlighting_report_for_each_session(part='digital reading',
                                              redo=False,
                                              use_edit_dir=True):
    """Build and save a highlighting report for every eligible session.

    A session is processed only if 'time_to_find_highlights.json' exists in
    its directory. It is skipped when 'time_to_make_highlight_report.json'
    already exists, unless ``redo`` is true. The value returned by
    ``make_report`` is dumped as JSON to ``settings.highlighting_report``
    inside the session directory, and the elapsed time is then written to
    'time_to_make_highlight_report.json'.

    Args:
        part: forwarded to ``make_report``.
        redo: when true, rebuild the report even if the timing file exists.
        use_edit_dir: forwarded to ``make_report``.
    """
    for name in get_session_names():
        session = Session(name)
        prefix = session.dir_name + os.sep
        timing_path = prefix + 'time_to_make_highlight_report.json'

        # the report needs the output of the highlight-finding step
        if not os.path.isfile(prefix + 'time_to_find_highlights.json'):
            continue
        # an existing timing file means the report was already made
        if not redo and os.path.isfile(timing_path):
            continue

        started = time.time()
        report = make_report(session, part=part, use_edit_dir=use_edit_dir)
        with open(prefix + settings.highlighting_report, 'w') as report_file:
            json.dump(report, report_file, indent=4, sort_keys=False)
        # the measured time covers both building and writing the report
        timings = {'make_report': time.time() - started}

        with open(timing_path, 'w') as timing_file:
            json.dump(timings, timing_file)
Exemple #2
0
def find_highlights_for_each_session(no_bolding=False,
                                     img_dir_path=settings.frame_images_dir,
                                     part='digital reading',
                                     dir_to_read_from=settings.global_id_dir,
                                     dir_to_save_to=settings.highlights_dir,
                                     recalculate=False):
    """Run ``find_all_highlights`` on every eligible session and time it.

    A session is processed only if 'time_to_assign_ids.json' exists in its
    directory. It is skipped when 'time_to_find_highlights.json' already
    exists, unless ``recalculate`` is true. After each run the elapsed time
    is written to 'time_to_find_highlights.json'.

    Args:
        no_bolding: forwarded to ``find_all_highlights``.
        img_dir_path: forwarded to ``find_all_highlights``.
        part: forwarded to ``find_all_highlights``.
        dir_to_read_from: forwarded to ``find_all_highlights``.
        dir_to_save_to: forwarded to ``find_all_highlights``.
        recalculate: when true, reprocess sessions that already have a
            timing file; also forwarded to ``find_all_highlights``.
    """
    # every session receives the same keyword arguments
    options = {
        'part': part,
        'no_bolding': no_bolding,
        'img_dir_path': img_dir_path,
        'dir_to_read_from': dir_to_read_from,
        'dir_to_save_to': dir_to_save_to,
        'recalculate': recalculate,
    }
    for name in get_session_names():
        session = Session(name)
        prefix = session.dir_name + os.sep
        timing_path = prefix + 'time_to_find_highlights.json'

        # the global ids must have been assigned first
        if not os.path.isfile(prefix + 'time_to_assign_ids.json'):
            continue
        # don't repeat finished work unless asked to
        if not recalculate and os.path.isfile(timing_path):
            continue

        started = time.time()
        find_all_highlights(session, **options)
        elapsed = time.time() - started

        with open(timing_path, 'w') as timing_file:
            json.dump({'find_all_highlights': elapsed}, timing_file)
Exemple #3
0
def assign_global_ids_to_each_session(redo=False,
                                      part='digital reading',
                                      error_dir=settings.error_dir):
    """Assign global ids for every eligible session and time the step.

    A session is processed only if 'time_to_cleanup_ocr.json' exists in its
    directory. It is skipped when 'time_to_assign_ids.json' already exists,
    unless ``redo`` is true. After each run the elapsed time is written to
    'time_to_assign_ids.json'.

    Args:
        redo: when true, reprocess sessions that already have a timing
            file; also forwarded to ``assign_global_ids_from_correct_file``.
        part: forwarded to ``assign_global_ids_from_correct_file``.
        error_dir: forwarded to ``assign_global_ids_from_correct_file``.
    """
    for name in get_session_names():
        session = Session(name)
        prefix = session.dir_name + os.sep
        timing_path = prefix + 'time_to_assign_ids.json'

        # the OCR cleanup step must have finished for this session
        if not os.path.isfile(prefix + 'time_to_cleanup_ocr.json'):
            continue
        # don't repeat finished work unless asked to
        if not redo and os.path.isfile(timing_path):
            continue

        started = time.time()
        assign_global_ids_from_correct_file(session,
                                            part=part,
                                            redo=redo,
                                            error_dir=error_dir)
        elapsed = time.time() - started

        with open(timing_path, 'w') as timing_file:
            json.dump({'assign_global_ids_from_correct_file': elapsed},
                      timing_file)
Exemple #4
0
def run_initial_ocr_and_time_on_each_session(redo=False, cutoff_parts=True):
    """Run the initial OCR step on every session and save its timings.

    A session is skipped when 'time_to_run_initial_ocr.json' already exists
    in its directory, unless ``redo`` is true. The value returned by
    ``run_initial_ocr_and_time`` is dumped as JSON to that file.

    Args:
        redo: when true, rerun sessions that already have a timing file.
        cutoff_parts: forwarded to ``run_initial_ocr_and_time``.
    """
    for name in get_session_names():
        session = Session(name)
        timing_path = (session.dir_name + os.sep +
                       'time_to_run_initial_ocr.json')
        # run when forced, or when no timing file has been written yet
        if redo or not os.path.isfile(timing_path):
            timings = run_initial_ocr_and_time(session,
                                               cutoff_parts=cutoff_parts)
            with open(timing_path, 'w') as timing_file:
                json.dump(timings, timing_file)
Exemple #5
0
def assign_global_ids_to_corrected_files_for_each_session():
    """Re-assign global ids from the editor directory for every session.

    Calls ``assign_global_ids_from_correct_file`` on each session with
    ``redo=True`` and ``start_from_scratch=True``, using
    ``settings.editor_dir`` as both the source and the alternate directory
    and ``settings.error_dir`` as the error directory.
    """
    for name in get_session_names():
        session = Session(name)
        assign_global_ids_from_correct_file(session,
                                            part='digital reading',
                                            redo=True,
                                            error_dir=settings.error_dir,
                                            source_dir_name=settings.editor_dir,
                                            alt_dir_name=settings.editor_dir,
                                            start_from_scratch=True)
Exemple #6
0
def make_highlighting_images(part='digital reading', redo=False):
    """Build the highlight visualization matrix for every eligible session.

    A session is processed only if the ``settings.highlighting_report`` file
    exists in its directory. It is skipped when
    'time_to_make_highlight_image.json' already exists, unless ``redo`` is
    true. After each run the elapsed time is written to that timing file.

    Args:
        part: forwarded to ``get_highlight_visualization_matrix``.
        redo: when true, reprocess sessions that already have a timing
            file; also forwarded to ``get_highlight_visualization_matrix``.
    """
    for name in get_session_names():
        session = Session(name)
        prefix = session.dir_name + os.sep
        timing_path = prefix + 'time_to_make_highlight_image.json'

        # nothing to visualize without a highlighting report
        if not os.path.isfile(prefix + settings.highlighting_report):
            continue
        # don't repeat finished work unless asked to
        if not redo and os.path.isfile(timing_path):
            continue

        started = time.time()
        get_highlight_visualization_matrix(session, part=part, redo=redo)
        elapsed = time.time() - started

        with open(timing_path, 'w') as timing_file:
            json.dump({'make_matrix': elapsed}, timing_file)
Exemple #7
0
def get_reading_times():
    """Write the digital-reading time span of every session to a CSV file.

    For each session, the metadata entries whose 'part' equals
    'digital reading' are collected. The smallest 'start_time' and the
    largest 'end_time' among them form that session's row. A session with
    no such entries gets ``None`` for both values, which the csv writer
    emits as empty cells.

    The rows are written, under a header row, to 'reading_times.csv' in the
    current working directory, overwriting any existing file.
    """
    rows = []
    for sess_name in get_session_names():
        sess = Session(sess_name)
        reading_parts = [
            entry for entry in sess.metadata
            if entry['part'] == 'digital reading'
        ]
        if reading_parts:
            start_time = min(entry['start_time'] for entry in reading_parts)
            end_time = max(entry['end_time'] for entry in reading_parts)
        else:
            start_time = end_time = None
        rows.append([sess_name, start_time, end_time])

    # newline='' leaves line endings to the csv module; without it an extra
    # blank line follows every row on platforms that translate '\n'
    with open('reading_times.csv', 'w', newline='') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(['sess_name', 'start_time', 'end_time'])
        writer.writerows(rows)
Exemple #8
0
def run_cleanup_session_and_time_on_each_session(redo=False,
                                                 part='digital reading'):
    """Run ``cleanup_session`` on every session and save its timings.

    The correct bags and the dictionary built from them by
    ``make_matching_dictionary`` are computed once and shared by all
    sessions. A session is skipped when 'time_to_cleanup_ocr.json' already
    exists in its directory, unless ``redo`` is true. The value returned by
    ``cleanup_session`` is dumped as JSON to that file.

    Args:
        redo: when true, rerun sessions that already have a timing file;
            also forwarded to ``cleanup_session``.
        part: forwarded to ``cleanup_session``.
    """
    # shared inputs, built once before visiting any session
    bags = get_correct_bags()
    matching = make_matching_dictionary(bags)

    for name in get_session_names():
        session = Session(name)
        timing_path = session.dir_name + os.sep + 'time_to_cleanup_ocr.json'
        already_done = not redo and os.path.isfile(timing_path)
        if already_done:
            continue

        timings = cleanup_session(session,
                                  bags,
                                  matching,
                                  redo=redo,
                                  stop_at_lines=False,
                                  alt_dir_name=None,
                                  part=part)
        with open(timing_path, 'w') as timing_file:
            json.dump(timings, timing_file)
Exemple #9
0
                                 y_bottom - y_top,
                                 facecolor='b',
                                 alpha=1.0 / len(data))
        plt.add_patch(rect)
    # scale the plot to fit over the image
    plt.xlim([0, width])
    plt.ylim([height, 0])
    # get rid of y ticks
    plt.yticks([], [])
    plt.xticks([], [])
    plt.savefig(sess.dir_name + os.sep + 'dwell_heatmap.png', dpi=800)


# Script entry point: for every session, print its name, plot its scrolling,
# then check that the 'hocr-files' and 'xml-files' directories exist.
if __name__ == '__main__':

    for sess_name in get_session_names():
        print(sess_name)
        sess = Session(sess_name)
        # the plot is requested before the directory checks below, so it is
        # attempted even for sessions that are later reported as incomplete
        # NOTE(review): presumably writes 'scrolling.png' for the session —
        # confirm in plot_scroll
        plot_scroll(sess,
                    xml_dir_extention=None,
                    start_time=None,
                    end_time=None,
                    reset_time=False,
                    filename='scrolling.png')
        # report and skip sessions that have no 'hocr-files' directory
        if not os.path.isdir(sess.dir_name + os.sep + 'hocr-files'):
            print('no hocr')
            continue
        # report and skip sessions that have no 'xml-files' directory
        if not os.path.isdir(sess.dir_name + os.sep + 'xml-files'):
            print('no xml')
            continue
        # number of entries in the session's 'hocr-files' directory
        # NOTE(review): the value is not used in the visible code — the
        # snippet may be cut off here
        hocr_files = len(os.listdir(sess.dir_name + os.sep + 'hocr-files'))
Exemple #10
0
def save_scrolling_for_all_sessions(part='digital reading'):
	"""Recompute scrolling for every session.

	Calls ``calculate_scrolling`` with ``redo=True`` on each session named
	by ``get_session_names``.

	Args:
		part: forwarded to ``calculate_scrolling``.
	"""
	for name in get_session_names():
		session = Session(name)
		calculate_scrolling(session, part=part, redo=True)
    for filename in os.listdir(dir_name):
        if filename.startswith('.'):
            continue
        length_of_zeros = len(filename[:filename.find('.')])
        break

    # if there is nothing in the frames directory, exit
    if length_of_zeros is None:
        return

    # set the matching string
    frame_matching_string = dir_name + '%0' + str(length_of_zeros) + 'd.png'
    command = [
        'ffmpeg', '-r',
        str(1 / settings.little_t), '-i', frame_matching_string, '-vcodec',
        'mpeg4', '-y', sess.dir_name + os.sep + settings.explanation_video
    ]

    # build the video
    subprocess.call(command)


# Script entry point: build the explanation frames and then the explanation
# video for the first session only.
if __name__ == '__main__':
    session_names = get_session_names()
    # only make an explanation for the first session
    # be aware this takes a VERY long time per video
    # (the [:1] slice also makes the loop a no-op when there are no sessions)
    for sess_name in session_names[:1]:
        sess = Session(sess_name)
        # make a visualization
        make_explanation_frames_for_session(sess)
        make_explanation_video(sess)
Exemple #12
0
def make_highlighting_viz_for_all_sessions(part='digital reading'):
    """Make a highlight visualization for every session.

    Calls ``make_highlight_viz`` on each session named by
    ``get_session_names``.

    Args:
        part: forwarded to ``make_highlight_viz``.
    """
    for name in get_session_names():
        session = Session(name)
        make_highlight_viz(session, part=part)