def remake_pads(summaries, stdscr, index):
    # Rebuild every summary pad, highlighting the entry at `index`.
    new_pads = []
    for i, summary in enumerate(summaries):
        subject = utils.format_html(summary['subject'])
        contents = utils.format_html(summary['content_snipet'])
        reverse = i == index  # reverse video marks the selected entry
        new_pads.append(summary_pad(subject, contents, stdscr, reverse))
    return new_pads
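A minimal usage sketch, assuming the summary_pad and render helpers from the surrounding examples and a selection index i (illustrative driver code, not from the source):

pads = remake_pads(summaries, stdscr, i)  # rebuild after a resize or view change
render(pads, i, stdscr)                   # redraw with entry i highlighted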
Example #3
def excel_gt_json_generate():
    """
    This function is used to generate rr_gt.json file for calculating similarity.
    Returns:
        Generate rr_gt.json for metirc.py to calculate similarity
    """
    count = 0
    gt_gt = {}
    excel_path = '/home/gita/Downloads/rr_table/excel/'
    text_path = '/home/gita/Downloads/rr_table/text/'
    html_path = '/home/gita/Downloads/rr_table/html/'
    father_path_file = '/home/gita/Downloads/final/f.txt'

    gt_json_file = './src/rr_gt.json'

    with open(father_path_file, 'r') as jf:
        file_list = jf.readlines()
    for file in file_list:
        file_name = file.split('/')[-1].split('.')[0]
        excel_file = excel_path + file_name + '.xlsx'
        text_file = text_path + file_name + '.json'
        adjacent_matrix, cell_text_list = excel_to_matrix(
            excel_file, text_file)
        gt_json = matrix_to_html(cell_text_list, adjacent_matrix)
        gt_html = format_html(gt_json)
        with open(html_path + file_name + '.html', 'w',
                  encoding='utf-8') as hf:
            hf.write(str(gt_html))
        gt_gt[file_name + '.json'] = {"html": gt_html}
        count += 1
        print(count, file_name)
        # if count >= 1:
        #     break
    with open(gt_json_file, 'w') as tf:
        tf.write(json.dumps(gt_gt))
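For reference, the gt_gt dict written above keys each entry by "<file_name>.json" and nests the markup under "html"; a minimal sketch of reading the output back (the file name in the comment is illustrative, not from the source):

import json

with open('./src/rr_gt.json', 'r') as f:
    gt = json.load(f)
# e.g. gt['000123.json']['html'] -> ground-truth table HTML
# ('000123' is a made-up file name for illustration)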
Example #4
def excel_pred_json_generate():
    """
    Process the results of OCR and PointerNet to recovery .html and generate .json
    Returns:
        Generate rr_pred.json for metirc.py to calculate similarity.

    """
    count = 0
    error_count = 0
    text_path = '/home/gita/Downloads/rr_table/text/'

    uf_path = '/home/gita/Downloads/final/f1/'
    df_path = '/home/gita/Downloads/final/f2/'
    lm_path = '/home/gita/Downloads/final/m1/'
    rm_path = '/home/gita/Downloads/final/m2/'
    img_path_txt = '/home/gita/Downloads/final/f.txt'

    pred_html_path = './recovered_html/rr_html/'
    pred_json_file = './src/rr_pred.json'

    father_dict = get_parent_dict(uf_path,
                                  df_path,
                                  lm_path,
                                  rm_path,
                                  img_path_txt,
                                  number_per_file=2)
    file_list = get_specific_files('/home/gita/Downloads/rr_table/text/',
                                   ['.json'])
    pred_json = {}
    for file_name in file_list:
        # check_list = ['000917.json']
        # if file_name not in check_list:
        #     continue
        print(count, file_name)
        text_file = text_path + file_name
        # try:
        preds = axis_to_rows(text_file)
        # except:
        #     error_count += 1
        #     print("-------------------------------------------------------------")
        preds_list, matrix = preds_to_matrix(preds, father_dict[file_name])
        if matrix is None:
            continue
        img_json = matrix_to_html(preds_list, matrix)
        pred_html = format_html(img_json)

        with open(pred_html_path + file_name + '.html', 'w',
                  encoding='utf-8') as hf:
            hf.write(str(pred_html))

        pred_json[file_name] = pred_html
        count += 1
        # if count >= len(check_list):
        #     break
    print(error_count)
    with open(pred_json_file, 'w') as tf:
        tf.write(json.dumps(pred_json).strip())
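Note the asymmetry between the two outputs: rr_pred.json maps each file name directly to its HTML string, while rr_gt.json nests the markup under an "html" key. A minimal sketch of loading both for comparison, using the paths defined above:

import json

with open('./src/rr_gt.json', 'r') as f:
    gt = json.load(f)
with open('./src/rr_pred.json', 'r') as f:
    pred = json.load(f)

for name, pred_html in pred.items():
    # gt entries are nested under "html"; pred entries are plain strings
    gt_html = gt.get(name, {}).get('html')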
Example #5
def ret_json_generate():
    """
    Process the results of OCR and PointerNet to recovery .html and generate .json
    Returns:
        Generate html.json for metirc.py to calculate similarity.
    """
    ocred_path = '/home/gita/Downloads/mini_result/mini_json/'

    img_path_txt = '/home/gita/Downloads/mini_result_50/mini_father.txt'
    uf_path = '/home/gita/Downloads/mini_result_50/father/'
    df_path = ''
    lm_path = '/home/gita/Downloads/mini_result_50/mother_p/'
    rm_path = '/home/gita/Downloads/mini_result_50/mother_n/'

    ret_html_path = './recovered_html/mini_50/'
    ret_json_file = './src/mini_pred_50.json'

    father_dict = get_parent_dict(uf_path,
                                  df_path,
                                  lm_path,
                                  rm_path,
                                  img_path_txt,
                                  number_per_file=2)
    ocred_files = get_specific_files(ocred_path)

    count = 0
    pred_json = {}
    for file_name in ocred_files:
        if file_name in [
                'PMC3707453_006_00.png.json', 'PMC6022086_007_00.png.json'
        ]:
            continue
        preds = parse_ocred_json(ocred_path + file_name)
        preds = merge_by_iou(preds)
        count += 1
        print(count, file_name)
        preds_list, matrix = preds_to_matrix(preds, father_dict[file_name])
        if matrix is None:
            continue
        img_json = matrix_to_html(preds_list, matrix)
        pred_html = format_html(img_json)
        pred_json[file_name[:-5]] = pred_html
        with open(ret_html_path + file_name + '.html', 'w',
                  encoding='utf-8') as hf:
            hf.write(str(pred_html))
        # break
    with open(ret_json_file, 'w') as tf:
        tf.write(json.dumps(pred_json).strip())
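Because file_name[:-5] strips the trailing ".json" (five characters), the keys in mini_pred_50.json are the underlying image file names; a quick illustration:

# '<image>.png.json'[:-5] -> '<image>.png'
name = 'PMC3707453_006_00.png.json'
assert name[:-5] == 'PMC3707453_006_00.png'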
Example #6
def post_pad(post, network, stdscr):
    lines = []
    width = stdscr.getmaxyx()[1]
    bold = curses.A_BOLD
    norm = curses.A_NORMAL
    #lines.append((" ", norm))
    subject = utils.format_html(post['subject'])
    for line in wrap(subject, width - 2):
        lines.append((line, bold))
    contents = utils.format_html(post['contents']).replace(
        '___bold_start___', '').replace('___bold_end___', '')
    for line in wrap(contents, width - 2):
        lines.append((line, norm))
    # decode back to str so it can be concatenated below (Python 3)
    author = network.get_user_name(post['uid']).encode(
        'ascii', 'ignore').decode('ascii') if post['uid'] else 'Anonymous'
    right_author = " " * (width - len(author) - 4) + '- ' + author
    lines.append((right_author, norm))
    if post['student_answer']:
        lines.append(("Student Answer:", bold))
        answer = post['student_answer']
        contents = utils.format_html(answer['contents']).replace(
            '___bold_start___', '').replace('___bold_end___', '')
        for line in wrap(contents, width - 2):
            lines.append((line, norm))
        author = network.get_user_name(answer['uid']).encode(
            'ascii', 'ignore').decode('ascii') if answer['uid'] else 'Anonymous'
        right_author = " " * (width - len(author) - 4) + '- ' + author
        lines.append((right_author, norm))
    if post['instructor_answer']:
        lines.append(("Instructor Answer:", bold))
        answer = post['instructor_answer']
        contents = utils.format_html(answer['contents']).replace(
            '___bold_start___', '').replace('___bold_end___', '')
        for line in wrap(contents, width - 2):
            lines.append((line, norm))
        author = network.get_user_name(answer['uid']).encode(
            'ascii', 'ignore').decode('ascii') if answer['uid'] else 'Anonymous'
        right_author = " " * (width - len(author) - 4) + '- ' + author
        lines.append((right_author, norm))
    if len(post['followups']) > 0:
        lines.append(("Followup Discussions:", bold))
    for followup in post['followups']:
        subject = utils.format_html(followup['subject'])
        hasUID = 'uid' in followup and followup['uid']
        author = network.get_user_name(
            followup['uid']) if hasUID else 'Anonymous'
        entry = subject + " (" + author.encode('ascii', 'ignore').decode('ascii') + ")"
        if followup['plus_ones'] > 0:
            entry += ' +' + str(followup['plus_ones'])
        for line in wrap(entry, width - 2):
            lines.append((line, norm))
        for reply in followup['children']:
            subject = utils.format_html(reply['subject'])
            hasUID = 'uid' in reply and reply['uid']
            author = network.get_user_name(
                reply['uid']) if hasUID else 'Anonymous'
            entry = subject + " (" + author.encode('ascii', 'ignore').decode('ascii') + ")"
            first = True
            for line in wrap(entry, width - 9):
                if first:
                    lines.append(("     " + line, norm))
                    first = False
                else:
                    lines.append(("       " + line, norm))
        lines.append(("", norm))

    pad = curses.newpad(len(lines) + 1, width)
    for item in lines:
        contents, style = item
        pad.addstr("\n " + contents, style)
    return pad
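post_pad returns a pad sized to its content (len(lines) + 1 rows), so the caller decides which slice is visible. A minimal sketch of painting it with curses' pad refresh, assuming a post dict shaped like the one consumed above (the zero scroll offset is illustrative):

height, width = stdscr.getmaxyx()
pad = post_pad(post, network, stdscr)
# copy pad rows starting at (0, 0) into the on-screen rectangle
# spanning (0, 0) .. (height - 1, width - 1)
pad.refresh(0, 0, 0, 0, height - 1, width - 1)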
Example #7
def view_summaries(stdscr, feed, network):
    curses.use_default_colors()

    pads = []
    summaries = []
    global data
    data = []
    unsaved_post = None
    height_sum = 0
    window_height = stdscr.getmaxyx()[0]
    window_width = stdscr.getmaxyx()[1]
    while True:
        post = feed.next_post()
        if not post:
            break
        subject = utils.format_html(post['subject'])
        contents = utils.format_html(post['content_snipet'])
        subject_lines = textwrap.wrap(subject, window_width - 2)
        lines = textwrap.wrap(contents, window_width - 2)
        height = len(subject_lines) + len(lines) + 2
        if height_sum + height <= window_height * 2:
            summaries.append(post)
            data.append((subject, contents))
            height_sum += height
            pads.append(summary_pad(subject, contents, stdscr))
        else:
            unsaved_post = post
            break
    # Main loop
    i = 0
    stdscr.erase()
    stdscr.refresh()
    if len(pads) > 0:
        pads[i] = summary_pad(data[i][0], data[i][1], stdscr, True)
    stdscr.nodelay(False)  # getch blocks for input
    render(pads, i, stdscr)
    while True:
        c = stdscr.getch()

        # Quit the program
        if c == ord('q') or c == curses.KEY_BACKSPACE:
            break

        elif c == ord('i'):
            url = "https://piazza.com/class/" + network._nid + "?cid=" + summaries[
                i]['id']
            plat = platform.system()
            if plat == "Windows":
                subprocess.Popen(["explorer", url])
            elif plat == "Darwin":
                subprocess.Popen(["open", url])
            elif plat == "Linux":
                subprocess.Popen(["x-www-browser", url])

        # Check for ENTER
        elif c == ord('\n'):
            stdscr.clear()
            stdscr.refresh()
            post_summary_obj = summaries[i]
            id_num = post_summary_obj['id']
            post_obj = network.get_post(id_num)
            post_viewer.view_post(post_obj, network, stdscr)
            pads = remake_pads(summaries, stdscr, i)

        # Scroll down
        elif c == ord('j') or c == curses.KEY_DOWN:
            post = unsaved_post if unsaved_post else feed.next_post()
            unsaved_post = None
            if post:
                subject = utils.format_html(post['subject'])
                contents = utils.format_html(post['content_snipet'])
                summaries.append(post)
                data.append((subject, contents))
                pads.append(summary_pad(subject, contents, stdscr))
            if i < len(pads) - 1:
                pads[i] = summary_pad(data[i][0], data[i][1], stdscr)
                i += 1
                pads[i] = summary_pad(data[i][0], data[i][1], stdscr, True)

        # Scroll up
        elif (c == ord('k') or c == curses.KEY_UP) and i > 0:
            pads[i] = summary_pad(data[i][0], data[i][1], stdscr)
            i -= 1
            pads[i] = summary_pad(data[i][0], data[i][1], stdscr, True)

        # Check for window resize
        if c == curses.KEY_RESIZE:
            stdscr.erase()
            stdscr.refresh()
            pads = remake_pads(summaries, stdscr, i)

        render(pads, i, stdscr)
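A minimal sketch of starting the viewer, assuming feed and network objects supplied by the surrounding application; curses.wrapper initializes the screen, passes stdscr as the first argument, and restores the terminal on exit:

import curses

def run(feed, network):
    curses.wrapper(view_summaries, feed, network)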