Exemple #1
0
def adjust_bbox_h_latex_horizontally(bbox, latex):
    """
    Extend a bbox using the "horizontal" adjustment ratios of its latex symbol.

    The ratios are looked up in the horizontal tables returned by
    ``get_latex2adjustment_ratio``. When the symbol has no entry in both
    tables, a stand-in symbol is used instead: ``=`` for ``\\approx`` and
    ``\\simeq``, ``\\backslash`` for anything starting with ``\\not``.

    :param bbox: a ``BBox`` instance or any iterable of coordinates
        (presumably ordered left, bottom, right, top -- TODO confirm)
    :param latex: latex value of the symbol occupying the bbox
    :return: a new list of coordinates where index 1 is lowered by
        ``width * lower_ratio`` and index 3 is raised by
        ``width * upper_ratio``
    :raises Exception: when the symbol has no statistics and no stand-in
    """
    # only the two horizontal tables matter here, the vertical ones are dropped
    _, _, hor_latex2ur, hor_latex2lr = get_latex2adjustment_ratio()

    if not (latex in hor_latex2ur and latex in hor_latex2lr):
        stand_in = {
            "\\approx": "=",
            "\\simeq": "=",
        }.get(latex)
        if stand_in is None:
            if not latex.startswith("\\not"):
                raise Exception("No Stat yet for {}".format(latex))
            stand_in = "\\backslash"
        latex = stand_in

    upper_ratio = hor_latex2ur[latex]
    lower_ratio = hor_latex2lr[latex]

    # work on a plain list so the caller's bbox object is not the one modified
    duplicate = copy.copy(bbox)
    if isinstance(duplicate, BBox):
        coords = duplicate.to_list()
    else:
        coords = list(duplicate)

    # the padding is scaled by the width of the original bbox
    width = get_width(bbox)
    coords[1] = coords[1] - width * lower_ratio
    coords[3] = coords[3] + width * upper_ratio
    return coords
Exemple #2
0
def get_lower_ratio_hor(cinfo, lower_line):
    """
    Distance from the vertical center of the char down to ``lower_line``,
    normalized by the width of the char.

    The width is used as the unit rather than the height, because the
    height of such a char might be very thin.

    :param cinfo: char info, its ``'bbox'`` entry is the bbox of the char
    :param lower_line: vertical position of the lower line
    :return: ``(y_center - lower_line) / width``
    """
    center_y = get_y_center(cinfo['bbox'])
    drop = center_y - lower_line
    char_width = get_width(cinfo['bbox'])
    return drop / char_width
Exemple #3
0
def get_upper_ratio_hor(cinfo, upper_line):
    """
    Distance from the vertical center of the char up to ``upper_line``,
    normalized by the width of the char.

    :param cinfo: char info, its ``'bbox'`` entry is the bbox of the char
    :param upper_line: vertical position of the upper line
    :return: ``(upper_line - y_center) / width``
    """
    center_y = get_y_center(cinfo['bbox'])
    rise = upper_line - center_y
    char_width = get_width(cinfo['bbox'])
    return rise / char_width
Exemple #4
0
def merge_line_ime(char_list_list):
    """
    Merge the lines sitting directly above/below a big operator into the
    line that holds the operator.

    Though it's called IME processing, it only merges the bind var,
    no matter whether it's IME or EME; a better name might be
    "merge big op". The merge is only based on the bind var operators
    ``\\sum`` and ``\\prod``.

    Lines are ordered by decreasing ``top()`` of their bbox. For every line
    holding a big operator, a horizontal window is built that reaches one
    operator width to the left and to the right of the operator(s). The
    neighbour just before and just after the line in that order is absorbed
    when its bbox lies strictly inside the window. No vertical distance
    check is done.

    :param char_list_list: list of lines, each line being a list of chars;
        only ``LTChar`` items are inspected when looking for operators
    :return: list of lines ordered by decreasing ``top()``; absorbed lines
        are not emitted on their own, their chars are appended after the
        chars of the line that absorbed them
    """
    line_bbox_list = [
        get_char_list_bbox(char_list)  # not removing the accent
        for char_list in char_list_list
    ]

    # sorted() rather than range(...).sort(): on Python 3 a range object is
    # immutable and has no sort(). Both are stable, so ties keep input order.
    cur_line_idx_list = sorted(
        range(len(char_list_list)),
        key=lambda lid: -line_bbox_list[lid].top())

    # indices of the lines that were absorbed by a neighbouring line
    absorbed_line_idx = set()
    line_idx2line_idx_list = {}

    last_pos = len(cur_line_idx_list) - 1
    for i, line_idx in enumerate(cur_line_idx_list):
        # every line starts as a group made of itself only
        line_idx2line_idx_list[line_idx] = [line_idx]

        # horizontal window around the big operators of this line;
        # left_bound > right_bound means "no operator found"
        left_bound, right_bound = 1000000, -1
        for char in char_list_list[line_idx]:
            if not isinstance(char, LTChar):
                continue
            latex_val = get_latex_val_of_lt_char(char)
            if latex_val in ['\\sum', '\\prod']:
                char_width = get_width(char.bbox)
                left_bound = min(left_bound, char.bbox[0] - char_width)
                right_bound = max(right_bound, char.bbox[2] + char_width)
        if left_bound > right_bound:
            continue

        # candidates: the neighbour before, then the neighbour after
        neighbour_idx_list = []
        if i != 0:
            neighbour_idx_list.append(cur_line_idx_list[i - 1])
        if i != last_pos:
            neighbour_idx_list.append(cur_line_idx_list[i + 1])

        for neighbour_idx in neighbour_idx_list:
            neighbour_bbox = line_bbox_list[neighbour_idx]
            if (neighbour_bbox.left() > left_bound
                    and neighbour_bbox.right() < right_bound):
                line_idx2line_idx_list[line_idx].append(neighbour_idx)
                absorbed_line_idx.add(neighbour_idx)

    res_char_list_list = []
    for line_idx in cur_line_idx_list:
        if line_idx in absorbed_line_idx:
            continue
        tmp_char_list = []
        for tmp_li in line_idx2line_idx_list[line_idx]:
            tmp_char_list.extend(char_list_list[tmp_li])
        res_char_list_list.append(tmp_char_list)

    return res_char_list_list
Exemple #5
0
def normalized_Ar_Bl_diff_by_width_AB(b1, b2):
    """
    Gap between ``b1[2]`` and ``b2[0]``, divided by the width of ``b1``.

    :param b1: first bbox (index 2 is presumably its right edge -- TODO confirm)
    :param b2: second bbox (index 0 is presumably its left edge -- TODO confirm)
    :return: ``(b2[0] - b1[2]) / get_width(b1)``
    """
    gap = b2[0] - b1[2]
    b1_width = get_width(b1)
    return gap / b1_width
Exemple #6
0
def normalized_xcenter_diff_by_merge_AB(b1, b2):
    """
    Symmetric horizontal difference: absolute distance between the x centers
    of the two bboxes, divided by the width of their merged bbox.

    :param b1: first bbox
    :param b2: second bbox
    :return: ``abs(x_center(b1) - x_center(b2)) / width(merge_bbox(b1, b2))``
    """
    merged = merge_bbox(b1, b2)
    center_gap = abs(get_x_center(b1) - get_x_center(b2))
    merged_width = get_width(merged)
    return center_gap / merged_width