Esempio n. 1
0
def check_word_subscript_exist(word):
    """
    Report whether any two adjacent characters of a word are flagged as a
    subscript candidate.

    A neighbouring pair is examined only when both characters are
    alphanumeric and the first one has a non-zero height. The pair is a hit
    when its height ratio lies inside ``hr_ratio_range`` and its normalized
    vertical-center difference lies inside ``nvcd_range`` (both bounds
    inclusive). Every hit is logged at debug level, so the scan
    deliberately keeps going after the first one.

    :param word: indexable sequence of character objects exposing
        ``get_text()`` and ``bbox``
    :return: True if at least one adjacent pair matched, otherwise False
    """
    if len(word) < 2:
        # There is no adjacent pair to inspect.
        return False

    # Kept as a function-scope import like the original, but executed once
    # per call instead of once per character pair.
    from pdfxml.pdf_util.layout_util import get_height

    found_subscript = False
    for i in range(len(word) - 1):
        left = word[i]
        right = word[i + 1]

        # Restrict the check to letter/digit pairs to limit false positives.
        c1 = left.get_text()
        c2 = right.get_text()
        if not (c1.isdigit() or c1.isalpha()):
            continue
        if not (c2.isdigit() or c2.isalpha()):
            continue

        # Both features divide by the first character's height.
        if get_height(left.bbox) == 0:
            continue

        hr_fea_val = height_ratio_AB(left.bbox, right.bbox)
        nvcd_fea_val = normalized_ycenter_diff_AB(left.bbox, right.bbox)
        if hr_ratio_range[0] <= hr_fea_val <= hr_ratio_range[1] and \
                nvcd_range[0] <= nvcd_fea_val <= nvcd_range[1]:
            found_subscript = True
            me_extraction_logger.debug("{} Bbox1:{}, {} Bbox2:{}".format(
                left.get_text().encode('utf-8'), left.bbox,
                right.get_text().encode('utf-8'), right.bbox))
    return found_subscript
Esempio n. 2
0
def adjust_bbox_h_latex_vertically(bbox, latex):
    """
    Stretch a bounding box vertically using the ratios looked up for a
    LaTeX symbol.

    :param bbox: a ``BBox`` instance or any iterable of coordinates
    :param latex: symbol passed to ``get_upper_lower_ratio`` to select the
        upper and lower ratios
    :return: a list of coordinates in which index 1 is lowered by
        ``height * lower_ratio`` and index 3 is raised by
        ``height * upper_ratio``, where ``height`` is ``get_height(bbox)``
    """
    # Four tables come back; only the two vertical ones are used here.
    ver_latex2ur, ver_latex2lr, _hor_latex2ur, _hor_latex2lr = \
        get_latex2adjustment_ratio()
    upper_ratio, lower_ratio = get_upper_lower_ratio(
        latex, ver_latex2ur, ver_latex2lr)

    # Work on a list so the individual coordinates can be reassigned.
    duplicate = copy.copy(bbox)
    if isinstance(duplicate, BBox):
        coords = duplicate.to_list()
    else:
        coords = list(duplicate)

    box_height = get_height(bbox)
    coords[1] = coords[1] - box_height * lower_ratio
    coords[3] = coords[3] + box_height * upper_ratio
    return coords
Esempio n. 3
0
def adjust_bbox_h_gt(bbox, gt, debug=False):
    """
    Stretch a bounding box vertically according to its glyph type.

    Only indices 1 and 3 of the box are changed; the other coordinates are
    copied through unchanged (the original note calls this a "width stable
    character").

    :param bbox: iterable of coordinates
    :param gt: String of glyph type of xyz; used as the key into both
        tables returned by ``get_gt2adjust``
    :param debug: when true, print the glyph type and the two ratios
    :return: a new list in which index 1 is lowered by
        ``height * lower_ratio`` and index 3 is raised by
        ``height * upper_ratio``, where ``height`` is ``get_height(bbox)``
    :raises KeyError: if ``gt`` is missing from a dict-like ratio table
    """
    gt2ur, gt2lr = get_gt2adjust()

    upper_ratio = gt2ur[gt]
    lower_ratio = gt2lr[gt]
    if debug:
        # One pre-formatted argument prints the same space-separated text
        # under both the Python 2 print statement and the Python 3 function.
        print("%s %s %s" % (gt, upper_ratio, lower_ratio))

    # list() already builds a fresh list, so no separate copy is needed.
    new_bbox = list(bbox)
    height = get_height(bbox)
    new_bbox[1] = new_bbox[1] - height * lower_ratio
    new_bbox[3] = new_bbox[3] + height * upper_ratio
    return new_bbox
Esempio n. 4
0
def normalized_At_Bb_diff_by_height_AB(b1, b2):
    """
    Difference between index 1 of the second box and index 3 of the first
    box, divided by the height of the first box.

    :param b1: first bounding box (A)
    :param b2: second bounding box (B)
    :return: ``(b2[1] - b1[3]) / get_height(b1)``
    """
    gap = b2[1] - b1[3]
    height_a = get_height(b1)
    return gap / height_a
Esempio n. 5
0
def normalized_Ab_Bt_diff_by_height_AB(b1, b2):
    """
    Difference between index 3 of the second box and index 1 of the first
    box, divided by the height of the first box.

    :param b1: first bounding box (A)
    :param b2: second bounding box (B)
    :return: ``(b2[3] - b1[1]) / get_height(b1)``
    """
    gap = b2[3] - b1[1]
    height_a = get_height(b1)
    return gap / height_a
Esempio n. 6
0
def normalized_Ar_Br_diff_by_height_AB(b1, b2):
    """
    Difference between index 2 of the second box and index 2 of the first
    box, divided by the height of the first box.

    :param b1: first bounding box (A)
    :param b2: second bounding box (B)
    :return: ``(b2[2] - b1[2]) / get_height(b1)``
    """
    offset = b2[2] - b1[2]
    height_a = get_height(b1)
    return offset / height_a
Esempio n. 7
0
def normalized_Al_Bl_diff_by_height_AB(b1, b2):
    """
    Difference between index 0 of the second box and index 0 of the first
    box, divided by the height of the first box.

    :param b1: first bounding box (A)
    :param b2: second bounding box (B)
    :return: ``(b2[0] - b1[0]) / get_height(b1)``
    """
    offset = b2[0] - b1[0]
    height_a = get_height(b1)
    return offset / height_a
Esempio n. 8
0
def normalized_xcenter_diff_by_height_AB(b1, b2):
    """
    Horizontal center difference, normalized by the height of the first
    bbox.

    The difference is taken as first box minus second box.

    :param b1: first bounding box (A)
    :param b2: second bounding box (B)
    :return: ``(get_x_center(b1) - get_x_center(b2)) / get_height(b1)``
    """
    center_a = get_x_center(b1)
    center_b = get_x_center(b2)
    height_a = get_height(b1)
    return (center_a - center_b) / height_a
Esempio n. 9
0
def normalized_ycenter_diff_AB(b1, b2):
    """
    Vertical center difference, normalized by the height of the first
    bbox.

    The difference is taken as second box minus first box.

    :param b1: first bounding box (A)
    :param b2: second bounding box (B)
    :return: ``(get_y_center(b2) - get_y_center(b1)) / get_height(b1)``
    """
    center_b = get_y_center(b2)
    center_a = get_y_center(b1)
    height_a = get_height(b1)
    return (center_b - center_a) / height_a
Esempio n. 10
0
def height_ratio_AB(b1, b2):
    """
    Height of the second bbox relative to the height of the first bbox.

    :param b1: first bounding box (A), supplies the denominator
    :param b2: second bounding box (B), supplies the numerator
    :return: ``get_height(b2) / get_height(b1)``
    """
    height_b = get_height(b2)
    height_a = get_height(b1)
    return height_b / height_a
Esempio n. 11
0
def normalized_ycenter_diff_by_merge_AB(b1, b2):
    """
    Symmetric vertical difference: the absolute distance between the two
    vertical centers, divided by the height of the merged bbox.

    :param b1: first bounding box
    :param b2: second bounding box
    :return: ``abs(get_y_center(b1) - get_y_center(b2))`` divided by
        ``get_height(merge_bbox(b1, b2))``
    """
    merged = merge_bbox(b1, b2)
    center_gap = abs(get_y_center(b1) - get_y_center(b2))
    return center_gap / get_height(merged)