예제 #1
0
def id_tabs(img, avg_line_height=20, line_blur=20, tab_wiggle_room=5, disp=False):
    """
    Attempts to identify the indent level of each line of text, with the assumption that the first line is at level 0.

    The image is filtered with a wide, one-pixel-tall rectangular kernel, its connected components are labelled,
    and the labelled regions are handed to ``_generate_bounding_boxes`` / ``_analyze_bounding_boxes``.

    :param img: the input image (should contain text); indexed as img[row][column]
    :param avg_line_height: the expected vertical height of a line of text
    :param line_blur: how far the image is blurred to extract features (img.width/line_blur)
    :param tab_wiggle_room: how far in pixels tabs are allowed to be from one another before they are considered distinct
    :param disp: whether to display intermediate results
    :return: An integer list representing the tab level for each line
    """
    # aggressively horizontally blur the image
    cols = len(img[0])
    # The kernel width must be a positive integer: floor division keeps it integral even under
    # true division, and the clamp covers images narrower than line_blur pixels.
    horizontal_size = max(1, int(cols // line_blur))
    horizontal_structure = cv2.getStructuringElement(cv2.MORPH_RECT, (horizontal_size, 1))
    img = cv2.filter2D(img, -1, horizontal_structure)
    if disp:
        vis_img, _ = auto_crop.reduce_image(img.copy())
        cv2.imshow('Horizontally Blur', vis_img)
        cv2.waitKey(0)
    # Identify connected components & generate bounding boxes
    # (the second argument is connectedComponents' optional labels output buffer)
    n, regions = cv2.connectedComponents(img, img)
    # NOTE(review): the label image is narrowed to 8 bits here, so labels above 255 would wrap.
    img = np.uint8(regions)
    bbs = _generate_bounding_boxes(img, n, avg_line_height)

    return _analyze_bounding_boxes(bbs, tab_wiggle_room)
예제 #2
0
def main():
    """
    Runs the full pipeline on one hard-coded training image and prints the results.

    Steps: read the image as grayscale, crop it with ``auto_crop.crop_to_bounding_box``, binarize and invert it,
    normalize the text height, identify indent levels with ``id_tabs``, run Tesseract on the normalized image
    (written to ``text.png``), and print the raw and proofread translations.

    :raises IOError: if the training image cannot be read
    """
    disp = False  # Set to true to view the intermediate images.
    image_path = '../images/training/training3.jpg'
    img = cv2.imread(image_path, 0)
    # cv2.imread signals failure by returning None rather than raising, so fail loudly here
    # instead of with an obscure error further down the pipeline.
    if img is None:
        raise IOError("Could not read image: " + image_path)
    img = auto_crop.crop_to_bounding_box(img, disp=disp)
    # binarize image
    _, img = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY_INV)
    if disp:
        vis_img, _ = auto_crop.reduce_image(img.copy())
        cv2.imshow('Binary Image', vis_img)
        cv2.waitKey(0)

    img = 255 - img
    if disp:
        vis_img, _ = auto_crop.reduce_image(img.copy())
        cv2.imshow('Inverted', vis_img)
        cv2.waitKey(0)

    # normalize image height
    img, avg_line_height = normalize_training_image(img, 30, disp=disp)

    if disp:
        cv2.imshow('Normalized', img)
        cv2.waitKey(0)

    # identify indent levels (id_tabs is given the inverse of the normalized image)
    tabs = id_tabs(255 - img,
                   avg_line_height=avg_line_height,
                   line_blur=20,
                   tab_wiggle_room=2,
                   disp=disp)
    # Single-argument print(...) behaves the same as the print statement on Python 2
    # and also parses on Python 3.
    print(tabs)
    # The normalized image is written to disk and reopened with PIL before being passed to pytesseract.
    cv2.imwrite("text.png", img)
    pil_img = Image.open("text.png")
    translation = pytesseract.image_to_string(pil_img, 'hww')
    print("TESSERACT RESULTS: ")
    print(translation)

    print("\n\nPROOFREAD: ")
    print(proofread(translation.split('\n'), tabs))
def normalize_training_image(img, threshold_height, disp=False):
    """
    Takes in a binary image and normalizes the text within the image to the given height.

    The scale factor is ``threshold_height`` divided by the mean bounding-box height of the contours found in the
    horizontally blurred image, ignoring contours shorter than half of the overall mean height.

    :param img: the image to normalize (must be 2-D: its shape is unpacked as (height, width))
    :param threshold_height: the average height we're trying to obtain
    :param disp: whether to display intermediate results
    :return: a tuple ``(image, threshold_height)``; the image is the circle-removed input resized by the scale
             factor, or left unscaled when no contours are found
    """
    img = _remove_circles(img)
    if disp:
        vis_img, _ = reduce_image(img.copy())
        cv2.imshow('Removed Bounding Circles', vis_img)
        cv2.waitKey(0)
    cc = _horizontally_blur_image(img)
    _, cc = cv2.threshold(cc, 1, 255, cv2.THRESH_BINARY)
    # findContours returns (image, contours, hierarchy) on OpenCV 3.x but (contours, hierarchy)
    # on 2.x and 4.x; the contour list is the second-to-last element in every version.
    contours = cv2.findContours(cc, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]

    heights = [cv2.boundingRect(cnt)[3] for cnt in contours]
    if not heights:
        # Nothing was detected, so there is no height to normalize against.
        return img, threshold_height

    # Float division throughout: integer division would truncate the averages and,
    # worse, the scale factor (to 0 whenever the lines are taller than the target).
    avg_height = sum(heights) / float(len(heights))

    # Drop contours shorter than half the mean height before re-averaging.
    # The tallest contour always survives, so this list is never empty.
    kept_heights = [h for h in heights if h >= avg_height / 2]
    new_avg_height = sum(kept_heights) / float(len(kept_heights))

    p = threshold_height / new_avg_height

    height, width = img.shape
    # cv2.resize needs a strictly positive (width, height) target size.
    dim = (max(1, int(width * p)), max(1, int(height * p)))
    return cv2.resize(img, dim), threshold_height