def show_rotated(load_path, args=None, indices='all'):
    '''
    Shows the rotated version of selected .jpg images from a directory.

    For each selected image, find_blobs is run, correction_angle is computed
    from the resulting components, and the image rotated by that angle is
    displayed with matplotlib (grid enabled). Afterwards
    get_rotation_corrected_blobs is called with the same components; its
    result is not used here.

    Args:
        load_path: directory that is scanned for files ending in '.jpg'
        args: Arguments object; a default Arguments() is created when None
        indices: 'all' to process every .jpg file, otherwise a container of
            positions in the os.listdir(load_path) listing. Positions count
            every directory entry, not only the .jpg files.

    Returns:
        None
    '''
    if args is None:
        args = Arguments()

    for position, name in enumerate(os.listdir(load_path)):
        # guard clauses: skip everything that is not a selected .jpg file
        if not name.endswith('.jpg'):
            continue
        if not (indices == 'all' or position in indices):
            continue

        img = processing.load_img(os.path.join(load_path, name))
        components = find_blobs(img, args)

        # The two results below are not used further in this function; the
        # calls themselves are kept unchanged.
        bboxes = components.bboxes()
        mask = components.get_stencil()

        angle, labels = correction_angle(components, args, False)
        # scale by 180/pi before handing the angle to rotate
        angle_scaled = angle * 180 / np.pi

        plt.imshow(rotate(img, angle_scaled))
        plt.grid(True)
        plt.show()

        get_rotation_corrected_blobs(components, angle, labels, args)
def _copy_iban_to_clipboard(iban):
    '''copies the given IBAN to the clipboard and reports it in the terminal.'''
    pyperclip.copy(iban)
    print('\nCopied {} to the clipboard'.format(iban))


def interface():
    '''provides a terminal interface to execute the read function with an image path.
    It prints out the result in the terminal and optionally extracts an IBAN.

    An empty answer to a prompt selects the default shown in square brackets.
    The user is asked for a path until an existing one is given. If one or
    more IBANs are found in the read text, one of them can be copied to the
    clipboard.
    '''

    path = input(
        'please provide a path to an image containing text relative to this directory [example]\n> '
    )
    if path == '':
        path = 'example.jpg'
    while not os.path.exists(path):
        path = input(
            'This path doesn\'t exist. please provide a valid one [example]\n> '
        )
        # the retry prompt advertises the same default, so honour it here too
        if path == '':
            path = 'example.jpg'

    args = Arguments()
    img = load_img(path, args)

    results = read(img, args)
    # one line per entry, each terminated by a newline
    out = ''.join(line + '\n' for line in results)

    print('\nFrom the provided image the following could be read:\n')
    print(out)

    get_iban = input('Do you want to extract an IBAN? y/n? [y] > ')

    if get_iban in ('y', ''):
        ibans = detect_iban.find_iban(out)
        if len(ibans) == 0:
            print('No IBAN could be extracted.')
        elif len(ibans) == 1:
            copy = input(
                'found the following IBAN: {} \n Do you want to copy it y/n? [y] > '
                .format(ibans[0]))
            if copy in ('', 'y'):
                _copy_iban_to_clipboard(ibans[0])
        else:
            print('\nThe following IBANs were found:')
            for i, iban in enumerate(ibans):
                print(i, ':', iban)
            idx = input(
                '\nIf you want to copy any of these type its number > [0]')
            # Non-numeric input must end up in the "not understood" branch
            # instead of crashing with a ValueError from int().
            try:
                choice = 0 if idx == '' else int(idx)
            except ValueError:
                choice = None
            if choice is not None and 0 <= choice < len(ibans):
                _copy_iban_to_clipboard(ibans[choice])
            else:
                print('\nThe input was not understood.')

    print('\nexit')
def generate_char_data(load_path, args=None):
    '''
    Builds character images with ground-truth labels from numbered images.

    Reads 'truth.txt' from load_path: consecutive non-empty lines form the
    ground-truth block of one image, an empty line ends a block. Each
    character is translated to its label via args.char_dict. Then the images
    '0.jpg' ... '<args.n - 1>.jpg' are processed with the extraction method
    selected by args.method. An image is only kept when the number of
    extracted characters equals the number of ground-truth characters of its
    block.

    Args:
        load_path: directory containing 'truth.txt' and the numbered images
        args: Arguments object; a default Arguments() is created when None.
            Reads args.method ('threshold' or 'mser'), args.char_dict and
            args.n.

    Returns:
        tuple (char_images, char_truths): the extracted characters of all
        kept images concatenated along axis 0, and the matching labels
        concatenated along axis 0

    Raises:
        ValueError: if args.method is neither 'threshold' nor 'mser'
            (np.concatenate also raises ValueError when no image was kept)
    '''
    if args is None:
        args = Arguments()

    if args.method == 'threshold':
        method = blob_extraction.find_blobs
    elif args.method == 'mser':
        method = mser_extraction.extract_mser
    else:
        # an explicit exception survives "python -O", unlike "assert False"
        raise ValueError(
            f"unknown extraction method {args.method!r}; "
            "expected 'threshold' or 'mser'")

    # load alphabet
    char_dict = args.char_dict

    # load truth
    truth_blocks = []
    current_block = []
    with open(os.path.join(load_path, 'truth.txt')) as f:
        for raw_line in f:
            line = raw_line.strip()
            if line == '':
                truth_blocks.append(current_block)
                current_block = []
            else:
                current_block.append(
                    np.array([char_dict[char] for char in line],
                             dtype=np.int32))
    # keep the last block when the file does not end with an empty line
    if current_block:
        truth_blocks.append(current_block)

    # process images
    char_images = []
    char_truths = []
    for i in range(args.n):
        file_path = os.path.join(load_path, str(i) + '.jpg')
        img = processing.load_img(file_path, args)
        components = method(img, args)
        components = rotation_correct_and_line_order(components)
        chars = components.extract(args)

        # check if number of detected chars coincides with groundtruth
        diff = sum(len(line) for line in truth_blocks[i]) - len(chars)
        print(f'image {i}: #gt chars - #detected chars: {diff}')
        if diff == 0:
            char_images.append(chars)
            char_truths.append(np.concatenate(truth_blocks[i]))

    return np.concatenate(char_images, axis=0), np.concatenate(char_truths,
                                                               axis=0)
Example #4
0
def test_on_images(args):
    '''
    tests the model used by read(img, args) on the images labeled
    0.jpg, 1.jpg .. <args.n - 1>.jpg in args.image_path
    (the model is specified by args.model_path according to the original
    documentation; that attribute is not accessed in this function itself)

    The ground truth is read from 'truth.txt' in args.image_path:
    consecutive non-empty lines form the block of one image, an empty line
    ends a block. Predicted and ground-truth lines are compared pairwise;
    surplus lines on either side are not compared.

    Args:
        args: Arguments object; reads args.image_path and args.n

    Returns:
        total number of ground-truth lines,
        ratio of correctly predicted lines,
        ratio of correctly predicted lines not considering case,
        ratio of lines with a correctly predicted amount of characters
        (all ratios are 0.0 when there are no ground-truth lines)
    '''

    # get gt
    truth_blocks = []
    current_block = []
    with open(os.path.join(args.image_path, 'truth.txt')) as f:
        for raw_line in f:
            line = raw_line.strip()
            if line == '':
                truth_blocks.append(current_block)
                current_block = []
            else:
                current_block.append(line)
    # keep the last block when the file does not end with an empty line
    if current_block:
        truth_blocks.append(current_block)

    total_lines = 0
    correct_lines = 0
    lowercase_correct_lines = 0
    n_letters_correct_lines = 0
    for i in range(args.n):
        file_path = os.path.join(args.image_path, str(i) + '.jpg')
        img = load_img(file_path, args)
        pred_lines = read(img, args)
        gt_lines = truth_blocks[i]
        print('prediction: ', pred_lines)
        print('ground truth: ', gt_lines)
        total_lines += len(gt_lines)
        # zip stops at the shorter list and avoids reusing the image index i
        for pred, truth in zip(pred_lines, gt_lines):
            correct_lines += (pred == truth)
            lowercase_correct_lines += (pred.lower() == truth.lower())
            n_letters_correct_lines += (len(pred) == len(truth))

    counts = (correct_lines, lowercase_correct_lines, n_letters_correct_lines)
    if total_lines:
        percents = [100 * count / total_lines for count in counts]
        ratios = [count / total_lines for count in counts]
    else:
        # nothing to compare against: report zeros instead of dividing by 0
        percents = [0.0, 0.0, 0.0]
        ratios = [0.0, 0.0, 0.0]

    print(
        f'Out of {total_lines} lines, {correct_lines} ({percents[0]:.2f}%) were predicted perfectly, '
        f'and {lowercase_correct_lines} ({percents[1]:.2f}%) when neglecting case. '
        f'in {n_letters_correct_lines} ({percents[2]:.2f}%) cases, at least the right number of letters was predicted.'
    )

    return total_lines, ratios[0], ratios[1], ratios[2]
    def load_raws(self):
        '''
        reads every .jpg file found directly in self.path

        Each matching file is passed to processing.load_img; the results are
        collected in the order returned by os.listdir.

        Returns:
            list with one loaded image per .jpg file (ndarrays according to
            the original documentation)
        '''
        jpg_names = (name for name in os.listdir(self.path)
                     if name.endswith('.jpg'))
        return [
            processing.load_img(os.path.join(self.path, name))
            for name in jpg_names
        ]
Example #6
0
        lines.append(img[box[0]:box[1], box[2]:box[3]])

    # bounding boxes in shape [x_min, y_min, width, height]
    bboxes = [[box[2], box[0], box[3] - box[2], box[1] - box[0]]
              for box in boxes]

    return lines, bboxes


if __name__ == "__main__":

    # Manual visual check: run correct_rot and extract_lines on one sample
    # image and display crops of the input and of the rotated result.
    args = Arguments()
    args.n_angles = 50
    args.n_bins = 30

    img = load_img('data/5.jpg')
    rotated = correct_rot(img, args)

    # lines and bboxes are not used in the visible part of this block
    lines, bboxes = extract_lines(rotated, args)

    # crop of the unrotated input, shown without axis ticks
    plt.imshow(img[0:150, 0:200], cmap='gray')
    plt.xticks([])
    plt.yticks([])
    #plt.savefig('plots/to_be_rotated.pdf', bbox_inches='tight')
    plt.show()

    # o is the start index (both axes) of the crop taken from the rotated image
    o = 50
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.imshow(rotated[o:200, o:250], cmap='gray')
    ax.grid(True)
    angle = angles[np.argmin(H)]
    angle = -angle * 180 / np.pi

    # rotating image

    img = rotate(img, angle)
    return img


if __name__ == "__main__":

    # Manual visual check of correct_rot on one test image: shows a crop of
    # the input first and a crop of the rotated result afterwards.
    args = Arguments()
    args.n_angles = 50
    args.n_bins = 100

    img = load_img('data_test/1.jpg')
    rotated = correct_rot(img, args)

    # crop of the unrotated input, shown without axis ticks
    plt.imshow(img[0:150, 0:200], cmap='gray')
    plt.xticks([])
    plt.yticks([])
    #plt.savefig('plots/to_be_rotated.pdf', bbox_inches='tight')
    plt.show()

    # crop of the rotated image with a grid overlay, shown without axis ticks
    plt.imshow(rotated[50:200, 50:250], cmap='gray')
    plt.grid(True)
    plt.xticks([])
    plt.yticks([])
    #plt.savefig('plots/rotated.pdf', bbox_inches='tight')
    plt.show()
Example #8
0
    tests the model on different font types and saves the result
    Args:
        args: Arguments object
        prefix: folder in which folders of test images are saved
        font_files: list of fonts to use
        output_file: file to save results
    '''

    if font_files is None:
        font_files = data_gen.get_font_files()

    f = open(output_file, 'w')
    for font in font_files:
        print(ntpath.basename(font))
        args.image_path = os.path.join(prefix, ntpath.basename(font))
        total_lines, correct, correct_case_insensitive, correct_n_chars = test_on_images(
            args)
        f.write(
            str(total_lines) + ' ' + str(correct) + ' ' +
            str(correct_case_insensitive) + ' ' + str(correct_n_chars) + ' ' +
            ntpath.basename(font) + '\n')
    f.close()


if __name__ == '__main__':

    # Manual run: read one cropped sample image with the 'mser' method and
    # print the result of read.
    args = Arguments()
    # NOTE(review): the effect of the documentation flag is not visible
    # here; confirm against Arguments and its users
    args.documentation = True
    args.method = 'mser'
    img = load_img('IBANs/cropped/IMG_0892.jpg', args)
    print(read(img, args))
        if len(pixels) == 0:
            continue
        y_min = np.min(pixels[:, 1])
        y_max = np.max(pixels[:, 1])
        x_min = np.min(pixels[:, 0])
        x_max = np.max(pixels[:, 0])
        #heappush(boxes, (x_min, x_max, y_min, y_max))
        boxes.append((x_min, x_max, y_min, y_max))
    return np.array(boxes, dtype=np.int32)


if __name__ == "__main__":

    args = Arguments()

    img = processing.load_img('data_test/1.jpg')

    scale = 1
    orig = rescale(deepcopy(img), scale)
    img = rescale(img, scale)
    components = find_blobs(img, args)
    boxes = components.bboxes()
    stencil = components.get_stencil()

    print()
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.imshow(orig, cmap='gray')
    for box in boxes:
        rect = Rectangle((box[2], box[0]),
                         box[3] - box[2],