def show_rotated(load_path, args=None, indices='all'):
    '''
    Display rotation-corrected versions of the JPEG images in a directory.

    Args:
        load_path: directory whose entries are scanned for '.jpg' files
        args: Arguments object handed to the blob and rotation helpers; a
            default ``Arguments()`` is created when omitted
        indices: 'all' to show every image, otherwise a container of
            positions in the directory listing (non-jpg entries are counted
            too) selecting which files are shown

    Returns:
        None
    '''
    if args is None:
        args = Arguments()

    for position, name in enumerate(os.listdir(load_path)):
        if not name.endswith('.jpg'):
            continue
        if not (indices == 'all' or position in indices):
            continue

        img = processing.load_img(os.path.join(load_path, name))
        components = find_blobs(img, args)
        # Results are not used here; the calls are kept in case they
        # populate state on ``components`` -- TODO confirm and drop.
        components.bboxes()
        components.get_stencil()

        angle, labels = correction_angle(components, args, False)
        # ``angle`` is converted from radians to degrees for ``rotate``.
        angle_in_degrees = angle * 180 / np.pi
        rotated_img = rotate(img, angle_in_degrees)

        plt.imshow(rotated_img)
        plt.grid(True)
        plt.show()

        get_rotation_corrected_blobs(components, angle, labels, args)
def interface():
    '''Provide a terminal interface to run ``read`` on an image path.

    Asks for an image path (an empty answer selects 'example.jpg'), prints
    the lines that ``read`` returns for that image and, on request, searches
    the printed text for IBANs and offers to copy one to the clipboard.

    Returns:
        None
    '''
    default_path = 'example.jpg'

    def copy_and_report(iban):
        # Single place for the clipboard side effect and its confirmation.
        pyperclip.copy(iban)
        print('\nCopied {} to the clipboard'.format(iban))

    path = input(
        'please provide a path to an image containing text relative to this directory [example]\n> '
    )
    if path == '':
        path = default_path
    while not os.path.exists(path):
        path = input(
            'This path doesn\'t exist. please provide a valid one [example]\n> '
        )
        # The re-prompt advertises the same default, so honour it here too.
        if path == '':
            path = default_path

    args = Arguments()
    img = load_img(path, args)
    results = read(img, args)

    # Every line, including the last one, is terminated by a newline.
    out = ''.join(line + '\n' for line in results)
    print('\nFrom the provided image the following could be read:\n')
    print(out)

    get_iban = input('Do you want to extract an IBAN? y/n? [y] > ')
    if get_iban in ('y', ''):
        ibans = detect_iban.find_iban(out)
        if len(ibans) == 0:
            print('No IBAN could be extracted.')
        elif len(ibans) == 1:
            answer = input(
                'found the following IBAN: {} \n Do you want to copy it y/n? [y] > '
                .format(ibans[0]))
            if answer in ('', 'y'):
                copy_and_report(ibans[0])
        else:
            print('\nThe following IBANs were found:')
            for i, iban in enumerate(ibans):
                print(i, ':', iban)
            idx = input(
                '\nIf you want to copy any of these type its number > [0]')
            try:
                choice = 0 if idx == '' else int(idx)
            except ValueError:
                # Non-numeric input must reach the "not understood" message
                # instead of aborting the program with a traceback.
                choice = None
            if choice is not None and 0 <= choice < len(ibans):
                copy_and_report(ibans[choice])
            else:
                print('\nThe input was not understood.')
    print('\nexit')
def generate_char_data(load_path, args=None):
    '''
    Build character samples and labels from numbered images and a truth file.

    ``load_path`` must contain 'truth.txt' and the images '0.jpg' ..
    '<args.n - 1>.jpg'. The truth file holds one block of text lines per
    image, blocks being separated by empty lines; block ``i`` belongs to
    image ``i``. Only images for which the number of extracted characters
    equals the number of ground-truth characters contribute to the result.

    Args:
        load_path: directory with the images and 'truth.txt'
        args: Arguments object; ``method`` ('threshold' or 'mser'),
            ``char_dict`` (character -> label) and ``n`` (number of images)
            are read here. A default ``Arguments()`` is used when omitted.

    Returns:
        tuple ``(char_images, char_truths)``: the extracted characters of
        all accepted images and their int32 labels, each concatenated along
        axis 0

    Raises:
        ValueError: if ``args.method`` is not a supported method (also
            raised by ``np.concatenate`` when no image was accepted)
        KeyError: if the truth file contains a character missing from
            ``args.char_dict``
    '''
    if args is None:
        args = Arguments()

    if args.method == 'threshold':
        method = blob_extraction.find_blobs
    elif args.method == 'mser':
        method = mser_extraction.extract_mser
    else:
        # An explicit exception survives ``python -O``, unlike ``assert``.
        raise ValueError(
            f"unknown extraction method {args.method!r}; "
            "expected 'threshold' or 'mser'")

    char_dict = args.char_dict

    # Parse the ground truth into one list of label arrays per image.
    truth_blocks = []
    current_block = []
    with open(os.path.join(load_path, 'truth.txt')) as f:
        for raw_line in f:
            line = raw_line.strip()
            if line == '':
                truth_blocks.append(current_block)
                current_block = []
            else:
                current_block.append(
                    np.array([char_dict[char] for char in line],
                             dtype=np.int32))
    # Keep the last block when the file does not end with an empty line.
    if current_block:
        truth_blocks.append(current_block)

    char_images = []
    char_truths = []
    for i in range(args.n):
        file_path = os.path.join(load_path, f'{i}.jpg')
        img = processing.load_img(file_path, args)
        components = method(img, args)
        components = rotation_correct_and_line_order(components)
        chars = components.extract(args)

        # An image is only usable when detection found exactly as many
        # characters as the ground truth lists for it.
        diff = sum(len(line) for line in truth_blocks[i]) - len(chars)
        print(f'image {i}: #gt chars - #detected chars: {diff}')
        if diff == 0:
            char_images.append(chars)
            char_truths.append(np.concatenate(truth_blocks[i]))

    return (np.concatenate(char_images, axis=0),
            np.concatenate(char_truths, axis=0))
def test_on_images(args):
    '''
    Evaluate the model on the images 0.jpg, 1.jpg, .. in ``args.image_path``.

    The ground truth is read from 'truth.txt' in the same directory: one
    block of text lines per image, blocks separated by empty lines. For each
    image the predicted lines are compared position by position with the
    ground-truth lines; surplus lines on either side count as misses.

    Args:
        args: Arguments object; ``image_path`` and ``n`` (number of images)
            are read here and the object is passed on to ``load_img`` and
            ``read``

    Returns:
        tuple of
        number of ground-truth lines,
        ratio of correctly predicted lines,
        ratio of correctly predicted lines not considering case,
        ratio of lines with a correctly predicted amount of characters.
        All ratios are 0.0 when there are no ground-truth lines.
    '''
    # Parse the ground truth into one list of lines per image.
    truth_blocks = []
    current_block = []
    with open(os.path.join(args.image_path, 'truth.txt')) as f:
        for raw_line in f:
            line = raw_line.strip()
            if line == '':
                truth_blocks.append(current_block)
                current_block = []
            else:
                current_block.append(line)
    # Keep the last block when the file does not end with an empty line.
    if current_block:
        truth_blocks.append(current_block)

    total_lines = 0
    correct_lines = 0
    lowercase_correct_lines = 0
    n_letters_correct_lines = 0
    for i in range(args.n):
        file_path = os.path.join(args.image_path, f'{i}.jpg')
        img = load_img(file_path, args)
        pred_lines = read(img, args)
        gt_lines = truth_blocks[i]
        print('prediction: ', pred_lines)
        print('ground truth: ', gt_lines)

        total_lines += len(gt_lines)
        # zip stops at the shorter list, so unmatched lines are never scored.
        for pred, truth in zip(pred_lines, gt_lines):
            correct_lines += (pred == truth)
            lowercase_correct_lines += (pred.lower() == truth.lower())
            n_letters_correct_lines += len(pred) == len(truth)

    def share(count, scale=1):
        # Guard against an empty evaluation set instead of dividing by zero.
        return scale * count / total_lines if total_lines else 0.0

    print(
        f'Out of {total_lines} lines, {correct_lines} ({share(correct_lines, 100):.2f}%) were predicted perfectly, '
        f'and {lowercase_correct_lines} ({share(lowercase_correct_lines, 100):.2f}%) when neglecting case. '
        f'in {n_letters_correct_lines} ({share(n_letters_correct_lines, 100):.2f}%) cases, at least the right number of letters was predicted.'
    )
    return (total_lines, share(correct_lines),
            share(lowercase_correct_lines), share(n_letters_correct_lines))


# The ``test_`` prefix would make pytest collect this helper as a test case
# (and fail on the unknown ``args`` fixture); opt out of collection.
test_on_images.__test__ = False
def load_raws(self):
    '''
    Read every JPEG found in the data directory into memory.

    Returns:
        list with one entry per '.jpg' file in ``self.path``, in directory
        listing order; each entry is the result of ``processing.load_img``
        (ndarrays, according to the original documentation)
    '''
    jpg_paths = (
        os.path.join(self.path, name)
        for name in os.listdir(self.path)
        if name.endswith('.jpg')
    )
    return [processing.load_img(jpg_path) for jpg_path in jpg_paths]
lines.append(img[box[0]:box[1], box[2]:box[3]]) # bounding boxes in shape [x_min, y_min, width, height] bboxes = [[box[2], box[0], box[3] - box[2], box[1] - box[0]] for box in boxes] return lines, bboxes if __name__ == "__main__": args = Arguments() args.n_angles = 50 args.n_bins = 30 img = load_img('data/5.jpg') rotated = correct_rot(img, args) lines, bboxes = extract_lines(rotated, args) plt.imshow(img[0:150, 0:200], cmap='gray') plt.xticks([]) plt.yticks([]) #plt.savefig('plots/to_be_rotated.pdf', bbox_inches='tight') plt.show() o = 50 fig = plt.figure() ax = fig.add_subplot(111) ax.imshow(rotated[o:200, o:250], cmap='gray') ax.grid(True)
angle = angles[np.argmin(H)] angle = -angle * 180 / np.pi # rotating image img = rotate(img, angle) return img if __name__ == "__main__": args = Arguments() args.n_angles = 50 args.n_bins = 100 img = load_img('data_test/1.jpg') rotated = correct_rot(img, args) plt.imshow(img[0:150, 0:200], cmap='gray') plt.xticks([]) plt.yticks([]) #plt.savefig('plots/to_be_rotated.pdf', bbox_inches='tight') plt.show() plt.imshow(rotated[50:200, 50:250], cmap='gray') plt.grid(True) plt.xticks([]) plt.yticks([]) #plt.savefig('plots/rotated.pdf', bbox_inches='tight') plt.show()
tests the model on different font types and saves the result Args: args: Arguments object prefix: folder in which folders of test images are saved font_files: list of fonts to use output_file: file to save results ''' if font_files is None: font_files = data_gen.get_font_files() f = open(output_file, 'w') for font in font_files: print(ntpath.basename(font)) args.image_path = os.path.join(prefix, ntpath.basename(font)) total_lines, correct, correct_case_insensitive, correct_n_chars = test_on_images( args) f.write( str(total_lines) + ' ' + str(correct) + ' ' + str(correct_case_insensitive) + ' ' + str(correct_n_chars) + ' ' + ntpath.basename(font) + '\n') f.close() if __name__ == '__main__': args = Arguments() args.documentation = True args.method = 'mser' img = load_img('IBANs/cropped/IMG_0892.jpg', args) print(read(img, args))
if len(pixels) == 0: continue y_min = np.min(pixels[:, 1]) y_max = np.max(pixels[:, 1]) x_min = np.min(pixels[:, 0]) x_max = np.max(pixels[:, 0]) #heappush(boxes, (x_min, x_max, y_min, y_max)) boxes.append((x_min, x_max, y_min, y_max)) return np.array(boxes, dtype=np.int32) if __name__ == "__main__": args = Arguments() img = processing.load_img('data_test/1.jpg') scale = 1 orig = rescale(deepcopy(img), scale) img = rescale(img, scale) components = find_blobs(img, args) boxes = components.bboxes() stencil = components.get_stencil() print() fig = plt.figure() ax = fig.add_subplot(111) ax.imshow(orig, cmap='gray') for box in boxes: rect = Rectangle((box[2], box[0]), box[3] - box[2],