from box import load_box_file


def box_widths(boxes, path):
    """Returns a dict mapping box width -> count.

    Args:
        boxes: iterable of objects exposing numeric ``left`` and ``right``
            attributes.
        path: label for where ``boxes`` came from; it is only used in the
            diagnostic lines written to stderr.

    Returns:
        A ``defaultdict(int)`` keyed by ``right - left``.
    """
    counts = defaultdict(int)
    for box in boxes:
        width = box.right - box.left
        # Every box whose width falls in 16..19 is reported on stderr.
        # NOTE(review): the window looks like a debugging aid -- confirm it
        # is still wanted.
        if 16 <= width <= 19:
            sys.stderr.write('%s: %s %s\n' % (width, path, box))
        counts[width] += 1
    return counts


def counts_to_list(counts):
    """Expands a width -> count mapping into a dense list indexed by width.

    Index ``i`` of the result holds the count recorded for width ``i`` (0 if
    that width never occurred); the list runs from 0 through the largest key.
    An empty mapping yields an empty list instead of raising.

    Lookups go through ``.get()`` so ``counts`` is never modified (indexing a
    defaultdict would insert every missing width) and plain dicts work too.
    """
    if not counts:
        return []
    high_val = max(counts)
    return [counts.get(x, 0) for x in range(high_val + 1)]


if __name__ == '__main__':
    paths = sys.argv[1:]
    count_lists = []
    for path in paths:
        boxes = load_box_file(path)
        count_lists.append(counts_to_list(box_widths(boxes, path)))

    # `or [0]` keeps max() from raising when no paths were given.
    max_width = max([len(x) for x in count_lists] or [0])

    # Header row: an empty cell above the path column, then one column per
    # width.
    print('\t'.join([''] + [str(x) for x in range(max_width)]))
    for path, counts in zip(paths, count_lists):
        print('%s\t%s' % (path, '\t'.join(str(x) for x in counts)))
h = box.top - box.bottom assert h > 0 if w < 21: return [box] # probably just a single letter. if h > w: return [box] # maybe it's just large, not wide num_ways = int(round(w / 12.0)) assert num_ways > 1, w boxes = [] for i in range(0, num_ways): b = copy.deepcopy(box) b.left = box.left + int(round((1.0 * i / num_ways * w))) b.right = box.left + int(round((1.0 * (i + 1) / num_ways * w))) boxes.append(b) return boxes def split_boxes(boxes): out_boxes = [] for box in boxes: out_boxes += split_box(box) return out_boxes if __name__ == '__main__': for path in sys.argv[1:]: boxes = load_box_file(path) out_boxes = split_boxes(boxes) out_path = path.replace('.box', '.split.box') open(out_path, 'w').write('\n'.join(str(x) for x in out_boxes))
def padded_box(box, pad_width, pad_height):
    """Adds some additional margin around the box.

    Args:
        box: object with ``letter``, ``left``, ``top``, ``right``, ``bottom``
            and ``page`` attributes.
        pad_width: amount subtracted from ``left`` and added to ``right``.
        pad_height: amount added to ``top`` and subtracted from ``bottom``.

    Returns:
        The result of calling ``BoxLine`` with the original letter and page
        and the widened coordinates; ``box`` itself is not modified.
    """
    return BoxLine(box.letter,
                   box.left - pad_width,
                   box.top + pad_height,
                   box.right + pad_width,
                   box.bottom - pad_height,
                   box.page)


def crop_image_to_box(im, box):
    """Returns a new image containing the pixels inside box.

    This accounts for BoxLine measuring pixels from the bottom up, whereas
    Image objects measure from the top down.

    Args:
        im: image exposing ``size`` as a (width, height) pair and a
            ``crop(region)`` method.
        box: object with numeric ``left``, ``top``, ``right`` and ``bottom``
            attributes.

    Returns:
        Whatever ``im.crop`` returns for the (left, upper, right, lower)
        region, with every coordinate rounded to the nearest integer.
    """
    _, height = im.size
    # Flip the vertical coordinates against the image height; the region is
    # built as a tuple under its own name rather than rebinding `box`.
    region = tuple(
        int(round(v))
        for v in (box.left, height - box.top, box.right, height - box.bottom)
    )
    return im.crop(region)


if __name__ == '__main__':
    # Fail with a readable message instead of a bare unpacking error.
    if len(sys.argv) != 4:
        sys.exit('Usage: %s BOX_PATH IMAGE_PATH OUT_IMAGE_PATH' % sys.argv[0])
    _, box_path, image_path, out_image_path = sys.argv
    boxes = load_box_file(box_path)
    big_box = find_box_extrema(boxes)
    pad_box = padded_box(big_box, 20, 20)
    im = Image.open(image_path)
    cropped_im = crop_image_to_box(im, pad_box)
    cropped_im.save(out_image_path)
# From http://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python
def mkdir_p(path):
    """Creates `path` and any missing parent directories, like ``mkdir -p``.

    An already existing directory is not an error. Every other failure is
    re-raised as the original OSError, including the case where `path`
    exists but is not a directory.
    """
    try:
        os.makedirs(path)
    except OSError as exc:  # Python >2.5
        # EEXIST is only harmless when the existing entry really is a
        # directory; a regular file in the way must still be reported.
        if exc.errno != errno.EEXIST or not os.path.isdir(path):
            raise


if __name__ == '__main__':
    # Fail with a readable message instead of a bare unpacking error.
    if len(sys.argv) != 4:
        sys.exit('Usage: %s BOX_PATH IMAGE_PATH OUTPUT_DIR' % sys.argv[0])
    _, box_path, image_path, output_dir = sys.argv
    boxes = box.load_box_file(box_path)
    im = Image.open(image_path)
    _, h = im.size
    # The loop variable is not called `box` so that it does not rebind the
    # `box` module used just above.
    for idx, letter_box in enumerate(boxes):
        x1, x2 = letter_box.left, letter_box.right
        # Flip the vertical coordinates against the image height.
        y1, y2 = h - letter_box.top, h - letter_box.bottom
        assert x2 > x1, letter_box
        assert y2 > y1, letter_box
        char_im = im.crop((x1, y1, x2, y2))
        out_dir, out_file = path_for_letter(output_dir, image_path, idx,
                                            letter_box.letter)
        out_path = os.path.join(out_dir, out_file)
        mkdir_p(out_dir)
        # Write the cropped character; without this the crop and the
        # directory created above would go unused.
        char_im.save(out_path)
def padded_box(box, pad_width, pad_height):
    """Adds some additional margin around the box.

    Args:
        box: object with ``letter``, ``left``, ``top``, ``right``, ``bottom``
            and ``page`` attributes.
        pad_width: amount subtracted from ``left`` and added to ``right``.
        pad_height: amount added to ``top`` and subtracted from ``bottom``.

    Returns:
        The result of calling ``BoxLine`` with the original letter and page
        and the widened coordinates; ``box`` itself is not modified.
    """
    return BoxLine(box.letter,
                   box.left - pad_width,
                   box.top + pad_height,
                   box.right + pad_width,
                   box.bottom - pad_height,
                   box.page)


def crop_image_to_box(im, box):
    """Returns a new image containing the pixels inside box.

    This accounts for BoxLine measuring pixels from the bottom up, whereas
    Image objects measure from the top down.

    Args:
        im: image exposing ``size`` as a (width, height) pair and a
            ``crop(region)`` method.
        box: object with numeric ``left``, ``top``, ``right`` and ``bottom``
            attributes.

    Returns:
        Whatever ``im.crop`` returns for the (left, upper, right, lower)
        region, with every coordinate rounded to the nearest integer.
    """
    _, height = im.size
    # Flip the vertical coordinates against the image height; the region is
    # built as a tuple under its own name rather than rebinding `box`.
    region = tuple(
        int(round(v))
        for v in (box.left, height - box.top, box.right, height - box.bottom)
    )
    return im.crop(region)


if __name__ == '__main__':
    # Fail with a readable message instead of a bare unpacking error.
    if len(sys.argv) != 4:
        sys.exit('Usage: %s BOX_PATH IMAGE_PATH OUT_IMAGE_PATH' % sys.argv[0])
    _, box_path, image_path, out_image_path = sys.argv
    boxes = load_box_file(box_path)
    big_box = find_box_extrema(boxes)
    pad_box = padded_box(big_box, 20, 20)
    im = Image.open(image_path)
    cropped_im = crop_image_to_box(im, pad_box)
    cropped_im.save(out_image_path)
return os.path.join(output_dir, letter), '%s.%s.png' % (image_base, idx) # From http://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python def mkdir_p(path): try: os.makedirs(path) except OSError as exc: # Python >2.5 if exc.errno == errno.EEXIST: pass else: raise if __name__ == '__main__': _, box_path, image_path, output_dir = sys.argv boxes = box.load_box_file(box_path) im = Image.open(image_path) w, h = im.size for idx, box in enumerate(boxes): x1, x2 = box.left, box.right y1, y2 = h - box.top, h - box.bottom assert x2 > x1 assert y2 > y1 char_im = im.crop((x1, y1, x2, y2)) out_dir, out_file = path_for_letter(output_dir, image_path, idx, box.letter) out_path = os.path.join(out_dir, out_file) mkdir_p(out_dir) char_im.save(out_path)