from box import load_box_file


def box_widths(boxes, path):
    """Tally how many boxes have each width.

    A box's width is ``box.right - box.left``. Every box whose width lies in
    the inclusive range 16..19 is additionally reported on stderr, together
    with ``path``, as a debugging aid.

    Returns a defaultdict(int) mapping width -> number of boxes.
    """
    tally = defaultdict(int)
    for item in boxes:
        span = item.right - item.left
        tally[span] += 1
        if 16 <= span <= 19:
            sys.stderr.write('%s: %s %s\n' % (span, path, item))
    return tally


def counts_to_list(counts):
    """Expand a width -> count mapping into a dense list indexed by width.

    Args:
        counts: mapping from integer width to the number of boxes with that
            width. Any mapping with ``get`` works (dict, defaultdict, Counter).

    Returns:
        A list of length ``max(width) + 1`` whose element ``i`` is the count
        for width ``i`` (0 for widths that are absent). An empty mapping
        yields an empty list.
    """
    # max() raises ValueError on an empty mapping, e.g. a box file without
    # any boxes; an empty histogram is the natural answer there.
    if not counts:
        return []
    high_val = max(counts)
    # .get() avoids inserting the missing widths into a defaultdict and also
    # lets plain dicts be passed in.
    return [counts.get(x, 0) for x in range(high_val + 1)]


if __name__ == '__main__':
    # Usage: <script> FILE.box [FILE.box ...]
    # Prints a tab-separated table: a header row of widths, followed by one
    # row per input file holding that file's per-width box counts.
    paths = sys.argv[1:]
    count_lists = [
        counts_to_list(box_widths(load_box_file(p), p)) for p in paths
    ]

    # The header has one column per width up to the longest count list.
    max_width = max(map(len, count_lists))
    header = [''] + [str(width) for width in range(max_width)]
    print('\t'.join(header))

    for p, row in zip(paths, count_lists):
        print('%s\t%s' % (p, '\t'.join(map(str, row))))
Example #2
0
    h = box.top - box.bottom
    assert h > 0
    if w < 21: return [box]  # probably just a single letter.
    if h > w:  return [box]  # maybe it's just large, not wide

    num_ways = int(round(w / 12.0))
    assert num_ways > 1, w

    boxes = []
    for i in range(0, num_ways):
        b = copy.deepcopy(box)
        b.left = box.left + int(round((1.0 * i / num_ways * w)))
        b.right = box.left + int(round((1.0 * (i + 1) / num_ways * w)))
        boxes.append(b)
    return boxes


def split_boxes(boxes):
    """Run split_box over every box and return all pieces as one flat list.

    The pieces keep the order of the input boxes.
    """
    return [piece for box in boxes for piece in split_box(box)]


if __name__ == '__main__':
    # Usage: <script> FILE.box [FILE.box ...]
    # For each input box file, writes the split boxes (their str() forms
    # joined by newlines) to a sibling file with '.box' replaced by
    # '.split.box'.
    for path in sys.argv[1:]:
        boxes = load_box_file(path)
        out_boxes = split_boxes(boxes)
        # NOTE(review): str.replace rewrites every '.box' occurrence, and a
        # path without '.box' is left unchanged, so the input file itself
        # would be overwritten -- confirm inputs always end in '.box'.
        out_path = path.replace('.box', '.split.box')
        # A context manager guarantees the output is flushed and the handle
        # closed before moving on to the next file.
        with open(out_path, 'w') as out_file:
            out_file.write('\n'.join(str(x) for x in out_boxes))
Example #3
0
def padded_box(box, pad_width, pad_height):
    """Returns a new BoxLine enlarged by a margin on each side.

    The left and right edges move outward by pad_width; the top is increased
    and the bottom decreased by pad_height. Letter and page are carried over.
    """
    letter, page = box.letter, box.page
    left = box.left - pad_width
    top = box.top + pad_height
    right = box.right + pad_width
    bottom = box.bottom - pad_height
    return BoxLine(letter, left, top, right, bottom, page)


def crop_image_to_box(im, box):
    """
    Crops im to the region described by box and returns the result.

    BoxLine y-coordinates are measured upward from the bottom, while Image
    y-coordinates are measured downward from the top, so the top and bottom
    edges are flipped against the image height before cropping. Every
    coordinate is rounded to the nearest integer.
    """
    _, img_height = im.size
    left = int(round(box.left))
    upper = int(round(img_height - box.top))
    right = int(round(box.right))
    lower = int(round(img_height - box.bottom))
    return im.crop([left, upper, right, lower])


if __name__ == '__main__':
    # Usage: <script> BOX_FILE IMAGE_FILE OUT_IMAGE_FILE
    # Crops the image to the result of find_box_extrema over all boxes
    # (presumably their overall bounding box), padded by 20 on every side,
    # and saves the crop.
    _, box_path, image_path, out_image_path = sys.argv
    bounds = padded_box(find_box_extrema(load_box_file(box_path)), 20, 20)
    crop_image_to_box(Image.open(image_path), bounds).save(out_image_path)
Example #4
0

# From http://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python
def mkdir_p(path):
    """Create directory ``path`` and any missing parents, like ``mkdir -p``.

    An already existing directory is not an error.

    Raises:
        OSError: if creation fails for any other reason, including when
            ``path`` already exists but is not a directory.
    """
    try:
        os.makedirs(path)
    except OSError as exc:  # Python >2.5
        # EEXIST is also reported when a regular file occupies the path;
        # only an existing *directory* means the work is already done.
        if exc.errno != errno.EEXIST or not os.path.isdir(path):
            raise


if __name__ == '__main__':
    # Usage: <script> BOX_FILE IMAGE_FILE OUTPUT_DIR
    # Crops every box out of the image and saves each crop under the
    # directory/file name chosen by path_for_letter.
    _, box_path, image_path, output_dir = sys.argv
    boxes = box.load_box_file(box_path)
    im = Image.open(image_path)
    _, h = im.size

    # The loop variable is deliberately not called `box`, so the imported
    # `box` module is not shadowed.
    for idx, char_box in enumerate(boxes):
        x1, x2 = char_box.left, char_box.right
        # Flip the y-coordinates against the image height for im.crop.
        y1, y2 = h - char_box.top, h - char_box.bottom

        assert x2 > x1
        assert y2 > y1

        char_im = im.crop((x1, y1, x2, y2))
        out_dir, out_file = path_for_letter(output_dir, image_path, idx,
                                            char_box.letter)
        out_path = os.path.join(out_dir, out_file)
        mkdir_p(out_dir)
        # Without this the crop was computed and then silently discarded.
        char_im.save(out_path)
Example #5
0
def padded_box(box, pad_width, pad_height):
    """Builds a copy of box with extra margin around it.

    pad_width is subtracted from the left edge and added to the right edge;
    pad_height is added to the top and subtracted from the bottom. The letter
    and page of the original box are kept.
    """
    letter, page = box.letter, box.page
    wide_left, wide_right = box.left - pad_width, box.right + pad_width
    tall_top, tall_bottom = box.top + pad_height, box.bottom - pad_height
    return BoxLine(letter, wide_left, tall_top, wide_right, tall_bottom, page)


def crop_image_to_box(im, box):
    """Crops im down to the area covered by box.

    BoxLine counts pixels from the bottom up, whereas Image objects count
    from the top down, so both vertical edges are subtracted from the image
    height. All four edges are rounded to integers before cropping.
    """
    _, height = im.size
    edges = (box.left, height - box.top, box.right, height - box.bottom)
    region = []
    for edge in edges:
        region.append(int(round(edge)))
    return im.crop(region)


if __name__ == '__main__':
    # Usage: <script> BOX_FILE IMAGE_FILE OUT_IMAGE_FILE
    # Pads the box returned by find_box_extrema by 20 in each direction and
    # writes the matching crop of the image to OUT_IMAGE_FILE.
    _, box_path, image_path, out_image_path = sys.argv

    all_boxes = load_box_file(box_path)
    crop_region = padded_box(find_box_extrema(all_boxes), 20, 20)

    source_im = Image.open(image_path)
    crop_image_to_box(source_im, crop_region).save(out_image_path)
Example #6
0
    return os.path.join(output_dir, letter), '%s.%s.png' % (image_base, idx)


# From http://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python
def mkdir_p(path):
    """Create directory ``path`` and any missing parents, like ``mkdir -p``.

    An already existing directory is not an error.

    Raises:
        OSError: if creation fails for any other reason, including when
            ``path`` already exists but is not a directory.
    """
    try:
        os.makedirs(path)
    except OSError as exc:  # Python >2.5
        # EEXIST is also reported when a regular file occupies the path;
        # only an existing *directory* means the work is already done.
        if exc.errno != errno.EEXIST or not os.path.isdir(path):
            raise


if __name__ == '__main__':
    # Usage: <script> BOX_FILE IMAGE_FILE OUTPUT_DIR
    # Saves one cropped image per box, at the location chosen by
    # path_for_letter.
    _, box_path, image_path, output_dir = sys.argv
    letter_boxes = box.load_box_file(box_path)
    page_im = Image.open(image_path)
    _, page_height = page_im.size

    for idx, letter_box in enumerate(letter_boxes):
        left = letter_box.left
        right = letter_box.right
        # Vertical edges are flipped against the image height for crop().
        upper = page_height - letter_box.top
        lower = page_height - letter_box.bottom

        assert right > left
        assert lower > upper

        letter_im = page_im.crop((left, upper, right, lower))
        target_dir, target_name = path_for_letter(
            output_dir, image_path, idx, letter_box.letter)
        target_path = os.path.join(target_dir, target_name)
        mkdir_p(target_dir)
        letter_im.save(target_path)