def get_region_boxes(img,
                     pad=10,
                     erode_iters=3,
                     min_box_width=50,
                     min_aspect=0.7,
                     min_box_width_rel=0.1,
                     approx_factor=0.06,
                     bin_thresh=5,
                     min_box_height=20,
                     debug=False):
    """Detect box-like regions in ``img`` and return them as ``Box`` objects.

    The image is run through ``preprocess_image``, padded with a constant
    zero-valued border, eroded and dilated ``erode_iters`` times with a 3x3
    kernel, and its external contours are approximated by polygons. A contour
    yields a box when its polygon has 4 to 8 vertices and its bounding
    rectangle passes the size and aspect-ratio limits below.

    Every contour is evaluated once per approximation factor (0.02 and 0.07),
    so the same region may be returned more than once.

    Args:
        img: Source image array; ``img.shape[0]`` / ``img.shape[1]`` are used
            as height / width.
        pad: Border width in pixels added on every side before contour
            detection. Box coordinates are shifted back by this amount.
        erode_iters: Iteration count for both the erosion and the dilation.
        min_box_width: Minimum bounding-rectangle width in pixels.
        min_aspect: Minimum width / height ratio of the bounding rectangle.
        min_box_width_rel: Minimum width as a fraction of the image width.
        approx_factor: Currently unused. The fixed factors 0.02 and 0.07 are
            always applied; the parameter is kept for interface
            compatibility.
        bin_thresh: Forwarded to ``preprocess_image`` (presumably the
            binarisation threshold -- confirm against that helper).
        min_box_height: Minimum bounding-rectangle height in pixels.
        debug: When true, show each intermediate image with ``cv2.imshow``
            and block on ``cv2.waitKey(0)``.

    Returns:
        A pair ``([src_img, norm_img, regs], boxes)``: the padded source
        image, the padded image after erosion/dilation, a copy of the padded
        source with the detections drawn on it, and the list of ``Box``
        objects in unpadded image coordinates with ``img_data`` set to the
        matching crop of ``img``.
    """

    def show_step(title, image):
        # Interactive inspection of an intermediate result (debug runs only).
        if debug:
            cv2.imshow(title, image)  # andrey
            cv2.waitKey(0)

    norm_img_src = preprocess_image(img, bin_thresh, False, debug)

    # pad to make clear edges
    src_img = cv2.copyMakeBorder(img,
                                 pad,
                                 pad,
                                 pad,
                                 pad,
                                 cv2.BORDER_CONSTANT,
                                 value=(0, ))
    show_step('orig image with black borders', src_img)
    norm_img = cv2.copyMakeBorder(norm_img_src,
                                  pad,
                                  pad,
                                  pad,
                                  pad,
                                  cv2.BORDER_CONSTANT,
                                  value=(0, ))
    show_step('bw image with black borders', norm_img)

    # Erosion followed by dilation with the same kernel (a morphological
    # opening, although the debug window title below says "closing").
    # NOTE: (3, 11) and (11, 3) kernels were experimented with here before
    # and are currently disabled.
    kernel = np.ones(shape=(3, 3))
    norm_img = cv2.erode(norm_img, kernel, iterations=erode_iters)
    show_step('bw image after erosion', norm_img)
    norm_img = cv2.dilate(norm_img, kernel, iterations=erode_iters)
    show_step('bw image after closing', norm_img)

    regs = src_img.copy()
    # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on 2.4 / 4.x; the contour list is always the
    # second-to-last element.
    contours = cv2.findContours(norm_img, cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_NONE)[-2]

    # Inward shrink applied to every box edge; currently disabled.
    box_offset = 0
    approx_factors = (0.02, 0.07)
    img_height, img_width = img.shape[0], img.shape[1]
    min_perimeter = min_box_width + min_box_height

    boxes = []
    for factor in approx_factors:
        for contour in contours:
            perimeter = cv2.arcLength(contour, True)
            if perimeter < min_perimeter:
                # Too short to enclose a box of the minimum size.
                continue
            approx = cv2.approxPolyDP(contour, factor * perimeter, True)
            if not 4 <= len(approx) <= 8:
                # Not box-like: just outline it on the annotated image.
                cv2.drawContours(regs, [approx], 0, (0, 255, 0),
                                 2)  # andrey
                show_step('image with contours', regs)
                continue
            (x, y, w, h) = cv2.boundingRect(approx)
            aspect = w / float(h)
            if not (aspect > min_aspect and w > min_box_width_rel * img_width
                    and w > min_box_width and h > min_box_height):
                continue
            # Translate from padded to original coordinates, clamped to the
            # image bounds.
            box = Box(max(0, x - pad + box_offset),
                      max(0, y - pad + box_offset),
                      min(img_width - 1, x - pad + w - box_offset),
                      min(img_height - 1, y - pad + h - box_offset))
            box.img_data = img[box.y1:box.y2, box.x1:box.x2]
            boxes.append(box)
            cv2.rectangle(regs, (x, y), (x + w, y + h), (255, 0, 0),
                          2)  # andrey
            show_step('image with bounding box', regs)

    return [src_img, norm_img, regs], boxes
def main(args):
    """Extract source regions from figure images and write their metadata.

    Parses ``args``, recreates the output directory, runs
    ``get_region_boxes`` with several presets on every input image, filters
    the combined boxes with ``filter_boxes_updated``, crops the borders of
    each remaining region with ``crop_border``, saves each region as a PNG,
    and finally writes one JSON line per article to the ``-meta_out`` file.

    With ``--demo`` an annotated copy of every image is also saved under
    ``<out>/demo`` and shown in a window that blocks until a key is pressed.

    Args:
        args: Sequence of command-line argument strings, handed to
            ``ArgumentParser.parse_args``.

    Raises:
        SystemExit: With status -1 when no input files are found (and by
            argparse on invalid arguments).
    """
    print(f'Source region extraction tool version {__version__}')
    parser = ArgumentParser()
    parser.add_argument(
        'input',
        help=
        'figure image file or image folder path or semicolon separated list of files or meta json lines file'
    )
    parser.add_argument(
        '--demo',
        help='highlight source region on input images and save separately',
        default=False,
        action='store_true')
    parser.add_argument('-out',
                        help='source regions output dir',
                        default='out')
    parser.add_argument('-meta_out',
                        help='source regions output JSONL file',
                        default='regions.json')
    args = parser.parse_args(args)
    # os.path.join keeps the demo dir relative when -out is empty; plain
    # concatenation would give '/demo' at the filesystem root.
    demo_dir = os.path.join(args.out, 'demo')
    print('Building a list of source files...')

    # Start from clean output directories. The demo dir lives inside the
    # output dir, so removing the latter covers both.
    stale_dir = args.out if args.out else demo_dir
    if os.path.exists(stale_dir):
        shutil.rmtree(stale_dir, ignore_errors=True)
    try:
        if args.out:
            os.makedirs(args.out, exist_ok=True)
        if args.demo:
            os.makedirs(demo_dir, exist_ok=True)  # andrey
    except OSError as err:
        # Best effort, as before; later writes will surface the real problem.
        print(f'Warning: could not create output directories: {err}')

    articles = get_articles(args.input)
    file_mapping = get_file_mapping(articles)
    if not file_mapping:
        print('No files found in specified sources')
        raise SystemExit(-1)

    # (get_region_boxes overrides, annotation colour) for each pass.
    presets = (
        # default settings
        ({'bin_thresh': 5}, (255, 0, 0)),
        # preset 2 to extract bordered regions
        ({'erode_iters': 0, 'bin_thresh': 50}, (0, 255, 0)),
        # preset 3 with another threshold
        ({'erode_iters': 0, 'bin_thresh': 100}, (0, 0, 255)),
        # preset 3a with another threshold
        ({'erode_iters': 0, 'bin_thresh': 150}, (0, 0, 255)),
        # preset 4 - low threshold no morph
        ({'erode_iters': 0}, (255, 0, 255)),
    )

    reg_id = 0
    total_files = len(file_mapping)
    for i, (filename, article_image) in enumerate(file_mapping.items()):
        src_img = cv2.imread(filename)
        if src_img is None:
            print(f'Error reading file, skipping: {filename}')
            continue

        cur_id = 1
        all_boxes = []
        for overrides, color in presets:
            _, preset_boxes = get_region_boxes(src_img,
                                               debug=False,
                                               **overrides)
            cur_id = annotate_boxes(cur_id, preset_boxes, color)
            all_boxes.extend(preset_boxes)

        # NOTE(review): the whole image is added as an extra candidate only
        # when exactly one box was found -- confirm this is intended.
        if len(all_boxes) == 1:  # add the entire image as a box
            box = Box(0, 0, src_img.shape[1] - 1, src_img.shape[0] - 1)
            box.img_data = src_img
            all_boxes.append(box)

        updated_boxes = filter_boxes_updated(all_boxes, src_img, debug=False)
        if len(updated_boxes) == 0:
            print(
                f'Warning: {filename} no source regions detected, assuming single source region'
            )
            box = Box(0,
                      0,
                      src_img.shape[1] - 1,
                      src_img.shape[0] - 1,
                      id=cur_id)
            box.img_data = src_img
            updated_boxes.append(box)

        # splitext keeps the full name when the file has no extension
        # (splitting on '.' would leave an empty stem).
        stem = os.path.splitext(os.path.basename(filename))[0]

        # remove borders from regions
        print(f'{filename} number of source regions: {len(updated_boxes)}')
        for b in updated_boxes:
            src_reg = b.img_data
            # crop borders on normalized reg (white/black borders)
            x1, x2, y1, y2 = crop_border(preprocess_image(src_reg,
                                                          150,
                                                          otsu=True),
                                         src_reg,
                                         debug=False)
            cropped = src_reg[y1:y2, x1:x2]
            reg_path = os.path.join(args.out,
                                    stem + '_' + 'R' + str(b.id) + '.png')
            article_image.regions.append(
                Region(id=reg_id,
                       chain_id=article_image.chain_id,
                       filename=os.path.abspath(reg_path),
                       box=(b.x1, b.y1, b.x2, b.y2),
                       matches=[],
                       title=os.path.basename(reg_path)))
            reg_id += 1
            cv2.imwrite(reg_path, cropped)

        if args.demo:
            ref = src_img.copy()
            for b in updated_boxes:
                cv2.rectangle(ref, (b.x1, b.y1), (b.x2, b.y2),
                              color=(b.meta if b.meta is not None else
                                     (255, 128, 100)),
                              thickness=2)
            demo_path = os.path.join(demo_dir, stem + '.png')
            cv2.imwrite(demo_path, ref)
            # Blocks until a key is pressed in the preview window.
            cv2.imshow('image with bounding box', ref)  # andrey
            cv2.waitKey(0)
        progress.report_progress(i + 1, total_files, 'Extracting regions')

    with open(args.meta_out, mode='w') as meta_file:
        for article in articles:
            # Text mode already translates '\n' to the platform line ending;
            # writing os.linesep would produce '\r\r\n' on Windows.
            meta_file.write(article.toJSON() + '\n')