Example #1
def predict_image_track_with_precomputed_ref_features(img, ref_features,
                                                      model_func):
    orig_shape = img.shape[:2]
    resizer = CustomResize(cfg.PREPROC.TEST_SHORT_EDGE_SIZE,
                           cfg.PREPROC.MAX_SIZE)
    resized_img = resizer.augment(img)
    scale = np.sqrt(resized_img.shape[0] * 1.0 / img.shape[0] *
                    resized_img.shape[1] / img.shape[1])
    boxes, probs, labels, *masks = model_func(resized_img, ref_features)
    boxes = boxes / scale
    # boxes are already clipped inside the graph, but after the floating point scaling, this may not be true any more.
    boxes = clip_boxes(boxes, orig_shape)

    if masks:
        # has mask
        full_masks = [
            _paste_mask(box, mask, orig_shape)
            for box, mask in zip(boxes, masks[0])
        ]
        masks = full_masks
    else:
        # fill with none
        masks = [None] * len(boxes)

    results = [
        DetectionResult(*args) for args in zip(boxes, probs, labels, masks)
    ]
    return results
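A hypothetical call site for the function above (the names `frame`, `ref_feats`, and `pred_func` are illustrative, not from the original source; `pred_func` would be an OfflinePredictor over the tracking graph, as built in Example #11):

# Sketch only: run tracking on one frame with reference features
# extracted once from the first frame.
results = predict_image_track_with_precomputed_ref_features(
    frame, ref_feats, pred_func)
for r in results:
    print(r.box, r.score, r.class_id)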
Example #2
def detect_one_image(img, model_func):
    """
    Run detection on one image, using the TF callable.
    This function should handle the preprocessing internally.

    Args:
        img: an image
        model_func: a callable from TF model,
            takes image and returns (boxes, probs, labels, features)

    Returns:
        [DetectionResult]
    """

    orig_shape = img.shape[:2]
    resizer = CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)
    resized_img = resizer.augment(img)
    scale = np.sqrt(resized_img.shape[0] * 1.0 / img.shape[0] *
                    resized_img.shape[1] / img.shape[1])
    boxes, probs, labels, fv = model_func(resized_img)
    boxes = boxes / scale
    # boxes are already clipped inside the graph, but after the floating point scaling, this may not be true any more.
    boxes = clip_boxes(boxes, orig_shape)

    results = [
        DetectionResult(*args) for args in zip(boxes, probs, labels, fv)
    ]
    return results
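Note on the `scale` computation used above: `CustomResize` preserves the aspect ratio up to rounding, so the height and width ratios are nearly equal and their geometric mean recovers the single resize factor. A small worked example:

import numpy as np
# A 480x640 image resized with short edge 600 becomes 600x800:
# both ratios are 1.25, and sqrt(1.25 * 1.25) == 1.25 exactly.
scale = np.sqrt((600 / 480.0) * (800 / 640.0))
assert abs(scale - 1.25) < 1e-9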
Example #3
 def __init__(
         self,
         model_path='weights/MaskRCNN-R50C41x-COCO_finetune-docrop_and_rotate_24500.pb',
         canvas_size=512,
         debug=False):
     if not tf.test.is_gpu_available():
         from tensorflow.python.framework import test_util
         assert get_tf_version_tuple() >= (1, 7) and test_util.IsMklEnabled(), \
             "Inference requires either GPU support or MKL support!"
     self.canvas_size = canvas_size
     self.debug = debug
     self.id_to_class_name = {
         1: 'page',
         2: 'profile_image',
         3: 'van_tay',
         4: 'passport_code'
     }
     self.resizer = CustomResize(self.canvas_size, self.canvas_size)
     print('Loading model at', model_path)
     self.graph = load_graph(model_path)
     self.input_tensor = self.graph.get_tensor_by_name('import/image:0')
     self.output_node_name = [
         'output/boxes', 'output/scores', 'output/labels', 'output/masks'
     ]
     self.outputs_tensor = [
         self.graph.get_tensor_by_name('import/{}:0'.format(each_node))
         for each_node in self.output_node_name
     ]
     self.config = tf.compat.v1.ConfigProto()
     # self.config.gpu_options.allow_growth = True
     self.config.gpu_options.per_process_gpu_memory_fraction = 0.1
     self.sess = tf.compat.v1.Session(config=self.config, graph=self.graph)
     self.predict_crop(np.zeros((200, 200, 3), dtype=np.uint8))
     print('Loaded model!')
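`load_graph` is not shown on this page; a minimal frozen-graph loader consistent with the `import/...:0` tensor names used above would look like this (an assumption, not the original helper):

import tensorflow as tf

def load_graph(frozen_graph_path):
    # Assumed sketch: read a frozen GraphDef and import it under the
    # 'import' name scope, matching 'import/image:0' etc. above.
    with tf.io.gfile.GFile(frozen_graph_path, 'rb') as f:
        graph_def = tf.compat.v1.GraphDef()
        graph_def.ParseFromString(f.read())
    graph = tf.Graph()
    with graph.as_default():
        tf.import_graph_def(graph_def, name='import')
    return graph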
Example #4
def detect_one_image(img, model_func):
    """
    Run detection on one image, using the TF callable.
    This function should handle the preprocessing internally.

    Args:
        img: an image
        model_func: a callable from TF model,
            takes image and returns (boxes, probs, labels, [masks])

    Returns:
        [DetectionResult]
    """

    orig_shape = img.shape[:2]
    resizer = CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE)
    resized_img = resizer.augment(img)
    scale = (resized_img.shape[0] * 1.0 / img.shape[0] + resized_img.shape[1] * 1.0 / img.shape[1]) / 2
    boxes, probs, labels, *masks = model_func(resized_img)
    boxes = boxes / scale
    # boxes are already clipped inside the graph, but after the floating point scaling, this may not be true any more.
    boxes = clip_boxes(boxes, orig_shape)

    if masks:
        # has mask
        full_masks = [fill_full_mask(box, mask, orig_shape)
                      for box, mask in zip(boxes, masks[0])]
        masks = full_masks
    else:
        # fill with none
        masks = [None] * len(boxes)

    results = [DetectionResult(*args) for args in zip(boxes, probs, labels, masks)]
    return results
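`clip_boxes` is used by most examples on this page but never defined here; a minimal NumPy version consistent with its usage ((x1, y1, x2, y2) float boxes, (h, w) shape) is:

import numpy as np

def clip_boxes(boxes, shape):
    # boxes: (..., 4) float, xyxy; shape: (h, w)
    orig_shape = boxes.shape
    boxes = boxes.reshape([-1, 4])
    h, w = shape[:2]
    boxes[:, [0, 1]] = np.maximum(boxes[:, [0, 1]], 0)
    boxes[:, 2] = np.minimum(boxes[:, 2], w)
    boxes[:, 3] = np.minimum(boxes[:, 3], h)
    return boxes.reshape(orig_shape)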
Example #5
    def resize_images(inputs):
        resizer = CustomResize(cfg.PREPROC.TEST_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)
        resized_imgs = [resizer.augment(inp[0]) for inp in inputs]
        org_shapes = [inp[0].shape for inp in inputs]
        scales = [np.sqrt(rimg.shape[0] * 1.0 / org_shape[0] * rimg.shape[1] / org_shape[1]) for rimg, org_shape in zip(resized_imgs, org_shapes)]

        return [[resized_imgs[i], inp[1], scales[i], org_shapes[i][:2]] for i, inp in enumerate(inputs)]
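A sketch of how `resize_images` might be fed (assuming each input pairs an image with arbitrary per-image metadata in `inp[1]`; the names are hypothetical):

# Hypothetical usage: inputs is a list of [image, metadata] pairs.
batch = resize_images([[img_a, meta_a], [img_b, meta_b]])
# each output entry is [resized_img, metadata, scale, original (h, w)]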
Example #6
def predict_image(img, model_func):
    """
    Run detection on one image, using the TF callable.
    This function should handle the preprocessing internally.

    Args:
        img: an image
        model_func: a callable from the TF model.
            It takes image and returns (boxes, probs, labels, [masks])

    Returns:
        [DetectionResult]
    """
    orig_shape = img.shape[:2]
    resizer = CustomResize(cfg.PREPROC.TEST_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)
    resized_img = resizer.augment(img)
    scale = np.sqrt(resized_img.shape[0] * 1.0 / img.shape[0] * resized_img.shape[1] / img.shape[1])
    boxes, probs, labels, *masks = model_func(resized_img)

    # Some slow numpy postprocessing:
    boxes = boxes / scale
    # boxes are already clipped inside the graph, but after the floating point scaling, this may not be true any more.
    boxes = clip_boxes(boxes, orig_shape)
    if masks:
        full_masks = [_paste_mask(box, mask, orig_shape)
                      for box, mask in zip(boxes, masks[0])]
        masks = full_masks
    else:
        # fill with none
        masks = [None] * len(boxes)

    results = [DetectionResult(*args) for args in zip(boxes, probs, labels.tolist(), masks)]
    return results
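`DetectionResult` in the tensorpack Mask R-CNN example is a plain namedtuple; for the four-field variants on this page it is roughly the following (other examples use variants with extra fields such as `ious` or `polygon`):

from collections import namedtuple

# Field order matches zip(boxes, probs, labels, masks) above.
DetectionResult = namedtuple('DetectionResult',
                             ['box', 'score', 'class_id', 'mask'])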
Example #7
def detect_one_image_scale(img, model_func):
    """
    Run detection on one image, using the TF callable.
    This function should handle the preprocessing internally.

    Args:
        img: an image
        model_func: a callable from TF model,
            takes image and returns (boxes, probs, labels, [masks])

    Returns:
        [DetectionResult]
    """
    scores_ts = []
    boxes_ts = []
    labels_ts = []
    masks_ts = []

    def add_preds_t(scores_t, boxes_t, labels_t, masks_t):
        scores_ts.append(scores_t)
        boxes_ts.append(boxes_t)
        labels_ts.append(labels_t)
        masks_ts.append(masks_t)

    orig_shape = img.shape[:2]
    for bbox_aug_scale in cfg.TEST.BBOX_AUG_SCALES:
        resizer = CustomResize(bbox_aug_scale, cfg.TEST.BBOX_AUG_MAX_SIZE)
        resized_img = resizer.augment(img)
        scale = np.sqrt(resized_img.shape[0] * 1.0 / img.shape[0] * resized_img.shape[1] / img.shape[1])
        boxes, probs, labels, *masks = model_func(resized_img)
        boxes = boxes / scale
        # boxes are already clipped inside the graph, but after the floating point scaling, this may not be true any more.
        boxes = clip_boxes(boxes, orig_shape)
        add_preds_t(probs, boxes, labels, masks)

    if cfg.TEST.BBOX_AUG_COORD_HEUR == 'UNION':
        boxes_c = np.vstack(boxes_ts)
        scores_c = np.concatenate(scores_ts)
        labels_c = np.concatenate(labels_ts)
        # each element of masks_ts is [] or [(N_i, M, M) array]
        masks_c = np.vstack([m[0] for m in masks_ts]) if masks_ts[0] else []

    # Apply NMS

    logger.info("detect_one_image_scale...")
    logger.info(boxes_c)
    logger.info(scores_c)


    if len(masks_c):
        # has mask
        full_masks = [fill_full_mask(box, mask, orig_shape)
                      for box, mask in zip(boxes_c, masks_c)]
        masks = full_masks
    else:
        # fill with none
        masks = [None] * len(boxes_c)

    results = [DetectionResult(*args) for args in zip(boxes_c, scores_c, labels_c, masks)]
    return results
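The `# Apply NMS` step above is left unimplemented in the example. A minimal per-class NMS over the combined detections could look like the sketch below (pure NumPy; the IoU threshold is an assumed parameter, not from the original code):

import numpy as np

def nms_per_class(boxes, scores, labels, iou_thresh=0.5):
    # boxes: (N, 4) xyxy; scores: (N,); labels: (N,)
    keep = []
    for cls in np.unique(labels):
        idxs = np.where(labels == cls)[0]
        order = idxs[np.argsort(-scores[idxs])]
        while order.size > 0:
            i = order[0]
            keep.append(i)
            rest = order[1:]
            xx1 = np.maximum(boxes[i, 0], boxes[rest, 0])
            yy1 = np.maximum(boxes[i, 1], boxes[rest, 1])
            xx2 = np.minimum(boxes[i, 2], boxes[rest, 2])
            yy2 = np.minimum(boxes[i, 3], boxes[rest, 3])
            inter = np.maximum(0, xx2 - xx1) * np.maximum(0, yy2 - yy1)
            area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
            areas = ((boxes[rest, 2] - boxes[rest, 0]) *
                     (boxes[rest, 3] - boxes[rest, 1]))
            iou = inter / (area_i + areas - inter)
            order = rest[iou <= iou_thresh]
    return np.array(keep, dtype=np.int64)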
Example #8
def pre_processing_inference(img):
    orig_shape = img.shape[:2]
    resizer = CustomResize(cfg.PREPROC.TEST_SHORT_EDGE_SIZE,
                           cfg.PREPROC.MAX_SIZE)
    resized_img = resizer.augment(img)
    scale = np.sqrt(resized_img.shape[0] * 1.0 / img.shape[0] *
                    resized_img.shape[1] / img.shape[1])
    return resized_img, scale, orig_shape
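A hypothetical pairing of this helper with the postprocessing seen in the other examples (note that Example #9 below returns the same three values in a different order):

# Sketch only:
resized_img, scale, orig_shape = pre_processing_inference(img)
boxes, probs, labels = model_func(resized_img)
boxes = clip_boxes(boxes / scale, orig_shape)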
Example #9
def run_resize_image(img):
    orig_shape = img.shape[:2]
    resizer = CustomResize(cfg.PREPROC.TEST_SHORT_EDGE_SIZE,
                           cfg.PREPROC.MAX_SIZE)
    resized_img = resizer.augment(img)
    scale = np.sqrt(resized_img.shape[0] * 1.0 / img.shape[0] *
                    resized_img.shape[1] / img.shape[1])
    return resized_img, orig_shape, scale
Example #10
def predict_image(img, model_func):
    """
    Run detection on one image, using the TF callable.
    This function should handle the preprocessing internally.

    Args:
        img: an image
        model_func: a callable from the TF model.
            It takes image and returns (boxes, probs, labels, [masks])

    Returns:
        [DetectionResult]
    """
    global total_time
    global cnt
    print("predict_image")
    # print("model_func")
    # print(model_func)
    orig_shape = img.shape[:2]
    resizer = CustomResize(cfg.PREPROC.TEST_SHORT_EDGE_SIZE,
                           cfg.PREPROC.MAX_SIZE)
    resized_img = resizer.augment(img)
    scale = np.sqrt(resized_img.shape[0] * 1.0 / img.shape[0] *
                    resized_img.shape[1] / img.shape[1])
    start_time = time.time()
    boxes, probs, labels, *masks = model_func(resized_img)
    end_time = time.time()
    cnt += 1
    total_time += end_time - start_time
    print(
        f"--------- Inference time: {total_time / cnt} seconds -----------------"
    )
    # print(f"boxes : {boxes}")
    # print(f"probs : {probs}")
    # print(f"labels : {labels}")
    # print(f"masks : {masks}")
    # print(len(masks)) # 1
    # print(masks[0].shape) # (11, 28, 28)
    # Some slow numpy postprocessing:
    boxes = boxes / scale
    # boxes are already clipped inside the graph, but after the floating point scaling, this may not be true any more.
    boxes = clip_boxes(boxes, orig_shape)
    if masks:
        full_masks = [
            _paste_mask(box, mask, orig_shape)
            for box, mask in zip(boxes, masks[0])
        ]
        masks = full_masks
    else:
        # fill with none
        masks = [None] * len(boxes)

    results = [
        DetectionResult(*args)
        for args in zip(boxes, probs, labels.tolist(), masks)
    ]
    return results
Example #11
    def __init__(self, name, need_network=True, need_img=True, model="best"):
        super().__init__(name=name, is_deterministic=True)
        self._resizer = CustomResize(cfg.PREPROC.TEST_SHORT_EDGE_SIZE,
                                     cfg.PREPROC.MAX_SIZE)
        self._prev_box = None
        self._ff_gt_feats = None
        self._need_network = need_network
        self._need_img = need_img
        self._rotated_bbox = None

        if need_network:
            logger.set_logger_dir(
                "/tmp/test_log_/" + str(random.randint(0, 10000)), 'd')
            if model == "best":
                load = "train_log/hard_mining3/model-1360500"
            elif model == "nohardexamples":
                load = "train_log/condrcnn_all_2gpu_lrreduce2/model-1200500"
            elif model == "newrpn":
                load = "train_log/newrpn1/model"
            elif model == "resnet50_nohardexamples":
                load = "train_log/condrcnn_all_resnet50/model-1200500"
                cfg.BACKBONE.RESNET_NUM_BLOCKS = [3, 4, 6, 3]
            elif model == "resnet50":
                load = "train_log/hard_mining3_resnet50/model-1360500"
                cfg.BACKBONE.RESNET_NUM_BLOCKS = [3, 4, 6, 3]
            elif model == "gotonly":
                load = "train_log/hard_mining3_onlygot/model-1361000"
            elif model.startswith("checkpoint:"):
                load = model.replace("checkpoint:", "")
            else:
                assert False, ("unknown model", model)
            from dataset import DetectionDataset
            # init tensorpack model
            # cfg.freeze(False)
            DetectionDataset()  # initialize the config with information from our dataset

            cfg.EXTRACT_GT_FEATURES = True
            cfg.MODE_TRACK = False
            extract_model = ResNetFPNModel()
            extract_ff_feats_cfg = PredictConfig(
                model=extract_model,
                session_init=get_model_loader(load),
                input_names=['image', 'roi_boxes'],
                output_names=['rpn/feature'])
            finalize_configs(is_training=False)
            self._extract_func = OfflinePredictor(extract_ff_feats_cfg)

            cfg.EXTRACT_GT_FEATURES = False
            cfg.MODE_TRACK = True
            cfg.USE_PRECOMPUTED_REF_FEATURES = True
            self._pred_func = self._make_pred_func(load)
Example #12
def detect_one_image_cls(img, model_func):
    """
    Run detection on one image, using the TF callable.
    This function should handle the preprocessing internally.

    Args:
        img: an image
        model_func: a callable from TF model,
            takes image and returns (boxes, probs, labels, ious,
            img_level_label, img_level_label_score, [masks])

    Returns:
        ([DetectionResult], img_level_label)
    """

    orig_shape = img.shape[:2]
    resizer = CustomResize(cfg.PREPROC.TEST_SHORT_EDGE_SIZE,
                           cfg.PREPROC.MAX_SIZE)
    resized_img = resizer.augment(img)
    scale = np.sqrt(resized_img.shape[0] * 1.0 / img.shape[0] *
                    resized_img.shape[1] / img.shape[1])
    boxes, probs, labels, ious, img_level_label, img_level_label_score, *masks = model_func(
        resized_img)
    boxes = boxes / scale
    # boxes are already clipped inside the graph, but after the floating point scaling, this may not be true any more.
    boxes = clip_boxes(boxes, orig_shape)

    #    box, prob, keep = soft_nms_py(boxes, probs, overlap_thresh=0.3, score_thresh=0.001, method='gaussian')
    #    top_det, top_score = box_voting(box, prob, boxes, probs, thresh=0.7, scoring_method='ID')
    #
    #    labels = labels[keep]
    #    ious = ious[keep]
    #    boxes = top_det
    #    probs = top_score

    if masks:
        # has mask
        full_masks = [
            fill_full_mask(box, mask, orig_shape)
            for box, mask in zip(boxes, masks[0])
        ]
        masks = full_masks
    else:
        # fill with none
        masks = [None] * len(boxes)

    results = [
        DetectionResult(*args)
        for args in zip(boxes, probs, labels, ious, masks)
    ]
    return results, img_level_label[0]
Example #13
def get_train_aseval_dataflow():
    """
    Args:
        shard, num_shards: to get subset of evaluation data
    """
    prw = PRWDataset(cfg.DATA.BASEDIR)
    imgs = prw.load()

    # no filter for training
    # test if it can repeat keys
    ds = DataFromList(imgs, shuffle=False)

    aug = imgaug.AugmentorList(
        [CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)])

    def preprocess(img):
        fname = img['file_name']
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        orig_shape = im.shape[:2]
        im = im.astype('float32')

        # augmentation:
        im, params = aug.augment_return_params(im)

        ret = [fname, im, orig_shape]

        return ret

    ds = MapData(ds, preprocess)
    return ds
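Iterating the result follows the usual tensorpack DataFlow pattern (a sketch):

ds = get_train_aseval_dataflow()
ds.reset_state()  # required before iterating any tensorpack DataFlow
for fname, im, orig_shape in ds:
    pass  # feed `im` to the model; `orig_shape` is (h, w) of the raw image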
Example #14
 def __init__(self, cfg):
     self.cfg = cfg
     self.aug = imgaug.AugmentorList([
         CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE,
                      cfg.PREPROC.MAX_SIZE),
         imgaug.Flip(horiz=True)
     ])
Example #15
def read_and_augment_images(ds):
    def mapf(dp):
        fname = dp[0]
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        dp[0] = im

        # assume floatbox as input
        assert dp[1].dtype == np.float32
        dp[1] = box_to_point8(dp[1])

        dp.append(fname)
        return dp

    ds = MapData(ds, mapf)

    augs = [
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ]
    ds = AugmentImageComponents(ds, augs, index=(0, ), coords_index=(1, ))

    def unmapf(points):
        boxes = point8_to_box(points)
        return boxes

    ds = MapDataComponent(ds, unmapf, 1)
    return ds
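`box_to_point8` / `point8_to_box` convert boxes to their four corner points and back, so augmentors that only transform point coordinates can be applied to boxes; a minimal version consistent with this usage:

import numpy as np

def box_to_point8(boxes):
    # (N, 4) xyxy -> (N*4, 2): all four corners of each box
    b = boxes[:, [0, 1, 2, 3, 0, 3, 2, 1]]
    return b.reshape((-1, 2))

def point8_to_box(points):
    # (N*4, 2) -> (N, 4): axis-aligned bounding box of the 4 corners
    p = points.reshape((-1, 4, 2))
    minxy = p.min(axis=1)
    maxxy = p.max(axis=1)
    return np.concatenate((minxy, maxxy), axis=1)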
Example #16
 def __init__(self, cfg):
     self.cfg = cfg
     self.aug = imgaug.AugmentorList([
         imgaug.RandomApplyAug(SquareAspectRatioResize(), 0.075),
         # imgaug.RandomApplyAug(imgaug.RandomCropRandomShape(wmin=int(
         #     0.75*cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE[0]), hmin=int(0.75*cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE[0])), 0.25),
         CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
         imgaug.RandomApplyAug(imgaug.Flip(horiz=True), 0.5),
     ])
Example #17
def detect_one_image(img, model_func):
    """
    Run detection on one image, using the TF callable.
    This function should handle the preprocessing internally.

    Args:
        img: an image
        model_func: a callable from TF model, takes [image] and returns (probs, boxes)

    Returns:
        [DetectionResult]
    """
    resizer = CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE)
    resized_img = resizer.augment(img)
    scale = (resized_img.shape[0] * 1.0 / img.shape[0] +
             resized_img.shape[1] * 1.0 / img.shape[1]) / 2
    fg_probs, fg_boxes = model_func(resized_img)
    fg_boxes = fg_boxes / scale
    fg_boxes = clip_boxes(fg_boxes, img.shape[:2])
    return nms_fastrcnn_results(fg_boxes, fg_probs)
Example #18
def detect_one_image(img, model_func):
    """
    Run detection on one image, using the TF callable.
    This function should handle the preprocessing internally.

    Args:
        img: an image
        model_func: a callable from TF model, takes [image] and returns (boxes, probs, labels)

    Returns:
        [DetectionResult]
    """

    resizer = CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE)
    resized_img = resizer.augment(img)
    scale = (resized_img.shape[0] * 1.0 / img.shape[0] +
             resized_img.shape[1] * 1.0 / img.shape[1]) / 2
    boxes, probs, labels = model_func(resized_img)
    boxes = boxes / scale

    results = [DetectionResult(*args) for args in zip(labels, boxes, probs)]
    return results
Example #19
def detect_one_image_TTA2(img, model_func):
    orig_shape = img.shape[:2]
    SCALES = [1800, 2000]
    all_scale_results = []
    augs = [0, 4]
    mask_whole = np.zeros((img.shape[0], img.shape[1]))
    for s in SCALES:
        mask_whole_d = np.zeros((img.shape[0], img.shape[1]))
        for d in augs:
            # keep the original img intact so transforms don't compound across iterations
            aug_img = do_flip_transpose(img, d)
            resizer = CustomResize(s, config.MAX_SIZE)
            resized_img = resizer.augment(aug_img.copy())
            scale = (resized_img.shape[0] * 1.0 / aug_img.shape[0] +
                     resized_img.shape[1] * 1.0 / aug_img.shape[1]) / 2
            boxes, probs, labels, *masks = model_func(resized_img)
            boxes = boxes / scale

            if masks:
                # has mask
                full_masks = [
                    fill_full_mask_TTA(box, mask, orig_shape)
                    for box, mask in zip(boxes, masks[0])
                ]
                masks = full_masks
            else:
                # fill with none
                masks = [None] * len(boxes)

            results = [
                DetectionResult(*args)
                for args in zip(boxes, probs, labels, masks)
            ]
            for r in results:
                mask_whole_d += undo_flip_transpose(r.mask, d)
        mask_whole_d = mask_whole_d / float(len(augs))
        mask_whole += mask_whole_d
    mask_whole = mask_whole / float(len(SCALES))
    mask_whole = mask_whole > 0.5
    return mask_whole.astype('uint8')
Example #20
 def __init__(self, cfg):
     self.cfg = cfg
     self.aug = imgaug.AugmentorList([
         #            imgaug.RandomApplyAug(imgaug.RandomResize( xrange = (0.8, 1.5), minimum = (cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE[0], cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE[0]), aspect_ratio_thres = 0.0 ), prob = 0.5),
         imgaug.Flip(horiz=True, prob=0.5),
         imgaug.Flip(vert=True, prob=0.5),
         imgaug.RandomApplyAug(imgaug.Rotation(max_deg=180.0,
                                               step_deg=30.0,
                                               center_range=(0.5, 0.5)),
                               prob=0.5),
         imgaug.RandomApplyAug(imgaug.Grayscale(keepshape=True), prob=0.5),
         CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE,
                      cfg.PREPROC.MAX_SIZE),
     ])
Example #21
 def __init__(self, cfg, is_aws, is_gcs):
     self.cfg = cfg
     self.aug = imgaug.AugmentorList(
         [
             CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
             imgaug.Flip(horiz=True),
         ]
     )
     self.is_aws = is_aws
     self.is_gcs = is_gcs
     if self.is_aws:
         self.s3 = boto3.resource("s3")
     elif self.is_gcs:
         self.storage_client = storage.Client.create_anonymous_client()
         self.bucket = self.storage_client.get_bucket("determined-ai-coco-dataset")
Example #22
def get_query_dataflow():
    """
    Args:
        shard, num_shards: to get subset of evaluation data
    """
    prw = PRWDataset(cfg.DATA.BASEDIR)
    imgs = prw.load_query()

    # no filter for training
    # test if it can repeat keys
    ds = DataFromList(imgs, shuffle=False)

    aug = imgaug.AugmentorList(
        [CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)])

    def preprocess(img):
        fname, boxes, re_id_class = img['file_name'], img['boxes'], img[
            're_id_class']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = [im, boxes, re_id_class]

        return ret

    ds = MapData(ds, preprocess)
    return ds
Example #23
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """

    roidbs = DetectionDataset().load_training_roidbs(cfg.DATA.TRAIN)
    print_class_histogram(roidbs)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(roidbs)
    roidbs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0,
               roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd groundtruth boxes. Total #images for training: {}"
        .format(num - len(roidbs), len(roidbs)))

    ds = DataFromList(roidbs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(roidb):
        fname, boxes, klass, is_crowd = roidb['file_name'], roidb[
            'boxes'], roidb['class'], roidb['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        height, width = im.shape[:2]
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        if not cfg.DATA.ABSOLUTE_COORD:
            boxes[:, 0::2] *= width
            boxes[:, 1::2] *= height

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {'image': im}
        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(
                    im, boxes, is_crowd)
                for i, (anchor_labels,
                        anchor_boxes) in enumerate(multilevel_anchor_inputs):
                    ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                    ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
            else:
                # anchor_labels, anchor_boxes
                ret['anchor_labels'], ret[
                    'anchor_boxes'] = get_rpn_anchor_input(
                        im, boxes, is_crowd)

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(roidb['segmentation'])
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            width_height = np.asarray([width, height], dtype=np.float32)
            for polys in segmentation:
                if not cfg.DATA.ABSOLUTE_COORD:
                    polys = [p * width_height for p in polys]
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret['gt_masks'] = masks

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
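A sketch of consuming the training dataflow (the keys match the dict built in `preprocess` above):

ds = get_train_dataflow()
ds.reset_state()
for dp in ds:
    image = dp['image']
    gt_boxes, gt_labels = dp['gt_boxes'], dp['gt_labels']
    # plus 'anchor_labels*' / 'anchor_boxes*' and, with MODE_MASK, 'gt_masks'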
Example #24
def get_train_dataflow_coco(add_mask=False):
    """
    Return a training dataflow. Each datapoint is:
    image, fm_labels, fm_boxes, gt_boxes, gt_class [, masks]
    """
    imgs = COCODetection.load_many(config.BASEDIR,
                                   config.TRAIN_DATASET,
                                   add_gt=True,
                                   add_mask=add_mask)
    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    imgs = list(filter(lambda img: len(img['boxes']) > 0,
                       imgs))  # log invalid training

    ds = DataFromList(imgs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        print("start preproc coco")
        start = time.time()
        if config.USE_SECOND_HEAD:
            fname, boxes, klass, second_klass, is_crowd = img['file_name'], img['boxes'], img['class'], \
                                                          img['second_class'], img['is_crowd']
        else:
            fname, boxes, klass, is_crowd = img['file_name'], img[
                'boxes'], img['class'], img['is_crowd']
            second_klass = None
        res = preproc_img(fname, boxes, klass, second_klass, is_crowd, aug)
        if res is None:
            print("coco: preproc_img returned None on", fname)
            return None

        ret, params = res
        im = ret[0]
        boxes = ret[3]
        # masks
        if add_mask:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img.get('segmentation', None))
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes), (len(segmentation),
                                                     len(boxes))

            # one image-sized binary mask per box
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        end = time.time()
        elapsed = end - start
        print("coco example done, elapsed:", elapsed)
        return ret

    #ds = MapData(ds, preprocess)
    ds = MultiProcessMapData(ds,
                             nr_proc=4,
                             map_func=preprocess,
                             buffer_size=20)
    return ds
Example #25
def get_train_dataflow_mapillary(add_mask=False, map_to_coco=False):
    train_img_path = config.MAPILLARY_PATH + "training/images/"
    train_label_path = config.MAPILLARY_PATH + "training/instances/"
    imgs = glob.glob(train_img_path + "*.jpg")

    ds = DataFromList(imgs, shuffle=True)
    aug = imgaug.AugmentorList([
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(fname):
        print("start preproc mapillary")
        start = time.time()

        label_fname = fname.replace(train_img_path,
                                    train_label_path).replace(".jpg", ".png")
        pil_label = Image.open(label_fname)
        label = np.array(pil_label)
        instances = np.unique(label)
        instance_classes = [x // 256 for x in instances]

        # filter by categories we use
        instances_valid = [
            cls in config.MAPILLARY_CAT_IDS_TO_USE for cls in instance_classes
        ]
        instances = [
            inst for inst, valid in zip(instances, instances_valid) if valid
        ]
        instance_classes = [
            cls for cls, valid in zip(instance_classes, instances_valid)
            if valid
        ]

        if len(instances) == 0:
            print("no instances")
            pil_label.close()
            return None

        if map_to_coco:
            instance_classes = [
                config.MAPILLARY_TO_COCO_MAP[cls] for cls in instance_classes
            ]
            instance_classes = [
                config.VOID_LABEL if cls == config.VOID_LABEL else
                COCOMeta.category_id_to_class_id[cls]
                for cls in instance_classes
            ]
        else:
            # remap to contiguous numbers starting with 1
            instance_classes = [
                config.MAPILLARY_CAT_IDS_TO_USE.index(cls) + 1
                for cls in instance_classes
            ]

        masks = np.array([label == inst for inst in instances], dtype=np.uint8)

        #import cProfile
        #start1 = time.time()
        boxes1 = np.array(
            [get_bbox_from_segmentation_mask(mask) for mask in masks],
            dtype=np.float32)
        #boxes1_time = time.time() - start1
        #pr = cProfile.Profile()
        #pr.enable()
        #start1 = time.time()
        #boxes2 = get_bboxes_from_segmentation_masks(masks)
        #print("boxes1", boxes1_time, "boxes2", time.time() - start1)
        #pr.disable()
        #pr.print_stats(sort="cumulative")
        #assert (boxes1 == boxes2).all(), (boxes1, boxes2)
        boxes = boxes1

        second_klass = np.array(instance_classes, dtype=np.int64)  # np.int is removed in modern NumPy
        klass = np.ones_like(second_klass)
        is_crowd = np.zeros_like(second_klass)

        res = preproc_img(fname, boxes, klass, second_klass, is_crowd, aug)
        if res is None:
            print("mapillary: preproc_img returned None on", fname)
            pil_label.close()
            return None
        ret, params = res
        if add_mask:
            do_flip, h, w = params[1]
            assert do_flip in (True, False), do_flip
            # augment label
            label = np.array(pil_label.resize((w, h), Image.NEAREST))
            if do_flip:
                label = label[:, ::-1]
            # create augmented masks
            masks = np.array([label == inst for inst in instances],
                             dtype=np.uint8)
            ret.append(masks)

        end = time.time()
        elapsed = end - start
        print("mapillary example done, elapsed:", elapsed)

        VISUALIZE = False
        if VISUALIZE:
            from viz import draw_annotation, draw_mask
            config.CLASS_NAMES = [str(idx) for idx in range(81)]
            im = ret[0]
            boxes = ret[3]
            draw_klass = ret[-2]
            viz = draw_annotation(im, boxes, draw_klass)
            for mask in masks:
                viz = draw_mask(viz, mask)
            tpviz.interactive_imshow(viz)

        pil_label.close()
        return ret

    #ds = MapData(ds, preprocess)
    ds = MultiProcessMapData(ds,
                             nr_proc=8,
                             map_func=preprocess,
                             buffer_size=35)
    return ds
Example #26
def get_train_dataflow_davis(add_mask=False):
    # train_img_path = config.DAVIS_PATH + "train/"
    # train_label_path = config.DAVIS_PATH + "train-gt/"
    # imgs = glob.glob(train_img_path + "*/*.jpg")

    # train_img_path = "/home/luiten/vision/PReMVOS/data/first/bike-trial/lucid_data_dreaming/"
    # train_label_path = "/home/luiten/vision/PReMVOS/data/first/bike-trial/lucid_data_dreaming/"

    # train_img_path = "/home/luiten/vision/PReMVOS/data/"+config.DAVIS_NAME+"/lucid_data_dreaming/"
    # train_label_path = "/home/luiten/vision/PReMVOS/data/"+config.DAVIS_NAME+"/lucid_data_dreaming/"

    # train_img_path = "/home/luiten/vision/youtubevos/ytvos_data/together/generated/augment_images/"
    # train_label_path = "/home/luiten/vision/youtubevos/ytvos_data/together/generated/augment_gt/"

    train_img_path = "/home/luiten/vision/youtubevos/DAVIS/davis_together/augment_images/"
    train_label_path = "/home/luiten/vision/youtubevos/DAVIS/davis_together/augment_gt/"

    imgs = sorted(glob.glob(train_img_path + "*/*.jpg"))

    ds = DataFromList(imgs, shuffle=True)
    aug = imgaug.AugmentorList([
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(fname):
        # print("start preproc mapillary")
        start = time.time()

        label_fname = fname.replace(train_img_path,
                                    train_label_path).replace(".jpg", ".png")
        pil_label = Image.open(label_fname)
        label = np.array(pil_label)
        instances = np.unique(label)
        instance_classes = [x // 256 for x in instances]

        if len(instances) == 0:
            print("no instances")
            pil_label.close()
            return None

        masks = np.array([label == inst for inst in instances], dtype=np.uint8)

        boxes1 = np.array(
            [get_bbox_from_segmentation_mask(mask) for mask in masks],
            dtype=np.float32)
        boxes = boxes1

        # second_klass = np.array(instance_classes, dtype=np.int64)
        second_klass = np.zeros_like(instance_classes, dtype=np.int64)
        klass = np.ones_like(second_klass)
        is_crowd = np.zeros_like(second_klass)

        res = preproc_img(fname, boxes, klass, second_klass, is_crowd, aug)
        if res is None:
            print("davis: preproc_img returned None on", fname)
            pil_label.close()
            return None
        ret, params = res
        if add_mask:
            do_flip, h, w = params[1]
            assert do_flip in (True, False), do_flip
            # augment label
            label = np.array(pil_label.resize((w, h), Image.NEAREST))
            if do_flip:
                label = label[:, ::-1]
            # create augmented masks
            masks = np.array([label == inst for inst in instances],
                             dtype=np.uint8)
            ret.append(masks)

        end = time.time()
        elapsed = end - start
        # print("davis example done, elapsed:", elapsed)

        VISUALIZE = False
        if VISUALIZE:
            from viz import draw_annotation, draw_mask
            config.CLASS_NAMES = [str(idx) for idx in range(81)]
            im = ret[0]
            boxes = ret[3]
            draw_klass = ret[-2]
            viz = draw_annotation(im, boxes, draw_klass)
            for mask in masks:
                viz = draw_mask(viz, mask)
            tpviz.interactive_imshow(viz)

        pil_label.close()
        return ret

    ds = MapData(ds, preprocess)
    # ds = MultiProcessMapData(ds, nr_proc=8, map_func=preprocess, buffer_size=35)
    # ds = MultiProcessMapData(ds, nr_proc=8, map_func=preprocess)
    return ds
Example #27
class MaskRCNNDocCrop():
    def __init__(
            self,
            model_path='weights/MaskRCNN-R50C41x-COCO_finetune-docrop_and_rotate_24500.pb',
            canvas_size=512,
            debug=False):
        if not tf.test.is_gpu_available():
            from tensorflow.python.framework import test_util
            assert get_tf_version_tuple() >= (1, 7) and test_util.IsMklEnabled(), \
                "Inference requires either GPU support or MKL support!"
        self.canvas_size = canvas_size
        self.debug = debug
        self.id_to_class_name = {
            1: 'page',
            2: 'profile_image',
            3: 'van_tay',
            4: 'passport_code'
        }
        self.resizer = CustomResize(self.canvas_size, self.canvas_size)
        print('Loading model at', model_path)
        self.graph = load_graph(model_path)
        self.input_tensor = self.graph.get_tensor_by_name('import/image:0')
        self.output_node_name = [
            'output/boxes', 'output/scores', 'output/labels', 'output/masks'
        ]
        self.outputs_tensor = [
            self.graph.get_tensor_by_name('import/{}:0'.format(each_node))
            for each_node in self.output_node_name
        ]
        self.config = tf.compat.v1.ConfigProto()
        # self.config.gpu_options.allow_growth = True
        self.config.gpu_options.per_process_gpu_memory_fraction = 0.1
        self.sess = tf.compat.v1.Session(config=self.config, graph=self.graph)
        self.predict_crop(np.zeros((200, 200, 3), dtype=np.uint8))
        print('Loaded model!')

    def _scale_box(self, box, scale):
        w_half = (box[2] - box[0]) * 0.5
        h_half = (box[3] - box[1]) * 0.5
        x_c = (box[2] + box[0]) * 0.5
        y_c = (box[3] + box[1]) * 0.5

        w_half *= scale
        h_half *= scale

        scaled_box = np.zeros_like(box)
        scaled_box[0] = x_c - w_half
        scaled_box[2] = x_c + w_half
        scaled_box[1] = y_c - h_half
        scaled_box[3] = y_c + h_half
        return scaled_box

    def _paste_mask(self, box, mask, shape, accurate_paste=True):
        """
        Args:
            box: 4 float
            mask: MxM floats
            shape: h,w
        Returns:
            A uint8 binary image of hxw.
        """
        assert mask.shape[0] == mask.shape[1], mask.shape

        if accurate_paste:
            # This method is accurate but much slower.
            mask = np.pad(mask, [(1, 1), (1, 1)], mode='constant')
            box = self._scale_box(box,
                                  float(mask.shape[0]) / (mask.shape[0] - 2))

            mask_pixels = np.arange(0.0, mask.shape[0]) + 0.5
            mask_continuous = interpolate.interp2d(mask_pixels,
                                                   mask_pixels,
                                                   mask,
                                                   fill_value=0.0)
            h, w = shape
            ys = np.arange(0.0, h) + 0.5
            xs = np.arange(0.0, w) + 0.5
            ys = (ys - box[1]) / (box[3] - box[1]) * mask.shape[0]
            xs = (xs - box[0]) / (box[2] - box[0]) * mask.shape[1]
            # Waste a lot of compute since most indices are out-of-border
            res = mask_continuous(xs, ys)
            return (res >= 0.5).astype('uint8')
        else:
            # This method (inspired by Detectron) is less accurate but fast.

            # int() is floor
            # box fpcoor=0.0 -> intcoor=0.0
            x0, y0 = list(map(int, box[:2] + 0.5))
            # box fpcoor=h -> intcoor=h-1, inclusive
            x1, y1 = list(map(int, box[2:] - 0.5))  # inclusive
            x1 = max(x0, x1)  # require at least 1x1
            y1 = max(y0, y1)

            w = x1 + 1 - x0
            h = y1 + 1 - y0

            # rounding errors could happen here, because masks were not originally computed for this shape.
            # but it's hard to do better, because the network does not know the "original" scale
            mask = (cv2.resize(mask, (w, h)) > 0.5).astype('uint8')
            ret = np.zeros(shape, dtype='uint8')
            ret[y0:y1 + 1, x0:x1 + 1] = mask
            return ret

    def predict_crop(self, img, debug_id=None):
        start_time = time.time()
        orig_shape = img.shape[:2]
        resized_img = self.resizer.augment(img)
        scale = np.sqrt(resized_img.shape[0] * 1.0 / img.shape[0] *
                        resized_img.shape[1] / img.shape[1])
        boxes, probs, labels, *masks = self.sess.run(
            self.outputs_tensor, feed_dict={self.input_tensor: resized_img})

        # Some slow numpy postprocessing:
        boxes = boxes / scale
        # boxes are already clipped inside the graph, but after the floating point scaling, this may not be true any more.
        boxes = clip_boxes(boxes, orig_shape)
        if masks:
            full_masks = [
                self._paste_mask(box, mask, orig_shape)
                for box, mask in zip(boxes, masks[0])
            ]
            masks = full_masks
        else:
            # fill with none
            masks = [None] * len(boxes)

        polygons = []
        # Estimate polygon based on the mask right here
        for mask in masks:
            temp_mask = np.expand_dims(mask, axis=-1) * 255
            cnts = cv2.findContours(temp_mask, cv2.RETR_LIST,
                                    cv2.CHAIN_APPROX_SIMPLE)
            cnts = imutils.grab_contours(cnts)
            cnt = max(cnts, key=cv2.contourArea)
            peri = cv2.arcLength(cnt, True)
            estimated_polygon = cv2.approxPolyDP(cnt, 0.02 * peri, True)
            polygons.append(estimated_polygon)
            # temp_mask = cv2.cvtColor(temp_mask, cv2.COLOR_GRAY2BGR)
            # viz_img = cv2.polylines(temp_mask, [estimated_polygon], isClosed=True, color=(255, 0, 255), thickness=10)
            # cv2.imwrite('mask.png', viz_img)
            # import ipdb; ipdb.set_trace()

        results = [
            DetectionResult(*args)
            for args in zip(boxes, probs, labels.tolist(), masks, polygons)
        ]

        if self.debug:
            print('Crop took {} secs.'.format(time.time() - start_time))
            debug_id = str(uuid.uuid4()) if debug_id is None else debug_id
            debug_path = os.path.join('./debugs/', debug_id)
            os.makedirs(debug_path, exist_ok=True)
            final = draw_final_outputs_blackwhite(img, results)
            # cv2.imwrite(debug_path, final)
            cv2.imwrite(os.path.join(debug_path, 'prediction.png'), final)
            return results, debug_path
        return results

    def create_shapely_polygon(self, each_object):
        try:
            obj_polygon = Polygon([(each[0][0], each[0][1])
                                   for each in each_object.polygon])
            if not obj_polygon.is_valid:
                obj_polygon = obj_polygon.buffer(0)
        except ValueError:
            # Use bb instead
            x1, y1, x2, y2 = [int(each) for each in each_object.box]
            org_bb = [(x1, y1), (x2, y1), (x2, y2), (x1, y2)]
            obj_polygon = Polygon(org_bb)
            if not obj_polygon.is_valid:
                obj_polygon = obj_polygon.buffer(0)
        return obj_polygon

    def rotate_anno(self, all_object, angle, raw_img_shape, before_shape=None):
        new_object = []
        # Create full page mask
        old_shape = before_shape if before_shape is not None else raw_img_shape[::-1]
        full_page = [(0, 0), (old_shape[1], 0), (old_shape[1], old_shape[0]),
                     (0, old_shape[0])]
        rotated_full_page = rotate_polygon(full_page, angle, raw_img_shape)
        # Calculate offset of rotation
        top_left_x = min([each[0] for each in rotated_full_page])
        top_left_y = min([each[1] for each in rotated_full_page])
        for obj in all_object:
            rotated_obj = obj

            # For bb
            x1, y1, x2, y2 = [int(each) for each in obj.box]
            org_bb = [(x1, y1), (x2, y1), (x2, y2), (x1, y2)]
            rotated_bb = rotate_polygon(org_bb, angle, raw_img_shape,
                                        top_left_x, top_left_y)
            rotated_bb = [
                item for sublist in [rotated_bb[0], rotated_bb[2]]
                for item in sublist
            ]

            # For polygon
            org_polygon = [(each[0][0], each[0][1]) for each in obj.polygon]
            rotated_polygon = rotate_polygon(org_polygon, angle, raw_img_shape,
                                             top_left_x, top_left_y)
            rotated_polygon = np.expand_dims(np.array(rotated_polygon,
                                                      dtype=np.int32),
                                             axis=1)

            rotated_obj = rotated_obj._replace(polygon=rotated_polygon,
                                               box=rotated_bb)
            new_object.append(rotated_obj)
        return new_object

    def get_overlap_object(self, page, page_index, cropped_result_raw):
        cropped_result = copy.deepcopy(cropped_result_raw)
        page_polygon = self.create_shapely_polygon(page)
        if not page_polygon.is_valid:
            page_polygon = page_polygon.buffer(0)
        del cropped_result[page_index]
        overlaped_object = []
        for each_object in cropped_result:
            obj_polygon = self.create_shapely_polygon(each_object)
            intersec_percentage = obj_polygon.intersection(
                page_polygon).area / obj_polygon.area
            if intersec_percentage >= 0.85:
                overlaped_object.append(each_object)
        return overlaped_object

    def keep_only_biggest(self, all_object):
        group_dict = {k.class_id: [] for k in all_object}
        filtered_object = []
        for each in all_object:
            group_dict[each.class_id].append(each)
        for each_group, all_members in group_dict.items():
            area_list = []
            for each_member in all_members:
                each_polygon = self.create_shapely_polygon(each_member)
                area_list.append(each_polygon.area)
            filtered_object.append(all_members[np.argmax(area_list)])
        return filtered_object

    def refine_object_location(self, page_bbox, other_objects):
        x, y = page_bbox[0], page_bbox[1]
        new_objects = []
        for index, each in enumerate(other_objects):
            refined_object = each
            old_bb = refined_object.box
            new_bb = np.array(
                [old_bb[0] - x, old_bb[1] - y, old_bb[2] - x, old_bb[3] - y],
                dtype=np.int32)
            old_polygon = np.squeeze(refined_object.polygon)
            new_polygon = np.array([[e[0] - x, e[1] - y] for e in old_polygon],
                                   dtype=np.int32)
            new_polygon = np.expand_dims(new_polygon, axis=1)
            refined_object = refined_object._replace(box=new_bb,
                                                     polygon=new_polygon)
            new_objects.append(refined_object)
        return new_objects

    def find_object_by_name(self, all_object, field_name):
        for each_object in all_object:
            if self.id_to_class_name[each_object.class_id] == field_name:
                return each_object
        return None

    def big_rotate_without_anchor(self, cropped_page, page, all_object):
        h, w, _ = cropped_page.shape
        if h > w:
            angle = 90
            before_rotate_shape = cropped_page.shape[:-1]
            cropped_page = imutils.rotate_bound(cropped_page,
                                                angle,
                                                cval=(255, 255, 255))
            after_rotate_shape = cropped_page.shape[:-1]
            page = self.rotate_anno([page], angle, after_rotate_shape,
                                    before_rotate_shape)[0]
            all_object = self.rotate_anno(all_object, angle,
                                          after_rotate_shape,
                                          before_rotate_shape)
        return cropped_page, page, all_object

    def big_rotate_with_anchor(self, cropped_page, page, all_object,
                               anchor_field):
        anchor_object = self.find_object_by_name(all_object, anchor_field)
        anchor_polygon = self.create_shapely_polygon(anchor_object)
        if not anchor_polygon.is_valid:
            anchor_polygon = anchor_polygon.buffer(0)
        anchor_points = anchor_polygon.centroid.coords[0]
        page_height, page_width, _ = cropped_page.shape
        up_side_down = False
        if anchor_field in ('profile_image', 'van_tay'):
            if anchor_points[0] >= 0.5 * page_width:
                up_side_down = True
        elif anchor_field == 'passport_code':
            if anchor_points[1] <= 0.5 * page_height:
                up_side_down = True
        if up_side_down:
            for i in range(2):  # 180 deg
                angle = 90
                before_rotate_shape = cropped_page.shape[:-1]
                cropped_page = imutils.rotate_bound(cropped_page,
                                                    angle,
                                                    cval=(255, 255, 255))
                after_rotate_shape = cropped_page.shape[:-1]
                page = self.rotate_anno([page], angle, after_rotate_shape,
                                        before_rotate_shape)[0]
                all_object = self.rotate_anno(all_object, angle,
                                              after_rotate_shape,
                                              before_rotate_shape)
        return cropped_page, page, all_object

    def crop_and_rotate(self, image, debug_id=None):
        start_time = time.time()
        cropped_results = self.predict_crop(image, debug_id=debug_id)
        if self.debug:
            cropped_result, debug_path = cropped_results
        else:
            cropped_result = cropped_results
        all_pages_result = [(index, each)
                            for index, each in enumerate(cropped_result)
                            if each.class_id == 1]
        results = []
        for page_index, each_page in all_pages_result:
            raw_page_polygon = each_page.polygon  # For later return

            # Find which other objects are associated with this page
            other_objects = self.get_overlap_object(each_page, page_index,
                                                    cropped_result)

            # Crop the page in raw img
            page_bbox = [int(each) for each in each_page.box]
            cropped_page = image[page_bbox[1]:page_bbox[3],
                                 page_bbox[0]:page_bbox[2]]
            # And then refine the page polygon
            each_page = self.refine_object_location(page_bbox, [each_page])[0]

            if other_objects:
                # Then clear duplicates, keeping only the biggest of each class
                other_objects = self.keep_only_biggest(other_objects)
                # And refine the associated locations right now
                other_objects = self.refine_object_location(
                    page_bbox, other_objects)

            # Then do fine rotation for the whole group first
            # Now we estimate the rotation angle of the page
            angle = cv2.minAreaRect(each_page.polygon)[-1]
            if angle < -45:
                angle = -(90 + angle)
            else:
                angle = -angle

            # Then rotate the whole bounding box, including the page and the annotations associated with that page
            before_rotate_shape = cropped_page.shape[:-1]
            cropped_page = imutils.rotate_bound(cropped_page,
                                                angle=angle,
                                                cval=(255, 255, 255))
            after_rotate_shape = cropped_page.shape[:-1]

            # After this we need to crop the page and refine all polygons again
            each_page = self.rotate_anno([each_page], angle,
                                         after_rotate_shape,
                                         before_rotate_shape)[0]
            page_polygon = [(each[0][0], each[0][1])
                            for each in each_page.polygon]
            all_X = [each[0] for each in page_polygon]
            all_Y = [each[1] for each in page_polygon]

            # Practice shows we should extend the crop a little to avoid bad masks around the border
            current_height, current_width, _ = cropped_page.shape
            page_bbox = [
                max(0,
                    min(all_X) - int(0.15 * current_width)),
                max(0,
                    min(all_Y) - int(0.15 * current_height)),
                min(current_width,
                    max(all_X) + int(0.1 * current_width)),
                min(current_height,
                    max(all_Y) + int(0.1 * current_height))
            ]
            cropped_page = cropped_page[page_bbox[1]:page_bbox[3],
                                        page_bbox[0]:page_bbox[2]]
            each_page = self.refine_object_location(page_bbox, [each_page])[0]
            if other_objects:
                other_objects = self.rotate_anno(other_objects, angle,
                                                 after_rotate_shape,
                                                 before_rotate_shape)
                other_objects = self.refine_object_location(
                    page_bbox, other_objects)

            if self.debug:
                viz_img = cv2.polylines(cropped_page.copy(),
                                        [x.polygon for x in other_objects],
                                        isClosed=True,
                                        color=(0, 255, 255),
                                        thickness=2)
                cv2.imwrite(
                    os.path.join(
                        debug_path,
                        'rotate_step_1_page_{}.png'.format(page_index)),
                    viz_img)

            # Now we do big rotation like 90 or 180 :P
            cropped_page, each_page, other_objects = self.big_rotate_without_anchor(
                cropped_page, each_page, other_objects)

            if self.debug:
                viz_img = cv2.polylines(cropped_page.copy(),
                                        [x.polygon for x in other_objects],
                                        isClosed=True,
                                        color=(0, 255, 255),
                                        thickness=2)
                cv2.imwrite(
                    os.path.join(
                        debug_path,
                        'rotate_step_2_page_{}.png'.format(page_index)),
                    viz_img)

            other_object_name = []
            if other_objects:
                # Then use an anchor object to correct upside-down cases
                other_object_name = [
                    self.id_to_class_name[each.class_id]
                    for each in other_objects
                ]
                # If fingerprint, face and MRZ all appear, use the one with the
                # highest confidence, prioritizing profile image and fingerprint first
                most_conf = max(other_objects, key=lambda x: x.score)
                anchor_field = self.id_to_class_name[most_conf.class_id]
                do_it = False
                if 'profile_image' in other_object_name and anchor_field != 'profile_image':
                    profile_image_object = other_objects[
                        other_object_name.index('profile_image')]
                    if abs(profile_image_object.score -
                           most_conf.score) <= 0.05:
                        anchor_field = 'profile_image'
                        do_it = True
                if not do_it and 'van_tay' in other_object_name and anchor_field != 'van_tay':
                    fingerprint_object = other_objects[
                        other_object_name.index('van_tay')]
                    if abs(fingerprint_object.score -
                           most_conf.score) <= 0.05:
                        anchor_field = 'van_tay'
                cropped_page, each_page, other_objects = self.big_rotate_with_anchor(
                    cropped_page, each_page, other_objects, anchor_field)

            if self.debug:
                viz_img = cv2.polylines(cropped_page.copy(),
                                        [x.polygon for x in other_objects],
                                        isClosed=True,
                                        color=(0, 255, 255),
                                        thickness=2)
                cv2.imwrite(
                    os.path.join(debug_path,
                                 'rotated_page_{}.png'.format(page_index)),
                    viz_img)

            # Now just do some minor formatting
            return_res = []
            for field in ['profile_image', 'passport_code']:
                if field in other_object_name:
                    obj = other_objects[other_object_name.index(field)]
                    temp_res = {
                        'polys':
                        [(each[0][0], each[0][1]) for each in obj.polygon],
                        'conf': obj.score
                    }
                else:
                    temp_res = None
                return_res.append(temp_res)

            face_res, mrz_res = return_res
            results.append({
                'crop_rotated_page': {
                    'image':
                    cropped_page,
                    'polys':
                    [(each[0][0], each[0][1]) for each in raw_page_polygon],
                    'conf':
                    each_page.score,
                },
                'face': face_res,
                'mrz': mrz_res
            })

        if self.debug:
            print('Crop and rotate took {} secs'.format(time.time() -
                                                        start_time))
            # with open(os.path.join(debug_path, 'crop_and_rotate.json'), 'w', encoding='utf-8') as f:
            #     json.dump(results, f, ensure_ascii=False, indent=4)
        return results
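
Given `results` as returned above, a hedged sketch of consuming one entry per detected page (field names taken from the dicts built in this method; the enclosing method's name is not shown in this excerpt):

for page in results:
    crop = page['crop_rotated_page']['image']      # deskewed page crop (np.ndarray)
    page_conf = page['crop_rotated_page']['conf']  # page detection confidence
    if page['face'] is not None:
        face_polys = page['face']['polys']         # profile-image polygon points
    if page['mrz'] is not None:
        mrz_conf = page['mrz']['conf']             # passport-code confidence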
Example No. 28
0
def detect_one_image(img, model_func, *args):
    """
    Run detection on one image, using the TF callable.
    This function should handle the preprocessing internally.

    Args:
        img: an image
        model_func: a callable from TF model,
            takes image and returns (boxes, probs, labels, [masks])
        *args: extra inputs forwarded to model_func
            (used when config.EXTRACT_FEATURES is set)

    Returns:
        [DetectionResult]
    """

    orig_shape = img.shape[:2]
    resizer = CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE)
    resized_img = resizer.augment(img)
    scale = (resized_img.shape[0] * 1.0 / img.shape[0] +
             resized_img.shape[1] * 1.0 / img.shape[1]) / 2
    if config.USE_SECOND_HEAD:
        if config.EXTRACT_FEATURES:
            boxes, probs, labels, posteriors, second_labels, second_posteriors, masks, features = model_func(
                resized_img)
            masks = [masks]
        else:
            boxes, probs, labels, posteriors, second_labels, second_posteriors, *masks = model_func(
                resized_img)
            features = [None for _ in range(labels.size)]
    else:
        if config.EXTRACT_FEATURES:
            boxes, probs, labels, posteriors, masks, features = model_func(
                resized_img, *args)
            masks = [masks]
        else:
            boxes, probs, labels, posteriors, *masks = model_func(resized_img)
            features = [None for _ in range(labels.size)]
    boxes = boxes / scale
    boxes = clip_boxes(boxes, orig_shape)

    if masks:
        # has mask
        full_masks = [
            fill_full_mask(box, mask, orig_shape)
            for box, mask in zip(boxes, masks[0])
        ]
        masks = full_masks
    else:
        # fill with none
        masks = [None] * len(boxes)

    if config.USE_SECOND_HEAD:
        results = [
            SecondDetectionResult(*args)
            for args in zip(boxes, probs, labels, posteriors, masks,
                            second_labels, second_posteriors, features)
        ]
    else:
        results = [
            DetectionResult(*args)
            for args in zip(boxes, probs, labels, posteriors, masks, features)
        ]
    return results
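
A minimal usage sketch, assuming `pred` is a callable wrapping the frozen TF graph (hypothetical here, e.g. a tensorpack OfflinePredictor whose outputs match the tuple unpacked above):

import cv2

# `pred` is assumed to be built elsewhere; it must return the tuple layout
# that detect_one_image unpacks for the active config flags.
img = cv2.imread('/path/to/test.jpg', cv2.IMREAD_COLOR)
assert img is not None
results = detect_one_image(img, pred)
print('{} detections'.format(len(results)))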
Example No. 29
0
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """

    imgs = COCODetection.load_many(cfg.DATA.BASEDIR,
                                   cfg.DATA.TRAIN,
                                   add_gt=True,
                                   add_mask=cfg.MODE_MASK)
    """
    To train on your own data, change this to your loader.
    Produce "imgs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str, full path to the image
    boxes: numpy array of kx4 floats
    class: numpy array of k integers
    is_crowd: k booleans. Use k False if you don't know what it means.
    segmentation: k lists of numpy arrays (one for each box).
        Each list of numpy array corresponds to the mask for one instance.
        Each numpy array in the list is a polygon of shape Nx2,
        because one mask can be represented by N polygons.

        If your segmentation annotations are originally masks rather than polygons,
        either convert it, or the augmentation code below will need to be
        changed or skipped accordingly.
    """

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(imgs)
    imgs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, imgs))
    logger.info(
        "Filtered {} images which contain no non-crowd groundtruth boxes. Total #images for training: {}"
        .format(num - len(imgs), len(imgs)))

    ds = DataFromList(imgs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img[
            'class'], img['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(
                    im, boxes, is_crowd)
                anchor_inputs = itertools.chain.from_iterable(
                    multilevel_anchor_inputs)
            else:
                # anchor_labels, anchor_boxes
                anchor_inputs = get_rpn_anchor_input(im, boxes, is_crowd)
                assert len(anchor_inputs) == 2

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        ret = [im] + list(anchor_inputs) + [boxes, klass]

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img['segmentation'])
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
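
A minimal sketch of pulling datapoints from this dataflow (assumes a recent tensorpack where a DataFlow is iterable after reset_state()):

if __name__ == '__main__':
    ds = get_train_dataflow()
    ds.reset_state()  # required once before iterating a tensorpack DataFlow
    for i, dp in enumerate(ds):
        im = dp[0]  # the augmented image, (h, w, 3) float32
        print('datapoint {}: image shape {}'.format(i, im.shape))
        if i == 2:
            break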
Example No. 30
0
def get_train_dataflow(src):
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """

    #imgs = COCODetection.load_many(cfg.DATA.BASEDIR, cfg.DATA.TRAIN, add_gt=True, add_mask=cfg.MODE_MASK)

    classes = (
        'BG',  # always index 0
        'bathtub',
        'bed',
        'bookshelf',
        'box',
        'chair',
        'counter',
        'desk',
        'door',
        'dresser',
        'garbage_bin',
        'lamp',
        'monitor',
        'night_stand',
        'pillow',
        'sink',
        'sofa',
        'table',
        'toilet',
        'tv')

    class_to_ind = dict(list(zip(classes, list(range(len(classes))))))
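    # e.g. class_to_ind['BG'] == 0, class_to_ind['bed'] == 2, class_to_ind['tv'] == 19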
    #src = '/media/ayan/Drive/IMI-Research/Datasets/Datasets_OP_Train/'
    textfile_index = natsorted(
        [src + f for f in np.sort(os.listdir(src)) if f.endswith('.txt')])
    imgs = []
    count = 0
    for fn in textfile_index:
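        # Each annotation .txt file is assumed to be laid out as:
        #   line 0: full path to the image
        #   line 1: unused here (skipped by the slice below)
        #   lines 2..n-1: one object per line: "<class_name> <x1> <y1> <x2> <y2>"
        # Coordinates are treated as 1-based, hence the -1.0 offsets below;
        # the final element after split('\n') (empty trailing line) is dropped.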
        each_file = {}
        count = count + 1
        print(str(count) + ':::', fn)
        with open(fn, 'r') as F:
            file_F = F.read()
        file_F = file_F.split('\n')
        each_file['file_name'] = file_F[0]
        im = cv2.imread(each_file['file_name'])
        each_file['height'] = im.shape[0]
        each_file['width'] = im.shape[1]
        objects = file_F[2:len(file_F) - 1]
        boxes = []
        class_ = []
        for obj in objects:
            objs_line = obj.split(' ')
            x1 = float(objs_line[1]) - 1.0
            y1 = float(objs_line[2]) - 1.0
            x2 = float(objs_line[3]) - 1.0
            y2 = float(objs_line[4]) - 1.0
            # Guard against degenerate (zero-width/height) boxes
            if x1 >= x2:
                x2 = x1 + 1
            if y1 >= y2:
                y2 = y1 + 1
            boxes.append([x1, y1, x2, y2])
            cls = class_to_ind[objs_line[0]]
            class_.append(cls)
        each_file['boxes'] = np.array(boxes).astype(np.float32)
        each_file['class'] = np.array(class_).astype(np.int32)
        each_file['is_crowd'] = np.zeros_like(each_file['class']).astype(
            np.int8)
        imgs.append(each_file)
    """ 
    To train on your own data, change this to your loader.
    Produce "imgs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str, full path to the image
    boxes: numpy array of kx4 floats
    class: numpy array of k integers
    is_crowd: k booleans. Use k False if you don't know what it means.
    segmentation: k lists of numpy arrays (one for each box).
        Each list of numpy array corresponds to the mask for one instance.
        Each numpy array in the list is a polygon of shape Nx2,
        because one mask can be represented by N polygons.

        If your segmentation annotations are originally masks rather than polygons,
        either convert it, or the augmentation code below will need to be
        changed or skipped accordingly.
    """

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(imgs)
    imgs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, imgs))
    logger.info(
        "Filtered {} images which contain no non-crowd groundtruth boxes. Total #images for training: {}"
        .format(num - len(imgs), len(imgs)))

    ds = DataFromList(imgs, shuffle=False)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img[
            'class'], img['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(
                    im, boxes, is_crowd)
                anchor_inputs = itertools.chain.from_iterable(
                    multilevel_anchor_inputs)
            else:
                # anchor_labels, anchor_boxes
                anchor_inputs = get_rpn_anchor_input(im, boxes, is_crowd)
                assert len(anchor_inputs) == 2

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        ret = [im] + list(anchor_inputs) + [boxes, klass]

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img['segmentation'])
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds