def build(self, config):
        """Build Mask R-CNN architecture.
        """

        # Image size must be divisible by 2 multiple times
        h, w = config.IMAGE_SHAPE[:2]
        if h / 2**6 != int(h / 2**6) or w / 2**6 != int(w / 2**6):
            raise Exception(
                "Image size must be divisible by 2 at least 6 times "
                "to avoid fractions when downscaling and upscaling. "
                "For example, use 256, 320, 384, 448, 512, ... etc.")

        # Build the shared convolutional layers.
        # Bottom-up Layers
        # Returns a list of the last layers of each stage, 5 in total.
        # Stage 5 (the head) is included here because stage5=True.
        resnet = ResNet.ResNet("resnet101", stage5=True)
        C1, C2, C3, C4, C5 = resnet.stages()

        # Top-down Layers
        # TODO: add assert to verify feature map sizes match what's in config
        self.fpn = FPN.FPN(C1, C2, C3, C4, C5, out_channels=256)

        # Generate Anchors
        self.anchors = Variable(torch.from_numpy(
            utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                           config.RPN_ANCHOR_RATIOS,
                                           config.BACKBONE_SHAPES,
                                           config.BACKBONE_STRIDES,
                                           config.RPN_ANCHOR_STRIDE)).float(),
                                requires_grad=False)
        if self.config.GPU_COUNT:
            self.anchors = self.anchors.cuda()

        # RPN
        self.rpn = RPN.RPN(len(config.RPN_ANCHOR_RATIOS),
                           config.RPN_ANCHOR_STRIDE, 256)

        # FPN Classifier
        self.classifier = FPN_head.Classifier(256, config.POOL_SIZE,
                                              config.IMAGE_SHAPE,
                                              config.NUM_CLASSES)

        # FPN Mask
        self.mask = FPN_head.Mask(256, config.MASK_POOL_SIZE,
                                  config.IMAGE_SHAPE, config.NUM_CLASSES)

        # Fix batch norm layers
        def set_bn_fix(m):
            classname = m.__class__.__name__
            if classname.find('BatchNorm') != -1:
                for p in m.parameters():
                    p.requires_grad = False

        self.apply(set_bn_fix)
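A minimal, self-contained sketch (not from the original snippet; the tiny network is an assumption) showing what set_bn_fix does: after apply(), the BatchNorm affine parameters stop receiving gradients while conv parameters stay trainable.

import torch.nn as nn

def set_bn_fix(m):
    # Freeze BatchNorm weight/bias so they are excluded from optimization.
    if 'BatchNorm' in m.__class__.__name__:
        for p in m.parameters():
            p.requires_grad = False

net = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8), nn.ReLU())
net.apply(set_bn_fix)
print([p.requires_grad for p in net.parameters()])
# -> [True, True, False, False]  (conv weight/bias trainable, BN weight/bias frozen)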
Example #2
    def get_anchors(self, image_shape):
        backbone_shapes = utils.compute_backbone_shapes(
            self.backbone, self.backbone_strides, image_shape)
        if not hasattr(self, "_anchor_cache"):
            self._anchor_cache = {}
        if not tuple(image_shape) in self._anchor_cache:
            a = utils.generate_pyramid_anchors(self.rpn_anchor_scales,
                                               self.rpn_anchor_ratios,
                                               backbone_shapes,
                                               self.backbone_strides,
                                               self.rpn_anchor_stride)
            self._anchor_cache[tuple(image_shape)] = utils.norm_boxes(
                a, image_shape[:2])
        return self._anchor_cache[tuple(image_shape)]
def generate_all_anchors(fpn_shapes, image_shape, config):
    '''Generate anchors for all pyramid feature maps.'''
    anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                             config.RPN_ANCHOR_RATIOS,
                                             fpn_shapes,
                                             config.BACKBONE_STRIDES,
                                             config.RPN_ANCHOR_STRIDE)
    # Normalize coordinates: numpy array [N, (y1, x1, y2, x2)]
    norm_anchors = utils.norm_boxes(anchors, image_shape)
    anchors_tensor = tf.convert_to_tensor(norm_anchors)
    # Duplicate across the batch dimension
    batch_anchors = tf.broadcast_to(
        anchors_tensor,
        [config.IMAGES_PER_GPU, tf.shape(anchors_tensor)[0], tf.shape(anchors_tensor)[1]])
    return batch_anchors
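A minimal usage sketch, not part of the original snippet: `config` is assumed to be a Mask R-CNN style config object, and the FPN shapes are illustrative values for a 512x512 input with backbone strides (4, 8, 16, 32, 64).

# Hypothetical call; shapes are [height, width] per pyramid level.
fpn_shapes = [[128, 128], [64, 64], [32, 32], [16, 16], [8, 8]]
batch_anchors = generate_all_anchors(fpn_shapes, image_shape=(512, 512), config=config)
# batch_anchors: [IMAGES_PER_GPU, num_anchors, (y1, x1, y2, x2)] in normalized coordinates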
Example #4
    def __init__(self, dataset, config, augment=True):
        """A generator that returns images and corresponding target class ids,
            bounding box deltas, and masks.

            dataset: The Dataset object to pick data from
            config: The model config object
            shuffle: If True, shuffles the samples before every epoch
            augment: If True, applies image augmentation to images (currently only
                     horizontal flips are supported)

            Returns a Python generator. Upon calling next() on it, the
            generator returns two lists, inputs and outputs. The contents
            of the lists differ depending on the received arguments:
            inputs list:
            - images: [batch, H, W, C]
            - image_metas: [batch, size of image meta]
            - rpn_match: [batch, N] Integer (1=positive anchor, -1=negative, 0=neutral)
            - rpn_bbox: [batch, N, (dy, dx, log(dh), log(dw))] Anchor bbox deltas.
            - gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs
            - gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)]
            - gt_masks: [batch, height, width, MAX_GT_INSTANCES]. The height and width
                        are those of the image unless use_mini_mask is True, in which
                        case they are defined in MINI_MASK_SHAPE.

            outputs list: Usually empty in regular training. But if detection_targets
                is True then the outputs list contains target class_ids, bbox deltas,
                and masks.
            """
        self.b = 0  # batch item index
        self.image_index = -1
        self.image_ids = np.copy(dataset.image_ids)
        self.error_count = 0

        self.dataset = dataset
        self.config = config
        self.augment = augment

        # Anchors
        # [anchor_count, (y1, x1, y2, x2)]
        self.anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                                      config.RPN_ANCHOR_RATIOS,
                                                      config.BACKBONE_SHAPES,
                                                      config.BACKBONE_STRIDES,
                                                      config.RPN_ANCHOR_STRIDE)
Example #5
def get_anchors(image_shape, config):
    """Returns anchor pyramid for the given image size."""
    backbone_shapes = compute_backbone_shapes(config, image_shape)
    # Cache anchors and reuse if image shape is the same.
    # Note: this cache is a local variable, so it is rebuilt on every call;
    # see the module-level sketch below for a cache that actually persists.
    _anchor_cache = {}
    if not tuple(image_shape) in _anchor_cache:
        # Generate Anchors
        a = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                           config.RPN_ANCHOR_RATIOS,
                                           backbone_shapes,
                                           config.BACKBONE_STRIDES,
                                           config.RPN_ANCHOR_STRIDE)
        # Keep a copy of the latest anchors in pixel coordinates because
        # they're used in the inspect_model notebooks.
        # TODO: Remove this after the notebooks are refactored to not use it
        anchors = a
        # Normalize coordinates
        _anchor_cache[tuple(image_shape)] = utils.norm_boxes(
            a, image_shape[:2])
    return _anchor_cache[tuple(image_shape)]
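Because _anchor_cache above is local, every call regenerates the anchors. A minimal persistent variant, sketched under the assumption that the same utils helpers and compute_backbone_shapes are importable, moves the cache to module scope:

_ANCHOR_CACHE = {}  # module-level, so repeated calls with the same image shape reuse anchors

def get_anchors_cached(image_shape, config):
    key = tuple(image_shape)
    if key not in _ANCHOR_CACHE:
        backbone_shapes = compute_backbone_shapes(config, image_shape)
        a = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                           config.RPN_ANCHOR_RATIOS,
                                           backbone_shapes,
                                           config.BACKBONE_STRIDES,
                                           config.RPN_ANCHOR_STRIDE)
        _ANCHOR_CACHE[key] = utils.norm_boxes(a, image_shape[:2])
    return _ANCHOR_CACHE[key]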
    def __init__(self,
                 options,
                 config,
                 split,
                 random=True,
                 loadNeighborImage=False,
                 load_semantics=False,
                 load_boundary=False):
        self.options = options
        self.config = config
        self.split = split
        self.random = random

        self.dataFolder = options.dataFolder

        self.scenes = []
        self.sceneImageIndices = []

        self.loadClassMap()

        #planenet_scene_ids_val = np.load('datasets/scene_ids_val.npy')
        #planenet_scene_ids_val = {scene_id.decode('utf-8'): True for scene_id in planenet_scene_ids_val}
        #print(planenet_scene_ids_val)
        # with open(self.dataFolder + '/ScanNet/Tasks/Benchmark/scannetv2_' + split + '.txt') as f:
        #     for line in f:
        #         scene_id = line.strip()
        #         if split == 'test':
        #             ## Remove scenes which are in PlaneNet's training set for fair comparison
        #             # if scene_id not in planenet_scene_ids_val:
        #             #     continue
        #             pass
        #         scenePath = self.dataFolder + '/scans/' + scene_id
        #         if not os.path.exists(scenePath + '/' + scene_id + '.txt') or not os.path.exists(scenePath + '/annotation/planes.npy'):
        #             # print(scenePath + '/' + scene_id + '.txt')
        #             # print(scenePath + '/annotation/planes.npy')
        #             # print("here")
        #             # if True:
        #             #     exit()
        #             continue
        #         scene = CustomScene(options, scenePath, scene_id, self.confident_labels, self.layout_labels, load_semantics=load_semantics, load_boundary=load_boundary)
        #         self.scenes.append(scene)
        #         self.sceneImageIndices += [[len(self.scenes) - 1, imageIndex] for imageIndex in range(len(scene.imagePaths))]
        #         continue
        #     pass

        scene_id = 'scene0003_02'
        scenePath = self.dataFolder + '/scans/' + scene_id

        # Instantiate a CustomScene (class defined in custom_scene)
        scene = CustomScene(options,
                            scenePath,
                            scene_id,
                            self.confident_labels,
                            self.layout_labels,
                            load_semantics=load_semantics,
                            load_boundary=load_boundary)
        #print("reached #10132483")

        self.scenes.append(scene)
        print("scenes--", self.scenes)
        self.sceneImageIndices += [[
            len(self.scenes) - 1, imageIndex
        ] for imageIndex in range(len(scene.imagePaths))]
        #print(self.sceneImageIndices)

        if random:
            t = int(time.time() * 1000000)
            np.random.seed(((t & 0xff000000) >> 24) + ((t & 0x00ff0000) >> 8) +
                           ((t & 0x0000ff00) << 8) + ((t & 0x000000ff) << 24))
        else:
            np.random.seed(0)
            pass
        np.random.shuffle(self.sceneImageIndices)
        print("length of indices----", len(self.sceneImageIndices))
        #self.invalid_indices = {}

        # with open(self.dataFolder + '/invalid_indices_' + split + '.txt', 'r') as f:
        #     for line in f:
        #         tokens = line.split(' ')
        #         if len(tokens) == 3:
        #             assert(int(tokens[2]) < 10000)
        #             invalid_index = int(tokens[1]) * 10000 + int(tokens[2])
        #             if invalid_index not in self.invalid_indices:
        #                 self.invalid_indices[invalid_index] = True
        #                 pass
        #             pass
        #         continue
        #     pass

        self.sceneImageIndices = [[
            sceneIndex, imageIndex
        ] for sceneIndex, imageIndex in self.sceneImageIndices]

        print('num images', len(self.sceneImageIndices))

        # if True:
        #     exit()

        self.anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                                      config.RPN_ANCHOR_RATIOS,
                                                      config.BACKBONE_SHAPES,
                                                      config.BACKBONE_STRIDES,
                                                      config.RPN_ANCHOR_STRIDE)

        self.loadNeighborImage = loadNeighborImage

        return
Example #7
    def build(self, mode, config):
        """Build Mask R-CNN architecture.
            input_shape: The shape of the input image.
            mode: Either "training" or "inference". The inputs and
                outputs of the model differ accordingly.
        """
        assert mode in ['training', 'inference']

        # Image size must be divisible by 2 multiple times
        h, w = config.IMAGE_SHAPE[:2]
        if h / 2**6 != int(h / 2**6) or w / 2**6 != int(w / 2**6):
            raise Exception(
                "Image size must be divisible by 2 at least 6 times "
                "to avoid fractions when downscaling and upscaling. "
                "For example, use 256, 320, 384, 448, 512, ... etc.")

        # Inputs
        input_image = KL.Input(shape=config.IMAGE_SHAPE.tolist(),
                               name="input_image")
        input_image_meta = KL.Input(shape=[None], name="input_image_meta")

        if mode == "training":
            # RPN GT
            input_rpn_match = KL.Input(shape=[None, 1],
                                       name="input_rpn_match",
                                       dtype=tf.int32)
            input_rpn_bbox = KL.Input(shape=[None, 4],
                                      name="input_rpn_bbox",
                                      dtype=tf.float32)

            # Detection GT (class IDs, bounding boxes, and masks)
            # 1. GT Class IDs (zero padded)
            input_gt_class_ids = KL.Input(shape=[None],
                                          name="input_gt_class_ids",
                                          dtype=tf.int32)
            # 2. GT Boxes in pixels (zero padded)
            # [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] in image coordinates
            input_gt_boxes = KL.Input(shape=[None, 4],
                                      name="input_gt_boxes",
                                      dtype=tf.float32)
            # Normalize coordinates
            h, w = K.shape(input_image)[1], K.shape(input_image)[2]
            image_scale = K.cast(K.stack([h, w, h, w], axis=0), tf.float32)
            gt_boxes = KL.Lambda(lambda x: x / image_scale)(input_gt_boxes)
            # 3. GT Masks (zero padded)
            # [batch, height, width, MAX_GT_INSTANCES]
            if config.USE_MINI_MASK:
                input_gt_masks = KL.Input(shape=[
                    config.MINI_MASK_SHAPE[0], config.MINI_MASK_SHAPE[1], None
                ],
                                          name="input_gt_masks",
                                          dtype=bool)
            else:
                input_gt_masks = KL.Input(
                    shape=[config.IMAGE_SHAPE[0], config.IMAGE_SHAPE[1], None],
                    name="input_gt_masks",
                    dtype=bool)

        # Build the shared convolutional layers.
        # Bottom-up Layers
        # Returns a list of the last layers of each stage, 5 in total.
        # Stage 5 (the head) is included because stage5=True; C1 is not used, so it's discarded.
        _, C2, C3, C4, C5 = resnet_graph(input_image, "resnet101", stage5=True)
        # Top-down Layers
        # TODO: add assert to verify feature map sizes match what's in config
        P5 = KL.Conv2D(256, (1, 1), name='fpn_c5p5')(C5)
        P4 = KL.Add(name="fpn_p4add")([
            KL.UpSampling2D(size=(2, 2), name="fpn_p5upsampled")(P5),
            KL.Conv2D(256, (1, 1), name='fpn_c4p4')(C4)
        ])
        P3 = KL.Add(name="fpn_p3add")([
            KL.UpSampling2D(size=(2, 2), name="fpn_p4upsampled")(P4),
            KL.Conv2D(256, (1, 1), name='fpn_c3p3')(C3)
        ])
        P2 = KL.Add(name="fpn_p2add")([
            KL.UpSampling2D(size=(2, 2), name="fpn_p3upsampled")(P3),
            KL.Conv2D(256, (1, 1), name='fpn_c2p2')(C2)
        ])
        # Attach 3x3 conv to all P layers to get the final feature maps.
        P2 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p2")(P2)
        P3 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p3")(P3)
        P4 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p4")(P4)
        P5 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p5")(P5)
        # P6 is used for the 5th anchor scale in RPN. Generated by
        # subsampling from P5 with stride of 2.
        P6 = KL.MaxPooling2D(pool_size=(1, 1), strides=2, name="fpn_p6")(P5)

        # Note that P6 is used in RPN, but not in the classifier heads.
        rpn_feature_maps = [P2, P3, P4, P5, P6]
        mrcnn_feature_maps = [P2, P3, P4, P5]

        # Generate Anchors
        self.anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                                      config.RPN_ANCHOR_RATIOS,
                                                      config.BACKBONE_SHAPES,
                                                      config.BACKBONE_STRIDES,
                                                      config.RPN_ANCHOR_STRIDE)

        # RPN Model
        rpn = build_rpn_model(config.RPN_ANCHOR_STRIDE,
                              len(config.RPN_ANCHOR_RATIOS), 256)
        # Loop through pyramid layers
        layer_outputs = []  # list of lists
        for p in rpn_feature_maps:
            layer_outputs.append(rpn([p]))
        # Concatenate layer outputs
        # Convert from list of lists of level outputs to list of lists
        # of outputs across levels.
        # e.g. [[a1, b1, c1], [a2, b2, c2]] => [[a1, a2], [b1, b2], [c1, c2]]
        output_names = ["rpn_class_logits", "rpn_class", "rpn_bbox"]
        outputs = list(zip(*layer_outputs))
        outputs = [
            KL.Concatenate(axis=1, name=n)(list(o))
            for o, n in zip(outputs, output_names)
        ]

        rpn_class_logits, rpn_class, rpn_bbox = outputs

        # Generate proposals
        # Proposals are [batch, N, (y1, x1, y2, x2)] in normalized coordinates
        # and zero padded.
        proposal_count = config.POST_NMS_ROIS_TRAINING if mode == "training"\
            else config.POST_NMS_ROIS_INFERENCE
        rpn_rois = ProposalLayer(proposal_count=proposal_count,
                                 nms_threshold=config.RPN_NMS_THRESHOLD,
                                 name="ROI",
                                 anchors=self.anchors,
                                 config=config)([rpn_class, rpn_bbox])

        if mode == "training":
            # Class ID mask to mark class IDs supported by the dataset the image
            # came from.
            _, _, _, active_class_ids = KL.Lambda(
                lambda x: parse_image_meta_graph(x),
                mask=[None, None, None, None])(input_image_meta)

            if not config.USE_RPN_ROIS:
                # Ignore predicted ROIs and use ROIs provided as an input.
                input_rois = KL.Input(shape=[config.POST_NMS_ROIS_TRAINING, 4],
                                      name="input_roi",
                                      dtype=np.int32)
                # Normalize coordinates to 0-1 range.
                target_rois = KL.Lambda(lambda x: K.cast(x, tf.float32) /
                                        image_scale[:4])(input_rois)
            else:
                target_rois = rpn_rois

            # Generate detection targets
            # Subsamples proposals and generates target outputs for training
            # Note that proposal class IDs, gt_boxes, and gt_masks are zero
            # padded. Equally, returned rois and targets are zero padded.
            rois, target_class_ids, target_bbox, target_mask =\
                DetectionTargetLayer(config, name="proposal_targets")([
                    target_rois, input_gt_class_ids, gt_boxes, input_gt_masks])

            # Network Heads
            # TODO: verify that this handles zero padded ROIs
            mrcnn_class_logits, mrcnn_class, mrcnn_bbox =\
                fpn_classifier_graph(rois, mrcnn_feature_maps, config.IMAGE_SHAPE,
                                     config.POOL_SIZE, config.NUM_CLASSES)

            mrcnn_mask = build_fpn_mask_graph(rois, mrcnn_feature_maps,
                                              config.IMAGE_SHAPE,
                                              config.MASK_POOL_SIZE,
                                              config.NUM_CLASSES)

            # TODO: clean up (use tf.identity if necessary)
            output_rois = KL.Lambda(lambda x: x * 1, name="output_rois")(rois)

            # Losses
            rpn_class_loss = KL.Lambda(lambda x: rpn_class_loss_graph(*x),
                                       name="rpn_class_loss")(
                                           [input_rpn_match, rpn_class_logits])
            rpn_bbox_loss = KL.Lambda(
                lambda x: rpn_bbox_loss_graph(config, *x),
                name="rpn_bbox_loss")(
                    [input_rpn_bbox, input_rpn_match, rpn_bbox])
            class_loss = KL.Lambda(lambda x: mrcnn_class_loss_graph(*x),
                                   name="mrcnn_class_loss")([
                                       target_class_ids, mrcnn_class_logits,
                                       active_class_ids
                                   ])
            bbox_loss = KL.Lambda(lambda x: mrcnn_bbox_loss_graph(*x),
                                  name="mrcnn_bbox_loss")([
                                      target_bbox, target_class_ids, mrcnn_bbox
                                  ])
            mask_loss = KL.Lambda(lambda x: mrcnn_mask_loss_graph(*x),
                                  name="mrcnn_mask_loss")([
                                      target_mask, target_class_ids, mrcnn_mask
                                  ])

            # Model
            inputs = [
                input_image, input_image_meta, input_rpn_match, input_rpn_bbox,
                input_gt_class_ids, input_gt_boxes, input_gt_masks
            ]
            if not config.USE_RPN_ROIS:
                inputs.append(input_rois)
            outputs = [
                rpn_class_logits, rpn_class, rpn_bbox, mrcnn_class_logits,
                mrcnn_class, mrcnn_bbox, mrcnn_mask, rpn_rois, output_rois,
                rpn_class_loss, rpn_bbox_loss, class_loss, bbox_loss, mask_loss
            ]
            model = KM.Model(inputs, outputs, name='mask_rcnn')
        else:
            # Network Heads
            # Proposal classifier and BBox regressor heads
            mrcnn_class_logits, mrcnn_class, mrcnn_bbox =\
                fpn_classifier_graph(rpn_rois, mrcnn_feature_maps, config.IMAGE_SHAPE,
                                     config.POOL_SIZE, config.NUM_CLASSES)

            # Detections
            # output is [batch, num_detections, (y1, x1, y2, x2, class_id, score)] in image coordinates
            detections = DetectionLayer(config, name="mrcnn_detection")(
                [rpn_rois, mrcnn_class, mrcnn_bbox, input_image_meta])

            # Convert boxes to normalized coordinates
            # TODO: let DetectionLayer return normalized coordinates to avoid
            #       unnecessary conversions
            h, w = config.IMAGE_SHAPE[:2]
            detection_boxes = KL.Lambda(
                lambda x: x[..., :4] / np.array([h, w, h, w]))(detections)

            # Create masks for detections
            mrcnn_mask = build_fpn_mask_graph(detection_boxes,
                                              mrcnn_feature_maps,
                                              config.IMAGE_SHAPE,
                                              config.MASK_POOL_SIZE,
                                              config.NUM_CLASSES)

            model = KM.Model([input_image, input_image_meta], [
                detections, mrcnn_class, mrcnn_bbox, mrcnn_mask, rpn_rois,
                rpn_class, rpn_bbox
            ],
                             name='mask_rcnn')

        # Add multi-GPU support.
        if config.GPU_COUNT > 1:
            from parallel_model import ParallelModel
            model = ParallelModel(model, config.GPU_COUNT)

        return model
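For orientation, a minimal sketch of driving the inference-mode model returned by this build(); `keras_model`, `molded_images`, and `image_metas` are assumptions following the Mask R-CNN conventions documented above, not part of the original snippet.

# Hypothetical usage: keras_model = self.build(mode="inference", config=config)
detections, mrcnn_class, mrcnn_bbox, mrcnn_mask, rpn_rois, rpn_class, rpn_bbox = \
    keras_model.predict([molded_images, image_metas], verbose=0)
# detections: [batch, num_detections, (y1, x1, y2, x2, class_id, score)] in image coordinates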
Example #8
# Display image and additional stats
print("image_id ", image_id, dataset.image_reference(image_id))
log("image", image)
log("mask", mask)
log("class_ids", class_ids)
log("bbox", bbox)
# Display image and instances
visualize.display_instances(image, bbox, mask, class_ids, dataset.class_names)

BACKBONE_SHAPES = compute_backbone_shapes(config, config.IMAGE_SHAPE)

# Generate Anchors
anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                         config.RPN_ANCHOR_RATIOS,
                                         BACKBONE_SHAPES,
                                         config.BACKBONE_STRIDES,
                                         config.RPN_ANCHOR_STRIDE)

# Print summary of anchors
num_levels = len(BACKBONE_SHAPES)
anchors_per_cell = len(config.RPN_ANCHOR_RATIOS)
print("Count: ", anchors.shape[0])
print("Scales: ", config.RPN_ANCHOR_SCALES)
print("ratios: ", config.RPN_ANCHOR_RATIOS)
print("Anchors per Cell: ", anchors_per_cell)
print("Levels: ", num_levels)
anchors_per_level = []
for l in range(num_levels):
    num_cells = BACKBONE_SHAPES[l][0] * BACKBONE_SHAPES[l][1]
    anchors_per_level.append(anchors_per_cell * num_cells //
                             config.RPN_ANCHOR_STRIDE**2)
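A small assumed continuation that prints the per-level breakdown computed above:

for l in range(num_levels):
    print("Anchors in Level {}: {}".format(l, anchors_per_level[l]))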
Example #9
    def build(self, mode, config):
        """Build Mask R-CNN architecture.
            input_shape: The shape of the input image.
            mode: Either "training" or "inference". The inputs and
                outputs of the model differ accordingly.
        """
        assert mode in ['training', 'inference']

        # Image size must be divisible by 2 multiple times
        h, w = config.IMAGE_SHAPE[:2]
        print("HEIGHT AND WIDTH BELOW")
        print(h)
        print(w)
        if h / 2**6 != int(h / 2**6) or w / 2**6 != int(w / 2**6):
            raise Exception(
                "Image size must be divisible by 2 at least 6 times "
                "to avoid fractions when downscaling and upscaling. "
                "For example, use 256, 320, 384, 448, 512, ... etc.")

        # Inputs
        input_image = KL.Input(shape=config.IMAGE_SHAPE.tolist(),
                               name="input_image")
        input_image_meta = KL.Input(shape=[None], name="input_image_meta")

        # Build the shared convolutional layers.
        # Bottom-up Layers
        # Returns a list of the last layers of each stage, 5 in total.
        # Stage 5 (the head) is included because stage5=True; C1 is not used, so it's discarded.
        _, C2, C3, C4, C5 = resnet_graph(input_image, "resnet101", stage5=True)
        # Top-down Layers
        P5 = KL.Conv2D(256, (1, 1), name='fpn_c5p5')(C5)
        P4 = KL.Add(name="fpn_p4add")([
            KL.UpSampling2D(size=(2, 2), name="fpn_p5upsampled")(P5),
            KL.Conv2D(256, (1, 1), name='fpn_c4p4')(C4)
        ])
        P3 = KL.Add(name="fpn_p3add")([
            KL.UpSampling2D(size=(2, 2), name="fpn_p4upsampled")(P4),
            KL.Conv2D(256, (1, 1), name='fpn_c3p3')(C3)
        ])
        P2 = KL.Add(name="fpn_p2add")([
            KL.UpSampling2D(size=(2, 2), name="fpn_p3upsampled")(P3),
            KL.Conv2D(256, (1, 1), name='fpn_c2p2')(C2)
        ])
        # Attach 3x3 conv to all P layers to get the final feature maps.
        P2 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p2")(P2)
        P3 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p3")(P3)
        P4 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p4")(P4)
        P5 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p5")(P5)
        # P6 is used for the 5th anchor scale in RPN. Generated by
        # subsampling from P5 with stride of 2.
        P6 = KL.MaxPooling2D(pool_size=(1, 1), strides=2, name="fpn_p6")(P5)

        # Note that P6 is used in RPN, but not in the classifier heads.
        rpn_feature_maps = [P2, P3, P4, P5, P6]
        mrcnn_feature_maps = [P2, P3, P4, P5]

        # Generate Anchors
        self.anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                                      config.RPN_ANCHOR_RATIOS,
                                                      config.BACKBONE_SHAPES,
                                                      config.BACKBONE_STRIDES,
                                                      config.RPN_ANCHOR_STRIDE)

        # RPN Model
        rpn = build_rpn_model(config.RPN_ANCHOR_STRIDE,
                              len(config.RPN_ANCHOR_RATIOS), 256)
        # Loop through pyramid layers
        layer_outputs = []  # list of lists
        for p in rpn_feature_maps:
            layer_outputs.append(rpn([p]))
        # Concatenate layer outputs
        # Convert from list of lists of level outputs to list of lists
        # of outputs across levels.
        # e.g. [[a1, b1, c1], [a2, b2, c2]] => [[a1, a2], [b1, b2], [c1, c2]]
        output_names = ["rpn_class_logits", "rpn_class", "rpn_bbox"]
        outputs = list(zip(*layer_outputs))
        outputs = [
            KL.Concatenate(axis=1, name=n)(list(o))
            for o, n in zip(outputs, output_names)
        ]

        rpn_class_logits, rpn_class, rpn_bbox = outputs

        # Generate proposals
        # Proposals are [N, (y1, x1, y2, x2)] in normalized coordinates.
        # proposal_count = config.POST_NMS_ROIS_TRAINING if mode == "training" \
        #     else config.POST_NMS_ROIS_INFERENCE
        proposal_count = config.POST_NMS_ROIS_INFERENCE
        rpn_rois = ProposalLayer(proposal_count=proposal_count,
                                 nms_threshold=0.7,
                                 name="ROI",
                                 anchors=self.anchors,
                                 config=config)([rpn_class, rpn_bbox])

        # Network Heads
        # Proposal classifier and BBox regressor heads
        mrcnn_class_logits, mrcnn_class, mrcnn_bbox = \
            fpn_classifier_graph(rpn_rois, mrcnn_feature_maps, config.IMAGE_SHAPE,
                                 config.POOL_SIZE, config.NUM_CLASSES)
        # Detections
        # output is [batch, num_detections, (y1, x1, y2, x2, class_id, score)] in image coordinates
        detections = DetectionLayer(config, name="mrcnn_detection")(
            [rpn_rois, mrcnn_class, mrcnn_bbox, input_image_meta])
        # Convert boxes to normalized coordinates
        h, w = config.IMAGE_SHAPE[:2]
        detection_boxes = KL.Lambda(
            lambda x: x[..., :4] / np.array([h, w, h, w]))(detections)
        # Create masks for detections
        mrcnn_mask = build_fpn_mask_graph(detection_boxes, mrcnn_feature_maps,
                                          config.IMAGE_SHAPE,
                                          config.MASK_POOL_SIZE,
                                          config.NUM_CLASSES)
        model = KM.Model([input_image, input_image_meta], [
            detections, mrcnn_class, mrcnn_bbox, mrcnn_mask, rpn_rois,
            rpn_class, rpn_bbox
        ],
                         name='mask_rcnn')
        # Add multi-GPU support.
        if config.GPU_COUNT > 1:
            from parallel_model import ParallelModel
            model = ParallelModel(model, config.GPU_COUNT)

        return model
Example #10
def data_generator(dataset,
                   shuffle=True,
                   augment=False,
                   augmentation=None,
                   random_rois=0,
                   batch_size=1,
                   detection_targets=False,
                   no_augmentation_sources=None):
    """A generator that returns images and corresponding target class ids,
    bounding box deltas, and masks.

    dataset: The Dataset object to pick data from
    config: The model config object
    shuffle: If True, shuffles the samples before every epoch
    augment: (deprecated. Use augmentation instead). If true, apply random
        image augmentation. Currently, only horizontal flipping is offered.
    augmentation: Optional. An imgaug (https://github.com/aleju/imgaug) augmentation.
        For example, passing imgaug.augmenters.Fliplr(0.5) flips images
        right/left 50% of the time.
    random_rois: If > 0 then generate proposals to be used to train the
                 network classifier and mask heads. Useful if training
                 the Mask RCNN part without the RPN.
    batch_size: How many images to return in each call
    detection_targets: If True, generate detection targets (class IDs, bbox
        deltas, and masks). Typically for debugging or visualizations because
        in training detection targets are generated by DetectionTargetLayer.
    no_augmentation_sources: Optional. List of sources to exclude from
        augmentation. A source is a string that identifies a dataset and is
        defined in the Dataset class.

    Returns a Python generator. Upon calling next() on it, the
    generator returns two lists, inputs and outputs. The contents
    of the lists differ depending on the received arguments:
    inputs list:
    - images: [batch, H, W, C]
    - image_meta: [batch, (meta data)] Image details. See compose_image_meta()
    - rpn_match: [batch, N] Integer (1=positive anchor, -1=negative, 0=neutral)
    - rpn_bbox: [batch, N, (dy, dx, log(dh), log(dw))] Anchor bbox deltas.
    - gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs
    - gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)]
    - gt_masks: [batch, height, width, MAX_GT_INSTANCES]. The height and width
                are those of the image unless use_mini_mask is True, in which
                case they are defined in MINI_MASK_SHAPE.

    outputs list: Usually empty in regular training. But if detection_targets
        is True then the outputs list contains target class_ids, bbox deltas,
        and masks.
    """
    b = 0  # batch item index
    image_index = -1
    image_ids = np.copy(dataset.image_ids)

    error_count = 0
    no_augmentation_sources = no_augmentation_sources or []

    backbone_shapes = utils.compute_backbone_shapes(
        hyper_parameters.FLAGS.BACKBONE,
        hyper_parameters.FLAGS.BACKBONE_STRIDES,
        hyper_parameters.FLAGS.IMAGE_SHAPE)
    anchors = utils.generate_pyramid_anchors(
        hyper_parameters.FLAGS.RPN_ANCHOR_SCALES,
        hyper_parameters.FLAGS.RPN_ANCHOR_RATIOS, backbone_shapes,
        hyper_parameters.FLAGS.BACKBONE_STRIDES,
        hyper_parameters.FLAGS.RPN_ANCHOR_STRIDE)

    while True:
        try:
            # Increment index to pick next image. Shuffle if at the start of an epoch.
            image_index = (image_index + 1) % len(image_ids)
            if shuffle and image_index == 0:
                np.random.shuffle(image_ids)

            # Get GT bounding boxes and masks for image.
            image_id = image_ids[image_index]

            if dataset.image_info[image_id][
                    'source'] in no_augmentation_sources:
                image, image_meta, gt_class_ids, gt_boxes, gt_masks = \
                    load_image_gt(dataset, image_id, augment=augment,
                                  augmentation=None,
                                  use_mini_mask=hyper_parameters.FLAGS.USE_MINI_MASK)
            else:
                image, image_meta, gt_class_ids, gt_boxes, gt_masks = \
                    load_image_gt(dataset, image_id, augment=augment,
                                  augmentation=augmentation,
                                  use_mini_mask=hyper_parameters.FLAGS.USE_MINI_MASK)

            # Skip images that have no instances. This can happen in cases
            # where we train on a subset of classes and the image doesn't
            # have any of the classes we care about.
            if not np.any(gt_class_ids > 0):
                continue

            # RPN Targets
            rpn_match, rpn_bbox = build_rpn_targets(image.shape, anchors,
                                                    gt_class_ids, gt_boxes)

            # Mask R-CNN Targets
            if random_rois:
                rpn_rois = generate_random_rois(image.shape, random_rois,
                                                gt_class_ids, gt_boxes)
                if detection_targets:
                    rois, mrcnn_class_ids, mrcnn_bbox, mrcnn_mask = \
                        build_detection_targets(
                            rpn_rois, gt_class_ids, gt_boxes, gt_masks)

            if b == 0:
                batch_image_meta = np.zeros((batch_size, ) + image_meta.shape,
                                            dtype=image_meta.dtype)
                batch_rpn_match = np.zeros([batch_size, anchors.shape[0], 1],
                                           dtype=rpn_match.dtype)
                batch_rpn_bbox = np.zeros([
                    batch_size,
                    hyper_parameters.FLAGS.RPN_TRAIN_ANCHORS_PER_IMAGE, 4
                ],
                                          dtype=rpn_bbox.dtype)
                batch_images = np.zeros((batch_size, ) + image.shape,
                                        dtype=np.float32)
                batch_gt_class_ids = np.zeros(
                    (batch_size, hyper_parameters.FLAGS.MAX_GT_INSTANCES),
                    dtype=np.int32)
                batch_gt_boxes = np.zeros(
                    (batch_size, hyper_parameters.FLAGS.MAX_GT_INSTANCES, 4),
                    dtype=np.int32)
                batch_gt_masks = np.zeros(
                    (batch_size, gt_masks.shape[0], gt_masks.shape[1],
                     hyper_parameters.FLAGS.MAX_GT_INSTANCES),
                    dtype=gt_masks.dtype)

                if random_rois:
                    batch_rpn_rois = np.zeros((batch_size, random_rois, 4),
                                              dtype=np.int32)
                    if detection_targets:
                        batch_rois = np.zeros((batch_size, ) + rois.shape,
                                              dtype=rois.dtype)
                        batch_mrcnn_class_ids = np.zeros(
                            (batch_size, ) + mrcnn_class_ids.shape,
                            dtype=mrcnn_class_ids.dtype)
                        batch_mrcnn_bbox = np.zeros(
                            (batch_size, ) + mrcnn_bbox.shape,
                            dtype=mrcnn_bbox.dtype)
                        batch_mrcnn_mask = np.zeros(
                            (batch_size, ) + mrcnn_mask.shape,
                            dtype=mrcnn_mask.dtype)

            if gt_boxes.shape[0] > hyper_parameters.FLAGS.MAX_GT_INSTANCES:
                ids = np.random.choice(np.arange(gt_boxes.shape[0]),
                                       hyper_parameters.FLAGS.MAX_GT_INSTANCES,
                                       replace=False)
                gt_boxes = gt_boxes[ids]
                gt_class_ids = gt_class_ids[ids]
                gt_masks = gt_masks[:, :, ids]

            batch_image_meta[b] = image_meta
            batch_gt_boxes[b, :gt_boxes.shape[0]] = gt_boxes
            batch_rpn_match[b] = rpn_match[:, np.newaxis]
            batch_rpn_bbox[b] = rpn_bbox
            batch_images[b] = utils.mold_image(
                image.astype(np.float32), hyper_parameters.FLAGS.MEAN_PIXEL)
            batch_gt_class_ids[b, :gt_class_ids.shape[0]] = gt_class_ids
            batch_gt_masks[b, :, :, :gt_masks.shape[-1]] = gt_masks
            if random_rois:
                batch_rpn_rois[b] = rpn_rois
                if detection_targets:
                    batch_rois[b] = rois
                    batch_mrcnn_class_ids[b] = mrcnn_class_ids
                    batch_mrcnn_bbox[b] = mrcnn_bbox
                    batch_mrcnn_mask[b] = mrcnn_mask

            b += 1

            if b >= batch_size:
                inputs = [
                    batch_images, batch_image_meta, batch_rpn_match,
                    batch_rpn_bbox, batch_gt_class_ids, batch_gt_boxes,
                    batch_gt_masks
                ]
                outputs = []

                if random_rois:
                    inputs.extend([batch_rpn_rois])
                    if detection_targets:
                        inputs.extend([batch_rois])
                        # Keras requires that output and targets have the same number of dimensions
                        batch_mrcnn_class_ids = np.expand_dims(
                            batch_mrcnn_class_ids, -1)
                        outputs.extend([
                            batch_mrcnn_class_ids, batch_mrcnn_bbox,
                            batch_mrcnn_mask
                        ])

                yield inputs, outputs

                b = 0

        except (GeneratorExit, KeyboardInterrupt):
            raise
        except:
            # Log it and skip the image
            logging.exception("Error processing image {}".format(
                dataset.image_info[image_id]))
            error_count += 1
            if error_count > 5:
                raise
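A minimal usage sketch, assuming `dataset_train` is a prepared Dataset object and the hyper_parameters.FLAGS settings referenced above are configured:

# Hypothetical driver loop for the generator defined above.
train_generator = data_generator(dataset_train, shuffle=True, batch_size=1)
inputs, outputs = next(train_generator)  # outputs is [] unless detection_targets=True
images, image_meta, rpn_match, rpn_bbox, gt_class_ids, gt_boxes, gt_masks = inputs
print(images.shape, rpn_match.shape, gt_masks.shape)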
Example #11
def data_generator(dataset, config, shuffle=True, augment=True, random_rois=0,
                   batch_size=1, detection_targets=False):
    """A generator that returns images and corresponding target class ids,
    bounding box deltas, and masks.

    dataset: The Dataset object to pick data from
    config: The model config object
    shuffle: If True, shuffles the samples before every epoch
    augment: If True, applies image augmentation to images (currently only
             horizontal flips are supported)
    random_rois: If > 0 then generate proposals to be used to train the
                 network classifier and mask heads. Useful if training
                 the Mask RCNN part without the RPN.
    batch_size: How many images to return in each call
    detection_targets: If True, generate detection targets (class IDs, bbox
        deltas, and masks). Typically for debugging or visualizations because
        in training detection targets are generated by DetectionTargetLayer.

    Returns a Python generator. Upon calling next() on it, the
    generator returns two lists, inputs and outputs. The contents
    of the lists differ depending on the received arguments:
    inputs list:
    - images: [batch, H, W, C]
    - image_meta: [batch, size of image meta]
    - rpn_match: [batch, N] Integer (1=positive anchor, -1=negative, 0=neutral)
    - rpn_bbox: [batch, N, (dy, dx, log(dh), log(dw))] Anchor bbox deltas.
    - gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs
    - gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)]
    - gt_masks: [batch, height, width, MAX_GT_INSTANCES]. The height and width
                are those of the image unless use_mini_mask is True, in which
                case they are defined in MINI_MASK_SHAPE.

    outputs list: Usually empty in regular training. But if detection_targets
        is True then the outputs list contains target class_ids, bbox deltas,
        and masks.
    """
    b = 0  # batch item index
    image_index = -1
    image_ids = np.copy(dataset.image_ids)
    error_count = 0

    # Anchors
    # [anchor_count, (y1, x1, y2, x2)]
    anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                             config.RPN_ANCHOR_RATIOS,
                                             config.BACKBONE_SHAPES,
                                             config.BACKBONE_STRIDES,
                                             config.RPN_ANCHOR_STRIDE)

    # Keras requires a generator to run indefinitely.
    while True:
        try:
            # Increment index to pick next image. Shuffle if at the start of an epoch.
            image_index = (image_index + 1) % len(image_ids)
            if shuffle and image_index == 0:
                np.random.shuffle(image_ids)

            # Get GT bounding boxes and masks for image.
            image_id = image_ids[image_index]
            image, image_meta, gt_class_ids, gt_boxes, gt_masks = \
                load_image_gt(dataset, config, image_id, augment=augment,
                              use_mini_mask=config.USE_MINI_MASK)

            # Skip images that have no instances. This can happen in cases
            # where we train on a subset of classes and the image doesn't
            # have any of the classes we care about.
            if not np.any(gt_class_ids > 0):
                continue

            # RPN Targets
            rpn_match, rpn_bbox = build_rpn_targets(image.shape, anchors,
                                                    gt_class_ids, gt_boxes, config)

            # Mask R-CNN Targets
            if random_rois:
                rpn_rois = generate_random_rois(
                    image.shape, random_rois, gt_class_ids, gt_boxes)
                if detection_targets:
                    rois, mrcnn_class_ids, mrcnn_bbox, mrcnn_mask =\
                        build_detection_targets(
                            rpn_rois, gt_class_ids, gt_boxes, gt_masks, config)

            # Init batch arrays
            if b == 0:
                batch_image_meta = np.zeros(
                    (batch_size,) + image_meta.shape, dtype=image_meta.dtype)
                batch_rpn_match = np.zeros(
                    [batch_size, anchors.shape[0], 1], dtype=rpn_match.dtype)
                batch_rpn_bbox = np.zeros(
                    [batch_size, config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4], dtype=rpn_bbox.dtype)
                batch_images = np.zeros(
                    (batch_size,) + image.shape, dtype=np.float32)
                batch_gt_class_ids = np.zeros(
                    (batch_size, config.MAX_GT_INSTANCES), dtype=np.int32)
                batch_gt_boxes = np.zeros(
                    (batch_size, config.MAX_GT_INSTANCES, 4), dtype=np.int32)
                if config.USE_MINI_MASK:
                    batch_gt_masks = np.zeros((batch_size, config.MINI_MASK_SHAPE[0], config.MINI_MASK_SHAPE[1],
                                               config.MAX_GT_INSTANCES))
                else:
                    batch_gt_masks = np.zeros(
                        (batch_size, image.shape[0], image.shape[1], config.MAX_GT_INSTANCES))
                if random_rois:
                    batch_rpn_rois = np.zeros(
                        (batch_size, rpn_rois.shape[0], 4), dtype=rpn_rois.dtype)
                    if detection_targets:
                        batch_rois = np.zeros(
                            (batch_size,) + rois.shape, dtype=rois.dtype)
                        batch_mrcnn_class_ids = np.zeros(
                            (batch_size,) + mrcnn_class_ids.shape, dtype=mrcnn_class_ids.dtype)
                        batch_mrcnn_bbox = np.zeros(
                            (batch_size,) + mrcnn_bbox.shape, dtype=mrcnn_bbox.dtype)
                        batch_mrcnn_mask = np.zeros(
                            (batch_size,) + mrcnn_mask.shape, dtype=mrcnn_mask.dtype)

            # If more instances than fit in the array, sub-sample from them.
            if gt_boxes.shape[0] > config.MAX_GT_INSTANCES:
                ids = np.random.choice(
                    np.arange(gt_boxes.shape[0]), config.MAX_GT_INSTANCES, replace=False)
                gt_class_ids = gt_class_ids[ids]
                gt_boxes = gt_boxes[ids]
                gt_masks = gt_masks[:, :, ids]

            # Add to batch
            batch_image_meta[b] = image_meta
            batch_rpn_match[b] = rpn_match[:, np.newaxis]
            batch_rpn_bbox[b] = rpn_bbox
            batch_images[b] = mold_image(image.astype(np.float32), config)
            batch_gt_class_ids[b, :gt_class_ids.shape[0]] = gt_class_ids
            batch_gt_boxes[b, :gt_boxes.shape[0]] = gt_boxes
            batch_gt_masks[b, :, :, :gt_masks.shape[-1]] = gt_masks
            if random_rois:
                batch_rpn_rois[b] = rpn_rois
                if detection_targets:
                    batch_rois[b] = rois
                    batch_mrcnn_class_ids[b] = mrcnn_class_ids
                    batch_mrcnn_bbox[b] = mrcnn_bbox
                    batch_mrcnn_mask[b] = mrcnn_mask
            b += 1

            # Batch full?
            if b >= batch_size:
                inputs = [batch_images, batch_image_meta, batch_rpn_match, batch_rpn_bbox,
                          batch_gt_class_ids, batch_gt_boxes, batch_gt_masks]
                outputs = []

                if random_rois:
                    inputs.extend([batch_rpn_rois])
                    if detection_targets:
                        inputs.extend([batch_rois])
                        # Keras requires that output and targets have the same number of dimensions
                        batch_mrcnn_class_ids = np.expand_dims(
                            batch_mrcnn_class_ids, -1)
                        outputs.extend(
                            [batch_mrcnn_class_ids, batch_mrcnn_bbox, batch_mrcnn_mask])

                yield inputs, outputs

                # start a new batch
                b = 0
        except (GeneratorExit, KeyboardInterrupt):
            raise
        except:
            # Log it and skip the image
            logging.exception("Error processing image {}".format(
                dataset.image_info[image_id]))
            error_count += 1
            if error_count > 5:
                raise
Example #12
    def __init__(self,
                 options,
                 config,
                 split,
                 random=True,
                 loadNeighborImage=False,
                 load_semantics=False,
                 load_boundary=False):
        self.options = options
        self.config = config
        self.split = split
        self.random = random

        self.dataFolder = options.dataFolder

        self.scenes = []
        self.sceneImageIndices = []

        self.loadClassMap()

        planenet_scene_ids_val = np.load('datasets/scene_ids_val.npy')
        planenet_scene_ids_val = {
            scene_id.decode('utf-8'): True
            for scene_id in planenet_scene_ids_val
        }
        with open(self.dataFolder + '/ScanNet/Tasks/Benchmark/scannetv1_' +
                  split + '.txt') as f:
            for line in f:
                scene_id = line.strip()
                if split == 'test':
                    ## Remove scenes which are in PlaneNet's training set for fair comparison
                    if scene_id not in planenet_scene_ids_val:
                        continue
                    pass
                scenePath = self.dataFolder + '/scans/' + scene_id
                if not os.path.exists(scenePath + '/' + scene_id +
                                      '.txt') or not os.path.exists(
                                          scenePath +
                                          '/annotation/planes.npy'):
                    continue
                scene = ScanNetScene(options,
                                     scenePath,
                                     scene_id,
                                     self.confident_labels,
                                     self.layout_labels,
                                     load_semantics=load_semantics,
                                     load_boundary=load_boundary)
                self.scenes.append(scene)
                self.sceneImageIndices += [[
                    len(self.scenes) - 1, imageIndex
                ] for imageIndex in range(len(scene.imagePaths))]
                continue
            pass

        if random:
            t = int(time.time() * 1000000)
            np.random.seed(((t & 0xff000000) >> 24) + ((t & 0x00ff0000) >> 8) +
                           ((t & 0x0000ff00) << 8) + ((t & 0x000000ff) << 24))
        else:
            np.random.seed(0)
            pass
        np.random.shuffle(self.sceneImageIndices)

        self.invalid_indices = {}

        with open(self.dataFolder + '/invalid_indices_' + split + '.txt',
                  'r') as f:
            for line in f:
                tokens = line.split(' ')
                if len(tokens) == 3:
                    assert (int(tokens[2]) < 10000)
                    invalid_index = int(tokens[1]) * 10000 + int(tokens[2])
                    if invalid_index not in self.invalid_indices:
                        self.invalid_indices[invalid_index] = True
                        pass
                    pass
                continue
            pass

        self.sceneImageIndices = [
            [sceneIndex, imageIndex]
            for sceneIndex, imageIndex in self.sceneImageIndices
            if (sceneIndex * 10000 + imageIndex) not in self.invalid_indices
        ]

        print('num images', len(self.sceneImageIndices))

        self.anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                                      config.RPN_ANCHOR_RATIOS,
                                                      config.BACKBONE_SHAPES,
                                                      config.BACKBONE_STRIDES,
                                                      config.RPN_ANCHOR_STRIDE)

        self.loadNeighborImage = loadNeighborImage

        return
Example #13
    def build(self, mode, config, images):

        assert mode in ['training', 'inference']
        # Image size must be dividable by 2 multiple times
        # h, w = config.IMAGE_SHAPE[:2]
        # if h / 2**6 != int(h / 2**6) or w / 2**6 != int(w / 2**6):
        #     raise Exception("Image size must be dividable by 2 at least 6 times "
        #                     "to avoid fractions when downscaling and upscaling."
        #                     "For example, use 256, 320, 384, 448, 512, ... etc. ")
        # input_image = tf.placeholder(shape=config.IMAGE_SHAPE.tolist(), name="input_image")

        C2, C3, C4, C5 = resnet_graph(images, "resnet50", stage5=True)

        #128*4*4*256
        P5 = setool.conv_op(input_op=C5,
                            name='fpn_c5p5',
                            kh=1,
                            kw=1,
                            n_out=256)
        P4 = setool.conv_op(input_op=C4, name='fpn_c4p4', kh=1, kw=1, n_out=256) + \
             tf.image.resize_images(P5, [64, 64])
        P3 = setool.conv_op(input_op=C3, name='fpn_c3p3', kh=1, kw=1, n_out=256) + \
             tf.image.resize_images(P4, [128, 128])
        P2 = setool.conv_op(input_op=C2, name='fpn_c2p2', kh=1, kw=1, n_out=256) + \
             tf.image.resize_images(P3, [256, 256])

        P2 = setool.conv_op(input_op=P2, name='fpn_p2', n_out=256)
        P3 = setool.conv_op(input_op=P3, name='fpn_p3', n_out=256)
        P4 = setool.conv_op(input_op=P4, name='fpn_p4', n_out=256)
        P5 = setool.conv_op(input_op=P5, name='fpn_p5', n_out=256)
        P6 = setool.mpool_op(input_tensor=P5, k=1, s=2, name="fpn_p6")

        rpn_feature_maps = [P2, P3, P4, P5, P6]
        mrcnn_feature_maps = [P2, P3, P4, P5]
        # Generate Anchors
        self.anchors = utils.generate_pyramid_anchors(
            self.config.RPN_ANCHOR_SCALES, self.config.RPN_ANCHOR_RATIOS,
            self.config.BACKBONE_SHAPES, self.config.BACKBONE_STRIDES,
            self.config.RPN_ANCHOR_STRIDE)
        # e.g. scales (32, 64, 128, 256, 512), 3 ratios, backbone shapes [256, 128, 64, 32, 16],
        # strides [4, 8, 16, 32, 64], anchor stride 1
        rpn_P6 = RPN_net(
            P6, anchor_stride=self.config.RPN_ANCHOR_STRIDE).build_rpn_model()
        rpn_P5 = RPN_net(
            P5, anchor_stride=self.config.RPN_ANCHOR_STRIDE).build_rpn_model()
        rpn_P4 = RPN_net(
            P4, anchor_stride=self.config.RPN_ANCHOR_STRIDE).build_rpn_model()
        rpn_P3 = RPN_net(
            P3, anchor_stride=self.config.RPN_ANCHOR_STRIDE).build_rpn_model()
        rpn_P2 = RPN_net(
            P2, anchor_stride=self.config.RPN_ANCHOR_STRIDE).build_rpn_model()

        rpn_class_logits = tf.concat(
            [rpn_P2[0], rpn_P3[0], rpn_P4[0], rpn_P5[0], rpn_P6[0]], 1)
        rpn_class = tf.concat(
            [rpn_P2[1], rpn_P3[1], rpn_P4[1], rpn_P5[1], rpn_P6[1]], 1)
        rpn_bbox = tf.concat(
            [rpn_P2[2], rpn_P3[2], rpn_P4[2], rpn_P5[2], rpn_P6[2]], 1)
        # print(rpn_class_logits.shape)
        # print(rpn_class.shape)
        # print(rpn_bbox.shape)

        return rpn_class_logits, rpn_bbox, rpn_class
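Since the P3 upsample target is 256x256 (matching C2) and the C2 stride is 4, this build implicitly expects 1024x1024 inputs. A hypothetical wiring sketch; the placeholder name and the `model` instance are assumptions, not part of the original snippet:

# Assumed TF1-style usage of the build method above.
images = tf.placeholder(tf.float32, [None, 1024, 1024, 3], name="input_image")
rpn_class_logits, rpn_bbox, rpn_class = model.build(mode="inference",
                                                    config=config, images=images)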
Example #14
    molded_image = mold_image(molded_image)
    print("Moded image shape is : ", molded_image.shape)
    image_meta = compose_image_meta(
        0, image.shape, molded_image.shape, inferwindow, scale,
        np.zeros([inferconfig.NUM_CLASSES], dtype=np.int32))
    #image =  image[np.newaxis,:]
    #anchors = anchors[np.newaxis, :]
    image_meta = image_meta.reshape(1, -1)

    backbone_shapes = compute_backbone_shapes(inferconfig, molded_image.shape)
    imageshapeinfer = molded_image.shape
    molded_image = molded_image[np.newaxis, :]
    #print("Backbone shape is : ", backbone_shapes)
    anchors = utils.generate_pyramid_anchors(inferconfig.RPN_ANCHOR_SCALES,
                                             inferconfig.RPN_ANCHOR_RATIOS,
                                             backbone_shapes,
                                             inferconfig.BACKBONE_STRIDES,
                                             inferconfig.RPN_ANCHOR_STRIDE)
    #print("Anchor generate parameter : ",inferconfig.RPN_ANCHOR_SCALES)
    #print("Anchor generate parameter : ",inferconfig.RPN_ANCHOR_RATIOS)
    #print("Anchor generate paramenter :",backbone_shapes)
    #print("Anchor generate parameter : ",inferconfig.BACKBONE_STRIDES)
    #print("Anchor generate parameter : ",inferconfig.RPN_ANCHOR_STRIDE)
    #print("Original anchor shape is :", anchors.shape)
    anchors = np.broadcast_to(anchors,
                              (inferconfig.BATCH_SIZE, ) + anchors.shape)
    anchors = utils.norm_boxes(anchors, imageshapeinfer[:2])
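    # For reference (added; an assumption about this utils module, following the
    # matterport-style implementation): norm_boxes maps pixel boxes into [0, 1] as
    #   (y1, x1, y2, x2) -> ((y1, x1, y2, x2) - (0, 0, 1, 1)) / (h - 1, w - 1, h - 1, w - 1)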
    print("The input anchors shape is : ", anchors.shape)
    print('The input anchors are : \n', anchors)
    #print(image.shape)
    test_list = []
Exemple #15
def inference():
    # Root directory of the project
    ROOT_DIR = os.getcwd()
    # Directory to save logs and model checkpoints, if not provided
    # through the command line argument --logs
    LOG_DIR = os.path.join(ROOT_DIR, "output/logs")
    MODEL_DIR = os.path.join(ROOT_DIR, "output/training")
    dataset_path = os.path.join(ROOT_DIR, 'data/coco')

    config = InferenceConfig()
    config.display()

    dataset_val = gen_cocodb.CocoDataSet()
    dataset_val.load_coco(dataset_path,
                          "minival",
                          year="2014",
                          auto_download=False)
    dataset_val.prepare()

    print("Images: {}\nClasses: {}".format(len(dataset_val.image_ids),
                                           dataset_val.class_names))

    image_id = random.choice(dataset_val.image_ids)
    # image, image_meta, gt_class_id, gt_bbox, gt_mask = dataset_val.load_image_gt(dataset_val, config, image_id, use_mini_mask=False)
    # info = dataset_val.image_info[image_id]
    # print("image ID: {}.{} ({}) {}".format(info["source"], info["id"], image_id,
    #                                        dataset_val.image_reference(image_id)))

    image = dataset_val.load_image(image_id)
    images = np.expand_dims(image, axis=0)
    molded_images, image_metas, windows = gen_cocodb.mold_inputs(
        images, config)
    print(molded_images.shape, image_metas.shape, windows.shape)

    anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                             config.RPN_ANCHOR_RATIOS,
                                             config.BACKBONE_SHAPES,
                                             config.BACKBONE_STRIDES,
                                             config.RPN_ANCHOR_STRIDE)
    with tf.device('/device:CPU:0'):
        model = modellib.MaskRCNN(mode='inference',
                                  config=config,
                                  model_dir=LOG_DIR,
                                  anchors=anchors)
    print(len(model.outputs))

    feed_dict = {
        model.input_image: molded_images,
        model.input_image_meta: image_metas
    }

    detections = model.outputs['detections']
    mrcnn_class = model.outputs['mrcnn_class']
    mrcnn_bbox = model.outputs['mrcnn_bbox']
    mrcnn_mask = model.outputs['mrcnn_mask']

    saver = tf.train.Saver()
    init_op = tf.global_variables_initializer()
    with tf.device('/device:CPU:0'):
        with tf.Session() as sess:
            sess.run(init_op)
            # saver.restore(sess, "output/training/mrcnn.ckpt-96000")
            ckpt = tf.train.get_checkpoint_state(MODEL_DIR)
            """ resotre checkpoint of Backbone network """
            if ckpt is not None:
                ckpt_path = tf.train.latest_checkpoint(MODEL_DIR)
                # ckpt_path = FLAGS.checkpoint_model
                saver.restore(sess, ckpt_path)
            else:
                ckpt_path = "output/training/mrcnn.ckpt-96000"
                saver.restore(sess, ckpt_path)
            print('ckpt_path', ckpt_path)

            pre_nms_anchors = sess.graph.get_tensor_by_name(
                "pre_nms_anchors:0")
            refined_anchors = sess.graph.get_tensor_by_name(
                "refined_anchors:0")
            refined_anchors_clipped = sess.graph.get_tensor_by_name(
                "refined_anchors_clipped:0")
            print(pre_nms_anchors)
            print(refined_anchors)
            print(refined_anchors_clipped)

            detect, pred_class, pred_bbox, pred_mask = sess.run(
                [detections, mrcnn_class, mrcnn_bbox, mrcnn_mask],
                feed_dict=feed_dict)

            print(detect.shape, pred_class.shape, pred_bbox.shape,
                  pred_mask.shape)

            # Process detections
            final_rois, final_class_ids, final_scores, final_masks = gen_cocodb.unmold_detections(
                detect[0], pred_mask[0], image.shape, windows[0])

            ax = get_ax(1)
            visualize.display_instances(image,
                                        final_rois,
                                        final_masks,
                                        final_class_ids,
                                        dataset_val.class_names,
                                        final_scores,
                                        ax=ax,
                                        title="Predictions")
            print(final_rois.shape, final_class_ids.shape, final_scores.shape,
                  final_masks.shape)
            print(final_class_ids)
            print(final_scores)
            print(final_rois)
def train(train_dataset, config, lr, train_layers, epochs):

    anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                             config.RPN_ANCHOR_RATIOS,
                                             config.BACKBONE_SHAPES,
                                             config.BACKBONE_STRIDES,
                                             config.RPN_ANCHOR_STRIDE)
    with tf.Graph().as_default():
        deploy_config = model_deploy.DeploymentConfig(num_clones=config.GPU_COUNT,
                                                      clone_on_cpu=False,
                                                      replica_id=0,
                                                      num_replicas=1,
                                                      num_ps_tasks=0)

        with tf.device(deploy_config.variables_device()):
            print(deploy_config.variables_device())
            global_step = tf.train.create_global_step()

        with tf.device(deploy_config.inputs_device()):
            print(deploy_config.inputs_device())
            with tf.name_scope('coco_data_generator'):
                train_generator = data_generator(train_dataset, config, anchors, shuffle=True)

        models = []
        def clone_fn():
            model = modellib.MaskRCNN(mode=mode, config=config, model_dir=DEFAULT_LOGS_DIR, anchors=anchors)
            models.append(model)

            losses = tf.get_collection(tf.GraphKeys.LOSSES)
            model_loss = tf.add_n(losses)
            return model_loss


        clones = model_deploy.create_clones(deploy_config, clone_fn)
        first_clone_scope = deploy_config.clone_scope(0)
        print(first_clone_scope)
        # Gather update_ops from the first clone. These contain, for example,
        # the updates for the batch_norm variables created by network_fn.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

        #########################################
        # Configure the optimization procedure. #
        #########################################
        print(deploy_config.optimizer_device())
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = tf.placeholder(dtype=tf.float32, shape=(), name='learning_rate')
            optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=config.LEARNING_MOMENTUM, name='Momentum')
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        variables_to_train = set_trainable(train_layers)

        total_loss, clones_gradients = model_deploy.optimize_clones(clones,
                                                                    optimizer,
                                                                    var_list=variables_to_train)
        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)
        update_op = tf.group(*update_ops)

        # Add total_loss to summary.
        summaries.add(tf.summary.scalar('total_loss', total_loss))
        print(total_loss)

        summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        summary_op = tf.summary.merge(list(summaries), name='summary_op')
        summary_writer = tf.summary.FileWriter(models[0].log_dir, graph=tf.get_default_graph())

        """ set saver for saving final model and backbone model for restore """
        # variables_to_restore = _get_restore_vars('FeatureExtractor/MobilenetV1')
        # re_saver = tf.train.Saver(var_list=variables_to_restore)

        saver = tf.train.Saver(max_to_keep=3)
        """ Set Gpu Env """
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        """ Starting Training..... """
        gpu_opt = tf.GPUOptions(per_process_gpu_memory_fraction=0.9, allow_growth=True)
        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_opt)) as sess:
            sess.run(init_op)
            # re_saver.restore(sess, 'data/pretrained_models/mobilenet_v1_coco/model.ckpt')
            ckpt = tf.train.get_checkpoint_state("output/training")
            """ resotre checkpoint of Backbone network """
            if ckpt:
                lastest_ckpt = tf.train.latest_checkpoint("output/training")
                print('lastest', lastest_ckpt)
                saver.restore(sess, lastest_ckpt)
            try:
                while True:
                    feed_dict = {learning_rate: lr}
                    inputs = next(train_generator)
                    num_epoch = inputs[7]
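                    # Note (added): the 2-image slice per clone below assumes
                    # the generator packs 2 images per GPU (i.e. IMAGES_PER_GPU == 2);
                    # adjust the stride if that assumption does not hold.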
                    for i in range(len(clones)):
                        s = 2*i
                        e = 2*(i+1)
                        feed_dict[models[i].input_image] = inputs[0][s:e, :]
                        feed_dict[models[i].input_image_meta] = inputs[1][s:e, :]
                        feed_dict[models[i].input_rpn_match] = inputs[2][s:e, :]
                        feed_dict[models[i].input_rpn_bbox] = inputs[3][s:e, :]
                        feed_dict[models[i].input_gt_class_ids] = inputs[4][s:e, :]
                        feed_dict[models[i].input_gt_boxes] = inputs[5][s:e, :]
                        feed_dict[models[i].input_gt_masks] = inputs[6][s:e, :]

                    _, loss, current_step, summary =  sess.run([update_op, total_loss,
                                                                global_step, summary_op],
                                                               feed_dict=feed_dict)
                    print ("""iter %d : total-loss %.4f """ %(current_step, loss))

                    if np.isnan(loss) or np.isinf(loss):
                        print('isnan or isinf', loss)
                        raise
                    if current_step % 1000 == 0:
                        # write summary
                        # summary = sess.run(summary_op, feed_dict=feed_dict)
                        summary_writer.add_summary(summary, current_step)
                        summary_writer.flush()

                    if current_step % 3000 == 0:
                        # Save a checkpoint
                        save_path = 'output/training/mrcnn.ckpt'
                        saver.save(sess, save_path, global_step=current_step)

                    if num_epoch > epochs:
                        print("num epoch : %d and training End!!!" % num_epoch)
                        break
            except Exception as ex:
                print('Error occurred!!!! => ', ex)
            finally:
                print("Final!!")
                saver.save(sess, 'output/models/mrcnn_final.ckpt', write_meta_graph=False)
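# Illustrative usage (added, not part of the original): how train() might be
# invoked; the dataset object and the 'heads' layer selector are assumptions.
#   config = TrainConfig()
#   train(dataset_train, config, lr=config.LEARNING_RATE,
#         train_layers='heads', epochs=40)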
def data_generator(config, shuffle=True, augmentation=None, batch_size=1):
    """
    A generator that returns images and corresponding target class ids,
    bounding box deltas, and masks.

    Returns a Python generator. Upon calling next() on it, the
    generator returns two lists, inputs and outputs. The contents
    of the lists differ depending on the received arguments:
    inputs list:
    - images: [batch, H, W, C]
    - image_meta: [batch, (meta data)] Image details. See compose_image_meta()
    - rpn_match: [batch, N] Integer (1=positive anchor, -1=negative, 0=neutral)
    - rpn_bbox: [batch, N, (dy, dx, log(dh), log(dw))] Anchor bbox deltas.
    - gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs
    - gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)]
    - gt_masks: [batch, height, width, MAX_GT_INSTANCES]. The height and width
                are those of the image unless use_mini_mask is True, in which
                case they are defined in MINI_MASK_SHAPE.

    outputs list: empty in regular training (this generator does not
        produce detection targets).

    """
    b = 0
    ix = 0
    image_files = glob.glob("./data/train/*.jpg")

    # Anchors
    # [anchor_count, (y1, x1, y2, x2)]
    backbone_shapes = compute_backbone_shapes(config, config.IMAGE_SHAPE)
    anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                             config.RPN_ANCHOR_RATIOS,
                                             backbone_shapes,
                                             config.BACKBONE_STRIDES,
                                             config.RPN_ANCHOR_STRIDE)

    while True:
        if shuffle and ix == 0:
            np.random.shuffle(image_files)
        image_path = image_files[ix]

        json_path = image_files[ix].replace("jpg", "json")

        image = load_image(image_path)
        original_shape = image.shape

        mask, class_ids = load_mask(json_path)

        image, window, scale, padding, crop = utils.resize_image(
            image,
            min_dim=config.IMAGE_MIN_DIM,
            min_scale=config.IMAGE_MIN_SCALE,
            max_dim=config.IMAGE_MAX_DIM,
            mode=config.IMAGE_RESIZE_MODE)
        mask = utils.resize_mask(mask, scale, padding, crop)

        # Augmentation
        # This requires the imgaug lib (https://github.com/aleju/imgaug)
        if augmentation:
            import imgaug
            # Augmenters that are safe to apply to masks
            # Some, such as Affine, have settings that make them unsafe, so always
            # test your augmentation on masks
            MASK_AUGMENTERS = ["Sequential", "SomeOf", "OneOf", "Sometimes",
                               "Fliplr", "Flipud", "CropAndPad",
                               "Affine", "PiecewiseAffine"]

            def hook(images, augmenter, parents, default):
                """Determines which augmenters to apply to masks."""
                return augmenter.__class__.__name__ in MASK_AUGMENTERS

            # Store shapes before augmentation to compare
            image_shape = image.shape
            mask_shape = mask.shape
            # Make augmenters deterministic to apply similarly to images and masks
            det = augmentation.to_deterministic()
            image = det.augment_image(image)
            # Change mask to np.uint8 because imgaug doesn't support np.bool
            mask = det.augment_image(mask.astype(np.uint8),
                                     hooks=imgaug.HooksImages(activator=hook))
            # Verify that shapes didn't change
            assert image.shape == image_shape, "Augmentation shouldn't change image size"
            assert mask.shape == mask_shape, "Augmentation shouldn't change mask size"
            # Change mask back to bool
            mask = mask.astype(np.bool)

        bbox = utils.extract_bboxes(mask)

        use_mini_mask = True

        if use_mini_mask:
            mask = utils.minimize_mask(bbox, mask, config.MINI_MASK_SHAPE)
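        # Note (added): mini masks store each instance mask cropped to its
        # bounding box and resized to MINI_MASK_SHAPE, which saves memory at
        # the cost of some mask resolution.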

        # image_meta is for debug
        image_meta = compose_image_meta(0, original_shape, image.shape,
                                        window, scale, np.ones(len(class_name2idx)))

        # RPN Targets
        rpn_match, rpn_bbox = build_rpn_targets(image.shape, anchors,
                                                class_ids, bbox, config)
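        # build_rpn_targets (not shown here) matches anchors to GT boxes;
        # rpn_match is 1/-1/0 for positive/negative/neutral anchors, and for
        # positive anchors rpn_bbox holds deltas encoded roughly as
        #   dy = (gt_cy - a_cy) / a_h, dx = (gt_cx - a_cx) / a_w,
        #   dh = log(gt_h / a_h),      dw = log(gt_w / a_w),
        # typically divided by config.RPN_BBOX_STD_DEV.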

        if b == 0:
            batch_image_meta = np.zeros(
                (batch_size,) + image_meta.shape, dtype=image_meta.dtype)
            batch_rpn_match = np.zeros(
                [batch_size, anchors.shape[0], 1], dtype=rpn_match.dtype)
            batch_rpn_bbox = np.zeros(
                [batch_size, config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4], dtype=rpn_bbox.dtype)
            batch_images = np.zeros(
                (batch_size,) + image.shape, dtype=np.float32)
            batch_gt_class_ids = np.zeros(
                (batch_size, config.MAX_GT_INSTANCES), dtype=np.int32)
            batch_gt_boxes = np.zeros(
                (batch_size, config.MAX_GT_INSTANCES, 4), dtype=np.int32)
            batch_gt_masks = np.zeros(
                (batch_size, mask.shape[0], mask.shape[1],
                 config.MAX_GT_INSTANCES), dtype=mask.dtype)
        # Add to batch
        batch_image_meta[b] = image_meta
        batch_rpn_match[b] = rpn_match[:, np.newaxis]
        batch_rpn_bbox[b] = rpn_bbox
        batch_images[b] = mold_image(image.astype(np.float32), config)
        batch_gt_class_ids[b, :class_ids.shape[0]] = class_ids
        batch_gt_boxes[b, :bbox.shape[0]] = bbox
        batch_gt_masks[b, :, :, :mask.shape[-1]] = mask
        b += 1
        ix = (ix + 1) % len(image_files)
        if b >= batch_size:
            inputs = [batch_images, batch_image_meta, batch_rpn_match, batch_rpn_bbox,
                      batch_gt_class_ids, batch_gt_boxes, batch_gt_masks]
            outputs = []

            yield inputs, outputs
            b = 0
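# Illustrative usage (added, not part of the original): pulling one batch from
# the generator; `config` is assumed to expose the fields referenced above and
# ./data/train must contain matching jpg/json pairs.
#   gen = data_generator(config, shuffle=True, batch_size=config.BATCH_SIZE)
#   (images, image_meta, rpn_match, rpn_bbox,
#    gt_class_ids, gt_boxes, gt_masks), _ = next(gen)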