Example #1
    def __init__(self,
                 input_shape,
                 categories,
                 anchor_aspect_ratios=None,
                 anchor_base_size=16,
                 anchor_padding=1,
                 anchor_scales=None,
                 anchor_stride=16,
                 backbone=None,
                 dense_units=512,
                 mask_shape=(28, 28),
                 maximum_proposals=300,
                 minimum_size=16):
        if anchor_aspect_ratios is None:
            anchor_aspect_ratios = [0.5, 1.0, 2.0]

        if anchor_scales is None:
            anchor_scales = [4, 8, 16]

        n_categories = len(categories) + 1

        k = len(anchor_aspect_ratios) * len(anchor_scales)

        target_bounding_boxes = keras.layers.Input(
            shape=(None, 4), name="target_bounding_boxes")

        target_categories = keras.layers.Input(shape=(None, n_categories),
                                               name="target_categories")

        target_image = keras.layers.Input(shape=input_shape,
                                          name="target_image")

        target_masks = keras.layers.Input(shape=(None, ) + mask_shape,
                                          name="target_masks")

        target_metadata = keras.layers.Input(shape=(3, ),
                                             name="target_metadata")

        options = {
            "activation": "relu",
            "kernel_size": (3, 3),
            "padding": "same"
        }

        inputs = [
            target_bounding_boxes, target_categories, target_image,
            target_masks, target_metadata
        ]

        if backbone:
            output_features = backbone()(target_image)
        else:
            output_features = keras_rcnn.models.backbone.VGG16()(target_image)

        convolution_3x3 = keras.layers.Conv2D(filters=64,
                                              name="3x3",
                                              **options)(output_features)

        output_deltas = keras.layers.Conv2D(filters=k * 4,
                                            kernel_size=(1, 1),
                                            activation="linear",
                                            kernel_initializer="zero",
                                            name="deltas1")(convolution_3x3)

        output_scores = keras.layers.Conv2D(filters=k * 1,
                                            kernel_size=(1, 1),
                                            activation="sigmoid",
                                            kernel_initializer="uniform",
                                            name="scores1")(convolution_3x3)

        target_anchors, target_proposal_bounding_boxes, target_proposal_categories = keras_rcnn.layers.Anchor(
            padding=anchor_padding,
            aspect_ratios=anchor_aspect_ratios,
            base_size=anchor_base_size,
            scales=anchor_scales,
            stride=anchor_stride,
        )([target_bounding_boxes, target_metadata, output_scores])

        output_deltas, output_scores = keras_rcnn.layers.RPN()([
            target_proposal_bounding_boxes, target_proposal_categories,
            output_deltas, output_scores
        ])

        output_proposal_bounding_boxes = keras_rcnn.layers.ObjectProposal(
            maximum_proposals=maximum_proposals, minimum_size=minimum_size)([
                target_anchors, target_metadata, output_deltas, output_scores
            ])

        target_proposal_bounding_boxes, target_proposal_categories, output_proposal_bounding_boxes = keras_rcnn.layers.ProposalTarget(
        )([
            target_bounding_boxes, target_categories,
            output_proposal_bounding_boxes
        ])

        output_features = keras_rcnn.layers.RegionOfInterest((14, 14))(
            [target_metadata, output_features, output_proposal_bounding_boxes])

        output_features = keras.layers.TimeDistributed(
            keras.layers.Flatten())(output_features)

        output_features = keras.layers.TimeDistributed(
            keras.layers.Dense(units=dense_units,
                               activation="relu",
                               name="fc1"))(output_features)

        output_deltas = keras.layers.TimeDistributed(
            keras.layers.Dense(units=4 * n_categories,
                               activation="linear",
                               kernel_initializer="zero",
                               name="deltas2"))(output_features)

        output_scores = keras.layers.TimeDistributed(
            keras.layers.Dense(units=1 * n_categories,
                               activation="softmax",
                               kernel_initializer="zero",
                               name="scores2"))(output_features)

        output_deltas, output_scores = keras_rcnn.layers.RCNN()([
            target_proposal_bounding_boxes, target_proposal_categories,
            output_deltas, output_scores
        ])

        output_bounding_boxes, output_categories = keras_rcnn.layers.ObjectDetection(
        )([
            target_metadata, output_deltas, output_proposal_bounding_boxes,
            output_scores
        ])

        outputs = [output_bounding_boxes, output_categories]

        super(RCNN, self).__init__(inputs, outputs)
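
A minimal usage sketch for the constructor in Example #1. The class path
`keras_rcnn.models.RCNN` and the compile-without-a-loss step mirror the
keras-rcnn README (the RPN and RCNN layers attach their losses internally);
the category names and learning rate are illustrative:

    import keras
    import keras_rcnn.models

    # Three foreground categories; the constructor appends one background
    # class, so n_categories == 4 inside the model.
    model = keras_rcnn.models.RCNN(
        input_shape=(224, 224, 3),
        categories=["circle", "square", "triangle"]
    )

    # Targets enter through the model's Input layers, so compile only needs
    # an optimizer.
    optimizer = keras.optimizers.Adam(0.0001)
    model.compile(optimizer)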
Example #2
File: _rcnn.py Project: lqchien/keras-rcnn
    def __init__(self, input_shape, categories, backbone=None):
        n_categories = len(categories) + 1

        target_bounding_boxes = keras.layers.Input(
            shape=(None, 4), name="target_bounding_boxes")

        target_categories = keras.layers.Input(shape=(None, n_categories),
                                               name="target_categories")

        target_image = keras.layers.Input(shape=input_shape,
                                          name="target_image")

        target_masks = keras.layers.Input(shape=(None, 28, 28),
                                          name="target_masks")

        target_metadata = keras.layers.Input(shape=(3, ),
                                             name="target_metadata")

        options = {
            "activation": "relu",
            "kernel_size": (3, 3),
            "padding": "same"
        }

        inputs = [
            target_bounding_boxes, target_categories, target_image,
            target_masks, target_metadata
        ]

        if backbone:
            output_features = backbone()(target_image)
        else:
            output_features = keras_rcnn.models.backbone.VGG16()(target_image)

        convolution_3x3 = keras.layers.Conv2D(64, **options)(output_features)

        output_deltas = keras.layers.Conv2D(filters=9 * 4,
                                            kernel_size=(1, 1),
                                            activation="linear",
                                            kernel_initializer="zero",
                                            name="deltas")(convolution_3x3)

        output_scores = keras.layers.Conv2D(filters=9 * 1,
                                            kernel_size=(1, 1),
                                            activation="sigmoid",
                                            kernel_initializer="uniform",
                                            name="scores")(convolution_3x3)

        target_anchors, target_proposal_bounding_boxes, target_proposal_categories = keras_rcnn.layers.AnchorTarget(
        )([target_bounding_boxes, target_metadata, output_scores])

        output_deltas, output_scores = keras_rcnn.layers.RPN()([
            target_proposal_bounding_boxes, target_proposal_categories,
            output_deltas, output_scores
        ])

        output_proposal_bounding_boxes = keras_rcnn.layers.ObjectProposal()(
            [target_anchors, target_metadata, output_deltas, output_scores])

        target_proposal_bounding_boxes, target_proposal_categories, output_proposal_bounding_boxes = keras_rcnn.layers.ProposalTarget(
        )([
            target_bounding_boxes, target_categories,
            output_proposal_bounding_boxes
        ])

        output_features = keras_rcnn.layers.RegionOfInterest((14, 14))(
            [target_metadata, output_features, output_proposal_bounding_boxes])

        output_features = keras.layers.TimeDistributed(
            keras.layers.Flatten())(output_features)

        output_features = keras.layers.TimeDistributed(
            keras.layers.Dense(units=512, activation="relu"))(output_features)

        output_deltas = keras.layers.TimeDistributed(
            keras.layers.Dense(units=4 * n_categories,
                               activation="linear",
                               kernel_initializer="zero"))(output_features)

        output_scores = keras.layers.TimeDistributed(
            keras.layers.Dense(units=1 * n_categories,
                               activation="softmax",
                               kernel_initializer="zero"))(output_features)

        output_deltas, output_scores = keras_rcnn.layers.RCNN()([
            target_proposal_bounding_boxes, target_proposal_categories,
            output_deltas, output_scores
        ])

        output_bounding_boxes, output_categories = keras_rcnn.layers.ObjectDetection(
        )([
            output_proposal_bounding_boxes, output_deltas, output_scores,
            target_metadata
        ])

        outputs = [output_bounding_boxes, output_categories]

        super(RCNN, self).__init__(inputs, outputs)
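
Example #2 hard-codes the anchor count that Example #1 derives from its
arguments: the 9 in `filters=9 * 4` and `filters=9 * 1` is
k = len(anchor_aspect_ratios) × len(anchor_scales) for the default three
ratios and three scales. The arithmetic as a self-contained check:

    # k anchors are evaluated at every cell of the backbone feature map.
    aspect_ratios = [0.5, 1.0, 2.0]  # 1:2, 1:1, 2:1
    scales = [4, 8, 16]

    k = len(aspect_ratios) * len(scales)

    # Per spatial location the two RPN heads therefore output:
    deltas_channels = k * 4   # one (dx, dy, dw, dh) regression per anchor
    scores_channels = k * 1   # one objectness score per anchor

    assert (k, deltas_channels, scores_channels) == (9, 36, 9)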
Example #3
    def __init__(
            self,
            input_shape,
            categories,
            anchor_aspect_ratios=None,
            anchor_base_size=16,
            anchor_scales=None,
            anchor_stride=16,
            backbone=None,
            dense_units=512,
            mask_shape=(28, 28),
            maximum_proposals=300,
            minimum_size=16
    ):
        """
        A Region-based Convolutional Neural Network (RCNN)

        Parameters
        ----------

        input_shape : A shape tuple (integers) without the batch dimension.

            For example:

                `input_shape=(224, 224, 3)`

            specifies that the inputs are batches of $224 × 224$ RGB images.

            Likewise:

                `input_shape=(224, 224)`

            specifies that the inputs are batches of $224 × 224$ grayscale
            images.

        categories : An array-like with shape:

                $$(categories,)$$.

            For example:

                `categories=["circle", "square", "triangle"]`

            specifies that the detected objects belong to either the
            “circle,” “square,” or “triangle” category.

        anchor_aspect_ratios : An array-like with shape:

                $$(aspect\_ratios,)$$

            used to generate anchors.

            For example:

                `anchor_aspect_ratios=[0.5, 1., 2.]`

            corresponds to 1:2, 1:1, and 2:1 respectively.

        anchor_base_size : Integer that specifies an anchor’s base area:

                $$base\_area = base\_size^{2}$$.

        anchor_scales : An array-like with shape:

                $$(scales,)$$

            used to generate anchors. A scale corresponds to:

                $$scale=\sqrt{\frac{area_{anchor}}{area_{base}}}$$.

        anchor_stride : A positive integer that specifies the distance, in
            pixels, between the centers of neighboring anchors. It typically
            equals the cumulative stride of the backbone's feature map.

        backbone : An optional callable that returns the convolutional
            feature extractor applied to the input image. When omitted,
            `keras_rcnn.models.backbone.VGG16` is used.

        dense_units : A positive integer that specifies the dimensionality of
            the fully-connected layer.

            This fully-connected layer precedes the separate fully-connected
            heads for the classification, regression, and segmentation
            target functions.

            Increasing the number of dense units will increase the
            expressiveness of the network and consequently the ability to
            correctly learn the target functions, but it’ll substantially
            increase the number of learnable parameters and memory needed by
            the model.

        mask_shape : A shape tuple (integers) that specifies the spatial
            size of the target segmentation masks, e.g. `(28, 28)`.

        maximum_proposals : A positive integer that specifies the maximum
            number of object proposals returned from the model.

            The model always returns an array-like with shape:

                $$(maximum_proposals, 4)$$

            regardless of the number of object proposals returned after
            non-maximum suppression is performed. If the number of object
            proposals returned from non-maximum suppression is less than the
            number of objects specified by the `maximum_proposals` parameter,
            the model will return bounding boxes with the value:

                `[0., 0., 0., 0.]`

            and scores with the value `[0.]`.

        minimum_size : A positive integer that specifies the minimum width
            and height each object proposal must have.
        """
        if anchor_aspect_ratios is None:
            anchor_aspect_ratios = [0.5, 1.0, 2.0]

        if anchor_scales is None:
            anchor_scales = [4, 8, 16]

        n_categories = len(categories) + 1

        k = len(anchor_aspect_ratios) * len(anchor_scales)

        target_bounding_boxes = keras.layers.Input(
            shape=(None, 4),
            name="target_bounding_boxes"
        )

        target_categories = keras.layers.Input(
            shape=(None, n_categories),
            name="target_categories"
        )

        target_image = keras.layers.Input(
            shape=input_shape,
            name="target_image"
        )

        target_masks = keras.layers.Input(
            shape=(None,) + mask_shape,
            name="target_masks"
        )

        target_metadata = keras.layers.Input(
            shape=(3,),
            name="target_metadata"
        )

        options = {
            "activation": "relu",
            "kernel_size": (3, 3),
            "padding": "same"
        }

        inputs = [
            target_bounding_boxes,
            target_categories,
            target_image,
            target_masks,
            target_metadata
        ]

        if backbone:
            output_features = backbone()(target_image)
        else:
            output_features = keras_rcnn.models.backbone.VGG16()(target_image)

        convolution_3x3 = keras.layers.Conv2D(
            filters=64,
            **options
        )(output_features)

        output_deltas = keras.layers.Conv2D(
            filters=k * 4,
            kernel_size=(1, 1),
            activation="linear",
            kernel_initializer="zero",
            name="deltas"
        )(convolution_3x3)

        output_scores = keras.layers.Conv2D(
            filters=k * 1,
            kernel_size=(1, 1),
            activation="sigmoid",
            kernel_initializer="uniform",
            name="scores"
        )(convolution_3x3)

        target_anchors, target_proposal_bounding_boxes, target_proposal_categories = keras_rcnn.layers.AnchorTarget(
            aspect_ratios=anchor_aspect_ratios,
            base_size=anchor_base_size,
            scales=anchor_scales,
            stride=anchor_stride
        )([
            target_bounding_boxes,
            target_metadata,
            output_scores
        ])

        output_deltas, output_scores = keras_rcnn.layers.RPN()([
            target_proposal_bounding_boxes,
            target_proposal_categories,
            output_deltas,
            output_scores
        ])

        output_proposal_bounding_boxes = keras_rcnn.layers.ObjectProposal(
            maximum_proposals=maximum_proposals,
            minimum_size=minimum_size
        )([
            target_anchors,
            target_metadata,
            output_deltas,
            output_scores
        ])

        target_proposal_bounding_boxes, target_proposal_categories, output_proposal_bounding_boxes = keras_rcnn.layers.ProposalTarget()([
            target_bounding_boxes,
            target_categories,
            output_proposal_bounding_boxes
        ])

        output_features = keras_rcnn.layers.RegionOfInterest((14, 14))([
            target_metadata,
            output_features,
            output_proposal_bounding_boxes
        ])

        output_features = keras.layers.TimeDistributed(
            keras.layers.Flatten()
        )(output_features)

        output_features = keras.layers.TimeDistributed(
            keras.layers.Dense(
                units=dense_units,
                activation="relu"
            )
        )(output_features)

        output_deltas = keras.layers.TimeDistributed(
            keras.layers.Dense(
                units=4 * n_categories,
                activation="linear",
                kernel_initializer="zero"
            )
        )(output_features)

        output_scores = keras.layers.TimeDistributed(
            keras.layers.Dense(
                units=1 * n_categories,
                activation="softmax",
                kernel_initializer="zero"
            )
        )(output_features)

        output_deltas, output_scores = keras_rcnn.layers.RCNN()([
            target_proposal_bounding_boxes,
            target_proposal_categories,
            output_deltas,
            output_scores
        ])

        output_bounding_boxes, output_categories = keras_rcnn.layers.ObjectDetection()([
            output_proposal_bounding_boxes,
            output_deltas,
            output_scores,
            target_metadata
        ])

        outputs = [
            output_bounding_boxes,
            output_categories
        ]

        super(RCNN, self).__init__(inputs, outputs)
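
The fixed-size output contract that the docstring gives for
`maximum_proposals` can be sketched independently of the library. This is an
illustration of the described behaviour (zero-filled boxes and scores for
slots left empty after non-maximum suppression), not keras-rcnn's actual
implementation:

    import numpy as np

    def pad_proposals(boxes, scores, maximum_proposals=300):
        # Keep at most `maximum_proposals` NMS survivors and zero-pad the
        # rest, so the output shapes are always (maximum_proposals, 4) and
        # (maximum_proposals, 1).
        n = min(boxes.shape[0], maximum_proposals)
        padded_boxes = np.zeros((maximum_proposals, 4), dtype=boxes.dtype)
        padded_scores = np.zeros((maximum_proposals, 1), dtype=scores.dtype)
        padded_boxes[:n] = boxes[:n]
        padded_scores[:n] = scores[:n]
        return padded_boxes, padded_scores

    boxes = np.array([[10., 10., 50., 50.], [20., 20., 60., 60.]])
    scores = np.array([[0.9], [0.7]])
    padded_boxes, padded_scores = pad_proposals(boxes, scores)
    assert padded_boxes.shape == (300, 4)
    assert (padded_boxes[2] == 0.0).all() and padded_scores[2, 0] == 0.0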
Example #4
File: _rcnn.py Project: vz415/keras-rcnn
    def __init__(
            self,
            input_shape,
            categories,
            anchor_aspect_ratios=None,
            anchor_base_size=16,
            anchor_padding=1,
            anchor_scales=None,
            anchor_stride=16,
            backbone=None,
            dense_units=1024,
            mask_shape=(28, 28),
            maximum_proposals=300,
            minimum_size=16
    ):

        if anchor_aspect_ratios is None:
            anchor_aspect_ratios = [0.5, 1.0, 2.0]

        if anchor_scales is None:
            anchor_scales = [4, 8, 16]

        self.mask_shape = mask_shape

        self.n_categories = len(categories) + 1

        k = len(anchor_aspect_ratios) * len(anchor_scales)

        target_bounding_boxes = keras.layers.Input(
            shape=(None, 4),
            name="target_bounding_boxes"
        )

        target_categories = keras.layers.Input(
            shape=(None, self.n_categories),
            name="target_categories"
        )

        target_image = keras.layers.Input(
            shape=input_shape,
            name="target_image"
        )

        target_masks = keras.layers.Input(
            shape=(None,) + mask_shape,
            name="target_masks"
        )

        target_metadata = keras.layers.Input(
            shape=(3,),
            name="target_metadata"
        )

        options = {
            "activation": "relu",
            "kernel_size": (3, 3),
            "padding": "same"
        }

        inputs = [
            target_bounding_boxes,
            target_categories,
            target_image,
            target_masks,
            target_metadata
        ]

        if backbone:
            output_features = backbone()(target_image)
        else:
            output_features = keras_rcnn.models.backbone.VGG16()(target_image)

        convolution_3x3 = keras.layers.Conv2D(
            filters=512,
            name="3x3",
            **options
        )(output_features)

        output_deltas = keras.layers.Conv2D(
            filters=k * self.n_categories,
            kernel_size=(1, 1),
            activation="linear",
            kernel_initializer="zero",
            name="deltas1"
        )(convolution_3x3)

        output_scores = keras.layers.Conv2D(
            filters=k * self.n_categories,
            kernel_size=(1, 1),
            activation="sigmoid",
            kernel_initializer="uniform",
            name="scores1"
        )(convolution_3x3)

        # Definitely check that AnchorTarget is same as in Master branch
        target_anchors, target_proposal_bounding_boxes, target_proposal_categories = keras_rcnn.layers.Anchor(
            padding=anchor_padding,
            aspect_ratios=anchor_aspect_ratios,
            base_size=anchor_base_size,
            scales=anchor_scales,
            stride=anchor_stride
        )([
            target_bounding_boxes,
            target_metadata,
            output_scores
        ])

        output_deltas, output_scores = keras_rcnn.layers.RPN()([
            target_proposal_bounding_boxes,
            target_proposal_categories,
            output_deltas,
            output_scores
        ])

        # Check if this is the same as master branch
        output_proposal_bounding_boxes = keras_rcnn.layers.ObjectProposal(
            maximum_proposals=maximum_proposals,
            minimum_size=minimum_size
        )([
            target_anchors,
            target_metadata,
            output_deltas,
            output_scores
        ])

        target_proposal_bounding_boxes, target_proposal_categories, output_proposal_bounding_boxes = keras_rcnn.layers.ProposalTarget()([
            target_bounding_boxes,
            target_categories,
            output_proposal_bounding_boxes
        ])


        # `_mask_network` is expected to build the mask head, but it is not
        # defined in this listing and its output is never used downstream,
        # so the dead call is disabled here:
        # mask_features = self._mask_network()([
        #     target_metadata,
        #     output_features,
        #     output_proposal_bounding_boxes
        # ])

        # Pooled RoI features are kept under their own name because the mask
        # branch below reuses them before they are flattened.
        output_features_pooled = keras_rcnn.layers.RegionOfInterest(
            extent=(7, 7),
            strides=1
        )([
            target_metadata,
            output_features,
            output_proposal_bounding_boxes
        ])

        output_features = keras.layers.TimeDistributed(
            keras.layers.Flatten()
        )(output_features_pooled)

        output_features = keras.layers.TimeDistributed(
            keras.layers.Dense(
                units=dense_units,
                activation="relu",
                name="fc1"
            )
        )(output_features)

        output_features = keras.layers.TimeDistributed(
            keras.layers.Dense(
                units=dense_units,
                activation="relu",
                name="fc2"
            )
        )(output_features)

        # Bounding boxes - regression head; named "output_deltas" because it
        # predicts per-category box offsets (deltas), not absolute coordinates.
        output_deltas = keras.layers.TimeDistributed(
            keras.layers.Dense(
                units=4 * self.n_categories,
                activation="linear",
                kernel_initializer="zero",
                name="deltas2"
            )
        )(output_features)

        # Categories - classification head that scores each proposal over the
        # n_categories classes (per proposal, not per pixel; the mask branch
        # handles the pixel-level predictions).
        output_scores = keras.layers.TimeDistributed(
            keras.layers.Dense(
                units=1 * self.n_categories,
                activation="softmax",
                kernel_initializer="zero",
                name="scores2"
            )
        )(output_features)

        # Masks branch
        output_masks = keras.layers.TimeDistributed(
            keras.layers.Conv2D(
                filters=256,
                kernel_size=(3, 3),
                activation="relu",
                padding="same"
            )
        )(output_features_pooled)

        output_masks = keras.layers.TimeDistributed(
            keras.layers.Conv2DTranspose(
                activation="relu",
                filters=256,
                kernel_size=(2, 2),
                strides=2
            )
        )(output_masks)

        output_masks = keras.layers.TimeDistributed(
            keras.layers.Conv2D(
                activation="sigmoid",
                filters=self.n_categories,
                kernel_size=(1, 1),
                strides=1
            )
        )(output_masks)

        # Losses
        output_deltas, output_scores = keras_rcnn.layers.RCNN()([
            target_proposal_bounding_boxes,
            target_proposal_categories,
            output_deltas,
            output_scores
        ])

        # New - Mask Loss layer
        output_masks = keras_rcnn.layers.RCNNMaskLoss()([
            target_proposal_bounding_boxes,  # previously: target_proposal_categories
            output_deltas,
            target_masks,
            output_masks
        ])

        # Final detection step: decode the deltas against the proposals and
        # apply non-maximum suppression.
        output_bounding_boxes, output_categories = keras_rcnn.layers.ObjectDetection()([
            target_metadata,
            output_deltas,
            output_proposal_bounding_boxes,
            output_scores
        ])

        # New
        outputs = [
            output_bounding_boxes,
            output_categories,
            output_masks
        ]

        super(RCNN, self).__init__(inputs, outputs)
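
Example #4 grafts a Mask R-CNN style mask branch onto the pooled RoI
features. A standalone sketch of just that head makes the shape arithmetic
visible; the 512-channel input and n_categories = 4 are illustrative
assumptions, and note that the 14 × 14 output is half the (28, 28)
`mask_shape` targets fed to the model, consistent with this fork's
unresolved TODO comments:

    import numpy as np
    import keras

    n_categories = 4  # e.g. three categories plus background

    mask_head = keras.models.Sequential([
        keras.layers.Conv2D(256, (3, 3), activation="relu", padding="same",
                            input_shape=(7, 7, 512)),
        # Kernel 2, stride 2 exactly doubles the extent: (7, 7) -> (14, 14).
        keras.layers.Conv2DTranspose(256, (2, 2), strides=2,
                                     activation="relu"),
        # One sigmoid mask channel per category.
        keras.layers.Conv2D(n_categories, (1, 1), activation="sigmoid")
    ])

    # One pooled RoI in, a 14 x 14 per-category soft mask out.
    print(mask_head.predict(np.zeros((1, 7, 7, 512))).shape)
    # (1, 14, 14, 4)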