Example #1
    def __init__(self, input_size, fpn_channels, feature_map_size, num_class, num_mask, aspect_ratio, scales):
        super(Yolact, self).__init__()
        out = ['conv3_block4_out', 'conv4_block6_out', 'conv5_block3_out']
        # use pre-trained ResNet50
        # TODO: figure out how the pre-trained backbone can be fine-tuned (trained again)
        base_model = tf.keras.applications.ResNet50(input_shape=(550, 550, 3),
                                                    include_top=False,
                                                    layers=tf.keras.layers,
                                                    weights='imagenet')
        # extract certain feature maps for FPN
        self.backbone_resnet = tf.keras.Model(inputs=base_model.input,
                                              outputs=[base_model.get_layer(x).output for x in out])
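        # (note: to fine-tune the pre-trained weights, the extracted backbone can simply
        # be marked trainable, e.g. self.backbone_resnet.trainable = True, as Example #3
        # does with its MobileNetV2 backbone.)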
        self.backbone_fpn = FeaturePyramidNeck(fpn_channels)
        self.protonet = ProtoNet(num_mask)

        # semantic segmentation branch to boost feature richness
        self.semantic_segmentation = tf.keras.layers.Conv2D(num_class, (1, 1), 1, padding="same",
                                                            kernel_initializer=tf.keras.initializers.glorot_uniform())

        self.num_anchor, self.priors = make_priors(input_size, feature_map_size, aspect_ratio, scales)
        print("prior shape:", self.priors.shape)
        print("num anchor per feature map: ", self.num_anchor)

        # shared prediction head
        self.predictionHead = PredictionModule(256, len(aspect_ratio), num_class, num_mask)
Example #2
    def __init__(self, img_h, img_w, fpn_channels, num_class, num_mask, aspect_ratio, scales):
        super(Yolact, self).__init__()
        out = ['conv3_block4_out', 'conv4_block6_out', 'conv5_block3_out']
        # use pre-trained ResNet50
        # Keras BatchNormalization problem 
        # https://github.com/keras-team/keras/pull/9965#issuecomment-501933060
        tf.keras.layers.BatchNormalization = FrozenBatchNormalization
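        # (assumption: FrozenBatchNormalization is a BatchNormalization subclass that
        # always runs in inference mode, so the ImageNet moving statistics are not
        # updated; passing layers=tf.keras.layers below lets ResNet50 pick up the
        # patched class.)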
        base_model = tf.keras.applications.ResNet50(input_shape=(img_h, img_w, 3),
                                                    include_top=False,
                                                    layers=tf.keras.layers,
                                                    weights='imagenet')
        # extract certain feature maps for FPN
        self.backbone_resnet = tf.keras.Model(inputs=base_model.input,
                                              outputs=[base_model.get_layer(x).output for x in out])
        
        # Calculating feature map size
        # https://stackoverflow.com/a/44242277/4582711
        # https://github.com/tensorflow/tensorflow/issues/4297#issuecomment-246080982
        self.feature_map_size = np.array([list(base_model.get_layer(x).output.shape[1:3]) for x in out])
        out_height_p6 = np.ceil(self.feature_map_size[-1, 0] / 2.0)
        out_width_p6 = np.ceil(self.feature_map_size[-1, 1] / 2.0)
        out_height_p7 = np.ceil(out_height_p6 / 2.0)
        out_width_p7 = np.ceil(out_width_p6 / 2.0)
        self.feature_map_size = np.concatenate((self.feature_map_size, [[out_height_p6, out_width_p6], [out_height_p7, out_width_p7]]), axis=0)
        self.protonet_out_size = self.feature_map_size[0]*2 # Only one upsampling on p3 
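        # (illustrative, assuming a 550x550 input as in Example #1: the tapped ResNet50
        # maps are about 69x69 (p3), 35x35 (p4) and 18x18 (p5), giving p6 = 9x9,
        # p7 = 5x5 and a protonet output of 138x138.)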

        self.backbone_fpn = FeaturePyramidNeck(fpn_channels)
        self.protonet = ProtoNet(num_mask)

        # semantic segmentation branch to boost feature richness
        self.semantic_segmentation = tf.keras.layers.Conv2D(num_class-1, (1, 1), 1, padding="same",
                                                            kernel_initializer=tf.keras.initializers.glorot_uniform())

        anchorobj = anchor.Anchor(img_size_h=img_h, img_size_w=img_w,
                                  feature_map_size=self.feature_map_size,
                                  aspect_ratio=aspect_ratio,
                                  scale=scales)

        self.num_anchors = anchorobj.num_anchors
        self.priors = anchorobj.anchors
        # print("prior shape:", self.priors.shape)
        # print("num anchor per feature map: ", self.num_anchor)

        # shared prediction head
        # Here, len(aspect_ratio) is passed because during prior calculation an
        # individual scale is selected for each layer. So, when scales are
        # [24, 48, 96, 130, 192], 24 is used for p3, 48 for p4, and so on.
        # Hence the number of priors for a layer is H x W x len(aspect_ratio),
        # which is why len(aspect_ratio) is passed.
        # This implementation differs from the original one used in yolact.
        self.predictionHead = PredictionModule(256, len(aspect_ratio), num_class, num_mask)

        # post-processing for evaluation
        self.detect = Detect(num_class, bkg_label=0, top_k=200,
                             conf_thresh=0.05, nms_thresh=0.5)
        self.max_output_size = 300
Example #3
    def __init__(self, input_size, fpn_channels, feature_map_size, num_class,
                 num_mask, aspect_ratio, scales):
        super(Yolact, self).__init__()
        # use pre-trained MobileNetV2
        self.input_shape = (input_size, input_size, 3)
        self.backbone_pretrained = MobileNetV2(
            input_shape=self.input_shape).gen()
        self.backbone_pretrained.trainable = True

        # extract certain feature maps for FPN
        self.backbone_fpn = FeaturePyramidNeck(fpn_channels)
        self.protonet = ProtoNet(num_mask)

        # semantic segmentation branch to boost feature richness
        self.semantic_segmentation = tf.keras.layers.Conv2D(
            num_class, (1, 1),
            1,
            padding="same",
            kernel_initializer=tf.keras.initializers.glorot_uniform())

        self.num_anchor, self.priors = make_priors(input_size,
                                                   feature_map_size,
                                                   aspect_ratio, scales)
        print("prior shape:", self.priors.shape)
        print("num anchor per feature map: ", self.num_anchor)

        # prediction heads, one per FPN level (not shared in this variant)
        self.pred_head = [
            PredictionModule(fpn_channels, len(aspect_ratio), num_class,
                             num_mask) for _ in range(5)
        ]

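        # (the per-level head outputs are presumably concatenated along axis 1,
        # the anchor dimension, before computing losses/detections.)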
        self.concat = tf.keras.layers.Concatenate(axis=1)
Example #4
    def __init__(self, backbone, fpn_channels, num_class, num_mask,
                 anchor_params, detect_params):

        super(Yolact, self).__init__()
        # choose the backbone network
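        # (assumption: backbones_extracted maps a backbone name to the layer names to
        # tap, e.g. ['conv3_block4_out', 'conv4_block6_out', 'conv5_block3_out'] for
        # ResNet50 as in Examples #1/#2, and backbones_objects maps the name to the
        # instantiated Keras application model.)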
        try:
            out = backbones_extracted[backbone]
            base_model = backbones_objects[backbone]
        except KeyError:
            raise ValueError(
                f"Backbone option '{backbone}' is not supported yet!")

        # extract certain feature maps for FPN
        self.backbone = tf.keras.Model(
            inputs=base_model.input,
            outputs=[base_model.get_layer(x).output for x in out])
        # create the remaining parts of the model
        self.backbone_fpn = FeaturePyramidNeck(fpn_channels)
        self.protonet = ProtoNet(num_mask)

        # semantic segmentation branch to boost feature richness
        # predict num_class - 1
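        # (the background class is presumably excluded from this auxiliary branch,
        # hence num_class - 1 output channels, as in Example #2.)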
        self.semantic_segmentation = tf.keras.layers.Conv2D(num_class - 1,
                                                            1,
                                                            1,
                                                            padding="same")

        # instance of anchor object
        self.anchor_instance = Anchor(**anchor_params)
        priors = self.anchor_instance.get_anchors()
        # print("prior shape:", priors.shape)
        # print("num anchor per feature map: ", tf.shape(priors)[0])

        # shared prediction head
        self.predictionHead = PredictionModule(
            256, len(anchor_params["aspect_ratio"]), num_class, num_mask)

        # detection layer
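        # (illustrative: detect_params would typically carry NMS settings like those in
        # Example #2, e.g. bkg_label=0, top_k=200, conf_thresh=0.05, nms_thresh=0.5.)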
        self.detect = Detect(anchors=priors, **detect_params)