def __init__(self, input_size, fpn_channels, feature_map_size, num_class, num_mask, aspect_ratio, scales):
    super(Yolact, self).__init__()
    out = ['conv3_block4_out', 'conv4_block6_out', 'conv5_block3_out']

    # use pre-trained ResNet50
    # Todo: figure out how the pre-trained backbone can be trained again
    base_model = tf.keras.applications.ResNet50(input_shape=(550, 550, 3),
                                                include_top=False,
                                                layers=tf.keras.layers,
                                                weights='imagenet')
    # extract certain feature maps for FPN
    self.backbone_resnet = tf.keras.Model(inputs=base_model.input,
                                          outputs=[base_model.get_layer(x).output for x in out])
    self.backbone_fpn = FeaturePyramidNeck(fpn_channels)
    self.protonet = ProtoNet(num_mask)

    # semantic segmentation branch to boost feature richness
    self.semantic_segmentation = tf.keras.layers.Conv2D(num_class, (1, 1), 1, padding="same",
                                                        kernel_initializer=tf.keras.initializers.glorot_uniform())

    self.num_anchor, self.priors = make_priors(input_size, feature_map_size, aspect_ratio, scales)
    print("prior shape:", self.priors.shape)
    print("num anchor per feature map: ", self.num_anchor)

    # shared prediction head
    self.predictionHead = PredictionModule(256, len(aspect_ratio), num_class, num_mask)
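# A minimal instantiation sketch for this variant. The hyperparameter values below are
# assumptions (typical YOLACT-550 defaults), not values fixed by this constructor, and the
# exact format make_priors expects for feature_map_size is assumed here.
model = Yolact(input_size=550,
               fpn_channels=256,
               feature_map_size=[69, 35, 18, 9, 5],   # assumed p3-p7 grid sizes for a 550x550 input
               num_class=81,                          # 80 COCO classes + background (assumed)
               num_mask=32,                           # number of prototype masks (assumed)
               aspect_ratio=[1, 0.5, 2],              # anchors per grid cell (assumed)
               scales=[24, 48, 96, 130, 192])         # one scale per FPN level (assumed)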
def __init__(self, img_h, img_w, fpn_channels, num_class, num_mask, aspect_ratio, scales):
    super(Yolact, self).__init__()
    out = ['conv3_block4_out', 'conv4_block6_out', 'conv5_block3_out']

    # use pre-trained ResNet50
    # Keras BatchNormalization problem
    # https://github.com/keras-team/keras/pull/9965#issuecomment-501933060
    tf.keras.layers.BatchNormalization = FrozenBatchNormalization
    base_model = tf.keras.applications.ResNet50(input_shape=(img_h, img_w, 3),
                                                include_top=False,
                                                layers=tf.keras.layers,
                                                weights='imagenet')
    # extract certain feature maps for FPN
    self.backbone_resnet = tf.keras.Model(inputs=base_model.input,
                                          outputs=[base_model.get_layer(x).output for x in out])

    # Calculating feature map size
    # https://stackoverflow.com/a/44242277/4582711
    # https://github.com/tensorflow/tensorflow/issues/4297#issuecomment-246080982
    self.feature_map_size = np.array([list(base_model.get_layer(x).output.shape[1:3]) for x in out])
    out_height_p6 = np.ceil((self.feature_map_size[-1, 0]).astype(np.float32) / float(2))
    out_width_p6 = np.ceil((self.feature_map_size[-1, 1]).astype(np.float32) / float(2))
    out_height_p7 = np.ceil(out_height_p6 / float(2))
    out_width_p7 = np.ceil(out_width_p6 / float(2))
    self.feature_map_size = np.concatenate((self.feature_map_size,
                                            [[out_height_p6, out_width_p6],
                                             [out_height_p7, out_width_p7]]), axis=0)
    self.protonet_out_size = self.feature_map_size[0] * 2  # only one upsampling on p3

    self.backbone_fpn = FeaturePyramidNeck(fpn_channels)
    self.protonet = ProtoNet(num_mask)

    # semantic segmentation branch to boost feature richness
    self.semantic_segmentation = tf.keras.layers.Conv2D(num_class - 1, (1, 1), 1, padding="same",
                                                        kernel_initializer=tf.keras.initializers.glorot_uniform())

    anchorobj = anchor.Anchor(img_size_h=img_h, img_size_w=img_w,
                              feature_map_size=self.feature_map_size,
                              aspect_ratio=aspect_ratio,
                              scale=scales)
    self.num_anchors = anchorobj.num_anchors
    self.priors = anchorobj.anchors
    # print("prior shape:", self.priors.shape)
    # print("num anchor per feature map: ", self.num_anchors)

    # shared prediction head
    # len(aspect_ratio) is passed because, during prior calculation, an individual scale is
    # selected for each layer: with scales [24, 48, 96, 130, 192], 24 belongs to p3, 48 to p4,
    # and so on. The number of priors for a layer is therefore H x W x len(aspect_ratio),
    # hence len(aspect_ratio) is passed here.
    # This implementation differs from the original used in YOLACT.
    self.predictionHead = PredictionModule(256, len(aspect_ratio), num_class, num_mask)

    # post-processing for evaluation
    self.detect = Detect(num_class, bkg_label=0, top_k=200, conf_thresh=0.05, nms_thresh=0.5)
    self.max_output_size = 300
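# Worked sketch of the feature-map-size arithmetic described in the comments above, assuming a
# 550x550 input and the usual ResNet50 strides of 8/16/32 for the conv3/conv4/conv5 outputs.
# SAME padding gives out = ceil(in / stride); p6 and p7 each halve the previous level.
import numpy as np

img = 550
c3, c4, c5 = np.ceil(img / 8), np.ceil(img / 16), np.ceil(img / 32)   # 69, 35, 18
p6 = np.ceil(c5 / 2)                                                   # 9
p7 = np.ceil(p6 / 2)                                                   # 5
feature_map_size = np.array([c3, c4, c5, p6, p7])                      # [69, 35, 18, 9, 5]

# With one scale per level, each grid cell contributes len(aspect_ratio) priors,
# so the total prior count is the sum of H * W * len(aspect_ratio) over all levels.
aspect_ratio = [1, 0.5, 2]                                             # assumed
num_priors = int(np.sum(feature_map_size ** 2) * len(aspect_ratio))    # 19248 for a 550x550 input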
def __init__(self, input_size, fpn_channels, feature_map_size, num_class, num_mask, aspect_ratio, scales):
    super(Yolact, self).__init__()

    # use pre-trained MobileNetV2
    self.input_shape = (input_size, input_size, 3)
    self.backbone_pretrained = MobileNetV2(input_shape=(self.input_shape)).gen()
    self.backbone_pretrained.trainable = True

    # extract certain feature maps for FPN
    self.backbone_fpn = FeaturePyramidNeck(fpn_channels)
    self.protonet = ProtoNet(num_mask)

    # semantic segmentation branch to boost feature richness
    self.semantic_segmentation = tf.keras.layers.Conv2D(
        num_class, (1, 1), 1, padding="same",
        kernel_initializer=tf.keras.initializers.glorot_uniform())

    self.num_anchor, self.priors = make_priors(input_size, feature_map_size, aspect_ratio, scales)
    print("prior shape:", self.priors.shape)
    print("num anchor per feature map: ", self.num_anchor)

    # shared prediction head (one PredictionModule per FPN level)
    self.pred_head = [
        PredictionModule(fpn_channels, len(aspect_ratio), num_class, num_mask),
        PredictionModule(fpn_channels, len(aspect_ratio), num_class, num_mask),
        PredictionModule(fpn_channels, len(aspect_ratio), num_class, num_mask),
        PredictionModule(fpn_channels, len(aspect_ratio), num_class, num_mask),
        PredictionModule(fpn_channels, len(aspect_ratio), num_class, num_mask)
    ]
    self.concat = tf.keras.layers.Concatenate(axis=1)
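# Minimal, self-contained sketch (with dummy tensors) of why the Concatenate(axis=1) above is
# plausible: each per-level head flattens its H x W x anchors grid into a
# [batch, priors_for_that_level, ...] tensor, so stacking the five levels along axis 1 yields one
# tensor over all priors. The grid sizes and output shapes are assumptions, not PredictionModule's
# actual outputs.
import tensorflow as tf

batch, num_aspect_ratios = 2, 3
grid_sizes = [69, 35, 18, 9, 5]                       # assumed p3-p7 sizes for a 550x550 input
box_outputs = [tf.zeros((batch, g * g * num_aspect_ratios, 4)) for g in grid_sizes]
all_boxes = tf.keras.layers.Concatenate(axis=1)(box_outputs)
print(all_boxes.shape)                                # (2, 19248, 4)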
def __init__(self, backbone, fpn_channels, num_class, num_mask, anchor_params, detect_params):
    super(Yolact, self).__init__()

    # choose the backbone network
    try:
        out = backbones_extracted[backbone]
        base_model = backbones_objects[backbone]
    except KeyError:
        raise Exception(f'Backbone option of {backbone} is not supported yet!!!')

    # extract certain feature maps for FPN
    self.backbone = tf.keras.Model(inputs=base_model.input,
                                   outputs=[base_model.get_layer(x).output for x in out])

    # create the remaining parts of the model
    self.backbone_fpn = FeaturePyramidNeck(fpn_channels)
    self.protonet = ProtoNet(num_mask)

    # semantic segmentation branch to boost feature richness
    # predicts num_class - 1 channels (background excluded)
    self.semantic_segmentation = tf.keras.layers.Conv2D(num_class - 1, 1, 1, padding="same")

    # instance of anchor object
    self.anchor_instance = Anchor(**anchor_params)
    priors = self.anchor_instance.get_anchors()
    # print("prior shape:", priors.shape)
    # print("num anchor per feature map: ", tf.shape(priors)[0])

    # shared prediction head
    self.predictionHead = PredictionModule(256, len(anchor_params["aspect_ratio"]),
                                           num_class, num_mask)

    # detection layer
    self.detect = Detect(anchors=priors, **detect_params)
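# Hypothetical configuration sketch for this config-driven variant. The backbone key and the
# exact contents of anchor_params / detect_params are assumptions: anchor_params mirrors the
# Anchor call in the earlier ResNet50 variant, and detect_params mirrors its Detect call.
anchor_params = {
    "img_size_h": 550, "img_size_w": 550,             # assumed input resolution
    "feature_map_size": [[69, 69], [35, 35], [18, 18], [9, 9], [5, 5]],
    "aspect_ratio": [1, 0.5, 2],
    "scale": [24, 48, 96, 130, 192],
}
detect_params = {
    "num_class": 81, "bkg_label": 0, "top_k": 200,    # assumed, following the earlier Detect call
    "conf_thresh": 0.05, "nms_thresh": 0.5,
}
model = Yolact(backbone="resnet50",                   # assumed key in backbones_objects
               fpn_channels=256, num_class=81, num_mask=32,
               anchor_params=anchor_params, detect_params=detect_params)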