def test(self, test_path=test_image_dir):
    """Run full-resolution inference over the images in *test_path*.

    Loads the best saved weights, wraps the trained segmentation model so it
    accepts arbitrary-size images (downscale -> segment -> upscale), then:
      1. saves the wrapped model to 'fullres_model.h5',
      2. plots predictions for the first 8 images to 'test_predictions.png',
      3. RLE-encodes every predicted mask and writes 'submission_test.csv'.
    """
    gc.enable()
    self.load_weights("{}_weights.best.hdf5".format('seg_model'))
    # Prepare Full Resolution Model: the network was trained on scaled-down
    # images, so sandwich it between an AvgPool and an UpSampling layer.
    if IMG_SCALING is not None:
        fullres_model = models.Sequential()
        fullres_model.add(
            layers.AvgPool2D(IMG_SCALING, input_shape=(None, None, 3)))
        fullres_model.add(self.seg_model)
        fullres_model.add(layers.UpSampling2D(IMG_SCALING))
    else:
        fullres_model = self.seg_model
    fullres_model.save('fullres_model.h5')

    test_paths = os.listdir(test_path)
    print(len(test_paths), 'test images found')

    # Qualitative check: show image/prediction pairs for the first 8 images.
    fig, m_axs = plt.subplots(8, 2, figsize=(10, 40))
    [c_ax.axis('off') for c_ax in m_axs.flatten()]
    for (ax1, ax2), c_img_name in zip(m_axs, test_paths):
        c_path = os.path.join(test_path, c_img_name)
        c_img = imread(c_path)
        first_img = np.expand_dims(c_img, 0) / 255.0
        first_seg = fullres_model.predict(first_img)
        ax1.imshow(first_img[0])
        ax1.set_title('Image')
        ax2.imshow(first_seg[0, :, :, 0], vmin=0, vmax=1)
        ax2.set_title('Prediction')
    fig.savefig('test_predictions.png')

    out_pred_rows = []
    for c_img_name in tqdm(test_paths):
        # BUG FIX: this loop previously read from the global test_image_dir,
        # silently ignoring the test_path argument. Use test_path so the
        # parameter is honored (default keeps the old behavior).
        c_path = os.path.join(test_path, c_img_name)
        c_img = imread(c_path)
        c_img = np.expand_dims(c_img, 0) / 255.0
        cur_seg = fullres_model.predict(c_img)[0]
        # Morphological opening with a radius-2 disk removes speckle noise
        # from the thresholded mask.
        cur_seg = binary_opening(cur_seg > 0.5, np.expand_dims(disk(2), -1))
        cur_rles = multi_rle_encode(cur_seg)
        if len(cur_rles) > 0:
            for c_rle in cur_rles:
                out_pred_rows += [{
                    'ImageId': c_img_name,
                    'EncodedPixels': c_rle
                }]
        else:
            # Submission format requires one row per image even when nothing
            # was detected.
            out_pred_rows += [{
                'ImageId': c_img_name,
                'EncodedPixels': None
            }]
        gc.collect()

    submission_df = pd.DataFrame(out_pred_rows)[[
        'ImageId', 'EncodedPixels'
    ]]
    submission_df.to_csv('submission_test.csv', index=False)
    submission_df.sample(3)
    # BUG FIX: the original lambda returned c_row['counts'] for rows with a
    # mask, but the 'counts' column does not exist yet at that point, so any
    # image with a detection raised KeyError. Each row carries at most one
    # RLE mask, so count 1 per encoded row and 0 for empty rows.
    submission_df['counts'] = submission_df.apply(
        lambda c_row: 1
        if isinstance(c_row['EncodedPixels'], str) else 0, 1)
    submission_df['counts'].hist()
def build(self):
    '''
    Build the Mask R-CNN architecture (without mask head) for the current
    ``self.mode``.

    In "training" mode the model takes image + image-meta + RPN/GT inputs and
    outputs RPN/classifier heads plus four loss tensors. In "inference" mode
    it takes image + image-meta + precomputed anchors and outputs detections.
    Returns the assembled ``KM.Model`` (wrapped in ParallelModel when
    ``GPU_COUNT > 1``).
    '''
    # Inputs
    input_image = KL.Input(
        shape=[None, None, self.cfg.IMAGE.NB_CHANNELS], name="input_image")
    input_image_meta = KL.Input(shape=[self.cfg.IMAGE_META_SIZE],
                                name="input_image_meta")
    if self.mode == 'training':
        # RPN GT: per-anchor match labels and target bbox deltas.
        input_rpn_match = KL.Input(
            shape=[None, 1], name="input_rpn_match", dtype=tf.int32)
        input_rpn_bbox = KL.Input(
            shape=[None, 4], name="input_rpn_bbox", dtype=tf.float32)
        # Detection GT (class IDs, bounding boxes, and masks)
        # 1. GT Class IDs (zero padded)
        input_gt_class_ids = KL.Input(
            shape=[None], name="input_gt_class_ids", dtype=tf.int32)
        # 2. GT Boxes in pixels (zero padded)
        # [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] in image coordinates
        input_gt_boxes = KL.Input(
            shape=[None, 4], name="input_gt_boxes", dtype=tf.float32)
        # Normalize GT box coordinates to [0, 1] using the dynamic image size.
        gt_boxes = KL.Lambda(lambda x: gutils.norm_boxes_graph(
            x, K.shape(input_image)[1:3]))(input_gt_boxes)
    elif self.mode == 'inference':
        # Anchors in normalized coordinates, supplied by the caller.
        input_anchors = KL.Input(shape=[None, 4], name="input_anchors")

    # Build the shared convolutional layers (backbone).
    # Bottom-up Layers
    # Returns a list of the last layers of each stage, 5 in total.
    # Don't create the thead (stage 5), so we pick the 4th item in the list.
    if callable(self.cfg.ARCHI.BACKBONE):
        # A custom backbone callable was supplied in the config.
        _, C2, C3, C4, C5 = self.cfg.ARCHI.BACKBONE(
            input_image, stage5=True, train_bn=self.cfg.ARCHI.TRAIN_BN)
    else:
        # Otherwise BACKBONE names a ResNet variant.
        _, C2, C3, C4, C5 = resnet.resnet_graph(
            input_image, self.cfg.ARCHI.BACKBONE,
            stage5=True, train_bn=self.cfg.ARCHI.TRAIN_BN)

    # Top-down Layers (FPN): 1x1 lateral convs merged with 2x upsampling.
    # TODO: add assert to varify feature map sizes match what's in config
    P5 = KL.Conv2D(self.cfg.ARCHI.TOP_DOWN_PYRAMID_SIZE, (1, 1),
                   name='fpn_c5p5')(C5)
    P4 = KL.Add(name="fpn_p4add")([
        KL.UpSampling2D(size=(2, 2), name="fpn_p5upsampled")(P5),
        KL.Conv2D(self.cfg.ARCHI.TOP_DOWN_PYRAMID_SIZE, (1, 1),
                  name='fpn_c4p4')(C4)])
    P3 = KL.Add(name="fpn_p3add")([
        KL.UpSampling2D(size=(2, 2), name="fpn_p4upsampled")(P4),
        KL.Conv2D(self.cfg.ARCHI.TOP_DOWN_PYRAMID_SIZE, (1, 1),
                  name='fpn_c3p3')(C3)])
    P2 = KL.Add(name="fpn_p2add")([
        KL.UpSampling2D(size=(2, 2), name="fpn_p3upsampled")(P3),
        KL.Conv2D(self.cfg.ARCHI.TOP_DOWN_PYRAMID_SIZE, (1, 1),
                  name='fpn_c2p2')(C2)])
    # Attach 3x3 conv to all P layers to get the final feature maps.
    P2 = KL.Conv2D(self.cfg.ARCHI.TOP_DOWN_PYRAMID_SIZE, (3, 3),
                   padding="SAME", name="fpn_p2")(P2)
    P3 = KL.Conv2D(self.cfg.ARCHI.TOP_DOWN_PYRAMID_SIZE, (3, 3),
                   padding="SAME", name="fpn_p3")(P3)
    P4 = KL.Conv2D(self.cfg.ARCHI.TOP_DOWN_PYRAMID_SIZE, (3, 3),
                   padding="SAME", name="fpn_p4")(P4)
    P5 = KL.Conv2D(self.cfg.ARCHI.TOP_DOWN_PYRAMID_SIZE, (3, 3),
                   padding="SAME", name="fpn_p5")(P5)
    # P6 is used for the 5th anchor scale in RPN. Generated by
    # subsampling from P5 with stride of 2.
    P6 = KL.MaxPooling2D(pool_size=(1, 1), strides=2, name="fpn_p6")(P5)

    # Note that P6 is used in RPN, but not in the classifier heads.
    rpn_feature_maps = [P2, P3, P4, P5, P6]
    mrcnn_feature_maps = [P2, P3, P4, P5]

    # Anchors
    if self.mode == 'training':
        anchors = self.get_anchors(self.img_shape)
        # Duplicate across the batch dimension because Keras requires it
        # TODO: can this be optimized to avoid duplicating the anchors?
        anchors = np.broadcast_to(anchors,
                                  (self.cfg.BATCH_SIZE,) + anchors.shape)
        # A hack to get around Keras's bad support for constants: the Lambda
        # ignores its input and emits the anchor array as a tf.Variable.
        anchors = KL.Lambda(lambda x: tf.Variable(anchors),
                            name="anchors")(input_image)
    else:
        anchors = input_anchors

    # RPN Model: one shared model applied to every pyramid level.
    rpn = rpnlib.build_rpn_model(self.cfg.ARCHI.RPN_ANCHOR_STRIDE,
                                 len(self.cfg.ARCHI.RPN_ANCHOR_RATIOS),
                                 self.cfg.ARCHI.TOP_DOWN_PYRAMID_SIZE)
    # Loop through pyramid layers
    layer_outputs = []  # list of lists
    for p in rpn_feature_maps:
        layer_outputs.append(rpn([p]))
    # Concatenate layer outputs
    # Convert from list of lists of level outputs to list of lists
    # of outputs across levels.
    # e.g. [[a1, b1, c1], [a2, b2, c2]] => [[a1, a2], [b1, b2], [c1, c2]]
    output_names = ["rpn_class_logits", "rpn_class", "rpn_bbox"]
    outputs = list(zip(*layer_outputs))
    outputs = [KL.Concatenate(axis=1, name=n)(list(o))
               for o, n in zip(outputs, output_names)]
    rpn_class_logits, rpn_class, rpn_bbox = outputs

    # Generate proposals
    # Proposals are [batch, N, (y1, x1, y2, x2)] in normalized coordinates
    # and zero padded. N differs between training and inference.
    proposal_count = self.cfg.ARCHI.POST_NMS_ROIS_TRAINING if self.mode == 'training' else self.cfg.ARCHI.POST_NMS_ROIS_INFERENCE
    rpn_rois = proposal.ProposalLayer(
        proposal_count=proposal_count,
        nms_threshold=self.cfg.ARCHI.RPN_NMS_THRESHOLD,
        name="ROI",
        config=self.cfg)([rpn_class, rpn_bbox, anchors])

    if self.mode == 'training':
        # Class ID mask to mark class IDs supported by the dataset the
        # image came from (parsed out of the image meta vector).
        active_class_ids = KL.Lambda(
            lambda x: meta.parse_image_meta_graph(x)["active_class_ids"])(input_image_meta)

        if not self.cfg.ARCHI.USE_RPN_ROIS:
            # Ignore predicted ROIs and use ROIs provided as an input.
            # NOTE(review): dtype=np.int32 here implies pixel-integer ROIs
            # that are normalized just below — confirm callers feed ints.
            input_rois = KL.Input(
                shape=[self.cfg.ARCHI.POST_NMS_ROIS_TRAINING, 4],
                name='input_roi', dtype=np.int32)
            # Normalize coordinates
            target_rois = KL.Lambda(lambda x: gutils.norm_boxes_graph(
                x, K.shape(input_image)[1:3]))(input_rois)
        else:
            target_rois = rpn_rois

        # Generate detection targets
        # Subsamples proposals and generates target outputs for training
        # Note that proposal class IDs, gt_boxes, and gt_masks are zero
        # padded. Equally, returned rois and targets are zero padded.
        rois, target_class_ids, target_bbox = detection_target.DetectionTargetLayer(
            self.cfg, name='proposal_targets')([target_rois,
                                                input_gt_class_ids, gt_boxes])

        # Network Heads
        # TODO: verify that this handles zero padded ROIs
        mrcnn_class_logits, mrcnn_class, mrcnn_bbox = fpnlib.fpn_classifier_graph(
            rois, mrcnn_feature_maps, input_image_meta,
            self.cfg.ARCHI.POOL_SIZE, self.cfg.DATASET.NB_CLASSES,
            train_bn=self.cfg.ARCHI.TRAIN_BN,
            fc_layers_size=self.cfg.ARCHI.FPN_CLASSIF_FC_LAYERS_SIZE)

        # TODO: clean up (use tf.identify if necessary)
        # Identity-through-Lambda so the ROIs appear as a named model output.
        output_rois = KL.Lambda(lambda x: x * 1, name="output_rois")(rois)

        # Losses — each is wrapped in a Lambda so it becomes a graph tensor
        # that can be added via model.add_loss / compile machinery.
        rpn_class_loss = KL.Lambda(lambda x: l.rpn_class_loss_graph(*x),
                                   name="rpn_class_loss")(
            [input_rpn_match, rpn_class_logits])
        rpn_bbox_loss = KL.Lambda(lambda x: l.rpn_bbox_loss_graph(self.cfg, *x),
                                  name="rpn_bbox_loss")(
            [input_rpn_bbox, input_rpn_match, rpn_bbox])
        class_loss = KL.Lambda(lambda x: l.mrcnn_class_loss_graph(*x),
                               name="mrcnn_class_loss")(
            [target_class_ids, mrcnn_class_logits, active_class_ids])
        bbox_loss = KL.Lambda(lambda x: l.mrcnn_bbox_loss_graph(*x),
                              name="mrcnn_bbox_loss")(
            [target_bbox, target_class_ids, mrcnn_bbox])

        # Model
        inputs = [input_image, input_image_meta,
                  input_rpn_match, input_rpn_bbox,
                  input_gt_class_ids, input_gt_boxes]
        if not self.cfg.ARCHI.USE_RPN_ROIS:
            inputs.append(input_rois)
        outputs = [rpn_class_logits, rpn_class, rpn_bbox,
                   mrcnn_class_logits, mrcnn_class, mrcnn_bbox,
                   rpn_rois, output_rois,
                   rpn_class_loss, rpn_bbox_loss, class_loss, bbox_loss]
        model = KM.Model(inputs, outputs, name='mask_rcnn')
    else:
        # Network Heads
        # Proposal classifier and BBox regressor heads
        mrcnn_class_logits, mrcnn_class, mrcnn_bbox = fpnlib.fpn_classifier_graph(
            rpn_rois, mrcnn_feature_maps, input_image_meta,
            self.cfg.ARCHI.POOL_SIZE, self.cfg.DATASET.NB_CLASSES,
            train_bn=self.cfg.ARCHI.TRAIN_BN,
            fc_layers_size=self.cfg.ARCHI.FPN_CLASSIF_FC_LAYERS_SIZE)

        # Detections
        # output is [batch, num_detections, (y1, x1, y2, x2, class_id, score)]
        # in normalized coordinates
        detections = detection.DetectionLayer(
            self.cfg, name="mrcnn_detection")(
            [rpn_rois, mrcnn_class, mrcnn_bbox, input_image_meta])

        model = KM.Model([input_image, input_image_meta, input_anchors],
                         [detections, mrcnn_class, mrcnn_bbox,
                          rpn_rois, rpn_class, rpn_bbox],
                         name='mask_rcnn')

    # Add multi-GPU support.
    if self.cfg.GPU_COUNT > 1:
        from mrcnn.parallel_model import ParallelModel
        model = ParallelModel(model, self.cfg.GPU_COUNT)

    return model
# --- Convolutional autoencoder for 32x32 single-channel images ---

# Encoder: three conv + 2x2 max-pool stages shrink 32x32x1 down to 4x4x8.
input_img = keras.Input(shape=(32, 32, 1))
h = layers.Conv2D(16, (3, 3), activation='relu', padding='same')(input_img)
h = layers.MaxPooling2D((2, 2), padding='same')(h)
h = layers.Conv2D(8, (3, 3), activation='relu', padding='same')(h)
h = layers.MaxPooling2D((2, 2), padding='same')(h)
h = layers.Conv2D(8, (3, 3), activation='relu', padding='same')(h)
# "encoded": the bottleneck representation, (4, 4, 8) i.e. 128-dimensional.
encoded = layers.MaxPooling2D((2, 2), padding='same')(h)

# Decoder: mirror the encoder, upsampling back to the 32x32 input size.
h = layers.Conv2D(8, (3, 3), activation='relu', padding='same')(encoded)
h = layers.UpSampling2D((2, 2))(h)
h = layers.Conv2D(8, (3, 3), activation='relu', padding='same')(h)
h = layers.UpSampling2D((2, 2))(h)
h = layers.Conv2D(16, (3, 3), activation='relu', padding='same')(h)
h = layers.UpSampling2D((2, 2))(h)
# "decoded": the lossy reconstruction of the input.
decoded = layers.Conv2D(1, (3, 3), activation='sigmoid', padding='same')(h)

# End-to-end model mapping an input image to its reconstruction.
autoencoder = keras.Model(input_img, decoded)
print(autoencoder.summary())
autoencoder.compile(optimizer='rmsprop', loss='mean_squared_error')
def Unet(GAUSSIAN_NOISE=0.1, UPSAMPLE_MODE='SIMPLE', NET_SCALING=(1, 1),
         EDGE_CROP=16):
    """Build a U-Net for 768x768 RGB inputs with a sigmoid mask output.

    GAUSSIAN_NOISE: stddev of input noise regularization.
    UPSAMPLE_MODE: 'DECONV' for learned transposed-conv upsampling,
        anything else for plain nearest-neighbour UpSampling2D.
    NET_SCALING: optional in-network down/upscaling factor (None to disable).
    EDGE_CROP: currently unused (edge cropping is disabled).
    Returns an uncompiled keras Model.
    """

    def upsample_conv(filters, kernel_size, strides, padding):
        # Learned upsampling.
        return layers.Conv2DTranspose(filters, kernel_size,
                                      strides=strides, padding=padding)

    def upsample_simple(filters, kernel_size, strides, padding):
        # Parameter-free upsampling; only the strides argument matters.
        return layers.UpSampling2D(strides)

    upsample = upsample_conv if UPSAMPLE_MODE == 'DECONV' else upsample_simple

    input_img = layers.Input((768, 768, 3), name='RGB_Input')
    pp_in_layer = input_img
    if NET_SCALING is not None:
        # Downscale inside the network before feature extraction.
        pp_in_layer = layers.AvgPool2D(NET_SCALING)(pp_in_layer)
    pp_in_layer = layers.GaussianNoise(GAUSSIAN_NOISE)(pp_in_layer)
    pp_in_layer = layers.BatchNormalization()(pp_in_layer)

    def double_conv(tensor, filters):
        # Two stacked 3x3 relu convolutions — the standard U-Net conv block.
        tensor = layers.Conv2D(filters, (3, 3), activation='relu',
                               padding='same')(tensor)
        return layers.Conv2D(filters, (3, 3), activation='relu',
                             padding='same')(tensor)

    # Contracting path.
    c1 = double_conv(pp_in_layer, 16)
    p1 = layers.MaxPooling2D((2, 2))(c1)
    c2 = double_conv(p1, 32)
    p2 = layers.MaxPooling2D((2, 2))(c2)
    c3 = double_conv(p2, 64)
    p3 = layers.MaxPooling2D((2, 2))(c3)
    c4 = double_conv(p3, 128)
    p4 = layers.MaxPooling2D(pool_size=(2, 2))(c4)
    c5 = double_conv(p4, 256)

    # Expanding path: upsample, concatenate the skip connection, double conv.
    u6 = layers.concatenate(
        [upsample(128, (2, 2), strides=(2, 2), padding='same')(c5), c4])
    c6 = double_conv(u6, 128)
    u7 = layers.concatenate(
        [upsample(64, (2, 2), strides=(2, 2), padding='same')(c6), c3])
    c7 = double_conv(u7, 64)
    u8 = layers.concatenate(
        [upsample(32, (2, 2), strides=(2, 2), padding='same')(c7), c2])
    c8 = double_conv(u8, 32)
    u9 = layers.concatenate(
        [upsample(16, (2, 2), strides=(2, 2), padding='same')(c8), c1],
        axis=3)
    c9 = double_conv(u9, 16)

    # 1x1 sigmoid convolution gives the per-pixel mask probability.
    d = layers.Conv2D(1, (1, 1), activation='sigmoid')(c9)
    if NET_SCALING is not None:
        # Undo the input downscaling so the mask matches the input size.
        d = layers.UpSampling2D(NET_SCALING)(d)

    seg_model = models.Model(inputs=[input_img], outputs=[d])
    seg_model.summary()
    return seg_model
def create_model(self, ):
    """Build a U-Net whose skip connections are cropped to match sizes.

    The input is zero-padded by 7 px per side so odd dimensions survive the
    repeated 2x2 poolings; each decoder stage crops the corresponding encoder
    feature map (via self.get_crop_shape) before concatenation, and the final
    feature map is cropped back to the original input size.
    Returns an uncompiled keras Model with a single-channel linear output.
    """
    concat_axis = 3  # channels-last: concatenate along the channel axis
    inputs = layers.Input(self.input_shape)
    # Pad so the spatial size stays divisible through four poolings.
    pad = layers.ZeroPadding2D((7, 7))(inputs)

    # Contracting path: double 3x3 conv then 2x2 max-pool, doubling filters.
    conv1 = layers.Conv2D(32, (3, 3), activation='relu', padding='same',
                          name='conv1')(pad)
    conv1 = layers.Conv2D(32, (3, 3), activation='relu',
                          padding='same')(conv1)
    pool1 = layers.MaxPooling2D(pool_size=(2, 2))(conv1)
    conv2 = layers.Conv2D(64, (3, 3), activation='relu', padding='same',
                          name='conv2')(pool1)
    conv2 = layers.Conv2D(64, (3, 3), activation='relu',
                          padding='same')(conv2)
    pool2 = layers.MaxPooling2D(pool_size=(2, 2))(conv2)
    conv3 = layers.Conv2D(128, (3, 3), activation='relu', padding='same',
                          name='conv3')(pool2)
    conv3 = layers.Conv2D(128, (3, 3), activation='relu',
                          padding='same')(conv3)
    pool3 = layers.MaxPooling2D(pool_size=(2, 2))(conv3)
    conv4 = layers.Conv2D(256, (3, 3), activation='relu', padding='same',
                          name='conv4')(pool3)
    conv4 = layers.Conv2D(256, (3, 3), activation='relu',
                          padding='same')(conv4)
    pool4 = layers.MaxPooling2D(pool_size=(2, 2))(conv4)
    # Bottleneck.
    conv5 = layers.Conv2D(512, (3, 3), activation='relu', padding='same',
                          name='conv5')(pool4)
    conv5 = layers.Conv2D(512, (3, 3), activation='relu',
                          padding='same')(conv5)

    # Expanding path: upsample, crop the encoder map to the upsampled size,
    # concatenate, then double conv.
    up_conv5 = layers.UpSampling2D(size=(2, 2))(conv5)
    ch, cw = self.get_crop_shape(conv4, up_conv5)
    crop_conv4 = layers.Cropping2D(cropping=(ch, cw))(conv4)
    up6 = layers.concatenate([up_conv5, crop_conv4], axis=concat_axis)
    conv6 = layers.Conv2D(256, (3, 3), activation='relu', padding='same',
                          name='conv6')(up6)
    conv6 = layers.Conv2D(256, (3, 3), activation='relu',
                          padding='same')(conv6)
    up_conv6 = layers.UpSampling2D(size=(2, 2))(conv6)
    ch, cw = self.get_crop_shape(conv3, up_conv6)
    crop_conv3 = layers.Cropping2D(cropping=(ch, cw))(conv3)
    up7 = layers.concatenate([up_conv6, crop_conv3], axis=concat_axis)
    conv7 = layers.Conv2D(128, (3, 3), activation='relu', padding='same',
                          name='conv7')(up7)
    conv7 = layers.Conv2D(128, (3, 3), activation='relu',
                          padding='same')(conv7)
    up_conv7 = layers.UpSampling2D(size=(2, 2))(conv7)
    ch, cw = self.get_crop_shape(conv2, up_conv7)
    crop_conv2 = layers.Cropping2D(cropping=(ch, cw))(conv2)
    up8 = layers.concatenate([up_conv7, crop_conv2], axis=concat_axis)
    conv8 = layers.Conv2D(64, (3, 3), activation='relu', padding='same',
                          name='conv8')(up8)
    conv8 = layers.Conv2D(64, (3, 3), activation='relu',
                          padding='same')(conv8)
    up_conv8 = layers.UpSampling2D(size=(2, 2))(conv8)
    ch, cw = self.get_crop_shape(conv1, up_conv8)
    crop_conv1 = layers.Cropping2D(cropping=(ch, cw))(conv1)
    up9 = layers.concatenate([up_conv8, crop_conv1], axis=concat_axis)
    conv9 = layers.Conv2D(32, (3, 3), activation='relu', padding='same',
                          name='conv9')(up9)
    conv9 = layers.Conv2D(32, (3, 3), activation='relu',
                          padding='same')(conv9)

    # ch, cw = self.get_crop_shape(inputs, conv9)
    # conv9 = layers.ZeroPadding2D(padding=((ch[0], ch[1]), (cw[0], cw[1])))(conv9)
    # Crop the decoder output back down to the (unpadded) input size.
    ch, cw = self.get_crop_shape(conv9, inputs)
    conv9 = layers.Cropping2D(cropping=(ch, cw))(conv9)
    # Final 1x1 conv: single-channel output with no activation (logits).
    conv10 = layers.Conv2D(1, kernel_size=(1, 1), name='conv10')(conv9)
    model = models.Model(inputs=inputs, outputs=conv10)

    return model
def define_VAE_architecture(latent_dim=10):
    '''
    Defines a variational autoencoder with a convolutional neural network
    architecture for our 128 x 128 images. Returns the model.

    latent_dim: size of the latent space passed to ngdlmodels.VAE.
    '''
    ### Definition of NN architecture
    latent_dim = latent_dim  # no-op rebinding kept from the original

    # Encoder: two conv/pool stages (128 -> 64 -> 32 spatial), then dense
    # layers down to a 64-dim feature vector.
    # NOTE(review): the encoder output is 64-dim while the decoder input is
    # latent_dim (10) — presumably ngdlmodels.VAE inserts the mean/log-var
    # projection and sampling in between; confirm against its implementation.
    encoder_input = layers.Input(shape=(128, 128, 1))
    encoder_output = layers.Conv2D(
        32, (3, 3),
        kernel_initializer=initializers.lecun_normal(seed=0),
        activation="relu",
        padding="same")(encoder_input)
    encoder_output = layers.MaxPooling2D((2, 2),
                                         padding="same")(encoder_output)
    encoder_output = layers.Conv2D(
        32, (3, 3),
        kernel_initializer=initializers.lecun_normal(seed=0),
        activation="relu",
        padding="same")(encoder_output)
    encoder_output = layers.MaxPooling2D((2, 2),
                                         padding="same")(encoder_output)
    encoder_output = layers.Flatten()(encoder_output)
    encoder_output = layers.Dense(
        1024, activation="relu",
        kernel_initializer=initializers.lecun_normal(seed=0))(encoder_output)
    encoder_output = layers.Dense(
        64, activation="relu",
        kernel_initializer=initializers.lecun_normal(seed=0))(encoder_output)
    encoder = models.Model(encoder_input, encoder_output)

    # Decoder: dense layers back up to 32*32*32, reshape, then two
    # conv/upsample stages (32 -> 64 -> 128 spatial) to a sigmoid image.
    decoder_input = layers.Input(shape=(latent_dim, ))
    decoder_output = layers.Dense(
        64, activation="relu",
        kernel_initializer=initializers.lecun_normal(seed=0))(decoder_input)
    decoder_output = layers.Dense(
        1024, activation="sigmoid",
        kernel_initializer=initializers.lecun_normal(seed=0))(decoder_output)
    decoder_output = layers.Dense(
        32 * 32 * 32, activation="sigmoid",
        kernel_initializer=initializers.lecun_normal(seed=0))(decoder_output)
    decoder_output = layers.Reshape((32, 32, 32))(decoder_output)
    decoder_output = layers.Conv2D(
        32, (3, 3),
        kernel_initializer=initializers.lecun_normal(seed=0),
        activation="relu",
        padding="same")(decoder_output)
    decoder_output = layers.UpSampling2D((2, 2))(decoder_output)
    decoder_output = layers.Conv2D(
        32, (3, 3),
        kernel_initializer=initializers.lecun_normal(seed=0),
        activation="relu",
        padding="same")(decoder_output)
    decoder_output = layers.UpSampling2D((2, 2))(decoder_output)
    decoder_output = layers.Conv2D(
        1, (3, 3),
        kernel_initializer=initializers.lecun_normal(seed=0),
        activation="sigmoid",
        padding="same")(decoder_output)
    decoder = models.Model(decoder_input, decoder_output)

    # Autoencoder: wrap encoder + decoder in the project's VAE class.
    vae = ngdlmodels.VAE(encoder, decoder, latent_dim=latent_dim)
    return vae
def unet_model_double_dropout(height=0, width=0, channels=1, n_init=12,
                              n_layers=2, drop=0):
    """Build a U-Net with optional dropout after each first conv of a block.

    height, width, channels: input image dimensions.
    n_init: number of filters in the first convolution block.
    n_layers: depth of the contracting path.
    drop: dropout rate (0 disables dropout).
    Returns an uncompiled keras Model whose output is reshaped to
    (height*width, 1) so it lines up with per-pixel sample weights.
    """
    #-- define input
    inputs = kl.Input((height, width, channels))
    c = {}  # convolution outputs, keyed by running layer index
    p = {}  # pooled outputs; p[0] is the raw input
    count = 0
    p[0] = inputs
    n_filts = copy.copy(n_init)
    #-- contracting path: conv (+dropout) conv, then pool and double filters
    for i in range(1, n_layers + 1):
        c[i] = kl.Conv2D(n_filts, 3, activation='relu', padding='same')(p[i - 1])
        if drop != 0:
            c[i] = kl.Dropout(drop)(c[i])
        c[i] = kl.Conv2D(n_filts, 3, activation='relu', padding='same')(c[i])
        #-- pool 2x2 blocks; don't pool (or double filters) after the
        #-- deepest layer
        if i != n_layers:
            p[i] = kl.MaxPooling2D(pool_size=(2, 2))(c[i])
            n_filts *= 2
            count += 1
    #---------------------------------------------
    #-- expanding path: reconstruct the image
    #---------------------------------------------
    upsampled_c = {}
    up = {}
    print('Max Number of Convlution Filters: ', n_filts)
    while count > 1:
        n_filts = int(n_filts / 2)
        #-- upsample the deepest output and concatenate it with the matching
        #-- skip connection along the channel axis
        upsampled_c[i] = kl.UpSampling2D(size=(2, 2))(c[i])
        up[i] = kl.concatenate([upsampled_c[i], c[count - 1]], axis=3)
        #-- convolve the merged upsampled layer
        i += 1
        c[i] = kl.Conv2D(n_filts, 3, activation='relu', padding='same')(up[i - 1])
        if drop != 0:
            c[i] = kl.Dropout(drop)(c[i])
        c[i] = kl.Conv2D(n_filts, 3, activation='relu', padding='same')(c[i])
        #-- counter decreases as we go back up
        count -= 1
    print('Number of Convlution Filters at the end of up segment: ', n_filts)
    #-- convolution across the last n_init filters into 3 channels
    i += 1
    c[i] = kl.Conv2D(3, 3, activation='relu', padding='same')(c[i - 1])
    #-- one final sigmoid convolution into just 1 channel (None, h, w, 1)
    i += 1
    c[i] = kl.Conv2D(1, 1, activation='sigmoid')(c[i - 1])
    #-- reshape into a flattened output to match sample weights
    i += 1
    c[i] = kl.Reshape((height * width, 1,))(c[i - 1])
    print('output shape: ', c[i].shape)
    print('Total Number of layers: ', i)
    #-- make model
    # BUG FIX: keras.Model takes `inputs=`/`outputs=`; the legacy singular
    # `input=`/`output=` keywords raise TypeError on current Keras releases.
    model = km.Model(inputs=inputs, outputs=c[i])
    #-- return model
    return model
def unet():
    """Build a U-Net variant for 96x96 RGB inputs with a sigmoid mask output.

    Note the asymmetry: the encoder pools five times but the final decoder
    stage concatenates straight into the 1x1 output conv without a conv
    block of its own. Returns an uncompiled keras Model.
    """
    # Build U-Net model
    GAUSSIAN_NOISE = 0.1
    UPSAMPLE_MODE = 'SIMPLE'
    # downsampling inside the network
    NET_SCALING = (1, 1)
    # downsampling in preprocessing

    def upsample_conv(filters, kernel_size, strides, padding):
        # Learned upsampling via transposed convolution.
        return layers.Conv2DTranspose(filters, kernel_size,
                                      strides=strides, padding=padding)

    def upsample_simple(filters, kernel_size, strides, padding):
        # Parameter-free upsampling; only the strides argument is used.
        return layers.UpSampling2D(strides)

    if UPSAMPLE_MODE == 'DECONV':
        upsample = upsample_conv
    else:
        upsample = upsample_simple

    input_img = layers.Input([96, 96, 3], name='RGB_Input')
    pp_in_layer = input_img
    if NET_SCALING is not None:
        pp_in_layer = layers.AvgPool2D(NET_SCALING)(pp_in_layer)
    pp_in_layer = layers.GaussianNoise(GAUSSIAN_NOISE)(pp_in_layer)
    pp_in_layer = layers.BatchNormalization()(pp_in_layer)

    # Contracting path (VGG-like filter counts, see layer-name comments).
    c1 = layers.Conv2D(64, (3, 3), activation='relu',
                       padding='same')(pp_in_layer)  #'conv1_1'
    p1 = layers.MaxPooling2D((2, 2))(c1)
    c2 = layers.Conv2D(128, (3, 3), activation='relu',
                       padding='same')(p1)  #'conv2_1'
    p2 = layers.MaxPooling2D((2, 2))(c2)
    c3 = layers.Conv2D(256, (3, 3), activation='relu',
                       padding='same')(p2)  #'conv3_1'
    c3 = layers.Conv2D(256, (3, 3), activation='relu',
                       padding='same')(c3)  #'conv3_2'
    p3 = layers.MaxPooling2D((2, 2))(c3)
    c4 = layers.Conv2D(512, (3, 3), activation='relu',
                       padding='same')(p3)  #'conv4_1'
    c4 = layers.Conv2D(512, (3, 3), activation='relu',
                       padding='same')(c4)  #'conv4_2'
    p4 = layers.MaxPooling2D(pool_size=(2, 2))(c4)
    c5 = layers.Conv2D(256, (3, 3), activation='relu',
                       padding='same')(p4)  #'conv5_1'
    c5 = layers.Conv2D(256, (3, 3), activation='relu',
                       padding='same')(c5)  #'conv5_2'
    p5 = layers.MaxPooling2D(pool_size=(2, 2))(c5)
    c_center = layers.Conv2D(512, (3, 3), activation='relu',
                             padding='same')(p5)

    # Expanding path: upsample, concatenate the skip connection, convolve.
    u6 = upsample(256, (2, 2), strides=(2, 2), padding='same')(c_center)
    u6 = layers.concatenate([u6, c5])
    c6 = layers.Conv2D(512, (3, 3), activation='relu', padding='same')(u6)
    u7 = upsample(256, (2, 2), strides=(2, 2), padding='same')(c6)
    u7 = layers.concatenate([u7, c4])
    c7 = layers.Conv2D(512, (3, 3), activation='relu', padding='same')(u7)
    u8 = upsample(128, (2, 2), strides=(2, 2), padding='same')(c7)
    u8 = layers.concatenate([u8, c3])
    c8 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(u8)
    u9 = upsample(64, (2, 2), strides=(2, 2), padding='same')(c8)
    u9 = layers.concatenate([u9, c2], axis=3)
    c9 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(u9)
    u10 = upsample(32, (2, 2), strides=(2, 2), padding='same')(c9)
    u10 = layers.concatenate([u10, c1], axis=3)
    #c10 = layers.Conv2D(1, (3, 3), activation='relu', padding='same') (u10)
    # 1x1 sigmoid convolution gives the per-pixel mask probability.
    d = layers.Conv2D(1, (1, 1), activation='sigmoid')(u10)
    # d = layers.Cropping2D((EDGE_CROP, EDGE_CROP))(d)
    # d = layers.ZeroPadding2D((EDGE_CROP, EDGE_CROP))(d)
    if NET_SCALING is not None:
        d = layers.UpSampling2D(NET_SCALING)(d)

    seg_model = models.Model(inputs=[input_img], outputs=[d])
    #seg_model.summary()
    return seg_model
def up_sampling(input_tensor, scale):
    """Bilinearly upsample *input_tensor* by *scale* along both spatial axes."""
    upsampled = layers.UpSampling2D(
        size=(scale, scale), interpolation='bilinear')(input_tensor)
    return upsampled
def create_model():
    """Build a standalone Mask R-CNN mask head over precomputed backbone maps.

    Takes the C2-C5 backbone feature maps (fixed sizes for a 608x608 input)
    plus ROI boxes, builds an FPN on top, ROI-aligns the pyramid, and runs
    the four-conv + deconv mask branch. Returns a keras Model producing one
    sigmoid mask per ROI. Relies on module globals my_num_rois / my_msk_inp
    and the project classes PyramidROIAlign / BatchNorm.
    """
    # Backbone feature-map inputs (spatial sizes match a 608x608 image).
    C2 = Input(shape=(152, 152, 128), name="input_C2")
    C3 = Input(shape=(76, 76, 256), name="input_C3")
    C4 = Input(shape=(38, 38, 512), name="input_C4")
    C5 = Input(shape=(19, 19, 1024), name="input_C5")
    roi_input = Input(shape=(my_num_rois, 4), name="input_rois")

    # FPN top-down pathway: 1x1 lateral convs merged with 2x upsampling.
    P5 = KL.Conv2D(256, (1, 1), name='fpn_c5p5')(C5)
    P4 = KL.Add(name="fpn_p4add")([
        KL.UpSampling2D(size=(2, 2), name="fpn_p5upsampled")(P5),
        KL.Conv2D(256, (1, 1), name='fpn_c4p4')(C4)
    ])
    P3 = KL.Add(name="fpn_p3add")([
        KL.UpSampling2D(size=(2, 2), name="fpn_p4upsampled")(P4),
        KL.Conv2D(256, (1, 1), name='fpn_c3p3')(C3)
    ])
    P2 = KL.Add(name="fpn_p2add")([
        KL.UpSampling2D(size=(2, 2), name="fpn_p3upsampled")(P3),
        KL.Conv2D(256, (1, 1), name='fpn_c2p2')(C2)
    ])
    # Attach 3x3 conv to all P layers to get the final feature maps.
    P2 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p2")(P2)
    P3 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p3")(P3)
    P4 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p4")(P4)
    P5 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p5")(P5)
    feature_maps = [P2, P3, P4, P5]

    # ROI-align each box against the appropriate pyramid level.
    roi_pool_layer = PyramidROIAlign([my_msk_inp, my_msk_inp],
                                     np.array([608, 608, 3]),
                                     name="roi_align_mask")([roi_input] +
                                                            feature_maps)
    #roi_pool_layer = RoiPoolingConv(my_msk_inp, my_num_rois)([img_input, roi_input])
    #x = TimeDistributed(Conv2D(2048, (3, 3), activation='relu', padding='same'))(roi_pool_layer)
    #x = KL.TimeDistributed(KL.Conv2D(2048, (3, 3), padding="same"))(roi_pool_layer)
    #x = KL.TimeDistributed(BatchNorm(axis=3))(x)
    #x = KL.Activation('relu')(x)
    #x = TimeDistributed(KL.Dropout(0.5))(x)
    #x = TimeDistributed(Conv2DTranspose(256, (2, 2), activation='relu', strides=2))(x)
    #x = TimeDistributed(KL.Dropout(0.5))(x)
    #x = TimeDistributed(Conv2D(1, (1, 1), activation='sigmoid', strides=1))(x)
    #return Model(inputs=[img_input, roi_input], outputs=x)

    # Mask branch: four conv+BN+relu blocks applied per ROI (TimeDistributed).
    x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"),
                           name="mrcnn_mask_conv1")(roi_pool_layer)
    x = KL.TimeDistributed(BatchNorm(axis=3), name='mrcnn_mask_bn1')(x)
    x = KL.Activation('relu')(x)
    #x = KL.Dropout(0.5)(x)
    x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"),
                           name="mrcnn_mask_conv2")(x)
    x = KL.TimeDistributed(BatchNorm(axis=3), name='mrcnn_mask_bn2')(x)
    x = KL.Activation('relu')(x)
    #x = KL.Dropout(0.5)(x)
    x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"),
                           name="mrcnn_mask_conv3")(x)
    x = KL.TimeDistributed(BatchNorm(axis=3), name='mrcnn_mask_bn3')(x)
    x = KL.Activation('relu')(x)
    #x = KL.Dropout(0.5)(x)
    x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"),
                           name="mrcnn_mask_conv4")(x)
    x = KL.TimeDistributed(BatchNorm(axis=3), name='mrcnn_mask_bn4')(x)
    x = KL.Activation('relu')(x)
    #x = KL.Dropout(0.5)(x)
    # 2x deconv to double mask resolution, then 1x1 sigmoid conv per pixel.
    x = KL.TimeDistributed(KL.Conv2DTranspose(256, (2, 2), strides=2,
                                              activation="relu"),
                           name="mrcnn_mask_deconv")(x)
    #x = KL.Dropout(0.5)(x)
    x = KL.TimeDistributed(KL.Conv2D(1, (1, 1), strides=1,
                                     activation="sigmoid"),
                           name="mask_output")(x)
    return Model(inputs=[C2, C3, C4, C5, roi_input], outputs=x)
def build(self, config):
    """Build Mask R-CNN architecture (inference-only variant).

    Unlike the standard build, image meta and anchors are baked into the
    graph as constants (via Lambda + K.constant) computed from the config,
    so the resulting model takes only the input image. Returns a KM.Model
    producing detections, class/bbox heads, masks, and RPN outputs.
    """
    # Image size must be dividable by 2 multiple times
    h, w = config.IMAGE_SHAPE[:2]
    if h / 2**6 != int(h / 2**6) or w / 2**6 != int(w / 2**6):
        raise Exception(
            "Image size must be dividable by 2 at least 6 times "
            "to avoid fractions when downscaling and upscaling."
            "For example, use 256, 320, 384, 448, 512, ... etc. ")

    # Inputs
    input_image = KL.Input(shape=config.IMAGE_SHAPE, name="input_image")
    # Compute the image meta once for a dummy image — it only depends on
    # shape/config — and inject it as a graph constant.
    test_img = np.zeros(config.IMAGE_SHAPE)
    _, image_metas, _ = self.mold_inputs([test_img])
    #input_image_meta = KL.Input(tensor=K.constant(image_metas, name="input_image_meta"))
    # Hack: the Lambda ignores its input and emits the constant metas.
    input_image_meta = KL.Lambda(lambda x: K.constant(
        image_metas, name="input_image_meta"))(input_image)
    #input_image_meta = KL.Input(shape=[config.IMAGE_META_SIZE],
    #                            name="input_image_meta")

    # Anchors
    anchors = self.get_anchors(self.config.IMAGE_SHAPE)
    # Duplicate across the batch dimension because Keras requires it
    # TODO: can this be optimized to avoid duplicating the anchors?
    anchors = np.broadcast_to(anchors,
                              (self.config.BATCH_SIZE, ) + anchors.shape)
    # Anchors in normalized coordinates, injected as a graph constant.
    #input_anchors = KL.Input(tensor=K.constant(anchors, name="input_anchors"))
    input_anchors = KL.Lambda(
        lambda x: K.constant(anchors, name="input_anchors"))(input_image)

    # Build the shared convolutional layers.
    # Bottom-up Layers
    # Returns a list of the last layers of each stage, 5 in total.
    # Don't create the thead (stage 5), so we pick the 4th item in the list.
    if callable(config.BACKBONE):
        _, C2, C3, C4, C5 = config.BACKBONE(input_image, stage5=True,
                                            train_bn=config.TRAIN_BN)
    else:
        _, C2, C3, C4, C5 = resnet_graph(input_image, config.BACKBONE,
                                         stage5=True,
                                         train_bn=config.TRAIN_BN)

    # Top-down Layers (FPN)
    # TODO: add assert to verify feature map sizes match what's in config
    P5 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1),
                   name='fpn_c5p5')(C5)
    P4 = KL.Add(name="fpn_p4add")([
        KL.UpSampling2D(size=(2, 2), name="fpn_p5upsampled")(P5),
        KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c4p4')(C4)
    ])
    P3 = KL.Add(name="fpn_p3add")([
        KL.UpSampling2D(size=(2, 2), name="fpn_p4upsampled")(P4),
        KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c3p3')(C3)
    ])
    P2 = KL.Add(name="fpn_p2add")([
        KL.UpSampling2D(size=(2, 2), name="fpn_p3upsampled")(P3),
        KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c2p2')(C2)
    ])
    # Attach 3x3 conv to all P layers to get the final feature maps.
    P2 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME",
                   name="fpn_p2")(P2)
    P3 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME",
                   name="fpn_p3")(P3)
    P4 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME",
                   name="fpn_p4")(P4)
    P5 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME",
                   name="fpn_p5")(P5)
    # P6 is used for the 5th anchor scale in RPN. Generated by
    # subsampling from P5 with stride of 2.
    P6 = KL.MaxPooling2D(pool_size=(1, 1), strides=2, name="fpn_p6")(P5)

    # Note that P6 is used in RPN, but not in the classifier heads.
    rpn_feature_maps = [P2, P3, P4, P5, P6]
    mrcnn_feature_maps = [P2, P3, P4, P5]

    # Anchors
    anchors = input_anchors

    # RPN Model: shared across pyramid levels.
    rpn = build_rpn_model(config.RPN_ANCHOR_STRIDE,
                          len(config.RPN_ANCHOR_RATIOS),
                          config.TOP_DOWN_PYRAMID_SIZE)
    # Loop through pyramid layers
    layer_outputs = []  # list of lists
    for p in rpn_feature_maps:
        layer_outputs.append(rpn([p]))
    # Concatenate layer outputs
    # Convert from list of lists of level outputs to list of lists
    # of outputs across levels.
    # e.g. [[a1, b1, c1], [a2, b2, c2]] => [[a1, a2], [b1, b2], [c1, c2]]
    output_names = ["rpn_class_logits", "rpn_class", "rpn_bbox"]
    outputs = list(zip(*layer_outputs))
    outputs = [
        KL.Concatenate(axis=1, name=n)(list(o))
        for o, n in zip(outputs, output_names)
    ]
    rpn_class_logits, rpn_class, rpn_bbox = outputs

    # Generate proposals
    # Proposals are [batch, N, (y1, x1, y2, x2)] in normalized coordinates
    # and zero padded.
    proposal_count = config.POST_NMS_ROIS_INFERENCE
    rpn_rois = ProposalLayer(proposal_count=proposal_count,
                             nms_threshold=config.RPN_NMS_THRESHOLD,
                             name="ROI",
                             config=config)([rpn_class, rpn_bbox, anchors])

    # Network Heads
    # Proposal classifier and BBox regressor heads
    mrcnn_class_logits, mrcnn_class, mrcnn_bbox =\
        fpn_classifier_graph(rpn_rois, mrcnn_feature_maps, input_image_meta,
                             config.POOL_SIZE, config.NUM_CLASSES,
                             train_bn=config.TRAIN_BN,
                             fc_layers_size=config.FPN_CLASSIF_FC_LAYERS_SIZE)

    # Detections
    # output is [batch, num_detections, (y1, x1, y2, x2, class_id, score)] in
    # normalized coordinates
    detections = DetectionLayer(config, name="mrcnn_detection")(
        [rpn_rois, mrcnn_class, mrcnn_bbox, input_image_meta])

    # Create masks for detections (boxes only — drop class_id/score columns).
    detection_boxes = KL.Lambda(lambda x: x[..., :4])(detections)
    mrcnn_mask = build_fpn_mask_graph(detection_boxes, mrcnn_feature_maps,
                                      input_image_meta,
                                      config.MASK_POOL_SIZE,
                                      config.NUM_CLASSES,
                                      train_bn=config.TRAIN_BN)

    model = KM.Model(input_image, [
        detections, mrcnn_class, mrcnn_bbox, mrcnn_mask, rpn_rois, rpn_class,
        rpn_bbox
    ],
                     name='mask_rcnn')

    # Add multi-GPU support.
    #if config.GPU_COUNT > 1:
    #    from mrcnn.parallel_model import ParallelModel
    #    model = ParallelModel(model, config.GPU_COUNT)

    return model
def build(self, mode, config):
    """Build the Mask R-CNN architecture (Keras functional graph).

    Parameters
    ----------
    mode : str
        Either "training" or "inference". NOTE(review): the value is
        asserted but this body only ever wires the inference graph —
        no training inputs/losses are created. Confirm whether the
        training path was intentionally removed.
    config : object
        Configuration holding IMAGE_SHAPE, anchor/RPN settings,
        NUM_CLASSES, pooling sizes, GPU_COUNT, etc.

    Returns
    -------
    keras.Model taking [input_image, input_image_meta] and producing
    [detections, mrcnn_class, mrcnn_bbox, mrcnn_mask, rpn_rois,
    rpn_class, rpn_bbox].
    """
    assert mode in ['training', 'inference']

    # Image size must be dividable by 2 multiple times (6 levels of
    # down/upsampling in the FPN backbone).
    h, w = config.IMAGE_SHAPE[:2]
    # NOTE(review): leftover debug prints — consider removing.
    print("HEIGHT AND WIDTH BELOW")
    print(h)
    print(w)
    if h / 2**6 != int(h / 2**6) or w / 2**6 != int(w / 2**6):
        raise Exception(
            "Image size must be dividable by 2 at least 6 times "
            "to avoid fractions when downscaling and upscaling."
            "For example, use 256, 320, 384, 448, 512, ... etc. ")

    # Inputs: the image itself plus its metadata vector.
    input_image = KL.Input(shape=config.IMAGE_SHAPE.tolist(),
                           name="input_image")
    input_image_meta = KL.Input(shape=[None], name="input_image_meta")

    # Build the shared convolutional layers (bottom-up pathway).
    # resnet_graph returns the last layer of each stage, 5 in total;
    # the stage-1 output is discarded.
    _, C2, C3, C4, C5 = resnet_graph(input_image, "resnet101", stage5=True)

    # Top-down FPN pathway: 1x1 lateral convs plus 2x upsampling adds.
    P5 = KL.Conv2D(256, (1, 1), name='fpn_c5p5')(C5)
    P4 = KL.Add(name="fpn_p4add")([
        KL.UpSampling2D(size=(2, 2), name="fpn_p5upsampled")(P5),
        KL.Conv2D(256, (1, 1), name='fpn_c4p4')(C4)
    ])
    P3 = KL.Add(name="fpn_p3add")([
        KL.UpSampling2D(size=(2, 2), name="fpn_p4upsampled")(P4),
        KL.Conv2D(256, (1, 1), name='fpn_c3p3')(C3)
    ])
    P2 = KL.Add(name="fpn_p2add")([
        KL.UpSampling2D(size=(2, 2), name="fpn_p3upsampled")(P3),
        KL.Conv2D(256, (1, 1), name='fpn_c2p2')(C2)
    ])
    # Attach 3x3 conv to all P layers to get the final feature maps.
    P2 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p2")(P2)
    P3 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p3")(P3)
    P4 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p4")(P4)
    P5 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p5")(P5)
    # P6 is used for the 5th anchor scale in RPN. Generated by
    # subsampling from P5 with stride of 2.
    P6 = KL.MaxPooling2D(pool_size=(1, 1), strides=2, name="fpn_p6")(P5)

    # P6 feeds the RPN only; the classifier heads use P2-P5.
    rpn_feature_maps = [P2, P3, P4, P5, P6]
    mrcnn_feature_maps = [P2, P3, P4, P5]

    # Generate anchors for every pyramid level (kept on the instance
    # so callers can reuse them at inference time).
    self.anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                                  config.RPN_ANCHOR_RATIOS,
                                                  config.BACKBONE_SHAPES,
                                                  config.BACKBONE_STRIDES,
                                                  config.RPN_ANCHOR_STRIDE)
    # RPN model, shared across pyramid levels (256 = FPN channel depth).
    rpn = build_rpn_model(config.RPN_ANCHOR_STRIDE,
                          len(config.RPN_ANCHOR_RATIOS), 256)
    # Run the shared RPN over every pyramid level.
    layer_outputs = []  # list of lists
    for p in rpn_feature_maps:
        layer_outputs.append(rpn([p]))
    # Concatenate layer outputs: convert from per-level output lists to
    # per-output lists across levels.
    # e.g. [[a1, b1, c1], [a2, b2, c2]] => [[a1, a2], [b1, b2], [c1, c2]]
    output_names = ["rpn_class_logits", "rpn_class", "rpn_bbox"]
    outputs = list(zip(*layer_outputs))
    outputs = [
        KL.Concatenate(axis=1, name=n)(list(o))
        for o, n in zip(outputs, output_names)
    ]
    rpn_class_logits, rpn_class, rpn_bbox = outputs

    # Generate proposals: [N, (y1, x1, y2, x2)] in normalized coordinates.
    # proposal_count = config.POST_NMS_ROIS_TRAINING if mode == "training" \
    #     else config.POST_NMS_ROIS_INFERENCE
    proposal_count = config.POST_NMS_ROIS_INFERENCE
    rpn_rois = ProposalLayer(proposal_count=proposal_count,
                             nms_threshold=0.7,
                             name="ROI",
                             anchors=self.anchors,
                             config=config)([rpn_class, rpn_bbox])

    # Network heads: proposal classifier and bbox regressor.
    mrcnn_class_logits, mrcnn_class, mrcnn_bbox = \
        fpn_classifier_graph(rpn_rois, mrcnn_feature_maps,
                             config.IMAGE_SHAPE, config.POOL_SIZE,
                             config.NUM_CLASSES)
    # Detections: [batch, num_detections, (y1, x1, y2, x2, class_id,
    # score)] in image coordinates.
    detections = DetectionLayer(config, name="mrcnn_detection")(
        [rpn_rois, mrcnn_class, mrcnn_bbox, input_image_meta])

    # Convert detection boxes to normalized coordinates for mask pooling.
    h, w = config.IMAGE_SHAPE[:2]
    detection_boxes = KL.Lambda(
        lambda x: x[..., :4] / np.array([h, w, h, w]))(detections)
    # Per-detection mask head.
    mrcnn_mask = build_fpn_mask_graph(detection_boxes, mrcnn_feature_maps,
                                      config.IMAGE_SHAPE,
                                      config.MASK_POOL_SIZE,
                                      config.NUM_CLASSES)

    model = KM.Model([input_image, input_image_meta], [
        detections, mrcnn_class, mrcnn_bbox, mrcnn_mask, rpn_rois,
        rpn_class, rpn_bbox
    ],
                     name='mask_rcnn')

    # Add multi-GPU support.
    if config.GPU_COUNT > 1:
        from parallel_model import ParallelModel
        model = ParallelModel(model, config.GPU_COUNT)
    return model
def get_model(self, train_only_top=False):
    """Assemble and return the U-Net segmentation model.

    The encoder halves the resolution four times (filter widths
    8/16/32/64), a 128-filter bottleneck sits in the middle, and the
    decoder mirrors the encoder with skip connections. A 1x1 sigmoid
    conv produces the single-channel mask; optional AvgPool/UpSampling
    around the network trade resolution for speed via NET_SCALING.

    :param train_only_top: unused; kept for interface compatibility.
    :return: a keras Model mapping an RGB image to a mask.
    """
    rgb_in = layers.Input(INPUT_SHAPE, name='RGB_Input')

    net = rgb_in
    if NET_SCALING is not None:
        net = layers.AvgPool2D(NET_SCALING)(net)
    # Light preprocessing: noise for regularization, then normalization.
    net = layers.GaussianNoise(GAUSSIAN_NOISE)(net)
    net = layers.BatchNormalization()(net)

    def conv_pair(tensor, filters):
        # Two stacked 3x3 ReLU convolutions at the given width.
        tensor = layers.Conv2D(filters, (3, 3), activation='relu',
                               padding='same')(tensor)
        return layers.Conv2D(filters, (3, 3), activation='relu',
                             padding='same')(tensor)

    # Contracting path; remember each block's output for the skips.
    skips = []
    for filters in (8, 16, 32, 64):
        block = conv_pair(net, filters)
        skips.append(block)
        net = layers.MaxPooling2D((2, 2))(block)

    # Bottleneck.
    net = conv_pair(net, 128)

    # Expanding path: upsample, fuse with the matching skip, convolve.
    for filters, skip in zip((64, 32, 16, 8), reversed(skips)):
        up = self.upsample(filters=filters,
                           kernel_size=(2, 2),
                           strides=(2, 2),
                           padding='same')(net)
        net = conv_pair(layers.concatenate([up, skip]), filters)

    mask = layers.Conv2D(1, (1, 1), activation='sigmoid')(net)
    if NET_SCALING is not None:
        mask = layers.UpSampling2D(NET_SCALING)(mask)
    return models.Model(inputs=[rgb_in], outputs=[mask])
def __init__(self, input_shape):
    """Build the U-Net segmentation graph, compile it, and set up
    training callbacks.

    :param input_shape: shape of the RGB input tensor, e.g. (H, W, 3).
    """
    # Choose the upsampling strategy from the module-level switch:
    # transposed convolution ('DECONV') or plain upsampling.
    if UPSAMPLE_MODE == 'DECONV':
        self.upsample = upsample_conv
    else:
        self.upsample = upsample_simple
    self.input_img = layers.Input(input_shape, name='RGB_Input')
    self.pp_in_layer = self.input_img
    # Optional downscaling plus noise/normalization preprocessing.
    if NET_SCALING is not None:
        self.pp_in_layer = layers.AvgPool2D(NET_SCALING)(self.pp_in_layer)
    self.pp_in_layer = layers.GaussianNoise(GAUSSIAN_NOISE)(
        self.pp_in_layer)
    self.pp_in_layer = layers.BatchNormalization()(self.pp_in_layer)

    # Encoder: four conv-pair + maxpool stages (8/16/32/64 filters).
    self.c1 = layers.Conv2D(8, (3, 3), activation='relu',
                            padding='same')(self.pp_in_layer)
    self.c1 = layers.Conv2D(8, (3, 3), activation='relu',
                            padding='same')(self.c1)
    self.p1 = layers.MaxPooling2D((2, 2))(self.c1)
    self.c2 = layers.Conv2D(16, (3, 3), activation='relu',
                            padding='same')(self.p1)
    self.c2 = layers.Conv2D(16, (3, 3), activation='relu',
                            padding='same')(self.c2)
    self.p2 = layers.MaxPooling2D((2, 2))(self.c2)
    self.c3 = layers.Conv2D(32, (3, 3), activation='relu',
                            padding='same')(self.p2)
    self.c3 = layers.Conv2D(32, (3, 3), activation='relu',
                            padding='same')(self.c3)
    self.p3 = layers.MaxPooling2D((2, 2))(self.c3)
    self.c4 = layers.Conv2D(64, (3, 3), activation='relu',
                            padding='same')(self.p3)
    self.c4 = layers.Conv2D(64, (3, 3), activation='relu',
                            padding='same')(self.c4)
    self.p4 = layers.MaxPooling2D(pool_size=(2, 2))(self.c4)

    # Bottleneck at 128 filters.
    self.c5 = layers.Conv2D(128, (3, 3), activation='relu',
                            padding='same')(self.p4)
    self.c5 = layers.Conv2D(128, (3, 3), activation='relu',
                            padding='same')(self.c5)

    # Decoder: upsample, concatenate the matching encoder skip,
    # then a conv pair (64/32/16/8 filters, mirroring the encoder).
    self.u6 = self.upsample(64, (2, 2), strides=(2, 2),
                            padding='same')(self.c5)
    self.u6 = layers.concatenate([self.u6, self.c4])
    self.c6 = layers.Conv2D(64, (3, 3), activation='relu',
                            padding='same')(self.u6)
    self.c6 = layers.Conv2D(64, (3, 3), activation='relu',
                            padding='same')(self.c6)
    self.u7 = self.upsample(32, (2, 2), strides=(2, 2),
                            padding='same')(self.c6)
    self.u7 = layers.concatenate([self.u7, self.c3])
    self.c7 = layers.Conv2D(32, (3, 3), activation='relu',
                            padding='same')(self.u7)
    self.c7 = layers.Conv2D(32, (3, 3), activation='relu',
                            padding='same')(self.c7)
    self.u8 = self.upsample(16, (2, 2), strides=(2, 2),
                            padding='same')(self.c7)
    self.u8 = layers.concatenate([self.u8, self.c2])
    self.c8 = layers.Conv2D(16, (3, 3), activation='relu',
                            padding='same')(self.u8)
    self.c8 = layers.Conv2D(16, (3, 3), activation='relu',
                            padding='same')(self.c8)
    self.u9 = self.upsample(8, (2, 2), strides=(2, 2),
                            padding='same')(self.c8)
    self.u9 = layers.concatenate([self.u9, self.c1], axis=3)
    self.c9 = layers.Conv2D(8, (3, 3), activation='relu',
                            padding='same')(self.u9)
    self.c9 = layers.Conv2D(8, (3, 3), activation='relu',
                            padding='same')(self.c9)

    # Single-channel sigmoid mask; crop-then-pad zeroes the border.
    self.d = layers.Conv2D(1, (1, 1), activation='sigmoid')(self.c9)
    self.d = layers.Cropping2D((EDGE_CROP, EDGE_CROP))(self.d)
    self.d = layers.ZeroPadding2D((EDGE_CROP, EDGE_CROP))(self.d)
    if NET_SCALING is not None:
        self.d = layers.UpSampling2D(NET_SCALING)(self.d)
    self.seg_model = models.Model(inputs=[self.input_img],
                                  outputs=[self.d])
    # Compile with a dice + BCE loss and dice-based metrics.
    self.seg_model.compile(
        optimizer=Adam(1e-4, decay=1e-6),
        loss=dice_p_bce,
        metrics=[dice_coef, 'binary_accuracy', true_positive_rate])
    print(self.seg_model.summary())
    # Callbacks: checkpoint the best weights by validation dice, halve
    # the LR on plateaus, and stop early after 20 stale epochs.
    self.weight_path = "{}_weights.best.hdf5".format('seg_model')
    self.checkpoint = ModelCheckpoint(self.weight_path,
                                      monitor='val_dice_coef',
                                      verbose=1,
                                      save_best_only=True,
                                      mode='max',
                                      save_weights_only=True)
    # NOTE(review): `epsilon` is the legacy name of ReduceLROnPlateau's
    # `min_delta` parameter — newer Keras versions reject it; confirm
    # the pinned Keras version.
    self.reduceLROnPlat = ReduceLROnPlateau(monitor='val_dice_coef',
                                            factor=0.5,
                                            patience=3,
                                            verbose=1,
                                            mode='max',
                                            epsilon=0.0001,
                                            cooldown=2,
                                            min_lr=1e-6)
    self.early = EarlyStopping(monitor="val_dice_coef",
                               mode="max",
                               patience=20)
def nn_model_atrous_double_dropout(height=0,
                                   width=0,
                                   channels=1,
                                   n_filts=32,
                                   drop=0):
    """Build a U-Net-like model whose bottleneck is a bank of parallel
    atrous (dilated) separable convolutions.

    :param height: input height in pixels
    :param width: input width in pixels
    :param channels: number of input channels
    :param n_filts: base filter count; deeper stages use multiples of it
    :param drop: dropout rate between the two convs of a block (0 = off)
    :return: a keras Model producing a flattened (height*width, 1) mask
    """

    def double_conv(tensor, filters):
        # Two 3x3 ELU convolutions with optional dropout in between.
        out = kl.Conv2D(filters, 3, activation='elu', padding='same')(tensor)
        if drop:
            out = kl.Dropout(drop)(out)
        out = kl.Conv2D(filters, 3, activation='elu', padding='same')(out)
        return out

    net_in = kl.Input((height, width, channels))

    # Encoder: three 2x2 poolings, so the bottleneck sits at (h/8, w/8).
    enc1 = double_conv(net_in, n_filts)                    # (h, w)
    down1 = kl.MaxPooling2D(pool_size=(2, 2))(enc1)        # (h/2, w/2)
    enc2 = double_conv(down1, n_filts * 2)
    down2 = kl.MaxPooling2D(pool_size=(2, 2))(enc2)        # (h/4, w/4)
    enc3 = double_conv(down2, n_filts * 2)
    down3 = kl.MaxPooling2D(pool_size=(2, 2))(enc3)        # (h/8, w/8)
    enc4 = double_conv(down3, n_filts * 4)

    # Parallel atrous separable convolutions at dilation rates 1..5,
    # concatenated channel-wise into the bottleneck.
    atrous_branches = [
        kl.SeparableConv2D(n_filts * 4, 3, activation='elu',
                           dilation_rate=(rate, rate), depth_multiplier=1,
                           padding='same')(enc4)
        for rate in (1, 2, 3, 4, 5)
    ]
    bottleneck = kl.Concatenate(axis=3)(atrous_branches)   # (h/8, w/8)

    # Decoder: upsample, fuse the matching encoder skip, convolve.
    up3 = kl.Concatenate(axis=3)(
        [kl.UpSampling2D(size=(2, 2))(bottleneck), enc3])  # (h/4, w/4)
    dec3 = double_conv(up3, n_filts * 2)
    up2 = kl.Concatenate(axis=3)(
        [kl.UpSampling2D(size=(2, 2))(dec3), enc2])        # (h/2, w/2)
    dec2 = double_conv(up2, n_filts)
    up1 = kl.Concatenate(axis=3)(
        [kl.UpSampling2D(size=(2, 2))(dec2), enc1])        # (h, w)
    dec1 = double_conv(up1, n_filts)

    # Collapse to 3 channels, then to a single sigmoid channel.
    rgb_feats = kl.Conv2D(3, 3, activation='elu', padding='same')(dec1)
    sigmoid_map = kl.Conv2D(1, 1, activation='sigmoid')(rgb_feats)
    # Flatten spatially so the output lines up with sample weights.
    flat_out = kl.Reshape((height * width, 1))(sigmoid_map)

    return km.Model(inputs=net_in, outputs=flat_out)
# Decoder half of a U-Net built at script top level. The encoder
# (Conv1..Conv4, pooling4) and the helpers/constants (upsample, kernel2,
# kernel3, activation, pad, NET_SCALING, input) are defined earlier in
# the file, outside this chunk.

# Bottleneck: two 128-filter convolutions on the deepest pooled map.
Conv5 = layers.Conv2D(128, kernel3, activation=activation,
                      padding=pad)(pooling4)
Conv5 = layers.Conv2D(128, kernel3, activation=activation, padding=pad)(Conv5)

# Expanding path: upsample, concatenate the matching encoder skip,
# then apply a pair of convolutions (64/32/16/8 filters).
upsample1 = upsample(64, kernel2, strides=kernel2, padding=pad)(Conv5)
upsample1 = layers.concatenate([upsample1, Conv4])
Conv6 = layers.Conv2D(64, kernel3, activation=activation,
                      padding=pad)(upsample1)
Conv6 = layers.Conv2D(64, kernel3, activation=activation, padding=pad)(Conv6)
upsample2 = upsample(32, kernel2, strides=kernel2, padding=pad)(Conv6)
upsample2 = layers.concatenate([upsample2, Conv3])
Conv7 = layers.Conv2D(32, kernel3, activation=activation,
                      padding=pad)(upsample2)
Conv7 = layers.Conv2D(32, kernel3, activation=activation, padding=pad)(Conv7)
upsample3 = upsample(16, kernel2, strides=kernel2, padding=pad)(Conv7)
upsample3 = layers.concatenate([upsample3, Conv2])
Conv8 = layers.Conv2D(16, kernel3, activation=activation,
                      padding=pad)(upsample3)
Conv8 = layers.Conv2D(16, kernel3, activation=activation, padding=pad)(Conv8)
upsample4 = upsample(8, kernel2, strides=kernel2, padding=pad)(Conv8)
upsample4 = layers.concatenate([upsample4, Conv1], axis=3)
Conv9 = layers.Conv2D(8, kernel3, activation=activation,
                      padding=pad)(upsample4)
Conv9 = layers.Conv2D(8, kernel3, activation=activation, padding=pad)(Conv9)

# Single-channel sigmoid mask, upsampled back to full resolution.
output = layers.Conv2D(1, (1, 1), activation='sigmoid')(Conv9)
output = layers.UpSampling2D(NET_SCALING)(output)

# NOTE(review): `input` here shadows the builtin — it is the Input
# tensor created earlier in the script; verify against the encoder half.
segmentation_model = models.Model(inputs=[input], outputs=[output])
segmentation_model.summary()
def build_u_net_model(input_shape,
                      upsample_mode="DECONV",
                      gaussian_noise=0.1,
                      padding="same",
                      net_scaling=None,
                      img_scaling=IMG_SCALING,
                      *args,
                      **kargs):
    """Build a U-Net segmentation model and wrap it for full-resolution
    inference.

    :param input_shape: shape of the RGB input tensor, e.g. (H, W, 3)
    :param upsample_mode: key into UPSAMPLE_DICT selecting the
        upsampling layer factory; unknown keys fall back to
        _upsample_simple
    :param gaussian_noise: stddev of the GaussianNoise regularizer
    :param padding: padding mode for all convolutions
    :param net_scaling: optional pooling factor applied inside the net
    :param img_scaling: optional pooling factor for the full-resolution
        wrapper; None skips the wrapper
    :param args: unused, accepted for interface compatibility
    :param kargs: unused, accepted for interface compatibility
    :return: a keras Model (the full-resolution wrapper when
        img_scaling is set, otherwise the bare U-Net)
    """
    upsample = UPSAMPLE_DICT.get(upsample_mode, _upsample_simple)

    input_img = layers.Input(input_shape, name='RGB_Input')
    pp_in_layer = input_img

    # TODO: Add dropout for regularization?
    # Preprocessing: optional downscaling, noise, then normalization.
    if net_scaling is not None:
        pp_in_layer = layers.AvgPool2D(net_scaling)(pp_in_layer)
    pp_in_layer = layers.GaussianNoise(gaussian_noise)(pp_in_layer)
    pp_in_layer = layers.BatchNormalization()(pp_in_layer)

    # Encoder: four conv-pair + maxpool stages (8/16/32/64 filters).
    c1 = layers.Conv2D(8, (3, 3), activation='relu',
                       padding=padding)(pp_in_layer)
    c1 = layers.Conv2D(8, (3, 3), activation='relu', padding=padding)(c1)
    p1 = layers.MaxPooling2D((2, 2))(c1)
    c2 = layers.Conv2D(16, (3, 3), activation='relu', padding=padding)(p1)
    c2 = layers.Conv2D(16, (3, 3), activation='relu', padding=padding)(c2)
    p2 = layers.MaxPooling2D((2, 2))(c2)
    c3 = layers.Conv2D(32, (3, 3), activation='relu', padding=padding)(p2)
    c3 = layers.Conv2D(32, (3, 3), activation='relu', padding=padding)(c3)
    p3 = layers.MaxPooling2D((2, 2))(c3)
    c4 = layers.Conv2D(64, (3, 3), activation='relu', padding=padding)(p3)
    c4 = layers.Conv2D(64, (3, 3), activation='relu', padding=padding)(c4)
    p4 = layers.MaxPooling2D(pool_size=(2, 2))(c4)

    # Bottleneck at 128 filters.
    c5 = layers.Conv2D(128, (3, 3), activation='relu', padding=padding)(p4)
    c5 = layers.Conv2D(128, (3, 3), activation='relu', padding=padding)(c5)

    # Decoder: upsample, concatenate the encoder skip, conv pair.
    u6 = upsample(64, (2, 2), strides=(2, 2), padding=padding)(c5)
    u6 = layers.concatenate([u6, c4])
    c6 = layers.Conv2D(64, (3, 3), activation='relu', padding=padding)(u6)
    c6 = layers.Conv2D(64, (3, 3), activation='relu', padding=padding)(c6)
    u7 = upsample(32, (2, 2), strides=(2, 2), padding=padding)(c6)
    u7 = layers.concatenate([u7, c3])
    c7 = layers.Conv2D(32, (3, 3), activation='relu', padding=padding)(u7)
    c7 = layers.Conv2D(32, (3, 3), activation='relu', padding=padding)(c7)
    u8 = upsample(16, (2, 2), strides=(2, 2), padding=padding)(c7)
    u8 = layers.concatenate([u8, c2])
    c8 = layers.Conv2D(16, (3, 3), activation='relu', padding=padding)(u8)
    c8 = layers.Conv2D(16, (3, 3), activation='relu', padding=padding)(c8)
    u9 = upsample(8, (2, 2), strides=(2, 2), padding=padding)(c8)
    u9 = layers.concatenate([u9, c1], axis=3)
    c9 = layers.Conv2D(8, (3, 3), activation='relu', padding=padding)(u9)
    c9 = layers.Conv2D(8, (3, 3), activation='relu', padding=padding)(c9)

    # Single-channel sigmoid mask, optionally upsampled back.
    d = layers.Conv2D(1, (1, 1), activation='sigmoid')(c9)
    if net_scaling is not None:
        d = layers.UpSampling2D(net_scaling)(d)

    seg_model = models.Model(inputs=[input_img], outputs=[d])

    # Full-resolution wrapper: average-pool the raw image down to the
    # net's working resolution, run the net, upsample the mask back.
    if img_scaling is not None:
        fullres_model = models.Sequential()
        fullres_model.add(
            layers.AvgPool2D(img_scaling, input_shape=(None, None, 3)))
        fullres_model.add(seg_model)
        fullres_model.add(layers.UpSampling2D(img_scaling))
    else:
        fullres_model = seg_model
    return fullres_model
def build(self):
    """Build the Mask R-CNN graph for training or inference.

    Uses `self.train_flag` to decide which graph to wire: the training
    graph takes RPN/GT inputs and emits the five loss tensors; the
    inference graph takes anchors as an input and emits detections and
    masks.

    Fix: the original created the "anchors" Lambda layer twice with the
    same name, which Keras rejects (duplicate layer name); the second,
    redundant call was removed.

    :return: a keras Model (wrapped in ParallelModel when GPU_COUNT > 1)
    :raises Exception: if the image size is not divisible by 2**6.
    """
    # image shape
    h, w, c = self.image_shape[:]
    print("image_shape: {}".format(self.image_shape))

    # Six rounds of down/upsampling require the size to divide by 2**6.
    if h / 2 ** 6 != int(h / 2 ** 6) or w / 2 ** 6 != int(w / 2 ** 6):
        raise Exception("Image size must be dividable by 2 at least 6 times "
                        "to avoid fractions when downscaling and upscaling."
                        "For example, use 256, 320, 384, 448, 512, ... etc. ")

    # Inputs
    input_image = kl.Input(shape=[None, None, c], name="input_image")
    input_image_meta = kl.Input(shape=[cfg.COMMON.IMAGE_META_SIZE],
                                name="input_image_meta")

    # Training mode
    if self.train_flag:
        # RPN GT
        input_rpn_match = kl.Input(shape=[None, 1], name="input_rpn_match",
                                   dtype=tf.int32)
        input_rpn_bbox = kl.Input(shape=[None, 4], name="input_rpn_bbox",
                                  dtype=tf.float32)

        # Detection GT (class IDs, bounding boxes, and masks)
        # 1. GT Class IDs (zero padded)
        input_gt_class_ids = kl.Input(shape=[None],
                                      name="input_gt_class_ids",
                                      dtype=tf.int32)
        # 2. GT Boxes in pixels (zero padded)
        # [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] in image coordinates
        input_gt_boxes = kl.Input(shape=[None, 4], name="input_gt_boxes",
                                  dtype=tf.float32)
        # Normalize coordinates
        gt_boxes = kl.Lambda(
            lambda x: self.bbox_util.norm_boxes_graph(
                x, k.shape(input_image)[1:3]))(input_gt_boxes)

        # 3. GT Masks (zero padded)
        # [batch, height, width, MAX_GT_INSTANCES]
        if cfg.TRAIN.USE_MINI_MASK:
            min_h, min_w = cfg.TRAIN.MINI_MASK_SHAPE[:]
            input_gt_masks = kl.Input(shape=[min_h, min_w, None],
                                      name="input_gt_masks", dtype=bool)
        else:
            input_gt_masks = kl.Input(shape=[h, w, None],
                                      name="input_gt_masks", dtype=bool)

        # Anchors, duplicated across the batch dimension because Keras
        # requires it.
        # TODO: can this be optimized to avoid duplicating the anchors?
        anchors = self.anchor_utils.get_anchors(self.image_shape)
        anchors = np.broadcast_to(anchors,
                                  (self.batch_size,) + anchors.shape)
        # A hack to get around Keras's bad support for constants.
        # BUGFIX: this Lambda was created twice with the same
        # name="anchors", which raises a duplicate-layer-name error in
        # Keras; create it exactly once.
        anchors = kl.Lambda(lambda x: tf.Variable(anchors),
                            name="anchors")(input_image)
    else:
        # Anchors in normalized coordinates
        anchors = kl.Input(shape=[None, 4], name="input_anchors")
        # Training-only tensors; defined here so they are never unbound.
        input_rpn_match = None
        input_rpn_bbox = None
        input_gt_class_ids = None
        gt_boxes = None
        input_gt_boxes = None
        input_gt_masks = None

    # Build the shared convolutional layers (bottom-up pathway).
    # resnet_graph returns the last layer of each stage, 5 in total.
    _, c2, c3, c4, c5 = backbone.resnet_graph(input_image, self.backbone,
                                              stage5=True)

    # Top-down FPN pathway.
    # TODO: add assert to verify feature map sizes match what's in config
    p5 = kl.Conv2D(self.top_down_pyramid_size, (1, 1), name='fpn_c5p5')(c5)
    p4 = kl.Add(name="fpn_p4add")([
        kl.UpSampling2D(size=(2, 2), name="fpn_p5upsampled")(p5),
        kl.Conv2D(self.top_down_pyramid_size, (1, 1), name='fpn_c4p4')(c4)])
    p3 = kl.Add(name="fpn_p3add")([
        kl.UpSampling2D(size=(2, 2), name="fpn_p4upsampled")(p4),
        kl.Conv2D(self.top_down_pyramid_size, (1, 1), name='fpn_c3p3')(c3)])
    p2 = kl.Add(name="fpn_p2add")([
        kl.UpSampling2D(size=(2, 2), name="fpn_p3upsampled")(p3),
        kl.Conv2D(self.top_down_pyramid_size, (1, 1), name='fpn_c2p2')(c2)])
    # Attach 3x3 conv to all P layers to get the final feature maps.
    p2 = kl.Conv2D(self.top_down_pyramid_size, (3, 3), padding="SAME",
                   name="fpn_p2")(p2)
    p3 = kl.Conv2D(self.top_down_pyramid_size, (3, 3), padding="SAME",
                   name="fpn_p3")(p3)
    p4 = kl.Conv2D(self.top_down_pyramid_size, (3, 3), padding="SAME",
                   name="fpn_p4")(p4)
    p5 = kl.Conv2D(self.top_down_pyramid_size, (3, 3), padding="SAME",
                   name="fpn_p5")(p5)
    # P6 is used for the 5th anchor scale in RPN. Generated by
    # subsampling from P5 with stride of 2.
    p6 = kl.MaxPooling2D(pool_size=(1, 1), strides=2, name="fpn_p6")(p5)

    # P6 feeds the RPN only; the classifier heads use P2-P5.
    rpn_feature_maps = [p2, p3, p4, p5, p6]
    mrcnn_feature_maps = [p2, p3, p4, p5]

    # RPN model, shared across pyramid levels.
    rpn = common.build_rpn_model(self.rpn_anchor_stride,
                                 len(self.rpn_anchor_ratios),
                                 self.top_down_pyramid_size)
    # Loop through pyramid layers
    layer_outputs = []  # list of lists
    for p in rpn_feature_maps:
        layer_outputs.append(rpn([p]))
    # Concatenate layer outputs: convert from per-level lists to
    # per-output lists across levels.
    # e.g. [[a1, b1, c1], [a2, b2, c2]] => [[a1, a2], [b1, b2], [c1, c2]]
    output_names = ["rpn_class_logits", "rpn_class", "rpn_bbox"]
    outputs = list(zip(*layer_outputs))
    outputs = [kl.Concatenate(axis=1, name=n)(list(o))
               for o, n in zip(outputs, output_names)]
    rpn_class_logits, rpn_class, rpn_bbox = outputs

    # Generate proposals: [batch, N, (y1, x1, y2, x2)] in normalized
    # coordinates, zero padded.
    proposal_count = cfg.TRAIN.POST_NMS_ROIS if self.train_flag \
        else cfg.TEST.POST_NMS_ROIS
    rpn_rois = common.ProposalLayer(proposal_count=proposal_count,
                                    nms_threshold=self.rpn_nms_threshold,
                                    batch_size=self.batch_size,
                                    name="ROI")([rpn_class, rpn_bbox,
                                                 anchors])

    fc_layer_size = cfg.COMMON.FPN_CLASS_FC_LAYERS_SIZE
    pool_size = cfg.COMMON.POOL_SIZE
    mask_pool_size = cfg.COMMON.MASK_POOL_SIZE
    train_or_freeze = cfg.COMMON.TRAIN_FLAG

    if self.train_flag:
        # Class ID mask to mark class IDs supported by the dataset the
        # image came from.
        active_class_ids = kl.Lambda(
            lambda x: self.image_utils.parse_image_meta_graph(
                x)["active_class_ids"])(input_image_meta)

        if not cfg.TRAIN.USE_RPN_ROIS:
            # Ignore predicted ROIs and use ROIs provided as an input.
            input_rois = kl.Input(shape=[proposal_count, 4],
                                  name="input_roi", dtype=np.int32)
            # Normalize coordinates
            target_rois = kl.Lambda(
                lambda x: self.bbox_util.norm_boxes_graph(
                    x, k.shape(input_image)[1:3]))(input_rois)
        else:
            target_rois = rpn_rois
            input_rois = None

        # Generate detection targets: subsample proposals and generate
        # target outputs for training. Proposal class IDs, gt_boxes and
        # gt_masks are zero padded; returned rois/targets likewise.
        rois, target_class_ids, target_bbox, target_mask = \
            common.DetectionTargetLayer(self.batch_size,
                                        name="proposal_targets")([
                target_rois, input_gt_class_ids, gt_boxes, input_gt_masks])

        # Network Heads
        # TODO: verify that this handles zero padded ROIs
        mrcnn_class_logits, mrcnn_class, mrcnn_bbox = \
            common.fpn_classifier_graph(rois, mrcnn_feature_maps,
                                        input_image_meta, pool_size,
                                        self.class_num,
                                        train_flag=train_or_freeze,
                                        fc_layers_size=fc_layer_size)
        mrcnn_mask = common.build_fpn_mask_graph(rois, mrcnn_feature_maps,
                                                 input_image_meta,
                                                 mask_pool_size,
                                                 self.class_num,
                                                 train_flag=train_or_freeze)

        # TODO: clean up (use tf.identity if necessary)
        output_rois = kl.Lambda(lambda x: x * 1, name="output_rois")(rois)

        # Losses
        rpn_class_loss = kl.Lambda(
            lambda x: common.rpn_class_loss_graph(*x),
            name="rpn_class_loss")([input_rpn_match, rpn_class_logits])
        rpn_bbox_loss = kl.Lambda(
            lambda x: common.rpn_bbox_loss_graph(self.batch_size, *x),
            name="rpn_bbox_loss")([input_rpn_bbox, input_rpn_match,
                                   rpn_bbox])
        class_loss = kl.Lambda(
            lambda x: common.mrcnn_class_loss_graph(*x),
            name="mrcnn_class_loss")([target_class_ids, mrcnn_class_logits,
                                      active_class_ids])
        bbox_loss = kl.Lambda(
            lambda x: common.mrcnn_bbox_loss_graph(*x),
            name="mrcnn_bbox_loss")([target_bbox, target_class_ids,
                                     mrcnn_bbox])
        mask_loss = kl.Lambda(
            lambda x: common.mrcnn_mask_loss_graph(*x),
            name="mrcnn_mask_loss")([target_mask, target_class_ids,
                                     mrcnn_mask])

        # Model
        inputs = [input_image, input_image_meta, input_rpn_match,
                  input_rpn_bbox, input_gt_class_ids, input_gt_boxes,
                  input_gt_masks]
        if not cfg.TRAIN.USE_RPN_ROIS:
            inputs.append(input_rois)
        outputs = [rpn_class_logits, rpn_class, rpn_bbox,
                   mrcnn_class_logits, mrcnn_class, mrcnn_bbox, mrcnn_mask,
                   rpn_rois, output_rois,
                   rpn_class_loss, rpn_bbox_loss, class_loss, bbox_loss,
                   mask_loss]
        model = km.Model(inputs, outputs, name='mask_rcnn')
    else:
        # Network Heads
        # Proposal classifier and BBox regressor heads
        mrcnn_class_logits, mrcnn_class, mrcnn_bbox = \
            common.fpn_classifier_graph(rpn_rois, mrcnn_feature_maps,
                                        input_image_meta, pool_size,
                                        self.class_num,
                                        train_flag=train_or_freeze,
                                        fc_layers_size=fc_layer_size)

        # Detections
        # output is [batch, num_detections, (y1, x1, y2, x2, class_id,
        # score)] in normalized coordinates
        detections = common.DetectionLayer(
            self.batch_size, name="mrcnn_detection")([rpn_rois, mrcnn_class,
                                                      mrcnn_bbox,
                                                      input_image_meta])

        # Create masks for detections
        detection_boxes = kl.Lambda(lambda x: x[..., :4])(detections)
        mrcnn_mask = common.build_fpn_mask_graph(detection_boxes,
                                                 mrcnn_feature_maps,
                                                 input_image_meta,
                                                 mask_pool_size,
                                                 self.class_num,
                                                 train_flag=train_or_freeze)

        model = km.Model([input_image, input_image_meta, anchors],
                         [detections, mrcnn_class, mrcnn_bbox, mrcnn_mask,
                          rpn_rois, rpn_class, rpn_bbox],
                         name='mask_rcnn')

    # Add multi-GPU support.
    gpu_count = cfg.COMMON.GPU_COUNT
    if gpu_count > 1:
        from m_rcnn.parallel_model import ParallelModel
        model = ParallelModel(model, gpu_count)

    return model
# Tail of a top-level U-Net builder script: the encoder (c1..c3, c7) and
# helpers (upsample, input_img, EDGE_CROP, NET_SCALING) are defined
# earlier in the file, outside this chunk.

# Decoder continues: upsample, fuse encoder skip, conv pair.
u8 = upsample(16, (2, 2), strides=(2, 2), padding='same')(c7)
u8 = layers.concatenate([u8, c2])
c8 = layers.Conv2D(16, (3, 3), activation='relu', padding='same')(u8)
c8 = layers.Conv2D(16, (3, 3), activation='relu', padding='same')(c8)
u9 = upsample(8, (2, 2), strides=(2, 2), padding='same')(c8)
u9 = layers.concatenate([u9, c1], axis=3)
c9 = layers.Conv2D(8, (3, 3), activation='relu', padding='same')(u9)
c9 = layers.Conv2D(8, (3, 3), activation='relu', padding='same')(c9)

# Single-channel sigmoid mask; crop-then-pad zeroes the border pixels.
d = layers.Conv2D(1, (1, 1), activation='sigmoid')(c9)
d = layers.Cropping2D((EDGE_CROP, EDGE_CROP))(d)
d = layers.ZeroPadding2D((EDGE_CROP, EDGE_CROP))(d)
if NET_SCALING is not None:
    d = layers.UpSampling2D(NET_SCALING)(d)
seg_model = models.Model(inputs=[input_img], outputs=[d])
seg_model.summary()

# In[ ]:

import keras.backend as K
from keras.optimizers import Adam
from keras.losses import binary_crossentropy


def dice_coef(y_true, y_pred, smooth=1):
    """Soft Dice coefficient, averaged over the batch.

    Sums over the spatial and channel axes (1, 2, 3); `smooth` keeps the
    ratio defined when both masks are empty.
    """
    intersection = K.sum(y_true * y_pred, axis=[1, 2, 3])
    union = K.sum(y_true, axis=[1, 2, 3]) + K.sum(y_pred, axis=[1, 2, 3])
    return K.mean((2. * intersection + smooth) / (union + smooth), axis=0)


# Combined dice + binary cross-entropy loss (body continues past this
# chunk).
def dice_p_bce(in_gt, in_pred):
model.add(layers.Conv2D(32, (3, 3), padding="SAME")) model.add(layers.BatchNormalization()) model.add(layers.Activation('relu')) model.add(layers.MaxPool2D((2, 2))) model.add(layers.Flatten()) model.add(layers.Dense(128, activation='relu')) #bottleneck model.add(layers.Dense(latent_dim, activation='relu')) #decoder model model.add(layers.Dense(32 * 7 * 7, activation='relu')) model.add(layers.Reshape((7, 7, 32))) model.add(layers.UpSampling2D()) model.add( layers.Conv2D(32, kernel_size=(3, 3), activation='relu', padding='same')) model.add(layers.UpSampling2D()) model.add( layers.Conv2D(16, kernel_size=(3, 3), activation='relu', padding='same')) model.add( layers.Conv2D(1, kernel_size=(3, 3), activation='sigmoid', padding='same')) model.compile(optimizer=optimizers.Adam(lr=0.01), loss='binary_crossentropy', metrics=['acc']) summary = model.fit(X_tr, X_tr, validation_data=(X_valid, X_valid), batch_size=128,
def build_model_resUnet(self):
    """Build and compile a ResUNet-a style segmentation model.

    Encoder: 1x1 strided convs between multi-dilation residual blocks;
    bottleneck and head use PSP pooling. Compiled with SGD and the
    Tanimoto loss.

    Fixes over the original:
    - `Tanimoto_dual_loss` computed `(loss1 + loss2) / 2` but never
      returned it (the function returned None); it now returns the value.
    - Its first call passed arguments as `Tanimoto_loss(pred, label)`,
      swapping them relative to the `(label, pred)` signature of an
      asymmetric loss; the order is now consistent with the second call.

    :return: the compiled keras Model.
    """

    def Tanimoto_loss(label, pred):
        """Tanimoto (Jaccard-like) loss, reduced over the last axis.

        NOTE(review): the denominator uses `sum_square + 1 - sum_product`
        — the constant 1 stands in where the label's squared sum would
        normally appear; confirm this is intentional.
        """
        square = tf.square(pred)
        sum_square = tf.reduce_sum(square, axis=-1)
        product = tf.multiply(pred, label)
        sum_product = tf.reduce_sum(product, axis=-1)
        denominator = tf.subtract(tf.add(sum_square, 1), sum_product)
        loss = tf.divide(sum_product, denominator)
        loss = tf.reduce_mean(loss)
        return 1.0 - loss

    def Tanimoto_dual_loss(label, pred):
        """Average of the Tanimoto loss on the mask and its complement."""
        loss1 = Tanimoto_loss(label, pred)
        pred = tf.subtract(1.0, pred)
        label = tf.subtract(1.0, label)
        loss2 = Tanimoto_loss(label, pred)
        # BUGFIX: the original assigned the mean but never returned it.
        return (loss1 + loss2) / 2

    def ResBlock(tensor, filters, kernel_size, dilation_rates, stride):
        """Residual block: one BN-ReLU-Conv x2 branch per dilation rate,
        summed when there is more than one branch."""

        def branch(dilation_rate):
            x = KL.BatchNormalization()(tensor)
            x = KL.Activation('relu')(x)
            x = KL.Conv2D(filters, kernel_size, strides=stride,
                          dilation_rate=dilation_rate, padding='same')(x)
            x = KL.BatchNormalization()(x)
            x = KL.Activation('relu')(x)
            x = KL.Conv2D(filters, kernel_size, strides=stride,
                          dilation_rate=dilation_rate, padding='same')(x)
            return x

        out = [branch(d) for d in dilation_rates]
        return KL.Add()(out) if len(dilation_rates) > 1 else out[0]

    def PSPPooling(tensor, filters):
        """Pyramid scene parsing pooling at scales 2/4/8/16, fused with
        the input and projected back to `filters` channels."""
        x1 = KL.MaxPooling2D(pool_size=(2, 2))(tensor)
        x2 = KL.MaxPooling2D(pool_size=(4, 4))(tensor)
        x3 = KL.MaxPooling2D(pool_size=(8, 8))(tensor)
        x4 = KL.MaxPooling2D(pool_size=(16, 16))(tensor)
        x1 = KL.Conv2D(int(filters / 4), (1, 1))(x1)
        x2 = KL.Conv2D(int(filters / 4), (1, 1))(x2)
        x3 = KL.Conv2D(int(filters / 4), (1, 1))(x3)
        x4 = KL.Conv2D(int(filters / 4), (1, 1))(x4)
        x1 = KL.UpSampling2D(size=(2, 2))(x1)
        x2 = KL.UpSampling2D(size=(4, 4))(x2)
        x3 = KL.UpSampling2D(size=(8, 8))(x3)
        x4 = KL.UpSampling2D(size=(16, 16))(x4)
        x = KL.Concatenate()([x1, x2, x3, x4, tensor])
        x = KL.Conv2D(filters, (1, 1))(x)
        return x

    def combine(up_branch, skip_branch, filters):
        """Fuse a decoder tensor with its encoder skip connection."""
        x = KL.Activation('relu')(up_branch)
        x = KL.Concatenate()([x, skip_branch])
        x = KL.Conv2D(filters, (1, 1))(x)
        return x

    inputs = KM.Input(shape=(self.config.IMAGE_H, self.config.IMAGE_W,
                             self.config.IMAGE_C))

    # Encoder: widen with 1x1 convs, downsample with stride-2 1x1 convs.
    c1 = x = KL.Conv2D(32, (1, 1), strides=(1, 1), dilation_rate=1)(inputs)
    c2 = x = ResBlock(x, 32, (3, 3), [1, 3, 15, 31], (1, 1))
    x = KL.Conv2D(64, (1, 1), strides=(2, 2))(x)
    c3 = x = ResBlock(x, 64, (3, 3), [1, 3, 15, 31], (1, 1))
    x = KL.Conv2D(128, (1, 1), strides=(2, 2))(x)
    c4 = x = ResBlock(x, 128, (3, 3), [1, 3, 15], (1, 1))
    x = KL.Conv2D(256, (1, 1), strides=(2, 2))(x)
    c5 = x = ResBlock(x, 256, (3, 3), [1, 3, 15], (1, 1))
    x = KL.Conv2D(512, (1, 1), strides=(2, 2))(x)
    c6 = x = ResBlock(x, 512, (3, 3), [1], (1, 1))
    x = KL.Conv2D(1024, (1, 1), strides=(2, 2))(x)
    x = ResBlock(x, 1024, (3, 3), [1], (1, 1))

    # Bottleneck PSP pooling.
    x = PSPPooling(x, 1024)

    # Decoder: project down, upsample, fuse the skip, residual block.
    x = KL.Conv2D(512, (1, 1))(x)
    x = KL.UpSampling2D()(x)
    x = combine(x, c6, 512)
    x = ResBlock(x, 512, (3, 3), [1], 1)
    x = KL.Conv2D(256, (1, 1))(x)
    x = KL.UpSampling2D()(x)
    x = combine(x, c5, 256)
    x = ResBlock(x, 256, (3, 3), [1, 3, 15], 1)
    x = KL.Conv2D(128, (1, 1))(x)
    x = KL.UpSampling2D()(x)
    x = combine(x, c4, 128)
    x = ResBlock(x, 128, (3, 3), [1, 3, 15], 1)
    x = KL.Conv2D(64, (1, 1))(x)
    x = KL.UpSampling2D()(x)
    x = combine(x, c3, 64)
    x = ResBlock(x, 64, (3, 3), [1, 3, 15, 31], 1)
    x = KL.Conv2D(32, (1, 1))(x)
    x = KL.UpSampling2D()(x)
    x = combine(x, c2, 32)
    x = ResBlock(x, 32, (3, 3), [1, 3, 15, 31], 1)
    x = combine(x, c1, 32)
    x = PSPPooling(x, 32)

    # Per-pixel class probabilities.
    x = KL.Conv2D(self.config.CLASSES_NUM, (1, 1))(x)
    x = KL.Activation('softmax')(x)

    model = KM.Model(inputs=inputs, outputs=x)
    model.compile(optimizer=keras.optimizers.SGD(lr=0.001, momentum=0.8),
                  loss=Tanimoto_loss, metrics=['accuracy'])
    model.summary()
    return model
def unet(weights=None,
         input_shape=(256, 256, 1),
         classes=1,
         background_as_class=False,
         up_conv='upsampling',
         batch_normalization=False,
         dropout_rate=(0., 0., 0., .5, .5, 0., 0., 0., 0.)):
    """Instantiate the U-Net architecture for Keras.

    Convolutions use `padding='same'` instead of the reference paper's
    `padding='valid'` so input and output shapes match; `'valid'` is not
    supported. Being fully convolutional, the network only really needs the
    channel count, but fixing height/width makes `model.summary()` show
    concrete feature-map shapes.

    Up-sampling followed by a convolution (instead of transposed convolution)
    avoids checkerboard artifacts (https://distill.pub/2016/deconv-checkerboard/).
    Bilinear interpolation is used, with the same kernel size as the transposed
    convolution so the parameter count does not grow. Default dropout rates
    follow the reference Caffe implementation.

    :param weights: optional path to a weights file to load (random init if None)
    :param input_shape: [int/tuple] number of input channels or full input shape
    :param classes: [int] number of classes to predict
    :param background_as_class: [bool] add an extra channel for background
    :param up_conv: ['deconvolution'/'upsampling'] how to up-convolve
    :param batch_normalization: [bool] batch-normalize after each convolution
    :param dropout_rate: [number/tuple/list] one rate for all 9 blocks, or a
        9-element sequence of block-wise rates
    :return: Keras model instance
    :raises ValueError: on invalid `input_shape`, `up_conv` or `dropout_rate`
    """
    # --- Normalize and validate input_shape ---
    if isinstance(input_shape, int):
        if backend.image_data_format() == 'channels_last':
            input_shape = (None, None, input_shape)
        else:
            input_shape = (input_shape, None, None)
    elif isinstance(input_shape, tuple) and len(input_shape) == 3:
        if backend.image_data_format() == 'channels_last':
            input_height, input_width = input_shape[0], input_shape[1]
        else:
            input_height, input_width = input_shape[1], input_shape[2]
        # None dimensions (fully convolutional use) skip the divisibility check;
        # the original code crashed on `None % 16`.
        if input_height is not None and input_width is not None and (
                input_height % 16 != 0 or input_width % 16 != 0):
            raise ValueError(
                "Input height and width should be a multiply of 16 in order to do 4 down-samplings and "
                "then 4 up-samplings correctly")
    else:
        # Fixed: adjacent literals previously ran together ("channels)or tuple").
        raise ValueError(
            "The `input_shape` argument should be either integer (number of input channels) "
            "or tuple of size 3 with input shape")

    if background_as_class is True:
        # Add one more class for background; per-pixel probabilities become
        # conditionally dependent, hence softmax.
        classes += 1
        top_activation = 'softmax'
    else:
        # Independent per-pixel class probabilities; a pixel is background
        # when all class activations are near zero.
        top_activation = 'sigmoid'

    if up_conv not in ('deconvolution', 'upsampling'):
        # Fixed: missing spaces and closing parenthesis in the message.
        raise ValueError(
            "The `up_conv` argument should be either 'deconvolution' (up-convolution by transposed "
            "convolution or so called deconvolution) or 'upsampling' (up-convolution by up-sampling and "
            "regular convolution)")

    # Accept ints as well as floats (the docstring promises "integer"; the
    # original `isinstance(..., float)` rejected e.g. dropout_rate=0).
    if isinstance(dropout_rate, (int, float)):
        dropout_rate = [dropout_rate] * 9
    elif not isinstance(dropout_rate, (tuple, list)) or len(dropout_rate) != 9:
        raise ValueError(
            "The `dropout_rate` argument should be either float (the same dropout rate "
            "for all building blocks) or list/tuple of size 9 with block-wise dropout rates")

    channel_axis = 3 if backend.image_data_format() == 'channels_last' else 1
    data = layers.Input(input_shape)

    # --- Contracting path: double conv then 2x2 max-pool, 4 times ---
    down = [None] * 5
    down[0] = double_conv2d(data, 64, 3, padding='same',
                            batch_normalization=batch_normalization,
                            dropout_rate=dropout_rate[0])
    for i, filters in enumerate((128, 256, 512, 1024), start=1):
        pooled = layers.MaxPooling2D(pool_size=(2, 2))(down[i - 1])
        down[i] = double_conv2d(pooled, filters, 3, padding='same',
                                batch_normalization=batch_normalization,
                                dropout_rate=dropout_rate[i])

    def _up_block(below, skip, filters, block_dropout_rate):
        """One expanding-path stage: 2x up-convolution of `below`, relu,
        concatenation with the encoder skip `skip`, then a double conv."""
        if up_conv == 'deconvolution':
            up = layers.Conv2DTranspose(filters, 2, strides=(2, 2),
                                        kernel_initializer='he_normal')(below)
        else:
            up = layers.UpSampling2D(size=(2, 2),
                                     interpolation='bilinear')(below)
            up = layers.Conv2D(filters, 2, padding='same',
                               kernel_initializer='he_normal')(up)
        up = layers.Activation('relu')(up)
        up = layers.concatenate([skip, up], axis=channel_axis)
        return double_conv2d(up, filters, 3, padding='same',
                             batch_normalization=batch_normalization,
                             dropout_rate=block_dropout_rate)

    # --- Expanding path: mirror the contracting path (blocks 5..8) ---
    up = down[4]
    for skip_idx, filters in zip((3, 2, 1, 0), (512, 256, 128, 64)):
        up = _up_block(up, down[skip_idx], filters, dropout_rate[8 - skip_idx])

    # --- 1x1 classification head ---
    score = layers.Conv2D(classes, 1, padding='same',
                          kernel_initializer='he_normal')(up)
    score = layers.Activation(top_activation)(score)

    model = models.Model(data, score)
    if weights is not None:
        model.load_weights(weights)
    return model
def upsample_simple(filters, kernel_size, strides, padding): return layers.UpSampling2D(strides)
discriminator_optimizer = keras.optimizers.RMSprop(lr=0.0006, clipvalue=1.0, decay=1e-8) gan_optimizer = keras.optimizers.RMSprop(lr=0.0006, clipvalue=1.0, decay=1e-8) ######################### MODEL BEGIN ##################################### generator_input = keras.Input(shape=(latent_dim, )) # First, transform the input into a 8x8 128-channels feature map x = layers.Dense(128 * 8 * 8, name='g_top_layer')(generator_input) x = layers.BatchNormalization(momentum=0.5)(x) x = layers.Activation("relu")(x) x = layers.Reshape((8, 8, 128))(x) # Upsampling x = layers.UpSampling2D()(x) x = layers.Conv2D(128, 5, padding='same', use_bias=True)(x) x = layers.BatchNormalization(momentum=0.5)(x) x = layers.Activation("relu")(x) x = layers.UpSampling2D()(x) x = layers.Conv2D(64, 5, padding='same', use_bias=True)(x) x = layers.BatchNormalization(momentum=0.5)(x) x = layers.Activation("relu")(x) # Produce a 32x32 1-channel feature map x = layers.Conv2D(channels, 5, activation='tanh', padding='same', name='g_btm_layer')(x) generator = keras.models.Model(generator_input, x)
def residual_network(x):
    """
    U-shaped segmentation network built from ResNeXt-style residual blocks
    with attention inside the grouped convolutions.

    ResNeXt by default. For ResNet set `cardinality` = 1 above.

    :param x: input image tensor
    :return: single-channel sigmoid output tensor at input resolution
    """

    def add_common_layers(y):
        """BatchNorm followed by LeakyReLU."""
        y = layers.BatchNormalization()(y)
        y = layers.LeakyReLU()(y)
        return y

    def attention_block(y, nb_channels_in, _strides):
        """3x3 conv path gated (element-wise) by a softmax attention map."""
        y = layers.Conv2D(nb_channels_in, kernel_size=(1, 1), strides=(1, 1),
                          padding='same')(y)
        y_scores = layers.Activation('softmax')(y)
        y = add_common_layers(y)
        y = layers.Conv2D(nb_channels_in, kernel_size=(3, 3), strides=(1, 1),
                          padding='same')(y)
        y = layers.Multiply()([y_scores, y])
        y = add_common_layers(y)
        y = layers.Conv2D(nb_channels_in, kernel_size=(3, 3), strides=_strides,
                          padding='same')(y)
        y = add_common_layers(y)
        return y

    def grouped_convolution(y, nb_channels, _strides):
        # when `cardinality` == 1 this is just a standard convolution
        if cardinality == 1:
            return layers.Conv2D(nb_channels, kernel_size=(3, 3),
                                 strides=_strides, padding='same')(y)

        assert not nb_channels % cardinality
        _d = nb_channels // cardinality

        # in a grouped convolution layer, input and output channels are divided
        # into `cardinality` groups, and convolutions are separately performed
        # within each group
        groups = []
        for j in range(cardinality):
            # Bind j and _d as defaults: Lambda layers serialize their function
            # and re-evaluate it on model load; a late-binding closure over `j`
            # would then slice every group at the loop's final value.
            group = layers.Lambda(
                lambda z, j=j, _d=_d: z[:, :, :, j * _d:j * _d + _d])(y)
            groups.append(attention_block(group, _d, _strides))

        # the grouped convolutional layer concatenates them as the outputs of the layer
        y = layers.concatenate(groups)
        return y

    def residual_block(y, nb_channels_in, nb_channels_out, _strides=(1, 1),
                       _project_shortcut=False):
        """
        Our network consists of a stack of residual blocks. These blocks have
        the same topology, and are subject to two simple rules:
        - If producing spatial maps of the same size, the blocks share the same
          hyper-parameters (width and filter sizes).
        - Each time the spatial map is down-sampled by a factor of 2, the width
          of the blocks is multiplied by a factor of 2.
        """
        shortcut = y

        # we modify the residual building block as a bottleneck design to make
        # the network more economical
        y = layers.Conv2D(nb_channels_in, kernel_size=(1, 1), strides=(1, 1),
                          padding='same')(y)
        y = add_common_layers(y)

        # ResNeXt (identical to ResNet when `cardinality` == 1)
        y = grouped_convolution(y, nb_channels_in, _strides=_strides)
        y = add_common_layers(y)

        y = layers.Conv2D(nb_channels_out, kernel_size=(1, 1), strides=(1, 1),
                          padding='same')(y)
        # batch normalization is employed after aggregating the transformations
        # and before adding to the shortcut
        y = layers.BatchNormalization()(y)

        # identity shortcuts used directly when the input and output are of the
        # same dimensions
        if _project_shortcut or _strides != (1, 1):
            # when the dimensions increase projection shortcut is used to match
            # dimensions (done by 1x1 convolutions); when the shortcuts go
            # across feature maps of two sizes, they are performed with a
            # stride of 2
            shortcut = layers.Conv2D(nb_channels_out, kernel_size=(1, 1),
                                     strides=_strides,
                                     padding='same')(shortcut)
            shortcut = layers.BatchNormalization()(shortcut)

        y = layers.add([shortcut, y])

        # relu is performed right after each batch normalization,
        # except for the output of the block where relu is performed after the
        # adding to the shortcut
        y = layers.LeakyReLU()(y)

        return y

    # --- Encoder: stem conv then strided residual blocks halve resolution ---
    conv1 = layers.Conv2D(84, kernel_size=(3, 3), strides=(1, 1),
                          padding='same')(x)
    conv1_d = residual_block(conv1, 84, 84, _project_shortcut=False,
                             _strides=(2, 2))
    conv2 = residual_block(conv1_d, 84, 144, _project_shortcut=True,
                           _strides=(1, 1))
    conv2_d = residual_block(conv2, 144, 144, _project_shortcut=False,
                             _strides=(2, 2))
    conv3 = residual_block(conv2_d, 144, 255, _project_shortcut=True,
                           _strides=(1, 1))
    conv3_d = residual_block(conv3, 255, 255, _project_shortcut=False,
                             _strides=(2, 2))
    conv4 = residual_block(conv3_d, 255, 396, _project_shortcut=True,
                           _strides=(1, 1))
    conv4_d = residual_block(conv4, 396, 396, _project_shortcut=False,
                             _strides=(2, 2))
    bottleneck = residual_block(conv4_d, 396, 510, _project_shortcut=True,
                                _strides=(1, 1))

    # --- Decoder: upsample and fuse encoder features via element-wise Add ---
    up1 = layers.UpSampling2D(size=(2, 2))(bottleneck)
    up1_c = residual_block(up1, 510, 396, _project_shortcut=True,
                           _strides=(1, 1))
    merge1 = layers.Add()([conv4, up1_c])
    conv5 = residual_block(merge1, 396, 396, _project_shortcut=False,
                           _strides=(1, 1))
    up2 = layers.UpSampling2D(size=(2, 2))(conv5)
    up2_c = residual_block(up2, 396, 255, _project_shortcut=True,
                           _strides=(1, 1))
    merge2 = layers.Add()([conv3, up2_c])
    conv6 = residual_block(merge2, 255, 255, _project_shortcut=False,
                           _strides=(1, 1))
    up3 = layers.UpSampling2D(size=(2, 2))(conv6)
    up3_c = residual_block(up3, 255, 144, _project_shortcut=True,
                           _strides=(1, 1))
    merge3 = layers.Add()([conv2, up3_c])
    conv7 = residual_block(merge3, 144, 144, _project_shortcut=False,
                           _strides=(1, 1))
    up4 = layers.UpSampling2D(size=(2, 2))(conv7)
    up4_c = residual_block(up4, 144, 84, _project_shortcut=True,
                           _strides=(1, 1))
    merge4 = layers.Add()([conv1, up4_c])

    # --- Head: channel-reducing residual blocks, then sigmoid mask ---
    conv8 = residual_block(merge4, 84, 48, _project_shortcut=True,
                           _strides=(1, 1))
    conv9 = residual_block(conv8, 48, 27, _project_shortcut=True,
                           _strides=(1, 1))
    conv10 = residual_block(conv9, 27, 9, _project_shortcut=True,
                            _strides=(1, 1))
    out = layers.Conv2D(1, 1, activation='sigmoid', padding='same',
                        kernel_initializer='he_normal')(conv10)
    return out
def UNet_like2(input_tensor=None):
    """U-Net variant with Conv->BatchNorm->PReLU encoder blocks, dropout at
    the two deepest stages, and PReLU-only decoder blocks; single-channel
    sigmoid output at input resolution.

    :param input_tensor: Keras input tensor to build the graph on
    :return: Keras `Model` named 'U-Net'
    """
    img_input = input_tensor

    def _enc_conv(tensor, filters, stage, step):
        """Encoder unit: 3x3 conv -> BN -> shared-axes PReLU, with the same
        layer names as the hand-written original (conv{s}_{i}, conv{s}_{i}bn,
        prelu{s}_{i})."""
        tensor = layers.Conv2D(filters, (3, 3), padding='same',
                               kernel_initializer='he_normal',
                               name='conv%d_%d' % (stage, step))(tensor)
        tensor = layers.BatchNormalization(
            axis=3, name='conv%d_%dbn' % (stage, step))(tensor)
        return layers.PReLU(shared_axes=[1, 2],
                            name='prelu%d_%d' % (stage, step))(tensor)

    def _dec_conv(tensor, filters, stage, step):
        """Decoder unit: 3x3 conv -> shared-axes PReLU (no BatchNorm),
        named upconv{s}_{i} / prelu{s}_{i}."""
        tensor = layers.Conv2D(filters, (3, 3), padding='same',
                               kernel_initializer='he_normal',
                               name='upconv%d_%d' % (stage, step))(tensor)
        return layers.PReLU(shared_axes=[1, 2],
                            name='prelu%d_%d' % (stage, step))(tensor)

    # --- Contracting path: stages 1-4 (64..512 filters), dropout at stage 4 ---
    skips = []
    stream = img_input
    for stage, filters in enumerate((64, 128, 256, 512), start=1):
        stream = _enc_conv(stream, filters, stage, 1)
        stream = _enc_conv(stream, filters, stage, 2)
        if stage == 4:
            stream = layers.Dropout(0.5)(stream)
        skips.append(stream)
        stream = layers.MaxPooling2D((2, 2), strides=(2, 2),
                                     name='pool%d' % stage)(stream)

    # --- Bottom: stage 5 (1024 filters) with dropout ---
    stream = _enc_conv(stream, 1024, 5, 1)
    stream = _enc_conv(stream, 1024, 5, 2)
    stream = layers.Dropout(0.5)(stream)

    # --- Expanding path: stages 6-9, fusing the stage-4..1 skips ---
    for stage, (filters, skip) in enumerate(
            zip((512, 256, 128, 64), reversed(skips)), start=6):
        stream = _dec_conv(layers.UpSampling2D(size=(2, 2))(stream),
                           filters, stage, 1)
        stream = layers.concatenate([skip, stream], axis=3)
        stream = _dec_conv(stream, filters, stage, 2)
        stream = _dec_conv(stream, filters, stage, 3)

    # Two-channel refinement conv, then the 1x1 sigmoid head.
    stream = _dec_conv(stream, 2, 9, 4)
    conv10 = layers.Conv2D(1, (1, 1), activation='sigmoid',
                           name='conv10')(stream)

    return models.Model(img_input, conv10, name='U-Net')
def define_model(self):
    """Build a convolutional autoencoder for 28x28x1 inputs (e.g. MNIST) and
    store the compiled autoencoder, encoder and decoder on the instance.

    Sets: self.model (full AE), self.encoder, self.decoder, self.define_flag.
    """
    input_img = layers.Input(shape=(28, 28, 1))  # adapt this if using `channels_first` image data format

    # Encoder: five conv + 2x2 max-pool stages.
    # Spatial sizes with padding='same': 28 -> 14 -> 7 -> 4 -> 2 -> 1.
    x = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(input_img)
    x = layers.MaxPooling2D((2, 2), padding='same')(x)
    x = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = layers.MaxPooling2D((2, 2), padding='same')(x)
    x = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x)
    x = layers.MaxPooling2D((2, 2), padding='same')(x)
    x = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x)
    x = layers.MaxPooling2D((2, 2), padding='same')(x)
    x = layers.Conv2D(self.latent_space_dims, (3, 3), activation='relu',
                      padding='same')(x)
    latent_space = layers.MaxPooling2D((2, 2), padding='same')(x)
    # Latent representation is (1, 1, self.latent_space_dims) for 28x28 input.
    encoder = Model(input_img, latent_space, name='Encoder')
    encoder.summary()

    # Decoder: standalone model taking the latent tensor shape as input.
    decoder_inputs = layers.Input(shape=K.int_shape(latent_space)[1:])
    x = layers.Conv2D(self.latent_space_dims, (3, 3), activation='relu',
                      padding='same')(decoder_inputs)
    x = layers.UpSampling2D((2, 2))(x)
    x = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x)
    x = layers.UpSampling2D((2, 2))(x)
    x = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x)
    x = layers.UpSampling2D((2, 2))(x)
    x = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = layers.UpSampling2D((2, 2))(x)
    # Deliberately no padding='same' here: the 'valid' conv shrinks 16 -> 14
    # so the final up-sampling restores the original 28x28 size.
    x = layers.Conv2D(32, (3, 3), activation='relu')(x)
    x = layers.UpSampling2D((2, 2))(x)
    decoded_img = layers.Conv2D(1, (3, 3), activation='sigmoid',
                                padding='same')(x)
    decoder = Model(decoder_inputs, decoded_img, name='decoder_model')
    decoder.summary()

    # Full autoencoder: run the decoder model on the encoder's latent output.
    z_decoded = decoder(latent_space)
    AE = Model(input_img, z_decoded)
    AE.compile(optimizer='adadelta', loss='binary_crossentropy')
    AE.summary()
    # Encoder/decoder are compiled standalone as well (enables their direct use;
    # training normally goes through the combined AE model).
    encoder.compile(optimizer='rmsprop', loss='binary_crossentropy')
    decoder.compile(optimizer='rmsprop', loss='binary_crossentropy')
    self.model = AE
    self.encoder = encoder
    self.decoder = decoder
    self.define_flag = True
def UNet_like(input_tensor=None):
    """Plain U-Net: double relu-conv encoder stages with max-pooling, an
    up-sample + conv + skip-concatenation decoder, and a single-channel
    sigmoid output.

    :param input_tensor: Keras input tensor to build the graph on
    :return: Keras `Model` named 'U-Net'
    """
    img_input = input_tensor

    def _conv(tensor, filters, label):
        """3x3 relu conv, 'same' padding, he_normal init, explicit name."""
        return layers.Conv2D(filters, (3, 3), activation='relu',
                             padding='same', kernel_initializer='he_normal',
                             name=label)(tensor)

    # --- Contracting path: stages 1-4 (64..512 filters) ---
    skips = []
    stream = img_input
    for stage, filters in enumerate((64, 128, 256, 512), start=1):
        stream = _conv(stream, filters, 'conv%d_1' % stage)
        stream = _conv(stream, filters, 'conv%d_2' % stage)
        skips.append(stream)
        stream = layers.MaxPooling2D((2, 2), strides=(2, 2),
                                     name='pool%d' % stage)(stream)

    # --- Bottom: stage 5 (1024 filters) ---
    stream = _conv(stream, 1024, 'conv5_1')
    stream = _conv(stream, 1024, 'conv5_2')

    # --- Expanding path: stages 6-9, fusing the stage-4..1 skips ---
    for stage, (filters, skip) in enumerate(
            zip((512, 256, 128, 64), reversed(skips)), start=6):
        stream = _conv(layers.UpSampling2D(size=(2, 2))(stream),
                       filters, 'upconv%d_1' % stage)
        stream = layers.concatenate([skip, stream], axis=3)
        stream = _conv(stream, filters, 'upconv%d_2' % stage)
        stream = _conv(stream, filters, 'upconv%d_3' % stage)

    # Two-channel refinement conv, then the 1x1 sigmoid head.
    stream = _conv(stream, 2, 'upconv9_4')
    conv10 = layers.Conv2D(1, (1, 1), activation='sigmoid',
                           name='conv10')(stream)

    return models.Model(img_input, conv10, name='U-Net')
def build(self, mode, config):
    """Build Mask R-CNN architecture.

    mode: Either "training" or "inference". The inputs and outputs of
        the model differ accordingly.
    config: Configuration object; image shape, anchor parameters, ROI
        counts etc. are all read from it.

    Returns a Keras model (wrapped in ParallelModel when GPU_COUNT > 1).
    """
    assert mode in ['training', 'inference']

    # Image size must be dividable by 2 multiple times (6 downscalings in
    # the backbone/FPN), otherwise upsampling produces fractional sizes.
    h, w = config.IMAGE_SHAPE[:2]
    if h / 2**6 != int(h / 2**6) or w / 2**6 != int(w / 2**6):
        raise Exception(
            "Image size must be dividable by 2 at least 6 times "
            "to avoid fractions when downscaling and upscaling."
            "For example, use 256, 320, 384, 448, 512, ... etc. ")

    # Inputs
    input_image = KL.Input(shape=config.IMAGE_SHAPE.tolist(),
                           name="input_image")
    # Flat per-image metadata vector; parsed later by parse_image_meta_graph.
    input_image_meta = KL.Input(shape=[None], name="input_image_meta")
    if mode == "training":
        # RPN GT: per-anchor match labels and bbox deltas.
        input_rpn_match = KL.Input(shape=[None, 1],
                                   name="input_rpn_match",
                                   dtype=tf.int32)
        input_rpn_bbox = KL.Input(shape=[None, 4],
                                  name="input_rpn_bbox",
                                  dtype=tf.float32)

        # Detection GT (class IDs, bounding boxes, and masks)
        # 1. GT Class IDs (zero padded)
        input_gt_class_ids = KL.Input(shape=[None],
                                      name="input_gt_class_ids",
                                      dtype=tf.int32)
        # 2. GT Boxes in pixels (zero padded)
        # [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] in image coordinates
        input_gt_boxes = KL.Input(shape=[None, 4],
                                  name="input_gt_boxes",
                                  dtype=tf.float32)
        # Normalize coordinates to the 0-1 range by dividing by image size.
        h, w = K.shape(input_image)[1], K.shape(input_image)[2]
        image_scale = K.cast(K.stack([h, w, h, w], axis=0), tf.float32)
        gt_boxes = KL.Lambda(lambda x: x / image_scale)(input_gt_boxes)
        # 3. GT Masks (zero padded)
        # [batch, height, width, MAX_GT_INSTANCES]
        if config.USE_MINI_MASK:
            # Masks stored at reduced MINI_MASK_SHAPE resolution to save memory.
            input_gt_masks = KL.Input(shape=[
                config.MINI_MASK_SHAPE[0], config.MINI_MASK_SHAPE[1], None
            ],
                                      name="input_gt_masks",
                                      dtype=bool)
        else:
            input_gt_masks = KL.Input(
                shape=[config.IMAGE_SHAPE[0], config.IMAGE_SHAPE[1], None],
                name="input_gt_masks",
                dtype=bool)

    # Build the shared convolutional layers.
    # Bottom-up Layers
    # Returns a list of the last layers of each stage, 5 in total.
    # Don't create the head (stage 5), so we pick the 4th item in the list.
    _, C2, C3, C4, C5 = resnet_graph(input_image, "resnet101", stage5=True)
    # Top-down Layers (FPN): 1x1 lateral convs + upsample-and-add.
    # TODO: add assert to verify feature map sizes match what's in config
    P5 = KL.Conv2D(256, (1, 1), name='fpn_c5p5')(C5)
    P4 = KL.Add(name="fpn_p4add")([
        KL.UpSampling2D(size=(2, 2), name="fpn_p5upsampled")(P5),
        KL.Conv2D(256, (1, 1), name='fpn_c4p4')(C4)
    ])
    P3 = KL.Add(name="fpn_p3add")([
        KL.UpSampling2D(size=(2, 2), name="fpn_p4upsampled")(P4),
        KL.Conv2D(256, (1, 1), name='fpn_c3p3')(C3)
    ])
    P2 = KL.Add(name="fpn_p2add")([
        KL.UpSampling2D(size=(2, 2), name="fpn_p3upsampled")(P3),
        KL.Conv2D(256, (1, 1), name='fpn_c2p2')(C2)
    ])
    # Attach 3x3 conv to all P layers to get the final feature maps.
    P2 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p2")(P2)
    P3 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p3")(P3)
    P4 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p4")(P4)
    P5 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p5")(P5)
    # P6 is used for the 5th anchor scale in RPN. Generated by
    # subsampling from P5 with stride of 2.
    P6 = KL.MaxPooling2D(pool_size=(1, 1), strides=2, name="fpn_p6")(P5)

    # Note that P6 is used in RPN, but not in the classifier heads.
    rpn_feature_maps = [P2, P3, P4, P5, P6]
    mrcnn_feature_maps = [P2, P3, P4, P5]

    # Generate Anchors (one set per pyramid level, cached on self).
    self.anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                                  config.RPN_ANCHOR_RATIOS,
                                                  config.BACKBONE_SHAPES,
                                                  config.BACKBONE_STRIDES,
                                                  config.RPN_ANCHOR_STRIDE)

    # RPN Model: a shared-weight model applied to every pyramid level.
    rpn = build_rpn_model(config.RPN_ANCHOR_STRIDE,
                          len(config.RPN_ANCHOR_RATIOS), 256)
    # Loop through pyramid layers
    layer_outputs = []  # list of lists
    for p in rpn_feature_maps:
        layer_outputs.append(rpn([p]))
    # Concatenate layer outputs
    # Convert from list of lists of level outputs to list of lists
    # of outputs across levels.
    # e.g. [[a1, b1, c1], [a2, b2, c2]] => [[a1, a2], [b1, b2], [c1, c2]]
    output_names = ["rpn_class_logits", "rpn_class", "rpn_bbox"]
    outputs = list(zip(*layer_outputs))
    outputs = [
        KL.Concatenate(axis=1, name=n)(list(o))
        for o, n in zip(outputs, output_names)
    ]

    rpn_class_logits, rpn_class, rpn_bbox = outputs

    # Generate proposals
    # Proposals are [batch, N, (y1, x1, y2, x2)] in normalized coordinates
    # and zero padded.
    proposal_count = config.POST_NMS_ROIS_TRAINING if mode == "training"\
        else config.POST_NMS_ROIS_INFERENCE
    rpn_rois = ProposalLayer(proposal_count=proposal_count,
                             nms_threshold=config.RPN_NMS_THRESHOLD,
                             name="ROI",
                             anchors=self.anchors,
                             config=config)([rpn_class, rpn_bbox])

    if mode == "training":
        # Class ID mask to mark class IDs supported by the dataset the image
        # came from.
        _, _, _, active_class_ids = KL.Lambda(
            lambda x: parse_image_meta_graph(x),
            mask=[None, None, None, None])(input_image_meta)

        if not config.USE_RPN_ROIS:
            # Ignore predicted ROIs and use ROIs provided as an input.
            # NOTE(review): dtype np.int32 here implies the externally
            # supplied ROIs are integer pixel coordinates — confirm with
            # the data generator that feeds this input.
            input_rois = KL.Input(shape=[config.POST_NMS_ROIS_TRAINING, 4],
                                  name="input_roi",
                                  dtype=np.int32)
            # Normalize coordinates to 0-1 range.
            target_rois = KL.Lambda(lambda x: K.cast(x, tf.float32) /
                                    image_scale[:4])(input_rois)
        else:
            target_rois = rpn_rois

        # Generate detection targets
        # Subsamples proposals and generates target outputs for training
        # Note that proposal class IDs, gt_boxes, and gt_masks are zero
        # padded. Equally, returned rois and targets are zero padded.
        rois, target_class_ids, target_bbox, target_mask =\
            DetectionTargetLayer(config, name="proposal_targets")([
                target_rois, input_gt_class_ids, gt_boxes, input_gt_masks])

        # Network Heads
        # TODO: verify that this handles zero padded ROIs
        mrcnn_class_logits, mrcnn_class, mrcnn_bbox =\
            fpn_classifier_graph(rois, mrcnn_feature_maps, config.IMAGE_SHAPE,
                                 config.POOL_SIZE, config.NUM_CLASSES)

        mrcnn_mask = build_fpn_mask_graph(rois, mrcnn_feature_maps,
                                          config.IMAGE_SHAPE,
                                          config.MASK_POOL_SIZE,
                                          config.NUM_CLASSES)

        # Identity op so the sampled ROIs appear as a named model output.
        # TODO: clean up (use tf.identity if necessary)
        output_rois = KL.Lambda(lambda x: x * 1, name="output_rois")(rois)

        # Losses: each wrapped in a Lambda so it becomes a named graph node.
        rpn_class_loss = KL.Lambda(lambda x: rpn_class_loss_graph(*x),
                                   name="rpn_class_loss")(
                                       [input_rpn_match, rpn_class_logits])
        rpn_bbox_loss = KL.Lambda(
            lambda x: rpn_bbox_loss_graph(config, *x), name="rpn_bbox_loss")(
                [input_rpn_bbox, input_rpn_match, rpn_bbox])
        class_loss = KL.Lambda(lambda x: mrcnn_class_loss_graph(*x),
                               name="mrcnn_class_loss")([
                                   target_class_ids, mrcnn_class_logits,
                                   active_class_ids
                               ])
        bbox_loss = KL.Lambda(lambda x: mrcnn_bbox_loss_graph(*x),
                              name="mrcnn_bbox_loss")([
                                  target_bbox, target_class_ids, mrcnn_bbox
                              ])
        mask_loss = KL.Lambda(lambda x: mrcnn_mask_loss_graph(*x),
                              name="mrcnn_mask_loss")([
                                  target_mask, target_class_ids, mrcnn_mask
                              ])

        # Model
        inputs = [
            input_image, input_image_meta, input_rpn_match, input_rpn_bbox,
            input_gt_class_ids, input_gt_boxes, input_gt_masks
        ]
        if not config.USE_RPN_ROIS:
            inputs.append(input_rois)
        outputs = [
            rpn_class_logits, rpn_class, rpn_bbox, mrcnn_class_logits,
            mrcnn_class, mrcnn_bbox, mrcnn_mask, rpn_rois, output_rois,
            rpn_class_loss, rpn_bbox_loss, class_loss, bbox_loss, mask_loss
        ]
        model = KM.Model(inputs, outputs, name='mask_rcnn')
    else:
        # Network Heads
        # Proposal classifier and BBox regressor heads
        mrcnn_class_logits, mrcnn_class, mrcnn_bbox =\
            fpn_classifier_graph(rpn_rois, mrcnn_feature_maps,
                                 config.IMAGE_SHAPE,
                                 config.POOL_SIZE, config.NUM_CLASSES)

        # Detections
        # output is [batch, num_detections, (y1, x1, y2, x2, class_id,
        # score)] in image coordinates
        detections = DetectionLayer(config, name="mrcnn_detection")(
            [rpn_rois, mrcnn_class, mrcnn_bbox, input_image_meta])

        # Convert boxes to normalized coordinates
        # TODO: let DetectionLayer return normalized coordinates to avoid
        # unnecessary conversions
        h, w = config.IMAGE_SHAPE[:2]
        detection_boxes = KL.Lambda(
            lambda x: x[..., :4] / np.array([h, w, h, w]))(detections)

        # Create masks for detections
        mrcnn_mask = build_fpn_mask_graph(detection_boxes,
                                          mrcnn_feature_maps,
                                          config.IMAGE_SHAPE,
                                          config.MASK_POOL_SIZE,
                                          config.NUM_CLASSES)

        model = KM.Model([input_image, input_image_meta], [
            detections, mrcnn_class, mrcnn_bbox, mrcnn_mask, rpn_rois,
            rpn_class, rpn_bbox
        ],
                         name='mask_rcnn')

    # Add multi-GPU support.
    if config.GPU_COUNT > 1:
        from parallel_model import ParallelModel
        model = ParallelModel(model, config.GPU_COUNT)

    return model
s = reset_tf_session()

import keras
from keras.models import Sequential
from keras import layers as L

# Dimensionality of the latent noise vector fed to the generator.
CODE_SIZE = 256

# Generator: project the noise code to an 8x8x10 map, then grow it with
# transposed convolutions and one upsampling step into a 3-channel image.
generator = Sequential()
generator.add(L.InputLayer([CODE_SIZE], name='noise'))
generator.add(L.Dense(10 * 8 * 8, activation='elu'))
generator.add(L.Reshape((8, 8, 10)))
for grow_layer in (
        L.Deconv2D(64, kernel_size=(5, 5), activation='elu'),
        L.Deconv2D(64, kernel_size=(5, 5), activation='elu'),
        L.UpSampling2D(size=(2, 2)),
        L.Deconv2D(32, kernel_size=3, activation='elu'),
        L.Deconv2D(32, kernel_size=3, activation='elu'),
        L.Deconv2D(32, kernel_size=3, activation='elu'),
        # Final layer: linear activation, 3 output channels (RGB).
        L.Conv2D(3, kernel_size=3, activation=None),
):
    generator.add(grow_layer)

# Sanity check: the stacked layers must end up at the dataset's image shape.
assert generator.output_shape[1:] == IMG_SHAPE, "generator must output an image of shape %s, but instead it produces %s"%(IMG_SHAPE,generator.output_shape[1:])

"""### Discriminator

* Discriminator is your usual convolutional network with interlooping convolution and pooling layers
* The network does not include dropout/batchnorm to avoid learning complications.
* We also regularize the pre-output layer to prevent discriminator from being too certain.
"""

discriminator = Sequential()