def call(self, inputs):
    """Build the evaluation detections tensor from ground-truth annotations.

    Args:
        inputs: sequence of tensors handed to ``tf.py_func``; ``inputs[0]`` is
            logged as the ground-truth class ids and ``inputs[1]`` as the
            ground-truth bounding boxes.  Both are indexed per image along
            axis 0 inside the wrapper.

    Returns:
        The float32 tensor produced by ``tf.py_func``.  At run time it holds
        an array of shape
        ``[BATCH_SIZE, DETECTION_MAX_INSTANCES, num_columns]`` where
        ``num_columns`` is whatever ``self.build_evaluation_detections``
        emits per detection (the original comment lists
        ``(y1, x1, y2, x2, class_score, dt_ind)`` in pixels -- TODO confirm).
    """
    print(' Detection Layer : call() ', type(inputs), len(inputs))
    logt('input_gt_class_ids', inputs[0], verbose = self.verbose)
    logt('input_gt_bboxes ', inputs[1], verbose = self.verbose)

    def wrapper(gt_class_ids, gt_bboxes):
        # The regular inference detection graph (refine_detections) is
        # deliberately not run here: the evaluation detections are built
        # from the GT annotations only (note dated 24-01-2019 in the
        # original source).
        mod_detections_batch = []
        for b in range(self.config.BATCH_SIZE):
            # Build the control detections for image b from its GT
            # annotations (per the original comment this adds false
            # detections).  The second return value (max overlap) is not
            # needed here.
            mod_detections, _ = self.build_evaluation_detections(
                gt_class_ids[b], gt_bboxes[b], self.config,
                self.class_pred_stats)

            # Pad with zeros if detections < DETECTION_MAX_INSTANCES
            gap = self.config.DETECTION_MAX_INSTANCES - mod_detections.shape[0]
            assert gap >= 0
            if gap > 0:
                mod_detections = np.pad(mod_detections, [(0, gap), (0, 0)],
                                        'constant', constant_values=0)
            mod_detections_batch.append(mod_detections)

        # Stack detections and cast to float32
        # TODO: track where float64 is introduced
        mod_detections_batch = np.array(mod_detections_batch).astype(np.float32)
        num_columns = mod_detections_batch.shape[-1]

        # Reshape output to [batch, num_detections, num_columns]
        return np.reshape(
            mod_detections_batch,
            [self.config.BATCH_SIZE, self.config.DETECTION_MAX_INSTANCES, num_columns])

    # Return wrapped function
    return tf.py_func(wrapper, inputs, tf.float32, name = 'detections')
def call(self, inputs):
    """Run detection refinement per image and return a padded detections tensor.

    Args:
        inputs: sequence of four tensors handed to ``tf.py_func``, logged
            here as (rpn proposal rois, mrcnn class output, mrcnn bbox
            output, image meta).  The first three are indexed per image
            along axis 0; the image meta is decoded with
            ``mrcnn.utils.parse_image_meta``.

    Returns:
        The float32 tensor produced by ``tf.py_func``.  At run time it holds
        an array of shape
        ``[BATCH_SIZE, DETECTION_MAX_INSTANCES, num_columns]`` where
        ``num_columns`` is the row width returned by ``refine_detections``
        (the original comment lists ``(y1, x1, y2, x2, class_score)`` in
        pixels -- TODO confirm).
    """
    print(' Detection Layer : call() ', type(inputs), len(inputs))
    logt('rpn_proposals_roi ', inputs[0], verbose=self.verbose)
    logt('mrcnn_class.shape ', inputs[1], verbose=self.verbose)
    logt('mrcnn_bboxes.shape', inputs[2], verbose=self.verbose)
    logt('input_image_meta ', inputs[3], verbose=self.verbose)

    def wrapper(rois, mrcnn_class, mrcnn_bbox, image_meta):
        from mrcnn.utils import parse_image_meta

        # The meta tensor covers the whole batch, so decode it once instead
        # of once per image; only the per-image window is used below.
        _, _, window, _ = parse_image_meta(image_meta)

        detections_batch = []
        # process item per item in batch
        for b in range(self.config.BATCH_SIZE):
            detections = refine_detections(
                rois[b], mrcnn_class[b], mrcnn_bbox[b], window[b], self.config)

            # Pad with zeros if detections < DETECTION_MAX_INSTANCES
            gap = self.config.DETECTION_MAX_INSTANCES - detections.shape[0]
            assert gap >= 0
            if gap > 0:
                detections = np.pad(detections, [(0, gap), (0, 0)],
                                    'constant', constant_values=0)
            detections_batch.append(detections)

        # Stack detections and cast to float32
        # TODO: track where float64 is introduced
        detections_batch = np.array(detections_batch).astype(np.float32)
        num_columns = detections_batch.shape[-1]

        # Reshape output to [batch, num_detections, num_columns]
        return np.reshape(detections_batch, [
            self.config.BATCH_SIZE, self.config.DETECTION_MAX_INSTANCES,
            num_columns
        ])

    # Return wrapped function
    return tf.py_func(wrapper, inputs, tf.float32, name="detections")
def compute_output_shape(self, input_shape):
    """Report the layer's output shape, derived from the second input's shape.

    Axes 1, 2 and 3 of ``input_shape[1]`` are copied into the output shape
    and the leading (batch) axis is left as ``None``.  The result is a
    one-element list holding that 4-tuple.
    """
    # NOTE (from the original source): may need to change dimensions of the
    # first return from IMAGE_SHAPE to MAX_DIM.
    scores_shape = input_shape[1]
    num_classes = scores_shape[1]
    num_detections = scores_shape[2]
    num_columns = scores_shape[3]

    verbose = self.config.VERBOSE
    logt(' FCNScoringLayer - Compute output shape() ', verbose=verbose)
    labelled_dims = (
        (' input_num_classes : ', num_classes),
        (' input_detections : ', num_detections),
        (' input_columns : ', num_columns),
    )
    for label, dim in labelled_dims:
        logt(label, dim, verbose=verbose)

    return [(None, num_classes, num_detections, num_columns)]
def fcn_heatmap_BCE_loss_graph(target_heatmap, pred_heatmap):
    '''
    Binary Cross Entropy Loss for the FCN heatmaps.

    Apply a per-pixel binary loss, similar to the Lmask loss calculation in
    MaskRCNN.  Two approaches are possible:
      1- Only calculate the loss for classes which have active GT heatmaps
      2- Calculate it for all classes
    Approach 1 is implemented: a (image, class) heatmap contributes only when
    the sum of its target heatmap is > 0.

    target_heatmap: [batch, height, width, num_classes] target tensor
                    (transposed to class-major order below).
    pred_heatmap:   tensor with the same layout as target_heatmap.  It is
                    passed as `output` to KB.binary_crossentropy with the
                    default settings, so values are expected to already be
                    probabilities in [0, 1] -- TODO confirm against the caller.

    Returns:
        Tensor of shape [1, 1] named 'fcn_BCE_loss' holding the mean binary
        cross-entropy over all pixels of the active heatmaps, or 0.0 when no
        heatmap is active.
    '''
    print()
    print('-------------------------------' )
    print('>>> fcn_heatmap_BCE_loss_graph ' )
    print('-------------------------------' )
    logt(' target_heatmap :', target_heatmap)
    logt(' pred_heatmap :', pred_heatmap)

    # Move the class axis next to the batch axis so that one
    # (image, class) index pair selects a whole [height, width] heatmap.
    target_heatmap = tf.transpose(target_heatmap, [0, 3, 1, 2])
    pred_heatmap = tf.transpose(pred_heatmap, [0, 3, 1, 2])
    logt(' trgt_heatmap ', target_heatmap)
    logt(' pred_heatmap ', pred_heatmap)

    # Find the (image, class) pairs that are active (present in the GT heatmaps)
    tgt_hm_sum = tf.reduce_sum(target_heatmap, axis=[2, 3])
    logt(' tgt_hm_sum ', tgt_hm_sum)
    class_idxs = tf.where(tgt_hm_sum > 0)
    logt(' class indexes ', class_idxs)

    active_tgt_heatmaps = tf.gather_nd(target_heatmap, class_idxs)
    active_pred_heatmaps = tf.gather_nd(pred_heatmap, class_idxs)
    logt('active_tgt_heatmaps ', active_tgt_heatmaps)
    logt('active_pred_heatmaps ', active_pred_heatmaps)

    # Flatten to 1-D so the loss is computed per pixel.
    y_true = tf.reshape(active_tgt_heatmaps, (-1,))
    y_pred = tf.reshape(active_pred_heatmaps, (-1,))
    logt('y_true : ', y_true)
    logt('y_pred : ', y_pred)

    # Fall back to a constant 0 when there is nothing to score, so the mean
    # below is never taken over an empty tensor.
    loss = KB.switch(tf.size(y_true) > 0,
                     KB.binary_crossentropy(target=y_true, output=y_pred),
                     tf.constant(0.0))
    logt('loss', loss)
    loss_mean = KB.mean(loss)
    logt('mean loss ', loss_mean)
    loss_final = tf.reshape(loss_mean, [1, 1], name='fcn_BCE_loss')
    logt('loss (final) ', loss_final)

    print(' loss :', loss.get_shape(), KB.int_shape(loss),
          'KerasTensor: ', KB.is_keras_tensor(loss))
    print(' loss mean :', loss_mean.get_shape(), KB.int_shape(loss_mean),
          'KerasTensor: ', KB.is_keras_tensor(loss_mean))
    print(' loss final :', loss_final.get_shape(), KB.int_shape(loss_final),
          'KerasTensor: ', KB.is_keras_tensor(loss_final))
    return loss_final
def fcn8_graph(feature_map, config, mode=None):
    '''Builds the FCN-8 heatmap graph: a five-block convolutional encoder,
    an FCN32 classifier, and FCN16 / FCN8 skip fusions with pool4 / pool3.

    feature_map: input Keras tensor.  The final heatmap is forced (via
                 set_shape) to the same static shape as this tensor.
                 Presumably [batch, height, width, num_classes] -- TODO confirm.
    config:      configuration object; FCN_INPUT_SHAPE, NUM_CLASSES,
                 TRAIN_ROIS_PER_IMAGE and VERBOSE are read here.
    mode:        'training' sets the Keras learning phase to 1, any other
                 value sets it to 0 (a global side effect).

    Returns:
        fcn_hm: heatmap produced by the 8x transposed convolution.
        fcn_sm: softmax of fcn_hm (Keras `Activation("softmax")`).
    '''
    print()
    print('---------------')
    print('>>> FCN8 Layer - mode:', mode)
    print('---------------')
    height, width = config.FCN_INPUT_SHAPE[0:2]
    num_classes = config.NUM_CLASSES
    rois_per_class = config.TRAIN_ROIS_PER_IMAGE
    verbose = config.VERBOSE

    print(' feature map :', feature_map.shape)
    print(' height :', height, 'width :', width, 'classes :', num_classes)
    print(' image_data_format: ', KB.image_data_format())
    # Previously this line printed KB.image_data_format() a second time.
    print(' rois_per_class : ', rois_per_class)

    if mode == 'training':
        KB.set_learning_phase(1)
    else:
        KB.set_learning_phase(0)
    print(' Set learning phase to :', KB.learning_phase())

    # TODO: Assert proper shape of input [batch_size, width, height, num_classes]
    # TODO: check if stride of 2 causes alignment issues if the featuremap is not even.

    def conv_relu(tensor, filters, name, kernel_size=(3, 3), padding='same'):
        # ReLU convolution with the initializers shared by every such layer here.
        return KL.Conv2D(filters, kernel_size, activation='relu', padding=padding,
                         name=name, kernel_initializer='glorot_uniform',
                         bias_initializer='zeros')(tensor)

    def max_pool(tensor, name):
        # 2x2 max pooling with stride 2.
        return KL.MaxPooling2D((2, 2), strides=(2, 2), name=name)(tensor)

    ##---------------------------------------------------------------------------------------
    ## Block 1
    ##---------------------------------------------------------------------------------------
    x = conv_relu(feature_map, 64, 'block1_conv1')
    print(' Input feature map : ', feature_map.shape)
    logt('Input feature map ', feature_map, verbose=1)
    logt('FCN Block 11 ', x, verbose=verbose)
    x = conv_relu(x, 64, 'block1_conv2')
    logt('FCN Block 12 ', x, verbose=verbose)
    x = max_pool(x, 'block1_pool')
    logt('FCN Block 13 (Max pooling) ', x, verbose=verbose)

    ##---------------------------------------------------------------------------------------
    ## Block 2
    ##---------------------------------------------------------------------------------------
    x = conv_relu(x, 128, 'block2_conv1')
    logt('FCN Block 21 ', x, verbose=verbose)
    x = conv_relu(x, 128, 'block2_conv2')
    logt('FCN Block 22 ', x, verbose=verbose)
    x = max_pool(x, 'block2_pool')
    logt('FCN Block 23 (Max pooling) ', x, verbose=verbose)

    ##---------------------------------------------------------------------------------------
    ## Block 3 (its pooled output feeds the FCN8 skip connection)
    ##---------------------------------------------------------------------------------------
    x = conv_relu(x, 256, 'block3_conv1')
    logt('FCN Block 31 ', x, verbose=verbose)
    x = conv_relu(x, 256, 'block3_conv2')
    logt('FCN Block 32 ', x, verbose=verbose)
    x = conv_relu(x, 256, 'block3_conv3')
    logt('FCN Block 33 ', x, verbose=verbose)
    pool3 = max_pool(x, 'block3_pool')
    logt('FCN Block 34 (Max pooling) ', pool3, verbose=verbose)

    ##---------------------------------------------------------------------------------------
    ## Block 4 (its pooled output feeds the FCN16 skip connection)
    ##---------------------------------------------------------------------------------------
    x = conv_relu(pool3, 512, 'block4_conv1')
    logt('FCN Block 41 ', x, verbose=verbose)
    x = conv_relu(x, 512, 'block4_conv2')
    logt('FCN Block 42 ', x, verbose=verbose)
    x = conv_relu(x, 512, 'block4_conv3')
    logt('FCN Block 43 ', x, verbose=verbose)
    pool4 = max_pool(x, 'block4_pool')
    logt('FCN Block 44 (Max pooling) ', pool4, verbose=verbose)

    ##---------------------------------------------------------------------------------------
    ## Block 5
    ##---------------------------------------------------------------------------------------
    x = conv_relu(pool4, 512, 'block5_conv1')
    logt('FCN Block 51 ', x, verbose=verbose)
    x = conv_relu(x, 512, 'block5_conv2')
    logt('FCN Block 52 ', x, verbose=verbose)
    x = conv_relu(x, 512, 'block5_conv3')
    logt('FCN Block 53 ', x, verbose=verbose)
    x = max_pool(x, 'block5_pool')
    logt('FCN Block 54 (Max pooling) ', x, verbose=verbose)

    ##---------------------------------------------------------------------------------------
    ## FCN32 Specific Structure
    ##---------------------------------------------------------------------------------------
    # Convolutional layers standing in for the fully-connected layers.
    FC_SIZE = 4096
    x = conv_relu(x, FC_SIZE, 'fcn32_fc1', kernel_size=(7, 7))
    print()
    print(' --- FCN32 ----------------------------')
    logt(' FCN fully connected 1 (fc1) ', x, verbose=verbose)
    x = KL.Dropout(0.5)(x)

    x = conv_relu(x, FC_SIZE, 'fcn32_fc2', kernel_size=(1, 1))
    logt('FCN fully connected 2 (fc2) ', x, verbose=verbose)
    x = KL.Dropout(0.5)(x)

    # Classifying layer
    x = KL.Conv2D(num_classes, (1, 1), activation='linear', padding='valid',
                  strides=(1, 1), name='fcn32_deconv2D',
                  kernel_initializer='he_normal', bias_initializer='zeros')(x)
    logt('FCN conv2d (fcn32_deconv2D) ', x, verbose=verbose)

    ##---------------------------------------------------------------------------------------
    ## FCN16 Specific Structure
    ##---------------------------------------------------------------------------------------
    # Score Pool4 - reduce the Pool4 filters from 512 to num_classes
    score_pool4 = conv_relu(pool4, num_classes, 'fcn16_score_pool4',
                            kernel_size=(1, 1), padding='valid')
    print()
    print(' --- FCN16 ----------------------------')
    logt('FCN scorePool4 (Conv2D(Pool4)) ', score_pool4, verbose=verbose)

    # 2x upsampling of the classifier output to generate Score2
    x = KL.Deconvolution2D(num_classes, kernel_size=(4, 4), activation=None,
                           padding='valid', name='fcn16_score2',
                           strides=(2, 2))(x)
    logt('FCN 2x Upsampling (Deconvolution2D(fcn32_classify)) ', x, verbose=verbose)

    # Crop one row/column from every border of Score2 before the fusion.
    score2_c = KL.Cropping2D(cropping=((1, 1), (1, 1)), name='fcn16_crop_score2')(x)
    logt('FCN 2x Upsampling/Cropped (Cropped2D(score2)) ', score2_c, verbose=verbose)

    # Sum Score2, scorePool4
    x = KL.Add(name='fcn16_fuse_pool4')([score2_c, score_pool4])
    logt('FCN Add Score2,scorePool4 Add(score2_c, scorePool4) ', x, verbose=verbose)

    # 2x upsampling (padding was originally "valid", changed to "same")
    x = KL.Deconvolution2D(num_classes, kernel_size=(4, 4), activation=None,
                           padding='same', name='fcn16_upscore_pool4',
                           kernel_initializer='glorot_uniform',
                           bias_initializer='zeros', strides=(2, 2))(x)
    logt('FCN upscore_pool4 (Deconv(fuse_Pool4)) ', x, verbose=verbose)

    ##---------------------------------------------------------------------------------------
    ## FCN8 Specific Structure
    ##---------------------------------------------------------------------------------------
    # Score Pool3 - reduce the Pool3 filters from 256 to num_classes
    score_pool3 = conv_relu(pool3, num_classes, 'fcn8_score_pool3',
                            kernel_size=(1, 1), padding='valid')
    print()
    print(' --- FCN8 ----------------------------')
    logt('FCN scorePool3 (Conv2D(Pool3)) ', score_pool3, verbose=verbose)

    # Zero-width crop: keeps the named layer in the graph without changing the data.
    upscore_pool4_c = KL.Cropping2D(cropping=((0, 0), (0, 0)), name='fcn8_crop_pool4')(x)
    logt('FCN 2x Upsampling/Cropped (Cropped2D(score2)) ', upscore_pool4_c, verbose=verbose)

    # Sum upscore_pool4_c, scorePool3
    x = KL.Add(name='fcn8_fuse_pool3')([upscore_pool4_c, score_pool3])
    logt('FCN Add Score2,scorePool4', x, verbose=verbose)
    print()

    ##---------------------------------------------------------------------------------------
    ## fcn_heatmap
    ##---------------------------------------------------------------------------------------
    # 8x upsampling (padding was originally "valid", changed to "same")
    fcn_hm = KL.Deconvolution2D(num_classes, kernel_size=(16, 16), activation=None,
                                padding='same', name='fcn8_heatmap',
                                kernel_initializer='glorot_uniform',
                                bias_initializer='zeros', strides=(8, 8))(x)
    # Pin the static shape to the input's so downstream shape inference works.
    fcn_hm.set_shape(feature_map.shape)
    logt('FCN fcn8_classify/heatmap (Deconv(fuse_Pool4)) ', fcn_hm, verbose=verbose)
    fcn_hm = KL.Lambda(lambda z: tf.identity(z, name='fcn_hm'),
                       name='fcn_heatmap_lambda')(fcn_hm)
    logt('fcn_hm (final)', fcn_hm, verbose=verbose)
    print()

    ##---------------------------------------------------------------------------------------
    ## fcn_softmax
    ##---------------------------------------------------------------------------------------
    fcn_sm = KL.Activation("softmax", name="fcn8_softmax")(fcn_hm)
    logt('fcn8_softmax ', fcn_sm, verbose=verbose)
    # The naming wrapper is applied to the softmax output.  It used to be
    # applied to fcn_hm, which discarded the softmax and returned the raw
    # heatmap twice.
    # NOTE(review): consumers that relied on the second output being the
    # un-normalised heatmap must switch to the first output.
    fcn_sm = KL.Lambda(lambda z: tf.identity(z, name='fcn_sm'),
                       name='fcn_softmax_lambda')(fcn_sm)
    logt('fcn_sm (final)', fcn_sm, verbose=verbose)
    print()

    return fcn_hm, fcn_sm
def fcn_scoring_graph(input, config, mode):
    '''Score every non-empty bounding box against the FCN heatmap of its class.

    input:  pair (in_heatmap, pr_scores).
            in_heatmap is transposed with [0, 3, 1, 2] and then indexed by
            (image, class), i.e. it is consumed as
            [batch, height, width, num_classes].
            pr_scores is indexed as [batch, class, roi, column]; columns 0-3
            are the box coordinates (y1, x1, y2, x2 as used below).
    config: configuration object; BATCH_SIZE, NUM_CLASSES,
            HEATMAP_SCALE_FACTOR, DETECTION_MAX_INSTANCES and
            DETECTION_PER_CLASS are read here.
    mode:   'training' selects sequence / normalized-score columns 6 / 7;
            any other value selects columns 7 / 8.

    Returns:
        fcn_scores_by_class: tensor of shape
        [batch_size, num_classes, detections_per_image, columns] in which each
        non-empty box row holds the first (norm_score_column + 1) input
        columns followed by old_style_scores, alt_scores_1, alt_scores_1_norm,
        alt_scores_2 and alt_scores_2_norm.  Empty rows stay zero.
    '''
    in_heatmap, pr_scores = input
    # Both names describe axis 2 of pr_scores: the first is the static-shape
    # entry from `.shape`, the second the value reported by KB.int_shape.
    detections_per_image = pr_scores.shape[2]
    rois_per_image = KB.int_shape(pr_scores)[2]
    batch_size = config.BATCH_SIZE
    num_classes = config.NUM_CLASSES
    heatmap_scale = config.HEATMAP_SCALE_FACTOR

    # Column layout of pr_scores (per the original comments): 0-3 box
    # coordinates, 4 class id, 5 score; the columns below depend on the mode.
    class_column = 4
    if mode == 'training':
        sequence_column = 6
        norm_score_column = 7
    else:
        sequence_column = 7
        norm_score_column = 8

    print('\n ')
    print('----------------------')
    print('>>> FCN Scoring Layer - mode:', mode)
    print('----------------------')
    logt('in_heatmap.shape ', in_heatmap)
    logt('pr_hm_scores.shape', pr_scores)
    # rois per image is determined by size of input tensor
    #   detection mode: config.TRAIN_ROIS_PER_IMAGE
    #   ground_truth  : config.DETECTION_MAX_INSTANCES
    print(' detctions_per_image : ', detections_per_image, 'pr_scores shape', pr_scores.shape)
    print(' rois_per_image : ', rois_per_image)
    print(' config.DETECTION_MAX_INSTANCES : ', config.DETECTION_MAX_INSTANCES)
    print(' config.DETECTIONS_PER_CLASS : ', config.DETECTION_PER_CLASS)
    print(' sequence_column : ', sequence_column)
    print(' norm_score_column : ', norm_score_column)

    ##---------------------------------------------------------------------------------------
    ## Stack non-zero bboxes from pr_scores into pt2_dense
    ##   pt2_ind   : [?, 3] -> (image index, class index, roi row index)
    ##   pt2_dense : [?, columns] rows of pr_scores whose coordinates are not all zero
    ##     pt2_dense[0:4] roi coordinates
    ##     pt2_dense[4]   class id
    ##     pt2_dense[5]   score from mrcnn
    ##---------------------------------------------------------------------------------------
    pt2_sum = tf.reduce_sum(tf.abs(pr_scores[:, :, :, :class_column]), axis=-1)
    pt2_ind = tf.where(pt2_sum > 0)
    pt2_dense = tf.gather_nd(pr_scores, pt2_ind)
    logt('in_heatmap ', in_heatmap)
    logt('pr_scores.shape ', pr_scores)
    logt('pt2_sum shape ', pt2_sum)
    logt('pt2_ind shape ', pt2_ind)
    logt('pt2_dense shape ', pt2_dense)

    ##---------------------------------------------------------------------------------------
    ## Build centre and spread tensors for the bounding boxes (in heatmap scale)
    ##---------------------------------------------------------------------------------------
    bboxes_scaled = pt2_dense[..., 0:class_column] / heatmap_scale
    width = bboxes_scaled[:, 3] - bboxes_scaled[:, 1]  # x2 - x1
    height = bboxes_scaled[:, 2] - bboxes_scaled[:, 0]  # y2 - y1
    cx = bboxes_scaled[:, 1] + (width / 2.0)
    cy = bboxes_scaled[:, 0] + (height / 2.0)
    # Square root of the half-width / half-height, stacked as (w, h).
    covar = tf.stack((width * 0.5, height * 0.5), axis=-1)
    covar = tf.sqrt(covar)

    ##---------------------------------------------------------------------------------------
    ## Build indices and extract the heatmap matching each bounding box's class id
    ##---------------------------------------------------------------------------------------
    hm_indices = tf.cast(pt2_ind[:, :2], dtype=tf.int32)
    logt('hm_indices ', hm_indices)
    pt2_heatmaps = tf.transpose(in_heatmap, [0, 3, 1, 2])
    logt('pt2_heatmaps', pt2_heatmaps)
    pt2_heatmaps = tf.gather_nd(pt2_heatmaps, hm_indices)
    logt('pt2_heatmaps', pt2_heatmaps)

    ##---------------------------------------------------------------------------------------
    ## (0) Scores from the raw heatmap, the scaled box and the normalized input score
    ##     (computed per box by build_hm_score_v2)
    ##---------------------------------------------------------------------------------------
    old_style_scores = tf.map_fn(
        build_hm_score_v2,
        [pt2_heatmaps, bboxes_scaled, pt2_dense[:, norm_score_column]],
        dtype=tf.float32,
        swap_memory=True)
    logt('old_style_scores', old_style_scores)

    ##---------------------------------------------------------------------------------------
    ## (1) Scores from the raw heatmap, box centre and spread (build_hm_score_v3)
    ##---------------------------------------------------------------------------------------
    alt_scores_1 = tf.map_fn(build_hm_score_v3,
                             [pt2_heatmaps, cy, cx, covar],
                             dtype=tf.float32)
    logt('alt_scores_1 ', alt_scores_1)

    ##---------------------------------------------------------------------------------------
    ## Scatter back to the per-class tensor, normalize, and gather the dense rows again
    ##---------------------------------------------------------------------------------------
    alt_scores_1_norm = tf.scatter_nd(
        pt2_ind,
        alt_scores_1, [
            batch_size, num_classes, detections_per_image,
            KB.int_shape(alt_scores_1)[-1]
        ],
        name='alt_scores_1_norm')
    logt('alt_scores_1_scattered', alt_scores_1_norm)
    alt_scores_1_norm = normalize_scores(alt_scores_1_norm)
    logt('alt_scores_1_norm(by_class)', alt_scores_1_norm)
    alt_scores_1_norm = tf.gather_nd(alt_scores_1_norm, pt2_ind)
    logt('alt_scores_1_norm(by_image)', alt_scores_1_norm)

    ##---------------------------------------------------------------------------------------
    ## Normalize the input heatmap per (image, class) by its maximum, for alt_scores_2
    ##---------------------------------------------------------------------------------------
    print(
        '\n Normalize heatmap within each class !-------------------------------------'
    )
    in_heatmap_norm = tf.transpose(in_heatmap, [0, 3, 1, 2])
    print(' in_heatmap_norm : ', in_heatmap_norm.get_shape(), 'Keras tensor ',
          KB.is_keras_tensor(in_heatmap_norm))

    normalizer = tf.reduce_max(in_heatmap_norm, axis=[-2, -1], keepdims=True)
    # Avoid dividing by (near) zero: heatmaps whose maximum is below 1e-15
    # are divided by 1 instead.
    normalizer = tf.where(normalizer < 1.0e-15, tf.ones_like(normalizer), normalizer)
    in_heatmap_norm = in_heatmap_norm / normalizer
    print(' normalizer shape : ', normalizer.shape)
    print(' normalized heatmap : ', in_heatmap_norm.shape, ' Keras tensor ',
          KB.is_keras_tensor(in_heatmap_norm))

    ##---------------------------------------------------------------------------------------
    ## (2) Same scoring as (1), but on the normalized heatmaps
    ##---------------------------------------------------------------------------------------
    hm_indices = tf.cast(pt2_ind[:, :2], dtype=tf.int32)
    logt('hm_indices shape', hm_indices)
    pt2_heatmaps = tf.gather_nd(in_heatmap_norm, hm_indices)
    logt('pt2_heatmaps', pt2_heatmaps)

    alt_scores_2 = tf.map_fn(build_hm_score_v3,
                             [pt2_heatmaps, cy, cx, covar],
                             dtype=tf.float32)
    logt('alt_scores_2', alt_scores_2)

    alt_scores_2_norm = tf.scatter_nd(
        pt2_ind,
        alt_scores_2, [
            batch_size, num_classes, rois_per_image,
            KB.int_shape(alt_scores_2)[-1]
        ],
        name='alt_scores_2')
    logt('alt_scores_2(scattered)', alt_scores_2_norm)
    alt_scores_2_norm = normalize_scores(alt_scores_2_norm)
    logt('alt_scores_2_norm(by_class)', alt_scores_2_norm)
    alt_scores_2_norm = tf.gather_nd(alt_scores_2_norm, pt2_ind)
    logt('alt_scores_2_norm(by_image)', alt_scores_2_norm)

    ##---------------------------------------------------------------------------------------
    ## Append all score groups to the original columns to yield fcn_scores_dense
    ##---------------------------------------------------------------------------------------
    fcn_scores_dense = tf.concat([
        pt2_dense[:, :norm_score_column + 1], old_style_scores, alt_scores_1,
        alt_scores_1_norm, alt_scores_2, alt_scores_2_norm
    ],
                                 axis=-1,
                                 name='fcn_scores_dense')
    logt('fcn_scores_dense ', fcn_scores_dense)

    ##---------------------------------------------------------------------------------------
    ## Scatter back to the per-class tensor.
    ## seq_ids / scatter_ind are the indices a per-image scatter would use;
    ## they are currently only logged.
    ##---------------------------------------------------------------------------------------
    # tf.cast replaces the deprecated tf.to_int32.
    seq_ids = tf.cast(rois_per_image - pt2_dense[:, sequence_column], tf.int32)
    scatter_ind = tf.stack([hm_indices[:, 0], seq_ids], axis=-1, name='scatter_ind')
    fcn_scores_by_class = tf.scatter_nd(
        pt2_ind,
        fcn_scores_dense, [
            batch_size, num_classes, detections_per_image,
            fcn_scores_dense.shape[-1]
        ],
        name='fcn_hm_scores')

    logt('seq_ids ', seq_ids)
    logt('sscatter_ids ', scatter_ind)
    logt('fcn_scores_by_class ', fcn_scores_by_class)
    logt('complete')
    return fcn_scores_by_class
def fcn32_graph(feature_map, config, mode=None):
    '''Builds the FCN-32 heatmap graph: a five-block convolutional encoder,
    a convolutional classifier and a single stride-32 transposed convolution.

    feature_map: input Keras tensor.  The final heatmap is forced (via
                 set_shape) to the same static shape as this tensor.
                 Presumably [batch, height, width, num_classes] -- TODO confirm.
    config:      configuration object; FCN_INPUT_SHAPE, NUM_CLASSES,
                 TRAIN_ROIS_PER_IMAGE and VERBOSE are read here.
    mode:        only printed; it does not alter the graph.

    Returns:
        fcn_hm: heatmap produced by the transposed convolution.
        fcn_sm: softmax of fcn_hm (Keras `Activation("softmax")`).
    '''
    print()
    print('---------------')
    print('>>> FCN32 Layer - mode:', mode)
    print('---------------')
    height, width = config.FCN_INPUT_SHAPE[0:2]
    num_classes = config.NUM_CLASSES
    rois_per_class = config.TRAIN_ROIS_PER_IMAGE
    verbose = config.VERBOSE

    print(' feature map :', feature_map.shape)
    print(' height :', height, 'width :', width, 'classes :', num_classes)
    print(' image_data_format: ', KB.image_data_format())
    # Previously this line printed KB.image_data_format() a second time.
    print(' rois_per_class : ', rois_per_class)

    # TODO: Assert proper shape of input [batch_size, width, height, num_classes]
    # TODO: check if stride of 2 causes alignment issues if the featuremap is not even.

    def conv_relu(tensor, filters, name, kernel_size=(3, 3)):
        # 'same'-padded ReLU convolution with the initializers shared by the encoder.
        return KL.Conv2D(filters, kernel_size, activation='relu', padding='same',
                         name=name, kernel_initializer='glorot_uniform',
                         bias_initializer='zeros')(tensor)

    def max_pool(tensor, name):
        # 2x2 max pooling with stride 2.
        return KL.MaxPooling2D((2, 2), strides=(2, 2), name=name)(tensor)

    # Block 1
    x = conv_relu(feature_map, 64, 'block1_conv1')
    print(' FCN Block 11 shape is : ', x.get_shape())
    x = conv_relu(x, 64, 'block1_conv2')
    print(' FCN Block 12 shape is : ', x.get_shape())
    x = max_pool(x, 'block1_pool')
    print(' FCN Block 13 shape is : ', x.get_shape())

    # Block 2
    x = conv_relu(x, 128, 'block2_conv1')
    print(' FCN Block 21 shape is : ', x.get_shape())
    x = conv_relu(x, 128, 'block2_conv2')
    print(' FCN Block 22 shape is : ', x.get_shape())
    x = max_pool(x, 'block2_pool')
    print(' FCN Block 23 (Max pooling) shape is : ', x.get_shape())

    # Block 3
    x = conv_relu(x, 256, 'block3_conv1')
    print(' FCN Block 31 shape is : ', x.get_shape())
    x = conv_relu(x, 256, 'block3_conv2')
    print(' FCN Block 32 shape is : ', x.get_shape())
    x = conv_relu(x, 256, 'block3_conv3')
    print(' FCN Block 33 shape is : ', x.get_shape())
    x = max_pool(x, 'block3_pool')
    print(' FCN Block 34 (Max pooling) shape is : ', x.get_shape())

    # Block 4
    x = conv_relu(x, 512, 'block4_conv1')
    print(' FCN Block 41 shape is : ', x.get_shape())
    x = conv_relu(x, 512, 'block4_conv2')
    print(' FCN Block 42 shape is : ', x.get_shape())
    x = conv_relu(x, 512, 'block4_conv3')
    print(' FCN Block 43 shape is : ', x.get_shape())
    x = max_pool(x, 'block4_pool')
    print(' FCN Block 44 (Max pooling) shape is : ', x.get_shape())

    # Block 5
    x = conv_relu(x, 512, 'block5_conv1')
    print(' FCN Block 51 shape is : ', x.get_shape())
    x = conv_relu(x, 512, 'block5_conv2')
    print(' FCN Block 52 shape is : ', x.get_shape())
    x = conv_relu(x, 512, 'block5_conv3')
    print(' FCN Block 53 shape is : ', x.get_shape())
    x = max_pool(x, 'block5_pool')
    print(' FCN Block 54 (Max pooling) shape is : ', x.get_shape())

    ##---------------------------------------------------------------------------------------
    ## FCN32 Specific Structure
    ##---------------------------------------------------------------------------------------
    # Convolutional layers standing in for the fully-connected layers.
    FC_SIZE = 4096
    x = conv_relu(x, FC_SIZE, "fc1", kernel_size=(7, 7))
    print()
    print(' --- FCN32 ----------------------------')
    print(' FCN fully connected 1 (fcn_fc1) shape is : ', KB.int_shape(x))
    x = KL.Dropout(0.5)(x)

    # fc2
    x = conv_relu(x, FC_SIZE, "fc2", kernel_size=(1, 1))
    print(' FCN fully connected 2 (fcn_fc2) shape is : ', x.get_shape())
    x = KL.Dropout(0.5)(x)

    # Classifying layer
    x = KL.Conv2D(num_classes, (1, 1), kernel_initializer='he_normal',
                  bias_initializer='zeros', activation='linear', padding='valid',
                  strides=(1, 1), name="fcn_classify")(x)
    print(' FCN final conv2d (fcn_classify) shape is : ', x.get_shape(),
          ' keras_tensor ', KB.is_keras_tensor(x))

    # Ratio between the configured input size and the classifier output size
    # (diagnostic only).  w_factor is now derived from the width; it was
    # previously computed from the height.
    fcn_classify_shape = KB.int_shape(x)
    h_factor = height / fcn_classify_shape[1]
    w_factor = width / fcn_classify_shape[2]
    print(' h_factor : ', h_factor, 'w_factor : ', w_factor)

    ##---------------------------------------------------------------------------------------
    ## fcn_heatmap
    ##---------------------------------------------------------------------------------------
    # Stride-32 upsampling (padding was originally "valid", changed to "same").
    # The layer names keep their historical "fcn8_" prefix; renaming them
    # would change how saved weights are matched by name.
    fcn_hm = KL.Deconvolution2D(num_classes, kernel_size=(16, 16), strides=(32, 32),
                                kernel_initializer='glorot_uniform',
                                bias_initializer='zeros', padding='same',
                                activation=None, name="fcn8_heatmap")(x)
    # Pin the static shape to the input's so downstream shape inference works.
    fcn_hm.set_shape(feature_map.shape)
    logt('FCN fcn8_classify/heatmap (Deconv(fuse_Pool4)) ', fcn_hm, verbose=verbose)
    fcn_hm = KL.Lambda(lambda z: tf.identity(z, name='fcn_hm'),
                       name='fcn_heatmap_lambda')(fcn_hm)
    logt('fcn_hm (final)', fcn_hm, verbose=verbose)
    print()

    ##---------------------------------------------------------------------------------------
    ## fcn_softmax
    ##---------------------------------------------------------------------------------------
    fcn_sm = KL.Activation("softmax", name="fcn8_softmax")(fcn_hm)
    logt('fcn8_softmax ', fcn_sm, verbose=verbose)
    # The naming wrapper is applied to the softmax output.  It used to be
    # applied to fcn_hm, which discarded the softmax and returned the raw
    # heatmap twice.
    # NOTE(review): consumers that relied on the second output being the
    # un-normalised heatmap must switch to the first output.
    fcn_sm = KL.Lambda(lambda z: tf.identity(z, name='fcn_sm'),
                       name='fcn_softmax_lambda')(fcn_sm)
    logt('fcn_sm (final)', fcn_sm, verbose=verbose)
    print()

    print(' fcn_heatmap : ', fcn_hm.shape, ' Keras tensor ', KB.is_keras_tensor(fcn_hm))
    return fcn_hm, fcn_sm
def call(self, inputs):
    '''
    Build the ground-truth heatmap and heatmap scores for this layer.

    inputs: a two-element sequence ``(tgt_class_ids, tgt_bboxes)``.

    Returns ``[gt_heatmap, gt_heatmap_scores]`` as produced by
    ``build_gt_heatmap`` from the tensor assembled by ``build_gt_tensor``.
    '''
    cfg = self.config
    show = cfg.VERBOSE
    class_ids, bboxes = inputs

    # Trace the incoming tensors
    for label, item in (
            (' > CHMLayerTgt Call() :', inputs),
            (' tgt_class_ids.shape :', class_ids),
            (' tgt_bboxes.shape :', bboxes),
    ):
        logt(label, item, verbose=show)

    # Stack the GT annotations into one tensor, then render the heatmap
    stacked_gt = build_gt_tensor(class_ids, bboxes, cfg)
    heatmap, heatmap_scores = build_gt_heatmap(stacked_gt, cfg, names=['gt_heatmap'])

    # Trace the outputs
    logt(' ', verbose=show)
    logt('gt_heatmap ', heatmap, verbose=show)
    logt('gt_heatmap_scores ', heatmap_scores, verbose=show)
    logt('complete', verbose=show)

    return [heatmap, heatmap_scores]
def fcn_heatmap_BCE_loss_graph_2(target_heatmap, pred_heatmap, config):
    '''
    Binary Cross Entropy loss for the FCN heatmaps - calculated for ONE CLASS ONLY.

    The class is selected by ``config.FCN_BCE_LOSS_CLASS``. Both heatmaps are
    transposed from [batch, height, width, num_classes] to
    [batch, num_classes, height, width], the selected class plane is sliced
    out, and the per-pixel binary cross entropy is averaged to one value.

    target_heatmap: [batch, height, width, num_classes] ground-truth heatmap.
    pred_heatmap:   [batch, height, width, num_classes] predicted heatmap.
                    KB.binary_crossentropy is called with its default
                    (non-logit) mode, so values are presumably expected to be
                    probabilities in [0, 1] - TODO confirm against the caller.
    config:         configuration object; only FCN_BCE_LOSS_CLASS is read.

    Returns: tensor of shape [1, 1] named 'fcn_BCE_loss'.
    '''
    print()
    print('--------------------------------------------------------')
    print('>>> fcn_heatmap_BCE_loss_graph_2 -- On ONE CLASS ONLY! ')
    print('--------------------------------------------------------')
    # Labels now name the tensors actually being logged (they previously
    # read 'target_class_ids' / 'pred_class_logits').
    logt(' target_heatmap :', target_heatmap)
    logt(' pred_heatmap :', pred_heatmap)

    error_cls = config.FCN_BCE_LOSS_CLASS
    logt(' fcn_bce_loss_class:', error_cls)

    # Transpose to Image, Class, Height, Width
    target_heatmap = tf.transpose(target_heatmap, [0, 3, 1, 2])
    pred_heatmap = tf.transpose(pred_heatmap, [0, 3, 1, 2])
    logt(' trgt_heatmap ', target_heatmap)
    # Was logged under the 'trgt_heatmap' label as well (copy-paste slip)
    logt(' pred_heatmap ', pred_heatmap)

    # Loss on the configured class only. The one-wide slice keeps the class
    # axis so the tensors stay rank 4.
    loss2 = KB.binary_crossentropy(
        target=target_heatmap[:, error_cls:error_cls + 1],
        output=pred_heatmap[:, error_cls:error_cls + 1])
    logt('loss2 ', loss2)

    loss2_mean = KB.mean(loss2)
    logt('loss2_mean ', loss2_mean)

    loss2_final = tf.reshape(loss2_mean, [1, 1], name='fcn_BCE_loss')
    logt('loss2_final', loss2_final)
    return loss2_final
def build_gt_heatmap(in_tensor, config, names=None):
    '''
    Build the ground-truth heatmap and the accompanying score tensor.

    Every non-zero bounding box in `in_tensor` gets a grid of ones of size
    [grid_h, grid_w] which is passed through `clip_heatmap`. The per-box grids
    are scattered back by (image, class, roi) and reduced with MAX over the
    roi axis to give one heatmap per class. Several score variants are
    computed per box and appended to the input tensor.

    in_tensor: rank-4 tensor indexed as [image, class, roi, column].
               Columns 0:4 are the box coordinates (y1, x1, y2, x2).
               Column 7 is passed to `build_hm_score_v2` as the box score
               (described in this file as the per-class normalized score -
               TODO confirm against build_gt_tensor).
    config:    configuration object; VERBOSE, IMAGE_SHAPE, BATCH_SIZE,
               NUM_CLASSES and HEATMAP_SCALE_FACTOR are read.
    names:     list whose first element names the output tensors.
               Defaults to ['gt_heatmap'] when omitted.

    Returns:
    gauss_heatmap: [batch_size, grid_h, grid_w, num_classes]
    gauss_scores:  `in_tensor` concatenated on the last axis with
                   old_style_scores, alt_scores_1, alt_scores_1_norm,
                   alt_scores_2 and alt_scores_2_norm.
    '''
    if names is None:
        # names[0] is required further down; previously None raised TypeError
        names = ['gt_heatmap']

    verbose = config.VERBOSE
    batch_size = config.BATCH_SIZE
    num_classes = config.NUM_CLASSES
    heatmap_scale = config.HEATMAP_SCALE_FACTOR
    # NOTE(review): floor-dividing the slice assumes IMAGE_SHAPE is a numpy
    # array (a plain list/tuple would fail here) - confirm in the config class.
    grid_h, grid_w = config.IMAGE_SHAPE[:2] // heatmap_scale

    # rois per image is determined by the size of the input tensor
    rois_per_image = (in_tensor.shape)[2]

    if verbose:
        print('\n ')
        print(' > build_heatmap() for ', names)
        print(' in_tensor shape : ', in_tensor.shape)
        print(' num bboxes per class : ', rois_per_image)
        print(' heatmap scale : ', heatmap_scale, 'Dimensions: w:', grid_w, ' h:', grid_h)

    ##-----------------------------------------------------------------------------
    ## Stack non_zero bboxes from in_tensor into pt2_dense
    ##   pt2_ind   : [?, 3] -> (image_index, class_index, roi row_index)
    ##   pt2_dense : [?, columns of in_tensor]
    ##-----------------------------------------------------------------------------
    pt2_sum = tf.reduce_sum(tf.abs(in_tensor[:, :, :, :4]), axis=-1)
    pt2_ind = tf.where(pt2_sum > 0)
    pt2_dense = tf.gather_nd(in_tensor, pt2_ind)
    logt('pt2_sum ', pt2_sum, verbose=verbose)
    logt('pt2_ind ', pt2_ind, verbose=verbose)
    logt('pt2_dense ', pt2_dense, verbose=verbose)

    ##-----------------------------------------------------------------------------
    ## Box centres and extents in heatmap coordinates.
    ## `covar` is sqrt(half-width), sqrt(half-height); it is only handed to the
    ## clip / score helpers here (no Gaussian is evaluated for GT heatmaps).
    ##-----------------------------------------------------------------------------
    pt2_dense_scaled = pt2_dense[:, :4] / heatmap_scale
    width = pt2_dense_scaled[:, 3] - pt2_dense_scaled[:, 1]    # x2 - x1
    height = pt2_dense_scaled[:, 2] - pt2_dense_scaled[:, 0]   # y2 - y1
    cx = pt2_dense_scaled[:, 1] + (width / 2.0)
    cy = pt2_dense_scaled[:, 0] + (height / 2.0)
    means = tf.stack((cx, cy), axis=-1)
    covar = tf.stack((width * 0.5, height * 0.5), axis=-1)
    covar = tf.sqrt(covar)

    ##-----------------------------------------------------------------------------
    ## GT "distribution": a constant grid of ones per box, [num_boxes, grid_h, grid_w]
    ##-----------------------------------------------------------------------------
    prob_grid = tf.ones([tf.shape(pt2_dense)[0], grid_h, grid_w], dtype=tf.float32)
    logt('Prob_grid ', prob_grid, verbose=verbose)

    ##-----------------------------------------------------------------------------
    ## Clip each box grid to the region surrounding (cy, cx)
    ##-----------------------------------------------------------------------------
    prob_grid_clipped = tf.map_fn(clip_heatmap, [prob_grid, cy, cx, covar],
                                  dtype=tf.float32, swap_memory=True)
    logt('prob_grid_clipped ', prob_grid_clipped, verbose=verbose)

    ##-----------------------------------------------------------------------------
    ## (0) Old-style scores from the UNCLIPPED grid.
    ##     11-27-2018: changed from prob_grid_clipped to prob_grid to stay
    ##     consistent with chm_layer.py.
    ##-----------------------------------------------------------------------------
    old_style_scores = tf.map_fn(
        build_hm_score_v2,
        [prob_grid, pt2_dense_scaled, pt2_dense[:, 7]],
        dtype=tf.float32, swap_memory=True)
    old_style_scores = tf.scatter_nd(
        pt2_ind, old_style_scores,
        [batch_size, num_classes, rois_per_image, 3],
        name='scores_scattered')
    logt('old_style_scores ', old_style_scores, verbose=verbose)

    ##-----------------------------------------------------------------------------
    ## Alternative scores (1) based on the clipped per-box grid
    ##-----------------------------------------------------------------------------
    alt_scores_1 = tf.map_fn(build_hm_score_v3, [prob_grid_clipped, cy, cx, covar],
                             dtype=tf.float32)
    logt('alt_scores_1 ', alt_scores_1, verbose=verbose)
    alt_scores_1 = tf.scatter_nd(
        pt2_ind, alt_scores_1,
        [batch_size, num_classes, rois_per_image, KB.int_shape(alt_scores_1)[-1]],
        name='alt_scores_1')
    alt_scores_1_norm = normalize_scores(alt_scores_1)
    logt('alt_scores_1(by class) ', alt_scores_1, verbose=verbose)
    logt('alt_scores_1_norm(by_class) ', alt_scores_1_norm, verbose=verbose)

    ##-----------------------------------------------------------------------------
    ## (3) Scatter the per-box grids out by (image, class, roi).
    ##     The trailing dims must match the grids themselves, i.e.
    ##     [grid_h, grid_w]; they were declared as [grid_w, grid_h], which is
    ##     only valid when the heatmap is square.
    ##-----------------------------------------------------------------------------
    gauss_heatmap = tf.scatter_nd(
        pt2_ind, prob_grid_clipped,
        [batch_size, num_classes, rois_per_image, grid_h, grid_w],
        name='gauss_heatmap')
    logt(
        '\n Scatter out the probability distributions based on class --------------'
    )
    logt('pt2_ind ', pt2_ind, verbose=verbose)
    logt('prob_grid ', prob_grid, verbose=verbose)
    logt('gauss_heatmap ', gauss_heatmap, verbose=verbose)   # batch_sz, num_classes, num_rois, grid_h, grid_w

    ##-----------------------------------------------------------------------------
    ## (4) Reduce MAX over the roi axis. Since all values in the box grids are
    ##     '1' there is no need to sum or normalize: the covered areas of each
    ##     class heatmap simply end up as '1'.
    ##-----------------------------------------------------------------------------
    gauss_heatmap = tf.reduce_max(gauss_heatmap, axis=2, name='gauss_heatmap')
    logt(
        '\n Reduce MAX based on class -------------------------------------',
        verbose=verbose)
    logt(' gaussian_heatmap : ', gauss_heatmap, verbose=verbose)

    ##-----------------------------------------------------------------------------
    ## Alternative scores (2): pick, for every box, the class heatmap it
    ## belongs to and score the box against that merged heatmap.
    ##-----------------------------------------------------------------------------
    hm_indices = tf.cast(pt2_ind[:, :2], dtype=tf.int32)
    pt2_heatmaps = tf.gather_nd(gauss_heatmap, hm_indices)
    logt('hm_indices ', hm_indices, verbose=verbose)
    logt('pt2_heatmaps ', pt2_heatmaps, verbose=verbose)

    alt_scores_2 = tf.map_fn(build_hm_score_v3, [pt2_heatmaps, cy, cx, covar],
                             dtype=tf.float32)
    logt('alt_scores_2 ', alt_scores_2, verbose=verbose)
    alt_scores_2 = tf.scatter_nd(
        pt2_ind, alt_scores_2,
        [batch_size, num_classes, rois_per_image, KB.int_shape(alt_scores_2)[-1]],
        name='alt_scores_2')
    alt_scores_2_norm = normalize_scores(alt_scores_2)
    logt('alt_scores_2(by class) : ', alt_scores_2, verbose=verbose)
    logt('alt_scores_2_norm(by_class) : ', alt_scores_2_norm, verbose=verbose)

    ##-----------------------------------------------------------------------------
    ## Transpose heatmap to [BatchSz, Height, Width, Num_Classes]
    ##-----------------------------------------------------------------------------
    gauss_heatmap = tf.transpose(gauss_heatmap, [0, 2, 3, 1], name=names[0])

    ##-----------------------------------------------------------------------------
    ## Append all scores to the input tensor to yield the output scores tensor
    ##-----------------------------------------------------------------------------
    gauss_scores = tf.concat(
        [in_tensor, old_style_scores,
         alt_scores_1, alt_scores_1_norm,
         alt_scores_2, alt_scores_2_norm],
        axis=-1, name=names[0] + '_scores')

    logt('gauss_heatmap ', gauss_heatmap, verbose=verbose)
    logt('gauss_scores', gauss_scores, verbose=verbose)
    logt('complete ', verbose=verbose)

    return gauss_heatmap, gauss_scores
def call(self, inputs):
    '''
    Compute FCN-based scores for the predicted bounding boxes.

    inputs: a two-element sequence ``(fcn_heatmap, pr_hm_scores)``.

    Returns a one-element list ``[fcn_scores]`` holding the result of
    ``fcn_scoring_graph``.
    '''
    fcn_heatmap, pr_hm_scores = inputs

    # `verbose` was referenced without being bound in this method; take it
    # from the layer configuration, as the other layer `call` methods do.
    verbose = self.config.VERBOSE

    logt('> FCNScoreLayer Call() ', len(inputs), verbose=verbose)
    logt(' fcn_heatmap.shape ', fcn_heatmap, verbose=verbose)
    logt(' pr_hm_scores.shape ', pr_hm_scores, verbose=verbose)

    fcn_scores = fcn_scoring_graph([fcn_heatmap, pr_hm_scores], self.config)

    logt('\n Output build_fcn_score ', verbose=verbose)
    logt(' fcn_scores ', fcn_scores, verbose=verbose)
    logt(' complete', verbose=verbose)

    return [fcn_scores]
def fpn_classifier_graph(rois, feature_maps, image_shape, pool_size, num_classes, verbose=0):
    '''
    Build the classifier and bounding-box regressor heads of the feature
    pyramid network.

    Inputs:
    -------
    rois:         [batch, num_rois, 4] proposal boxes in normalized
                  coordinates (y1, x1, y2, x2)
    feature_maps: list of pyramid feature maps [P2, P3, P4, P5], each at a
                  different resolution
    image_shape:  [height, width, depth]
    pool_size:    width of the square feature map produced by ROI pooling
    num_classes:  number of classes; sets the depth of the outputs
    verbose:      when truthy, the tensors are traced through logt

    Returns:
    --------
    logits:      [N, NUM_CLASSES] classifier logits (before softmax)
    probs:       [N, NUM_CLASSES] classifier probabilities
    bbox_deltas: [N, (dy, dx, log(dh), log(dw))] deltas to apply to the
                 proposal boxes
    '''
    print('\n>>> FPN Classifier Graph verbose:', verbose)
    if verbose:
        logt(' INPUT: rois shape ', rois)
        logt(' INPUT: mrcnn feature_maps ', len(feature_maps))
        logt(' - feature_map P2 ', feature_maps[0])
        logt(' - feature_map P3 ', feature_maps[1])
        logt(' - feature_map P4 ', feature_maps[2])
        logt(' - feature_map P5 ', feature_maps[3])
        logt(' INPUT: image_shape', image_shape)
        logt(' INPUT: pool_size ', pool_size)
        logt(' INPUT: num_classes', num_classes)

    # ROI pooling -> [batch, num_boxes, pool_height, pool_width, channels]
    roi_align = PyramidROIAlign([pool_size, pool_size], image_shape,
                                name="roi_align_classifier")
    net = roi_align([rois] + feature_maps)
    logt('roi_align_classifier ', net, verbose=verbose)

    # Two 1024-wide "FC" layers, implemented as Conv2D for consistency.
    # TimeDistributed applies the wrapped layer independently to every slice
    # along axis 1 (here: every ROI), e.g. a (32, 10, 16) batch fed through
    # TimeDistributed(Dense(8)) yields (32, 10, 8).
    first_conv = KL.Conv2D(1024, (pool_size, pool_size), padding="valid")
    net = KL.TimeDistributed(first_conv, name="mrcnn_class_conv1")(net)
    logt('mrcnn_class_conv1', net, verbose=verbose)
    net = KL.TimeDistributed(BatchNorm(axis=3), name='mrcnn_class_bn1')(net)
    logt('mrcnn_class_bn1 ', net, verbose=verbose)
    net = KL.Activation('relu')(net)
    logt('mrcnn_class_relu1', net, verbose=verbose)
    logt(verbose=verbose)

    second_conv = KL.Conv2D(1024, (1, 1))
    net = KL.TimeDistributed(second_conv, name="mrcnn_class_conv2")(net)
    logt('mrcnn_class_conv2 ', net, verbose=verbose)
    net = KL.TimeDistributed(BatchNorm(axis=3), name='mrcnn_class_bn2')(net)
    logt('mrcnn_class_bn2 ', net, verbose=verbose)
    net = KL.Activation('relu')(net)
    logt('mrcnn_class_relu2 ', net, verbose=verbose)
    logt(verbose=verbose)

    # Drop the two singleton spatial axes left over by the pool-sized conv
    shared = KL.Lambda(lambda t: KB.squeeze(KB.squeeze(t, 3), 2),
                       name="pool_squeeze")(net)
    logt('pool_squeeze(Shared)', shared, verbose=verbose)

    ## Classifier head
    logits_dense = KL.Dense(num_classes)
    mrcnn_class_logits = KL.TimeDistributed(logits_dense, name='mrcnn_class_logits')(shared)
    logt('mrcnn_class_logits ', mrcnn_class_logits, verbose=verbose)
    mrcnn_class_logits = KL.Lambda(
        lambda t: KB.identity(t, name='mrcnn_class_logits'),
        name='mrcnn_logits_lambda')(mrcnn_class_logits)
    logt('mrcnn_class_logits (final)', mrcnn_class_logits, verbose=verbose)

    softmax_act = KL.Activation("softmax")
    mrcnn_probs = KL.TimeDistributed(softmax_act, name='mrcnn_class_act')(mrcnn_class_logits)
    logt('mrcnn_probs ', mrcnn_probs, verbose=verbose)
    mrcnn_probs = KL.Lambda(
        lambda t: KB.identity(t, name='mrcnn_class'),
        name='mrcnn_class_lambda')(mrcnn_probs)
    logt('mrcnn_probs (final) ', mrcnn_probs, verbose=verbose)

    ## BBox head
    # [batch, boxes, num_classes * (dy, dx, log(dh), log(dw))]
    bbox_dense = KL.Dense(num_classes * 4, activation='linear')
    bbox_flat = KL.TimeDistributed(bbox_dense, name='mrcnn_bbox_fc')(shared)
    logt('mrcnn_bbox_fc ', bbox_flat, verbose=verbose)

    # Reshape to [batch, boxes, num_classes, (dy, dx, log(dh), log(dw))]
    flat_shape = KB.int_shape(bbox_flat)
    mrcnn_bbox = KL.Reshape((flat_shape[1], num_classes, 4), name='mrcnn_bbox_rs')(bbox_flat)
    logt('mrcnn_bbox_fc reshaped output', mrcnn_bbox, verbose=verbose)
    mrcnn_bbox = KL.Lambda(
        lambda t: KB.identity(t, name='mrcnn_bbox'),
        name='mrcnn_bbox_lambda')(mrcnn_bbox)
    logt('mrcnn_bbox (final)', mrcnn_bbox, verbose=verbose)

    return mrcnn_class_logits, mrcnn_probs, mrcnn_bbox
def fpn_graph(Resnet_Layers, verbose=0):
    """
    Build the top-down layers of the Feature Pyramid Network (FPN).

    Resnet_Layers: the five stage outputs of the backbone; the first one is
                   ignored and the remaining four are used as C2..C5.
    verbose:       when truthy, intermediate tensors are traced through logt.

    Returns the list [P2, P3, P4, P5, P6].

    TODO: add assert to verify feature map sizes match what's in config
    """
    print('\n>>> Feature Pyramid Network (FPN) Graph ')
    _, C2, C3, C4, C5 = Resnet_Layers
    logt('Input FPN C5 ', C5, verbose=verbose)
    logt('Input FPN C4 ', C4, verbose=verbose)
    logt('Input FPN C3 ', C3, verbose=verbose)
    logt('Input FPN C2 ', C2, verbose=verbose)

    P5 = KL.Conv2D(256, (1, 1), name='fpn_c5p5')(C5)
    logt('FPN P5 ', P5, verbose=verbose)

    # --- P4 = upsample(P5) + 1x1 conv(C4) --------------------------------
    # The two unnamed layers below exist only so their output can be logged;
    # the named layers that follow are the ones that feed the Add.
    trace_up = KL.UpSampling2D(size=(2, 2))(P5)
    trace_lat = KL.Conv2D(256, (1, 1))(C4)
    logt(' Upsampled P5 (x)', trace_up, verbose=verbose)
    logt(' Conv2D C4 (y)', trace_lat, verbose=verbose)
    p4_merge = KL.Add(name="fpn_p4add")
    p5_upsampled = KL.UpSampling2D(size=(2, 2), name="fpn_p5upsampled")(P5)
    c4_lateral = KL.Conv2D(256, (1, 1), name='fpn_c4p4')(C4)
    P4 = p4_merge([p5_upsampled, c4_lateral])
    logt('FPN P4 (x+y)', P4, verbose=verbose)

    # --- P3 = upsample(P4) + 1x1 conv(C3) --------------------------------
    trace_up = KL.UpSampling2D(size=(2, 2))(P4)
    trace_lat = KL.Conv2D(256, (1, 1))(C3)
    logt(' Upsampled P4 (x)', trace_up, verbose=verbose)
    logt(' Conv2D C3 (y)', trace_lat, verbose=verbose)
    p3_merge = KL.Add(name="fpn_p3add")
    p4_upsampled = KL.UpSampling2D(size=(2, 2), name="fpn_p4upsampled")(P4)
    c3_lateral = KL.Conv2D(256, (1, 1), name='fpn_c3p3')(C3)
    P3 = p3_merge([p4_upsampled, c3_lateral])
    logt('FPN P3 (x+y)', P3, verbose=verbose)

    # --- P2 = upsample(P3) + 1x1 conv(C2) --------------------------------
    trace_up = KL.UpSampling2D(size=(2, 2))(P3)
    trace_lat = KL.Conv2D(256, (1, 1))(C2)
    logt(' Upsampled P3 (x)', trace_up, verbose=verbose)
    logt(' Conv2D C2 (y)', trace_lat, verbose=verbose)
    p2_merge = KL.Add(name="fpn_p2add")
    p3_upsampled = KL.UpSampling2D(size=(2, 2), name="fpn_p3upsampled")(P3)
    c2_lateral = KL.Conv2D(256, (1, 1), name='fpn_c2p2')(C2)
    P2 = p2_merge([p3_upsampled, c2_lateral])
    logt('FPN P2 (x+y)', P2, verbose=verbose)

    # Attach a 3x3 conv to every P layer to get the final feature maps.
    P2 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p2")(P2)
    P3 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p3")(P3)
    P4 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p4")(P4)
    P5 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p5")(P5)

    # P6 is used for the 5th anchor scale in RPN. It is generated by
    # subsampling P5 with a stride of 2.
    P6 = KL.MaxPooling2D(pool_size=(1, 1), strides=2, name="fpn_p6")(P5)

    if verbose:
        print()
        print(' FPN Final output')
        logt(' FPN P6 (Maxpool2D of P5 w/ stride 2)', P6)
        logt(' FPN P5 (Conv2D (3,3) of P5)', P5)
        logt(' FPN P4 (Conv2D (3,3) of P4)', P4)
        logt(' FPN P3 (Conv2D (3,3) of P3)', P3)
        logt(' FPN P2 (Conv2D (3,3) of P2)', P2)

    return [P2, P3, P4, P5, P6]
def resnet_graph(input_image, architecture, stage5=False, verbose = 0):
    '''
    Build the ResNet backbone and return the output of each stage.

    input_image:  input tensor of the network
    architecture: "resnet50" or "resnet101"; selects how many identity
                  blocks stage 4 receives (5 or 22)
    stage5:       when False, stage 5 is not built and C5 is None
    verbose:      forwarded to logt for tracing

    Returns [C1, C2, C3, C4, C5].
    '''
    assert architecture in ["resnet50", "resnet101"]
    print()
    print('----------------------------')
    print('>>> Resnet Graph ')
    print('----------------------------')
    print(' Input_image shape :', input_image.shape)

    # Stage 1 : zero pad 3x3 -> 7x7 conv (64 filters, stride 2) -> batch norm
    #           -> relu -> 3x3 max pooling with stride 2
    net = KL.ZeroPadding2D((3, 3))(input_image)
    logt('After ZeroPadding2D ', net, verbose = verbose)
    net = KL.Conv2D(64, (7, 7), strides=(2, 2), name='conv1', use_bias=True)(net)
    logt('After Conv2D padding :', net, verbose = verbose)
    net = BatchNorm(axis=3, name='bn_conv1')(net)
    logt('After BatchNorm', net, verbose = verbose)
    net = KL.Activation('relu')(net)
    net = KL.MaxPooling2D((3, 3), strides=(2, 2), padding="same")(net)
    C1 = net
    logt('C1 ', C1, verbose = verbose)

    # Stage 2 : conv block then identity blocks, kernel 3, filters [64, 64, 256]
    stage2_filters = [64, 64, 256]
    net = conv_block(net, 3, stage2_filters, stage=2, block='a', strides=(1, 1))
    for block_id in ('b', 'c'):
        net = identity_block(net, 3, stage2_filters, stage=2, block=block_id)
    C2 = net
    logt('C2 ', C2, verbose = verbose)

    # Stage 3
    stage3_filters = [128, 128, 512]
    net = conv_block(net, 3, stage3_filters, stage=3, block='a')
    for block_id in ('b', 'c', 'd'):
        net = identity_block(net, 3, stage3_filters, stage=3, block=block_id)
    C3 = net
    logt('C3 ', C3, verbose = verbose)

    # Stage 4 : identity blocks are labelled 'b', 'c', ... (chr(98) == 'b')
    stage4_filters = [256, 256, 1024]
    net = conv_block(net, 3, stage4_filters, stage=4, block='a')
    identity_count = {"resnet50": 5, "resnet101": 22}[architecture]
    for offset in range(identity_count):
        net = identity_block(net, 3, stage4_filters, stage=4, block=chr(98 + offset))
    C4 = net
    logt('C4 ', C4, verbose = verbose)

    # Stage 5 (optional)
    C5 = None
    if stage5:
        stage5_filters = [512, 512, 2048]
        net = conv_block(net, 3, stage5_filters, stage=5, block='a')
        for block_id in ('b', 'c'):
            net = identity_block(net, 3, stage5_filters, stage=5, block=block_id)
        C5 = net
    logt('C5 ', C5, verbose = verbose)

    return [C1, C2, C3, C4, C5]
def fcn_heatmap_CE_loss_graph_2(target_heatmap, pred_heatmap, active_class_ids):
    '''
    Categorical Cross Entropy loss for the FCN heatmaps.

    target_heatmap:   ground-truth heatmap, used as the labels of the
                      softmax cross entropy (class scores on the last axis).
    pred_heatmap:     predicted heatmap, used as the logits.
    active_class_ids: indexed as active_class_ids[0] and gathered with the
                      predicted class ids. The gathered values are only
                      logged; they do not enter the returned loss.

    Returns: tensor of shape [1, 1] named "fcn_CE_loss" holding the summed
             cross entropy.
    '''
    print()
    print('--------------------------------' )
    print('>>> fcn_heatmap_CE_loss_graph_2 ' )
    print('--------------------------------' )
    logt('target_class_ids ', target_heatmap)
    logt('pred_class_logits ', pred_heatmap )
    logt('active_class_ids ', active_class_ids)

    # Winning class per position, for prediction and ground truth
    predicted_ids = KB.argmax(pred_heatmap , axis=-1)
    truth_ids = KB.argmax(target_heatmap, axis=-1)
    logt('pred_class_ids ', predicted_ids)
    logt('gt_class_ids ', truth_ids )

    # TODO: Update this line to work with batch > 1. Right now it assumes all
    #       images in a batch have the same active_class_ids
    active_mask = tf.gather(active_class_ids[0], predicted_ids)

    # Per-position cross entropy
    ce = tf.nn.softmax_cross_entropy_with_logits_v2(labels=target_heatmap,
                                                    logits=pred_heatmap)
    logt('pred_active ', active_mask)
    logt('loss ', ce)

    # The masking by active classes and the division by their count are
    # disabled: the loss is the plain sum over all positions, so the
    # following mean is taken over a single value.
    ce_total = tf.reduce_sum(ce)
    ce_mean = KB.mean(ce_total)
    ce_final = tf.reshape(ce_mean, [1, 1], name = "fcn_CE_loss")
    logt('loss ', ce_total)
    logt('loss mean ', ce_mean)
    logt('loss final', ce_final)
    return ce_final
def build_heatmap_inference(in_tensor, config, names = None):
    '''
    Build per-class gaussian heatmaps, and heatmap based scores, from a tensor
    of bounding boxes.

    input:
    -------
    in_tensor : [ Bsz, Num_Classes, rois_per_image, columns ]
                columns 0:4 hold the box as y1, x1, y2, x2 and column 8 holds
                the normalized score. The original notes describe 9 columns:
                {y1, x1, y2, x2, class, score, det_type, sequence_id,
                 normalized_score}. Rows whose four coordinates are all zero
                are treated as padding and ignored.
    config    : object providing VERBOSE, BATCH_SIZE, NUM_CLASSES,
                HEATMAP_SCALE_FACTOR and IMAGE_SHAPE. IMAGE_SHAPE[:2] must
                support `//` with a scalar (e.g. a numpy array).
    names     : optional sequence; names[0] names the output heatmap tensor and
                names[0] + '_scores' names the output scores tensor. When it is
                None or empty TensorFlow picks the op names.

    output:
    -------
    gauss_heatmap_sum : [ Bsz, grid_h, grid_w, Num_Classes ]
                        per-class sum of the clipped / normalized / score
                        weighted gaussians, normalized by its per-class maximum
    gauss_scores      : [ Bsz, Num_Classes, rois_per_image, ... ]
                        in_tensor with old_style_scores, alt_scores_1,
                        alt_scores_1_norm, alt_scores_2 and alt_scores_2_norm
                        appended along the last axis. The number of appended
                        columns depends on build_hm_score_v2 / build_hm_score_v3
                        / normalize_scores, which are defined elsewhere.
    '''
    verbose = config.VERBOSE
    batch_size = config.BATCH_SIZE
    num_classes = config.NUM_CLASSES
    heatmap_scale = config.HEATMAP_SCALE_FACTOR
    grid_h, grid_w = config.IMAGE_SHAPE[:2] // heatmap_scale
    rois_per_image = (in_tensor.shape)[2]
    NORM_SCORE_COLUMN = 8

    # `names` defaults to None; subscripting it unconditionally raised a
    # TypeError. Fall back to auto-generated op names in that case.
    heatmap_name = names[0] if names else None
    scores_name = heatmap_name + '_scores' if heatmap_name is not None else None

    if verbose:
        print('\n ')
        print(' > build_inference_heatmap() for ', names )
        print(' in_tensor shape : ', in_tensor.shape)
        print(' num bboxes per class : ', rois_per_image )
        print(' heatmap scale : ', heatmap_scale, 'Dimensions: w:', grid_w,' h:', grid_h)

    ##-----------------------------------------------------------------------------
    ## Stack non_zero bboxes from in_tensor into pt2_dense
    ##-----------------------------------------------------------------------------
    # pt2_ind   : one row per non-zero bbox : (image index, class index, roi row index)
    # pt2_dense : the matching rows of in_tensor (all columns)
    #-----------------------------------------------------------------------------
    pt2_sum = tf.reduce_sum(tf.abs(in_tensor[:,:,:,:4]), axis=-1)
    pt2_ind = tf.where(pt2_sum > 0)
    pt2_dense = tf.gather_nd( in_tensor, pt2_ind)
    logt('pt2_sum ', pt2_sum, verbose = verbose)
    logt('pt2_ind ', pt2_ind, verbose = verbose)
    logt('pt2_dense ', pt2_dense, verbose = verbose)

    ##-----------------------------------------------------------------------------
    ## Build mesh-grid to hold pixel coordinates
    ##-----------------------------------------------------------------------------
    X = tf.range(grid_w, dtype=tf.int32)
    Y = tf.range(grid_h, dtype=tf.int32)
    # default 'xy' indexing : X and Y both have shape [grid_h, grid_w]
    X, Y = tf.meshgrid(X, Y)

    # repeat X and Y once for every non-zero bbox : [num_bboxes, grid_h, grid_w]
    ones = tf.ones([tf.shape(pt2_dense)[0] , 1, 1], dtype = tf.int32)
    rep_X = ones * X
    rep_Y = ones * Y

    if verbose:
        print(' X/Y shapes :', X.get_shape(), Y.get_shape())
        print(' Ones: ', ones.shape)
        print(' ones_exp * X', ones.shape, '*', X.shape, '= ',rep_X.shape)
        print(' ones_exp * Y', ones.shape, '*', Y.shape, '= ',rep_Y.shape)

    # stack the X and Y grids : [num_bboxes, grid_h, grid_w, 2], then move the
    # bbox axis next to the coordinate axis : [grid_h, grid_w, num_bboxes, 2]
    pos_grid = tf.to_float(tf.stack([rep_X,rep_Y], axis = -1))
    logt('pos_grid before transpse ', pos_grid, verbose = verbose)
    pos_grid = tf.transpose(pos_grid,[1,2,0,3])
    logt('pos_grid after transpose ', pos_grid, verbose = verbose)

    ##-----------------------------------------------------------------------------
    ## Build mean and covariance tensors for Multivariate Normal Distribution
    ##-----------------------------------------------------------------------------
    bboxes_scaled = pt2_dense[:,:4]/heatmap_scale
    width = bboxes_scaled[:,3] - bboxes_scaled[:,1]      # x2 - x1
    height = bboxes_scaled[:,2] - bboxes_scaled[:,0]     # y2 - y1
    cx = bboxes_scaled[:,1] + ( width / 2.0)
    cy = bboxes_scaled[:,0] + ( height / 2.0)
    means = tf.stack((cx,cy),axis = -1)
    covar = tf.stack((width * 0.5 , height * 0.5), axis = -1)
    covar = tf.sqrt(covar)

    ## Added 2019-05-12 to prevent NaN when bounding box is extremely small
    ## resulting in width or height being equal to zero
    covar = tf.where(covar < 1.0e-15, tf.ones_like(covar), covar)

    ##-----------------------------------------------------------------------------
    ## Compute Normal Distribution for bounding boxes
    ##-----------------------------------------------------------------------------
    tfd = tf.contrib.distributions
    mvn = tfd.MultivariateNormalDiag(loc = means, scale_diag = covar)
    prob_grid = mvn.prob(pos_grid)
    logt('Input to MVN.PROB: pos_grid (meshgrid) ', pos_grid, verbose = verbose)
    logt('Prob_grid shape from mvn.probe ',prob_grid, verbose = verbose)
    # [grid_h, grid_w, num_bboxes] --> [num_bboxes, grid_h, grid_w]
    prob_grid = tf.transpose(prob_grid,[2,0,1])
    logt('Prob_grid shape after tanspose ', prob_grid, verbose = verbose)
    logt('Output probabilities shape ' , prob_grid, verbose = verbose)

    ##--------------------------------------------------------------------------------------------
    ## (0) Generate scores using prob_grid and pt2_dense - (NEW METHOD added 09-21-2018)
    ##--------------------------------------------------------------------------------------------
    old_style_scores = tf.map_fn(build_hm_score_v2,
                                 [prob_grid, bboxes_scaled, pt2_dense[ :, NORM_SCORE_COLUMN ] ],
                                 dtype = tf.float32, swap_memory = True)
    old_style_scores = tf.scatter_nd(pt2_ind, old_style_scores,
                                     [batch_size, num_classes, rois_per_image, KB.int_shape(old_style_scores)[-1]],
                                     name = 'scores_scattered')
    logt('old_style_scores :', old_style_scores, verbose = verbose)

    ##---------------------------------------------------------------------------------------------
    ## (NEW STEP - Clipped heatmaps)
    ## (1) Clip heatmap to region surrounding Cy,Cx and Covar X, Y
    ##     Similar to what is being done for gt_heatmap in CHMLayerTarget
    ##---------------------------------------------------------------------------------------------
    prob_grid_clipped = tf.map_fn(clip_heatmap, [prob_grid, cy,cx, covar],
                                  dtype = tf.float32, swap_memory = True)
    logt(' prob_grid_clipped : ', prob_grid_clipped, verbose = verbose)

    ##---------------------------------------------------------------------------------------------
    ## (2) apply normalization per bbox heatmap instance --> divide by the instance maximum
    ##     (a maximum below 1e-15 is replaced by 1 to avoid dividing by zero)
    ##---------------------------------------------------------------------------------------------
    logt('\n normalization ------------------------------------------------------', verbose = verbose)
    normalizer = tf.reduce_max(prob_grid_clipped, axis=[-2,-1], keepdims = True)
    normalizer = tf.where(normalizer < 1.0e-15, tf.ones_like(normalizer), normalizer)
    logt(' normalizer : ', normalizer, verbose = verbose)
    prob_grid_cns = prob_grid_clipped / normalizer
    logt(' prob_grid_cns: clipped/normed/scaled : ', prob_grid_cns, verbose = verbose)

    ##---------------------------------------------------------------------------------------------
    ## (3) multiply normalized heatmap by normalized score in in_tensor/ (pt2_dense NORM_SCORE_COLUMN)
    ##     the double transpose puts the bbox axis last so the per-bbox score broadcasts
    ##     broadcasting : https://stackoverflow.com/questions/49705831/automatic-broadcasting-in-tensorflow
    ##---------------------------------------------------------------------------------------------
    prob_grid_cns = tf.transpose(tf.transpose(prob_grid_cns) * pt2_dense[ :, NORM_SCORE_COLUMN ])
    logt(' prob_grid_cns: clipped/normed/scaled : ', prob_grid_cns, verbose = verbose)

    ##---------------------------------------------------------------------------------------------
    ## - Build alternative scores based on normalized/scaled/clipped heatmap
    ##---------------------------------------------------------------------------------------------
    alt_scores_1 = tf.map_fn(build_hm_score_v3, [prob_grid_cns, cy, cx,covar], dtype=tf.float32)
    logt('alt_scores_1 ', alt_scores_1, verbose = verbose)
    alt_scores_1 = tf.scatter_nd(pt2_ind, alt_scores_1,
                                 [batch_size, num_classes, rois_per_image, KB.int_shape(alt_scores_1)[-1]],
                                 name = 'alt_scores_1')
    logt('alt_scores_1(by class) ', alt_scores_1, verbose = verbose)
    alt_scores_1_norm = normalize_scores(alt_scores_1)
    logt('alt_scores_1_norm(by_class) ', alt_scores_1_norm, verbose = verbose)

    ##-------------------------------------------------------------------------------------
    ## (3) scatter out the probability distributions based on class
    ##     prob_grid_cns is [num_bboxes, grid_h, grid_w], so the trailing dimensions of
    ##     the scattered tensor must be (grid_h, grid_w). The previous (grid_w, grid_h)
    ##     order only matched for square images.
    ##-------------------------------------------------------------------------------------
    gauss_heatmap = tf.scatter_nd(pt2_ind, prob_grid_cns,
                                  [batch_size, num_classes, rois_per_image, grid_h, grid_w],
                                  name = 'gauss_scatter')
    logt('\n Scatter out the probability distributions based on class --------------', verbose = verbose)
    logt('pt2_ind shape ', pt2_ind , verbose = verbose)
    logt('prob_grid_clippped ', prob_grid_cns, verbose = verbose)
    logt('gauss_heatmap ', gauss_heatmap, verbose = verbose)   # batch_sz , num_classes, num_rois, grid_h, grid_w

    ##-------------------------------------------------------------------------------------
    ## Construction of Gaussian Heatmap output using Reduce SUM
    ##
    ## (4) SUM : Reduce and sum up gauss_heatmaps by class
    ## (5) heatmap normalization (per class)
    ## (6) Transpose heatmap to shape required for FCN
    ##-------------------------------------------------------------------------------------
    gauss_heatmap_sum = tf.reduce_sum(gauss_heatmap, axis=2, name='gauss_heatmap_sum')
    logt('\n Reduce SUM based on class and normalize within each class -----------------------', verbose = verbose)
    logt('gaussian_heatmap_sum ', gauss_heatmap_sum , verbose = verbose)

    ## normalize in class
    normalizer = tf.reduce_max(gauss_heatmap_sum, axis=[-2,-1], keepdims = True)
    normalizer = tf.where(normalizer < 1.0e-15, tf.ones_like(normalizer), normalizer)
    gauss_heatmap_sum = gauss_heatmap_sum / normalizer
    logt('normalizer shape : ', normalizer, verbose = verbose)
    logt('normalized heatmap : ', gauss_heatmap_sum, verbose = verbose)

    ##---------------------------------------------------------------------------------------------
    ## Score on reduced sum heatmaps.
    ##
    ## build indices and extract heatmaps corresponding to each bounding boxes' class id
    ## build alternative scores based on normalized/scaled clipped heatmap
    ##---------------------------------------------------------------------------------------------
    hm_indices = tf.cast(pt2_ind[:, :2],dtype=tf.int32)
    logt('hm_indices ', hm_indices, verbose = verbose)
    pt2_heatmaps = tf.gather_nd(gauss_heatmap_sum, hm_indices )
    logt('pt2_heatmaps ', pt2_heatmaps, verbose = verbose)

    alt_scores_2 = tf.map_fn(build_hm_score_v3, [pt2_heatmaps, cy, cx,covar], dtype=tf.float32)
    logt(' alt_scores_2 : ', alt_scores_2, verbose = verbose)
    alt_scores_2 = tf.scatter_nd(pt2_ind, alt_scores_2,
                                 [batch_size, num_classes, rois_per_image, KB.int_shape(alt_scores_2)[-1]],
                                 name = 'alt_scores_2')
    logt('alt_scores_2(scattered) ', alt_scores_2, verbose = verbose)
    alt_scores_2_norm = normalize_scores(alt_scores_2)
    logt('alt_scores_2_norm(by_class) ', alt_scores_2_norm, verbose = verbose)

    ##---------------------------------------------------------------------------------------------
    ## (6) Transpose heatmaps to shape required for FCN [batchsize, grid_h, grid_w, num_classes]
    ##---------------------------------------------------------------------------------------------
    gauss_heatmap_sum = tf.transpose(gauss_heatmap_sum ,[0,2,3,1], name = heatmap_name)
    logt(' gauss_heatmap_sum (final) ', gauss_heatmap_sum, verbose = verbose)

    ##--------------------------------------------------------------------------------------------
    ## APPEND ALL SCORES TO input score tensor TO YIELD output scores tensor
    ##--------------------------------------------------------------------------------------------
    gauss_scores = tf.concat([in_tensor, old_style_scores, alt_scores_1, alt_scores_1_norm,
                              alt_scores_2, alt_scores_2_norm],
                             axis = -1, name = scores_name)
    logt(' gauss_scores : ', gauss_scores, verbose = verbose)
    logt(' complete', verbose = verbose)

    return gauss_heatmap_sum, gauss_scores
def build_gt_tensor(gt_class_ids, norm_gt_bboxes, config):
    '''
    Arrange ground-truth boxes into a tensor indexed by image and by class.

    input:
    -------
    gt_class_ids   : integer class id of every box, [batch_size, num_bboxes];
                     ids that are not > 0 get a score and sequence id of 0
    norm_gt_bboxes : box coordinates, multiplied here by [h, w, h, w] taken
                     from config.IMAGE_SHAPE
                     (presumably normalized y1, x1, y2, x2 -- confirm with caller)
    config         : object providing VERBOSE, BATCH_SIZE, NUM_CLASSES,
                     IMAGE_SHAPE, DETECTION_PER_CLASS and DETECTION_MAX_INSTANCES

    output:
    -------
    gt_tensor      : [batch_size, num_classes, num_bboxes, 8] with columns
                     (4 scaled box coordinates, class id, score, sequence id,
                     score). Within each class the rows are ordered by
                     descending sequence id, which places the real boxes first.
                     The op is named "gt_tensor_max" when num_bboxes equals
                     config.DETECTION_MAX_INSTANCES, otherwise "gt_tensor".
    '''
    verbose = config.VERBOSE
    batch_size = config.BATCH_SIZE
    num_classes = config.NUM_CLASSES
    img_h, img_w = config.IMAGE_SHAPE[:2]
    det_per_class = config.DETECTION_PER_CLASS      # read from config, not used below
    num_bboxes = KB.int_shape(norm_gt_bboxes)[1]

    # scale the box coordinates by the image dimensions
    hw_scale = tf.constant([img_h, img_w, img_h, img_w], dtype=tf.float32)
    tiled_scale = hw_scale * tf.ones([batch_size, num_bboxes, 1], dtype='float32')
    gt_bboxes = tf.multiply(norm_gt_bboxes, tiled_scale)

    # The number of bounding boxes comes from the input shape rather than from
    # config.DETECTION_MAX_INSTANCES, because this routine serves both
    # input_gt_boxes and target_gt_deltas; only the output name differs.
    tensor_name = "gt_tensor"
    if num_bboxes == config.DETECTION_MAX_INSTANCES:
        tensor_name = "gt_tensor_max"

    if verbose:
        print('\n')
        print(' > BUILD_GROUND TRUTH_TF()')
        print(' num_bboxes : ', num_bboxes, '(building ', tensor_name, ')')
        print(' gt_class_ids shape : ', gt_class_ids.get_shape(), ' ', KB.int_shape(gt_class_ids))
        print(' norm_gt_bboxes.shape : ', norm_gt_bboxes.get_shape(), ' ', KB.int_shape(norm_gt_bboxes))
        print(' gt_bboxes.shape : ', gt_bboxes.get_shape(), ' ', KB.int_shape(gt_bboxes))

    #---------------------------------------------------------------------------
    # class id column, and a score column that is 1 where class_id > 0 else 0
    #---------------------------------------------------------------------------
    gt_classes_exp = tf.to_float(tf.expand_dims(gt_class_ids, axis=-1))
    logt('gt_classes_exp ', gt_classes_exp, verbose=verbose)

    class_ones = tf.ones_like(gt_class_ids)
    class_zeros = tf.zeros_like(gt_class_ids)
    is_object = tf.greater(gt_class_ids, 0)
    gt_scores = tf.where(is_object, class_ones, class_zeros)
    gt_scores_exp = tf.to_float(KB.expand_dims(gt_scores, axis=-1))
    logt('gt_scores_exp ', gt_scores_exp, verbose=verbose)

    ##------------------------------------------------------------------------------------
    ## Generate GT_ARRAY
    ## gt_scores_exp is appended a second time, as the last column, to match the
    ## dimensions of pred_tensor generated in build_predictions (it stands in for
    ## the normalized score).
    ##
    ## The sequence id preserves the order of the rois as passed to this routine:
    ## it counts down from num_bboxes and is zeroed where the score is zero.
    ##------------------------------------------------------------------------------------
    image_idx, box_idx = tf.meshgrid(tf.range(batch_size, dtype=tf.int32),
                                     tf.range(num_bboxes, dtype=tf.int32),
                                     indexing='ij')
    sequence = gt_scores * (box_idx[..., ::-1] + 1)
    sequence = tf.to_float(tf.expand_dims(sequence, axis=-1))
    gt_columns = [gt_bboxes, gt_classes_exp, gt_scores_exp, sequence, gt_scores_exp]
    gt_array = tf.concat(gt_columns, axis=-1, name='gt_array')

    ##------------------------------------------------------------------------------
    ## Create indices to scatter rois out to a multi-dim tensor by image id and class
    ## resulting tensor is batch size x num_classes x num_bboxes x (num columns)
    ##------------------------------------------------------------------------------
    scatter_ind = tf.stack([image_idx, gt_class_ids, box_idx], axis=-1)
    scattered_shape = [batch_size, num_classes, num_bboxes, gt_array.shape[-1]]
    gt_scatter = tf.scatter_nd(scatter_ind, gt_array, scattered_shape)

    logt('gt_array ', gt_array, verbose=verbose)
    logt('scatter_ind ', scatter_ind, verbose=verbose)
    logt('gt_array ', gt_array, verbose=verbose)
    logt('gt_scatter ', gt_scatter, verbose=verbose)

    ##-------------------------------------------------------------------------------
    ## sort in each class dimension based on sequence number (column 6)
    ## scatter_nd places bboxes in a sparse fashion --- this sort is to place all
    ## bboxes at the top of the class bbox array
    ##-------------------------------------------------------------------------------
    sequence_column = tf.abs(gt_scatter[:, :, :, 6])
    _, sort_inds = tf.nn.top_k(sequence_column, k=gt_scatter.shape[2])

    # build indexes to gather rows from gt_scatter based on sort order
    class_grid, batch_grid, bbox_grid = tf.meshgrid(tf.range(num_classes),
                                                    tf.range(batch_size),
                                                    tf.range(num_bboxes))
    # Not consumed: appending it to gt_tensor was disabled on 30-04-2018.
    bbox_grid_exp = tf.to_float(tf.expand_dims(bbox_grid, axis=-1))

    gather_inds = tf.stack([batch_grid, class_grid, sort_inds], axis=-1)
    gt_tensor = tf.gather_nd(gt_scatter, gather_inds, name=tensor_name)

    logt('sort_inds ', sort_inds, verbose=verbose)
    logt('class_grid ', class_grid, verbose=verbose)
    logt('batch_grid ', batch_grid, verbose=verbose)
    logt('gather_inds ', gather_inds, verbose=verbose)
    logt('gt_tensor ', gt_tensor, verbose=verbose)

    return gt_tensor