def build_graph(self, image, label):
    """Build the classification graph and return the training cost.

    Args:
        image: input batch; it is first passed through
            ``self.image_preprocess``.
        label: labels forwarded to ``ImageNetModel.compute_loss_and_error``.

    Returns:
        The tensor named ``cost``: the classification loss, plus the L2
        regularization term when ``self.weight_decay > 0``. The result is
        multiplied by ``self.loss_scale`` when that value is not 1.0.
    """
    image = self.image_preprocess(image)
    if is_channels_first(self.data_format):
        # Axis permutation (0, 3, 1, 2) -- presumably NHWC -> NCHW.
        image = tf.transpose(image, [0, 3, 1, 2], name="image_transpose")

    logits = self.model_lambda(
        x=image, training=get_current_tower_context().is_training)
    cls_loss = ImageNetModel.compute_loss_and_error(
        logits=logits,
        label=label,
        label_smoothing=self.label_smoothing)

    if self.weight_decay > 0:
        l2_fn = tf.contrib.layers.l2_regularizer(self.weight_decay)
        reg_loss = regularize_cost(regex=self.weight_decay_pattern,
                                   func=l2_fn,
                                   name='l2_regularize_loss')
        add_moving_summary(cls_loss, reg_loss)
        cost = tf.add_n([cls_loss, reg_loss], name='cost')
    else:
        cost = tf.identity(cls_loss, name='cost')
        add_moving_summary(cost)

    if self.loss_scale != 1.0:
        logger.info("Scaling the total loss by {} ...".format(
            self.loss_scale))
        return cost * self.loss_scale
    return cost
def build_graph(self, x, y):
    """Build the embedding graph on top of the VQ-VAE and return its loss.

    ``x`` is encoded and quantized by ``self._vqvae_model``; the
    ``'quantize'`` entry of the quantizer output is embedded with
    ``self.embed`` and L2-normalized along axis 1. The loss depends on
    ``self._loss_stragegy``:

    * ``'triplet-all'``: mean of the positive triplet terms among the
      triplets kept by ``self.mask_triplet``.
    * ``'triplet-hard'``: per-anchor hinge on (hardest positive distance -
      hardest negative distance + margin), averaged over anchors.
    * anything else: softmax cross-entropy of a dense layer with
      ``self._num_labels`` outputs.

    Args:
        x: model input, passed to ``self._vqvae_model.encode``.
        y: labels; used to build the triplet masks or as sparse class ids.

    Returns:
        The loss tensor (also registered as a moving summary).
    """
    is_training = get_current_tower_context().is_training

    z = self._vqvae_model.encode(x)
    z = self._vqvae_model.quantize(z)['quantize']
    embeddings = self.embed(z, is_training)
    embeddings = tf.nn.l2_normalize(embeddings, 1, 1e-10, name='embeddings')

    if self._loss_stragegy == 'triplet-all':
        distance = self.pairwise_distance(embeddings)
        # Entry [a, p, n] = d(a, p) - d(a, n) + margin
        # (assumes `distance` is a 2-D pairwise matrix -- TODO confirm).
        triplet_distance = tf.expand_dims(distance, 2) - \
            tf.expand_dims(distance, 1) + self._margin
        triplet_distance, num_valid_triplet = \
            self.mask_triplet(triplet_distance, y)
        triplet_distance = tf.maximum(triplet_distance, 0.)

        # Average only over triplets that still contribute a positive term;
        # the 1e-16 terms guard against division by zero.
        num_pos_triplet = tf.reduce_sum(
            tf.to_float(tf.greater(triplet_distance, 1e-16)))
        loss = tf.reduce_sum(triplet_distance) / (num_pos_triplet + 1e-16)
        pos_triplet_frac = num_pos_triplet / (num_valid_triplet + 1e-16)

        add_moving_summary(tf.identity(loss, 'loss'))
        add_moving_summary(
            tf.identity(pos_triplet_frac, 'pos_triplet_frac'))
    elif self._loss_stragegy == 'triplet-hard':
        distance = self.pairwise_distance(embeddings)

        # Hardest positive: largest distance among the valid positives of
        # each anchor. The reduction must run over the masked distances;
        # reducing over the raw `distance` matrix (as the code previously
        # did) would also pick up negatives.
        valid_pos_mask = self.get_valid_mask(y)
        valid_pos_anchor = distance * valid_pos_mask
        hardest_pos_anchor = tf.reduce_max(valid_pos_anchor, axis=1,
                                           keepdims=True)

        # Hardest negative: smallest distance among the valid negatives.
        # Entries that are not valid negatives get the row maximum added
        # so that they cannot win the minimum.
        valid_neg_mask = self.get_valid_mask(y, positive_mask=False)
        max_dist = tf.reduce_max(distance, axis=1, keepdims=True)
        dummy_distance = distance + max_dist * (1. - valid_neg_mask)
        hardest_neg_anchor = tf.reduce_min(dummy_distance, axis=1,
                                           keepdims=True)

        triplet_loss = tf.maximum(
            hardest_pos_anchor - hardest_neg_anchor + self._margin, 0.)
        loss = tf.reduce_mean(triplet_loss)
        add_moving_summary(tf.identity(loss, 'loss'))
    else:
        logits = tf.layers.dense(embeddings, self._num_labels)
        predictions = tf.argmax(logits, axis=1)
        correct_prediction = tf.to_float(
            tf.equal(predictions, tf.cast(y, tf.int64)),
            name='correct_prediction')
        accuracy = tf.reduce_mean(correct_prediction, name='accuracy')
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=y)
        loss = tf.reduce_mean(cross_entropy, name='loss')
        add_moving_summary(loss)
        add_moving_summary(accuracy)

    return loss
def build_graph(self, x, _):
    """Build the VQ-VAE reconstruction graph and return its loss.

    Args:
        x: input batch handed to ``self.reconstruct``.
        _: unused second input.

    Returns:
        The tensor named ``loss``: mean squared reconstruction error plus
        the ``'loss'`` entry of the quantizer output.
    """
    training = get_current_tower_context().is_training
    encoded, quantized, reconstruction = self.reconstruct(x, training)

    # Named pass-through tensors so that they can be looked up by name.
    tf.identity(tf.layers.Flatten()(encoded), name='embeddings')
    tf.identity(tf.layers.Flatten()(quantized['quantize']),
                name='latent_zq')
    tf.identity(quantized['encoding_indices'], name='pz_x')
    perplexity = tf.identity(quantized['perplexity'], name='perplexity')
    reconstruction = tf.identity(reconstruction, name='x_recon')

    squared_error = (reconstruction - x)**2
    total = tf.reduce_mean(squared_error) + quantized['loss']
    total = tf.identity(total, name='loss')

    add_moving_summary(total, perplexity)
    return total
def generate_fpn_proposals_topk_per_image(multilevel_anchor_boxes,
                                          multilevel_box_logits,
                                          multilevel_label_logits,
                                          orig_image_dims,
                                          batch_size):
    """Generate RPN proposals and keep the top-scoring ones for each image.

    For every processed image, proposals are generated level by level,
    concatenated across levels, and reduced to at most the per-level NMS
    top-k count by score. The per-image results are then concatenated.

    Args:
        multilevel_anchor_boxes: one anchor tensor per FPN level; each is
            reshaped to (-1, 4) before use.
        multilevel_box_logits: #lvl [ BS x (NA * 4) x H x W ] box deltas.
        multilevel_label_logits: #lvl [ BS x H x W x NA ] tensors.
        orig_image_dims: Original (prepadding) image dimensions (h, w, c),
            BS x 3. Only the first two columns are used.
        batch_size: number of images to process when training. Outside of
            training only the first image (index 0) is processed.

    Returns:
        boxes: N x 5 float; first column is the batch index.
        scores: 1-D, N (logits), aligned with ``boxes``.

    Raises:
        RuntimeError: if ``cfg.FPN.PROPOSAL_MODE`` is not ``'Level'``.
    """
    num_lvl = len(cfg.FPN.ANCHOR_STRIDES)
    assert len(multilevel_label_logits) == num_lvl
    orig_images_hw = orig_image_dims[:, :2]

    training = get_current_tower_context().is_training

    if cfg.FPN.PROPOSAL_MODE != 'Level':
        raise RuntimeError(
            "Only level-wise predictions are supported with batches")

    fpn_nms_topk = cfg.RPN.TRAIN_PER_LEVEL_NMS_TOPK if training \
        else cfg.RPN.TEST_PER_LEVEL_NMS_TOPK
    bs = batch_size if training else 1

    boxes_list = []
    scores_list = []
    for i in range(bs):
        all_boxes = []
        all_scores = []
        for lvl in range(num_lvl):
            with tf.name_scope(f'Lvl{lvl}'):
                # Slice out image i while keeping the leading batch axis.
                im_info = tf.cast(orig_images_hw[i:(i + 1)],
                                  tf.float32)  # h, w
                scores = multilevel_label_logits[lvl][i:(i + 1)]
                bbox_deltas = tf.transpose(
                    multilevel_box_logits[lvl][i:(i + 1)], [0, 2, 3, 1])

                single_level_anchor_boxes = tf.reshape(
                    multilevel_anchor_boxes[lvl], (-1, 4))

                # https://caffe2.ai/docs/operators-catalogue.html#generateproposals
                rois, rois_probs = tf.generate_bounding_box_proposals(
                    scores,
                    bbox_deltas,
                    im_info,
                    single_level_anchor_boxes,
                    spatial_scale=1.0 / cfg.FPN.ANCHOR_STRIDES[lvl],
                    pre_nms_topn=fpn_nms_topk,
                    post_nms_topn=fpn_nms_topk,
                    nms_threshold=cfg.RPN.PROPOSAL_NMS_THRESH,
                    min_size=cfg.RPN.MIN_SIZE)

                # Shift the first column by i; presumably the op reports
                # batch index 0 for the single-image slice, so this restores
                # the index within the full batch -- TODO confirm.
                all_boxes.append(
                    tf.concat((i + rois[:, :1], rois[:, 1:]), axis=1))
                all_scores.append(rois_probs)

        # All levels of image i: rows of 5 columns and a flat score vector.
        proposal_boxes = tf.concat(all_boxes, axis=0)
        proposal_boxes = tf.reshape(proposal_boxes, [-1, 5])
        proposal_scores = tf.concat(all_scores, axis=0)
        proposal_scores = tf.reshape(proposal_scores, [-1])

        topk = tf.minimum(tf.size(proposal_scores), fpn_nms_topk)
        topk_scores, topk_indices = tf.nn.top_k(proposal_scores,
                                                k=topk,
                                                sorted=False)

        boxes_list.append(tf.gather(proposal_boxes, topk_indices))
        # The values returned by top_k are exactly
        # proposal_scores[topk_indices], so no second gather is needed.
        scores_list.append(topk_scores)

    boxes = tf.concat(boxes_list, axis=0)
    scores = tf.concat(scores_list, axis=0)

    return tf.stop_gradient(boxes, name='boxes'), \
        tf.stop_gradient(scores, name='scores')
def generate_fpn_proposals(multilevel_anchor_boxes,
                           multilevel_box_logits,
                           multilevel_label_logits,
                           orig_image_dims,
                           batch_size):
    """Generate the rois from the box logits and pick K with top label
    scores as the box proposals.

    Args:
        multilevel_anchor_boxes: one anchor tensor per FPN level; each is
            reshaped to (-1, 4) before use.
        multilevel_box_logits: #lvl [ BS x (NA * 4) x H_feature x W_feature ] boxes
        multilevel_label_logits: #lvl [ BS x H_feature x W_feature x NA ] tensors
        orig_image_dims: Original (prepadding) image dimensions (h, w, c),
            BS x 3. Only the first two columns are used.
        batch_size: multiplies the per-level NMS top-k count when training;
            not used otherwise.

    Returns:
        boxes: K x 5 float
        scores: 1-D, K (logits)

    Raises:
        RuntimeError: if ``cfg.FPN.PROPOSAL_MODE`` is not ``'Level'``.
    """
    num_lvl = len(cfg.FPN.ANCHOR_STRIDES)
    assert len(multilevel_label_logits) == num_lvl
    orig_images_hw = orig_image_dims[:, :2]

    training = get_current_tower_context().is_training

    if cfg.FPN.PROPOSAL_MODE != 'Level':
        raise RuntimeError(
            "Only level-wise predictions are supported with batches")

    fpn_nms_topk = cfg.RPN.TRAIN_PER_LEVEL_NMS_TOPK * batch_size \
        if training else cfg.RPN.TEST_PER_LEVEL_NMS_TOPK

    all_boxes = []
    all_scores = []
    for lvl in range(num_lvl):
        with tf.name_scope(f'Lvl{lvl}'):
            im_info = tf.cast(orig_images_hw, tf.float32)

            # BS x H_feature x W_feature x NA
            scores = multilevel_label_logits[lvl]
            # BS x H_feature x W_feature x (NA * 4)
            bbox_deltas = tf.transpose(multilevel_box_logits[lvl],
                                       [0, 2, 3, 1])

            single_level_anchor_boxes = tf.reshape(
                multilevel_anchor_boxes[lvl], (-1, 4))

            # This is a custom tensorflow op that translates the bbox deltas
            # into bounding box coordinates and then runs NMS.
            # See CODEBASE.md for more info.
            #
            # rois: (# boxes for a single level) x 5, the 5 columns arranged
            #       as: batch_index, x_1, y_1, x_2, y_2
            # rois_probs: 1-D, # boxes for a single level
            rois, rois_probs = tf.generate_bounding_box_proposals(
                scores,
                bbox_deltas,
                im_info,
                single_level_anchor_boxes,
                spatial_scale=1.0 / cfg.FPN.ANCHOR_STRIDES[lvl],
                pre_nms_topn=fpn_nms_topk,
                post_nms_topn=fpn_nms_topk,
                nms_threshold=cfg.RPN.PROPOSAL_NMS_THRESH,
                min_size=cfg.RPN.MIN_SIZE)

            all_boxes.append(rois)
            all_scores.append(rois_probs)

    proposal_boxes = tf.concat(all_boxes, axis=0)  # Num_all_rois x 5
    proposal_boxes = tf.reshape(proposal_boxes, [-1, 5])  # Num_all_rois x 5

    proposal_scores = tf.concat(all_scores, axis=0)  # 1-D Num_all_rois
    proposal_scores = tf.reshape(proposal_scores, [-1])  # 1-D Num_all_rois

    # Keep the best-scoring proposals across all levels (unsorted).
    proposal_topk = tf.minimum(tf.size(proposal_scores), fpn_nms_topk)
    proposal_scores, topk_indices = tf.nn.top_k(proposal_scores,
                                                k=proposal_topk,
                                                sorted=False)
    proposal_boxes = tf.gather(proposal_boxes, topk_indices)  # K x 5

    return tf.stop_gradient(proposal_boxes, name='boxes'), \
        tf.stop_gradient(proposal_scores, name='scores')
def dropout(inp, keep_prob):
    """Apply ``tf.layers.dropout`` to ``inp``, keyed on the tower's mode.

    Args:
        inp: input tensor.
        keep_prob: keep probability; the layer receives ``1 - keep_prob``
            as its drop rate.

    Returns:
        The output of ``tf.layers.dropout``; the ``training`` flag is taken
        from the current tower context.
    """
    drop_rate = 1 - keep_prob
    tower_ctx = get_current_tower_context()
    return tf.layers.dropout(inp, rate=drop_rate,
                             training=tower_ctx.is_training)