Beispiel #1
0
    def build_graph(self, image, label):
        """Build the classification graph and return the (optionally scaled) cost.

        The input goes through ``self.image_preprocess``, is transposed with
        permutation [0, 3, 1, 2] when ``self.data_format`` is channels-first,
        and is then fed to ``self.model_lambda``. The classification loss comes
        from ``ImageNetModel.compute_loss_and_error``. When
        ``self.weight_decay > 0`` an L2 term selected by
        ``self.weight_decay_pattern`` is added to it.

        Args:
            image: input image batch (assumed channels-last on entry, given
                the transpose below -- TODO confirm against the caller).
            label: labels forwarded to the loss computation.

        Returns:
            The tensor named 'cost', multiplied by ``self.loss_scale`` when
            that attribute is not exactly 1.0.
        """
        net_input = self.image_preprocess(image)
        if is_channels_first(self.data_format):
            net_input = tf.transpose(
                net_input, [0, 3, 1, 2], name="image_transpose")

        tower_ctx = get_current_tower_context()
        logits = self.model_lambda(x=net_input, training=tower_ctx.is_training)
        loss = ImageNetModel.compute_loss_and_error(
            logits=logits,
            label=label,
            label_smoothing=self.label_smoothing,
        )

        if self.weight_decay > 0:
            l2_penalty = tf.contrib.layers.l2_regularizer(self.weight_decay)
            wd_loss = regularize_cost(
                regex=self.weight_decay_pattern,
                func=l2_penalty,
                name='l2_regularize_loss',
            )
            # Track both terms separately before summing them into 'cost'.
            add_moving_summary(loss, wd_loss)
            total_cost = tf.add_n([loss, wd_loss], name='cost')
        else:
            total_cost = tf.identity(loss, name='cost')
            add_moving_summary(total_cost)

        if self.loss_scale == 1.0:
            return total_cost

        logger.info("Scaling the total loss by {} ...".format(self.loss_scale))
        return total_cost * self.loss_scale
Beispiel #2
0
    def build_graph(self, x, y):
        """Embed the quantized VQ-VAE codes of ``x`` and return the training loss.

        ``x`` is encoded and quantized by ``self._vqvae_model``. The quantized
        codes go through ``self.embed`` and are L2-normalized along axis 1
        (tensor name 'embeddings'). The loss is selected by
        ``self._loss_stragegy`` (attribute name as spelled by the class):

        * 'triplet-all': margin triplet loss summed over all triplets kept by
          ``self.mask_triplet`` and divided by the number of triplets whose
          loss is still positive.
        * 'triplet-hard': per anchor, the largest distance to a valid positive
          minus the smallest distance to a valid negative, plus the margin,
          clipped at zero and averaged over anchors.
        * any other value: a dense layer with ``self._num_labels`` outputs
          trained with sparse softmax cross-entropy.

        Args:
            x: input batch passed to the VQ-VAE encoder.
            y: labels, used for triplet masking or as class indices.

        Returns:
            The scalar loss tensor. Moving summaries are registered for the
            loss and for the per-branch diagnostics.
        """
        is_training = get_current_tower_context().is_training
        z = self._vqvae_model.encode(x)
        z = self._vqvae_model.quantize(z)['quantize']

        embeddings = self.embed(z, is_training)
        embeddings = tf.nn.l2_normalize(embeddings,
                                        1,
                                        1e-10,
                                        name='embeddings')

        if self._loss_stragegy == 'triplet-all':
            distance = self.pairwise_distance(embeddings)
            # triplet_distance[a, p, n] = d(a, p) - d(a, n) + margin
            triplet_distance = tf.expand_dims(distance, 2) - \
                tf.expand_dims(distance, 1) + self._margin
            triplet_distance, num_valid_triplet = \
                self.mask_triplet(triplet_distance, y)
            triplet_distance = tf.maximum(triplet_distance, 0.)
            # Average only over triplets that still violate the margin; the
            # epsilon guards against division by zero.
            num_pos_triplet = tf.reduce_sum(
                tf.to_float(tf.greater(triplet_distance, 1e-16)))
            loss = tf.reduce_sum(triplet_distance) / (num_pos_triplet + 1e-16)
            pos_triplet_frac = num_pos_triplet / (num_valid_triplet + 1e-16)
            add_moving_summary(tf.identity(loss, 'loss'))
            add_moving_summary(
                tf.identity(pos_triplet_frac, 'pos_triplet_frac'))
        elif self._loss_stragegy == 'triplet-hard':
            distance = self.pairwise_distance(embeddings)
            valid_pos_mask = self.get_valid_mask(y)
            valid_pos_anchor = distance * valid_pos_mask
            # Reduce over the masked distances, not the raw ones: otherwise
            # the "hardest positive" could be a negative sample.
            # NOTE(review): assumes distances are non-negative and the mask is
            # a 0/1 float tensor, so masked-out entries never win the max.
            hardest_pos_anchor = tf.reduce_max(valid_pos_anchor,
                                               axis=1,
                                               keepdims=True)

            valid_neg_mask = self.get_valid_mask(y, positive_mask=False)
            # Push non-negatives up by the row maximum so they cannot be
            # selected as the row minimum.
            max_dist = tf.reduce_max(distance, axis=1, keepdims=True)
            dummy_distance = distance + max_dist * (1. - valid_neg_mask)
            hardest_neg_anchor = tf.reduce_min(dummy_distance,
                                               axis=1,
                                               keepdims=True)
            triplet_loss = tf.maximum(
                hardest_pos_anchor - hardest_neg_anchor + self._margin, 0.)
            loss = tf.reduce_mean(triplet_loss)
            add_moving_summary(tf.identity(loss, 'loss'))
        else:
            logits = tf.layers.dense(embeddings, self._num_labels)
            predictions = tf.argmax(logits, axis=1)
            correct_prediction = tf.to_float(tf.equal(predictions,
                                                      tf.cast(y, tf.int64)),
                                             name='correct_prediction')
            accuracy = tf.reduce_mean(correct_prediction, name='accuracy')
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=y)
            loss = tf.reduce_mean(cross_entropy, name='loss')

            add_moving_summary(loss)
            add_moving_summary(accuracy)

        return loss
Beispiel #3
0
    def build_graph(self, x, _):
        """Build the VQ-VAE reconstruction graph and return its loss.

        ``self.reconstruct`` yields the encoder output, the quantizer output
        (a mapping read here for 'quantize', 'encoding_indices', 'perplexity'
        and 'loss') and the reconstruction. Several of these are exported
        under fixed tensor names so they can be looked up by name later:
        'embeddings', 'latent_zq', 'pz_x', 'perplexity', 'x_recon', 'loss'.

        Args:
            x: input batch to reconstruct.
            _: unused second input.

        Returns:
            The tensor named 'loss': mean squared reconstruction error plus
            the quantizer's own loss term.
        """
        training = get_current_tower_context().is_training
        encoded, quantized, reconstruction = self.reconstruct(x, training)

        # Named pass-through tensors; the flattened ones are not used further
        # in this method.
        flat_encoded = tf.layers.Flatten()(encoded)
        tf.identity(flat_encoded, name='embeddings')
        flat_quantized = tf.layers.Flatten()(quantized['quantize'])
        tf.identity(flat_quantized, name='latent_zq')
        tf.identity(quantized['encoding_indices'], name='pz_x')
        perplexity = tf.identity(quantized['perplexity'], name='perplexity')

        reconstruction = tf.identity(reconstruction, name='x_recon')
        squared_error = (reconstruction - x) ** 2
        total = tf.reduce_mean(squared_error) + quantized['loss']
        loss = tf.identity(total, name='loss')

        add_moving_summary(loss, perplexity)
        return loss
def generate_fpn_proposals_topk_per_image(multilevel_anchor_boxes,
                                          multilevel_box_logits,
                                          multilevel_label_logits,
                                          orig_image_dims, batch_size):
    """
    Generate proposals one image at a time and keep the top-scoring ones of
    each image across all FPN levels.

    Args:
        multilevel_anchor_boxes:    #lvl anchor tensors; each is reshaped to (-1, 4) here
        multilevel_box_logits:      #lvl [ BS x (NAx4) x H x W ] boxes
        multilevel_label_logits:    #lvl [ BS x H x W x A ] tensors
        orig_image_dims: Original (prepadding) image dimensions (h,w,c)   BS x 3
        batch_size: number of images to process when training; in inference
            only the first image is processed
    Returns:
        boxes: N x 5 float, per-image results concatenated along axis 0;
            each image contributes at most the per-level top-k budget
        scores:  1-D, N (logits), aligned with ``boxes``
    Raises:
        RuntimeError: if cfg.FPN.PROPOSAL_MODE is not 'Level'.
    """

    num_lvl = len(cfg.FPN.ANCHOR_STRIDES)
    assert len(multilevel_label_logits) == num_lvl
    orig_images_hw = orig_image_dims[:, :2]

    training = get_current_tower_context().is_training
    if cfg.FPN.PROPOSAL_MODE != 'Level':
        raise RuntimeError(
            "Only level-wise predictions are supported with batches")

    fpn_nms_topk = cfg.RPN.TRAIN_PER_LEVEL_NMS_TOPK if training else cfg.RPN.TEST_PER_LEVEL_NMS_TOPK
    bs = batch_size if training else 1

    boxes_list = []
    scores_list = []
    for i in range(bs):
        all_boxes = []
        all_scores = []
        for lvl in range(num_lvl):
            with tf.name_scope(f'Lvl{lvl}'):
                # Slice out image i but keep the leading batch axis (size 1).
                im_info = tf.cast(orig_images_hw[i:(i + 1)], tf.float32)
                # h, w

                level_scores = multilevel_label_logits[lvl][i:(i + 1)]
                bbox_deltas = tf.transpose(
                    multilevel_box_logits[lvl][i:(i + 1)], [0, 2, 3, 1])

                single_level_anchor_boxes = multilevel_anchor_boxes[lvl]
                single_level_anchor_boxes = tf.reshape(
                    single_level_anchor_boxes, (-1, 4))

                # https://caffe2.ai/docs/operators-catalogue.html#generateproposals
                rois, rois_probs = tf.generate_bounding_box_proposals(
                    level_scores,
                    bbox_deltas,
                    im_info,
                    single_level_anchor_boxes,
                    spatial_scale=1.0 / cfg.FPN.ANCHOR_STRIDES[lvl],
                    pre_nms_topn=fpn_nms_topk,
                    post_nms_topn=fpn_nms_topk,
                    nms_threshold=cfg.RPN.PROPOSAL_NMS_THRESH,
                    min_size=cfg.RPN.MIN_SIZE)

                # The op saw a batch of one, so shift the first column by i.
                # NOTE(review): presumably column 0 is the batch index --
                # confirm against the op's documentation.
                all_boxes.append(
                    tf.concat((i + rois[:, :1], rois[:, 1:]), axis=1))
                all_scores.append(rois_probs)

        # All levels of image i, flattened to one row per proposal.
        proposal_boxes = tf.concat(all_boxes, axis=0)
        proposal_boxes = tf.reshape(proposal_boxes, [-1, 5])

        proposal_scores = tf.concat(all_scores, axis=0)
        proposal_scores = tf.reshape(proposal_scores, [-1])

        # Never ask for more entries than exist.
        topk = tf.minimum(tf.size(proposal_scores), fpn_nms_topk)
        topk_scores, topk_indices = tf.nn.top_k(proposal_scores,
                                                k=topk,
                                                sorted=False)

        boxes_list.append(tf.gather(proposal_boxes, topk_indices))
        # top_k already returns the selected values; no second gather needed.
        scores_list.append(topk_scores)

    boxes = tf.concat(boxes_list, axis=0)
    scores = tf.concat(scores_list, axis=0)

    return tf.stop_gradient(boxes, name='boxes'), \
        tf.stop_gradient(scores, name='scores')
def generate_fpn_proposals(multilevel_anchor_boxes, multilevel_box_logits,
                           multilevel_label_logits, orig_image_dims,
                           batch_size):
    """
    Turn the box logits of every FPN level into rois for the whole batch at
    once, then keep the K rois with the highest label scores.

    Args:
        multilevel_anchor_boxes:    #lvl anchor tensors; each is reshaped to (-1, 4) here
        multilevel_box_logits:      #lvl [ BS x (NA * 4) x H_feature x W_feature ] boxes
        multilevel_label_logits:    #lvl [ BS x H_feature x W_feature x NA ] tensors
        orig_image_dims: Original (prepadding) image dimensions (h,w,c)   BS x 3
        batch_size: multiplies the per-level top-k budget when training;
            not used in inference
    Returns:
        boxes: K x 5 float
        scores:  1-D, K (logits)
    Raises:
        RuntimeError: if cfg.FPN.PROPOSAL_MODE is not 'Level'.
    """
    strides = cfg.FPN.ANCHOR_STRIDES
    assert len(multilevel_label_logits) == len(strides)
    images_hw = orig_image_dims[:, :2]

    training = get_current_tower_context().is_training
    if cfg.FPN.PROPOSAL_MODE != 'Level':
        raise RuntimeError(
            "Only level-wise predictions are supported with batches")

    if training:
        keep_topk = cfg.RPN.TRAIN_PER_LEVEL_NMS_TOPK * batch_size
    else:
        keep_topk = cfg.RPN.TEST_PER_LEVEL_NMS_TOPK

    level_rois = []
    level_probs = []
    for lvl, stride in enumerate(strides):
        with tf.name_scope(f'Lvl{lvl}'):
            im_info = tf.cast(images_hw, tf.float32)

            # BS x H_feature x W_feature x NA
            label_logits = multilevel_label_logits[lvl]
            # BS x H_feature x W_feature x (NA * 4)
            box_deltas = tf.transpose(multilevel_box_logits[lvl],
                                      [0, 2, 3, 1])
            anchors = tf.reshape(multilevel_anchor_boxes[lvl], (-1, 4))

            # Custom TensorFlow op: translates the bbox deltas into bounding
            # box coordinates and then runs NMS (see CODEBASE.md).
            #
            # rois: (# boxes for this level) x 5, columns arranged as
            #       batch_index, x_1, y_1, x_2, y_2
            # rois_probs: 1-D, # boxes for this level
            rois, rois_probs = tf.generate_bounding_box_proposals(
                label_logits,
                box_deltas,
                im_info,
                anchors,
                spatial_scale=1.0 / stride,
                pre_nms_topn=keep_topk,
                post_nms_topn=keep_topk,
                nms_threshold=cfg.RPN.PROPOSAL_NMS_THRESH,
                min_size=cfg.RPN.MIN_SIZE)
            level_rois.append(rois)
            level_probs.append(rois_probs)

    # Num_all_rois x 5
    merged_boxes = tf.reshape(tf.concat(level_rois, axis=0), [-1, 5])
    # 1-D, Num_all_rois
    merged_scores = tf.reshape(tf.concat(level_probs, axis=0), [-1])

    # Never request more entries than there are rois.
    num_keep = tf.minimum(tf.size(merged_scores), keep_topk)
    kept_scores, kept_indices = tf.nn.top_k(merged_scores,
                                            k=num_keep,
                                            sorted=False)
    kept_boxes = tf.gather(merged_boxes, kept_indices)  # K x 5

    return tf.stop_gradient(kept_boxes, name='boxes'), \
        tf.stop_gradient(kept_scores, name='scores')
Beispiel #6
0
def dropout(inp, keep_prob):
    """Apply dropout to ``inp``, enabled only while the current tower trains.

    Args:
        inp: tensor to apply dropout to.
        keep_prob: probability of keeping a unit; the drop rate passed to
            ``tf.layers.dropout`` is ``1 - keep_prob``.

    Returns:
        The output of ``tf.layers.dropout``.
    """
    tower_ctx = get_current_tower_context()
    drop_rate = 1 - keep_prob
    return tf.layers.dropout(inp,
                             rate=drop_rate,
                             training=tower_ctx.is_training)