Example #1
def setUp(pooled_regions, pooled_h, pooled_w, feat_channels,
        trainable=False, namespace="rcnn"):
    """Calculate bounding box regressions and class probabilities

    Preconditions:
        This function assumes that the variables accessed by tf.get_variable() already exist;
        they must already have been initialized before calling this function.

    Positional Inputs:
        pooled_regions -- A tf.Tensor object with shape (num_regions, pooled_h, pooled_w,
            num_channels) containing the pooled regions of interest in the image.
        pooled_h -- A scalar containing the height of the pooled input
        pooled_w -- A scalar containing the width of the pooled input
        feat_channels -- A scalar containing the number of channels in the pooled input

    Outputs:
        A tuple containing both:
        A list of scores for a given set of classes.  In the case of the VOC 2007 dataset,
            there are 20 classes plus one background class.
            Thus, this output should be an np.array of shape (num_regions,21) with a score
            for every class.
        A list of bounding box regressions, with a different bounding box regression for
            each class.  Each bbox regression is described by four floats, so this output
            will be an np.array of shape (num_regions, 21, 4)
    """

    last_dimension = pooled_h * pooled_w * feat_channels
    with easy_scope(namespace, reuse=True), tf.device("/gpu:0"):
        with easy_scope("fc6", reuse=True):
            flattened_in = tf.reshape(pooled_regions, (-1, last_dimension))
            prevLayer = tf.nn.bias_add(tf.matmul(flattened_in,
                tf.get_variable("Weights", trainable=trainable)),
                tf.get_variable("Bias", trainable=trainable))

        prevLayer = tf.nn.relu(prevLayer, name="relu6")

        with easy_scope("fc7", reuse=True):
            prevLayer = tf.nn.bias_add(tf.matmul(prevLayer,
                tf.get_variable("Weights", trainable=trainable)),
                tf.get_variable("Bias", trainable=trainable))

        prevLayer = tf.nn.relu(prevLayer, name="relu7")

        # Produce classification probabilities
        with easy_scope("cls_score", reuse=True):
            weights = tf.get_variable("Weights", trainable=trainable)
            bias = tf.get_variable("Bias", trainable=trainable)
            scoreLayer = tf.nn.bias_add(tf.matmul(prevLayer, weights), bias, name="out")

        # Produce regressions (note that these are relative to the individual regions, so
        # the actual regions in the image still need to be computed from them)
        with easy_scope("bbox_pred", reuse=True):
            bboxPred = tf.nn.bias_add(tf.matmul(prevLayer,
                tf.get_variable("Weights", trainable=trainable)),
                tf.get_variable("Bias", trainable=trainable), name="out")

        probLayer = tf.nn.softmax(scoreLayer, name="cls_prob")
    return bboxPred, probLayer
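Note: easy_scope is a project helper that is not defined in any of these examples.  A
minimal sketch of what it is assumed to do, namely forward to tf.variable_scope so that
tf.get_variable() can fetch already-created variables (the real helper may do more, such
as only applying reuse when the scope already exists):

import tensorflow as tf
from contextlib import contextmanager

@contextmanager
def easy_scope(name=None, reuse=None):
    # Hypothetical stand-in for the project's helper: open (or re-open) a variable
    # scope so that tf.get_variable() resolves names like "<scope>/Weights".
    with tf.variable_scope(name, reuse=reuse) as scope:
        yield scope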
Example #2
def extractLayers(scope, weightsPath, biasesPath, device="/cpu:0"):
    """
    Function that extracts weights and biases into properly scoped variables.

    Positional arguments:
    scope -- tf.VariableScope ( or string representing a scope ) to place variables in
    weightsPath -- path to .npz file containing the weights
    biasesPath -- path to .npz file containing the biases

    Keyword arguments:
    device -- device on which to place the created variables
    """

    weightsPath = _fixModelPath(weightsPath)
    biasesPath = _fixModelPath(biasesPath)

    # Raw numpy values.  Need to be loaded into variables.
    weightsDict = numpy.load(weightsPath)
    biasesDict = numpy.load(biasesPath)

    # Loop through all of the layer names in the weights dictionary.
    with tf.device(device):
        with easy_scope(scope):
            warning = False
            for name, weights_tnsr in weightsDict.items():
                if name.startswith("/"):
                    name = name[1:]
                if name.endswith("/"):
                    name = name[:-1]
                with easy_scope(name):
                    try:
                        tf.get_variable("Weights",
                                        trainable=False,
                                        initializer=tf.constant(weights_tnsr))
                        tf.get_variable("Bias",
                                        trainable=False,
                                        initializer=tf.constant(
                                            biasesDict[name]))
                    except ValueError:
                        # Values were loaded elsewhere
                        warning = True
            if warning:
                print(
                    "extractLayers()  Warning : Some variable names already exist."
                    "  If unintentional, please choose a different scope name."
                )

    return
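A hypothetical usage sketch for extractLayers; the layer name "conv1_1", the file names,
and the array shapes are illustrative only, and _fixModelPath is assumed to leave
already-valid paths unchanged:

import numpy
import tensorflow as tf

# Save one array per layer, keyed by layer name (illustrative shapes).
numpy.savez("vgg_weights.npz", conv1_1=numpy.zeros((3, 3, 3, 64), dtype=numpy.float32))
numpy.savez("vgg_biases.npz", conv1_1=numpy.zeros((64,), dtype=numpy.float32))

# Creates variables named vgg16/conv1_1/Weights and vgg16/conv1_1/Bias.
extractLayers("vgg16", "vgg_weights.npz", "vgg_biases.npz", device="/cpu:0")

with tf.variable_scope("vgg16/conv1_1", reuse=True):
    weights = tf.get_variable("Weights")  # shape (3, 3, 3, 64)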
Example #3
    def createConvLayer(bottom, name, stride=[1, 1, 1, 1]):
        """ Creates a conv layer given a name.

        Preconditions:
            Expects a tf.Variable with name model_scope/layer_scope/Weights
            and one with model_scope/layer_scope/Bias to already exist.

        Inputs:
            bottom  - A tf.Tensor containing activations
            name    - A string with a name for this layer
            stride  - A list containing the stride to apply for this convolution.
                        Most likely does not need to be changed from its default.
        Outputs:
            A tf.Tensor containing the output of the convolution.
        """
        with easy_scope(name, reuse=True):
            prevLayer = tf.nn.conv2d(bottom,
                                     tf.get_variable("Weights",
                                                     trainable=train),
                                     stride,
                                     padding="SAME")
            prevLayer = tf.nn.bias_add(prevLayer,
                                       tf.get_variable("Bias",
                                                       trainable=train),
                                       name="out")
        return prevLayer
Example #4
def _calculateRpnLoss(predRawScores, predBoxes, predRegressions,
        predAnchors, mini_batch_size, gt_boxes, feature_h, feature_w):
    """Refactoring of code from calculateRpnLoss into another function for testing"""
    num_classes = tf.shape(gt_boxes)[0] - 1  # subtract one since background is not a class
    iou_threshold_neg = s.DEF_IOU_THRESHOLD_TRAIN_NEG
    iou_threshold_pos = s.DEF_IOU_THRESHOLD_TRAIN_POS
    with easy_scope(name="proposal_layer_test"), tf.device("/cpu:0"):
        labeled_boxes = iou_labeler(predBoxes, gt_boxes, iou_threshold_neg, iou_threshold_pos)

        # Sample boxes and raw scores for loss

        posIdx, negIdx = tf.py_func(lambda x: sampleBoxes(x, num_classes, mini_batch_size),
            [labeled_boxes], [tf.int32, tf.int32], stateful=False, name="sampleBoxes")
        # posIdx, negIdx = tf.py_func(sampleBoxes, [labeled_boxes, num_classes, mini_batch_size],
        #        tf.float32, stateful=False, name="sampleBoxes")
        positive_raw_scores = tf.gather(predRawScores, posIdx, axis=0,
                name="positive_raw_scores")
        negative_raw_scores = tf.gather(predRawScores, negIdx, axis=0,
                name="negative_raw_scores")

        # There is no regression loss for negative examples.  For the positives, we need
        # to find the gt regression from anchor to gt boxes
        positive_anchors = tf.gather(predAnchors, posIdx, axis=0,
                name="positive_anchors")
        positive_gt_boxes = tf.gather(gt_boxes,
            tf.cast(tf.gather(labeled_boxes[:, 4], posIdx), dtype=tf.int32),
            name="positive_gt_boxes")
        positive_gt_regs = calculateRegressions(positive_anchors, positive_gt_boxes, axis=-1)
        positive_raw_regressions = tf.gather(predRegressions, posIdx, axis=0,
                name="positive_raw_regressions")

        # Flatten regressions before passing into the huber loss function
        flat_pred_regs = tf.reshape(positive_raw_regressions, [-1])
        flat_gt_regs = tf.reshape(positive_gt_regs, [-1])
        reg_loss = tf.losses.huber_loss(flat_pred_regs, flat_gt_regs,
                reduction=tf.losses.Reduction.NONE, delta=1.0)
        reg_loss = tf.reduce_sum(reg_loss)

        # Class-agnostic log loss for positive examples
        # Need to create a whole bunch of [0,1]s of the right length
        num_pos = tf.shape(positive_raw_scores)[0]
        cls_loss_pos = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=tf.ones([num_pos], dtype=tf.int32), logits=positive_raw_scores)
        cls_loss_pos = tf.reduce_sum(cls_loss_pos)

        # Log-loss for the negative examples
        num_neg = tf.shape(negative_raw_scores)[0]
        cls_loss_neg = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=tf.zeros([num_neg], dtype=tf.int32), logits=negative_raw_scores)
        cls_loss_neg = tf.reduce_sum(cls_loss_neg)

        # Adding up and normalizing the losses.
        reg_loss /= (feature_h * feature_w) / 10.
        cls_loss = (cls_loss_pos + cls_loss_neg) / mini_batch_size

        total_loss = tf.add(reg_loss, cls_loss, name="total_loss")
        return total_loss
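sampleBoxes is not shown in this example.  The following numpy sketch illustrates the
contract implied by the tf.py_func call above; the labeling convention (column 4 holds
the matched ground-truth index for positives and a negative sentinel for background),
the positive fraction, and the function body are all assumptions:

import numpy as np

def sampleBoxes(labeled_boxes, num_classes, mini_batch_size):
    # num_classes is unused in this sketch.
    labels = labeled_boxes[:, 4]
    pos = np.where(labels >= 0)[0]
    neg = np.where(labels < 0)[0]
    # Keep at most half the mini-batch as positives, fill the rest with negatives.
    pos = pos[:mini_batch_size // 2]
    neg = neg[:mini_batch_size - len(pos)]
    # Return int32 indices, matching the [tf.int32, tf.int32] output types above.
    return pos.astype(np.int32), neg.astype(np.int32)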
Example #5
def calculateRpnLoss(rpnRawScores, rpnBboxPred, feature_h, feature_w,
                     image_attr, gt_boxes):
    """ Calculates the loss for the region proposal network

    Inputs:
        rpnRawScores -- tf.Tensor object containing the objectness scores for each region
            before application of softmax.
        rpnBboxPred -- tf.Tensor object containing the bounding-box regressions for each
            region.  Must be of a shape compatible with rpnRawScores, i.e. only differing
            in shape in the last dimension
        feature_h -- Height of the convolutional input to the RPN
        feature_w -- Width of the convolutional input to the RPN
        gt_boxes -- Ground-truth boxes with respect to which the loss is calculated.
            Must be in the format (num_gt_boxes, 5), where the rows are of the form
            [x0, y0, x1, y1, class], where class is the category to which each ground-truth
            box belongs

    Output:
        The loss for this minibatch
    """
    mini_batch_size = 128

    with easy_scope(name="proposal_layer_test"), tf.device("/cpu:0"):
        rpnScores = tf.nn.softmax(rpnRawScores, dim=-1, name="rpn_cls_prob")
        _, rpnScores = tf.unstack(rpnScores, num=2, axis=-1)

    predBoxes, predScores, predIndices, predAnchors = proposalLayer_train(
        rpnScores, rpnBboxPred, feature_h, feature_w, image_attr)

    with easy_scope(name="proposal_layer_test"), tf.device("/cpu:0"):
        predRawScores = tf.gather(tf.reshape(rpnRawScores, (-1, 2)),
                                  predIndices,
                                  axis=0,
                                  name="final_raw_scores")

        predRegressions = tf.gather(tf.reshape(rpnBboxPred, (-1, 4)),
                                    predIndices,
                                    axis=0,
                                    name="final_raw_regressions")

    return _calculateRpnLoss(predRawScores, predBoxes, predRegressions,
                             predAnchors, mini_batch_size, gt_boxes, feature_h,
                             feature_w)
Example #6
def extractLayers(scope, weightsPath, biasesPath, device="/cpu:0"):
    """
    Function that extracts weights and biases into properly scoped variables.

    Positional arguments:
    scope -- tf.VariableScope ( or string representing a scope ) to place variables in
    weightsPath -- path to .npz file containing the weights
    biasesPath -- path to .npz file containing the biases

    Keyword arguments:
    device -- device on which to place the created variables
    """

    weightsPath = _fixModelPath(weightsPath)
    biasesPath = _fixModelPath(biasesPath)

    # Raw numpy values.  Need to be loaded into variables.
    weightsDict = numpy.load(weightsPath)
    biasesDict = numpy.load(biasesPath)

    # Loop through all of the layer names in the weights dictionary.
    with tf.device(device):
        with easy_scope(scope):
            warning = False
            for name, weights_tnsr in weightsDict.items():
                if name.startswith("/"):
                    name = name[1:]
                if name.endswith("/"):
                    name = name[:-1]
                with easy_scope(name):
                    try:
                        tf.get_variable("Weights", trainable=False,
                            initializer=tf.constant(weights_tnsr))
                        tf.get_variable("Bias", trainable=False,
                            initializer=tf.constant(biasesDict[name]))
                    except ValueError:
                        # Values were loaded elsewhere
                        warning = True
            if warning:
                print("extractLayers()  Warning : Some variable names already exist."
                    "  If unintentional, please choose a different scope name.")

    return
Example #7
    def createConvLayer(self, name, trainable=True):
        """Creates a convolutional Tensorflow layer given its name.

        Assumes that properly named bias and weight variables are already loaded in memory
        """

        with easy_scope(name):
            conv = tf.nn.conv2d(self.prevLayer, tf.get_variable(
                "Weights", trainable=trainable), [1, 1, 1, 1], padding="SAME")
            bias = tf.nn.bias_add(conv, tf.get_variable("Bias", trainable=trainable))
        return bias
Example #8
    def createFcLayer(self, name, trainable=True):
        """Creates a fully connected layer

        Loads the weights from the weightsDict and biasesDict dictionaries using
        the key given by name, and returns the bias layer.
        """

        with easy_scope(name, reuse=True):
            layer = tf.nn.bias_add(
                tf.matmul(self.prevLayer, tf.get_variable("Weights", trainable=trainable)),
                tf.get_variable("Bias", trainable=trainable))

        return layer
Example #9
    def createFcLayer(self, name, trainable=True):
        """Creates a fully connected layer

        Loads the weights from the weightsDict and biasesDict dictionaries using
        the key given by name, and returns the bias layer.
        """

        with easy_scope(name, reuse=True):
            layer = tf.nn.bias_add(
                tf.matmul(self.prevLayer,
                          tf.get_variable("Weights", trainable=trainable)),
                tf.get_variable("Bias", trainable=trainable))

        return layer
Example #10
def calculateRpnLoss(rpnRawScores, rpnBboxPred, feature_h, feature_w, image_attr, gt_boxes):
    """ Calculates the loss for the region proposal network

    Inputs:
        rpnRawScores -- tf.Tensor object containing the objectness scores for each region
            before application of softmax.
        rpnBboxPred -- tf.Tensor object containing the bounding-box regressions for each
            region.  Must be of a shape compatible with rpnRawScores, i.e. only differing
            in shape in the last dimension
        feature_h -- Height of the convolutional input to the RPN
        feature_w -- Width of the convolutional input to the RPN
        gt_boxes -- Ground-truth boxes with respect to which the loss is calculated.
            Must be in the format (num_gt_boxes, 5), where the rows are of the form
            [x0, y0, x1, y1, class], where class is the category to which each ground-truth
            box belongs

    Output:
        The loss for this minibatch
    """
    mini_batch_size = 128

    with easy_scope(name="proposal_layer_test"), tf.device("/cpu:0"):
        rpnScores = tf.nn.softmax(rpnRawScores, dim=-1, name="rpn_cls_prob")
        _, rpnScores = tf.unstack(rpnScores, num=2, axis=-1)

    predBoxes, predScores, predIndices, predAnchors = proposalLayer_train(
        rpnScores, rpnBboxPred, feature_h, feature_w, image_attr)

    with easy_scope(name="proposal_layer_test"), tf.device("/cpu:0"):
        predRawScores = tf.gather(tf.reshape(rpnRawScores, (-1, 2)),
            predIndices, axis=0, name="final_raw_scores")

        predRegressions = tf.gather(tf.reshape(rpnBboxPred, (-1, 4)),
            predIndices, axis=0, name="final_raw_regressions")

    return _calculateRpnLoss(predRawScores, predBoxes, predRegressions,
            predAnchors, mini_batch_size, gt_boxes, feature_h, feature_w)
Example #11
    def createConvLayer(self, name, trainable=True):
        """Creates a convolutional Tensorflow layer given its name.

        Assumes that properly named bias and weight variables are already loaded in memory
        """

        with easy_scope(name):
            conv = tf.nn.conv2d(self.prevLayer,
                                tf.get_variable("Weights",
                                                trainable=trainable),
                                [1, 1, 1, 1],
                                padding="SAME")
            bias = tf.nn.bias_add(conv,
                                  tf.get_variable("Bias", trainable=trainable))
        return bias
Example #12
    def createFirstFcLayer(self, name, trainable=True):
        """Creates the first fully connected layer

        This layer converts the output of the last convolutional layer to the
        input for the next fully connected ones.  Returns the bias layer.
        """

        INPUT_SIZE = 25088
        # OUTPUT_SIZE = 4096

        with easy_scope(name, reuse=True):
            flattenedInput = tf.reshape(self.prevLayer, [-1, INPUT_SIZE])
            layer = tf.nn.bias_add(
                tf.matmul(flattenedInput, tf.get_variable("Weights", trainable=trainable)),
                tf.get_variable("Bias", trainable=trainable))

        return layer
Example #13
def _proposalLayer(feature_stride, iou_threshold, pre_nms_keep, post_nms_keep,
        scores, bbox_regressions, feature_h, feature_w, image_attr,
        minimum_dim, device, scope_name, train_rpn):
    """Implementation of internal logic of proposalLayer, see proposalLayer"""

    with easy_scope(name=scope_name), tf.device(device):
        baseAnchors = generateAnchors(ratios=[2, 1, .5])

        shiftedAnchors = generateShiftedAnchors(baseAnchors, feature_h, feature_w,
                feature_stride)

        regressedAnchors = regressAnchors(shiftedAnchors, bbox_regressions)

        if train_rpn is True:
            # In this case, we need to not clip anchors to image boundaries, but rather
            # eliminate any cross image-boundary anchors.
            clippedAnchors, c_indices = killRegions(regressedAnchors, image_attr)
            p_anchors, p_scores, p_indices = prunedScoresAndAnchors(
                clippedAnchors, scores, minimum_dim, image_attr)
            # Select p_indices from c_indices
            p_indices = tf.gather(c_indices, p_indices)
        else:
            clippedAnchors = clipRegions(regressedAnchors, image_attr)

            p_anchors, p_scores, p_indices = prunedScoresAndAnchors(clippedAnchors,
                scores, minimum_dim, image_attr)

        top_scores, top_score_indices = tf.nn.top_k(p_scores, k=pre_nms_keep, name="top_scores")

        top_anchors = tf.gather(p_anchors, top_score_indices, name="top_anchors", axis=0)
        top_indices = tf.gather(p_indices, top_score_indices, name="top_indices")

        # We want nms to keep everything that passes the IoU test
        post_nms_indices = nms(top_anchors, top_scores,
                            post_nms_keep, iou_threshold=iou_threshold, name="post_nms_indices")

        final_anchors = tf.gather(top_anchors, post_nms_indices, axis=0,
                name="proposal_regions")
        final_scores = tf.gather(top_scores, post_nms_indices, axis=0,
                name="proposal_region_scores")
        final_indices = tf.gather(top_indices, post_nms_indices,
                name="proposal_region_indices")
        final_base_anchors = tf.gather(tf.reshape(shiftedAnchors, (-1, 4)), final_indices,
                axis=0, name="proposal_region_base_anchors")

    return final_anchors, final_scores, final_indices, final_base_anchors
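generateAnchors, generateShiftedAnchors, regressAnchors, and the other helpers used above
are project functions that are not shown here.  Purely to illustrate the anchor-tiling
step, here is a plain numpy sketch of what generateShiftedAnchors presumably computes;
the packing order and coordinate convention of the real helper may differ:

import numpy as np

def generate_shifted_anchors_sketch(base_anchors, feature_h, feature_w, feature_stride):
    # Place every base anchor (shape (A, 4), as [x0, y0, x1, y1]) at every feature-map
    # cell, offset by the feature stride in image coordinates.  The result has shape
    # (feature_h * feature_w * A, 4).
    shift_x = np.arange(feature_w) * feature_stride
    shift_y = np.arange(feature_h) * feature_stride
    sx, sy = np.meshgrid(shift_x, shift_y)
    shifts = np.stack([sx.ravel(), sy.ravel(), sx.ravel(), sy.ravel()], axis=1)
    # (H*W, 1, 4) + (1, A, 4) broadcasts to (H*W, A, 4).
    anchors = shifts[:, None, :] + base_anchors[None, :, :]
    return anchors.reshape(-1, 4)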
Example #14
    def createFirstFcLayer(self, name, trainable=True):
        """Creates the first fully connected layer

        This layer converts the output of the last convolutional layer to the
        input for the next fully connected ones.  Returns the bias layer.
        """

        INPUT_SIZE = 25088
        # OUTPUT_SIZE = 4096

        with easy_scope(name, reuse=True):
            flattenedInput = tf.reshape(self.prevLayer, [-1, INPUT_SIZE])
            layer = tf.nn.bias_add(
                tf.matmul(flattenedInput,
                          tf.get_variable("Weights", trainable=trainable)),
                tf.get_variable("Bias", trainable=trainable))

        return layer
Example #15
    def createConvLayer(bottom, name, stride=[1, 1, 1, 1]):
        """ Creates a conv layer given a name.

        Preconditions:
            Expects a tf.Variable with name model_scope/layer_scope/Weights
            and one with model_scope/layer_scope/Bias to already exist.

        Inputs:
            bottom  - A tf.Tensor containing activations
            name    - A string with a name for this layer
            stride  - A list containing the stride to apply for this convolution.
                        Most likely does not need to be changed from its default.
        Outputs:
            A tf.Tensor containing the output of the convolution.
        """
        with easy_scope(name, reuse=True):
            prevLayer = tf.nn.conv2d(bottom, tf.get_variable("Weights", trainable=train),
                    stride, padding="SAME")
            prevLayer = tf.nn.bias_add(prevLayer, tf.get_variable("Bias", trainable=train),
                    name="out")
        return prevLayer
Example #16
def Rpn(features, image_attr, train_net=None, namespace="rpn"):
    """ Region proposal network.  Proposes regions to later be pooled and classified/regressed

    Inputs:
    features    - A tf.Tensor object of rank 4, dimensions (batch, height, width, channel),
        since this is the standard tensorflow order.

    image_attr  - A tf.Tensor object of rank 1, with values [img_h, img_w, scaling_factor],
        i.e. the resized image's height and width and the scaling factor used to resize
        the original image.
    train_net   - Can be set to either None (default), "TRAIN_RPN", or "TRAIN_R-CNN".  When
        set to one of the latter 'TRAIN_' settings, it initializes the network differently,
        for training instead of for prediction.

    Output:
        A tf.Tensor object of rank 2 with dimensions (num_rois, 4), where the second dimension
        is of the form {x0, y0, x1, y1}
    """
    train = False
    if train_net is not None:
        train = True

    def createConvLayer(bottom, name, stride=[1, 1, 1, 1]):
        """ Creates a conv layer given a name.

        Preconditions:
            Expects a tf.Variable with name model_scope/layer_scope/Weights
            and one with model_scope/layer_scope/Bias to already exist.

        Inputs:
            bottom  - A tf.Tensor containing activations
            name    - A string with a name for this layer
            stride  - A list containing the stride to apply for this convolution.
                        Most likely does not need to be changed from its default.
        Outputs:
            A tf.Tensor containing the output of the convolution.
        """
        with easy_scope(name, reuse=True):
            prevLayer = tf.nn.conv2d(bottom, tf.get_variable("Weights", trainable=train),
                    stride, padding="SAME")
            prevLayer = tf.nn.bias_add(prevLayer, tf.get_variable("Bias", trainable=train),
                    name="out")
        return prevLayer

    with easy_scope(namespace, reuse=True):
        layer3x3 = createConvLayer(features, "rpn_conv/3x3")
        layer3x3 = tf.nn.relu(layer3x3, "rpn_relu/3x3")

        # Region Proposal Network - Probabilities
        prevLayer = createConvLayer(layer3x3, "rpn_cls_score")

        # Assuming that feat_w = feat_h = 14 and that the number of anchors is 9,
        # the output should be of shape (9,14,14,2).

        # However, a tf.nn.conv2d cannot create batches out of thin air.  Hence, the
        # rpn_cls_score should create a (1, 14, 14, 9*2) instead, which we reshape to
        # (1, 14, 14, 2, 9), transpose to (9, 14, 14, 2, 1), then tf.squeeze the last
        # dimension out to arrive at the desired wonderful shape of (9, 14, 14,
        # 2).  The last dimension of rpn_cls_score is unpacked from (9*2) to (2,9) and
        # not (9,2) since this is how the weights imported from caffe are packed.

        with easy_scope("create_rpn_score_batches"), tf.device("/cpu:0"):
            feature_h = tf.shape(features)[1]
            feature_w = tf.shape(features)[2]
            prevLayer = tf.reshape(prevLayer, (1, feature_h, feature_w, 2, 9))
            prevLayer = tf.transpose(prevLayer, (4, 1, 2, 3, 0))
            prevLayer = tf.squeeze(prevLayer)

            if train_net != "TRAIN_RPN":
                rpnScores = tf.nn.softmax(prevLayer, dim=-1, name="rpn_cls_prob_raw")
                _, rpnScores = tf.unstack(rpnScores, num=2, axis=-1)
            else:
                # When training the RPN, pass the raw scores through;
                # calculateRpnLoss applies the softmax itself.
                rpnScores = prevLayer

            rpnScores = tf.identity(rpnScores, name="rpn_cls_prob")

        with tf.device("/gpu:0"):
            # Region Proposal Network - Bounding Box Proposal Regression
            rpnBboxPred = createConvLayer(layer3x3, "rpn_bbox_pred")

        with easy_scope("create_rpn_bbox_batches"), tf.device("/cpu:0"):
            # We want to reshape rpnBboxPred just like we did the scores.
            # Only difference is that we reshape to (9,14,14,4) instead of
            # (9,14,14,2) (in the case of feat_h=feat_w=14)

            prevLayer = tf.reshape(rpnBboxPred, (1, feature_h, feature_w, 9, 4))
            prevLayer = tf.transpose(prevLayer, (3, 1, 2, 4, 0))
            rpnBboxPred = tf.squeeze(prevLayer)

        if train_net != "TRAIN_RPN":
            out = proposalLayer(
                s.DEF_FEATURE_STRIDE,
                s.DEF_IOU_THRESHOLD,
                s.DEF_PRE_NMS_KEEP,
                s.DEF_POST_NMS_KEEP,
                rpnScores,
                rpnBboxPred,
                feature_h,
                feature_w,
                image_attr,
                s.DEF_MIN_PROPOSAL_DIMS
            )
            return out

        else:
            return rpnScores, rpnBboxPred, feature_h, feature_w, image_attr
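The reshape/transpose in create_rpn_score_batches can be checked with a small numpy demo;
the 2x2 feature map and 9 anchors below are used purely for illustration:

import numpy as np

raw = np.zeros((1, 2, 2, 2 * 9))          # conv output: (1, H, W, 2*9)
scores = raw.reshape(1, 2, 2, 2, 9)       # unpack the channels as (2, 9)
scores = scores.transpose(4, 1, 2, 3, 0)  # -> (9, H, W, 2, 1)
scores = np.squeeze(scores, axis=-1)      # -> (9, H, W, 2)
print(scores.shape)                       # (9, 2, 2, 2)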
Example #17
def _proposalLayer(feature_stride, iou_threshold, pre_nms_keep, post_nms_keep,
                   scores, bbox_regressions, feature_h, feature_w, image_attr,
                   minimum_dim, device, scope_name, train_rpn):
    """Implementation of internal logic of proposalLayer, see proposalLayer"""

    with easy_scope(name=scope_name), tf.device(device):
        baseAnchors = generateAnchors(ratios=[2, 1, .5])

        shiftedAnchors = generateShiftedAnchors(baseAnchors, feature_h,
                                                feature_w, feature_stride)

        regressedAnchors = regressAnchors(shiftedAnchors, bbox_regressions)

        if train_rpn is True:
            # In this case, we need to not clip anchors to image boundaries, but rather
            # eliminate any cross image-boundary anchors.
            clippedAnchors, c_indices = killRegions(regressedAnchors,
                                                    image_attr)
            p_anchors, p_scores, p_indices = prunedScoresAndAnchors(
                clippedAnchors, scores, minimum_dim, image_attr)
            # Select p_indices from c_indices
            p_indices = tf.gather(c_indices, p_indices)
        else:
            clippedAnchors = clipRegions(regressedAnchors, image_attr)

            p_anchors, p_scores, p_indices = prunedScoresAndAnchors(
                clippedAnchors, scores, minimum_dim, image_attr)

        top_scores, top_score_indices = tf.nn.top_k(p_scores,
                                                    k=pre_nms_keep,
                                                    name="top_scores")

        top_anchors = tf.gather(p_anchors,
                                top_score_indices,
                                name="top_anchors",
                                axis=0)
        top_indices = tf.gather(p_indices,
                                top_score_indices,
                                name="top_indices")

        # We want nms to keep everything that passes the IoU test
        post_nms_indices = nms(top_anchors,
                               top_scores,
                               post_nms_keep,
                               iou_threshold=iou_threshold,
                               name="post_nms_indices")

        final_anchors = tf.gather(top_anchors,
                                  post_nms_indices,
                                  axis=0,
                                  name="proposal_regions")
        final_scores = tf.gather(top_scores,
                                 post_nms_indices,
                                 axis=0,
                                 name="proposal_region_scores")
        final_indices = tf.gather(top_indices,
                                  post_nms_indices,
                                  name="proposal_region_indices")
        final_base_anchors = tf.gather(tf.reshape(shiftedAnchors, (-1, 4)),
                                       final_indices,
                                       axis=0,
                                       name="proposal_region_base_anchors")

    return final_anchors, final_scores, final_indices, final_base_anchors
Example #18
def Rpn(features, image_attr, train_net=None, namespace="rpn"):
    """ Region proposal network.  Proposes regions to later be pooled and classified/regressed

    Inputs:
    features    - A tf.Tensor object of rank 4, dimensions (batch, height, width, channel),
        since this is the standard tensorflow order.

    image_attr  - A tf.Tensor object of rank 1, with values [img_h, img_w, scaling_factor],
        i.e. the resized image's height and width and the scaling factor used to resize
        the original image.
    train_net   - Can be set to either None (default), "TRAIN_RPN", or "TRAIN_R-CNN".  When
        set to one of the latter 'TRAIN_' settings, it initializes the network differently,
        for training instead of for prediction.

    Output:
        A tf.Tensor object of rank 2 with dimensions (num_rois, 4), where the second dimension
        is of the form {x0, y0, x1, y1}
    """
    train = False
    if train_net is not None:
        train = True

    def createConvLayer(bottom, name, stride=[1, 1, 1, 1]):
        """ Creates a conv layer given a name.

        Preconditions:
            Expects a tf.Variable with name model_scope/layer_scope/Weights
            and one with model_scope/layer_scope/Bias to already exist.

        Inputs:
            bottom  - A tf.Tensor containing activations
            name    - A string with a name for this layer
            stride  - A list containing the stride to apply for this convolution.
                        Most likely does not need to be changed from its default.
        Outputs:
            A tf.Tensor containing the output of the convolution.
        """
        with easy_scope(name, reuse=True):
            prevLayer = tf.nn.conv2d(bottom,
                                     tf.get_variable("Weights",
                                                     trainable=train),
                                     stride,
                                     padding="SAME")
            prevLayer = tf.nn.bias_add(prevLayer,
                                       tf.get_variable("Bias",
                                                       trainable=train),
                                       name="out")
        return prevLayer

    with easy_scope(namespace, reuse=True):
        layer3x3 = createConvLayer(features, "rpn_conv/3x3")
        layer3x3 = tf.nn.relu(layer3x3, "rpn_relu/3x3")

        # Region Proposal Network - Probabilities
        prevLayer = createConvLayer(layer3x3, "rpn_cls_score")

        # Assuming that feat_w = feat_h = 14 and that the number of anchors is 9,
        # the output should be of shape (9,14,14,2).

        # However, a tf.nn.conv2d cannot create batches out of thin air.  Hence, the
        # rpn_cls_score should create a (1, 14, 14, 9*2) instead, which we reshape to
        # (1, 14, 14, 2, 9), transpose to (9, 14, 14, 2, 1), then tf.squeeze the last
        # dimension out to arrive at the desired wonderful shape of (9, 14, 14,
        # 2).  The last dimension of rpn_cls_score is unpacked from (9*2) to (2,9) and
        # not (9,2) since this is how the weights imported from caffe are packed.

        with easy_scope("create_rpn_score_batches"), tf.device("/cpu:0"):
            feature_h = tf.shape(features)[1]
            feature_w = tf.shape(features)[2]
            prevLayer = tf.reshape(prevLayer, (1, feature_h, feature_w, 2, 9))
            prevLayer = tf.transpose(prevLayer, (4, 1, 2, 3, 0))
            prevLayer = tf.squeeze(prevLayer)

            if train_net != "TRAIN_RPN":
                rpnScores = tf.nn.softmax(prevLayer,
                                          dim=-1,
                                          name="rpn_cls_prob_raw")
                _, rpnScores = tf.unstack(rpnScores, num=2, axis=-1)
            else:
                # When training the RPN, pass the raw scores through;
                # calculateRpnLoss applies the softmax itself.
                rpnScores = prevLayer

            rpnScores = tf.identity(rpnScores, name="rpn_cls_prob")

        with tf.device("/gpu:0"):
            # Region Proposal Network - Bounding Box Proposal Regression
            rpnBboxPred = createConvLayer(layer3x3, "rpn_bbox_pred")

        with easy_scope("create_rpn_bbox_batches"), tf.device("/cpu:0"):
            # We want to reshape rpnBboxPred just like we did the scores.
            # Only difference is that we reshape to (9,14,14,4) instead of
            # (9,14,14,2) (in the case of feat_h=feat_w=14)

            prevLayer = tf.reshape(rpnBboxPred,
                                   (1, feature_h, feature_w, 9, 4))
            prevLayer = tf.transpose(prevLayer, (3, 1, 2, 4, 0))
            rpnBboxPred = tf.squeeze(prevLayer)

        if train_net != "TRAIN_RPN":
            out = proposalLayer(s.DEF_FEATURE_STRIDE, s.DEF_IOU_THRESHOLD,
                                s.DEF_PRE_NMS_KEEP, s.DEF_POST_NMS_KEEP,
                                rpnScores, rpnBboxPred, feature_h, feature_w,
                                image_attr, s.DEF_MIN_PROPOSAL_DIMS)
            return out

        else:
            return rpnScores, rpnBboxPred, feature_h, feature_w, image_attr
Example #19
    def buildGraph(self,
                   prevLayer,
                   train=False,
                   train_starting_at=None,
                   weightsPath=s.DEF_WEIGHTS_PATH,
                   biasesPath=s.DEF_BIASES_PATH,
                   network_version="VGG16",
                   device="/gpu:0",
                   custom_layout=None):
        """Builds up the computation graph based on the given parameters.

        Positional arguments:
            prevLayer -- VGG must be connected to the output of another op.
                When making a vgg16 network, for example, the input may be
                a tf.Placeholder, as is usually the case.

        Keyword arguments:
            train -- If True, sets all variables in the created computation
                graph to be trainable.  If False (default), then it sets them
                to not be trainable, unless overridden by a following option.
            train_starting_at -- If set to the name of a layer, sets all
                layers to trainable after and including that layer.  Overrides
                "train" keyword argument.
            weightsPath -- Path to .npz file containing properly namespaced
                weights for this network.  See loadNetVars for how to properly
                do this.
            biasesPath -- Path to .npz file containing properly namespaced
                biases for this network.  See above
            network_version -- If it is desired to, for example, create
                a VGG16 network, the default "VGG16" suffices.  The options
                are "VGG16", "VGG19", "VGG16CONV", and "VGG19CONV".  The
                latter two have as their last layers the last convolutional
                outputs of their respective CNNs
            device -- The device onto which to place all operations.  By default
                set to "/gpu:0"; running convolutions on CPUs is not fun.
            custom_layout -- In case one desires to make a custom VGG-like
                convolutional neural network, the exact layout of the neural network
                can be provided in the internally used format.  See class method
                makeLayout(self, name) above for an example
        """

        # Extracts the information from .npz files and puts them into properly
        # scoped tf.Variable(s)
        loadNetVars.extractLayers(self.namespace, weightsPath, biasesPath)

        # Set up the network layout
        layout = []
        if network_version is not None:
            _layout = self.makeLayout(network_version)

            # Set default device and trainability for layers
            for layer in _layout:
                layer["device"] = device
                layer["trainable"] = train

            # Set trainability when using train_starting_at
            if train_starting_at is not None:
                setTrainingTrue = False
                for layer in _layout:
                    if layer["name"] == train_starting_at:
                        setTrainingTrue = True
                    layer["trainable"] = setTrainingTrue

            # Set dropout for any fully connected layers being trained
            for layer in _layout:
                if layer["name"].startswith(
                        'fc') and layer["trainable"] is True:
                    dropoutLayer = {}
                    dropoutLayer["name"] = layer["name"].replace(
                        'fc', 'drop', 1)
                    dropoutLayer["device"] = device
                    dropoutLayer["trainable"] = None
                    layout.append(dropoutLayer)
                    layout.append(layer)
                else:
                    # No dropout needed
                    layout.append(layer)

        # In the case of a custom layout passed in
        if custom_layout is not None:
            layout = custom_layout

        # Actualize the layout
        with easy_scope(self.namespace, reuse=True):
            self.prevLayer = prevLayer
            for layer in layout:
                with tf.device(layer["device"]):
                    self.addLayer(layer["name"], trainable=layer["trainable"])

        eprint("VGG computational graph successfully actualized!  See layers"
               " attribute to inspect its ops.")

        return self.prevLayer
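A hypothetical call sequence for buildGraph; the class name VGG, its constructor argument,
and the placeholder shape follow the other examples in this listing and are assumptions
rather than a documented API:

import tensorflow as tf

image = tf.placeholder(tf.float32, shape=(1, None, None, 3), name="image")
vgg = VGG("vgg16")
# Build the convolutional part of VGG16 for inference, using the default weight paths.
conv_features = vgg.buildGraph(image,
                               train=False,
                               network_version="VGG16CONV",
                               device="/cpu:0")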
Example #20
def setUp(pooled_regions,
          pooled_h,
          pooled_w,
          feat_channels,
          trainable=False,
          namespace="rcnn"):
    """Calculate bounding box regressions and class probabilities

    Preconditions:
        This function assumes that the variables accessed by tf.get_variable() already exist;
        they must already have been initialized before calling this function.

    Positional Inputs:
        pooled_regions -- A tf.Tensor object with shape (num_regions, pooled_h, pooled_w,
            num_channels) containing the pooled regions of interest in the image.
        pooled_h -- A scalar containing the height of the pooled input
        pooled_w -- A scalar containing the width of the pooled input
        feat_channels -- A scalar containing the number of channels in the pooled input

    Outputs:
        A tuple containing both:
        A list of scores for a given set of classes.  In the case of the VOC 2007 dataset,
            there are 20 classes plus one background class.
            Thus, this output should be an np.array of shape (num_regions,21) with a score
            for every class.
        A list of bounding box regressions, with a different bounding box regression for
            each class.  Each bbox regression is described by four floats, so this output
            will be an np.array of shape (num_regions, 21, 4)
    """

    last_dimension = pooled_h * pooled_w * feat_channels
    with easy_scope(namespace, reuse=True), tf.device("/gpu:0"):
        with easy_scope("fc6", reuse=True):
            flattened_in = tf.reshape(pooled_regions, (-1, last_dimension))
            prevLayer = tf.nn.bias_add(
                tf.matmul(flattened_in,
                          tf.get_variable("Weights", trainable=trainable)),
                tf.get_variable("Bias", trainable=trainable))

        prevLayer = tf.nn.relu(prevLayer, name="relu6")

        with easy_scope("fc7", reuse=True):
            prevLayer = tf.nn.bias_add(
                tf.matmul(prevLayer,
                          tf.get_variable("Weights", trainable=trainable)),
                tf.get_variable("Bias", trainable=trainable))

        prevLayer = tf.nn.relu(prevLayer, name="relu7")

        # Produce classification probabilities
        with easy_scope("cls_score", reuse=True):
            weights = tf.get_variable("Weights", trainable=trainable)
            bias = tf.get_variable("Bias", trainable=trainable)
            scoreLayer = tf.nn.bias_add(tf.matmul(prevLayer, weights),
                                        bias,
                                        name="out")

        # Produce regressions (note that these are relative to the individual regions, so
        # the actual regions in the image still need to be computed from them)
        with easy_scope("bbox_pred", reuse=True):
            bboxPred = tf.nn.bias_add(tf.matmul(
                prevLayer, tf.get_variable("Weights", trainable=trainable)),
                                      tf.get_variable("Bias",
                                                      trainable=trainable),
                                      name="out")

        probLayer = tf.nn.softmax(scoreLayer, name="cls_prob")
    return bboxPred, probLayer
Example #21
    def buildGraph(self, prevLayer, train=False, train_starting_at=None,
            weightsPath=s.DEF_WEIGHTS_PATH, biasesPath=s.DEF_BIASES_PATH,
            network_version="VGG16", device="/gpu:0", custom_layout=None):
        """Builds up the computation graph based on the given parameters.

        Positional arguments:
            prevLayer -- VGG must be connected to the output of another op.
                When making a vgg16 network, for example, the input may be
                a tf.Placeholder, as is usually the case.

        Keyword arguments:
            train -- If True, sets all variables in the created computation
                graph to be trainable.  If False (default), then it sets them
                to not be trainable, unless overridden by a following option.
            train_starting_at -- If set to the name of a layer, sets all
                layers to trainable after and including that layer.  Overrides
                "train" keyword argument.
            weightsPath -- Path to .npz file containing properly namespaced
                weights for this network.  See loadNetVars for how to properly
                do this.
            biasesPath -- Path to .npz file containing properly namespaced
                biases for this network.  See above
            network_version -- If it is desired to, for example, create
                a VGG16 network, the default "VGG16" suffices.  The options
                are "VGG16", "VGG19", "VGG16CONV", and "VGG19CONV".  The
                latter two have as their last layers the last convolutional
                outputs of their respective CNNs
            device -- The device onto which to place all operations.  By default
                set to "/gpu:0"; running convolutions on CPUs is not fun.
            custom_layout -- In case one desires to make a custom VGG-like
                convolutional neural network, the exact layout of the neural network
                can be provided in the internally used format.  See class method
                makeLayout(self, name) above for an example
        """

        # Extracts the information from .npz files and puts them into properly
        # scoped tf.Variable(s)
        loadNetVars.extractLayers(self.namespace, weightsPath, biasesPath)

        # Set up the network layout
        layout = []
        if network_version is not None:
            _layout = self.makeLayout(network_version)

            # Set default device and trainability for layers
            for layer in _layout:
                layer["device"] = device
                layer["trainable"] = train

            # Set trainability when using train_starting_at
            if train_starting_at is not None:
                setTrainingTrue = False
                for layer in _layout:
                    if layer["name"] == train_starting_at:
                        setTrainingTrue = True
                    layer["trainable"] = setTrainingTrue

            # Set dropout for any fully connected layers being trained
            for layer in _layout:
                if layer["name"].startswith('fc') and layer["trainable"] is True:
                    dropoutLayer = {}
                    dropoutLayer["name"] = layer["name"].replace('fc', 'drop', 1)
                    dropoutLayer["device"] = device
                    dropoutLayer["trainable"] = None
                    layout.append(dropoutLayer)
                    layout.append(layer)
                else:
                    # No dropout needed
                    layout.append(layer)

        # In the case of a custom layout passed in
        if custom_layout is not None:
            layout = custom_layout

        # Actualize the layout
        with easy_scope(self.namespace, reuse=True):
            self.prevLayer = prevLayer
            for layer in layout:
                with tf.device(layer["device"]):
                    self.addLayer(layer["name"], trainable=layer["trainable"])

        eprint("VGG computational graph successfully actualized!  See layers"
            " attribute to inspect its ops.")

        return self.prevLayer
Example #22
def _calculateRpnLoss(predRawScores, predBoxes, predRegressions, predAnchors,
                      mini_batch_size, gt_boxes, feature_h, feature_w):
    """Refactoring of code from calculateRpnLoss into another function for testing"""
    num_classes = tf.shape(gt_boxes)[0] - 1  # subtract one since background is not a class
    iou_threshold_neg = s.DEF_IOU_THRESHOLD_TRAIN_NEG
    iou_threshold_pos = s.DEF_IOU_THRESHOLD_TRAIN_POS
    with easy_scope(name="proposal_layer_test"), tf.device("/cpu:0"):
        labeled_boxes = iou_labeler(predBoxes, gt_boxes, iou_threshold_neg,
                                    iou_threshold_pos)

        # Sample boxes and raw scores for loss

        posIdx, negIdx = tf.py_func(
            lambda x: sampleBoxes(x, num_classes, mini_batch_size),
            [labeled_boxes], [tf.int32, tf.int32],
            stateful=False,
            name="sampleBoxes")
        # posIdx, negIdx = tf.py_func(sampleBoxes, [labeled_boxes, num_classes, mini_batch_size],
        #        tf.float32, stateful=False, name="sampleBoxes")
        positive_raw_scores = tf.gather(predRawScores,
                                        posIdx,
                                        axis=0,
                                        name="positive_raw_scores")
        negative_raw_scores = tf.gather(predRawScores,
                                        negIdx,
                                        axis=0,
                                        name="negative_raw_scores")

        # There is no regression loss for negative examples.  For the positives, we need
        # to find the gt regression from anchor to gt boxes
        positive_anchors = tf.gather(predAnchors,
                                     posIdx,
                                     axis=0,
                                     name="positive_anchors")
        positive_gt_boxes = tf.gather(gt_boxes,
                                      tf.cast(tf.gather(
                                          labeled_boxes[:, 4], posIdx),
                                              dtype=tf.int32),
                                      name="positive_gt_boxes")
        positive_gt_regs = calculateRegressions(positive_anchors,
                                                positive_gt_boxes,
                                                axis=-1)
        positive_raw_regressions = tf.gather(predRegressions,
                                             posIdx,
                                             axis=0,
                                             name="positive_raw_regressions")

        # Flatten regressions before passing into the huber loss function
        flat_pred_regs = tf.reshape(positive_raw_regressions, [-1])
        flat_gt_regs = tf.reshape(positive_gt_regs, [-1])
        reg_loss = tf.losses.huber_loss(flat_pred_regs,
                                        flat_gt_regs,
                                        reduction=tf.losses.Reduction.NONE,
                                        delta=1.0)
        reg_loss = tf.reduce_sum(reg_loss)

        # Class-agnostic log loss for positive examples
        # Need to create a whole bunch of [0,1]s of the right length
        num_pos = tf.shape(positive_raw_scores)[0]
        cls_loss_pos = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=tf.ones([num_pos], dtype=tf.int32),
            logits=positive_raw_scores)
        cls_loss_pos = tf.reduce_sum(cls_loss_pos)

        # Log-loss for the negative examples
        num_neg = tf.shape(negative_raw_scores)[0]
        cls_loss_neg = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=tf.zeros([num_neg], dtype=tf.int32),
            logits=negative_raw_scores)
        cls_loss_neg = tf.reduce_sum(cls_loss_neg)

        # Adding up and normalizing the losses.
        reg_loss /= (feature_h * feature_w) / 10.
        cls_loss = (cls_loss_pos + cls_loss_neg) / mini_batch_size

        total_loss = tf.add(reg_loss, cls_loss, name="total_loss")
        return total_loss
Example #23
def faster_rcnn(image, image_attributes):
    """ Builds a Faster R-CNN network

    Inputs:
        image           - tf.Tensor object containing image to be processed
        image_attributes- tf.Tensor object containing image height, width, and
                            scaling factor used to resize original image
    Outputs:
        out_regions     - list of tf.Tensor objects with bounding boxes for detections,
                            sans background class
        out_scores      - list of tf.Tensor objects with classification scores for each
                            category in VOC 2007, sans background
    """
    pooled_h = 7
    pooled_w = 7
    feature_channels = 512  # Property of vgg16 network
    # num_classes = 21
    # confidence_threshold = 0.8
    vgg16_base = VGG('f-rcnn')
    features = vgg16_base.buildGraph(image, train=False,
        weightsPath=s.DEF_FRCNN_WEIGHTS_PATH,
        biasesPath=s.DEF_FRCNN_BIASES_PATH,
        network_version="VGG16CONV",
        device="/gpu:0")

    print("Layers of VGG are:")
    print(vgg16_base.layers.keys())
    proposed_regions, rpn_scores = rpn.Rpn(features, image_attributes, namespace='f-rcnn')
    print("Region Proposal Network set up!")

    with easy_scope('f-rcnn'):
        pooled_regions = roi_pooling_layer(tf.squeeze(features), image_attributes,
            proposed_regions, pooled_h, pooled_w, 16, name='roi_pooling_layer')
    print("RoI pooling set up!")
    bbox_reg, cls_scores = cls.setUp(
        pooled_regions, pooled_h, pooled_w, feature_channels, namespace="f-rcnn")
    with easy_scope('f-rcnn'), tf.device("/cpu:0"):
        with easy_scope('reshape_cls_output'):
            # cls_score is (300,21) ; bbox_reg is (300,84)
            bbox_reg = tf.reshape(bbox_reg, (-1, 21, 4))

            # Set proposed_regions shape to (300,1,4)
            proposed_regions_reshape = tf.expand_dims(proposed_regions, axis=1)

            # Rescale the Regions of Interest to the proper scale
            proposed_regions_reshape = proposed_regions_reshape / image_attributes[2]

        with easy_scope('clip_regress_unpack_output'):
            # Regress the Regions of Interest into class-specific detection boxes
            reg_roi = rpn.regressAnchors(proposed_regions_reshape, bbox_reg, axis=-1)

            # Clip all regions to image boundaries
            reg_roi = rpn.clipRegions(reg_roi, image_attributes, axis=-1)

            # Unpack both the regions and scores by class
            reg_rois = tf.unstack(reg_roi, num=21, axis=1)
            bbox_scores = tf.unstack(cls_scores, num=21, axis=1)

        with easy_scope('non_max_suppression'):
            # There are 20 classes, each in their own list.  Background is not stored
            out_scores = [[] for _ in range(20)]
            out_regions = [[] for _ in range(20)]

            # We skip the first class since it is the background class.
            for i, (regs, scores) in enumerate(zip(reg_rois[1:], bbox_scores[1:])):
                # Perform NMS, but keep all of the indices (#indices < 300)
                inds = nms(regs, scores, 300, iou_threshold=0.3)
                regs = tf.gather(regs, inds)
                scores = tf.gather(scores, inds)
                out_scores[i] = scores
                out_regions[i] = regs
            return out_regions, out_scores
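A minimal, hypothetical driver for the graph built by faster_rcnn; the placeholder shapes,
the dummy input, and the session handling are assumptions and not part of the original
example:

import numpy as np
import tensorflow as tf

image_in = tf.placeholder(tf.float32, shape=(1, None, None, 3), name="image_in")
image_attrs = tf.placeholder(tf.float32, shape=(3,), name="image_attrs")  # [h, w, scale]
regions, scores = faster_rcnn(image_in, image_attrs)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    dummy = np.zeros((1, 600, 800, 3), dtype=np.float32)  # stand-in for a real image
    out_regions, out_scores = sess.run(
        [regions, scores],
        feed_dict={image_in: dummy, image_attrs: [600., 800., 1.]})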
Example #24
def faster_rcnn(image, image_attributes):
    """ Builds a Faster R-CNN network

    Inputs:
        image           - tf.Tensor object containing image to be processed
        image_attributes- tf.Tensor object containing image height, width, and
                            scaling factor used to resize original image
    Outputs:
        out_regions     - list of tf.Tensor objects with bounding boxes for detections,
                            sans background class
        out_scores      - list of tf.Tensor objects with classification scores for each
                            category in VOC 2007, sans background
    """
    pooled_h = 7
    pooled_w = 7
    feature_channels = 512  # Property of vgg16 network
    # num_classes = 21
    # confidence_threshold = 0.8
    vgg16_base = VGG('f-rcnn')
    features = vgg16_base.buildGraph(image,
                                     train=False,
                                     weightsPath=s.DEF_FRCNN_WEIGHTS_PATH,
                                     biasesPath=s.DEF_FRCNN_BIASES_PATH,
                                     network_version="VGG16CONV",
                                     device="/gpu:0")

    print("Layers of VGG are:")
    print(vgg16_base.layers.keys())
    proposed_regions, rpn_scores = rpn.Rpn(features,
                                           image_attributes,
                                           namespace='f-rcnn')
    print("Region Proposal Network set up!")

    with easy_scope('f-rcnn'):
        pooled_regions = roi_pooling_layer(tf.squeeze(features),
                                           image_attributes,
                                           proposed_regions,
                                           pooled_h,
                                           pooled_w,
                                           16,
                                           name='roi_pooling_layer')
    print("RoI pooling set up!")
    bbox_reg, cls_scores = cls.setUp(pooled_regions,
                                     pooled_h,
                                     pooled_w,
                                     feature_channels,
                                     namespace="f-rcnn")
    with easy_scope('f-rcnn'), tf.device("/cpu:0"):
        with easy_scope('reshape_cls_output'):
            # cls_score is (300,21) ; bbox_reg is (300,84)
            bbox_reg = tf.reshape(bbox_reg, (-1, 21, 4))

            # Set proposed_regions shape to (300,1,4)
            proposed_regions_reshape = tf.expand_dims(proposed_regions, axis=1)

            # Rescale the Regions of Interest to the proper scale
            proposed_regions_reshape = proposed_regions_reshape / image_attributes[
                2]

        with easy_scope('clip_regress_unpack_output'):
            # Regress the Regions of Interest into class-specific detection boxes
            reg_roi = rpn.regressAnchors(proposed_regions_reshape,
                                         bbox_reg,
                                         axis=-1)

            # Clip all regions to image boundaries
            reg_roi = rpn.clipRegions(reg_roi, image_attributes, axis=-1)

            # Unpack both the regions and scores by class
            reg_rois = tf.unstack(reg_roi, num=21, axis=1)
            bbox_scores = tf.unstack(cls_scores, num=21, axis=1)

        with easy_scope('non_max_suppression'):
            # There are 20 classes, each in their own list.  Background is not stored
            out_scores = [[] for _ in range(20)]
            out_regions = [[] for _ in range(20)]

            # We skip the first class since it is the background class.
            for i, (regs,
                    scores) in enumerate(zip(reg_rois[1:], bbox_scores[1:])):
                # Perform NMS, but keep all of the indices (#indices < 300)
                inds = nms(regs, scores, 300, iou_threshold=0.3)
                regs = tf.gather(regs, inds)
                scores = tf.gather(scores, inds)
                out_scores[i] = scores
                out_regions[i] = regs
            return out_regions, out_scores