def setUp(pooled_regions, pooled_h, pooled_w, feat_channels, trainable=False,
          namespace="rcnn"):
    """Calculate bounding box regressions and class probabilities

    Preconditions:
        This function assumes that the variables accessed by tf.get_variable()
        already exist and have been initialized before calling this function.

    Positional Inputs:
        pooled_regions -- A tf.Tensor object with shape
            (num_regions, pooled_h, pooled_w, num_channels) containing the pooled
            regions of interest in the image.
        pooled_h -- A scalar containing the height of the pooled input
        pooled_w -- A scalar containing the width of the pooled input
        feat_channels -- A scalar containing the number of channels in the pooled
            input

    Outputs:
        A tuple containing both:
        A list of bounding box regressions, with a different bounding box
            regression for each class. Each regression is described by four
            floats, so this output is an np.array of shape (num_regions, 21, 4).
        A list of scores for a given set of classes. In the case of the VOC 2007
            dataset, there are 20 classes plus one background class, so this
            output is an np.array of shape (num_regions, 21) with a score for
            every class.
    """
    last_dimension = pooled_h * pooled_w * feat_channels
    with easy_scope(namespace, reuse=True), tf.device("/gpu:0"):
        with easy_scope("fc6", reuse=True):
            flattened_in = tf.reshape(pooled_regions, (-1, last_dimension))
            prevLayer = tf.nn.bias_add(
                tf.matmul(flattened_in, tf.get_variable("Weights", trainable=trainable)),
                tf.get_variable("Bias", trainable=trainable))
        prevLayer = tf.nn.relu(prevLayer, name="relu6")

        with easy_scope("fc7", reuse=True):
            prevLayer = tf.nn.bias_add(
                tf.matmul(prevLayer, tf.get_variable("Weights", trainable=trainable)),
                tf.get_variable("Bias", trainable=trainable))
        prevLayer = tf.nn.relu(prevLayer, name="relu7")

        # Produce classification probabilities
        with easy_scope("cls_score", reuse=True):
            weights = tf.get_variable("Weights", trainable=trainable)
            bias = tf.get_variable("Bias", trainable=trainable)
            scoreLayer = tf.nn.bias_add(tf.matmul(prevLayer, weights), bias,
                                        name="out")

        # Produce regressions (note these are with respect to the individual
        # regions, so the actual regions in the image resulting from these are
        # yet to be calculated)
        with easy_scope("bbox_pred", reuse=True):
            bboxPred = tf.nn.bias_add(
                tf.matmul(prevLayer, tf.get_variable("Weights", trainable=trainable)),
                tf.get_variable("Bias", trainable=trainable), name="out")

        probLayer = tf.nn.softmax(scoreLayer, name="cls_prob")

    return bboxPred, probLayer
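# Usage sketch (illustrative, not from the original source): wiring setUp() to
# RoI-pooled VGG16 conv5 features. The 7x7x512 shape matches the pooled_h,
# pooled_w, and feature channel values used elsewhere in this codebase.
def _example_rcnn_head(pooled_regions):
    # pooled_regions: (num_regions, 7, 7, 512)
    bbox_pred, cls_prob = setUp(pooled_regions, 7, 7, 512, trainable=False,
                                namespace="rcnn")
    # bbox_pred: (num_regions, 84) raw regressions; cls_prob: (num_regions, 21)
    return bbox_pred, cls_prob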
def extractLayers(scope, weightsPath, biasesPath, device="/cpu:0"):
    """ Function that extracts weights and biases into properly scoped variables.

    Positional arguments:
        scope -- tf.VariableScope (or string representing a scope) to place
            variables in
        weightsPath -- path to .npz file containing the weights
        biasesPath -- path to .npz file containing the biases

    Keyword arguments:
        device -- device on which to place the created variables
    """
    weightsPath = _fixModelPath(weightsPath)
    biasesPath = _fixModelPath(biasesPath)

    # Raw numpy values. Need to be loaded into variables.
    weightsDict = numpy.load(weightsPath)
    biasesDict = numpy.load(biasesPath)

    # Loop through all of the layer names in the weights dictionary.
    with tf.device(device):
        with easy_scope(scope):
            warning = False
            for name, weights_tnsr in weightsDict.items():
                # Strip leading and trailing slashes from the layer name
                if name.startswith("/"):
                    name = name[1:]
                if name.endswith("/"):
                    name = name[:-1]
                with easy_scope(name):
                    try:
                        tf.get_variable("Weights", trainable=False,
                                        initializer=tf.constant(weights_tnsr))
                        tf.get_variable("Bias", trainable=False,
                                        initializer=tf.constant(biasesDict[name]))
                    except ValueError:
                        # Values were loaded elsewhere
                        warning = True
            if warning:
                print("extractLayers() Warning : Some variable names already exist."
                      " If unintentional, please choose a different scope name.")
    return
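# Illustrative sketch (an assumption, not from the original source): the .npz
# files consumed by extractLayers() are plain name -> array archives, one entry
# per layer, which numpy.savez can produce directly. The file names and the
# layer-name keys shown here are hypothetical.
def _example_save_model(weights_by_layer, biases_by_layer):
    # e.g. weights_by_layer = {"conv1_1": w1_array, "fc6": w6_array, ...}
    numpy.savez("weights.npz", **weights_by_layer)
    numpy.savez("biases.npz", **biases_by_layer)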
def createConvLayer(bottom, name, trainable=False, stride=[1, 1, 1, 1]):
    """ Creates a conv layer given a name.

    Preconditions:
        Expects a tf.Variable with name model_scope/layer_scope/Weights and one
        with model_scope/layer_scope/Bias to already exist.

    Inputs:
        bottom -- A tf.Tensor containing activations
        name -- A string with a name for this layer
        trainable -- Whether the layer's variables are trainable
        stride -- A list containing the stride to apply for this convolution.
            Most likely does not need to be changed from its default.

    Outputs:
        A tf.Tensor containing the output of the convolution.
    """
    with easy_scope(name, reuse=True):
        prevLayer = tf.nn.conv2d(bottom,
                                 tf.get_variable("Weights", trainable=trainable),
                                 stride, padding="SAME")
        prevLayer = tf.nn.bias_add(prevLayer,
                                   tf.get_variable("Bias", trainable=trainable),
                                   name="out")
    return prevLayer
def _calculateRpnLoss(predRawScores, predBoxes, predRegressions, predAnchors,
                      mini_batch_size, gt_boxes, feature_h, feature_w):
    """Refactoring of code from calculateRpnLoss into another function for testing"""
    # Subtract one since background is not a class
    num_classes = tf.shape(gt_boxes)[0] - 1
    iou_threshold_neg = s.DEF_IOU_THRESHOLD_TRAIN_NEG
    iou_threshold_pos = s.DEF_IOU_THRESHOLD_TRAIN_POS

    with easy_scope(name="proposal_layer_test"), tf.device("/cpu:0"):
        labeled_boxes = iou_labeler(predBoxes, gt_boxes, iou_threshold_neg,
                                    iou_threshold_pos)

        # Sample boxes and raw scores for the loss
        posIdx, negIdx = tf.py_func(
            lambda x: sampleBoxes(x, num_classes, mini_batch_size),
            [labeled_boxes], [tf.int32, tf.int32], stateful=False,
            name="sampleBoxes")
        positive_raw_scores = tf.gather(predRawScores, posIdx, axis=0,
                                        name="positive_raw_scores")
        negative_raw_scores = tf.gather(predRawScores, negIdx, axis=0,
                                        name="negative_raw_scores")

        # There is no regression loss for negative examples. For the positives,
        # we need to find the ground-truth regression from anchor to gt boxes.
        positive_anchors = tf.gather(predAnchors, posIdx, axis=0,
                                     name="positive_anchors")
        positive_gt_boxes = tf.gather(
            gt_boxes,
            tf.cast(tf.gather(labeled_boxes[:, 4], posIdx), dtype=tf.int32),
            name="positive_gt_boxes")
        positive_gt_regs = calculateRegressions(positive_anchors,
                                                positive_gt_boxes, axis=-1)
        positive_raw_regressions = tf.gather(predRegressions, posIdx, axis=0,
                                             name="positive_raw_regressions")

        # Flatten regressions before passing them into the Huber loss function
        flat_pred_regs = tf.reshape(positive_raw_regressions, [-1])
        flat_gt_regs = tf.reshape(positive_gt_regs, [-1])
        reg_loss = tf.losses.huber_loss(flat_gt_regs, flat_pred_regs,
                                        reduction=tf.losses.Reduction.NONE,
                                        delta=1.0)
        reg_loss = tf.reduce_sum(reg_loss)

        # Class-agnostic log loss for the positive examples: every positive
        # example has label 1
        num_pos = tf.shape(positive_raw_scores)[0]
        cls_loss_pos = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=tf.ones([num_pos], dtype=tf.int32), logits=positive_raw_scores)
        cls_loss_pos = tf.reduce_sum(cls_loss_pos)

        # Log loss for the negative examples: every negative example has label 0
        num_neg = tf.shape(negative_raw_scores)[0]
        cls_loss_neg = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=tf.zeros([num_neg], dtype=tf.int32), logits=negative_raw_scores)
        cls_loss_neg = tf.reduce_sum(cls_loss_neg)

        # Add up and normalize the losses
        reg_loss /= (feature_h * feature_w) / 10.
        cls_loss = (cls_loss_pos + cls_loss_neg) / mini_batch_size
        total_loss = tf.add(reg_loss, cls_loss, name="total_loss")

    return total_loss
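# Hypothetical sketch (an assumption; sampleBoxes is defined elsewhere in the
# source): the py_func above expects a callable that picks at most
# mini_batch_size row indices from the IoU-labeled boxes and returns
# (posIdx, negIdx) as int32 arrays. The label conventions below (>= 0 marks a
# positive matched to a gt box, -1 marks a negative) are assumptions.
import numpy as np

def _example_sampleBoxes(labeled_boxes, num_classes, mini_batch_size):
    # num_classes is unused in this simplified sketch
    labels = labeled_boxes[:, 4]
    pos = np.where(labels >= 0)[0]
    neg = np.where(labels == -1)[0]
    # At most half the minibatch is positive; fill the rest with negatives
    num_pos = min(len(pos), mini_batch_size // 2)
    num_neg = min(len(neg), mini_batch_size - num_pos)
    pos = np.random.choice(pos, size=num_pos, replace=False)
    neg = np.random.choice(neg, size=num_neg, replace=False)
    return pos.astype(np.int32), neg.astype(np.int32)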
def calculateRpnLoss(rpnRawScores, rpnBboxPred, feature_h, feature_w, image_attr,
                     gt_boxes):
    """ Calculates the loss for the region proposal network

    Inputs:
        rpnRawScores -- tf.Tensor object containing the objectness scores for
            each region before application of softmax.
        rpnBboxPred -- tf.Tensor object containing the bounding-box regressions
            for each region. Must be of a shape compatible with rpnRawScores,
            i.e. only differing in the last dimension.
        feature_h -- Height of the convolutional input to the RPN
        feature_w -- Width of the convolutional input to the RPN
        image_attr -- tf.Tensor of the form [img_h, img_w, scaling_factor]
        gt_boxes -- Ground-truth boxes with respect to which the loss is
            calculated. Must be in the format (num_gt_boxes, 5), where the rows
            are of the form [x0, y0, x1, y1, class], class being the category to
            which each ground truth box belongs.

    Output:
        The loss for this minibatch
    """
    mini_batch_size = 128

    with easy_scope(name="proposal_layer_test"), tf.device("/cpu:0"):
        rpnScores = tf.nn.softmax(rpnRawScores, dim=-1, name="rpn_cls_prob")
        _, rpnScores = tf.unstack(rpnScores, num=2, axis=-1)

    predBoxes, predScores, predIndices, predAnchors = proposalLayer_train(
        rpnScores, rpnBboxPred, feature_h, feature_w, image_attr)

    with easy_scope(name="proposal_layer_test"), tf.device("/cpu:0"):
        predRawScores = tf.gather(tf.reshape(rpnRawScores, (-1, 2)), predIndices,
                                  axis=0, name="final_raw_scores")
        predRegressions = tf.gather(tf.reshape(rpnBboxPred, (-1, 4)), predIndices,
                                    axis=0, name="final_raw_regressions")

    return _calculateRpnLoss(predRawScores, predBoxes, predRegressions,
                             predAnchors, mini_batch_size, gt_boxes, feature_h,
                             feature_w)
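# Training sketch (illustrative assumption, not from the original source): the
# scalar returned by calculateRpnLoss() can be minimized with any TF1 optimizer.
# The learning rate and momentum values here are hypothetical.
def _example_train_op(total_loss):
    optimizer = tf.train.MomentumOptimizer(learning_rate=1e-3, momentum=0.9)
    return optimizer.minimize(total_loss)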
def createConvLayer(self, name, trainable=True):
    """Creates a convolutional Tensorflow layer given its name.

    Assumes that properly named bias and weight variables are already loaded
    in memory.
    """
    with easy_scope(name):
        conv = tf.nn.conv2d(self.prevLayer,
                            tf.get_variable("Weights", trainable=trainable),
                            [1, 1, 1, 1], padding="SAME")
        bias = tf.nn.bias_add(conv, tf.get_variable("Bias", trainable=trainable))
    return bias
def createFcLayer(self, name, trainable=True):
    """Creates a fully connected layer.

    Loads the weights from the weightsDict and biasesDict dictionaries using
    their key value name and returns the bias layer.
    """
    with easy_scope(name, reuse=True):
        layer = tf.nn.bias_add(
            tf.matmul(self.prevLayer, tf.get_variable("Weights", trainable=trainable)),
            tf.get_variable("Bias", trainable=trainable))
    return layer
def createFirstFcLayer(self, name, trainable=True):
    """Creates the first fully connected layer.

    This layer converts the output of the last convolutional layer to the input
    for the next fully connected ones. Returns the bias layer.
    """
    INPUT_SIZE = 25088  # 7 * 7 * 512, the flattened pool5 output of VGG16
    # OUTPUT_SIZE = 4096

    with easy_scope(name, reuse=True):
        flattenedInput = tf.reshape(self.prevLayer, [-1, INPUT_SIZE])
        layer = tf.nn.bias_add(
            tf.matmul(flattenedInput, tf.get_variable("Weights", trainable=trainable)),
            tf.get_variable("Bias", trainable=trainable))
    return layer
def _proposalLayer(feature_stride, iou_threshold, pre_nms_keep, post_nms_keep,
                   scores, bbox_regressions, feature_h, feature_w, image_attr,
                   minimum_dim, device, scope_name, train_rpn):
    """Implementation of internal logic of proposalLayer, see proposalLayer"""
    with easy_scope(name=scope_name), tf.device(device):
        baseAnchors = generateAnchors(ratios=[2, 1, .5])
        shiftedAnchors = generateShiftedAnchors(baseAnchors, feature_h, feature_w,
                                                feature_stride)
        regressedAnchors = regressAnchors(shiftedAnchors, bbox_regressions)

        if train_rpn is True:
            # In this case, we must not clip anchors to image boundaries, but
            # rather eliminate any cross image-boundary anchors.
            clippedAnchors, c_indices = killRegions(regressedAnchors, image_attr)
            p_anchors, p_scores, p_indices = prunedScoresAndAnchors(
                clippedAnchors, scores, minimum_dim, image_attr)
            # Select p_indices from c_indices
            p_indices = tf.gather(c_indices, p_indices)
        else:
            clippedAnchors = clipRegions(regressedAnchors, image_attr)
            p_anchors, p_scores, p_indices = prunedScoresAndAnchors(
                clippedAnchors, scores, minimum_dim, image_attr)

        top_scores, top_score_indices = tf.nn.top_k(p_scores, k=pre_nms_keep,
                                                    name="top_scores")
        top_anchors = tf.gather(p_anchors, top_score_indices, name="top_anchors",
                                axis=0)
        top_indices = tf.gather(p_indices, top_score_indices, name="top_indices")

        # We want nms to keep everything that passes the IoU test
        post_nms_indices = nms(top_anchors, top_scores, post_nms_keep,
                               iou_threshold=iou_threshold,
                               name="post_nms_indices")

        final_anchors = tf.gather(top_anchors, post_nms_indices, axis=0,
                                  name="proposal_regions")
        final_scores = tf.gather(top_scores, post_nms_indices, axis=0,
                                 name="proposal_region_scores")
        final_indices = tf.gather(top_indices, post_nms_indices,
                                  name="proposal_region_indices")
        final_base_anchors = tf.gather(tf.reshape(shiftedAnchors, (-1, 4)),
                                       final_indices, axis=0,
                                       name="proposal_region_base_anchors")

    return final_anchors, final_scores, final_indices, final_base_anchors
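# Illustrative sketch (an assumption; generateShiftedAnchors is defined elsewhere
# in the source): the usual way to tile base anchors over a feature map is to add
# every feature-stride-spaced (x, y) offset to every base anchor, giving
# feature_h * feature_w * num_base_anchors candidate boxes.
import numpy as np

def _example_shift_anchors(base_anchors, feature_h, feature_w, feature_stride):
    # base_anchors: (A, 4) boxes in [x0, y0, x1, y1] form
    shift_x = np.arange(feature_w) * feature_stride
    shift_y = np.arange(feature_h) * feature_stride
    sx, sy = np.meshgrid(shift_x, shift_y)
    shifts = np.stack([sx.ravel(), sy.ravel(), sx.ravel(), sy.ravel()], axis=1)
    # Broadcast (H*W, 1, 4) + (1, A, 4) -> (H*W, A, 4), then flatten to (H*W*A, 4)
    return (shifts[:, np.newaxis, :] + base_anchors[np.newaxis, :, :]).reshape(-1, 4)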
def Rpn(features, image_attr, train_net=None, namespace="rpn"):
    """ Region proposal network. Proposes regions to later be pooled and
    classified/regressed.

    Inputs:
        features -- A tf.Tensor object of rank 4, dimensions
            (batch, height, width, channel), since this is the standard
            tensorflow order.
        image_attr -- A tf.Tensor object of rank 1, with values
            [img_h, img_w, scaling_factor]
        train_net -- Can be set to either None (default), "TRAIN_RPN", or
            "TRAIN_R-CNN". When set to one of the latter 'TRAIN_' settings, the
            network is initialized for training instead of for prediction.

    Output:
        A tf.Tensor object of rank 2 with dimensions (num_rois, 4), where the
        second dimension is of the form [x0, y0, x1, y1]
    """
    train = False
    if train_net is not None:
        train = True

    def createConvLayer(bottom, name, stride=[1, 1, 1, 1]):
        """ Creates a conv layer given a name.

        Preconditions:
            Expects a tf.Variable with name model_scope/layer_scope/Weights and
            one with model_scope/layer_scope/Bias to already exist.

        Inputs:
            bottom -- A tf.Tensor containing activations
            name -- A string with a name for this layer
            stride -- A list containing the stride to apply for this convolution.
                Most likely does not need to be changed from its default.

        Outputs:
            A tf.Tensor containing the output of the convolution.
        """
        with easy_scope(name, reuse=True):
            prevLayer = tf.nn.conv2d(bottom,
                                     tf.get_variable("Weights", trainable=train),
                                     stride, padding="SAME")
            prevLayer = tf.nn.bias_add(prevLayer,
                                       tf.get_variable("Bias", trainable=train),
                                       name="out")
        return prevLayer

    with easy_scope(namespace, reuse=True):
        layer3x3 = createConvLayer(features, "rpn_conv/3x3")
        layer3x3 = tf.nn.relu(layer3x3, "rpn_relu/3x3")

        # Region Proposal Network - Probabilities
        prevLayer = createConvLayer(layer3x3, "rpn_cls_score")

        # Assuming that feat_w = feat_h = 14 and that the number of anchors is 9,
        # the output should be of shape (9, 14, 14, 2). However, tf.nn.conv2d
        # cannot create batches out of thin air, so rpn_cls_score produces a
        # (1, 14, 14, 9*2) tensor instead, which we reshape to (1, 14, 14, 2, 9),
        # transpose to (9, 14, 14, 2, 1), then tf.squeeze the last dimension out
        # to arrive at the desired shape of (9, 14, 14, 2). The last dimension of
        # rpn_cls_score is unpacked from (9*2) to (2, 9) and not (9, 2) since
        # this is how the weights imported from caffe are packed.
        with easy_scope("create_rpn_score_batches"), tf.device("/cpu:0"):
            feature_h = tf.shape(features)[1]
            feature_w = tf.shape(features)[2]
            prevLayer = tf.reshape(prevLayer, (1, feature_h, feature_w, 2, 9))
            prevLayer = tf.transpose(prevLayer, (4, 1, 2, 3, 0))
            prevLayer = tf.squeeze(prevLayer)

            if train_net != "TRAIN_RPN":
                rpnScores = tf.nn.softmax(prevLayer, dim=-1,
                                          name="rpn_cls_prob_raw")
                _, rpnScores = tf.unstack(rpnScores, num=2, axis=-1)
                rpnScores = tf.identity(rpnScores, name="rpn_cls_prob")
            else:
                # When training the RPN we return the raw scores; the softmax is
                # applied later, in calculateRpnLoss
                rpnScores = prevLayer

        with tf.device("/gpu:0"):
            # Region Proposal Network - Bounding Box Proposal Regression
            rpnBboxPred = createConvLayer(layer3x3, "rpn_bbox_pred")

        with easy_scope("create_rpn_bbox_batches"), tf.device("/cpu:0"):
            # We want to reshape rpnBboxPred just like we did the scores. The
            # only difference is that we reshape to (9, 14, 14, 4) instead of
            # (9, 14, 14, 2) (in the case of feat_h = feat_w = 14)
            prevLayer = tf.reshape(rpnBboxPred, (1, feature_h, feature_w, 9, 4))
            prevLayer = tf.transpose(prevLayer, (3, 1, 2, 4, 0))
            rpnBboxPred = tf.squeeze(prevLayer)

    if train_net != "TRAIN_RPN":
        out = proposalLayer(s.DEF_FEATURE_STRIDE, s.DEF_IOU_THRESHOLD,
                            s.DEF_PRE_NMS_KEEP, s.DEF_POST_NMS_KEEP, rpnScores,
                            rpnBboxPred, feature_h, feature_w, image_attr,
                            s.DEF_MIN_PROPOSAL_DIMS)
        return out
    else:
        return rpnScores, rpnBboxPred, feature_h, feature_w, image_attr
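# Sanity-check sketch (added for illustration): the reshape/transpose dance
# described in the comments above can be verified in NumPy on a small fake
# rpn_cls_score output.
import numpy as np

def _example_unpack_scores(raw, feat_h, feat_w, num_anchors=9):
    # raw: (1, feat_h, feat_w, 2 * num_anchors), packed as (2, num_anchors)
    x = raw.reshape(1, feat_h, feat_w, 2, num_anchors)
    x = x.transpose(4, 1, 2, 3, 0)
    return x.squeeze(-1)  # (num_anchors, feat_h, feat_w, 2)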
def buildGraph(self, prevLayer, train=False, train_starting_at=None,
               weightsPath=s.DEF_WEIGHTS_PATH, biasesPath=s.DEF_BIASES_PATH,
               network_version="VGG16", device="/gpu:0", custom_layout=None):
    """Builds up the computation graph based on the given parameters.

    Positional arguments:
        prevLayer -- VGG must be connected to the output of another op. When
            making a vgg16 network, for example, the input may be a
            tf.Placeholder, as is usually the case.

    Keyword arguments:
        train -- If True, sets all variables in the created computation graph to
            be trainable. If False (default), sets them to not be trainable,
            unless overridden by a following option.
        train_starting_at -- If set to the name of a layer, sets all layers after
            and including that layer to trainable. Overrides the "train" keyword
            argument.
        weightsPath -- Path to .npz file containing properly namespaced weights
            for this network. See loadNetVars for how to properly do this.
        biasesPath -- Path to .npz file containing properly namespaced biases for
            this network. See above.
        network_version -- If it is desired to, for example, create a VGG16
            network, the default "VGG16" suffices. The options are "VGG16",
            "VGG19", "VGG16CONV", and "VGG19CONV". The latter two have as their
            last layers the last convolutional outputs of their respective CNNs.
        device -- The device onto which to place all operations. By default set
            to "/gpu:0"; running convolutions on CPUs is not fun.
        custom_layout -- In case one desires to make a custom VGG-like
            convolutional neural network, the exact layout of the network can be
            provided in the internally used format. See the class method
            makeLayout(self, name) above for an example.
    """
    # Extract the information from the .npz files and put it into properly
    # scoped tf.Variable(s)
    loadNetVars.extractLayers(self.namespace, weightsPath, biasesPath)

    # Set up the network layout
    layout = []
    if network_version is not None:
        _layout = self.makeLayout(network_version)

        # Set default device and trainability for layers
        for layer in _layout:
            layer["device"] = device
            layer["trainable"] = train

        # Set trainability when using train_starting_at
        if train_starting_at is not None:
            setTrainingTrue = False
            for layer in _layout:
                if layer["name"] == train_starting_at:
                    setTrainingTrue = True
                layer["trainable"] = setTrainingTrue

        # Set dropout for any fully connected layers being trained
        for layer in _layout:
            if layer["name"].startswith('fc') and layer["trainable"] is True:
                dropoutLayer = {}
                dropoutLayer["name"] = layer["name"].replace('fc', 'drop', 1)
                dropoutLayer["device"] = device
                dropoutLayer["trainable"] = None
                layout.append(dropoutLayer)
                layout.append(layer)
            else:
                # No dropout needed
                layout.append(layer)

    # In the case of a custom layout passed in
    if custom_layout is not None:
        layout = custom_layout

    # Actualize the layout
    with easy_scope(self.namespace, reuse=True):
        self.prevLayer = prevLayer
        for layer in layout:
            with tf.device(layer["device"]):
                self.addLayer(layer["name"], trainable=layer["trainable"])

    eprint("VGG computational graph successfully actualized! See layers"
           " attribute to inspect its ops.")
    return self.prevLayer
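# Usage sketch (illustrative; the constructor argument and placeholder shape are
# assumptions): building a frozen VGG16 feature extractor from an image
# placeholder via buildGraph().
def _example_build_vgg():
    image = tf.placeholder(tf.float32, shape=(1, None, None, 3), name="image")
    net = VGG('vgg16')
    features = net.buildGraph(image, train=False, network_version="VGG16CONV",
                              device="/gpu:0")
    return features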
def faster_rcnn(image, image_attributes):
    """ Builds a Faster R-CNN network

    Inputs:
        image -- tf.Tensor object containing the image to be processed
        image_attributes -- tf.Tensor object containing the image height, width,
            and the scaling factor used to resize the original image

    Outputs:
        out_regions -- list of tf.Tensor objects with bounding boxes for
            detections, sans background class
        out_scores -- list of tf.Tensor objects with classification scores for
            each category in VOC 2007, sans background
    """
    pooled_h = 7
    pooled_w = 7
    feature_channels = 512  # Property of the vgg16 network
    # num_classes = 21
    # confidence_threshold = 0.8

    vgg16_base = VGG('f-rcnn')
    features = vgg16_base.buildGraph(image, train=False,
                                     weightsPath=s.DEF_FRCNN_WEIGHTS_PATH,
                                     biasesPath=s.DEF_FRCNN_BIASES_PATH,
                                     network_version="VGG16CONV", device="/gpu:0")
    print("Layers of VGG are:")
    print(vgg16_base.layers.keys())

    proposed_regions, rpn_scores = rpn.Rpn(features, image_attributes,
                                           namespace='f-rcnn')
    print("Region Proposal Network set up!")

    with easy_scope('f-rcnn'):
        pooled_regions = roi_pooling_layer(tf.squeeze(features),
                                           image_attributes, proposed_regions,
                                           pooled_h, pooled_w, 16,
                                           name='roi_pooling_layer')
    print("RoI pooling set up!")

    bbox_reg, cls_scores = cls.setUp(pooled_regions, pooled_h, pooled_w,
                                     feature_channels, namespace="f-rcnn")

    with easy_scope('f-rcnn'), tf.device("/cpu:0"):
        with easy_scope('reshape_cls_output'):
            # cls_scores is (300, 21); bbox_reg is (300, 84)
            bbox_reg = tf.reshape(bbox_reg, (-1, 21, 4))

            # Set proposed_regions shape to (300, 1, 4)
            proposed_regions_reshape = tf.expand_dims(proposed_regions, axis=1)

            # Rescale the Regions of Interest to the proper scale
            proposed_regions_reshape = (proposed_regions_reshape /
                                        image_attributes[2])

        with easy_scope('clip_regress_unpack_output'):
            # Regress the Regions of Interest into class-specific detection boxes
            reg_roi = rpn.regressAnchors(proposed_regions_reshape, bbox_reg,
                                         axis=-1)

            # Clip all regions to image boundaries
            reg_roi = rpn.clipRegions(reg_roi, image_attributes, axis=-1)

            # Unpack both the regions and scores by class
            reg_rois = tf.unstack(reg_roi, num=21, axis=1)
            bbox_scores = tf.unstack(cls_scores, num=21, axis=1)

        with easy_scope('non_max_suppression'):
            # There are 20 classes, each in their own list. Background is not
            # stored.
            out_scores = [[] for _ in range(20)]
            out_regions = [[] for _ in range(20)]

            # We skip the first class since it is the background class.
            for i, (regs, scores) in enumerate(zip(reg_rois[1:],
                                                   bbox_scores[1:])):
                # Perform NMS, but keep all of the indices (#indices < 300)
                inds = nms(regs, scores, 300, iou_threshold=0.3)
                regs = tf.gather(regs, inds)
                scores = tf.gather(scores, inds)
                out_scores[i] = scores
                out_regions[i] = regs

    return out_regions, out_scores
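# End-to-end usage sketch (illustrative; the preprocessing and feed values are
# assumptions): running the assembled detector inside a TF1 session.
# scaled_image is assumed to be a (1, h, w, 3) float array already resized by
# `scale` from an original (img_h, img_w) image.
def _example_detect(scaled_image, img_h, img_w, scale):
    image = tf.placeholder(tf.float32, shape=(1, None, None, 3))
    image_attributes = tf.placeholder(tf.float32, shape=(3,))
    regions, scores = faster_rcnn(image, image_attributes)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        return sess.run([regions, scores],
                        feed_dict={image: scaled_image,
                                   image_attributes: [img_h, img_w, scale]})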