def _network(self):
    with tf.variable_scope('fast_rcnn'):
        # No dropout in evaluation mode
        keep_prob = cfg.FRCNN_DROPOUT_KEEP_RATE if self.eval_mode is False else 1.0

        # ROI pooling
        pooledFeatures = roi_pool(self.featureMaps, self.rois, self.im_dims)

        # Fully connected layers (with dropout)
        with tf.variable_scope('fc'):
            self.rcnn_fc_layers = Layers(pooledFeatures)
            self.rcnn_fc_layers.flatten()
            for i in range(len(cfg.FRCNN_FC_HIDDEN)):
                self.rcnn_fc_layers.fc(output_nodes=cfg.FRCNN_FC_HIDDEN[i], keep_prob=keep_prob)
            hidden = self.rcnn_fc_layers.get_output()

        # Classifier score
        with tf.variable_scope('cls'):
            self.rcnn_cls_layers = Layers(hidden)
            self.rcnn_cls_layers.fc(output_nodes=self.num_classes, activation_fn=None)

        # Bounding Box refinement
        with tf.variable_scope('bbox'):
            self.rcnn_bbox_layers = Layers(hidden)
            self.rcnn_bbox_layers.fc(output_nodes=self.num_classes * 4, activation_fn=None)
def _network(self, x):
    conv_layers = Layers(x)

    # Make sure that the number of layers is consistent
    assert len(self.output_channels) == self.depth
    assert len(self.strides) == self.depth

    # Convolutional layers
    scope = 'convnet' + str(self.depth)
    with tf.variable_scope(scope):
        for l in range(self.depth):
            conv_layers.conv2d(filter_size=self.filter_sizes[l], output_channels=self.output_channels[l],
                               stride=self.strides[l], padding='SAME', b_value=None)
    return conv_layers
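# A minimal sketch of how the builder above might be wrapped, assuming the constructor signature
# hinted at by the commented-out call further down
# (convnet(x, [5, 3, 3, 3, 3], [32, 64, 64, 128, 128], strides=[2, 2, 1, 2, 1])).
# The class body here is illustrative, not the repository's actual convnet class.
class convnet:
    def __init__(self, x, filter_sizes, output_channels, strides):
        self.filter_sizes = filter_sizes        # one filter size per layer, e.g. [5, 3, 3, 3, 3]
        self.output_channels = output_channels  # one channel count per layer, e.g. [32, 64, 64, 128, 128]
        self.strides = strides                  # one stride per layer, e.g. [2, 2, 1, 2, 1]
        self.depth = len(output_channels)       # _network asserts the lists agree with this
        self.conv_layers = self._network(x)     # _network above returns the TensorBase Layers object

    def get_output(self):
        return self.conv_layers.get_output()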
def _network(self):
    with tf.variable_scope('fast_rcnn'):
        # No dropout in evaluation mode
        keep_prob = 0.5 if self.eval_mode is False else 1.0

        # ROI pooling
        pooledFeatures = roi_pool(self.featureMaps, self.rois, self.im_dims)

        # Fully connected layers (with dropout)
        with tf.variable_scope('fc'):
            self.rcnn_fc_layers = Layers(pooledFeatures)
            self.rcnn_fc_layers.flatten()
            self.rcnn_fc_layers.fc(output_nodes=1024, keep_prob=keep_prob)
            hidden = self.rcnn_fc_layers.get_output()

        # Classifier score
        with tf.variable_scope('cls'):
            self.rcnn_cls_layers = Layers(hidden)
            self.rcnn_cls_layers.fc(output_nodes=self.flags['num_classes'], activation_fn=None)

        # Bounding Box refinement
        with tf.variable_scope('bbox'):
            self.rcnn_bbox_layers = Layers(hidden)
            self.rcnn_bbox_layers.fc(output_nodes=4 * self.flags['num_classes'], activation_fn=None)
def _network(self):
    # There shouldn't be any gt_boxes if in evaluation mode
    if self.eval_mode is True:
        assert self.gt_boxes is None, \
            'Evaluation mode should not have ground truth boxes (or else what are you detecting for?)'

    _num_anchors = len(self.anchor_scales) * 3

    rpn_layers = Layers(self.featureMaps)

    with tf.variable_scope('rpn'):
        # Spatial windowing
        for i in range(len(cfg.RPN_OUTPUT_CHANNELS)):
            rpn_layers.conv2d(filter_size=cfg.RPN_FILTER_SIZES[i], output_channels=cfg.RPN_OUTPUT_CHANNELS[i])
        features = rpn_layers.get_output()

        with tf.variable_scope('cls'):
            # Box-classification layer (objectness)
            self.rpn_bbox_cls_layers = Layers(features)
            self.rpn_bbox_cls_layers.conv2d(filter_size=1, output_channels=_num_anchors * 2, activation_fn=None)

        with tf.variable_scope('target'):
            # Only calculate targets in train mode. No ground truth boxes in evaluation mode
            if self.eval_mode is False:
                # Anchor Target Layer (anchors and deltas)
                rpn_cls_score = self.rpn_bbox_cls_layers.get_output()
                self.rpn_labels, self.rpn_bbox_targets, self.rpn_bbox_inside_weights, self.rpn_bbox_outside_weights = \
                    anchor_target_layer(rpn_cls_score=rpn_cls_score, gt_boxes=self.gt_boxes, im_dims=self.im_dims,
                                        _feat_stride=self._feat_stride, anchor_scales=self.anchor_scales)

        with tf.variable_scope('bbox'):
            # Bounding-Box regression layer (bounding box predictions)
            self.rpn_bbox_pred_layers = Layers(features)
            self.rpn_bbox_pred_layers.conv2d(filter_size=1, output_channels=_num_anchors * 4, activation_fn=None)
def _network(self):
    # There shouldn't be any gt_boxes if in evaluation mode
    if self.eval_mode is True:
        assert self.gt_boxes is None, \
            'Evaluation mode should not have ground truth boxes (or else what are you detecting for?)'

    # One anchor per scale/aspect-ratio combination (3 aspect ratios per scale)
    _num_anchors = len(self.anchor_scales) * 3

    rpn_layers = Layers(self.featureMaps)

    with tf.variable_scope('rpn'):
        # Spatial windowing
        for i in range(len(cfg.RPN_OUTPUT_CHANNELS)):
            rpn_layers.conv2d(filter_size=cfg.RPN_FILTER_SIZES[i], output_channels=cfg.RPN_OUTPUT_CHANNELS[i])
        features = rpn_layers.get_output()

        with tf.variable_scope('cls'):
            # Box-classification layer (objectness)
            self.rpn_bbox_cls_layers = Layers(features)
            self.rpn_bbox_cls_layers.conv2d(filter_size=1, output_channels=_num_anchors * 2, activation_fn=None)

        with tf.variable_scope('target'):
            # Only calculate targets in train mode. No ground truth boxes in evaluation mode
            if self.eval_mode is False:
                # Anchor Target Layer (anchors and deltas)
                rpn_cls_score = self.rpn_bbox_cls_layers.get_output()
                self.rpn_labels, self.rpn_bbox_targets, self.rpn_bbox_inside_weights, self.rpn_bbox_outside_weights = \
                    anchor_target_layer(rpn_cls_score=rpn_cls_score, gt_boxes=self.gt_boxes, im_dims=self.im_dims,
                                        _feat_stride=self._feat_stride, anchor_scales=self.anchor_scales)

        with tf.variable_scope('bbox'):
            # Bounding-Box regression layer (bounding box predictions)
            self.rpn_bbox_pred_layers = Layers(features)
            self.rpn_bbox_pred_layers.conv2d(filter_size=1, output_channels=_num_anchors * 4, activation_fn=None)
class rpn:
    '''
    Region Proposal Network (RPN): Takes convolutional feature maps (TensorBase Layers object) from the last layer
    and proposes bounding boxes for objects.
    '''
    def __init__(self, featureMaps, gt_boxes, im_dims, flags):
        self.featureMaps = featureMaps
        self.gt_boxes = gt_boxes
        self.im_dims = im_dims
        self.flags = flags
        self._network()

    def _network(self):
        _num_anchors = len(self.flags['anchor_scales']) * 3

        rpn_layers = Layers(self.featureMaps)

        with tf.variable_scope('rpn'):
            # Spatial windowing
            rpn_layers.conv2d(filter_size=3, output_channels=512)
            features = rpn_layers.get_output()

            # Box-classification layer (objectness)
            self.rpn_bbox_cls_layers = Layers(features)
            self.rpn_bbox_cls_layers.conv2d(filter_size=1, output_channels=_num_anchors * 2, activation_fn=None)

            # Anchor Target Layer (anchors and deltas)
            self.rpn_cls_score = self.rpn_bbox_cls_layers.get_output()
            self.rpn_labels, self.rpn_bbox_targets, self.rpn_bbox_inside_weights, self.rpn_bbox_outside_weights = \
                anchor_target_layer(rpn_cls_score=self.rpn_cls_score, gt_boxes=self.gt_boxes,
                                    im_dims=self.im_dims, anchor_scales=self.flags['anchor_scales'])

            # Bounding-Box regression layer (bounding box predictions)
            self.rpn_bbox_pred_layers = Layers(features)
            self.rpn_bbox_pred_layers.conv2d(filter_size=1, output_channels=_num_anchors * 4, activation_fn=None)

    def get_rpn_bbox_cls(self):
        return self.rpn_bbox_cls_layers.get_output()

    def get_rpn_bbox_pred(self):
        return self.rpn_bbox_pred_layers.get_output()

    def get_rpn_labels(self):
        return self.rpn_labels

    def get_rpn_bbox_targets(self):
        return self.rpn_bbox_targets

    def get_rpn_bbox_inside_weights(self):
        return self.rpn_bbox_inside_weights

    def get_rpn_bbox_outside_weights(self):
        return self.rpn_bbox_outside_weights
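# A minimal wiring sketch for the rpn class above. Placeholder shapes, the flags contents, and the
# backbone are assumptions for illustration; only the rpn(...) signature and getter names come from
# the class definition itself.
x = tf.placeholder(tf.float32, [1, None, None, 3])         # input image (assumed shape)
gt_boxes = tf.placeholder(tf.float32, [None, 5])            # [x1, y1, x2, y2, class] per box (assumed layout)
im_dims = tf.placeholder(tf.int32, [None, 2])               # (height, width) of each image (assumed)
flags = {'anchor_scales': [8, 16, 32], 'num_classes': 21}   # example values only

backbone = convnet(x, [5, 3, 3, 3, 3], [32, 64, 64, 128, 128], strides=[2, 2, 1, 2, 1])  # signature from the commented call below
featureMaps = backbone.get_output()

rpn_net = rpn(featureMaps, gt_boxes, im_dims, flags)
objectness_scores = rpn_net.get_rpn_bbox_cls()   # 2 * num_anchors channels per spatial position
bbox_deltas = rpn_net.get_rpn_bbox_pred()        # 4 * num_anchors channels per spatial position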
def _cnn(self, x, gt_boxes, im_dims, key):
    # self.cnn[key] = convnet(x, [5, 3, 3, 3, 3], [32, 64, 64, 128, 128], strides=[2, 2, 1, 2, 1])
    self.cnn[key] = Layers(x)
    self.cnn[key].conv2d(5, 32)
    self.cnn[key].maxpool()
    self.cnn[key].conv2d(3, 64)
    self.cnn[key].maxpool()
    self.cnn[key].conv2d(3, 64)
    self.cnn[key].conv2d(3, 128)
    self.cnn[key].maxpool()
    self.cnn[key].conv2d(3, 128)
    self.cnn[key].flatten()
    self.cnn[key].fc(512)
    self.cnn[key].fc(11, activation_fn=None)
    self.logits[key] = self.cnn[key].get_output()
def _network(self):
    _num_anchors = len(self.flags['anchor_scales']) * 3

    rpn_layers = Layers(self.featureMaps)

    with tf.variable_scope('rpn'):
        # Spatial windowing
        rpn_layers.conv2d(filter_size=3, output_channels=512)
        features = rpn_layers.get_output()

        # Box-classification layer (objectness)
        self.rpn_bbox_cls_layers = Layers(features)
        self.rpn_bbox_cls_layers.conv2d(filter_size=1, output_channels=_num_anchors * 2, activation_fn=None)

        # Anchor Target Layer (anchors and deltas)
        self.rpn_cls_score = self.rpn_bbox_cls_layers.get_output()
        self.rpn_labels, self.rpn_bbox_targets, self.rpn_bbox_inside_weights, self.rpn_bbox_outside_weights = \
            anchor_target_layer(rpn_cls_score=self.rpn_cls_score, gt_boxes=self.gt_boxes,
                                im_dims=self.im_dims, anchor_scales=self.flags['anchor_scales'])

        # Bounding-Box regression layer (bounding box predictions)
        self.rpn_bbox_pred_layers = Layers(features)
        self.rpn_bbox_pred_layers.conv2d(filter_size=1, output_channels=_num_anchors * 4, activation_fn=None)
def _conv_layers(self, x):
    conv_layers = Layers(x)

    # Convolutional layers
    res_blocks = [1, 3, 4, 23, 3]
    output_channels = [64, 256, 512, 1024, 2048]
    with tf.variable_scope('scale0'):
        conv_layers.conv2d(filter_size=7, output_channels=output_channels[0], stride=2, padding='SAME', b_value=None)
        conv_layers.maxpool(k=3)
    with tf.variable_scope('scale1'):
        conv_layers.res_layer(filter_size=3, output_channels=output_channels[1], stride=2)
        for block in range(res_blocks[1] - 1):
            conv_layers.res_layer(filter_size=3, output_channels=output_channels[1], stride=1)
    with tf.variable_scope('scale2'):
        conv_layers.res_layer(filter_size=3, output_channels=output_channels[2], stride=2)
        for block in range(res_blocks[2] - 1):
            conv_layers.res_layer(filter_size=3, output_channels=output_channels[2], stride=1)
    with tf.variable_scope('scale3'):
        conv_layers.res_layer(filter_size=3, output_channels=output_channels[3], stride=2)
        for block in range(res_blocks[3] - 1):
            conv_layers.res_layer(filter_size=3, output_channels=output_channels[3], stride=1)
    with tf.variable_scope('scale4'):
        conv_layers.res_layer(filter_size=3, output_channels=output_channels[4], stride=2)
        for block in range(res_blocks[4] - 1):
            conv_layers.res_layer(filter_size=3, output_channels=output_channels[4], stride=1)
    conv_layers.avgpool(globe=True)

    # Fully Connected Layer
    conv_layers.fc(output_nodes=10)
    return conv_layers.get_output()
def _network(self, x):
    conv_layers = Layers(x)

    # Convolutional layers
    with tf.variable_scope('resnet101'):
        res_blocks = [1, 3, 4, 23, 3]
        output_channels = [64, 256, 512, 1024, 2048]
        with tf.variable_scope('scale0'):
            conv_layers.conv2d(filter_size=7, output_channels=output_channels[0], stride=2,
                               padding='SAME', b_value=None)
            conv_layers.maxpool(k=3)
        with tf.variable_scope('scale1'):
            conv_layers.res_layer(filter_size=3, output_channels=output_channels[1], stride=2)
            for block in range(res_blocks[1] - 1):
                conv_layers.res_layer(filter_size=3, output_channels=output_channels[1], stride=1)
        with tf.variable_scope('scale2'):
            conv_layers.res_layer(filter_size=3, output_channels=output_channels[2], stride=2)
            for block in range(res_blocks[2] - 1):
                conv_layers.res_layer(filter_size=3, output_channels=output_channels[2], stride=1)
        with tf.variable_scope('scale3'):
            conv_layers.res_layer(filter_size=3, output_channels=output_channels[3], stride=2)
            for block in range(res_blocks[3] - 1):
                conv_layers.res_layer(filter_size=3, output_channels=output_channels[3], stride=1)
        with tf.variable_scope('scale4'):
            conv_layers.res_layer(filter_size=3, output_channels=output_channels[4], stride=2)
            for block in range(res_blocks[4] - 1):
                conv_layers.res_layer(filter_size=3, output_channels=output_channels[4], stride=1)
    return conv_layers
class rpn:
    '''
    Region Proposal Network (RPN): From the convolutional feature maps (TensorBase Layers object) of the last layer,
    generate bounding boxes relative to anchor boxes and give an "objectness" score to each
    '''
    def __init__(self, featureMaps, gt_boxes, im_dims, _feat_stride, flags):
        self.featureMaps = featureMaps
        self.gt_boxes = gt_boxes
        self.im_dims = im_dims
        self._feat_stride = _feat_stride
        self.flags = flags
        self._network()

    def _network(self):
        _num_anchors = len(self.flags['anchor_scales']) * 3

        rpn_layers = Layers(self.featureMaps)

        with tf.variable_scope('rpn'):
            # Spatial windowing
            rpn_layers.conv2d(filter_size=3, output_channels=512)
            features = rpn_layers.get_output()

            with tf.variable_scope('cls'):
                # Box-classification layer (objectness)
                self.rpn_bbox_cls_layers = Layers(features)
                self.rpn_bbox_cls_layers.conv2d(filter_size=1, output_channels=_num_anchors * 2, activation_fn=None)

                # Anchor Target Layer (anchors and deltas)
                rpn_cls_score = self.rpn_bbox_cls_layers.get_output()
                self.rpn_labels, self.rpn_bbox_targets, self.rpn_bbox_inside_weights, self.rpn_bbox_outside_weights = \
                    anchor_target_layer(rpn_cls_score=rpn_cls_score, gt_boxes=self.gt_boxes, im_dims=self.im_dims,
                                        _feat_stride=self._feat_stride, anchor_scales=self.flags['anchor_scales'])

            with tf.variable_scope('bbox'):
                # Bounding-Box regression layer (bounding box predictions)
                self.rpn_bbox_pred_layers = Layers(features)
                self.rpn_bbox_pred_layers.conv2d(filter_size=1, output_channels=_num_anchors * 4, activation_fn=None)

    # Get functions
    def get_rpn_cls_score(self):
        return self.rpn_bbox_cls_layers.get_output()

    def get_rpn_labels(self):
        return self.rpn_labels

    def get_rpn_bbox_pred(self):
        return self.rpn_bbox_pred_layers.get_output()

    def get_rpn_bbox_targets(self):
        return self.rpn_bbox_targets

    def get_rpn_bbox_inside_weights(self):
        return self.rpn_bbox_inside_weights

    def get_rpn_bbox_outside_weights(self):
        return self.rpn_bbox_outside_weights

    # Loss functions
    def get_rpn_cls_loss(self):
        rpn_cls_score = self.get_rpn_cls_score()
        rpn_labels = self.get_rpn_labels()
        return rpn_cls_loss(rpn_cls_score, rpn_labels)

    def get_rpn_bbox_loss(self):
        rpn_bbox_pred = self.get_rpn_bbox_pred()
        rpn_bbox_targets = self.get_rpn_bbox_targets()
        rpn_bbox_inside_weights = self.get_rpn_bbox_inside_weights()
        rpn_bbox_outside_weights = self.get_rpn_bbox_outside_weights()
        return rpn_bbox_loss(rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights)
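# A hedged training sketch for the rpn variant above: its two losses are summed into a single RPN
# objective. The optimizer, learning rate, _feat_stride value, and the reuse of the placeholder
# tensors from the earlier sketch are all assumptions.
rpn_net = rpn(featureMaps, gt_boxes, im_dims, _feat_stride=16, flags=flags)
rpn_total_loss = rpn_net.get_rpn_cls_loss() + rpn_net.get_rpn_bbox_loss()
train_op = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(rpn_total_loss)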
class fast_rcnn:
    '''
    Crop and resize areas from the feature-extracting CNN's feature maps according to the ROIs
    generated from the ROI proposal layer
    '''
    def __init__(self, featureMaps, roi_proposal_net):
        self.featureMaps = featureMaps
        self.roi_proposal_net = roi_proposal_net
        self.rois = roi_proposal_net.get_rois()
        self.im_dims = roi_proposal_net.im_dims
        self.flags = roi_proposal_net.flags
        self._network()

    def _network(self):
        with tf.variable_scope('fast_rcnn'):
            # ROI pooling
            pooledFeatures = roi_pool(self.featureMaps, self.rois, self.im_dims)

            # Fully connected layers (with dropout)
            with tf.variable_scope('fc'):
                self.rcnn_fc_layers = Layers(pooledFeatures)
                self.rcnn_fc_layers.flatten()
                self.rcnn_fc_layers.fc(output_nodes=4096, keep_prob=0.5)
                self.rcnn_fc_layers.fc(output_nodes=4096, keep_prob=0.5)
                hidden = self.rcnn_fc_layers.get_output()

            # Classifier score
            with tf.variable_scope('cls'):
                self.rcnn_cls_layers = Layers(hidden)
                self.rcnn_cls_layers.fc(output_nodes=self.flags['num_classes'], activation_fn=None)

            # Bounding Box refinement
            with tf.variable_scope('bbox'):
                self.rcnn_bbox_layers = Layers(hidden)
                self.rcnn_bbox_layers.fc(output_nodes=4 * self.flags['num_classes'], activation_fn=None)

    # Get functions
    def get_cls_score(self):
        return self.rcnn_cls_layers.get_output()

    def get_cls_prob(self):
        logits = self.get_cls_score()
        return tf.nn.softmax(logits)

    def get_bbox_refinement(self):
        return self.rcnn_bbox_layers.get_output()

    # Loss functions
    def get_fast_rcnn_cls_loss(self):
        fast_rcnn_cls_score = self.get_cls_score()
        labels = self.roi_proposal_net.get_labels()
        return fast_rcnn_cls_loss(fast_rcnn_cls_score, labels)

    def get_fast_rcnn_bbox_loss(self):
        fast_rcnn_bbox_pred = self.get_bbox_refinement()
        bbox_targets = self.roi_proposal_net.get_bbox_targets()
        roi_inside_weights = self.roi_proposal_net.get_bbox_inside_weights()
        roi_outside_weights = self.roi_proposal_net.get_bbox_outside_weights()
        return fast_rcnn_bbox_loss(fast_rcnn_bbox_pred, bbox_targets, roi_inside_weights, roi_outside_weights)
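# A hedged wiring sketch for the fast_rcnn class above. roi_proposal_net stands in for a ROI
# proposal network that is not shown in this section; it is assumed to expose get_rois(),
# get_labels(), get_bbox_targets(), the inside/outside weight getters, plus im_dims and flags,
# exactly as the methods above require.
fast_rcnn_net = fast_rcnn(featureMaps, roi_proposal_net)
cls_prob = fast_rcnn_net.get_cls_prob()                # per-ROI class probabilities (softmax over num_classes)
bbox_refinement = fast_rcnn_net.get_bbox_refinement()  # 4 * num_classes box deltas per ROI
detector_loss = fast_rcnn_net.get_fast_rcnn_cls_loss() + fast_rcnn_net.get_fast_rcnn_bbox_loss()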
class rpn:
    '''
    Region Proposal Network (RPN): From the convolutional feature maps (TensorBase Layers object) of the last layer,
    generate bounding boxes relative to anchor boxes and give an "objectness" score to each.

    In evaluation mode (eval_mode==True), gt_boxes should be None.
    '''
    def __init__(self, featureMaps, gt_boxes, im_dims, _feat_stride, eval_mode):
        self.featureMaps = featureMaps
        self.gt_boxes = gt_boxes
        self.im_dims = im_dims
        self._feat_stride = _feat_stride
        self.anchor_scales = cfg.RPN_ANCHOR_SCALES
        self.eval_mode = eval_mode
        self._network()

    def _network(self):
        # There shouldn't be any gt_boxes if in evaluation mode
        if self.eval_mode is True:
            assert self.gt_boxes is None, \
                'Evaluation mode should not have ground truth boxes (or else what are you detecting for?)'

        _num_anchors = len(self.anchor_scales) * 3

        rpn_layers = Layers(self.featureMaps)

        with tf.variable_scope('rpn'):
            # Spatial windowing
            for i in range(len(cfg.RPN_OUTPUT_CHANNELS)):
                rpn_layers.conv2d(filter_size=cfg.RPN_FILTER_SIZES[i], output_channels=cfg.RPN_OUTPUT_CHANNELS[i])
            features = rpn_layers.get_output()

            with tf.variable_scope('cls'):
                # Box-classification layer (objectness)
                self.rpn_bbox_cls_layers = Layers(features)
                self.rpn_bbox_cls_layers.conv2d(filter_size=1, output_channels=_num_anchors * 2, activation_fn=None)

            with tf.variable_scope('target'):
                # Only calculate targets in train mode. No ground truth boxes in evaluation mode
                if self.eval_mode is False:
                    # Anchor Target Layer (anchors and deltas)
                    rpn_cls_score = self.rpn_bbox_cls_layers.get_output()
                    self.rpn_labels, self.rpn_bbox_targets, self.rpn_bbox_inside_weights, self.rpn_bbox_outside_weights = \
                        anchor_target_layer(rpn_cls_score=rpn_cls_score, gt_boxes=self.gt_boxes, im_dims=self.im_dims,
                                            _feat_stride=self._feat_stride, anchor_scales=self.anchor_scales)

            with tf.variable_scope('bbox'):
                # Bounding-Box regression layer (bounding box predictions)
                self.rpn_bbox_pred_layers = Layers(features)
                self.rpn_bbox_pred_layers.conv2d(filter_size=1, output_channels=_num_anchors * 4, activation_fn=None)

    # Get functions
    def get_rpn_cls_score(self):
        return self.rpn_bbox_cls_layers.get_output()

    def get_rpn_labels(self):
        assert self.eval_mode is False, 'No RPN labels without ground truth boxes'
        return self.rpn_labels

    def get_rpn_bbox_pred(self):
        return self.rpn_bbox_pred_layers.get_output()

    def get_rpn_bbox_targets(self):
        assert self.eval_mode is False, 'No RPN bounding box targets without ground truth boxes'
        return self.rpn_bbox_targets

    def get_rpn_bbox_inside_weights(self):
        assert self.eval_mode is False, 'No RPN inside weights without ground truth boxes'
        return self.rpn_bbox_inside_weights

    def get_rpn_bbox_outside_weights(self):
        assert self.eval_mode is False, 'No RPN outside weights without ground truth boxes'
        return self.rpn_bbox_outside_weights

    # Loss functions
    def get_rpn_cls_loss(self):
        assert self.eval_mode is False, 'No RPN cls loss without ground truth boxes'
        rpn_cls_score = self.get_rpn_cls_score()
        rpn_labels = self.get_rpn_labels()
        return rpn_cls_loss(rpn_cls_score, rpn_labels)

    def get_rpn_bbox_loss(self):
        assert self.eval_mode is False, 'No RPN bbox loss without ground truth boxes'
        rpn_bbox_pred = self.get_rpn_bbox_pred()
        rpn_bbox_targets = self.get_rpn_bbox_targets()
        rpn_bbox_inside_weights = self.get_rpn_bbox_inside_weights()
        rpn_bbox_outside_weights = self.get_rpn_bbox_outside_weights()
        return rpn_bbox_loss(rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights)
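# A hedged example of constructing the eval-aware rpn above in its two supported modes.
# cfg, the backbone output, and _feat_stride=16 are placeholders for this sketch.
# Training graph: ground-truth boxes are required so the anchor target layer can run.
rpn_train = rpn(featureMaps, gt_boxes, im_dims, _feat_stride=16, eval_mode=False)
total_rpn_loss = rpn_train.get_rpn_cls_loss() + rpn_train.get_rpn_bbox_loss()

# Evaluation graph: pass gt_boxes=None, otherwise the assertion in _network fires.
rpn_eval = rpn(featureMaps, None, im_dims, _feat_stride=16, eval_mode=True)
eval_scores = rpn_eval.get_rpn_cls_score()
eval_deltas = rpn_eval.get_rpn_bbox_pred()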
def _network(self, x):
    conv_layers = Layers(x)

    # Convolutional layers
    scope = 'resnet' + str(self.depth)
    with tf.variable_scope(scope):
        res_blocks = self.architectures[self.depth]
        output_channels = [64, 256, 512, 1024, 2048]
        with tf.variable_scope('scale0'):
            conv_layers.conv2d(filter_size=7, output_channels=output_channels[0], stride=2,
                               padding='SAME', b_value=None)  # Downsample
            conv_layers.maxpool(k=3, s=2)  # Downsample
        with tf.variable_scope('scale1'):
            for block in range(res_blocks[1]):
                conv_layers.res_layer(filter_size=3, output_channels=output_channels[1], stride=1)
        with tf.variable_scope('scale2'):
            conv_layers.res_layer(filter_size=3, output_channels=output_channels[2], stride=2)  # Downsample
            for block in range(res_blocks[2] - 1):
                conv_layers.res_layer(filter_size=3, output_channels=output_channels[2], stride=1)
        with tf.variable_scope('scale3'):
            conv_layers.res_layer(filter_size=3, output_channels=output_channels[3], stride=2)  # Downsample
            for block in range(res_blocks[3] - 1):
                conv_layers.res_layer(filter_size=3, output_channels=output_channels[3], stride=1)
        with tf.variable_scope('scale4'):
            conv_layers.res_layer(filter_size=3, output_channels=output_channels[4], stride=2)  # Downsample
            for block in range(res_blocks[4] - 1):
                conv_layers.res_layer(filter_size=3, output_channels=output_channels[4], stride=1)
    return conv_layers
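# The self.architectures lookup used above is not shown in this section. A plausible mapping,
# assuming the same [stem, scale1..scale4] block-count convention as the ResNet-101 list
# [1, 3, 4, 23, 3] used in the _conv_layers variants earlier (standard bottleneck block counts):
architectures = {
    50:  [1, 3, 4, 6, 3],
    101: [1, 3, 4, 23, 3],
    152: [1, 3, 8, 36, 3],
}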
class fast_rcnn:
    '''
    Crop and resize areas from the feature-extracting CNN's feature maps according to the ROIs
    generated from the ROI proposal layer
    '''
    def __init__(self, featureMaps, roi_proposal_net, eval_mode):
        self.featureMaps = featureMaps
        self.roi_proposal_net = roi_proposal_net
        self.rois = roi_proposal_net.get_rois()
        self.im_dims = roi_proposal_net.im_dims
        self.num_classes = cfg.NUM_CLASSES
        self.eval_mode = eval_mode
        self._network()

    def _network(self):
        with tf.variable_scope('fast_rcnn'):
            # No dropout in evaluation mode
            keep_prob = cfg.FRCNN_DROPOUT_KEEP_RATE if self.eval_mode is False else 1.0

            # ROI pooling
            pooledFeatures = roi_pool(self.featureMaps, self.rois, self.im_dims)

            # Fully connected layers (with dropout)
            with tf.variable_scope('fc'):
                self.rcnn_fc_layers = Layers(pooledFeatures)
                self.rcnn_fc_layers.flatten()
                for i in range(len(cfg.FRCNN_FC_HIDDEN)):
                    self.rcnn_fc_layers.fc(output_nodes=cfg.FRCNN_FC_HIDDEN[i], keep_prob=keep_prob)
                hidden = self.rcnn_fc_layers.get_output()

            # Classifier score
            with tf.variable_scope('cls'):
                self.rcnn_cls_layers = Layers(hidden)
                self.rcnn_cls_layers.fc(output_nodes=self.num_classes, activation_fn=None)

            # Bounding Box refinement
            with tf.variable_scope('bbox'):
                self.rcnn_bbox_layers = Layers(hidden)
                self.rcnn_bbox_layers.fc(output_nodes=self.num_classes * 4, activation_fn=None)

    # Get functions
    def get_cls_score(self):
        return self.rcnn_cls_layers.get_output()

    def get_cls_prob(self):
        logits = self.get_cls_score()
        return tf.nn.softmax(logits)

    def get_bbox_refinement(self):
        return self.rcnn_bbox_layers.get_output()

    # Loss functions
    def get_fast_rcnn_cls_loss(self):
        assert self.eval_mode is False, 'No Fast RCNN cls loss without ground truth boxes'
        fast_rcnn_cls_score = self.get_cls_score()
        labels = self.roi_proposal_net.get_labels()
        return fast_rcnn_cls_loss(fast_rcnn_cls_score, labels)

    def get_fast_rcnn_bbox_loss(self):
        assert self.eval_mode is False, 'No Fast RCNN bbox loss without ground truth boxes'
        fast_rcnn_bbox_pred = self.get_bbox_refinement()
        bbox_targets = self.roi_proposal_net.get_bbox_targets()
        roi_inside_weights = self.roi_proposal_net.get_bbox_inside_weights()
        roi_outside_weights = self.roi_proposal_net.get_bbox_outside_weights()
        return fast_rcnn_bbox_loss(fast_rcnn_bbox_pred, bbox_targets, roi_inside_weights, roi_outside_weights)
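# A hedged end-to-end sketch tying the eval-aware pieces together: backbone -> rpn -> ROI
# proposals -> fast_rcnn. roi_proposal_layer is a hypothetical stand-in for the proposal stage
# (not shown in this section); only the rpn and fast_rcnn constructors come from the classes above.
rpn_net = rpn(featureMaps, gt_boxes, im_dims, _feat_stride=16, eval_mode=False)
roi_proposal_net = roi_proposal_layer(rpn_net, gt_boxes, im_dims, eval_mode=False)   # hypothetical stage
fast_rcnn_net = fast_rcnn(featureMaps, roi_proposal_net, eval_mode=False)
total_loss = (rpn_net.get_rpn_cls_loss() + rpn_net.get_rpn_bbox_loss() +
              fast_rcnn_net.get_fast_rcnn_cls_loss() + fast_rcnn_net.get_fast_rcnn_bbox_loss())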