def build_network(self, images, class_num, is_training=True, keep_prob=0.5, scope='Fast-RCNN'):
    self.conv1 = self.convLayer(images, 11, 11, 4, 4, 96, "conv1", "VALID")
    lrn1 = self.LRN(self.conv1, 2, 2e-05, 0.75, "norm1")
    self.pool1 = self.maxPoolLayer(lrn1, 3, 3, 2, 2, "pool1", "VALID")
    self.conv2 = self.convLayer(self.pool1, 5, 5, 1, 1, 256, "conv2", groups=2)
    lrn2 = self.LRN(self.conv2, 2, 2e-05, 0.75, "lrn2")
    self.pool2 = self.maxPoolLayer(lrn2, 3, 3, 2, 2, "pool2", "VALID")
    self.conv3 = self.convLayer(self.pool2, 3, 3, 1, 1, 384, "conv3")
    self.conv4 = self.convLayer(self.conv3, 3, 3, 1, 1, 384, "conv4", groups=2)
    self.conv5 = self.convLayer(self.conv4, 3, 3, 1, 1, 256, "conv5", groups=2)

    self.roi_pool6 = roi_pooling(self.conv5, self.rois, pool_height=6, pool_width=6)

    with slim.arg_scope([slim.fully_connected, slim.conv2d],
                        activation_fn=nn_ops.relu,
                        weights_initializer=tf.truncated_normal_initializer(0.0, 0.01),
                        weights_regularizer=slim.l2_regularizer(0.0005)):
        flatten = slim.flatten(self.roi_pool6, scope='flat_32')
        self.fc1 = slim.fully_connected(flatten, 4096, scope='fc_6')
        drop6 = slim.dropout(self.fc1, keep_prob=keep_prob, is_training=is_training, scope='dropout6')
        self.fc2 = slim.fully_connected(drop6, 4096, scope='fc_7')
        drop7 = slim.dropout(self.fc2, keep_prob=keep_prob, is_training=is_training, scope='dropout7')
        cls = slim.fully_connected(drop7, class_num, activation_fn=nn_ops.softmax, scope='fc_8')
        bbox = slim.fully_connected(drop7, (self.class_num - 1) * 4,
                                    weights_initializer=tf.truncated_normal_initializer(0.0, 0.001),
                                    activation_fn=None, scope='fc_9')

    return cls, bbox
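# A minimal, self-contained sketch (not from the original model) of the ROI
# tensor that roi_pooling above consumes: each row is
# [batch_index, x_left, y_top, x_right, y_bottom] in feature-map coordinates,
# dtype int32 (the "batch, wl, hl, wr, hr" order noted in the snippets below),
# and the pooled output is [n_rois, channels, pool_height, pool_width] as in
# the gradient-check snippet near the end. Shapes here are illustrative
# assumptions.
import tensorflow as tf
from roi_pooling.roi_pooling_ops import roi_pooling

feature_map = tf.placeholder(tf.float32, [None, None, None, 256])  # e.g. a conv5-style feature map
rois = tf.constant([[0, 0, 0, 5, 5],
                    [0, 2, 2, 7, 7]], dtype=tf.int32)  # [batch_idx, x1, y1, x2, y2]
pooled = roi_pooling(feature_map, rois, pool_height=6, pool_width=6)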
def model_pred_train_ROI_ori(self, cimg_conv5, cimg_conv6, pimg_conv6, pROI, POLICY, trainable=True, test=False):
    """
    Predict final feature map
    :param pROI: pROI
    :return net_out: [_, model_size/32, model_size/32, 5] : tx, ty, tw, th, object_score ... YOLOv2
    """
    from roi_pooling.roi_pooling_ops import roi_pooling

    # W, H for 4 batch size
    # batch, Wl, Hl, WR, HR
    rois = [[0, pROI[0, 0, 0], pROI[0, 0, 1], pROI[0, 0, 2], pROI[0, 0, 3]],
            [1, pROI[1, 0, 0], pROI[1, 0, 1], pROI[1, 0, 2], pROI[1, 0, 3]],
            [2, pROI[2, 0, 0], pROI[2, 0, 1], pROI[2, 0, 2], pROI[2, 0, 3]],
            [3, pROI[3, 0, 0], pROI[3, 0, 1], pROI[3, 0, 2], pROI[3, 0, 3]]]

    # roi_pooling -> batch, wl, hl, wr, hr list
    ROI_feature = roi_pooling(pimg_conv6, rois, pool_height=3, pool_width=3)

    cimg_conv5 = tf.extract_image_patches(cimg_conv5,
                                          ksizes=[1, 2, 2, 1],
                                          strides=[1, 2, 2, 1],
                                          rates=[1, 1, 1, 1],
                                          padding='SAME')
    cimg_conv5_0, cimg_conv5_1, cimg_conv5_2, cimg_conv5_3 = tf.split(cimg_conv5, 4, axis=3)
    cimg_concat = tf.stack([cimg_conv5_0, cimg_conv5_1, cimg_conv5_2, cimg_conv5_3, cimg_conv6],
                           axis=1)  # [batch, 5, H, W, depth]

    # for 4 batch size
    ROI_feature = tf.transpose(ROI_feature, perm=[0, 3, 2, 1])  # batch, height, width, in_channels
    ROI_feature = tf.expand_dims(ROI_feature, axis=4)  # 1 out_channel

    correlation0 = tf.nn.conv2d(cimg_concat[0, :, :, :, :], ROI_feature[0, :, :, :, :], [1, 1, 1, 1], padding='SAME')
    correlation1 = tf.nn.conv2d(cimg_concat[1, :, :, :, :], ROI_feature[1, :, :, :, :], [1, 1, 1, 1], padding='SAME')
    correlation2 = tf.nn.conv2d(cimg_concat[2, :, :, :, :], ROI_feature[2, :, :, :, :], [1, 1, 1, 1], padding='SAME')
    correlation3 = tf.nn.conv2d(cimg_concat[3, :, :, :, :], ROI_feature[3, :, :, :, :], [1, 1, 1, 1], padding='SAME')
    correlation = tf.stack([correlation0, correlation1, correlation2, correlation3], axis=0)  # [batch, 5, H, W, 1]
    correlation = tf.transpose(correlation, perm=[0, 2, 3, 1, 4])
    correlation = correlation[..., 0]

    tf.summary.image("correlation_0", correlation[:, :, :, 0:1], max_outputs=2)
    tf.summary.image("correlation_1", correlation[:, :, :, 1:2], max_outputs=2)
    # tf.summary.text('pROI', pROI)

    # TODO, FC or 1D conv if you want
    net_out = conv_linear(correlation, filters=5, kernel=1, scope='conv_final', trainable=trainable)
    tf.summary.image("objectness", net_out[:, :, :, 4:], max_outputs=2)

    # TODO, get highest object score
    # softmax,
    # TODO, calculate box with ROI_coordinate
    return net_out
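# An alternative sketch (an assumption, not part of the original function):
# the four hand-unrolled tf.nn.conv2d calls above can be expressed with
# tf.map_fn over the batch dimension, which also removes the hard-coded
# batch size of 4.
def batched_correlation(cimg_concat, ROI_feature):
    # cimg_concat: [batch, 5, H, W, depth]; ROI_feature: [batch, ph, pw, depth, 1]
    return tf.map_fn(
        lambda x: tf.nn.conv2d(x[0], x[1], [1, 1, 1, 1], padding='SAME'),
        (cimg_concat, ROI_feature),
        dtype=tf.float32)  # -> [batch, 5, H, W, 1]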
def model_pred_train_ROI(self, cimg_conv5, cimg_conv6, pimg_conv6, pROI, POLICY, trainable=True, test=False):
    """
    Predict final feature map
    :param pROI: pROI
    :return net_out: [_, model_size/32, model_size/32, 5] : tx, ty, tw, th, object_score ... YOLOv2
    """
    from roi_pooling.roi_pooling_ops import roi_pooling

    # crop[:, xl:xr, yl:yr, :]
    # xl, yl, xr, yr = ROI_coordinate, using the first frame's coordinate
    # W, H for 4 batch size
    # batch, Wl, Hl, WR, HR
    # rois = []
    # rois.append(np.append([0], pROI[0][0]))
    # rois.append(np.append([1], pROI[1][1]))
    # rois.append(np.append([2], pROI[2][2]))
    # rois.append(np.append([3], pROI[3][3]))
    print(pROI.shape)
    rois = [[0, pROI[0, 0, 0], pROI[0, 0, 1], pROI[0, 0, 2], pROI[0, 0, 3]],
            [1, pROI[1, 0, 0], pROI[1, 0, 1], pROI[1, 0, 2], pROI[1, 0, 3]],
            [2, pROI[2, 0, 0], pROI[2, 0, 1], pROI[2, 0, 2], pROI[2, 0, 3]],
            [3, pROI[3, 0, 0], pROI[3, 0, 1], pROI[3, 0, 2], pROI[3, 0, 3]]]

    # roi_pooling -> batch, wl, hl, wr, hr list
    ROI_feature = roi_pooling(pimg_conv6, rois, pool_height=1, pool_width=1)
    ROI_feature = tf.transpose(ROI_feature, perm=[0, 3, 2, 1])

    correlation_conv5 = tf.multiply(cimg_conv5, ROI_feature)
    correlation_conv5 = tf.reduce_mean(correlation_conv5, axis=3, keep_dims=True, name='correlation5')
    correlation_conv5 = tf.extract_image_patches(correlation_conv5,
                                                 ksizes=[1, 2, 2, 1],
                                                 strides=[1, 2, 2, 1],
                                                 rates=[1, 1, 1, 1],
                                                 padding='SAME')
    correlation_conv6 = tf.multiply(cimg_conv6, ROI_feature)
    correlation_conv6 = tf.reduce_mean(correlation_conv6, axis=3, keep_dims=True, name='correlation6')
    correlation = tf.concat([correlation_conv5, correlation_conv6], axis=3, name='correlation')

    tf.summary.image("correlation_0", correlation[:, :, :, 0:1], max_outputs=2)
    tf.summary.image("correlation_1", correlation[:, :, :, 1:2], max_outputs=2)

    # TODO, FC or 1D conv if you want
    net_out = conv_linear(correlation, filters=5, kernel=1, scope='conv_final', trainable=trainable)
    tf.summary.image("objectness", net_out[:, :, :, 4:], max_outputs=2)

    # TODO, get highest object score
    # softmax,
    # TODO, calculate box with ROI_coordinate
    return net_out
def model_pred(self, cimg_conv5, cimg_conv6, pimg_conv6, ROI_coordinate, pROI, POLICY,
               roi_pool=True, trainable=True, reuse=False, test=False):
    """
    Predict final feature map
    :param ROI_coordinate: pbox_xy
    :return net_out: [_, model_size/32, model_size/32, 5] : tx, ty, tw, th, object_score ... YOLOv2
    """
    if roi_pool:
        from roi_pooling.roi_pooling_ops import roi_pooling
        pROI = tf.expand_dims(pROI, 0)
        pROI = tf.expand_dims(pROI, 0)
        rois = [[0, pROI[0, 0, 0], pROI[0, 0, 1], pROI[0, 0, 2], pROI[0, 0, 3]]]
        # roi_pooling -> batch, wl, hl, wr, hr list
        ROI_feature = roi_pooling(pimg_conv6, rois, pool_height=1, pool_width=1)
        ROI_feature = tf.transpose(ROI_feature, perm=[0, 3, 2, 1])
    else:
        # crop[:, xl:xr, yl:yr, :]
        # xl, yl, xr, yr = ROI_coordinate, using the first frame's coordinate
        xl = ROI_coordinate[0, 0, 0]
        yl = ROI_coordinate[0, 0, 1]
        # TODO, ROI 1x1 region
        xr = xl + 1
        yr = yl + 1
        ROI_feature = pimg_conv6[:, yl:yr, xl:xr, :]

    # TODO, if ROI is not 1x1, modify this region
    # _, h, w, c = ROI_feature.shape.as_list()
    # pool_avg = slim.avg_pool2d(ROI_feature, [h, w], scope='avg_pool')

    # TODO, correlation with high level feature
    # tf implementation https://github.com/jgorgenucsd/corr_tf
    # @tf.RegisterGradient("Correlation")
    # corr = correlation(cimg_conv6, pool_avg, ...)
    # pool_avg (pimg conv_6) should not be trainable
    # correlation_conv5 = tf.nn.conv2d(cimg_conv5, ROI_feature, strides=[1, 1, 1, 1], padding='SAME', name='correlation5')
    # correlation_conv5 = tf.extract_image_patches(correlation_conv5,
    #                                              ksizes=[1, 2, 2, 1],
    #                                              strides=[1, 2, 2, 1],
    #                                              rates=[1, 1, 1, 1],
    #                                              padding='SAME')
    # correlation_conv6 = tf.nn.conv2d(cimg_conv6, ROI_feature, strides=[1, 1, 1, 1], padding='SAME', name='correlation6')
    # correlation = tf.concat([correlation_conv5, correlation_conv6], axis=3, name='correlation')

    correlation_conv5 = tf.multiply(cimg_conv5, ROI_feature)
    correlation_conv5 = tf.reduce_mean(correlation_conv5, axis=3, keep_dims=True, name='correlation5')
    correlation_conv5 = tf.extract_image_patches(correlation_conv5,
                                                 ksizes=[1, 2, 2, 1],
                                                 strides=[1, 2, 2, 1],
                                                 rates=[1, 1, 1, 1],
                                                 padding='SAME')
    correlation_conv6 = tf.multiply(cimg_conv6, ROI_feature)
    correlation_conv6 = tf.reduce_mean(correlation_conv6, axis=3, keep_dims=True, name='correlation6')
    self.correlation = tf.concat([correlation_conv5, correlation_conv6], axis=3, name='correlation')

    tf.summary.image("correlation_0", self.correlation[:, :, :, 0:1], max_outputs=2)
    tf.summary.image("correlation_1", self.correlation[:, :, :, 1:2], max_outputs=2)

    # TODO, FC or 1D conv if you want
    # for test
    with slim.arg_scope([slim.conv2d], reuse=reuse):
        net_out = conv_linear(self.correlation, filters=5, kernel=1, scope='conv_final', trainable=trainable)
    # the objectness score is channel 4 of net_out (self.correlation has only 2 channels)
    tf.summary.image("objectness", net_out[:, :, :, 4:], max_outputs=2)

    # TODO, get highest object score
    # softmax,
    # TODO, calculate box with ROI_coordinate
    return net_out
def get_proposal_cls_net(point_cloud, img_seg_map, is_training, bn_decay, end_points):
    batch_size = point_cloud.shape[0]
    l0_xyz = tf.slice(point_cloud, [0, 0, 0], [-1, -1, 3])
    l0_points = tf.slice(point_cloud, [0, 0, 3], [-1, -1, NUM_CHANNEL - 3])

    # Set abstraction layers
    l1_xyz, l1_points = pointnet_sa_module_msg(
        l0_xyz, l0_points, 128, [0.2, 0.4, 0.8], [32, 64, 128],
        [[32, 32, 64], [64, 64, 128], [64, 96, 128]],
        is_training, bn_decay, scope='cls_layer1')
    l2_xyz, l2_points = pointnet_sa_module_msg(
        l1_xyz, l1_points, 32, [0.4, 0.8, 1.6], [64, 64, 128],
        [[64, 64, 128], [128, 128, 256], [128, 128, 256]],
        is_training, bn_decay, scope='cls_layer2')
    l3_xyz, l3_points, _ = pointnet_sa_module(l2_xyz, l2_points, npoint=None, radius=None, nsample=None,
                                              mlp=[128, 256, 512], mlp2=None, group_all=True,
                                              is_training=is_training, bn_decay=bn_decay, scope='cls_layer3')

    # image feature pooling
    _img_pixel_size = np.asarray([360, 1200])
    box2d_corners, box2d_corners_norm = projection.tf_project_to_image_space(
        proposal_boxes, calib, _img_pixel_size)

    # crop and resize
    '''
    # y1, x1, y2, x2
    box2d_corners_norm_reorder = tf.stack([
        tf.gather(box2d_corners_norm, 1, axis=-1),
        tf.gather(box2d_corners_norm, 0, axis=-1),
        tf.gather(box2d_corners_norm, 3, axis=-1),
        tf.gather(box2d_corners_norm, 2, axis=-1),
    ], axis=-1)
    img_rois = tf.image.crop_and_resize(
        img_seg_map,
        box2d_corners_norm_reorder,
        tf.range(0, batch_size),
        [16, 16])
    '''

    # ROI pooling as in Faster R-CNN
    img_seg_map = tf_util.conv2d(img_seg_map, 1, 1,
                                 padding='VALID', bn=True,
                                 is_training=is_training,
                                 scope='cls_feature_bottleneck', bn_decay=bn_decay)
    # each ROI row: feature map index, upper-left and bottom-right coordinates
    roi_crops = tf.concat([tf.expand_dims(tf.range(0, batch_size), axis=-1), box2d_corners], axis=-1)
    img_rois = roi_pooling(img_seg_map, roi_crops, pool_height=16, pool_width=16)
    img_feats = tf.reshape(img_rois, [batch_size, -1])

    # classification
    point_feats = tf.reshape(l3_points, [batch_size, -1])
    # use image only
    # cls_net = img_feats
    # use point and image features
    cls_net = tf.concat([point_feats, img_feats], axis=1)
    # use point only
    # cls_net = point_feats
    cls_net = tf_util.fully_connected(cls_net, 512, bn=True, is_training=is_training, scope='cls_fc1', bn_decay=bn_decay)
    cls_net = tf_util.dropout(cls_net, keep_prob=0.5, is_training=is_training, scope='cls_dp1')
    cls_net = tf_util.fully_connected(cls_net, 256, bn=True, is_training=is_training, scope='cls_fc2', bn_decay=bn_decay)
    cls_net = tf_util.dropout(cls_net, keep_prob=0.5, is_training=is_training, scope='cls_dp2')
    cls_net = tf_util.fully_connected(cls_net, NUM_OBJ_CLASSES, activation_fn=None, scope='cls_logits')
    end_points['cls_logits'] = cls_net
    return end_points
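# Note (an assumption about the op, grounded in the test snippet below, not in
# the original model): roi_pooling expects int32 ROIs given in feature-map
# coordinates, and tf.concat requires matching dtypes, so the image-space
# box2d_corners would normally be rescaled and cast first. `feature_stride`
# below is a hypothetical stand-in for the image-to-feature-map downsampling
# factor.
def make_roi_crops(box2d_corners, batch_size, feature_stride=1):
    # box2d_corners: [batch_size, 4] image-space (x1, y1, x2, y2) boxes
    batch_idx = tf.expand_dims(tf.range(0, batch_size, dtype=tf.int32), axis=-1)
    corners_fm = tf.cast(box2d_corners / float(feature_stride), tf.int32)
    return tf.concat([batch_idx, corners_fm], axis=-1)  # [batch_size, 5], int32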
              [0, 0, 0, 2, 2],
              [0, 0, 0, 3, 3]]
rois_value = np.asarray(rois_value, dtype='int32')

# the pool_height and width are parameters of the ROI layer
pool_height, pool_width = (2, 2)
n_rois = len(rois_value)
y_shape = [n_rois, 1, pool_height, pool_width]

print('Input: ', input_value, ', shape: ', input_value.shape)
print('ROIs: ', rois_value, ', shape: ', rois_value.shape)

# precise semantics is now only defined by the kernel, need tests
input = tf.placeholder(tf.float32)
rois = tf.placeholder(tf.int32)

y = roi_pooling(input, rois, pool_height=2, pool_width=2)
mean = tf.reduce_mean(y)
grads = tf.gradients(mean, input)
print(type(grads))
print(len(grads))
print(grads)

print(input_value.shape)

with tf.Session('') as sess:
    input_const = tf.constant(input_value, tf.float32)
    rois_const = tf.constant(rois_value, tf.int32)
    y = roi_pooling(input_const, rois_const, pool_height=2, pool_width=2)
    mean = tf.reduce_mean(y)

    numerical_grad_error_1 = tf.test.compute_gradient_error([input_const], [input_value.shape], y, y_shape)
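# A follow-up sketch (the tolerance is an assumed value, not from the original
# test): tf.test.compute_gradient_error returns a plain float, the maximum
# difference between the numeric and analytic Jacobians, so it can be checked
# directly against a small threshold.
print('numerical gradient error: ', numerical_grad_error_1)
assert numerical_grad_error_1 < 1e-2, 'roi_pooling gradient check failed'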
def add_roi_pooling(self):
    with tf.variable_scope('roi_pooling_layer'):
        self.pool5 = roi_pooling(self.relu13, self.roidb, 7, 7)
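# A follow-up sketch (an assumption, not part of the original class): the
# pooled tensor is laid out as [n_rois, channels, pool_height, pool_width]
# (see the y_shape in the gradient-check snippet above), so it is usually
# flattened to one row per ROI before a fully connected head.
def flatten_pooled_rois(pool5):
    # pool5: [n_rois, channels, 7, 7] -> [n_rois, channels * 7 * 7]
    return tf.reshape(pool5, [tf.shape(pool5)[0], -1])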