def test_forward_cpu_gpu_equal(self):
    """Check that the CPU and GPU forward passes give the same output.

    The pooling is run once on the host arrays and once on device copies
    of the same inputs; the two outputs are then compared.
    """
    # cpu
    x_cpu = chainer.Variable(self.x)
    rois_cpu = chainer.Variable(self.rois)
    y_cpu = functions.roi_pooling_2d(
        x_cpu, rois_cpu, outh=self.outh, outw=self.outw,
        spatial_scale=self.spatial_scale)

    # gpu
    x_gpu = chainer.Variable(cuda.to_gpu(self.x))
    rois_gpu = chainer.Variable(cuda.to_gpu(self.rois))
    y_gpu = functions.roi_pooling_2d(
        x_gpu, rois_gpu, outh=self.outh, outw=self.outw,
        spatial_scale=self.spatial_scale)

    # Compare the actual forward outputs.  The previous assertion compared
    # ``self.gy`` with a copy of itself and therefore could never fail.
    gradient_check.assert_allclose(y_cpu.data, cuda.to_cpu(y_gpu.data))
def test_forward_cpu_gpu_equal(self):
    """Forward outputs computed on CPU and on GPU must agree."""
    pooling_kwargs = dict(
        outh=self.outh, outw=self.outw, spatial_scale=self.spatial_scale)

    # Host-side forward pass.
    cpu_out = functions.roi_pooling_2d(
        chainer.Variable(self.x), chainer.Variable(self.rois),
        **pooling_kwargs)

    # Device-side forward pass on copies of the same inputs.
    gpu_out = functions.roi_pooling_2d(
        chainer.Variable(cuda.to_gpu(self.x)),
        chainer.Variable(cuda.to_gpu(self.rois)),
        **pooling_kwargs)

    testing.assert_allclose(cpu_out.data, cuda.to_cpu(gpu_out.data))
def __call__(self, x, im_info):
    """Run the trunk, the RPN heads, the proposal layer and the RCNN head.

    Stores the class probabilities in ``self.scores`` and the clipped
    predicted boxes in ``self.pred_boxes``.

    Returns:
        ``None`` when ``self.train`` is true (no loss is computed here),
        otherwise the tuple ``(self.scores, self.pred_boxes)``.
    """
    feat = self.trunk(x)
    batch_size = x.data.shape[0]

    # RPN heads.  The score map layout is recorded before it is flattened
    # so that it can be restored after the softmax.
    cls_logits = self.rpn_cls_score(feat)
    n_ch, map_h, map_w = cls_logits.data.shape[1:]
    rpn_bbox_pred = self.rpn_bbox_pred(feat)

    # RoI Proposal
    cls_prob = F.softmax(F.reshape(cls_logits, (batch_size, 2, -1)))
    cls_prob = F.reshape(cls_prob, (batch_size, n_ch, map_h, map_w))
    proposals = self.proposal_layer(
        cls_prob, rpn_bbox_pred, im_info, self.train)
    # presumably im_info[0][2] is the image scale factor -- TODO confirm
    boxes = proposals[:, 1:5] / im_info[0][2]
    roi_var = chainer.Variable(proposals, volatile=not self.train)

    # RCNN
    pooled = F.roi_pooling_2d(
        self.trunk.relu5_3_out, roi_var, 7, 7, 0.0625)
    hidden = F.relu(self.fc6(pooled))
    hidden = F.relu(self.fc7(hidden))
    self.scores = F.softmax(self.cls_score(hidden))
    deltas = self.bbox_pred(hidden).data
    self.pred_boxes = clip_boxes(
        bbox_transform_inv(boxes, deltas), im_info[0][:2])

    # Training losses are not implemented in this method.
    if self.train:
        return None
    return self.scores, self.pred_boxes
def __call__(self, x, rois):
    """Run the convolution stack, RoI pooling and both output heads.

    Returns:
        tuple: ``(cls_score, bbox_pred)`` where ``cls_score`` is the
        softmax of ``self.cls_score`` and ``bbox_pred`` is the raw output
        of ``self.bbox_pred``.
    """
    # Convolution groups that are each followed by a 2x2 max pooling.
    pooled_groups = (
        (self.conv1_1, self.conv1_2),
        (self.conv2_1, self.conv2_2),
        (self.conv3_1, self.conv3_2, self.conv3_3),
        (self.conv4_1, self.conv4_2, self.conv4_3),
    )
    feat = x
    for group in pooled_groups:
        for conv in group:
            feat = F.relu(conv(feat))
        feat = F.max_pooling_2d(feat, 2, stride=2)

    # The last group feeds RoI pooling directly, without max pooling.
    for conv in (self.conv5_1, self.conv5_2, self.conv5_3):
        feat = F.relu(conv(feat))
    feat = F.roi_pooling_2d(feat, rois, 7, 7, spatial_scale=0.0625)

    for fc in (self.fc6, self.fc7):
        feat = F.dropout(F.relu(fc(feat)), ratio=0.5)

    cls_score = F.softmax(self.cls_score(feat))
    bbox_pred = self.bbox_pred(feat)
    return cls_score, bbox_pred
def __call__(self, x, rois, roi_indices, test=True):
    """Forward the chain.

    We assume that there are :math:`N` batches.

    Args:
        x (~chainer.Variable): 4D image variable.
        rois (array): A bounding box array containing coordinates of
            proposal boxes, concatenated over all images in the batch.
            Its shape is :math:`(R', 4)`. Given :math:`R_i` proposed
            RoIs from the :math:`i` th image,
            :math:`R' = \\sum _{i=1} ^ N R_i`.
        roi_indices (array): An array containing the index of the image
            each bounding box belongs to. Its shape is :math:`(R',)`.
        test (bool): Whether in test mode or not. This has no effect in
            the current implementation.

    Returns:
        tuple: ``(roi_cls_locs, roi_scores)``, the outputs of
        ``self.cls_loc`` and ``self.score`` respectively.
    """
    # Prepend the image index as the first column of every RoI row.
    index_column = roi_indices.astype(np.float32)[:, None]
    indexed_rois = self.xp.concatenate((index_column, rois), axis=1)

    pooled = F.roi_pooling_2d(
        x, indexed_rois, self.roi_size, self.roi_size, self.spatial_scale)
    hidden = _relu(self.fc7(_relu(self.fc6(pooled))))
    return self.cls_loc(hidden), self.score(hidden)
def __call__(self, x, rois, roi_indices, test=True):
    """Forward the chain on rotated RoIs.

    Every rotated box is first replaced by the axis-aligned rectangle that
    encloses its four corners.  That rectangle is clipped to
    ``[0, x.shape[3] / spatial_scale]`` horizontally and
    ``[0, x.shape[2] / spatial_scale]`` vertically, and is then fed to
    RoI pooling.

    We assume that there are :math:`N` batches.

    Args:
        x (~chainer.Variable): 4D image variable.
        rois (array): A bounding box array containing the proposal boxes
            of all images in the batch. Five columns are read, so its
            shape is :math:`(R', 5)`; they are used as
            (center x, center y, width, height, angle), the angle being
            passed to ``cos``/``sin`` (presumably radians -- confirm
            against the caller). Given :math:`R_i` proposed RoIs from the
            :math:`i` th image, :math:`R' = \\sum _{i=1} ^ N R_i`.
        roi_indices (array): An array containing the index of the image
            each bounding box belongs to. Its shape is :math:`(R',)`.
        test (bool): Whether in test mode or not. This has no effect in
            the current implementation.

    Returns:
        tuple: ``(roi_cls_locs, roi_scores)``, the outputs of
        ``self.cls_loc`` and ``self.score`` respectively.
    """
    xp = self.xp
    roi_indices = roi_indices.astype(np.float32)

    # Shared terms of the corner coordinates, computed once.
    center_x, center_y = rois[:, 0], rois[:, 1]
    half_w, half_h = rois[:, 2] / 2, rois[:, 3] / 2
    cos_a, sin_a = xp.cos(rois[:, 4]), xp.sin(rois[:, 4])
    w_cos, w_sin = half_w * cos_a, half_w * sin_a
    h_cos, h_sin = half_h * cos_a, half_h * sin_a

    # All four sign combinations; only their min/max are used below.
    x_coor = xp.vstack((
        center_x - w_cos - h_sin,
        center_x + w_cos - h_sin,
        center_x - w_cos + h_sin,
        center_x + w_cos + h_sin))
    y_coor = xp.vstack((
        center_y - h_cos + w_sin,
        center_y - h_cos - w_sin,
        center_y + h_cos + w_sin,
        center_y + h_cos - w_sin))

    # Upper bounds in the coordinate system of the RoIs.
    x_limit = x.shape[3] / self.spatial_scale
    y_limit = x.shape[2] / self.spatial_scale

    # Columns: (x_min, y_min, x_max, y_max), clipped in place.
    new_rois = xp.zeros(
        (rois.shape[0], rois.shape[1] - 1), dtype=np.float32)
    new_rois[:, 0] = xp.min(x_coor, axis=0)
    new_rois[:, 0][new_rois[:, 0] < 0] = 0
    new_rois[:, 1] = xp.min(y_coor, axis=0)
    new_rois[:, 1][new_rois[:, 1] < 0] = 0
    new_rois[:, 2] = xp.max(x_coor, axis=0)
    new_rois[:, 2][new_rois[:, 2] > x_limit] = x_limit
    new_rois[:, 3] = xp.max(y_coor, axis=0)
    new_rois[:, 3][new_rois[:, 3] > y_limit] = y_limit

    # Prepend the image index as the first column of every RoI row.
    new_rois = xp.concatenate((roi_indices[:, None], new_rois), axis=1)
    pool = F.roi_pooling_2d(
        x, new_rois, self.roi_size, self.roi_size, self.spatial_scale)

    fc6 = _relu(self.fc6(pool))
    fc7 = _relu(self.fc7(fc6))
    roi_cls_locs = self.cls_loc(fc7)
    roi_scores = self.score(fc7)
    return roi_cls_locs, roi_scores
def check_forward(self, x_data, roi_data):
    """Run the forward pass and check the output dtype and shape.

    The output must be ``float32`` and have the same shape as ``self.gy``.
    """
    out = functions.roi_pooling_2d(
        chainer.Variable(x_data), chainer.Variable(roi_data))
    self.assertEqual(out.data.dtype, numpy.float32)

    host_out = cuda.to_cpu(out.data)
    self.assertEqual(self.gy.shape, host_out.shape)
def _roi_pooling_2d_yx(x, indices_and_rois, outh, outw, spatial_scale,
                       use_roi_align):
    """Pool RoIs after swapping the two coordinates of each box corner.

    Columns are reordered as ``[0, 2, 1, 4, 3]``.  Judging by the names,
    this converts (index, y, x, y, x) rows to (index, x, y, x, y) rows.
    The pooling is done with ``roi_align_2d`` when ``use_roi_align`` is
    truthy, otherwise with ``F.roi_pooling_2d``.
    """
    reordered = indices_and_rois[:, [0, 2, 1, 4, 3]]
    pooling_fn = roi_align_2d if use_roi_align else F.roi_pooling_2d
    return pooling_fn(x, reordered, outh, outw, spatial_scale)
def roi_pooling2(input, rois, size=(7, 7), spatial_scale=1.0):
    """RoI pooling computed with Chainer (reference implementation).

    Args:
        input: Feature map; converted with ``tV2cV`` before pooling.
        rois: Regions of interest; converted with ``tV2cV`` before pooling.
        size: Output size, either a single int or a pair taken as
            ``(outh, outw)`` (order assumed -- confirm against callers).
        spatial_scale: Scale passed on to ``roi_pooling_2d``.

    Returns:
        The Chainer output of ``roi_pooling_2d``.  When the module-level
        ``has_backward`` flag is set, the sum of the output has also been
        back-propagated.
    """
    # Previously ``size`` and ``spatial_scale`` were ignored and 7, 7, 1.0
    # were hard-coded; the defaults keep that behavior for existing calls.
    if isinstance(size, int):
        size = (size, size)
    outh, outw = size

    # chainer version
    input, rois = tV2cV(input), tV2cV(rois)
    output = FF.roi_pooling_2d(
        input, rois, outh, outw, spatial_scale=spatial_scale)
    if has_backward:
        FF.sum(output).backward()
    return output
def test_forward_cpu_gpu_equal(self):
    """Check that the CPU and GPU forward passes give the same output."""
    # cpu
    x_cpu = chainer.Variable(self.x)
    rois_cpu = chainer.Variable(self.rois)
    y_cpu = functions.roi_pooling_2d(x_cpu, rois_cpu,
                                     outh=self.outh,
                                     outw=self.outw,
                                     spatial_scale=self.spatial_scale)

    # gpu
    x_gpu = chainer.Variable(cuda.to_gpu(self.x))
    rois_gpu = chainer.Variable(cuda.to_gpu(self.rois))
    y_gpu = functions.roi_pooling_2d(x_gpu, rois_gpu,
                                     outh=self.outh,
                                     outw=self.outw,
                                     spatial_scale=self.spatial_scale)

    # The outputs themselves are compared.  The earlier check compared
    # ``self.gy`` against a copy of itself, which is always true.
    gradient_check.assert_allclose(y_cpu.data, cuda.to_cpu(y_gpu.data))
def check_forward(self, x_data, roi_data):
    """Run the forward pass and check the output dtype and shape.

    The output dtype must equal ``self.dtype`` and the output shape must
    equal the shape of ``self.gy``.
    """
    out = functions.roi_pooling_2d(
        chainer.Variable(x_data), chainer.Variable(roi_data),
        outh=self.outh, outw=self.outw,
        spatial_scale=self.spatial_scale)
    self.assertEqual(out.data.dtype, self.dtype)

    host_out = cuda.to_cpu(out.data)
    self.assertEqual(self.gy.shape, host_out.shape)
def check_backward(self, x_data, roi_data, y_grad):
    """Compare the analytic input gradient with a numerical estimate."""
    x_var = chainer.Variable(x_data)
    roi_var = chainer.Variable(roi_data)
    out = functions.roi_pooling_2d(x_var, roi_var)
    out.grad = y_grad
    out.backward()

    creator = out.creator

    def rerun_forward():
        # Re-evaluate the forward pass on the (perturbed) input arrays.
        return creator.forward((x_var.data, roi_var.data))

    numeric_gx, _ = gradient_check.numerical_grad(
        rerun_forward, (x_var.data, roi_var.data), (out.grad,))

    gradient_check.assert_allclose(
        cuda.to_cpu(numeric_gx), cuda.to_cpu(x_var.grad))
def check_backward(self, x_data, roi_data, y_grad):
    """Check the backward pass against numerically estimated gradients.

    Only the gradient with respect to ``x`` is compared; the gradient
    with respect to the RoIs is computed and discarded.
    """
    x_var = chainer.Variable(x_data)
    roi_var = chainer.Variable(roi_data)
    out = functions.roi_pooling_2d(x_var, roi_var)
    out.grad = y_grad
    out.backward()

    forward = out.creator.forward

    def evaluate():
        return forward((x_var.data, roi_var.data))

    grads = gradient_check.numerical_grad(
        evaluate, (x_var.data, roi_var.data), (out.grad, ))
    numeric_gx, _ = grads

    expected = cuda.to_cpu(numeric_gx)
    actual = cuda.to_cpu(x_var.grad)
    gradient_check.assert_allclose(expected, actual)
def forward(self, features, rois):
    """Pool ``features`` over ``rois`` using Chainer's RoI pooling.

    Both inputs are copied torch -> numpy -> cupy, pooled with Chainer,
    and the result is copied back into a torch tensor built from a host
    numpy array.
    """
    def as_chainer(tensor):
        # torch Variable to Chainer Variable (used in chainer)
        host_array = tensor.data.cpu().numpy()
        return cVariable(cupy.array(host_array))

    pooled = cF.roi_pooling_2d(
        as_chainer(features), as_chainer(rois),
        self.outh, self.outw, self.spatial_scale)
    return torch.from_numpy(cupy.asnumpy(pooled.array))
def check_backward(self, x_data, roi_data, y_grad):
    """Compare the analytic gradient w.r.t. ``x`` with a numerical one."""
    x_var = chainer.Variable(x_data)
    roi_var = chainer.Variable(roi_data)
    out = functions.roi_pooling_2d(
        x_var, roi_var, outh=self.outh, outw=self.outw,
        spatial_scale=self.spatial_scale)
    out.grad = y_grad
    out.backward()

    inputs = (x_var.data, roi_var.data)

    def rerun_forward():
        # The creator is looked up on every call.
        return out.creator.forward(inputs)

    numeric_gx, _ = gradient_check.numerical_grad(
        rerun_forward, inputs, (out.grad,))
    gradient_check.assert_allclose(
        cuda.to_cpu(numeric_gx), cuda.to_cpu(x_var.grad))
def __call__(self, x, rois, t=None, train=False):
    """Forward pass; returns predictions, or the loss when ``t`` is given.

    Args:
        x: Input passed to ``self.conv1``.
        rois: Regions of interest passed to RoI pooling.
        t: ``None``, or a pair ``(t_cls, t_bbox)`` of targets.
        train: Forwarded to dropout; must be true when ``t`` is given.

    Returns:
        ``(cls_score, bbox_pred)`` when ``t`` is ``None``; otherwise the
        combined loss.  In that case ``self.cls_loss`` and
        ``self.bbox_loss`` are also set.
    """
    feat = x
    # conv -> relu -> LRN -> max pooling, twice
    for conv in (self.conv1, self.conv2):
        feat = F.relu(conv(feat))
        feat = F.local_response_normalization(
            feat, n=5, k=2, alpha=5e-4, beta=.75)
        feat = F.max_pooling_2d(feat, ksize=3, stride=2)
    # conv -> relu, three times
    for conv in (self.conv3, self.conv4, self.conv5):
        feat = F.relu(conv(feat))

    feat = F.roi_pooling_2d(
        feat, rois, outh=6, outw=6, spatial_scale=0.0625)
    for fc in (self.fc6, self.fc7):
        feat = F.dropout(F.relu(fc(feat)), train=train, ratio=.5)

    h_cls_score = self.cls_score(feat)
    cls_score = F.softmax(h_cls_score)
    bbox_pred = self.bbox_pred(feat)
    if t is None:
        return cls_score, bbox_pred

    assert train
    t_cls, t_bbox = t
    self.cls_loss = F.softmax_cross_entropy(h_cls_score, t_cls)
    self.bbox_loss = F.smooth_l1_loss(bbox_pred, t_bbox)

    # Weight of the box loss: 0.5 where the class target differs from
    # ``self.bg_label``, 0 elsewhere.
    xp = cuda.get_array_module(x.data)
    weights = (0.5 * (t_cls.data != self.bg_label)).astype(xp.float32)
    weights = Variable(weights, volatile=not train)
    return self.cls_loss + F.sum(weights * self.bbox_loss)
def forward(self, x_data, rois, train=True):
    """Wrap the inputs in Variables and run the network.

    Args:
        x_data: Input array for ``self.conv1``.
        rois: Array of regions of interest for RoI pooling.
        train: Forwarded to dropout; the Variables are created with
            ``volatile=not train``.

    Returns:
        tuple: ``(cls_score, bbox_pred)``, the softmax of
        ``self.cls_score`` and the raw output of ``self.bbox_pred``.
    """
    feat = Variable(x_data, volatile=not train)
    roi_var = Variable(rois, volatile=not train)

    # conv -> batch norm -> relu -> max pooling, twice
    for conv, bn in ((self.conv1, self.bn1), (self.conv2, self.bn2)):
        feat = F.max_pooling_2d(F.relu(bn(conv(feat))), 3, stride=2)
    # conv -> relu, three times
    for conv in (self.conv3, self.conv4, self.conv5):
        feat = F.relu(conv(feat))

    feat = F.roi_pooling_2d(feat, roi_var)
    for fc in (self.fc6, self.fc7):
        feat = F.dropout(F.relu(fc(feat)), train=train, ratio=0.5)

    cls_score = F.softmax(self.cls_score(feat))
    bbox_pred = self.bbox_pred(feat)
    return cls_score, bbox_pred
def test_roi_module():
    """Compare ``RoIPooling2D`` with Chainer's ``roi_pooling_2d``.

    Random features and RoIs are pooled by both implementations; the
    forward outputs and the input gradients must match exactly.
    """
    # fake data
    B, N, C, H, W, PH, PW = 2, 8, 4, 32, 32, 7, 7
    half = int(N / 2)

    bottom_data = torch.randn(B, C, H, W).cuda()
    bottom_rois = torch.randn(N, 5)
    # First half of the RoIs gets index 0 in column 0, second half 1.
    bottom_rois[:half, 0] = 0
    bottom_rois[half:, 0] = 1
    bottom_rois[:, 1:] = (torch.rand(N, 4) * 100).float()
    bottom_rois = bottom_rois.cuda()

    spatial_scale = 1. / 16
    outh, outw = PH, PW

    # pytorch version
    module = RoIPooling2D(outh, outw, spatial_scale)
    x = bottom_data.requires_grad_()
    rois = bottom_rois.detach()
    print(rois)
    output = module(x, rois)
    print(output)
    output.sum().backward()

    def t2c(variable):
        # torch tensor -> cupy array, via a host numpy copy
        return cp.array(variable.data.cpu().numpy())

    def test_eq(variable, array, info):
        # Exact element-wise equality is required.
        mismatch = cp.asnumpy(array) != variable.data.cpu().numpy()
        assert mismatch.sum() == 0, 'test failed: %s' % info

    # chainer version,if you're going to run this
    # pip install chainer
    import chainer.functions as F
    from chainer import Variable

    x_cn = Variable(t2c(x))
    o_cn = F.roi_pooling_2d(x_cn, t2c(rois), outh, outw, spatial_scale)
    test_eq(output, o_cn.array, 'forward')

    F.sum(o_cn).backward()
    test_eq(x.grad, x_cn.grad, 'backward')
    print('test pass')


# test_roi_module()
if __name__ == "__main__":
    # Manual smoke run: pool a 16-element ramp with the 1-D function
    # defined in this file, then with Chainer's 2-D RoI pooling.
    import numpy as np
    import chainer
    import chainer.functions as F

    x = np.arange(16, dtype=np.float32)
    print(x[15])
    print(x)

    # Add two leading axes of length 1.
    x = np.expand_dims(np.expand_dims(x, 0), 0)
    # Two rows of three values each.
    sois = np.array([[0, 0, 7],
                     [0, 2, 10]], dtype=np.float32)
    outw = 3
    spatial_scale = 1.0

    # Move both inputs to GPU 0.
    x = chainer.cuda.to_gpu(x, 0)
    sois = chainer.cuda.to_gpu(sois, 0)
    o = soi_pooling_1d(
        chainer.Variable(x), chainer.Variable(sois),
        outw=outw, spatial_scale=1.0)
    print(o)

    import cupy as cp

    # Insert one more axis of length 1 at position 2.
    x = cp.expand_dims(x, 2)
    sois = cp.array([[0, 0, 0, 7, 0],
                     [0, 2, 0, 10, 0]], dtype=np.float32)
    outw = 3
    outh = 1
    o = F.roi_pooling_2d(x, sois, outh, outw, spatial_scale=1.0)
    print(o)
def __call__(self, x, rois):
    """Apply RoI pooling using the keyword options in ``self.kwargs``."""
    pooling_options = self.kwargs
    return F.roi_pooling_2d(x, rois, **pooling_options)
def _roi_pooling_2d_yx(x, indices_and_rois, outh, outw, spatial_scale):
    """RoI pooling on rows whose corner coordinates are swapped first.

    Columns are reordered as ``[0, 2, 1, 4, 3]`` before the rows are
    handed to ``F.roi_pooling_2d``.
    """
    column_order = [0, 2, 1, 4, 3]
    return F.roi_pooling_2d(
        x, indices_and_rois[:, column_order], outh, outw, spatial_scale)
[0.66, 0.26, 0.82, 0.64, 0.54, 0.73, 0.59, 0.26], [0.85, 0.34, 0.76, 0.84, 0.29, 0.75, 0.62, 0.25], [0.32, 0.74, 0.21, 0.39, 0.34, 0.03, 0.33, 0.48], [0.20, 0.14, 0.16, 0.13, 0.73, 0.65, 0.96, 0.32], [0.19, 0.69, 0.09, 0.86, 0.88, 0.07, 0.01, 0.48], [0.83, 0.24, 0.97, 0.04, 0.24, 0.35, 0.50, 0.91], ], dtype=np.float32) print(input) x = input[np.newaxis, np.newaxis, :, :] x = chainer.Variable(x) # batch_index, x1, y1, x2, y2 rois = np.array([[0, 0, 2, 6, 7]], dtype=np.float32) rois = chainer.Variable(rois) y = F.roi_pooling_2d(x, rois, outh=2, outw=2, spatial_scale=1) y.grad = np.ones((1, 1, 2, 2), dtype=np.float32) y.backward() print(x.grad) output = y.data[0, 0] print(output) input_viz = plt.cm.jet(input) input_viz = (input_viz * 255).astype(np.uint8) plt.subplot(121) plt.imshow(input_viz) plt.title('input') for j in range(input.shape[0]): for i in range(input.shape[1]): plt.text(i, j,
def f(x, rois):
    """Apply RoI pooling with the output size and scale taken from ``self``."""
    pooling_kwargs = dict(
        outh=self.outh, outw=self.outw, spatial_scale=self.spatial_scale)
    return functions.roi_pooling_2d(x, rois, **pooling_kwargs)
def pooling_forward(x, roi, outh, outw, spatial_scale):
    """Return the result of ``F.roi_pooling_2d`` for the given arguments."""
    return F.roi_pooling_2d(x, roi, outh, outw, spatial_scale)
def __call__(self, x, img_info, gt_boxes=None):
    """Faster RCNN forward

    Args:
        x (:class:`~chainer.Variable`): The input image. Note that the
            batchsize should be 1. So the shape should be
            :math:`(1, n_channels, height, width)`.
        img_info (:class:`~chainer.Variable`): The input image info. It
            contains :math:`(height, width)` and the batchsize should be
            1. So the shape should be :math:`(1, 2)`.
        gt_boxes (:class:`~chainer.Variable`): The ground truth bounding
            boxes and its class label array. The shape should be
            :math:`(1, n_gt_boxes, 5)` and the batchsize should be 1.

    Returns:
        One of three things, depending on the mode:

        * the output of ``self.RPN`` when ``self.rpn_train`` is set and
          ``gt_boxes`` is given;
        * the RCNN loss (classification loss plus box loss) when
          ``self.rcnn_train`` is set and ``gt_boxes`` is given;
        * otherwise the tuple ``(softmax(cls_score), pred_boxes)``.

    """
    if self.type_check_enable:
        self._check_data_type_forward(x, img_info, gt_boxes)

    # Use the array module of the backend of trunk model
    # NOTE(review): everything below is assumed to run inside this device
    # context -- confirm the intended extent of the ``with`` block.
    with cuda.get_device_from_array(x.data):
        xp, feature_map = self.trunk.xp, self.trunk(x)

        # RPN training mode
        if self.rpn_train and gt_boxes is not None:
            return self.RPN(feature_map, img_info, gt_boxes)
        else:
            # Keep the proposals and their probabilities on the instance.
            proposals, probs = self.RPN(feature_map, img_info, gt_boxes)
            self.rpn_proposals = proposals
            self.rpn_probs = probs

        # RCNN
        # Every proposal gets batch index 0 as its first column.
        batch_id = xp.zeros((len(proposals), 1), dtype=xp.float32)
        brois = xp.concatenate((batch_id, proposals), axis=1)
        pool5 = F.roi_pooling_2d(feature_map, brois, 7, 7,
                                 self._spatial_scale)
        fc6 = F.dropout(F.relu(self.fc6(pool5)), train=self.rcnn_train)
        fc7 = F.dropout(F.relu(self.fc7(fc6)), train=self.rcnn_train)

        # Per class probability
        cls_score = self.cls_score(fc7)

        # BBox predictions
        bbox_pred = self.bbox_pred(fc7)

        if self.rcnn_train and gt_boxes is not None:
            # Create proposal target layer if it does not exist
            if not hasattr(self, 'proposal_target_layer'):
                self.proposal_target_layer = ProposalTargetLayer(
                    self._feat_stride, self._anchor_ratios,
                    self._anchor_scales, self._num_classes)
            use_gt_boxes, bbox_reg_targets, keep_inds = \
                self.proposal_target_layer(proposals, gt_boxes)

            # TODO(mitmul): Remove this re-sending below vars to GPU
            xp = self.RPN.xp
            if xp is cuda.cupy:
                use_gt_boxes = xp.asarray(use_gt_boxes)
                bbox_reg_targets = xp.asarray(bbox_reg_targets)
                keep_inds = xp.asarray(keep_inds)

            # Select predicted scores and calc loss
            cls_score = cls_score[keep_inds]
            # The last column of the selected ground-truth rows is used
            # as the class label.
            cls_labels = use_gt_boxes[:, -1].astype(xp.int32)
            loss_cls = F.softmax_cross_entropy(cls_score, cls_labels)
            loss_cls = loss_cls.reshape(())
            cls_acc = F.accuracy(cls_score, cls_labels, -1)

            # Select predicted bbox transformations and calc loss
            bbox_pred = bbox_pred[keep_inds]
            loss_bbox = F.huber_loss(bbox_pred, bbox_reg_targets,
                                     self._rcnn_delta)
            # Sum of the huber loss divided by its number of elements.
            loss_bbox = F.sum(loss_bbox) / loss_bbox.size
            loss_bbox = loss_bbox.reshape(())

            loss_rcnn = loss_cls + loss_bbox

            reporter.report({'loss_cls': loss_cls,
                             'cls_accuracy': cls_acc,
                             'loss_bbox': loss_bbox,
                             'loss_rcnn': loss_rcnn}, self)

            return loss_rcnn

        # Inference: apply the predicted transformations to the proposals
        # and clip the result using the image info.
        pred_boxes = bbox_transform_inv(proposals, bbox_pred.data)
        pred_boxes = clip_boxes(pred_boxes, img_info.data[0])

        return F.softmax(cls_score), pred_boxes
def _roi_pooling_2d_yx(x, indices_and_rois, outh, outw, spatial_scale):
    """Swap the coordinate pairs of each RoI row, then apply RoI pooling.

    The first column is kept in place; columns 1/2 and 3/4 are exchanged
    before calling ``F.roi_pooling_2d``.
    """
    swapped = indices_and_rois[:, [0, 2, 1, 4, 3]]
    return F.roi_pooling_2d(x, swapped, outh, outw, spatial_scale)