Example no. 1
0
    def __call__(self, images):
        """Run the localization forward pass.

        Args:
            images: batch of input images; assumed to be float values scaled
                to [0, 1] since they are multiplied by 255 before feature
                extraction — TODO confirm against caller.

        Returns:
            The raw feature vector ``h`` when ``self.train_imagenet`` is set,
            otherwise a tuple ``(rois, points)`` of sampled image crops and
            the sampling grid that produced them.
        """
        # Reset anchors collected for VisualBackprop-style visualization;
        # the pre-pooling feature map is appended below.
        self.visual_backprop_anchors.clear()

        # Preprocessing must happen on the same device the input lives on.
        with cuda.Device(images.data.device):
            input_images = self.prepare_images(images.copy() * 255)
        h = self.feature_extractor(input_images)

        # In ImageNet pre-training mode the backbone output is the result.
        if self.train_imagenet:
            return h

        # Extra residual stages for larger inputs (height > 224 / > 300),
        # presumably to keep the final feature map size manageable.
        if images.shape[-2] > 224:
            h = self.res6(h)

            if images.shape[-2] > 300:
                h = self.res7(h)

        self.visual_backprop_anchors.append(h)
        h = _global_average_pooling_2d(h)

        # Predict a 2x3 affine matrix per sample; rotation_dropout with
        # ratio=0.0 keeps all rotation components (no dropout at this stage).
        transform_params = self.param_predictor(h)
        transform_params = rotation_dropout(F.reshape(transform_params,
                                                      (-1, 2, 3)),
                                            ratio=0.0)
        # Sample regions of interest from the ORIGINAL images using the
        # predicted affine grids (spatial transformer).
        points = F.spatial_transformer_grid(transform_params, self.out_size)
        rois = F.spatial_transformer_sampler(images, points)

        if self.transform_rois_to_grayscale:
            assert rois.shape[
                1] == 3, "rois are not in RGB, can not convert them to grayscale"
            # NOTE(review): channels are unpacked as b, g, r — BGR order;
            # weights are the standard ITU-R 601 luma coefficients.
            b, g, r = F.split_axis(rois, 3, axis=1)
            rois = 0.299 * r + 0.587 * g + 0.114 * b

        return rois, points
Example no. 2
0
    def __call__(self, x):
        """Forward pass: four ReLU conv stages, global average pooling,
        then the fully connected head."""
        # First stage has no batch normalization.
        out = relu(self.conv1(x))

        # Remaining conv stages share the conv -> bn -> relu pattern.
        stages = (
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
            (self.conv4, self.bn4),
        )
        for conv, bn in stages:
            out = relu(bn(conv(out)))

        # Collapse spatial dimensions, then classify.
        pooled = R._global_average_pooling_2d(out)
        return self.fc(pooled)
Example no. 3
0
    def get_transform_params(self, features):
        """Predict one 2x3 affine transform per box to localize.

        Pools the feature map, queries the LSTM once per box, and maps
        each prediction to affine parameters with rotation dropout applied.
        """
        pooled = _global_average_pooling_2d(features)

        # One (stateful) LSTM query per box to localize.
        predictions = [pooled and self.lstm(pooled) or self.lstm(pooled)
                       for _ in range(self.num_bboxes_to_localize)]
        stacked = F.stack(predictions, axis=1)

        # Fold the batch and box axes together before the parameter head.
        batch_size, num_boxes, _ = stacked.shape
        flat = F.reshape(stacked, (-1, ) + stacked.shape[2:])

        raw_params = self.param_predictor(flat)
        return rotation_dropout(
            F.reshape(raw_params, (-1, 2, 3)),
            ratio=self.dropout_ratio,
        )
    def __call__(self, x, rois, roi_indices, spatial_scale):
        """RoI head forward pass: classification, localization, and mask.

        Args:
            x: feature map to pool regions from.
            rois: per-region boxes — presumably (y_min, x_min, y_max, x_max)
                given the ``_yx`` helper name; TODO confirm.
            roi_indices: batch index of each RoI; cast to float so it can be
                concatenated with the float box coordinates.
            spatial_scale: scale from box coordinates to feature-map
                coordinates, forwarded to RoI align.

        Returns:
            Tuple of (roi_cls_locs, roi_scores, mask).
        """
        roi_indices = roi_indices.astype(np.float32)
        # Prepend each RoI's batch index: rows become (index, box...).
        indices_and_rois = self.xp.concatenate(
            (roi_indices[:, None], rois), axis=1)

        pool = _roi_align_2d_yx(x, indices_and_rois, self.roi_size,
                                self.roi_size, spatial_scale)

        # h: shared trunk, up to just before the heads branch
        h = F.relu(self.res5(pool))
        h = F.relu(self.conv1(h))
        # Box heads operate on globally average-pooled features.
        gap = _global_average_pooling_2d(h)
        roi_cls_locs = self.cls_loc(gap)
        roi_scores = self.score(gap)
        # Mask head keeps spatial resolution: deconv upsamples, conv predicts.
        mask = self.conv2(F.relu(self.deconv1(h)))
        return roi_cls_locs, roi_scores, mask
 def __call__(self, x):
     """Extract backbone features from ``x`` and collapse them to a
     vector with global average pooling.

     Removed dead commented-out ``visual_backprop_anchors`` bookkeeping;
     this variant does not collect visualization anchors.
     """
     h = self.feature_extractor(x)
     return _global_average_pooling_2d(h)