Example #1
    def __call__(self, images):
        self.lstm.reset_state()
        self.transform_2.reset_state()

        h = self.bn0(self.conv0(images))
        h = F.average_pooling_2d(F.relu(h), 2, stride=2)

        h = self.rs1(h)
        h = F.max_pooling_2d(h, 2, stride=2)

        h = self.rs2(h)
        h = F.max_pooling_2d(h, 2, stride=2)

        h = self.rs3(h)
        # h = self.rs4(h)
        self.vis_anchor = h
        h = F.average_pooling_2d(h, 5, stride=2)

        localizations = []

        with cuda.get_device_from_array(h.data):
            # lstm_prediction = chainer.Variable(self.xp.zeros((len(images), self.lstm.state_size), dtype=h.dtype))

            for _ in range(self.num_timesteps):
                # in_feature = self.attend(h, lstm_prediction)
                in_feature = h
                lstm_prediction = F.relu(self.lstm(in_feature))
                transformed = self.transform_2(lstm_prediction)
                transformed = F.reshape(transformed, (-1, 2, 3))
                localizations.append(
                    rotation_dropout(transformed, ratio=self.dropout_ratio))

        return F.concat(localizations, axis=0)
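The return value stacks one (batch, 2, 3) block of affine parameters per timestep along the batch axis, giving a (num_timesteps * batch, 2, 3) result. A minimal usage sketch of how such parameters are typically turned into crops (mirroring Examples #4 and #10 below); the dummy shapes and the identity transforms are assumptions for illustration only:

    import numpy as np
    import chainer.functions as F

    batch_size, num_timesteps, out_size = 2, 3, (64, 64)
    images = np.random.rand(batch_size, 3, 128, 128).astype(np.float32)
    # stand-in for the localizer output: one identity transform per timestep and image
    transform_params = np.tile(
        np.array([[1, 0, 0], [0, 1, 0]], dtype=np.float32),
        (num_timesteps * batch_size, 1, 1))

    grids = F.spatial_transformer_grid(transform_params, out_size)  # (T*B, 2, 64, 64)
    tiled_images = F.concat([images] * num_timesteps, axis=0)       # repeat images once per timestep
    rois = F.spatial_transformer_sampler(tiled_images, grids)       # (T*B, 3, 64, 64)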
Example #2
    def __call__(self, images):
        self.lstm.reset_state()

        h = self.bn0(self.conv0(images))
        h = F.average_pooling_2d(F.relu(h), 2, stride=2)

        h = self.rs1(h)
        h = F.max_pooling_2d(h, 2, stride=2)

        h = self.rs2(h)
        h = F.max_pooling_2d(h, 2, stride=2)

        h = self.rs3(h)
        # h = self.rs4(h)
        self.vis_anchor = h
        h = F.average_pooling_2d(h, 5, stride=2)

        localizations = []
        # predict N two-dimensional affine transformation matrices A
        with cuda.get_device_from_array(h.data):
            for _ in range(self.num_timesteps):
                in_feature = h
                lstm_prediction = F.relu(self.lstm(in_feature))
                transformed = self.transform_2(lstm_prediction)
                transformed = F.reshape(transformed, (-1, 2, 3))
                # rotation_dropout: rotation dropout to prevent excessive rotation
                localizations.append(
                    rotation_dropout(transformed, ratio=self.dropout_ratio))

        return F.concat(localizations, axis=0)
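The project's rotation_dropout implementation is not shown in these examples. As the comments above suggest, its purpose is to keep the predicted transforms from rotating too aggressively; the following is only a rough sketch of that idea, assuming it randomly zeroes the off-diagonal (rotation/shear) entries of the (batch, 2, 3) matrices during training while keeping scale and translation intact:

    import chainer
    from chainer import cuda

    def rotation_dropout_sketch(transform_params, ratio=0.5):
        # transform_params: Variable of shape (batch, 2, 3) holding affine matrices
        xp = cuda.get_array_module(transform_params.array)
        mask = xp.ones_like(transform_params.array)
        if chainer.config.train and ratio > 0:
            dropped = xp.random.rand(len(mask)) < ratio
            mask[dropped, 0, 1] = 0  # zero the sin/shear entries of dropped samples
            mask[dropped, 1, 0] = 0
        return transform_params * mask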
Example #3
    def localization_net(self, images):
        self.lstm.reset_state()
        self.transform_2.reset_state()

        images = self.data_bn(images)
        h = F.relu(self.bn0(self.conv0(images)))
        h = F.max_pooling_2d(h, 3, stride=2, pad=1)

        h = self.rs1_1(h)
        h = self.rs1_2(h)

        h = self.rs2_1(h)
        h = self.rs2_2(h)

        h = self.rs3_1(h)
        h = self.rs3_2(h)

        # h = self.rs4_1(h)
        # h = self.rs4_2(h)

        self.localization_vis_anchor = h

        h = F.average_pooling_2d(h, 5, stride=1)

        localizations = []

        with cuda.get_device_from_array(h.data):
            for _ in range(self.num_timesteps):
                in_feature = h
                lstm_prediction = F.relu(self.lstm(in_feature))
                transformed = self.transform_2(lstm_prediction)
                transformed = F.reshape(transformed, (-1, 2, 3))
                localizations.append(rotation_dropout(transformed, ratio=self.dropout_ratio))

        return F.concat(localizations, axis=0)
Example #4
    def __call__(self, images):
        self.visual_backprop_anchors.clear()

        with cuda.Device(images.data.device):
            input_images = self.prepare_images(images.copy() * 255)
        h = self.feature_extractor(input_images)

        if self.train_imagenet:
            return h

        if images.shape[-2] > 224:
            h = self.res6(h)

            if images.shape[-2] > 300:
                h = self.res7(h)

        self.visual_backprop_anchors.append(h)
        h = _global_average_pooling_2d(h)

        transform_params = self.param_predictor(h)
        transform_params = rotation_dropout(F.reshape(transform_params,
                                                      (-1, 2, 3)),
                                            ratio=0.0)
        points = F.spatial_transformer_grid(transform_params, self.out_size)
        rois = F.spatial_transformer_sampler(images, points)

        if self.transform_rois_to_grayscale:
            assert rois.shape[1] == 3, "rois are not in RGB, can not convert them to grayscale"
            b, g, r = F.split_axis(rois, 3, axis=1)
            rois = 0.299 * r + 0.587 * g + 0.114 * b

        return rois, points
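Two details worth noting in this example: the grayscale conversion uses the standard ITU-R BT.601 luma weights (0.299, 0.587, 0.114) and unpacks the channels as b, g, r, presumably because the preceding preprocessing leaves the images in BGR channel order; and rotation_dropout is called with ratio=0.0, so here it presumably acts only as a pass-through on the reshaped parameters without dropping anything.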
Example #5
    def __call__(self, images):
        self.lstm.reset_state()

        h = self.bn0(self.conv0(images))
        h = F.average_pooling_2d(F.relu(h), 2, stride=2)

        h = self.rs1(h)
        h = F.max_pooling_2d(h, 2, stride=2)

        h = self.rs2(h)
        h = F.max_pooling_2d(h, 2, stride=2)

        h = self.rs3(h)
        # h = self.rs4(h)
        self.vis_anchor = h
        h = F.average_pooling_2d(h, 5)

        localizations = []

        with cuda.get_device_from_array(h.data):

            for _ in range(self.num_timesteps):
                timestep_localizations = []
                in_feature = h
                lstm_prediction = F.relu(self.lstm(in_feature))
                transformed = self.transform_2(lstm_prediction)
                transformed = F.reshape(transformed, (-1, 2, 3))
                transformation_params = rotation_dropout(
                    transformed, ratio=self.dropout_ratio)
                timestep_localizations.append(transformation_params)

                # self.transform_2.disable_update()

                if self.do_parameter_refinement:
                    transformation_params = self.to_homogeneous_coordinates(
                        transformation_params)
                    # refine the transformation parameters
                    for _ in range(self.num_refinement_steps):
                        transformation_deltas = self.do_transformation_param_refinement_step(
                            images, transformation_params)
                        transformation_deltas = self.to_homogeneous_coordinates(
                            transformation_deltas)

                        transformation_params = F.batch_matmul(
                            transformation_params, transformation_deltas)
                        # transformation_params = F.batch_matmul(transformation_deltas, transformation_params)
                        timestep_localizations.append(
                            transformation_params[:, :-1, :])

                localizations.append(timestep_localizations)

        return [F.concat(loc, axis=0) for loc in zip(*localizations)]
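The to_homogeneous_coordinates helper used in the refinement loop is not shown. A minimal sketch of what it presumably does, appending the row [0, 0, 1] so that the 2x3 affine matrices become 3x3 and can be chained with F.batch_matmul (Example #6 below builds the same row explicitly as homogenuous_addon):

    import chainer.functions as F
    from chainer import cuda

    def to_homogeneous_coordinates_sketch(transform_params):
        # transform_params: Variable of shape (batch, 2, 3)
        xp = cuda.get_array_module(transform_params.array)
        bottom_row = xp.zeros((transform_params.shape[0], 1, 3), dtype=transform_params.dtype)
        bottom_row[:, 0, 2] = 1
        return F.concat((transform_params, bottom_row), axis=1)  # (batch, 3, 3)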
Example #6
    def __call__(self, images):
        self.lstm.reset_state()
        self.transform_2.reset_state()

        h = self.bn0(self.conv0(images))
        h = F.average_pooling_2d(F.relu(h), 2, stride=2)

        h = self.rs1(h)
        h = F.max_pooling_2d(h, 2, stride=2)

        h = self.rs2(h)
        h = F.max_pooling_2d(h, 2, stride=2)

        h = self.rs3(h)
        self.vis_anchor = h
        h = F.average_pooling_2d(h, 5, stride=2)

        localizations = []

        with cuda.get_device_from_array(h.data):
            homogenuous_addon = self.xp.zeros((len(h), 1, 3),
                                              dtype=h.data.dtype)
            homogenuous_addon[:, 0, 2] = 1

        for _ in range(self.num_timesteps):
            lstm_prediction = F.relu(self.lstm(h))
            # note: the same rotation_transform predictor is reused for the
            # translation branch here; a dedicated translation predictor may be intended
            translation_transform = F.reshape(
                self.rotation_transform(lstm_prediction), (-1, 2, 3))
            translation_transform = disable_shearing(translation_transform)
            translation_transform = F.concat(
                (translation_transform, homogenuous_addon), axis=1)

            rotation_transform = F.reshape(
                self.rotation_transform(lstm_prediction), (-1, 2, 3))
            rotation_transform = disable_translation(rotation_transform)
            rotation_transform = F.concat(
                (rotation_transform, homogenuous_addon), axis=1)

            # first rotate, then translate
            transform = F.batch_matmul(rotation_transform,
                                       translation_transform)
            # homogenuous_multiplier = F.get_item(transform, (..., 2, 2))
            #
            # # bring matrices from homogeneous coordinates to normal coordinates
            transform = transform[:, :2, :]
            # transform = transform / homogenuous_multiplier
            localizations.append(
                rotation_dropout(transform, ratio=self.dropout_factor))

        return F.concat(localizations, axis=0)
Example #7
    def get_transform_params(self, features):
        h = self.pre_transform_params(features)
        slices = F.split_axis(h, self.num_bboxes_to_localize, axis=1)

        lstm_predictions = [self.lstm(slice) for slice in slices]
        lstm_predictions = F.stack(lstm_predictions, axis=1)
        batch_size, num_boxes, _ = lstm_predictions.shape
        lstm_predictions = F.reshape(lstm_predictions,
                                     (-1, ) + lstm_predictions.shape[2:])

        params = self.param_predictor(lstm_predictions)
        transform_params = rotation_dropout(F.reshape(params, (-1, 2, 3)),
                                            ratio=self.dropout_ratio)
        return transform_params
Example #8
    def get_transform_params(self, features):
        h = _global_average_pooling_2d(features)
        lstm_predictions = [
            self.lstm(h) for _ in range(self.num_bboxes_to_localize)
        ]
        lstm_predictions = F.stack(lstm_predictions, axis=1)
        batch_size, num_boxes, _ = lstm_predictions.shape
        lstm_predictions = F.reshape(lstm_predictions,
                                     (-1, ) + lstm_predictions.shape[2:])

        params = self.param_predictor(lstm_predictions)
        transform_params = rotation_dropout(F.reshape(params, (-1, 2, 3)),
                                            ratio=self.dropout_ratio)
        return transform_params
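Examples #4 and #8 call a _global_average_pooling_2d helper that is not included above. A minimal sketch of what such a helper presumably does, pooling over the full spatial extent and flattening the result to (batch, channels):

    import chainer.functions as F

    def _global_average_pooling_2d_sketch(x):
        # x: feature map of shape (batch, channels, height, width)
        batch_size, channels, height, width = x.shape
        h = F.average_pooling_2d(x, (height, width))  # (batch, channels, 1, 1)
        return F.reshape(h, (batch_size, channels))   # (batch, channels)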
Example #9
    def get_transform_params(self, features):
        batch_size, num_channels, feature_height, feature_weight = features.shape
        features = F.reshape(features, (batch_size, num_channels, -1))
        features = F.transpose(features, (0, 2, 1))

        # start autoregressive decoding from an all-zero (batch, 1, 6) seed entry
        target = chainer.Variable(self.xp.zeros((batch_size, 1, 6), dtype=chainer.get_dtype()))

        for _ in range(self.num_bboxes_to_localize):
            embedded_params = self.param_embedder(target.array, n_batch_axes=2)
            embedded_params = self.positional_encoding(embedded_params)
            decoded = self.decoder(embedded_params, features, None, self.mask)
            params = self.param_predictor(decoded, n_batch_axes=2)
            # append only the newest prediction to the sequence decoded so far
            target = F.concat([target, params[:, -1:]])

        target = F.reshape(target[:, 1:], (-1,) + target.shape[2:])
        transform_params = rotation_dropout(F.reshape(target, (-1, 2, 3)), ratio=self.dropout_ratio)
        return transform_params
Example #10
    def do_transformation_param_refinement_step(self, images,
                                                transformation_params):
        transformation_params = self.remove_homogeneous_coordinates(
            transformation_params)
        points = F.spatial_transformer_grid(transformation_params,
                                            self.target_shape)
        rois = F.spatial_transformer_sampler(images, points)

        # rerun parts of the feature extraction for producing a refined version of the transformation params
        h = self.bn0_1(self.conv0_1(rois))
        h = F.average_pooling_2d(F.relu(h), 2, stride=2)

        h = self.rs4(h)
        h = F.max_pooling_2d(h, 2, stride=2)

        h = self.rs5(h)
        h = F.max_pooling_2d(h, 2, stride=2)

        transformation_params = self.refinement_transform(h)
        transformation_params = F.reshape(transformation_params, (-1, 2, 3))
        transformation_params = rotation_dropout(transformation_params,
                                                 ratio=self.dropout_ratio)
        return transformation_params
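This refinement step is the helper invoked in Example #5's inner loop. The remove_homogeneous_coordinates call at the top is not shown either; judging from Example #5, which slices off the last row with transformation_params[:, :-1, :], it presumably just drops the appended [0, 0, 1] row again:

    def remove_homogeneous_coordinates_sketch(transform_params):
        # inverse of the homogeneous-coordinate sketch after Example #5:
        # drop the appended [0, 0, 1] row to recover (batch, 2, 3) affine matrices
        return transform_params[:, :-1, :]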