Example #1
    def __call__(self, batch):
        # Sample a random scale and round it down to a multiple of
        # self.multiple.
        random_scale_ind = npr.randint(0, high=len(self.scales))
        target_size = self.scales[random_scale_ind]
        target_size = int(
            np.floor(float(target_size) / self.multiple) * self.multiple)
        rescale = Rescale(target_size=target_size, keep_ratio=self.keep_ratio)
        transform = Compose([Normailize(), Reshape(unsqueeze=False)])

        images = [sample['image'] for sample in batch]
        bboxes = [sample['boxes'] for sample in batch]
        batch_size = len(images)
        max_width, max_height = -1, -1
        # First pass: find the maximum rescaled height/width in the batch.
        for i in range(batch_size):
            im, _ = rescale(images[i])
            height, width = im.shape[0], im.shape[1]
            max_width = max(width, max_width)
            max_height = max(height, max_height)

        # Zero-padded image batch; box rows are padded with -1.
        padded_ims = torch.zeros(batch_size, 3, max_height, max_width)

        num_params = bboxes[0].shape[-1]
        max_num_boxes = max(bbox.shape[0] for bbox in bboxes)
        padded_boxes = torch.ones(batch_size, max_num_boxes, num_params) * -1
        # Second pass: rescale, normalize, and copy into the padded tensors.
        for i in range(batch_size):
            im, bbox = images[i], bboxes[i]
            im, im_scale = rescale(im)
            height, width = im.shape[0], im.shape[1]
            padded_ims[i, :, :height, :width] = transform(im)
            # Scale box coordinates along with the image: 4 values for
            # axis-aligned boxes, 8 for quadrilateral annotations.
            if num_params < 9:
                bbox[:, :4] = bbox[:, :4] * im_scale
            else:
                bbox[:, :8] = bbox[:, :8] * np.hstack((im_scale, im_scale))
            padded_boxes[i, :bbox.shape[0], :] = torch.from_numpy(bbox)
        return {'image': padded_ims, 'boxes': padded_boxes}
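A minimal usage sketch for this collate function, assuming the __call__ above belongs to a collater class (here called Collater; the constructor arguments and dataset are hypothetical, not taken from the snippet):

from torch.utils.data import DataLoader

collater = Collater(scales=[512, 640, 768], multiple=32, keep_ratio=True)
loader = DataLoader(dataset, batch_size=4, collate_fn=collater)
for batch in loader:
    images = batch['image']  # (B, 3, max_H, max_W), zero-padded
    boxes = batch['boxes']   # (B, max_N, num_params), rows padded with -1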
Example #2
 def __init__(self, config):
     shp_enc_out, dim_out_flat_conv = compute_cnn_output_filters_and_dims(
         dims_img=config.dims_img,
         dims_filter=config.dims_conv,
         kernel_sizes=config.kernel_sizes_conv,
         strides=config.strides_conv,
         paddings=config.paddings_conv,
     )
     super().__init__(
         stem=nn.Sequential(
             Reshape(config.dims_img),  # TxPxB will be flattened before.
             Conv2d(
                 in_channels=config.dims_img[0],
                 out_channels=config.dims_conv[0],
                 kernel_size=config.kernel_sizes_conv[0],
                 stride=config.strides_conv[0],
                 padding=config.paddings_conv[0],
             ),
             nn.ReLU(),
             Conv2d(
                 in_channels=config.dims_conv[0],
                 out_channels=config.dims_conv[1],
                 kernel_size=config.kernel_sizes_conv[1],
                 stride=config.strides_conv[1],
                 padding=config.paddings_conv[1],
             ),
             nn.ReLU(),
             Conv2d(
                 in_channels=config.dims_conv[1],
                 out_channels=config.dims_conv[2],
                 kernel_size=config.kernel_sizes_conv[2],
                 stride=config.strides_conv[2],
                 padding=config.paddings_conv[2],
             ),
             nn.ReLU(),
             Reshape((dim_out_flat_conv, )),  # Flatten image dims
         ),
         dist_params=nn.ModuleDict({
             "logits":
             Linear(
                 in_features=dim_out_flat_conv,
                 out_features=config.dims.switch,
             ),
         }),
         dist_cls=OneHotCategorical,
     )
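The shapes wired into Reshape and Linear above come from compute_cnn_output_filters_and_dims, which is not shown here. Its output is presumably standard convolution arithmetic; a hedged sketch (the name and signature mirror the call above, but the body is an assumption):

def compute_cnn_output_filters_and_dims(dims_img, dims_filter, kernel_sizes,
                                        strides, paddings):
    channels, height, width = dims_img
    for f, k, s, p in zip(dims_filter, kernel_sizes, strides, paddings):
        # Per spatial dim: floor((n + 2 * padding - kernel) / stride) + 1.
        height = (height + 2 * p - k) // s + 1
        width = (width + 2 * p - k) // s + 1
        channels = f
    shp_enc_out = (channels, height, width)
    return shp_enc_out, channels * height * width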
Example #3
def single_scale_detect(model, src, target_size, use_gpu=True, conf=None):
    # Rescale the source image to the target size and normalize it.
    im, im_scales = Rescale(target_size=target_size, keep_ratio=True)(src)
    im = Compose([Normailize(), Reshape(unsqueeze=True)])(im)
    if use_gpu and torch.cuda.is_available():
        model, im = model.cuda(), im.cuda()
    with torch.no_grad():
        scores, classes, boxes = model(im, test_conf=conf)
    scores = scores.cpu().numpy()
    classes = classes.cpu().numpy()
    boxes = boxes.cpu().numpy()
    # Map box coordinates back into the original image frame.
    boxes[:, :4] = boxes[:, :4] / im_scales
    if boxes.shape[1] > 5:
        boxes[:, 5:9] = boxes[:, 5:9] / im_scales
    scores = np.reshape(scores, (-1, 1))
    classes = np.reshape(classes, (-1, 1))
    cls_dets = np.concatenate([classes, scores, boxes], axis=1)
    # Drop class-0 (background) detections.
    keep = np.where(classes > 0)[0]
    return cls_dets[keep, :]
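A hedged usage sketch (the image path and target size are illustrative, and model stands for a trained detector from the surrounding codebase):

import cv2

im = cv2.imread('demo.jpg')  # hypothetical test image, HxWxC uint8
dets = single_scale_detect(model, im, target_size=800, use_gpu=True)
for det in dets:
    cls_id, score = int(det[0]), float(det[1])
    x1, y1, x2, y2 = det[2:6]
    print(cls_id, score, (x1, y1, x2, y2))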
Example #4
    def __init__(self, config):
        self.num_hierarchies = 2
        if config.dims.ctrl_encoder not in [None, 0]:
            raise ValueError(
                "no controls. would require different architecture "
                "or mixing with images.")
        shp_enc_out, dim_out_flat_conv = compute_cnn_output_filters_and_dims(
            dims_img=config.dims_img,
            dims_filter=config.dims_filter,
            kernel_sizes=config.kernel_sizes,
            strides=config.strides,
            paddings=config.paddings,
        )

        assert config.dims_encoder[0] is None, (
            "first stem is a conv net. "
            "config is given differently...")
        dims_stem_2 = (  # TODO: really past self?
            32,
            32,
        )
        activations_stem_2 = nn.ReLU()
        dim_out_1 = config.dims.auxiliary
        dim_out_2 = config.dims.switch
        dim_in_dist_params_1 = dim_out_flat_conv
        dim_in_dist_params_2 = (dims_stem_2[-1]
                                if len(dims_stem_2) > 0 else dim_out_flat_conv)

        super().__init__(
            allow_cat_inputs=False,  # images and scalar...
            stem=nn.ModuleList([
                nn.Sequential(
                    Reshape(config.dims_img),  # TxPxB will be flattened before.
                    Conv2d(
                        in_channels=config.dims_img[0],
                        out_channels=config.dims_filter[0],
                        kernel_size=config.kernel_sizes[0],
                        stride=config.strides[0],
                        padding=config.paddings[0],
                    ),
                    nn.ReLU(),
                    Conv2d(
                        in_channels=config.dims_filter[0],
                        out_channels=config.dims_filter[1],
                        kernel_size=config.kernel_sizes[1],
                        stride=config.strides[1],
                        padding=config.paddings[1],
                    ),
                    nn.ReLU(),
                    Conv2d(
                        in_channels=config.dims_filter[1],
                        out_channels=config.dims_filter[2],
                        kernel_size=config.kernel_sizes[2],
                        stride=config.strides[2],
                        padding=config.paddings[2],
                    ),
                    nn.ReLU(),
                    Reshape((dim_out_flat_conv, )),  # Flatten image dims
                ),
                MLP(
                    dim_in=dim_out_flat_conv,
                    dims=dims_stem_2,
                    activations=activations_stem_2,
                ),
            ]),
            dist_params=nn.ModuleList([
                nn.ModuleDict({
                    "loc":
                    Linear(
                        in_features=dim_in_dist_params_1,
                        out_features=dim_out_1,
                    ),
                    "scale_tril":
                    DefaultScaleTransform(
                        dim_in_dist_params_1,
                        dim_out_1,
                    ),
                }),
                nn.ModuleDict({
                    "loc":
                    Linear(
                        in_features=dim_in_dist_params_2,
                        out_features=dim_out_2,
                    ),
                    "scale_tril":
                    DefaultScaleTransform(
                        dim_in_dist_params_2,
                        dim_out_2,
                    ),
                }),
            ]),
            dist_cls=[MultivariateNormal, MultivariateNormal],
        )
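For orientation, a rough sketch of the forward pass the (unshown) base class presumably runs over these two hierarchies; this is pseudocode inferred from the module layout above, not the actual base-class code:

feat_1 = stem[0](x)                    # flattened conv features
dist_1 = MultivariateNormal(
    loc=dist_params[0]["loc"](feat_1),
    scale_tril=dist_params[0]["scale_tril"](feat_1),
)                                      # over config.dims.auxiliary
feat_2 = stem[1](feat_1)               # MLP stem consumes the conv features
dist_2 = MultivariateNormal(
    loc=dist_params[1]["loc"](feat_2),
    scale_tril=dist_params[1]["scale_tril"](feat_2),
)                                      # over config.dims.switch

That the MLP stem is built with dim_in=dim_out_flat_conv suggests it consumes the first stem's output rather than the raw input.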
Example #5
 def __init__(self, config):
     shp_enc_out, dim_out_flat_conv = compute_cnn_output_filters_and_dims(
         dims_img=config.dims_img,
         dims_filter=config.dims_filter,
         kernel_sizes=config.kernel_sizes,
         strides=config.strides,
         paddings=config.paddings,
     )
     # A fixed (non-trainable) Q initialized from a float gets a bounded
     # sigmoid rectifier; a trainable Q gets the default scale transform.
     if not config.requires_grad_Q and isinstance(config.init_scale_Q_diag,
                                                  float):
         fixed_max_scale = True
     elif config.requires_grad_Q and not isinstance(
             config.init_scale_Q_diag, float):
         fixed_max_scale = False
     else:
         raise ValueError("unclear what encoder scale rectifier to use.")
     super().__init__(
         stem=nn.Sequential(
             Reshape(config.dims_img),  # TxPxB will be flattened before.
             nn.ZeroPad2d(padding=[0, 1, 0, 1]),
             Conv2d(
                 in_channels=config.dims_img[0],
                 out_channels=config.dims_filter[0],
                 kernel_size=config.kernel_sizes[0],
                 stride=config.strides[0],
                 padding=0,
             ),
             nn.ReLU(),
             nn.ZeroPad2d(padding=[0, 1, 0, 1]),
             Conv2d(
                 in_channels=config.dims_filter[0],
                 out_channels=config.dims_filter[1],
                 kernel_size=config.kernel_sizes[1],
                 stride=config.strides[1],
                 padding=0,
             ),
             nn.ReLU(),
             nn.ZeroPad2d(padding=[0, 1, 0, 1]),
             Conv2d(
                 in_channels=config.dims_filter[1],
                 out_channels=config.dims_filter[2],
                 kernel_size=config.kernel_sizes[2],
                 stride=config.strides[2],
                 padding=0,
             ),
             nn.ReLU(),
             Reshape((dim_out_flat_conv, )),  # Flatten image dims
         ),
         dist_params=nn.ModuleDict({
             "loc":
             Linear(
                 in_features=dim_out_flat_conv,
                 out_features=config.dims.auxiliary,
             ),
             "scale":
             nn.Sequential(
                 Linear(
                     in_features=dim_out_flat_conv,
                     out_features=config.dims.auxiliary,
                 ),
                 ScaledSqrtSigmoid(max_scale=config.init_scale_Q_diag),
             ) if fixed_max_scale else DefaultScaleTransform(
                 dim_out_flat_conv,
                 config.dims.auxiliary,
                 make_diag_cov_matrix=False,
             ),
         }),
         dist_cls=IndependentNormal,
     )
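One detail worth noting above: nn.ZeroPad2d(padding=[0, 1, 0, 1]) pads (left, right, top, bottom), i.e. one pixel on the right and bottom only. This is the asymmetric, TensorFlow-style "SAME" padding for even kernel sizes, which Conv2d's symmetric padding argument cannot express (hence padding=0 in the convolutions). A quick standalone shape check with illustrative values:

import torch
import torch.nn as nn

x = torch.randn(1, 1, 4, 4)
layer = nn.Sequential(
    nn.ZeroPad2d([0, 1, 0, 1]),                           # 4x4 -> 5x5
    nn.Conv2d(1, 1, kernel_size=2, stride=2, padding=0),  # 5x5 -> 2x2
)
print(layer(x).shape)  # torch.Size([1, 1, 2, 2])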
Example #6
 def __init__(self, config):
     shp_enc_out, dim_out_flat_conv = compute_cnn_output_filters_and_dims(
         dims_img=config.dims_img,
         dims_filter=config.dims_filter,
         kernel_sizes=config.kernel_sizes,
         strides=config.strides,
         paddings=config.paddings,
     )
     super().__init__(
         stem=nn.Sequential(
             Linear(
                 in_features=config.dims.auxiliary,
                 out_features=int(np.prod(shp_enc_out)),
             ),
             Reshape(shp_enc_out),  # TxPxB will be flattened before.
             Conv2d(
                 in_channels=shp_enc_out[0],
                 out_channels=config.dims_filter[-1] *
                 config.upscale_factor**2,
                 kernel_size=config.kernel_sizes[-1],
                 stride=1,  # Pixelshuffle instead.
                 padding=config.paddings[-1],
             ),
             nn.PixelShuffle(upscale_factor=config.upscale_factor),
             nn.ReLU(),
             Conv2d(
                 in_channels=config.dims_filter[-1],
                 out_channels=config.dims_filter[-2] *
                 config.upscale_factor**2,
                 kernel_size=config.kernel_sizes[-2],
                 stride=1,  # Pixelshuffle instead.
                 padding=config.paddings[-2],
             ),
             nn.PixelShuffle(upscale_factor=config.upscale_factor),
             nn.ReLU(),
             Conv2d(
                 in_channels=config.dims_filter[-2],
                 out_channels=config.dims_filter[-3] *
                 config.upscale_factor**2,
                 kernel_size=config.kernel_sizes[-3],
                 stride=1,  # Pixelshuffle instead.
                 padding=config.paddings[-3],
             ),
             nn.PixelShuffle(upscale_factor=config.upscale_factor),
             nn.ReLU(),
         ),
         dist_params=nn.ModuleDict({
             "logits":
             nn.Sequential(
                 Conv2d(
                     in_channels=config.dims_filter[-3],
                     out_channels=1,
                     kernel_size=1,
                     stride=1,
                     padding=0,
                 ),
                 Reshape((config.dims.target, )),
             )
         }),
         dist_cls=IndependentBernoulli,
     )
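The decoder upsamples with sub-pixel convolution: each Conv2d emits dims_filter[k] * upscale_factor**2 channels, and nn.PixelShuffle rearranges a (B, C*r^2, H, W) tensor into (B, C, H*r, W*r). A quick standalone shape check with illustrative values:

import torch
import torch.nn as nn

x = torch.randn(1, 16 * 2**2, 8, 8)  # C=16, r=2, so 64 channels at 8x8
print(nn.PixelShuffle(2)(x).shape)   # torch.Size([1, 16, 16, 16])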