def __call__(self, batch):
    # Pick a random training scale and snap it down to a multiple of
    # self.multiple so feature-map strides divide the input size evenly.
    random_scale_inds = npr.randint(0, high=len(self.scales))
    target_size = self.scales[random_scale_inds]
    target_size = int(
        np.floor(float(target_size) / self.multiple) * self.multiple)
    rescale = Rescale(target_size=target_size, keep_ratio=self.keep_ratio)
    transform = Compose([Normailize(), Reshape(unsqueeze=False)])

    images = [sample['image'] for sample in batch]
    bboxes = [sample['boxes'] for sample in batch]
    batch_size = len(images)

    # First pass: find the largest rescaled height/width in the batch.
    max_width, max_height = -1, -1
    for i in range(batch_size):
        im, _ = rescale(images[i])
        height, width = im.shape[0], im.shape[1]
        max_width = width if width > max_width else max_width
        max_height = height if height > max_height else max_height

    # Zero-pad images to a common size; pad boxes with -1 sentinels so
    # downstream code can mask them out.
    padded_ims = torch.zeros(batch_size, 3, max_height, max_width)
    num_params = bboxes[0].shape[-1]
    max_num_boxes = max(bbox.shape[0] for bbox in bboxes)
    padded_boxes = torch.ones(batch_size, max_num_boxes, num_params) * -1

    for i in range(batch_size):
        im, bbox = images[i], bboxes[i]
        im, im_scale = rescale(im)
        height, width = im.shape[0], im.shape[1]
        padded_ims[i, :, :height, :width] = transform(im)
        if num_params < 9:
            # Horizontal boxes: scale (x1, y1, x2, y2).
            bbox[:, :4] = bbox[:, :4] * im_scale
        else:
            # Quadrilateral boxes: scale all 8 corner coordinates.
            bbox[:, :8] = bbox[:, :8] * np.hstack((im_scale, im_scale))
        padded_boxes[i, :bbox.shape[0], :] = torch.from_numpy(bbox)

    return {'image': padded_ims, 'boxes': padded_boxes}
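# Usage sketch for the collate method above. This is a minimal, hedged
# example, not part of the repo: `Collater` (the enclosing class name),
# its constructor signature, and `train_dataset` are assumptions; only
# the batch dict keys 'image'/'boxes' come from the code above.
from torch.utils.data import DataLoader

collater = Collater(scales=[480, 544, 608], keep_ratio=True, multiple=32)
loader = DataLoader(train_dataset, batch_size=8, shuffle=True,
                    num_workers=4, collate_fn=collater)
for batch in loader:
    images = batch['image']  # (B, 3, H_max, W_max), zero-padded
    boxes = batch['boxes']   # (B, N_max, num_params), -1-padded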
def __init__(self, config):
    # Compute the flattened conv-stack output size so the categorical
    # head knows its input dimension. The strides passed here must match
    # the Conv2d layers below (config.strides_conv, not the kernel sizes).
    shp_enc_out, dim_out_flat_conv = compute_cnn_output_filters_and_dims(
        dims_img=config.dims_img,
        dims_filter=config.dims_conv,
        kernel_sizes=config.kernel_sizes_conv,
        strides=config.strides_conv,
        paddings=config.paddings_conv,
    )
    super().__init__(
        stem=nn.Sequential(
            Reshape(config.dims_img),  # TxPxB will be flattened before.
            Conv2d(
                in_channels=config.dims_img[0],
                out_channels=config.dims_conv[0],
                kernel_size=config.kernel_sizes_conv[0],
                stride=config.strides_conv[0],
                padding=config.paddings_conv[0],
            ),
            nn.ReLU(),
            Conv2d(
                in_channels=config.dims_conv[0],
                out_channels=config.dims_conv[1],
                kernel_size=config.kernel_sizes_conv[1],
                stride=config.strides_conv[1],
                padding=config.paddings_conv[1],
            ),
            nn.ReLU(),
            Conv2d(
                in_channels=config.dims_conv[1],
                out_channels=config.dims_conv[2],
                kernel_size=config.kernel_sizes_conv[2],
                stride=config.strides_conv[2],
                padding=config.paddings_conv[2],
            ),
            nn.ReLU(),
            Reshape((dim_out_flat_conv,)),  # Flatten image dims.
        ),
        dist_params=nn.ModuleDict({
            "logits": Linear(
                in_features=dim_out_flat_conv,
                out_features=config.dims.switch,
            ),
        }),
        dist_cls=OneHotCategorical,
    )
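# For reference, a minimal sketch of what a helper like
# compute_cnn_output_filters_and_dims has to compute: after each conv the
# spatial size follows floor((size + 2*padding - kernel) / stride) + 1.
# The real helper's signature and return values come from the repo; this
# standalone version is an assumption-laden illustration only.
def _conv_out_dims_sketch(dims_img, dims_filter, kernel_sizes, strides,
                          paddings):
    _, h, w = dims_img  # (channels, height, width)
    for k, s, p in zip(kernel_sizes, strides, paddings):
        h = (h + 2 * p - k) // s + 1
        w = (w + 2 * p - k) // s + 1
    shp_enc_out = (dims_filter[-1], h, w)
    return shp_enc_out, int(np.prod(shp_enc_out))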
def single_scale_detect(model, src, target_size, use_gpu=True, conf=None):
    # Rescale the source image to the target size (keeping aspect ratio),
    # then normalize and add a batch dimension.
    im, im_scales = Rescale(target_size=target_size, keep_ratio=True)(src)
    im = Compose([Normailize(), Reshape(unsqueeze=True)])(im)
    if use_gpu and torch.cuda.is_available():
        model, im = model.cuda(), im.cuda()
    with torch.no_grad():
        scores, classes, boxes = model(im, test_conf=conf)
    scores = scores.data.cpu().numpy()
    classes = classes.data.cpu().numpy()
    boxes = boxes.data.cpu().numpy()
    # Map boxes back to the original image scale.
    boxes[:, :4] = boxes[:, :4] / im_scales
    if boxes.shape[1] > 5:
        boxes[:, 5:9] = boxes[:, 5:9] / im_scales
    scores = np.reshape(scores, (-1, 1))
    classes = np.reshape(classes, (-1, 1))
    cls_dets = np.concatenate([classes, scores, boxes], axis=1)
    # Drop background detections (class 0).
    keep = np.where(classes > 0)[0]
    return cls_dets[keep, :]
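# Hedged usage sketch: `build_model`, the checkpoint path, and the image
# path are placeholders, not repo functions; only single_scale_detect's
# own signature is taken from the code above.
import cv2

model = build_model()  # hypothetical model constructor
model.load_state_dict(torch.load('weights.pth', map_location='cpu'))
model.eval()

src = cv2.imread('demo.jpg')  # HxWx3 image
dets = single_scale_detect(model, src, target_size=608, conf=0.3)
# dets columns: [class, score, box params...], in original-image coords.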
def __init__(self, config):
    self.num_hierarchies = 2
    if config.dims.ctrl_encoder not in [None, 0]:
        raise ValueError(
            "no controls. would require different architecture "
            "or mixing with images.")
    shp_enc_out, dim_out_flat_conv = compute_cnn_output_filters_and_dims(
        dims_img=config.dims_img,
        dims_filter=config.dims_filter,
        kernel_sizes=config.kernel_sizes,
        strides=config.strides,
        paddings=config.paddings,
    )
    assert config.dims_encoder[0] is None, (
        "first stem is a conv net. config is given differently...")
    dims_stem_2 = (  # TODO: really past self?
        32,
        32,
    )
    activations_stem_2 = nn.ReLU()
    dim_out_1 = config.dims.auxiliary
    dim_out_2 = config.dims.switch
    dim_in_dist_params_1 = dim_out_flat_conv
    dim_in_dist_params_2 = (dims_stem_2[-1]
                            if len(dims_stem_2) > 0 else dim_out_flat_conv)
    super().__init__(
        allow_cat_inputs=False,  # images and scalar...
        stem=nn.ModuleList([
            nn.Sequential(
                Reshape(config.dims_img),  # TxPxB will be flattened before.
                Conv2d(
                    in_channels=config.dims_img[0],
                    out_channels=config.dims_filter[0],
                    kernel_size=config.kernel_sizes[0],
                    stride=config.strides[0],
                    padding=config.paddings[0],
                ),
                nn.ReLU(),
                Conv2d(
                    in_channels=config.dims_filter[0],
                    out_channels=config.dims_filter[1],
                    kernel_size=config.kernel_sizes[1],
                    stride=config.strides[1],
                    padding=config.paddings[1],
                ),
                nn.ReLU(),
                Conv2d(
                    in_channels=config.dims_filter[1],
                    out_channels=config.dims_filter[2],
                    kernel_size=config.kernel_sizes[2],
                    stride=config.strides[2],
                    padding=config.paddings[2],
                ),
                nn.ReLU(),
                Reshape((dim_out_flat_conv,)),  # Flatten image dims.
            ),
            MLP(
                dim_in=dim_out_flat_conv,
                dims=dims_stem_2,
                activations=activations_stem_2,
            ),
        ]),
        dist_params=nn.ModuleList([
            nn.ModuleDict({
                "loc": nn.Sequential(
                    Linear(
                        in_features=dim_in_dist_params_1,
                        out_features=dim_out_1,
                    ),
                ),
                "scale_tril": DefaultScaleTransform(
                    dim_in_dist_params_1,
                    dim_out_1,
                ),
            }),
            nn.ModuleDict({
                "loc": nn.Sequential(
                    Linear(
                        in_features=dim_in_dist_params_2,
                        out_features=dim_out_2,
                    ),
                ),
                "scale_tril": DefaultScaleTransform(
                    dim_in_dist_params_2,
                    dim_out_2,
                ),
            }),
        ]),
        dist_cls=[MultivariateNormal, MultivariateNormal],
    )
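# Hedged construction sketch: a config stub listing the fields this
# two-level encoder reads. SimpleNamespace and all values here stand in
# for the repo's config class and are illustrative assumptions only.
from types import SimpleNamespace

cfg = SimpleNamespace(
    dims_img=(1, 32, 32),          # (channels, height, width)
    dims_filter=(32, 32, 32),
    kernel_sizes=(3, 3, 3),
    strides=(2, 2, 2),
    paddings=(1, 1, 1),
    dims_encoder=(None, (32, 32)),  # first stem is the conv net
    dims=SimpleNamespace(auxiliary=4, switch=8, ctrl_encoder=None),
)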
def __init__(self, config):
    shp_enc_out, dim_out_flat_conv = compute_cnn_output_filters_and_dims(
        dims_img=config.dims_img,
        dims_filter=config.dims_filter,
        kernel_sizes=config.kernel_sizes,
        strides=config.strides,
        paddings=config.paddings,
    )
    # Choose the scale rectifier: a fixed maximum scale if Q is not
    # learned and an initial diagonal scale is given; otherwise the
    # default (learned) scale transform. Mixed settings are ambiguous.
    if not config.requires_grad_Q and isinstance(config.init_scale_Q_diag,
                                                 float):
        fixed_max_scale = True
    elif config.requires_grad_Q and not isinstance(config.init_scale_Q_diag,
                                                   float):
        fixed_max_scale = False
    else:
        raise ValueError("unclear what encoder scale rectifier to use.")
    super().__init__(
        stem=nn.Sequential(
            Reshape(config.dims_img),  # TxPxB will be flattened before.
            # Asymmetric (right/bottom) zero-padding before each stride-2
            # conv, in the style of TensorFlow 'SAME' padding.
            nn.ZeroPad2d(padding=[0, 1, 0, 1]),
            Conv2d(
                in_channels=config.dims_img[0],
                out_channels=config.dims_filter[0],
                kernel_size=config.kernel_sizes[0],
                stride=config.strides[0],
                padding=0,
            ),
            nn.ReLU(),
            nn.ZeroPad2d(padding=[0, 1, 0, 1]),
            Conv2d(
                in_channels=config.dims_filter[0],
                out_channels=config.dims_filter[1],
                kernel_size=config.kernel_sizes[1],
                stride=config.strides[1],
                padding=0,
            ),
            nn.ReLU(),
            nn.ZeroPad2d(padding=[0, 1, 0, 1]),
            Conv2d(
                in_channels=config.dims_filter[1],
                out_channels=config.dims_filter[2],
                kernel_size=config.kernel_sizes[2],
                stride=config.strides[2],
                padding=0,
            ),
            nn.ReLU(),
            Reshape((dim_out_flat_conv,)),  # Flatten image dims.
        ),
        dist_params=nn.ModuleDict({
            "loc": Linear(
                in_features=dim_out_flat_conv,
                out_features=config.dims.auxiliary,
            ),
            "scale": nn.Sequential(
                Linear(
                    in_features=dim_out_flat_conv,
                    out_features=config.dims.auxiliary,
                ),
                ScaledSqrtSigmoid(max_scale=config.init_scale_Q_diag),
            ) if fixed_max_scale else DefaultScaleTransform(
                dim_out_flat_conv,
                config.dims.auxiliary,
                make_diag_cov_matrix=False,
            ),
        }),
        dist_cls=IndependentNormal,
    )
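# Why ZeroPad2d([0, 1, 0, 1]) + padding=0: for kernel 3, stride 2 on an
# even-sized input, one extra column/row on the right/bottom reproduces
# TensorFlow-style 'SAME' downsampling (e.g. 32 -> 16 -> 8 -> 4). A
# minimal shape check, using plain nn.Conv2d in place of the repo's
# Conv2d wrapper (an assumption; sizes are illustrative):
x = torch.zeros(1, 1, 32, 32)
pad = nn.ZeroPad2d([0, 1, 0, 1])    # [left, right, top, bottom]
conv = nn.Conv2d(1, 8, kernel_size=3, stride=2, padding=0)
print(conv(pad(x)).shape)           # torch.Size([1, 8, 16, 16])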
def __init__(self, config):
    shp_enc_out, dim_out_flat_conv = compute_cnn_output_filters_and_dims(
        dims_img=config.dims_img,
        dims_filter=config.dims_filter,
        kernel_sizes=config.kernel_sizes,
        strides=config.strides,
        paddings=config.paddings,
    )
    super().__init__(
        stem=nn.Sequential(
            # Project the auxiliary latent up to the encoder's final
            # feature-map shape, then invert the conv stack with
            # stride-1 convs + PixelShuffle (sub-pixel) upsampling.
            Linear(
                in_features=config.dims.auxiliary,
                out_features=int(np.prod(shp_enc_out)),
            ),
            Reshape(shp_enc_out),  # TxPxB will be flattened before.
            Conv2d(
                in_channels=shp_enc_out[0],
                out_channels=config.dims_filter[-1]
                * config.upscale_factor ** 2,
                kernel_size=config.kernel_sizes[-1],
                stride=1,  # PixelShuffle upsamples instead.
                padding=config.paddings[-1],
            ),
            nn.PixelShuffle(upscale_factor=config.upscale_factor),
            nn.ReLU(),
            Conv2d(
                in_channels=config.dims_filter[-1],
                out_channels=config.dims_filter[-2]
                * config.upscale_factor ** 2,
                kernel_size=config.kernel_sizes[-2],
                stride=1,  # PixelShuffle upsamples instead.
                padding=config.paddings[-2],
            ),
            nn.PixelShuffle(upscale_factor=config.upscale_factor),
            nn.ReLU(),
            Conv2d(
                in_channels=config.dims_filter[-2],
                out_channels=config.dims_filter[-3]
                * config.upscale_factor ** 2,
                kernel_size=config.kernel_sizes[-3],
                stride=1,  # PixelShuffle upsamples instead.
                padding=config.paddings[-3],
            ),
            nn.PixelShuffle(upscale_factor=config.upscale_factor),
            nn.ReLU(),
        ),
        dist_params=nn.ModuleDict({
            "logits": nn.Sequential(
                # 1x1 conv to a single channel, flattened to the
                # target (pixel) dimension.
                Conv2d(
                    in_channels=config.dims_filter[-3],
                    out_channels=1,
                    kernel_size=1,
                    stride=1,
                    padding=0,
                ),
                Reshape((config.dims.target,)),
            )
        }),
        dist_cls=IndependentBernoulli,
    )
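# Hedged shape check for the sub-pixel upsampling above: PixelShuffle(r)
# maps (B, C*r^2, H, W) -> (B, C, r*H, r*W), so each conv emits r^2 times
# the target channels and PixelShuffle trades them for spatial resolution.
# Illustrative sizes only (assumptions, not the repo's config):
r = 2
x = torch.zeros(1, 32 * r ** 2, 8, 8)
print(nn.PixelShuffle(r)(x).shape)  # torch.Size([1, 32, 16, 16])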