def preprocess_inputs(self, raw_inputs, params):
     outputs = dict()
     # label & inst.
     transform_label = get_transform_fn(self.opt,
                                        params,
                                        method=Image.NEAREST,
                                        normalize=False)
     outputs['label'] = transform_label(raw_inputs['label']) * 255.0
     outputs['inst'] = transform_label(raw_inputs['inst'])
     if self.opt.dataloader in ('sun_rgbd', 'ade20k'):  # NOTE(sh): dirty exception!
         outputs['inst'] *= 255.0
     outputs['label_path'] = raw_inputs['label_path']
     outputs['inst_path'] = raw_inputs['inst_path']
     # image
     if self.load_image:
         transform_image = get_transform_fn(self.opt, params)
         outputs['image'] = transform_image(raw_inputs['image'])
         outputs['image_path'] = raw_inputs['image_path']
     # raw inputs
     if self.load_raw:
         transform_raw = get_raw_transform_fn(normalize=False)
         outputs['label_raw'] = transform_raw(raw_inputs['label']) * 255.0
         outputs['inst_raw'] = transform_raw(raw_inputs['inst'])
         transform_image_raw = get_raw_transform_fn()
         outputs['image_raw'] = transform_image_raw(raw_inputs['image'])
     return outputs
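 # Usage sketch (an assumption, not code from the original repo): one way a
 # Dataset.__getitem__ could chain the two hooks above. sample_object_bbox and
 # the self.*_paths lists are hypothetical names; get_transform_params is
 # called with the same signature used in crop_canvas below.
 def __getitem__(self, index):
     raw_inputs = {
         'label': Image.open(self.label_paths[index]),
         'inst': Image.open(self.inst_paths[index]),
         'image': Image.open(self.image_paths[index]).convert('RGB'),
         'label_path': self.label_paths[index],
         'inst_path': self.inst_paths[index],
         'image_path': self.image_paths[index],
     }
     w, h = raw_inputs['label'].size
     params = get_transform_params((w, h), config=self.config,
                                   bbox=self.sample_object_bbox(index))
     outputs = self.preprocess_inputs(raw_inputs, params)
     return self.preprocess_cropping(raw_inputs, outputs, params)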
 def preprocess_cropping(self, raw_inputs, outputs, params):
     transform_obj = get_transform_fn(self.opt,
                                      params,
                                      method=Image.NEAREST,
                                      normalize=False,
                                      is_context=False)
     label_obj = transform_obj(raw_inputs['label']) * 255.0
     input_bbox = np.array(params['bbox_in_context'])
     bbox_cls = params['bbox_cls']
     bbox_cls = bbox_cls if bbox_cls is not None else self.opt.label_nc - 1
     if params['bbox_inst_id'] is not None:
         mask_object_inst = (outputs['inst'] == params['bbox_inst_id']).float()
     else:
         mask_object_inst = torch.zeros(outputs['inst'].size())
     ### generate output bbox
     img_size = outputs['label'].size(1)  # spatial size (height; width treated as equal below)
     context_ratio = np.random.uniform(low=self.config['min_ctx_ratio'],
                                       high=self.config['max_ctx_ratio'])
     output_bbox = np.array(
         get_soft_bbox(input_bbox, img_size, img_size, context_ratio))
     mask_in, mask_object_in, mask_context_in = get_masked_image(
         outputs['label'], input_bbox, bbox_cls)
     mask_out, mask_object_out, _ = get_masked_image(
         outputs['label'], output_bbox)
     # Build dictionary
     outputs['input_bbox'] = torch.from_numpy(input_bbox)
     outputs['output_bbox'] = torch.from_numpy(output_bbox)
     outputs['mask_in'] = mask_in  # (1x1xHxW)
     outputs['mask_object_in'] = mask_object_in  # (1xCxHxW)
     outputs['mask_context_in'] = mask_context_in  # (1xCxHxW)
     outputs['mask_out'] = mask_out  # (1x1xHxW)
     outputs['mask_object_out'] = mask_object_out  # (1xCxHxW)
     outputs['label_obj'] = label_obj
     outputs['mask_object_inst'] = mask_object_inst
     outputs['cls'] = torch.LongTensor([bbox_cls])
     return outputs
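 # For reference, a minimal sketch of what get_soft_bbox is assumed to do here:
 # grow the input bbox around its center by context_ratio and clamp it to the
 # image, so the output bbox covers the object plus some surrounding context.
 # This is an illustration only, not the actual helper from the repo.
 def soft_bbox_sketch(bbox, img_w, img_h, context_ratio):
     x1, y1, x2, y2 = [float(v) for v in bbox]
     cx, cy = (x1 + x2) / 2.0, (y1 + y2) / 2.0
     half_w = (x2 - x1) / 2.0 * context_ratio
     half_h = (y2 - y1) / 2.0 * context_ratio
     return [max(0.0, cx - half_w), max(0.0, cy - half_h),
             min(float(img_w), cx + half_w), min(float(img_h), cy + half_h)]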
Example #3
def crop_canvas(bbox_sampled, label_original, opt, img_original=None, \
    patch_to_obj_ratio=1.2, min_ctx_ratio=1.2, max_ctx_ratio=1.5, resize=True, \
    transform_img=False):
    h, w = label_original.size()[2:4]

    config = {}
    config['prob_flip'] = 0.0
    config['fineSize'] = opt.fineSize if resize else None
    config['img_to_obj_ratio'] = opt.contextMargin
    config['patch_to_obj_ratio'] = patch_to_obj_ratio
    config['min_ctx_ratio'] = min_ctx_ratio
    config['max_ctx_ratio'] = max_ctx_ratio

    params = get_transform_params((w,h), config=config, bbox=bbox_sampled, \
        random_crop=False)
    transform_label = get_transform_fn(opt, params, method=Image.NEAREST, \
        normalize=False, resize=resize)
    transform_image = get_transform_fn(opt, params, resize=resize)

    output_dict = {}
    output_dict['label'] = transform_label(tensor2pil(label_original[0])) * 255.0
    if transform_img:
        output_dict['image'] = transform_image(tensor2pil(img_original[0], \
            is_img=True))

    input_bbox = np.array(params['bbox_in_context'])
    crop_pos = np.array(params['crop_pos']).astype(int)
    bbox_cls = params['bbox_cls']
    ### generate output bbox
    img_size = output_dict['label'].size(1)  # spatial size (height; width treated as equal below)
    context_ratio = np.random.uniform(low=config['min_ctx_ratio'],\
         high=config['max_ctx_ratio'])
    output_bbox = np.array(get_soft_bbox(input_bbox, img_size, img_size, context_ratio))
    mask_in, mask_object_in, mask_context_in = get_masked_image( \
        output_dict['label'], input_bbox, bbox_cls)
    mask_out, mask_object_out, _ = get_masked_image( \
        output_dict['label'], output_bbox)

    output_dict['mask_ctx_in'] = mask_context_in.unsqueeze(0) # (1xCxHxW)
    output_dict['mask_in'] = mask_in.unsqueeze(0) # (1x1xHxW)
    output_dict['mask_out'] = mask_out.unsqueeze(0) # (1x1xHxW)
    output_dict['crop_pos'] = torch.from_numpy(crop_pos)  # (4,) crop window (x1, y1, x2, y2)
    output_dict['label'] = output_dict['label'].unsqueeze(0) # (1x1xHxW)
    output_dict['cls'] = torch.LongTensor([bbox_cls])
    if transform_img:
        output_dict['image'] = output_dict['image'].unsqueeze(0) * \
                               (1-output_dict['mask_in']).repeat(1,3,1,1)

    # Crop window
    x1, y1, x2, y2 = crop_pos  # coordinates of the crop window
    x1, y1 = max(0, x1), max(0, y1)  # clamp to image bounds
    width, height = x2 - x1 + 1, y2 - y1 + 1

    # Crop the original-resolution label and rescale both bboxes from the
    # fineSize canvas back into the crop's coordinate frame.
    label_crop = label_original[:, :, y1:y2 + 1, x1:x2 + 1]
    scale = np.array([width, height, width, height], dtype=float) / opt.fineSize
    input_bbox_orig = input_bbox.astype(float) * scale
    output_bbox_orig = output_bbox.astype(float) * scale
    _, _, mask_ctx_in = get_masked_image(label_crop[0], input_bbox_orig, bbox_cls)
    mask_out, _, _ = get_masked_image(label_crop[0], output_bbox_orig)

    output_dict['label_orig'] = label_crop
    output_dict['mask_ctx_in_orig'] = mask_ctx_in.unsqueeze(0)
    output_dict['mask_out_orig'] = mask_out.unsqueeze(0)

    # Shift the output bbox by the crop window offset to get global image coordinates.
    output_bbox_global = output_bbox_orig + np.array([x1, y1, x1, y1], dtype=float)
    output_dict['output_bbox'] = torch.from_numpy(output_bbox)
    output_dict['output_bbox_global'] = torch.from_numpy(output_bbox_global)

    return output_dict
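# Usage sketch (assumption): driving crop_canvas with a sampled bbox on a
# (1, 1, H, W) label tensor. bbox_sampled's exact format follows whatever
# get_transform_params expects; the (x1, y1, x2, y2) tuple below is a guess,
# and demo_crop_canvas itself is a hypothetical driver, not repo code.
def demo_crop_canvas(label_original, img_original, opt):
    bbox_sampled = (40, 60, 120, 180)  # hypothetical object box in pixel coords
    out = crop_canvas(bbox_sampled, label_original, opt,
                      img_original=img_original, transform_img=True)
    # Masks and labels come back batched; the output bbox is also reported in
    # global image coordinates for pasting results back into the full canvas.
    print(out['label'].shape, out['mask_in'].shape, out['output_bbox_global'])
    return out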