Example #1
def single_image_to_top_predictions(image):
    image = image.float() / 255.0
    image = image.permute(2, 0, 1)
    # we are loading images with OpenCV, so they are already in BGR order
    # and don't need converting. All we need to do is scale back by 255
    # if we want the BGR255 format, or flip the channels if we want RGB
    # in the [0-1] range.
    if cfg.INPUT.TO_BGR255:
        image = image * 255
    else:
        image = image[[2, 1, 0]]

    # we absolutely want a fixed (int) size here, or we run into a tracing
    # error (or bug?); we might later make this work with variable sizes...
    image = image - torch.tensor(cfg.INPUT.PIXEL_MEAN)[:, None, None]
    # should also do variance...
    image_list = ImageList(image.unsqueeze(0), [(int(image.size(-2)), int(image.size(-1)))])
    result, = coco_demo.model(image_list)
    scores = result.get_field("scores")
    keep = (scores >= coco_demo.confidence_threshold)
    result = (result.bbox[keep],
              result.get_field("labels")[keep],
              result.get_field("mask")[keep],
              scores[keep])
    return result
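A minimal usage sketch for the snippet above, assuming `cfg`, `coco_demo`, `ImageList` and the function itself are already in scope as in the original demo script; the image path is hypothetical.

# Hypothetical usage sketch for single_image_to_top_predictions above.
# Assumes cfg, coco_demo and ImageList are already imported/configured.
import cv2
import torch

# OpenCV returns an HxWxC uint8 array in BGR order, which matches the
# preprocessing performed inside single_image_to_top_predictions.
original_image = cv2.imread("demo/sample.jpg")  # hypothetical path
image = torch.from_numpy(original_image)

with torch.no_grad():
    boxes, labels, masks, scores = single_image_to_top_predictions(image)
print(boxes.shape, labels, scores)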
Example #2
 def single_image_to_top_predictions(self, image):
     result = self.model(
         ImageList(image, [(int(image.size(-2)), int(image.size(-1)))]))[0]
     scores = result.get_field("scores")
     keep = (scores >= self.confidence_threshold)
     result = (result.bbox[keep], result.get_field("labels")[keep],
               result.get_field("mask")[keep], scores[keep])
     return result
Example #3
def to_mask_list(tensors, size_divisible=0, ignore_value=255):
    """
    tensors can be an ImageList, a torch.Tensor or
    an iterable of Tensors. It can't be a numpy array.
    When tensors is an iterable of Tensors, it pads
    the Tensors with zeros so that they have the same
    shape
    """
    if isinstance(tensors, torch.Tensor) and size_divisible > 0:
        tensors = [tensors]

    if isinstance(tensors, ImageList):
        return tensors
    elif isinstance(tensors, torch.Tensor):
        # single tensor shape can be inferred
        if tensors.dim() == 3:
            tensors = tensors[None]
        assert tensors.dim() == 4
        image_sizes = [tensor.shape[-2:] for tensor in tensors]
        return ImageList(tensors, image_sizes)
    elif isinstance(tensors, (tuple, list)):
        max_size = tuple(max(s) for s in zip(*[img.shape for img in tensors]))

        # TODO Ideally, just remove this and let the model handle arbitrary
        # input sizes
        if size_divisible > 0:
            import math

            stride = size_divisible
            max_size = list(max_size)
            max_size[1] = int(math.ceil(max_size[1] / stride) * stride)
            max_size[2] = int(math.ceil(max_size[2] / stride) * stride)
            max_size = tuple(max_size)

        batch_shape = (len(tensors), ) + max_size
        batched_imgs = tensors[0].new_full(batch_shape, ignore_value)
        for img, pad_img in zip(tensors, batched_imgs):
            pad_img[:img.shape[0], :img.shape[1], :img.shape[2]].copy_(img)

        image_sizes = [im.shape[-2:] for im in tensors]

        return ImageList(batched_imgs, image_sizes)
    else:
        raise TypeError("Unsupported type for to_mask_list: {}".format(
            type(tensors)))
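A small sketch of how the list branch of `to_mask_list` behaves, padding two differently sized mask tensors up to a common, stride-aligned shape; it assumes the function above (and its `ImageList` dependency) is in scope, and the tensor contents are made up purely for illustration.

# Illustration only: two CxHxW mask tensors of different spatial sizes.
import torch

masks = [torch.zeros(1, 30, 40), torch.zeros(1, 25, 50)]
batched = to_mask_list(masks, size_divisible=32, ignore_value=255)

# The common shape is the per-dimension maximum, rounded up to the stride,
# and the padded area is filled with ignore_value (255).
print(batched.tensors.shape)  # torch.Size([2, 1, 32, 64])
print(batched.image_sizes)    # [torch.Size([30, 40]), torch.Size([25, 50])]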
Example #4
def single_image_to_top_predictions(image):
    image_list = ImageList(image.unsqueeze(0), [(int(image.size(-2)), int(image.size(-1)))])

    for param in coco_demo.model.parameters():
        param.requires_grad = False

    result, = coco_demo.model(image_list)
    scores = result.get_field("scores")
    result = (result.bbox, result.get_field('labels'), scores)
    return result
Example #5
def partition_data(images, targets, num):
    if num == 1 or len(images.image_sizes) < num:
        return [(images, targets)]
    each = len(images.image_sizes) // num
    result = []
    from maskrcnn_benchmark.structures.image_list import ImageList
    for i in range(num):
        start = i * each
        end = start + each
        curr_tensors = images.tensors[start:end]
        curr_sizes = images.image_sizes[start:end]
        curr_imagelist = ImageList(curr_tensors, curr_sizes)
        curr_target = targets[start:end]
        result.append((curr_imagelist, curr_target))
    return result
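A hedged usage sketch for `partition_data`, splitting a toy batch of four padded images into two chunks; the sizes and target placeholders are invented for illustration.

# Illustration only: a batch of four padded images with their original sizes.
import torch
from maskrcnn_benchmark.structures.image_list import ImageList

tensors = torch.zeros(4, 3, 64, 64)
sizes = [(60, 62), (64, 60), (58, 64), (64, 64)]
images = ImageList(tensors, sizes)
targets = ["t0", "t1", "t2", "t3"]  # placeholder targets

for imagelist, chunk_targets in partition_data(images, targets, num=2):
    print(imagelist.tensors.shape, imagelist.image_sizes, chunk_targets)
# torch.Size([2, 3, 64, 64]) [(60, 62), (64, 60)] ['t0', 't1']
# torch.Size([2, 3, 64, 64]) [(58, 64), (64, 64)] ['t2', 't3']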
Example #6
def single_image_to_top_predictions(image):
    # attempt to change shape from constant to tensor
    # from torch.onnx import operators
    # im_shape = operators.shape_as_tensor(image)
    # image_sizes = (im_shape[1].to(torch.float), im_shape[2].to(torch.float))

    # image_list = ImageList(image.unsqueeze(0), [image_sizes])
    image_list = ImageList(image.unsqueeze(0), [(int(image.size(-2)), int(image.size(-1)))])

    for param in coco_demo.model.parameters():
        param.requires_grad = False

    result, = coco_demo.model(image_list)
    scores = result.get_field('scores')
    masks = result.get_field('mask')
    # result = (result.bbox, result.get_field('labels'), scores)
    result = (result.bbox, result.get_field('labels'), scores, masks)
    return result
Example #7
    def forward(self, images, targets=None):
        """
        Arguments:
            images (list[Tensor] or ImageList): images to be processed
            targets (list[PanopticTarget]): ground-truth boxes/segmentation in the image (optional)

        Returns:
            result (list[BoxList] or dict[Tensor]): the output from the model.
                During training, it returns a dict[Tensor] that contains the losses.
                During testing, it returns a list[BoxList] with additional fields
                such as `scores`, `labels` and `mask` (for Mask R-CNN models).

        """
        if self.training:
            if targets is None:
                raise ValueError("In training mode, targets should be passed")
        else:
            if self.feed_ground_truth_instances:
                if targets is None:
                    raise ValueError(
                        "expected to feed ground truth instances but no ground truth provided"
                    )

                # remove images/targets without any instances.
                keep_indexes = [
                    i for i in range(images.tensors.shape[0])
                    if len(targets[i].region) > 0
                ]
                image_tensors = images.tensors[keep_indexes]

                image_sizes = [
                    images.image_sizes[i]
                    for i in range(images.tensors.shape[0])
                    if len(targets[i].region) > 0
                ]
                images = ImageList(image_tensors, image_sizes)

                targets = [
                    target for target in targets if len(target.region) > 0
                ]

        # usually, it seems this is already an ImageList.
        images = to_image_list(images)

        # note that these are already run through FPN if FPN is included.
        features = self.backbone(images.tensors)

        semantic_targets = None
        box_targets = None

        if targets is not None:
            semantic_targets = [f.segmentation for f in targets]
            box_targets = [f.region for f in targets]

        semantic_masks, semantic_losses, semantic_features_per_scale = self.semantic_segmentation(
            images, features, semantic_targets)

        primed_features = features
        # # combine these if we're asked to.
        # if self.prime_rpn or self.prime_roi:
        #     # features is shallow to deep, semantic is deep to shallow.
        #     # todo, make this computed.
        #     FPN_FEATURES_START = 2
        #     primed_features = []
        #     for other in features[FPN_FEATURES_START + 1:]:
        #         primed_features.append(other)

        #     for i, feature_per_scale in enumerate(semantic_features_per_scale):
        #         primed_feature_index = FPN_FEATURES_START - i
        #         primed_feature = features[primed_feature_index] + feature_per_scale

        #         # going backwards.
        #         primed_features.insert(0, primed_feature)

        # we already know what we want.
        if self.feed_ground_truth_instances:
            proposals = [
                boxes.to(images.tensors.device) for boxes in box_targets
            ]
            proposal_losses = None
        else:
            proposals, proposal_losses = self.rpn(
                images, primed_features if self.prime_rpn else features,
                box_targets)

        if self.roi_heads:
            x, boxes, detector_losses = self.roi_heads(
                primed_features if self.prime_roi else features, proposals,
                box_targets, semantic_targets)
        else:
            # RPN-only models don't have roi_heads
            x = features
            boxes = proposals
            detector_losses = {}

        if self.training:
            losses = {}
            losses.update(detector_losses)
            losses.update(semantic_losses)
            losses.update(proposal_losses)
            return losses

        for i, semantic_mask in enumerate(semantic_masks):
            box = boxes[i]
            mask = semantic_mask.mask

            # the boxes record the original shape (I hope).
            given_width, given_height = box.size
            padded_shape = mask.shape

            extra_bottom = padded_shape[0] - given_height
            if extra_bottom > 0:
                mask = mask[:-extra_bottom, :]

            extra_right = padded_shape[1] - given_width
            if extra_right > 0:
                mask = mask[:, :-extra_right]

            semantic_mask.mask = mask

        result = [
            PanopticTarget(box, mask)
            for (box, mask) in zip(boxes, semantic_masks)
        ]
        return result
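A standalone toy illustration of the padding removal at the end of `forward` above, assuming a semantic mask that was padded beyond the original size recorded on the BoxList; the numbers are invented.

# Toy illustration of the mask-cropping step above (no model required).
import torch

padded_mask = torch.zeros(68, 72)    # padded H x W produced by the network
given_width, given_height = 70, 64   # original size recorded on the BoxList

extra_bottom = padded_mask.shape[0] - given_height
if extra_bottom > 0:
    padded_mask = padded_mask[:-extra_bottom, :]
extra_right = padded_mask.shape[1] - given_width
if extra_right > 0:
    padded_mask = padded_mask[:, :-extra_right]

print(padded_mask.shape)  # torch.Size([64, 70])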
Example #8
File: inference.py  Project: zxwu/adv_cloak
def compute_on_dataset(model,
                       data_loader,
                       device,
                       timer=None,
                       patched=False,
                       patchfile="",
                       cls_id=1):
    model.eval()
    results_dict = {}
    cpu_device = torch.device("cpu")
    patch_applier = PatchApplier().to(device)
    patch_transformer = PatchTransformer(augment=False).to(device)
    mean = [102.9801, 115.9465, 122.7717]
    std = [1.0, 1.0, 1.0]
    mean = torch.as_tensor(mean)
    std = torch.as_tensor(std)

    if patched:
        if 'rand' in patchfile:
            adv_patch_cpu = torch.rand(3, 250, 150)
        elif 'white' in patchfile:
            adv_patch_cpu = torch.zeros(3, 250, 150).fill_(0.5)
        elif 'clean' in patchfile:
            adv_patch_cpu = torch.rand(3, 250, 150)
            patched = False
        else:
            adv_patch_cpu = torch.load(patchfile)
            if 'submean' in patchfile:
                adv_patch_cpu = adv_patch_cpu['model']
            else:
                adv_patch_cpu = adv_patch_cpu.detach().cpu()
                adv_patch_cpu = torch.flip(adv_patch_cpu, [0])

        adv_patch = adv_patch_cpu.to(device)

    for _, batch in enumerate(tqdm(data_loader)):
        images, targets, image_ids = batch
        imgs = images.tensors.to(device)

        mean = mean.to(device)
        std = std.to(device)
        imgs.sub_(mean[None, :, None, None]).div_(std[None, :, None, None])

        images = ImageList(imgs, images.image_sizes)

        with torch.no_grad():
            if timer:
                timer.tic()
            if cfg.TEST.BBOX_AUG.ENABLED:
                output = im_detect_bbox_aug(model, images, device)
            else:
                output = model(images.to(device))

                if patched:
                    imgs = images.tensors.to(device)
                    imgs.mul_(std[None, :, None, None]).add_(mean[None, :,
                                                                  None, None])

                    bsz, _, height, width = imgs.shape
                    output = [o.to(cpu_device) for o in output]
                    using_gt = True
                    if using_gt:
                        output = targets
                    lab_batch = batchify_labels(bsz, (height, width),
                                                output,
                                                image_ids,
                                                testing=True,
                                                cls_label=cls_id)

                    adv_batch = patch_transformer(adv_patch.to(device),
                                                  lab_batch.to(device),
                                                  height,
                                                  width,
                                                  rand_loc=True,
                                                  scale_factor=0.22,
                                                  cls_label=cls_id)

                    adv_batch = adv_batch.mul_(255)

                    imgs = imgs.to(device)
                    imgs = patch_applier(imgs.to(device), adv_batch.to(device))

                    imgs.sub_(mean[None, :, None, None]).div_(std[None, :,
                                                                  None, None])
                    images = ImageList(imgs, images.image_sizes)

                    output = model(images.to(device))
            if timer:
                if cfg.MODEL.DEVICE != 'cpu':
                    torch.cuda.synchronize()
                timer.toc()
            output = [o.to(cpu_device) for o in output]
        results_dict.update(
            {img_id: result
             for img_id, result in zip(image_ids, output)})
    return results_dict
Example #9
 def compute_predictions(self, image_list: ImageList) -> List[BoxList]:
     image_list = image_list.to(self.device)
     predictions = self.model(image_list)
     return predictions
Example #10
File: trainer.py  Project: zxwu/adv_cloak
def do_train(
    model,
    data_loader,
    optimizer,
    scheduler,
    checkpointer,
    device,
    checkpoint_period,
    adv_patch_cpu,
    patch_transformer,
    patch_applier,
    arguments,
):
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter="  ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()
    mean = [102.9801, 115.9465, 122.7717]
    std = [1.0, 1.0, 1.0]
    mean = torch.as_tensor(mean)
    std = torch.as_tensor(std)
    print(len(data_loader.dataset))

    for iteration, (images, targets,
                    paths) in enumerate(data_loader, start_iter):

        if any(len(target) < 1 for target in targets):
            logger.error(
                f"Iteration={iteration + 1} || Image Ids used for training {_} || targets Length={[len(target) for target in targets]}"
            )
            continue
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        adv_patch = adv_patch_cpu.to(device)

        scheduler.step()
        imgs = images.tensors.to(device)

        bsz, _, height, width = imgs.shape

        lab_batch = batchify_labels(bsz, (height, width),
                                    targets,
                                    paths,
                                    cls_label=int(arguments["cls_id"]))

        adv_batch = patch_transformer(adv_patch.to(device),
                                      lab_batch.to(device),
                                      height,
                                      width,
                                      rand_loc=True,
                                      scale_factor=0.22,
                                      cls_label=int(arguments["cls_id"]))

        adv_batch = adv_batch.mul_(255)

        imgs = imgs.to(device)
        imgs = patch_applier(imgs.to(device), adv_batch.to(device))

        mean = mean.to(device)
        std = std.to(device)
        imgs.sub_(mean[None, :, None, None]).div_(std[None, :, None, None])

        targets = [target.to(device) for target in targets]
        images = ImageList(imgs, images.image_sizes)

        loss_dict = model(images,
                          targets,
                          logits_only=True,
                          adv_patch=adv_patch)
        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        meters.update(loss=losses_reduced, **loss_dict_reduced)

        optimizer.zero_grad()
        # Note: If mixed precision is not used, this ends up doing nothing
        # Otherwise apply loss scaling for mixed-precision recipe
        # with amp.scale_loss(losses, optimizer) as scaled_losses:
        #     scaled_losses.backward()
        losses.backward()
        optimizer.step()
        adv_patch.clamp_(0, 1)

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration % checkpoint_period == 0:
            checkpointer.save("patch_{:07d}".format(iteration), **arguments)
        if iteration == max_iter:
            checkpointer.save("patch_final", **arguments)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))