def single_image_to_top_predictions(image):
    image = image.float() / 255.0
    image = image.permute(2, 0, 1)
    # we are loading images with OpenCV, so we don't need to convert them
    # to BGR, they are already! So all we need to do is multiply by 255
    # if we want the BGR255 format, or flip the channels if we want RGB
    # in the [0-1] range.
    if cfg.INPUT.TO_BGR255:
        image = image * 255
    else:
        image = image[[2, 1, 0]]
    # we absolutely want a fixed (int) size here, or we run into a tracing
    # error (or bug?); we might later decide to make this work with
    # variable sizes...
    image = image - torch.tensor(cfg.INPUT.PIXEL_MEAN)[:, None, None]
    # should also divide by the variance...
    image_list = ImageList(image.unsqueeze(0),
                           [(int(image.size(-2)), int(image.size(-1)))])
    result, = coco_demo.model(image_list)
    scores = result.get_field("scores")
    keep = (scores >= coco_demo.confidence_threshold)
    result = (result.bbox[keep], result.get_field("labels")[keep],
              result.get_field("mask")[keep], scores[keep])
    return result
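# --- Usage sketch (not from the source): exercises the function above,
# assuming the maskrcnn_benchmark demo globals `coco_demo` and `cfg` are
# already configured. OpenCV returns an HxWx3 BGR uint8 array, which is
# exactly the layout the function expects; the file path is hypothetical.
import cv2
import torch

original_image = cv2.imread("input.jpg")
image = torch.from_numpy(original_image)
boxes, labels, masks, scores = single_image_to_top_predictions(image)
print(boxes.shape, scores)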
def single_image_to_top_predictions(self, image):
    result = self.model(
        ImageList(image, [(int(image.size(-2)), int(image.size(-1)))]))[0]
    scores = result.get_field("scores")
    keep = (scores >= self.confidence_threshold)
    result = (result.bbox[keep], result.get_field("labels")[keep],
              result.get_field("mask")[keep], scores[keep])
    return result
def to_mask_list(tensors, size_divisible=0, ignore_value=255):
    """
    tensors can be an ImageList, a torch.Tensor or an iterable of Tensors.
    It can't be a numpy array. When tensors is an iterable of Tensors, it
    pads the Tensors with ``ignore_value`` so that they all have the same
    shape.
    """
    if isinstance(tensors, torch.Tensor) and size_divisible > 0:
        tensors = [tensors]

    if isinstance(tensors, ImageList):
        return tensors
    elif isinstance(tensors, torch.Tensor):
        # single tensor shape can be inferred
        if tensors.dim() == 3:
            tensors = tensors[None]
        assert tensors.dim() == 4
        image_sizes = [tensor.shape[-2:] for tensor in tensors]
        return ImageList(tensors, image_sizes)
    elif isinstance(tensors, (tuple, list)):
        max_size = tuple(max(s) for s in zip(*[img.shape for img in tensors]))

        # TODO Ideally, just remove this and let the model handle arbitrary
        # input sizes
        if size_divisible > 0:
            import math

            stride = size_divisible
            max_size = list(max_size)
            max_size[1] = int(math.ceil(max_size[1] / stride) * stride)
            max_size[2] = int(math.ceil(max_size[2] / stride) * stride)
            max_size = tuple(max_size)

        batch_shape = (len(tensors),) + max_size
        # new_full takes the shape as a single tuple, not unpacked arguments
        batched_imgs = tensors[0].new_full(batch_shape, ignore_value)
        for img, pad_img in zip(tensors, batched_imgs):
            pad_img[:img.shape[0], :img.shape[1], :img.shape[2]].copy_(img)

        image_sizes = [im.shape[-2:] for im in tensors]
        return ImageList(batched_imgs, image_sizes)
    else:
        raise TypeError("Unsupported type for to_mask_list: {}".format(
            type(tensors)))
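# --- Usage sketch (illustrative, not from the source): pads a ragged batch
# with `to_mask_list`. The two CHW tensors below differ in spatial size, so
# the batched tensor takes the per-dimension maximum, rounded up to the
# stride, and the padding is filled with `ignore_value`.
import torch

a = torch.rand(3, 100, 120)
b = torch.rand(3, 90, 140)
image_list = to_mask_list([a, b], size_divisible=32, ignore_value=255)
print(image_list.tensors.shape)  # torch.Size([2, 3, 128, 160])
print(image_list.image_sizes)    # [torch.Size([100, 120]), torch.Size([90, 140])]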
def single_image_to_top_predictions(image):
    image_list = ImageList(image.unsqueeze(0),
                           [(int(image.size(-2)), int(image.size(-1)))])
    for param in coco_demo.model.parameters():
        param.requires_grad = False
    result, = coco_demo.model(image_list)
    scores = result.get_field("scores")
    result = (result.bbox, result.get_field("labels"), scores)
    return result
def partition_data(images, targets, num):
    if num == 1 or len(images.image_sizes) < num:
        return [(images, targets)]

    each = len(images.image_sizes) // num
    result = []

    from maskrcnn_benchmark.structures.image_list import ImageList
    for i in range(num):
        start = i * each
        end = start + each
        curr_tensors = images.tensors[start:end]
        curr_sizes = images.image_sizes[start:end]
        curr_imagelist = ImageList(curr_tensors, curr_sizes)
        curr_target = targets[start:end]
        result.append((curr_imagelist, curr_target))
    return result
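# --- Usage sketch (synthetic data, illustrative only): splits a 4-image
# ImageList into two chunks with `partition_data`. Note that when the batch
# size is not evenly divisible by `num`, the trailing images are dropped.
import torch
from maskrcnn_benchmark.structures.image_list import ImageList

batch = ImageList(torch.rand(4, 3, 128, 128), [(128, 128)] * 4)
targets = ["t0", "t1", "t2", "t3"]  # placeholder targets
for images, tgts in partition_data(batch, targets, num=2):
    print(images.tensors.shape, tgts)  # two chunks of 2 images each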
def single_image_to_top_predictions(image):
    # attempt to change shape from constant to tensor
    # from torch.onnx import operators
    # im_shape = operators.shape_as_tensor(image)
    # image_sizes = (im_shape[1].to(torch.float), im_shape[2].to(torch.float))
    # image_list = ImageList(image.unsqueeze(0), [image_sizes])
    image_list = ImageList(image.unsqueeze(0),
                           [(int(image.size(-2)), int(image.size(-1)))])
    for param in coco_demo.model.parameters():
        param.requires_grad = False
    result, = coco_demo.model(image_list)
    scores = result.get_field('scores')
    masks = result.get_field('mask')
    # result = (result.bbox, result.get_field('labels'), scores)
    result = (result.bbox, result.get_field('labels'), scores, masks)
    return result
def forward(self, images, targets=None):
    """
    Arguments:
        images (list[Tensor] or ImageList): images to be processed
        targets (list[PanopticTarget]): ground-truth boxes/segmentation
            in the image (optional)

    Returns:
        result (list[BoxList] or dict[Tensor]): the output from the model.
            During training, it returns a dict[Tensor] which contains the
            losses. During testing, it returns a list[BoxList] containing
            additional fields like `scores`, `labels` and `mask` (for
            Mask R-CNN models).
    """
    if self.training:
        if targets is None:
            raise ValueError("In training mode, targets should be passed")
    else:
        if self.feed_ground_truth_instances:
            if targets is None:
                raise ValueError(
                    "expected to feed ground truth instances but no ground truth provided"
                )

            # remove images/targets without any instances.
            keep_indexes = [
                i for i in range(images.tensors.shape[0])
                if len(targets[i].region) > 0
            ]
            image_tensors = images.tensors[keep_indexes]
            image_sizes = [
                images.image_sizes[i] for i in range(images.tensors.shape[0])
                if len(targets[i].region) > 0
            ]
            images = ImageList(image_tensors, image_sizes)
            targets = [target for target in targets if len(target.region) > 0]

    # usually, it seems this is already an ImageList.
    images = to_image_list(images)

    # note that these are already run through FPN if FPN is included.
    features = self.backbone(images.tensors)

    semantic_targets = None
    box_targets = None
    if targets is not None:
        semantic_targets = [f.segmentation for f in targets]
        box_targets = [f.region for f in targets]

    semantic_masks, semantic_losses, semantic_features_per_scale = self.semantic_segmentation(
        images, features, semantic_targets)

    primed_features = features

    # # combine these if we're asked to.
    # if self.prime_rpn or self.prime_roi:
    #     # features is shallow to deep, semantic is deep to shallow.
    #     # todo, make this computed.
    #     FPN_FEATURES_START = 2
    #     primed_features = []
    #     for other in features[FPN_FEATURES_START + 1:]:
    #         primed_features.append(other)
    #     for i, feature_per_scale in enumerate(semantic_features_per_scale):
    #         primed_feature_index = FPN_FEATURES_START - i
    #         primed_feature = features[primed_feature_index] + feature_per_scale
    #         # going backwards.
    #         primed_features.insert(0, primed_feature)

    # we already know what we want.
    if self.feed_ground_truth_instances:
        proposals = [boxes.to(images.tensors.device) for boxes in box_targets]
        proposal_losses = None
    else:
        proposals, proposal_losses = self.rpn(
            images, primed_features if self.prime_rpn else features,
            box_targets)

    if self.roi_heads:
        x, boxes, detector_losses = self.roi_heads(
            primed_features if self.prime_roi else features, proposals,
            box_targets, semantic_targets)
    else:
        # RPN-only models don't have roi_heads
        x = features
        boxes = proposals
        detector_losses = {}

    if self.training:
        losses = {}
        losses.update(detector_losses)
        losses.update(semantic_losses)
        losses.update(proposal_losses)
        return losses

    for i, semantic_mask in enumerate(semantic_masks):
        box = boxes[i]
        mask = semantic_mask.mask

        # the boxes record the original shape (I hope).
        given_width, given_height = box.size
        padded_shape = mask.shape

        extra_bottom = padded_shape[0] - given_height
        if extra_bottom > 0:
            mask = mask[:-extra_bottom, :]

        extra_right = padded_shape[1] - given_width
        if extra_right > 0:
            mask = mask[:, :-extra_right]

        semantic_mask.mask = mask

    result = [
        PanopticTarget(box, mask)
        for (box, mask) in zip(boxes, semantic_masks)
    ]
    return result
def compute_on_dataset(model, data_loader, device, timer=None, patched=False,
                       patchfile="", cls_id=1):
    model.eval()
    results_dict = {}
    cpu_device = torch.device("cpu")
    patch_applier = PatchApplier().to(device)
    patch_transformer = PatchTransformer(augment=False).to(device)
    mean = torch.as_tensor([102.9801, 115.9465, 122.7717])
    std = torch.as_tensor([1.0, 1.0, 1.0])

    if patched:
        if 'rand' in patchfile:
            adv_patch_cpu = torch.rand(3, 250, 150)
        elif 'white' in patchfile:
            adv_patch_cpu = torch.zeros(3, 250, 150).fill_(0.5)
        elif 'clean' in patchfile:
            adv_patch_cpu = torch.rand(3, 250, 150)
            patched = False
        else:
            adv_patch_cpu = torch.load(patchfile)
            if 'submean' in patchfile:
                adv_patch_cpu = adv_patch_cpu['model']
            else:
                adv_patch_cpu = adv_patch_cpu.detach().cpu()
                adv_patch_cpu = torch.flip(adv_patch_cpu, [0])
        adv_patch = adv_patch_cpu.to(device)

    for _, batch in enumerate(tqdm(data_loader)):
        images, targets, image_ids = batch
        imgs = images.tensors.to(device)
        mean = mean.to(device)
        std = std.to(device)
        imgs.sub_(mean[None, :, None, None]).div_(std[None, :, None, None])
        images = ImageList(imgs, images.image_sizes)
        with torch.no_grad():
            if timer:
                timer.tic()
            if cfg.TEST.BBOX_AUG.ENABLED:
                output = im_detect_bbox_aug(model, images, device)
            else:
                output = model(images.to(device))
            if patched:
                # undo the normalization, apply the patch in pixel space,
                # then re-normalize and run the model again
                imgs = images.tensors.to(device)
                imgs.mul_(std[None, :, None, None]).add_(mean[None, :, None, None])
                bsz, _, height, width = imgs.shape
                output = [o.to(cpu_device) for o in output]
                using_gt = True
                if using_gt:
                    output = targets
                lab_batch = batchify_labels(bsz, (height, width), output,
                                            image_ids, testing=True,
                                            cls_label=cls_id)
                adv_batch = patch_transformer(adv_patch.to(device),
                                              lab_batch.to(device),
                                              height, width, rand_loc=True,
                                              scale_factor=0.22,
                                              cls_label=cls_id)
                adv_batch = adv_batch.mul_(255)
                imgs = patch_applier(imgs.to(device), adv_batch.to(device))
                imgs.sub_(mean[None, :, None, None]).div_(std[None, :, None, None])
                images = ImageList(imgs, images.image_sizes)
                output = model(images.to(device))
            if timer:
                if not cfg.MODEL.DEVICE == 'cpu':
                    torch.cuda.synchronize()
                timer.toc()
            output = [o.to(cpu_device) for o in output]
        results_dict.update(
            {img_id: result for img_id, result in zip(image_ids, output)})
    return results_dict
def compute_predictions(self, image_list: ImageList) -> List[BoxList]:
    image_list = image_list.to(self.device)
    predictions = self.model(image_list)
    return predictions
def do_train(
    model,
    data_loader,
    optimizer,
    scheduler,
    checkpointer,
    device,
    checkpoint_period,
    adv_patch_cpu,
    patch_transformer,
    patch_applier,
    arguments,
):
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter="  ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()
    mean = torch.as_tensor([102.9801, 115.9465, 122.7717])
    std = torch.as_tensor([1.0, 1.0, 1.0])
    print(len(data_loader.dataset))
    for iteration, (images, targets, paths) in enumerate(data_loader, start_iter):
        if any(len(target) < 1 for target in targets):
            logger.error(
                f"Iteration={iteration + 1} || Image Ids used for training {paths} || targets Length={[len(target) for target in targets]}"
            )
            continue
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        adv_patch = adv_patch_cpu.to(device)
        scheduler.step()

        imgs = images.tensors.to(device)
        bsz, _, height, width = imgs.shape
        lab_batch = batchify_labels(bsz, (height, width), targets, paths,
                                    cls_label=int(arguments["cls_id"]))
        adv_batch = patch_transformer(adv_patch.to(device),
                                      lab_batch.to(device), height, width,
                                      rand_loc=True, scale_factor=0.22,
                                      cls_label=int(arguments["cls_id"]))
        adv_batch = adv_batch.mul_(255)
        imgs = patch_applier(imgs, adv_batch.to(device))

        mean = mean.to(device)
        std = std.to(device)
        imgs.sub_(mean[None, :, None, None]).div_(std[None, :, None, None])
        targets = [target.to(device) for target in targets]
        images = ImageList(imgs, images.image_sizes)

        loss_dict = model(images, targets, logits_only=True,
                          adv_patch=adv_patch)
        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss=losses_reduced, **loss_dict_reduced)

        optimizer.zero_grad()
        # Note: If mixed precision is not used, this ends up doing nothing
        # Otherwise apply loss scaling for mixed-precision recipe
        # with amp.scale_loss(losses, optimizer) as scaled_losses:
        #     scaled_losses.backward()
        losses.backward()
        optimizer.step()
        adv_patch.clamp_(0, 1)

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration % checkpoint_period == 0:
            checkpointer.save("patch_{:07d}".format(iteration), **arguments)
        if iteration == max_iter:
            checkpointer.save("patch_final", **arguments)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))