def __results_to_json(model, data_loader_val, categories):
    """Run the model over the validation loader and collect predictions as
    COCO-format result dicts (RLE-encoded masks, xywh boxes)."""
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    res = []
    for images, anns in data_loader_val:
        images = list(images)
        images = tensorize_batch(images, device)
        model.eval()
        with torch.no_grad():
            outputs = model(images)
            for idx, out in enumerate(outputs):
                image_id = anns[idx]['image_id'].cpu().data
                pred_scores = out["scores"].cpu().data.numpy()
                pred_labels = out['labels'].cpu().data.numpy()
                pred_masks = []
                pred_boxes = []
                if "masks" in out:
                    pred_masks = out["masks"].cpu().data.numpy()
                if "boxes" in out:
                    pred_boxes = out["boxes"].cpu().data.numpy()
                for i, _ in enumerate(pred_scores):
                    # Label 0 is background; skip it.
                    if int(pred_labels[i]) > 0:
                        obj = {
                            "image_id": image_id[0].numpy().tolist(),
                            "category_id": categories[int(pred_labels[i]) - 1],
                            "score": pred_scores[i].item()
                        }
                        if "masks" in out:
                            # Binarize the soft mask and RLE-encode it with pycocotools.
                            bimask = pred_masks[i] > 0.5
                            bimask = np.array(bimask[0, :, :, np.newaxis],
                                              dtype=np.uint8, order="F")
                            encoded_mask = mask.encode(np.asfortranarray(bimask))[0]
                            encoded_mask['counts'] = encoded_mask['counts'].decode("utf-8")
                            obj['segmentation'] = encoded_mask
                        if "boxes" in out:
                            # Convert [x1, y1, x2, y2] to COCO [x, y, width, height].
                            bbox = pred_boxes[i]
                            obj['bbox'] = [
                                int(bbox[0]),
                                int(bbox[1]),
                                int(bbox[2]) - int(bbox[0]),
                                int(bbox[3]) - int(bbox[1])
                            ]
                        res.append(obj)
        torch.cuda.empty_cache()
    return res
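
def _example_coco_eval(ann_file, model, data_loader_val, categories):
    """Hedged usage sketch, not part of the original pipeline: the list that
    __results_to_json returns follows the COCO results format, so it can be
    scored with pycocotools. `ann_file` (path to the ground-truth JSON) and
    the 'segm' iouType are assumptions for illustration only.
    """
    from pycocotools.coco import COCO
    from pycocotools.cocoeval import COCOeval

    res = __results_to_json(model, data_loader_val, categories)
    coco_gt = COCO(ann_file)        # ground-truth annotations
    coco_dt = coco_gt.loadRes(res)  # wrap predictions as a COCO result set
    coco_eval = COCOeval(coco_gt, coco_dt, 'segm')  # or 'bbox' for boxes only
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()           # prints the standard AP/AR table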

def view_masks(model,
               data_loader_val,
               num_classes,
               weights_file,
               result_type,
               folder,
               confidence=0.5):
    """Run inference and save visualizations ("panoptic", "instance", or
    "semantic") to `folder`."""
    # Create folder if it doesn't exist
    Path(folder).mkdir(parents=True, exist_ok=True)
    # load weights
    model.load_state_dict(torch.load(weights_file))
    # move model to the right device
    model.to(device)
    for images, anns in data_loader_val:
        images = list(images)
        images = tensorize_batch(images, device)
        file_names = [ann["file_name"] for ann in anns]
        model.eval()
        with torch.no_grad():
            outputs = model(images)
            if result_type == "panoptic":
                panoptic_fusion.get_panoptic_results(images, outputs,
                                                     all_categories,
                                                     stuff_categories,
                                                     thing_categories, folder,
                                                     file_names)
                torch.cuda.empty_cache()
            else:
                for idx, output in enumerate(outputs):
                    file_name = file_names[idx]
                    if result_type == "instance":
                        im = apply_instance_masks(images[idx],
                                                  output['masks'], 0.5)
                    elif result_type == "semantic":
                        # Collapse per-class logits to a per-pixel class map.
                        logits = output["semantic_logits"]
                        sem_mask = torch.argmax(logits, dim=0)
                        im = apply_semantic_mask_gpu(
                            images[idx], sem_mask,
                            config.NUM_STUFF_CLASSES + config.NUM_THING_CLASSES)
                    save_fig(im, folder, file_name)
                torch.cuda.empty_cache()

def get_mIoU(model, data_loader_val):
    """Average the semantic mIoU over the validation set."""
    iou_list = []
    for images, anns in data_loader_val:
        images = list(images)
        images = tensorize_batch(images, device)
        model.eval()
        with torch.no_grad():
            outputs = model(images)
            for idx, output in enumerate(outputs):
                label = anns[idx]["semantic_mask"]
                pred = output["semantic_logits"]
                iou = mIOU(label, pred)
                iou_list.append(iou)
    return np.mean(iou_list)
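
def _example_per_image_miou(label, pred, num_classes):
    """Hedged sketch of a per-image mIoU, assuming `label` is an HxW tensor of
    class indices and `pred` is a CxHxW logits tensor, matching how get_mIoU
    calls mIOU above. The actual mIOU helper is imported from elsewhere in the
    repo; this only illustrates the metric, not that implementation.
    """
    pred_classes = torch.argmax(pred, dim=0)  # collapse logits to a class map
    ious = []
    for c in range(num_classes):
        pred_c = pred_classes == c
        label_c = label == c
        union = (pred_c | label_c).sum().item()
        if union == 0:
            continue  # class absent from both prediction and label; skip it
        inter = (pred_c & label_c).sum().item()
        ious.append(inter / union)
    return sum(ious) / len(ious) if ious else 0.0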

def __update_model(trainer_engine, batch):
    """Single training step: forward pass, loss sum, TensorBoard logging,
    backward pass, and optimizer update."""
    model.train()
    optimizer.zero_grad()
    imgs, annotations = batch[0], batch[1]
    imgs = list(imgs)
    imgs = tensorize_batch(imgs, device)
    annotations = [{k: v.to(device) for k, v in t.items()}
                   for t in annotations]

    loss_dict = model(imgs, anns=annotations)
    losses = sum(loss for loss in loss_dict.values())

    # Log the total loss per iteration to TensorBoard.
    i = trainer_engine.state.iteration
    writer.add_scalar("Loss/train/iteration", losses, i)

    losses.backward()
    optimizer.step()
    return losses
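
def _example_build_trainer(data_loader_train, max_epochs=10):
    """Hedged wiring sketch: the trainer_engine.state.iteration access in
    __update_model matches pytorch-ignite's Engine API, so the function is
    presumably registered as an Engine process function, roughly as below.
    `data_loader_train` and `max_epochs` are assumptions for illustration.
    """
    from ignite.engine import Engine

    trainer = Engine(__update_model)  # calls __update_model(engine, batch)
    trainer.run(data_loader_train, max_epochs=max_epochs)
    return trainer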

def forward(self, images, anns=None, semantic=True, instance=True):
    losses = {}
    semantic_logits = []
    images = list(images)
    # Mask R-CNN returns losses in train mode and detections in eval mode,
    # along with the shared backbone feature maps.
    if self.training:
        maskrcnn_losses, backbone_feat = self.mask_rcnn(images, anns)
    else:
        maskrcnn_results, backbone_feat = self.mask_rcnn(images)
    P4, P8, P16, P32 = (backbone_feat['0'], backbone_feat['1'],
                        backbone_feat['2'], backbone_feat['3'])
    if semantic:
        semantic_logits = self.semantic_head(P4, P8, P16, P32)
    if self.training:
        if semantic:
            semantic_masks = [ann['semantic_mask'] for ann in anns]
            semantic_masks = tensorize_batch(semantic_masks,
                                             temp_variables.DEVICE)
            losses["semantic_loss"] = F.cross_entropy(
                semantic_logits, semantic_masks.long())
        losses = {**losses, **maskrcnn_losses}
        return losses
    # Eval mode: merge each image's detections with its semantic logits.
    return [{
        **maskrcnn_results[idx],
        'semantic_logits': semantic_logits[idx]
    } for idx, _ in enumerate(images)]
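
def _example_forward_contract(model, images, anns):
    """Hedged sketch of the two forward contracts above: train mode returns a
    dict of losses (Mask R-CNN losses plus "semantic_loss"); eval mode returns
    one dict per image holding the detections plus 'semantic_logits'. The call
    pattern shown here is an assumption for illustration.
    """
    model.train()
    loss_dict = model(images, anns=anns)  # {'semantic_loss': ..., ...}
    total_loss = sum(loss_dict.values())

    model.eval()
    with torch.no_grad():
        per_image = model(images)  # [{'boxes': ..., 'masks': ...,
                                   #   'semantic_logits': ...}, ...]
    return total_loss, per_image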

def get_seg_frame(frame, prev_det, confidence=0.5):
    """Segment a single video frame and return (visualization, panoptic
    summary or None, predictions). `prev_det` carries the previous frame's
    predictions for object tracking."""
    image = Image.fromarray(frame.astype('uint8'), 'RGB')
    image = transforms(image)
    images = tensorize_batch([image], device)
    with torch.no_grad():
        outputs = model(images)
        threshold_preds = threshold_instances(
            outputs, threshold=config.CONFIDENCE_THRESHOLD)
        threshold_preds = threshold_overlap(
            threshold_preds, nms_threshold=config.NMS_THRESHOLD)
        sorted_preds = sort_by_confidence(threshold_preds)

        if config.OBJECT_TRACKING:
            # Match current detections against the previous frame's (if any).
            if prev_det is None:
                tracked_obj = get_tracked_objects(
                    None, sorted_preds[0]["boxes"], None,
                    sorted_preds[0]["labels"], super_cat_indices,
                    config.OBJECT_TRACKING_IOU_THRESHHOLD)
            else:
                tracked_obj = get_tracked_objects(
                    prev_det[0]["boxes"], sorted_preds[0]["boxes"],
                    prev_det[0]["labels"], sorted_preds[0]["labels"],
                    super_cat_indices, config.OBJECT_TRACKING_IOU_THRESHHOLD)
            sorted_preds[0]["ids"] = tracked_obj
            if len(tracked_obj) > 0:
                # Keep only as many detections as tracked objects.
                num_tracked = len(tracked_obj)
                sorted_preds[0]["boxes"] = sorted_preds[0]["boxes"][:num_tracked]
                sorted_preds[0]["masks"] = sorted_preds[0]["masks"][:num_tracked]
                sorted_preds[0]["scores"] = sorted_preds[0]["scores"][:num_tracked]
                sorted_preds[0]["labels"] = sorted_preds[0]["labels"][:num_tracked]

        if config.BOUNDING_BOX_ONLY:
            return frame, None, sorted_preds

        if result_type == "semantic":
            logits = outputs[0]["semantic_logits"]
            sem_mask = torch.argmax(logits, dim=0)
            im = apply_semantic_mask_gpu(
                images[0], sem_mask,
                config.NUM_STUFF_CLASSES + config.NUM_THING_CLASSES)
            return im.cpu().permute(1, 2, 0).numpy(), None, sorted_preds

        if result_type == "instance":
            im = apply_instance_masks(images[0], sorted_preds[0]['masks'],
                                      0.5, ids=sorted_preds[0]["ids"])
            return im.cpu().permute(1, 2, 0).numpy(), None, sorted_preds

        if result_type == "panoptic" and len(sorted_preds[0]["masks"]) > 0:
            # Get intermediate prediction and semantic prediction
            inter_pred_batch, sem_pred_batch, summary_batch = panoptic_fusion(
                sorted_preds, all_categories, stuff_categories,
                thing_categories, threshold_by_confidence=False,
                sort_confidence=False)
            canvas = panoptic_canvas(inter_pred_batch, sem_pred_batch,
                                     all_categories, stuff_categories,
                                     thing_categories)[0]
            if canvas is None:
                return frame, summary_batch, sorted_preds
            im = apply_panoptic_mask_gpu(
                images[0], canvas).cpu().permute(1, 2, 0).numpy()
            return im, summary_batch, sorted_preds

        return frame, None, sorted_preds
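
def _example_video_loop(video_path, out_path, fps=30):
    """Hedged usage sketch: drives get_seg_frame over a video with OpenCV,
    threading the previous predictions back in as `prev_det` so tracking IDs
    persist across frames. The paths, the mp4v codec, the BGR<->RGB handling,
    and the assumption that get_seg_frame returns a float RGB image in [0, 1]
    are all illustrative, not part of the original code.
    """
    import cv2

    cap = cv2.VideoCapture(video_path)
    video_writer = None
    prev_det = None
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # OpenCV decodes to BGR
        im, _, prev_det = get_seg_frame(rgb, prev_det)
        im_bgr = cv2.cvtColor((im * 255).astype('uint8'), cv2.COLOR_RGB2BGR)
        if video_writer is None:
            h, w = im_bgr.shape[:2]
            video_writer = cv2.VideoWriter(
                out_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
        video_writer.write(im_bgr)
    cap.release()
    if video_writer is not None:
        video_writer.release()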