# MetadataCatalog.get("VisualGenomeObjects").thing_classes = obj_vocab cfg = get_cfg() cfg.merge_from_file( 'configs/COCO-Detection/faster_rcnn_X_101_64x4d_FPN_2x_vlp.yaml') cfg.freeze() predictor = DefaultPredictor(cfg) demo = VisualizationDemo(cfg) #model = build_model(cfg) #model.eval() #checkpointer = DetectionCheckpointer(model) #checkpointer.load(os.path.join('model_weights', 'e2e_faster_rcnn_X-101-64x4d-FPN_2x-vlp.pkl')) img1 = read_image(os.path.join('test_images', '12283150_12d37e6389_z.jpg'), format="BGR") img2 = read_image(os.path.join('test_images', '25691390_f9944f61b5_z.jpg'), format="BGR") img3 = read_image(os.path.join('test_images', '9247489789_132c0d534a_z.jpg'), format="BGR") preds, vis_out = demo.run_on_image(img3, obj_vocab) cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL) cv2.imshow(WINDOW_NAME, vis_out.get_image()[:, :, ::-1]) #preds['instances'].get_fields()['box_features'].shape #preds['instances'].get_fields()['probs'].shape
args = get_parser().parse_args()
setup_logger(name="fvcore")
logger = setup_logger()
logger.info("Arguments: " + str(args))
cfg = setup_cfg(args)
print(cfg)

# Build the model
model = build_model(cfg)

# Load the weights
checkpointer = DetectionCheckpointer(model)
checkpointer.load(cfg.MODEL.WEIGHTS)

# Load the image
path = os.path.expanduser(args.input)
original_image = read_image(path, format="BGR")
height, width = original_image.shape[:2]
transform_gen = T.ResizeShortestEdge(
    [cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST], cfg.INPUT.MAX_SIZE_TEST)
image = transform_gen.get_transform(original_image).apply_image(original_image)
image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1)).requires_grad_(True)
inputs = {"image": image, "height": height, "width": width}

# Grad-CAM
layer_name = get_last_conv_name(model)
grad_cam = GradCAM(model, layer_name)
mask, box, class_id = grad_cam(inputs)  # CAM mask
import argparse

import cv2
from detectron2.data import MetadataCatalog
from detectron2.data.detection_utils import read_image
from detectron2.utils.visualizer import Visualizer

from model import get_predictor

parser = argparse.ArgumentParser(description='Inference football helmets on image')
required = parser.add_argument_group('required')
required.add_argument('-i', '--input', type=str, help='path to an image', required=True)
required.add_argument('-o', '--output', type=str, help='output path', required=True)
args = parser.parse_args()

predictor = get_predictor()
image = read_image(args.input, "BGR")
model_output = predictor(image)
model_output = model_output["instances"].to("cpu")
confident_predictions = model_output[model_output.scores > 0.8]

img = cv2.imread(args.input)
v = Visualizer(img[:, :, ::-1], metadata=MetadataCatalog.get("nflimpact"))
img = v.draw_instance_predictions(confident_predictions).get_image()
success = cv2.imwrite(args.output, img)
if not success:
    raise ValueError("could not write the output image; OpenCV does not report why")
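# --- Added sketch (assumption): the script above uses metadata for a dataset
# named "nflimpact" but never registers it here. A minimal, hypothetical
# registration could look like this; the class name "helmet" is illustrative only.
from detectron2.data import MetadataCatalog

MetadataCatalog.get("nflimpact").thing_classes = ["helmet"]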
def __call__(self, dataset_dict): """ Args: dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. Returns: dict: a format that builtin models in detectron2 accept """ dataset_dict = copy.deepcopy( dataset_dict) # it will be modified by code below # USER: Write your own image loading if it's not from a file image = utils.read_image(dataset_dict["file_name"], format=self.image_format) utils.check_image_size(dataset_dict, image) # USER: Remove if you don't do semantic/panoptic segmentation. if "sem_seg_file_name" in dataset_dict: sem_seg_gt = utils.read_image( dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2) else: sem_seg_gt = None aug_input = T.StandardAugInput(image, sem_seg=sem_seg_gt) transforms = aug_input.apply_augmentations(self.augmentations) image, sem_seg_gt = aug_input.image, aug_input.sem_seg image_shape = image.shape[:2] # h, w # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, # but not efficient on large generic data structures due to the use of pickle & mp.Queue. # Therefore it's important to use torch.Tensor. dataset_dict["image"] = torch.as_tensor( np.ascontiguousarray(image.transpose(2, 0, 1))) if sem_seg_gt is not None: dataset_dict["sem_seg"] = torch.as_tensor( sem_seg_gt.astype("long")) # USER: Remove if you don't use pre-computed proposals. # Most users would not need this feature. if self.proposal_topk is not None: utils.transform_proposals(dataset_dict, image_shape, transforms, proposal_topk=self.proposal_topk) if not self.is_train: # USER: Modify this if you want to keep them for some reason. dataset_dict.pop("annotations", None) dataset_dict.pop("sem_seg_file_name", None) return dataset_dict if "annotations" in dataset_dict: # USER: Modify this if you want to keep them for some reason. for anno in dataset_dict["annotations"]: if not self.use_instance_mask: anno.pop("segmentation", None) if not self.use_keypoint: anno.pop("keypoints", None) # USER: Implement additional transformations if you have other types of data annos = [ utils.transform_instance_annotations( obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices) for obj in dataset_dict.pop("annotations") if obj.get("iscrowd", 0) == 0 ] instances = utils.annotations_to_instances( annos, image_shape, mask_format=self.instance_mask_format) # After transforms such as cropping are applied, the bounding box may no longer # tightly bound the object. As an example, imagine a triangle object # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to # the intersection of original bounding box and the cropping box. if self.recompute_boxes: instances.gt_boxes = instances.gt_masks.get_bounding_boxes() dataset_dict["instances"] = utils.filter_empty_instances(instances) return dataset_dict
def inference(cfgs, logger, pseudo_label=False): # pseudo label pseudo_threshold = 0.4 coco_annos = { "info": {}, "licenses": [], "images": [], "annotations": [], "categories": [{ "id": 1, "name": "wheat", "supercategory": "wheat", "skeleton": [] }], } instance_id = 0 predictors = [] for cfg in cfgs: predictors.append(TTAPredictor(cfg)) results = ["image_id,PredictionString\n"] if cfgs[0].INPUT_DIR: img_list = glob.glob(cfgs[0].INPUT_DIR + "/*.jpg") assert img_list, "The input path(s) was not found" for idx, path in tqdm.tqdm(enumerate(img_list)): # use PIL, to be consistent with evaluation img = read_image(path, format="BGR") img_size = img.shape[:2] start_time = time.time() predictions = [] for predictor in predictors: predictions.append(predictor(img)) # wbf merged_boxes = merge_multi_predictions(predictions, img_size, nms_threshold=0.6) predictions = {"instances": merged_boxes} # output result if not pseudo_label: result = format_result( os.path.basename(path).split('.')[0], predictions) results.append(result) else: if 'instances' in predictions: instances = predictions["instances"].to('cpu') img_info, annos, instance_id = pred_instances_to_coco_json( instances, path, idx, instance_id, img_size, pseudo_threshold) if img_info is not None and annos is not None: coco_annos["images"].append(img_info) coco_annos["annotations"].extend(annos) logger.info("{}: {} in {:.2f}s".format( path, "detected {} instances".format(len(predictions["instances"])) if "instances" in predictions else "finished", time.time() - start_time, )) if pseudo_label: # save pseudo label to json file json_name = cfgs[0].OUTPUT_DIR + '/pseudo_label.json' with open(json_name, 'w') as f: json.dump(coco_annos, f) else: with open(cfgs[0].OUTPUT_DIR + '/submission.csv', 'w') as f: f.writelines(results)
def rotated_mapper(original_dataset_dict): # Implement a mapper, similar to the default DatasetMapper, but with our own customizations dataset_dict = copy.deepcopy( original_dataset_dict) # it will be modified by code below original_gsd = dataset_dict["gsd"] target_gsd = np.random.uniform(0.09, 0.13) # randomize target gsd scale = original_gsd / target_gsd target_size = 400 target_crop = int(target_size / scale) target_crop = (target_crop, target_crop) image_np = detection_utils.read_image(dataset_dict["file_name"], format="BGR") boxes = np.asarray([anno['bbox'] for anno in dataset_dict['annotations']]) # select anno at random # draw random center # h, w = image_np.shape[:2] # rand_box = boxes[np.random.randint(len(boxes))] # ch, cw = rand_box[:2] # xmin = np.min() # xmax = np.max() # ymin = 3 # ymax = 4 # h0 = np.random.randint(min(h, ymin), min(h, ymax) + 1) # w0 = np.random.randint(min(w, xmin), min(w, xmax) + 1) # assert h >= target_crop[1] and w >= target_crop[0], "Shape computation has bugs." # crop = T.CropTransform(w0, h0, target_crop) # make sure random crop contains annotations i = 0 while True: random_crop = T.RandomCrop('absolute', target_crop).get_transform(image_np) cropped_boxes = RotatedBoxes( random_crop.apply_coords(copy.deepcopy(boxes))) inside_ind = cropped_boxes.inside_box(target_crop) if 1 < sum(inside_ind) <= 100: break i += 1 if i > 150: return None image, transforms = T.apply_transform_gens([ random_crop, T.Resize((target_size, target_size)), ], image_np) dataset_dict["image"] = torch.as_tensor( image.transpose(2, 0, 1).astype("float32")) annos = [ rotated_transform_instance_annotations(obj, transforms, image.shape[:2]) for obj in dataset_dict.pop("annotations") if obj.get("iscrowd", 0) == 0 ] instances = detection_utils.annotations_to_instances_rotated( annos, image.shape[:2]) instances = detection_utils.filter_empty_instances(instances) inside_ind = instances.gt_boxes.inside_box(image.shape[:2]) instances = instances[inside_ind] assert ((instances.gt_boxes.tensor.numpy()[:, 2] > 0).all().item() ), "width not > 0\n\n" + str(instances.gt_boxes.tensor.numpy()) dataset_dict["instances"] = instances return dataset_dict
def mapper(self, dataset_dict, train_type: str = 'kpt'): if train_type != 'kpt': for item in dataset_dict["annotations"]: if 'keypoints' in item: del item['keypoints'] image = utils.read_image(dataset_dict["file_name"], format="BGR") img_h, img_w = image.shape[:2] num_pixels = img_w * img_h ann_dict = Detectron2_Annotation_Dict.from_dict(dataset_dict) bbox_list = [ann.bbox for ann in ann_dict.annotations] # if train_type == 'seg': # printj.purple(len(ann_dict.annotations)) # for ann in ann_dict.annotations: # seg = ann.segmentation # mask = seg.to_mask() # tranformed = self.aug(mask=mask) # mask = tranformed['mask'] # image = tranformed['image'] # else: image = self.aug(image=np.array(image))['image'] seq_aug_for_no_seg = almost_always(iaa.Sequential( [ # iaa.Rot90(ia.ALL, keep_size=False) ] )) seq_aug_for_seg = sometimes(iaa.Sequential( [ iaa.Rot90(ia.ALL, keep_size=False), iaa.Affine( scale={"x": (0.8, 1.2), "y": (0.8, 1.2)}, translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)}, rotate=(-180, 180), order=[0, 1], # cval=(0, 255), cval=255, mode=ia.ALL ) ] )) imgaug_kpts = KeypointsOnImage(keypoints=[], shape=image.shape) imgaug_bboxes = BoundingBoxesOnImage( bounding_boxes=[], shape=image.shape) imgaug_polys = PolygonsOnImage(polygons=[], shape=image.shape) num_ann = len(ann_dict.annotations) num_kpts = None seg_len_list = [] for ann in ann_dict.annotations: if num_kpts is None: num_kpts = len(ann.keypoints) if len(ann.keypoints.to_imgaug(img_shape=image.shape).keypoints) != len(ann_dict.annotations[0].keypoints): printj.red( f'len(ann.keypoints.to_imgaug(img_shape=image.shape).keypoints) == {len(ann.keypoints.to_imgaug(img_shape=image.shape).keypoints)} != {len(ann_dict.annotations[0].keypoints)} == len(ann_dict.annotations[0].keypoints)') raise Exception imgaug_kpts.keypoints.extend( ann.keypoints.to_imgaug(img_shape=image.shape).keypoints) if ann.bbox.to_imgaug() is None: printj.red(f'ann.bbox.to_imgaug() is None') printj.red(f'ann.bbox: {ann.bbox}') raise Exception imgaug_bboxes.bounding_boxes.append(ann.bbox.to_imgaug()) if ann.segmentation.to_imgaug(img_shape=image.shape).polygons is None: printj.red( f'ann.segmentation.to_imgaug(img_shape=image.shape).polygons is None') printj.red(f'ann.segmentation:\n{ann.segmentation}') raise Exception seg_len_list.append(len(ann.segmentation)) imgaug_polys.polygons.extend( ann.segmentation.to_imgaug(img_shape=image.shape).polygons) if len(imgaug_polys.polygons) > 0: if num_kpts > 0: image, imgaug_kpts_aug, imgaug_polys_aug = seq_aug_for_seg( image=image, keypoints=imgaug_kpts, polygons=imgaug_polys) else: image, imgaug_polys_aug = seq_aug_for_seg( image=image, polygons=imgaug_polys) imgaug_kpts_aug = None imgaug_bboxes_aug = None else: if num_kpts > 0: image, imgaug_kpts_aug, imgaug_bboxes_aug = seq_aug_for_no_seg( image=image, keypoints=imgaug_kpts, bounding_boxes=imgaug_bboxes) else: image, imgaug_bboxes_aug = seq_aug_for_no_seg( image=image, bounding_boxes=imgaug_bboxes) imgaug_kpts_aug = None imgaug_polys_aug = None kpts_aug0 = Keypoint2D_List.from_imgaug( imgaug_kpts=imgaug_kpts_aug) if num_kpts > 0 else Keypoint2D_List() kpts_aug_list = kpts_aug0.to_numpy(demarcation=True)[:, :2].reshape( num_ann, num_kpts, 2) if num_kpts > 0 else [] kpts_aug_list = [[[x, y, 2] for x, y in kpts_aug] for kpts_aug in kpts_aug_list] kpts_aug_list = [Keypoint2D_List.from_list( kpts_aug, demarcation=True) for kpts_aug in kpts_aug_list] if imgaug_polys_aug is not None and imgaug_bboxes_aug is None: poly_aug_list = [Polygon.from_imgaug( imgaug_polygon) for 
imgaug_polygon in imgaug_polys_aug.polygons] poly_aug_list_list = unflatten_list( poly_aug_list, part_sizes=seg_len_list) seg_aug_list = [Segmentation(poly_aug_list) for poly_aug_list in poly_aug_list_list] bbox_aug_list = [seg_aug.to_bbox() for seg_aug in seg_aug_list] # Adjust BBoxes when Segmentation BBox does not contain all keypoints for i in range(len(bbox_aug_list)): kpt_points_aug = [ kpt_aug.point for kpt_aug in kpts_aug_list[i]] if num_kpts > 0 else [] kpt_points_aug_contained = [kpt_point_aug.within( bbox_aug_list[i]) for kpt_point_aug in kpt_points_aug] if len(kpt_points_aug) > 0: if not np.any(np.array(kpt_points_aug_contained)): printj.red( f"Keypoints not contained in corresponding bbox.") elif not np.all(np.array(kpt_points_aug_contained)): pass else: break elif imgaug_polys_aug is None and imgaug_bboxes_aug is not None: bbox_aug_list = [BBox.from_imgaug( bbox_aug) for bbox_aug in imgaug_bboxes_aug.bounding_boxes] seg_aug_list = [None] * len(bbox_aug_list) else: printj.red(f'Unexpected error') raise Exception if num_kpts > 0: for ann, kpts_aug, bbox_aug, seg_aug in zip(ann_dict.annotations, kpts_aug_list, bbox_aug_list, seg_aug_list): ann.keypoints = kpts_aug ann.bbox = bbox_aug ann.segmentation = seg_aug if seg_aug is not None else Segmentation.from_list([ ]) else: for ann, bbox_aug, seg_aug in zip(ann_dict.annotations, bbox_aug_list, seg_aug_list): ann.keypoints = Keypoint2D_List() ann.bbox = bbox_aug ann.segmentation = seg_aug if seg_aug is not None else Segmentation.from_list([ ]) dataset_dict = ann_dict.to_dict() image, transforms = T.apply_transform_gens([], image) annots = [] for item in dataset_dict["annotations"]: if 'keypoints' in item and num_kpts == 0: del item['keypoints'] elif 'keypoints' in item: item['keypoints'] = np.array( item['keypoints']).reshape(-1, 3).tolist() annots.append(item) dataset_dict["image"] = torch.as_tensor( image.transpose(2, 0, 1).astype("float32")) instances = utils.annotations_to_instances(annots, image.shape[:2]) dataset_dict["instances"] = utils.filter_empty_instances( instances, by_box=True, by_mask=False) # if True: # vis_img = image.copy() # bbox_list = [BBox.from_list(vals) for vals in dataset_dict["instances"].gt_boxes.tensor.numpy().tolist()] # seg_list = [Segmentation([Polygon.from_list(poly.tolist(), demarcation=False) for poly in seg_polys]) for seg_polys in dataset_dict["instances"].gt_masks.polygons] # kpts_list = [Keypoint2D_List.from_numpy(arr, demarcation=True) for arr in dataset_dict["instances"].gt_keypoints.tensor.numpy()] if hasattr(dataset_dict["instances"], 'gt_keypoints') else [] # for seg in seg_list: # vis_img = draw_segmentation(img=vis_img, segmentation=seg, transparent=True) # for bbox in bbox_list: # vis_img = draw_bbox(img=vis_img, bbox=bbox) # for kpts in kpts_list: # vis_img = draw_keypoints(img=vis_img, keypoints=kpts.to_numpy(demarcation=True)[:, :2].tolist(), radius=6) # aug_visualizer.step(vis_img) return dataset_dict
def create_annotation(image_folder, json_path, confidence_thresh=0.8): json_dict = { "images": [], "type": "instances", "annotations": [], "categories": [] } mp.set_start_method("spawn", force=True) args = get_parser().parse_args() logger = setup_logger() logger.info("Arguments: " + str(args)) cfg = setup_cfg(args) demo = VisualizationDemo(cfg) image_path = {} for path, subdirs, files in os.walk(image_folder): for name in files: print(name) if name.endswith('.jpg') or \ name.endswith('.png') or \ name.endswith('.JPG') or \ name.endswith('.PNG') or \ name.endswith('.jpeg') or \ name.endswith('.JPEG'): image_path[name] = os.path.join(path, name) print("length: ", len(image_path.keys())) for path in tqdm.tqdm(image_path.keys(), disable=not args.output): # use PIL, to be consistent with evaluation start_time = time.time() try: img = read_image(image_path[path], format="BGR") # run detector predictions, visualized_output, shape = demo.run_on_image(img) except: print("except") continue height, width, channel = shape global count ## append image info image = { "file_name": str(path), "height": str(height), "width": str(width), "id": str(count), } count += 1 # if count > 10: # break json_dict["images"].append(image) ## append annotation info bnd_id = 0 for i in range(len(predictions["instances"].pred_boxes)): if predictions["instances"].scores[ i] > confidence_thresh and predictions[ "instances"].pred_classes[i] in [0, 2, 5, 7]: # print(predictions["instances"].pred_boxes[i].tensor) x_center, y_center, o_width, o_height = predictions[ "instances"].pred_boxes[i].tensor[0].cpu().detach().numpy( ) score = predictions["instances"].scores[i].cpu().detach( ).numpy() pred_class = predictions["instances"].pred_classes[i].cpu( ).detach().numpy() # print(x_center, y_center, o_width, o_height, score) ann = { "area": str(o_width * o_height), "iscrowd": 0, "image_id": str(count), "bbox": [ str(int(x_center - o_width / 2)), str(int(y_center - o_height / 2)), str(o_width), str(o_height) ], "category_id": str(pred_class + 1), "id": str(bnd_id), "ignore": 0, "segmentation": [], } bnd_id += 1 json_dict["annotations"].append(ann) # cat = {"supercategory": "none", "id": cid, "name": cate} # json_dict["categories"].append(cat) # if args.output: # if os.path.isdir(args.output): # assert os.path.isdir(args.output), args.output # out_filename = os.path.join(args.output, os.path.basename(path)) # else: # assert len(args.input) == 1, "Please specify a directory with args.output" # out_filename = args.output # visualized_output.save(out_filename) # print("pred_boxes: ", predictions["instances"].pred_boxes) # print("scores: ", predictions["instances"].scores) # print("pred_classes: ", predictions["instances"].pred_classes) # print("shape: ", width, height, channel) # logger.info( # "{}: detected {} instances in {:.2f}s".format( # path, len(predictions["instances"]), time.time() - start_time # ) # ) logger.info( ("progress: {:.0f} / {:.0f}".format(count, len(image_path.keys())))) ## append category info cat = {"supercategory": "none", "id": str(1), "name": "person"} json_dict["categories"].append(cat) cat = {"supercategory": "none", "id": str(3), "name": "car"} json_dict["categories"].append(cat) cat = {"supercategory": "none", "id": str(6), "name": "bus"} json_dict["categories"].append(cat) cat = {"supercategory": "none", "id": str(8), "name": "truck"} json_dict["categories"].append(cat) os.makedirs(os.path.dirname(json_path), exist_ok=True) json_fp = open(json_path, "w") json_str = json.dumps(json_dict) 
json_fp.write(json_str) json_fp.close()
"--output", default=None, help="output images name", ) parser.add_argument( "--confidence-threshold", type=float, default=0.5, help="Minimum score for instance predictions to be shown", ) return parser if __name__ == "__main__": args = get_parser().parse_args() cfg = setup_cfg(args) input = args.input output = args.output debug = output is not None demo = VisualizationDemo(cfg, debug) # use PIL, to be consistent with evaluation img = read_image(input, format="BGR") predictions, visualized_output, obj = demo.run_on_image(img, debug) if output != None: visualized_output.save(output) print(output) else: print(json.dumps(obj))
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    # get sample information and read image
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    # check that the real image size matches the sample description in dataset_dict
    utils.check_image_size(dataset_dict, image)

    # IMAGE AUGMENTATION
    if "annotations" not in dataset_dict:
        image, transforms = T.apply_transform_gens(
            ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image)
    else:
        # Crop around an instance if there are instances in the image.
        # USER: Remove if you don't use cropping
        if self.crop_gen:
            crop_tfm = utils.gen_crop_transform_with_instance(
                self.crop_gen.get_crop_size(image.shape[:2]),
                image.shape[:2],
                np.random.choice(dataset_dict["annotations"]),
            )
            image = crop_tfm.apply_image(image)
        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        if self.crop_gen:
            transforms = crop_tfm + transforms

    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(
        image.transpose(2, 0, 1).astype("float32"))
    # Can use uint8 if it turns out to be slow some day

    # this information is not needed at inference time
    if not self.is_train:
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("multi_labels", None)
        dataset_dict.pop("multi_label_names", None)
        dataset_dict.pop("seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(obj, transforms, image_shape)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(annos, image_shape)
        # Not really needed, but keep it anyway
        # Create a tight bounding box from masks, useful when image is cropped
        if self.crop_gen and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    if "seg_file_name" in dataset_dict:
        with PathManager.open(dataset_dict.pop("seg_file_name"), "rb") as f:
            sem_seg_gt = Image.open(f)
            sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
        sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
        # TODO: one-hot encoding after transformation
        sem_gt = np.zeros(
            (self.num_seg_class, sem_seg_gt.shape[0], sem_seg_gt.shape[1]),
            dtype="uint8")
        for c in range(self.num_seg_class):
            sem_gt[c][sem_seg_gt == c] = 1
        # sem_gt = np.transpose(sem_gt, (1, 2, 0))  # for later transform
        sem_gt = torch.as_tensor(sem_gt.astype("long"))
        dataset_dict["sem_seg"] = sem_gt

    if "multi_labels" in dataset_dict:
        dataset_dict["multi_labels"] = dataset_dict.pop("multi_labels")

    return dataset_dict
def __call__(self, dataset_dict): dataset_dict = copy.deepcopy(dataset_dict) image = utils.read_image(dataset_dict["file_name"], format=self.img_format) utils.check_image_size(dataset_dict, image) # this place you can add your own code to change the input image if "annotations" not in dataset_dict: image, transforms = T.apply_transform_gens( ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image) else: if self.crop_gen: crop_tfm = utils.gen_crop_transform_with_instance( self.crop_gen.get_crop_size(image.shape[:2]), image.shape[:2], np.random.choice(dataset_dict["annotations"]), ) image = crop_tfm.apply_image(image) image, transforms = T.apply_transform_gens(self.tfm_gens, image) if self.crop_gen: transforms = crop_tfm + transforms image_shape = image.shape[:2] dataset_dict["image"] = torch.as_tensor( np.ascontiguousarray(image.transpose(2, 0, 1))) if self.load_proposals: utils.transform_proposals(dataset_dict, image_shape, transforms, self.min_box_side_len, self.proposal_topk) if not self.is_train: dataset_dict.pop("annotations", None) dataset_dict.pop("sem_seg_file_name", None) return dataset_dict if "annotations" in dataset_dict: for anno in dataset_dict["annotations"]: if not self.mask_on: anno.pop("segmentation", None) if not self.keypoint_on: anno.pop("keypoints", None) if not self.attribute_on: anno.pop("attribute_ids") annos = [ utils.transform_instance_annotations( obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices) for obj in dataset_dict.pop("annotations") if obj.get("iscrowd", 0) == 0 ] instances = annotations_to_instances_with_attributes( annos, image_shape, mask_format=self.mask_format, load_attributes=self.attribute_on, max_attr_per_ins=self.max_attr_per_ins) if self.crop_gen and instances.has("gt_masks"): instances.gt_boxes = instances.gt_masks.get_bounding_boxes() dataset_dict["instances"] = utils.filter_empty_instances(instances) if "sem_seg_file_name" in dataset_dict: with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f: sem_seg_gt = Image.open(f) sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8") sem_seg_gt = transforms.apply_segmentation(sem_seg_gt) sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long")) dataset_dict["sem_seg"] = sem_seg_gt return dataset_dict
def load_img(img_path, transform_gen):
    img1 = read_image(img_path, format="BGR")
    img = transform_gen.get_transform(img1).apply_image(img1)
    img_tensor = torch.as_tensor(img.astype("float32").transpose(2, 0, 1))
    return img1, img_tensor
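# --- Added usage sketch for load_img above (assumptions: the resize sizes
# 800/1333, the image path, and that a detectron2 model is built elsewhere).
import torch
import detectron2.data.transforms as T

transform_gen = T.ResizeShortestEdge([800, 800], 1333)
original_img, img_tensor = load_img("some_image.jpg", transform_gen)
inputs = [{"image": img_tensor,
           "height": original_img.shape[0],
           "width": original_img.shape[1]}]
# with torch.no_grad():
#     outputs = model(inputs)  # "model" is assumed to be constructed elsewhere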
parser.add_argument("--onlyhighest", action="store_true", help="will return only the highest scoring detection") parser.add_argument( "opts", help="Modify model config options using the command-line", default=None, nargs=argparse.REMAINDER, ) return parser if __name__ == "__main__": mp.set_start_method("spawn", force=True) args = get_parser().parse_args() logger = setup_logger() logger.info("Arguments: " + str(args)) cfg = setup_cfg(args) im_name = args.input.split("/")[-1].split(".")[0] demo = VisualizationDemo(cfg, vis_highest_scoring=args.onlyhighest, output_dir=os.path.join(args.output, im_name)) # use PIL, to be consistent with evaluation img = read_image(args.input, format="BGR") predictions = demo.run_on_image(img, focal_length=args.focal_length)
def load_image(self, dataset_dict):
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)
    return image
                img = img[:, :, [2, 1, 0]]
            else:
                img = np.asarray(
                    Image.fromarray(img, mode=cfg.INPUT.FORMAT).convert("RGB"))
            visualizer = Visualizer(img, metadata=metadata, scale=scale)
            target_fields = per_image["instances"].get_fields()
            labels = [metadata.thing_classes[i] for i in target_fields["gt_classes"]]
            vis = visualizer.overlay_instances(
                labels=labels,
                boxes=target_fields.get("gt_boxes", None),
            )
            output(vis, str(per_image["image_id"]) + ".jpg")
else:
    dicts = list(
        chain.from_iterable(
            [DatasetCatalog.get(k) for k in cfg.DATASETS.TRAIN]))
    for dic in dicts:
        img = utils.read_image(dic["file_name"], "RGB")
        visualizer = Visualizer(img, metadata=metadata, scale=scale)
        vis = visualizer.draw_dataset_dict(dic)
        output(vis, os.path.basename(dic["file_name"]))
demo = VisualizationDemo(cfg)

all_img = []
with open(args.input_txt, "r") as f:
    for line in f.readlines():
        line = line.strip('\n')
        all_img.append(line)

if args.input_txt:
    # if len(args.input) == 1:
    #     args.input = glob.glob(os.path.expanduser(args.input[0]))
    #     assert args.input, "The input path(s) was not found"
    for path in tqdm.tqdm(all_img, disable=not args.output):
        # use PIL, to be consistent with evaluation
        path = path + '.jpg'
        img = read_image(
            '../../tmp/data/VOCdevkit2007/VOC2007/JPEGImages/' + path,
            format="BGR")
        start_time = time.time()
        predictions, visualized_output = demo.run_on_image(img)
        logger.info("{}: {} in {:.2f}s".format(
            path,
            "detected {} instances".format(len(predictions["instances"]))
            if "instances" in predictions else "finished",
            time.time() - start_time,
        ))
        if args.output:
            if os.path.isdir(args.output):
                assert os.path.isdir(args.output), args.output
                out_filename = os.path.join(args.output, os.path.basename(path))
def main(): mp.set_start_method("spawn", force=True) args = get_parser().parse_args() setup_logger(name="fvcore") logger = setup_logger() logger.info("Arguments: " + str(args)) cfg = model_zoo.get_config(args.config_file) cfg = setup_cfg(cfg, args) demo = VisualizationDemo(cfg, args.config_file) if args.input: if len(args.input) == 1: args.input = glob.glob(os.path.expanduser(args.input[0])) assert args.input, "The input path(s) was not found" for path in tqdm.tqdm(args.input, disable=not args.output): # use PIL, to be consistent with evaluation img = read_image(path, format="BGR") start_time = time.time() predictions, visualized_output = demo.run_on_image(img) logger.info("{}: {} in {:.2f}s".format( path, "detected {} instances".format(len(predictions["instances"])) if "instances" in predictions else "finished", time.time() - start_time, )) if args.output: if os.path.isdir(args.output): assert os.path.isdir(args.output), args.output out_filename = os.path.join(args.output, os.path.basename(path)) else: assert len( args.input ) == 1, "Please specify a directory with args.output" out_filename = args.output visualized_output.save(out_filename) else: cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL) cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1]) if cv2.waitKey(0) == 27: break # esc to quit elif args.video_input: video = cv2.VideoCapture(args.video_input) width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) frames_per_second = video.get(cv2.CAP_PROP_FPS) num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) basename = os.path.basename(args.video_input) if args.output: if os.path.isdir(args.output): output_fname = os.path.join(args.output, basename) output_fname = os.path.splitext(output_fname)[0] + ".mkv" else: output_fname = args.output assert not os.path.isfile(output_fname), output_fname output_file = cv2.VideoWriter( filename=output_fname, # some installation of opencv may not support x264 (due to its license), # you can try other format (e.g. MPEG) fourcc=cv2.VideoWriter_fourcc(*"x264"), fps=float(frames_per_second), frameSize=(width, height), isColor=True, ) assert os.path.isfile(args.video_input) for vis_frame in tqdm.tqdm(demo.run_on_video(video), total=num_frames): if args.output: output_file.write(vis_frame) else: cv2.namedWindow(basename, cv2.WINDOW_NORMAL) cv2.imshow(basename, vis_frame) if cv2.waitKey(1) == 27: break # esc to quit video.release() if args.output: output_file.release() else: cv2.destroyAllWindows()
def __call__(self, dataset_dict): """ Args: dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. Returns: dict: a format that builtin models in detectron2 accept """ dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below image = utils.read_image(dataset_dict["file_name"], format=self.img_format) try: utils.check_image_size(dataset_dict, image) except Exception as e: print(e) import moxing as mox mox.file.copy_parallel(dataset_dict["file_name"], 's3://bucket-6756/liangxiwen/result/haitian_semi/unbiased-teacher/wrong_imgs/' + dataset_dict["file_name"].split('/')[-1]) print(image.shape) image = np.rot90(image) print(image.shape) utils.check_image_size(dataset_dict, image) if "sem_seg_file_name" in dataset_dict: sem_seg_gt = utils.read_image( dataset_dict.pop("sem_seg_file_name"), "L" ).squeeze(2) else: sem_seg_gt = None aug_input = T.StandardAugInput(image, sem_seg=sem_seg_gt) transforms = aug_input.apply_augmentations(self.augmentation) image_weak_aug, sem_seg_gt = aug_input.image, aug_input.sem_seg image_shape = image_weak_aug.shape[:2] # h, w if sem_seg_gt is not None: dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long")) if self.load_proposals: utils.transform_proposals( dataset_dict, image_shape, transforms, proposal_topk=self.proposal_topk, min_box_size=self.proposal_min_box_size, ) if not self.is_train: dataset_dict.pop("annotations", None) dataset_dict.pop("sem_seg_file_name", None) return dataset_dict if "annotations" in dataset_dict: for anno in dataset_dict["annotations"]: if not self.mask_on: anno.pop("segmentation", None) if not self.keypoint_on: anno.pop("keypoints", None) annos = [ utils.transform_instance_annotations( obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices, ) for obj in dataset_dict.pop("annotations") if obj.get("iscrowd", 0) == 0 ] instances = utils.annotations_to_instances( annos, image_shape, mask_format=self.mask_format ) if self.compute_tight_boxes and instances.has("gt_masks"): instances.gt_boxes = instances.gt_masks.get_bounding_boxes() bboxes_d2_format = utils.filter_empty_instances(instances) dataset_dict["instances"] = bboxes_d2_format # apply strong augmentation # We use torchvision augmentation, which is not compatiable with # detectron2, which use numpy format for images. Thus, we need to # convert to PIL format first. image_pil = Image.fromarray(image_weak_aug.astype("uint8"), "RGB") image_strong_aug = np.array(self.strong_augmentation(image_pil)) dataset_dict["image"] = torch.as_tensor( np.ascontiguousarray(image_strong_aug.transpose(2, 0, 1)) ) dataset_dict_key = copy.deepcopy(dataset_dict) dataset_dict_key["image"] = torch.as_tensor( np.ascontiguousarray(image_weak_aug.transpose(2, 0, 1)) ) assert dataset_dict["image"].size(1) == dataset_dict_key["image"].size(1) assert dataset_dict["image"].size(2) == dataset_dict_key["image"].size(2) return (dataset_dict, dataset_dict_key)
# Input
img_fpath = 'demo/data/input2.jpg'

# Conduct instance segmentation
predictor = ISEEInstanceSegmentation()

# 1. Initialization
err_no = predictor.init(config_file, params_dict)
if err_no < 0:
    err_type = ISEEInstanceSegmentation.getErrType(err_no)
    print("ERROR: failed to initialize the predictor - {}".format(err_type))
    exit(err_no)
else:
    print("INFO: predictor initialized successfully!")

# 2. Load image
imgs_data = []
img = read_image(img_fpath, format="BGR")
imgs_data.append(img)

# 3. Predict
output = 'demo/data/'
stamp1 = time.time()
err_no = predictor.process(imgs_data, output=output)
stamp2 = time.time()
if err_no < 0:
    err_type = ISEEInstanceSegmentation.getErrType(err_no)
    print("ERROR: instance segmentation failed - {}".format(err_type))
else:
    print("INFO: instance segmentation finished successfully!")

# 4. Get results
segment_res_list = predictor.getResults()
print(
# Predicting random image from existing dataset
ravu_folder = Path(r"/media/jsieb/ED598/drone2go_ravu/drone2go_ravu")
image_paths = ravu_folder.rglob("*.JPG")
image_paths = [image_path for image_path in image_paths]

d = np.random.choice(data, size=1)[0]
random_image_path = d['file_name']
# random_image_path = str(np.random.choice(image_paths, size=1)[0])
# d = np.random.choice(data, 1)[0]
# random_image_path = d['file_name']
# random_image_path = r"/tmp/.X11-unix/e.jpeg"
# json_path = os.path.join(os.path.dirname(random_image_path), f"{os.path.basename(random_image_path).split('.')[0]}.JSON")

im = read_image(random_image_path)
outputs = predictor(im[:, :, ::-1])
cpu_instances = outputs['instances'].to("cpu")
print(len(cpu_instances))
plt.imshow(im)

# Filtering
# threshold = 0.5
# class_check = cpu_instances.pred_classes.numpy() == 0
# score_check = cpu_instances.scores.numpy() > threshold
# indices = np.logical_and(class_check, score_check)
# if indices.any():
#     filtered_instances = cpu_instances[indices]
#     found = True
def __call__(self, dataset_dict): """ Args: dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. Returns: dict: a format that builtin models in detectron2 accept """ dataset_dict = copy.deepcopy( dataset_dict) # it will be modified by code below # USER: Write your own image loading if it's not from a file category = dataset_dict["annotations"][0]['category_id'] try: image = utils.read_image(dataset_dict["file_name"], format=self.image_format) except Exception as e: print(dataset_dict["file_name"]) print(e) raise e try: utils.check_image_size(dataset_dict, image) except SizeMismatchError as e: expected_wh = (dataset_dict["width"], dataset_dict["height"]) image_wh = (image.shape[1], image.shape[0]) if (image_wh[1], image_wh[0]) == expected_wh: print("transposing image {}".format(dataset_dict["file_name"])) image = image.transpose(1, 0, 2) else: raise e # USER: Remove if you don't do semantic/panoptic segmentation. if "sem_seg_file_name" in dataset_dict: sem_seg_gt = utils.read_image( dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2) else: sem_seg_gt = None if (int(category) != 5): boxes = np.asarray([ BoxMode.convert(instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS) for instance in dataset_dict["annotations"] ]) aug_input = T.StandardAugInput(image, boxes=boxes, sem_seg=sem_seg_gt) transforms = aug_input.apply_augmentations(self.augmentation) image, sem_seg_gt = aug_input.image, aug_input.sem_seg else: boxes = np.asarray([0]) # aug_input = T.StandardAugInput(image, boxes=boxes, sem_seg=sem_seg_gt) # transforms = aug_input.apply_augmentations(self.augmentation) # image, sem_seg_gt = aug_input.image, aug_input.sem_seg image_shape = image.shape[:2] # h, w # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, # but not efficient on large generic data structures due to the use of pickle & mp.Queue. # Therefore it's important to use torch.Tensor. dataset_dict["image"] = torch.as_tensor( np.ascontiguousarray(image.transpose(2, 0, 1))) if sem_seg_gt is not None: dataset_dict["sem_seg"] = torch.as_tensor( sem_seg_gt.astype("long")) # USER: Remove if you don't use pre-computed proposals. # Most users would not need this feature. if self.proposal_topk: if (int(category) != 5): utils.transform_proposals( dataset_dict, image_shape, transforms, proposal_topk=self.proposal_topk, min_box_size=self.proposal_min_box_size, ) if not self.is_train: if (int(category) != 5): dataset_dict.pop("annotations", None) dataset_dict.pop("sem_seg_file_name", None) dataset_dict.pop("pano_seg_file_name", None) return dataset_dict if "annotations" in dataset_dict: # USER: Modify this if you want to keep them for some reason. 
for anno in dataset_dict["annotations"]: if not self.use_instance_mask: anno.pop("segmentation", None) if not self.use_keypoint: anno.pop("keypoints", None) # USER: Implement additional transformations if you have other types of data if (int(category) != 5): annos = [ transform_instance_annotations( obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices, ) for obj in dataset_dict.pop("annotations") if obj.get("iscrowd", 0) == 0 ] segment_transform = transf.Compose([ myTransform.FreeScaleMask((60, 100)), myTransform.MaskToTensor(), ]) img_transform = transf.Compose([ transf.Resize((288, 800)), transf.ToTensor(), transf.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), ]) if self.is_train: use_ax = True else: use_ax = False if (int(category) != 5): # dataset_dict['seg_label'] = torch.zeros([36,100,3]) # dataset_dict['cls_label'] = [[-1 for _ in range(4)] for _ in range(18)] instances = annotations_to_instances( annos, image_shape, mask_format=self.instance_mask_format) if self.recompute_boxes: instances.gt_boxes = instances.gt_masks.get_bounding_boxes( ) dataset_dict["instances"] = utils.filter_empty_instances( instances) else: cl = LaneClsDataset( '/home/ghr/hdd/traffic_sign/only_lane/images/CULANE_288', img_path=dataset_dict['file_name'], row_anchor=culane_row_anchor, seg_path=dataset_dict['annotations'][0]['lanefilepath'], segment_transform=segment_transform, use_aux=use_ax) if use_ax: img, cls, seg = cl.get_item() else: img, cls = cl.get_item() seg = 0 # print('hahahahahahahahahah')img # import pdb; pdb.set_trace() # dataset_dict["image"] = img dataset_dict['seg_label'] = seg dataset_dict['cls_label'] = cls #instances = annotations_to_instances(dataset_dict['annotations'], image_shape, mask_format=self.instance_mask_format) # Call lane class return label,... # After transforms such as cropping are applied, the bounding box may no longer # tightly bound the object. As an example, imagine a triangle object # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to # if self.recompute_boxes: # instances.gt_boxes = instances.gt_masks.get_bounding_boxes() # dataset_dict["instances"] = utils.filter_empty_instances(instances) if self.basis_loss_on and self.is_train: # load basis supervisions if self.ann_set == "coco": basis_sem_path = (dataset_dict["file_name"].replace( "train2017", "thing_train2017").replace("image/train", "thing_train")) else: basis_sem_path = (dataset_dict["file_name"].replace( "coco", "lvis").replace("train2017", "thing_train")) # change extension to npz basis_sem_path = osp.splitext(basis_sem_path)[0] + ".npz" basis_sem_gt = np.load(basis_sem_path)["mask"] basis_sem_gt = transforms.apply_segmentation(basis_sem_gt) basis_sem_gt = torch.as_tensor(basis_sem_gt.astype("long")) dataset_dict["basis_sem"] = basis_sem_gt return dataset_dict
rval, frame = cam.read()  # read an initial frame so the display loop below can start
count = 0  # Count frames
predictedd = 3
font = cv2.FONT_HERSHEY_SIMPLEX
while rval:
    cv2.imshow("Yoga-Pose-Estimation", frame)
    rval, frame = cam.read()
    count = count + 1
    print(count)

    # YOGA POSE ESTIMATION #
    # Get frame and load image
    cv2.imwrite('poseframe.png', frame)
    image = read_image('poseframe.png', format='BGR')
    height, width, _ = image.shape
    transform = detectron2.data.transforms.transform.ResizeTransform(
        h=height, w=width, new_h=800, new_w=800, interp=2)
    image = transform.apply_image(image)
    image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
    test_loader = DataLoader([(image, 0)], batch_size=1, shuffle=False, pin_memory=True)
    if count % 20 == 0:
def __call__(self, dataset_dict): """ Args: dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. Returns: dict: a format that builtin models in detectron2 accept """ dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below #print("BELOW IS THE dataset_dict (FOR DEBUGGING)") #print(dataset_dict) # USER: Write your own image loading if it's not from a file try: image = utils.read_image( dataset_dict["file_name"], format=self.image_format ) except Exception as e: print(dataset_dict["file_name"]) print(e) raise e try: utils.check_image_size(dataset_dict, image) except SizeMismatchError as e: expected_wh = (dataset_dict["width"], dataset_dict["height"]) image_wh = (image.shape[1], image.shape[0]) if (image_wh[1], image_wh[0]) == expected_wh: print("transposing image {}".format(dataset_dict["file_name"])) image = image.transpose(1, 0, 2) else: raise e # USER: Remove if you don't do semantic/panoptic segmentation. if "sem_seg_file_name" in dataset_dict: sem_seg_gt = utils.read_image( dataset_dict.pop("sem_seg_file_name"), "L" ).squeeze(2) else: sem_seg_gt = None boxes = np.asarray( [ BoxMode.convert( instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS ) for instance in dataset_dict["annotations"] ] ) aug_input = T.StandardAugInput(image, boxes=boxes, sem_seg=sem_seg_gt) transforms = aug_input.apply_augmentations(self.augmentation) image, sem_seg_gt = aug_input.image, aug_input.sem_seg image_shape = image.shape[:2] # h, w # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, # but not efficient on large generic data structures due to the use of pickle & mp.Queue. # Therefore it's important to use torch.Tensor. dataset_dict["image"] = torch.as_tensor( np.ascontiguousarray(image.transpose(2, 0, 1)) ) if sem_seg_gt is not None: dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long")) # USER: Remove if you don't use pre-computed proposals. # Most users would not need this feature. if self.proposal_topk: utils.transform_proposals( dataset_dict, image_shape, transforms, proposal_topk=self.proposal_topk, min_box_size=self.proposal_min_box_size, ) if not self.is_train: dataset_dict.pop("annotations", None) dataset_dict.pop("sem_seg_file_name", None) dataset_dict.pop("pano_seg_file_name", None) return dataset_dict if "annotations" in dataset_dict: # USER: Modify this if you want to keep them for some reason. for anno in dataset_dict["annotations"]: if not self.use_instance_mask: anno.pop("segmentation", None) if not self.use_keypoint: anno.pop("keypoints", None) # USER: Implement additional transformations if you have other types of data annos = [ transform_instance_annotations( obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices, ) for obj in dataset_dict.pop("annotations") if obj.get("iscrowd", 0) == 0 ] instances = annotations_to_instances( annos, image_shape, mask_format=self.instance_mask_format ) # After transforms such as cropping are applied, the bounding box may no longer # tightly bound the object. As an example, imagine a triangle object # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). 
The tight # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to if self.recompute_boxes: instances.gt_boxes = instances.gt_masks.get_bounding_boxes() dataset_dict["instances"] = utils.filter_empty_instances(instances) if self.basis_loss_on and self.is_train: # load basis supervisions if self.ann_set == "coco": basis_sem_path = ( dataset_dict["file_name"] .replace("train2017", "thing_train2017") .replace("image/train", "thing_train") ) else: basis_sem_path = ( dataset_dict["file_name"] .replace("coco", "lvis") .replace("train2017", "thing_train") ) # change extension to npz basis_sem_path = osp.splitext(basis_sem_path)[0] + ".npz" basis_sem_gt = np.load(basis_sem_path)["mask"] basis_sem_gt = transforms.apply_segmentation(basis_sem_gt) basis_sem_gt = torch.as_tensor(basis_sem_gt.astype("long")) dataset_dict["basis_sem"] = basis_sem_gt return dataset_dict
if __name__ == "__main__": args = get_parser().parse_args() logger = setup_logger(name='deekongai') logger.info("Arguments: " + str(args)) cfg = setup_cfg(args) demo = VisualizationDemo(cfg) if args.input: for path in tqdm.tqdm(os.listdir(args.input), disable=not args.output): # use PIL, to be consistent with evaluation img = read_image(os.path.join(args.input, path), format="BGR") start_time = time.time() predictions, visualized_output = demo.run_on_image(img) logger.info("{}: {} in {:.2f}s".format( path, "detected {} instances".format(len(predictions["instances"])) if "instances" in predictions else "finished", time.time() - start_time, )) if args.output: if os.path.isdir(args.output): assert os.path.isdir(args.output), args.output out_filename = os.path.join(args.output, os.path.basename(path)) else:
def __call__(self, dataset_dict): """ Args: dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. Returns: dict: a format that builtin models in detectron2 accept """ dataset_dict = copy.deepcopy( dataset_dict) # it will be modified by code below # USER: Write your own image loading if it's not from a file image = utils.read_image(dataset_dict["file_name"], format=self.img_format) utils.check_image_size(dataset_dict, image) if "annotations" not in dataset_dict: image, transforms = T.apply_transform_gens( ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image) else: # Crop around an instance if there are instances in the image. # USER: Remove if you don't use cropping if self.crop_gen: crop_tfm = utils.gen_crop_transform_with_instance( self.crop_gen.get_crop_size(image.shape[:2]), image.shape[:2], np.random.choice(dataset_dict["annotations"]), ) image = crop_tfm.apply_image(image) image, transforms = T.apply_transform_gens(self.tfm_gens, image) if self.crop_gen: transforms = crop_tfm + transforms image_shape = image.shape[:2] # h, w # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, # but not efficient on large generic data structures due to the use of pickle & mp.Queue. # Therefore it's important to use torch.Tensor. dataset_dict["image"] = torch.as_tensor( np.ascontiguousarray(image.transpose(2, 0, 1))) # USER: Remove if you don't use pre-computed proposals. if self.load_proposals: utils.transform_proposals(dataset_dict, image_shape, transforms, self.min_box_side_len, self.proposal_topk) if not self.is_train: # USER: Modify this if you want to keep them for some reason. dataset_dict.pop("annotations", None) dataset_dict.pop("sem_seg_file_name", None) return dataset_dict if "annotations" in dataset_dict: # USER: Modify this if you want to keep them for some reason. for anno in dataset_dict["annotations"]: if not self.mask_on: anno.pop("segmentation", None) if not self.keypoint_on: anno.pop("keypoints", None) # USER: Implement additional transformations if you have other types of data annos = [ utils.transform_instance_annotations( obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices, ) for obj in dataset_dict.pop("annotations") if obj.get("iscrowd", 0) == 0 ] instances = annotations_to_instances(annos, image_shape, mask_format=self.mask_format) # Create a tight bounding box from masks, useful when image is cropped if self.crop_gen and instances.has("gt_masks"): instances.gt_boxes = instances.gt_masks.get_bounding_boxes() dataset_dict["instances"] = utils.filter_empty_instances(instances) # USER: Remove if you don't do semantic/panoptic segmentation. if "sem_seg_file_name" in dataset_dict: with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f: sem_seg_gt = Image.open(f) sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8") sem_seg_gt = transforms.apply_segmentation(sem_seg_gt) sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long")) dataset_dict["sem_seg"] = sem_seg_gt return dataset_dict
def __call__(self, dataset_dict): """ Args: dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. Returns: dict: a format that builtin models in detectron2 accept """ dataset_dict = copy.deepcopy( dataset_dict) # it will be modified by code below # USER: Write your own image loading if it's not from a file import os if not os.path.exists(dataset_dict["file_name"]): name = dataset_dict["file_name"].replace("hico/train2015", "coco2014/train2014") else: name = dataset_dict["file_name"] image = utils.read_image(name, format=self.img_format) # import ipdb;ipdb.set_trace() if dataset_dict['height'] != image.shape[0]: image = image.transpose([1, 0, 2]) # print(dataset_dict) # np.asarray().transpose() # print(image.shape, type(image)) utils.check_image_size(dataset_dict, image) if "annotations" not in dataset_dict: image, transforms = T.apply_transform_gens( ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image) else: # Crop around an instance if there are instances in the image. # USER: Remove if you don't use cropping if self.crop_gen: crop_tfm = utils.gen_crop_transform_with_instance( self.crop_gen.get_crop_size(image.shape[:2]), image.shape[:2], np.random.choice(dataset_dict["annotations"]), ) image = crop_tfm.apply_image(image) image, transforms = T.apply_transform_gens(self.tfm_gens, image) if self.crop_gen: transforms = crop_tfm + transforms image_shape = image.shape[:2] # h, w # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, # but not efficient on large generic data structures due to the use of pickle & mp.Queue. # Therefore it's important to use torch.Tensor. dataset_dict["image"] = torch.as_tensor( np.ascontiguousarray(image.transpose(2, 0, 1))) if not self.is_train: # USER: Modify this if you want to keep them for some reason. dataset_dict.pop("annotations", None) return dataset_dict if "annotations" in dataset_dict: # USER: Implement additional transformations if you have other types of data annos = [ utils.transform_instance_annotations( obj, transforms, image_shape, ) for obj in dataset_dict.pop("annotations") if obj.get("iscrowd", 0) == 0 ] instances = annotations_to_instances(annos, image_shape) dataset_dict["instances"] = utils.filter_empty_instances(instances) return dataset_dict
def __call__(self, dataset_dict): """ Args: dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. Returns: dict: a format that builtin models in detectron2 accept """ dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below # USER: Write your own image loading if it's not from a file image = utils.read_image(dataset_dict["file_name"], format=self.img_format) utils.check_image_size(dataset_dict, image) # ################################################################################################################ # print("AutoAugDet:", dataset_dict["file_name"]) # h, w, c = image.shape # if h <= 0 or w <=0: # print("Empty image") # if self.autoaugdet and "annotations" in dataset_dict: # from detectron2.structures.boxes import BoxMode # bboxes = [] # for label in dataset_dict["annotations"]: # assert label['bbox_mode'] == BoxMode.XYWH_ABS # bboxes.append(label['bbox']) # # import cv2, random # # showimg_in = image.copy() # # for box in bboxes: # # cv2.rectangle(showimg_in, (int(box[0]), int(box[1])), (int(box[0] + box[2]), int(box[1] + box[3])),(random.randint(0,255), random.randint(0,255), random.randint(0,255))) # try: # image, bboxes = autoaugdet.autoaugdet(image, bboxes, self.autoaugdet) # except Exception as e: # print("AutoAug Error:", e) # # showimg_out = image.copy() # # for box in bboxes: # # cv2.rectangle(showimg_out, (int(box[0]), int(box[1])), (int(box[0] + box[2]), int(box[1] + box[3])),(random.randint(0,255), random.randint(0,255), random.randint(0,255))) # # cv2.imshow("in", showimg_in) # # cv2.imshow("out", showimg_out) # # cv2.waitKey(0) # for i in range(len(bboxes)): # dataset_dict["annotations"][i]['bbox'] = bboxes[i] # ################################################################################################# if "annotations" not in dataset_dict: image, transforms = T.apply_transform_gens( ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image ) else: # Crop around an instance if there are instances in the image. # USER: Remove if you don't use cropping if self.crop_gen: crop_tfm = utils.gen_crop_transform_with_instance( self.crop_gen.get_crop_size(image.shape[:2]), image.shape[:2], np.random.choice(dataset_dict["annotations"]), ) image = crop_tfm.apply_image(image) image, transforms = T.apply_transform_gens(self.tfm_gens, image) if self.crop_gen: transforms = crop_tfm + transforms image_shape = image.shape[:2] # h, w # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, # but not efficient on large generic data structures due to the use of pickle & mp.Queue. # Therefore it's important to use torch.Tensor. dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32")) # Can use uint8 if it turns out to be slow some day # USER: Remove if you don't use pre-computed proposals. if self.load_proposals: utils.transform_proposals( dataset_dict, image_shape, transforms, self.min_box_side_len, self.proposal_topk ) if not self.is_train: dataset_dict.pop("annotations", None) dataset_dict.pop("sem_seg_file_name", None) return dataset_dict if "annotations" in dataset_dict: # USER: Modify this if you want to keep them for some reason. 
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            if not self.keypoint_on:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape,
                keypoint_hflip_indices=self.keypoint_hflip_indices,
            )
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.mask_format
        )
        # Create a tight bounding box from masks, useful when image is cropped
        if self.crop_gen and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

        # generate heatmaps of keypoints
        # if dataset_dict["instances"].has("gt_keypoints"):

        # For segmentation-based detection, transform the instance-level segmentation
        # mask into semantic segmasks and contour maps:
        # turn the instance-level segmentation map into a semantic segmap and
        # get the contour map for segmentation-based detection
        dataset_dict["contours"], dataset_dict["semseg"] = utils.annotations_to_segmaps(
            annos, self.num_classes, image_shape
        )

        kpts = [obj.get("keypoints", []) for obj in annos]
        map_shape = (image_shape[0], image_shape[1])
        kp_maps, short_offsets = get_keypoint_maps(None, kpts, map_shape)
        dataset_dict["kp_maps"] = kp_maps.transpose(2, 0, 1)
        dataset_dict["short_offsets"] = short_offsets.transpose(2, 0, 1)

        ################################################################
        # # visualize the keypoints
        # from detectron2.utils.visualizer import Visualizer
        # from detectron2.data.datasets.builtin_meta import COCO_CATEGORIES
        # from os import path
        # image_rgb = image[..., ::-1]
        # V = Visualizer(image_rgb, dataset_dict)
        # # draw the foreground mask of each object category
        # binary_masks = kp_maps > 0.1
        # _, fn = path.split(dataset_dict["file_name"])
        # fn_next, ext = path.splitext(fn)
        # print('Mask size: ', binary_masks.shape)
        # print('Image size: ', image_rgb.shape)
        # assert binary_masks.shape[1] == image_rgb.shape[0], (binary_masks.shape[1], image_rgb.shape[0])
        # assert binary_masks.shape[2] == image_rgb.shape[1], (binary_masks.shape[2], image_rgb.shape[1])
        # assert image_rgb.shape[2] == 3, image_rgb.shape[2]
        # bm = binary_masks
        # for i in range(binary_masks.shape[0]):
        #     masked_image = V.draw_binary_mask(
        #         bm[i, :, :].squeeze(), color=None, edge_color='r', alpha=0.5, area_threshold=10
        #     )  # COCO_CATEGORIES[i]["color"]
        #     # filepath = "tmp/" + fn_next + '_' + COCO_CATEGORIES[i]["name"] + '.png'
        #     # masked_image.save(filepath)
        # filepath = "tmp/" + fn_next + '.png'
        # masked_image.save(filepath)
        ################################################################

        ################################################
        # # visualize the segmentation mask
        # from os import path
        # image_rgb = image[..., ::-1]  # utils.read_image(dataset_dict["file_name"], format="RGB")
        # segmask = dataset_dict["semseg"].tensor.numpy()
        # _, fn = path.split(dataset_dict["file_name"])
        # fn_next, ext = path.splitext(fn)
        # im = Image.fromarray(np.uint8(image_rgb))
        # filepath = "tmp_segmap_sorted/" + fn_next + '_raw.png'
        # im.save(filepath)
        # im2 = Image.fromarray(np.uint8(segmask * 3))
        # filepath2 = "tmp_segmap_sorted/" + fn_next + '_seg.png'
        # im2.save(filepath2)
        ################################################

        ###############
        # # visualize the segmentation map and contours
        # from detectron2.utils.visualizer import Visualizer
        # from detectron2.data.datasets.builtin_meta import COCO_CATEGORIES
        # from os import path
        # # V.draw_sem_seg(self, sem_seg, area_threshold=None, alpha=0.8)
        # image_rgb = image[..., ::-1]  # utils.read_image(dataset_dict["file_name"], format="RGB")
        # V = Visualizer(image_rgb, dataset_dict)
        # # draw the foreground mask of each object category
        # # binary_masks = dataset_dict["contours"].gt_segmasks.tensor
        # binary_masks = dataset_dict["contours"].gt_contours.tensor
        # _, fn = path.split(dataset_dict["file_name"])
        # fn_next, ext = path.splitext(fn)
        # print('Mask size: ', binary_masks.size())
        # print('Image size: ', image_rgb.shape)
        # assert binary_masks.size(1) == image_rgb.shape[0], (binary_masks.size(1), image_rgb.shape[0])
        # assert binary_masks.size(2) == image_rgb.shape[1], (binary_masks.size(2), image_rgb.shape[1])
        # assert image_rgb.shape[2] == 3, image_rgb.shape[2]
        # bm = binary_masks.numpy()
        # # bm_uint8 = bm.astype("uint8")
        # # print(bm)
        # for i in range(binary_masks.size(0)):
        #     masked_image = V.draw_binary_mask(
        #         bm[i, :, :].squeeze(), color=None, edge_color='r', alpha=0.5, area_threshold=10
        #     )  # COCO_CATEGORIES[i]["color"]
        #     # filepath = "tmp/" + fn_next + '_' + COCO_CATEGORIES[i]["name"] + '.png'
        #     # masked_image.save(filepath)
        # filepath = "tmp/" + fn_next + '.png'
        # masked_image.save(filepath)
        #################################################################################################

    # USER: Remove if you don't do semantic/panoptic segmentation.
    if "sem_seg_file_name" in dataset_dict:
        with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f:
            sem_seg_gt = Image.open(f)
            sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
        sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
        sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
        dataset_dict["sem_seg"] = sem_seg_gt
    return dataset_dict
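# `utils.annotations_to_segmaps` and `get_keypoint_maps` used above are project-specific
# helpers, not detectron2 built-ins. Purely as an illustration of the idea (an assumption,
# not the project's actual implementation), a PersonLab-style keypoint heatmap /
# short-offset generator could be sketched like this:
import numpy as np

def make_keypoint_maps(keypoints_per_instance, map_shape, num_keypoints=17, radius=8):
    """keypoints_per_instance: list of flat [x1, y1, v1, x2, y2, v2, ...] lists (COCO format)."""
    h, w = map_shape
    kp_maps = np.zeros((h, w, num_keypoints), dtype=np.float32)
    short_offsets = np.zeros((h, w, 2 * num_keypoints), dtype=np.float32)
    ys, xs = np.mgrid[:h, :w]
    for kpts in keypoints_per_instance:
        if len(kpts) == 0:
            continue
        kpts = np.asarray(kpts, dtype=np.float32).reshape(-1, 3)
        for k, (x, y, v) in enumerate(kpts):
            if v <= 0:  # keypoint not labeled / not visible
                continue
            # binary disk around the keypoint ...
            disk = (xs - x) ** 2 + (ys - y) ** 2 <= radius ** 2
            kp_maps[disk, k] = 1.0
            # ... and, inside the disk, offsets pointing back to the keypoint
            short_offsets[disk, 2 * k] = (x - xs)[disk]
            short_offsets[disk, 2 * k + 1] = (y - ys)[disk]
    return kp_maps, short_offsets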
def __call__(self, dataset_dict):
    assert 'annotations' in dataset_dict, 'this mapper only handles instance segmentation'
    assert 'sem_seg_file_name' not in dataset_dict, 'panoptic segmentation is not handled'
    dataset_dict = copy.deepcopy(dataset_dict)
    image = utils.read_image(dataset_dict['file_name'], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    # brightness / contrast / saturation / cutout
    if self.cont_gen is not None:
        tfm = self.cont_gen.get_transform(image)
        image = tfm.apply_image(image)
    if self.bright_gen is not None:
        tfm = self.bright_gen.get_transform(image)
        image = tfm.apply_image(image)
    if self.sat_gen is not None:
        tfm = self.sat_gen.get_transform(image)
        image = tfm.apply_image(image)
    if self.cutout_gen is not None:
        tfm = self.cutout_gen.get_transform(image)
        image = tfm.apply_image(image)

    # affine transforms
    if self.rotate_gen is not None:
        rotate_tfm = self.rotate_gen.get_transform(image)
        image = rotate_tfm.apply_image(image)
    if self.shear_gen is not None:
        shear_tfm = self.shear_gen.get_transform(image)
        image = shear_tfm.apply_image(image)
    if self.extent_gen is not None:
        extent_tfm = self.extent_gen.get_transform(image)
        image = extent_tfm.apply_image(image)
    if self.crop_gen is not None:
        crop_tfm = utils.gen_crop_transform_with_instance(
            self.crop_gen.get_crop_size(image.shape[:2]),
            image.shape[:2],
            np.random.choice(dataset_dict['annotations']),
        )
        image = crop_tfm.apply_image(image)

    image, transforms = T.apply_transform_gens(self.tfm_gens, image)
    if self.crop_gen is not None:
        transforms = crop_tfm + transforms
    if self.extent_gen is not None:
        transforms = extent_tfm + transforms
    if self.shear_gen is not None:
        transforms = shear_tfm + transforms
    if self.rotate_gen is not None:
        transforms = rotate_tfm + transforms

    # at test time the annotations are not needed, so drop them and return
    if not self.is_train:
        dataset_dict.pop('annotations', None)
        dataset_dict.pop('sem_seg_file_name', None)
        return dataset_dict

    image_shape = image.shape[:2]  # h, w
    dataset_dict['image'] = torch.as_tensor(
        np.ascontiguousarray(image.transpose(2, 0, 1)))

    annos = [
        utils.transform_instance_annotations(
            obj, transforms, image_shape, keypoint_hflip_indices=None)
        for obj in dataset_dict.pop('annotations')
        if obj.get("iscrowd", 0) == 0
    ]
    instances = utils.annotations_to_instances(
        annos, image_shape, mask_format=self.mask_format)

    # create tight bounding boxes from the masks
    if self.crop_gen and instances.has("gt_masks"):
        instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
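# The __call__ above assumes an __init__ that sets up each TransformGen it checks.
# A minimal sketch of what that could look like, built from detectron2's stock
# augmentations (an assumption: the cutout and shear generators are not detectron2
# built-ins, so they are left as hypothetical placeholders here):
from detectron2.data import transforms as T

class SegAugmentationMapper:  # hypothetical name for the mapper this __call__ belongs to
    def __init__(self, cfg, is_train=True):
        self.is_train = is_train
        self.img_format = cfg.INPUT.FORMAT
        self.mask_format = cfg.INPUT.MASK_FORMAT
        # photometric: brightness / contrast / saturation / cutout
        self.cont_gen = T.RandomContrast(0.8, 1.2)
        self.bright_gen = T.RandomBrightness(0.8, 1.2)
        self.sat_gen = T.RandomSaturation(0.8, 1.2)
        self.cutout_gen = None   # would need a custom TransformGen
        # geometric: rotation / shear / extent / crop
        self.rotate_gen = T.RandomRotation([-10, 10])
        self.shear_gen = None    # would need a custom TransformGen
        self.extent_gen = T.RandomExtent(scale_range=(0.9, 1.1), shift_range=(0.1, 0.1))
        self.crop_gen = T.RandomCrop("relative_range", (0.9, 0.9))
        # resize + flip are applied through T.apply_transform_gens
        self.tfm_gens = [
            T.ResizeShortestEdge(
                cfg.INPUT.MIN_SIZE_TRAIN, cfg.INPUT.MAX_SIZE_TRAIN,
                cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING),
            T.RandomFlip(),
        ]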
# get_parser, setup_cfg and VisualizationDemo are defined elsewhere (they follow
# the pattern of detectron2's demo utilities).
import glob
import multiprocessing as mp
import os

import cv2
import tqdm

from detectron2.data.detection_utils import read_image

if __name__ == "__main__":
    mp.set_start_method("spawn", force=True)
    args = get_parser().parse_args()
    cfg = setup_cfg(args)

    demo = VisualizationDemo(cfg)

    if args.input:
        if len(args.input) == 1:
            args.input = glob.glob(os.path.expanduser(args.input[0]))
            assert args.input, "The input path(s) was not found"
        for path in tqdm.tqdm(args.input, disable=not args.output):
            # use PIL, to be consistent with evaluation
            img = read_image(path, format="BGR")
            predictions, visualized_output = demo.run_on_image(img)
            if args.output:
                if os.path.isdir(args.output):
                    out_filename = os.path.join(args.output, os.path.basename(path))
                else:
                    assert len(args.input) == 1, "Please specify a directory with args.output"
                    out_filename = args.output
                visualized_output.save(out_filename)
            else:
                # no output path given: show the prediction in a window
                # (same cv2 display pattern used elsewhere in this file)
                cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
                cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1])
                if cv2.waitKey(0) == 27:
                    break  # esc to quit
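# The get_parser / setup_cfg helpers assumed above follow the pattern of
# detectron2's demo script; a minimal sketch under that assumption:
import argparse
from detectron2.config import get_cfg

def get_parser():
    parser = argparse.ArgumentParser(description="Detectron2 demo")
    parser.add_argument("--config-file", metavar="FILE", help="path to config file")
    parser.add_argument("--input", nargs="+", help="a list of input images or a glob pattern")
    parser.add_argument("--output", help="directory or file name for the output visualizations")
    parser.add_argument("--confidence-threshold", type=float, default=0.5,
                        help="minimum score for instance predictions to be shown")
    parser.add_argument("--opts", default=[], nargs=argparse.REMAINDER,
                        help="modify config options from the command line")
    return parser

def setup_cfg(args):
    cfg = get_cfg()
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    # score thresholds used at inference time
    cfg.MODEL.RETINANET.SCORE_THRESH_TEST = args.confidence_threshold
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.confidence_threshold
    cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = args.confidence_threshold
    cfg.freeze()
    return cfg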
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    if "annotations" not in dataset_dict:
        image, transforms = T.apply_transform_gens(
            ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image)
    else:
        # Crop around an instance if there are instances in the image.
        # USER: Remove if you don't use cropping
        if self.crop_gen:
            crop_tfm = utils.gen_crop_transform_with_instance(
                self.crop_gen.get_crop_size(image.shape[:2]),
                image.shape[:2],
                np.random.choice(dataset_dict["annotations"]),
            )
            image = crop_tfm.apply_image(image)
        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        if self.crop_gen:
            transforms = crop_tfm + transforms

    image_shape = image.shape[:2]  # h, w
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))

    if not self.is_train:
        dataset_dict.pop("annotations", None)
        return dataset_dict

    for anno in dataset_dict["annotations"]:
        if not self.mask_on:
            anno.pop("segmentation", None)
        if not self.keypoint_on:
            anno.pop("keypoints", None)

    annos = [
        transform_rotated_boxes_annotations(obj, transforms)
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]
    instances = rotated_annotations_to_instances(annos, image_shape)

    # Create a tight bounding box from masks, useful when image is cropped
    if self.crop_gen and instances.has("gt_masks"):
        instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    del annos, instances
    return dataset_dict
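# transform_rotated_boxes_annotations and rotated_annotations_to_instances are custom
# helpers of this project. A minimal sketch of what they could look like (an assumption,
# not the project's code), relying on detectron2's rotated-box support in versions that
# provide BoxMode.XYWHA_ABS, TransformList.apply_rotated_box and
# detection_utils.annotations_to_instances_rotated:
import numpy as np
from detectron2.data import detection_utils as utils
from detectron2.structures import BoxMode

def transform_rotated_boxes_annotations(annotation, transforms):
    # (cx, cy, w, h, angle) boxes go through the rotated-box handlers that
    # detectron2 registers for its flip/resize transforms.
    assert annotation["bbox_mode"] == BoxMode.XYWHA_ABS
    bbox = np.asarray([annotation["bbox"]], dtype="float64")
    annotation["bbox"] = transforms.apply_rotated_box(bbox)[0]
    return annotation

def rotated_annotations_to_instances(annos, image_size):
    # Thin wrapper around the built-in helper, which stores gt_boxes as RotatedBoxes.
    return utils.annotations_to_instances_rotated(annos, image_size)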