data_coco["categories"] = categories data_coco["annotations"] = annotations json.dump(data_coco, open(save_json_path, "w"), indent=4) register_coco_instances("my_dataset_train1", {}, "traincoco.json", "") DatasetCatalog.get("my_dataset_train1") #visualize training data my_dataset_train_metadata = MetadataCatalog.get("my_dataset_train") dataset_dicts = DatasetCatalog.get("my_dataset_train") for d in random.sample(dataset_dicts, 3): img = cv2.imread(d["file_name"]) visualizer = Visualizer(img[:, :, ::-1], metadata=my_dataset_train_metadata, scale=0.5) vis = visualizer.draw_dataset_dict(d) cv2_imshow(vis.get_image()[:, :, ::-1]) """# Train Custom Detectron2 Detector""" #We are importing our own Trainer Module here to use the COCO validation evaluation during training. Otherwise no validation eval occurs. class CocoTrainer(DefaultTrainer): @classmethod def build_evaluator(cls, cfg, dataset_name, output_folder=None): if output_folder is None: os.makedirs("coco_eval", exist_ok=True) output_folder = "coco_eval"
webcv2.imshow("window", vis.get_image()[:, :, ::-1]) webcv2.waitKey() train_data_loader = build_detection_train_loader(cfg) import ipdb ipdb.set_trace() for batch in tqdm.tqdm(train_data_loader): if args.speed: continue for per_image in batch: # Pytorch tensor is in (C, H, W) format img = per_image["image"].permute(1, 2, 0) if cfg.INPUT.FORMAT == "BGR": img = img[:, :, [2, 1, 0]] else: img = np.asarray( Image.fromarray(img, mode=cfg.INPUT.FORMAT).convert("RGB")) visualizer = Visualizer(img, metadata=metadata, scale=1.0) target_fields = per_image["instances"].get_fields() labels = [ metadata.thing_classes[i] for i in target_fields["gt_classes"] ] vis = visualizer.overlay_instances( labels=labels, boxes=target_fields.get("gt_boxes", None), masks=target_fields.get("gt_masks", None), keypoints=target_fields.get("gt_keypoints", None), ) output(vis, str(per_image["image_id"]) + ".jpg")
""" Test the d2sa json dataset loader. Usage: python -m detectron2.data.datasets.d2sa \ path/to/json path/to/image_root dataset_name "dataset_name" can be "d2sa_val", or other pre-registered ones """ from detectron2.utils.logger import setup_logger from detectron2.utils.visualizer import Visualizer import detectron2.data.datasets # noqa # add pre-defined metadata import sys logger = setup_logger(name=__name__) assert sys.argv[3] in DatasetCatalog.list() meta = MetadataCatalog.get(sys.argv[3]) dicts = load_cocoa_json(sys.argv[1], sys.argv[2], sys.argv[3]) logger.info("Done loading {} samples.".format(len(dicts))) dirname = "d2sa-data-vis" os.makedirs(dirname, exist_ok=True) for d in dicts: img = np.array(Image.open(d["file_name"])) visualizer = Visualizer(img, metadata=meta) vis = visualizer.draw_dataset_dict(d) fpath = os.path.join(dirname, os.path.basename(d["file_name"])) vis.save(fpath)
writer = SummaryWriter( log_dir='/tmp/tensorboard/{}'.format(datetime.datetime.now())) # Parse command line arguments ap = argparse.ArgumentParser() ap.add_argument("--split", default="test") ap.add_argument("--samples", type=int, default=10) ap.add_argument("--scale", type=float, default=1.0) ap.add_argument("--path", type=str, default="../dataset", metavar='DIR') args = ap.parse_args() dataset_name = f"sacrum_{args.split}" print(dataset_name) register_sacrum_voc(dataset_name, args.path, args.split) dataset_dicts = DatasetCatalog.get(dataset_name) for d in random.sample(dataset_dicts, args.samples): img = cv2.imread(d["file_name"]) visualizer = Visualizer(img[:, :, ::-1], metadata=MetadataCatalog.get(dataset_name), scale=args.scale) vis = visualizer.draw_dataset_dict(d) writer.add_image(d["file_name"], np.transpose(vis.get_image(), axes=[2, 0, 1])) #cv2.imshow(dataset_name, vis.get_image()[:, :, ::-1]) # Exit? Press ESC #if cv2.waitKey(0) & 0xFF == 27: # break #cv2.destroyAllWindows()
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5 cfg.MODEL.WEIGHTS = MODEL_PATH if not torch.cuda.is_available(): cfg.MODEL.DEVICE = 'cpu' predictor = DefaultPredictor(cfg) im = cv2.imread(INPUT_FILE) outputs = predictor(im) instances = outputs['instances'] if len(instances) <= 0: sys.exit(1) v = Visualizer(im[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2, instance_mode=ColorMode.IMAGE_BW) v = v.draw_instance_predictions(instances.to("cpu")) result = v.get_image()[:, :, ::-1] cv2.imshow('waldo', result) while True: key = cv2.waitKey(1) if key == 27 or key == 113: break cv2.destroyAllWindows()
"Kia-Detection_faster_rcnn_R_50_FPN_1x-eps300/model_final.pth" ) # path to the model we just trained cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7 # set a custom testing threshold predictor = DefaultPredictor(cfg) from detectron2.utils.visualizer import ColorMode for d in random.sample(Kia_trainval_dataset_dicts, 3): im = cv2.imread(d["file_name"]) # outputs = predictor(im) # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format # print("outputs : ", outputs) # print("outputs[instances] : ", outputs["instances"]) v = Visualizer( im[:, :, ::-1], metadata=Kia_trainval_metadata, scale=0.5, instance_mode=ColorMode. IMAGE_BW # remove the colors of unsegmented pixels. This option is only available for segmentation models ) # out = v.draw_instance_predictions(outputs["instances"].to("cpu")) # draw instances with predictions of predictor out = v.draw_dataset_dict(d) # draw instances with using Annotations # print("out.get_image().shape : ", out.get_image().shape) # print("out.get_image()[:, :, ::-1].shape : ", out.get_image()[:, :, ::-1].shape) cv2.imwrite("./output/%s" % (d["file_name"].split("/")[-1]), out.get_image()[:, :, ::-1]) """ img = cv2.imread(dic["file_name"], cv2.IMREAD_COLOR)[:, :, ::-1] basename = os.path.basename(dic["file_name"]) predictions = create_instances(pred_by_image[dic["image_id"]], img.shape[:2])
# for d in random.sample(dataset_dicts, 3):
#     img = cv2.imread(d["file_name"])
#     visualizer = Visualizer(img[:, :, ::-1], metadata=metadata, scale=0.3)
#     vis = visualizer.draw_dataset_dict(d)
#     cv2.imshow('img', vis.get_image()[:, :, ::-1])
#     cv2.waitKey(0)

predictor = DefaultPredictor(cfg)
from IPython import embed

for d in random.sample(dataset_dicts, 5):
    img = cv2.imread(d["file_name"])
    outputs = predictor(img)
    print(d["file_name"])
    # first draw the model's predictions ...
    pred_visualizer = Visualizer(img[:, :, ::-1], metadata=metadata, scale=0.3)
    pred_vis = pred_visualizer.draw_instance_predictions(outputs["instances"].to("cpu"))
    pred_img = pred_vis.get_image()  # RGB image with predictions drawn
    # ... then draw the ground-truth annotations on top of that image
    gt_visualizer = Visualizer(pred_img, metadata=metadata, scale=0.3)
    gt_vis = gt_visualizer.draw_dataset_dict(d)
    cv2.imshow('img', gt_vis.get_image()[:, :, ::-1])
    cv2.waitKey(0)

# trainer = DefaultTrainer(cfg)
# trainer.resume_or_load(resume=True)
# evaluator = COCOEvaluator("train", cfg, False, output_dir="./output/")
# val_loader = build_detection_test_loader(cfg, "train")
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # a single class in this dataset
cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS = 8
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_0000299.pth")
cfg.MODEL.ROI_KEYPOINT_HEAD.NAME = "KRCNNConvDeconvUpPVNetHead"
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set the testing threshold for this model
cfg.DATASETS.TEST = ("nihonbashi", )
predictor = DefaultPredictor(cfg)

from detectron2.utils.visualizer import ColorMode

for d in random.sample(dataset_dicts, 3):
    im = cv2.imread(d["file_name"])
    outputs = predictor(im)
    # draw the model's predictions
    v = Visualizer(im[:, :, ::-1],
                   metadata=nihonbashi_metadata,
                   scale=0.8,
                   instance_mode=ColorMode.IMAGE_BW  # remove the colors of unsegmented pixels
                   )
    v = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    # draw the ground-truth annotations for comparison
    visualizer = Visualizer(im[:, :, ::-1],
                            metadata=nihonbashi_metadata,
                            scale=0.8,
                            instance_mode=ColorMode.IMAGE_BW  # remove the colors of unsegmented pixels
                            )
    vis = visualizer.draw_dataset_dict(d)
    im_pred = v.get_image()[:, :, ::-1]
    im_gt = vis.get_image()[:, :, ::-1]
def may_visualize_gt(self, batched_inputs, init_objectness, init_bbox, refine_objectness, refine_boxes, centers, pred_init_boxes, pred_refine_boxes, logits): """ Visualize initial and refine boxes using mathced labels for filtering. The prediction at positive positions are shown. """ if self.training: if self.vis_period <= 0: return storage = get_event_storage() if not storage.iter % self.vis_period == 0: return from detectron2.utils.visualizer import Visualizer image_index = 0 img = batched_inputs[image_index]["image"].cpu().numpy() assert img.shape[0] == 3, "Images should have 3 channels." img = img[::-1, :, :] img = img.transpose(1, 2, 0) v_init = Visualizer(img, None) v_init = v_init.overlay_instances(boxes=Boxes(init_bbox[image_index][ init_objectness[image_index]].cpu())) init_image = v_init.get_image() v_refine = Visualizer(img, None) v_refine = v_refine.overlay_instances( boxes=Boxes(refine_boxes[image_index][ refine_objectness[image_index] > 0].cpu())) refine_image = v_refine.get_image() if self.training: vis_img = np.vstack((init_image, refine_image)) vis_img = vis_img.transpose(2, 0, 1) storage.put_image("TOP: init gt boxes; Bottom: refine gt boxes", vis_img) vp_init = Visualizer(img, None) selected_centers = centers[init_objectness[image_index]].cpu().numpy() vp_init = vp_init.overlay_instances( boxes=Boxes(pred_init_boxes[image_index][ init_objectness[image_index]].detach().cpu()), labels=logits[image_index] [init_objectness[image_index]].sigmoid().max(1)[0].detach().cpu()) init_image = vp_init.get_image() for point in selected_centers: init_image = cv2.circle(init_image, tuple(point), 3, (255, 255, 255)) vp_refine = Visualizer(img, None) foreground_idxs = (refine_objectness[image_index] >= 0).logical_and( refine_objectness[image_index] < self.num_classes) selected_centers = centers[foreground_idxs].cpu().numpy() vp_refine = vp_refine.overlay_instances( boxes=pred_refine_boxes[image_index] [foreground_idxs].detach().cpu(), labels=logits[image_index][foreground_idxs].sigmoid().max( 1)[0].detach().cpu()) refine_image = vp_refine.get_image() for point in selected_centers: refine_image = cv2.circle(refine_image, tuple(point), 3, (255, 255, 255)) vis_img = np.vstack((init_image, refine_image)) if self.training: vis_img = vis_img.transpose(2, 0, 1) storage.put_image( "TOP: init pred boxes; Bottom: refine pred boxes", vis_img) # NOTE: This is commented temporarily. Uncomment it if # eagerly visualization is desired. '''
def get_midpoint_obj_conf(self): #objects = random.sample(list(objects), self.num_objects_per_episode) xyz_obj_mids = [] cat_ids_objects = [] count = 0 nav_points = np.array(self.nav_pts) action = "do_nothing" movement = "turn_right" if self.visualize_maskrcnn: self.plot_navigable_points(self.nav_pts) # constraint if two fixation points are very close # add in all confident masks # 20 spawns while count < self.num_spawns_per_episode: print("GETTING OBJECT #", count) rand_ind = np.random.randint(low=0, high=len(nav_points)) pos_rand = nav_points[rand_ind, :] agent_state = habitat_sim.AgentState() agent_state.position = pos_rand + np.array([0, 1.5, 0]) print("Random POS=", agent_state.position) self.agent.set_state(agent_state) # remove spawning points close to this spawn (so as to not spawn near there again) distances = np.sqrt(np.sum((nav_points - pos_rand)**2, axis=1)) nav_points = nav_points[np.where(distances > self.radius_remove)] # if self.visualize: # x_sample = self.nav_pts[:,0] # z_sample = self.nav_pts[:,2] # plt.plot(z_sample, x_sample, 'o', color = 'red') # plt.plot(agent_state.position[2], agent_state.position[0], 'x', 'blue') # plt.show() # rotate in place until get high confident object # bin_angle_size = 60.0 # angles = np.arange(-180, 180, bin_angle_size) angles = np.arange(0, 360, self.turn_angle) for angle in angles: #b_inds_notempty: # print("ANGLE=", angle) # turn_angle = np.radians(angle) # quat_yaw = quat_from_angle_axis(angle, np.array([0, 1.0, 0])) # # Set agent yaw rotation to current angle # agent_state.rotation = quat_yaw # # change sensor state to default # # need to move the sensors too # for sensor in self.agent.state.sensor_states: # # st() # self.agent.state.sensor_states[sensor].rotation = agent_state.rotation # self.agent.state.sensor_states[sensor].position = agent_state.position # + np.array([0, 1.5, 0]) # ADDED IN UP TOP # # print("PRINT", self.agent.state.sensor_states[sensor].rotation) # print(agent_state.rotation) # # get observations after centiering # self.agent.set_state(agent_state) # rotate until find confident object observations = self.sim.step(movement) #self.sim.step(action) ####### %%%%%%%%%%%%%%%%%%%%%%% ######### MASK RCNN im = observations["color_sensor"] im = Image.fromarray(im, mode="RGBA") im = cv2.cvtColor(np.asarray(im), cv2.COLOR_RGB2BGR) # plt.imshow(im) # plt.show() outputs = self.maskrcnn(im) pred_masks = outputs['instances'].pred_masks pred_boxes = outputs['instances'].pred_boxes.tensor pred_classes = outputs['instances'].pred_classes pred_scores = outputs['instances'].scores maskrcnn_to_catname = { 56: "chair", 59: "bed", 61: "toilet", 57: "couch", 58: "indoor-plant", 72: "refrigerator", 62: "tv" } #, 60: "dining-table"} obj_ids = [] obj_catids = [] obj_scores = [] obj_masks = [] obj_all_catids = [] obj_all_scores = [] obj_all_boxes = [] for segs in range(len(pred_masks)): if pred_classes[segs].item() in maskrcnn_to_catname: if pred_scores[segs] >= 0.90: obj_ids.append(segs) obj_catids.append(pred_classes[segs].item()) obj_scores.append(pred_scores[segs].item()) obj_masks.append(pred_masks[segs]) cat_ids_objects.append(maskrcnn_to_catname[int( pred_classes[segs])]) obj_all_catids.append(pred_classes[segs].item()) obj_all_scores.append(pred_scores[segs].item()) y, x = torch.where(pred_masks[segs]) pred_box = torch.Tensor( [min(y), min(x), max(y), max(x)]) # ymin, xmin, ymax, xmax obj_all_boxes.append(pred_box) print("MASKS ", len(pred_masks)) print("VALID ", len(obj_scores)) print(obj_scores) print(pred_scores.shape) 
translation_ = self.agent.state.sensor_states[ 'depth_sensor'].position quaternion_ = self.agent.state.sensor_states[ 'depth_sensor'].rotation rotation_ = quaternion.as_rotation_matrix(quaternion_) T_world_cam = np.eye(4) T_world_cam[0:3, 0:3] = rotation_ T_world_cam[0:3, 3] = translation_ if not obj_masks: continue else: # randomly choose a high confidence object # instead of this I think we should iterate over ALL the high confident objects and fixate on them # obj_mask_focus = random.choice(obj_masks) for obj_mask in obj_masks: depth = observations["depth_sensor"] xs, ys = np.meshgrid( np.linspace(-1 * 256 / 2., 1 * 256 / 2., 256), np.linspace(1 * 256 / 2., -1 * 256 / 2., 256)) depth = depth.reshape(1, 256, 256) xs = xs.reshape(1, 256, 256) ys = ys.reshape(1, 256, 256) xys = np.vstack((xs * depth, ys * depth, -depth, np.ones(depth.shape))) xys = xys.reshape(4, -1) xy_c0 = np.matmul(np.linalg.inv(self.K), xys) xyz = xy_c0.T[:, :3].reshape(256, 256, 3) # xyz_obj_masked = xyz[obj_mask_focus] xyz_obj_masked = xyz[obj_mask] xyz_obj_masked = np.matmul( rotation_, xyz_obj_masked.T) + translation_.reshape(3, 1) xyz_obj_mid = np.mean(xyz_obj_masked, axis=1) print("MIDPOINT=", xyz_obj_mid) xyz_obj_mids.append(xyz_obj_mid) count += 1 if self.visualize_maskrcnn: plt.figure(1) v = Visualizer(im[:, :, ::-1], MetadataCatalog.get( self.cfg_det.DATASETS.TRAIN[0]), scale=1.2) out = v.draw_instance_predictions( outputs['instances'].to("cpu")) seg_im = out.get_image() plt.imshow(seg_im) plt.figure(2) x_sample = self.nav_pts[:, 0] z_sample = self.nav_pts[:, 2] plt.plot(z_sample, x_sample, 'o', color='red') plt.plot(nav_points[:, 2], nav_points[:, 0], 'o', color='green') plt.plot(agent_state.position[2], agent_state.position[0], 'x', 'blue') plt.show() break # got a high confident view xyz_obj_mids = np.array(xyz_obj_mids) return xyz_obj_mids, cat_ids_objects
def main(): # register_coco_instances(f"sugar_beet_train", {}, f"/netscratch/naeem/structured_cwc/instances_train{year}.json", # f"/netscratch/naeem/structured_cwc/train/img/") # register_coco_instances(f"sugar_beet_valid", {}, f"/netscratch/naeem/structured_cwc/instances_valid{year}.json", # f"/netscratch/naeem/structured_cwc/valid/img/") register_coco_instances( "sugar_beet_train", {}, "/home/robot/datasets/structured_cwc/instances_train2016.json", "/home/robot/datasets/structured_cwc/train/img/") register_coco_instances( "sugar_beet_valid", {}, "/home/robot/datasets/structured_cwc/instances_valid2016.json", "/home/robot/datasets/structured_cwc/valid/img/") register_coco_instances( "sugar_beet_test", {}, "/home/robot/datasets/structured_cwc/instances_test2016.json", "/home/robot/datasets/structured_cwc/test/img/") cfg = get_cfg() cfg.merge_from_file( model_zoo.get_config_file( "COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml")) cfg.DATASETS.TRAIN = (f"sugar_beet_train", ) cfg.DATASETS.TEST = (f"sugar_beet_test", ) cfg.DATALOADER.NUM_WORKERS = 8 cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url( "COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml" ) # Let training initialize from model zoo cfg.SOLVER.IMS_PER_BATCH = 2 cfg.SOLVER.BASE_LR = 0.001 # pick a good LR cfg.SOLVER.MAX_ITER = 10000 # 300 iterations seems good enough for this toy dataset; you may need to train longer for a practical dataset # cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128 # faster, and good enough for this toy dataset (default: 512) cfg.MODEL.ROI_HEADS.NUM_CLASSES = 2 # cfg.OUTPUT_DIR = '/home/robot/datasets/MRCNN_training' os.makedirs(cfg.OUTPUT_DIR, exist_ok=True) # trainer = DefaultTrainer(cfg) # trainer.resume_or_load(resume=True) # trainer.train() # cfg already contains everything we've set previously. Now we changed it a little bit for inference: cfg.MODEL.WEIGHTS = os.path.join( '/home/robot/git/detectron2/output/model_final.pth') cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7 # set a custom testing threshold predictor = DefaultPredictor(cfg) evaluator = COCOEvaluator(f"sugar_beet_valid", cfg, False, output_dir="/home/robot/datasets/MRCNN_training") val_loader = build_detection_test_loader(cfg, f"sugar_beet_valid") # grad_cam = GradCam(model=trainer.model, # feature_module=trainer.model.layer4, # target_layer_names=["2"], use_cuda=True) # print(inference_on_dataset(trainer.model, val_loader, evaluator)) dataset_dicts = DatasetCatalog.get(f"sugar_beet_valid") def get_label(rgb_path): data_root, file_name = os.path.split( os.path.split(rgb_path)[0])[0], os.path.split(rgb_path)[1] return os.path.join(data_root, 'lbl', file_name) c = 0 for d in random.sample(dataset_dicts, 10): im = cv2.imread(d["file_name"]) lbl = cv2.imread(get_label(d["file_name"])) outputs = predictor(im) # outputs = grad_cam(im, 0) v = Visualizer( im[:, :, ::-1], scale=0.5, instance_mode=ColorMode.IMAGE_BW # remove the colors of unsegmented pixels. This option is only available for segmentation models ) out = v.draw_instance_predictions(outputs["instances"].to("cpu")) img = out.get_image() print(img.shape) img = Image.fromarray( np.concatenate([ img[:, :, ::-1], cv2.resize(lbl, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_AREA) ], axis=1)) img.save(f"{cfg.OUTPUT_DIR}/output{c}.jpeg") c = c + 1
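# The evaluator and val_loader above are built but never run, because the
# inference_on_dataset call referenced the commented-out trainer. A minimal
# sketch of running the same COCO evaluation with the already-built predictor
# (predictor.model is the underlying torch module loaded from cfg.MODEL.WEIGHTS):
from detectron2.evaluation import inference_on_dataset

metrics = inference_on_dataset(predictor.model, val_loader, evaluator)
print(metrics)  # dict with bbox/segm AP, also written to the evaluator's output_dir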
def display_sample(self,
                   rgb_obs,
                   semantic_obs,
                   depth_obs,
                   mainobj=None,
                   visualize=False):
    rgb_img = Image.fromarray(rgb_obs, mode="RGBA")

    semantic_img = Image.new("P", (semantic_obs.shape[1], semantic_obs.shape[0]))
    semantic_img.putpalette(d3_40_colors_rgb.flatten())
    semantic_img.putdata((semantic_obs.flatten() % 40).astype(np.uint8))
    semantic_img = semantic_img.convert("RGBA")
    # st()
    depth_img = Image.fromarray((depth_obs / 10 * 255).astype(np.uint8), mode="L")

    display_img = cv2.cvtColor(np.asarray(rgb_img), cv2.COLOR_RGB2BGR)
    # print(display_img.shape)
    # mask_image = False
    # if mask_image and mainobj is not None:
    #     main_id = int(mainobj.id[1:])
    #     print("MAINID ", main_id)
    #     # semantic = observations["semantic_sensor"]
    #     display_img[semantic_obs == main_id] = [1, 0, 1]
    # st()
    # display_img = cv2
    plt.imshow(display_img)
    plt.show()

    # convert the PIL image to a numpy array before slicing channels (RGBA -> RGB -> BGR)
    im = np.asarray(rgb_img)[..., :3]
    im = im[:, :, ::-1]
    outputs = self.maskrcnn(im)
    pred_masks = outputs['instances'].pred_masks
    pred_boxes = outputs['instances'].pred_boxes.tensor
    pred_classes = outputs['instances'].pred_classes
    pred_scores = outputs['instances'].scores

    # converts instance segmentation to individual masks and bbox visualisations
    v = Visualizer(im[:, :, ::-1],
                   MetadataCatalog.get(self.cfg.DATASETS.TRAIN[0]),
                   scale=1.2)
    out = v.draw_instance_predictions(outputs['instances'].to("cpu"))
    seg_im = out.get_image()
    # cv2.imshow('img', display_img)

    if visualize:
        arr = [rgb_img, semantic_img, depth_img, seg_im]
        titles = ['rgb', 'semantic', 'depth', 'seg_im']
        plt.figure(figsize=(12, 8))
        for i, data in enumerate(arr):
            # one column per panel; the original hard-coded 3 columns for 4 panels
            ax = plt.subplot(1, len(arr), i + 1)
            ax.axis('off')
            ax.set_title(titles[i])
            plt.imshow(data)
        # plt.pause()
        plt.show()
def predict(save_json=False, merge_result=False, d2_visual=True, my_visual=False):
    """
    instances format:
        {'pred_boxes': Boxes(tensor([[ 732.5856, 1598.1067,  766.4857, 1633.0486]], device='cuda:0')),
         'scores': tensor([0.9482], device='cuda:0'),
         'pred_classes': tensor([2], device='cuda:0')}
    Boxes can be converted with
        BoxMode.convert(pre_instances.pred_boxes.tensor, from_mode=BoxMode.XYXY_ABS, to_mode=BoxMode.XYWH_ABS)

    Scratch notes:
        print("\n" + "-" * int(i / len(dataset_test_dicts.keys()) * 100 * 50) + ">"
              + "{}".format(i / len(dataset_test_dicts.keys())) + "%", end='\r')
        time.sleep(0.00001)
        json.dump(coco_list_results, f, cls=MyEncoder, indent=2)
        # print(type(dict_value)); print(type(dict_value["image id"]))
    """
    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
    predictor = DefaultPredictor(cfg)
    # test_annos_root_dir = "/root/data/gvision/dataset/predict/s0.5_t0.8_141517"
    # test_json = "/root/data/gvision/dataset/predict/s0.5_t0.8_141517/image_annos/person_bbox_test_141517_split.json"
    test_image_path = "/root/data/gvision/dataset/predict/s0.5_t0.9_14/image_test"
    test_json = "/root/data/gvision/dataset/predict/s0.5_t0.9_14/image_annos/person_s0.5_t0.9_14_split_test.json"
    dataset_test_dicts = json.load(open(test_json, "r"))
    # metadata: Metadata(evaluator_type='coco',
    #   image_root='/root/data/gvision/dataset/train_all_annos/s0.3_t0.7_all/image_train',
    #   json_file='/root/data/gvision/dataset/train_all_annos/s0.3_t0.7_all/image_annos/coco_pv_train_bbox_hwnoi.json',
    #   name='pv_train',
    #   thing_classes=['visible body', 'full body', 'head', 'vehicle'],
    #   thing_dataset_id_to_contiguous_id={1: 0, 2: 1, 3: 2, 4: 3})
    # class colors: green, pink, purple, grey for category ids 1, 2, 3, 4
    MetadataCatalog.get("pv_train").set(
        thing_colors=[(138, 255, 0), (138, 0, 255), (255, 46, 46), (131, 131, 131)])
    # MetadataCatalog.get("pv_train").set(thing_colors=[(131, 131, 131)] * 4)
    train_dicts_metadata = MetadataCatalog.get("pv_train")
    print("metadata", train_dicts_metadata)

    coco_list_results = []
    print("predict-------------------start")
    os.makedirs(os.path.join(cfg.OUTPUT_DIR, "my_predict"), exist_ok=True)
    # for j, (file_name, dict_value) in enumerate(dataset_test_dicts.items()):
    for j, (file_name, dict_value) in enumerate(random.sample(list(dataset_test_dicts.items()), 9)):
        cate = []
        id_1, id_2, id_3, id_4 = 0, 0, 0, 0
        print("{}\t{}-------------------{}".format(file_name, j, len(dataset_test_dicts.keys())), flush=True)
        img = cv2.imread(os.path.join(test_image_path, file_name))
        pre_output = predictor(img)
        pre_instances = pre_output['instances']
        for i in range(len(pre_instances.scores)):
            # build a fresh result dict per detection; reusing one dict would make
            # every appended entry point to the same (last) detection
            coco_dict_results = {}
            coco_dict_results["image_id"] = dict_value["image id"]
            coco_dict_results["category_id"] = pre_instances.pred_classes.cpu().numpy()[i] + 1
            coco_dict_results["bbox"] = pre_instances.pred_boxes.tensor.cpu().numpy()[i]
            coco_dict_results["score"] = pre_instances.scores.cpu().numpy()[i]
            coco_list_results.append(coco_dict_results)
            if my_visual:
                cate.append(coco_dict_results["category_id"])
                # cv2 drawing functions need integer pixel coordinates
                xmin, ymin, xmax, ymax = map(int, coco_dict_results["bbox"])
                if coco_dict_results["category_id"] == 1:  # green
                    id_1 += 1
                    img = cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (138, 255, 0), 8, lineType=8)
                    cv2.putText(img, '{}'.format(coco_dict_results["category_id"]), (xmin, ymin),
                                cv2.FONT_HERSHEY_COMPLEX, 1.5, (138, 255, 0), 4)
                if coco_dict_results["category_id"] == 2:  # pink
                    id_2 += 1
                    img = cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (138, 0, 255), 8, lineType=8)
                    cv2.putText(img, '{}'.format(coco_dict_results["category_id"]), (xmin, ymin),
                                cv2.FONT_HERSHEY_COMPLEX, 1.5, (138, 0, 255), 4)
                if coco_dict_results["category_id"] == 3:  # purple
                    id_3 += 1
                    img = cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (255, 46, 46), 8, lineType=8)
                    cv2.putText(img, '{}'.format(coco_dict_results["category_id"]), (xmin, ymin),
                                cv2.FONT_HERSHEY_COMPLEX, 1.5, (255, 46, 46), 4)
                if coco_dict_results["category_id"] == 4:  # grey
                    id_4 += 1
                    img = cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (131, 131, 131), 8, lineType=8)
                    cv2.putText(img, '{}'.format(coco_dict_results["category_id"]), (xmin, ymin),
                                cv2.FONT_HERSHEY_COMPLEX, 1.5, (131, 131, 131), 4)
                if i == len(pre_instances.scores) - 1:
                    cv2.putText(img, r"len{} cid:{}".format(len(pre_instances.scores), list(set(cate))[:]),
                                (15, 40), cv2.FONT_HERSHEY_COMPLEX, 1.5, (170, 64, 112), 4)
                    cv2.putText(img, r"c1:{} c2:{} c3:{} c4:{}".format(id_1, id_2, id_3, id_4),
                                (15, 80), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (170, 64, 112), 4)
                    os.makedirs(os.path.join(cfg.OUTPUT_DIR, "my_pre_split_visual"), exist_ok=True)
                    cv2.imwrite(os.path.join(cfg.OUTPUT_DIR, "my_pre_split_visual",
                                             "vis2_{}".format(file_name)), img)
        if d2_visual:
            v = Visualizer(img[:, :, ::-1], metadata=train_dicts_metadata,
                           scale=1, instance_mode=ColorMode.IMAGE)  # or ColorMode.SEGMENTATION / ColorMode.IMAGE_BW
            v = v.draw_instance_predictions(pre_output["instances"].to("cpu"))
            os.makedirs(os.path.join(cfg.OUTPUT_DIR, "d2_predict_split_visual"), exist_ok=True)
            cv2.imwrite(os.path.join(cfg.OUTPUT_DIR, "d2_predict_split_visual",
                                     "vis2_{}".format(file_name)),
                        v.get_image()[:, :, ::-1])
    if save_json:
        f1 = open(os.path.join(cfg.OUTPUT_DIR, "my_predict", "pre_result_test.json"), 'w')
        f1.write(json.dumps(coco_list_results, cls=MyEncoder))
    print("predict----------------end")
    if merge_result:
        print("--------->>>>>>>>>merge-------------start")
        merge = ResultMerge.DetResMerge(
            resfile=os.path.join(cfg.OUTPUT_DIR, "my_predict", "pre_result.json"),
            splitannofile=test_json,
            srcannofile="/root/data/gvision/dataset/predict/s0.5_t0.8_141517/image_annos/person_bbox_test_141517.json",
            outpath=cfg.OUTPUT_DIR,
            outfile="my_predict/pre_merge_result.json")
        merge.mergeResults(is_nms=True)
        print("merge-------------end")
'/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0021/000028.png', '/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0027/000067.png', '/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0002/000126.png', '/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0000/000456.png', '/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0020/000091.png', '/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0016/000446.png', '/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0005/000285.png', '/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0028/000048.png' ] i = 0 for image in random_paths: im = cv2.imread(image) output = predictor(im) v = Visualizer(im[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2) v = v.draw_instance_predictions(output["instances"].to("cpu")) cv2.imwrite(f"{path2}0{i}.png", v.get_image()[:, :, ::-1]) i = i + 1 random_paths = [ '/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0023/000329.png', '/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0022/000157.png', '/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0004/000156.png', '/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0018/000123.png', '/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0017/000200.png', '/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0001/000047.png', '/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0008/000027.png', '/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0019/000028.png', '/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0010/000073.png',
"width": w, "date_captured": "", "flickr_url": "", "id": img_id } images.append(img_dict) if c == 1: tqdm.write('Got greyscale image. Repeating channel axis.') im = im[:, :, np.newaxis].repeat(3, axis=-1) outputs = predictor(im) if args.vis: v = Visualizer(im, MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2) v = v.draw_instance_predictions(outputs["instances"].to("cpu")) plt.imshow(v.get_image()) plt.show() continue cv2.imwrite(str(img_folder / img_name), im) fields = outputs['instances'].get_fields() for points, score, box, area in zip( fields['pred_keypoints'].cpu().numpy(), fields['scores'].cpu(), fields['pred_boxes'].tensor.cpu().numpy(), fields['pred_boxes'].area()): if score < detections_threshold: continue
if scores is not None: if labels is None: labels = ["{:.0f}%".format(s * 100) for s in scores] else: labels = [ "{} {:.0f}%".format(l, s * 100) for l, s in zip(labels, scores) ] return labels def object_detect(image): predictions = predictor(image) boxes = predictions["instances"].pred_boxes scores = predictions["instances"].scores classes = predictions["instances"].pred_classes # labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) return predictions, boxes, scores, classes if __name__ == '__main__': image = cv2.imread("demo.png") image = imutils.resize(image, width=400) outputs, boxes, scores, classes = object_detect(image) v = Visualizer(image[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2) out = v.draw_instance_predictions(outputs["instances"].to("cpu")) cv2.imshow("image", out.get_image()[:, :, ::-1]) cv2.waitKey(0)
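# A small follow-on sketch: turning the tensors returned by object_detect into
# readable per-detection labels, assuming COCO metadata is attached to
# cfg.DATASETS.TRAIN[0] (this mirrors the commented-out _create_text_labels call):
thing_classes = MetadataCatalog.get(cfg.DATASETS.TRAIN[0]).get("thing_classes", None)
for cls_id, score in zip(classes.cpu().tolist(), scores.cpu().tolist()):
    name = thing_classes[cls_id] if thing_classes else str(cls_id)
    print(f"{name}: {score:.2f}")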
predictions = json.load(f) pred_by_image = defaultdict(list) for p in predictions: pred_by_image[p["image_id"]].append(p) dicts = list(DatasetCatalog.get(args.dataset)) metadata = MetadataCatalog.get(args.dataset) os.makedirs(args.output, exist_ok=True) for dic in tqdm.tqdm(dicts): img = cv2.imread(dic["file_name"], cv2.IMREAD_COLOR)[:, :, ::-1] basename = os.path.basename(dic["file_name"]) predictions = create_instances(pred_by_image[dic["image_id"]], img.shape[:2]) vis = Visualizer(img, metadata, edge_width=1, ifShowLabel=False) vis_pred = vis.draw_instance_predictions(predictions, alpha=0).get_image() vis = Visualizer(img, metadata, edge_width=1, ifDrawBox=False, ifShowLabel=False) vis_gt = vis.draw_dataset_dict(dic, alpha=0).get_image() concat = np.concatenate((vis_pred, vis_gt), axis=0) cv2.imwrite(os.path.join(args.output, basename), concat[:, :, ::-1])
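# create_instances is used above but not defined in this snippet. A minimal sketch
# of what such a helper typically looks like (modeled on detectron2's
# visualize_json_results tool); the 0.5 score threshold and the identity mapping
# from category_id to contiguous class index are assumptions:
from detectron2.structures import Boxes, BoxMode, Instances

def create_instances(predictions, image_size, score_thresh=0.5):
    ret = Instances(image_size)
    scores = np.asarray([p["score"] for p in predictions])
    chosen = (scores > score_thresh).nonzero()[0]
    bbox = np.asarray([predictions[i]["bbox"] for i in chosen]).reshape(-1, 4)
    # COCO-format result files store XYWH boxes; Instances expects XYXY
    bbox = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
    ret.scores = scores[chosen]
    ret.pred_boxes = Boxes(bbox)
    ret.pred_classes = np.asarray([predictions[i]["category_id"] for i in chosen])
    return ret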
if args.source == "dataloader": train_data_loader = build_detection_train_loader(cfg) for batch in train_data_loader: for per_image in batch: # Pytorch tensor is in (C, H, W) format img = per_image["image"].permute(1, 2, 0) if cfg.INPUT.FORMAT == "BGR": img = img[:, :, [2, 1, 0]] else: img = np.asarray( Image.fromarray(img, mode=cfg.INPUT.FORMAT).convert( "RGB" ) ) visualizer = Visualizer(img, metadata=metadata, scale=scale) target_fields = per_image["instances"].get_fields() labels = [ metadata.thing_classes[i] for i in target_fields["gt_classes"] ] vis = visualizer.overlay_instances( labels=labels, boxes=target_fields.get("gt_boxes", None), ) output(vis, str(per_image["image_id"]) + ".jpg") else: dicts = list( chain.from_iterable( [DatasetCatalog.get(k) for k in cfg.DATASETS.TRAIN] )
def detect(save_img=False): # (320, 192) or (416, 256) or (608, 352) for (height, width) img_size = (320, 192) if ONNX_EXPORT else opt.img_size out, source, weights, half, view_img, save_txt = opt.output, opt.source, opt.weights, opt.half, opt.view_img, opt.save_txt webcam = source == '0' or source.startswith( 'rtsp') or source.startswith('http') or source.endswith('.txt') # Initialize device = torch_utils.select_device( device='cpu' if ONNX_EXPORT else opt.device) if os.path.exists(out): shutil.rmtree(out) # delete output folder os.makedirs(out) # make new output folder # Initialize model model = Darknet(opt.cfg, img_size) # Load weights attempt_download(weights) if weights.endswith('.pt'): # pytorch format model.load_state_dict(torch.load( weights, map_location=device)['model']) else: # darknet format load_darknet_weights(model, weights) # Second-stage classifier classify = False if classify: modelc = torch_utils.load_classifier( name='resnet101', n=2) # initialize modelc.load_state_dict(torch.load( 'weights/resnet101.pt', map_location=device)['model']) # load weights modelc.to(device).eval() # Eval mode model.to(device).eval() # Fuse Conv2d + BatchNorm2d layers # model.fuse() # Export mode if ONNX_EXPORT: model.fuse() img = torch.zeros((1, 3) + img_size) # (1, 3, 320, 192) f = opt.weights.replace(opt.weights.split( '.')[-1], 'onnx') # *.onnx filename torch.onnx.export(model, img, f, verbose=False, opset_version=11, input_names=['images'], output_names=['classes', 'boxes']) # Validate exported model import onnx model = onnx.load(f) # Load the ONNX model onnx.checker.check_model(model) # Check that the IR is well formed # Print a human readable representation of the graph print(onnx.helper.printable_graph(model.graph)) return # Half precision half = half and device.type != 'cpu' # half precision only supported on CUDA if half: model.half() # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = True # set True to speed up constant image size inference torch.backends.cudnn.benchmark = True dataset = LoadStreams(source, img_size=img_size) else: save_img = True dataset = LoadImages(source, img_size=img_size) # Get names and colors names = load_classes(opt.names) colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))] # Run inference t0 = time() img = torch.zeros((1, 3, img_size, img_size), device=device) # init img _ = model(img.half() if half else img.float() ) if device.type != 'cpu' else None # run once times = [] datas = [] for path, img, im0s, vid_cap in dataset: start_time = time() img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = torch_utils.time_synchronized() pred = model(img, augment=opt.augment)[0] t2 = torch_utils.time_synchronized() # to float if half: pred = pred.float() # Apply NMS pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, multi_label=False, classes=opt.classes, agnostic=opt.agnostic_nms) # Pass on to Detectron is Classes detected if pred != [None]: im = cv2.imread(path) outputs = predictor(im) v = Visualizer( im[:, :, ::-1], metadata=statement_metadata, scale=1., instance_mode=ColorMode.IMAGE ) instances = outputs["instances"].to("cpu") instances.remove('pred_masks') print(instances) v = v.draw_instance_predictions(instances) result = v.get_image()[:, :, ::-1] file_name = ntpath.basename(path) write_res = cv2.imwrite(f'output_hybrid/{file_name}', result) end_time 
= time()
            datas.append(str([file_name, instances, end_time - start_time]))
        else:
            im = cv2.imread(path)
            file_name = ntpath.basename(path)
            write_res = cv2.imwrite(f'output_hybrid/{file_name}', im)
            end_time = time()
            # nothing was detected for this frame, so there are no instances to record
            datas.append(str([file_name, None, end_time - start_time]))
    print('Done. (%.3fs)' % (time() - t0))

    # with open('output_hybrid/times.txt', 'w') as t:
    #     t.write('\n'.join(times))
    with open(f'output/outputs_hybrid{opt.conf_thres}.txt', 'w') as t:
        t.write('\n'.join(datas))
### original visualize data #dataset_dicts = get_balloon_dicts("balloon/train") #for d in random.sample(dataset_dicts, 3): # img = cv2.imread(d["file_name"]) # visualizer = Visualizer(img[:, :, ::-1], metadata=balloon_metadata, scale=0.5) # out = visualizer.draw_dataset_dict(d) # cv2_imshow(out.get_image()[:, :, ::-1]) ### end visualize ### my local visualize data dataset_dicts = get_balloon_dicts("balloon/train") for d in random.sample(dataset_dicts, 3): img = cv2.imread(d["file_name"]) visualizer = Visualizer(img[:, :, ::-1], metadata=balloon_metadata, scale=0.5) out = visualizer.draw_dataset_dict(d) b, g, r = cv2.split((out.get_image()[:, :, ::-1])) image_rgb = cv2.merge([r, g, b]) plt.figure() plt.ion() plt.imshow(image_rgb) plt.show() ### end visualize from detectron2.engine import DefaultTrainer cfg = get_cfg() cfg.merge_from_file( model_zoo.get_config_file(
def task_a(model_name, model_file, augmentation=False): model_name = model_name + '_inference' print('Running task A for model', model_name) SAVE_PATH = os.path.join('./results_week_6_task_a', model_name) os.makedirs(SAVE_PATH, exist_ok=True) # Load model and configuration print('Loading Model') cfg = get_cfg() cfg.merge_from_file(model_zoo.get_config_file(model_file)) cfg.DATASETS.TRAIN = ('KITTIMOTS_train', ) cfg.DATASETS.TEST = ('KITTIMOTS_val', ) cfg.DATALOADER.NUM_WORKERS = 0 cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5 cfg.OUTPUT_DIR = SAVE_PATH cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_file) cfg.SOLVER.IMS_PER_BATCH = 4 cfg.SOLVER.BASE_LR = 0.00025 cfg.SOLVER.MAX_ITER = 1000 cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256 cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3 cfg.TEST.SCORE_THRESH = 0.5 print(cfg) # Training print('Training') if augmentation: print("data augmentation") '''resize_factor = 1 crop_size = [300,300] print("resize_factor: ", resize_factor) print("crop_size: ", crop_size)''' trainer = OurTrainer(cfg) else: print("NO data augmentation") trainer = DefaultTrainer(cfg) val_loss = ValidationLoss(cfg) trainer.register_hooks([val_loss]) trainer._hooks = trainer._hooks[:-2] + trainer._hooks[-2:][::-1] trainer.resume_or_load(resume=False) trainer.train() # Evaluation print('Evaluating') evaluator = COCOEvaluator('KITTIMOTS_val', cfg, False, output_dir='./output') trainer.model.load_state_dict(val_loss.weights) trainer.test(cfg, trainer.model, evaluators=[evaluator]) print('Plotting losses') draw_loss(cfg, cfg.SOLVER.MAX_ITER, model_name, SAVE_PATH) # Qualitative results: visualize some results print('Getting qualitative results') predictor = DefaultPredictor(cfg) predictor.model.load_state_dict(trainer.model.state_dict()) inputs = kitti_val() inputs = inputs[:20] + inputs[-20:] for i, input in enumerate(inputs): file_name = input['file_name'] print('Prediction on image ' + file_name) img = cv2.imread(file_name) outputs = predictor(img) v = Visualizer(img[:, :, ::-1], metadata=MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=0.8, instance_mode=ColorMode.IMAGE) v = v.draw_instance_predictions(outputs['instances'].to('cpu')) cv2.imwrite( os.path.join(SAVE_PATH, 'Inference_' + model_name + '_inf_' + str(i) + '.png'), v.get_image()[:, :, ::-1])
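# ValidationLoss is used above but not defined in this snippet. A minimal sketch
# of such a hook (a common community pattern; the original implementation may
# differ). It feeds validation batches through the model in training mode to get a
# loss dict, logs it to the trainer's storage, and keeps a copy of the weights from
# the best iteration so that `val_loss.weights` can be loaded afterwards:
import copy
import torch
import detectron2.utils.comm as comm
from detectron2.engine import HookBase
from detectron2.data import build_detection_train_loader

class ValidationLoss(HookBase):
    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg.clone()
        self.cfg.DATASETS.TRAIN = cfg.DATASETS.TEST  # iterate over the validation split
        self._loader = iter(build_detection_train_loader(self.cfg))
        self.best_loss = float("inf")
        self.weights = None

    def after_step(self):
        data = next(self._loader)
        with torch.no_grad():
            loss_dict = self.trainer.model(data)
            losses = sum(loss_dict.values())
            assert torch.isfinite(losses).all(), loss_dict
            loss_dict_reduced = {"val_" + k: v.item()
                                 for k, v in comm.reduce_dict(loss_dict).items()}
            losses_reduced = sum(loss_dict_reduced.values())
            if comm.is_main_process():
                self.trainer.storage.put_scalars(total_val_loss=losses_reduced,
                                                 **loss_dict_reduced)
            if losses_reduced < self.best_loss:
                self.best_loss = losses_reduced
                self.weights = copy.deepcopy(self.trainer.model.state_dict())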
def draw_instance_predictions(self, frame, predictions): """ Draw instance-level prediction results on an image. Args: frame (ndarray): an RGB image of shape (H, W, C), in the range [0, 255]. predictions (Instances): the output of an instance detection/segmentation model. Following fields will be used to draw: "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle"). Returns: output (VisImage): image object with visualizations. """ frame_visualizer = Visualizer(frame, self.metadata) num_instances = len(predictions) if num_instances == 0: return frame_visualizer.output boxes = predictions.pred_boxes.tensor.numpy() if predictions.has( "pred_boxes") else None scores = predictions.scores if predictions.has("scores") else None classes = predictions.pred_classes.numpy() if predictions.has( "pred_classes") else None keypoints = predictions.pred_keypoints if predictions.has( "pred_keypoints") else None colors = predictions.COLOR if predictions.has( "COLOR") else [None] * len(predictions) durations = predictions.ID_duration if predictions.has( "ID_duration") else None duration_threshold = self.metadata.get("duration_threshold", 0) visibilities = None if durations is None else [ x > duration_threshold for x in durations ] if predictions.has("pred_masks"): masks = predictions.pred_masks # mask IOU is not yet enabled # masks_rles = mask_util.encode(np.asarray(masks.permute(1, 2, 0), order="F")) # assert len(masks_rles) == num_instances else: masks = None detected = [ _DetectedInstance(classes[i], boxes[i], mask_rle=None, color=colors[i], ttl=8) for i in range(num_instances) ] if not predictions.has("COLOR"): colors = self._assign_colors(detected) labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) if self._instance_mode == ColorMode.IMAGE_BW: # any() returns uint8 tensor frame_visualizer.output.reset_image( frame_visualizer._create_grayscale_image((masks.any( dim=0) > 0).numpy() if masks is not None else None)) alpha = 0.3 else: alpha = 0.5 labels = None if labels is None else [ y[0] for y in filter(lambda x: x[1], zip(labels, visibilities)) ] # noqa assigned_colors = None if colors is None else [ y[0] for y in filter(lambda x: x[1], zip(colors, visibilities)) ] # noqa frame_visualizer.overlay_instances( boxes=None if masks is not None else boxes[visibilities], # boxes are a bit distracting masks=None if masks is None else masks[visibilities], labels=labels, keypoints=None if keypoints is None else keypoints[visibilities], assigned_colors=assigned_colors, alpha=alpha, ) return frame_visualizer.output
def detect(): out_res, source, weights, save_txt = \ opt.output, opt.source, opt.weights, opt.save_txt def get_wire_dict(img_dir): fff = ['instances_default.json', 'instances_default2.json'] dataset_dicts = [] for name in fff: json_file = os.path.join(img_dir, name) with open(json_file) as f: imgs_anns = json.load(f) for idx, v in enumerate(imgs_anns['images']): record = {} filename = os.path.join(img_dir, v["file_name"]) height, width = cv2.imread(filename).shape[:2] record["file_name"] = filename record["image_id"] = idx record["height"] = height record["width"] = width annos = imgs_anns['annotations'] objs = [] for anno in annos: if anno['image_id'] == v['id']: obj = { "bbox": anno['bbox'], "bbox_mode": BoxMode.XYWH_ABS, "segmentation": anno['segmentation'], "category_id": 0, } objs.append(obj) record["annotations"] = objs dataset_dicts.append(record) return dataset_dicts class DatasetMapper: """ A callable which takes a dataset dict in Detectron2 Dataset format, and map it into a format used by the model. This is a custom version of the DatasetMapper. The only different with Detectron2's DatasetMapper is that we extract attributes from our dataset_dict. """ def __init__(self, cfg, is_train=True): if cfg.INPUT.CROP.ENABLED and is_train: self.crop_gen = T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE) # logging.getLogger(__name__).info("CropGen used in training: " + str(self.crop_gen)) else: self.crop_gen = None self.tfm_gens = [ T.Resize((640, 640)), T.RandomBrightness(0.1, 1.6), T.RandomContrast(0.1, 1), T.RandomSaturation(0.1, 1), T.RandomRotation(angle=[90, 90]), T.RandomFlip(prob=0.4, horizontal=False, vertical=True), # T.RandomCrop('relative_range', (0.4, 0.6)), # CutOut() ] # self.tfm_gens = utils.build_transform_gen(cfg, is_train) # fmt: off self.img_format = cfg.INPUT.FORMAT self.mask_on = cfg.MODEL.MASK_ON self.mask_format = cfg.INPUT.MASK_FORMAT self.keypoint_on = cfg.MODEL.KEYPOINT_ON self.load_proposals = cfg.MODEL.LOAD_PROPOSALS # fmt: on if self.keypoint_on and is_train: # Flip only makes sense in training self.keypoint_hflip_indices = utils.create_keypoint_hflip_indices( cfg.DATASETS.TRAIN) else: self.keypoint_hflip_indices = None if self.load_proposals: self.min_box_side_len = cfg.MODEL.PROPOSAL_GENERATOR.MIN_SIZE self.proposal_topk = ( cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN if is_train else cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST) self.is_train = is_train def __call__(self, dataset_dict): """ Args: dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. Returns: dict: a format that builtin models in detectron2 accept """ dataset_dict = copy.deepcopy( dataset_dict) # it will be modified by code below # USER: Write your own image loading if it's not from a file image = utils.read_image(dataset_dict["file_name"], format=self.img_format) utils.check_image_size(dataset_dict, image) if "annotations" not in dataset_dict: image, transforms = T.apply_transform_gens( ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image) else: # Crop around an instance if there are instances in the image. 
# USER: Remove if you don't use cropping if self.crop_gen: crop_tfm = utils.gen_crop_transform_with_instance( self.crop_gen.get_crop_size(image.shape[:2]), image.shape[:2], np.random.choice(dataset_dict["annotations"]), ) image = crop_tfm.apply_image(image) image, transforms = T.apply_transform_gens( self.tfm_gens, image) if self.crop_gen: transforms = crop_tfm + transforms image_shape = image.shape[:2] # h, w # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, # but not efficient on large generic data structures due to the use of pickle & mp.Queue. # Therefore it's important to use torch.Tensor. dataset_dict["image"] = torch.as_tensor( np.ascontiguousarray(image.transpose(2, 0, 1))) # USER: Remove if you don't use pre-computed proposals. if self.load_proposals: utils.transform_proposals(dataset_dict, image_shape, transforms, self.min_box_side_len, self.proposal_topk) if not self.is_train: # USER: Modify this if you want to keep them for some reason. dataset_dict.pop("annotations", None) dataset_dict.pop("sem_seg_file_name", None) return dataset_dict if "annotations" in dataset_dict: # USER: Modify this if you want to keep them for some reason. for anno in dataset_dict["annotations"]: if not self.mask_on: anno.pop("segmentation", None) if not self.keypoint_on: anno.pop("keypoints", None) # USER: Implement additional transformations if you have other types of data annos = [ utils.transform_instance_annotations( obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices) for obj in dataset_dict.pop("annotations") if obj.get("iscrowd", 0) == 0 ] instances = utils.annotations_to_instances( annos, image_shape, mask_format=self.mask_format) # Create a tight bounding box from masks, useful when image is cropped if self.crop_gen and instances.has("gt_masks"): instances.gt_boxes = instances.gt_masks.get_bounding_boxes( ) dataset_dict["instances"] = utils.filter_empty_instances( instances) # USER: Remove if you don't do semantic/panoptic segmentation. 
# if "sem_seg_file_name" in dataset_dict: # with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f: # sem_seg_gt = Image.open(f) # sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8") # sem_seg_gt = transforms.apply_segmentation(sem_seg_gt) # sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long")) # dataset_dict["sem_seg"] = sem_seg_gt return dataset_dict class wireTrainer(DefaultTrainer): @classmethod def build_train_loader(cls, cfg): return build_detection_train_loader(cfg, mapper=DatasetMapper(cfg)) @classmethod def build_test_loader(cls, cfg, dataset_name): return build_detection_test_loader(cfg, dataset_name, mapper=DatasetMapper(cfg)) def register_dataset(dataset_label, d): # Register dataset - if dataset is already registered, give it a new name try: DatasetCatalog.register( dataset_label, lambda d=d: get_wire_dict("dataset_wire/" + d)) MetadataCatalog.get(dataset_label).thing_classes = ['wire'] except: # Add random int to dataset name to not run into 'Already registered' error n = random.randint(1, 1000) dataset_label = dataset_label + str(n) DatasetCatalog.register( dataset_label, lambda d=d: get_wire_dict("dataset_wire/" + d)) MetadataCatalog.get(dataset_label).thing_classes = ['wire'] return MetadataCatalog.get(dataset_label), dataset_label metadata, train_dataset = register_dataset('wire_train', "train") # metadata, test_dataset = register_dataset('wire_test', "val") wire_dict = get_wire_dict("dataset_wire/train") # for d in random.sample(wire_dict, 2): # plt.figure(figsize=(10,10)) # img = cv2.imread(d["file_name"]) # visualizer = Visualizer(img[:, :, ::-1], metadata, scale=0.5) # vis = visualizer.draw_dataset_dict(d) # plt.imshow(vis.get_image()[:, :, ::-1]) # plt.show() MODEL_USE = 'ResNet' if MODEL_USE == 'faster_rcnn': MODEL_PATH = 'COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml' WEIGHT_PATH = 'COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml' elif MODEL_USE == 'ResNet': MODEL_PATH = 'COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml' WEIGHT_PATH = 'COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml' def cfg_setup(): cfg = get_cfg() cfg.merge_from_file(model_zoo.get_config_file(MODEL_PATH)) cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(WEIGHT_PATH) cfg.MODEL.RETINANET.NUM_CLASSES = 1 cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256 cfg.DATASETS.TRAIN = (train_dataset, ) cfg.DATASETS.TEST = () cfg.DATALOADER.NUM_WORKERS = 4 cfg.SOLVER.IMS_PER_BATCH = 4 cfg.SOLVER.LR_SCHEDULER_NAME = 'WarmupCosineLR' cfg.SOLVER.BASE_LS = 0.02 # cfg.SOLVER.WARMUP_ITERS = 4500 # cfg.SOLVER.WARMUP_METHOD = "linear" cfg.SOLVER.MAX_ITER = 2000 os.makedirs(cfg.OUTPUT_DIR, exist_ok=True) return cfg def cfg_test(): cfg = get_cfg() cfg.merge_from_file(model_zoo.get_config_file(MODEL_PATH)) cfg.MODEL.WEIGHTS = os.path.join(weights) # cfg.DATASETS.TEST = (test_dataset,) cfg.MODEL.RETINANET.NUM_CLASSES = 1 cfg.MODEL.RETINANET.SCORE_THRESH_TEST = 0.5 return cfg cfg = cfg_test() predict = DefaultPredictor(cfg) lines = [] # for i, d in enumerate(glob.glob('./img/*.jpg')): # fig, ax = plt.subplots() # namefile = os.path.basename(d) # # im = cv2.imread(d) # outputs = predict(im) # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format # out = outputs["instances"].to("cpu") # masks = out.pred_masks.numpy() # # for i in range(masks.shape[0]): # immask = masks[i,:,:] # graf = [] # plot_x = [] # plot_y = [] # coords = np.column_stack(np.where(immask > 0)) # # for x in range(immask.shape[1]): # # n, = np.where(immask[:,x] == 
True) # # if len(n) != 0: # # yyy = immask.shape[0] - np.mean(n) # # # graf.append([x,yyy]) # # plot_x.append(x) # # plot_y.append(yyy) # # # print(graf) # if len(coords[:,1]) != 0: # cor1 = stat.pearsonr(coords[:,1], coords[:,0]) # # cor2 = stat.pearsonr(coords[:,0], coords[:,1]) # plt.scatter(coords[:,1],immask.shape[0]-coords[:,0], label = '{}'.format(np.round(cor1[0],3))) # ax.legend() # plt.title(namefile) # plt.show() # lines.append(graf) # dataset_dicts = get_wire_dicts("dataset_wire/val") for i, d in enumerate(glob.glob(source + '/*.jpg')): im = cv2.imread(d) namefile = os.path.basename(d) arr = os.path.splitext(namefile) outputs = predict( im ) # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format v = Visualizer( im[:, :, ::-1], metadata=metadata, scale=1, instance_mode=ColorMode. IMAGE_BW # remove the colors of unsegmented pixels. This option is only available for segmentation models ) outputs2 = outputs["instances"].to("cpu") out = v.draw_instance_predictions(outputs2) # cv2.imshow('',out.get_image()[:, :, ::-1]) cv2.imwrite(os.path.join(out_res, namefile), out.get_image()[:, :, ::-1]) # cv2.waitKey() if True: #save_txt: arr_nump_mask = outputs2.pred_masks.numpy() for iiii in range(arr_nump_mask.shape[0]): np.savetxt(os.path.join(out_res, '{}_{}.txt'.format(arr[0], iiii)), arr_nump_mask[iiii, :, :], fmt='%i')
def draw_instance_predictions_custom(self, frame, predictions, incl_boxes=True, incl_labels=True, incl_scores=True, target_alpha=None): frame_visualizer = Visualizer(frame, self.metadata) num_instances = len(predictions) if num_instances == 0: return frame_visualizer.output boxes = predictions.pred_boxes.tensor.numpy() if predictions.has( "pred_boxes") else None scores = predictions.scores if predictions.has("scores") else None scores = scores if incl_scores else None classes = predictions.pred_classes.numpy() if predictions.has( "pred_classes") else None keypoints = predictions.pred_keypoints if predictions.has( "pred_keypoints") else None if predictions.has("pred_masks"): masks = predictions.pred_masks # mask IOU is not yet enabled # masks_rles = mask_util.encode(np.asarray(masks.permute(1, 2, 0), order="F")) # assert len(masks_rles) == num_instances else: masks = None detected = [ _DetectedInstance(classes[i], boxes[i], mask_rle=None, color=None, ttl=8) for i in range(num_instances) ] colors = self._assign_colors(detected) labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) if self._instance_mode == ColorMode.IMAGE_BW: # any() returns uint8 tensor frame_visualizer.output.img = frame_visualizer._create_grayscale_image( (masks.any(dim=0) > 0).numpy() if masks is not None else None) alpha = 0.3 else: alpha = 0.5 boxes = boxes if incl_boxes else None labels = labels if incl_labels else None alpha = alpha if target_alpha is None else target_alpha frame_visualizer.overlay_instances( boxes=None if masks is not None else boxes, # boxes are a bit distracting masks=masks, labels=labels, keypoints=keypoints, assigned_colors=colors, alpha=alpha, ) return frame_visualizer.output
def experiment_1(exp_name, model_file): print('Running Task B experiment', exp_name) SAVE_PATH = os.path.join('./results_week_6_task_b', exp_name) os.makedirs(SAVE_PATH, exist_ok=True) # Loading data print('Loading data') kittiloader = KittiMots() def rkitti_train(): return kittiloader.get_dicts(flag='train', method='complete', percentage=1.0) def rkitti_val(): return kittiloader.get_dicts(flag='val') def rkitti_test(): return kittiloader.get_dicts(flag='test') DatasetCatalog.register('KITTI_train', rkitti_train) MetadataCatalog.get('KITTI_train').set( thing_classes=list(KITTI_CATEGORIES.keys())) DatasetCatalog.register('KITTI_val', rkitti_val) MetadataCatalog.get('KITTI_val').set( thing_classes=list(KITTI_CATEGORIES.keys())) DatasetCatalog.register('KITTI_test', rkitti_test) MetadataCatalog.get('KITTI_test').set( thing_classes=list(KITTI_CATEGORIES.keys())) # Load model and configuration print('Loading Model') cfg = get_cfg() cfg.merge_from_file(model_zoo.get_config_file(model_file)) cfg.DATASETS.TRAIN = ('KITTI_train', ) cfg.DATASETS.TEST = ('KITTI_val', ) cfg.DATALOADER.NUM_WORKERS = 4 cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5 cfg.OUTPUT_DIR = SAVE_PATH cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_file) cfg.SOLVER.IMS_PER_BATCH = 4 cfg.SOLVER.BASE_LR = 0.00025 cfg.SOLVER.MAX_ITER = 4000 cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256 cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3 cfg.TEST.SCORE_THRESH = 0.5 # Training print('Training') trainer = DefaultTrainer(cfg) val_loss = ValidationLoss(cfg) trainer.register_hooks([val_loss]) trainer._hooks = trainer._hooks[:-2] + trainer._hooks[-2:][::-1] trainer.resume_or_load(resume=False) trainer.train() # Evaluation print('Evaluating') cfg.DATASETS.TEST = ('KITTI_test', ) evaluator = COCOEvaluator('KITTI_test', cfg, False, output_dir=SAVE_PATH) trainer.model.load_state_dict(val_loss.weights) trainer.test(cfg, trainer.model, evaluators=[evaluator]) print('Plotting losses') plot_validation_loss(cfg, cfg.SOLVER.MAX_ITER, exp_name, SAVE_PATH, 'validation_loss.png') # Qualitative results: visualize some results print('Getting qualitative results') predictor = DefaultPredictor(cfg) predictor.model.load_state_dict(trainer.model.state_dict()) inputs = rkitti_test() inputs = [inputs[i] for i in TEST_INFERENCE_VALUES] for i, input in enumerate(inputs): file_name = input['file_name'] print('Prediction on image ' + file_name) img = cv2.imread(file_name) outputs = predictor(img) v = Visualizer(img[:, :, ::-1], metadata=MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=0.8, instance_mode=ColorMode.IMAGE) v = v.draw_instance_predictions(outputs['instances'].to('cpu')) cv2.imwrite( os.path.join(SAVE_PATH, 'Inference_' + exp_name + '_inf_' + str(i) + '.png'), v.get_image()[:, :, ::-1])
def draw_panoptic_seg_predictions_custom(self, frame, panoptic_seg, segments_info, area_threshold=None, alpha=0.5, incl_labels=True): frame_visualizer = Visualizer(frame, self.metadata) pred = _PanopticPrediction(panoptic_seg, segments_info) if self._instance_mode == ColorMode.IMAGE_BW: frame_visualizer.output.img = frame_visualizer._create_grayscale_image( pred.non_empty_mask()) # draw mask for all semantic segments first i.e. "stuff" for mask, sinfo in pred.semantic_masks(): category_idx = sinfo["category_id"] try: mask_color = [ x / 255 for x in self.metadata.stuff_colors[category_idx] ] except AttributeError: mask_color = None text = self.metadata.stuff_classes[ category_idx] if incl_labels else None frame_visualizer.draw_binary_mask( mask, color=mask_color, text=text, alpha=alpha, area_threshold=area_threshold, ) all_instances = list(pred.instance_masks()) if len(all_instances) == 0: return frame_visualizer.output # draw mask for all instances second masks, sinfo = list(zip(*all_instances)) num_instances = len(masks) masks_rles = mask_util.encode( np.asarray(np.asarray(masks).transpose(1, 2, 0), dtype=np.uint8, order="F")) assert len(masks_rles) == num_instances category_ids = [x["category_id"] for x in sinfo] detected = [ _DetectedInstance(category_ids[i], bbox=None, mask_rle=masks_rles[i], color=None, ttl=8) for i in range(num_instances) ] colors = self._assign_colors(detected) labels = [self.metadata.thing_classes[k] for k in category_ids] labels = labels if incl_labels else None frame_visualizer.overlay_instances( boxes=None, masks=masks, labels=labels, keypoints=None, assigned_colors=colors, alpha=alpha, ) return frame_visualizer.output
name_list = [] for images in ddd["images"]: k = "/content/drive/My Drive/new_imgs/" + images["file_name"] name_list.append(k) print(len(name_list)) print(name_list) for imageName in glob.glob('/content/drive/My Drive/new_imgs/*png'): if (imageName in name_list): N = imageName im = cv2.imread(N) outputs = predictor(im) v = Visualizer(im[:, :, ::-1], metadata=test_metadata, scale=1 ) out = v.draw_instance_predictions(outputs["instances"].to("cpu")) cv2_imshow(out.get_image()[:, :, ::-1]) original_box = [] for box in outputs["instances"]._fields["pred_boxes"]: box = box.to('cpu') box = box.numpy() box = box.tolist() original_box.append(box) aa = cv2.imread(N, cv2.IMREAD_COLOR) aa = cv2.cvtColor(aa, cv2.COLOR_BGR2RGB) for box in original_box: lx, ly, rx, ry = box
def draw_instance_predictions(self, frame, predictions): """ Draw instance-level prediction results on an image. Args: frame (ndarray): an RGB image of shape (H, W, C), in the range [0, 255]. predictions (Instances): the output of an instance detection/segmentation model. Following fields will be used to draw: "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle"). Returns: output (VisImage): image object with visualizations. """ frame_visualizer = Visualizer(frame, self.metadata) num_instances = len(predictions) if num_instances == 0: return frame_visualizer.output boxes = predictions.pred_boxes.tensor.numpy() if predictions.has( "pred_boxes") else None scores = predictions.scores if predictions.has("scores") else None classes = predictions.pred_classes.numpy() if predictions.has( "pred_classes") else None keypoints = predictions.pred_keypoints if predictions.has( "pred_keypoints") else None if predictions.has("pred_masks"): masks = predictions.pred_masks # mask IOU is not yet enabled # masks_rles = mask_util.encode(np.asarray(masks.permute(1, 2, 0), order="F")) # assert len(masks_rles) == num_instances else: masks = None detected = [ _DetectedInstance(classes[i], boxes[i], mask_rle=None, color=None, ttl=8) for i in range(num_instances) ] colors = self._assign_colors(detected) labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) if self._instance_mode == ColorMode.IMAGE_BW: # any() returns uint8 tensor frame_visualizer.output.img = frame_visualizer._create_grayscale_image( (masks.any(dim=0) > 0).numpy() if masks is not None else None) alpha = 0.3 else: alpha = 0.5 frame_visualizer.overlay_instances( boxes=None if masks is not None else boxes, # boxes are a bit distracting masks=masks, labels=labels, keypoints=keypoints, assigned_colors=colors, alpha=alpha, ) return frame_visualizer.output
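# A minimal sketch of driving this method frame by frame, assuming it lives on a
# VideoVisualizer-style object (video_visualizer below), that a DefaultPredictor
# has already been built, and that "input.mp4" is a placeholder path:
cap = cv2.VideoCapture("input.mp4")
while True:
    ok, frame_bgr = cap.read()
    if not ok:
        break
    frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)  # the method expects RGB frames
    predictions = predictor(frame_bgr)["instances"].to("cpu")
    vis_frame = video_visualizer.draw_instance_predictions(frame_rgb, predictions)
    cv2.imshow("video", vis_frame.get_image()[:, :, ::-1])   # back to BGR for OpenCV
    if cv2.waitKey(1) == 27:  # ESC to stop
        break
cap.release()
cv2.destroyAllWindows()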
plt.show() # lines.append(graf) from detectron2.utils.visualizer import ColorMode # dataset_dicts = get_wire_dicts("dataset_wire/val") for i, d in enumerate(glob.glob('./img/*.jpg')): im = cv2.imread(d) namefile = os.path.basename(d) arr = os.path.splitext(namefile) outputs = predict( im ) # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format v = Visualizer( im[:, :, ::-1], metadata=metadata, scale=1, instance_mode=ColorMode. IMAGE_BW # remove the colors of unsegmented pixels. This option is only available for segmentation models ) outputs2 = outputs["instances"].to("cpu") out = v.draw_instance_predictions(outputs2) # cv2.imshow('',out.get_image()[:, :, ::-1]) cv2.imwrite(os.path.join('./out_img', namefile), out.get_image()[:, :, ::-1]) # cv2.waitKey() # np.savetxt(os.path.join('./out_img', arr[0]+ '.txt'),outputs2.) # f.write(('%g ' * 5 + '\n') % (cls, *xywh)) # label format
cfg = get_cfg() cfg.merge_from_file( "./detectron2_repo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml" ) cfg.DATASETS.TRAIN = ("fruits_nuts", ) cfg.DATASETS.TEST = () # no metrics implemented for this dataset cfg.DATALOADER.NUM_WORKERS = 2 # initialize from model zoo cfg.MODEL.WEIGHTS = "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl" cfg.SOLVER.IMS_PER_BATCH = 2 cfg.SOLVER.BASE_LR = 0.02 cfg.SOLVER.MAX_ITER = ( 300 ) # 300 iterations seems good enough, but you can certainly train longer cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = ( 128) # faster, and good enough for this toy dataset cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3 # 3 classes (data, fig, hazelnut) cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth") cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5 cfg.DATASETS.TEST = ("fruits_nuts", ) predictor = DefaultPredictor(cfg) for d in random.sample(dataset_dicts, 3): im = cv2.imread(d["file_name"]) outputs = predictor(im) v = Visualizer(im[:, :, ::-1], metadata=fruits_nuts_metadata, scale=0.8) v = v.draw_instance_predictions(outputs["instances"].to("cpu")) cv2.imshow("preview", v.get_image()[:, :, ::-1]) cv2.waitKey(0)