def run(self):
    """Train a Detectron2 DeepLabV3+ model on the dataset received on input 0.

    The Detectron2 configuration is either built from the task parameters
    (default mode) or loaded from the YAML file given by the
    ``expertModeCfg`` parameter (expert mode). Training outputs and the final
    configuration dump are written to a time-stamped sub-folder of
    ``cfg.OUTPUT_DIR``.

    Nothing is done when the input dataset contains no image.
    """
    # Core function of your process
    dataset = self.getInput(0)
    # Get parameters :
    param = self.getParam()

    images = dataset.data["images"]
    metadata = dataset.data["metadata"]

    # NOTE(review): the whole run is assumed to be conditional on a non-empty
    # dataset (the check also protects the division below) - confirm.
    if len(images) == 0:
        return

    param.cfg["epochs"] = int(param.cfg["maxIter"] * param.cfg["batchSize"] / len(images))

    # Complete class names if input dataset has no background class:
    # background takes index 0 and every existing index is shifted by one.
    if not dataset.has_bckgnd_class:
        tmp_dict = {0: "background"}
        for k, name in metadata["category_names"].items():
            tmp_dict[k + 1] = name
        metadata["category_names"] = tmp_dict
        dataset.has_bckgnd_class = True

    param.cfg["classes"] = len(metadata["category_names"])

    # Call beginTaskRun for initialization
    self.beginTaskRun()

    plugin_dir = os.path.dirname(os.path.realpath(__file__))

    if param.cfg["expertModeCfg"] == "":
        # Get default config
        cfg = get_cfg()
        # Add specific deeplab config
        add_deeplab_config(cfg)
        cfg.merge_from_file(os.path.join(
            plugin_dir, "model", "configs",
            "deeplab_v3_plus_R_103_os16_mg124_poly_90k_bs16.yaml"))
        # Generic dataset names that will be used
        cfg.DATASETS.TRAIN = ("datasetTrain",)
        cfg.DATASETS.TEST = ("datasetTest",)
        cfg.SOLVER.MAX_ITER = param.cfg["maxIter"]
        cfg.SOLVER.WARMUP_FACTOR = 0.001
        cfg.SOLVER.WARMUP_ITERS = param.cfg["maxIter"] // 5
        cfg.SOLVER.POLY_LR_FACTOR = 0.9
        cfg.SOLVER.POLY_LR_CONSTANT_FACTOR = 0.0
        cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES = param.cfg["classes"]
        cfg.SOLVER.BASE_LR = param.cfg["learningRate"]
        cfg.MODEL.SEM_SEG_HEAD.ASPP_CHANNELS = 256
        cfg.MODEL.SEM_SEG_HEAD.COMMON_STRIDE = 4
        cfg.SOLVER.IMS_PER_BATCH = param.cfg["batchSize"]
        cfg.DATALOADER.NUM_WORKERS = 0
        cfg.INPUT_SIZE = (param.cfg["inputWidth"], param.cfg["inputHeight"])
        cfg.TEST.EVAL_PERIOD = param.cfg["evalPeriod"]
        cfg.SPLIT_TRAIN_TEST = param.cfg["splitTrainTest"]
        cfg.SPLIT_TRAIN_TEST_SEED = -1
        cfg.MODEL.BACKBONE.FREEZE_AT = 5
        cfg.CLASS_NAMES = list(metadata["category_names"].values())

        # A patience of -1 is what this task stores when early stopping is off
        cfg.PATIENCE = param.cfg["patience"] if param.cfg["earlyStopping"] else -1

        if param.cfg["outputFolder"] == "":
            cfg.OUTPUT_DIR = os.path.join(plugin_dir, "output")
        elif os.path.isdir(param.cfg["outputFolder"]):
            cfg.OUTPUT_DIR = param.cfg["outputFolder"]
        else:
            # cfg.OUTPUT_DIR is left at the value coming from the merged config
            print("Incorrect output folder path")
    else:
        cfg = None
        try:
            with open(param.cfg["expertModeCfg"], 'r') as file:
                cfg = CfgNode.load_cfg(file.read())
        except OSError as err:
            # Keep cfg as None so that the error branch below is reached
            # instead of aborting the task with an unhandled exception.
            print(err)

    if cfg is not None:
        deeplabutils.register_train_test(images, metadata,
                                         train_ratio=cfg.SPLIT_TRAIN_TEST / 100,
                                         seed=cfg.SPLIT_TRAIN_TEST_SEED)

        # One time-stamped sub-folder per training run
        str_datetime = datetime.now().strftime("%d-%m-%YT%Hh%Mm%Ss")
        model_folder = os.path.join(cfg.OUTPUT_DIR, str_datetime)
        os.makedirs(model_folder, exist_ok=True)
        cfg.OUTPUT_DIR = model_folder

        self.trainer = deeplabutils.MyTrainer(cfg, self)
        try:
            self.trainer.resume_or_load(resume=False)
            print("Starting training job...")
            launch(self.trainer.train, num_gpus_per_machine=1)
            print("Training job finished.")
        finally:
            # Drop the trainer and free cached GPU memory even if training failed
            self.trainer = None
            gc.collect()
            torch.cuda.empty_cache()

        config_path = os.path.join(cfg.OUTPUT_DIR, "Detectron2_DeepLabV3Plus_Train_Config.yaml")
        with open(config_path, 'w') as file:
            file.write(cfg.dump())
    else:
        print("Error : can't load config file "+param.cfg["expertModeCfg"])

    # Call endTaskRun to finalize process
    self.endTaskRun()
def run(self):
    """Run DeepLabV3+ semantic segmentation on the image received on input 0.

    The model is (re)built when none is loaded yet or when ``param.update`` is
    set: either the pretrained Cityscapes model, or a custom model described
    by ``param.configFile`` / ``param.modelFile``. The labelled image is
    written to output 0, the input image is forwarded to output 1 with a
    colour map, and the legend image is written to output 2.

    Raises:
        ValueError: if a model must be built but no configuration is
            available (dataset is not "Cityscapes" and no config file given).
    """
    # Core function of your process
    # Call beginTaskRun for initialization
    self.beginTaskRun()

    # we use seed to keep the same color for our masks + boxes + labels (same random each time)
    random.seed(10)

    # Get input :
    img_input = self.getInput(0)
    srcImage = img_input.getImage()

    # Get output :
    mask_output = self.getOutput(0)
    graph_output = self.getOutput(2)

    # Get parameters :
    param = self.getParam()

    # Config file and model file needed are in the output folder generated by the train plugin
    if (self.cfg is None or param.update) and param.configFile != "":
        with open(param.configFile, 'r') as file:
            self.cfg = CfgNode.load_cfg(file.read())
        self.classes = self.cfg.CLASS_NAMES

    if self.model is None or param.update:
        if param.dataset == "Cityscapes":
            url = "https://dl.fbaipublicfiles.com/detectron2/DeepLab/Cityscapes-" \
                  "SemanticSegmentation/deeplab_v3_plus_R_103_os16_mg124_poly_90k_bs16/" \
                  "28054032/model_final_a8a355.pkl"
            self.cfg = get_cfg()
            cfg_file = os.path.join(
                os.path.dirname(__file__), "configs",
                "deeplab_v3_plus_R_103_os16_mg124_poly_90k_bs16.yaml")
            add_deeplab_config(self.cfg)
            self.cfg.merge_from_file(cfg_file)
            self.cfg.MODEL.WEIGHTS = url
            self.classes = [
                'road', 'sidewalk', 'building', 'wall', 'fence', 'pole',
                'traffic light', 'traffic sign', 'vegetation', 'terrain',
                'sky', 'person', 'rider', 'car', 'truck', 'bus', 'train',
                'motorcycle', 'bicycle'
            ]
        elif self.cfg is not None:
            self.cfg.MODEL.WEIGHTS = param.modelFile

        if self.cfg is None:
            # Without this guard the code below fails on `None.MODEL` with an
            # AttributeError that says nothing about the actual cause.
            raise ValueError(
                "No model configuration available: select the Cityscapes "
                "dataset or provide a config file.")

        if not torch.cuda.is_available():
            self.cfg.MODEL.DEVICE = "cpu"
            self.cfg.MODEL.RESNETS.NORM = "BN"
            self.cfg.MODEL.SEM_SEG_HEAD.NORM = "BN"

        self.model = build_model(self.cfg)
        DetectionCheckpointer(self.model).load(self.cfg.MODEL.WEIGHTS)
        self.model.eval()

    if self.model is not None and srcImage is not None:
        # Convert numpy image to detectron2 input format
        h, w, _ = np.shape(srcImage)
        image = torch.tensor(srcImage).permute(2, 0, 1)
        if param.dataset == "Cityscapes":
            image = Resize([512, 1024])(image)
        model_input = {"image": image, "height": h, "width": w}

        # Inference with pretrained model
        with torch.no_grad():
            pred = self.model([model_input])
            pred = pred[0]["sem_seg"].cpu().numpy()

        # Convert logits to labelled image (arg-max over the first axis)
        dstImage = np.argmax(pred, axis=0).astype(dtype=np.uint8)

        # Set image of input/output (numpy array):
        mask_output.setImage(dstImage)

        # Create random color map: one [r, g, b, 255] entry per class
        if self.colors is None or param.update:
            self.colors = [
                [random.randint(0, 255), random.randint(0, 255), random.randint(0, 255), 255]
                for _ in range(len(self.classes))
            ]

        # Apply color map on labelled image
        # NOTE(review): applied on every inference here; the collapsed source
        # does not show whether it was limited to colour-map regeneration.
        self.setOutputColorMap(1, 0, self.colors)
        self.forwardInputImage(0, 1)
        graph_output.setImage(self.draw_legend())
        param.update = False

    # Step progress bar:
    self.emitStepProgress()

    # Call endTaskRun to finalize process
    self.endTaskRun()
def main(config):
    """Run the trained detector on the registered "test_data" set.

    Every detection (score above 0.01) of every test image is streamed to
    ``<root>/outputs/all_test_preds.csv``. Up to 5 randomly chosen images are
    additionally saved with their predictions drawn, in
    ``<root>/outputs/test_pred_imgs``, and optionally displayed.

    Args:
        config: nested mapping; reads ``config["base"]["root"]``,
            ``config["base"]["imgs_root"]``, ``config["test_model"]["scale"]``
            and ``config["test_model"]["do_show"]``.

    Raises:
        OSError: if a test image cannot be read.
    """
    root = expanduser(config["base"]["root"])
    imgs_root = expanduser(config["base"]["imgs_root"])
    jsons_dir = join(root, "jsons")
    model_dir = join(root, "outputs")

    scale = float(config["test_model"]["scale"])
    do_show = config["test_model"]["do_show"]

    register_data(jsons_dir, imgs_root)

    # Need this datasets line, in order for metadata to have .thing_classes attribute
    datasets = DatasetCatalog.get("test_data")
    metadata = MetadataCatalog.get("test_data")

    # Read the cfg back in and turn it into a CfgNode obj:
    with open(join(model_dir, "cfg.txt"), "r") as f:
        cfg = CfgNode.load_cfg(f.read())

    # Use the weights from the model trained on our custom dataset:
    cfg.MODEL.WEIGHTS = join(model_dir, "model_final.pth")  # TODO: have option to use snapshot instead
    # Make small so a PR curve can be made for a broad range of scores
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.01

    print("Generating predictor ...")
    predictor = DefaultPredictor(cfg)

    # For saving images with predicted labels:
    output_imgs_dir = join(model_dir, "test_pred_imgs")
    makedirs(output_imgs_dir, exist_ok=True)

    # For saving detection predictions as csv:
    output_csv = join(model_dir, "all_test_preds.csv")
    col_names = ["img", "x1", "y1", "x2", "y2", "score", "thing", "dummy_id"]

    # Select (at most) 5 random images to visualize,
    # but save the prediction results for all imgs.
    # Capping the sample size avoids a ValueError on test sets smaller than 5.
    num_images = len(datasets)
    num_samples = min(5, num_images)
    sample_idxs = set(np.random.choice(num_images, num_samples, replace=False).tolist())
    num_shown = 0

    # The context manager flushes and closes the csv as soon as we are done
    with open(output_csv, "w", newline="") as csv_file_handle:
        csv_writer = csv.DictWriter(csv_file_handle, fieldnames=col_names)
        csv_writer.writeheader()

        for img_idx, d in enumerate(datasets):
            print(f"Predicting on image {img_idx+1} of {num_images} ...", end="\r")

            image_id = d["image_id"]
            img_name = basename(d["file_name"])
            img = cv2.imread(d["file_name"])
            if img is None:
                # cv2.imread signals failure by returning None
                raise OSError(f"Could not read image {d['file_name']}")

            detected = predictor(img)
            preds = detected["instances"].to("cpu")

            # Visualize only the sampled images:
            if img_idx in sample_idxs:
                visualizer = Visualizer(img[:, :, ::-1],
                                        metadata=metadata,
                                        scale=scale,
                                        instance_mode=ColorMode.IMAGE)
                drawn = visualizer.draw_instance_predictions(preds)
                pred_img = drawn.get_image()[:, :, ::-1]
                cv2.imwrite(join(output_imgs_dir, "predicted_" + img_name), pred_img)

                if do_show:
                    num_shown += 1
                    cv2.imshow(f"prediction on image {image_id}", pred_img)
                    print(f"Press any key to go to the next image ({num_shown}/{num_samples}) ...")
                    key = cv2.waitKey(0) & 0xFF
                    cv2.destroyAllWindows()
                    if key == ord("q"):
                        # Quitting stops the whole prediction loop, so the csv
                        # only holds the images processed before this one.
                        print("Quitting ...")
                        break

            # Stream the predicted box coords and scores to a csv:
            boxes = preds.pred_boxes.tensor.numpy()
            scores = preds.scores.tolist()
            thing_ids = preds.pred_classes.tolist()

            for box_idx, (coords, score, thing_id) in enumerate(zip(boxes, scores, thing_ids)):
                csv_writer.writerow({
                    "img": img_name,
                    "x1": int(coords[0]),
                    "y1": int(coords[1]),
                    "x2": int(coords[2]),
                    "y2": int(coords[3]),
                    "score": score,
                    "thing": metadata.thing_classes[thing_id],
                    "dummy_id": box_idx,
                })

    print(f"Finished predicting on all {num_images} images from the test data fraction.")
    print(f"Results are stored in {output_csv}")
    print(f"{num_samples} sample test images are stored in {output_imgs_dir}\n"
          f"Note that the {num_samples} sample test images show all detections with a score greater than 0.01. "
          "This low score cutoff is for test purposes and is intentional. "
          "You should expect to see many false positive labels.\n")

    # Clear GPU memory
    torch.cuda.empty_cache()
def main(config):
    """Run the trained detector on every matching video under ``vids_root``.

    For each video ``<name>.mp4`` two files are written next to it:
    ``<name>_detected.mp4`` (frames with the kept boxes drawn) and
    ``<name>_detected.csv`` (one row per kept box). For every class listed in
    ``expected_obj_nums`` only the ``n`` highest-scoring boxes of that class
    are kept in each frame.

    Args:
        config: nested mapping; reads ``config["base"]["root"]``,
            ``config["base"]["imgs_root"]`` and, under
            ``config["analyze_vids"]``: ``model_pth``, ``vid_ending``,
            ``expected_obj_nums``, ``score_cutoff``, ``vids_root`` and
            ``framerate``.

    Raises:
        ValueError: if ``model_pth`` is not a '.pth' file, ``vid_ending`` does
            not end in '.mp4', ``score_cutoff`` is not strictly between 0 and
            1, or a frame holds fewer detections of a class than expected.
    """
    root = expanduser(config["base"]["root"])
    imgs_root = expanduser(config["base"]["imgs_root"])
    jsons_dir = join(root, "jsons")
    model_dir = join(root, "outputs")

    model_pth = expanduser(config["analyze_vids"]["model_pth"])
    vid_ending = '*' + config["analyze_vids"]["vid_ending"]
    expected_obj_nums = config["analyze_vids"]["expected_obj_nums"]
    score_cutoff = float(config["analyze_vids"]["score_cutoff"])
    vids_root = expanduser(config["analyze_vids"]["vids_root"])
    framerate = int(config["analyze_vids"]["framerate"])

    if not model_pth.endswith(".pth"):
        raise ValueError(f"{basename(model_pth)} must be a '.pth' file.")
    if not vid_ending.endswith(".mp4"):
        raise ValueError(f"{vid_ending} must end in '.mp4'")
    if not 0 < score_cutoff < 1:
        raise ValueError(
            f"The testing threshold, {score_cutoff}, must be between 0 and 1.")

    vids = [str(path.absolute()) for path in Path(vids_root).rglob(vid_ending)]
    if not vids:
        print(f"No .mp4 videos were found in {vids_root} ...")

    register_data(jsons_dir, imgs_root)

    # The DatasetCatalog call is only needed for metadata to get its
    # .thing_classes attrib; its return value is not used. It doesn't matter
    # which registered dataset is used for these two calls.
    DatasetCatalog.get("training_data")
    metadata = MetadataCatalog.get("training_data")

    # Read the cfg back in and turn it into a CfgNode obj:
    with open(join(model_dir, "cfg.txt"), "r") as f:
        cfg = CfgNode.load_cfg(f.read())

    # Use the weights from our chosen model:
    cfg.MODEL.WEIGHTS = model_pth
    # TODO: pick the confidence cutoff based on the PR curve rather than
    # having it as a user parameter
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = score_cutoff

    print("Generating predictor ...")
    predictor = DefaultPredictor(cfg)

    bgr_palette = [(165, 194, 102), (98, 141, 252), (203, 160, 141),
                   (195, 138, 231), (84, 216, 166), (47, 217, 255),
                   (148, 196, 229), (179, 179, 179)]
    col_names = [
        "frame", "x1", "y1", "x2", "y2", "score", "thing", "dummy_id"
    ]

    for vid in vids:
        print(f"Analyzing {vid} ...")

        cap = cv2.VideoCapture(vid)
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        pbar = trange(frame_count)

        output_vid = f"{splitext(vid)[0]}_detected.mp4"
        output_csv = f"{splitext(vid)[0]}_detected.csv"

        # Define the codec and create VideoWriter object
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        out = cv2.VideoWriter(filename=output_vid,
                              apiPreference=0,
                              fourcc=fourcc,
                              fps=framerate,
                              frameSize=(int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                                         int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))),
                              params=None)

        try:
            with open(output_csv, "w", newline="") as csv_file_handle:
                csv_writer = csv.DictWriter(csv_file_handle, fieldnames=col_names)
                csv_writer.writeheader()

                # Use Detectron2 model on each frame in vid:
                for f in pbar:
                    ret, frame = cap.read()
                    if not ret:
                        continue

                    detected = predictor(frame)
                    labelled_frame = frame

                    preds = detected['instances'].to('cpu')
                    boxes = preds.pred_boxes.tensor.numpy()
                    thing_ids = preds.pred_classes.tolist()
                    scores = preds.scores.numpy()

                    # Get the idxs of each unique thing_id. The dict is seeded
                    # from the set so that group order (and therefore the
                    # dummy id written below) is unchanged.
                    idxs_of_each_thing = {thing_id: [] for thing_id in set(thing_ids)}
                    for det_idx, thing_id in enumerate(thing_ids):
                        idxs_of_each_thing[thing_id].append(det_idx)

                    # Split up the data according to thing_id:
                    for i, (thing_id, idxs) in enumerate(idxs_of_each_thing.items()):
                        thing_class = metadata.thing_classes[thing_id]
                        if thing_class not in expected_obj_nums:
                            continue

                        expected_obj_num = expected_obj_nums[thing_class]
                        # Count the detections of THIS class only; counting
                        # all classes would let the check pass and then fail
                        # with an IndexError when indexing the class boxes.
                        num_boxes = len(idxs)
                        if expected_obj_num > num_boxes:
                            raise ValueError(
                                f"You expected {expected_obj_num} {thing_class} in "
                                "every frame of the video, according to "
                                "`expected_obj_nums`, but only "
                                f"{num_boxes} were found in frame {f}.")

                        thing_scores = scores[idxs]
                        thing_boxes = boxes[idxs]
                        # Cycle through the palette when there are more
                        # classes than colours
                        color = bgr_palette[thing_id % len(bgr_palette)]

                        # Grab the top n objects according to their score;
                        # the original author notes that `preds` comes sorted
                        # by descending score.
                        for j in range(expected_obj_num):
                            x1, y1, x2, y2 = (int(v) for v in thing_boxes[j][:4])
                            score = float(thing_scores[j])

                            labelled_frame = cv2.rectangle(
                                labelled_frame, (x1, y1), (x2, y2), color, 2)
                            labelled_frame = cv2.putText(
                                labelled_frame, thing_class, (x2 - 10, y2 - 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)

                            # TODO: Write some sort of simple filtering thing
                            # that skips big jumps ... maybe just store the
                            # last bbox coords and compare. Define big jumps
                            # as a proportion of the frame width or average of
                            # last few frame deltas
                            # NOTE(review): dummy_id is the index of the class
                            # group, not of the object (j) - confirm intent.
                            csv_writer.writerow({
                                "frame": int(f),
                                "x1": x1,
                                "y1": y1,
                                "x2": x2,
                                "y2": y2,
                                "score": score,
                                "thing": thing_class,
                                "dummy_id": i,
                            })

                    # Save frame to vid:
                    out.write(labelled_frame)
                    pbar.set_description(f"Detecting in frame {f+1}/{frame_count}")
        finally:
            # Releasing the writer finalizes the output video; releasing the
            # capture frees the input file. Done even if a frame failed.
            cap.release()
            out.release()

    # Clear GPU memory
    torch.cuda.empty_cache()