def __init__(self, config, num_train_optimization_steps=3100, include_actions=True):
    # Load ViLBERT config
    print("Loading ViLBERT model configuration")
    self.vilbert_config = BertConfig.from_json_file(config.BERT_CONFIG)
    self.pre_trained_model = config.BERT_PRE_TRAINED_MODEL
    self.bert_gpu = config.BERT_GPU
    self.detectron2_gpu = config.DETECTRON2_GPU
    self.bert_gpu_device = torch.device(self.bert_gpu)
    self.detectron2_gpu_device = torch.device(self.detectron2_gpu)

    print("Loading ViLBERT model on gpu {}".format(self.bert_gpu))
    self.model = VILBertForVLTasks.from_pretrained(
        self.pre_trained_model,
        self.vilbert_config,
        num_labels=len(self.model_actions) - 2,  # number of predicted actions: 6
    )
    new_voc_size = self.vilbert_config.vocab_size + 8
    self.model.resize_token_embeddings(new_voc_size)
    self.model.to(self.bert_gpu_device)
    print("ViLBERT loaded on GPU {}".format(self.bert_gpu))

    print("Loading Detectron2 predictor on GPU {}".format(self.detectron2_gpu))
    detectron2_cfg = self.create_detectron2_cfg(config)
    self.detector = DefaultPredictor(detectron2_cfg)
    # self.detector.eval()
    print("Detectron2 loaded")

    self._max_region_num = 36
    self._max_seq_length = 128
    # if include_actions:
    #     self._max_seq_length = 128 + 10
    self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased",
                                                   do_lower_case=True,
                                                   do_basic_tokenize=True)
    self.criterion = nn.BCEWithLogitsLoss(reduction='mean')
    self.loss = 0
    self.learning_rate = 3e-6
    self.vision_scratch = False
    self.max_steps = 30
    self.grad_accumulation = 1  # 00
    self.action_history = []
    self.loss_weight = {
        "a": 0.1,
        "b": 0.1,
        "c": 0.8,
        "a_loss": [],
        "b_loss": [],
        "c_loss": [],
    }
    self.save_example = {
        "path_id": "",
        "images": [],
        "boxes": [],
        "box_probs": [],
        "text": [],
        "actions": [],
        "box_one_hots": [],
        "box_labels": []
    }

    optimizer_grouped_parameters = []
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    for key, value in dict(self.model.named_parameters()).items():
        if value.requires_grad:
            if any(nd in key for nd in no_decay):
                # No weight decay for biases and LayerNorm parameters
                optimizer_grouped_parameters += [{
                    "params": [value],
                    "lr": self.learning_rate,
                    "weight_decay": 0.0
                }]
            else:
                optimizer_grouped_parameters += [{
                    "params": [value],
                    "lr": self.learning_rate,
                    "weight_decay": 0.01
                }]
    print(len(list(self.model.named_parameters())),
          len(optimizer_grouped_parameters))

    self.optimizer = Adam(optimizer_grouped_parameters,
                          lr=self.learning_rate,
                          warmup=0.1,
                          t_total=num_train_optimization_steps,
                          schedule='warmup_constant')
    self.lr_scheduler = ReduceLROnPlateau(self.optimizer,
                                          mode='max',
                                          factor=0.2,
                                          patience=10,
                                          cooldown=4,
                                          threshold=0.001)
def run(self):
    self.beginTaskRun()
    # Seed so masks + boxes + labels keep the same colors across runs
    random.seed(30)

    # Get input
    input = self.getInput(0)
    srcImage = input.getImage()

    # Get outputs
    output_image = self.getOutput(0)
    output_graph = self.getOutput(1)
    output_graph.setNewLayer("TridentNet")

    # Get parameters
    param = self.getParam()

    # Predictor
    if not self.loaded:
        print("Loading model")
        if not param.cuda:
            self.cfg.MODEL.DEVICE = "cpu"
            self.deviceFrom = "cpu"
        else:
            self.deviceFrom = "gpu"
        self.loaded = True
        self.predictor = DefaultPredictor(self.cfg)
    # Reload on GPU if CUDA is now checked but the model was loaded on CPU
    elif self.deviceFrom == "cpu" and param.cuda:
        print("Loading model")
        self.cfg = get_cfg()
        add_tridentnet_config(self.cfg)
        self.cfg.merge_from_file(self.folder + "/TridentNet_git/configs/" +
                                 self.MODEL_NAME_CONFIG + ".yaml")
        self.cfg.MODEL.WEIGHTS = self.folder + "/models/" + self.MODEL_NAME + ".pkl"
        self.deviceFrom = "gpu"
        self.predictor = DefaultPredictor(self.cfg)
    # Reload on CPU if CUDA is now unchecked but the model was loaded on GPU
    elif self.deviceFrom == "gpu" and not param.cuda:
        print("Loading model")
        self.cfg = get_cfg()
        self.cfg.MODEL.DEVICE = "cpu"
        add_tridentnet_config(self.cfg)
        self.cfg.merge_from_file(self.folder + "/TridentNet_git/configs/" +
                                 self.MODEL_NAME_CONFIG + ".yaml")
        self.cfg.MODEL.WEIGHTS = self.folder + "/models/" + self.MODEL_NAME + ".pkl"
        self.deviceFrom = "cpu"
        self.predictor = DefaultPredictor(self.cfg)

    outputs = self.predictor(srcImage)

    # Get output instances
    output_image.setImage(srcImage)
    boxes = outputs["instances"].pred_boxes
    scores = outputs["instances"].scores
    classes = outputs["instances"].pred_classes

    # To numpy
    if param.cuda:
        boxes_np = boxes.tensor.cpu().numpy()
        scores_np = scores.cpu().numpy()
        classes_np = classes.cpu().numpy()
    else:
        boxes_np = boxes.tensor.numpy()
        scores_np = scores.numpy()
        classes_np = classes.numpy()

    self.emitStepProgress()

    # Keep only the results with probability > threshold
    scores_np_tresh = list()
    for s in scores_np:
        if float(s) > param.proba:
            scores_np_tresh.append(s)

    self.emitStepProgress()

    if len(scores_np_tresh) > 0:
        # Text label with score
        labels = None
        class_names = MetadataCatalog.get(
            self.cfg.DATASETS.TRAIN[0]).get("thing_classes")
        if classes is not None and class_names is not None and len(class_names) > 1:
            labels = [class_names[i] for i in classes]
        if scores_np_tresh is not None:
            if labels is None:
                labels = ["{:.0f}%".format(s * 100) for s in scores_np_tresh]
            else:
                labels = [
                    "{} {:.0f}%".format(l, s * 100)
                    for l, s in zip(labels, scores_np_tresh)
                ]

        # Show boxes + labels
        for i in range(len(scores_np_tresh)):
            color = [
                random.randint(0, 255),
                random.randint(0, 255),
                random.randint(0, 255), 255
            ]
            prop_text = core.GraphicsTextProperty()
            prop_text.color = color
            prop_text.font_size = 7
            output_graph.addText(labels[i], float(boxes_np[i][0]),
                                 float(boxes_np[i][1]), prop_text)
            prop_rect = core.GraphicsRectProperty()
            prop_rect.pen_color = color
            prop_rect.category = labels[i]
            output_graph.addRectangle(
                float(boxes_np[i][0]), float(boxes_np[i][1]),
                float(boxes_np[i][2] - boxes_np[i][0]),
                float(boxes_np[i][3] - boxes_np[i][1]), prop_rect)

    # Step progress bar
    self.emitStepProgress()

    # Call endTaskRun to finalize process
    self.endTaskRun()
# Set up detectron
path_weight = cfg.SERVICE.DETECT_WEIGHT
path_config = cfg.SERVICE.DETECT_CONFIG
confidences_threshold = cfg.SERVICE.THRESHOLD
num_of_class = cfg.SERVICE.NUMBER_CLASS

detectron = config_detectron()
detectron.MODEL.DEVICE = cfg.SERVICE.DEVICE
detectron.merge_from_file(path_config)
detectron.MODEL.WEIGHTS = path_weight
detectron.MODEL.ROI_HEADS.SCORE_THRESH_TEST = confidences_threshold
detectron.MODEL.ROI_HEADS.NUM_CLASSES = num_of_class

PREDICTOR = DefaultPredictor(detectron)

# Create labels
CLASSES = load_class_names(cfg.SERVICE.CLASSES)

image = cv2.imread('images/test.jpg')
height, width, channels = image.shape
center_image = (width // 2, height // 2)
print("shape image: ", (width, height))

list_boxes, list_scores, list_classes = predict(image, PREDICTOR, CLASSES)
print('list_boxes', list_boxes)
print('list_classes', list_classes)

# draw
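# `predict` is defined elsewhere; below is a plausible sketch, assuming it
# simply unpacks the DefaultPredictor output into plain Python lists. The
# helper name and return order are taken from the call above; the body is
# an assumption, not the project's actual implementation.
def predict(image, predictor, classes):
    # Run inference and move the instances to the CPU
    instances = predictor(image)["instances"].to("cpu")
    list_boxes = instances.pred_boxes.tensor.numpy().tolist()
    list_scores = instances.scores.numpy().tolist()
    # Map predicted class indices to their label strings
    list_classes = [classes[i] for i in instances.pred_classes.tolist()]
    return list_boxes, list_scores, list_classes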
UPLOAD_FOLDER = './'
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.secret_key = "secret key"

start = time.time()

# Obtain detectron2's default config
cfg = get_cfg()
# Load the pre-trained model from the Detectron2 model zoo
cfg.merge_from_file(model_zoo.get_config_file(
    "COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml"))
# Set the confidence threshold for this model
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
# Load model weights
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml")
# Create the predictor for pose estimation using the config
pose_detector = DefaultPredictor(cfg)

model_load_done = time.time()
print("Detectron model loaded in ", model_load_done - start)

# Load the pretrained LSTM model from a checkpoint file
lstm_classifier = ActionClassificationLSTM.load_from_checkpoint(
    "models/saved_model.ckpt")
lstm_classifier.eval()


class DataObject():
    pass


def checkFileType(f: str):
    return f.split('.')[-1] in ['mp4']
def run(self):
    self.beginTaskRun()
    # Seed so masks + boxes + labels keep the same colors across runs
    random.seed(30)

    # Get input
    img_input = self.getInput(0)
    src_img = img_input.getImage()

    # Get outputs
    mask_output = self.getOutput(0)
    output_graph = self.getOutput(2)
    output_graph.setImageIndex(1)
    output_graph.setNewLayer("MaskRCNN")

    # Get parameters
    param = self.getParam()

    # Predictor
    if not self.predictor or param.update_model:
        if param.dataset == "COCO":
            self.model_link = "COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml"
        else:
            self.model_link = "Cityscapes/mask_rcnn_R_50_FPN.yaml"
        self.cfg = get_cfg()
        self.cfg.MODEL.DEVICE = param.device
        self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = self.threshold
        # Load config from file (.yaml)
        self.cfg.merge_from_file(model_zoo.get_config_file(self.model_link))
        # Download the model (.pkl)
        self.cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(self.model_link)
        self.predictor = DefaultPredictor(self.cfg)
        param.update_model = False

    outputs = self.predictor(src_img)

    # Get output instances
    boxes = outputs["instances"].pred_boxes
    scores = outputs["instances"].scores
    classes = outputs["instances"].pred_classes
    masks = outputs["instances"].pred_masks

    # To numpy
    boxes_np = boxes.tensor.cpu().numpy()
    scores_np = scores.cpu().numpy()
    # classes_np = classes.cpu().numpy()

    self.emitStepProgress()

    # Keep only the results with probability > threshold
    scores_np_thresh = list()
    for s in scores_np:
        if float(s) > param.proba:
            scores_np_thresh.append(s)

    if len(scores_np_thresh) > 0:
        # Create random colors for masks + boxes + labels
        colors = [[0, 0, 0]]
        for i in range(len(scores_np_thresh)):
            colors.append([
                random.randint(0, 255),
                random.randint(0, 255),
                random.randint(0, 255), 255
            ])

        # Text labels with scores
        labels = None
        class_names = MetadataCatalog.get(
            self.cfg.DATASETS.TRAIN[0]).get("thing_classes")
        if classes is not None and class_names is not None and len(class_names) > 1:
            labels = [class_names[i] for i in classes]
        if scores_np_thresh is not None:
            if labels is None:
                labels = ["{:.0f}%".format(s * 100) for s in scores_np_thresh]
            else:
                labels = [
                    "{} {:.0f}%".format(l, s * 100)
                    for l, s in zip(labels, scores_np_thresh)
                ]

        # Show boxes + labels
        for i in range(len(scores_np_thresh)):
            prop_text = core.GraphicsTextProperty()
            # Start at i + 1: the first color is reserved for the mask background
            prop_text.color = colors[i + 1]
            prop_text.font_size = 7
            prop_rect = core.GraphicsRectProperty()
            prop_rect.pen_color = colors[i + 1]
            prop_rect.category = labels[i]
            output_graph.addRectangle(
                float(boxes_np[i][0]), float(boxes_np[i][1]),
                float(boxes_np[i][2] - boxes_np[i][0]),
                float(boxes_np[i][3] - boxes_np[i][1]), prop_rect)
            output_graph.addText(labels[i], float(boxes_np[i][0]),
                                 float(boxes_np[i][1]), prop_text)

        self.emitStepProgress()

        # Label mask
        nb_objects = len(masks[:len(scores_np_thresh)])
        if nb_objects > 0:
            masks = masks[:nb_objects, :, :, None]
            mask_or = masks[0] * nb_objects
            for j in range(1, nb_objects):
                mask_or = torch.max(mask_or, masks[j] * (nb_objects - j))
            mask_numpy = mask_or.byte().cpu().numpy()
            mask_output.setImage(mask_numpy)

            # Output mask applied to our original image:
            # reverse the colors to match the box colors
            c = colors[1:]
            c = c[::-1]
            colors = [[0, 0, 0]]
            for col in c:
                colors.append(col)
            self.setOutputColorMap(1, 0, colors)
    else:
        self.emitStepProgress()

    self.forwardInputImage(0, 1)

    # Step progress bar
    self.emitStepProgress()

    # Call endTaskRun to finalize process
    self.endTaskRun()
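# A minimal sketch of the label-mask trick used above, with hypothetical toy
# tensors: each boolean mask is multiplied by a descending integer label and
# merged with torch.max, so earlier (higher-scoring) instances keep the larger
# label wherever masks overlap.
import torch

toy_masks = torch.zeros(3, 4, 4, dtype=torch.bool)
toy_masks[0, 0:2, 0:2] = True  # highest-scoring instance
toy_masks[1, 1:3, 1:3] = True
toy_masks[2, 2:4, 2:4] = True

n = toy_masks.shape[0]
label_mask = toy_masks[0] * n  # the top instance gets label n
for j in range(1, n):
    label_mask = torch.max(label_mask, toy_masks[j] * (n - j))
print(label_mask)  # overlapping pixels keep the higher (earlier) label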
def main(args):
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(args.cfg))
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(args.cfg)
    predictor = DefaultPredictor(cfg)

    if os.path.isdir(args.im_or_folder):
        im_list = glob.iglob(args.im_or_folder + '/*.' + args.image_ext)
    else:
        im_list = [args.im_or_folder]

    for video_name in im_list:
        out_name = os.path.join(args.output_dir, os.path.basename(video_name))
        print('Processing {}'.format(video_name))

        boxes = []
        segments = []
        keypoints = []

        for frame_i, im in enumerate(read_video(video_name)):
            t = time.time()
            outputs = predictor(im)['instances'].to('cpu')
            print('Frame {} processed in {:.3f}s'.format(frame_i,
                                                         time.time() - t))

            has_bbox = False
            if outputs.has('pred_boxes'):
                bbox_tensor = outputs.pred_boxes.tensor.numpy()
                if len(bbox_tensor) > 0:
                    has_bbox = True
                    scores = outputs.scores.numpy()[:, None]
                    bbox_tensor = np.concatenate((bbox_tensor, scores), axis=1)
            if has_bbox:
                kps = outputs.pred_keypoints.numpy()
                kps_xy = kps[:, :, :2]
                kps_prob = kps[:, :, 2:3]
                kps_logit = np.zeros_like(kps_prob)  # Dummy
                kps = np.concatenate((kps_xy, kps_logit, kps_prob), axis=2)
                kps = kps.transpose(0, 2, 1)
            else:
                kps = []
                bbox_tensor = []

            # Mimic Detectron1 format
            cls_boxes = [[], bbox_tensor]
            cls_keyps = [[], kps]

            boxes.append(cls_boxes)
            segments.append(None)
            keypoints.append(cls_keyps)

        # Video resolution
        metadata = {
            'w': im.shape[1],
            'h': im.shape[0],
        }

        np.savez_compressed(out_name, boxes=boxes, segments=segments,
                            keypoints=keypoints, metadata=metadata)
def run(self):
    self.beginTaskRun()
    # Seed so masks + boxes + labels keep the same colors across runs
    random.seed(30)

    # Get input
    input = self.getInput(0)
    srcImage = input.getImage()

    # Get output
    output_graph = self.getOutput(2)
    output_graph.setImageIndex(1)
    output_graph.setNewLayer("PanopticSegmentation")

    # Get parameters
    param = self.getParam()

    # Predictor
    if not self.loaded:
        print("Loading model")
        if not param.cuda:
            self.cfg.MODEL.DEVICE = "cpu"
            self.deviceFrom = "cpu"
        else:
            self.deviceFrom = "gpu"
        self.predictor = DefaultPredictor(self.cfg)
        self.loaded = True
    # Reload on GPU if CUDA is now checked but the model was loaded on CPU
    elif self.deviceFrom == "cpu" and param.cuda:
        print("Loading model")
        self.cfg = get_cfg()
        self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = self.threshold
        # Load config from file (.yaml)
        self.cfg.merge_from_file(model_zoo.get_config_file(self.LINK_MODEL))
        # Download the model (.pkl)
        self.cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(self.LINK_MODEL)
        self.predictor = DefaultPredictor(self.cfg)
        self.deviceFrom = "gpu"
    # Reload on CPU if CUDA is now unchecked but the model was loaded on GPU
    elif self.deviceFrom == "gpu" and not param.cuda:
        print("Loading model")
        self.cfg = get_cfg()
        self.cfg.MODEL.DEVICE = "cpu"
        self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = self.threshold
        self.cfg.merge_from_file(model_zoo.get_config_file(self.LINK_MODEL))
        self.cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(self.LINK_MODEL)
        self.predictor = DefaultPredictor(self.cfg)
        self.deviceFrom = "cpu"

    # Get model outputs
    outputs = self.predictor(srcImage)["panoptic_seg"]
    mask = outputs[0]
    infos = outputs[1]

    # Set mask output
    mask_output = self.getOutput(0)
    if param.cuda:
        mask_output.setImage(mask.cpu().numpy())
    else:
        mask_output.setImage(mask.numpy())

    self.emitStepProgress()

    # Output visualisation
    nb_objects = len(infos)

    # Create random colors for masks + boxes + labels
    colors = [[0, 0, 0]]
    for i in range(nb_objects):
        colors.append([
            random.randint(0, 255),
            random.randint(0, 255),
            random.randint(0, 255), 255
        ])

    # Get class infos
    scores = list()
    classesThings = list()
    classesStuffs = list()
    labelsStuffs = list()
    for info in infos:
        if info["isthing"]:
            scores.append(info['score'])
            classesThings.append(info['category_id'])
        else:
            classesStuffs.append(info['category_id'])

    # Text label with score - get class names for things and stuffs from metadata
    labelsThings = None
    class_names = MetadataCatalog.get(
        self.cfg.DATASETS.TRAIN[0]).get("thing_classes")
    if classesThings is not None and class_names is not None and len(class_names) > 1:
        labelsThings = [class_names[i] for i in classesThings]
    if scores is not None:
        if labelsThings is None:
            labelsThings = ["{:.0f}%".format(s * 100) for s in scores]
        else:
            labelsThings = [
                "{} {:.0f}%".format(l, s * 100)
                for l, s in zip(labelsThings, scores)
            ]
    class_names_stuff = MetadataCatalog.get(
        self.cfg.DATASETS.TRAIN[0]).get("stuff_classes")
    [labelsStuffs.append(class_names_stuff[x]) for x in classesStuffs]
    labels = labelsThings + labelsStuffs
    seg_ids = torch.unique(mask).tolist()

    self.emitStepProgress()

    # Create masks - used for text_pos
    masks = list()
    for sid in seg_ids:
        if param.cuda:
            mymask = (mask == sid).cpu().numpy().astype(bool)
        else:
            mymask = (mask == sid).numpy().astype(bool)
        masks.append(mymask)

    # Text pos = median of mask - the median is less sensitive to outliers
    if len(masks) > len(labels):
        # Unrecognized area - no given class for the area labeled 0
        for i in range(nb_objects):
            properties_text = core.GraphicsTextProperty()
            properties_text.color = colors[i + 1]
            properties_text.font_size = 7
            text_pos = np.median(masks[i + 1].nonzero(), axis=1)[::-1]
            output_graph.addText(labels[i], text_pos[0], text_pos[1],
                                 properties_text)
    else:
        for i in range(nb_objects):
            properties_text = core.GraphicsTextProperty()
            properties_text.color = colors[i + 1]
            properties_text.font_size = 7
            text_pos = np.median(masks[i].nonzero(), axis=1)[::-1]
            output_graph.addText(labels[i], text_pos[0], text_pos[1],
                                 properties_text)

    # Output mask applied to our original image
    self.setOutputColorMap(1, 0, colors)
    self.forwardInputImage(0, 1)

    # Step progress bar
    self.emitStepProgress()

    # Call endTaskRun to finalize process
    self.endTaskRun()
def detect(video_path):
    save_visual_detections = False
    results_dir = 'results/task1_1/retina'
    coco_car_id = 2
    model = 'retinanet_R_50_FPN_3x'
    model_path = 'COCO-Detection/' + model + '.yaml'
    print(model_path)

    # Run a pre-trained detectron2 model
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(model_path))
    cfg.MODEL.RETINANET.SCORE_THRESH_TEST = 0.5
    cfg.MODEL.RETINANET.NMS_THRESH_TEST = 0.4
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_path)
    cfg.OUTPUT_DIR = results_dir
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    predictor = DefaultPredictor(cfg)

    det_path = os.path.join(cfg.OUTPUT_DIR, 'detections.txt')
    if os.path.exists(det_path):
        os.remove(det_path)

    vidcap = cv2.VideoCapture(video_path)
    num_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
    # num_frames = 3

    times = []
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)

    for frame_id in tqdm(range(num_frames)):
        _, frame = vidcap.read()

        start.record()
        outputs = predictor(frame)
        end.record()
        torch.cuda.synchronize()
        times.append(start.elapsed_time(end))

        pred_boxes = outputs["instances"].pred_boxes.to("cpu")
        scores = outputs["instances"].scores.to("cpu")
        pred_classes = outputs["instances"].pred_classes.to("cpu")

        for idx, pred in enumerate(pred_classes):
            if pred.item() == coco_car_id:
                box = pred_boxes[idx].tensor.numpy()[0]
                # Format: <frame>, <id>, <bb_left>, <bb_top>, <bb_width>, <bb_height>, <conf>, <x>, <y>, <z>
                det = (str(frame_id + 1) + ',-1,' + str(box[0]) + ',' +
                       str(box[1]) + ',' + str(box[2] - box[0]) + ',' +
                       str(box[3] - box[1]) + ',' + str(scores[idx].item()) +
                       ',-1,-1,-1\n')
                with open(det_path, 'a') as f:
                    f.write(det)

        if save_visual_detections:
            output_path = os.path.join(cfg.OUTPUT_DIR,
                                       'det_frame_' + str(frame_id) + '.png')
            v = Visualizer(frame[:, :, ::-1],
                           MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
                           scale=1)
            out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
            cv2.imwrite(output_path, out.get_image()[:, :, ::-1])

    print('Inference time (s/img): ', np.mean(times) / 1000)
    return det_path
def cfg(self, cfg: CfgNode):
    self.config = cfg
    self.predictor = DefaultPredictor(cfg)
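# Presumably this is the setter half of a @cfg.setter property, so reassigning
# the config transparently rebuilds the predictor. A hedged usage sketch
# follows; `wrapper` and the model choice are assumptions, not part of the
# original code.
from detectron2 import model_zoo
from detectron2.config import get_cfg

new_cfg = get_cfg()
new_cfg.merge_from_file(
    model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
new_cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")

wrapper.cfg = new_cfg  # the setter stores the config and recreates DefaultPredictor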
def train(cfgs):
    # Dataset
    vg = VisualGenome(cfgs.ann_file, cfgs.vocab_path + 'relations_vocab.txt',
                      cfgs.vocab_path + 'objects_vocab.txt')
    train_dataset = BoxesDataset(vg, cfgs.split_path, cfgs.img_path,
                                 split='train')
    train_loader = data.DataLoader(dataset=train_dataset,
                                   batch_size=cfgs.batch_size,
                                   num_workers=1,
                                   shuffle=True,
                                   collate_fn=collate_fn)
    val_dataset = BoxesDataset(vg, cfgs.split_path, cfgs.img_path, split='val')
    val_loader = data.DataLoader(dataset=val_dataset,
                                 batch_size=cfgs.batch_size,
                                 shuffle=False,
                                 collate_fn=collate_fn)

    # Model
    model = Classifier(0.5)
    if wandb is not None:
        wandb.watch(model)

    if cfgs.resume:
        checkpoint = torch.load(cfgs.checkpoint + 'checkpoint_final.pkl')
        model.load_state_dict(checkpoint['model_state_dict'])
        epoch = checkpoint['epoch']
        learning_rate = checkpoint['learning_rate']
        train_loss_epoch = checkpoint['train_loss_epoch']
        train_acc_epoch = checkpoint['train_acc_epoch']
        test_acc_epoch = checkpoint['test_acc_epoch']
    else:
        epoch = 0
        learning_rate = cfgs.learning_rate
        train_loss_epoch = []
        train_acc_epoch = []
        test_acc_epoch = []

    if cfgs.mGPUs:
        model = nn.DataParallel(model)
    if torch.cuda.is_available():
        model.cuda()

    # Optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optim.Adam(list(model.parameters()), lr=learning_rate)
    # scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)
    best_acc = -1.0

    cfg = get_cfg()
    cfg.merge_from_file(
        "../../configs/VG-Detection/faster_rcnn_R_101_C4_attr_caffemaxpool.yaml")
    cfg.MODEL.RPN.POST_NMS_TOPK_TEST = 300
    cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.6
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.2
    # VG weights
    cfg.MODEL.WEIGHTS = "http://nlp.cs.unc.edu/models/faster_rcnn_from_caffe_attr_original.pkl"
    predictor = DefaultPredictor(cfg)
    csv.field_size_limit(sys.maxsize)

    # for epoch in range(cfgs.max_epochs):
    while epoch < cfgs.max_epochs:
        model.train()
        train_loss = 0.0
        train_acc = 0.0
        count = 0
        accuracy_count = 0
        progress_bar = tqdm(train_loader,
                            desc='|Train Epoch {}'.format(epoch),
                            leave=False)
        for i, batch in enumerate(progress_bar):
            count += 1
            img_id, obj_boxes, sub_boxes, union_boxes, labels = batch
            labels = labels.cuda()
            with torch.no_grad():
                obj_feature, sub_feature, union_feature = extract_feature(
                    img_id, predictor, obj_boxes, sub_boxes, union_boxes, cfgs)
            outputs = model(obj_feature, sub_feature, union_feature)
            labels_reshape = torch.reshape(labels,
                                           (labels.size(0) * labels.size(1),))
            labels_nopad = labels_reshape[labels_reshape[:] >= 0]

            optimizer.zero_grad()
            loss = criterion(outputs, labels_nopad.long())
            loss.backward()
            optimizer.step()

            # Print statistics
            train_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            accuracy = torch.sum(predicted == labels_nopad).item()
            train_acc += accuracy
            info_log = {
                'train_loss': '{:.3f}'.format(loss.item()),
                'train_accuracy': '{:.3f}'.format(accuracy / labels_nopad.size(0))
            }
            progress_bar.set_postfix(info_log, refresh=True)
            if wandb is not None:
                wandb.log(info_log)
            accuracy_count += labels_nopad.size(0)
            # if count > 10:
            #     break

        loss_aveg = float(train_loss) / count
        acc_aveg = float(train_acc) / accuracy_count
        print('Train Epoch: {}, train_loss: {}, train_accuracy: {}.'.format(
            epoch, loss_aveg, acc_aveg))
        train_loss_epoch.append(loss_aveg)
        train_acc_epoch.append(acc_aveg)
        if wandb is not None:
            wandb.log({
                'train_loss_epoch': loss_aveg,
                'train_acc_epoch': acc_aveg
            })
        # scheduler.step()

        # Calculate the test accuracy
        model.eval()
        if (epoch + 1) % 5 == 0:
            with torch.no_grad():
                test_total = 0
                test_correct = 0
                process_bar_test = tqdm(val_loader,
                                        desc='|Test Epoch {}'.format(epoch),
                                        leave=False)
                for i, batch in enumerate(process_bar_test):
                    img_id, obj_boxes, sub_boxes, union_boxes, labels = batch
                    labels = labels.cuda()
                    obj_feature, sub_feature, union_feature = extract_feature(
                        img_id, predictor, obj_boxes, sub_boxes, union_boxes,
                        cfgs)
                    outputs = model(obj_feature, sub_feature, union_feature)
                    labels_reshape = torch.reshape(
                        labels, (labels.size(0) * labels.size(1),))
                    labels_nopad = labels_reshape[labels_reshape[:] >= 0]
                    _, predicted = torch.max(outputs, 1)
                    test_total += labels_nopad.size(0)
                    correct = torch.sum(predicted == labels_nopad).item()
                    test_correct += correct
                    process_bar_test.set_postfix(
                        {'test_accuracy': '{:.3f}'.format(
                            correct / labels_nopad.size(0))},
                        refresh=True)
                test_acc_aveg = float(test_correct) / test_total
                if wandb is not None:
                    wandb.log({'test_acc_epoch': test_acc_aveg})

                if acc_aveg > best_acc:
                    if cfgs.mGPUs:
                        torch.save({
                            'epoch': epoch,
                            'model_state_dict': model.module.state_dict(),
                            'learning_rate': cfgs.learning_rate,
                            'loss': loss_aveg,
                            'accuracy': acc_aveg,
                            'test_accuracy': test_acc_aveg
                        }, cfgs.checkpoint + 'checkpoint_best.pkl')
                    else:
                        torch.save({
                            'epoch': epoch,
                            'model_state_dict': model.state_dict(),
                            'learning_rate': cfgs.learning_rate,
                            'loss': loss_aveg,
                            'accuracy': acc_aveg,
                            'test_accuracy': test_acc_aveg
                        }, cfgs.checkpoint + 'checkpoint_best.pkl')
                print('Epoch: {}, Accuracy of the model on testset: {}'.format(
                    epoch, test_acc_aveg))
                test_acc_epoch.append(test_acc_aveg)

        epoch += 1
        if epoch == cfgs.max_epochs:
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'learning_rate': cfgs.learning_rate,
                'train_loss_epoch': train_loss_epoch,
                'train_acc_epoch': train_acc_epoch,
                'test_acc_epoch': test_acc_epoch
            }, cfgs.checkpoint + 'checkpoint_final.pkl')
def __init__(self, detection_thresh=0.3, use_default_weights=True):
    self.detection_thresh = detection_thresh
    # Configure predictor with a COCO model
    self.cfg = self.get_model_cfg(use_default_weights=use_default_weights)
    self.predictor = DefaultPredictor(self.cfg)
    print('created segmenter')
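# get_model_cfg is not shown here; a plausible sketch, assuming it builds a
# COCO-pretrained Mask R-CNN config from the model zoo and applies
# detection_thresh. The method body and model choice are assumptions.
from detectron2 import model_zoo
from detectron2.config import get_cfg

def get_model_cfg(self, use_default_weights=True):
    model_file = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(model_file))
    if use_default_weights:
        # Pull the matching pretrained weights from the model zoo
        cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_file)
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = self.detection_thresh
    return cfg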
def run_model(self, save_crop_output: bool, save_anno_output: bool,
              cpu_mode: bool):
    # Set to CPU mode if the system does not have an NVIDIA GPU
    output_list = []
    self.register_dataset()

    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(
        "COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml"))
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.9
    cfg.MODEL.WEIGHTS = "wb_model.pth"
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 2
    if cpu_mode:
        cfg.MODEL.DEVICE = 'cpu'
    predictor = DefaultPredictor(cfg)

    error_count: int = 0
    # print("Model imported: {:.3f}".format(time.time() - t0))
    # image_file = "input/RC663_0040.76-0047.60_m_DRY.jpg"
    for image_file in os.listdir(self.InputDir):
        image_path = self.InputDir + '/' + image_file
        img: np.ndarray = cv2.imread(image_path)
        # print("Processing file: {:.3f}".format(time.time() - t0))
        if isinstance(img, np.ndarray):  # only process image files
            print("Processing image: " + image_file)
            """
            # Resize image to save on computation time
            dW = 100  # desired width
            imgRe: np.ndarray
            # if img.ndim == 2:  # black and white
            #     (tH, tW) = img.shape
            # else:  # colour
            (tH, tW, tmp) = img.shape
            if tW > dW:
                imgRe = imutils.resize(img, width=dW)  # resize the image
                wRatio = tW / dW
            else:
                imgRe = img.copy()
                wRatio = 1
            """
            output: Instances = predictor(img)["instances"]  # predict
            obj: dict = output.get_fields()
            scores: np.ndarray = obj['scores'].cpu().numpy()
            maxscore: float = 0
            indmaxscore: int = 0
            for i in range(len(scores)):  # check every score, including the last
                if scores[i] > maxscore:
                    maxscore = scores[i]
                    indmaxscore = i
            if len(scores) > 0:
                box: np.ndarray = obj['pred_boxes'].tensor.cpu().numpy()[indmaxscore]
                # box = box * wRatio
            else:
                box = np.ones(1) * (-1)
            # outputlist.append(output)
            # outputDict[image_file] = box

            anno_out_filename = ""
            out_file_name = ""
            if save_anno_output:
                # Draw output and save to png
                v = Visualizer(img[:, :, ::-1], MetadataCatalog.get("wb_test"),
                               scale=1.0)
                result: VisImage = v.draw_instance_predictions(output.to("cpu"))
                result_image: np.ndarray = result.get_image()[:, :, ::-1]
                # Get the file name without extension; -1 removes the trailing "."
                anno_out_filename: str = self.OutputWBAnnoDir + '/' + re.search(
                    r"(.*)\.", image_file).group(0)[:-1]
                anno_out_filename += "_WB_Anno.png"
                cv2.imwrite(anno_out_filename, result_image)
                # Code for displaying the image:
                # imgout = cv2.imread(out_file_name)
                # cv2.imshow('Output Image', imgout)
                # cv2.waitKey(0)

            if len(scores) > 0:
                if save_crop_output:
                    # Crop and save the image
                    # https://www.pyimagesearch.com/2014/01/20/basic-image-manipulations-in-python-and-opencv-resizing-scaling-rotating-and-cropping/
                    crop_img = img[box[1].astype(int):box[3].astype(int),
                                   box[0].astype(int):box[2].astype(int)]
                    # Get the file name without extension; -1 removes the trailing "."
                    out_file_name: str = self.OutputWBDir + '/' + re.search(
                        r"(.*)\.", image_file).group(0)[:-1]
                    out_file_name += "_WB_Cropped.png"
                    cv2.imwrite(out_file_name, crop_img)
                # Add to the output list
                # outputDict[image_file] = (out_file_name, anno_out_filename)
                output_list.append((image_file, image_path, out_file_name,
                                    anno_out_filename))
            else:
                print("WARNING: WHITE BOARD NOT FOUND IN IMAGE FILE: " +
                      image_file + ". SKIPPING IMAGE.")
                error_count += 1

    return output_list, error_count
def make_predictions():
    # Some basic setup:
    # Setup detectron2 logger
    import detectron2
    from detectron2.utils.logger import setup_logger
    setup_logger()

    # Import some common libraries
    import numpy as np
    import glob
    import os
    import cv2
    import random
    from google.colab.patches import cv2_imshow

    # Import some common detectron2 utilities
    from detectron2 import model_zoo
    from detectron2.engine import DefaultPredictor
    from detectron2.config import get_cfg
    from detectron2.utils.visualizer import Visualizer
    from detectron2.data import MetadataCatalog
    from detectron2.structures import BoxMode

    class_names = ["specularity", "saturation", "artifact", "blur",
                   "contrast", "bubbles", "instrument", "blood"]

    model_pth = ""
    if current_model == "retinanet":
        model_path = "/home/ws2080/Desktop/codes/detectron/model_retinanet/output_32"
        model_pth = "model_0059999.pth"
    elif current_model == "faster":
        model_path = "/home/ws2080/Desktop/codes/detectron/model_faster_rcnn_R_50_FPN_3x/output_9"
        model_pth = "model_0139999.pth"
    elif current_model == "cascade":
        model_path = "/home/ws2080/Desktop/codes/detectron/model_cascade_mask_rcnn_R_50_FPN_3x/output_24"
        model_pth = "model_0059999.pth"

    cfg = get_cfg()
    cfg.merge_from_file(model_path + "/config.yaml")
    cfg.MODEL.WEIGHTS = os.path.join(model_path, model_pth)
    cfg.MODEL.RETINANET.SCORE_THRESH_TEST = 0.1
    cfg.DATASETS.TEST = ("ead_validation_1",)
    predictor = DefaultPredictor(cfg)

    # Make predictions
    for image_folder in glob.glob(temp_augmented_image_dir + "*/"):
        prediction_output_dir = temp_prediction_dir + image_folder.split("/")[-2] + "/"
        # Predictions are made for the images inside image_folder and written
        # into prediction_output_dir
        for im_path in glob.glob(image_folder + "*.jpg"):
            im_name = im_path.split("/")[-1].split(".")[0]
            saved_image_path = prediction_output_dir + im_name + ".txt"
            # Predict and save the result as saved_image_path
            im = cv2.imread(im_path)
            outputs = predictor(im)
            total_detection = len(outputs["instances"])
            temp_detection_list = []
            detections = outputs["instances"]
            for i in range(total_detection):
                temp_detection = (
                    class_names[int(detections.pred_classes[i])] + " " +
                    str(float(detections.scores[i])) + " " +
                    str(float(detections.pred_boxes.tensor[i, 0])) + " " +
                    str(float(detections.pred_boxes.tensor[i, 1])) + " " +
                    str(float(detections.pred_boxes.tensor[i, 2])) + " " +
                    str(float(detections.pred_boxes.tensor[i, 3])))
                temp_detection_list.append(temp_detection)
            with open(saved_image_path, 'w') as f:
                for item in temp_detection_list:
                    f.write("%s\n" % item)
def KITTIMOTS_training_and_evaluation_task(model_name, model_file):
    path = os.path.join(SAVE_PATH, 'train_task', model_name)
    if not os.path.exists(path):
        os.makedirs(path)

    # Load data
    print('Loading Data.')
    dataloader = KITTIMOTS_Dataloader()

    def kittimots_train():
        return dataloader.get_dicts(train_flag=True)

    def kittimots_test():
        return dataloader.get_dicts(train_flag=False)

    DatasetCatalog.register("KITTIMOTS_train", kittimots_train)
    MetadataCatalog.get("KITTIMOTS_train").set(
        thing_classes=list(KITTI_CATEGORIES.keys()))
    DatasetCatalog.register("KITTIMOTS_test", kittimots_test)
    MetadataCatalog.get("KITTIMOTS_test").set(
        thing_classes=list(KITTI_CATEGORIES.keys()))
    NUM_IMGS = len(kittimots_train())
    print(NUM_IMGS)

    # Parameters
    print('Loading Model.')
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(model_file))
    cfg.DATASETS.TRAIN = ('KITTIMOTS_train', )
    cfg.DATASETS.TEST = ('KITTIMOTS_test', )
    cfg.DATALOADER.NUM_WORKERS = 0
    cfg.OUTPUT_DIR = SAVE_PATH
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_file)
    cfg.SOLVER.IMS_PER_BATCH = 4
    cfg.SOLVER.BASE_LR = 0.00025
    cfg.SOLVER.MAX_ITER = NUM_IMGS // cfg.SOLVER.IMS_PER_BATCH + 1
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 2

    # Training
    print('Training....')
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    trainer = DefaultTrainer(cfg)
    val_loss = ValidationLoss(cfg)
    trainer.register_hooks([val_loss])
    # Swap the last two hooks so the validation-loss hook runs before the writer
    trainer._hooks = trainer._hooks[:-2] + trainer._hooks[-2:][::-1]
    trainer.resume_or_load(resume=False)
    trainer.train()

    # Evaluation
    print('Evaluating....')
    evaluator = COCOEvaluator("KITTIMOTS_test", cfg, False,
                              output_dir="./output/")
    trainer.model.load_state_dict(val_loss.weights)
    trainer.test(cfg, trainer.model, evaluators=[evaluator])
    plot_validation_loss(cfg)

    # Qualitative results
    print('Inference on trained model')
    predictor = DefaultPredictor(cfg)
    predictor.model.load_state_dict(trainer.model.state_dict())
    dataloader = Inference_Dataloader()
    dataset = dataloader.load_data()
    print('Getting Qualitative Results...')
    for i, img_path in enumerate(dataset['test'][:20]):
        img = cv2.imread(img_path)
        outputs = predictor(img)
        v = Visualizer(img[:, :, ::-1],
                       metadata=MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
                       scale=0.8,
                       instance_mode=ColorMode.IMAGE)
        v = v.draw_instance_predictions(outputs['instances'].to('cpu'))
        cv2.imwrite(
            os.path.join(path, 'Inference_' + model_name + '_trained_' +
                         str(i) + '.png'),
            v.get_image()[:, :, ::-1])
def demo(cfg):
    """
    Run inference on an input video or stream from webcam.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Run demo with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = build.build_model(cfg)
    model.eval()
    misc.log_model_info(model, cfg)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        ckpt = cfg.TEST.CHECKPOINT_FILE_PATH
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        ckpt = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint is found in TEST.CHECKPOINT_FILE_PATH or in the
        # current checkpoint folder, try to load a checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        ckpt = cfg.TRAIN.CHECKPOINT_FILE_PATH
    else:
        raise NotImplementedError("Unknown way to load checkpoint.")

    cu.load_checkpoint(
        ckpt,
        model,
        cfg.NUM_GPUS > 1,
        None,
        inflation=False,
        convert_from_caffe2="caffe2" in [cfg.TEST.CHECKPOINT_TYPE,
                                         cfg.TRAIN.CHECKPOINT_TYPE],
    )

    if cfg.DETECTION.ENABLE:
        # Load object detector from detectron2.
        dtron2_cfg_file = cfg.DEMO.DETECTRON2_OBJECT_DETECTION_MODEL_CFG
        dtron2_cfg = get_cfg()
        dtron2_cfg.merge_from_file(model_zoo.get_config_file(dtron2_cfg_file))
        dtron2_cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.9
        dtron2_cfg.MODEL.WEIGHTS = (
            cfg.DEMO.DETECTRON2_OBJECT_DETECTION_MODEL_WEIGHTS)
        logger.info("Initialize detectron2 model.")
        object_predictor = DefaultPredictor(dtron2_cfg)
        # Load the labels of the AVA dataset.
        with open(cfg.DEMO.LABEL_FILE_PATH) as f:
            labels = f.read().split("\n")[:-1]
        palette = np.random.randint(64, 128, (len(labels), 3)).tolist()
        boxes = []
        logger.info("Finish loading detectron2")
    else:
        # Load the labels of the Kinetics-400 dataset.
        labels_df = pd.read_csv(cfg.DEMO.LABEL_FILE_PATH)
        labels = labels_df["name"].values

    frame_provider = VideoReader(cfg)

    seq_len = cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE
    frames = []
    pred_labels = []
    s = 0.0
    for able_to_read, frame in tqdm.tqdm(frame_provider):
        if not able_to_read:
            # When the end frame is reached, clear the buffer and continue
            # to the next one.
            frames = []
            break

        if len(frames) != seq_len:
            frame_processed = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_processed = scale(cfg.DATA.TEST_CROP_SIZE, frame_processed)
            frames.append(frame_processed)
            if cfg.DETECTION.ENABLE and len(frames) == seq_len // 2 - 1:
                mid_frame = frame

        if len(frames) == seq_len:
            start = time()
            if cfg.DETECTION.ENABLE:
                outputs = object_predictor(mid_frame)
                fields = outputs["instances"]._fields
                pred_classes = fields["pred_classes"]
                selection_mask = pred_classes == 0
                # Acquire person boxes.
                pred_classes = pred_classes[selection_mask]
                pred_boxes = fields["pred_boxes"].tensor[selection_mask]
                boxes = cv2_transform.scale_boxes(
                    cfg.DATA.TEST_CROP_SIZE,
                    pred_boxes,
                    frame_provider.display_height,
                    frame_provider.display_width,
                )
                boxes = torch.cat(
                    [torch.full((boxes.shape[0], 1), float(0)).cuda(), boxes],
                    axis=1,
                )
            inputs = torch.from_numpy(np.array(frames)).float() / 255.0
            inputs = tensor_normalize(inputs, cfg.DATA.MEAN, cfg.DATA.STD)

            # T H W C -> C T H W.
            inputs = inputs.permute(3, 0, 1, 2)
            # 1 C T H W.
            inputs = inputs.unsqueeze(0)

            if cfg.MODEL.ARCH in cfg.MODEL.SINGLE_PATHWAY_ARCH:
                # Sample frames for the fast pathway.
                index = torch.linspace(0, inputs.shape[2] - 1,
                                       cfg.DATA.NUM_FRAMES).long()
                inputs = [torch.index_select(inputs, 2, index)]
            elif cfg.MODEL.ARCH in cfg.MODEL.MULTI_PATHWAY_ARCH:
                # Sample frames for the fast pathway.
                index = torch.linspace(0, inputs.shape[2] - 1,
                                       cfg.DATA.NUM_FRAMES).long()
                fast_pathway = torch.index_select(inputs, 2, index)
                # Sample frames for the slow pathway.
                index = torch.linspace(
                    0,
                    fast_pathway.shape[2] - 1,
                    fast_pathway.shape[2] // cfg.SLOWFAST.ALPHA,
                ).long()
                slow_pathway = torch.index_select(fast_pathway, 2, index)
                inputs = [slow_pathway, fast_pathway]
            else:
                raise NotImplementedError("Model arch {} is not in {}".format(
                    cfg.MODEL.ARCH,
                    cfg.MODEL.SINGLE_PATHWAY_ARCH + cfg.MODEL.MULTI_PATHWAY_ARCH,
                ))

            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list, )):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)

            # Perform the forward pass.
            if cfg.DETECTION.ENABLE:
                # When there is nothing in the scene, use a dummy variable
                # to disable all computations below.
                if not len(boxes):
                    preds = torch.tensor([])
                else:
                    preds = model(inputs, boxes)
            else:
                preds = model(inputs)

            # Gather all the predictions across all the devices to perform
            # ensemble.
            if cfg.NUM_GPUS > 1:
                preds = du.all_gather(preds)[0]

            if cfg.DETECTION.ENABLE:
                # This post-processing is intentionally assigned to the CPU
                # since my laptop GPU (an RTX 2080) runs out of memory; if
                # your GPU is more powerful, I'd recommend changing this
                # section so that CUDA does the processing.
                preds = preds.cpu().detach().numpy()
                pred_masks = preds > 0.1
                label_ids = [np.nonzero(pred_mask)[0]
                             for pred_mask in pred_masks]
                pred_labels = [
                    [labels[label_id] for label_id in perbox_label_ids]
                    for perbox_label_ids in label_ids
                ]
                # I'm unsure how detectron2 rescales boxes to the original
                # image size, so I rescale slowfast's input boxes back
                # instead; it's safer, and it still works even if the boxes
                # were not rescaled by cv2_transform.rescale_boxes.
                boxes = boxes.cpu().detach().numpy()
                ratio = (np.min([
                    frame_provider.display_height,
                    frame_provider.display_width,
                ]) / cfg.DATA.TEST_CROP_SIZE)
                boxes = boxes[:, 1:] * ratio
            else:
                # Option 1: single-label inference selected from the highest
                # probability entry.
                # label_id = preds.argmax(-1).cpu()
                # pred_label = labels[label_id]
                # Option 2: multi-label inference selected from probability
                # entries > threshold.
                label_ids = (torch.nonzero(preds.squeeze() > 0.1)
                             .reshape(-1).cpu().detach().numpy())
                pred_labels = labels[label_ids]
                logger.info(pred_labels)
                if not list(pred_labels):
                    pred_labels = ["Unknown"]

            # # Option 1: remove the oldest frame in the buffer to make room
            # # for the new one.
            # frames.pop(0)
            # Option 2: empty the buffer.
            frames = []
            s = time() - start

        if cfg.DETECTION.ENABLE and pred_labels and boxes.any():
            for box, box_labels in zip(boxes.astype(int), pred_labels):
                cv2.rectangle(
                    frame,
                    tuple(box[:2]),
                    tuple(box[2:]),
                    (0, 255, 0),
                    thickness=2,
                )
                label_origin = box[:2]
                for label in box_labels:
                    label_origin[-1] -= 5
                    (label_width, label_height), _ = cv2.getTextSize(
                        label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
                    cv2.rectangle(
                        frame,
                        (label_origin[0], label_origin[1] + 5),
                        (
                            label_origin[0] + label_width,
                            label_origin[1] - label_height - 5,
                        ),
                        palette[labels.index(label)],
                        -1,
                    )
                    cv2.putText(
                        frame,
                        label,
                        tuple(label_origin),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.5,
                        (255, 255, 255),
                        1,
                    )
                    label_origin[-1] -= label_height + 5

        if not cfg.DETECTION.ENABLE:
            # Display predicted labels on the frame.
            y_offset = 50
            cv2.putText(
                frame,
                "Action:",
                (10, y_offset),
                fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                fontScale=0.65,
                color=(0, 235, 0),
                thickness=2,
            )
            for pred_label in pred_labels:
                y_offset += 30
                cv2.putText(
                    frame,
                    "{}".format(pred_label),
                    (20, y_offset),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=0.65,
                    color=(0, 235, 0),
                    thickness=2,
                )

        # Display prediction speed.
        cv2.putText(
            frame,
            "Speed: {:.2f}s".format(s),
            (10, 25),
            fontFace=cv2.FONT_HERSHEY_SIMPLEX,
            fontScale=0.65,
            color=(0, 235, 0),
            thickness=2,
        )
        frame_provider.display(frame)

        # Hit Esc to quit the demo.
        key = cv2.waitKey(1)
        if key == 27:
            break

    frame_provider.clean()
def main(config):
    root = expanduser(config["base"]["root"])
    imgs_root = expanduser(config["base"]["imgs_root"])
    jsons_dir = join(root, "jsons")
    model_dir = join(root, "outputs")
    scale = float(config["test_model"]["scale"])
    do_show = config["test_model"]["do_show"]

    register_data(jsons_dir, imgs_root)

    # Need this datasets line so the metadata gets its .thing_classes attribute
    datasets = DatasetCatalog.get("test_data")
    metadata = MetadataCatalog.get("test_data")

    # Read the cfg back in:
    with open(join(model_dir, "cfg.txt"), "r") as f:
        cfg = f.read()
    # Turn into a CfgNode object:
    cfg = CfgNode.load_cfg(cfg)

    # Use the weights from the model trained on our custom dataset:
    cfg.MODEL.WEIGHTS = join(model_dir, "model_final.pth")  # TODO: have option to use snapshot instead
    # Keep this small so a PR curve can be built over a broad range of scores
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.01
    # cfg.DATASETS.TEST = ("val_data", )  # should already be saved from train_model.py

    print("Generating predictor ...")
    predictor = DefaultPredictor(cfg)

    # For saving images with predicted labels:
    output_imgs_dir = join(model_dir, "test_pred_imgs")
    makedirs(output_imgs_dir, exist_ok=True)

    # For saving detection predictions as csv:
    output_csv = join(model_dir, "all_test_preds.csv")
    csv_file_handle = open(output_csv, "w", newline="")
    atexit.register(csv_file_handle.close)
    col_names = ["img", "x1", "y1", "x2", "y2", "score", "thing", "dummy_id"]
    csv_writer = csv.DictWriter(csv_file_handle, fieldnames=col_names)
    csv_writer.writeheader()

    # Select 5 random images to visualize,
    # but save the prediction results for all imgs:
    rando_idxs = np.random.choice(range(len(datasets)), 5,
                                  replace=False).tolist()
    for i, d in enumerate(datasets):
        print(f"Predicting on image {i+1} of {len(datasets)} ...", end="\r")
        id = d["image_id"]
        img = cv2.imread(d["file_name"])
        detected = predictor(img)

        # Visualize:
        visualizer = Visualizer(img[:, :, ::-1], metadata=metadata,
                                scale=scale, instance_mode=ColorMode)
        visualizer = visualizer.draw_instance_predictions(
            detected["instances"].to("cpu"))

        # Save the first 5 images from the random draw:
        if i in rando_idxs:
            pred_img = visualizer.get_image()[:, :, ::-1]
            cv2.imwrite(join(output_imgs_dir,
                             ("predicted_" + basename(d["file_name"]))),
                        pred_img)
            if do_show:
                cv2.imshow(f"prediction on image {id}", pred_img)
                print(f"Press any key to go to the next image ({i+1}/5) ...")
                key = cv2.waitKey(0) & 0xFF
                if key == ord("q"):
                    print("Quitting ...")
                    break
                cv2.destroyAllWindows()

        # Stream the predicted box coords and scores to a csv:
        preds = detected['instances'].to('cpu')
        boxes = preds.pred_boxes
        thing_ids = preds.pred_classes.tolist()
        scores = preds.scores
        num_boxes = np.array(scores.size())[0]
        # Use j here to avoid clobbering the outer image index i
        for j in range(0, num_boxes):
            coords = boxes[j].tensor.numpy()
            score = float(scores[j].numpy())
            thing_id = thing_ids[j]  # is int
            thing_class = metadata.thing_classes[thing_id]
            csv_writer.writerow({col_names[0]: basename(d["file_name"]),
                                 col_names[1]: int(coords[0][0]),  # x1
                                 col_names[2]: int(coords[0][1]),  # y1
                                 col_names[3]: int(coords[0][2]),  # x2
                                 col_names[4]: int(coords[0][3]),  # y2
                                 col_names[5]: score,              # score
                                 col_names[6]: thing_class,        # thing
                                 col_names[7]: j})                 # dummy id

    print(f"Finished predicting on all {len(datasets)} images from the test "
          "data fraction.")
    print(f"Results are stored in {output_csv}")
    print(f"5 sample test images are stored in {output_imgs_dir}\n"
          "Note that the 5 sample test images show all detections with a "
          "score greater than 0.01. This low score cutoff is for test "
          "purposes and is intentional. You should expect to see many false "
          "positive labels.\n")

    # Clear GPU memory
    torch.cuda.empty_cache()
import pdb

config_file = "./configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml"
model_file = "./models/COCO-InstanceSegmentation/X-101-32x8d.pkl"
data_path = sys.argv[1]
feat_path = sys.argv[2]
conf_th = float(sys.argv[3])

cfg = get_cfg()
cfg.merge_from_file(config_file)
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = conf_th
cfg.MODEL.WEIGHTS = model_file
predictor = DefaultPredictor(cfg)

vocab = predictor.metadata.thing_classes + ["__background__"]
with open(f"{feat_path}/vocab.txt", 'w') as f:
    for obj in vocab:
        f.write(obj + '\n')

save_dir = os.path.join(feat_path, f"feat_th{conf_th}")
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

img_fnames = os.listdir(data_path)
for fname in tqdm.tqdm(img_fnames):
    im = cv2.imread(os.path.join(data_path, fname))
    if im is None:
        print(f"load image failed, skipping {fname} ...")
        # The original snippet is truncated here; skipping the unreadable
        # image is the natural continuation of the guard above.
        continue
def run(self):
    self.beginTaskRun()
    # Seed so masks + boxes + labels keep the same colors across runs
    random.seed(30)

    # Get input
    input = self.getInput(0)
    srcImage = input.getImage()

    # Get outputs
    mask_output = self.getOutput(0)
    output_graph = self.getOutput(2)
    output_graph.setImageIndex(1)
    output_graph.setNewLayer("PointRend")

    # Get parameters
    param = self.getParam()

    # Predictor
    if not self.loaded:
        print("Loading model")
        if not param.cuda:
            self.cfg.MODEL.DEVICE = "cpu"
            self.deviceFrom = "cpu"
        else:
            self.deviceFrom = "gpu"
        self.loaded = True
        self.predictor = DefaultPredictor(self.cfg)
    # Reload on GPU if CUDA is now checked but the model was loaded on CPU
    elif self.deviceFrom == "cpu" and param.cuda:
        print("Loading model")
        self.cfg = get_cfg()
        add_pointrend_config(self.cfg)
        self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = self.threshold
        self.cfg.merge_from_file(self.folder + self.path_to_config)
        self.cfg.MODEL.WEIGHTS = self.folder + self.path_to_model
        self.deviceFrom = "gpu"
        self.predictor = DefaultPredictor(self.cfg)
    # Reload on CPU if CUDA is now unchecked but the model was loaded on GPU
    elif self.deviceFrom == "gpu" and not param.cuda:
        print("Loading model")
        self.cfg = get_cfg()
        add_pointrend_config(self.cfg)
        self.cfg.MODEL.DEVICE = "cpu"
        self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = self.threshold
        self.cfg.merge_from_file(self.folder + self.path_to_config)
        self.cfg.MODEL.WEIGHTS = self.folder + self.path_to_model
        self.deviceFrom = "cpu"
        self.predictor = DefaultPredictor(self.cfg)

    outputs = self.predictor(srcImage)

    # Get output instances
    boxes = outputs["instances"].pred_boxes
    scores = outputs["instances"].scores
    classes = outputs["instances"].pred_classes
    masks = outputs["instances"].pred_masks

    # To numpy
    if param.cuda:
        boxes_np = boxes.tensor.cpu().numpy()
        scores_np = scores.cpu().numpy()
        classes_np = classes.cpu().numpy()
    else:
        boxes_np = boxes.tensor.numpy()
        scores_np = scores.numpy()
        classes_np = classes.numpy()

    self.emitStepProgress()

    # Keep only the results with probability > threshold
    scores_np_tresh = list()
    for s in scores_np:
        if float(s) > param.proba:
            scores_np_tresh.append(s)

    if len(scores_np_tresh) > 0:
        # Create random colors for masks + boxes + labels
        colors = [[0, 0, 0]]
        for i in range(len(scores_np_tresh)):
            colors.append([
                random.randint(0, 255),
                random.randint(0, 255),
                random.randint(0, 255), 255
            ])

        # Text labels with scores
        labels = None
        class_names = MetadataCatalog.get(
            self.cfg.DATASETS.TRAIN[0]).get("thing_classes")
        if classes is not None and class_names is not None and len(class_names) > 1:
            labels = [class_names[i] for i in classes]
        if scores_np_tresh is not None:
            if labels is None:
                labels = ["{:.0f}%".format(s * 100) for s in scores_np_tresh]
            else:
                labels = [
                    "{} {:.0f}%".format(l, s * 100)
                    for l, s in zip(labels, scores_np_tresh)
                ]

        # Show boxes + labels
        for i in range(len(scores_np_tresh)):
            prop_text = core.GraphicsTextProperty()
            # Start at i + 1: the first color is reserved for the mask background
            prop_text.color = colors[i + 1]
            prop_text.font_size = 7
            prop_rect = core.GraphicsRectProperty()
            prop_rect.pen_color = colors[i + 1]
            prop_rect.category = labels[i]
            output_graph.addRectangle(
                float(boxes_np[i][0]), float(boxes_np[i][1]),
                float(boxes_np[i][2] - boxes_np[i][0]),
                float(boxes_np[i][3] - boxes_np[i][1]), prop_rect)
            output_graph.addText(labels[i], float(boxes_np[i][0]),
                                 float(boxes_np[i][1]), prop_text)

        self.emitStepProgress()

        # Label mask
        nb_objects = len(masks[:len(scores_np_tresh)])
        if nb_objects > 0:
            masks = masks[:nb_objects, :, :, None]
            mask_or = masks[0] * nb_objects
            for j in range(1, nb_objects):
                mask_or = torch.max(mask_or, masks[j] * (nb_objects - j))
            mask_numpy = mask_or.byte().cpu().numpy()
            mask_output.setImage(mask_numpy)

            # Output mask applied to our original image:
            # reverse the colors to match the box colors
            c = colors[1:]
            c = c[::-1]
            colors = [[0, 0, 0]]
            for col in c:
                colors.append(col)
            self.setOutputColorMap(1, 0, colors)
    else:
        self.emitStepProgress()

    self.forwardInputImage(0, 1)

    # Step progress bar
    self.emitStepProgress()

    # Call endTaskRun to finalize process
    self.endTaskRun()
    metavar='D',
    help="folder where data is located. train_images/ and val_images/ need "
         "to be found in the folder")
args = parser.parse_args()

# Define a Mask R-CNN model in Detectron2
cfg = get_cfg()
cfg.merge_from_file(
    "detectron2_repo/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml"
)
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7  # Detection threshold
cfg.MODEL.ROI_HEADS.NMS = 0.4  # Non-maximum suppression threshold
cfg.MODEL.WEIGHTS = "detectron2://COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x/139653917/model_final_2d9806.pkl"
model = DefaultPredictor(cfg)


def detect_birds(model,
                 input_folder,
                 output_folder_crop,
                 generate_masks=False,
                 output_folder_mask="mask_dataset"):
    kernel = np.ones((25, 25), 'uint8')
    # Iterate over train, val and test
    for data_folder in list(os.listdir(input_folder)):
        non_cropped = 0
        non_cropped_names = []
        num_imgs = 0
        directory = input_folder + '/' + data_folder
        print("\nDetecting birds on :", data_folder)
def build_predictor_vis(cfg):
    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_0099999.pth")
    # cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set the testing threshold for this model
    cfg.DATASETS.TEST = (_name, )
    predictor = DefaultPredictor(cfg)
    # print(predictor.model.roi)
    # exit()

    dataset_dicts = DatasetCatalog.get(_name)
    metadata = MetadataCatalog.get(_name)
    # print(dataset_dicts)
    for k in range(500):
        d = dataset_dicts[k]
        im = cv2.imread(d["file_name"])
        outputs = predictor(im)
        # exit()
        v_p = Visualizer(
            im[:, :, ::-1],
            metadata=metadata,
            scale=2,
            instance_mode=ColorMode.IMAGE_BW  # remove the colors of unsegmented pixels
        )
        v_gt = Visualizer(
            im[:, :, ::-1],
            metadata=metadata,
            scale=2,
            instance_mode=ColorMode.IMAGE_BW  # remove the colors of unsegmented pixels
        )
        v_p_nobbx = Visualizer(
            im[:, :, ::-1],
            metadata=metadata,
            scale=2,
            instance_mode=ColorMode.IMAGE_BW  # remove the colors of unsegmented pixels
        )
        v_p = v_p.draw_instance_predictions(outputs["instances"].to("cpu"))
        v_gt = v_gt.draw_dataset_dict(d)
        outputs["instances"].pred_boxes.tensor[:] = 0
        v_p_nobbx = v_p_nobbx.draw_instance_predictions(
            outputs["instances"].to("cpu"))

        # plt.figure(figsize=[30, 10])
        # plt.subplot(131), plt.imshow(v_p_nobbx.get_image()[:, :, ::-1])
        # plt.subplot(132), plt.imshow(v_p.get_image()[:, :, ::-1])
        # plt.subplot(133), plt.imshow(v_gt.get_image()[:, :, ::-1])
        cv2.imwrite(f"demo/predict_{d['image_id']}_im.png", im)
        # cv2.imwrite(f"demo/predict_{d['image_id']}_ps.png", v_p_nobbx.get_image())
        cv2.imwrite(f"demo/predict_{d['image_id']}_pd.png", v_p.get_image())
        cv2.imwrite(f"demo/predict_{d['image_id']}_gt.png", v_gt.get_image())
        # plt.imshow(v_p_nobbx.get_image()[:, :, ::-1])
        # plt.savefig(f"demo/predict_{d['image_id']}_ps.png", dpi=100), plt.close()
        # plt.imshow(v_p.get_image()[:, :, ::-1])
        # plt.savefig(f"demo/predict_{d['image_id']}_pb.png", dpi=100), plt.close()
        # plt.imshow(v_gt.get_image()[:, :, ::-1])
        # plt.savefig(f"demo/predict_{d['image_id']}_gt.png", dpi=100), plt.close()
        print(f"Saving demo/predict_{d['image_id']}.png")
def main(args):
    # Define inputs/outputs (some are hardcoded)
    dataset_dir = args.test_folder
    csv_origin = 'rle.csv'
    csv_submit = args.submit_csv
    ship_proba = args.ship_proba
    csv_probs = args.ship_proba_csv

    print("Start creating predictions, dataset from:", dataset_dir)
    test_dataset = create_test_datatset(dataset_dir)

    # Only mask images that include a ship.
    if csv_probs is not None:
        print("Combining classifier result:", csv_probs)
        df_probs = pd.read_csv(csv_probs)
        df_probs[['image_id', 'jpg']] = df_probs['file_name'].str.split(
            '.', expand=True)
        df_probs = df_probs[df_probs['ship_proba'] > ship_proba]
        test_dataset = [item for item in test_dataset
                        if item['image_id'] in df_probs['image_id'].values]
        print(test_dataset[0])
        # int(test_dataset)

    # Load model; config changes: predicting 768x768 masks, NMS 0
    DatasetCatalog.register("submit_test", create_test_datatset)
    od_dataset = MetadataCatalog.get("submit_test")
    # https://detectron2.readthedocs.io/modules/config.html
    # https://medium.com/@hirotoschwert/digging-into-detectron-2-part-5-6e220d762f9
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(args.config_file))
    cfg.MODEL.WEIGHTS = os.path.join(args.model_path, "model_final.pth")
    cfg.DATASETS.TEST = ("submit_test1", )
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = args.batch_size_per_image
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = args.num_classes
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.score_thres
    cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = args.nms_thres
    if args.anchor_sizes == 'small':
        cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[16, 32, 64, 128, 256, 512]]
    predictor = DefaultPredictor(cfg)

    outputs = []
    img_ids = []
    img_rle = []
    img_scores = []
    img_areas = []

    def masks_to_rle_csv(img_id, masks, scores):
        index = np.argsort(-scores)
        bg = np.zeros((768, 768), dtype=np.uint8)
        bg_list = []
        for i in index:
            mask = masks[i, :, :]
            if mask is None:
                continue
            mask_xor = (mask ^ bg) & mask
            area = mask_xor.sum()
            if area == 0:
                continue
            bg += mask_xor  # NO OVERLAPS...
            img_ids.append(img_id)
            img_rle.append(rle_encode(mask_xor))
            img_scores.append(scores[i])
            img_areas.append(area)

    for i in range(len(test_dataset)):
        img_id = test_dataset[i]['file_name'].split('/')[-1]
        inputs = cv2.imread(test_dataset[i]['file_name'])
        output = predictor(inputs)
        outputs.append({'ImageId': img_id, 'Output': output})
        if i % 1000 == 0:
            print(i, len(test_dataset))

    for i in range(len(outputs)):
        img_id, boxes, segms, img_score = extract_result(outputs[i])
        if segms is not None and len(segms) > 0:
            masks = np.array(segms)
            masks_to_rle_csv(img_id, masks, img_score)
        if i % 1000 == 0:
            print(i, len(outputs))

    df = pd.DataFrame({'ImageId': img_ids,
                       'EncodedPixels': img_rle,
                       'confidence': img_scores,
                       'area': img_areas})
    df = df[['ImageId', 'EncodedPixels', 'confidence', 'area']]  # reorder the columns
    df.to_csv(csv_origin, index=False, sep=str(','))
    df_submit = df
    print("Detectron2: %d instances, %d images" %
          (df_submit.shape[0], len(get_im_list(df_submit))))
    # df_submit = df_submit[(df_submit['area'] > 30) & (df_submit['confidence'] >= 0.80)]

    def generate_final_csv(df_with_ship, dataset_dir=dataset_dir):
        print("Detectron2: %d instances, %d images" %
              (df_with_ship.shape[0], len(get_im_list(df_with_ship))))
        im_no_ship = get_im_no_ship(df_with_ship, dataset_dir)
        df_empty = pd.DataFrame({'ImageId': im_no_ship,
                                 'EncodedPixels': get_empty_list(len(im_no_ship))})
        df_submit = pd.concat([df_with_ship, df_empty], sort=False)
        df_submit.drop(['area', 'confidence'], axis=1, inplace=True)
        df_submit.to_csv(csv_submit, index=False, sep=str(','))  # str(',') is needed
        print('Done!')

    generate_final_csv(df_submit)
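# The (mask ^ bg) & mask idiom in masks_to_rle_csv keeps only the pixels not
# yet claimed by a higher-scoring mask, so the RLE rows never overlap. A
# minimal numpy sketch with toy masks (shapes and values are illustrative
# only, not from the original code):
import numpy as np

bg = np.zeros((4, 4), dtype=np.uint8)   # accumulated coverage
a = np.zeros_like(bg); a[0:2, 0:2] = 1  # higher-scoring mask
b = np.zeros_like(bg); b[1:3, 1:3] = 1  # overlaps a at pixel (1, 1)

for mask in (a, b):
    mask_xor = (mask ^ bg) & mask  # drop pixels already covered
    bg += mask_xor                 # claim the remaining pixels
print(bg.sum())  # 7, not 8: the overlapping pixel is counted once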
def main():
    """ Mask RCNN Object Detection with Detectron2 """
    rospy.init_node("mask_rcnn", anonymous=True)
    bridge = CvBridge()
    start_time = time.time()
    image_counter = 0

    register_coco_instances(
        "train_set", {},
        "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/train/annotations.json",
        "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/train")
    register_coco_instances(
        "test_set", {},
        "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/test/annotations.json",
        "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/test")

    train_metadata = MetadataCatalog.get("train_set")
    print(train_metadata)
    dataset_dicts_train = DatasetCatalog.get("train_set")

    test_metadata = MetadataCatalog.get("test_set")
    print(test_metadata)
    dataset_dicts_test = DatasetCatalog.get("test_set")

    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(
        "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
    cfg.DATASETS.TRAIN = ("train_set",)  # trailing comma: DATASETS.TRAIN must be a tuple
    cfg.DATASETS.TEST = ()  # no metrics implemented for this dataset
    cfg.DATALOADER.NUM_WORKERS = 4
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
        "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")  # initialize from model zoo
    cfg.SOLVER.IMS_PER_BATCH = 4
    cfg.SOLVER.BASE_LR = 0.01
    cfg.SOLVER.MAX_ITER = 1000  # 300 iterations seems good enough, but you can certainly train longer
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128  # faster, and good enough for this toy dataset
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 5  # 5 classes (Plate, Carrot, Celery, Pretzel, Gripper)

    # Temporary solution: hard-coded weights path. After retraining, the
    # dynamically set path (commented out below) can be used again. Note that
    # os.path.join returns the second argument unchanged when it is absolute.
    cfg.MODEL.WEIGHTS = os.path.join(
        cfg.OUTPUT_DIR,
        "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/output/model_final.pth")
    # cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.9  # set the testing threshold for this model
    cfg.DATASETS.TEST = ("test_set",)
    predictor = DefaultPredictor(cfg)

    class_names = MetadataCatalog.get("train_set").thing_classes

    # Set up custom cv2 visualization parameters
    # Classes: [Plate, 0], [Carrot, 1], [Celery, 2], [Pretzel, 3], [Gripper, 4]
    # Colors are BGR
    color_plate = [0, 255, 0]      # green
    color_carrot = [255, 200, 0]   # blue
    color_celery = [0, 0, 255]     # red
    color_pretzel = [0, 220, 255]  # yellow
    color_gripper = [204, 0, 150]  # purple
    colors = list([color_plate, color_carrot, color_celery, color_pretzel, color_gripper])
    alpha = .4

    run = maskRCNN()
    while not rospy.is_shutdown():
        # Get images
        img = run.get_img()
        if img is not None:
            outputs = predictor(img)
            predictions = outputs["instances"].to("cpu")

            # Get results
            result = run.getResult(predictions, class_names)

            # Visualize using custom cv2 code
            if result is not None:
                result_cls = result.class_names
                result_clsId = result.class_ids
                result_scores = result.scores
                result_masks = result.masks

                # Create copies of the original image
                im = img.copy()
                output = img.copy()

                # Initialize lists
                masks = []
                masks_indices = []
                for i in range(len(result_clsId)):
                    # Obtain current object mask as a numpy array
                    # (black and white mask of single object)
                    current_mask = bridge.imgmsg_to_cv2(result_masks[i])

                    # Find current mask indices
                    mask_indices = np.where(current_mask == 255)

                    # Add to mask indices list
                    if len(masks_indices) > len(result_clsId):
                        masks_indices = []
                    else:
                        masks_indices.append(mask_indices)

                    # Add to mask list
                    if len(masks) > len(result_clsId):
                        masks = []
                    else:
                        masks.append(current_mask)

                if len(masks) > 0:
                    # Create composite mask and clip it between 0 and 255
                    composite_mask = sum(masks)
                    composite_mask = composite_mask.clip(0, 255)

                # # Apply mask to image
                # masked_img = cv2.bitwise_and(im, im, mask=current_mask)

                # Find indices of object in mask
                # composite_mask_indices = np.where(composite_mask == 255)

                for i in range(len(result_clsId)):
                    # Select correct object color
                    color = colors[result_clsId[i]]

                    # Change the color of the current mask object
                    im[masks_indices[i][0], masks_indices[i][1], :] = color

                # Apply alpha scaling to image to adjust opacity
                cv2.addWeighted(im, alpha, output, 1 - alpha, 0, output)

                for i in range(len(result_clsId)):
                    # Draw bounding boxes
                    start_point = (result.boxes[i].x_offset, result.boxes[i].y_offset)
                    end_point = (result.boxes[i].x_offset + result.boxes[i].width,
                                 result.boxes[i].y_offset + result.boxes[i].height)
                    start_point2 = (result.boxes[i].x_offset + 2, result.boxes[i].y_offset + 2)
                    end_point2 = (result.boxes[i].x_offset + result.boxes[i].width - 2,
                                  result.boxes[i].y_offset + 12)
                    color = colors[result_clsId[i]]
                    box_thickness = 1

                    name = result_cls[i]
                    score = result_scores[i]
                    conf = round(score.item() * 100, 1)
                    string = str(name) + ":" + str(conf) + "%"
                    font = cv2.FONT_HERSHEY_SIMPLEX
                    org = (result.boxes[i].x_offset + 2, result.boxes[i].y_offset + 10)
                    fontScale = .3
                    text_thickness = 1
                    output = cv2.rectangle(output, start_point, end_point, color, box_thickness)
                    output = cv2.rectangle(output, start_point2, end_point2, color, -1)  # text box
                    output = cv2.putText(output, string, org, font, fontScale,
                                         [0, 0, 0], text_thickness, cv2.LINE_AA, False)

                im_rgb = cv2.cvtColor(output, cv2.COLOR_BGR2RGB)
                im_msg = bridge.cv2_to_imgmsg(im_rgb, encoding="rgb8")

                # Display image counter
                image_counter = image_counter + 1
                # if (image_counter % 11) == 10:
                #     rospy.loginfo("Images detected per second=%.2f",
                #                   float(image_counter) / (time.time() - start_time))

                run.publish(im_msg, result)

    return 0
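# The overlay logic above (recolour masked pixels on a copy, then blend with
# cv2.addWeighted) is easy to test outside ROS. A standalone sketch, assuming
# a BGR image and a binary uint8 mask (names here are illustrative only):
import cv2
import numpy as np

def overlay_mask(img, mask, color=(0, 255, 0), alpha=0.4):
    # Recolour masked pixels on a copy, then alpha-blend with the original.
    overlay = img.copy()
    overlay[mask == 255] = color
    return cv2.addWeighted(overlay, alpha, img, 1 - alpha, 0)

# Example: highlight a square region of a black image.
img = np.zeros((100, 100, 3), dtype=np.uint8)
mask = np.zeros((100, 100), dtype=np.uint8)
mask[20:60, 20:60] = 255
blended = overlay_mask(img, mask)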
def experiment_2(exp_name, model_file):
    print('Running Task B experiment', exp_name)
    SAVE_PATH = os.path.join('./results_week_6_task_b', exp_name)
    os.makedirs(SAVE_PATH, exist_ok=True)

    # Loading data
    print('Loading data')
    virtualoader = VirtualKitti()
    kittiloader = KittiMots()

    def vkitti_train():
        return virtualoader.get_dicts()

    def rkitti_val():
        return kittiloader.get_dicts(flag='val')

    def rkitti_test():
        return kittiloader.get_dicts(flag='test')

    DatasetCatalog.register('Virtual_train', vkitti_train)
    MetadataCatalog.get('Virtual_train').set(
        thing_classes=list(KITTI_CATEGORIES.keys()))
    DatasetCatalog.register('KITTI_val', rkitti_val)
    MetadataCatalog.get('KITTI_val').set(
        thing_classes=list(KITTI_CATEGORIES.keys()))
    DatasetCatalog.register('KITTI_test', rkitti_test)
    MetadataCatalog.get('KITTI_test').set(
        thing_classes=list(KITTI_CATEGORIES.keys()))

    # Load model and configuration
    print('Loading Model')
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(model_file))
    cfg.DATASETS.TRAIN = ('Virtual_train', )
    cfg.DATASETS.TEST = ('KITTI_val', )
    cfg.DATALOADER.NUM_WORKERS = 4
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    cfg.OUTPUT_DIR = SAVE_PATH
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_file)
    cfg.SOLVER.IMS_PER_BATCH = 4
    cfg.SOLVER.BASE_LR = 0.00025
    cfg.SOLVER.MAX_ITER = 500
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
    cfg.TEST.SCORE_THRESH = 0.5

    # Training
    print('Training')
    trainer = DefaultTrainer(cfg)
    val_loss = ValidationLoss(cfg)
    trainer.register_hooks([val_loss])
    # Swap the last two hooks so the validation loss hook runs before the
    # periodic writer; otherwise its scalars are logged one step late.
    trainer._hooks = trainer._hooks[:-2] + trainer._hooks[-2:][::-1]
    trainer.resume_or_load(resume=False)
    trainer.train()

    # Evaluation
    print('Evaluating')
    cfg.DATASETS.TEST = ('KITTI_test', )
    evaluator = COCOEvaluator('KITTI_test', cfg, False, output_dir=SAVE_PATH)
    trainer.model.load_state_dict(val_loss.weights)
    trainer.test(cfg, trainer.model, evaluators=[evaluator])
    print('Plotting losses')
    plot_validation_loss(cfg, cfg.SOLVER.MAX_ITER, exp_name, SAVE_PATH,
                         'validation_loss.png')

    # Qualitative results: visualize some predictions
    print('Getting qualitative results')
    predictor = DefaultPredictor(cfg)
    predictor.model.load_state_dict(trainer.model.state_dict())
    inputs = rkitti_test()
    inputs = [inputs[i] for i in TEST_INFERENCE_VALUES]
    for i, input in enumerate(inputs):
        file_name = input['file_name']
        print('Prediction on image ' + file_name)
        img = cv2.imread(file_name)
        outputs = predictor(img)
        v = Visualizer(img[:, :, ::-1],
                       metadata=MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
                       scale=0.8,
                       instance_mode=ColorMode.IMAGE)
        v = v.draw_instance_predictions(outputs['instances'].to('cpu'))
        cv2.imwrite(
            os.path.join(SAVE_PATH,
                         'Inference_' + exp_name + '_inf_' + str(i) + '.png'),
            v.get_image()[:, :, ::-1])
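# `ValidationLoss` is registered as a hook above but not defined in this
# snippet. A common pattern consistent with its use here (a sketch; the
# assumption that it tracks the weights with the lowest validation loss is
# implied by the `load_state_dict(val_loss.weights)` call above):
import copy
import torch
from detectron2.engine import HookBase
from detectron2.data import build_detection_train_loader

class ValidationLoss(HookBase):
    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg.clone()
        # Iterate over the validation split with a training-style loader
        # so the model returns a loss dict rather than predictions.
        self.cfg.DATASETS.TRAIN = cfg.DATASETS.TEST
        self._loader = iter(build_detection_train_loader(self.cfg))
        self.best_loss = float('inf')
        self.weights = None

    def after_step(self):
        data = next(self._loader)
        with torch.no_grad():
            loss_dict = self.trainer.model(data)
            losses = sum(loss_dict.values())
            if losses < self.best_loss:
                self.best_loss = losses
                self.weights = copy.deepcopy(self.trainer.model.state_dict())
            self.trainer.storage.put_scalars(
                total_val_loss=losses,
                **{'val_' + k: v.item() for k, v in loss_dict.items()})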
def __init__(self):
    self.visualize = True
    self.verbose = False
    # st()
    self.mapnames = os.listdir('/home/nel/gsarch/Replica-Dataset/out/')
    # self.mapnames = os.listdir('/hdd/replica/Replica-Dataset/out/')
    # self.num_episodes = len(self.mapnames)
    self.num_episodes = 1  # temporary

    # self.ignore_classes = ['book', 'base-cabinet', 'beam', 'blanket', 'blinds', 'cloth', 'clothing', 'coaster', 'comforter', 'curtain', 'ceiling', 'countertop', 'floor', 'handrail', 'mat', 'paper-towel', 'picture', 'pillar', 'pipe', 'scarf', 'shower-stall', 'switch', 'tissue-paper', 'towel', 'vent', 'wall', 'wall-plug', 'window', 'rug', 'logo', 'set-of-clothing']
    self.include_classes = [
        'chair', 'bed', 'toilet', 'sofa', 'indoor-plant', 'refrigerator',
        'tv-screen', 'table'
    ]
    self.small_classes = ['indoor-plant', 'toilet']

    self.rot_interval = 5.0
    self.radius_max = 3
    self.radius_min = 1
    self.num_flat_views = 3
    self.num_any_views = 7
    self.num_views = 25
    self.num_objects_per_episode = 2

    # Initialize Mask R-CNN
    cfg_det = get_cfg()
    cfg_det.merge_from_file(
        model_zoo.get_config_file(
            "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
    cfg_det.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
    cfg_det.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
        "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
    cfg_det.MODEL.DEVICE = 'cpu'
    self.cfg_det = cfg_det
    self.maskrcnn = DefaultPredictor(cfg_det)

    # Keep only the categories we care about.
    '''
    Class mapping between Replica and Mask R-CNN:
        class name        Replica ID    Mask R-CNN ID
        chair             20            56
        bed               7             59
        dining table      80            60
        toilet            84            61
        couch             76            57
        potted plant      44            58
        # bottle          14            39
        # clock           22            74
        refrigerator      67            72
        tv (tv-screen)    87            62
        # vase            91            75
    '''
    self.maskrcnn_to_catname = {
        56: "chair",
        59: "bed",
        61: "toilet",
        57: "couch",
        58: "indoor-plant",
        72: "refrigerator",
        62: "tv",
        60: "dining-table"
    }
    self.replica_to_maskrcnn = {
        20: 56,
        7: 59,
        84: 61,
        76: 57,
        44: 58,
        67: 72,
        87: 62,
        80: 60
    }

    # self.env = habitat.Env(config=config, dataset=None)
    # st()
    # self.test_navigable_points()
    self.run_episodes()
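# A small usage sketch of the mapping above: keep only detections whose COCO
# class id appears in `maskrcnn_to_catname` (a hypothetical helper, not part
# of the original class):
def filter_detections(self, outputs):
    instances = outputs["instances"].to("cpu")
    keep = [i for i, c in enumerate(instances.pred_classes.tolist())
            if c in self.maskrcnn_to_catname]
    kept = instances[keep]  # detectron2 Instances supports list indexing
    names = [self.maskrcnn_to_catname[c] for c in kept.pred_classes.tolist()]
    return kept, names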
def set_predictor(self):
    self.predictor = DefaultPredictor(self.cfg)
print(f'detectron2 : {detectron2.__version__}')

#%% object detection
img = cv2.imread("../res/input.jpg")
print(img.shape)

# %%
# create a detectron2 config; see
# https://github.com/facebookresearch/detectron2/blob/master/MODEL_ZOO.md
cfg_object_detection = get_cfg()
cfg_object_detection.merge_from_file(
    model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml"))
cfg_object_detection.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
cfg_object_detection.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml")

# object detection predictor
object_detection_predictor = DefaultPredictor(cfg_object_detection)

start_tick = time.time()
outputs = object_detection_predictor(img)

# use `Visualizer` to draw the predictions on the image
v = Visualizer(cv2.cvtColor(img, cv2.COLOR_BGR2RGB),
               MetadataCatalog.get(cfg_object_detection.DATASETS.TRAIN[0]),
               scale=1.2)
out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
print(f'delay {time.time() - start_tick}')
display(Image.fromarray(out.get_image()))

#%% instance segmentation
# create config for instance segmentation
cfg_instance_seg = get_cfg()
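# The instance-segmentation cell is cut off above. By analogy with the
# detection cell, it would plausibly continue along these lines (a sketch
# mirroring the object-detection setup with a Mask R-CNN model-zoo config):
cfg_instance_seg.merge_from_file(
    model_zoo.get_config_file(
        "COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml"))
cfg_instance_seg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
cfg_instance_seg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml")

instance_seg_predictor = DefaultPredictor(cfg_instance_seg)
outputs = instance_seg_predictor(img)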
def build_predictor(cfg, threshold_score):
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = threshold_score
    return DefaultPredictor(cfg), cfg
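# Usage sketch for build_predictor (assumes a model-zoo config; any populated
# cfg works):
from detectron2 import model_zoo
from detectron2.config import get_cfg

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(
    "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")
predictor, cfg = build_predictor(cfg, threshold_score=0.7)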
def get_detectnet_model(detectron_cfg):
    return DefaultPredictor(detectron_cfg)
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

cfg = get_cfg()
# add project-specific config (e.g., TensorMask) here if you're not running
# a model in detectron2's core library
cfg.merge_from_file(
    model_zoo.get_config_file(
        "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
# Find a model from detectron2's model zoo. You can use the
# https://dl.fbaipublicfiles... url as well.
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
predictor = DefaultPredictor(cfg)

# v = Visualizer(im[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
# out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
# im2 = out.get_image()[:, :, ::-1]
# b, g, r = cv2.split(im2)
# image_rgb2 = cv2.merge([r, g, b])
# plt.figure()
# plt.imshow(image_rgb2)
# plt.show()

############## above is part (a), without the example .jpg input

import os

# download, decompress the data
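# The commented-out block above references `im` and `outputs`, which are never
# created in this snippet. A runnable version (a sketch; "input.jpg" is a
# hypothetical path) would be:
import cv2
import matplotlib.pyplot as plt

im = cv2.imread("input.jpg")  # hypothetical input image
outputs = predictor(im)

v = Visualizer(im[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
out = v.draw_instance_predictions(outputs["instances"].to("cpu"))

# Visualizer was fed RGB, so out.get_image() is already RGB; the original
# split/merge round-trip is unnecessary for matplotlib.
plt.figure()
plt.imshow(out.get_image())
plt.show()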
def main(args):
    # retrieve configuration file and update the weights
    cfg = get_cfg()
    cfg.merge_from_file(args.cfg)
    # update the model so that it uses the final output weights
    cfg.MODEL.WEIGHTS = str(Path(cfg.OUTPUT_DIR) / Path("model_final.pth"))
    predictor = DefaultPredictor(cfg)

    # load images from the validation split; the data needs to come from the
    # signs dataset, not the hotspots dataset
    dset = DatasetCatalog.get(args.dataset)

    all_hotspots = []
    all_gt_aligned = []
    all_scores = []
    for example in tqdm(dset):
        img = cv2.imread(example["file_name"])
        # output format is documented at
        # https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
        outputs = predictor(img)

        # get individual hotspot images, to be saved to an npz array
        hotspots = extract_boxes(
            img[:, :, ::-1], outputs["instances"].to("cpu").pred_boxes
        )
        all_hotspots.extend(hotspots)

        # get scores
        scores = outputs["instances"].to("cpu").scores
        all_scores.extend(scores.numpy())

        # get ground-truth classes; these matcher parameters can be customized
        matcher = Matcher([0.4, 0.5], [0, -1, 1], allow_low_quality_matches=False)

        # convert the ground-truth annotations into a detectron2 Boxes object
        gt_boxes = Boxes(
            torch.tensor(
                np.vstack([annotation["bbox"] for annotation in example["annotations"]])
            )
        )
        gt_classes = np.array(
            [annotation["category_id"] for annotation in example["annotations"]]
        )
        pred_boxes = outputs["instances"].to("cpu").pred_boxes
        match_quality_matrix = pairwise_iou(gt_boxes, pred_boxes)
        matched_idxs, matched_labels = matcher(match_quality_matrix)

        # compute ground-truth classes for every box
        aligned_classes = gt_classes[matched_idxs]
        # handle edge case where only one aligned box shows up
        if not isinstance(aligned_classes, np.ndarray):
            # was np.ndarray([...]), which builds an uninitialized array of
            # that shape; np.array wraps the scalar as intended
            aligned_classes = np.array([aligned_classes])
        # handle background classes
        aligned_classes[matched_labels == 0] = -1
        aligned_classes[matched_labels == -1] = -1
        all_gt_aligned.extend(aligned_classes)

    np.savez(
        Path(args.outpath).with_suffix(".npz"),
        hotspots=np.array(all_hotspots, dtype=object),
        scores=all_scores,
        gt_classes=all_gt_aligned,
    )
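# `extract_boxes` is not defined in this snippet. A minimal sketch consistent
# with the call above (an RGB image plus a detectron2 Boxes object, returning
# one crop per predicted box) could be:
def extract_boxes(img, pred_boxes):
    # pred_boxes.tensor holds one (x1, y1, x2, y2) row per instance.
    crops = []
    for x1, y1, x2, y2 in pred_boxes.tensor.numpy():
        crops.append(img[int(y1):int(y2), int(x1):int(x2)].copy())
    return crops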