data_coco["categories"] = categories
data_coco["annotations"] = annotations

with open(save_json_path, "w") as f:
    json.dump(data_coco, f, indent=4)
register_coco_instances("my_dataset_train1", {}, "traincoco.json", "")

DatasetCatalog.get("my_dataset_train1")

#visualize training data
my_dataset_train_metadata = MetadataCatalog.get("my_dataset_train")
dataset_dicts = DatasetCatalog.get("my_dataset_train")

for d in random.sample(dataset_dicts, 3):
    img = cv2.imread(d["file_name"])
    visualizer = Visualizer(img[:, :, ::-1],
                            metadata=my_dataset_train_metadata,
                            scale=0.5)
    vis = visualizer.draw_dataset_dict(d)
    cv2_imshow(vis.get_image()[:, :, ::-1])
"""# Train Custom Detectron2 Detector"""

# We define our own Trainer here so that COCO evaluation on the validation set runs during training. Otherwise, no validation evaluation occurs.


class CocoTrainer(DefaultTrainer):
    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):

        if output_folder is None:
            os.makedirs("coco_eval", exist_ok=True)
            output_folder = "coco_eval"

        return COCOEvaluator(dataset_name, cfg, False, output_folder)
Example #2
    # Reconstructed helper: the top of this snippet was truncated, but output(vis, fname)
    # is called below, so a minimal display wrapper is assumed here.
    def output(vis, fname):
        print(fname)
        webcv2.imshow("window", vis.get_image()[:, :, ::-1])
        webcv2.waitKey()

    train_data_loader = build_detection_train_loader(cfg)
    import ipdb
    ipdb.set_trace()
    for batch in tqdm.tqdm(train_data_loader):
        if args.speed:
            continue
        for per_image in batch:
            # Pytorch tensor is in (C, H, W) format
            img = per_image["image"].permute(1, 2, 0).cpu().numpy()  # to HWC numpy for PIL/Visualizer
            if cfg.INPUT.FORMAT == "BGR":
                img = img[:, :, [2, 1, 0]]
            else:
                img = np.asarray(
                    Image.fromarray(img, mode=cfg.INPUT.FORMAT).convert("RGB"))

            visualizer = Visualizer(img, metadata=metadata, scale=1.0)
            target_fields = per_image["instances"].get_fields()
            labels = [
                metadata.thing_classes[i] for i in target_fields["gt_classes"]
            ]
            vis = visualizer.overlay_instances(
                labels=labels,
                boxes=target_fields.get("gt_boxes", None),
                masks=target_fields.get("gt_masks", None),
                keypoints=target_fields.get("gt_keypoints", None),
            )
            output(vis, str(per_image["image_id"]) + ".jpg")
Example #3
    """
    Test the d2sa json dataset loader.

    Usage:
        python -m detectron2.data.datasets.d2sa \
            path/to/json path/to/image_root dataset_name

        "dataset_name" can be "d2sa_val", or other
        pre-registered ones
    """
    from detectron2.utils.logger import setup_logger
    from detectron2.utils.visualizer import Visualizer
    import detectron2.data.datasets  # noqa # add pre-defined metadata
    import sys

    logger = setup_logger(name=__name__)
    assert sys.argv[3] in DatasetCatalog.list()
    meta = MetadataCatalog.get(sys.argv[3])

    dicts = load_cocoa_json(sys.argv[1], sys.argv[2], sys.argv[3])
    logger.info("Done loading {} samples.".format(len(dicts)))

    dirname = "d2sa-data-vis"
    os.makedirs(dirname, exist_ok=True)
    for d in dicts:
        img = np.array(Image.open(d["file_name"]))
        visualizer = Visualizer(img, metadata=meta)
        vis = visualizer.draw_dataset_dict(d)
        fpath = os.path.join(dirname, os.path.basename(d["file_name"]))
        vis.save(fpath)
Example #4
    writer = SummaryWriter(
        log_dir='/tmp/tensorboard/{}'.format(datetime.datetime.now()))

    # Parse command line arguments
    ap = argparse.ArgumentParser()
    ap.add_argument("--split", default="test")
    ap.add_argument("--samples", type=int, default=10)
    ap.add_argument("--scale", type=float, default=1.0)
    ap.add_argument("--path", type=str, default="../dataset", metavar='DIR')
    args = ap.parse_args()

    dataset_name = f"sacrum_{args.split}"
    print(dataset_name)
    register_sacrum_voc(dataset_name, args.path, args.split)
    dataset_dicts = DatasetCatalog.get(dataset_name)
    for d in random.sample(dataset_dicts, args.samples):
        img = cv2.imread(d["file_name"])
        visualizer = Visualizer(img[:, :, ::-1],
                                metadata=MetadataCatalog.get(dataset_name),
                                scale=args.scale)
        vis = visualizer.draw_dataset_dict(d)
        writer.add_image(d["file_name"],
                         np.transpose(vis.get_image(), axes=[2, 0, 1]))
        #cv2.imshow(dataset_name, vis.get_image()[:, :, ::-1])

        # Exit? Press ESC
        #if cv2.waitKey(0) & 0xFF == 27:
        #    break

    #cv2.destroyAllWindows()
Example #5
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
cfg.MODEL.WEIGHTS = MODEL_PATH
if not torch.cuda.is_available():
    cfg.MODEL.DEVICE = 'cpu'

predictor = DefaultPredictor(cfg)

im = cv2.imread(INPUT_FILE)
outputs = predictor(im)

instances = outputs['instances']

if len(instances) <= 0:
    sys.exit(1)

v = Visualizer(im[:, :, ::-1],
               MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
               scale=1.2,
               instance_mode=ColorMode.IMAGE_BW)

v = v.draw_instance_predictions(instances.to("cpu"))

result = v.get_image()[:, :, ::-1]

cv2.imshow('waldo', result)
while True:
    key = cv2.waitKey(1)
    if key == 27 or key == 113:
        break
cv2.destroyAllWindows()
    "Kia-Detection_faster_rcnn_R_50_FPN_1x-eps300/model_final.pth"
)  # path to the model we just trained
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7  # set a custom testing threshold
predictor = DefaultPredictor(cfg)

from detectron2.utils.visualizer import ColorMode

for d in random.sample(Kia_trainval_dataset_dicts, 3):
    im = cv2.imread(d["file_name"])
    # outputs = predictor(im)  # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
    # print("outputs : ", outputs)
    # print("outputs[instances] : ", outputs["instances"])
    v = Visualizer(
        im[:, :, ::-1],
        metadata=Kia_trainval_metadata,
        scale=0.5,
        instance_mode=ColorMode.IMAGE_BW  # remove the colors of unsegmented pixels. This option is only available for segmentation models
    )
    # out = v.draw_instance_predictions(outputs["instances"].to("cpu")) # draw instances with predictions of predictor
    out = v.draw_dataset_dict(d)  # draw instances with using Annotations
    # print("out.get_image().shape : ", out.get_image().shape)
    # print("out.get_image()[:, :, ::-1].shape : ", out.get_image()[:, :, ::-1].shape)

    cv2.imwrite("./output/%s" % (d["file_name"].split("/")[-1]),
                out.get_image()[:, :, ::-1])
"""
img = cv2.imread(dic["file_name"], cv2.IMREAD_COLOR)[:, :, ::-1]
basename = os.path.basename(dic["file_name"])

predictions = create_instances(pred_by_image[dic["image_id"]], img.shape[:2])
Example #7
    # for d in random.sample(dataset_dicts, 3):
    #    img = cv2.imread(d["file_name"])
    #    visualizer = Visualizer(img[:, :, ::-1], metadata=metadata, scale=0.3)
    #    vis = visualizer.draw_dataset_dict(d)
    #    cv2.imshow('img', vis.get_image()[:, :, ::-1])
    #    cv2.waitKey(0)

    predictor = DefaultPredictor(cfg)

    from IPython import embed

    for d in random.sample(dataset_dicts, 5):
        img = cv2.imread(d["file_name"])
        outputs = predictor(img)
        print(d["file_name"])
        # draw the model's predictions first, then draw the ground-truth annotations on top of that image
        pred_vis = Visualizer(img[:, :, ::-1], metadata=metadata, scale=0.3)
        pred_vis = pred_vis.draw_instance_predictions(
            outputs["instances"].to("cpu"))
        pred_img = pred_vis.get_image()[:, :, ::-1]
        gt_vis = Visualizer(pred_img[:, :, ::-1],
                            metadata=metadata,
                            scale=0.3)
        gt_vis = gt_vis.draw_dataset_dict(d)
        cv2.imshow('img', gt_vis.get_image()[:, :, ::-1])
        cv2.waitKey(0)

    # trainer = DefaultTrainer(cfg)
    # trainer.resume_or_load(resume=True)

    # evaluator = COCOEvaluator("train", cfg, False, output_dir="./output/")
    # val_loader = build_detection_test_loader(cfg, "train")
Example #8
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # a single class for this dataset
cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS = 8
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_0000299.pth")
cfg.MODEL.ROI_KEYPOINT_HEAD.NAME = "KRCNNConvDeconvUpPVNetHead"
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5   # set the testing threshold for this model
cfg.DATASETS.TEST = ("nihonbashi", )
predictor = DefaultPredictor(cfg)

from detectron2.utils.visualizer import ColorMode

for d in random.sample(dataset_dicts, 3):    
    im = cv2.imread(d["file_name"])
    outputs = predictor(im)
    v = Visualizer(im[:, :, ::-1],
                   metadata=nihonbashi_metadata, 
                   scale=0.8, 
                   instance_mode=ColorMode.IMAGE_BW   # remove the colors of unsegmented pixels
    )
    v = v.draw_instance_predictions(outputs["instances"].to("cpu"))


    visualizer = Visualizer(im[:, :, ::-1], 
                             metadata=nihonbashi_metadata, 
                             scale=0.8,               
                             instance_mode=ColorMode.IMAGE_BW   # remove the colors of unsegmented pixels
    )
    vis = visualizer.draw_dataset_dict(d)

    im_pred = v.get_image()[:, :, ::-1]
    im_gt = vis.get_image()[:, :, ::-1]
Example #9
    def may_visualize_gt(self, batched_inputs, init_objectness, init_bbox,
                         refine_objectness, refine_boxes, centers,
                         pred_init_boxes, pred_refine_boxes, logits):
        """
        Visualize initial and refine boxes using mathced labels for filtering.
        The prediction at positive positions are shown.
        """
        if self.training:
            if self.vis_period <= 0:
                return
            storage = get_event_storage()
            if storage.iter % self.vis_period != 0:
                return

        from detectron2.utils.visualizer import Visualizer
        image_index = 0
        img = batched_inputs[image_index]["image"].cpu().numpy()
        assert img.shape[0] == 3, "Images should have 3 channels."
        img = img[::-1, :, :]
        img = img.transpose(1, 2, 0)

        v_init = Visualizer(img, None)
        v_init = v_init.overlay_instances(boxes=Boxes(init_bbox[image_index][
            init_objectness[image_index]].cpu()))
        init_image = v_init.get_image()

        v_refine = Visualizer(img, None)
        v_refine = v_refine.overlay_instances(
            boxes=Boxes(refine_boxes[image_index][
                refine_objectness[image_index] > 0].cpu()))
        refine_image = v_refine.get_image()

        if self.training:
            vis_img = np.vstack((init_image, refine_image))
            vis_img = vis_img.transpose(2, 0, 1)
            storage.put_image("TOP: init gt boxes; Bottom: refine gt boxes",
                              vis_img)

        vp_init = Visualizer(img, None)
        selected_centers = centers[init_objectness[image_index]].cpu().numpy()
        vp_init = vp_init.overlay_instances(
            boxes=Boxes(pred_init_boxes[image_index][
                init_objectness[image_index]].detach().cpu()),
            labels=logits[image_index]
            [init_objectness[image_index]].sigmoid().max(1)[0].detach().cpu())
        init_image = vp_init.get_image()

        for point in selected_centers:
            init_image = cv2.circle(init_image, tuple(point), 3,
                                    (255, 255, 255))

        vp_refine = Visualizer(img, None)
        foreground_idxs = (refine_objectness[image_index] >= 0).logical_and(
            refine_objectness[image_index] < self.num_classes)
        selected_centers = centers[foreground_idxs].cpu().numpy()
        vp_refine = vp_refine.overlay_instances(
            boxes=pred_refine_boxes[image_index]
            [foreground_idxs].detach().cpu(),
            labels=logits[image_index][foreground_idxs].sigmoid().max(
                1)[0].detach().cpu())
        refine_image = vp_refine.get_image()
        for point in selected_centers:
            refine_image = cv2.circle(refine_image, tuple(point), 3,
                                      (255, 255, 255))

        vis_img = np.vstack((init_image, refine_image))
        if self.training:
            vis_img = vis_img.transpose(2, 0, 1)
            storage.put_image(
                "TOP: init pred boxes; Bottom: refine pred boxes", vis_img)
        # NOTE: This is commented out temporarily. Uncomment it if
        # eager visualization is desired.
Example #10
    def get_midpoint_obj_conf(self):

        #objects = random.sample(list(objects), self.num_objects_per_episode)
        xyz_obj_mids = []
        cat_ids_objects = []
        count = 0
        nav_points = np.array(self.nav_pts)
        action = "do_nothing"
        movement = "turn_right"

        if self.visualize_maskrcnn:
            self.plot_navigable_points(self.nav_pts)

        # constraint if two fixation points are very close
        # add in all confident masks
        # 20 spawns

        while count < self.num_spawns_per_episode:
            print("GETTING OBJECT #", count)

            rand_ind = np.random.randint(low=0, high=len(nav_points))
            pos_rand = nav_points[rand_ind, :]
            agent_state = habitat_sim.AgentState()
            agent_state.position = pos_rand + np.array([0, 1.5, 0])
            print("Random POS=", agent_state.position)
            self.agent.set_state(agent_state)

            # remove spawning points close to this spawn (so as to not spawn near there again)
            distances = np.sqrt(np.sum((nav_points - pos_rand)**2, axis=1))
            nav_points = nav_points[np.where(distances > self.radius_remove)]

            # if self.visualize:
            #     x_sample = self.nav_pts[:,0]
            #     z_sample = self.nav_pts[:,2]
            #     plt.plot(z_sample, x_sample, 'o', color = 'red')
            #     plt.plot(agent_state.position[2], agent_state.position[0], 'x', 'blue')
            #     plt.show()

            # rotate in place until get high confident object
            # bin_angle_size = 60.0
            # angles = np.arange(-180, 180, bin_angle_size)

            angles = np.arange(0, 360, self.turn_angle)

            for angle in angles:  #b_inds_notempty:

                # print("ANGLE=", angle)
                # turn_angle = np.radians(angle)
                # quat_yaw = quat_from_angle_axis(angle, np.array([0, 1.0, 0]))

                # # Set agent yaw rotation to current angle
                # agent_state.rotation = quat_yaw

                # # change sensor state to default
                # # need to move the sensors too
                # for sensor in self.agent.state.sensor_states:
                #     # st()
                #     self.agent.state.sensor_states[sensor].rotation = agent_state.rotation
                #     self.agent.state.sensor_states[sensor].position = agent_state.position # + np.array([0, 1.5, 0]) # ADDED IN UP TOP
                #     # print("PRINT", self.agent.state.sensor_states[sensor].rotation)

                # print(agent_state.rotation)

                # # get observations after centering
                # self.agent.set_state(agent_state)

                # rotate until find confident object

                observations = self.sim.step(movement)  #self.sim.step(action)

                ####### %%%%%%%%%%%%%%%%%%%%%%% ######### MASK RCNN

                im = observations["color_sensor"]
                im = Image.fromarray(im, mode="RGBA")
                im = cv2.cvtColor(np.asarray(im), cv2.COLOR_RGB2BGR)

                # plt.imshow(im)
                # plt.show()

                outputs = self.maskrcnn(im)

                pred_masks = outputs['instances'].pred_masks
                pred_boxes = outputs['instances'].pred_boxes.tensor
                pred_classes = outputs['instances'].pred_classes
                pred_scores = outputs['instances'].scores

                maskrcnn_to_catname = {
                    56: "chair",
                    59: "bed",
                    61: "toilet",
                    57: "couch",
                    58: "indoor-plant",
                    72: "refrigerator",
                    62: "tv"
                }  #, 60: "dining-table"}

                obj_ids = []
                obj_catids = []
                obj_scores = []
                obj_masks = []
                obj_all_catids = []
                obj_all_scores = []
                obj_all_boxes = []
                for segs in range(len(pred_masks)):
                    if pred_classes[segs].item() in maskrcnn_to_catname:
                        if pred_scores[segs] >= 0.90:
                            obj_ids.append(segs)
                            obj_catids.append(pred_classes[segs].item())
                            obj_scores.append(pred_scores[segs].item())
                            obj_masks.append(pred_masks[segs])
                            cat_ids_objects.append(maskrcnn_to_catname[int(
                                pred_classes[segs])])

                            obj_all_catids.append(pred_classes[segs].item())
                            obj_all_scores.append(pred_scores[segs].item())
                            y, x = torch.where(pred_masks[segs])
                            pred_box = torch.Tensor(
                                [min(y), min(x),
                                 max(y), max(x)])  # ymin, xmin, ymax, xmax
                            obj_all_boxes.append(pred_box)

                print("MASKS ", len(pred_masks))
                print("VALID ", len(obj_scores))
                print(obj_scores)
                print(pred_scores.shape)

                translation_ = self.agent.state.sensor_states[
                    'depth_sensor'].position
                quaternion_ = self.agent.state.sensor_states[
                    'depth_sensor'].rotation
                rotation_ = quaternion.as_rotation_matrix(quaternion_)
                T_world_cam = np.eye(4)
                T_world_cam[0:3, 0:3] = rotation_
                T_world_cam[0:3, 3] = translation_

                if not obj_masks:
                    continue
                else:

                    # randomly choose a high confidence object
                    # instead of this I think we should iterate over ALL the high confident objects and fixate on them
                    # obj_mask_focus = random.choice(obj_masks)

                    for obj_mask in obj_masks:

                        depth = observations["depth_sensor"]

                        xs, ys = np.meshgrid(
                            np.linspace(-1 * 256 / 2., 1 * 256 / 2., 256),
                            np.linspace(1 * 256 / 2., -1 * 256 / 2., 256))
                        depth = depth.reshape(1, 256, 256)
                        xs = xs.reshape(1, 256, 256)
                        ys = ys.reshape(1, 256, 256)

                        xys = np.vstack((xs * depth, ys * depth, -depth,
                                         np.ones(depth.shape)))
                        xys = xys.reshape(4, -1)
                        xy_c0 = np.matmul(np.linalg.inv(self.K), xys)
                        xyz = xy_c0.T[:, :3].reshape(256, 256, 3)
                        # xyz_obj_masked = xyz[obj_mask_focus]
                        xyz_obj_masked = xyz[obj_mask]

                        xyz_obj_masked = np.matmul(
                            rotation_,
                            xyz_obj_masked.T) + translation_.reshape(3, 1)
                        xyz_obj_mid = np.mean(xyz_obj_masked, axis=1)

                        print("MIDPOINT=", xyz_obj_mid)

                        xyz_obj_mids.append(xyz_obj_mid)

                    count += 1

                    if self.visualize_maskrcnn:
                        plt.figure(1)
                        v = Visualizer(im[:, :, ::-1],
                                       MetadataCatalog.get(
                                           self.cfg_det.DATASETS.TRAIN[0]),
                                       scale=1.2)
                        out = v.draw_instance_predictions(
                            outputs['instances'].to("cpu"))
                        seg_im = out.get_image()
                        plt.imshow(seg_im)

                        plt.figure(2)
                        x_sample = self.nav_pts[:, 0]
                        z_sample = self.nav_pts[:, 2]
                        plt.plot(z_sample, x_sample, 'o', color='red')
                        plt.plot(nav_points[:, 2],
                                 nav_points[:, 0],
                                 'o',
                                 color='green')
                        plt.plot(agent_state.position[2],
                                 agent_state.position[0], 'x', 'blue')
                        plt.show()

                    break  # got a high-confidence view

        xyz_obj_mids = np.array(xyz_obj_mids)

        return xyz_obj_mids, cat_ids_objects
Example #11
def main():
    # register_coco_instances(f"sugar_beet_train", {}, f"/netscratch/naeem/structured_cwc/instances_train{year}.json",
    #                         f"/netscratch/naeem/structured_cwc/train/img/")
    # register_coco_instances(f"sugar_beet_valid", {}, f"/netscratch/naeem/structured_cwc/instances_valid{year}.json",
    #                         f"/netscratch/naeem/structured_cwc/valid/img/")

    register_coco_instances(
        "sugar_beet_train", {},
        "/home/robot/datasets/structured_cwc/instances_train2016.json",
        "/home/robot/datasets/structured_cwc/train/img/")
    register_coco_instances(
        "sugar_beet_valid", {},
        "/home/robot/datasets/structured_cwc/instances_valid2016.json",
        "/home/robot/datasets/structured_cwc/valid/img/")
    register_coco_instances(
        "sugar_beet_test", {},
        "/home/robot/datasets/structured_cwc/instances_test2016.json",
        "/home/robot/datasets/structured_cwc/test/img/")

    cfg = get_cfg()
    cfg.merge_from_file(
        model_zoo.get_config_file(
            "COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml"))
    cfg.DATASETS.TRAIN = (f"sugar_beet_train", )
    cfg.DATASETS.TEST = (f"sugar_beet_test", )
    cfg.DATALOADER.NUM_WORKERS = 8
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
        "COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml"
    )  # Let training initialize from model zoo
    cfg.SOLVER.IMS_PER_BATCH = 2
    cfg.SOLVER.BASE_LR = 0.001  # pick a good LR
    cfg.SOLVER.MAX_ITER = 10000  # you may need to train longer for a practical dataset
    # cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128  # faster, and good enough for a toy dataset (default: 512)
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 2
    cfg.OUTPUT_DIR = '/home/robot/datasets/MRCNN_training'
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    # trainer = DefaultTrainer(cfg)
    # trainer.resume_or_load(resume=True)
    # trainer.train()

    # cfg already contains everything we've set previously. Now we changed it a little bit for inference:
    cfg.MODEL.WEIGHTS = os.path.join(
        '/home/robot/git/detectron2/output/model_final.pth')
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7  # set a custom testing threshold
    predictor = DefaultPredictor(cfg)

    evaluator = COCOEvaluator(f"sugar_beet_valid",
                              cfg,
                              False,
                              output_dir="/home/robot/datasets/MRCNN_training")
    val_loader = build_detection_test_loader(cfg, f"sugar_beet_valid")
    # grad_cam = GradCam(model=trainer.model,
    #                    feature_module=trainer.model.layer4,
    #                    target_layer_names=["2"], use_cuda=True)
    # print(inference_on_dataset(trainer.model, val_loader, evaluator))
    dataset_dicts = DatasetCatalog.get(f"sugar_beet_valid")

    def get_label(rgb_path):
        data_root, file_name = os.path.split(
            os.path.split(rgb_path)[0])[0], os.path.split(rgb_path)[1]
        return os.path.join(data_root, 'lbl', file_name)

    c = 0

    for d in random.sample(dataset_dicts, 10):
        im = cv2.imread(d["file_name"])
        lbl = cv2.imread(get_label(d["file_name"]))
        outputs = predictor(im)
        # outputs = grad_cam(im, 0)
        v = Visualizer(
            im[:, :, ::-1],
            scale=0.5,
            instance_mode=ColorMode.IMAGE_BW
            # remove the colors of unsegmented pixels. This option is only available for segmentation models
        )
        out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
        img = out.get_image()
        print(img.shape)
        img = Image.fromarray(
            np.concatenate([
                img[:, :, ::-1],
                cv2.resize(lbl, (img.shape[1], img.shape[0]),
                           interpolation=cv2.INTER_AREA)
            ],
                           axis=1))
        img.save(f"{cfg.OUTPUT_DIR}/output{c}.jpeg")
        c = c + 1
Example #12
    def display_sample(self,
                       rgb_obs,
                       semantic_obs,
                       depth_obs,
                       mainobj=None,
                       visualize=False):
        rgb_img = Image.fromarray(rgb_obs, mode="RGBA")

        semantic_img = Image.new(
            "P", (semantic_obs.shape[1], semantic_obs.shape[0]))
        semantic_img.putpalette(d3_40_colors_rgb.flatten())
        semantic_img.putdata((semantic_obs.flatten() % 40).astype(np.uint8))
        semantic_img = semantic_img.convert("RGBA")
        # st()

        depth_img = Image.fromarray((depth_obs / 10 * 255).astype(np.uint8),
                                    mode="L")

        display_img = cv2.cvtColor(np.asarray(rgb_img), cv2.COLOR_RGB2BGR)
        #print(display_img.shape)

        # mask_image = False
        # if mask_image and mainobj is not None:
        #     main_id = int(mainobj.id[1:])
        #     print("MAINID ", main_id)
        #     # semantic = observations["semantic_sensor"]
        #     display_img[semantic_obs == main_id] = [1, 0, 1]
        # st()

        #display_img = cv2
        plt.imshow(display_img)
        plt.show()

        im = np.asarray(rgb_img)[..., :3]  # convert the PIL image to a numpy array and drop the alpha channel
        im = im[:, :, ::-1]
        outputs = self.maskrcnn(im)

        pred_masks = outputs['instances'].pred_masks
        pred_boxes = outputs['instances'].pred_boxes.tensor
        pred_classes = outputs['instances'].pred_classes
        pred_scores = outputs['instances'].scores

        # converts instance segmentation to individual masks and bbox
        # visualisations
        v = Visualizer(im[:, :, ::-1],
                       MetadataCatalog.get(self.cfg.DATASETS.TRAIN[0]),
                       scale=1.2)
        out = v.draw_instance_predictions(outputs['instances'].to("cpu"))
        seg_im = out.get_image()

        # cv2.imshow('img',display_img)
        if visualize:
            arr = [rgb_img, semantic_img, depth_img, seg_im]
            titles = ['rgb', 'semantic', 'depth', 'seg_im']
            plt.figure(figsize=(12, 8))
            for i, data in enumerate(arr):
                ax = plt.subplot(1, 4, i + 1)  # 4 panels: rgb, semantic, depth, seg_im
                ax.axis('off')
                ax.set_title(titles[i])
                plt.imshow(data)
                # plt.pause()
            plt.show()
Example #13
def predict(save_json=False, merge_result=False, d2_visual=True, my_visual=False):
    """
    instances format:{'pred_boxes': Boxes(tensor([[ 732.5856, 1598.1067,  766.4857, 1633.0486]], device='cuda:0')), 
    'scores': tensor([0.9482], device='cuda:0'), 'pred_classes': tensor([2], device='cuda:0')}
    BoxMode.convert(pre_instances.pred_boxes.tensor,from_mode=BoxMode.XYXY_ABS,to_mode=BoxMode.XYWH_ABS
    print("\n"+"-" * int(i/len(dataset_test_dicts.keys())*100*50) +">"+ "{}".format(i/len(dataset_test_dicts.keys())) + "%", end='\r')
    time.sleep(0.00001)
    json.dump(coco_list_results,f,cls=MyEncoder,indent=2)# print(type(dict_value))# print(type(dict_value["image id"]))
    """
    cfg.MODEL.WEIGHTS=os.path.join(cfg.OUTPUT_DIR,"model_final.pth")
    predictor = DefaultPredictor(cfg)
    # test_annos_root_dir="/root/data/gvision/dataset/predict/s0.5_t0.8_141517"
    # test_json="/root/data/gvision/dataset/predict/s0.5_t0.8_141517/image_annos/person_bbox_test_141517_split.json"
    test_image_path="/root/data/gvision/dataset/predict/s0.5_t0.9_14/image_test"
    test_json="/root/data/gvision/dataset/predict/s0.5_t0.9_14/image_annos/person_s0.5_t0.9_14_split_test.json"
    dataset_test_dicts = json.load(open(test_json,"r"))
    """metadata Metadata(evaluator_type='coco', image_root='/root/data/gvision/dataset/train_all_annos/s0.3_t0.7_all/image_train', 
    json_file='/root/data/gvision/dataset/train_all_annos/s0.3_t0.7_all/image_annos/coco_pv_train_bbox_hwnoi.json', name='pv_train', 
    thing_classes=['visible body', 'full body', 'head', 'vehicle'], thing_dataset_id_to_contiguous_id={1: 0, 2: 1, 3: 2, 4: 3})"""
    MetadataCatalog.get("pv_train").set(thing_colors=[(138,255,0),(138,0,255),(255,46,46),(131,131,131)])
    # MetadataCatalog.get("pv_train").set(thing_colors=[(131,131,131),(131,131,131),(131,131,131),(131,131,131)])
    """green            pink        purple    grey
      ['visible body', 'full body', 'head', 'vehicle']
      1                   2           3        4
    """
    train_dicts_metadata = MetadataCatalog.get("pv_train")
    print("metadata",train_dicts_metadata)
    coco_list_results=[]
    print("predict-------------------start")
    os.makedirs(os.path.join(cfg.OUTPUT_DIR, "my_predict"),exist_ok=True)
    # for j,(file_name,dict_value) in  enumerate(dataset_test_dicts.items()):
    for j, (file_name, dict_value) in enumerate(random.sample(list(dataset_test_dicts.items()), 9)):  # random.sample needs a sequence, not a dict view
        cate=[]
        coco_dict_results={}
        id_1,id_2,id_3,id_4=0,0,0,0
        print("{}\t{}-------------------{}".format(file_name,j,len(dataset_test_dicts.keys())),flush=True)
        img=cv2.imread(os.path.join(test_image_path,file_name))
        pre_output =predictor(img)
        pre_instances=pre_output['instances']
        for i in range(len(pre_instances.scores)):
            coco_dict_results = {}  # new dict per detection; reusing one dict would alias every appended result
            coco_dict_results["image_id"] = dict_value["image id"]
            coco_dict_results["category_id"]=pre_instances.pred_classes.cpu().numpy()[i]+1
            coco_dict_results["bbox"]=pre_instances.pred_boxes.tensor.cpu().numpy()[i]#pre_output['instances'].to("cpu")
            coco_dict_results["score"]=pre_instances.scores.cpu().numpy()[i]
            coco_list_results.append(coco_dict_results)
            if my_visual:
                cate.append(coco_dict_results["category_id"])
                xmin, ymin, xmax, ymax = map(int, coco_dict_results["bbox"])  # cv2 drawing needs integer pixel coordinates
                if coco_dict_results["category_id"]==1:#green
                    id_1+=1
                    img=cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (138,255,0), 8,lineType=8)
                    cv2.putText(img, '{}'.format(coco_dict_results["category_id"]), (xmin,ymin), cv2.FONT_HERSHEY_COMPLEX, 1.5, (138,255,0), 4)
                if coco_dict_results["category_id"]==2:#pink
                    id_2+=1
                    img=cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (138,0,255), 8,lineType=8)
                    cv2.putText(img, '{}'.format(coco_dict_results["category_id"]), (xmin,ymin), cv2.FONT_HERSHEY_COMPLEX, 1.5, (138,0,255), 4)
                if coco_dict_results["category_id"]==3:#purple
                    id_3+=1
                    img=cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (255,46,46), 8,lineType=8)
                    cv2.putText(img, '{}'.format(coco_dict_results["category_id"]), (xmin,ymin), cv2.FONT_HERSHEY_COMPLEX, 1.5, (255,46,46), 4)
                if coco_dict_results["category_id"]==4:#grey
                    id_4+=1
                    img=cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (131,131,131), 8,lineType=8)
                    cv2.putText(img, '{}'.format(coco_dict_results["category_id"]), (xmin,ymin), cv2.FONT_HERSHEY_COMPLEX, 1.5, (131,131,131), 4)
                if i==len(pre_instances.scores)-1:
                    cv2.putText(img, r"len{} cid:{}".format(len(pre_instances.scores),list(set(cate))[:]), (15,40), cv2.FONT_HERSHEY_COMPLEX, 1.5, (170,64,112), 4)#
                    cv2.putText(img, r"c1:{} c2:{} c3:{} c4:{}".format(id_1,id_2,id_3,id_4), (15,80), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (170,64,112), 4)
                    os.makedirs(os.path.join(cfg.OUTPUT_DIR,"my_pre_split_visual"),exist_ok=True)
                    cv2.imwrite(os.path.join(cfg.OUTPUT_DIR,"my_pre_split_visual","vis2_{}".format(file_name)),img)
        if d2_visual: 
            v = Visualizer(img[:, :, ::-1],metadata=train_dicts_metadata, scale=1,instance_mode=ColorMode.IMAGE)# ColorMode.SEGMENTATION or ColorMode.IMAGE_BW) 
            v = v.draw_instance_predictions(pre_output["instances"].to("cpu"))
            os.makedirs(os.path.join(cfg.OUTPUT_DIR,"d2_predict_split_visual"),exist_ok=True)
            cv2.imwrite(os.path.join(cfg.OUTPUT_DIR,"d2_predict_split_visual","vis2_{}".format(file_name)),v.get_image()[:, :, ::-1])
    if save_json:
        with open(os.path.join(cfg.OUTPUT_DIR, "my_predict", "pre_result_test.json"), 'w') as f1:
            f1.write(json.dumps(coco_list_results, cls=MyEncoder))
    print("predict----------------end")
    if merge_result:
        print("--------->>>>>>>>>merge-------------start")
        merge =ResultMerge.DetResMerge(resfile=os.path.join(cfg.OUTPUT_DIR, "my_predict","pre_result.json"), 
                                splitannofile=test_json, 
                                srcannofile="/root/data/gvision/dataset/predict/s0.5_t0.8_141517/image_annos/person_bbox_test_141517.json",
                                outpath=cfg.OUTPUT_DIR,
                                outfile="my_predict/pre_merge_result.json")
        merge.mergeResults(is_nms=True)
        print("merge-------------end")
Example #14
# The start of this list was truncated in the source; `random_paths = [` is assumed as its opening.
random_paths = [
    '/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0021/000028.png',
    '/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0027/000067.png',
    '/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0002/000126.png',
    '/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0000/000456.png',
    '/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0020/000091.png',
    '/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0016/000446.png',
    '/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0005/000285.png',
    '/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0028/000048.png'
]

i = 0
for image in random_paths:
    im = cv2.imread(image)
    output = predictor(im)
    v = Visualizer(im[:, :, ::-1],
                   MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
                   scale=1.2)
    v = v.draw_instance_predictions(output["instances"].to("cpu"))
    cv2.imwrite(f"{path2}0{i}.png", v.get_image()[:, :, ::-1])
    i = i + 1

random_paths = [
    '/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0023/000329.png',
    '/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0022/000157.png',
    '/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0004/000156.png',
    '/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0018/000123.png',
    '/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0017/000200.png',
    '/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0001/000047.png',
    '/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0008/000027.png',
    '/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0019/000028.png',
    '/home/group00/mcv/datasets/KITTI-MOTS/testing/image_02/0010/000073.png',
Example #15
            "width": w,
            "date_captured": "",
            "flickr_url": "",
            "id": img_id
        }
        images.append(img_dict)

        if c == 1:
            tqdm.write('Got greyscale image. Repeating channel axis.')
            im = im[:, :, np.newaxis].repeat(3, axis=-1)

        outputs = predictor(im)

        if args.vis:
            v = Visualizer(im,
                           MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
                           scale=1.2)
            v = v.draw_instance_predictions(outputs["instances"].to("cpu"))
            plt.imshow(v.get_image())
            plt.show()
            continue

        cv2.imwrite(str(img_folder / img_name), im)

        fields = outputs['instances'].get_fields()
        for points, score, box, area in zip(
                fields['pred_keypoints'].cpu().numpy(), fields['scores'].cpu(),
                fields['pred_boxes'].tensor.cpu().numpy(),
                fields['pred_boxes'].area()):
            if score < detections_threshold:
                continue
Example #16
    if scores is not None:
        if labels is None:
            labels = ["{:.0f}%".format(s * 100) for s in scores]
        else:
            labels = [
                "{} {:.0f}%".format(l, s * 100)
                for l, s in zip(labels, scores)
            ]
    return labels


def object_detect(image):
    predictions = predictor(image)
    boxes = predictions["instances"].pred_boxes
    scores = predictions["instances"].scores
    classes = predictions["instances"].pred_classes
    # labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None))

    return predictions, boxes, scores, classes


if __name__ == '__main__':
    image = cv2.imread("demo.png")
    image = imutils.resize(image, width=400)
    outputs, boxes, scores, classes = object_detect(image)
    v = Visualizer(image[:, :, ::-1],
                   MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
                   scale=1.2)
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    cv2.imshow("image", out.get_image()[:, :, ::-1])
    cv2.waitKey(0)
Example #17
        predictions = json.load(f)

    pred_by_image = defaultdict(list)
    for p in predictions:
        pred_by_image[p["image_id"]].append(p)

    dicts = list(DatasetCatalog.get(args.dataset))
    metadata = MetadataCatalog.get(args.dataset)

    os.makedirs(args.output, exist_ok=True)

    for dic in tqdm.tqdm(dicts):
        img = cv2.imread(dic["file_name"], cv2.IMREAD_COLOR)[:, :, ::-1]
        basename = os.path.basename(dic["file_name"])

        predictions = create_instances(pred_by_image[dic["image_id"]],
                                       img.shape[:2])
        vis = Visualizer(img, metadata, edge_width=1, ifShowLabel=False)
        vis_pred = vis.draw_instance_predictions(predictions,
                                                 alpha=0).get_image()

        vis = Visualizer(img,
                         metadata,
                         edge_width=1,
                         ifDrawBox=False,
                         ifShowLabel=False)
        vis_gt = vis.draw_dataset_dict(dic, alpha=0).get_image()

        concat = np.concatenate((vis_pred, vis_gt), axis=0)
        cv2.imwrite(os.path.join(args.output, basename), concat[:, :, ::-1])
Example #18
    if args.source == "dataloader":
        train_data_loader = build_detection_train_loader(cfg)
        for batch in train_data_loader:
            for per_image in batch:
                # Pytorch tensor is in (C, H, W) format
                img = per_image["image"].permute(1, 2, 0).cpu().numpy()  # to HWC numpy for PIL/Visualizer
                if cfg.INPUT.FORMAT == "BGR":
                    img = img[:, :, [2, 1, 0]]
                else:
                    img = np.asarray(
                        Image.fromarray(img, mode=cfg.INPUT.FORMAT).convert(
                            "RGB"
                        )
                    )

                visualizer = Visualizer(img, metadata=metadata, scale=scale)
                target_fields = per_image["instances"].get_fields()
                labels = [
                    metadata.thing_classes[i]
                    for i in target_fields["gt_classes"]
                ]
                vis = visualizer.overlay_instances(
                    labels=labels,
                    boxes=target_fields.get("gt_boxes", None),
                )
                output(vis, str(per_image["image_id"]) + ".jpg")
    else:
        dicts = list(
            chain.from_iterable(
                [DatasetCatalog.get(k) for k in cfg.DATASETS.TRAIN]
            )
        )
Example #19
def detect(save_img=False):
    # (320, 192) or (416, 256) or (608, 352) for (height, width)
    img_size = (320, 192) if ONNX_EXPORT else opt.img_size
    out, source, weights, half, view_img, save_txt = opt.output, opt.source, opt.weights, opt.half, opt.view_img, opt.save_txt
    webcam = source == '0' or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    device = torch_utils.select_device(
        device='cpu' if ONNX_EXPORT else opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder

    # Initialize model
    model = Darknet(opt.cfg, img_size)

    # Load weights
    attempt_download(weights)
    if weights.endswith('.pt'):  # pytorch format
        model.load_state_dict(torch.load(
            weights, map_location=device)['model'])
    else:  # darknet format
        load_darknet_weights(model, weights)

    # Second-stage classifier
    classify = False
    if classify:
        modelc = torch_utils.load_classifier(
            name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load(
            'weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Eval mode
    model.to(device).eval()

    # Fuse Conv2d + BatchNorm2d layers
    # model.fuse()

    # Export mode
    if ONNX_EXPORT:
        model.fuse()
        img = torch.zeros((1, 3) + img_size)  # (1, 3, 320, 192)
        f = opt.weights.replace(opt.weights.split(
            '.')[-1], 'onnx')  # *.onnx filename
        torch.onnx.export(model, img, f, verbose=False, opset_version=11,
                          input_names=['images'], output_names=['classes', 'boxes'])

        # Validate exported model
        import onnx
        model = onnx.load(f)  # Load the ONNX model
        onnx.checker.check_model(model)  # Check that the IR is well formed
        # Print a human readable representation of the graph
        print(onnx.helper.printable_graph(model.graph))
        return

    # Half precision
    half = half and device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        # set True to speed up constant image size inference
        torch.backends.cudnn.benchmark = True
        dataset = LoadStreams(source, img_size=img_size)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=img_size)

    # Get names and colors
    names = load_classes(opt.names)
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time()
    img = torch.zeros((1, 3, img_size, img_size), device=device)  # init img
    _ = model(img.half() if half else img.float()
              ) if device.type != 'cpu' else None  # run once

    times = []
    datas = []
    for path, img, im0s, vid_cap in dataset:
        start_time = time()

        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = torch_utils.time_synchronized()
        pred = model(img, augment=opt.augment)[0]
        t2 = torch_utils.time_synchronized()

        # to float
        if half:
            pred = pred.float()

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   multi_label=False, classes=opt.classes, agnostic=opt.agnostic_nms)

        # Pass on to Detectron2 if classes were detected
        if pred != [None]:
            im = cv2.imread(path)
            outputs = predictor(im)
            v = Visualizer(
                im[:, :, ::-1],
                metadata=statement_metadata,
                scale=1.,
                instance_mode=ColorMode.IMAGE
            )
            instances = outputs["instances"].to("cpu")
            instances.remove('pred_masks')
            print(instances)
            v = v.draw_instance_predictions(instances)
            result = v.get_image()[:, :, ::-1]
            file_name = ntpath.basename(path)
            write_res = cv2.imwrite(f'output_hybrid/{file_name}', result)
            end_time = time()
            datas.append(str([file_name, instances, end_time - start_time]))

        else:
            im = cv2.imread(path)
            file_name = ntpath.basename(path)
            write_res = cv2.imwrite(f'output_hybrid/{file_name}', im)
            end_time = time()
            datas.append(str([file_name, None, end_time - start_time]))  # no detections for this image

    print('Done. (%.3fs)' % (time() - t0))

    # with open('output_hybrid/times.txt', 'w') as t:
    #    t.write('\n'.join(times))
    with open(f'output/outputs_hybrid{opt.conf_thres}.txt', 'w') as t:
        t.write('\n'.join(datas))
        t.write('\n')
Example #20
### original visualize data
#dataset_dicts = get_balloon_dicts("balloon/train")
#for d in random.sample(dataset_dicts, 3):
#    img = cv2.imread(d["file_name"])
#    visualizer = Visualizer(img[:, :, ::-1], metadata=balloon_metadata, scale=0.5)
#    out = visualizer.draw_dataset_dict(d)
#    cv2_imshow(out.get_image()[:, :, ::-1])
### end visualize

### my local visualize data
dataset_dicts = get_balloon_dicts("balloon/train")
for d in random.sample(dataset_dicts, 3):
    img = cv2.imread(d["file_name"])
    visualizer = Visualizer(img[:, :, ::-1],
                            metadata=balloon_metadata,
                            scale=0.5)
    out = visualizer.draw_dataset_dict(d)
    b, g, r = cv2.split((out.get_image()[:, :, ::-1]))
    image_rgb = cv2.merge([r, g, b])
    plt.figure()
    plt.ion()
    plt.imshow(image_rgb)
    plt.show()
### end visualize

from detectron2.engine import DefaultTrainer

cfg = get_cfg()
cfg.merge_from_file(
    model_zoo.get_config_file(
Example #21
def task_a(model_name, model_file, augmentation=False):
    model_name = model_name + '_inference'
    print('Running task A for model', model_name)

    SAVE_PATH = os.path.join('./results_week_6_task_a', model_name)
    os.makedirs(SAVE_PATH, exist_ok=True)

    # Load model and configuration
    print('Loading Model')
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(model_file))
    cfg.DATASETS.TRAIN = ('KITTIMOTS_train', )
    cfg.DATASETS.TEST = ('KITTIMOTS_val', )
    cfg.DATALOADER.NUM_WORKERS = 0
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    cfg.OUTPUT_DIR = SAVE_PATH
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_file)
    cfg.SOLVER.IMS_PER_BATCH = 4
    cfg.SOLVER.BASE_LR = 0.00025
    cfg.SOLVER.MAX_ITER = 1000
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
    cfg.TEST.SCORE_THRESH = 0.5
    print(cfg)
    # Training

    print('Training')
    if augmentation:
        print("data augmentation")
        '''resize_factor = 1
        crop_size = [300,300]
        print("resize_factor: ", resize_factor)
        print("crop_size: ", crop_size)'''
        trainer = OurTrainer(cfg)
    else:
        print("NO data augmentation")
        trainer = DefaultTrainer(cfg)

    val_loss = ValidationLoss(cfg)
    trainer.register_hooks([val_loss])
    trainer._hooks = trainer._hooks[:-2] + trainer._hooks[-2:][::-1]
    trainer.resume_or_load(resume=False)
    trainer.train()

    # Evaluation
    print('Evaluating')
    evaluator = COCOEvaluator('KITTIMOTS_val',
                              cfg,
                              False,
                              output_dir='./output')
    trainer.model.load_state_dict(val_loss.weights)
    trainer.test(cfg, trainer.model, evaluators=[evaluator])
    print('Plotting losses')
    draw_loss(cfg, cfg.SOLVER.MAX_ITER, model_name, SAVE_PATH)

    # Qualitative results: visualize some results
    print('Getting qualitative results')
    predictor = DefaultPredictor(cfg)
    predictor.model.load_state_dict(trainer.model.state_dict())
    inputs = kitti_val()
    inputs = inputs[:20] + inputs[-20:]
    for i, input in enumerate(inputs):
        file_name = input['file_name']
        print('Prediction on image ' + file_name)
        img = cv2.imread(file_name)
        outputs = predictor(img)
        v = Visualizer(img[:, :, ::-1],
                       metadata=MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
                       scale=0.8,
                       instance_mode=ColorMode.IMAGE)
        v = v.draw_instance_predictions(outputs['instances'].to('cpu'))
        cv2.imwrite(
            os.path.join(SAVE_PATH, 'Inference_' + model_name + '_inf_' +
                         str(i) + '.png'),
            v.get_image()[:, :, ::-1])
Example #22
    def draw_instance_predictions(self, frame, predictions):
        """
        Draw instance-level prediction results on an image.

        Args:
            frame (ndarray): an RGB image of shape (H, W, C), in the range [0, 255].
            predictions (Instances): the output of an instance detection/segmentation
                model. Following fields will be used to draw:
                "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle").

        Returns:
            output (VisImage): image object with visualizations.
        """
        frame_visualizer = Visualizer(frame, self.metadata)
        num_instances = len(predictions)
        if num_instances == 0:
            return frame_visualizer.output

        boxes = predictions.pred_boxes.tensor.numpy() if predictions.has(
            "pred_boxes") else None
        scores = predictions.scores if predictions.has("scores") else None
        classes = predictions.pred_classes.numpy() if predictions.has(
            "pred_classes") else None
        keypoints = predictions.pred_keypoints if predictions.has(
            "pred_keypoints") else None
        colors = predictions.COLOR if predictions.has(
            "COLOR") else [None] * len(predictions)
        durations = predictions.ID_duration if predictions.has(
            "ID_duration") else None
        duration_threshold = self.metadata.get("duration_threshold", 0)
        visibilities = None if durations is None else [
            x > duration_threshold for x in durations
        ]

        if predictions.has("pred_masks"):
            masks = predictions.pred_masks
            # mask IOU is not yet enabled
            # masks_rles = mask_util.encode(np.asarray(masks.permute(1, 2, 0), order="F"))
            # assert len(masks_rles) == num_instances
        else:
            masks = None

        detected = [
            _DetectedInstance(classes[i],
                              boxes[i],
                              mask_rle=None,
                              color=colors[i],
                              ttl=8) for i in range(num_instances)
        ]
        if not predictions.has("COLOR"):
            colors = self._assign_colors(detected)

        labels = _create_text_labels(classes, scores,
                                     self.metadata.get("thing_classes", None))

        if self._instance_mode == ColorMode.IMAGE_BW:
            # any() returns uint8 tensor
            frame_visualizer.output.reset_image(
                frame_visualizer._create_grayscale_image((masks.any(
                    dim=0) > 0).numpy() if masks is not None else None))
            alpha = 0.3
        else:
            alpha = 0.5

        labels = None if labels is None else [
            y[0] for y in filter(lambda x: x[1], zip(labels, visibilities))
        ]  # noqa
        assigned_colors = None if colors is None else [
            y[0] for y in filter(lambda x: x[1], zip(colors, visibilities))
        ]  # noqa
        frame_visualizer.overlay_instances(
            boxes=None if masks is not None else
            boxes[visibilities],  # boxes are a bit distracting
            masks=None if masks is None else masks[visibilities],
            labels=labels,
            keypoints=None if keypoints is None else keypoints[visibilities],
            assigned_colors=assigned_colors,
            alpha=alpha,
        )

        return frame_visualizer.output
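# A minimal usage sketch for a per-frame visualizer like the method above. How it is driven
# is an assumption; detectron2's own VideoVisualizer (which has the same
# draw_instance_predictions(frame, predictions) signature) is used as a stand-in, and the
# config `cfg` and the video path are placeholders.
import cv2
from detectron2.data import MetadataCatalog
from detectron2.engine import DefaultPredictor
from detectron2.utils.video_visualizer import VideoVisualizer

predictor = DefaultPredictor(cfg)  # cfg prepared elsewhere
video_vis = VideoVisualizer(MetadataCatalog.get(cfg.DATASETS.TRAIN[0]))

cap = cv2.VideoCapture("input.mp4")  # placeholder path
while True:
    ok, frame_bgr = cap.read()
    if not ok:
        break
    predictions = predictor(frame_bgr)["instances"].to("cpu")
    # the visualizer expects an RGB frame
    vis_frame = video_vis.draw_instance_predictions(frame_bgr[:, :, ::-1], predictions)
    cv2.imshow("video", vis_frame.get_image()[:, :, ::-1])
    if cv2.waitKey(1) == 27:  # ESC to quit
        break
cap.release()
cv2.destroyAllWindows()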
Example #23
def detect():

    out_res, source, weights, save_txt = \
        opt.output, opt.source, opt.weights,  opt.save_txt

    def get_wire_dict(img_dir):
        fff = ['instances_default.json', 'instances_default2.json']
        dataset_dicts = []
        for name in fff:
            json_file = os.path.join(img_dir, name)
            with open(json_file) as f:
                imgs_anns = json.load(f)

            for idx, v in enumerate(imgs_anns['images']):
                record = {}

                filename = os.path.join(img_dir, v["file_name"])
                height, width = cv2.imread(filename).shape[:2]

                record["file_name"] = filename
                record["image_id"] = idx
                record["height"] = height
                record["width"] = width

                annos = imgs_anns['annotations']
                objs = []
                for anno in annos:
                    if anno['image_id'] == v['id']:
                        obj = {
                            "bbox": anno['bbox'],
                            "bbox_mode": BoxMode.XYWH_ABS,
                            "segmentation": anno['segmentation'],
                            "category_id": 0,
                        }
                        objs.append(obj)
                record["annotations"] = objs
                dataset_dicts.append(record)
        return dataset_dicts

    class DatasetMapper:
        """
        A callable which takes a dataset dict in Detectron2 Dataset format
        and maps it into a format used by the model.

        This is a custom version of the DatasetMapper. The only difference from Detectron2's
        DatasetMapper is that we extract attributes from our dataset_dict.
        """
        def __init__(self, cfg, is_train=True):
            if cfg.INPUT.CROP.ENABLED and is_train:
                self.crop_gen = T.RandomCrop(cfg.INPUT.CROP.TYPE,
                                             cfg.INPUT.CROP.SIZE)
                # logging.getLogger(__name__).info("CropGen used in training: " + str(self.crop_gen))
            else:
                self.crop_gen = None

            self.tfm_gens = [
                T.Resize((640, 640)),
                T.RandomBrightness(0.1, 1.6),
                T.RandomContrast(0.1, 1),
                T.RandomSaturation(0.1, 1),
                T.RandomRotation(angle=[90, 90]),
                T.RandomFlip(prob=0.4, horizontal=False, vertical=True),
                # T.RandomCrop('relative_range', (0.4, 0.6)),
                # CutOut()
            ]

            # self.tfm_gens = utils.build_transform_gen(cfg, is_train)

            # fmt: off
            self.img_format = cfg.INPUT.FORMAT
            self.mask_on = cfg.MODEL.MASK_ON
            self.mask_format = cfg.INPUT.MASK_FORMAT
            self.keypoint_on = cfg.MODEL.KEYPOINT_ON
            self.load_proposals = cfg.MODEL.LOAD_PROPOSALS
            # fmt: on
            if self.keypoint_on and is_train:
                # Flip only makes sense in training
                self.keypoint_hflip_indices = utils.create_keypoint_hflip_indices(
                    cfg.DATASETS.TRAIN)
            else:
                self.keypoint_hflip_indices = None

            if self.load_proposals:
                self.min_box_side_len = cfg.MODEL.PROPOSAL_GENERATOR.MIN_SIZE
                self.proposal_topk = (
                    cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN if is_train
                    else cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST)
            self.is_train = is_train

        def __call__(self, dataset_dict):
            """
            Args:
                dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

            Returns:
                dict: a format that builtin models in detectron2 accept
            """
            dataset_dict = copy.deepcopy(
                dataset_dict)  # it will be modified by code below
            # USER: Write your own image loading if it's not from a file
            image = utils.read_image(dataset_dict["file_name"],
                                     format=self.img_format)
            utils.check_image_size(dataset_dict, image)

            if "annotations" not in dataset_dict:
                image, transforms = T.apply_transform_gens(
                    ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens,
                    image)
            else:
                # Crop around an instance if there are instances in the image.
                # USER: Remove if you don't use cropping
                if self.crop_gen:
                    crop_tfm = utils.gen_crop_transform_with_instance(
                        self.crop_gen.get_crop_size(image.shape[:2]),
                        image.shape[:2],
                        np.random.choice(dataset_dict["annotations"]),
                    )
                    image = crop_tfm.apply_image(image)
                image, transforms = T.apply_transform_gens(
                    self.tfm_gens, image)
                if self.crop_gen:
                    transforms = crop_tfm + transforms

            image_shape = image.shape[:2]  # h, w

            # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
            # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
            # Therefore it's important to use torch.Tensor.
            dataset_dict["image"] = torch.as_tensor(
                np.ascontiguousarray(image.transpose(2, 0, 1)))

            # USER: Remove if you don't use pre-computed proposals.
            if self.load_proposals:
                utils.transform_proposals(dataset_dict, image_shape,
                                          transforms, self.min_box_side_len,
                                          self.proposal_topk)

            if not self.is_train:
                # USER: Modify this if you want to keep them for some reason.
                dataset_dict.pop("annotations", None)
                dataset_dict.pop("sem_seg_file_name", None)
                return dataset_dict

            if "annotations" in dataset_dict:
                # USER: Modify this if you want to keep them for some reason.
                for anno in dataset_dict["annotations"]:
                    if not self.mask_on:
                        anno.pop("segmentation", None)
                    if not self.keypoint_on:
                        anno.pop("keypoints", None)

                # USER: Implement additional transformations if you have other types of data
                annos = [
                    utils.transform_instance_annotations(
                        obj,
                        transforms,
                        image_shape,
                        keypoint_hflip_indices=self.keypoint_hflip_indices)
                    for obj in dataset_dict.pop("annotations")
                    if obj.get("iscrowd", 0) == 0
                ]
                instances = utils.annotations_to_instances(
                    annos, image_shape, mask_format=self.mask_format)
                # Create a tight bounding box from masks, useful when image is cropped
                if self.crop_gen and instances.has("gt_masks"):
                    instances.gt_boxes = instances.gt_masks.get_bounding_boxes()

                dataset_dict["instances"] = utils.filter_empty_instances(
                    instances)

                # USER: Remove if you don't do semantic/panoptic segmentation.
            # if "sem_seg_file_name" in dataset_dict:
            #     with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f:
            #         sem_seg_gt = Image.open(f)
            #         sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
            #     sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
            #     sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
            #     dataset_dict["sem_seg"] = sem_seg_gt

            return dataset_dict
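
    # Sanity-check sketch (assumption, not in the original): the mapper can be probed on a single
    # dataset dict before wiring it into the trainer below.
    def check_mapper(cfg, one_dict):
        mapper = DatasetMapper(cfg, is_train=True)
        out = mapper(copy.deepcopy(one_dict))
        # image is a (C, H, W) tensor; instances holds the transformed ground truth
        print(out["image"].shape, len(out["instances"]))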

    class wireTrainer(DefaultTrainer):
        @classmethod
        def build_train_loader(cls, cfg):
            return build_detection_train_loader(cfg, mapper=DatasetMapper(cfg))

        @classmethod
        def build_test_loader(cls, cfg, dataset_name):
            return build_detection_test_loader(cfg,
                                               dataset_name,
                                               mapper=DatasetMapper(cfg))

    def register_dataset(dataset_label, d):
        # Register dataset - if dataset is already registered, give it a new name
        try:
            DatasetCatalog.register(
                dataset_label, lambda d=d: get_wire_dict("dataset_wire/" + d))
            MetadataCatalog.get(dataset_label).thing_classes = ['wire']
        except AssertionError:
            # DatasetCatalog.register raises an AssertionError when the name is already taken,
            # so append a random int to the dataset name to avoid the 'Already registered' error
            n = random.randint(1, 1000)
            dataset_label = dataset_label + str(n)
            DatasetCatalog.register(
                dataset_label, lambda d=d: get_wire_dict("dataset_wire/" + d))
            MetadataCatalog.get(dataset_label).thing_classes = ['wire']

        return MetadataCatalog.get(dataset_label), dataset_label

    metadata, train_dataset = register_dataset('wire_train', "train")
    # metadata, test_dataset = register_dataset('wire_test', "val")

    wire_dict = get_wire_dict("dataset_wire/train")

    # for d in random.sample(wire_dict, 2):
    #     plt.figure(figsize=(10,10))
    #     img = cv2.imread(d["file_name"])
    #     visualizer = Visualizer(img[:, :, ::-1], metadata, scale=0.5)
    #     vis = visualizer.draw_dataset_dict(d)
    #     plt.imshow(vis.get_image()[:, :, ::-1])
    # plt.show()

    # NOTE: despite the 'faster_rcnn' label below, both branches select Mask R-CNN configs from the model zoo
    MODEL_USE = 'ResNet'
    if MODEL_USE == 'faster_rcnn':
        MODEL_PATH = 'COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml'
        WEIGHT_PATH = 'COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml'
    elif MODEL_USE == 'ResNet':
        MODEL_PATH = 'COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml'
        WEIGHT_PATH = 'COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml'

    def cfg_setup():
        cfg = get_cfg()
        cfg.merge_from_file(model_zoo.get_config_file(MODEL_PATH))
        cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(WEIGHT_PATH)
        cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # single 'wire' class (the RETINANET key has no effect on Mask R-CNN)
        cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256

        cfg.DATASETS.TRAIN = (train_dataset, )
        cfg.DATASETS.TEST = ()
        cfg.DATALOADER.NUM_WORKERS = 4

        cfg.SOLVER.IMS_PER_BATCH = 4
        cfg.SOLVER.LR_SCHEDULER_NAME = 'WarmupCosineLR'
        cfg.SOLVER.BASE_LR = 0.02
        #     cfg.SOLVER.WARMUP_ITERS = 4500
        #     cfg.SOLVER.WARMUP_METHOD = "linear"
        cfg.SOLVER.MAX_ITER = 2000
        os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

        return cfg
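
    # Training sketch (assumption, not in the original script, which only runs inference below):
    # the custom wireTrainer and cfg_setup() defined above can be combined like this.
    def run_wire_training():
        train_cfg = cfg_setup()
        trainer = wireTrainer(train_cfg)
        trainer.resume_or_load(resume=False)
        trainer.train()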

    def cfg_test():
        cfg = get_cfg()
        cfg.merge_from_file(model_zoo.get_config_file(MODEL_PATH))
        cfg.MODEL.WEIGHTS = weights  # path to the trained checkpoint ('weights' is expected to be defined earlier in the script)
        # cfg.DATASETS.TEST = (test_dataset,)
        cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
        cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # these ROI_HEADS keys are the ones Mask R-CNN actually reads

        return cfg

    cfg = cfg_test()
    predict = DefaultPredictor(cfg)

    lines = []
    # for i, d in enumerate(glob.glob('./img/*.jpg')):
    #     fig, ax = plt.subplots()
    #     namefile = os.path.basename(d)
    #
    #     im = cv2.imread(d)
    #     outputs = predict(im)  # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
    #     out = outputs["instances"].to("cpu")
    #     masks = out.pred_masks.numpy()
    #
    #     for i in range(masks.shape[0]):
    #         immask = masks[i,:,:]
    #         graf = []
    #         plot_x = []
    #         plot_y = []
    #         coords = np.column_stack(np.where(immask > 0))
    #         # for x in range(immask.shape[1]):
    #         #     n, = np.where(immask[:,x] == True)
    #         #     if len(n) != 0:
    #         #         yyy = immask.shape[0] - np.mean(n)
    #         #         # graf.append([x,yyy])
    #         #         plot_x.append(x)
    #         #         plot_y.append(yyy)
    #         # # print(graf)
    #         if len(coords[:,1]) != 0:
    #             cor1 = stat.pearsonr(coords[:,1], coords[:,0])
    #             # cor2 = stat.pearsonr(coords[:,0], coords[:,1])
    #             plt.scatter(coords[:,1],immask.shape[0]-coords[:,0], label = '{}'.format(np.round(cor1[0],3)))
    #     ax.legend()
    #     plt.title(namefile)
    #     plt.show()

    # lines.append(graf)
    # dataset_dicts = get_wire_dicts("dataset_wire/val")

    for i, d in enumerate(glob.glob(source + '/*.jpg')):
        im = cv2.imread(d)
        namefile = os.path.basename(d)
        arr = os.path.splitext(namefile)

        outputs = predict(
            im
        )  # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
        v = Visualizer(
            im[:, :, ::-1],
            metadata=metadata,
            scale=1,
            instance_mode=ColorMode.IMAGE_BW  # remove the colors of unsegmented pixels; only available for segmentation models
        )
        outputs2 = outputs["instances"].to("cpu")
        out = v.draw_instance_predictions(outputs2)
        # cv2.imshow('',out.get_image()[:, :, ::-1])
        cv2.imwrite(os.path.join(out_res, namefile),
                    out.get_image()[:, :, ::-1])
        # cv2.waitKey()
        if True:  # save_txt: always export each predicted instance mask as a text file
            arr_nump_mask = outputs2.pred_masks.numpy()
            for iiii in range(arr_nump_mask.shape[0]):
                np.savetxt(os.path.join(out_res,
                                        '{}_{}.txt'.format(arr[0], iiii)),
                           arr_nump_mask[iiii, :, :],
                           fmt='%i')
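
    # Sketch (assumption, not in the original): masks exported above with np.savetxt(fmt='%i')
    # can be read back as boolean arrays for later post-processing.
    def load_saved_mask(txt_path):
        return np.loadtxt(txt_path).astype(bool)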
    def draw_instance_predictions_custom(self,
                                         frame,
                                         predictions,
                                         incl_boxes=True,
                                         incl_labels=True,
                                         incl_scores=True,
                                         target_alpha=None):

        frame_visualizer = Visualizer(frame, self.metadata)
        num_instances = len(predictions)
        if num_instances == 0:
            return frame_visualizer.output

        boxes = predictions.pred_boxes.tensor.numpy() if predictions.has(
            "pred_boxes") else None
        scores = predictions.scores if predictions.has("scores") else None
        scores = scores if incl_scores else None
        classes = predictions.pred_classes.numpy() if predictions.has(
            "pred_classes") else None
        keypoints = predictions.pred_keypoints if predictions.has(
            "pred_keypoints") else None

        if predictions.has("pred_masks"):
            masks = predictions.pred_masks
            # mask IOU is not yet enabled
            # masks_rles = mask_util.encode(np.asarray(masks.permute(1, 2, 0), order="F"))
            # assert len(masks_rles) == num_instances
        else:
            masks = None

        detected = [
            _DetectedInstance(classes[i],
                              boxes[i],
                              mask_rle=None,
                              color=None,
                              ttl=8) for i in range(num_instances)
        ]
        colors = self._assign_colors(detected)

        labels = _create_text_labels(classes, scores,
                                     self.metadata.get("thing_classes", None))

        if self._instance_mode == ColorMode.IMAGE_BW:
            # any() returns uint8 tensor
            frame_visualizer.output.img = frame_visualizer._create_grayscale_image(
                (masks.any(dim=0) > 0).numpy() if masks is not None else None)
            alpha = 0.3
        else:
            alpha = 0.5

        boxes = boxes if incl_boxes else None
        labels = labels if incl_labels else None
        alpha = alpha if target_alpha is None else target_alpha

        frame_visualizer.overlay_instances(
            boxes=None if masks is not None else boxes,  # boxes are a bit distracting
            masks=masks,
            labels=labels,
            keypoints=keypoints,
            assigned_colors=colors,
            alpha=alpha,
        )

        return frame_visualizer.output
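
    # Usage sketch (assumption, not in the original): this helper mirrors detectron2's
    # VideoVisualizer.draw_instance_predictions, so one option is to attach it to that class
    # (it relies on self.metadata, self._instance_mode and self._assign_colors).
    from detectron2.utils.video_visualizer import VideoVisualizer
    VideoVisualizer.draw_instance_predictions_custom = draw_instance_predictions_custom

    def visualize_frame(frame_rgb, instances_cpu):
        video_vis = VideoVisualizer(metadata)
        return video_vis.draw_instance_predictions_custom(
            frame_rgb, instances_cpu, incl_boxes=False, target_alpha=0.6)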
Ejemplo n.º 25
0
def experiment_1(exp_name, model_file):

    print('Running Task B experiment', exp_name)
    SAVE_PATH = os.path.join('./results_week_6_task_b', exp_name)
    os.makedirs(SAVE_PATH, exist_ok=True)

    # Loading data
    print('Loading data')
    kittiloader = KittiMots()

    def rkitti_train():
        return kittiloader.get_dicts(flag='train',
                                     method='complete',
                                     percentage=1.0)

    def rkitti_val():
        return kittiloader.get_dicts(flag='val')

    def rkitti_test():
        return kittiloader.get_dicts(flag='test')

    DatasetCatalog.register('KITTI_train', rkitti_train)
    MetadataCatalog.get('KITTI_train').set(
        thing_classes=list(KITTI_CATEGORIES.keys()))
    DatasetCatalog.register('KITTI_val', rkitti_val)
    MetadataCatalog.get('KITTI_val').set(
        thing_classes=list(KITTI_CATEGORIES.keys()))
    DatasetCatalog.register('KITTI_test', rkitti_test)
    MetadataCatalog.get('KITTI_test').set(
        thing_classes=list(KITTI_CATEGORIES.keys()))

    # Load model and configuration
    print('Loading Model')
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(model_file))
    cfg.DATASETS.TRAIN = ('KITTI_train', )
    cfg.DATASETS.TEST = ('KITTI_val', )
    cfg.DATALOADER.NUM_WORKERS = 4
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    cfg.OUTPUT_DIR = SAVE_PATH
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_file)
    cfg.SOLVER.IMS_PER_BATCH = 4
    cfg.SOLVER.BASE_LR = 0.00025
    cfg.SOLVER.MAX_ITER = 4000
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
    cfg.TEST.SCORE_THRESH = 0.5  # note: not a built-in detectron2 key; the effective threshold is ROI_HEADS.SCORE_THRESH_TEST above

    # Training
    print('Training')
    trainer = DefaultTrainer(cfg)
    val_loss = ValidationLoss(cfg)
    trainer.register_hooks([val_loss])
    # swap the last two hooks so the validation-loss hook runs before the periodic writer logs metrics
    trainer._hooks = trainer._hooks[:-2] + trainer._hooks[-2:][::-1]
    trainer.resume_or_load(resume=False)
    trainer.train()

    # Evaluation
    print('Evaluating')
    cfg.DATASETS.TEST = ('KITTI_test', )
    evaluator = COCOEvaluator('KITTI_test', cfg, False, output_dir=SAVE_PATH)
    trainer.model.load_state_dict(val_loss.weights)
    trainer.test(cfg, trainer.model, evaluators=[evaluator])
    print('Plotting losses')
    plot_validation_loss(cfg, cfg.SOLVER.MAX_ITER, exp_name, SAVE_PATH,
                         'validation_loss.png')

    # Qualitative results: visualize some results
    print('Getting qualitative results')
    predictor = DefaultPredictor(cfg)
    predictor.model.load_state_dict(trainer.model.state_dict())
    inputs = rkitti_test()
    inputs = [inputs[i] for i in TEST_INFERENCE_VALUES]
    for i, input in enumerate(inputs):
        file_name = input['file_name']
        print('Prediction on image ' + file_name)
        img = cv2.imread(file_name)
        outputs = predictor(img)
        v = Visualizer(img[:, :, ::-1],
                       metadata=MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
                       scale=0.8,
                       instance_mode=ColorMode.IMAGE)
        v = v.draw_instance_predictions(outputs['instances'].to('cpu'))
        cv2.imwrite(
            os.path.join(SAVE_PATH,
                         'Inference_' + exp_name + '_inf_' + str(i) + '.png'),
            v.get_image()[:, :, ::-1])
    def draw_panoptic_seg_predictions_custom(self,
                                             frame,
                                             panoptic_seg,
                                             segments_info,
                                             area_threshold=None,
                                             alpha=0.5,
                                             incl_labels=True):
        frame_visualizer = Visualizer(frame, self.metadata)
        pred = _PanopticPrediction(panoptic_seg, segments_info)

        if self._instance_mode == ColorMode.IMAGE_BW:
            frame_visualizer.output.img = frame_visualizer._create_grayscale_image(
                pred.non_empty_mask())

        # draw mask for all semantic segments first i.e. "stuff"
        for mask, sinfo in pred.semantic_masks():
            category_idx = sinfo["category_id"]
            try:
                mask_color = [
                    x / 255 for x in self.metadata.stuff_colors[category_idx]
                ]
            except AttributeError:
                mask_color = None

            text = self.metadata.stuff_classes[category_idx] if incl_labels else None
            frame_visualizer.draw_binary_mask(
                mask,
                color=mask_color,
                text=text,
                alpha=alpha,
                area_threshold=area_threshold,
            )

        all_instances = list(pred.instance_masks())
        if len(all_instances) == 0:
            return frame_visualizer.output
        # draw mask for all instances second
        masks, sinfo = list(zip(*all_instances))
        num_instances = len(masks)
        masks_rles = mask_util.encode(
            np.asarray(np.asarray(masks).transpose(1, 2, 0),
                       dtype=np.uint8,
                       order="F"))
        assert len(masks_rles) == num_instances

        category_ids = [x["category_id"] for x in sinfo]
        detected = [
            _DetectedInstance(category_ids[i],
                              bbox=None,
                              mask_rle=masks_rles[i],
                              color=None,
                              ttl=8) for i in range(num_instances)
        ]
        colors = self._assign_colors(detected)
        labels = [self.metadata.thing_classes[k] for k in category_ids]
        labels = labels if incl_labels else None

        frame_visualizer.overlay_instances(
            boxes=None,
            masks=masks,
            labels=labels,
            keypoints=None,
            assigned_colors=colors,
            alpha=alpha,
        )
        return frame_visualizer.output
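
# Usage sketch (assumption, not in the original): experiment_1 takes an experiment name and a
# model-zoo config path (the path is passed to model_zoo.get_config_file / get_checkpoint_url).
if __name__ == '__main__':
    experiment_1('taskb_mask_rcnn_R50', 'COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml')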
Ejemplo n.º 27
0
name_list = []
for images in ddd["images"]:
  k = "/content/drive/My Drive/new_imgs/" + images["file_name"]
  name_list.append(k)
print(len(name_list))
print(name_list)


for imageName in glob.glob('/content/drive/My Drive/new_imgs/*png'):
  if (imageName in name_list):
    N = imageName
    im = cv2.imread(N)
    outputs = predictor(im)
    v = Visualizer(im[:, :, ::-1],
                  metadata=test_metadata, 
                  scale=1
                  )
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    cv2_imshow(out.get_image()[:, :, ::-1])

    original_box = []
    for box in outputs["instances"]._fields["pred_boxes"]:
      box = box.to('cpu')
      box = box.numpy()
      box = box.tolist()
      original_box.append(box)
    
    aa = cv2.imread(N, cv2.IMREAD_COLOR)
    aa = cv2.cvtColor(aa, cv2.COLOR_BGR2RGB)
    for box in original_box:
      lx, ly, rx, ry = box
      # sketch (assumption; the remainder of this loop is not shown): draw each predicted box on the RGB copy
      cv2.rectangle(aa, (int(lx), int(ly)), (int(rx), int(ry)), (255, 0, 0), 2)
    def draw_instance_predictions(self, frame, predictions):
        """
        Draw instance-level prediction results on an image.

        Args:
            frame (ndarray): an RGB image of shape (H, W, C), in the range [0, 255].
            predictions (Instances): the output of an instance detection/segmentation
                model. Following fields will be used to draw:
                "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle").

        Returns:
            output (VisImage): image object with visualizations.
        """
        frame_visualizer = Visualizer(frame, self.metadata)
        num_instances = len(predictions)
        if num_instances == 0:
            return frame_visualizer.output

        boxes = predictions.pred_boxes.tensor.numpy() if predictions.has(
            "pred_boxes") else None
        scores = predictions.scores if predictions.has("scores") else None
        classes = predictions.pred_classes.numpy() if predictions.has(
            "pred_classes") else None
        keypoints = predictions.pred_keypoints if predictions.has(
            "pred_keypoints") else None

        if predictions.has("pred_masks"):
            masks = predictions.pred_masks
            # mask IOU is not yet enabled
            # masks_rles = mask_util.encode(np.asarray(masks.permute(1, 2, 0), order="F"))
            # assert len(masks_rles) == num_instances
        else:
            masks = None

        detected = [
            _DetectedInstance(classes[i],
                              boxes[i],
                              mask_rle=None,
                              color=None,
                              ttl=8) for i in range(num_instances)
        ]
        colors = self._assign_colors(detected)

        labels = _create_text_labels(classes, scores,
                                     self.metadata.get("thing_classes", None))

        if self._instance_mode == ColorMode.IMAGE_BW:
            # any() returns uint8 tensor
            frame_visualizer.output.img = frame_visualizer._create_grayscale_image(
                (masks.any(dim=0) > 0).numpy() if masks is not None else None)
            alpha = 0.3
        else:
            alpha = 0.5

        frame_visualizer.overlay_instances(
            boxes=None if masks is not None else boxes,  # boxes are a bit distracting
            masks=masks,
            labels=labels,
            keypoints=keypoints,
            assigned_colors=colors,
            alpha=alpha,
        )

        return frame_visualizer.output
Ejemplo n.º 29
0
    plt.show()

    # lines.append(graf)
from detectron2.utils.visualizer import ColorMode
# dataset_dicts = get_wire_dicts("dataset_wire/val")

for i, d in enumerate(glob.glob('./img/*.jpg')):
    im = cv2.imread(d)
    namefile = os.path.basename(d)
    arr = os.path.splitext(namefile)

    outputs = predict(
        im
    )  # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
    v = Visualizer(
        im[:, :, ::-1],
        metadata=metadata,
        scale=1,
        instance_mode=ColorMode.IMAGE_BW  # remove the colors of unsegmented pixels; only available for segmentation models
    )
    outputs2 = outputs["instances"].to("cpu")
    out = v.draw_instance_predictions(outputs2)
    # cv2.imshow('',out.get_image()[:, :, ::-1])
    cv2.imwrite(os.path.join('./out_img', namefile),
                out.get_image()[:, :, ::-1])
    # cv2.waitKey()

    # np.savetxt(os.path.join('./out_img', arr[0]+ '.txt'),outputs2.)
    # f.write(('%g ' * 5 + '\n') % (cls, *xywh))  # label format
cfg = get_cfg()
cfg.merge_from_file(
    "./detectron2_repo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
)
cfg.DATASETS.TRAIN = ("fruits_nuts", )
cfg.DATASETS.TEST = ()  # no metrics implemented for this dataset
cfg.DATALOADER.NUM_WORKERS = 2
# initialize from model zoo
cfg.MODEL.WEIGHTS = "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.02
cfg.SOLVER.MAX_ITER = 300  # 300 iterations seems good enough, but you can certainly train longer
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128  # faster, and good enough for this toy dataset
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3  # 3 classes (date, fig, hazelnut)

cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
cfg.DATASETS.TEST = ("fruits_nuts", )
predictor = DefaultPredictor(cfg)

for d in random.sample(dataset_dicts, 3):
    im = cv2.imread(d["file_name"])
    outputs = predictor(im)
    v = Visualizer(im[:, :, ::-1], metadata=fruits_nuts_metadata, scale=0.8)
    v = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    cv2.imshow("preview", v.get_image()[:, :, ::-1])
    cv2.waitKey(0)
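
# Evaluation sketch (assumption, not in the original): once a split with ground truth is registered,
# COCO-style AP numbers can be computed for this model with the standard detectron2 evaluators.
from detectron2.data import build_detection_test_loader
from detectron2.evaluation import COCOEvaluator, inference_on_dataset

evaluator = COCOEvaluator("fruits_nuts", cfg, False, output_dir=cfg.OUTPUT_DIR)
val_loader = build_detection_test_loader(cfg, "fruits_nuts")
print(inference_on_dataset(predictor.model, val_loader, evaluator))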