Exemple #1
0
def calc_metadata(args):
    """Build a ``Metadata`` object from the JSON file named by ``args.meta_data``.

    This helper exists because the metadata is NOT stored inside cfg.

    Args:
        args: Object exposing a ``meta_data`` attribute that is either
            ``None`` or the path of a JSON file containing the keys
            ``"name"`` and ``"thing_classes"``.

    Returns:
        ``None`` when ``args.meta_data`` is ``None``; otherwise a
        ``Metadata`` created with the file's ``"name"`` and carrying its
        ``"thing_classes"``.
    """
    if args.meta_data is None:
        return None

    # Use a context manager so the file handle is closed deterministically
    # (a bare ``json.load(open(...))`` leaves it open until garbage collection).
    with open(args.meta_data) as metadata_file:
        raw_metadata = json.load(metadata_file)

    metadata = Metadata(name=raw_metadata["name"])
    metadata.set(thing_classes=raw_metadata["thing_classes"])
    return metadata
Exemple #2
0
 def set_up_faster_rcnn(self):
     """Create the Faster R-CNN predictor and the class-name metadata.

     Starts from the model-zoo ``faster_rcnn_R_101_FPN_3x`` config, points
     it at the local weights file and configures two ROI-head classes with
     a 0.65 test-time score threshold. The config, a ``DefaultPredictor``
     built from it and a ``Metadata`` naming the two classes are stored on
     ``self``.
     """
     cfg = self.cfg = get_cfg()
     base_config = model_zoo.get_config_file(
         "COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml")
     cfg.merge_from_file(base_config)

     cfg.MODEL.WEIGHTS = "weights/model_final.pth"
     roi_heads = cfg.MODEL.ROI_HEADS
     roi_heads.NUM_CLASSES = 2
     roi_heads.SCORE_THRESH_TEST = 0.65

     self.predictor = DefaultPredictor(cfg)

     class_names = self.metadata = Metadata()
     class_names.set(thing_classes=['station', 'forklift'])
Exemple #3
0
 def postprocess(self, inference_output):
     """
     Serialise every model output of the batch into a JSON string.

     Each element of ``inference_output`` is indexed as ``[image, outputs]``.
     For every element the predicted instances are drawn on the image, the
     rendering is JPEG-encoded into a base64 data URI, and the predicted
     classes, scores and boxes are exported as plain lists (``None`` for a
     field the predictions do not carry). ``masks`` is always ``None``.

     Returns a list holding one JSON document per element, in input order.
     """
     print("Starting model inference output postprocessing...")
     serialized = []
     for item in inference_output:
         image, outputs = item[0], item[1]
         instances = outputs["instances"].to("cpu")

         class_names = Metadata()
         class_names.set(
             thing_classes=["Type 1", "Type 2", "Type 3", "Type 4"])

         # Draw the predictions on a channel-reversed view at half scale.
         canvas = Visualizer(image[:, :, ::-1],
                             metadata=class_names,
                             scale=0.5)
         rendered = canvas.draw_instance_predictions(instances).get_image()
         jpeg_input = cv2.cvtColor(rendered[:, :, ::-1], cv2.COLOR_BGR2RGB)
         jpeg_bytes = cv2.imencode(".jpg", jpeg_input)[1].tobytes()
         image_b64 = ("data:image/jpg;base64," +
                      base64.b64encode(jpeg_bytes).decode("utf-8"))

         # Every optional field starts as None and is filled only if present.
         payload = {
             "data": image_b64,
             "classes": None,
             "scores": None,
             "boxes": None,
             "masks": None
         }
         if instances.has("pred_classes"):
             payload["classes"] = instances.pred_classes.numpy().tolist()
         if instances.has("scores"):
             payload["scores"] = instances.scores.numpy().tolist()
         if instances.has("pred_boxes"):
             payload["boxes"] = instances.pred_boxes.tensor.numpy().tolist()
         if instances.has("pred_masks"):
             # Mask export stays disabled: for an unknown reason (lack of
             # memory, timeout...?) converting pred_masks to a list does not
             # work with TorchServe, although it works in a Jupyter notebook.
             pass

         serialized.append(json.dumps(payload))
     print("Model inference output postprocessing done")
     return serialized
Exemple #4
0
    def _merge_metadata(self):
        """Blend the thing classes of ``mask_md`` into those of ``po_md``.

        The merged list starts as a copy of the ``po_md`` thing classes and
        ``self.offset`` records its length before anything is appended. Each
        ``mask_md`` thing class is then appended in order; a name already
        present in the merged list is appended as ``custom_<name>`` instead.
        The result, together with the ``po_md`` stuff classes, is stored as
        a new ``Metadata`` in ``self.blend_md``.
        """
        extra_things = self.mask_md.get('thing_classes')
        merged_things = self.po_md.get('thing_classes').copy()
        stuff = self.po_md.get('stuff_classes')

        self.offset = len(merged_things)

        for name in extra_things:
            # Membership is checked against the growing list, so a repeated
            # extra name is prefixed on its second occurrence as well.
            merged_things.append(
                f"custom_{name}" if name in merged_things else name)

        self.blend_md = Metadata(thing_classes=merged_things,
                                 stuff_classes=stuff)
def register_custom_coco_dataset(cfg: DictConfig,
                                 process: str = 'train') \
        -> Tuple[List[Dict], Metadata]:
    """
    Register the custom COCO-format dataset with detectron2 and look it up.

    The training split is registered on every call; the validation split is
    registered only when ``process`` is ``'val'``. Registering a split that
    is already known is tolerated for both splits, so the function can be
    called repeatedly.

    :param cfg: the configuration dictionary of dataset_model.
    :type cfg: omegaconf.dictconfig.DictConfig.
    :param process: value should be 'train', 'val', or 'test'
    :type process: str
    :return: the dataset records in COCO format together with the dataset
             metadata. For 'test' only the training-split metadata is
             looked up and the records are the placeholder ``[{}]``.
    :rtype: Tuple[List[Dict], detectron2.data.catalog.Metadata]
    :raises ValueError: if ``process`` is not 'train', 'test' or 'val'.
    """
    if process not in ('train', 'test', 'val'):
        # ValueError is still an Exception, so existing handlers keep working.
        raise ValueError(
            f"process is {process}, but it must be either 'train', 'test', or 'val'"
        )

    train_dataset: str = cfg.name + "_train"
    train_dir = PROJECT_PATH / cfg.train.train_dataset_dir
    _register_coco_dataset_once(train_dataset,
                                str(train_dir / 'coco_instances.json'),
                                train_dir / 'images')

    if process == 'train':
        return (DatasetCatalog.get(train_dataset),
                MetadataCatalog.get(train_dataset))

    if process == 'test':
        log.info(f'Getting metadata for testing on {cfg.name}')
        # No records are loaded for testing; keep the original placeholder.
        return [{}], MetadataCatalog.get(train_dataset)

    # process == 'val'
    val_dataset: str = cfg.name + "_val"
    val_dir = PROJECT_PATH / cfg.validation.val_dataset_dir
    _register_coco_dataset_once(val_dataset,
                                str(val_dir / 'coco_instances.json'),
                                val_dir / 'images')
    return DatasetCatalog.get(val_dataset), MetadataCatalog.get(val_dataset)


def _register_coco_dataset_once(name: str, json_file: str, image_root) -> None:
    """Register one COCO-format split, tolerating a repeated registration.

    :param name: dataset name to register.
    :param json_file: path of the COCO instances JSON file.
    :param image_root: directory containing the images.
    """
    log.info(f'Registering {name} as a COCO-format dataset')
    try:
        register_coco_instances(name=name,
                                metadata={},
                                json_file=json_file,
                                image_root=image_root)
    except AssertionError as err:
        # Treated as "already registered", as the original training-split
        # guard did; the reason is logged instead of silently dropped.
        log.debug(f'Skipping registration of {name}: {err}')
Exemple #6
0
def make_inference(image, model_weights, threshold, n=5, save=False):
    """
    Run the detector on a single image and draw its first n predictions.

    Params:
    -------
    image : the image to predict on; it is converted with np.asarray before
        use (the code treats it as a PIL image, not as a file path)
    model_weights (str) : file path to model weights, forwarded to
        create_predictor
    threshold (float) : confidence threshold, forwarded to create_predictor
    n (int) : maximum number of predicted instances to draw and return,
        default 5. Fewer are drawn when the predictor returns fewer.
    save (bool) : accepted but not used by this function, default False

    Returns:
    --------
    (image with the predictions drawn on, the first n predicted instances)
    """
    # create_predictor also hands back the model config, unused here
    _, predictor = create_predictor(model_weights, threshold)

    pixels = np.asarray(image)

    class_names = Metadata()
    class_names.set(thing_classes=subset)

    canvas = Visualizer(img_rgb=pixels, metadata=class_names, scale=0.3)
    predictions = predictor(pixels)["instances"]

    # Only the first n instances are moved to the CPU and drawn
    rendered = canvas.draw_instance_predictions(predictions[:n].to("cpu"))

    return rendered.get_image(), predictions[:n]
    def _maybe_substitute_metadata(self):
        """Replace ``self._metadata`` when its category-id mapping is not one-to-one.

        Nothing happens if the contiguous-id -> category-id map returned by
        ``get_contiguous_id_to_category_id_map`` has as many entries as
        ``thing_dataset_id_to_contiguous_id``. Otherwise a new ``Metadata``
        is built that copies every attribute of the current one, except
        that ``thing_dataset_id_to_contiguous_id`` keeps only the pairs
        which the reverse map sends back to the same category id.
        """
        reverse_map = get_contiguous_id_to_category_id_map(self._metadata)
        forward_map = self._metadata.thing_dataset_id_to_contiguous_id
        if len(reverse_map) == len(forward_map):
            return

        # Keep only the pairs that survive a round trip through both maps.
        injective_map = {
            category_id: contiguous_id
            for category_id, contiguous_id in forward_map.items()
            if contiguous_id in reverse_map
            and reverse_map[contiguous_id] == category_id
        }

        replacement = Metadata(name=self._metadata.name)
        for attr, value in self._metadata.__dict__.items():
            is_id_map = attr == "thing_dataset_id_to_contiguous_id"
            setattr(replacement, attr, injective_map if is_id_map else value)
        self._metadata = replacement
Exemple #8
0
    def __init__(self, config_path, model_weights_path, model_device="cpu"):
        """Set up logging, the model config and a parallel ``VisualizationDemo``.

        Args:
            config_path: Config file path handed to ``self._setup_cfg``.
            model_weights_path: Value used for the ``MODEL.WEIGHTS`` override.
            model_device: Value used for the ``MODEL.DEVICE`` override;
                defaults to ``"cpu"``.
        """
        mp.set_start_method("spawn", force=True)
        self.logger = setup_logger()

        # Key/value overrides, flattened as alternating name and value.
        overrides = [
            "MODEL.WEIGHTS", model_weights_path,
            "MODEL.DEVICE", model_device,
        ]
        # NOTE(review): 0.5 is presumably a confidence threshold; confirm
        # against _setup_cfg.
        cfg = self._setup_cfg(config_path, overrides, 0.5)

        publaynet_classes = ["text", "title", "list", "table", "figure"]
        publaynet_metadata = Metadata(
            evaluator_type="coco",
            name="PubLayNet",
            thing_classes=publaynet_classes,
        )
        self.demo = VisualizationDemo(cfg, publaynet_metadata, parallel=True)
    def __init__(self, config_path, model_weights_path, model_device="cpu"):
        """Set up logging, the model config, metadata and an ``AsyncPredictor``.

        Args:
            config_path: Config file path handed to ``self._setup_cfg``.
            model_weights_path: Value used for the ``MODEL.WEIGHTS`` override.
            model_device: Value used for the ``MODEL.DEVICE`` override;
                defaults to ``"cpu"``.
        """
        mp.set_start_method("spawn", force=True)
        self.logger = setup_logger("detectron.out")

        # Key/value overrides, flattened as alternating name and value.
        overrides = [
            "MODEL.WEIGHTS", model_weights_path,
            "MODEL.DEVICE", model_device,
        ]
        # NOTE(review): 0.5 is presumably a confidence threshold; confirm
        # against _setup_cfg.
        cfg = self._setup_cfg(config_path, overrides, 0.5)

        publaynet_classes = ["text", "title", "list", "table", "figure"]
        self.metadata = Metadata(
            evaluator_type="coco",
            name="PubLayNet",
            thing_classes=publaynet_classes,
        )
        # The predictor is told how many CUDA devices torch reports.
        self.predictor = AsyncPredictor(cfg,
                                        num_gpus=torch.cuda.device_count())
Exemple #10
0
class detector:
    def __init__(self, rgb_image, depth_image, fx, fy, cx, cy):
        self.set_up_faster_rcnn()
        self.set_up_fpointnet()
        self.detection(rgb_image, depth_image, fx, fy, cx, cy)

    def set_up_faster_rcnn(self):
        self.cfg = get_cfg()
        self.cfg.merge_from_file(
            model_zoo.get_config_file(
                "COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml"))
        self.cfg.MODEL.WEIGHTS = "weights/model_final.pth"
        self.cfg.MODEL.ROI_HEADS.NUM_CLASSES = 2
        self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.65
        self.cfg.MODEL.DEVICE = 'cpu'
        self.predictor = DefaultPredictor(self.cfg)
        self.metadata = Metadata()
        self.metadata.set(thing_classes=['station', 'forklift'])

    def set_up_fpointnet(self):
        self.FrustumPointNet = FrustumPointNetv1(n_classes=6, n_channel=6)
        self.pth = torch.load("weights/frustum_model.pth", map_location='cpu')
        self.FrustumPointNet.load_state_dict(self.pth['model_state_dict'])
        self.model = self.FrustumPointNet.eval()

    def detection(self, rgb_image, depth_image, fx, fy, cx, cy):
        print('start detection')
        rgb_image = rgb_image
        depth_image = np.nan_to_num(depth_image, nan=0)
        outputs = self.predictor(rgb_image)
        prob_list = outputs["instances"].scores
        class_list = outputs["instances"].pred_classes
        box2d_list = outputs["instances"].pred_boxes.tensor

        pitch = 0.09557043068606919
        rotation = np.array([[1, 0, 0], [0, np.cos(pitch), -np.sin(pitch)],
                             [0, np.sin(pitch),
                              np.cos(pitch)]])

        count = 0
        pose = np.zeros([1, 4])
        pose_list = []

        for idx in range(len(class_list)):

            object_class = class_list[idx].numpy()
            prob = prob_list[idx].numpy()
            xmin, ymin, xmax, ymax = map(int, box2d_list[idx])

            if (xmax - xmin) > 1.5 * (ymax - ymin):
                continue

            rgb = np.zeros_like(rgb_image)
            depth = np.zeros_like(depth_image)
            rgb[ymin:ymax, xmin:xmax] = rgb_image[ymin:ymax, xmin:xmax]
            depth[ymin:ymax, xmin:xmax] = depth_image[ymin:ymax, xmin:xmax]
            print("class: {} ,depth_mean: {}".format(
                object_class, np.mean(depth[ymin:ymax, xmin:xmax])))
            pcs = depth2pc(rgb, depth, fx, fy, cx, cy,
                           1).point_cloud_generator()
            pcs[:, 0:3] = np.dot(pcs[:, 0:3].astype(np.float32), rotation)
            mask = pcs[:, 2] != 0
            pcs = pcs[mask, :]
            box2d_center = np.array([(xmin + xmax) / 2.0, (ymin + ymax) / 2.0])
            uvdepth = np.zeros((1, 3))
            uvdepth[0, 0:2] = box2d_center
            uvdepth[0, 2] = np.mean(pcs[:, 2])  #20 # some random depth
            x = ((uvdepth[:, 0] - cx) * uvdepth[:, 2]) / fx
            y = ((uvdepth[:, 1] - cy) * uvdepth[:, 2]) / fy
            uvdepth[:, 0] = x
            uvdepth[:, 1] = y
            frustum_angle = -1 * np.arctan2(uvdepth[0, 2], uvdepth[
                0, 0])  # angle as to positive x-axis as in the Zoox paper

            # Pass objects that are too small
            if len(pcs) < 5:
                continue

            if object_class == 0:
                object_class = 'box'
                data = provider.FrustumDataset(npoints=2048,
                                               pcs=pcs,
                                               object_class=object_class,
                                               frustum_angle=frustum_angle,
                                               prob=prob)
                point_set, rot_angle, prob, one_hot_vec = data.data()
                point_set = torch.unsqueeze(torch.tensor(point_set),
                                            0).transpose(2, 1).float()
                one_hot_vec = torch.unsqueeze(torch.tensor(one_hot_vec),
                                              0).float()

                # print('start fpointnets')
                logits, mask, stage1_center, center_boxnet, object_pts, \
                heading_scores, heading_residuals_normalized, heading_residuals, \
                size_scores, size_residuals_normalized, size_residuals, center = \
                self.model(point_set, one_hot_vec)

                corners_3d = get_box3d_corners(center, heading_residuals,
                                               size_residuals)

                logits = logits.detach().numpy()
                mask = mask.detach().numpy()
                center_boxnet = center_boxnet.detach().numpy()
                object_pts = object_pts.detach().squeeze().numpy().transpose(
                    1, 0)
                stage1_center = stage1_center.detach().numpy()
                center = center.detach().numpy()
                heading_scores = heading_scores.detach().numpy()
                # heading_residuals_normalized = heading_residuals_normalized.detach().numpy()
                heading_residuals = heading_residuals.detach().numpy()
                size_scores = size_scores.detach().numpy()
                size_residuals = size_residuals.detach().numpy()
                corners_3d = corners_3d.detach().numpy()

                output = np.argmax(logits, 2)
                heading_class = np.argmax(heading_scores)
                size_class = np.argmax(size_scores)
                corners_3d = corners_3d[0, heading_class, size_class]
                pred_angle = provider.class2angle(
                    heading_class, heading_residuals[0, heading_class],
                    NUM_HEADING_BIN)
                pred_size = provider.class2size(size_class,
                                                size_residuals[0, size_class])

                cloud = pcs[:, 0:3].astype(np.float32)

                object_cloud = (object_pts - center_boxnet.repeat(
                    object_pts.shape[0], 0)).astype(np.float32)

                station_size = (0.979, 0.969, 0.979)
                cube = generate_station_model_with_normal(
                    np.array([[0, 0, 0]]), station_size, -pred_angle)
                station_cloud = cube.generate_points().astype(np.float32)

                cloud_icp = cicp(object_cloud,
                                 station_cloud,
                                 max_iterations=20)
                T, R, t = cloud_icp.cicp()
                cloud_t = np.tile(t, (station_cloud.shape[0], 1))
                station_cloud_rect = station_cloud[:, :3] - cloud_t

                station_cloud_rect = station_cloud_rect + center.repeat(
                    station_cloud_rect.shape[0], 0)
                object_cloud = object_cloud + center.repeat(
                    object_cloud.shape[0], 0)
                station_cloud[:, :3] = station_cloud[:, :3] + center.repeat(
                    station_cloud.shape[0], 0)

                center = center - t[np.newaxis, :]
                corners_3d_rect = box3d_corners(center, pred_angle,
                                                station_size)

                object_cloud = rotate_pc_along_y(object_cloud, -rot_angle)
                station_cloud_rect = rotate_pc_along_y(station_cloud_rect,
                                                       -rot_angle)
                station_cloud[:, :3] = rotate_pc_along_y(
                    station_cloud[:, :3], -rot_angle)
                center = rotate_pc_along_y(center, -rot_angle)
                corners_3d = rotate_pc_along_y(corners_3d, -rot_angle)
                corners_3d_rect = rotate_pc_along_y(corners_3d_rect,
                                                    -rot_angle)

                center[0, 1] = 0.815

                pose[0, :3] = center
                pose[0, 3] = pred_angle + rot_angle
                pose_list.append(pose.copy())

                count += 1
                station_rect_pub = point_cloud_publisher(
                    '/points_station_rect%d' % (count), station_cloud_rect)
                bbox_pub_rect = bbox_publisher('/bbox_rect%d' % (count),
                                               corners_3d_rect,
                                               color="green")
                object_pub = point_cloud_publisher(
                    '/points_object%d' % (count), object_cloud)

                station_rect_pub.point_cloud_publish()
                bbox_pub_rect.bbox_publish()
                object_pub.point_cloud_publish()

        pose_pub = pose_publisher('station_pose', pose_list)
        pose_pub.pose_publish()
        print('once detection')
Exemple #11
0
class detector:
    def __init__(self, rgb_image, depth_image, fx, fy, cx, cy):
        self.set_up_faster_rcnn()
        self.set_up_fpointnet()
        self.detection(rgb_image, depth_image, fx, fy, cx, cy)

    def set_up_faster_rcnn(self):
        self.cfg = get_cfg()
        self.cfg.merge_from_file(
            model_zoo.get_config_file(
                "COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml"))
        self.cfg.MODEL.WEIGHTS = "weights/model_final.pth"
        self.cfg.MODEL.ROI_HEADS.NUM_CLASSES = 2
        self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.65
        self.predictor = DefaultPredictor(self.cfg)
        self.metadata = Metadata()
        self.metadata.set(thing_classes=['station', 'forklift'])

    def set_up_fpointnet(self):
        self.FrustumPointNet = FrustumPointNetv1(n_classes=6,
                                                 n_channel=6).cuda()
        self.pth = torch.load("weights/frustum_model.pth")
        self.FrustumPointNet.load_state_dict(self.pth['model_state_dict'])
        self.model = self.FrustumPointNet.eval()

    def detection(self, rgb_image, depth_image, fx, fy, cx, cy):
        print('start detection')
        rgb_image = rgb_image
        depth_image = np.nan_to_num(depth_image, nan=0)
        outputs = self.predictor(rgb_image)
        prob_list = outputs["instances"].scores
        class_list = outputs["instances"].pred_classes
        box2d_list = outputs["instances"].pred_boxes.tensor

        # # v = Visualizer(rgb_image[:, :, ::-1], MetadataCatalog.get(self.cfg.DATASETS.TRAIN[0]), scale=1.2)
        # v = Visualizer(rgb_image[:, :, ::-1], metadata=self.metadata, scale=1.2)
        # out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
        # cv2.namedWindow('test',0)
        # cv2.imshow('test',out.get_image()[:, :, ::-1])
        # cv2.waitKey(0)

        # print("depth mean {}".format(np.mean(depth_image)))

        pitch = 0.09557043068606919
        rotation = np.array([[1, 0, 0], [0, np.cos(pitch), -np.sin(pitch)],
                             [0, np.sin(pitch),
                              np.cos(pitch)]])

        count = 0
        pose = np.zeros([1, 4])
        pose_list = []

        for idx in range(len(class_list)):

            object_class = class_list[idx].cpu().numpy()
            prob = prob_list[idx].cpu().numpy()
            xmin, ymin, xmax, ymax = map(int, box2d_list[idx])

            if (xmax - xmin) > 1.5 * (ymax - ymin):
                continue

            rgb = np.zeros_like(rgb_image)
            depth = np.zeros_like(depth_image)
            rgb[ymin:ymax, xmin:xmax] = rgb_image[ymin:ymax, xmin:xmax]
            depth[ymin:ymax, xmin:xmax] = depth_image[ymin:ymax, xmin:xmax]
            print("class: {} ,depth_mean: {}".format(
                object_class, np.mean(depth[ymin:ymax, xmin:xmax])))
            pcs = depth2pc(rgb, depth, fx, fy, cx, cy,
                           1).point_cloud_generator()
            pcs[:, 0:3] = np.dot(pcs[:, 0:3].astype(np.float32), rotation)
            mask = pcs[:, 2] != 0
            pcs = pcs[mask, :]
            box2d_center = np.array([(xmin + xmax) / 2.0, (ymin + ymax) / 2.0])
            uvdepth = np.zeros((1, 3))
            uvdepth[0, 0:2] = box2d_center
            uvdepth[0, 2] = np.mean(pcs[:, 2])  #20 # some random depth
            x = ((uvdepth[:, 0] - cx) * uvdepth[:, 2]) / fx
            y = ((uvdepth[:, 1] - cy) * uvdepth[:, 2]) / fy
            uvdepth[:, 0] = x
            uvdepth[:, 1] = y
            frustum_angle = -1 * np.arctan2(uvdepth[0, 2], uvdepth[
                0, 0])  # angle as to positive x-axis as in the Zoox paper

            # Pass objects that are too small
            if len(pcs) < 5:
                continue

            if object_class == 0:
                object_class = 'box'
                data = provider.FrustumDataset(npoints=2048,
                                               pcs=pcs,
                                               object_class=object_class,
                                               frustum_angle=frustum_angle,
                                               prob=prob)
                point_set, rot_angle, prob, one_hot_vec = data.data()
                point_set = torch.unsqueeze(torch.tensor(point_set),
                                            0).transpose(2, 1).float().cuda()
                one_hot_vec = torch.unsqueeze(torch.tensor(one_hot_vec),
                                              0).float().cuda()

                # print('start fpointnets')
                logits, mask, stage1_center, center_boxnet, object_pts, \
                heading_scores, heading_residuals_normalized, heading_residuals, \
                size_scores, size_residuals_normalized, size_residuals, center = \
                self.model(point_set, one_hot_vec)

                corners_3d = get_box3d_corners(center, heading_residuals,
                                               size_residuals)

                logits = logits.cpu().detach().numpy()
                mask = mask.cpu().detach().numpy()
                center_boxnet = center_boxnet.cpu().detach().numpy()
                object_pts = object_pts.cpu().detach().squeeze().numpy(
                ).transpose(1, 0)
                stage1_center = stage1_center.cpu().detach().numpy()
                center = center.cpu().detach().numpy()
                heading_scores = heading_scores.cpu().detach().numpy()
                # heading_residuals_normalized = heading_residuals_normalized.cpu().detach().numpy()
                heading_residuals = heading_residuals.cpu().detach().numpy()
                size_scores = size_scores.cpu().detach().numpy()
                size_residuals = size_residuals.cpu().detach().numpy()
                corners_3d = corners_3d.cpu().detach().numpy()

                output = np.argmax(logits, 2)
                heading_class = np.argmax(heading_scores)
                size_class = np.argmax(size_scores)
                corners_3d = corners_3d[0, heading_class, size_class]
                pred_angle = provider.class2angle(
                    heading_class, heading_residuals[0, heading_class],
                    NUM_HEADING_BIN)
                pred_size = provider.class2size(size_class,
                                                size_residuals[0, size_class])

                cloud = pcs[:, 0:3].astype(np.float32)

                object_cloud = (object_pts - center_boxnet.repeat(
                    object_pts.shape[0], 0)).astype(np.float32)

                station_size = (0.979, 0.969, 0.979)
                cube = generate_station_model_with_normal(
                    np.array([[0, 0, 0]]), station_size, -pred_angle)
                # object_cloud = rotate_pc_along_y(object_cloud,pred_angle)
                # cube = generate_station_model_with_normal(np.array([[0,0,0]]),station_size,0)
                station_cloud = cube.generate_points().astype(np.float32)
                # # object_cloud_crop = object_cloud[object_cloud[:,1]>(center[0][1]-0.48)]
                # # station_cloud_crop = station_cloud[station_cloud[:,1]>(center[0][1]-0.48)]
                # object_cloud_crop = object_cloud[object_cloud[:,1]>(-0.48)]
                # station_cloud_crop = station_cloud[station_cloud[:,1]>(-0.48)]
                # # station_cloud = rotate_pc_along_y(station_cloud, rot_angle)
                # cloud_icp = cicp(object_cloud,station_cloud_crop,max_iterations=30)
                cloud_icp = cicp(object_cloud,
                                 station_cloud,
                                 max_iterations=20)
                # cloud_icp = icp(object_cloud,station_cloud,max_iterations=20)
                T, R, t = cloud_icp.cicp()
                # R_angle = np.arccos(R[0,0])
                # print(R)
                # print(t)
                # if abs(t[0]) > abs(t[2]):
                #     t[2] = 0
                # if abs(t[0]) < abs(t[2]):
                #     t[0] = 0
                # print(t)
                cloud_t = np.tile(t, (station_cloud.shape[0], 1))
                # station_cloud_rect = np.dot(station_cloud[:,:3]-cloud_t, np.transpose(np.linalg.pinv(R)))
                # station_cloud_rect = np.dot(station_cloud[:,:3]-cloud_t, np.transpose(np.linalg.pinv(R)))
                # station_cloud[:,:3] = rotate_pc_along_y(station_cloud[:,:3],-R_angle)
                station_cloud_rect = station_cloud[:, :3] - cloud_t

                station_cloud_rect = station_cloud_rect + center.repeat(
                    station_cloud_rect.shape[0], 0)
                object_cloud = object_cloud + center.repeat(
                    object_cloud.shape[0], 0)
                station_cloud[:, :3] = station_cloud[:, :3] + center.repeat(
                    station_cloud.shape[0], 0)

                center = center - t[np.newaxis, :]
                corners_3d_rect = box3d_corners(center, pred_angle,
                                                station_size)

                object_cloud = rotate_pc_along_y(object_cloud, -rot_angle)
                station_cloud_rect = rotate_pc_along_y(station_cloud_rect,
                                                       -rot_angle)
                station_cloud[:, :3] = rotate_pc_along_y(
                    station_cloud[:, :3], -rot_angle)
                center = rotate_pc_along_y(center, -rot_angle)
                corners_3d = rotate_pc_along_y(corners_3d, -rot_angle)
                corners_3d_rect = rotate_pc_along_y(corners_3d_rect,
                                                    -rot_angle)

                center[0, 1] = 0.815

                pose[0, :3] = center
                pose[0, 3] = pred_angle + rot_angle
                pose_list.append(pose.copy())

                # # station_cloud_rect = rotate_pc_along_y(station_cloud[:,:3]-cloud_t, R_angle)
                # # cloud = np.dot(cloud, np.transpose(R))+np.tile(t,(cloud.shape[0],1))
                # object_pub = point_cloud_publisher('/points_object',object_cloud_crop)
                # station_pub = point_cloud_publisher('/points_station',station_cloud_crop[:,:3])
                count += 1
                # station_pub = point_cloud_publisher('/points_station%d'%(count),station_cloud[:,:3])
                station_rect_pub = point_cloud_publisher(
                    '/points_station_rect%d' % (count), station_cloud_rect)
                # bbox_pub = bbox_publisher('/bbox%d'%(count),corners_3d)
                bbox_pub_rect = bbox_publisher('/bbox_rect%d' % (count),
                                               corners_3d_rect,
                                               color="green")
                object_pub = point_cloud_publisher(
                    '/points_object%d' % (count), object_cloud)
                # cloud_pub = point_cloud_publisher('/points_cloud%d'%(count),cloud)
                # cloud_pub1 = point_cloud_publisher('/points_cloud1',cloud1)

                station_rect_pub.point_cloud_publish()
                bbox_pub_rect.bbox_publish()
                object_pub.point_cloud_publish()

                # bbox_list.append(bbox_pub)
                # bbox_list.append(bbox_pub_rect)
                # point_cloud_list.append(station_pub)
                # point_cloud_list.append(station_rect_pub)
                # point_cloud_list.append(object_pub)
                # point_cloud_list.append(cloud_pub)
                # with open('results.txt','ab') as f:
                #     np.savetxt(f, box_info, delimiter=" ")

                # rate = rospy.Rate(10)
                # while not rospy.is_shutdown():
                #     # point_cloud_list[3].point_cloud_publish()
                # #             # object_pub.point_cloud_publish()
                #     station_pub.point_cloud_publish()
                #     station_rect_pub.point_cloud_publish()
                #     bbox_pub.bbox_publish()
                #     bbox_pub_rect.bbox_publish()
                #     object_pub.point_cloud_publish()
                #     cloud_pub.point_cloud_publish()
                # #             # cloud_pub1.point_cloud_publish()
                #     rate.sleep()

        pose_pub = pose_publisher('station_pose', pose_list)
        pose_pub.pose_publish()
        print('once detection')
Exemple #12
0
Fichier : vis.py Projet : w-hc/pcv
def d2_vis(dset_meta, pan_mask, pan_ann, im, scale=0.7):
    """Draw a panoptic segmentation over an image with detectron2's Visualizer.

    Args:
        dset_meta: mapping with a ``'cats'`` dict keyed by dataset category
            id; every value provides ``'id'``, ``'isthing'``, ``'color'`` and
            ``'name'``.
        pan_mask: 2-D numpy array of per-pixel segment ids. Ids ``<= 0`` are
            not upsampled and come out as 0.
        pan_ann: mapping with a ``'segments_info'`` list; every entry has
            ``'isthing'`` and a dataset ``'category_id'``. It is not modified.
        im: image array handed to ``Visualizer``; its first two dimensions
            give the size the mask is upsampled to.
        scale: forwarded to ``Visualizer``.

    Returns:
        Whatever ``get_image()`` returns for the drawn panoptic overlay.
    """
    from detectron2.data import MetadataCatalog
    from detectron2.utils.visualizer import ColorMode, Visualizer

    cats = dset_meta['cats']
    if len(cats) > 20:
        # More than 20 categories: use the registered COCO panoptic metadata.
        meta = MetadataCatalog.get("coco_2017_val_panoptic_separated")
    else:
        from detectron2.data.catalog import Metadata

        thing_ids = [c['id'] for c in cats.values() if c['isthing']]
        stuff_ids = [c['id'] for c in cats.values() if not c['isthing']]
        meta = Metadata().set(
            thing_ids=thing_ids,
            stuff_ids=stuff_ids,
            thing_colors=[cats[i]['color'] for i in thing_ids],
            stuff_colors=[cats[i]['color'] for i in stuff_ids],
            thing_classes=[cats[i]['name'] for i in thing_ids],
            stuff_classes=[cats[i]['name'] for i in stuff_ids],
            thing_dataset_id_to_contiguous_id={k: i for i, k in enumerate(thing_ids)},
            stuff_dataset_id_to_contiguous_id={k: i for i, k in enumerate(stuff_ids)},
        )

    # Remap dataset category ids to contiguous ids on COPIES of the segment
    # dicts. Rewriting pan_ann in place would make a second call on the same
    # annotation look up already-remapped ids.
    segments_info = []
    for seg in pan_ann['segments_info']:
        if seg['isthing']:
            id_map = meta.thing_dataset_id_to_contiguous_id
        else:
            id_map = meta.stuff_dataset_id_to_contiguous_id
        remapped = dict(seg)
        remapped["category_id"] = id_map[seg["category_id"]]
        segments_info.append(remapped)

    def upsample_mask(mask, im_size):
        """Resize an id mask to ``im_size`` via per-id soft masks."""
        # Use the GPU when there is one, otherwise stay on the CPU.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        mask = torch.from_numpy(mask.astype(np.int64)).to(device)
        inx = torch.unique(mask[mask > 0])
        inx_len = len(inx)
        if inx_len == 0:
            # No positive ids: argmax over zero channels would raise, and the
            # result is an all-zero mask anyway.
            return torch.zeros(tuple(im_size), dtype=torch.int64)

        # One boolean channel per distinct id.
        tmp = mask.new_zeros((inx_len, )+mask.shape, dtype=torch.bool)
        for i in range(inx_len):
            tmp[i, :, :] = mask == inx[i]
        tmp = torch.nn.functional.interpolate(
            tmp.float().unsqueeze(0), im_size, mode='bicubic'
        ).squeeze(0)
        # 7x7 average pooling smooths the upsampled channels.
        tmp = torch.nn.functional.avg_pool2d(
            tmp.float().unsqueeze(0), kernel_size=7, stride=1, padding=3
        ).squeeze(0)
        _out_mask = tmp.argmax(dim=0)
        # Pixels where no channel reaches 0.5 stay unlabeled (0 in the output).
        _out_mask[tmp.max(0)[0] < 0.5] = -1
        out_mask = torch.zeros_like(_out_mask)
        for i in range(inx_len):
            out_mask[_out_mask == i] = inx[i]
        return out_mask.cpu()

    vis_img = Visualizer(
        im, meta, instance_mode=ColorMode.IMAGE_BW, scale=scale
    ).draw_pan_seg(
        upsample_mask(pan_mask, (im.shape[0], im.shape[1])),
        segments_info, alpha=0.5
    ).get_image()
    return vis_img
Exemple #13
0
        nargs=argparse.REMAINDER,
    )
    return parser


if __name__ == "__main__":
    # Script entry point: parse arguments, build the config and the demo
    # object, then run detection on every input image and log the timing.
    mp.set_start_method("spawn", force=True)
    args = get_parser().parse_args()
    logger = setup_logger()
    logger.info("Arguments: " + str(args))

    cfg = setup_cfg(args)

    # Hand-built metadata carrying the five layout class names.
    from detectron2.data.catalog import Metadata
    layout_classes = ["text", "title", "list", "figure", "table"]
    metadata = Metadata(evaluator_type='coco',
                        name='PubLayNet',
                        thing_classes=layout_classes)
    demo = VisualizationDemo(cfg, metadata)

    if args.input:
        # A single argument is expanded as a glob pattern (after ~ expansion).
        if len(args.input) == 1:
            pattern = os.path.expanduser(args.input[0])
            args.input = glob.glob(pattern)
            assert args.input, "The input path(s) was not found"
        progress = tqdm.tqdm(args.input, disable=not args.output)
        for path in progress:
            # use PIL, to be consistent with evaluation
            img = read_image(path, format="BGR")
            start_time = time.time()
            predictions, visualized_output = demo.run_on_image(img)
            num_instances = len(predictions["instances"])
            elapsed = time.time() - start_time
            logger.info("{}: detected {} instances in {:.2f}s".format(
                path, num_instances, elapsed))
Exemple #14
0
    # Visualize Cornell grasp annotations: load the dataset dicts, draw each
    # one with detectron2's Visualizer and save the result under `dirname`.
    parser.add_argument("image_dir")
    parser.add_argument("--type", choices=["instance", "semantic"], default="instance")
    args = parser.parse_args()
    from detectron2.data.catalog import Metadata
    from detectron2.utils.visualizer import Visualizer

    logger = setup_logger(name=__name__)

    dirname = "cornell-data-vis"
    os.makedirs(dirname, exist_ok=True)

    if args.type == "instance":
        dicts = load_cornell_instances(
            args.image_dir, to_polygons=True
        )
        logger.info("Done loading {} samples.".format(len(dicts)))
        meta = Metadata().set(
            thing_classes=["grasp", "nograsp"],
            #thing_classes=[f"{sector}grasp" for sector in range(18)],#os.listdir(args.image_dir),
            stuff_classes=["nothing", "thing"]
        )
    else:
        # Only the instance branch assigns `dicts` and `meta`; without this
        # the loop below would fail with a NameError for --type semantic.
        parser.error("--type {} is not supported by this script".format(args.type))

    for d in dicts:
        img = np.array(Image.open(d["file_name"]))
        visualizer = Visualizer(img, metadata=meta)
        vis = visualizer.draw_dataset_dict(d)
        # cv2.imshow("a", vis.get_image()[:, :, ::-1])
        # cv2.waitKey()
        fpath = os.path.join(dirname, os.path.basename(d["file_name"]))
        vis.save(fpath)
Exemple #15
0
class BlendPredictor:
    """Overlay instance-mask predictions on top of a panoptic prediction.

    Two predictors are run on the same image. Every instance mask from the
    mask predictor is painted over the panoptic segmentation under a fresh
    segment id, and panoptic segments that keep no more than
    ``shrink_threshold`` of their original area are dropped.
    """

    # Default fraction of its original area a panoptic segment must keep.
    shrink_threshold = .15

    def __init__(self, panoptic_config, mask_config, panoptic_predictor=None, mask_predictor=None, panoptic_metadata=None, mask_metadata=None, shrink_threshold=None):
        """Store predictors and metadata, building defaults from the configs.

        Predictors default to ``DefaultPredictor(config)``; metadata defaults
        to the ``MetadataCatalog`` entry of each config's first training
        dataset. ``shrink_threshold`` overrides the class default when given.
        """
        self.po_predictor = panoptic_predictor if panoptic_predictor is not None else DefaultPredictor(panoptic_config)
        self.mask_predictor = mask_predictor if mask_predictor is not None else DefaultPredictor(mask_config)
        self.po_md = panoptic_metadata if panoptic_metadata is not None else MetadataCatalog.get(panoptic_config.DATASETS.TRAIN[0])
        self.mask_md = mask_metadata if mask_metadata is not None else MetadataCatalog.get(mask_config.DATASETS.TRAIN[0])
        self.shrink_threshold = shrink_threshold if shrink_threshold is not None else self.shrink_threshold
        self._merge_metadata()

    def _merge_metadata(self):
        """Build ``self.blend_md`` with the mask classes appended to the panoptic thing classes.

        ``self.offset`` is the number of panoptic thing classes, i.e. the
        index of the first appended class. A mask class whose name already
        exists is appended as ``custom_<name>``.
        """
        added_classes = self.mask_md.get('thing_classes')
        total_classes = self.po_md.get('thing_classes').copy()
        stuff_classes = self.po_md.get('stuff_classes')

        self.offset = len(total_classes)

        for c in added_classes:
            if c not in total_classes:
                total_classes.append(c)
            else:
                total_classes.append(f"custom_{c}")

        self.blend_md = Metadata(thing_classes=total_classes, stuff_classes=stuff_classes)

    def predict(self, img):
        """Run both predictors on ``img`` and return ``blend_segs`` of their outputs."""
        # First lets run the predictions
        self.panoptic_seg, self.panoptic_seg_info = self.po_predictor(img)["panoptic_seg"]
        self.mask_output = self.mask_predictor(img)
        return self.blend_segs(self.panoptic_seg, self.panoptic_seg_info, self.mask_output)

    def _category_name(self, seg):
        """Return a printable class name for ``seg``, or its raw category id."""
        key = 'thing_classes' if seg.get("isthing", True) else 'stuff_classes'
        names = self.blend_md.get(key) or []
        cat = seg.get("category_id")
        if cat is not None and 0 <= cat < len(names):
            return names[cat]
        return str(cat)

    def blend_segs(self, panoptic_seg, panoptic_seg_info, mask_output):
        """Paint instance masks over a panoptic segmentation.

        Args:
            panoptic_seg: tensor of per-pixel segment ids.
            panoptic_seg_info: list of segment dicts, each with at least
                ``"id"`` and ``"category_id"``. It is not modified.
            mask_output: mapping whose ``"instances"`` entry exposes
                ``pred_masks``, ``scores`` and ``pred_classes``.

        Returns:
            ``(final_seg, final_info)``: the blended id tensor and the
            segment dicts that survive, with the added instances after the
            kept panoptic segments.
        """
        po_seg = panoptic_seg.to("cpu").numpy()
        blend_seg = po_seg.copy()
        max_seg = blend_seg.max()
        # Copy every dict so that updating "area" below does not write into
        # the caller's panoptic_seg_info.
        blend_info = [dict(seg) for seg in panoptic_seg_info]

        instances = mask_output["instances"].to("cpu")
        masks = instances.get("pred_masks").numpy()
        scores = instances.get("scores").numpy()
        classes = instances.get("pred_classes").numpy()

        # basic blending: each instance overwrites whatever lies under its
        # mask with a new id above the largest panoptic id.
        instance_ids = {}
        for i, (c, m, s) in enumerate(zip(classes, masks, scores)):
            iid = max_seg + i + 1
            instance_ids[c] = instance_ids.get(c, -1) + 1

            blend_seg = blend_seg * ((m - 1) * -1)  # zero the pixels under the mask
            blend_seg = blend_seg + (m * iid)       # then write the new id there
            area = np.count_nonzero(blend_seg == iid)

            blend_info.append({"id": iid, "isthing": True, "score": s, "category_id": c + self.offset, "instance_id": instance_ids[c], "area": area})

        # Remove "dead" instances where the instance has lost more than N% of volume
        num_original = len(panoptic_seg_info)
        final_info = []
        for i, seg in enumerate(blend_info):
            if i >= num_original:
                # Instances added above are always kept.
                final_info.append(seg)
                continue
            orig_seg = panoptic_seg_info[i]
            iid = seg["id"]
            if "area" in orig_seg:
                orig_area = orig_seg["area"]
            else:
                orig_area = np.count_nonzero(po_seg == iid)
            new_area = np.count_nonzero(blend_seg == iid)
            # A segment with no original area counts as fully lost instead of
            # dividing by zero.
            pct = new_area / orig_area if orig_area else 0.0
            if pct > self.shrink_threshold:
                seg["area"] = new_area
                final_info.append(seg)
            else:
                print(f"REMOVING ID: {iid}, Category: {self._category_name(seg)}, Orig Area: {orig_area}, New Area: {new_area}, PCT: {pct}")
                blend_seg = np.where(blend_seg == iid, 0, blend_seg)

        final_seg = torch.tensor(blend_seg)

        # Return the filtered list so the info agrees with final_seg.
        return final_seg, final_info
def main():
    """Render the Streamlit page for the household amenity detector.

    The page shows an introduction, accepts an uploaded image and a
    probability threshold, runs ``inference`` when the button is pressed,
    and displays the annotated image, a table of predicted classes with
    their probabilities, and the prediction time.
    """
    st.title('Household Amenity Detection Project 👁')
    st.write(
        "This Project is inspired by [Airbnb's machine learning powered amenity detection](https://medium.com/airbnb-engineering/amenity-detection-and-beyond-new-frontiers-of-computer-vision-at-airbnb-144a4441b72e)."
    )
    st.write(
        "And also by [Daniel Bourke's Airbnb amenity detection replication](https://github.com/mrdbourke/airbnb-amenity-detection)."
    )

    st.subheader('How does it work?')
    how_it_works = (
        "1. Upload an image in either JPG or PNG or JPEG format.",
        "2. Pick a probability threshold to determine what object + boxes to render.",
        "   Only objects with higher than threshold probability will be rendered.",
        "3. Click the Make Prediction Button to run the model.",
    )
    for step in how_it_works:
        st.write(step)
    st.image(Image.open('demo.jpg'), use_column_width=True)

    st.subheader('Input File')

    # Class names, indexed by the class ids the model predicts.
    objects = [
        'Bathtub', 'Bed', 'Billiard table', 'Ceiling fan',
        'Coffeemaker', 'Couch', 'Countertop', 'Dishwasher',
        'Fireplace', 'Fountain', 'Gas stove', 'Jacuzzi',
        'Kitchen & dining room table', 'Microwave oven',
        'Mirror', 'Oven', 'Pillow', 'Porch', 'Refrigerator',
        'Shower', 'Sink', 'Sofa bed', 'Stairs', 'Swimming pool',
        'Television', 'Toilet', 'Towel', 'Tree house', 'Washing machine',
        'Wine rack',
    ]

    # load model
    predictor = load_model()

    # create metadata
    data_metadata = Metadata(name='data_train',
                             evaluator_type='coco',
                             thing_classes=objects)

    accepted_types = ["png", "jpg", "jpeg", "JPG", "PNG", "JPEG"]
    uploaded_file = st.file_uploader("Upload an Image", type=accepted_types)
    if uploaded_file is not None:
        image = Image.open(uploaded_file)
        st.image(image, caption='Uploaded Image', use_column_width=True)

        # Make sure image is RGB
        image = image.convert("RGB")

        st.subheader('Output:')
        st.write(
            "Pick a prediction threshold where only objects with probabilities above the threshold will be displayed!"
        )
        pred_threshold = st.slider('Prediction Threshold:', 0.0, 1.0, 0.25)

        # Inference only runs once the button is clicked.
        if st.button("Make Prediction"):
            start_time = time.time()

            with st.spinner("Doing Prediction..."):
                custom_pred, filt_instance = inference(
                    image, predictor, data_metadata, pred_threshold)

            end_time = time.time()

            st.subheader('Predictions: ')
            # need to convert CV2 format to PIL format
            custom_pred = cv2.cvtColor(custom_pred, cv2.COLOR_RGB2BGR)
            st.image(custom_pred,
                     caption='Predictions Image',
                     use_column_width=True)

            st.write('Predicted Classes and Probabilities: ')
            # Collect class names and scores into a dataframe.
            class_ids = filt_instance.pred_classes.numpy()
            object_name = [objects[class_id] for class_id in class_ids]

            pred_df = pd.DataFrame()
            pred_df['Classes'] = object_name
            pred_df['Probabilities'] = filt_instance.scores.numpy()

            if len(pred_df) == 0:
                st.write('No Objects Detected!')
            else:
                st.write(pred_df)

            # write prediction time
            pred_time = end_time - start_time
            st.write('Prediction Time: ' + ' {0:.2f}'.format(pred_time) +
                     ' seconds')

    st.write("")
    st.subheader("What is under the hood?")
    st.write(
        "Detectron2 RetinaNet model (PyTorch) and Streamlit web application")
    st.image(Image.open('logo.jpg'), use_column_width=True)

    st.subheader("Supported Classes/Objects:")
    supported_rows = (
        "• Bathtub          • Bed                 • Billiard Table",
        "• Ceiling Fan      • Coffeemaker         • Couch",
        "• Countertop       • Dishwasher          • Fireplace",
        "• Fountain         • Gas Stove           • Jacuzzi",
        "• Dining Table     • Microwave Oven      • Mirror",
        "• Oven             • Pillow              • Porch",
        "• Refrigerator     • Shower              • Sink",
        "• Sofa bed         • Stairs              • Swimming Pool",
        "• Television       • Toilet              • Towel",
        "• Tree house       • Washing Machine     • Wine Rack",
    )
    for row in supported_rows:
        st.write(row)
    # Visualize Cityscapes annotations: load the dataset dicts for the
    # requested type and draw each one with detectron2's Visualizer.
    logger = setup_logger(name=__name__)

    dirname = "cityscapes-data-vis"
    os.makedirs(dirname, exist_ok=True)

    if args.type == "instance":
        dicts = load_cityscapes_instances(args.image_dir,
                                          args.gt_dir,
                                          from_json=True,
                                          to_polygons=True)
        logger.info("Done loading {} samples.".format(len(dicts)))

        thing_classes = [
            k.name for k in labels if k.hasInstances and not k.ignoreInEval
        ]
        meta = Metadata().set(thing_classes=thing_classes)

    else:
        dicts = load_cityscapes_semantic(args.image_dir, args.gt_dir)
        logger.info("Done loading {} samples.".format(len(dicts)))

        # Labels with trainId 255 are left out of the names and colors.
        stuff_names = [k.name for k in labels if k.trainId != 255]
        stuff_colors = [k.color for k in labels if k.trainId != 255]
        # The Visualizer looks up semantic class names under `stuff_classes`;
        # a `stuff_names` attribute is never read by it.
        meta = Metadata().set(stuff_classes=stuff_names,
                              stuff_colors=stuff_colors)

    for d in dicts:
        img = np.array(Image.open(PathManager.open(d["file_name"], "rb")))
        visualizer = Visualizer(img, metadata=meta)
        vis = visualizer.draw_dataset_dict(d)
        # cv2.imshow("a", vis.get_image()[:, :, ::-1])
Exemple #18
0
    # Visualize CTF annotations: load the dataset dicts, show each drawn
    # image in a window, and save it under `dirname`.
    parser.add_argument("--gt_dir", type=str, default="datasets/ctf/char/json")
    parser.add_argument("--type", choices=["field", "char"], default="char")
    args = parser.parse_args()
    from detectron2.data.catalog import Metadata
    from detectron2.utils.visualizer import Visualizer

    logger = setup_logger(name=__name__)

    dirname = "ctf-data-vis"
    os.makedirs(dirname, exist_ok=True)
    dicts = load_ctf_json(args.image_dir, args.gt_dir, args.type)
    logger.info("Done loading {} samples.".format(len(dicts)))

    # Class names come from the CTF label table.
    thing_classes = [label.name for label in labels_ctf]
    meta = Metadata().set(thing_classes=thing_classes)

    # stuff_names = [k.name for k in labels]
    # stuff_colors = [k.color for k in labels]
    # meta = Metadata().set(stuff_names=stuff_names, stuff_colors=stuff_colors)

    for record in dicts:
        image_path = record["file_name"]
        img = np.array(Image.open(image_path))
        vis = Visualizer(img, metadata=meta).draw_dataset_dict(record)
        # Display the channel-reversed image and wait for a key press.
        cv2.imshow("a", vis.get_image()[:, :, ::-1])
        cv2.waitKey()
        fpath = os.path.join(dirname, os.path.basename(image_path))
        vis.save(fpath)