Python Yolact.eval Exemples, yolact.Yolact.eval Python Exemples

Exemple #1

0

Afficher le fichier

def load_model(model_file):
  """Build a YOLACT network from ``model_file`` and switch it to eval mode.

  The global default tensor type is set to CUDA floats and the
  'yolact_plus_resnet50_config' configuration is activated before the
  network is constructed.

  Args:
    model_file: path of the weights file handed to ``Yolact.load_weights``.

  Returns:
    The ``Yolact`` instance after ``eval()`` has been called on it.
  """
  # Process-wide side effects: both calls change global state.
  torch.set_default_tensor_type('torch.cuda.FloatTensor')
  set_cfg('yolact_plus_resnet50_config')

  model = Yolact()
  model.load_weights(model_file)
  model.eval()
  return model

Exemple #2

0

Afficher le fichier

Fichier : model.py Projet : agermanidis/yolact

class YOLACT_MODEL():
    """YOLACT wrapper that multiplies an image by its first detection mask.

    ``opts`` must provide:
      * 'checkpoint' -- weights path handed to ``Yolact.load_weights``
      * 'threshold'  -- score threshold forwarded to ``postprocess``
    """

    def __init__(self, opts):
        """Configure YOLACT globally, then load the network onto the GPU."""
        #concat the two files to one file
        # if not os.path.isfile('weights/yolact_resnet50_54_800000.pth'):
        #     script = "cat weights/a* > weights/yolact_resnet50_54_800000.pth"
        #     call(script, shell=True)

        set_cfg('yolact_resnet50_config')
        cudnn.benchmark = True
        cudnn.fastest = True
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        self.net = Yolact()
        self.net.load_weights(opts['checkpoint'])
        print("done.")

        self.net.eval()
        self.net = self.net.cuda()

        self.net.detect.use_fast_nms = True
        cfg.mask_proto_debug = False
        self.color_cache = defaultdict(lambda: {})
        self.threshold = opts['threshold']

    def detect(self, img):
        """Run the network on ``img`` and return the masked image.

        Args:
            img: anything ``np.array`` can convert; ``display`` unpacks its
                shape as (height, width, channels).

        Returns:
            The uint8 numpy array produced by ``display``.
        """
        numpy_image = np.array(img)
        print('starting inference...')
        # Inference only: skip autograd bookkeeping during the forward pass.
        with torch.no_grad():
            frame = torch.from_numpy(numpy_image).cuda().float()
            batch = FastBaseTransform()(frame.unsqueeze(0))
            preds = self.net(batch)
        print("done.")
        output_image = self.display(preds, frame, None, None,
                                     undo_transform=False, score_threshold=self.threshold)
        return output_image

    def display(self, dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, top_k = 100, score_threshold = 0.3):
        """Multiply ``img`` by the first detection mask and return it as uint8.

        Args:
            dets_out: raw network output, forwarded to ``postprocess``.
            img: image tensor whose shape unpacks as (height, width, channels).
            h, w: ignored; both are overwritten from ``img.shape``.
            undo_transform, class_color, mask_alpha: accepted for signature
                compatibility; not used by this implementation.
            top_k: maximum number of masks kept from the postprocess output.
            score_threshold: forwarded to ``postprocess``.

        Returns:
            A uint8 numpy array. When the mask branch is disabled or nothing
            was detected, the input image is returned unmasked instead of
            raising.
        """
        img_gpu = img / 255.0
        h, w, _ = img.shape

        with timer.env('Postprocess'):
            t = postprocess(dets_out, w, h, visualize_lincomb = False,
                                            crop_masks        = True,
                                            score_threshold   = score_threshold)
            torch.cuda.synchronize()

        # Stays None when the mask branch is disabled, so the code below
        # never touches an unbound name.
        masks = None
        with timer.env('Copy'):
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][:top_k]

        if masks is not None and len(masks) > 0:
            # NOTE(review): assumes masks[0] broadcasts against the
            # (h, w, channels) image -- TODO confirm the mask layout returned
            # by postprocess.
            img_gpu = img_gpu * masks[0]

        # Then draw the stuff that needs to be done on the cpu
        # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
        img_numpy = (img_gpu * 255).byte().cpu().numpy()

        return img_numpy

Exemple #3

0

Afficher le fichier

    def __init__(
            self,
            weights='./crow_vision_yolact/data/yolact/weights/weights_yolact_kuka_17/crow_base_35_457142.pth',
            config=None,
            batchsize=1,
            top_k=25,
            score_threshold=0.1,
            display_text=True,
            display_bboxes=True,
            display_masks=True,
            display_scores=True):
        """Set up a YOLACT network for inference and keep it as ``self.net``.

        Args:
            weights: path handed to ``Yolact.load_weights``.
            config: ``None`` to keep the current configuration; otherwise a
                value for ``set_cfg``. A value containing '.obj' is treated
                as a file path and deserialized with ``dill`` first.
            batchsize: stored as ``self.batchsize``.
            top_k, score_threshold: stored on the instance and forwarded to
                ``parse_args``.
            display_text, display_bboxes, display_masks, display_scores:
                forwarded to ``parse_args`` as ``--name=value`` options.
        """
        self.batchsize = batchsize
        self.top_k = top_k
        self.score_threshold = score_threshold

        # --- YOLACT configuration -------------------------------------
        if config is not None:
            if '.obj' in config:
                # NOTE: dill deserialization can execute arbitrary code;
                # only load configuration objects from trusted files.
                with open(config, 'rb') as f:
                    config = dill.load(f)
            set_cfg(config)
        self.class_names_tuple = get_class_names_tuple()

        # Evaluation options are passed in command-line form, in this order.
        cli_options = {
            'top_k': top_k,
            'score_threshold': score_threshold,
            'display_text': display_text,
            'display_bboxes': display_bboxes,
            'display_masks': display_masks,
            'display_scores': display_scores,
        }
        parse_args([
            '--' + option + '=' + str(value)
            for option, value in cli_options.items()
        ])

        # --- CUDA setup -----------------------------------------------
        torch.backends.cudnn.fastest = True
        torch.set_default_tensor_type('torch.cuda.FloatTensor')

        # --- The network itself ----------------------------------------
        with torch.no_grad():
            yolact_net = Yolact().cuda(torch.cuda.current_device())
            yolact_net.load_weights(weights)
            yolact_net.eval()
            yolact_net.detect.use_fast_nms = True
            yolact_net.detect.use_cross_class_nms = False

        self.net = yolact_net
        print("YOLACT network available as self.net")

        # Accumulator used for debugging / benchmarking.
        self.duration = 0.0

Exemple #4

0

Afficher le fichier

def init_model(transform):
    """Parse CLI arguments, configure YOLACT and build the evaluation network.

    The steps below mutate global configuration and are order dependent:
    the configuration must be selected before the network is constructed.

    Returns:
        (net, args): the network wrapped in ``CustomDataParallel`` and the
        parsed argument namespace.

    NOTE(review): ``transform`` is only rebound locally near the end of the
    function; the caller's object is not modified and the new value is not
    returned.
    """
    args = parse_args()

    # An explicitly requested configuration wins.
    if args.config is not None:
        print(args.config)
        set_cfg(args.config)
        cfg.mask_proto_debug = False

    # Resolve the symbolic model names to concrete paths under weights/.
    if args.trained_model == 'interrupt':
        args.trained_model = SavePath.get_interrupt('weights/')
    elif args.trained_model == 'latest':
        args.trained_model = SavePath.get_latest('weights/', cfg.name)

    # Without an explicit configuration, derive its name from the weights
    # file name.
    if args.config is None:
        model_path = SavePath.from_str(args.trained_model)
        # TODO: Bad practice? Probably want to do a name lookup instead.
        args.config = model_path.model_name + '_config'
        print('Config not specified. Parsed %s from the file name.\n' %
              args.config)
        set_cfg(args.config)

    if args.detect:
        cfg.eval_mask_branch = False

    if args.dataset is not None:
        set_dataset(args.dataset)

    with torch.no_grad():
        # The default tensor type is a process-wide setting.
        if args.cuda:
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
        else:
            torch.set_default_tensor_type('torch.FloatTensor')

        print('Loading model...', end='')
        net = Yolact()
        net.load_weights(args.trained_model)
        net.eval()
        print(' Done.')
        # NOTE(review): .cuda() is called here and again below regardless of
        # args.cuda, so the CPU branch above still ends up requiring CUDA.
        net = net.cuda()

        net = CustomDataParallel(net).cuda()
        # Rebinds the local name only (see the docstring note).
        transform = torch.nn.DataParallel(FastBaseTransform()).cuda()

    return net, args

Exemple #5

0

Afficher le fichier

Fichier : batchtrack.py Projet : subhacom/argos_tracker

def load_weights(filename, cuda):
    """Create the module-level YOLACT network ``ynet`` and load its weights.

    Args:
        filename: path of the weights file; must not be the empty string.
        cuda: when truthy, CUDA float tensors become the process default,
            otherwise CPU float tensors do.

    Raises:
        ValueError: if ``filename`` is the empty string.
    """
    global ynet
    if filename == '':
        raise ValueError('Empty filename for network weights')

    print('#### CUDA ENABLED', cuda)
    print(f'Loading weights from {filename}')

    started_ns = time.perf_counter_ns()
    with torch.no_grad():
        if cuda:
            cudnn.fastest = True
            default_type = 'torch.cuda.FloatTensor'
        else:
            default_type = 'torch.FloatTensor'
        torch.set_default_tensor_type(default_type)
        # torch.set_default_tensor_type('torch.FloatTensor')
        ynet = Yolact()
        ynet.load_weights(filename, False)
        ynet.eval()
    elapsed_ns = time.perf_counter_ns() - started_ns
    logging.debug(f'Time to load weights: {1e-9 * elapsed_ns}')

Exemple #6

0

Afficher le fichier

def convert_to_onnx_with_hydra(cfg: DictConfig):
    """Export a YOLACT checkpoint to ONNX using the settings in ``cfg.onnx``.

    Fields read from ``cfg.onnx``: ``yolact_cfg``, ``model_ckpt_path``,
    ``model_batch_size``, ``model_channel_input``, ``model_height_input``,
    ``model_width_input``, ``model_onnx_path``, ``verbose`` and
    ``opset_version``.
    """
    # Prepare the output folder, then activate the YOLACT configuration.
    createFolderOnnx(cfg)
    onnx_cfg = cfg.onnx
    set_cfg(onnx_cfg.yolact_cfg)

    # Load the checkpoint and export from the CPU in eval mode.
    net = Yolact()
    net.load_weights(onnx_cfg.model_ckpt_path)
    net.eval()
    net = net.cpu()

    # Random tensor with the configured input shape, used to trace the model.
    input_shape = (onnx_cfg.model_batch_size, onnx_cfg.model_channel_input,
                   onnx_cfg.model_height_input, onnx_cfg.model_width_input)
    example_input = torch.rand(input_shape)

    torch.onnx.export(net,
                      example_input,
                      onnx_cfg.model_onnx_path,
                      verbose=onnx_cfg.verbose,
                      opset_version=onnx_cfg.opset_version)

Exemple #7

0

Afficher le fichier

def main(args):
  """Start the 'yolact_ros' node: load YOLACT, then spin until interrupted.

  The weights and results locations are resolved relative to the
  'yolact_ros' package path. ``args`` is accepted but not read here.
  """
  rospy.init_node('yolact_ros')
  package_dir = rospkg.RosPack().get_path('yolact_ros')

  weights_file = package_dir + "/scripts/yolact/weights/yolact_base_54_800000.pth"
  # The configuration name is derived from the weights file name.
  set_cfg(SavePath.from_str(weights_file).model_name + '_config')

  with torch.no_grad():
    results_dir = package_dir + "/scripts/yolact/results"
    if not os.path.exists(results_dir):
      os.makedirs(results_dir)

    cudnn.benchmark = True
    cudnn.fastest = True
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

    print('Loading model...', end='')
    net = Yolact()
    net.load_weights(weights_file)
    net.eval()
    print(' Done.')

    net = net.cuda()
    net.detect.use_fast_nms = True
    cfg.mask_proto_debug = False

  # Keep a reference so the converter object stays alive while spinning.
  ic = image_converter(net)

  try:
    rospy.spin()
  except KeyboardInterrupt:
    print("Shutting down")
  cv2.destroyAllWindows()

Exemple #8

0

Afficher le fichier

class YolactWorker(qc.QObject):
    """Qt worker that runs YOLACT on frames and emits the bounding boxes.

    Settings are changed through slots; ``process`` handles one frame and
    reports through signals instead of return values.
    """
    # Emits the array of detected bboxes in (x-left, y-top, w, h) format
    # together with the frame position it was computed for, so the display
    # of the image in the video widget can be synchronized with the boxes.
    sigProcessed = qc.pyqtSignal(np.ndarray, int)
    # Emitted after the network weights have been loaded.
    sigInitialized = qc.pyqtSignal()
    # Emitted when a frame arrives before the network is initialized.
    sigError = qc.pyqtSignal(YolactException)

    def __init__(self):
        super(YolactWorker, self).__init__()
        self.mutex = qc.QMutex()
        self._image = None
        self._pos = 0
        self.top_k = 10
        self.cuda = torch.cuda.is_available()
        self.net = None
        self.score_threshold = 0.15
        self.overlap_thresh = 1.0
        self.config = yconfig.cfg
        self.weights_file = ''
        self.config_file = ''
        self.video_file = None

    def setWaitCond(self, waitCond: threading.Event) -> None:
        """Store the event used to synchronize with the display."""
        _locker = qc.QMutexLocker(self.mutex)
        self._waitCond = waitCond

    @qc.pyqtSlot(bool)
    def enableCuda(self, on):
        """Persist the CUDA preference and use it for subsequent calls."""
        settings.setValue('yolact/cuda', on)
        self.cuda = on

    @qc.pyqtSlot(int)
    def setTopK(self, value):
        """Set the maximum number of detections kept per frame."""
        _locker = qc.QMutexLocker(self.mutex)
        self.top_k = value

    @qc.pyqtSlot(int)
    def setBatchSize(self, value):
        """Store the batch size (converted to ``int``)."""
        _locker = qc.QMutexLocker(self.mutex)
        self.batch_size = int(value)

    @qc.pyqtSlot(float)
    def setScoreThresh(self, value):
        """Set the score threshold forwarded to ``postprocess``."""
        _locker = qc.QMutexLocker(self.mutex)
        self.score_threshold = value

    @qc.pyqtSlot(float)
    def setOverlapThresh(self, value):
        """Merge objects if their bboxes overlap more than this."""
        _locker = qc.QMutexLocker(self.mutex)
        self.overlap_thresh = value

    @qc.pyqtSlot(str)
    def setConfig(self, filename):
        """Overlay the YAML settings in ``filename`` onto the YOLACT config.

        An empty ``filename`` is ignored. ``mask_proto_debug`` is forced to
        ``False`` unless the file sets it.
        """
        if filename == '':
            return
        self.config_file = filename
        with open(filename, 'r') as cfg_file:
            # safe_load returns None for an empty document.
            config = yaml.safe_load(cfg_file) or {}
            for key, value in config.items():
                logging.debug('%r \n%r %r', key, type(value), value)
                setattr(self.config, key, value)
            if 'mask_proto_debug' not in config:
                self.config.mask_proto_debug = False
        logging.debug(yaml.dump(self.config))

    @qc.pyqtSlot(str)
    def setWeights(self, filename: str) -> None:
        """Build the network, load weights from ``filename``, emit
        ``sigInitialized``.

        Raises:
            YolactException: if ``filename`` is the empty string.
        """
        if filename == '':
            raise YolactException('Empty filename for network weights')
        self.weights_file = filename
        tic = time.perf_counter_ns()
        with torch.no_grad():
            if self.cuda:
                cudnn.fastest = True
                torch.set_default_tensor_type('torch.cuda.FloatTensor')
            else:
                torch.set_default_tensor_type('torch.FloatTensor')
            self.net = Yolact()
            self.net.load_weights(self.weights_file, self.cuda)
            self.net.eval()
            if self.cuda:
                self.net = self.net.cuda()
        toc = time.perf_counter_ns()
        logging.debug('Time to load weights %f s', 1e-9 * (toc - tic))
        self.sigInitialized.emit()

    @qc.pyqtSlot(np.ndarray, int)
    def process(self, image: np.ndarray, pos: int):
        """Detect objects in ``image`` and emit their bounding boxes.

        Nothing is returned. The result is delivered through
        ``sigProcessed(boxes, pos)`` where ``boxes`` is an array of bounding
        boxes in (xleft, ytop, width, height) format, restricted to the
        ``top_k`` highest scoring detections and, when ``overlap_thresh`` is
        below 1, filtered for mutual overlap. If the network has not been
        initialized, ``sigError`` is emitted instead.

        Args:
            image: frame whose shape unpacks as (height, width, channels).
            pos: frame position, passed through to ``sigProcessed``.
        """
        _ts = time.perf_counter()
        logging.debug(f'Received frame {pos}')
        if self.net is None:
            self.sigError.emit(YolactException('Network not initialized'))
            return
        # Partly follows yolact eval.py
        tic = time.perf_counter_ns()
        # The locker must live in a name nothing else rebinds: unpacking the
        # image shape into `_` used to destroy it and release the mutex
        # while the settings below were still being read.
        locker = qc.QMutexLocker(self.mutex)
        with torch.no_grad():
            if self.cuda:
                image = torch.from_numpy(image).cuda().float()
            else:
                image = torch.from_numpy(image).float()
            batch = FastBaseTransform()(image.unsqueeze(0))
            preds = self.net(batch)
            h, w, _channels = image.shape
            # Temporarily force rescoring and always restore the setting.
            save = self.config.rescore_bbox
            self.config.rescore_bbox = True
            try:
                _classes, scores, boxes, _masks = oututils.postprocess(
                    preds,
                    w,
                    h,
                    visualize_lincomb=False,
                    crop_masks=True,
                    score_threshold=self.score_threshold)
            finally:
                self.config.rescore_bbox = save
            idx = scores.argsort(0, descending=True)[:self.top_k]
            boxes = boxes[idx].cpu().numpy()
            if len(boxes) == 0:
                # Release the lock before emitting so connected slots never
                # run while the mutex is held.
                locker.unlock()
                self.sigProcessed.emit(boxes, pos)
                return
            # Convert from top-left bottom-right format to
            # top-left, width, height format
            boxes[:, 2:] = boxes[:, 2:] - boxes[:, :2]
            boxes = np.asanyarray(boxes, dtype=np.int_)
            if self.overlap_thresh < 1:
                dist_matrix = pairwise_distance(new_bboxes=boxes,
                                                bboxes=boxes,
                                                boxtype=OutlineStyle.bbox,
                                                metric=DistanceMetric.ios)
                # For every pair closer than the limit, drop the later box.
                bad_idx = {
                    jj
                    for ii in range(dist_matrix.shape[0] - 1)
                    for jj in range(ii + 1, dist_matrix.shape[1])
                    if dist_matrix[ii, jj] < 1 - self.overlap_thresh
                }
                # Sorted so the surviving boxes keep their score order.
                good_idx = sorted(set(range(boxes.shape[0])) - bad_idx)
                boxes = boxes[good_idx].copy()
            locker.unlock()

            toc = time.perf_counter_ns()
            logging.debug('Time to process single _image: %f s',
                          1e-9 * (toc - tic))
            self.sigProcessed.emit(boxes, pos)
            logging.debug(f'Emitted bboxes for frame {pos}: {boxes}')
        _dt = time.perf_counter() - _ts
        logging.debug(
            f'{__name__}.{self.__class__.__name__}.process: Runtime: {_dt}s')

Exemple #9

0

Afficher le fichier

Fichier : convert_weight.py Projet : robotseye/pytorch-yolact

#
#   Editor      : VIM
#   File name   : convert_weight.py
#   Author      : YunYang1994
#   Created date: 2019-07-27 18:07:20
#   Description :
#
#================================================================

import torch
import numpy as np
from yolact import Yolact

# Build the network at import time; `model` and `modules` are module-level
# names available to the code that follows.
with torch.no_grad():
    model = Yolact()
    # Switched to eval mode before the weights are loaded.
    model.eval()
    model.load_weights("./yolact_darknet53_54_800000.pth")
    # Iterator over the network's direct child modules.
    modules = model.children()


def parse_layer(layer, weights):
    assert isinstance(layer, torch.nn.Conv2d) or isinstance(
        layer, torch.nn.BatchNorm2d)
    print("=> Parsing ", layer)
    if isinstance(layer, torch.nn.Conv2d):
        weight, bias = layer.weight.detach().numpy(), layer.bias
        weight = np.transpose(
            weight, [2, 3, 1, 0])  # k_h, h_w, in_channels, out_channels
        if bias is None:
            weights.append([weight])
        else:

Exemple #10

0

Afficher le fichier

class YOLACT_MODEL():
    """YOLACT inference wrapper that draws masks and/or boxes on an image.

    ``opts`` must provide:
      * 'checkpoint' -- weights path handed to ``Yolact.load_weights``
      * 'threshold'  -- score threshold forwarded to ``postprocess``
      * 'mode'       -- "mask_only" hides boxes, "box_only" hides masks;
                        any other value draws both
    """

    def __init__(self, opts):
        """Configure YOLACT globally, then load the network onto the GPU."""
        #concat the two files to one file
        # if not os.path.isfile('weights/yolact_resnet50_54_800000.pth'):
        #     script = "cat weights/a* > weights/yolact_resnet50_54_800000.pth"
        #     call(script, shell=True)

        set_cfg('yolact_resnet50_config')
        cudnn.benchmark = True
        cudnn.fastest = True
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        self.net = Yolact()
        self.net.load_weights(opts['checkpoint'])
        print("done.")

        self.net.eval()
        self.net = self.net.cuda()

        self.net.detect.use_fast_nms = True
        cfg.mask_proto_debug = False
        self.color_cache = defaultdict(lambda: {})
        self.threshold = opts['threshold']
        self.mode = opts['mode']

    def detect(self, img):
        """Run the network on ``img`` and return ``display``'s result.

        Returns:
            ``(image, boxes, scores)`` as described in ``display``.
        """
        numpy_image = np.array(img)
        print('starting inference...')
        # Inference only: skip autograd bookkeeping during the forward pass.
        with torch.no_grad():
            frame = torch.from_numpy(numpy_image).cuda().float()
            batch = FastBaseTransform()(frame.unsqueeze(0))
            preds = self.net(batch)
        print("done.")
        return self.display(preds,
                            frame,
                            None,
                            None,
                            undo_transform=False,
                            score_threshold=self.threshold)

    def display(self,
                dets_out,
                img,
                h,
                w,
                undo_transform=True,
                class_color=False,
                mask_alpha=0.45,
                top_k=100,
                score_threshold=0.3):
        """Draw the detections in ``dets_out`` onto ``img``.

        Args:
            dets_out: raw network output, forwarded to ``postprocess``.
            img: image tensor whose shape unpacks as (height, width, channels).
            h, w: ignored; both are overwritten from ``img.shape``.
            undo_transform: when false, colors are channel-swapped before use.
            class_color: pick colors per class instead of per detection.
            mask_alpha: opacity of the mask overlay.
            top_k: maximum number of detections considered.
            score_threshold: forwarded to ``postprocess``.

        Returns:
            A tuple ``(image, boxes, scores)`` where ``image`` is a uint8
            numpy array. The same tuple shape is returned when nothing was
            detected (with the undecorated image), so callers can always
            unpack three values.
        """
        img_gpu = img / 255.0
        h, w, _ = img.shape

        with timer.env('Postprocess'):
            t = postprocess(dets_out,
                            w,
                            h,
                            visualize_lincomb=False,
                            crop_masks=True,
                            score_threshold=score_threshold)
            torch.cuda.synchronize()

        with timer.env('Copy'):
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][:top_k]
            classes, scores, boxes = [
                x[:top_k].detach().cpu().numpy() for x in t[:3]
            ]

        num_dets_to_consider = min(top_k, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < 0:
                num_dets_to_consider = j
                break

        if num_dets_to_consider == 0:
            # No detections found so just output the original image, using
            # the same (image, boxes, scores) shape as the normal path.
            plain = (img_gpu * 255).byte().detach().cpu().numpy()
            return (plain, boxes, scores)

        # Quick and dirty lambda for selecting the color for a particular index
        # Also keeps track of a per-gpu color cache for maximum speed
        def get_color(j, on_gpu=None):
            color_idx = (classes[j] * 5 if class_color else j *
                         5) % len(COLORS)

            if on_gpu is not None and color_idx in self.color_cache[on_gpu]:
                return self.color_cache[on_gpu][color_idx]
            else:
                color = COLORS[color_idx]
                if not undo_transform:
                    # The image might come in as RGB or BRG, depending
                    color = (color[2], color[1], color[0])
                if on_gpu is not None:
                    color = torch.Tensor(color).to(on_gpu).float() / 255.
                    self.color_cache[on_gpu][color_idx] = color
                return color

        show_mask = True
        show_box = True

        if self.mode == "mask_only":
            show_box = False

        if self.mode == "box_only":
            show_mask = False

        print("mode :", self.mode)
        print("show_mask :", show_mask)
        print("show_box :", show_box)

        # First, draw the masks on the GPU where we can do it really fast
        # Beware: very fast but possibly unintelligible mask-drawing code ahead
        # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
        if show_mask and cfg.eval_mask_branch:
            # After this, mask is of size [num_dets, h, w, 1]
            masks = masks[:num_dets_to_consider, :, :, None]

            # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
            colors = torch.cat([
                get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
                for j in range(num_dets_to_consider)
            ],
                               dim=0)
            masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

            # This is 1 everywhere except for 1-mask_alpha where the mask is
            inv_alph_masks = masks * (-mask_alpha) + 1

            # I did the math for this on pen and paper. This whole block should be equivalent to:
            #    for j in range(num_dets_to_consider):
            #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
            masks_color_summand = masks_color[0]
            if num_dets_to_consider > 1:
                inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                                  1)].cumprod(dim=0)
                masks_color_cumul = masks_color[1:] * inv_alph_cumul
                masks_color_summand += masks_color_cumul.sum(dim=0)

            img_gpu = img_gpu * inv_alph_masks.prod(
                dim=0) + masks_color_summand

        # Then draw the stuff that needs to be done on the cpu
        # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
        img_numpy = (img_gpu * 255).byte().cpu().numpy()

        if show_box:
            font_face = cv2.FONT_HERSHEY_DUPLEX
            font_scale = 0.6
            font_thickness = 1
            text_color = [255, 255, 255]

            # Reverse order so the first detections are drawn last (on top).
            for j in reversed(range(num_dets_to_consider)):
                x1, y1, x2, y2 = boxes[j, :]
                color = get_color(j)
                score = scores[j]

                # Bounding box outline.
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

                # "<class>: <score>" label on a filled background.
                _class = cfg.dataset.class_names[classes[j]]
                text_str = '%s: %.2f' % (_class, score)

                text_w, text_h = cv2.getTextSize(text_str, font_face,
                                                 font_scale,
                                                 font_thickness)[0]

                text_pt = (x1, y1 - 3)

                cv2.rectangle(img_numpy, (x1, y1),
                              (x1 + text_w, y1 - text_h - 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face,
                            font_scale, text_color, font_thickness,
                            cv2.LINE_AA)

        return (img_numpy, boxes, scores)

Exemple #11

0

Afficher le fichier

def detect():
    """Run the pear model over a folder of images and compute roundness.

    For every ``.jpg``/``.png`` file in the hard-coded image folder the
    network is run, the mask of the first class-0 detection is taken, its
    outline is extracted and a roundness value is appended to that image's
    postprocess result. Images without a class-0 detection are skipped.
    """
    img_path = '/home/user/dataset/pear/train/JPEGImages'
    save_path = '/home/user/pear_output'
    weight_path = '/home/user/caoliwei/yolact/weights/20200901/yolact_darknet53_1176_20000.pth'

    set_cfg('pear_config')

    with torch.no_grad():
        torch.cuda.set_device(0)

        ######
        # If the input image size is constant, this make things faster (hence why we can use it in a video setting).
        # cudnn.benchmark = True
        # cudnn.fastest = True
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        ######

        net = Yolact()
        net.load_weights(weight_path)
        net.eval()
        net = net.cuda()
        print('model loaded...')

        net.detect.cross_class_nms = True
        net.detect.use_fast_nms = True
        cfg.mask_proto_debug = False

        # exist_ok avoids failing when the folder already exists.
        os.makedirs(save_path, exist_ok=True)

        img_names = [
            name for name in os.listdir(img_path)
            if name.endswith(('.jpg', '.png'))
        ]
        #for img_name in tqdm(img_names):
        for img_name in img_names:
            img = cv2.imread(os.path.join(img_path, img_name))
            img = torch.from_numpy(img).cuda().float()
            img = FastBaseTransform()(img.unsqueeze(0))
            start = time.time()
            preds = net(img)
            print('clw: image_name: %s, inference time use %.3fs' %
                  (img_name,
                   time.time() - start))  # inference time use 0.023s, 550x550

            # start = time.time()
            # Size of the transformed batch, not of the file on disk.
            h, w = img.shape[2:]
            result = postprocess(
                preds, w, h, crop_masks=True,
                score_threshold=0.3)  # classes, scores, boxes, masks sorted by score
            # top_k = 10
            # classes, scores, boxes, masks = [x[:top_k].cpu().numpy() for x in result]  # clw note TODO: is it necessary to keep only top_k?
            # print('clw: postprocess time use %.3fs' % (time.time() - start))  # 0.001s

            ### Walk result[0] in order and take the mask of the first
            ### detection whose class id is 0, i.e. the pear.
            # Reset per image so a mask from a previous image is never reused.
            pear_mask = None
            for i, cls_id in enumerate(result[0]):
                if cls_id == 0:
                    pear_mask = result[3][i].cpu().numpy()
                    break

            if pear_mask is None:
                print('clw: image_name: %s, no pear detected, skipping' %
                      img_name)
                continue

            # Extract the outline from the pear mask
            pear_outline = get_outline_from_mask(pear_mask, w, h)
            # print('pear_mask.sum:', pear_mask.sum())     # 124250.0
            # print('pear_outline.sum:', pear_outline.sum())  # 34335.0
            # print('clw: outline extract time use %.3fs' % (time.time() - start))  # 0.001s
            roundness = compute_roundness(pear_outline)
            ###

            # NOTE(review): assumes postprocess returns a list here -- confirm.
            result.append(roundness)

Exemple #12

0

Afficher le fichier

class pear_detector(object):
    """Detect a pear in an image with YOLACT and measure its roundness."""

    #def __init__(self, weight_path = '/home/user/caoliwei/yolact/weights/20200901/yolact_darknet53_1176_20000.pth'):
    def __init__(
            self,
            weight_path='C:/Users/user/yolact_notes/weights/yolact_darknet53_249_2000.pth',
            save_path='C:/Users/user/yolact_notes/pear_output'):
        """Activate 'pear_config' and load the network onto the GPU.

        Args:
            weight_path: weights file handed to ``Yolact.load_weights``.
            save_path: stored on the instance; not used in this class.
        """
        set_cfg('pear_config')
        self.save_path = save_path
        self.weight_path = weight_path
        self.net = Yolact()
        self.net.load_weights(self.weight_path)
        self.net.eval()
        self.net = self.net.cuda()
        print('model loaded...')

        self.net.detect.cross_class_nms = True
        self.net.detect.use_fast_nms = True

    def detect(self, img):
        """Run detection on ``img`` and append the pear's roundness.

        Args:
            img: numpy image; it is copied before being moved to the GPU.

        Returns:
            The postprocess result with the roundness appended when a
            class-0 detection exists, the postprocess result alone when
            none does, or ``None`` when inference itself failed. Failures
            are printed as tracebacks and never propagate (best effort).
        """
        # Defined up front so the final return is valid even when an
        # exception occurs before postprocess has produced anything.
        result = None
        try:
            print('')
            print(
                '======================== clw: detect of python nn start !! ================================'
            )
            print('img.shape:', img.shape)

            with torch.no_grad():
                torch.cuda.set_device(0)

                ######
                # If the input image size is constant, this make things faster (hence why we can use it in a video setting).
                # cudnn.benchmark = True
                # cudnn.fastest = True
                torch.set_default_tensor_type('torch.cuda.FloatTensor')
                ######

                cfg.mask_proto_debug = False

                # if not os.path.exists(self.save_path):
                #     os.mkdir(self.save_path)

                #img = img[:, :, ::-1].copy()
                # clw note: training loads images with cv2.imread() and
                # BackboneTransform handles BGR; at test time
                # FastBaseTransform handles BGR as well, so the ::-1 flip
                # should not be needed.
                img = img.copy()
                img = torch.from_numpy(img).cuda().float()
                img = FastBaseTransform()(img.unsqueeze(0))
                start = time.time()
                preds = self.net(img)

                # start = time.time()
                # Size of the transformed batch, not of the input image.
                h, w = img.shape[2:]
                result = postprocess(
                    preds, w, h, crop_masks=True, score_threshold=0.3
                )  # classes, scores, boxes, masks sorted by score
                # top_k = 10
                # classes, scores, boxes, masks = [x[:top_k].cpu().numpy() for x in result]  # clw note TODO: is it necessary to keep only top_k?
                # print('clw: postprocess time use %.3fs' % (time.time() - start))  # 0.001s
                print('clw: inference time use %.3fs, item nums in result:%d' %
                      (time.time() - start, len(
                          result[0])))  # inference time use 0.023s, 550x550

                ### Walk result[0] in order and take the mask of the first
                ### detection whose class id is 0, i.e. the pear.
                # start = time.time()
                pear_mask = None
                for i, cls_id in enumerate(result[0]):
                    if cls_id == 0:
                        pear_mask = result[3][i].cpu().numpy()
                        break

                if pear_mask is None:
                    print('clw: no pear detected, roundness not computed')
                else:
                    # Extract the outline from the pear mask
                    pear_outline = get_outline_from_mask(pear_mask, w, h)
                    # print('pear_mask.sum:', pear_mask.sum())     # 124250.0
                    # print('pear_outline.sum:', pear_outline.sum())  # 34335.0
                    # print('clw: outline extract time use %.3fs' % (time.time() - start))  # 0.001s
                    roundness = compute_roundness(pear_outline)
                    ###

                    # NOTE(review): assumes postprocess returns a list here
                    # -- confirm.
                    result.append(roundness)

        except Exception:
            # Best effort: report and fall through to the return below.
            # KeyboardInterrupt / SystemExit are deliberately not caught.
            traceback.print_exc()

        print(
            '======================== clw: detect of python nn end !! ================================'
        )
        print('')

        return result

Exemple #13

0

Afficher le fichier

class YolactInterface(object):
    """YOLACT wrapper that returns detections as COCO-style instance dicts."""

    def __init__(self, model_pth, output_num=5):
        """Load the network from ``model_pth`` onto CUDA device 0.

        Args:
            model_pth: weights file handed to ``Yolact.load_weights``.
            output_num: maximum number of detections kept by ``run_once``.
        """
        self.output_num = output_num
        with torch.no_grad():
            set_cfg("yolact_base_config")
            torch.cuda.set_device(0)
            cudnn.benchmark = True
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
            self.net = Yolact()
            self.net.load_weights(model_pth)
            self.net.eval()
            self.net = self.net.cuda()
        print("load model complete")

    def run_once(self, src):
        """Run detection on ``src`` and return the encoded instances.

        Args:
            src: image array whose shape unpacks as (height, width, channels).

        Returns:
            ``{"instances": [...]}`` with one dict per kept detection, as
            built by ``build_up_result``.
        """
        self.net.detect.cross_class_nms = True
        self.net.detect.use_fast_nms = True
        cfg.mask_proto_debug = False
        with torch.no_grad():
            frame = torch.Tensor(src).cuda().float()
            batch = FastBaseTransform()(frame.unsqueeze(0))
            # time.clock() was removed in Python 3.8; perf_counter() is the
            # documented replacement for measuring short durations.
            time_start = time.perf_counter()
            preds = self.net(batch)
            time_elapsed = (time.perf_counter() - time_start)
            h, w, _ = src.shape
            t = postprocess(
                preds,
                w,
                h,
                visualize_lincomb=False,
                crop_masks=True,
                score_threshold=0.)  # TODO: give a suitable threshold
            torch.cuda.synchronize()
            classes, scores, bboxes, masks = [
                x[:self.output_num].cpu().numpy() for x in t
            ]  # TODO: Only 5 objects for test
            print(time_elapsed)
        instances = self.build_up_result(masks.shape[0], classes, bboxes,
                                         masks, scores)
        return {"instances": instances}

    def build_up_result(self, num, classes, bboxes, masks, scores):
        """Convert the first ``num`` detections into JSON-friendly dicts.

        Each bbox is converted from (x1, y1, x2, y2) to (x, y, width,
        height) and each mask is RLE-encoded with pycocotools.

        Returns:
            A list of ``num`` dicts with keys 'category_id', 'bbox',
            'segmentation' and 'score'; empty when ``num`` is 0.
        """
        instances = []
        for i in range(num):
            bbox = [
                bboxes[i, 0], bboxes[i, 1], bboxes[i, 2] - bboxes[i, 0],
                bboxes[i, 3] - bboxes[i, 1]
            ]
            # Round to the nearest 10th to avoid huge file sizes, as COCO suggests
            bbox = [round(float(x) * 10) / 10 for x in bbox]
            # encode segmentation with RLE
            rle = pycocotools.mask.encode(
                np.asfortranarray(masks[i, :, :].astype(
                    np.uint8)))  # rle binary encoding
            rle['counts'] = rle['counts'].decode(
                'ascii')  # json.dump doesn't like bytes strings
            # create one instance json
            instances.append({
                'category_id':
                int(classes[i]
                    ),  # TODO: origin: get_coco_cat(int(category_id))
                'bbox': {
                    "b": bbox
                },
                "segmentation": rle,
                'score': float(scores[i])
            })

        return instances

Exemple #14

0

Afficher le fichier

def main(argv=None):
    """
    Entry point of prediction mode: parses the options, loads the model
    and hands everything over to ``predict``.

    :param argv: the command-line parameters to parse (list of strings);
                 when None, argparse falls back to sys.argv
    :type argv: list
    :raises Exception: if --fit_bbox_to_polygon is combined with a
                       non-negative --bbox_as_fallback, or if --debayer is
                       non-empty and does not start with "COLOR_BAYER_"
    """
    parser = argparse.ArgumentParser(description='YOLACT Prediction')
    add = parser.add_argument

    # --- model / detection options -------------------------------------
    add('--model', type=str, required=True,
        help='The trained model to use (.pth file).')
    add('--config', default="external_config",
        help='The name of the configuration to use.')
    add('--top_k', type=int, default=5,
        help='Further restrict the number of predictions (eg objects) to parse')
    add('--score_threshold', type=float, default=0,
        help='Detections with a score under this threshold will not be considered.')
    # store_false: fast NMS is on by default, passing the flag turns it off.
    add('--fast_nms', action="store_false",
        help='Whether to use a faster, but not entirely correct version of NMS.')
    add('--cross_class_nms', action="store_true",
        help='Whether compute NMS cross-class or per-class.')

    # --- directories and polling ---------------------------------------
    add('--prediction_in', type=str, default=None, required=True,
        help='The directory in which to look for images for processing.')
    add('--prediction_out', type=str, default=None, required=True,
        help='The directory to store the results in.')
    add('--prediction_tmp', type=str, default=None,
        help='The directory to store the results in first, before moving them to the actual output directory.')
    add('--continuous', action="store_true",
        help='Whether to continuously poll the input directory or exit once all initial images have been processed.')
    add('--delete_input', action="store_true",
        help='Whether to delete the input images rather than moving them to the output directory.')

    # --- polygon / mask output -----------------------------------------
    add('--output_polygons', action='store_true', default=False,
        help='Whether to masks are predicted and polygons should be output in the ROIS CSV files')
    add('--fit_bbox_to_polygon', action='store_true', default=False,
        help='When outputting polygons whether to fit the bounding box to the polygon')
    add('--bbox_as_fallback', type=float, default=-1.0,
        help=('When outputting polygons the bbox can be used as fallback polygon. This happens if the ratio '
              'between the surrounding bbox of the polygon and the bbox is smaller than the specified value. '
              'Turned off if < 0.'))
    add('--mask_threshold', type=float, default=0.1,
        help='The threshold (0-1) to use for determining the contour of a mask')
    add('--mask_nth', type=int, default=1,
        help='To speed polygon detection up, use every nth row and column only')
    add('--output_minrect', action='store_true', default=False,
        help='When outputting polygons whether to store the minimal rectangle around the objects in the CSV files as well')
    add('--view_margin', type=int, default=2,
        help='The number of pixels to use as margin around the masks when determining the polygon')
    add('--fully_connected', choices=['high', 'low'], default='high',
        help='When determining polygons, whether regions of high or low values should be fully-connected at isthmuses')
    add('--output_width_height', action='store_true', default=False,
        help="Whether to output x/y/w/h instead of x0/y0/x1/y1 in the ROI CSV files")

    # --- image pre-processing ------------------------------------------
    add('--scale', type=float, default=1.0,
        help='The scale factor to apply to the image (0-1) before processing. Output will be in original dimension space.')
    add('--debayer', type=str, default="",
        help='The OpenCV2 debayering method to use, eg "COLOR_BAYER_BG2BGR"')
    add('--output_mask_image', action='store_true', default=False,
        help="Whether to output a mask image (PNG) when predictions generate masks (independent of outputting polygons)")

    parsed = parser.parse_args(args=argv)

    # Reject contradictory / malformed options before touching the GPU.
    if parsed.fit_bbox_to_polygon and parsed.bbox_as_fallback >= 0:
        raise Exception(
            "Options --fit_bbox_to_polygon and --bbox_as_fallback cannot be used together!"
        )
    debayer = parsed.debayer
    if debayer is not None and debayer != "":
        if not debayer.startswith("COLOR_BAYER_"):
            raise Exception(
                "Expected debayering type to start with COLOR_BAYER_, instead got: "
                + str(debayer))

    with torch.no_grad():
        # initializing cudnn
        print('Initializing cudnn', end='')
        cudnn.fastest = True
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        print(' Done.')

        # load configuration
        print('Loading config %s' % parsed.config, end='')
        set_cfg(parsed.config)
        cfg.mask_proto_debug = False
        print(' Done.')

        # load model and move it to the GPU in evaluation mode
        print('Loading model: %s' % parsed.model, end='')
        net = Yolact()
        net.load_weights(parsed.model)
        net.eval()
        net = net.cuda()
        net.detect.use_fast_nms = parsed.fast_nms
        net.detect.use_cross_class_nms = parsed.cross_class_nms
        print(' Done.')

        predict(
            model=net,
            input_dir=parsed.prediction_in,
            output_dir=parsed.prediction_out,
            tmp_dir=parsed.prediction_tmp,
            top_k=parsed.top_k,
            score_threshold=parsed.score_threshold,
            delete_input=parsed.delete_input,
            output_polygons=parsed.output_polygons,
            mask_threshold=parsed.mask_threshold,
            mask_nth=parsed.mask_nth,
            output_minrect=parsed.output_minrect,
            view_margin=parsed.view_margin,
            fully_connected=parsed.fully_connected,
            fit_bbox_to_polygon=parsed.fit_bbox_to_polygon,
            output_width_height=parsed.output_width_height,
            bbox_as_fallback=parsed.bbox_as_fallback,
            scale=parsed.scale,
            debayer=parsed.debayer,
            continuous=parsed.continuous,
            output_mask_image=parsed.output_mask_image,
        )

Exemple #15

0

Afficher le fichier

class RunYolact(object):
    """
    Helper class that runs YOLACT inference on single images.
    source: https://github.com/dbolya/yolact/issues/9
    """
    def __init__(self,
                 trained_model: str,
                 save_json=True,
                 output_dir=None,
                 output_name="detection",
                 output_num=5):
        """
        Initialise YOLACT. Parameters:
            - trained_model     path of the weights file given to load_weights
            - save_json         whether results are collected for a json dump
            - output_dir        where the json goes; a Detections collector is
                                only created when save_json is true and this
                                is not None
            - output_name       name handed to Detections with output_dir
            - output_num        maximum number of detections kept per image
                                (used as a slice bound in run_once)
        """
        # step 0: initialise state
        self.save_json = save_json
        self.detections = None
        self.output_num = output_num
        # step 1: create the Detections collector if a json dump was requested
        if self.save_json and output_dir is not None:
            self.detections = Detections(output_dir, output_name)
        # step 2: build the YOLACT network
        with torch.no_grad():
            set_cfg("yolact_base_config")
            # NOTE(review): the GPU index is hard-coded to device 1.
            torch.cuda.set_device(1)
            cudnn.benchmark = True
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
            self.net = Yolact()
            # TODO the weights path has to be adapted by the caller
            # self.net.load_weights('./weights/yolact_base_54_800000.pth')
            self.net.load_weights(trained_model)
            self.net.eval()
            self.net = self.net.cuda()
        print("load model complete")

    def run_once(self, src, image_name):
        """
        Run prediction on a single image. Parameters:
            - src           the image to predict on; must expose ``.shape`` as
                            (h, w, channels) and be convertible by torch.Tensor
            - image_name    name stored with every instance (presumably the
                            image file name -- confirm with the caller)

        The forward-pass time is printed.  Results are only stored and dumped
        when a Detections collector was created in ``__init__``.
        """
        # step 0: preparation
        # NOTE(review): some YOLACT versions name this flag
        # ``use_cross_class_nms`` -- confirm against the Detect class in use.
        self.net.detect.cross_class_nms = True
        self.net.detect.use_fast_nms = True
        cfg.mask_proto_debug = False
        # step 1: prediction
        with torch.no_grad():
            frame = torch.Tensor(src).cuda().float()
            batch = FastBaseTransform()(frame.unsqueeze(0))
            # time.clock() was removed in Python 3.8; perf_counter() is the
            # documented replacement for measuring elapsed time.
            # NOTE(review): the timer stops before torch.cuda.synchronize(),
            # so it may not include all GPU work.
            time_start = time.perf_counter()
            preds = self.net(batch)
            time_elapsed = time.perf_counter() - time_start
            h, w, _ = src.shape
            # NOTICE: no minimum score threshold is applied here
            t = postprocess(
                preds,
                w,
                h,
                visualize_lincomb=False,
                crop_masks=True,
                score_threshold=0.)  # TODO: give a suitable threshold
            torch.cuda.synchronize()
            classes, scores, boxes, masks = [
                x[:self.output_num].cpu().numpy() for x in t
            ]  # TODO: Only 5 objects for test
            print(time_elapsed)

            # Without a collector (save_json false or no output_dir) there is
            # nowhere to store results; previously this raised AttributeError.
            if self.detections is None:
                return

            # add every predicted instance to the detections object
            for i in range(masks.shape[0]):
                self.detections.add_instance(image_name, i, classes[i],
                                             boxes[i, :], masks[i, :, :],
                                             scores[i])
        # step 2: save all prediction results
        self.detections.dump_all()

Exemple #16

0

Afficher le fichier

class MattingService:
    """Cut the best-scoring YOLACT detection out of an image.

    The result is the input image with a fourth channel holding the mask of
    the highest-scoring detection (0 or 255), written to disk with OpenCV.
    """

    def __init__(self,
                 model_path="./weights/yolact_im700_54_800000.pth",
                 use_cuda=False):
        """Load the weights and put the network into evaluation mode.

        Args:
            model_path: weights file passed to ``Yolact.load_weights``.
            use_cuda: when true, the network and input frames are moved to
                the GPU.
        """
        print('Loading model...', end='')
        self.use_cuda = use_cuda
        self.trained_model = model_path
        self.net = Yolact()
        self.net.load_weights(self.trained_model)
        self.net.eval()

        if self.use_cuda:
            self.net = self.net.cuda()

        self.net.detect.use_fast_nms = True
        self.net.detect.use_cross_class_nms = False
        cfg.mask_proto_debug = False

        print(' Done.')

    def process(self, image, top_k=1, score_threshold=0.6):
        """Run inference on one image and write the RGBA result.

        Args:
            image: either ``"input_path:output_path"`` or a plain input path.
                For a plain path the output goes to
                ``results/<first dot-separated part of the file name>.png``.
            top_k: maximum number of detections considered.
            score_threshold: detections below this score are ignored.

        Returns:
            For the ``in:out`` form the output path as given; for a plain
            path the generated file name (without the ``results/`` prefix);
            ``None`` when ``image`` is ``None``.
        """
        # TODO Currently we do not support Fast Mask Re-scroing in evalimage, evalimages, and evalvideo
        if image is None:
            return None

        with torch.no_grad():
            if ':' in image:
                inp, _image_name = image.split(':')
                self._infer_image(self.net, inp, _image_name, top_k,
                                  score_threshold)
            else:
                _image_name = image.split('/')[-1].split('.')[0] + '.png'
                out = os.path.join('results/', _image_name)
                self._infer_image(self.net, image, out, top_k,
                                  score_threshold)
            return _image_name

    def _infer_image(self, net: Yolact, path, save_path, top_k,
                     score_threshold):
        """Read ``path``, run ``net`` on it and save or show the result.

        When ``save_path`` is ``None`` the first three channels are reversed
        and the image is shown with matplotlib; otherwise it is written with
        ``cv2.imwrite``.
        """
        frame = torch.from_numpy(cv2.imread(path))
        if self.use_cuda:
            frame = frame.cuda()
        frame = frame.float()

        batch = FastBaseTransform()(frame.unsqueeze(0))
        preds = net(batch)

        img_numpy = self.post_process(preds,
                                      frame,
                                      None,
                                      None,
                                      top_k,
                                      score_threshold,
                                      undo_transform=False)

        if save_path is None:
            # Reverse the colour channels for display, keep alpha last.
            img_numpy = img_numpy[:, :, (2, 1, 0, 3)]
            plt.subplot()
            plt.imshow(img_numpy)
            plt.title(path)
            plt.show()
        else:
            cv2.imwrite(save_path, img_numpy)

    @staticmethod
    def post_process(dets_out,
                     img,
                     h,
                     w,
                     top_k=1,
                     score_threshold=0.6,
                     undo_transform=True):
        """Turn network output into a 4-channel image with a mask as alpha.

        Note: If undo_transform=False then h and w are allowed to be None;
        they are taken from ``img.shape`` instead.

        Args:
            dets_out: raw network output handed to ``postprocess``.
            img: the image tensor (``(h, w, channels)`` when
                ``undo_transform`` is false).
            h, w: target height / width, only used when ``undo_transform``.
            top_k: maximum number of detections considered.
            score_threshold: detections below this score are ignored.
            undo_transform: whether ``img`` is a transformed network input
                that has to be converted back first.

        Returns:
            A numpy array with four channels.  Channel 3 holds the mask of
            the best-scoring detection scaled to 0/255, or whatever
            ``cv2.cvtColor`` put there if nothing passed the threshold or
            the mask branch is disabled.
        """
        if undo_transform:
            img_numpy = undo_image_transformation(img, w, h)
            img_gpu = torch.Tensor(img_numpy).cuda()
        else:
            img_gpu = img / 255.0
            h, w, _ = img.shape

        with timer.env('Postprocess'):
            # Temporarily force bbox rescoring; restore the global config
            # even if postprocess raises.
            save = cfg.rescore_bbox
            cfg.rescore_bbox = True
            try:
                t = postprocess(dets_out,
                                w,
                                h,
                                visualize_lincomb=False,
                                crop_masks=False,
                                score_threshold=score_threshold)
            finally:
                cfg.rescore_bbox = save

        with timer.env('Copy'):
            # Indices of the top_k detections, best score first.
            idx = t[1].argsort(0, descending=True)[:top_k]

            # Masks stay on their device; only the small arrays are copied.
            masks = t[3][idx] if cfg.eval_mask_branch else None
            classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

        # Scores are sorted, so stop at the first one below the threshold.
        num_dets_to_consider = min(top_k, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < score_threshold:
                num_dets_to_consider = j
                break

        final_res = (img_gpu * 255).byte().cpu().numpy()
        final_res = cv2.cvtColor(final_res, cv2.COLOR_RGB2RGBA)

        # Nothing to cut out: no usable detection, or no masks were produced
        # (previously the latter raised NameError on ``masks``).
        if num_dets_to_consider == 0 or masks is None:
            return final_res

        # Use the best-scoring mask as the alpha channel of the image.
        final_res[:, :, 3] = (masks[0] * 255).byte().cpu().numpy()

        return final_res

Exemple #17

0

Afficher le fichier

Fichier : postutils.py Projet : simutisernestas/yolact

def postprocess(det_output,
                w,
                h,
                batch_idx=0,
                interpolation_mode='bilinear',
                visualize_lincomb=False,
                crop_masks=True,
                score_threshold=0):
    """Post-process the raw detections of one batch element.

    Filters detections by score, assembles masks from the prototypes and the
    mask coefficients, rescores them with a mask-IoU network, resizes the
    masks to ``(h, w)``, binarizes them and sanitizes the box coordinates.

    Args:
        det_output: indexable network output; ``det_output[batch_idx]`` must
            hold a ``'detection'`` entry that is either None or a dict with
            the keys ``'score'``, ``'class'``, ``'box'``, ``'mask'`` and
            ``'proto'``.
        w: target width of the masks / boxes.
        h: target height of the masks / boxes.
        batch_idx: which batch element to process.
        interpolation_mode: mode passed to ``F.interpolate``.
        visualize_lincomb: not used in the visible code.
        crop_masks: whether to call ``crop(masks, boxes)`` before resizing.
        score_threshold: NOTE(review): currently ignored -- it is overwritten
            with a hard-coded 0.15 below.

    Returns:
        A list of four empty tensors when there are no detections (or none
        pass the threshold).
        NOTE(review): no return statement is visible for the normal path in
        this excerpt, so as shown the function returns None there --
        presumably the snippet is truncated.

    Side effects:
        The entries of the ``dets`` dict (except ``'proto'``) are replaced
        in place by their filtered versions.
    """

    dets = det_output[batch_idx]['detection']

    if dets is None:
        print("detections None")
        return [torch.Tensor()
                ] * 4  # Warning, this is 4 copies of the same thing

    # NOTE(review): this overrides the caller-supplied score_threshold.
    score_threshold = 0.15  # TODO hardcoded
    keep = dets['score'] > score_threshold

    # Filter every per-detection entry; 'proto' is left untouched.
    for k in dets:
        if k != 'proto':
            dets[k] = dets[k][keep]

    if dets['score'].size(0) == 0:
        return [torch.Tensor()] * 4

    # Actually extract everything from dets now
    classes = dets['class']
    boxes = dets['box']
    scores = dets['score']
    masks = dets['mask']

    # At this points masks is only the coefficients
    proto_data = dets['proto']

    # Combine the prototypes with the per-detection coefficients.
    masks = proto_data @ masks.t()
    masks = activation_func['sigmoid'](masks)

    # Crop masks before upsampling because you know why
    if crop_masks:
        masks = crop(masks, boxes)

    # Permute into the correct output shape [num_dets, proto_h, proto_w]
    masks = masks.permute(2, 0, 1).contiguous()

    # NOTE(review): a fresh network is built and its weights are read from a
    # hard-coded file on EVERY call, only to use its maskiou_net below.
    net = Yolact()
    net.load_weights('yolact_resnet50_54_800000.pth')
    net.eval()

    # if cfg.use_maskiou:
    # NOTE(review): the config switches were replaced by constant ``True``,
    # so the else branch below is unreachable.
    if True:
        with torch.no_grad():
            maskiou_p = net.maskiou_net(masks.unsqueeze(1))
            # Pick, per detection, the mask-IoU value of its own class.
            maskiou_p = torch.gather(maskiou_p,
                                     dim=1,
                                     index=classes.unsqueeze(1)).squeeze(1)
            if True:
                if True:
                    # Rescore: multiply the score by the gathered mask-IoU.
                    scores = scores * maskiou_p
                else:
                    scores = [scores, scores * maskiou_p]

    # Scale masks up to the full image
    masks = F.interpolate(masks.unsqueeze(0), (h, w),
                          mode=interpolation_mode,
                          align_corners=False).squeeze(0)

    # Binarize the masks
    # (in place: values > 0.5 become 1, everything else 0)
    masks.gt_(0.5)

    # Columns 0/2 are processed against w, columns 1/3 against h; the exact
    # semantics of sanitize_coordinates are defined elsewhere.
    boxes[:, 0], boxes[:, 2] = sanitize_coordinates(boxes[:, 0],
                                                    boxes[:, 2],
                                                    w,
                                                    cast=False)
    boxes[:, 1], boxes[:, 3] = sanitize_coordinates(boxes[:, 1],
                                                    boxes[:, 3],
                                                    h,
                                                    cast=False)
    boxes = boxes.long()

Exemple #18

0

Afficher le fichier

Fichier : YolactEdgeEngine.py Projet : Hankfirst/ros-instance-segmentation

class YolactEdgeEngine:
    """Loads a YolactEdge model once and runs it on images or image batches.

    Only detections whose class name is ``'car'`` or ``'truck'`` are kept by
    ``prep_display``.
    """

    def __init__(self):
        """Configure the arguments, load the weights and convert to TensorRT."""
        # parse_args is defined elsewhere; the attribute accesses below rely
        # on it having attached an ``args`` namespace to this instance.
        parse_args(self)
        self.args.config = 'yolact_edge_mobilenetv2_config'
        set_cfg(self.args.config)
        # NOTE(review): machine-specific absolute paths are hard-coded here.
        self.args.trained_model = '/home/ht/catkin_ws/src/instance_segmentation/scripts/weights/yolact_edge_mobilenetv2_124_10000.pth'
        self.args.top_k = 10
        self.args.score_threshold = 0.3
        self.args.trt_batch_size = 3
        self.args.disable_tensorrt = False
        self.args.use_fp16_tensorrt = False
        self.args.use_tensorrt_safe_mode = True
        self.args.cuda = True
        self.args.fast_nms = True
        self.args.display_masks = True
        self.args.display_bboxes = True
        self.args.display_text = True
        self.args.display_scores = True
        self.args.display_linecomb = False
        self.args.fast_eval = False
        self.args.deterministic = False
        self.args.no_crop = False
        self.args.crop = True
        self.args.calib_images = '/home/ht/catkin_ws/src/instance_segmentation/scripts/data/coco/calib_images'

        setup_logger(logging_level=logging.INFO)
        self.logger = logging.getLogger('yolact.eval')

        # Per-device cache of colour tensors, filled lazily in prep_display.
        self.color_cache = defaultdict(lambda: {})

        with torch.no_grad():
            cudnn.benchmark = True
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')

            self.logger.info('Loading model...')
            self.net = Yolact(training=False)
            if self.args.trained_model is not None:
                self.net.load_weights(self.args.trained_model, args=self.args)
            else:
                self.logger.warning('No weights loaded!')
            self.net.eval()
            self.logger.info('Model loaded.')
            convert_to_tensorrt(self.net,
                                cfg,
                                self.args,
                                transform=BaseTransform())

    def _prepare_net(self):
        """Move the network to the GPU and apply the NMS / debug settings."""
        self.net = self.net.cuda()
        self.net.detect.use_fast_nms = self.args.fast_nms
        cfg.mask_proto_debug = self.args.mask_proto_debug

    def _forward(self, batch):
        """Run the network on ``batch`` and return its ``"pred_outs"`` entry.

        Raises:
            AssertionError: if the configured model is a video-based one
                (``cfg.flow.warp_mode`` is not ``'none'``).
        """
        if cfg.flow.warp_mode != 'none':
            raise AssertionError(
                'Evaluating the image with a video-based model.')

        extras = {
            "backbone": "full",
            "interrupt": False,
            "keep_statistics": False,
            "moving_statistics": None
        }
        return self.net(batch, extras=extras)["pred_outs"]

    def evaluate(self, train_mode=False, train_cfg=None):
        """Evaluate every image of ``self.args.images`` (``"in_dir:out_dir"``).

        ``train_mode`` and ``train_cfg`` are accepted but not used.
        """
        with torch.no_grad():
            self._prepare_net()
            inp, out = self.args.images.split(':')
            self.evalimages(inp, out)

    def evalimages(self, input_folder: str, output_folder: str):
        """Run ``evalimage`` on every readable file in ``input_folder``.

        ``output_folder`` is created if missing (including parents).  Files
        OpenCV cannot read are skipped with a warning.  Note that the
        results of ``evalimage`` are currently discarded and nothing is
        written to ``output_folder``.
        """
        os.makedirs(output_folder, exist_ok=True)

        print()
        for p in Path(input_folder).glob('*'):
            path = str(p)
            name = os.path.basename(path)
            name = '.'.join(name.split('.')[:-1]) + '.jpg'
            out_path = os.path.join(output_folder, name)

            img = cv2.imread(path)
            if img is None:
                # glob('*') also yields directories and non-image files.
                self.logger.warning('Could not read image %s, skipping.',
                                    path)
                continue
            self.evalimage(img, out_path)
        print('Done.')

    def detect(self, img_in, return_imgs=False):
        """Run batch inference on ``img_in``; see ``evalbatch`` for the result."""
        with torch.no_grad():
            self._prepare_net()
            return self.evalbatch(img_in, return_imgs)

    def evalbatch(self, imgs, return_imgs=False):
        """Run the network on a batch of images.

        Args:
            imgs: sequence of images that ``np.array`` can stack.
            return_imgs: whether rendered images are returned as well.

        Returns:
            ``allres`` (one ``prep_display`` result per image), or
            ``(imgs_out, allres)`` when ``return_imgs`` is true.
        """
        frame = torch.from_numpy(np.array(imgs)).cuda().float()
        batch = FastBaseTransform()(frame)
        preds = self._forward(batch)

        imgs_out = []
        allres = []
        for i, _ in enumerate(imgs):
            out = self.prep_display(preds,
                                    frame[i],
                                    None,
                                    None,
                                    undo_transform=False,
                                    batch_idx=i,
                                    create_mask=True,
                                    return_imgs=return_imgs)
            if return_imgs:
                # With return_imgs prep_display yields (image, result).
                img_out, res = out
                imgs_out.append(img_out)
            else:
                res = out
            allres.append(res)

        if return_imgs:
            return imgs_out, allres
        return allres

    def evalimage(self, img, save_path=None):
        """Run the network on one image; ``save_path`` is currently unused."""
        frame = torch.from_numpy(img).cuda().float()
        batch = FastBaseTransform()(frame.unsqueeze(0))
        preds = self._forward(batch)

        return self.prep_display(preds,
                                 frame,
                                 None,
                                 None,
                                 undo_transform=False,
                                 create_mask=True)

    def prep_display(self,
                     dets_out,
                     img,
                     h,
                     w,
                     undo_transform=True,
                     class_color=False,
                     mask_alpha=0.45,
                     batch_idx=0,
                     create_mask=False,
                     return_imgs=False):
        """Post-process detections and optionally render them onto the image.

        If ``undo_transform`` is false, ``h`` and ``w`` may be None; they are
        taken from ``img.shape``.

        Returns (depending on the flags):
            * no car/truck detection: the plain image when ``create_mask`` is
              false; ``(image, ImageResult)`` when ``return_imgs`` is true;
              otherwise an ``ImageResult`` with an all-zero mask.
            * masks displayed, ``create_mask`` true and ``return_imgs``
              false: an ``ImageResult`` only.
            * otherwise ``(rendered image, ImageResult)``; the mask image in
              the result is None when no mask image was created.
        """
        if undo_transform:
            img_numpy = undo_image_transformation(img, w, h)
            img_gpu = torch.Tensor(img_numpy).cuda()
        else:
            img_gpu = img / 255.0
            h, w, _ = img.shape

        with timer.env('Postprocess'):
            t = postprocess(dets_out,
                            w,
                            h,
                            batch_idx,
                            visualize_lincomb=self.args.display_linecomb,
                            crop_masks=self.args.crop,
                            score_threshold=self.args.score_threshold)
            torch.cuda.synchronize()

        with timer.env('Copy'):
            if cfg.eval_mask_branch:
                # Masks stay on the GPU, only the small arrays are copied.
                masks = t[3][:self.args.top_k]
            classes, scores, boxes = [
                x[:self.args.top_k].cpu().numpy() for x in t[:3]
            ]

        # Stop at the first detection below the score threshold.
        num_dets_to_consider = min(self.args.top_k, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < self.args.score_threshold:
                num_dets_to_consider = j
                break

        # Keep only cars and trucks.
        idx_fil = [
            i for i in range(num_dets_to_consider)
            if cfg.dataset.class_names[classes[i]] in ('car', 'truck')
        ]
        num_dets_to_consider = len(idx_fil)

        if num_dets_to_consider == 0:
            # no detection found so just output original image
            if not create_mask:
                return (img_gpu * 255).byte().cpu().numpy()
            elif return_imgs:
                return (img_gpu * 255).byte().cpu().numpy(), ImageResult(
                    None, None, None, np.zeros((h, w, 1), dtype='uint8'), 0)
            else:
                return ImageResult(None, None, None,
                                   np.zeros((h, w, 1), dtype='uint8'), 0)

        # Quick and dirty helper for selecting the color for a particular
        # index; also keeps track of a per-gpu color cache for maximum speed
        def get_color(j, on_gpu=None):
            color_idx = (classes[j] * 5 if class_color else j *
                         5) % len(COLORS)

            if on_gpu is not None and color_idx in self.color_cache[on_gpu]:
                return self.color_cache[on_gpu][color_idx]
            else:
                color = COLORS[color_idx]
                if not undo_transform:
                    # The image might come in as RGB or BRG, depending
                    color = (color[2], color[1], color[0])
                if on_gpu is not None:
                    color = torch.Tensor(color).to(on_gpu).float() / 255.
                    self.color_cache[on_gpu][color_idx] = color
                return color

        # Stays None when no mask image is built below; previously the final
        # return raised NameError in that case.
        mask_img = None

        if self.args.display_masks and cfg.eval_mask_branch:
            # after this, mask is of size [num_dets, h, w, 1]
            masks = masks[idx_fil, :, :, None]
            classes = classes[idx_fil]
            scores = scores[idx_fil]
            boxes = boxes[idx_fil, :]

            if create_mask:
                # Label image: detection j contributes 10 * (j + 1).
                mask_img = np.zeros((h, w, 1), dtype='uint8')
                for j in range(num_dets_to_consider):
                    mask_img += 10 * (j + 1) * masks[j].cpu().numpy().astype(
                        np.uint8)
                if not return_imgs:
                    return ImageResult(classes, scores, boxes, mask_img,
                                       num_dets_to_consider)

            # prepare the rgb image for each mask given their color
            # (of size [num_dets, h, w, 3])
            colors = torch.cat([
                get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
                for j in range(num_dets_to_consider)
            ],
                               dim=0)
            masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

            # this is 1 everywhere except for 1-mask_alpha where the mask is
            inv_alph_masks = masks * (-mask_alpha) + 1

            # This whole block should be equivalent to:
            #    for j in range(num_dets_to_consider):
            #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
            masks_color_summand = masks_color[0]
            if num_dets_to_consider > 1:
                inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                                  1)].cumprod(dim=0)
                masks_color_cumul = masks_color[1:] * inv_alph_cumul
                masks_color_summand += masks_color_cumul.sum(dim=0)

            img_gpu = img_gpu * inv_alph_masks.prod(
                dim=0) + masks_color_summand

        # then draw the stuff that needs to be done on cpu
        # note: make sure this is a uint8 tensor or opencv will not
        # anti-alias text for whatever reason
        img_numpy = (img_gpu * 255).byte().cpu().numpy()

        if self.args.display_text or self.args.display_bboxes:
            for j in reversed(range(num_dets_to_consider)):
                x1, y1, x2, y2 = boxes[j, :]
                color = get_color(j)
                score = scores[j]

                if self.args.display_bboxes:
                    cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

                if self.args.display_text:
                    _class = cfg.dataset.class_names[classes[j]]
                    text_str = '%s: %.2f' % (
                        _class, score) if self.args.display_scores else _class
                    text_pt = (x1, y1 - 3)
                    text_color = [255, 255, 255]

                    font_face = cv2.FONT_HERSHEY_DUPLEX
                    font_scale = 0.6
                    font_thickness = 1

                    cv2.putText(img_numpy, text_str, text_pt, font_face,
                                font_scale, text_color, font_thickness,
                                cv2.LINE_AA)
        return img_numpy, ImageResult(classes, scores, boxes, mask_img,
                                      num_dets_to_consider)

Exemple #19

0

Afficher le fichier

Fichier : webapp.py Projet : whwtraffic12306/Social-Distance-Monitoring

class SocialDistance:
    def __init__(self, id):
        """
        Open the video source and load the YOLACT++ model used for detection.

        Args:
            id: Source identifier handed to ``WebcamVideoStream(src=id)``.
        """
        # Video source (cv2.VideoCapture(id) is the commented-out alternative
        # in the original code).
        self.cap = WebcamVideoStream(src=id).start()

        # Size every frame is resized to in main().
        self.width, self.height = 1280, 720  # 640 x 360 was the other option

        # Post-processing options forwarded to postprocess().
        self.display_lincomb = False
        self.crop = True
        self.score_threshold = 0.15
        self.top_k = 30

        # Drawing switches read by prep_display().
        self.display_masks = True
        self.display_fps = False
        self.display_text = True
        self.display_bboxes = True
        self.display_scores = False

        # NMS flags copied onto the network's detect module below.
        self.fast_nms = True
        self.cross_class_nms = True

        # Model configuration and weights.
        self.config = 'yolact_plus_base_config'
        self.trained_model = 'weights/yolact_plus_base_54_800000.pth'

        print('Config specified. Parsed %s from the file name.\n' %
              self.config)
        set_cfg(self.config)

        print('Loading model...', end='')
        net = Yolact()
        net.load_weights(self.trained_model)
        net.detect.use_fast_nms = self.fast_nms
        net.detect.use_cross_class_nms = self.cross_class_nms
        net.eval()
        # `device` is a module-level name defined outside this class.
        self.model = net.to(device, non_blocking=True)
        print(' Done.')

        self.model_path = SavePath.from_str(self.trained_model)

    def prep_display(self,
                     dets_out,
                     img,
                     h,
                     w,
                     undo_transform=True,
                     class_color=False,
                     mask_alpha=0.45,
                     fps_str=''):
        """
        Render detections (masks, boxes, labels, distance lines) onto a frame.

        Note: If undo_transform=False then h and w are allowed to be None;
        they are then taken from ``img.shape``.

        Args:
            dets_out: Raw network output, passed straight to ``postprocess``.
            img: The frame. With ``undo_transform=False`` it is divided by
                255 and indexed as (h, w, channels).
            h, w: Output height and width (see note above).
            undo_transform: If True, ``undo_image_transformation`` is applied
                to ``img`` first.
            class_color: Pick colours per class instead of per detection.
            mask_alpha: Opacity used when blending masks into the frame.
            fps_str: Text drawn in the top-left corner when
                ``self.display_fps`` is set.

        Returns:
            The annotated frame as a uint8 numpy array.

        Side effects:
            Updates the module-level ``log`` dict ("total_person",
            "total_person_in_red_zone", "total_person_in_green_zone") while
            drawing labels for "person" detections.
        """

        lineThickness = 2

        if undo_transform:
            img_numpy = undo_image_transformation(img, w, h)
            img_gpu = torch.Tensor(img_numpy).cuda()
        else:
            img_gpu = img / 255.0
            h, w, _ = img.shape

        with timer.env('Postprocess'):
            # Temporarily force rescoring, then restore the previous setting.
            save = cfg.rescore_bbox
            cfg.rescore_bbox = True
            t = postprocess(dets_out,
                            w,
                            h,
                            visualize_lincomb=self.display_lincomb,
                            crop_masks=self.crop,
                            score_threshold=self.score_threshold)
            cfg.rescore_bbox = save

        with timer.env('Copy'):
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][:self.top_k]

            classes, scores, boxes = [
                x[:self.top_k].cpu().detach().numpy() for x in t[:3]
            ]

        # Stop at the first detection whose score is below the threshold.
        num_dets_to_consider = min(self.top_k, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < self.score_threshold:
                num_dets_to_consider = j
                break

        # Quick and dirty lambda for selecting the color for a particular index
        # Also keeps track of a per-gpu color cache for maximum speed
        def get_color(j, on_gpu=None):
            global color_cache
            color_idx = (classes[j] * 5 if class_color else j *
                         5) % len(COLORS)

            if on_gpu is not None and color_idx in color_cache[on_gpu]:
                return color_cache[on_gpu][color_idx]
            else:
                color = COLORS[color_idx]
                if not undo_transform:
                    # The image might come in as RGB or BRG, depending
                    color = (color[2], color[1], color[0])
                if on_gpu is not None:
                    color = torch.Tensor(color).to(on_gpu).float() / 255.
                    color_cache[on_gpu][color_idx] = color
                return color

        # First, draw the masks on the GPU where we can do it really fast
        # Beware: very fast but possibly unintelligible mask-drawing code ahead
        if self.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
            # After this, mask is of size [num_dets, h, w, 1]
            masks = masks[:num_dets_to_consider, :, :, None]
            # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
            colors = torch.cat([
                get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
                for j in range(num_dets_to_consider)
            ],
                               dim=0)
            masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

            # This is 1 everywhere except for 1-mask_alpha where the mask is
            inv_alph_masks = masks * (-mask_alpha) + 1

            # This whole block should be equivalent to:
            #    for j in range(num_dets_to_consider):
            #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
            masks_color_summand = masks_color[0]
            if num_dets_to_consider > 1:
                inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                                  1)].cumprod(dim=0)
                masks_color_cumul = masks_color[1:] * inv_alph_cumul
                masks_color_summand += masks_color_cumul.sum(dim=0)

            img_gpu = img_gpu * inv_alph_masks.prod(
                dim=0) + masks_color_summand

        if self.display_fps:
            # Draw the box for the fps on the GPU
            font_face = cv2.FONT_HERSHEY_DUPLEX
            font_scale = 0.6
            font_thickness = 1

            text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale,
                                             font_thickness)[0]

            img_gpu[0:text_h + 8, 0:text_w + 8] *= 0.6  # 1 - Box alpha

        # Then draw the stuff that needs to be done on the cpu
        # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
        img_numpy = (img_gpu * 255).byte().cpu().detach().numpy()

        if self.display_fps:
            # Draw the text on the CPU
            text_pt = (4, text_h + 2)
            text_color = [255, 255, 255]

            cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale,
                        text_color, font_thickness, cv2.LINE_AA)

        if num_dets_to_consider == 0:
            return img_numpy

        if self.display_text or self.display_bboxes:
            distance_boxes = []

            def draw_distance(boxes):
                """
                Draw a line between every pair of person boxes that are close.

                input : boxes (list of [x1, y1, x2, y2] lists)

                The Euclidean distance is measured between the (x2, y2)
                corners of the two boxes. Pairs closer than 250 px get a red
                line, pairs closer than 300 px a green one; the pair counts
                are stored in the module-level ``log`` dict.
                """
                red_pairs = 0  # pairs considered high risk
                green_pairs = 0
                # Only pairs are needed, so enumerate them directly instead
                # of walking every subset of `boxes` (2**n of them).
                for first, second in combinations(boxes, 2):
                    corner_a = (first[2], first[3])
                    corner_b = (second[2], second[3])
                    dist = np.linalg.norm(
                        np.array(corner_a) - np.array(corner_b))
                    if dist < 250:
                        red_pairs += 1
                        cv2.line(img_numpy, corner_a, corner_b, (0, 0, 255),
                                 lineThickness)
                    elif dist < 300:
                        green_pairs += 1
                        cv2.line(img_numpy, corner_a, corner_b, (0, 255, 0),
                                 lineThickness)
                log["total_person_in_red_zone"] = red_pairs
                log["total_person_in_green_zone"] = green_pairs

            for j in reversed(range(num_dets_to_consider)):
                x1, y1, x2, y2 = boxes[j, :]
                color = get_color(j)
                score = scores[j]

                if self.display_bboxes:
                    cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

                if self.display_text:
                    _class = cfg.dataset.class_names[classes[j]]
                    if _class == "person":
                        # NOTE(review): this stores the number of ALL kept
                        # detections, not only persons - confirm intent.
                        log["total_person"] = num_dets_to_consider
                        distance_boxes.append(boxes[j, :].tolist())
                        draw_distance(distance_boxes)

                    text_str = '%s: %.2f' % (
                        _class, score) if self.display_scores else _class

                    font_face = cv2.FONT_HERSHEY_DUPLEX
                    font_scale = 0.6
                    font_thickness = 1

                    text_w, text_h = cv2.getTextSize(text_str, font_face,
                                                     font_scale,
                                                     font_thickness)[0]

                    text_pt = (x1, y1 - 3)
                    text_color = [255, 255, 255]

                    # Filled label background, then the label itself.
                    cv2.rectangle(img_numpy, (x1, y1),
                                  (x1 + text_w, y1 - text_h - 4), color, -1)
                    cv2.putText(img_numpy, text_str, text_pt, font_face,
                                font_scale, text_color, font_thickness,
                                cv2.LINE_AA)

        return img_numpy

    def main(self):
        """
        Grab one frame, annotate it and return it JPEG-encoded.

        The frame is resized to (self.width, self.height), a title bar and a
        statistics panel are blended in, the model is run on the result and
        ``prep_display`` draws the detections.

        Returns:
            bytes: The annotated frame encoded as JPEG.

        Raises:
            RuntimeError: If the video source returned no frame.
        """
        # Read synchronously: the original read in a thread that was joined
        # immediately, which gained nothing and blocked forever on q.get()
        # when the read or resize failed inside the thread.
        # If you capture with cv2.VideoCapture() instead, use:
        #   ret, frame = self.cap.read()
        frame = self.cap.read()
        if frame is None:
            raise RuntimeError("No frame received from the video source")
        inputs = cv2.resize(frame, (self.width, self.height))

        ## Designing the frame with the necessary information
        # `log` is read before prep_display() updates it below, so the
        # numbers shown are those stored by the previous call.
        title = "Social Distance Monitoring - COVID19"
        total_person = "Total = {}".format(log["total_person"])
        red_zone = "High Risk = {}".format(log["total_person_in_red_zone"])
        green_zone = "Safe Distance = {}".format(
            log["total_person_in_green_zone"])
        notification_bar_thickness = 3

        overlay = inputs.copy()
        background = inputs.copy()
        opacity = 0.4

        # Panel coordinates below are laid out for a 1280x720 frame.
        cv2.rectangle(overlay, (0, 0), (1280, 100), (255, 255, 255), -1)
        cv2.rectangle(overlay, (0, 615), (400, 720), (255, 255, 255), -1)
        cv2.addWeighted(overlay, opacity, background, 1 - opacity, 0,
                        inputs)

        cv2.putText(inputs, title, (195, 50), cv2.FONT_HERSHEY_DUPLEX, 1,
                    (0, 0, 0), 2, cv2.LINE_AA)  ### Text Main Title
        cv2.putText(inputs, total_person, (50, 640),
                    cv2.FONT_HERSHEY_DUPLEX, 0.8, (0, 0, 0), 2,
                    cv2.LINE_AA)  ### Text Total Person

        cv2.line(inputs, (15, 660), (40, 660), (0, 0, 255),
                 notification_bar_thickness)  ### Line red-zone
        cv2.putText(inputs, red_zone, (50, 670), cv2.FONT_HERSHEY_DUPLEX,
                    0.8, (0, 0, 255), 1,
                    cv2.LINE_AA)  ### Text Red Zone Person

        cv2.line(inputs, (15, 700), (40, 700), (0, 255, 0),
                 notification_bar_thickness)  ### Line Green-zone
        cv2.putText(inputs, green_zone, (50, 710), cv2.FONT_HERSHEY_DUPLEX,
                    0.8, (0, 255, 0), 1,
                    cv2.LINE_AA)  ### Text green Zone Person

        # NOTE(review): the overlay is drawn before inference, so the network
        # sees the panels and text as part of the image - confirm intended.
        with torch.no_grad():
            inputs = torch.from_numpy(inputs).cuda().float()
            images = FastBaseTransform()(inputs.unsqueeze(0))
            images = images.to(device)
            preds = self.model(images)
            frame = self.prep_display(preds,
                                      inputs,
                                      None,
                                      None,
                                      undo_transform=False)

        ret, jpeg = cv2.imencode('.jpg', frame)
        torch.cuda.empty_cache()
        # tobytes() returns the same bytes as the deprecated tostring().
        return jpeg.tobytes()

Exemple #20

0

Afficher le fichier

        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')

    if args.resume and not args.display:
        with open(args.ap_data_file, 'rb') as f:
            ap_data = pickle.load(f)
        calc_map(ap_data)
        exit()

    dataset = None

    print('Loading model...', end='')
    net = Yolact()
    net.load_weights(args.trained_model)
    net.eval()
    print(' Done.')

    if args.cuda:
        net = net.cuda()

    net.detect.use_fast_nms = args.fast_nms
    net.detect.use_cross_class_nms = args.cross_class_nms
    cfg.mask_proto_debug = args.mask_proto_debug



scan = Scan(rgb_paths=rgb_paths, depth_paths=depth_paths, pose_paths=pose_paths,
            cam_intr=cam_intr, mesh_plot=mesh_plot, scannet_data=scannet_data, mask_net=net,
            args=args, root_path=root_path, use_gpu=use_gpu)

Exemple #21

0

Afficher le fichier

Fichier : dotmask_node.py Projet : linukc/dotmask

class DOTMask():

    def __init__(self, nn, input_device):
        """
        Load the selected segmentation network and set up the tracking state.

        Args:
            nn: Network selector: 'yolact', 'yolact++', 'yolact_edge' or
                'mrcnn'. Any other value prints "no nn defined" and no
                network is loaded.
            input_device: Not used by the active code in this method (it only
                appears in a commented-out block in the original).
        """
        print('Loading model...')
        self.nn = nn

        # One row per Yolact flavour:
        # (selector, banner, package directory, config name, weights file)
        yolact_variants = (
            ('yolact', "Selected NN: Yolact", '../nn/yolact/',
             "yolact_resnet50_config",
             "../weights/yolact_resnet50_54_800000.pth"),
            ('yolact++', "Selected NN: Yolact++", '../nn/yolact/',
             "yolact_plus_resnet50_config",
             "../weights/yolact_plus_resnet50_54_800000.pth"),
            ('yolact_edge', "Selected NN: Yolact_edge", '../nn/yolact_edge',
             "yolact_edge_resnet50_config",
             "../weights/yolact_edge_resnet50_54_800000.pth"),
        )
        variant = next(
            (row for row in yolact_variants if self.nn == row[0]), None)

        if variant is not None:
            _, banner, package_dir, config_name, weights_file = variant
            print(banner)

            # The network package is imported only after its directory has
            # been put on sys.path.
            sys.path.append(package_dir)
            from yolact import Yolact
            from data import cfg, set_cfg, set_dataset
            import torch
            import torch.backends.cudnn as cudnn

            set_cfg(config_name)
            cfg.eval_mask_branch = True
            cfg.mask_proto_debug = False
            cfg.rescore_bbox = True

            self.net = Yolact()
            self.net.load_weights(weights_file)
            self.net.eval()
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
            self.net = self.net.cuda()

        elif self.nn == 'mrcnn':
            print("Selected NN: Mask-RCNN")
            # Keras
            import keras
            from keras.models import Model
            from keras import backend as K
            K.common.set_image_dim_ordering('tf')

            # Mask-RCNN
            sys.path.append('../nn/Mask_RCNN/')
            from mrcnn import config
            from mrcnn import utils
            from mrcnn import model as modellib
            from inference_config import InferenceConfig

            self.config = InferenceConfig()
            self.model = modellib.MaskRCNN(
                mode="inference",
                model_dir="../weights/",
                config=self.config)

            # Load weights trained on MS-COCO
            self.model.load_weights("../weights/mask_rcnn_coco.h5", by_name=True)

        else:
            print("no nn defined")

        self.bridge = CvBridge()

        # Object bookkeeping
        self._max_inactive_frames = 10  # Maximum nb of frames before destruction
        self.next_object_id = 0  # ID for next object
        self.objects_dict = {}  # Detected objects dictionary
        self.var_init = 0
        self.cam_pos_qat = np.array([[0.,0.,0.],[0.,0.,0.,1.]])
        self.cam_pos = np.array([[0.,0.,0.],[0.,0.,0.]])

        # Tunables
        self.dilatation = 1
        self.score_threshold = 0.1
        self.max_number_observation = 5
        self.human_threshold = 0.01
        self.object_threshold = 0.3
        self.iou_threshold = 0.9
        self.selected_classes = [0, 56, 67]
        self.masked_id = []

        # Output topics
        self.depth_image_pub = rospy.Publisher(
            "/camera/depth_registered/masked_image_raw",
            Image, queue_size=1)
        self.dynamic_depth_image_pub = rospy.Publisher(
            "/camera/depth_registered/dynamic_masked_image_raw",
            Image, queue_size=1)

        # Latest frames and their headers
        self.frame = []
        self.depth_frame = []
        self.msg_header = std_msgs.msg.Header()
        self.depth_msg_header = std_msgs.msg.Header()

        # Class names COCO dataset
        self.class_names = [
            'person', 'bicycle', 'car', 'motorcycle',
            'airplane', 'bus', 'train', 'truck', 'boat',
            'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench',
            'bird', 'cat', 'dog', 'horse', 'sheep',
            'cow', 'elephant', 'bear', 'zebra', 'giraffe',
            'backpack', 'umbrella', 'handbag', 'tie', 'suitcase',
            'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
            'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
            'bottle', 'wine glass', 'cup', 'fork', 'knife',
            'spoon', 'bowl', 'banana', 'apple', 'sandwich',
            'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
            'donut', 'cake', 'chair', 'couch', 'potted plant',
            'bed', 'dining table', 'toilet', 'tv', 'laptop',
            'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
            'oven', 'toaster', 'sink', 'refrigerator', 'book',
            'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
            'toothbrush']
        
    def get_active(self, val):
        for key in self.objects_dict:
            if self.objects_dict[key]["maskID"] == val:
                return self.objects_dict[key]["activeObject"]
        return "Key not exist"

    def class_selection(self, masks_in, class_ids):
        """
        Keep only the masks whose class id is 0, 39 or 56.

        Returns an array with one layer per entry of `class_ids`; layers of
        other classes are all zero. If `masks_in` has fewer than two
        dimensions, a single zero layer of shape (1, 480, 640) is returned.
        """
        if len(masks_in.shape) <= 1:
            return np.zeros([1, 480, 640])

        kept_classes = (0, 39, 56)
        selected = np.zeros(
            [class_ids.shape[0], masks_in.shape[1], masks_in.shape[2]])
        for layer in range(masks_in.shape[0]):
            class_id = class_ids[layer]
            if any(class_id == wanted for wanted in kept_classes):
                selected[layer, :, :] = masks_in[layer, :, :]
        return selected

    def static_masks_selection(self, masks_in, class_ids):
        """
        Function for static Mask class selection
        """
        if len(masks_in.shape) > 1:
            masks=copy.deepcopy(masks_in)
            x = np.zeros([masks.shape[0], masks.shape[1], masks.shape[2]])
            for i in self.objects_dict:
                if not np.in1d(i, self.masked_id):
                    if self.objects_dict[i]["activeObject"] == 1 and self.objects_dict[i]["maskID"] < masks.shape[0] and (class_ids[self.objects_dict[i]["maskID"]] == 0 or class_ids[self.objects_dict[i]["maskID"]] == 39 or 
                        class_ids[self.objects_dict[i]["maskID"]] == 56):
                        x[self.objects_dict[i]["maskID"], :, :] = masks[self.objects_dict[i]["maskID"], :, :]
                        
                    elif self.objects_dict[i]["activeObject"] == 0 and self.objects_dict[i]["maskID"] < masks.shape[0]:
                        x[self.objects_dict[i]["maskID"], :, :] = 0
                    else:
                        pass
                    self.masked_id.append(i)
            return x
        else:
            x = np.zeros([1, 480, 640])
            return x

    def read_objects_pose(self):
        """
        Refresh "worldPose" of every tracked object from the tf tree.

        The transform looked up is '/map' -> '<Type>_<object id>', where
        <Type> is "Person", "Bottle" or "Chair" for class ids 0, 39 and 56
        and "Nan" otherwise. Objects whose lookup fails keep their old pose.
        """
        type_by_class = ((0, "Person"), (39, "Bottle"), (56, "Chair"))

        for obj_id, tracked in self.objects_dict.items():
            class_id = tracked["classID"]
            object_type = next(
                (label for cid, label in type_by_class if class_id == cid),
                "Nan")
            frame_name = object_type + '_' + str(obj_id)

            try:
                pose, rot = listener.lookupTransform('/map', frame_name,
                                                     rospy.Time(0))
                tracked["worldPose"] = pose
            except (tf.LookupException, tf.ConnectivityException,
                    tf.ExtrapolationException):
                continue
                        
    def handle_objects_pose(self):
        """
        Broadcast the estimated pose of every person, bottle and chair.

        For each tracked object with class id 0, 39 or 56 a transform
        '/map' -> '<Type>_<object id>' is sent, using the object's
        "estimatedPose" as translation and an identity rotation.
        """
        type_by_class = ((0, "Person"), (39, "Bottle"), (56, "Chair"))

        for obj_id, tracked in self.objects_dict.items():
            class_id = tracked["classID"]
            object_type = next(
                (label for cid, label in type_by_class if class_id == cid),
                None)
            if object_type is None:
                # Other classes are not broadcast.
                continue

            br = tf.TransformBroadcaster()
            e_pose = tracked["estimatedPose"]
            translation = (e_pose[0], e_pose[1], e_pose[2])
            rotation = tf.transformations.quaternion_from_euler(0, 0, 0)
            br.sendTransform(translation,
                             rotation,
                             rospy.Time.now(),
                             object_type + '_' + str(obj_id),
                             '/map')

    def iou_centered_centroid(self, rois_old, rois_new, mask_old, mask_new):
        """
        Intersection-over-union of two masks after centring each one.

        Each mask is cropped to its ROI (indexed as rows roi[1]:roi[3],
        columns roi[0]:roi[2]), padded back with `add_padding` so the crop
        sits in the middle, and trimmed to [1:478, 1:638] before the IoU is
        computed.

        NOTE(review): the fixed trim presumably assumes 480x640 masks -
        confirm. The result is NaN when both centred masks are empty.
        """
        rows = mask_old.shape[0]
        cols = mask_old.shape[1]

        def centred(mask, roi):
            # Padding that re-centres the ROI crop; both sizes are taken
            # from mask_old, exactly as in the original computation.
            pad_x = int((rows - (roi[3] - roi[1])) / 2)
            pad_y = int((cols - (roi[2] - roi[0])) / 2)
            patch = mask[roi[1]:roi[3], roi[0]:roi[2]]
            padded = add_padding(patch, pad_y, pad_x, pad_y, pad_x)
            return padded[1:478, 1:638]

        old_centred = centred(mask_old, rois_old)
        new_centred = centred(mask_new, rois_new)

        overlap = np.logical_and(old_centred, new_centred)
        combined = np.logical_or(old_centred, new_centred)
        return np.sum(overlap) / np.sum(combined)

    def apply_depth_image_masking(self, image_in, masks):
        """Apply the given mask to the image.
        """
        
        image = copy.deepcopy(image_in)
        image_static = copy.deepcopy(image_in)
        for i in range(masks.shape[0]):
            is_active = self.get_active(i)
            mask = masks[i, :, :]
            mask = ndimage.binary_dilation(mask, iterations=self.dilatation)
            if is_active == 1:
                image[:, :] = np.where(mask == 1,
                                    0,
                                    image[:, :])
                image_static[:, :] = np.where(mask == 1,
                                    0,
                                    image[:, :])
            else:
                image[:, :] = np.where(mask == 1,
                                    0,
                                    image[:, :])

            
        return image_static, image

    def mask_dilatation(self, masks):

        timebefore = time.time()
        mask=copy.deepcopy(masks)
        for i in range(mask.shape[0]):
            mask[i] = ndimage.binary_dilation(mask[i], iterations=self.dilatation)

        print("Numpy dilation time : ", - (timebefore - time.time()))
        return mask

    def mask_dilatation_cv(self, masks):
        """
        Return a copy of `masks` with every layer dilated
        ``self.dilatation`` times by ``cv2.dilate`` with a 3x3 kernel of
        ones. The elapsed time is printed.
        """
        started = time.time()
        dilated = copy.deepcopy(masks)
        structuring = np.ones((3, 3))
        layer_count = dilated.shape[0]
        for layer in range(layer_count):
            dilated[layer] = cv2.dilate(dilated[layer], structuring,
                                        iterations=self.dilatation)

        print("cv2 dilation time : ", - (started - time.time()))
        return dilated

    def get_masking_depth(self, image, mask):
        """
        Mean image value under each mask.

        For every mask layer, the image values where the mask equals 1 are
        kept, NaNs are replaced by 0, and the mean over the remaining
        non-zero pixels is computed.

        Args:
            image: 2-D array (indexed as image[row, col]).
            mask: Mask stack indexed as mask[layer, row, col].

        Returns:
            1-D float array with one mean per mask layer; 0 where a layer
            selects no non-zero pixel.
        """
        x = np.zeros([image.shape[0], image.shape[1]])
        y = np.zeros(mask.shape[0])

        for i in range(mask.shape[0]):
            # Keep only the pixels covered by this mask.
            x[:, :] = np.where(mask[i, :, :] != 1,
                               0,
                               image[:, :])
            # Invalid (NaN) readings must not contribute to the mean.
            x[:, :] = np.where(np.isnan(x[:, :]),
                               0,
                               x[:, :])

            # Count the contributing pixels once, in native code.
            valid_pixels = np.count_nonzero(x)
            if valid_pixels == 0:
                y[i] = 0
            else:
                y[i] = x.sum() / valid_pixels

        return y

    def add_object(self, centroid, dimensions, mask_id, class_id, mask_old, rois_old):
        """
        Register a new tracked object and create its Kalman filter.

        The centroid is transformed into the '/map' frame using the current
        '/map' -> ``self.tf_camera`` transform (identity if the lookup
        fails) and used as the initial position of a six-element state whose
        last three entries start at 0.

        Args:
            centroid: Indexable with at least three entries (x, y, z).
            dimensions: Stored as "dimension".
            mask_id: Stored as "maskID".
            class_id: Stored as "classID"; also selects the noise scale.
            mask_old: Stored as "maskOld".
            rois_old: Stored as "roisOld".
        """
        dt = 0.25

        try:
            (transc, rotc) = listener.lookupTransform('/map', self.tf_camera, rospy.Time(0))
        except (tf.LookupException, tf.ConnectivityException, tf.ExtrapolationException):
            # No transform available: fall back to the identity pose.
            transc = np.array([0.,0.,0.])
            rotc = np.array([0.,0.,0.,1.])

        # Homogeneous camera -> map matrix: rotation from the quaternion,
        # translation written into the last column.
        euler = tf.transformations.euler_from_quaternion(rotc)
        h_mat = tf.transformations.euler_matrix(euler[0], euler[1], euler[2])
        h_mat[0:3, 3:] = np.array([transc]).T
        b = h_mat.dot(np.array([[centroid[0], centroid[1], centroid[2], 1]]).T)[0:3, :]

        x = [b[0, 0], b[1, 0], b[2, 0], 0, 0, 0]

        P = np.eye(len(x))

        # Transition: each of the first three states advances by dt times
        # the matching one of the last three.
        F = np.array([[ 1,  0,  0, dt,  0,  0],
                      [ 0,  1,  0,  0, dt,  0],
                      [ 0,  0,  1,  0,  0, dt],
                      [ 0,  0,  0,  1,  0,  0],
                      [ 0,  0,  0,  0,  1,  0],
                      [ 0,  0,  0,  0,  0,  1]])

        H = np.array([[ 0.001,  0,  0,  0,  0,  0],
                      [ 0,  0.001,  0,  0,  0,  0],
                      [ 0,  0,  0.001,  0,  0,  0]])

        # NOTE(review): other methods treat class id 0 as "Person"; confirm
        # that 1 is the intended id here.
        if class_id == 1:
            ax = ay = az = 0.68
        else:
            ax = ay = az = 1

        # Each axis uses its own scale on the diagonal (the original reused
        # `ax` for the last two entries; values are unchanged since all
        # three scales are equal).
        q_pos = (dt**4) / 4
        q_vel = dt**2
        Q = np.array([[q_pos*(ax**2),  0.0,  0.0,  q_pos*(ax**3),  0.0,  0.0],
                      [0.0,  q_pos*(ay**2),  0.0,  0.0,  q_pos*(ay**3),  0.0],
                      [0.0,  0.0,  q_pos*(az**2),  0.0,  0.0,  q_pos*(az**3)],
                      [q_pos*(ax**3),  0.0,  0.0,  q_vel*(ax**2),  0.0,  0.0],
                      [0.0,  q_pos*(ay**3),  0.0,  0.0,  q_vel*(ay**2),  0.0],
                      [0.0,  0.0,  q_pos*(az**3),  0.0,  0.0,  q_vel*(az**2)]])

        R = np.array([[ 0.8,  0,  0],
                      [ 0,  0.8,  0],
                      [ 0,  0,  1.2]])

        self.objects_dict.update({self.next_object_id : {
            "kalmanFilter" : extendedKalmanFilter(x, P, F, H, Q, R),
            "centroid" : centroid,
            "dimension" : dimensions,
            "classID" : class_id,
            "roisOld" : rois_old,
            "maskID" : mask_id,
            "maskOld" : mask_old,
            "worldPose" : [0,0,0],
            "estimatedVelocity" : [0,0,0],
            "estimatedPose" : [0,0,0],
            "inactiveNbFrame" : 0,
            "activeObject" : 0}})

        self.next_object_id = self.next_object_id+1
        
    def delete_object(self, object_id):
        del self.objects_dict[object_id]

    def mask_to_centroid(self, rois, mask_depth):
        """
        Convert each ROI and its depth into a 3-D centroid and a 2-D size.

        The ROI centre is back-projected with the pinhole intrinsics selected
        by the module-level ``args.input`` ('tum', 'xtion' or 'zed'). A depth
        of -1 is treated as 0.

        Args:
            rois: 2-D array indexed as rois[i, 0..3].
            mask_depth: Depth per ROI, indexed by the same i.

        Returns:
            (current_centroids, current_dimensions): dicts keyed by ROI
            index, holding [x, y, z] and
            [rois[i,3]-rois[i,1], rois[i,2]-rois[i,0]] respectively.

        Raises:
            ValueError: If there is at least one ROI and ``args.input`` is
                not a known sensor (previously this printed a message and
                then failed with UnboundLocalError).
        """
        # (fx, fy, cx, cy): focal lengths and optical centre per input.
        intrinsics_by_input = {
            'tum': (525.0, 525.0, 319.5, 239.5),
            'xtion': (525, 525, 319.5, 239.5),          # Asus xtion sensor
            'zed': (350.113, 350.113, 336.811, 190.357),  # Zed left img vga
        }

        current_centroids = {}
        current_dimensions = {}
        if len(rois) == 0:
            return current_centroids, current_dimensions

        # The intrinsics do not depend on the ROI, so resolve them once.
        if args.input not in intrinsics_by_input:
            print("No valid input")
            raise ValueError("No valid input: %r" % (args.input,))
        fx, fy, cx, cy = intrinsics_by_input[args.input]

        for i in range(len(rois)):
            # Translation from depth pixel to local point
            if mask_depth[i] == -1:
                z = 0
            else:
                z = mask_depth[i]

            y = (((rois[i,3]+rois[i,1])/2) - cy) * z / fy
            x = (((rois[i,2]+rois[i,0])/2) - cx) * z / fx

            current_centroids[i] = [x, y, z]
            current_dimensions[i] = [rois[i,3]-rois[i,1], rois[i,2]-rois[i,0]]
        return current_centroids, current_dimensions
        
    def live_analysis(self):
        """
        Function for live stream video masking

        Loops until ROS shuts down. Each iteration takes the latest colour
        and depth frames stored by ``image_callback`` and
        ``depth_image_callback``, runs the configured network on the colour
        frame, keeps the leading detections, associates them with the
        tracked objects in ``self.objects_dict`` (one Kalman filter per
        object), writes the object poses out through
        ``handle_objects_pose`` and publishes the two masked depth images.
        While either frame is still empty a "waiting" animation is printed
        instead.

        Raises:
            ValueError: if ``self.nn`` is not one of 'yolact', 'yolact++',
                'yolact_edge' or 'mrcnn'.
        """

        bar = [
                " Waiting for frame [=     ]              ",
                " Waiting for frame [ =    ]              ",
                " Waiting for frame [  =   ]              ",
                " Waiting for frame [   =  ]              ",
                " Waiting for frame [    = ]              ",
                " Waiting for frame [     =]              ",
                " Waiting for frame [    = ]              ",
                " Waiting for frame [   =  ]              ",
                " Waiting for frame [  =   ]              ",
                " Waiting for frame [ =    ]              ",
            ]
        idx = 0
        while not rospy.is_shutdown():
            self.masked_id = []
            current_frame = self.frame
            current_depth_frame = self.depth_frame

            # len() rather than truthiness: the frames may be numpy arrays.
            if len(current_frame) == 0 or len(current_depth_frame) == 0:
                print(bar[idx % len(bar)], end="\r")
                idx = idx + 1
                time.sleep(0.1)
                continue

            r = self._detect_instances(current_frame)

            # Keep at most max_number_observation detections and cut the
            # list at the first score below the threshold (this assumes the
            # network returns detections sorted by decreasing score --
            # TODO confirm).
            self.number_observation = min(self.max_number_observation, r['class_ids'].shape[0])
            for j in range(self.number_observation):
                if r['scores'][j] < self.score_threshold:
                    self.number_observation = j
                    break

            for field in ('class_ids', 'scores', 'rois', 'masks'):
                r[field] = r[field][:self.number_observation]

            mask_depth = self.get_masking_depth(current_depth_frame, r['masks'])

            # Read object tf pose
            self.read_objects_pose()

            # Read camera tf pose
            h_mat = self._lookup_camera_transform()

            # Main filter update and prediction step
            if len(r['rois']) == 0:
                # Nothing detected: every tracked object misses this frame.
                # NOTE(review): this path deletes on "> _max_inactive_frames"
                # while the association path uses ">="; kept as found.
                objects_to_delete = []
                for i in self.objects_dict:
                    self.objects_dict[i]["inactiveNbFrame"] = self.objects_dict[i]["inactiveNbFrame"] + 1
                    if self.objects_dict[i]["inactiveNbFrame"] > self._max_inactive_frames:
                        objects_to_delete.append(i)

                # Deleted after the loop so the dict is not resized while
                # it is being iterated.
                for i in objects_to_delete:
                    self.delete_object(i)
            else:
                current_centroids, current_dimensions = self.mask_to_centroid(r['rois'], mask_depth)

                if not self.objects_dict:
                    # Nothing tracked yet: every detection starts an object.
                    for i in range(len(current_centroids)):
                        self.add_object(current_centroids[i], current_dimensions[i], i, r['class_ids'][i], r['masks'][i], r['rois'][i])

                    for i in self.objects_dict:
                        self._run_filter_step(i, self.objects_dict[i]["centroid"], h_mat)
                else:
                    self._associate_detections(r, current_centroids, current_dimensions, h_mat)

            # Write objects filter pose to tf
            self.handle_objects_pose()

            result_dynamic_depth_image, result_depth_image = self.apply_depth_image_masking(current_depth_frame, r['masks'])

            dynamic_depth_msg = self.bridge.cv2_to_imgmsg(result_dynamic_depth_image, '32FC1')
            dynamic_depth_msg.header = self.depth_msg_header
            self.dynamic_depth_image_pub.publish(dynamic_depth_msg)

            depth_msg = self.bridge.cv2_to_imgmsg(result_depth_image, '32FC1')
            depth_msg.header = self.depth_msg_header
            self.depth_image_pub.publish(depth_msg)

    def _detect_instances(self, current_frame):
        """
        Run the configured network on one colour frame.

        Returns a dict with the numpy entries 'class_ids', 'scores',
        'rois' and 'masks', in the layout the rest of the loop indexes
        (first axis = detection).

        Raises:
            ValueError: if ``self.nn`` names no supported network.
        """
        if self.nn in ('yolact', 'yolact++', 'yolact_edge'):
            frame = torch.from_numpy(current_frame).cuda().float()
            batch = FastBaseTransform()(frame.unsqueeze(0))
            if self.nn == 'yolact_edge':
                extras = {"backbone": "full", "interrupt":False, "keep_statistics":False, "moving_statistics":None}
                preds = self.net(batch.cuda(), extras=extras)
                preds = preds["pred_outs"]
            else:
                preds = self.net(batch.cuda())

            h, w, _ = frame.shape
            class_ids, scores, rois, masks = postprocess(preds, w, h, score_threshold=self.score_threshold)

            # Copy the results off the device into independent numpy arrays.
            return {
                'class_ids': copy.deepcopy(class_ids.cpu().data.numpy()),
                'scores': copy.deepcopy(scores.cpu().data.numpy()),
                'rois': copy.deepcopy(rois.cpu().data.numpy()),
                'masks': copy.deepcopy(masks.cpu().data.numpy()),
            }

        if self.nn == 'mrcnn':
            results = self.model.detect([current_frame],verbose=1)
            r = results[0]
            # Move the last mask axis to the front so masks are indexed by
            # detection first (presumably (H, W, N) -> (N, H, W) -- confirm).
            r['masks'] = np.transpose(r['masks'], (2, 0, 1))

            # Swap each box's coordinate pairs so the column order matches
            # the one used for the yolact networks (presumably
            # y1, x1, y2, x2 -> x1, y1, x2, y2 -- confirm).
            for i in range(r['rois'].shape[0]):
                first, second, third, fourth = r['rois'][i]
                r['rois'][i] = [second, first, fourth, third]
            # Shift class ids down by one (presumably to drop Mask R-CNN's
            # background class so ids line up with the yolact ones).
            r['class_ids'] = r['class_ids'] - 1
            return r

        raise ValueError("Unsupported network: {!r}".format(self.nn))

    def _lookup_camera_transform(self):
        """
        Build the homogeneous matrix handed to the Kalman filter updates.

        The transform is looked up between ``self.tf_camera`` and '/map'
        at the latest available time. When the lookup fails, a zero
        translation and the quaternion [0, 0, 0, 1] (identity rotation in
        tf's x, y, z, w order) are used instead.
        """
        try:
            (transc, rotc) = listener.lookupTransform(self.tf_camera,'/map', rospy.Time(0))
        except (tf.LookupException, tf.ConnectivityException, tf.ExtrapolationException):
            transc = np.array([0.,0.,0.])
            rotc = np.array([0.,0.,0.,1.])

        euler = tf.transformations.euler_from_quaternion(rotc)
        h_mat = tf.transformations.euler_matrix(euler[0],euler[1],euler[2])
        # Write the translation into the last column of the rotation matrix.
        h_mat[0:3,3:] = np.array([transc]).T
        return h_mat

    def _run_filter_step(self, object_key, measurement, h_mat):
        """
        Run one Kalman prediction + update for a tracked object and store
        the resulting state (first three entries as pose, next three as
        velocity) on the object.
        """
        tracked = self.objects_dict[object_key]
        kalman = tracked["kalmanFilter"]
        kalman.prediction()
        kalman.update(measurement, h_mat)
        tracked["estimatedPose"] = kalman.x[0:3]
        tracked["estimatedVelocity"] = kalman.x[3:6]

    def _associate_detections(self, r, current_centroids, current_dimensions, h_mat):
        """
        Match the current detections to the tracked objects.

        Pairs are visited from the smallest to the largest centroid
        distance; a pair is accepted when neither side is already matched
        and both have the same class. Matched objects are updated with the
        detection, unmatched objects are aged (and deleted once
        ``inactiveNbFrame`` reaches ``self._max_inactive_frames``), and
        unmatched detections become new objects.
        """
        # Snapshot the keys: objects are deleted and added further down.
        object_keys = list(self.objects_dict)
        objects_pose = np.zeros((len(object_keys), 3))
        for row, key in enumerate(object_keys):
            objects_pose[row,] = self.objects_dict[key]["centroid"]

        centroids_pose = np.zeros((len(current_centroids), 3))
        for i in range(len(current_centroids)):
            centroids_pose[i,] = current_centroids[i]
        n_centroids = len(centroids_pose)

        # cdist returns an (objects x centroids) matrix; flattened in
        # row-major order, entry k belongs to object k // n_centroids and
        # centroid k % n_centroids.
        eucledian_dist_pairwise = np.array(cdist(objects_pose, centroids_pose)).flatten()
        index_sorted = np.argsort(eucledian_dist_pairwise)

        matched_objects = set()
        matched_centroids = set()

        for flat_index in index_sorted:
            object_id, centroid_id = divmod(int(flat_index), n_centroids)
            if object_id in matched_objects or centroid_id in matched_centroids:
                continue

            object_key = object_keys[object_id]
            tracked = self.objects_dict[object_key]
            if tracked["classID"] != r['class_ids'][centroid_id]:
                # Different class: leave both free for a later pair.
                continue

            matched_objects.add(object_id)
            matched_centroids.add(centroid_id)

            self._run_filter_step(object_key, current_centroids[centroid_id], h_mat)

            # Class 0 is compared against the human threshold, every
            # other class against the object threshold.
            if tracked["classID"] == 0:
                max_threshold = self.human_threshold
            else:
                max_threshold = self.object_threshold

            # Active when any velocity component exceeds the threshold.
            velocity = tracked["estimatedVelocity"]
            if any(abs(velocity[axis]) > max_threshold for axis in range(3)):
                tracked["activeObject"] = 1
            else:
                tracked["activeObject"] = 0

            # A class-0 object that looks static is still flagged active
            # when its mask overlaps poorly with the previous one.
            if tracked["classID"] == 0 and tracked["activeObject"] == 0:
                iou = self.iou_centered_centroid(tracked["roisOld"], r['rois'][centroid_id], tracked["maskOld"], r['masks'][centroid_id])
                if iou < self.iou_threshold:
                    tracked["activeObject"] = 1

            tracked["centroid"] = centroids_pose[centroid_id]
            tracked["dimensions"] = current_dimensions[centroid_id]
            tracked["inactiveNbFrame"] = 0
            tracked["maskID"] = centroid_id
            tracked["maskOld"] = r['masks'][centroid_id]
            tracked["roisOld"] = r['rois'][centroid_id]

        # Age every object that found no detection, exactly once per frame.
        for object_id, object_key in enumerate(object_keys):
            if object_id in matched_objects:
                continue
            tracked = self.objects_dict[object_key]
            tracked["inactiveNbFrame"] += 1
            tracked["activeObject"] = 0
            if tracked["inactiveNbFrame"] >= self._max_inactive_frames:
                self.delete_object(object_key)
            else:
                # No measurement this frame: prediction only, and the
                # state is read from the filter's ``x_`` attribute.
                kalman = tracked["kalmanFilter"]
                kalman.prediction()
                tracked["estimatedPose"] = kalman.x_[0:3]
                tracked["estimatedVelocity"] = kalman.x_[3:6]

        # Start exactly one new object per detection that matched nothing.
        for centroid_id in range(n_centroids):
            if centroid_id in matched_centroids:
                continue
            # add_object is expected to store the new entry under the
            # current value of next_object_id and then advance it.
            new_key = self.next_object_id
            self.add_object(current_centroids[centroid_id], current_dimensions[centroid_id], centroid_id, r['class_ids'][centroid_id], r['masks'][centroid_id], r['rois'][centroid_id])
            self._run_filter_step(new_key, current_centroids[centroid_id], h_mat)

    def image_callback(self, msg):
        """
        Colour image subscriber callback.

        Remembers the header of ``msg`` in ``self.msg_header`` and stores
        the image, converted by the bridge with the "bgr8" encoding, in
        ``self.frame``.
        """
        header = msg.header
        self.msg_header = header

        decoded = self.bridge.imgmsg_to_cv2(msg, "bgr8")
        self.frame = decoded

    def depth_image_callback(self, msg):
        """
        Depth image subscriber callback.

        Remembers the header of ``msg`` in ``self.depth_msg_header`` and
        stores the image, converted by the bridge with the "32FC1"
        encoding, in ``self.depth_frame``.
        """
        header = msg.header
        self.depth_msg_header = header

        # Encoding notes from the original author:
        # 32FC1 for the Asus Xtion, 8UC1 for the Kinect.
        decoded = self.bridge.imgmsg_to_cv2(msg, "32FC1")
        self.depth_frame = decoded