Example #1
    def update_thread(self):
        self.log.debug('init update_thread')
        self._stream = cv.VideoCapture(self._fp_video)
        frame_idx = 0

        while True:
            valid, frame = self._stream.read()
            if not valid:
                self._stream.release()
                break
            if frame_idx == 0:
                # log the native frame size once, without dropping the frame
                self.log.debug('size: {}'.format(frame.shape))
            frame = im_utils.resize(frame, width=self._frame_width)
            self._keyframes[frame_idx] = frame
            if self._opt_drawframes:
                # make a drawable copy
                self._drawframes[frame_idx] = frame.copy()
            frame_idx += 1
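Every example in this listing funnels frames through `im_utils.resize(frame, width=...)`. For reference, here is a minimal sketch of an aspect-preserving width resize, assuming OpenCV; the helper name `resize_width` and the INTER_AREA interpolation are illustrative assumptions, not vframe's actual implementation:

import cv2 as cv

def resize_width(frame, width):
    # scale the height proportionally so only a target width is needed
    h, w = frame.shape[:2]
    scale = width / float(w)
    return cv.resize(frame, (width, int(round(h * scale))),
                     interpolation=cv.INTER_AREA)  # reasonable default for downscaling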
Example #2
 def frame_transform(frame):
     return im_utils.resize(frame, width=opt_size)
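`frame_transform` closes over `opt_size` from its enclosing scope, so it can be handed around as a per-frame callback. A hedged usage sketch; `opt_size`, `frames`, and the 640px width are illustrative:

opt_size = 640  # illustrative target width
# frames: a list of decoded BGR frames, eg from im_utils.vid2frames
frames_resized = [frame_transform(frame) for frame in frames]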
Example #3
def cli(ctx, sink, opt_disk, opt_net, opt_dnn_size, opt_pyramids,
        opt_conf_thresh, opt_gpu):
    """Generates face detection ROIs"""

    # ----------------------------------------------------------------
    # imports

    import os
    from os.path import join
    from pathlib import Path

    import click
    import cv2 as cv
    import dlib
    import numpy as np
    from nms import nms

    from vframe.utils import click_utils, file_utils, im_utils, logger_utils, dnn_utils
    from vframe.models.metadata_item import ROIMetadataItem, ROIDetectResult
    from vframe.settings.paths import Paths
    from vframe.models.bbox import BBox

    # ----------------------------------------------------------------
    # init

    log = logger_utils.Logger.getLogger()

    metadata_type = types.Metadata.FACE_ROI

    if opt_net == types.FaceDetectNet.CVDNN:
        dnn_scale = 1.0  # fixed
        dnn_mean = (104.0, 177.0, 123.0)  # fixed
        dnn_crop = False  # resize without center-cropping
        fp_prototxt = join(cfg.DIR_MODELS_CAFFE, 'face_detect',
                           'opencv_face_detector.prototxt')
        fp_model = join(cfg.DIR_MODELS_CAFFE, 'face_detect',
                        'opencv_face_detector.caffemodel')
        log.debug('fp_model: {}'.format(fp_model))
        net = cv.dnn.readNet(fp_prototxt, fp_model)
        # TODO parameterize
        net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
        net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)
    elif opt_net == types.FaceDetectNet.DLIB_CNN:
        # use dlib's CNN module
        cuda_visible_devices = os.getenv('CUDA_VISIBLE_DEVICES', '')
        os.environ['CUDA_VISIBLE_DEVICES'] = str(opt_gpu)
        cnn_face_detector = dlib.cnn_face_detection_model_v1(
            cfg.DIR_MODELS_DLIB_CNN)
        os.environ['CUDA_VISIBLE_DEVICES'] = cuda_visible_devices  # reset
    elif opt_net == types.FaceDetectNet.DLIB_HOG:
        # use dlib's HoG module
        dlib_hog_predictor = dlib.get_frontal_face_detector()
    elif opt_net == types.FaceDetectNet.HAAR:
        # use OpenCV's Haar cascade module
        log.error('not yet implemented')
        return

    # ----------------------------------------------------------------
    # process

    # iterate sink
    while True:

        chair_item = yield

        metadata = {}

        for frame_idx, frame in chair_item.keyframes.items():

            rois = []

            if opt_net == types.FaceDetectNet.CVDNN:
                # use OpenCV's DNN face detector with caffe model
                frame = cv.resize(frame, opt_dnn_size)
                blob = cv.dnn.blobFromImage(frame, dnn_scale, opt_dnn_size,
                                            dnn_mean, crop=dnn_crop)
                net.setInput(blob)
                net_outputs = net.forward()

                for i in range(0, net_outputs.shape[2]):
                    conf = net_outputs[0, 0, i, 2]
                    if conf > opt_conf_thresh:
                        rect_norm = net_outputs[0, 0, i, 3:7]
                        rois.append(ROIDetectResult(conf, rect_norm))
                        log.debug('face roi: {}'.format(rect_norm))

            elif opt_net == types.FaceDetectNet.DLIB_CNN:
                frame = im_utils.resize(frame,
                                        width=opt_dnn_size[0],
                                        height=opt_dnn_size[1])
                # convert to RGB for dlib
                dim = frame.shape[:2][::-1]
                frame = im_utils.bgr2rgb(frame)
                mmod_rects = cnn_face_detector(frame, opt_pyramids)
                for mmod_rect in mmod_rects:
                    if mmod_rect.confidence > opt_conf_thresh:
                        bbox = BBox.from_dlib_dim(mmod_rect.rect, dim)
                        # NB mmod_rect.confidence is sometimes > 1.0 ?
                        rois.append(
                            ROIDetectResult(mmod_rect.confidence,
                                            bbox.as_xyxy()))

            elif opt_net == types.FaceDetectNet.DLIB_HOG:
                frame = im_utils.resize(frame,
                                        width=opt_dnn_size[0],
                                        height=opt_dnn_size[1])
                # convert to RGB for dlib
                dim = frame.shape[:2][::-1]
                frame = im_utils.bgr2rgb(frame)
                hog_results = dlib_hog_predictor.run(frame, opt_pyramids)
                if len(hog_results[0]) > 0:
                    for rect, score, direction in zip(*hog_results):
                        if score > opt_conf_thresh:
                            bbox = BBox.from_dlib_dim(rect, dim)
                            rois.append(ROIDetectResult(score, bbox.as_xyxy()))

            metadata[frame_idx] = rois

        # append metadata to chair_item's mapping item
        chair_item.set_metadata(metadata_type, ROIMetadataItem(metadata))

        # ----------------------------------------------------------------
        # yield back to the processor pipeline

        # send back to generator
        sink.send(chair_item)
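The `chair_item = yield` / `sink.send(chair_item)` pattern here (and in the examples below) is a push-style coroutine pipeline. A minimal sketch of how such stages are presumably wired and primed; `collect` and the priming driver are illustrative, not vframe's actual pipeline code:

def collect(results):
    # terminal sink: receives fully processed items via .send()
    while True:
        item = yield
        results.append(item)

results = []
sink = collect(results)
next(sink)  # prime the coroutine so it advances to its first `yield`
# a processing stage built on this sink is primed the same way, then fed
# items one at a time with stage.send(chair_item)
sink.send('demo-item')
assert results == ['demo-item']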
Example #4
 def load_images(self, fp_image, opt_size_type, opt_drawframes=False):
     # load the image, resize to the target width, and store as keyframe 0
     opt_size = cfg.IMAGE_SIZES[opt_size_type]
     im = im_utils.resize(cv.imread(fp_image), width=opt_size)
     self.set_keyframes({0: im}, opt_drawframes)
Example #5
def extract(items, dir_out, dir_videos, keyframe_type, threads=1):
  """Extracts keyframes from images"""
  
  task_queue = Queue()
  print_lock = threading.Lock()
  log = logging.getLogger()

  if threads > 1:
    
    def thread_processor(task_obj):
      # plain function locals are already thread-private; the threading.local()
      # object the original created per-call added nothing
      fp_video = task_obj['fp_video']
      idxs = task_obj['idxs']
      dir_out = task_obj['dir_out']
      sha256_tree = task_obj['sha256_tree']
      sha256 = task_obj['sha256']
      try:
        frame_ims = im_utils.vid2frames(fp_video, idxs=idxs)
      except Exception:
        log.error('Could not read video file')
        log.error('file: {}'.format(fp_video))
        log.error('sha256: {}'.format(sha256))
        return

      labels = cfg.IMAGE_SIZE_LABELS
      sizes = cfg.IMAGE_SIZES

      # pyramid down frame sizes (eg 1280, 640, 320, 160)
      for k_label, k_width in zip(reversed(labels), reversed(sizes)):
        label = labels[k_label]
        width = sizes[k_width]
        try:
          frame_ims = [im_utils.resize(im, width=width) for im in frame_ims]
        except Exception:
          log.error('Could not resize. Bad video or missing file')
          log.error('sha256: {}'.format(sha256))
          return

        for idx, im in zip(idxs, frame_ims):
          # build zero-padded destination path and ensure it exists
          zpad = file_utils.zpad(idx)
          fp_dst = join(dir_out, sha256_tree, sha256, zpad, label, 'index.jpg')
          # convert BGR ndarray to PIL image for saving
          im_pil = im_utils.ensure_pil(im, bgr2rgb=True)
          file_utils.ensure_path_exists(fp_dst)
          im_pil.save(fp_dst, quality=cfg.JPG_SAVE_QUALITY)


    def process_queue(num_items):
      # TODO: progress bar
      while True:
        task_obj = task_queue.get()
        thread_processor(task_obj)
        num_done = num_items - task_queue.qsize()
        log.info('progress: {:.2f}% {:,}/{:,}'.format(
          num_done / num_items * 100, num_done, num_items))
        task_queue.task_done()

    # avoid race conditions by creating dir structure here
    log.info('create directory structure first to avoid race conditions')
    log.info('TODO: this needs to be fixed, thread lock maybe')
    for sha256, item in tqdm(items.items()):
      sha256_tree = file_utils.sha256_tree(sha256)
      fp_dst = join(dir_out, sha256_tree)
      file_utils.ensure_path_exists(fp_dst)

    # init threads
    num_items = len(items)
    for i in range(threads):
      t = threading.Thread(target=process_queue, args=(num_items,))
      t.daemon = True
      t.start()

    # process threads
    start = time.time()
    for sha256, item in items.items():
      sha256_tree = file_utils.sha256_tree(sha256)
      item_metadata = item.metadata.get(MetadataType.KEYFRAME, {})
      if not item_metadata:
        continue
      keyframe_data = item_metadata.metadata
      idxs = keyframe_data.get(keyframe_type)
      fp_video = join(dir_videos, sha256_tree, '{}.{}'.format(sha256, item.ext))
      task_obj = {
        'fp_video': fp_video,
        'idxs': idxs,
        'dir_out': dir_out,
        'sha256': sha256,
        'sha256_tree': sha256_tree
      }
      task_queue.put(task_obj)

    task_queue.join()

  else:
    
    for sha256, item in tqdm(items.items()):
      item_metadata = item.metadata.get(MetadataType.KEYFRAME, {})
      if not item_metadata:
        continue

      sha256_tree = file_utils.sha256_tree(sha256)
      keyframe_data = item_metadata.metadata
      
      # fetch the keyframe indices for the requested type
      # (eg basic, dense, or expanded, from the custom click param)
      idxs = keyframe_data.get(keyframe_type)

      # get frames from video
      fp_video = join(dir_videos, sha256_tree, '{}.{}'.format(sha256, item.ext))
      frame_ims = im_utils.vid2frames(fp_video, idxs=idxs)
      labels = cfg.IMAGE_SIZE_LABELS
      sizes = cfg.IMAGE_SIZES
      for k_label, k_width in zip(reversed(labels), reversed(sizes)):
        label = labels[k_label]
        width = sizes[k_width]
        # pyramid down frame sizes 1280, 640, 320, 160
        frame_ims = [im_utils.resize(im, width=width) for im in frame_ims]

        for idx, im in zip(idxs, frame_ims):
          # ensure path exists
          zpad = file_utils.zpad(idx)
          fp_dst = join(dir_out, sha256_tree, sha256, zpad, label, 'index.jpg')
          # convert to PIL
          im_pil = im_utils.ensure_pil(im, bgr2rgb=True)
          file_utils.ensure_path_exists(fp_dst)
          im_pil.save(fp_dst, quality=cfg.JPG_SAVE_QUALITY)
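The threaded path above daemonizes its workers and relies on `task_queue.join()` to know when work is done; the threads themselves are never told to exit. A common alternative, sketched here under the same `Queue` API (not this project's code), shuts workers down with sentinel values:

import queue
import threading

def worker(q):
    while True:
        task = q.get()
        if task is None:  # sentinel: no more work
            q.task_done()
            break
        # process(task) would go here
        q.task_done()

q = queue.Queue()
workers = [threading.Thread(target=worker, args=(q,)) for _ in range(4)]
for w in workers:
    w.start()
for task in range(10):  # stand-in tasks
    q.put(task)
for w in workers:
    q.put(None)  # one sentinel per worker
q.join()         # all tasks (and sentinels) processed
for w in workers:
    w.join()     # threads have exited cleanly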
Example #6
def cli(ctx, sink, opt_disk, opt_net, opt_gpu):
  """Generates detection metadata (CV DNN)"""

  # ----------------------------------------------------------------
  # imports

  import os
  from os.path import join
  from pathlib import Path

  import click
  import cv2 as cv
  import numpy as np

  from vframe.utils import click_utils, file_utils, im_utils, logger_utils, dnn_utils
  from vframe.models.metadata_item import DetectMetadataItem, DetectResult
  from vframe.settings.paths import Paths

  # ----------------------------------------------------------------
  # init

  log = logger_utils.Logger.getLogger()


  # TODO externalize function

  # initialize dnn
  dnn_clr = (0, 0, 0)  # mean color to subtract
  dnn_scale = 1 / 255  # scale pixel values to [0, 1]
  nms_threshold = 0.4  # non-maximum suppression threshold
  dnn_swap_rb = True  # swap BGR to RGB (Darknet models expect RGB)
  dnn_crop = False  # resize without center-cropping

  # Use multiples of 32: 416, 448, 480, 512, 544, 576, 608, 640, 672, 704
  if opt_net == types.DetectorNet.OPENIMAGES:
    metadata_type = types.Metadata.OPENIMAGES
    dnn_size = (608, 608)
    dnn_threshold = 0.875
  elif opt_net == types.DetectorNet.COCO:
    metadata_type = types.Metadata.COCO
    dnn_size = (416, 416)
    dnn_threshold = 0.925
  elif opt_net == types.DetectorNet.COCO_SPP:
    metadata_type = types.Metadata.COCO
    dnn_size = (608, 608)
    dnn_threshold = 0.875
  elif opt_net == types.DetectorNet.VOC:
    metadata_type = types.Metadata.VOC
    dnn_size = (416, 416)
    dnn_threshold = 0.875
  elif opt_net == types.DetectorNet.SUBMUNITION:
    metadata_type = types.Metadata.SUBMUNITION
    dnn_size = (608, 608)
    dnn_threshold = 0.90
  else:
    # guard against unmatched options, which would leave dnn_size undefined
    log.error('{} is not a valid detector net'.format(opt_net))
    return

  # Initialize the parameters
  fp_cfg = Paths.darknet_cfg(opt_net=opt_net, data_store=opt_disk, as_bytes=False)
  fp_weights = Paths.darknet_weights(opt_net=opt_net, data_store=opt_disk, as_bytes=False)
  fp_data = Paths.darknet_data(opt_net=opt_net, data_store=opt_disk, as_bytes=False)
  fp_classes = Paths.darknet_classes(opt_net=opt_net, data_store=opt_disk)
  class_names = file_utils.load_text(fp_classes)
  class_idx_lookup = {label: i for i, label in enumerate(class_names)}

  log.debug('fp_cfg: {}'.format(fp_cfg))
  log.debug('fp_weights: {}'.format(fp_weights))
  log.debug('fp_data: {}'.format(fp_data))
  log.debug('fp_classes: {}'.format(fp_classes))

  net = cv.dnn.readNetFromDarknet(fp_cfg, fp_weights)
  net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
  net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)

  # ----------------------------------------------------------------
  # process

  # iterate sink
  while True:
    
    chair_item = yield
    
    metadata = {}
    
    for frame_idx, frame in chair_item.keyframes.items():

      frame = im_utils.resize(frame, width=dnn_size[0], height=dnn_size[1])
      blob = cv.dnn.blobFromImage(frame, dnn_scale, dnn_size, dnn_clr,
        swapRB=dnn_swap_rb, crop=dnn_crop)
      
      # Sets the input to the network
      net.setInput(blob)

      # Runs the forward pass to get output of the output layers
      net_outputs = net.forward(dnn_utils.getOutputsNames(net))
      det_results = dnn_utils.nms_cvdnn(net_outputs, dnn_threshold, nms_threshold)
      
      metadata[frame_idx] = det_results

    # append metadata to chair_item's mapping item
    chair_item.set_metadata(metadata_type, DetectMetadataItem(metadata))
  
    # ----------------------------------------------------------------
    # yield back to the processor pipeline

    # send back to generator
    sink.send(chair_item)
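A note on the fix above: the positional parameters of `cv.dnn.blobFromImage` are easy to misorder, and the original code passed a "pixel range" value into the `swapRB` slot. For reference, the call maps onto this signature; the input values here are illustrative:

import cv2 as cv
import numpy as np

frame = np.zeros((416, 416, 3), dtype=np.uint8)  # stand-in BGR frame
# signature: cv.dnn.blobFromImage(image, scalefactor, size, mean, swapRB, crop)
blob = cv.dnn.blobFromImage(
    frame,
    1 / 255,      # scalefactor: applied after mean subtraction
    (416, 416),   # size: the network's expected input size
    (0, 0, 0),    # mean: per-channel values subtracted from the image
    swapRB=True,  # Darknet/YOLO models expect RGB; OpenCV frames are BGR
    crop=False)   # resize to size without center-cropping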
Example #7
def cli(ctx, sink, opt_disk, opt_net, opt_conf_thresh, opt_nms_thresh):
    """Generates scene text ROIs (CV DNN)"""

    # ----------------------------------------------------------------
    # imports

    import os
    from os.path import join
    from pathlib import Path

    import click
    import cv2 as cv
    import numpy as np
    from nms import nms

    from vframe.utils import click_utils, file_utils, im_utils, logger_utils, dnn_utils
    from vframe.utils import scenetext_utils
    from vframe.models.metadata_item import ROIMetadataItem, ROIDetectResult
    from vframe.settings.paths import Paths
    from vframe.models.bbox import BBox

    # ----------------------------------------------------------------
    # init

    log = logger_utils.Logger.getLogger()

    metadata_type = types.Metadata.TEXT_ROI

    # initialize dnn
    if opt_net == types.SceneTextNet.EAST:
        # TODO externalize
        dnn_size = (320, 320)  # fixed
        dnn_mean_clr = (123.68, 116.78, 103.94)  # fixed
        dnn_scale = 1.0  # fixed
        dnn_layer_names = [
            "feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"
        ]
        fp_model = join(cfg.DIR_MODELS_TF, 'east',
                        'frozen_east_text_detection.pb')
        log.debug('fp_model: {}'.format(fp_model))
        net = cv.dnn.readNet(fp_model)
        #net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
        #net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)

    elif opt_net == types.SceneTextNet.DEEPSCENE:
        dnn_size = (320, 320)  # fixed
        fp_model = join(cfg.DIR_MODELS_CAFFE, 'deepscenetext',
                        "TextBoxes_icdar13.caffemodel")
        fp_prototxt = join(cfg.DIR_MODELS_CAFFE, 'deepscenetext',
                           'textbox.prototxt')
        net = cv.text.TextDetectorCNN_create(fp_prototxt, fp_model)

    # ----------------------------------------------------------------
    # process

    # iterate sink
    while True:

        chair_item = yield

        metadata = {}

        for frame_idx, frame in chair_item.keyframes.items():

            if opt_net == types.SceneTextNet.DEEPSCENE:
                # DeepScene scene text detector (opencv contrib)
                frame = im_utils.resize(frame,
                                        width=dnn_size[0],
                                        height=dnn_size[1])
                frame_dim = frame.shape[:2][::-1]
                rects, probs = net.detect(frame)
                det_results = []
                for r in range(np.shape(rects)[0]):
                    prob = float(probs[r])
                    if prob > opt_conf_thresh:
                        rect = BBox.from_xywh_dim(
                            *rects[r], frame_dim).as_xyxy()  # normalized
                        det_results.append(ROIDetectResult(prob, rect))

                metadata[frame_idx] = det_results

            elif opt_net == types.SceneTextNet.EAST:
                # EAST scene text detector
                frame = im_utils.resize(frame,
                                        width=dnn_size[0],
                                        height=dnn_size[1])
                frame_dim = dnn_size  # frame was resized to dnn_size above

                blob = cv.dnn.blobFromImage(frame,
                                            dnn_scale,
                                            dnn_size,
                                            dnn_mean_clr,
                                            swapRB=True,
                                            crop=False)
                net.setInput(blob)
                (scores, geometry) = net.forward(dnn_layer_names)
                (rects, confidences,
                 baggage) = scenetext_utils.east_text_decode(
                     scores, geometry, opt_conf_thresh)

                det_results = []
                if rects:
                    offsets = []
                    thetas = []
                    for b in baggage:
                        offsets.append(b['offset'])
                        thetas.append(b['angle'])

                    # alternative nms_functions: nms.felzenszwalb.nms, nms.fast.nms, nms.malisiewicz.nms
                    indices = nms.boxes(rects,
                                        confidences,
                                        nms_function=nms.fast.nms,
                                        confidence_threshold=opt_conf_thresh,
                                        nsm_threshold=opt_nms_thresh)  # kwarg spelling per the nms package

                    indices = np.array(indices).reshape(-1)
                    rects = np.array(rects)[indices]
                    scores = np.array(confidences)[indices]
                    for rect, score in zip(rects, scores):
                        rect = BBox.from_xywh_dim(
                            *rect, frame_dim).as_xyxy()  # normalized
                        det_results.append(ROIDetectResult(score, rect))

                metadata[frame_idx] = det_results

        # append metadata to chair_item's mapping item
        chair_item.set_metadata(metadata_type, ROIMetadataItem(metadata))

        # ----------------------------------------------------------------
        # yield back to the processor pipeline

        # send back to generator
        sink.send(chair_item)