def update_thread(self):
    """Read all frames of the video into the keyframe buffer.

    Opens ``self._fp_video``, resizes every frame to ``self._frame_width``
    and stores it in ``self._keyframes`` keyed by frame index. When
    ``self._opt_drawframes`` is set, an independent copy of each frame is
    also stored in ``self._drawframes`` so overlays can be drawn without
    mutating the keyframe.

    Fixes vs. original:
    - the first ``read()`` (used only to log the frame shape) previously
      consumed frame 0, so index 0 stored the *second* frame; the first
      frame is now processed too.
    - an unreadable video (first read invalid, ``frame`` is None) no longer
      raises AttributeError on ``frame.shape``.
    """
    self._stream = cv.VideoCapture(self._fp_video)
    valid, frame = self._stream.read()
    if not valid:
        # could not decode any frame; release and bail out instead of
        # crashing on frame.shape below
        self.log.debug('init update_thread')
        self._stream.release()
        return
    self.log.debug('size: {}'.format(frame.shape))
    self.log.debug('init update_thread')
    frame_idx = 0
    while valid:
        frame = im_utils.resize(frame, width=self._frame_width)
        self._keyframes[frame_idx] = frame
        if self._opt_drawframes:
            self._drawframes[frame_idx] = frame.copy()  # make drawable copy
        frame_idx += 1
        valid, frame = self._stream.read()
    self._stream.release()
def thread_processor(task_obj):
    """Extract keyframes for one video and save a JPG size pyramid.

    Reads the frames listed in ``task_obj['idxs']`` from
    ``task_obj['fp_video']``, then for each configured size label
    (largest first) resizes the frames and writes them to
    ``<dir_out>/<sha256_tree>/<sha256>/<zero-padded idx>/<label>/index.jpg``.

    :param task_obj: dict with keys 'fp_video', 'idxs', 'dir_out',
        'sha256_tree', 'sha256'
    :returns: None; errors are logged and the task is skipped.

    NOTE(review): the original wrapped every local in a fresh
    ``threading.local()`` instance — pointless, since function locals are
    already private to the executing thread; plain locals are used here.
    The bare ``except:`` around the resize is narrowed to ``Exception``.
    """
    log = logging.getLogger()
    fp_video = task_obj['fp_video']
    idxs = task_obj['idxs']
    dir_out = task_obj['dir_out']
    sha256_tree = task_obj['sha256_tree']
    sha256 = task_obj['sha256']
    try:
        frame_ims = im_utils.vid2frames(fp_video, idxs=idxs)
    except Exception:
        log.error('Could not read video file')
        log.error('file: {}'.format(fp_video))
        log.error('sha256: {}'.format(sha256))
        return
    labels = cfg.IMAGE_SIZE_LABELS
    sizes = cfg.IMAGE_SIZES
    # pyramid down frame sizes 1280, 640, 320, 160 (largest first, each
    # pass resizes the output of the previous one)
    for k_label, k_width in zip(reversed(labels), reversed(sizes)):
        label = labels[k_label]
        width = sizes[k_width]
        try:
            frame_ims = [im_utils.resize(im, width=width) for im in frame_ims]
        except Exception:
            log.error('')
            log.error('Could not resize. Bad video or missing file')
            log.error(sha256)
            log.error('')
            return
        for idx, im in zip(idxs, frame_ims):
            # ensure path exists
            zpad = file_utils.zpad(idx)
            fp_dst = join(dir_out, sha256_tree, sha256, zpad, label, 'index.jpg')
            # convert to PIL (BGR -> RGB) before saving as JPG
            im_pil = im_utils.ensure_pil(im, bgr2rgb=True)
            file_utils.ensure_path_exists(fp_dst)
            im_pil.save(fp_dst, quality=cfg.JPG_SAVE_QUALITY)
def frame_transform(frame):
    """Resize *frame* to the enclosing scope's ``opt_size`` width."""
    resized = im_utils.resize(frame, width=opt_size)
    return resized
def cli(ctx, sink, opt_disk, opt_net, opt_dnn_size, opt_pyramids, opt_conf_thresh, opt_gpu):
    """Generates face detection ROIs

    Generator coroutine: receives chair_items via ``yield``, runs the
    selected face detector (OpenCV DNN / dlib CNN / dlib HoG) on each
    keyframe, attaches FACE_ROI metadata, and forwards the item to *sink*.
    """
    # ----------------------------------------------------------------
    # imports
    import os
    from os.path import join
    from pathlib import Path
    import click
    import cv2 as cv
    import dlib
    import numpy as np
    from nms import nms
    from vframe.utils import click_utils, file_utils, im_utils, logger_utils, dnn_utils
    from vframe.models.metadata_item import ROIMetadataItem, ROIDetectResult
    from vframe.settings.paths import Paths
    from vframe.models.bbox import BBox

    # ----------------------------------------------------------------
    # init
    log = logger_utils.Logger.getLogger()
    metadata_type = types.Metadata.FACE_ROI
    # initialize only the backend selected by opt_net
    if opt_net == types.FaceDetectNet.CVDNN:
        dnn_scale = 1.0  # fixed
        dnn_mean = (104.0, 177.0, 123.0)  # fixed
        dnn_crop = False  # probably crop or force resize
        fp_prototxt = join(cfg.DIR_MODELS_CAFFE, 'face_detect', 'opencv_face_detector.prototxt')
        fp_model = join(cfg.DIR_MODELS_CAFFE, 'face_detect', 'opencv_face_detector.caffemodel')
        log.debug('fp_model: {}'.format(fp_model))
        net = cv.dnn.readNet(fp_prototxt, fp_model)
        # TODO parameterize
        net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
        net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)
    elif opt_net == types.FaceDetectNet.DLIB_CNN:
        # use dlib's CNN module
        # NOTE(review): CUDA_VISIBLE_DEVICES is mutated to pin dlib's model
        # load to opt_gpu and then restored — only effective if dlib reads
        # it at load time; confirm.
        cuda_visible_devices = os.getenv('CUDA_VISIBLE_DEVICES', '')
        os.environ['CUDA_VISIBLE_DEVICES'] = str(opt_gpu)
        cnn_face_detector = dlib.cnn_face_detection_model_v1(cfg.DIR_MODELS_DLIB_CNN)
        os.environ['CUDA_VISIBLE_DEVICES'] = cuda_visible_devices  # reset
    elif opt_net == types.FaceDetectNet.DLIB_HOG:
        # use dlib's HoG module
        dlib_hog_predictor = dlib.get_frontal_face_detector()
    elif opt_net == types.FaceDetectNet.HAAR:
        # use opencv's haarcasde module
        log.error('not yet implemented')
        return

    # ----------------------------------------------------------------
    # process
    # iterate sink
    while True:
        chair_item = yield
        metadata = {}
        for frame_idx, frame in chair_item.keyframes.items():
            rois = []
            if opt_net == types.FaceDetectNet.CVDNN:
                # use OpenCV's DNN face detector with caffe model
                frame = cv.resize(frame, opt_dnn_size)
                blob = cv.dnn.blobFromImage(frame, dnn_scale, opt_dnn_size, dnn_mean)
                net.setInput(blob)
                net_outputs = net.forward()
                # net_outputs rows: [_, _, confidence, x1, y1, x2, y2] (normalized)
                for i in range(0, net_outputs.shape[2]):
                    conf = net_outputs[0, 0, i, 2]
                    if conf > opt_conf_thresh:
                        rect_norm = net_outputs[0, 0, i, 3:7]
                        rois.append(ROIDetectResult(conf, rect_norm))
                        log.debug('face roi: {}'.format(rect_norm))
            elif opt_net == types.FaceDetectNet.DLIB_CNN:
                frame = im_utils.resize(frame, width=opt_dnn_size[0], height=opt_dnn_size[1])
                # convert to RGB for dlib; dim is (w, h)
                dim = frame.shape[:2][::-1]
                frame = im_utils.bgr2rgb(frame)
                # opt_pyramids = number of upsampling pyramids for dlib
                mmod_rects = cnn_face_detector(frame, opt_pyramids)
                for mmod_rect in mmod_rects:
                    if mmod_rect.confidence > opt_conf_thresh:
                        bbox = BBox.from_dlib_dim(mmod_rect.rect, dim)
                        # NB mmod_rect.confidence is sometimes > 1.0 ?
                        rois.append(ROIDetectResult(mmod_rect.confidence, bbox.as_xyxy()))
            elif opt_net == types.FaceDetectNet.DLIB_HOG:
                frame = im_utils.resize(frame, width=opt_dnn_size[0], height=opt_dnn_size[1])
                # convert to RGB for dlib; dim is (w, h)
                dim = frame.shape[:2][::-1]
                frame = im_utils.bgr2rgb(frame)
                # ?
                # run() returns (rects, scores, directions) in lockstep
                hog_results = dlib_hog_predictor.run(frame, opt_pyramids)
                if len(hog_results[0]) > 0:
                    for rect, score, direction in zip(*hog_results):
                        if score > opt_conf_thresh:
                            bbox = BBox.from_dlib_dim(rect, dim)
                            rois.append(ROIDetectResult(score, bbox.as_xyxy()))
            metadata[frame_idx] = rois

        # append metadata to chair_item's mapping item
        chair_item.set_metadata(metadata_type, ROIMetadataItem(metadata))
        # ----------------------------------------------------------------
        # yield back to the processor pipeline
        # send back to generator
        sink.send(chair_item)
def load_images(self, fp_image, opt_size_type, opt_drawframes=False):
    """Load a single image file as keyframe index 0.

    :param fp_image: filepath of the image to load
    :param opt_size_type: size-label key into cfg.IMAGE_SIZES
    :param opt_drawframes: when True, also create drawable frame copies
    """
    opt_size = cfg.IMAGE_SIZES[opt_size_type]
    # FIX: read the fp_image argument; the original read self._fp_image
    # and silently ignored the parameter passed by the caller
    im = im_utils.resize(cv.imread(fp_image), width=opt_size)
    self.set_keyframes({0: im}, opt_drawframes)
def extract(items, dir_out, dir_videos, keyframe_type, threads=1):
    """Extracts keyframes from images

    For each media item, pulls the keyframe indices of *keyframe_type*
    from its KEYFRAME metadata, decodes those frames from the source
    video, and writes a JPG size pyramid under
    ``<dir_out>/<sha256_tree>/<sha256>/<idx>/<label>/index.jpg``.
    Runs single-threaded when ``threads <= 1``, otherwise fans tasks out
    to daemon worker threads via a Queue.
    """
    task_queue = Queue()
    print_lock = threading.Lock()  # NOTE(review): created but never acquired
    log = logging.getLogger()

    if threads > 1:
        def thread_processor(task_obj):
            # Worker body: extract + resize + save one video's keyframes.
            # NOTE(review): threading.local() here is used as a plain
            # namespace; function locals are already thread-private.
            tl = threading.local()
            tl.fp_video = task_obj['fp_video']
            tl.idxs = task_obj['idxs']
            tl.dir_out = task_obj['dir_out']
            tl.sha256_tree = task_obj['sha256_tree']
            tl.sha256 = task_obj['sha256']
            try:
                tl.frame_ims = im_utils.vid2frames(tl.fp_video, idxs=tl.idxs)
            except Exception as ex:
                logging.getLogger().error('Could not read video file')
                logging.getLogger().error('file: {}'.format(tl.fp_video))
                logging.getLogger().error('sha256: {}'.format(tl.sha256))
                return
            tl.labels = cfg.IMAGE_SIZE_LABELS
            tl.sizes = cfg.IMAGE_SIZES
            for tl.k_label, tl.k_width in zip(reversed(tl.labels), reversed(tl.sizes)):
                tl.label = tl.labels[tl.k_label]
                tl.width = tl.sizes[tl.k_width]
                # pyramid down frame sizes 1280, 640, 320, 160
                try:
                    tl.frame_ims = [im_utils.resize(tl.im, width=tl.width) for tl.im in tl.frame_ims]
                except:
                    logging.getLogger().error('')
                    logging.getLogger().error('Could not resize. Bad video or missing file')
                    logging.getLogger().error(tl.sha256)
                    logging.getLogger().error('')
                    return
                for tl.idx, tl.im in zip(tl.idxs, tl.frame_ims):
                    # ensure path exists
                    tl.zpad = file_utils.zpad(tl.idx)
                    tl.fp_dst = join(tl.dir_out, tl.sha256_tree, tl.sha256, tl.zpad, tl.label, 'index.jpg')
                    # convert to PIL
                    tl.im_pil = im_utils.ensure_pil(tl.im, bgr2rgb=True)
                    file_utils.ensure_path_exists(tl.fp_dst)
                    tl.im_pil.save(tl.fp_dst, quality=cfg.JPG_SAVE_QUALITY)

        def process_queue(num_items):
            # Daemon worker loop: drain the queue forever, logging progress.
            # TODO: progress bar
            while True:
                task_obj = task_queue.get()
                thread_processor(task_obj)
                logging.getLogger().info('process: {:.2f}% {:,}/{:,}'.format(
                    (task_queue.qsize() / num_items)*100,
                    num_items-task_queue.qsize(), num_items))
                task_queue.task_done()

        # avoid race conditions by creating dir structure here
        log.info('create directory structure first to avoid race conditions')
        log.info('TODO: this needs to be fixed, thread lock maybe')
        for sha256, item in tqdm(items.items()):
            item_metadata = item.metadata.get(MetadataType.KEYFRAME, {})
            sha256_tree = file_utils.sha256_tree(sha256)
            fp_dst = join(dir_out, sha256_tree)
            file_utils.ensure_path_exists(fp_dst)

        # init threads
        num_items = len(items)
        for i in range(threads):
            t = threading.Thread(target=process_queue, args=(num_items,))
            t.daemon = True  # daemon so stray workers die with the process
            t.start()

        # process threads
        start = time.time()
        for sha256, item in items.items():
            sha256_tree = file_utils.sha256_tree(sha256)
            item_metadata = item.metadata.get(MetadataType.KEYFRAME, {})
            if not item_metadata:
                continue
            keyframe_data = item_metadata.metadata
            idxs = keyframe_data.get(keyframe_type)
            fp_video = join(dir_videos, sha256_tree, '{}.{}'.format(sha256, item.ext))
            task_obj = {
                'fp_video': fp_video,
                'idxs': idxs,
                'dir_out': dir_out,
                'sha256': sha256,
                'sha256_tree': sha256_tree
            }
            task_queue.put(task_obj)
        # block until all queued tasks report task_done()
        task_queue.join()
    else:
        # single-threaded fallback: same pipeline, inline
        for sha256, item in tqdm(items.items()):
            item_metadata = item.metadata.get(MetadataType.KEYFRAME, {})
            if not item_metadata:
                continue
            sha256_tree = file_utils.sha256_tree(sha256)
            keyframe_data = item_metadata.metadata
            #idxs_basic = keyframe_data.get(KeyframeMetadataType.BASIC)
            #idxs_dense = keyframe_data.get(KeyframeMetadataType.DENSE)
            #idxs_expanded = keyframe_data.get(KeyframeMetadataType.EXPANDED)
            # fetches the metadata by the enum type from the custom click param
            idxs = keyframe_data.get(keyframe_type)
            # get frames from video
            fp_video = join(dir_videos, sha256_tree, '{}.{}'.format(sha256, item.ext))
            frame_ims = im_utils.vid2frames(fp_video, idxs=idxs)
            labels = cfg.IMAGE_SIZE_LABELS
            sizes = cfg.IMAGE_SIZES
            for k_label, k_width in zip(reversed(labels), reversed(sizes)):
                label = labels[k_label]
                width = sizes[k_width]
                # pyramid down frame sizes 1280, 640, 320, 160
                frame_ims = [im_utils.resize(im, width=width) for im in frame_ims]
                for idx, im in zip(idxs, frame_ims):
                    # ensure path exists
                    zpad = file_utils.zpad(idx)
                    fp_dst = join(dir_out, sha256_tree, sha256, zpad, label, 'index.jpg')
                    # conver to PIL
                    im_pil = im_utils.ensure_pil(im, bgr2rgb=True)
                    file_utils.ensure_path_exists(fp_dst)
                    im_pil.save(fp_dst, quality=cfg.JPG_SAVE_QUALITY)
def cli(ctx, sink, opt_disk, opt_net, opt_gpu):
    """Generates detection metadata (CV DNN)

    Generator coroutine: receives chair_items via ``yield``, runs the
    selected Darknet/YOLO model on each keyframe through OpenCV's DNN
    module, attaches detection metadata, and forwards the item to *sink*.
    """
    # ----------------------------------------------------------------
    # imports
    import os
    from os.path import join
    from pathlib import Path
    import click
    import cv2 as cv
    import numpy as np
    from vframe.utils import click_utils, file_utils, im_utils, logger_utils, dnn_utils
    from vframe.models.metadata_item import DetectMetadataItem, DetectResult
    from vframe.settings.paths import Paths

    # ----------------------------------------------------------------
    # init
    log = logger_utils.Logger.getLogger()
    # TODO externalize function
    # initialize dnn
    dnn_clr = (0, 0, 0)  # mean color to subtract
    dnn_scale = 1/255  # ?
    nms_threshold = 0.4   #Non-maximum suppression threshold
    dnn_px_range = 1  # pixel value range ?
    dnn_crop = False  # probably crop or force resize
    # per-network input size and confidence threshold
    # Use mulitples of 32: 416, 448, 480, 512, 544, 576, 608, 640, 672, 704
    if opt_net == types.DetectorNet.OPENIMAGES:
        metadata_type = types.Metadata.OPENIMAGES
        dnn_size = (608, 608)
        dnn_threshold = 0.875
    elif opt_net == types.DetectorNet.COCO:
        metadata_type = types.Metadata.COCO
        dnn_size = (416, 416)
        dnn_threshold = 0.925
    elif opt_net == types.DetectorNet.COCO_SPP:
        metadata_type = types.Metadata.COCO
        dnn_size = (608, 608)
        dnn_threshold = 0.875
    elif opt_net == types.DetectorNet.VOC:
        metadata_type = types.Metadata.VOC
        dnn_size = (416, 416)
        dnn_threshold = 0.875
    elif opt_net == types.DetectorNet.SUBMUNITION:
        metadata_type = types.Metadata.SUBMUNITION
        dnn_size = (608, 608)
        dnn_threshold = 0.90

    # Initialize the parameters
    fp_cfg = Paths.darknet_cfg(opt_net=opt_net, data_store=opt_disk, as_bytes=False)
    fp_weights = Paths.darknet_weights(opt_net=opt_net, data_store=opt_disk, as_bytes=False)
    fp_data = Paths.darknet_data(opt_net=opt_net, data_store=opt_disk, as_bytes=False)
    fp_classes = Paths.darknet_classes(opt_net=opt_net, data_store=opt_disk)
    class_names = file_utils.load_text(fp_classes)
    # map class label -> index (NOTE(review): built but unused below)
    class_idx_lookup = {label: i for i, label in enumerate(class_names)}
    log.debug('fp_cfg: {}'.format(fp_cfg))
    log.debug('fp_weights: {}'.format(fp_weights))
    log.debug('fp_data: {}'.format(fp_data))
    log.debug('fp_classes: {}'.format(fp_classes))
    net = cv.dnn.readNetFromDarknet(fp_cfg, fp_weights)
    net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
    net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)

    # ----------------------------------------------------------------
    # process
    # iterate sink
    while True:
        chair_item = yield
        metadata = {}
        for frame_idx, frame in chair_item.keyframes.items():
            frame = im_utils.resize(frame, width=dnn_size[0], height=dnn_size[1])
            blob = cv.dnn.blobFromImage(frame, dnn_scale, dnn_size, dnn_clr, dnn_px_range, crop=dnn_crop)
            # Sets the input to the network
            net.setInput(blob)
            # Runs the forward pass to get output of the output layers
            net_outputs = net.forward(dnn_utils.getOutputsNames(net))
            # apply confidence threshold + non-maximum suppression
            det_results = dnn_utils.nms_cvdnn(net_outputs, dnn_threshold, nms_threshold)
            metadata[frame_idx] = det_results

        # append metadata to chair_item's mapping item
        chair_item.set_metadata(metadata_type, DetectMetadataItem(metadata))
        # ----------------------------------------------------------------
        # yield back to the processor pipeline
        # send back to generator
        sink.send(chair_item)
def cli(ctx, sink, opt_disk, opt_net, opt_conf_thresh, opt_nms_thresh):
    """Generates scene text ROIs (CV DNN)

    Generator coroutine: receives chair_items via ``yield``, runs the
    selected scene-text detector (EAST or DeepScene TextBoxes) on each
    keyframe, attaches TEXT_ROI metadata, and forwards the item to *sink*.
    """
    # ----------------------------------------------------------------
    # imports
    import os
    from os.path import join
    from pathlib import Path
    import click
    import cv2 as cv
    import numpy as np
    from nms import nms
    from vframe.utils import click_utils, file_utils, im_utils, logger_utils, dnn_utils
    from vframe.utils import scenetext_utils
    from vframe.models.metadata_item import ROIMetadataItem, ROIDetectResult
    from vframe.settings.paths import Paths
    from vframe.models.bbox import BBox

    # ----------------------------------------------------------------
    # init
    log = logger_utils.Logger.getLogger()
    metadata_type = types.Metadata.TEXT_ROI

    # initialize dnn
    if opt_net == types.SceneTextNet.EAST:
        # TODO externalize
        dnn_size = (320, 320)  # fixed
        dnn_mean_clr = (123.68, 116.78, 103.94)  # fixed
        dnn_scale = 1.0  # fixed
        dnn_layer_names = [
            "feature_fusion/Conv_7/Sigmoid",
            "feature_fusion/concat_3"
        ]
        fp_model = join(cfg.DIR_MODELS_TF, 'east', 'frozen_east_text_detection.pb')
        log.debug('fp_model: {}'.format(fp_model))
        net = cv.dnn.readNet(fp_model)
        #net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
        #net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)
    elif opt_net == types.SceneTextNet.DEEPSCENE:
        dnn_size = (320, 320)  # fixed
        fp_model = join(cfg.DIR_MODELS_CAFFE, 'deepscenetext', "TextBoxes_icdar13.caffemodel")
        fp_prototxt = join(cfg.DIR_MODELS_CAFFE, 'deepscenetext', 'textbox.prototxt')
        net = cv.text.TextDetectorCNN_create(fp_prototxt, fp_model)

    # ----------------------------------------------------------------
    # process
    # iterate sink
    while True:
        chair_item = yield
        metadata = {}
        for frame_idx, frame in chair_item.keyframes.items():
            if opt_net == types.SceneTextNet.DEEPSCENE:
                # DeepScene scene text detector (opencv contrib)
                frame = im_utils.resize(frame, width=dnn_size[0], height=dnn_size[1])
                frame_dim = frame.shape[:2][::-1]  # (w, h)
                rects, probs = net.detect(frame)
                det_results = []
                for r in range(np.shape(rects)[0]):
                    prob = float(probs[r])
                    if prob > opt_conf_thresh:
                        rect = BBox.from_xywh_dim(*rects[r], frame_dim).as_xyxy()  # normalized
                        det_results.append(ROIDetectResult(prob, rect))
                metadata[frame_idx] = det_results
            elif opt_net == types.SceneTextNet.EAST:
                # FIX: was `elif types.SceneTextNet.EAST:` — the bare enum
                # member is always truthy, so this branch ran for any
                # non-DEEPSCENE net value; now compares against opt_net.
                # EAST scene text detector
                frame = im_utils.resize(frame, width=dnn_size[0], height=dnn_size[1])
                frame_dim = dnn_size  # boxes decoded in DNN input coordinates
                blob = cv.dnn.blobFromImage(frame, dnn_scale, dnn_size, dnn_mean_clr,
                                            swapRB=True, crop=False)
                net.setInput(blob)
                (scores, geometry) = net.forward(dnn_layer_names)
                (rects, confidences, baggage) = scenetext_utils.east_text_decode(
                    scores, geometry, opt_conf_thresh)
                det_results = []
                if rects:
                    # non-maximum suppression over the decoded boxes
                    # (NB 'nsm_threshold' is the nms package's parameter name)
                    indices = nms.boxes(rects, confidences, nms_function=nms.fast.nms,
                                        confidence_threshold=opt_conf_thresh,
                                        nsm_threshold=opt_nms_thresh)
                    indices = np.array(indices).reshape(-1)
                    rects = np.array(rects)[indices]
                    scores = np.array(confidences)[indices]
                    for rect, score in zip(rects, scores):
                        rect = BBox.from_xywh_dim(*rect, frame_dim).as_xyxy()  # normalized
                        det_results.append(ROIDetectResult(score, rect))
                metadata[frame_idx] = det_results

        # append metadata to chair_item's mapping item
        chair_item.set_metadata(metadata_type, ROIMetadataItem(metadata))
        # ----------------------------------------------------------------
        # yield back to the processor pipeline
        # send back to generator
        sink.send(chair_item)