def gen_video_preview(elem, output_dir):
    """
    Generate (or reuse) an animated GIF preview for a video data element.

    The preview is written to ``<output_dir>/<md5>.gif``, where ``<md5>`` is
    the value returned by ``elem.md5()``. If a file already exists at that
    path it is reused as-is and nothing is regenerated.

    :param elem: Data element to get the preview image for.
    :type elem: smqtk.data_rep.DataElement

    :param output_dir: Directory to save generated image to.
    :type output_dir: str

    :return: Path to the preview GIF file.
    :rtype: str

    """
    output_fp = os.path.join(output_dir, "%s.gif" % elem.md5())
    if os.path.isfile(output_fp):
        # Preview already generated by an earlier call.
        return output_fp

    tmp_vid_fp = elem.write_temp()
    try:
        interval = 0.5  # ~2fps gif
        fm = video_utils.ffmpeg_extract_frame_map(
            tmp_vid_fp, second_interval=interval
        )
        # Frames are assembled in ascending frame-number order.
        img_arrays = [imageio.imread(fm[frm_num]) for frm_num in sorted(fm)]
        imageio.mimwrite(output_fp, img_arrays, duration=interval)
    finally:
        # Always release the temporary video file, even when frame
        # extraction or GIF encoding fails.
        elem.clean_temp()
    return output_fp
def gen_video_preview(elem, output_dir):
    """
    Generate (or reuse) an animated GIF preview for a video data element.

    The preview is written to ``<output_dir>/<uuid>.gif``, where ``<uuid>``
    is the value returned by ``elem.uuid()``. If a file already exists at
    that path it is reused as-is and nothing is regenerated.

    :param elem: Data element to get the preview image for.
    :type elem: smqtk.representation.DataElement

    :param output_dir: Directory to save generated image to.
    :type output_dir: str

    :return: Path to the preview GIF file.
    :rtype: str

    """
    output_fp = os.path.join(output_dir, "%s.gif" % elem.uuid())
    if os.path.isfile(output_fp):
        # Preview already generated by an earlier call.
        return output_fp

    tmp_vid_fp = elem.write_temp()
    try:
        interval = 0.5  # ~2fps gif
        fm = video_utils.ffmpeg_extract_frame_map(tmp_vid_fp,
                                                  second_interval=interval)
        # Frames are assembled in ascending frame-number order.
        img_arrays = [imageio.imread(fm[frm_num]) for frm_num in sorted(fm)]
        imageio.mimwrite(output_fp, img_arrays, duration=interval)
    finally:
        # Always release the temporary video file, even when frame
        # extraction or GIF encoding fails.
        elem.clean_temp()
    return output_fp
def _generate_descriptor_matrices(self, data_set, **kwargs):
    """
    Generate info and descriptor matrices based on ingest type.

    :param data_set: Iterable of data elements to generate combined info
        and descriptor matrices for.
    :type data_set: collections.Set[smqtk.representation.DataElement]

    :param limit: Limit the number of descriptor entries to this amount.
    :type limit: int

    :return: Combined info and descriptor matrices for all base images
    :rtype: (numpy.core.multiarray.ndarray, numpy.core.multiarray.ndarray)

    """
    descriptor_limit = kwargs.get('limit', float('inf'))
    # With videos, an "item" is one video, so, collect for a whole video
    # as normal, then subsample from the full video collection.
    # NOTE: this stays a float (possibly inf) for the comparison below and
    # is converted to int only where it is used as a slice bound.
    per_item_limit = numpy.floor(float(descriptor_limit) / len(data_set))

    # If an odd number of jobs, favor descriptor extraction
    if self.parallel:
        descr_parallel = int(max(1, math.ceil(self.parallel / 2.0)))
        extract_parallel = int(max(1, math.floor(self.parallel / 2.0)))
    else:
        cpuc = multiprocessing.cpu_count()
        descr_parallel = int(max(1, math.ceil(cpuc / 2.0)))
        extract_parallel = int(max(1, math.floor(cpuc / 2.0)))

    # For each video, extract frames and submit colorDescriptor processing
    # jobs for each frame, combining all results into a single matrix for
    # return.
    pool = multiprocessing.Pool(processes=descr_parallel)

    # Mapping of [UID] to [frame] to tuple containing:
    #   (info_fp, desc_fp, async processing result)
    r_map = {}
    with SimpleTimer("Extracting frames and submitting descriptor jobs...",
                     self._log.debug):
        for di in data_set:
            r_map[di.uuid()] = {}
            tmp_vid_fp = self._get_data_temp_path(di)
            p = dict(self.FRAME_EXTRACTION_PARAMS)
            vmd = get_metadata_info(tmp_vid_fp)
            # Configured offset / max duration are scaled by the video's
            # duration before being handed to frame extraction.
            p['second_offset'] = vmd.duration * p['second_offset']
            p['max_duration'] = vmd.duration * p['max_duration']
            fm = video_utils.ffmpeg_extract_frame_map(
                self._work_dir, tmp_vid_fp, parallel=extract_parallel, **p)

            # Compute descriptors for extracted frames.
            for frame, imgPath in fm.iteritems():
                info_fp, desc_fp = \
                    self._get_standard_info_descriptors_filepath(di, frame)
                r = pool.apply_async(utils.generate_descriptors,
                                     args=(self.EXE, imgPath,
                                           self.descriptor_type(),
                                           info_fp, desc_fp))
                r_map[di.uuid()][frame] = (info_fp, desc_fp, r)

            # Clean temporary video file while computing descriptors.
            # This does not remove the extracted frames that the underlying
            # detector/descriptor is working on.
            di.clean_temp()
    pool.close()

    # Each result is a tuple of two ndarrays: info and descriptor matrices
    with SimpleTimer("Collecting shape information for super matrices...",
                     self._log.debug):
        running_height = 0

        i_width = None
        d_width = None

        # Transform r_map[uid] into:
        #   (info_mat_files, desc_mat_files, sR, ssi_list)
        #   -> files in frame order
        uids = sorted(r_map)
        for uid in uids:
            # Hold a local reference to the per-frame results so the loop
            # never has to re-read r_map[uid] while it is being processed.
            frame_map = r_map[uid]
            video_num_desc = 0
            video_info_mat_fps = []  # ordered list of frame info mat files
            video_desc_mat_fps = []  # ordered list of frame desc mat files
            for frame in sorted(frame_map):
                ifp, dfp, r = frame_map[frame]

                # Descriptor generation may have failed for this frame; log
                # it and move on to the remaining frames of the video.
                # (Previously r_map[uid] was set to None here, which made
                # the next iteration fail with a TypeError when it tried to
                # subscript None.)
                try:
                    i_shape, d_shape = r.get()
                except RuntimeError as ex:
                    self._log.warning(
                        'Descriptor generation failed for '
                        'frame %d in video UID[%s]: %s',
                        frame, uid, str(ex))
                    continue

                if d_width is None and d_shape[0] != 0:
                    i_width = i_shape[1]
                    d_width = d_shape[1]

                # Skip if there were no descriptors generated for this
                # frame
                # NOTE(review): this tests the column count (d_shape[1]);
                # the row count (d_shape[0]) may have been intended --
                # confirm against utils.generate_descriptors.
                if d_shape[1] == 0:
                    continue

                video_info_mat_fps.append(ifp)
                video_desc_mat_fps.append(dfp)
                video_num_desc += d_shape[0]

            # If combined descriptor height exceeds the per-item limit,
            # generate a random subsample index list
            ssi = None
            if video_num_desc > per_item_limit:
                # per_item_limit is finite on this branch, so the int
                # conversion is safe; slice bounds must be integers.
                ssi = sorted(
                    numpy.random.permutation(video_num_desc)
                    [:int(per_item_limit)])
                video_num_desc = len(ssi)

            r_map[uid] = (video_info_mat_fps, video_desc_mat_fps,
                          running_height, ssi)
            running_height += video_num_desc
    # NOTE(review): the definition ends here in this file without joining
    # the pool or returning the matrices described in the docstring.
def _generate_descriptor_matrices(self, data_set, **kwargs):
    """
    Generate info and descriptor matrices based on ingest type.

    For every video in ``data_set`` frames are extracted, a descriptor
    generation job is submitted per frame, and the per-frame result files
    are then loaded into two combined matrices. When a video yields more
    rows than its share of ``limit``, a random subsample of its rows is
    used instead.

    :param data_set: Iterable of data elements to generate combined info
        and descriptor matrices for.
    :type data_set: collections.Set[smqtk.data_rep.DataElement]

    :param limit: Limit the number of descriptor entries to this amount.
    :type limit: int

    :return: Combined info and descriptor matrices for all base images.
        Both matrices are empty (zero rows) when ``data_set`` is empty.
    :rtype: (numpy.core.multiarray.ndarray, numpy.core.multiarray.ndarray)

    """
    # Known constants (column widths of the info / descriptor matrices)
    i_width = 5
    d_width = 384

    num_items = len(data_set)
    if num_items == 0:
        # Nothing to process; also avoids dividing the limit by zero below.
        return (numpy.zeros((0, i_width), dtype=float),
                numpy.zeros((0, d_width), dtype=float))

    descriptor_limit = kwargs.get('limit', float('inf'))
    # With videos, an "item" is one video, so, collect for a whole video
    # as normal, then subsample from the full video collection.
    # NOTE: this stays a float (possibly inf) for the comparison below and
    # is converted to int only where it is used as a slice bound.
    per_item_limit = numpy.floor(float(descriptor_limit) / num_items)

    # If an odd number of jobs, favor descriptor extraction
    if self.PARALLEL:
        descr_parallel = int(max(1, math.ceil(self.PARALLEL/2.0)))
        extract_parallel = int(max(1, math.floor(self.PARALLEL/2.0)))
    else:
        cpuc = multiprocessing.cpu_count()
        descr_parallel = int(max(1, math.ceil(cpuc/2.0)))
        extract_parallel = int(max(1, math.floor(cpuc/2.0)))

    # For each video, extract frames and submit colorDescriptor processing
    # jobs for each frame, combining all results into a single matrix for
    # return.
    pool = multiprocessing.Pool(processes=descr_parallel)

    # Mapping of [UID] to [frame] to tuple containing:
    #   (info_fp, desc_fp, async processing result)
    r_map = {}
    with SimpleTimer("Extracting frames and submitting descriptor jobs...",
                     self.log.debug):
        for di in data_set:
            r_map[di.uuid()] = {}
            tmp_vid_fp = di.write_temp(self.temp_dir)
            try:
                p = dict(self.FRAME_EXTRACTION_PARAMS)
                vmd = get_metadata_info(tmp_vid_fp)
                # Configured offset / max duration are scaled by the
                # video's duration before being handed to extraction.
                p['second_offset'] = vmd.duration * p['second_offset']
                p['max_duration'] = vmd.duration * p['max_duration']
                fm = video_utils.ffmpeg_extract_frame_map(
                    tmp_vid_fp,
                    parallel=extract_parallel,
                    **p
                )

                # Compute descriptors for extracted frames.
                for frame, imgPath in fm.iteritems():
                    info_fp, desc_fp = \
                        self._get_standard_info_descriptors_filepath(di,
                                                                     frame)
                    r = pool.apply_async(
                        utils.generate_descriptors,
                        args=(self.PROC_COLORDESCRIPTOR, imgPath,
                              self.descriptor_type(), info_fp, desc_fp)
                    )
                    r_map[di.uuid()][frame] = (info_fp, desc_fp, r)
            finally:
                # Clean temporary file while computing descriptors; done in
                # a finally so a failed extraction does not leave the temp
                # video behind.
                di.clean_temp()
    pool.close()

    # Each result is a tuple of two ndarrays: info and descriptor matrices
    with SimpleTimer("Collecting shape information for super matrices...",
                     self.log.debug):
        running_height = 0

        # Transform r_map[uid] into:
        #   (info_mat_files, desc_mat_files, sR, ssi_list)
        #   -> files in frame order
        uids = sorted(r_map)
        for uid in uids:
            video_num_desc = 0
            video_info_mat_fps = []  # ordered list of frame info mat files
            video_desc_mat_fps = []  # ordered list of frame desc mat files
            for frame in sorted(r_map[uid]):
                ifp, dfp, r = r_map[uid][frame]
                # Blocks until this frame's job has finished; only the
                # info matrix shape is needed since widths are constants.
                i_shape, _ = r.get()

                video_info_mat_fps.append(ifp)
                video_desc_mat_fps.append(dfp)
                video_num_desc += i_shape[0]

            # If combined descriptor height exceeds the per-item limit,
            # generate a random subsample index list
            ssi = None
            if video_num_desc > per_item_limit:
                # per_item_limit is finite on this branch, so the int
                # conversion is safe; slice bounds must be integers.
                ssi = sorted(
                    numpy.random.permutation(video_num_desc)
                    [:int(per_item_limit)]
                )
                video_num_desc = len(ssi)

            # running_height at this point is stored as the third tuple
            # entry and later passed to _thread_load_matrices as ``sR``.
            r_map[uid] = (video_info_mat_fps, video_desc_mat_fps,
                          running_height, ssi)
            running_height += video_num_desc
    pool.join()
    del pool

    with SimpleTimer("Building master descriptor matrices...",
                     self.log.debug):
        master_info = numpy.zeros((running_height, i_width), dtype=float)
        master_desc = numpy.zeros((running_height, d_width), dtype=float)
        tp = multiprocessing.pool.ThreadPool(processes=self.PARALLEL)
        for uid in uids:
            info_fp_list, desc_fp_list, sR, ssi = r_map[uid]
            # NOTE(review): the async results are not inspected, so an
            # exception raised inside _thread_load_matrices would go
            # unnoticed here -- confirm whether that is intended.
            tp.apply_async(ColorDescriptor_Video._thread_load_matrices,
                           args=(master_info, info_fp_list, sR, ssi))
            tp.apply_async(ColorDescriptor_Video._thread_load_matrices,
                           args=(master_desc, desc_fp_list, sR, ssi))
        tp.close()
        tp.join()
    return master_info, master_desc
def _generate_descriptor_matrices(self, data_set, **kwargs): """ Generate info and descriptor matrices based on ingest type. :param data_set: Iterable of data elements to generate combined info and descriptor matrices for. :type item_iter: collections.Set[smqtk.representation.DataElement] :param limit: Limit the number of descriptor entries to this amount. :type limit: int :return: Combined info and descriptor matrices for all base images :rtype: (numpy.core.multiarray.ndarray, numpy.core.multiarray.ndarray) """ descriptor_limit = kwargs.get('limit', float('inf')) # With videos, an "item" is one video, so, collect for a while video # as normal, then subsample from the full video collection. per_item_limit = numpy.floor(float(descriptor_limit) / len(data_set)) # If an odd number of jobs, favor descriptor extraction if self.PARALLEL: descr_parallel = int(max(1, math.ceil(self.PARALLEL/2.0))) extract_parallel = int(max(1, math.floor(self.PARALLEL/2.0))) else: cpuc = multiprocessing.cpu_count() descr_parallel = int(max(1, math.ceil(cpuc/2.0))) extract_parallel = int(max(1, math.floor(cpuc/2.0))) # For each video, extract frames and submit colorDescriptor processing # jobs for each frame, combining all results into a single matrix for # return. pool = multiprocessing.Pool(processes=descr_parallel) # Mapping of [UID] to [frame] to tuple containing: # (info_fp, desc_fp, async processing result) r_map = {} with SimpleTimer("Extracting frames and submitting descriptor jobs...", self._log.debug): for di in data_set: r_map[di.uuid()] = {} tmp_vid_fp = self._get_data_temp_path(di) p = dict(self.FRAME_EXTRACTION_PARAMS) vmd = get_metadata_info(tmp_vid_fp) p['second_offset'] = vmd.duration * p['second_offset'] p['max_duration'] = vmd.duration * p['max_duration'] fm = video_utils.ffmpeg_extract_frame_map( self._work_dir, tmp_vid_fp, parallel=extract_parallel, **p ) # Compute descriptors for extracted frames. 
for frame, imgPath in fm.iteritems(): info_fp, desc_fp = \ self._get_standard_info_descriptors_filepath(di, frame) r = pool.apply_async( utils.generate_descriptors, args=(self.PROC_COLORDESCRIPTOR, imgPath, self.descriptor_type(), info_fp, desc_fp) ) r_map[di.uuid()][frame] = (info_fp, desc_fp, r) # Clean temporary video file file while computing descriptors # This does not remove the extracted frames that the underlying # detector/descriptor is working on. di.clean_temp() pool.close() # Each result is a tuple of two ndarrays: info and descriptor matrices with SimpleTimer("Collecting shape information for super matrices...", self._log.debug): running_height = 0 i_width = None d_width = None # Transform r_map[uid] into: # (info_mat_files, desc_mat_files, sR, ssi_list) # -> files in frame order uids = sorted(r_map) for uid in uids: video_num_desc = 0 video_info_mat_fps = [] # ordered list of frame info mat files video_desc_mat_fps = [] # ordered list of frame desc mat files for frame in sorted(r_map[uid]): ifp, dfp, r = r_map[uid][frame] # Descriptor generation may have failed for this UID try: i_shape, d_shape = r.get() except RuntimeError, ex: self._log.warning('Descriptor generation failed for ' 'frame %d in video UID[%s]: %s', frame, uid, str(ex)) r_map[uid] = None continue if d_width is None and d_shape[0] != 0: i_width = i_shape[1] d_width = d_shape[1] # Skip if there were no descriptors generated for this # frame if d_shape[1] == 0: continue video_info_mat_fps.append(ifp) video_desc_mat_fps.append(dfp) video_num_desc += d_shape[0] # If combined descriptor height exceeds the per-item limit, # generate a random subsample index list ssi = None if video_num_desc > per_item_limit: ssi = sorted( numpy.random.permutation(video_num_desc)[:per_item_limit] ) video_num_desc = len(ssi) r_map[uid] = (video_info_mat_fps, video_desc_mat_fps, running_height, ssi) running_height += video_num_desc