def get_annotation_csv_generator(
    folder: types.GirderModel,
    user: types.GirderUserModel,
    excludeBelowThreshold=False,
    typeFilter=None,
) -> Tuple[str, Callable[[], Generator[str, None, None]]]:
    """Get the annotation generator for a folder"""
    fps = None
    imageFiles = None

    source_type = fromMeta(folder, constants.TypeMarker)
    if source_type == constants.VideoType:
        fps = fromMeta(folder, constants.FPSMarker)
    elif source_type == constants.ImageSequenceType:
        imageFiles = [img['name'] for img in crud.valid_images(folder, user)]

    thresholds = fromMeta(folder, "confidenceFilters", {})

    def downloadGenerator():
        datalist, _ = get_annotations(folder)
        for data in viame.export_tracks_as_csv(
            datalist,
            excludeBelowThreshold,
            thresholds=thresholds,
            filenames=imageFiles,
            fps=fps,
            typeFilter=typeFilter,
        ):
            yield data

    filename = folder["name"] + ".csv"
    return filename, downloadGenerator
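# Hedged usage sketch (not from the source): the function returns a filename and
# a zero-argument generator factory; a caller streams CSV content by invoking
# the factory. `folder` and `user` are assumed to be already-loaded Girder documents.
filename, gen = get_annotation_csv_generator(folder, user, excludeBelowThreshold=True)
with open(filename, 'w') as csv_out:
    for chunk in gen():  # each call to gen() starts a fresh export
        csv_out.write(chunk)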
def verify_dataset(folder: GirderModel):
    """Verify that a given folder is a DIVE dataset"""
    if not asbool(fromMeta(folder, constants.DatasetMarker, False)):
        raise RestException('Source folder is not a valid DIVE dataset', code=404)
    dstype = fromMeta(folder, 'type')
    if dstype not in [constants.ImageSequenceType, constants.VideoType]:
        raise ValueError(f'Source folder is marked as dataset but has invalid type {dstype}')
    if dstype == constants.VideoType:
        fps = fromMeta(folder, 'fps')
        if type(fps) not in [int, float]:
            raise ValueError(f'Video missing numerical fps, found {fps}')
    return True
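# Minimal sketch of folder documents that pass or fail verify_dataset, assuming
# fromMeta(folder, key, default) reads folder['meta'][key]. The marker constants
# are referenced directly so no literal key names are guessed.
valid_video = {'meta': {
    constants.DatasetMarker: True,
    'type': constants.VideoType,
    'fps': 29.97,
}}
assert verify_dataset(valid_video)

missing_fps = {'meta': {constants.DatasetMarker: True, 'type': constants.VideoType}}
# verify_dataset(missing_fps) would raise ValueError: fps is None, not int/float.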
def process_assetstore_import(event, meta: dict):
    """Function for appending the appropriate metadata to no-copy import data"""
    info = event.info
    objectType = info.get("type")
    importPath = info.get("importPath")
    now = datetime.now()

    if not importPath or not objectType or objectType != "item":
        return

    dataset_type = None
    item = Item().findOne({"_id": info["id"]})
    item['meta'].update({
        **meta,
        AssetstoreSourcePathMarker: importPath,
    })

    # TODO figure out what's going on here?
    if imageRegex.search(importPath):
        dataset_type = ImageSequenceType
    elif videoRegex.search(importPath):
        # Look for existing video dataset directory
        parentFolder = Folder().findOne({"_id": item["folderId"]})
        userId = parentFolder['creatorId'] or parentFolder['baseParentId']
        user = User().findOne({'_id': ObjectId(userId)})
        foldername = f'Video {item["name"]}'
        dest = Folder().createFolder(parentFolder, foldername, creator=user, reuseExisting=True)
        if dest['created'] < now:
            # Remove the old item, replace it with the new one.
            oldItem = Item().findOne({'folderId': dest['_id'], 'name': item['name']})
            if oldItem is not None:
                Item().remove(oldItem)
        Item().move(item, dest)
        dataset_type = VideoType

    if dataset_type is not None:
        # Update metadata of parent folder
        # FPS is hardcoded for now
        Item().save(item)
        folder = Folder().findOne({"_id": item["folderId"]})
        root, _ = os.path.split(importPath)
        if not asbool(fromMeta(folder, DatasetMarker)):
            folder["meta"].update({
                TypeMarker: dataset_type,
                FPSMarker: DefaultVideoFPS,
                DatasetMarker: True,
                AssetstoreSourcePathMarker: root,
                **meta,
            })
            Folder().save(folder)
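# Illustration of the extension-based classification above. The real
# imageRegex/videoRegex live in constants; the patterns below are stand-ins.
import re

imageRegex = re.compile(r'\.(png|jpe?g|tiff?)$', re.IGNORECASE)  # hypothetical
videoRegex = re.compile(r'\.(mp4|avi|mov|mpe?g)$', re.IGNORECASE)  # hypothetical

assert imageRegex.search('/imports/site4/frame_0001.PNG')
assert videoRegex.search('/imports/camera1/clip.mp4')
assert not videoRegex.search('/imports/camera1/notes.txt')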
def export_dataset_zipstream(
    dsFolder: types.GirderModel,
    user: types.GirderUserModel,
    includeMedia: bool,
    includeDetections: bool,
    excludeBelowThreshold: bool,
    typeFilter: Optional[List[str]],
):
    _, gen = crud_annotation.get_annotation_csv_generator(
        dsFolder, user, excludeBelowThreshold, typeFilter
    )
    mediaFolder = crud.getCloneRoot(user, dsFolder)
    source_type = fromMeta(mediaFolder, constants.TypeMarker)
    mediaRegex = None
    if source_type == constants.ImageSequenceType:
        mediaRegex = constants.imageRegex
    elif source_type == constants.VideoType:
        mediaRegex = constants.videoRegex

    def makeMetajson():
        """Include dataset metadata file with full export"""
        meta = get_dataset(dsFolder, user)
        media = get_media(dsFolder, user)
        yield json.dumps(
            {
                **meta.dict(exclude_none=True),
                **media.dict(exclude_none=True),
            },
            indent=2,
        )

    def stream():
        z = ziputil.ZipGenerator(dsFolder['name'])
        # Always add the metadata file
        for data in z.addFile(makeMetajson, 'meta.json'):
            yield data
        if includeMedia:
            # Add media
            for item in Folder().childItems(
                mediaFolder,
                filters={"lowerName": {"$regex": mediaRegex}},
            ):
                for (path, file) in Item().fileList(item):
                    for data in z.addFile(file, path):
                        yield data
                    break  # Media items should only have 1 valid file
        if includeDetections:
            # TODO Add back in dump to json
            # add CSV detections
            for data in z.addFile(gen, "output_tracks.csv"):
                yield data
        yield z.footer()

    return stream
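# Hedged consumption sketch: the returned callable produces zip chunks lazily,
# suitable for a streaming HTTP response or a local file. Assumes the
# ZipGenerator yields bytes and that dsFolder/user are loaded Girder documents.
stream = export_dataset_zipstream(
    dsFolder, user,
    includeMedia=True, includeDetections=True,
    excludeBelowThreshold=False, typeFilter=None,
)
with open('export.zip', 'wb') as fh:
    for chunk in stream():
        fh.write(chunk)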
def saveImportAttributes(folder, attributes, user):
    attributes_dict = fromMeta(folder, 'attributes', {})
    # we don't overwrite any existing meta attributes
    for attribute in attributes.values():
        validated: models.Attribute = models.Attribute(**attribute)
        if attribute['key'] not in attributes_dict:
            attributes_dict[str(validated.key)] = validated.dict(exclude_none=True)

    folder['meta']['attributes'] = attributes_dict
    Folder().save(folder)
def getCloneRoot(owner: GirderModel, source_folder: GirderModel):
    """Get the source media folder associated with a clone"""
    verify_dataset(source_folder)
    next_id = fromMeta(source_folder, constants.ForeignMediaIdMarker, False)
    while next_id is not False:
        # Recurse through source folders to find the root, allowing clones of clones
        source_folder = Folder().load(
            next_id,
            level=AccessType.READ,
            user=owner,
        )
        if source_folder is None:
            raise RestException(
                (
                    f"Referenced media source missing. Folder Id {next_id} was not found."
                    " This may be a cloned dataset where the source was deleted."
                ),
                code=404,
            )
        verify_dataset(source_folder)
        next_id = fromMeta(source_folder, constants.ForeignMediaIdMarker, False)
    return source_folder
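# Conceptual model of the chain getCloneRoot walks, as plain dicts: each clone
# stores its source's id under ForeignMediaIdMarker (the 'source_id' key below
# is illustrative, not the real marker value); the root has no such pointer.
folders = {
    'root':   {'meta': {}},
    'cloneA': {'meta': {'source_id': 'root'}},
    'cloneB': {'meta': {'source_id': 'cloneA'}},  # a clone of a clone
}

def find_root(folder_id: str) -> str:
    next_id = folders[folder_id]['meta'].get('source_id', False)
    while next_id is not False:
        folder_id = next_id
        next_id = folders[folder_id]['meta'].get('source_id', False)
    return folder_id

assert find_root('cloneB') == 'root'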
def get_media(dsFolder: types.GirderModel, user: types.GirderUserModel) -> models.DatasetSourceMedia:
    videoResource = None
    imageData: List[models.MediaResource] = []
    crud.verify_dataset(dsFolder)
    source_type = fromMeta(dsFolder, constants.TypeMarker)

    if source_type == constants.VideoType:
        # Find a video tagged with an h264 codec left by the transcoder
        videoItem = Item().findOne({
            'folderId': crud.getCloneRoot(user, dsFolder)['_id'],
            'meta.codec': 'h264',
            'meta.source_video': {'$in': [None, False]},
        })
        if videoItem:
            videoFile: types.GirderModel = Item().childFiles(videoItem)[0]
            videoResource = models.MediaResource(
                id=str(videoFile['_id']),
                url=get_url(videoFile),
                filename=videoFile['name'],
            )
    elif source_type == constants.ImageSequenceType:
        imageData = [
            models.MediaResource(
                id=str(image["_id"]),
                url=get_url(image, modelType='item'),
                filename=image['name'],
            )
            for image in crud.valid_images(dsFolder, user)
        ]
    else:
        raise ValueError(f'Unrecognized source type: {source_type}')

    return models.DatasetSourceMedia(
        imageData=imageData,
        video=videoResource,
    )
def update_attributes(dsFolder: types.GirderModel, data: dict):
    """Upsert or delete attributes"""
    crud.verify_dataset(dsFolder)
    validated: AttributeUpdateArgs = crud.get_validated_model(AttributeUpdateArgs, **data)
    attributes_dict = fromMeta(dsFolder, 'attributes', {})

    for attribute_id in validated.delete:
        attributes_dict.pop(str(attribute_id), None)
    for attribute in validated.upsert:
        attributes_dict[str(attribute.key)] = attribute.dict(exclude_none=True)

    upserted_len = len(validated.upsert)
    deleted_len = len(validated.delete)
    if upserted_len or deleted_len:
        update_metadata(dsFolder, {'attributes': attributes_dict})

    return {
        "updated": upserted_len,
        "deleted": deleted_len,
    }
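# Hypothetical payload for update_attributes; the exact field set comes from
# models.Attribute, so the attribute body below is illustrative only.
payload = {
    'upsert': [{'key': 'species', 'name': 'species', 'datatype': 'text', 'belongs': 'track'}],
    'delete': ['obsolete_key'],
}
# update_attributes(dsFolder, payload) would return {"updated": 1, "deleted": 1}.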
def convert_video(self: Task, folderId: str, itemId: str):
    context: dict = {}
    gc: GirderClient = self.girder_client
    manager: JobManager = patch_manager(self.job_manager)
    if utils.check_canceled(self, context):
        manager.updateStatus(JobStatus.CANCELED)
        return

    folderData = gc.getFolder(folderId)
    requestedFps = fromMeta(folderData, constants.FPSMarker)

    with tempfile.TemporaryDirectory() as _working_directory, suppress(utils.CanceledError):
        _working_directory_path = Path(_working_directory)
        item: GirderModel = gc.getItem(itemId)
        file_name = str(_working_directory_path / item['name'])
        output_file_path = (_working_directory_path / item['name']).with_suffix('.transcoded.mp4')
        manager.write(f'Fetching input from {itemId} to {file_name}...\n')
        gc.downloadItem(itemId, _working_directory_path, name=item.get('name'))

        command = [
            "ffprobe",
            "-print_format", "json",
            "-v", "quiet",
            "-show_format",
            "-show_streams",
            file_name,
        ]
        stdout = utils.stream_subprocess(
            self, context, manager, {'args': command}, keep_stdout=True
        )
        jsoninfo = json.loads(stdout)
        videostream = list(filter(lambda x: x["codec_type"] == "video", jsoninfo["streams"]))
        if len(videostream) != 1:
            raise Exception('Expected 1 video stream, found {}'.format(len(videostream)))

        # Extract average framerate
        avgFpsString: str = videostream[0]["avg_frame_rate"]
        originalFps = None
        if avgFpsString:
            dividend, divisor = [int(v) for v in avgFpsString.split('/')]
            originalFps = dividend / divisor
        else:
            raise Exception('Expected key avg_frame_rate in ffprobe')

        if requestedFps == -1:
            newAnnotationFps = originalFps
        else:
            newAnnotationFps = min(requestedFps, originalFps)
        if newAnnotationFps < 1:
            raise Exception('FPS lower than 1 is not supported')

        command = [
            "ffmpeg",
            "-i", file_name,
            "-c:v", "libx264",
            "-preset", "slow",
            # https://github.com/Kitware/dive/issues/855
            "-crf", "22",
            # https://askubuntu.com/questions/1315697/could-not-find-tag-for-codec-pcm-s16le-in-stream-1-codec-not-currently-support
            "-c:a", "aac",
            # see native/<platform> code for a discussion of this option
            "-vf", "scale=ceil(iw*sar/2)*2:ceil(ih/2)*2,setsar=1",
            str(output_file_path),
        ]
        utils.stream_subprocess(self, context, manager, {'args': command})

        # Check to see if frame alignment remains the same
        aligned_file = check_and_fix_frame_alignment(self, output_file_path, context, manager)

        manager.updateStatus(JobStatus.PUSHING_OUTPUT)
        new_file = gc.uploadFileToFolder(folderId, aligned_file)

        gc.addMetadataToItem(
            new_file['itemId'],
            {
                "source_video": False,
                "transcoder": "ffmpeg",
                constants.OriginalFPSMarker: originalFps,
                constants.OriginalFPSStringMarker: avgFpsString,
                "codec": "h264",
            },
        )
        gc.addMetadataToItem(
            itemId,
            {
                "source_video": True,
                constants.OriginalFPSMarker: originalFps,
                constants.OriginalFPSStringMarker: avgFpsString,
                "codec": videostream[0]["codec_name"],
            },
        )
        gc.addMetadataToFolder(
            folderId,
            {
                constants.DatasetMarker: True,  # mark the parent folder as able to annotate.
                constants.OriginalFPSMarker: originalFps,
                constants.OriginalFPSStringMarker: avgFpsString,
                constants.FPSMarker: newAnnotationFps,
                "ffprobe_info": videostream[0],
            },
        )
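# Standalone restatement of the framerate logic above, runnable without ffmpeg.
# resolve_annotation_fps is a hypothetical helper, not part of the task code:
# ffprobe reports avg_frame_rate as a rational string, a requested fps of -1
# means "keep the original", and the annotation fps never exceeds the source.
def resolve_annotation_fps(requested: float, avg_frame_rate: str) -> float:
    dividend, divisor = (int(v) for v in avg_frame_rate.split('/'))
    original = dividend / divisor
    return original if requested == -1 else min(requested, original)

assert abs(resolve_annotation_fps(-1, '30000/1001') - 29.97) < 0.001  # NTSC video
assert resolve_annotation_fps(5, '30/1') == 5
assert resolve_annotation_fps(60, '30/1') == 30  # clamped to the source fps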
def run_pipeline(self: Task, params: PipelineJob):
    conf = Config()
    context: dict = {}
    manager: JobManager = patch_manager(self.job_manager)
    if utils.check_canceled(self, context):
        manager.updateStatus(JobStatus.CANCELED)
        return

    gc: GirderClient = self.girder_client
    utils.authenticate_urllib(gc)
    manager.updateStatus(JobStatus.FETCHING_INPUT)

    # Extract params
    pipeline = params["pipeline"]
    input_folder_id = str(params["input_folder"])
    input_type = params["input_type"]
    output_folder_id = str(params["output_folder"])
    input_revision = params["input_revision"]

    with tempfile.TemporaryDirectory() as _working_directory, suppress(utils.CanceledError):
        _working_directory_path = Path(_working_directory)
        input_path = utils.make_directory(_working_directory_path / 'input')
        trained_pipeline_path = utils.make_directory(_working_directory_path / 'trained_pipeline')
        output_path = utils.make_directory(_working_directory_path / 'output')
        detector_output_file = str(output_path / 'detector_output.csv')
        track_output_file = str(output_path / 'track_output.csv')
        img_list_path = input_path / 'img_list_file.txt'

        if pipeline["type"] == constants.TrainedPipelineCategory:
            gc.downloadFolderRecursive(pipeline["folderId"], str(trained_pipeline_path))
            pipeline_path = trained_pipeline_path / pipeline["pipe"]
        else:
            pipeline_path = conf.get_extracted_pipeline_path() / pipeline["pipe"]
        assert pipeline_path.exists(), (
            "Requested pipeline could not be found."
            " Make sure that VIAME is installed correctly and all addons have loaded."
            f" Job asked for {pipeline_path} but it does not exist"
        )

        # Download source media
        input_folder: GirderModel = gc.getFolder(input_folder_id)
        input_media_list, _ = utils.download_source_media(gc, input_folder_id, input_path)

        if input_type == constants.VideoType:
            input_fps = fromMeta(input_folder, constants.FPSMarker)
            assert len(input_media_list) == 1, "Expected exactly 1 video"
            command = [
                f". {shlex.quote(str(conf.viame_setup_script))} &&",
                f"KWIVER_DEFAULT_LOG_LEVEL={shlex.quote(conf.kwiver_log_level)}",
                "kwiver runner",
                "-s input:video_reader:type=vidl_ffmpeg",
                f"-p {shlex.quote(str(pipeline_path))}",
                f"-s input:video_filename={shlex.quote(input_media_list[0])}",
                f"-s downsampler:target_frame_rate={shlex.quote(str(input_fps))}",
                f"-s detector_writer:file_name={shlex.quote(detector_output_file)}",
                f"-s track_writer:file_name={shlex.quote(track_output_file)}",
            ]
        elif input_type == constants.ImageSequenceType:
            with open(img_list_path, "w+") as img_list_file:
                img_list_file.write('\n'.join(input_media_list))
            command = [
                f". {shlex.quote(str(conf.viame_setup_script))} &&",
                f"KWIVER_DEFAULT_LOG_LEVEL={shlex.quote(conf.kwiver_log_level)}",
                "kwiver runner",
                f"-p {shlex.quote(str(pipeline_path))}",
                f"-s input:video_filename={shlex.quote(str(img_list_path))}",
                f"-s detector_writer:file_name={shlex.quote(detector_output_file)}",
                f"-s track_writer:file_name={shlex.quote(track_output_file)}",
            ]
        else:
            raise ValueError('Unknown input type: {}'.format(input_type))

        # Include input detections
        if input_revision is not None:
            pipeline_input_file = input_path / 'groundtruth.csv'
            utils.download_revision_csv(gc, input_folder_id, input_revision, pipeline_input_file)
            quoted_input_file = shlex.quote(str(pipeline_input_file))
            command.append(f'-s detection_reader:file_name={quoted_input_file}')
            command.append(f'-s track_reader:file_name={quoted_input_file}')

        manager.updateStatus(JobStatus.RUNNING)
        popen_kwargs = {
            'args': " ".join(command),
            'shell': True,
            'executable': '/bin/bash',
            'cwd': output_path,
            'env': conf.gpu_process_env,
        }
        utils.stream_subprocess(self, context, manager, popen_kwargs)

        if Path(track_output_file).exists() and os.path.getsize(track_output_file):
            output_file = track_output_file
        else:
            output_file = detector_output_file

        manager.updateStatus(JobStatus.PUSHING_OUTPUT)
        newfile = gc.uploadFileToFolder(output_folder_id, output_file)
        gc.addMetadataToItem(str(newfile["itemId"]), {"pipeline": pipeline})
        gc.post(f'dive_rpc/postprocess/{output_folder_id}', data={"skipJobs": True})
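# Why every interpolated value above passes through shlex.quote: the command
# list is joined with spaces and run with shell=True, so an unquoted path with
# spaces or parentheses would split or break the shell line. Runnable check:
import shlex

path = '/data/my video (copy).mp4'
print(f"-s input:video_filename={shlex.quote(path)}")
# prints: -s input:video_filename='/data/my video (copy).mp4'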
def postprocess(
    user: types.GirderUserModel, dsFolder: types.GirderModel, skipJobs: bool
) -> types.GirderModel:
    """
    Post-processing to be run after media/annotation import

    When skipJobs=False, the following may run as jobs:
        Transcoding of Video
        Transcoding of Images
        Conversion of KPF annotations into track JSON
        Extraction and upload of zip files

    In either case, the following may run synchronously:
        Conversion of CSV annotations into track JSON
    """
    job_is_private = user.get(constants.UserPrivateQueueEnabledMarker, False)
    isClone = fromMeta(dsFolder, constants.ForeignMediaIdMarker, None) is not None

    # add default confidence filter threshold to folder metadata
    dsFolder['meta'][constants.ConfidenceFiltersMarker] = {'default': 0.1}

    # Validate user-supplied metadata fields are present
    if fromMeta(dsFolder, constants.FPSMarker) is None:
        raise RestException(f'{constants.FPSMarker} missing from metadata')
    if fromMeta(dsFolder, constants.TypeMarker) is None:
        raise RestException(f'{constants.TypeMarker} missing from metadata')

    if not skipJobs and not isClone:
        token = Token().createToken(user=user, days=2)

        # extract ZIP Files if not already completed
        zipItems = list(
            Folder().childItems(
                dsFolder,
                filters={"lowerName": {"$regex": constants.zipRegex}},
            )
        )
        if len(zipItems) > 1:
            raise RestException('There are multiple zip files in the folder.')
        for item in zipItems:
            total_items = len(list(Folder().childItems(dsFolder)))
            if total_items > 1:
                raise RestException('There are multiple files besides a zip, cannot continue')
            newjob = tasks.extract_zip.apply_async(
                queue=_get_queue_name(user),
                kwargs=dict(
                    folderId=str(item["folderId"]),
                    itemId=str(item["_id"]),
                    girder_job_title=f"Extracting {item['_id']} to folder {str(dsFolder['_id'])}",
                    girder_client_token=str(token["_id"]),
                    girder_job_type="private" if job_is_private else "convert",
                ),
            )
            newjob.job[constants.JOBCONST_PRIVATE_QUEUE] = job_is_private
            Job().save(newjob.job)
            return dsFolder

        # transcode VIDEO if necessary
        videoItems = Folder().childItems(
            dsFolder, filters={"lowerName": {"$regex": constants.videoRegex}}
        )
        for item in videoItems:
            newjob = tasks.convert_video.apply_async(
                queue=_get_queue_name(user),
                kwargs=dict(
                    folderId=str(item["folderId"]),
                    itemId=str(item["_id"]),
                    girder_job_title=f"Converting {item['_id']} to a web friendly format",
                    girder_client_token=str(token["_id"]),
                    girder_job_type="private" if job_is_private else "convert",
                ),
            )
            newjob.job[constants.JOBCONST_PRIVATE_QUEUE] = job_is_private
            newjob.job[constants.JOBCONST_DATASET_ID] = dsFolder["_id"]
            Job().save(newjob.job)

        # transcode IMAGERY if necessary
        imageItems = Folder().childItems(
            dsFolder, filters={"lowerName": {"$regex": constants.imageRegex}}
        )
        safeImageItems = Folder().childItems(
            dsFolder, filters={"lowerName": {"$regex": constants.safeImageRegex}}
        )
        if imageItems.count() > safeImageItems.count():
            newjob = tasks.convert_images.apply_async(
                queue=_get_queue_name(user),
                kwargs=dict(
                    folderId=dsFolder["_id"],
                    girder_client_token=str(token["_id"]),
                    girder_job_title=f"Converting {dsFolder['_id']} to a web friendly format",
                    girder_job_type="private" if job_is_private else "convert",
                ),
            )
            newjob.job[constants.JOBCONST_PRIVATE_QUEUE] = job_is_private
            newjob.job[constants.JOBCONST_DATASET_ID] = dsFolder["_id"]
            Job().save(newjob.job)
        elif imageItems.count() > 0:
            dsFolder["meta"][constants.DatasetMarker] = True

        # transform KPF if necessary
        ymlItems = Folder().childItems(
            dsFolder, filters={"lowerName": {"$regex": constants.ymlRegex}}
        )
        if ymlItems.count() > 0:
            # There might be up to 3 yamls
            def make_file_generator(item):
                file = Item().childFiles(item)[0]
                return File().download(file, headers=False)()

            allFiles = [make_file_generator(item) for item in ymlItems]
            data = meva.load_kpf_as_tracks(allFiles)
            crud_annotation.save_annotations(
                dsFolder, data.values(), [], user, overwrite=True, description="Import from KPF"
            )
            ymlItems.rewind()
            auxiliary = crud.get_or_create_auxiliary_folder(dsFolder, user)
            for item in ymlItems:
                Item().move(item, auxiliary)

    Folder().save(dsFolder)
    process_items(dsFolder, user)
    return dsFolder
def run_pipeline(
    user: types.GirderUserModel,
    folder: types.GirderModel,
    pipeline: types.PipelineDescription,
) -> types.GirderModel:
    """
    Run a pipeline on a dataset.

    :param folder: The girder folder containing the dataset to run on.
    :param pipeline: The pipeline to run the dataset on.
    """
    verify_pipe(user, pipeline)
    crud.getCloneRoot(user, folder)
    folder_id_str = str(folder["_id"])

    # First, verify that no other outstanding jobs are running on this dataset
    if _check_running_jobs(folder_id_str):
        raise RestException(
            (
                f"A pipeline for {folder_id_str} is already running. "
                "Only one outstanding job may be run at a time for "
                "a dataset."
            )
        )

    token = Token().createToken(user=user, days=14)

    input_revision = None  # include CSV input for pipe
    if pipeline["type"] == constants.TrainedPipelineCategory:
        # Verify that the user has READ access to the pipe they want to run
        pipeFolder = Folder().load(pipeline["folderId"], level=AccessType.READ, user=user)
        if asbool(fromMeta(pipeFolder, "requires_input")):
            input_revision = crud_annotation.get_last_revision(folder)
    elif pipeline["pipe"].startswith('utility_'):
        # TODO Temporary inclusion of utility pipes which take csv input
        input_revision = crud_annotation.get_last_revision(folder)

    job_is_private = user.get(constants.UserPrivateQueueEnabledMarker, False)

    params: types.PipelineJob = {
        "pipeline": pipeline,
        "input_folder": folder_id_str,
        "input_type": fromMeta(folder, "type", required=True),
        "output_folder": folder_id_str,
        "input_revision": input_revision,
    }
    newjob = tasks.run_pipeline.apply_async(
        queue=_get_queue_name(user, "pipelines"),
        kwargs=dict(
            params=params,
            girder_job_title=f"Running {pipeline['name']} on {str(folder['name'])}",
            girder_client_token=str(token["_id"]),
            girder_job_type="private" if job_is_private else "pipelines",
        ),
    )
    newjob.job[constants.JOBCONST_PRIVATE_QUEUE] = job_is_private
    newjob.job[constants.JOBCONST_DATASET_ID] = folder_id_str
    newjob.job[constants.JOBCONST_PARAMS] = params
    newjob.job[constants.JOBCONST_CREATOR] = str(user['_id'])
    # Allow any users with access to the input data to also
    # see and possibly manage the job
    Job().copyAccessPolicies(folder, newjob.job)
    Job().save(newjob.job)
    return newjob.job
def itemIsWebsafeVideo(item: Item) -> bool:
    return fromMeta(item, "codec") == "h264"