Example #1
def get_annotation_csv_generator(
    folder: types.GirderModel,
    user: types.GirderUserModel,
    excludeBelowThreshold=False,
    typeFilter=None,
) -> Tuple[str, Callable[[], Generator[str, None, None]]]:
    """Get the annotation generator for a folder"""
    fps = None
    imageFiles = None

    source_type = fromMeta(folder, constants.TypeMarker)
    if source_type == constants.VideoType:
        fps = fromMeta(folder, constants.FPSMarker)
    elif source_type == constants.ImageSequenceType:
        imageFiles = [img['name'] for img in crud.valid_images(folder, user)]

    thresholds = fromMeta(folder, "confidenceFilters", {})

    def downloadGenerator():
        datalist, _ = get_annotations(folder)
        for data in viame.export_tracks_as_csv(
                datalist,
                excludeBelowThreshold,
                thresholds=thresholds,
                filenames=imageFiles,
                fps=fps,
                typeFilter=typeFilter,
        ):
            yield data

    filename = folder["name"] + ".csv"
    return filename, downloadGenerator
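A minimal usage sketch for the pair returned above, assuming `folder` and `user` are Girder documents already loaded by the caller; the writer loop is illustrative only:

# Hypothetical caller: stream the CSV to disk without buffering it all in memory.
filename, gen = get_annotation_csv_generator(folder, user, excludeBelowThreshold=True)
with open(filename, 'w') as csv_file:
    # The second return value is a zero-argument generator factory.
    for chunk in gen():
        csv_file.write(chunk)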
Example #2
File: crud.py Project: Kitware/dive
def verify_dataset(folder: GirderModel):
    """Verify that a given folder is a DIVE dataset"""
    if not asbool(fromMeta(folder, constants.DatasetMarker, False)):
        raise RestException('Source folder is not a valid DIVE dataset',
                            code=404)
    dstype = fromMeta(folder, 'type')
    if dstype not in [constants.ImageSequenceType, constants.VideoType]:
        raise ValueError(
            f'Source folder is marked as dataset but has invalid type {dstype}'
        )
    if dstype == constants.VideoType:
        fps = fromMeta(folder, 'fps')
        if type(fps) not in [int, float]:
            raise ValueError(f'Video missing numerical fps, found {fps}')
    return True
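Since verify_dataset raises on failure rather than returning False, it reads naturally as a guard clause. A sketch, assuming `folder` is a loaded Girder folder document:

# Guard-style usage: invalid folders raise before any real work begins.
try:
    verify_dataset(folder)
except (RestException, ValueError) as err:
    print(f'skipping {folder["name"]}: {err}')  # not a usable DIVE dataset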
Example #3
File: event.py Project: Kitware/dive
def process_assetstore_import(event, meta: dict):
    """
    Function for appending the appropriate metadata to no-copy import data
    """
    info = event.info
    objectType = info.get("type")
    importPath = info.get("importPath")
    now = datetime.now()

    if not importPath or not objectType or objectType != "item":
        return

    dataset_type = None
    item = Item().findOne({"_id": info["id"]})
    item['meta'].update({
        **meta,
        AssetstoreSourcePathMarker: importPath,
    })

    # TODO figure out what's going on here?

    if imageRegex.search(importPath):
        dataset_type = ImageSequenceType

    elif videoRegex.search(importPath):
        # Look for existing video dataset directory
        parentFolder = Folder().findOne({"_id": item["folderId"]})
        userId = parentFolder['creatorId'] or parentFolder['baseParentId']
        user = User().findOne({'_id': ObjectId(userId)})
        foldername = f'Video {item["name"]}'
        dest = Folder().createFolder(parentFolder,
                                     foldername,
                                     creator=user,
                                     reuseExisting=True)
        if dest['created'] < now:
            # Remove the old item, replace it with the new one.
            oldItem = Item().findOne({
                'folderId': dest['_id'],
                'name': item['name']
            })
            if oldItem is not None:
                Item().remove(oldItem)
        Item().move(item, dest)
        dataset_type = VideoType

    if dataset_type is not None:
        # Update metadata of parent folder
        # FPS is hardcoded for now
        Item().save(item)
        folder = Folder().findOne({"_id": item["folderId"]})
        root, _ = os.path.split(importPath)
        if not asbool(fromMeta(folder, DatasetMarker)):
            folder["meta"].update({
                TypeMarker: dataset_type,
                FPSMarker: DefaultVideoFPS,
                DatasetMarker: True,
                AssetstoreSourcePathMarker: root,
                **meta,
            })
            Folder().save(folder)
Example #4
def export_dataset_zipstream(
    dsFolder: types.GirderModel,
    user: types.GirderUserModel,
    includeMedia: bool,
    includeDetections: bool,
    excludeBelowThreshold: bool,
    typeFilter: Optional[List[str]],
):
    _, gen = crud_annotation.get_annotation_csv_generator(
        dsFolder, user, excludeBelowThreshold, typeFilter)
    mediaFolder = crud.getCloneRoot(user, dsFolder)
    source_type = fromMeta(mediaFolder, constants.TypeMarker)
    mediaRegex = None
    if source_type == constants.ImageSequenceType:
        mediaRegex = constants.imageRegex
    elif source_type == constants.VideoType:
        mediaRegex = constants.videoRegex

    def makeMetajson():
        """Include dataset metadtata file with full export"""
        meta = get_dataset(dsFolder, user)
        media = get_media(dsFolder, user)
        yield json.dumps(
            {
                **meta.dict(exclude_none=True),
                **media.dict(exclude_none=True),
            },
            indent=2,
        )

    def stream():
        z = ziputil.ZipGenerator(dsFolder['name'])

        # Always add the metadata file
        for data in z.addFile(makeMetajson, 'meta.json'):
            yield data

        if includeMedia:
            # Add media
            for item in Folder().childItems(
                    mediaFolder,
                    filters={"lowerName": {
                        "$regex": mediaRegex
                    }},
            ):
                for (path, file) in Item().fileList(item):
                    for data in z.addFile(file, path):
                        yield data
                    break  # Media items should only have 1 valid file

        if includeDetections:
            # TODO Add back in dump to json
            # add CSV detections
            for data in z.addFile(gen, "output_tracks.csv"):
                yield data
        yield z.footer()

    return stream
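The function hands back the stream callable itself rather than its output, so the caller drives the zip generation. A sketch, assuming the arguments are already in scope; the local filename is illustrative:

# Hypothetical caller: materialize the streaming zip to a local file.
stream = export_dataset_zipstream(
    dsFolder, user,
    includeMedia=True,
    includeDetections=True,
    excludeBelowThreshold=False,
    typeFilter=None,
)
with open(f'{dsFolder["name"]}.zip', 'wb') as zip_file:
    for chunk in stream():
        zip_file.write(chunk)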
Example #5
File: crud.py Project: Kitware/dive
def saveImportAttributes(folder, attributes, user):
    attributes_dict = fromMeta(folder, 'attributes', {})
    # we don't overwrite any existing meta attributes
    for attribute in attributes.values():
        validated: models.Attribute = models.Attribute(**attribute)
        if attribute['key'] not in attributes_dict:
            attributes_dict[str(validated.key)] = validated.dict(exclude_none=True)

    folder['meta']['attributes'] = attributes_dict
    Folder().save(folder)
Example #6
File: crud.py Project: Kitware/dive
def getCloneRoot(owner: GirderModel, source_folder: GirderModel):
    """Get the source media folder associated with a clone"""
    verify_dataset(source_folder)
    next_id = fromMeta(source_folder, constants.ForeignMediaIdMarker, False)
    while next_id is not False:
        # Recurse through source folders to find the root, allowing clones of clones
        source_folder = Folder().load(
            next_id,
            level=AccessType.READ,
            user=owner,
        )
        if source_folder is None:
            raise RestException(
                (f"Referenced media source missing. Folder Id {next_id} was not found."
                 " This may be a cloned dataset where the source was deleted."
                 ),
                code=404,
            )
        verify_dataset(source_folder)
        next_id = fromMeta(source_folder, constants.ForeignMediaIdMarker,
                           False)
    return source_folder
Example #7
def get_media(dsFolder: types.GirderModel,
              user: types.GirderUserModel) -> models.DatasetSourceMedia:
    videoResource = None
    imageData: List[models.MediaResource] = []
    crud.verify_dataset(dsFolder)
    source_type = fromMeta(dsFolder, constants.TypeMarker)

    if source_type == constants.VideoType:
        # Find a video tagged with an h264 codec left by the transcoder
        videoItem = Item().findOne({
            'folderId': crud.getCloneRoot(user, dsFolder)['_id'],
            'meta.codec': 'h264',
            'meta.source_video': {'$in': [None, False]},
        })
        if videoItem:
            videoFile: types.GirderModel = Item().childFiles(videoItem)[0]
            videoResource = models.MediaResource(
                id=str(videoFile['_id']),
                url=get_url(videoFile),
                filename=videoFile['name'],
            )
    elif source_type == constants.ImageSequenceType:
        imageData = [
            models.MediaResource(
                id=str(image["_id"]),
                url=get_url(image, modelType='item'),
                filename=image['name'],
            ) for image in crud.valid_images(dsFolder, user)
        ]
    else:
        raise ValueError(f'Unrecognized source type: {source_type}')

    return models.DatasetSourceMedia(
        imageData=imageData,
        video=videoResource,
    )
Example #8
def update_attributes(dsFolder: types.GirderModel, data: dict):
    """Upsert or delete attributes"""
    crud.verify_dataset(dsFolder)
    validated: AttributeUpdateArgs = crud.get_validated_model(
        AttributeUpdateArgs, **data)
    attributes_dict = fromMeta(dsFolder, 'attributes', {})

    for attribute_id in validated.delete:
        attributes_dict.pop(str(attribute_id), None)
    for attribute in validated.upsert:
        attributes_dict[str(attribute.key)] = attribute.dict(exclude_none=True)

    upserted_len = len(validated.upsert)
    deleted_len = len(validated.delete)

    if upserted_len or deleted_len:
        update_metadata(dsFolder, {'attributes': attributes_dict})

    return {
        "updated": upserted_len,
        "deleted": deleted_len,
    }
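The `data` argument must validate against AttributeUpdateArgs, i.e. carry `upsert` and `delete` collections. A sketch of a plausible payload; the attribute fields shown are illustrative and may not match the project's models.Attribute schema exactly:

# Illustrative payload only; field names besides 'key' are assumptions.
example_payload = {
    'upsert': [
        {'key': 'species', 'belongs': 'track', 'datatype': 'text', 'name': 'species'},
    ],
    'delete': ['confidence_note'],  # attribute keys to remove
}
result = update_attributes(dsFolder, example_payload)
# result == {'updated': 1, 'deleted': 1}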
Example #9
def convert_video(self: Task, folderId: str, itemId: str):
    context: dict = {}
    gc: GirderClient = self.girder_client
    manager: JobManager = patch_manager(self.job_manager)
    if utils.check_canceled(self, context):
        manager.updateStatus(JobStatus.CANCELED)
        return

    folderData = gc.getFolder(folderId)
    requestedFps = fromMeta(folderData, constants.FPSMarker)

    with tempfile.TemporaryDirectory() as _working_directory, suppress(utils.CanceledError):
        _working_directory_path = Path(_working_directory)
        item: GirderModel = gc.getItem(itemId)
        file_name = str(_working_directory_path / item['name'])
        output_file_path = (_working_directory_path / item['name']).with_suffix('.transcoded.mp4')
        manager.write(f'Fetching input from {itemId} to {file_name}...\n')
        gc.downloadItem(itemId, _working_directory_path, name=item.get('name'))

        command = [
            "ffprobe",
            "-print_format",
            "json",
            "-v",
            "quiet",
            "-show_format",
            "-show_streams",
            file_name,
        ]
        stdout = utils.stream_subprocess(
            self, context, manager, {'args': command}, keep_stdout=True
        )
        jsoninfo = json.loads(stdout)
        videostream = list(filter(lambda x: x["codec_type"] == "video", jsoninfo["streams"]))
        if len(videostream) != 1:
            raise Exception('Expected 1 video stream, found {}'.format(len(videostream)))

        # Extract average framerate
        avgFpsString: str = videostream[0]["avg_frame_rate"]
        originalFps = None
        if avgFpsString:
            dividend, divisor = [int(v) for v in avgFpsString.split('/')]
            originalFps = dividend / divisor
        else:
            raise Exception('Expected key avg_frame_rate in ffprobe')

        if requestedFps == -1:
            newAnnotationFps = originalFps
        else:
            newAnnotationFps = min(requestedFps, originalFps)
        if newAnnotationFps < 1:
            raise Exception('FPS lower than 1 is not supported')

        command = [
            "ffmpeg",
            "-i",
            file_name,
            "-c:v",
            "libx264",
            "-preset",
            "slow",
            # https://github.com/Kitware/dive/issues/855
            "-crf",
            "22",
            # https://askubuntu.com/questions/1315697/could-not-find-tag-for-codec-pcm-s16le-in-stream-1-codec-not-currently-support
            "-c:a",
            "aac",
            # see native/<platform> code for a discussion of this option
            "-vf",
            "scale=ceil(iw*sar/2)*2:ceil(ih/2)*2,setsar=1",
            str(output_file_path),
        ]
        utils.stream_subprocess(self, context, manager, {'args': command})
        # Check to see if frame alignment remains the same
        aligned_file = check_and_fix_frame_alignment(self, output_file_path, context, manager)

        manager.updateStatus(JobStatus.PUSHING_OUTPUT)
        new_file = gc.uploadFileToFolder(folderId, aligned_file)
        gc.addMetadataToItem(
            new_file['itemId'],
            {
                "source_video": False,
                "transcoder": "ffmpeg",
                constants.OriginalFPSMarker: originalFps,
                constants.OriginalFPSStringMarker: avgFpsString,
                "codec": "h264",
            },
        )
        gc.addMetadataToItem(
            itemId,
            {
                "source_video": True,
                constants.OriginalFPSMarker: originalFps,
                constants.OriginalFPSStringMarker: avgFpsString,
                "codec": videostream[0]["codec_name"],
            },
        )
        gc.addMetadataToFolder(
            folderId,
            {
                constants.DatasetMarker: True,  # mark the parent folder as able to annotate.
                constants.OriginalFPSMarker: originalFps,
                constants.OriginalFPSStringMarker: avgFpsString,
                constants.FPSMarker: newAnnotationFps,
                "ffprobe_info": videostream[0],
            },
        )
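The frame-rate handling above hinges on ffprobe reporting avg_frame_rate as a rational string. A self-contained sketch of that arithmetic, using an example value rather than a real probe result:

# ffprobe reports frame rates as rationals such as "30000/1001" (~29.97 fps).
avg_fps_string = "30000/1001"  # example value, not from a real probe
dividend, divisor = [int(v) for v in avg_fps_string.split('/')]
original_fps = dividend / divisor
requested_fps = -1  # -1 means "keep the native rate"
new_annotation_fps = original_fps if requested_fps == -1 else min(requested_fps, original_fps)
assert new_annotation_fps >= 1, 'FPS lower than 1 is not supported'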
Example #10
def run_pipeline(self: Task, params: PipelineJob):
    conf = Config()
    context: dict = {}
    manager: JobManager = patch_manager(self.job_manager)
    if utils.check_canceled(self, context):
        manager.updateStatus(JobStatus.CANCELED)
        return

    gc: GirderClient = self.girder_client
    utils.authenticate_urllib(gc)
    manager.updateStatus(JobStatus.FETCHING_INPUT)

    # Extract params
    pipeline = params["pipeline"]
    input_folder_id = str(params["input_folder"])
    input_type = params["input_type"]
    output_folder_id = str(params["output_folder"])
    input_revision = params["input_revision"]

    with tempfile.TemporaryDirectory() as _working_directory, suppress(utils.CanceledError):
        _working_directory_path = Path(_working_directory)
        input_path = utils.make_directory(_working_directory_path / 'input')
        trained_pipeline_path = utils.make_directory(_working_directory_path / 'trained_pipeline')
        output_path = utils.make_directory(_working_directory_path / 'output')

        detector_output_file = str(output_path / 'detector_output.csv')
        track_output_file = str(output_path / 'track_output.csv')
        img_list_path = input_path / 'img_list_file.txt'

        if pipeline["type"] == constants.TrainedPipelineCategory:
            gc.downloadFolderRecursive(pipeline["folderId"], str(trained_pipeline_path))
            pipeline_path = trained_pipeline_path / pipeline["pipe"]
        else:
            pipeline_path = conf.get_extracted_pipeline_path() / pipeline["pipe"]

        assert pipeline_path.exists(), (
            "Requested pipeline could not be found."
            " Make sure that VIAME is installed correctly and all addons have loaded."
            f" Job asked for {pipeline_path} but it does not exist"
        )

        # Download source media
        input_folder: GirderModel = gc.getFolder(input_folder_id)
        input_media_list, _ = utils.download_source_media(gc, input_folder_id, input_path)

        if input_type == constants.VideoType:
            input_fps = fromMeta(input_folder, constants.FPSMarker)
            assert len(input_media_list) == 1, "Expected exactly 1 video"
            command = [
                f". {shlex.quote(str(conf.viame_setup_script))} &&",
                f"KWIVER_DEFAULT_LOG_LEVEL={shlex.quote(conf.kwiver_log_level)}",
                "kwiver runner",
                "-s input:video_reader:type=vidl_ffmpeg",
                f"-p {shlex.quote(str(pipeline_path))}",
                f"-s input:video_filename={shlex.quote(input_media_list[0])}",
                f"-s downsampler:target_frame_rate={shlex.quote(str(input_fps))}",
                f"-s detector_writer:file_name={shlex.quote(detector_output_file)}",
                f"-s track_writer:file_name={shlex.quote(track_output_file)}",
            ]
        elif input_type == constants.ImageSequenceType:
            with open(img_list_path, "w+") as img_list_file:
                img_list_file.write('\n'.join(input_media_list))
            command = [
                f". {shlex.quote(str(conf.viame_setup_script))} &&",
                f"KWIVER_DEFAULT_LOG_LEVEL={shlex.quote(conf.kwiver_log_level)}",
                "kwiver runner",
                f"-p {shlex.quote(str(pipeline_path))}",
                f"-s input:video_filename={shlex.quote(str(img_list_path))}",
                f"-s detector_writer:file_name={shlex.quote(detector_output_file)}",
                f"-s track_writer:file_name={shlex.quote(track_output_file)}",
            ]
        else:
            raise ValueError('Unknown input type: {}'.format(input_type))

        # Include input detections
        if input_revision is not None:
            pipeline_input_file = input_path / 'groundtruth.csv'
            utils.download_revision_csv(gc, input_folder_id, input_revision, pipeline_input_file)
            quoted_input_file = shlex.quote(str(pipeline_input_file))
            command.append(f'-s detection_reader:file_name={quoted_input_file}')
            command.append(f'-s track_reader:file_name={quoted_input_file}')

        manager.updateStatus(JobStatus.RUNNING)
        popen_kwargs = {
            'args': " ".join(command),
            'shell': True,
            'executable': '/bin/bash',
            'cwd': output_path,
            'env': conf.gpu_process_env,
        }
        utils.stream_subprocess(self, context, manager, popen_kwargs)

        if Path(track_output_file).exists() and os.path.getsize(track_output_file):
            output_file = track_output_file
        else:
            output_file = detector_output_file

        manager.updateStatus(JobStatus.PUSHING_OUTPUT)
        newfile = gc.uploadFileToFolder(output_folder_id, output_file)

        gc.addMetadataToItem(str(newfile["itemId"]), {"pipeline": pipeline})
        gc.post(f'dive_rpc/postprocess/{output_folder_id}', data={"skipJobs": True})
Example #11
def postprocess(user: types.GirderUserModel, dsFolder: types.GirderModel,
                skipJobs: bool) -> types.GirderModel:
    """
    Post-processing to be run after media/annotation import

    When skipJobs=False, the following may run as jobs:
        Transcoding of Video
        Transcoding of Images
        Conversion of KPF annotations into track JSON
        Extraction and upload of zip files

    In either case, the following may run synchronously:
        Conversion of CSV annotations into track JSON
    """
    job_is_private = user.get(constants.UserPrivateQueueEnabledMarker, False)
    isClone = fromMeta(dsFolder, constants.ForeignMediaIdMarker,
                       None) is not None
    # add default confidence filter threshold to folder metadata
    dsFolder['meta'][constants.ConfidenceFiltersMarker] = {'default': 0.1}

    # Validate user-supplied metadata fields are present
    if fromMeta(dsFolder, constants.FPSMarker) is None:
        raise RestException(f'{constants.FPSMarker} missing from metadata')
    if fromMeta(dsFolder, constants.TypeMarker) is None:
        raise RestException(f'{constants.TypeMarker} missing from metadata')

    if not skipJobs and not isClone:
        token = Token().createToken(user=user, days=2)

        # extract ZIP Files if not already completed
        zipItems = list(Folder().childItems(
            dsFolder,
            filters={"lowerName": {
                "$regex": constants.zipRegex
            }},
        ))
        if len(zipItems) > 1:
            raise RestException('There are multiple zip files in the folder.')
        for item in zipItems:
            total_items = len(list(Folder().childItems(dsFolder)))
            if total_items > 1:
                raise RestException(
                    'There are multiple files besides a zip, cannot continue')
            newjob = tasks.extract_zip.apply_async(
                queue=_get_queue_name(user),
                kwargs=dict(
                    folderId=str(item["folderId"]),
                    itemId=str(item["_id"]),
                    girder_job_title=
                    f"Extracting {item['_id']} to folder {str(dsFolder['_id'])}",
                    girder_client_token=str(token["_id"]),
                    girder_job_type="private" if job_is_private else "convert",
                ),
            )
            newjob.job[constants.JOBCONST_PRIVATE_QUEUE] = job_is_private
            Job().save(newjob.job)
            return dsFolder

        # transcode VIDEO if necessary
        videoItems = Folder().childItems(
            dsFolder, filters={"lowerName": {
                "$regex": constants.videoRegex
            }})

        for item in videoItems:
            newjob = tasks.convert_video.apply_async(
                queue=_get_queue_name(user),
                kwargs=dict(
                    folderId=str(item["folderId"]),
                    itemId=str(item["_id"]),
                    girder_job_title=
                    f"Converting {item['_id']} to a web friendly format",
                    girder_client_token=str(token["_id"]),
                    girder_job_type="private" if job_is_private else "convert",
                ),
            )
            newjob.job[constants.JOBCONST_PRIVATE_QUEUE] = job_is_private
            newjob.job[constants.JOBCONST_DATASET_ID] = dsFolder["_id"]
            Job().save(newjob.job)

        # transcode IMAGERY if necessary
        imageItems = Folder().childItems(
            dsFolder, filters={"lowerName": {
                "$regex": constants.imageRegex
            }})
        safeImageItems = Folder().childItems(
            dsFolder,
            filters={"lowerName": {
                "$regex": constants.safeImageRegex
            }})

        if imageItems.count() > safeImageItems.count():
            newjob = tasks.convert_images.apply_async(
                queue=_get_queue_name(user),
                kwargs=dict(
                    folderId=dsFolder["_id"],
                    girder_client_token=str(token["_id"]),
                    girder_job_title=
                    f"Converting {dsFolder['_id']} to a web friendly format",
                    girder_job_type="private" if job_is_private else "convert",
                ),
            )
            newjob.job[constants.JOBCONST_PRIVATE_QUEUE] = job_is_private
            newjob.job[constants.JOBCONST_DATASET_ID] = dsFolder["_id"]
            Job().save(newjob.job)

        elif imageItems.count() > 0:
            dsFolder["meta"][constants.DatasetMarker] = True

        # transform KPF if necessary
        ymlItems = Folder().childItems(
            dsFolder, filters={"lowerName": {
                "$regex": constants.ymlRegex
            }})
        if ymlItems.count() > 0:
            # There might be up to 3 yamls
            def make_file_generator(item):
                file = Item().childFiles(item)[0]
                return File().download(file, headers=False)()

            allFiles = [make_file_generator(item) for item in ymlItems]
            data = meva.load_kpf_as_tracks(allFiles)
            crud_annotation.save_annotations(dsFolder,
                                             data.values(), [],
                                             user,
                                             overwrite=True,
                                             description="Import from KPF")
            ymlItems.rewind()
            auxiliary = crud.get_or_create_auxiliary_folder(dsFolder, user)
            for item in ymlItems:
                Item().move(item, auxiliary)

        Folder().save(dsFolder)

    process_items(dsFolder, user)
    return dsFolder
Example #12
def run_pipeline(
    user: types.GirderUserModel,
    folder: types.GirderModel,
    pipeline: types.PipelineDescription,
) -> types.GirderModel:
    """
    Run a pipeline on a dataset.

    :param folder: The girder folder containing the dataset to run on.
    :param pipeline: The pipeline to run the dataset on.
    """
    verify_pipe(user, pipeline)
    crud.getCloneRoot(user, folder)
    folder_id_str = str(folder["_id"])
    # First, verify that no other outstanding jobs are running on this dataset
    if _check_running_jobs(folder_id_str):
        raise RestException(
            (f"A pipeline for {folder_id_str} is already running. "
             "Only one outstanding job may be run at a time for "
             "a dataset."))

    token = Token().createToken(user=user, days=14)

    input_revision = None  # include CSV input for pipe
    if pipeline["type"] == constants.TrainedPipelineCategory:
        # Verify that the user has READ access to the pipe they want to run
        pipeFolder = Folder().load(pipeline["folderId"],
                                   level=AccessType.READ,
                                   user=user)
        if asbool(fromMeta(pipeFolder, "requires_input")):
            input_revision = crud_annotation.get_last_revision(folder)
    elif pipeline["pipe"].startswith('utility_'):
        # TODO Temporary inclusion of utility pipes which take csv input
        input_revision = crud_annotation.get_last_revision(folder)

    job_is_private = user.get(constants.UserPrivateQueueEnabledMarker, False)

    params: types.PipelineJob = {
        "pipeline": pipeline,
        "input_folder": folder_id_str,
        "input_type": fromMeta(folder, "type", required=True),
        "output_folder": folder_id_str,
        "input_revision": input_revision,
    }
    newjob = tasks.run_pipeline.apply_async(
        queue=_get_queue_name(user, "pipelines"),
        kwargs=dict(
            params=params,
            girder_job_title=
            f"Running {pipeline['name']} on {str(folder['name'])}",
            girder_client_token=str(token["_id"]),
            girder_job_type="private" if job_is_private else "pipelines",
        ),
    )
    newjob.job[constants.JOBCONST_PRIVATE_QUEUE] = job_is_private
    newjob.job[constants.JOBCONST_DATASET_ID] = folder_id_str
    newjob.job[constants.JOBCONST_PARAMS] = params
    newjob.job[constants.JOBCONST_CREATOR] = str(user['_id'])
    # Allow any users with access to the input data to also
    # see and possibly manage the job
    Job().copyAccessPolicies(folder, newjob.job)
    Job().save(newjob.job)
    return newjob.job
Example #13
File: crud.py Project: Kitware/dive
def itemIsWebsafeVideo(item: Item) -> bool:
    return fromMeta(item, "codec") == "h264"
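A predicate like this is typically applied as a filter over a folder's items. A one-line sketch, assuming `folder` is a loaded Girder folder:

# Hypothetical filter: collect only transcoded, browser-playable videos.
websafe = [item for item in Folder().childItems(folder) if itemIsWebsafeVideo(item)]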