Example #1
def move_backups_to_s3():
    """Uploads every file in /backup to object storage, then removes the local copy."""
    store = get_tator_store().store
    transfer = S3Transfer(store)
    bucket_name = os.getenv('BUCKET_NAME')
    num_moved = 0
    for backup in os.listdir('/backup'):
        logger.info(f"Moving {backup} to S3...")
        key = f'backup/{backup}'
        path = os.path.join('/backup', backup)
        transfer.upload_file(path, bucket_name, key)
        os.remove(path)
        num_moved += 1
    logger.info(f"Finished moving {num_moved} files!")
Example #2
File: util.py Project: xlnrp/tator
def cleanup_object_uploads(max_age_days=1):
    """ Removes s3 uploads that are greater than a day old.
    """
    items = Project.objects.values('bucket', 'pk')
    now = datetime.datetime.now(datetime.timezone.utc)
    for item in items:
        project = Project.objects.get(pk=item["pk"])

        logger.info(
            f"Searching project {project.id} | {project.name} for stale uploads..."
        )
        if project.organization is None:
            logger.info(f"Skipping because this project has no organization!")
            continue

        bucket = Bucket.objects.get(pk=item["bucket"]) if item["bucket"] else None
        tator_store = get_tator_store(bucket)

        prefix = upload_prefix_from_project(project)
        last_key = None
        num_deleted = 0
        while True:
            kwargs = {}
            if last_key:
                kwargs["StartAfter"] = last_key
            obj_list = tator_store.list_objects_v2(prefix, **kwargs)
            if not obj_list:
                break
            key_age_list = [(obj["Key"], now - obj["LastModified"])
                            for obj in obj_list]
            last_key = key_age_list[-1][0]
            for key, age in key_age_list:
                not_resource = not Resource.objects.filter(path=key).exists()
                if age > datetime.timedelta(
                        days=max_age_days) and not_resource:
                    tator_store.delete_object(key)
                    num_deleted += 1
        logger.info(f"Deleted {num_deleted} objects in project {project.id}!")
    logger.info("Object cleanup finished!")
Example #3
def updateProjectTotals(force=False):
    """Recomputes file count, duration, thumbnail, and member list for every project."""
    projects = Project.objects.all()
    for project in projects:
        temp_files = TemporaryFile.objects.filter(project=project)
        files = Media.objects.filter(project=project, deleted=False)
        num_files = temp_files.count() + files.count()
        if force or num_files != project.num_files:
            project.num_files = num_files
            duration_info = files.values('num_frames', 'fps')
            project.duration = sum([
                info['num_frames'] / info['fps'] for info in duration_info
                if info['num_frames'] and info['fps']
            ])
            logger.info(
                f"Updating {project.name}: Num files = {project.num_files}, "
                f"Duration = {project.duration}")
        if not project.thumb:
            media = Media.objects.filter(project=project,
                                         media_files__isnull=False).first()
            if media:
                tator_store = get_tator_store(project.bucket)
                if "thumbnail" in media.media_files and media.media_files[
                        "thumbnail"]:
                    src_path = media.media_files['thumbnail'][0]['path']
                    dest_path = f"{project.organization.pk}/{project.pk}/{os.path.basename(src_path)}"
                    tator_store.copy(src_path, dest_path)
                    project.thumb = dest_path
        users = User.objects.filter(pk__in=Membership.objects.filter(project=project)\
                            .values_list('user')).order_by('last_name')
        usernames = [str(user) for user in users]
        creator = str(project.creator)
        if creator in usernames:
            usernames.remove(creator)
            usernames.insert(0, creator)
        project.usernames = usernames
        project.save()
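The duration total above is computed in Python from a values() queryset. If only the
sum is needed, the same figure could plausibly be pushed down to the database; a
sketch under that assumption, reusing the num_frames and fps fields and skipping rows
where either is missing or fps is zero:

from django.db.models import ExpressionWrapper, F, FloatField, Sum

def project_duration(project):
    # Database-side equivalent of sum(num_frames / fps) over non-deleted media.
    qs = (Media.objects
          .filter(project=project, deleted=False,
                  num_frames__isnull=False, fps__isnull=False)
          .exclude(fps=0))
    seconds = ExpressionWrapper(F("num_frames") * 1.0 / F("fps"),
                                output_field=FloatField())
    return qs.aggregate(total=Sum(seconds))["total"] or 0.0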
Example #4
def fix_bad_archives(*,
                     project_id_list=None,
                     live_run=False,
                     force_update=False):
    from pprint import pformat
    media_to_update = set()
    path_filename = "manifest_spec.txt"

    def _tag_needs_updating(path, store):
        return force_update or not store._object_tagged_for_archive(path)

    def _sc_needs_updating(path, store):
        return (force_update or store.head_object(path).get(
            "StorageClass", "STANDARD") != store.get_archive_sc())

    def _update_tag(path, store):
        if live_run:
            try:
                store._put_archive_tag(path)
            except Exception:
                logger.warning(f"Tag operation on {path} failed",
                               exc_info=True)
                return False
        else:
            media_to_update.add(f"{path}\n")

        return True

    def _archive_multi(multi, store):
        media_ids = multi.media_files.get("ids")
        if not media_ids:
            # Callers unpack three values, so return a failure triple here.
            return False, False, False

        success = True
        sc_needs_updating = False
        tag_needs_updating = False
        media_qs = Media.objects.filter(pk__in=media_ids)
        for single in media_qs.iterator():
            single_success, single_sc_needs_updating, single_tag_needs_updating = _archive_single(
                single, store)
            success = success and single_success
            sc_needs_updating = sc_needs_updating or single_sc_needs_updating
            tag_needs_updating = tag_needs_updating or single_tag_needs_updating

        return success, sc_needs_updating, tag_needs_updating

    def _archive_single(single, store):
        success = True
        sc_needs_updating = False
        tag_needs_updating = False
        for key in ["streaming", "archival", "audio", "image"]:
            if not single.media_files.get(key):
                continue

            for obj in single.media_files[key]:
                try:
                    path = obj["path"]
                except Exception:
                    logger.warning(
                        f"Could not get path from {key} in {single.id}",
                        exc_info=True)
                    success = False
                    continue

                if not _sc_needs_updating(path, store):
                    continue

                sc_needs_updating = True
                if not _tag_needs_updating(path, store):
                    continue

                tag_needs_updating = True
                try:
                    success = _update_tag(path, store) and success
                except Exception:
                    logger.warning(
                        f"Copy operation on {path} from {single.id} failed",
                        exc_info=True)
                    success = False

                if key == "streaming":
                    try:
                        success = _update_tag(obj["segment_info"],
                                              store) and success
                    except Exception:
                        success = False

        return success, sc_needs_updating, tag_needs_updating

    logger.info(f"fix_bad_archives {'live' if live_run else 'dry'} run")

    archive_state_dict = {}
    project_qs = Project.objects.all()

    if project_id_list:
        project_qs = project_qs.filter(pk__in=project_id_list)

    for project in project_qs.iterator():
        tator_store = get_tator_store(project.bucket)
        proj_id = project.id
        logger.info(f"Analyzing project {proj_id}...")
        archived_media_qs = Media.objects.filter(project=project,
                                                 archive_state="archived")
        media_count = archived_media_qs.count()
        if media_count < 1:
            logger.info(f"No archived media in project {proj_id}, moving on")
            continue

        archive_state_dict[proj_id] = {
            "correct_sc": 0,
            "successfully_archived": 0,
            "correct_tag": 0,
            "successfully_tagged": 0,
            "failed": 0,
        }
        idx = 0
        for media in archived_media_qs.iterator():
            idx += 1
            if idx % 250 == 0 or idx == media_count:
                logger.info(
                    f"Processed {idx} of {media_count} archived media for project {project.id}"
                )

            if not media.meta:
                logger.warning(f"No dtype for '{media.id}'")
                continue

            media_dtype = media.meta.dtype
            if media_dtype in ["image", "video"]:
                success, sc_needs_updating, tag_needs_updating = _archive_single(
                    media, tator_store)
            elif media_dtype == "multi":
                success, sc_needs_updating, tag_needs_updating = _archive_multi(
                    media, tator_store)
            else:
                logger.warning(
                    f"Unrecognized dtype '{media_dtype}' for media {media.id}, failed to archive"
                )
                continue

            if success:
                if tag_needs_updating:
                    archive_state_dict[proj_id]["successfully_tagged"] += 1
                else:
                    archive_state_dict[proj_id]["correct_tag"] += 1

                if sc_needs_updating:
                    archive_state_dict[proj_id]["successfully_archived"] += 1
                else:
                    archive_state_dict[proj_id]["correct_sc"] += 1
            else:
                archive_state_dict[proj_id]["failed"] += 1

    logger.info(f"fix_bad_archives stats:\n{pformat(archive_state_dict)}\n")
    if media_to_update:
        with open(path_filename, "w") as fp:
            fp.writelines(media_to_update)
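A typical invocation, assuming the function is run from a Django shell inside the
project environment and that project IDs 1 and 2 are placeholders, would be a dry run
first, followed by a live run once manifest_spec.txt looks right:

# Dry run: logs per-project stats and writes the paths that would be re-tagged
# to manifest_spec.txt without touching object storage.
fix_bad_archives(project_id_list=[1, 2], live_run=False)

# Live run: applies the archive tag; force_update=True re-checks every object
# regardless of its current tag and storage class.
fix_bad_archives(project_id_list=[1, 2], live_run=True, force_update=True)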