def move_backups_to_s3():
    """Uploads database backups from local storage to S3, removing local copies on success."""
    store = get_tator_store().store
    transfer = S3Transfer(store)
    bucket_name = os.getenv('BUCKET_NAME')
    num_moved = 0
    for backup in os.listdir('/backup'):
        logger.info(f"Moving {backup} to S3...")
        key = f'backup/{backup}'
        path = os.path.join('/backup', backup)
        transfer.upload_file(path, bucket_name, key)
        os.remove(path)
        num_moved += 1
    logger.info(f"Finished moving {num_moved} files!")
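# A minimal standalone sketch of the same transfer pattern, assuming plain
# boto3 credentials from the environment (the function name and arguments here
# are hypothetical). S3Transfer manages multipart uploads and retries, which
# is why it is preferred above over a bare put_object call for large backups.
import boto3
from boto3.s3.transfer import S3Transfer

def upload_and_remove(path, bucket, key):
    transfer = S3Transfer(boto3.client("s3"))
    transfer.upload_file(path, bucket, key)  # multipart upload for large files
    os.remove(path)  # drop the local copy only after the upload succeeds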
def cleanup_object_uploads(max_age_days=1):
    """Removes S3 uploads that are older than `max_age_days` days (one day by default)."""
    items = Project.objects.values('bucket', 'pk')
    now = datetime.datetime.now(datetime.timezone.utc)
    for item in items:
        project = Project.objects.get(pk=item["pk"])
        logger.info(f"Searching project {project.id} | {project.name} for stale uploads...")
        if project.organization is None:
            logger.info("Skipping because this project has no organization!")
            continue
        bucket = Bucket.objects.get(pk=item["bucket"]) if item["bucket"] else None
        tator_store = get_tator_store(bucket)
        prefix = upload_prefix_from_project(project)
        last_key = None
        num_deleted = 0
        # Page through the listing manually, resuming each request after the
        # last key seen, until the store returns an empty page.
        while True:
            kwargs = {}
            if last_key:
                kwargs["StartAfter"] = last_key
            obj_list = tator_store.list_objects_v2(prefix, **kwargs)
            if not obj_list:
                break
            key_age_list = [(obj["Key"], now - obj["LastModified"]) for obj in obj_list]
            last_key = key_age_list[-1][0]
            for key, age in key_age_list:
                # Never delete an upload that has been promoted to a Resource.
                not_resource = not Resource.objects.filter(path=key).exists()
                if age > datetime.timedelta(days=max_age_days) and not_resource:
                    tator_store.delete_object(key)
                    num_deleted += 1
        logger.info(f"Deleted {num_deleted} objects in project {project.id}!")
    logger.info("Object cleanup finished!")
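# The manual pagination above mirrors the S3 ListObjectsV2 contract: at most
# 1000 keys per response, resumable via StartAfter. A minimal sketch of the
# same loop against a plain boto3 client (bucket and prefix are hypothetical):
import boto3

def iter_keys(bucket, prefix):
    client = boto3.client("s3")
    last_key = None
    while True:
        kwargs = {"Bucket": bucket, "Prefix": prefix}
        if last_key:
            kwargs["StartAfter"] = last_key
        contents = client.list_objects_v2(**kwargs).get("Contents", [])
        if not contents:
            break
        for obj in contents:
            yield obj["Key"], obj["LastModified"]
        last_key = contents[-1]["Key"]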
def updateProjectTotals(force=False):
    """Recomputes per-project file counts, total duration, thumbnail, and member list."""
    projects = Project.objects.all()
    for project in projects:
        temp_files = TemporaryFile.objects.filter(project=project)
        files = Media.objects.filter(project=project, deleted=False)
        num_files = temp_files.count() + files.count()
        if force or num_files != project.num_files:
            project.num_files = num_files
            duration_info = files.values('num_frames', 'fps')
            # Skip media with a missing frame count or frame rate to avoid
            # TypeError and ZeroDivisionError.
            project.duration = sum([info['num_frames'] / info['fps']
                                    for info in duration_info
                                    if info['num_frames'] and info['fps']])
            logger.info(f"Updating {project.name}: Num files = {project.num_files}, "
                        f"Duration = {project.duration}")
        if not project.thumb:
            media = Media.objects.filter(project=project, media_files__isnull=False).first()
            if media:
                tator_store = get_tator_store(project.bucket)
                if "thumbnail" in media.media_files and media.media_files["thumbnail"]:
                    src_path = media.media_files['thumbnail'][0]['path']
                    dest_path = f"{project.organization.pk}/{project.pk}/{os.path.basename(src_path)}"
                    tator_store.copy(src_path, dest_path)
                    project.thumb = dest_path
        users = User.objects.filter(
            pk__in=Membership.objects.filter(project=project).values_list('user')
        ).order_by('last_name')
        usernames = [str(user) for user in users]
        creator = str(project.creator)
        # List the project creator first, then the remaining members by last name.
        if creator in usernames:
            usernames.remove(creator)
            usernames.insert(0, creator)
        project.usernames = usernames
        project.save()
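# Worked example of the duration aggregate above with literal values
# (hypothetical data): entries missing num_frames or fps are filtered out, so
# None values and zero frame rates cannot raise.
_example_info = [
    {"num_frames": 300, "fps": 30.0},   # contributes 300 / 30.0 = 10.0 seconds
    {"num_frames": None, "fps": 30.0},  # skipped: no frame count
    {"num_frames": 120, "fps": 0},      # skipped: invalid frame rate
]
assert sum(d["num_frames"] / d["fps"]
           for d in _example_info
           if d["num_frames"] and d["fps"]) == 10.0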
def fix_bad_archives(*, project_id_list=None, live_run=False, force_update=False):
    """Re-tags archived media whose objects lack the archive storage class or tag.

    In a dry run (the default), offending paths are written to a manifest file
    instead of being modified.
    """
    from pprint import pformat

    media_to_update = set()
    path_filename = "manifest_spec.txt"

    def _tag_needs_updating(path, store):
        return force_update or not store._object_tagged_for_archive(path)

    def _sc_needs_updating(path, store):
        return (force_update
                or store.head_object(path).get("StorageClass", "STANDARD") != store.get_archive_sc())

    def _update_tag(path, store):
        if live_run:
            try:
                store._put_archive_tag(path)
            except Exception:
                logger.warning(f"Tag operation on {path} failed", exc_info=True)
                return False
        else:
            # Dry run: record the path in the manifest instead of tagging it.
            media_to_update.add(f"{path}\n")
        return True

    def _archive_multi(multi, store):
        media_ids = multi.media_files.get("ids")
        if not media_ids:
            # Return the same triple as _archive_single so the caller can unpack it.
            return False, False, False
        success = True
        sc_needs_updating = False
        tag_needs_updating = False
        media_qs = Media.objects.filter(pk__in=media_ids)
        for single in media_qs.iterator():
            single_success, single_sc_needs_updating, single_tag_needs_updating = \
                _archive_single(single, store)
            success = success and single_success
            sc_needs_updating = sc_needs_updating or single_sc_needs_updating
            tag_needs_updating = tag_needs_updating or single_tag_needs_updating
        return success, sc_needs_updating, tag_needs_updating

    def _archive_single(single, store):
        success = True
        sc_needs_updating = False
        tag_needs_updating = False
        for key in ["streaming", "archival", "audio", "image"]:
            if not (key in single.media_files and single.media_files[key]):
                continue
            for obj in single.media_files[key]:
                try:
                    path = obj["path"]
                except Exception:
                    logger.warning(f"Could not get path from {key} in {single.id}", exc_info=True)
                    success = False
                    continue
                if not _sc_needs_updating(path, store):
                    continue
                sc_needs_updating = True
                if not _tag_needs_updating(path, store):
                    continue
                tag_needs_updating = True
                try:
                    success = _update_tag(path, store) and success
                except Exception:
                    logger.warning(f"Copy operation on {path} from {single.id} failed", exc_info=True)
                    success = False
                if key == "streaming":
                    # Streaming media carry a companion segment_info file that
                    # must be tagged along with the video object.
                    try:
                        success = _update_tag(obj["segment_info"], store) and success
                    except Exception:
                        success = False
        return success, sc_needs_updating, tag_needs_updating

    logger.info(f"fix_bad_archives {'live' if live_run else 'dry'} run")
    archive_state_dict = {}
    project_qs = Project.objects.all()
    if project_id_list:
        project_qs = project_qs.filter(pk__in=project_id_list)
    for project in project_qs.iterator():
        tator_store = get_tator_store(project.bucket)
        proj_id = project.id
        logger.info(f"Analyzing project {proj_id}...")
        archived_media_qs = Media.objects.filter(project=project, archive_state="archived")
        media_count = archived_media_qs.count()
        if media_count < 1:
            logger.info(f"No archived media in project {proj_id}, moving on")
            continue
        archive_state_dict[proj_id] = {
            "correct_sc": 0,
            "successfully_archived": 0,
            "correct_tag": 0,
            "successfully_tagged": 0,
            "failed": 0,
        }
        idx = 0
        for media in archived_media_qs.iterator():
            idx += 1
            if idx % 250 == 0 or idx == media_count:
                logger.info(f"Processed {idx} of {media_count} archived media for project {project.id}")
            if not media.meta:
                logger.warning(f"No dtype for '{media.id}'")
                continue
            media_dtype = media.meta.dtype
            if media_dtype in ["image", "video"]:
                success, sc_needs_updating, tag_needs_updating = _archive_single(media, tator_store)
            elif media_dtype == "multi":
                success, sc_needs_updating, tag_needs_updating = _archive_multi(media, tator_store)
            else:
                logger.warning(
                    f"Unrecognized dtype '{media_dtype}' for media {media.id}, failed to archive")
                continue
            if success:
                if tag_needs_updating:
                    archive_state_dict[proj_id]["successfully_tagged"] += 1
                else:
                    archive_state_dict[proj_id]["correct_tag"] += 1
                if sc_needs_updating:
                    archive_state_dict[proj_id]["successfully_archived"] += 1
                else:
                    archive_state_dict[proj_id]["correct_sc"] += 1
            else:
                archive_state_dict[proj_id]["failed"] += 1
    logger.info(f"fix_bad_archives stats:\n{pformat(archive_state_dict)}\n")
    if media_to_update:
        with open(path_filename, "w") as fp:
            fp.writelines(media_to_update)
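# Usage sketch (hypothetical invocation; assumes this module is importable in
# the Django environment, e.g. from `manage.py shell`): dry-run first to write
# manifest_spec.txt, review the listed paths, then repeat with live_run=True
# to apply the archive tags.
if __name__ == "__main__":
    fix_bad_archives(project_id_list=[42], live_run=False)  # dry run: writes manifest_spec.txt
    fix_bad_archives(project_id_list=[42], live_run=True)   # live run: tags objects for archive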