def _put_media(
        media: typing.Set[str],
        db_root: str,
        db_name: str,
        version: str,
        deps: Dependencies,
        backend: audbackend.Backend,
        num_workers: typing.Optional[int],
        verbose: bool,
):
    r"""Upload media archives containing new or altered files.

    ``media`` holds the names of archives that should be uploaded;
    archives whose files carry the new ``version`` are added to it.
    """
    # Group every non-removed media file under its archive
    # and remember which archives contain files of the new version.
    archive_files = collections.defaultdict(list)
    for file in deps.media:
        if deps.removed(file):
            continue
        archive = deps.archive(file)
        archive_files[archive].append(file)
        if deps.version(file) == version:
            media.add(archive)

    lock = threading.Lock()

    def job(archive):
        # Upload only archives that still contain at least one file.
        if archive not in archive_files:
            return
        files = archive_files[archive]
        for file in files:
            # deps is shared between worker threads,
            # so serialize updates to it
            with lock:
                deps._add_media(db_root, file, version)
        remote_archive = backend.join(
            db_name,
            define.DEPEND_TYPE_NAMES[define.DependType.MEDIA],
            archive,
        )
        backend.put_archive(
            db_root,
            files,
            remote_archive,
            version,
        )

    audeer.run_tasks(
        job,
        params=[([archive], {}) for archive in media],
        num_workers=num_workers,
        progress_bar=verbose,
        task_description='Put media',
    )
def _cached_files(
        files: typing.Sequence[str],
        deps: Dependencies,
        cached_versions: typing.Sequence[
            typing.Tuple[LooseVersion, str, Dependencies]
        ],
        flavor: typing.Optional[Flavor],
        verbose: bool,
) -> typing.Tuple[
        typing.Sequence[typing.Tuple[str, str]],
        typing.Sequence[str],
]:
    r"""Find cached files.

    Split ``files`` into those that can be copied from the cache
    of another (equal or newer) version of the database
    and those that still have to be downloaded.

    Args:
        files: media files of the requested version
        deps: dependencies of the requested version
        cached_versions: ``(version, cache_root, deps)`` entries
            for other cached versions of the database
        flavor: if given and ``flavor.format`` is set,
            look for the file with the converted extension instead
        verbose: show progress bar

    Returns:
        list of ``(cache_root, file)`` pairs found in a cache,
        and list of files not found in any cache

    """
    # NOTE: the original return annotation
    # ``(typing.Sequence[typing.Union[str, str]], typing.Sequence[str])``
    # was not a valid type expression and mis-described the first
    # element, which holds ``(cache_root, file)`` tuples.
    cached_files = []
    missing_files = []
    for file in audeer.progress_bar(
            files,
            desc='Cached files',
            disable=not verbose,
    ):
        found = False
        file_version = LooseVersion(deps.version(file))
        for cache_version, cache_root, cache_deps in cached_versions:
            # only an equal or newer cached version
            # can contain this file revision
            if cache_version >= file_version:
                if file in cache_deps:
                    # identical checksum means identical content
                    if deps.checksum(file) == cache_deps.checksum(file):
                        path = os.path.join(cache_root, file)
                        if flavor and flavor.format is not None:
                            # cached flavors store the converted file
                            path = audeer.replace_file_extension(
                                path,
                                flavor.format,
                            )
                        if os.path.exists(path):
                            found = True
                            break
        if found:
            if flavor and flavor.format is not None:
                file = audeer.replace_file_extension(
                    file,
                    flavor.format,
                )
            # cache_root is still bound to the cache where the
            # file was found (loop exited via break)
            cached_files.append((cache_root, file))
        else:
            missing_files.append(file)

    return cached_files, missing_files
def _get_media(
        media: typing.List[str],
        db_root: str,
        db_root_tmp: str,
        db_name: str,
        deps: Dependencies,
        backend: audbackend.Backend,
        num_workers: typing.Optional[int],
        verbose: bool,
):
    r"""Download and unpack the archives that hold the requested media."""
    # Pre-create the full folder tree so that parallel unpacking
    # does not race inside os.makedirs.
    for file in media:
        for root in (db_root, db_root_tmp):
            audeer.mkdir(os.path.dirname(os.path.join(root, file)))

    # Each file belongs to exactly one (archive, version) pair;
    # collecting them in a set deduplicates shared archives.
    archives = {
        (deps.archive(file), deps.version(file))
        for file in media
    }

    def job(archive: str, version: str):
        remote_archive = backend.join(
            db_name,
            define.DEPEND_TYPE_NAMES[define.DependType.MEDIA],
            archive,
        )
        extracted = backend.get_archive(remote_archive, db_root_tmp, version)
        for file in extracted:
            _move_file(db_root_tmp, db_root, file)

    audeer.run_tasks(
        job,
        params=[([archive, version], {}) for archive, version in archives],
        num_workers=num_workers,
        progress_bar=verbose,
        task_description='Get media',
    )
def _get_media_from_backend( name: str, media: typing.Sequence[str], db_root: str, db_root_tmp: str, flavor: typing.Optional[Flavor], deps: Dependencies, backend: audbackend.Backend, num_workers: typing.Optional[int], verbose: bool, ): r"""Load media from backend.""" # figure out archives archives = set() archive_names = set() for file in media: archive_name = deps.archive(file) archive_version = deps.version(file) archives.add((archive_name, archive_version)) archive_names.add(archive_name) # collect all files that will be extracted, # if we have more files than archives if len(deps.files) > len(deps.archives): files = list() for file in deps.media: archive = deps.archive(file) if archive in archive_names: files.append(file) media = files # create folder tree to avoid race condition # in os.makedirs when files are unpacked # using multi-processing for file in media: audeer.mkdir(os.path.dirname(os.path.join(db_root, file))) audeer.mkdir(os.path.dirname(os.path.join(db_root_tmp, file))) def job(archive: str, version: str): archive = backend.join( name, define.DEPEND_TYPE_NAMES[define.DependType.MEDIA], archive, ) # extract and move all files that are stored in the archive, # even if only a single file from the archive was requested files = backend.get_archive(archive, db_root_tmp, version) for file in files: if flavor is not None: bit_depth = deps.bit_depth(file) channels = deps.channels(file) sampling_rate = deps.sampling_rate(file) src_path = os.path.join(db_root_tmp, file) file = flavor.destination(file) dst_path = os.path.join(db_root_tmp, file) flavor( src_path, dst_path, src_bit_depth=bit_depth, src_channels=channels, src_sampling_rate=sampling_rate, ) if src_path != dst_path: os.remove(src_path) _move_file(db_root_tmp, db_root, file) audeer.run_tasks( job, params=[([archive, version], {}) for archive, version in archives], num_workers=num_workers, progress_bar=verbose, task_description='Load media', )