def copy_file_to(self, local_path_or_handle, remote_path, metadata=None):
  """Copy file from a local path to a remote path."""
  fs_path = self.convert_path_for_write(remote_path)

  if isinstance(local_path_or_handle, str):
    if not shell.copy_file(local_path_or_handle, fs_path):
      return False
  else:
    # A file handle was passed in, so stream its contents to the target path.
    with open(fs_path, 'wb') as f:
      shutil.copyfileobj(local_path_or_handle, f)

  self._write_metadata(remote_path, metadata)
  return True
def get_file_from_cache_if_exists(file_path,
                                  update_modification_time_on_access=True):
  """Get file from nfs cache if available."""
  cache_file_path = get_cache_file_path(file_path)
  if not cache_file_path or not file_exists_in_cache(cache_file_path):
    # If the file does not exist in cache, bail out.
    return False

  # Fetch cache file size before starting the actual copy.
  cache_file_size = get_cache_file_size_from_metadata(cache_file_path)

  # Copy file from cache to local.
  if not shell.copy_file(cache_file_path, file_path):
    return False

  # Update timestamp to later help with eviction of old files.
  if update_modification_time_on_access:
    update_access_and_modification_timestamp(cache_file_path)

  # Return success or failure based on existence of local file and size
  # comparison.
  return (os.path.exists(file_path) and
          os.path.getsize(file_path) == cache_file_size)
def copy_blob(self, remote_source, remote_target):
  """Copy a remote file to another remote location."""
  fs_source_path = self.convert_path(remote_source)
  fs_target_path = self.convert_path_for_write(remote_target)
  return shell.copy_file(fs_source_path, fs_target_path)
def copy_file_from(self, remote_path, local_path):
  """Copy file from a remote path to a local path."""
  fs_path = self.convert_path(remote_path)
  return shell.copy_file(fs_path, local_path)
def store_file_in_cache(file_path,
                        cached_files_per_directory_limit=True,
                        force_update=False):
  """Store file in nfs cache if available."""
  if not os.path.exists(file_path):
    logs.log_error(
        'Local file %s does not exist, nothing to store in cache.' % file_path)
    return

  if os.path.getsize(file_path) > CACHE_SIZE_LIMIT:
    logs.log('File %s is too large to store in cache, skipping.' % file_path)
    return

  nfs_root = environment.get_value('NFS_ROOT')
  if not nfs_root:
    # No NFS, nothing to store in cache.
    return

  # If NFS server is not available due to heavy load, skip storage operation
  # altogether as we would fail to store file.
  if not os.path.exists(os.path.join(nfs_root, '.')):  # Use . to iterate mount.
    logs.log_warn('Cache %s not available.' % nfs_root)
    return

  cache_file_path = get_cache_file_path(file_path)
  cache_directory = os.path.dirname(cache_file_path)
  filename = os.path.basename(file_path)

  if not os.path.exists(cache_directory):
    if not shell.create_directory(cache_directory, create_intermediates=True):
      logs.log_error('Failed to create cache directory %s.' % cache_directory)
      return

  # Check if the file already exists in cache.
  if file_exists_in_cache(cache_file_path):
    if not force_update:
      return

    # If we are forcing update, we need to remove current cached file and its
    # metadata.
    remove_cache_file_and_metadata(cache_file_path)

  # Delete old cached files beyond our maximum storage limit.
  if cached_files_per_directory_limit:
    # Get a list of cached files.
    cached_files_list = []
    for cached_filename in os.listdir(cache_directory):
      if cached_filename.endswith(CACHE_METADATA_FILE_EXTENSION):
        continue
      cached_file_path = os.path.join(cache_directory, cached_filename)
      cached_files_list.append(cached_file_path)

    mtime = lambda f: os.stat(f).st_mtime
    last_used_cached_files_list = list(
        sorted(cached_files_list, key=mtime, reverse=True))
    for cached_file_path in (
        last_used_cached_files_list[MAX_CACHED_FILES_PER_DIRECTORY - 1:]):
      remove_cache_file_and_metadata(cached_file_path)

  # Start storing the actual file in cache now.
  logs.log('Started storing file %s into cache.' % filename)

  # Fetch lock to store this file. Try only once since if any other bot has
  # started to store it, we don't need to do it ourselves. Just bail out.
  lock_name = 'store:cache_file:%s' % utils.string_hash(cache_file_path)
  if not locks.acquire_lock(
      lock_name,
      max_hold_seconds=CACHE_LOCK_TIMEOUT,
      retries=1,
      by_zone=True):
    logs.log_warn(
        'Unable to fetch lock to update cache file %s, skipping.' % filename)
    return

  # Check if another bot already updated it.
  if file_exists_in_cache(cache_file_path):
    locks.release_lock(lock_name, by_zone=True)
    return

  shell.copy_file(file_path, cache_file_path)
  write_cache_file_metadata(cache_file_path, file_path)
  time.sleep(CACHE_COPY_WAIT_TIME)
  error_occurred = not file_exists_in_cache(cache_file_path)
  locks.release_lock(lock_name, by_zone=True)

  if error_occurred:
    logs.log_error('Failed to store file %s into cache.' % filename)
  else:
    logs.log('Completed storing file %s into cache.' % filename)
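

# Illustrative usage sketch (not part of the original module): a caller would
# typically try the NFS cache first and repopulate it on a miss.
# `fetch_file_from_source` is a hypothetical placeholder for whatever slow
# path actually produces the file.
def example_fetch_with_cache(file_path):
  """Fetch a file, preferring the NFS cache when it is populated."""
  if get_file_from_cache_if_exists(file_path):
    return True

  # Cache miss: produce the file through the slow path, then store it in the
  # cache so other bots can reuse it.
  if not fetch_file_from_source(file_path):  # Hypothetical helper.
    return False

  store_file_in_cache(file_path)
  return True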