class MetadataCachingStore(Store):
    '''Transparent caching layer around a :class:`Store`.

    Keeps file and directory metadata (size, modified time, directory
    listings) in a multiprocessing-safe cache so that repeated metadata
    queries do not hit the slow remote storage provider. Data itself is
    always fetched from / written to the wrapped store.
    '''

    def __init__(self, store, cache_expiration_time=60):
        '''
        :param store: the wrapped store whose metadata should be cached
        :param cache_expiration_time: seconds before a cached entry expires
        '''
        self.store = store
        self.logger = logging.getLogger(self.get_logging_handler())
        self.logger.debug("creating MetadataCachingStore object")
        self.entries = MPSynchronizeProxy(MPCache(cache_expiration_time))
        if cache_expiration_time < 240:
            self.logger.warning("Be aware of the synchronization issue https://github.com/joe42/CloudFusion/issues/16 or to avoid the issue set cache_expiration_time to more than 240 seconds.")
        self.store_metadata = Cache(cache_expiration_time)
        # deepcopy: the worker polls the store from another process and must
        # not share connection state with this instance
        self.free_space_worker = GetFreeSpaceWorker(deepcopy(store), self.logger)
        self.free_space_worker.start()
        self._last_cleaned = time.time()
        manager = Manager()
        self._is_cleaning_cache = Lock()
        # Manager().Value does not expose get_lock(), so increments of the
        # shared upload counter are guarded by an explicit lock
        self._is_uploading_lock = Lock()
        self._is_uploading = manager.Value('i', 0)

    def _acquire_uploading_lock(self):
        '''This method needs to be called before uploading data.'''
        with self._is_cleaning_cache:
            with self._is_uploading_lock:
                self._is_uploading.value = self._is_uploading.value + 1

    def _release_uploading_lock(self):
        '''This method needs to be called after uploading data.'''
        with self._is_cleaning_cache:
            with self._is_uploading_lock:
                self._is_uploading.value = self._is_uploading.value - 1

    def _is_valid_path(self, path):
        return self.store._is_valid_path(path)

    def _raise_error_if_invalid_path(self, path):
        self.store._raise_error_if_invalid_path(path)

    def get_name(self):
        if not self.store_metadata.exists('store_name'):
            self.store_metadata.write('store_name', self.store.get_name())
        return self.store_metadata.get_value('store_name')

    def get_file(self, path_to_file):
        '''Fetch the file contents from the wrapped store and cache its size.'''
        self.logger.debug("meta cache get_file %s", path_to_file)
        try:
            ret = self.store.get_file(path_to_file)
        except NoSuchFilesytemObjectError:
            # FIX: the guard belongs around the store call (len()/cache writes
            # cannot raise it); purge stale cache state and propagate
            if self.entries.exists(path_to_file):
                self.entries.delete(path_to_file)
            self._remove_from_parent_dir_listing(path_to_file)
            raise
        if not self.entries.exists(path_to_file):
            self.entries.write(path_to_file, Entry())
        entry = self.entries.get_value(path_to_file)
        entry.set_is_file()
        entry.size = len(ret)
        self.entries.write(path_to_file, entry)
        self.logger.debug("meta cache returning %s", repr(ret)[:10])
        self._add_to_parent_dir_listing(path_to_file)
        return ret

    def _add_to_parent_dir_listing(self, path):
        '''Insert *path* into the cached listing of its parent directory.'''
        if path != '/':
            parent_dir = os.path.dirname(path)
            self._add_parent_dir_listing(path)
            entry = self.entries.get_value(parent_dir)
            entry.add_to_listing(path)
            self.entries.write(parent_dir, entry)

    def _add_parent_dir_listing(self, path):
        '''Add listing for parent directory of path to cache if it does not yet exist'''
        if path != '/':
            parent_dir = os.path.dirname(path)
            if not self.entries.exists(parent_dir):
                self.entries.write(parent_dir, Entry())
            entry = self.entries.get_value(parent_dir)
            if entry.listing is None:
                entry.listing = self.store.get_directory_listing(parent_dir)
                entry.set_is_dir()
                self._add_existing_items(entry, parent_dir)
                self.entries.write(parent_dir, entry)

    def _does_not_exist_in_parent_dir_listing(self, path):
        ''':returns: True if path does not exist in the cached directory listing'''
        if path == '/':
            return False
        parent_dir = os.path.dirname(path)
        if self.entries.exists(parent_dir):
            entry = self.entries.get_value(parent_dir)
            # check both unicode and byte-string form: listings may contain either
            if entry.listing is not None and (not unicode(path) in entry.listing and not path in entry.listing):
                self.logger.debug("%s does not exist in parent directory: %s...", path, repr(entry.listing[0:5]))
                return True
        return False

    def _remove_from_parent_dir_listing(self, path):
        '''Remove *path* from the cached listing of its parent directory.'''
        parent_dir = os.path.dirname(path)
        if self.entries.exists(parent_dir):
            entry = self.entries.get_value(parent_dir)
            entry.remove_from_listing(path)
            self.entries.write(parent_dir, entry)

    def _cache_file_entry(self, path, data_len):
        '''Create or refresh the cached file entry for *path* with size *data_len*.'''
        if not self.entries.exists(path):
            self.entries.write(path, Entry())
        entry = self.entries.get_value(path)
        entry.set_is_file()
        entry.size = data_len
        entry.set_modified()
        self.entries.write(path, entry)

    # Entry deletion happens only while not uploading (multiprocessing lock);
    # at the beginning of the store methods the entry is created optimistically:
    # if the upload fails it is deleted, if it succeeds it is refreshed
    # (eventual consistency).
    def store_file(self, path_to_file, dest_dir="/", remote_file_name=None, interrupt_event=None):
        if dest_dir == "/":
            dest_dir = ""
        if not remote_file_name:
            remote_file_name = os.path.basename(path_to_file)
        data_len = file_util.get_file_size_in_bytes(path_to_file)
        path = dest_dir + "/" + remote_file_name
        self.logger.debug("meta cache store_file %s", path)
        self._cache_file_entry(path, data_len)
        # FIX: acquire *before* entering the try block; otherwise a failed
        # acquire would run the finally-release and corrupt the upload counter
        self._acquire_uploading_lock()
        try:
            ret = self.store.store_file(path_to_file, dest_dir, remote_file_name, interrupt_event)
        except:
            # intentionally broad: any upload failure invalidates the optimistic entry
            self.entries.delete(path)
            raise
        finally:
            self._release_uploading_lock()
        self._cache_file_entry(path, data_len)
        self._add_to_parent_dir_listing(path)
        return ret

    def store_fileobject(self, fileobject, path, interrupt_event=None):
        self.logger.debug("meta cache store_fileobject %s", path)
        data_len = file_util.get_file_size_in_bytes(fileobject)
        self._cache_file_entry(path, data_len)
        self._acquire_uploading_lock()
        try:
            ret = self.store.store_fileobject(fileobject, path, interrupt_event)
        except:
            # intentionally broad: any upload failure invalidates the optimistic entry
            self.entries.delete(path)
            raise
        finally:
            self._release_uploading_lock()
            # FIX: close in finally so the file object is released even on error
            fileobject.close()
        self._cache_file_entry(path, data_len)
        self._add_to_parent_dir_listing(path)
        return ret

    def delete(self, path, is_dir):
        self.logger.debug("meta cache delete %s", path)
        self.store.delete(path, is_dir)
        self.entries.delete(path)
        self._remove_from_parent_dir_listing(path)

    def account_info(self):
        # cached without expiration: account info is assumed static — TODO confirm
        if not self.store_metadata.exists('account_info'):
            self.store_metadata.write('account_info', self.store.account_info())
        return self.store_metadata.get_value('account_info')

    def get_free_space(self):
        return self.free_space_worker.get_free_bytes_in_remote_store()

    def get_overall_space(self):
        if not self.store_metadata.exists('overall_space') or self.store_metadata.is_expired('overall_space'):
            self.store_metadata.write('overall_space', self.store.get_overall_space())
        return self.store_metadata.get_value('overall_space')

    def get_used_space(self):
        if not self.store_metadata.exists('used_space') or self.store_metadata.is_expired('used_space'):
            self.store_metadata.write('used_space', self.store.get_used_space())
        return self.store_metadata.get_value('used_space')

    def create_directory(self, directory):
        self.logger.debug("meta cache create_directory %s", directory)
        # AlreadyExistsError propagates to the caller unchanged
        # (the original try/except merely re-raised it)
        ret = self.store.create_directory(directory)
        if not self.entries.exists(directory):
            self.entries.write(directory, Entry())
        entry = self.entries.get_value(directory)
        entry.set_is_dir()
        entry.listing = []
        entry.set_modified()
        self.entries.write(directory, entry)
        self._add_to_parent_dir_listing(directory)
        return ret

    def duplicate(self, path_to_src, path_to_dest):
        self.logger.debug("meta cache duplicate %s to %s", path_to_src, path_to_dest)
        ret = self.store.duplicate(path_to_src, path_to_dest)
        if self.entries.exists(path_to_src):
            # deepcopy so later mutation of the source entry does not leak into the copy
            entry = deepcopy(self.entries.get_value(path_to_src))
            self.entries.write(path_to_dest, entry)
        else:
            self.entries.write(path_to_dest, Entry())
        entry = self.entries.get_value(path_to_dest)
        entry.set_modified()
        self.entries.write(path_to_dest, entry)
        self._add_to_parent_dir_listing(path_to_dest)
        self.logger.debug("duplicated %s to %s", path_to_src, path_to_dest)
        return ret

    def move(self, path_to_src, path_to_dest):
        self.logger.debug("meta cache move %s to %s", path_to_src, path_to_dest)
        self.store.move(path_to_src, path_to_dest)
        if self.entries.exists(path_to_src):
            entry = self.entries.get_value(path_to_src)
            self.entries.write(path_to_dest, entry)
        else:
            self.entries.write(path_to_dest, Entry())
        entry = self.entries.get_value(path_to_dest)
        entry.set_modified()
        self.entries.write(path_to_dest, entry)
        self.entries.delete(path_to_src)
        self._remove_from_parent_dir_listing(path_to_src)
        self._add_to_parent_dir_listing(path_to_dest)

    def get_modified(self, path):
        self.logger.debug("meta cache get_modified %s", path)
        if self.entries.exists(path):
            entry = self.entries.get_value(path)
            if entry.modified is not None:
                return entry.modified
        modified = self.store.get_modified(path)
        if not self.entries.exists(path):
            self.entries.write(path, Entry())
        entry = self.entries.get_value(path)
        entry.set_modified(modified)
        self.entries.write(path, entry)
        return entry.modified

    def get_directory_listing(self, directory):
        self.logger.debug("meta cache get_directory_listing %s", directory)
        if self.entries.exists(directory):
            entry = self.entries.get_value(directory)
            if entry.listing is not None:
                self.logger.debug("return cached listing %s", repr(entry.listing))
                # return a copy so callers cannot mutate the cached listing
                return list(entry.listing)
        listing = self.store.get_directory_listing(directory)
        self.logger.debug("meta cache caching %s", repr(listing))
        if not self.entries.exists(directory):
            self.entries.write(directory, Entry())
        entry = self.entries.get_value(directory)
        entry.listing = listing
        self._add_existing_items(entry, directory)
        entry.set_is_dir()
        self.entries.write(directory, entry)
        self.logger.debug("asserted %s", repr(self.entries.get_value(directory).listing))
        assert self.entries.get_value(directory).listing == entry.listing
        return list(entry.listing)

    def _add_existing_items(self, dir_entry, dir_entry_path):
        '''Add existing files or directories to *dir_entry* because they might
        have been uploaded recently and might not be retrievable by a directory
        listing from the storage provider.'''
        for path in self.entries.get_keys():
            if os.path.dirname(path) == dir_entry_path and path != '/':
                try:
                    if not self.entries.is_expired(path):
                        dir_entry.add_to_listing(path)
                except KeyError:
                    # entry was deleted concurrently by __clean_cache
                    pass
# NOTE(review): this redefines MetadataCachingStore from earlier in the file;
# the second definition shadows the first — confirm whether the earlier one
# should be removed.
class MetadataCachingStore(Store):
    '''Transparent caching layer around a :class:`Store`.

    Keeps file and directory metadata (size, modified time, directory
    listings) in a multiprocessing-safe LRU cache with per-entry expiration,
    so repeated metadata queries do not hit the slow remote storage provider.
    '''

    def __init__(self, store, cache_expiration_time=60):
        '''
        :param store: the wrapped store whose metadata should be cached
        :param cache_expiration_time: seconds before a cached entry expires
        '''
        self.store = store
        self.logger = logging.getLogger(self.get_logging_handler())
        self.logger.debug("creating MetadataCachingStore object")
        self.entries = MPSynchronizeProxy(MPLRUCache(cache_expiration_time, 2))
        self.store_metadata = Cache(cache_expiration_time)
        # deepcopy: the worker polls the store from another process and must
        # not share connection state with this instance
        self.free_space_worker = GetFreeSpaceWorker(deepcopy(store), self.logger)
        self.free_space_worker.start()

    def _is_valid_path(self, path):
        return self.store._is_valid_path(path)

    def _raise_error_if_invalid_path(self, path):
        self.store._raise_error_if_invalid_path(path)

    def get_name(self):
        if not self.store_metadata.exists('store_name'):
            self.store_metadata.write('store_name', self.store.get_name())
        return self.store_metadata.get_value('store_name')

    def _drop_if_expired(self, path):
        '''Remove the cached entry for *path* if it exists but has expired.'''
        if self.entries.exists(path) and self.entries.is_expired(path):
            self.entries.delete(path)

    def get_file(self, path_to_file):
        '''Fetch the file contents from the wrapped store and cache its size.'''
        self.logger.debug("meta cache get_file %s", path_to_file)
        ret = self.store.get_file(path_to_file)
        self._drop_if_expired(path_to_file)
        if not self.entries.exists(path_to_file):
            self.entries.write(path_to_file, Entry())
        entry = self.entries.get_value(path_to_file)
        entry.set_is_file()
        try:
            entry.size = len(ret)
            self.entries.write(path_to_file, entry)
        except:
            # best effort: if the cache update fails, drop the entry rather
            # than fail the read — intentionally broad and non-fatal
            self.entries.delete(path_to_file)
        self.logger.debug("meta cache returning %s", repr(ret)[:10])
        self._add_to_parent_dir_listing(path_to_file)
        return ret

    def _add_to_parent_dir_listing(self, path):
        '''Insert *path* into the cached listing of its parent directory.'''
        if path != '/':
            parent_dir = os.path.dirname(path)
            self._add_parent_dir_listing(path)
            entry = self.entries.get_value(parent_dir)
            entry.add_to_listing(path)
            self.entries.write(parent_dir, entry)

    def _add_parent_dir_listing(self, path):
        '''Add listing for parent directory of path to cache if it does not yet exist'''
        if path != '/':
            parent_dir = os.path.dirname(path)
            if not self.entries.exists(parent_dir):
                self.entries.write(parent_dir, Entry())
            entry = self.entries.get_value(parent_dir)
            if entry.listing is None:
                entry.listing = self.store.get_directory_listing(parent_dir)
                entry.set_is_dir()
                self.entries.write(parent_dir, entry)

    def _does_not_exist_in_parent_dir_listing(self, path):
        ''':returns: True if path does not exist in the cached directory listing'''
        if path == '/':
            return False
        parent_dir = os.path.dirname(path)
        if self.entries.exists(parent_dir):
            entry = self.entries.get_value(parent_dir)
            # check both unicode and byte-string form: listings may contain either
            if entry.listing is not None and (not unicode(path) in entry.listing and not path in entry.listing):
                # FIX: use lazy %-args like the rest of the file, not eager %-formatting
                self.logger.debug("%s does not exist in parent directory: %s...", path, repr(entry.listing[0:5]))
                return True
        return False

    def _remove_from_parent_dir_listing(self, path):
        '''Remove *path* from the cached listing of its parent directory.'''
        parent_dir = os.path.dirname(path)
        if self.entries.exists(parent_dir):
            entry = self.entries.get_value(parent_dir)
            entry.remove_from_listing(path)
            self.entries.write(parent_dir, entry)

    def store_file(self, path_to_file, dest_dir="/", remote_file_name=None, interrupt_event=None):
        if dest_dir == "/":
            dest_dir = ""
        if not remote_file_name:
            remote_file_name = os.path.basename(path_to_file)
        self.logger.debug("meta cache store_file %s", dest_dir + "/" + remote_file_name)
        # FIX: reuse __get_size instead of duplicating the seek-to-end logic
        with open(path_to_file) as fileobject:
            data_len = self.__get_size(fileobject)
        path = dest_dir + "/" + remote_file_name
        self.logger.debug("meta cache store_file %s", path)
        ret = self.store.store_file(path_to_file, dest_dir, remote_file_name, interrupt_event)
        self._drop_if_expired(path)
        if not self.entries.exists(path):
            self.entries.write(path, Entry())
        entry = self.entries.get_value(path)
        entry.set_is_file()
        entry.size = data_len
        entry.set_modified()
        self.entries.write(path, entry)
        self._add_to_parent_dir_listing(path)
        return ret

    def __get_size(self, fileobject):
        '''Return the size of *fileobject* in bytes, preserving its position.'''
        pos = fileobject.tell()
        fileobject.seek(0, 2)
        size = fileobject.tell()
        fileobject.seek(pos, 0)
        return size

    def store_fileobject(self, fileobject, path, interrupt_event=None):
        self.logger.debug("meta cache store_fileobject %s", path)
        data_len = self.__get_size(fileobject)
        try:
            ret = self.store.store_fileobject(fileobject, path, interrupt_event)
        finally:
            fileobject.close()
        self._drop_if_expired(path)
        if not self.entries.exists(path):
            self.entries.write(path, Entry())
        entry = self.entries.get_value(path)
        entry.set_is_file()
        entry.size = data_len
        entry.set_modified()
        self._add_to_parent_dir_listing(path)
        self.entries.write(path, entry)
        return ret

    def delete(self, path, is_dir):
        self.logger.debug("meta cache delete %s", path)
        self.store.delete(path, is_dir)
        self.entries.delete(path)
        self._remove_from_parent_dir_listing(path)

    def account_info(self):
        # cached without expiration: account info is assumed static — TODO confirm
        if not self.store_metadata.exists('account_info'):
            self.store_metadata.write('account_info', self.store.account_info())
        return self.store_metadata.get_value('account_info')

    def get_free_space(self):
        return self.free_space_worker.get_free_bytes_in_remote_store()

    def get_overall_space(self):
        if not self.store_metadata.exists('overall_space') or self.store_metadata.is_expired('overall_space'):
            self.store_metadata.write('overall_space', self.store.get_overall_space())
        return self.store_metadata.get_value('overall_space')

    def get_used_space(self):
        if not self.store_metadata.exists('used_space') or self.store_metadata.is_expired('used_space'):
            self.store_metadata.write('used_space', self.store.get_used_space())
        return self.store_metadata.get_value('used_space')

    def create_directory(self, directory):
        self.logger.debug("meta cache create_directory %s", directory)
        ret = self.store.create_directory(directory)
        self._drop_if_expired(directory)
        if not self.entries.exists(directory):
            self.entries.write(directory, Entry())
        entry = self.entries.get_value(directory)
        entry.set_is_dir()
        entry.listing = []
        entry.set_modified()
        self.entries.write(directory, entry)
        self._add_to_parent_dir_listing(directory)
        return ret

    def duplicate(self, path_to_src, path_to_dest):
        self.logger.debug("meta cache duplicate %s to %s", path_to_src, path_to_dest)
        ret = self.store.duplicate(path_to_src, path_to_dest)
        self._drop_if_expired(path_to_src)
        if self.entries.exists(path_to_src):
            # deepcopy so later mutation of the source entry does not leak into the copy
            entry = deepcopy(self.entries.get_value(path_to_src))
            self.entries.write(path_to_dest, entry)
        else:
            self.entries.write(path_to_dest, Entry())
        entry = self.entries.get_value(path_to_dest)
        entry.set_modified()
        self.entries.write(path_to_dest, entry)
        self._add_to_parent_dir_listing(path_to_dest)
        self.logger.debug("duplicated %s to %s", path_to_src, path_to_dest)
        return ret

    def move(self, path_to_src, path_to_dest):
        self.logger.debug("meta cache move %s to %s", path_to_src, path_to_dest)
        self.store.move(path_to_src, path_to_dest)
        # FIX: expiration check used exists(path_to_dest) with
        # is_expired(path_to_src) — both must refer to the source entry
        self._drop_if_expired(path_to_src)
        if self.entries.exists(path_to_src):
            entry = self.entries.get_value(path_to_src)
            self.entries.write(path_to_dest, entry)
        else:
            self.entries.write(path_to_dest, Entry())
        # FIX: re-read the *destination* entry (the source may not be cached;
        # matches the duplicate() logic)
        entry = self.entries.get_value(path_to_dest)
        entry.set_modified()
        self.entries.write(path_to_dest, entry)
        self.entries.delete(path_to_src)
        self._remove_from_parent_dir_listing(path_to_src)
        self._add_to_parent_dir_listing(path_to_dest)

    def get_modified(self, path):
        self.logger.debug("meta cache get_modified %s", path)
        self._drop_if_expired(path)
        if self.entries.exists(path):
            entry = self.entries.get_value(path)
            if entry.modified is not None:
                return entry.modified
        modified = self.store.get_modified(path)
        if not self.entries.exists(path):
            self.entries.write(path, Entry())
        entry = self.entries.get_value(path)
        entry.set_modified(modified)
        self.entries.write(path, entry)
        return entry.modified

    def get_directory_listing(self, directory):
        self.logger.debug("meta cache get_directory_listing %s", directory)
        self._drop_if_expired(directory)
        if self.entries.exists(directory):
            entry = self.entries.get_value(directory)
            if entry.listing is not None:
                self.logger.debug("return cached listing %s", repr(entry.listing))
                # return a copy so callers cannot mutate the cached listing
                return list(entry.listing)
        listing = self.store.get_directory_listing(directory)
        self.logger.debug("meta cache caching %s", repr(listing))
        if not self.entries.exists(directory):
            self.entries.write(directory, Entry())
        entry = self.entries.get_value(directory)
        entry.listing = listing
        self.entries.write(directory, entry)
        self.logger.debug("asserted %s", repr(self.entries.get_value(directory).listing))
        assert self.entries.get_value(directory).listing == entry.listing
        return list(entry.listing)

    def get_bytes(self, path):
        self.logger.debug("meta cache get_bytes %s", path)
        self._drop_if_expired(path)
        if self.entries.exists(path):
            entry = self.entries.get_value(path)
            if entry.size is not None:
                return entry.size
        size = self.store.get_bytes(path)
        if not self.entries.exists(path):
            self.entries.write(path, Entry())
        entry = self.entries.get_value(path)
        entry.size = size
        self.entries.write(path, entry)
        return entry.size

    def exists(self, path):
        self.logger.debug("meta cache exists %s", path)
        self._drop_if_expired(path)
        if not self.entries.exists(path):
            if self.store.exists(path):
                self.entries.write(path, Entry())
        return self.entries.exists(path)

    def get_metadata(self, path):
        '''Return {'is_dir', 'modified', 'bytes'} for *path*, from cache if possible.

        :raises NoSuchFilesytemObjectError: if the cached parent listing proves
            that *path* does not exist.'''
        self.logger.debug("meta cache get_metadata %s", path)
        if self.entries.exists(path) and self.entries.is_expired(path):
            # FIX: replaced the leftover "1.1" debug marker with a real message
            self.logger.debug("cached metadata for %s expired", path)
            self.entries.delete(path)
        self._add_parent_dir_listing(path)
        if self._does_not_exist_in_parent_dir_listing(path):
            raise NoSuchFilesytemObjectError(path, 0)
        if self.entries.exists(path):
            entry = self.entries.get_value(path)
            self.logger.debug("entry exists")
            if not None in [entry.is_dir, entry.modified, entry.size]:
                return {'is_dir': entry.is_dir, 'modified': entry.modified, 'bytes': entry.size}
        self.logger.debug("meta cache get_metadata entry does not exist or is expired")
        metadata = self.store.get_metadata(path)
        entry = self._prepare_entry(path, metadata)
        self.entries.write(path, entry)
        # prefetch sibling metadata in one bulk call when the store supports it
        if not entry.is_dir and isinstance(self.store, BulkGetMetadata):
            self._prefetch_directory(os.path.dirname(path))
        return {'is_dir': entry.is_dir, 'modified': entry.modified, 'bytes': entry.size}

    def _prepare_entry(self, path, metadata):
        '''Build (or update, preserving any cached listing) the Entry for *path*
        from a store metadata dict with keys 'is_dir', 'modified', 'bytes'.'''
        if self.entries.exists(path):
            entry = self.entries.get_value(path)  # preserve listings
        else:
            entry = Entry()
        if metadata['is_dir']:
            entry.set_is_dir()
        else:
            entry.set_is_file()
        entry.modified = metadata['modified']
        entry.size = metadata['bytes']
        return entry

    def _prefetch_directory(self, path):
        '''Bulk-fetch metadata for every item of directory *path* into the cache.'''
        self.logger.debug("prefetch %s", path)
        bulk = self.store.get_bulk_metadata(path)
        # FIX: removed the discarded bulk.items() call and the unused Entry();
        # renamed the loop variable so it no longer shadows the *path* parameter
        # (the success log used to report the last item instead of the directory)
        # and renamed the dict so it no longer shadows the builtin
        prefetched = {}
        for item_path, metadata in bulk.items():
            prefetched[item_path] = self._prepare_entry(item_path, metadata)
        try:
            self.entries.bulk_write(prefetched)
        except Exception:
            # best effort: a failed prefetch must not break get_metadata
            self.logger.exception("prefetch of %s failed", path)
            return
        self.logger.debug("prefetch succeeded %s", path)