def __init__(self, path=None, size_min=4800, size_max=5200, catalog=None,
             backend='lfs'):
    # Init path
    self.path = path
    # Init backend
    self.backend_cls = backends_registry[backend]
    # The "git add" arguments
    self.added = set()
    self.changed = set()
    self.removed = set()
    self.has_changed = False
    # init backend
    self.backend = self.backend_cls(self.path)
    # A mapping from key to handler
    self.cache = LRUCache(size_min, size_max, automatic=False)
    # TODO FIXME Catalog should be moved into backend
    # 7. Get the catalog
    if catalog:
        self.catalog = catalog
    else:
        if self.path:
            self.catalog = self.get_catalog()
    # Log
    catalog_log = '{}/database.log'.format(self.path)
    self.logger = Logger(catalog_log)
    register_logger(self.logger, 'itools.database')

def __init__(self, path, size_min=4800, size_max=5200):
    # 1. Keep the path
    if not lfs.is_folder(path):
        error = '"%s" should be a folder, but it is not' % path
        raise ValueError, error
    folder = lfs.open(path)
    self.path = str(folder.path)

    # 2. Keep the path to the data
    self.path_data = '%s/database/' % self.path
    if not lfs.is_folder(self.path_data):
        error = '"%s" should be a folder, but it is not' % self.path_data
        raise ValueError, error

    # 3. Initialize the database, but chrooted
    self.fs = lfs.open(self.path_data)

    # 4. New interface to Git
    self.worktree = open_worktree(self.path_data)

    # 5. A mapping from key to handler
    self.cache = LRUCache(size_min, size_max, automatic=False)

    # 6. The git cache
    self.git_cache = LRUCache(900, 1100)

def __init__(self, path=None, size_min=4800, size_max=5200, backend='lfs'):
    # Init path
    self.path = path
    # Init backend
    self.backend_cls = backends_registry[backend]
    # The "git add" arguments
    self.added = set()
    self.changed = set()
    self.removed = set()
    self.has_changed = False
    # Fields
    self.fields = get_register_fields()
    # init backend
    self.backend = self.backend_cls(self.path, self.fields, self.read_only)
    # A mapping from key to handler
    self.cache = LRUCache(size_min, size_max, automatic=False)

# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Import from itools
from itools.core import LRUCache
from generic import GenericDataType
from registry import get_scheme


cache = LRUCache(200)

def get_reference(reference):
    """Return a URI reference of the appropriate type for the given string.
    """
    # Hit
    if reference in cache:
        return cache[reference]
    # Miss
    if ':' in reference:
        scheme_name, scheme_specifics = reference.split(':', 1)
        scheme = get_scheme(scheme_name)
    else:
        scheme = GenericDataType

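# Usage sketch for the memoization pattern above.  Assumption: the rest of
# get_reference (truncated here) parses the string and stores the result in
# `cache` before returning it, so a repeated call is served from the LRUCache.
from itools.uri import get_reference

ref1 = get_reference('http://example.com/a/b')   # miss: parsed, then cached
ref2 = get_reference('http://example.com/a/b')   # hit: returned from the cache
assert ref1 is ref2
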
def __init__(self, size_min=4800, size_max=5200, fs=None):
    # A mapping from key to handler
    self.cache = LRUCache(size_min, size_max, automatic=False)
    self.fs = fs or vfs

class RODatabase(object):
    """The read-only database works as a cache for file handlers.  This is
    the base class for any other handler database.
    """

    # Flag to know whether to commit or not.  This is to avoid superfluous
    # actions by the 'save' and 'abort' methods.
    has_changed = False

    def __init__(self, size_min=4800, size_max=5200, fs=None):
        # A mapping from key to handler
        self.cache = LRUCache(size_min, size_max, automatic=False)
        self.fs = fs or vfs

    #######################################################################
    # Private API
    #######################################################################
    def _sync_filesystem(self, key):
        """This method checks the state of the key in the cache against the
        filesystem.  Synchronizes the state if needed by discarding the
        handler, or raises an error if there is a conflict.

        Returns the handler for the given key if it is still in the cache
        after all the tests, or None otherwise.
        """
        # If the key is not in the cache nothing can be wrong
        handler = self.cache.get(key)
        if handler is None:
            return None
        # (1) Not yet loaded
        if handler.timestamp is None and handler.dirty is None:
            # Removed from the filesystem
            if not self.fs.exists(key):
                self._discard_handler(key)
                return None
            # Everything looks fine
            # FIXME There will be a bug if the file in the filesystem has
            # changed to a different type, so the handler class may not match.
            return handler
        # (2) New handler
        if handler.timestamp is None and handler.dirty is not None:
            # Everything looks fine
            if not self.fs.exists(key):
                return handler
            # Conflict
            error = 'new file in the filesystem and new handler in the cache'
            raise RuntimeError, error
        # (3) Loaded but not changed
        if handler.timestamp is not None and handler.dirty is None:
            # Removed from the filesystem
            if not self.fs.exists(key):
                self._discard_handler(key)
                return None
            # Modified in the filesystem
            mtime = self.fs.get_mtime(key)
            if mtime > handler.timestamp:
                self._discard_handler(key)
                return None
            # Everything looks fine
            return handler
        # (4) Loaded and changed
        if handler.timestamp is not None and handler.dirty is not None:
            # Removed from the filesystem
            if not self.fs.exists(key):
                error = 'a modified handler was removed from the filesystem'
                raise RuntimeError, error
            # Modified in the filesystem
            mtime = self.fs.get_mtime(key)
            if mtime > handler.timestamp:
                error = 'modified in the cache and in the filesystem'
                raise RuntimeError, error
            # Everything looks fine
            return handler

    def _discard_handler(self, key):
        """Unconditionally remove the handler identified by the given key
        from the cache, and invalidate it (and free memory at the same time).
        """
        handler = self.cache.pop(key)
        # Invalidate the handler
        handler.__dict__.clear()

    def _abort_changes(self):
        """To be called to abandon the transaction.
        """
        raise NotImplementedError

    def _cleanup(self):
        """For maintenance operations, this method is automatically called
        after a transaction is committed or aborted.
        """
        # import gc
        # from itools.core import vmsize
        # print 'RODatabase._cleanup (0): % 4d %s' % (len(self.cache), vmsize())
        # print gc.get_count()
        self.make_room()
        # print 'RODatabase._cleanup (1): % 4d %s' % (len(self.cache), vmsize())
        # print gc.get_count()

    #######################################################################
    # Public API
    #######################################################################
    def normalize_key(self, key):
        """Resolves and returns the given key to be unique.
        """
        return self.fs.normalize_key(key)

    def push_handler(self, key, handler):
        """Adds the given resource to the cache.
        """
        handler.database = self
        handler.key = key
        # Folders are not stored in the cache
        if type(handler) is Folder:
            return
        # Store in the cache
        self.cache[key] = handler

    def make_room(self):
        """Remove handlers from the cache until it fits the defined size.

        Use with caution.  If the handlers we are about to discard are still
        used outside the database, and one of them (or more) are modified,
        then there will be an error.
        """
        # Find out how many handlers should be removed
        size = len(self.cache)
        if size < self.cache.size_max:
            return

        # Discard as many handlers as needed
        n = size - self.cache.size_min
        for key, handler in self.cache.iteritems():
            # Skip externally referenced handlers (refcount should be 3:
            # one for the cache, one for the local variable and one for
            # the argument passed to getrefcount).
            refcount = getrefcount(handler)
            if refcount > 3:
                continue
            # Skip modified (not new) handlers
            if handler.dirty is not None:
                continue
            # Discard this handler
            self._discard_handler(key)
            # Check whether we are done
            n -= 1
            if n == 0:
                return

    def has_handler(self, key):
        key = self.normalize_key(key)

        # Synchronize
        handler = self._sync_filesystem(key)
        if handler is not None:
            return True

        # Ask vfs
        return self.fs.exists(key)

    def get_handler_names(self, key):
        key = self.normalize_key(key)

        if self.fs.exists(key):
            names = self.fs.get_names(key)
            return list(names)

        return []

    def get_mimetype(self, key):
        return self.fs.get_mimetype(key)

    def get_handler_class(self, key):
        mimetype = self.get_mimetype(key)
        try:
            return get_handler_class_by_mimetype(mimetype)
        except ValueError:
            fs = self.fs
            if fs.is_file(key):
                from file import File
                return File
            elif fs.is_folder(key):
                from folder import Folder
                return Folder

        raise ValueError

    def _get_handler(self, key, cls=None, soft=False):
        # Synchronize
        handler = self._sync_filesystem(key)
        if handler is not None:
            # Check the class matches
            if cls is not None and not isinstance(handler, cls):
                error = "expected '%s' class, '%s' found"
                raise LookupError, error % (cls, handler.__class__)
            # Cache hit
            self.cache.touch(key)
            return handler

        # Check the resource exists
        if not self.fs.exists(key):
            if soft:
                return None
            raise LookupError, 'the resource "%s" does not exist' % key

        # Folders are not cached
        if self.fs.is_folder(key):
            return Folder(key, database=self)

        # Cache miss
        if cls is None:
            cls = self.get_handler_class(key)
        # Build the handler and update the cache
        handler = object.__new__(cls)
        self.push_handler(key, handler)

        return handler

    def get_handler(self, key, cls=None, soft=False):
        key = self.normalize_key(key)
        return self._get_handler(key, cls, soft)

    def get_handlers(self, key):
        base = self.normalize_key(key)
        for name in self.get_handler_names(base):
            key = self.fs.resolve2(base, name)
            yield self._get_handler(key)

    def touch_handler(self, key, handler=None):
        key = self.normalize_key(key)
        handler = self._get_handler(key)
        if handler.dirty is None:
            # Load the handler if needed
            if handler.timestamp is None:
                handler.load_state()
            # Mark the handler as dirty
            handler.dirty = datetime.now()

    def set_handler(self, key, handler):
        raise NotImplementedError, 'cannot set handler'

    def del_handler(self, key):
        raise NotImplementedError, 'cannot del handler'

    def copy_handler(self, source, target):
        raise NotImplementedError, 'cannot copy handler'

    def move_handler(self, source, target):
        raise NotImplementedError, 'cannot move handler'

    def save_changes(self):
        raise NotImplementedError

    def abort_changes(self):
        if not self.has_changed:
            return

        self._abort_changes()
        self._cleanup()

class RODatabase(BaseDatabase):
    """The read-only database works as a cache for file handlers.
    """

    def __init__(self, size_min=4800, size_max=5200, fs=None):
        # A mapping from key to handler
        self.cache = LRUCache(size_min, size_max, automatic=False)
        self.fs = fs or vfs

    def _resolve_key(self, key):
        """Resolves and returns the given key to be unique.
        """
        return self.fs.resolve_key(key)

    def _resolve_key_for_writing(self, key):
        raise NotImplementedError

    def _sync_filesystem(self, key):
        """This method checks the state of the key in the cache against the
        filesystem.  Synchronizes the state if needed by discarding the
        handler, or raises an error if there is a conflict.

        Returns the handler for the given key if it is still in the cache
        after all the tests, or None otherwise.
        """
        # If the key is not in the cache nothing can be wrong
        handler = self.cache.get(key)
        if handler is None:
            return None
        # (1) Not yet loaded
        if handler.timestamp is None and handler.dirty is None:
            # Removed from the filesystem
            if not self.fs.exists(key):
                self._discard_handler(key)
                return None
            # Everything looks fine
            # FIXME There will be a bug if the file in the filesystem has
            # changed to a different type, so the handler class may not match.
            return handler
        # (2) New handler
        if handler.timestamp is None and handler.dirty is not None:
            # Everything looks fine
            if not self.fs.exists(key):
                return handler
            # Conflict
            error = 'new file in the filesystem and new handler in the cache'
            raise RuntimeError, error
        # (3) Loaded but not changed
        if handler.timestamp is not None and handler.dirty is None:
            # Removed from the filesystem
            if not self.fs.exists(key):
                self._discard_handler(key)
                return None
            # Modified in the filesystem
            mtime = self.fs.get_mtime(key)
            if mtime > handler.timestamp:
                self._discard_handler(key)
                return None
            # Everything looks fine
            return handler
        # (4) Loaded and changed
        if handler.timestamp is not None and handler.dirty is not None:
            # Removed from the filesystem
            if not self.fs.exists(key):
                error = 'a modified handler was removed from the filesystem'
                raise RuntimeError, error
            # Modified in the filesystem
            mtime = self.fs.get_mtime(key)
            if mtime > handler.timestamp:
                error = 'modified in the cache and in the filesystem'
                raise RuntimeError, error
            # Everything looks fine
            return handler

    #######################################################################
    # Cache API
    def _discard_handler(self, key):
        """Unconditionally remove the handler identified by the given key
        from the cache, and invalidate it (and free memory at the same time).
        """
        handler = self.cache.pop(key)
        # Invalidate the handler
        handler.__dict__.clear()

    def push_handler(self, key, handler):
        """Adds the given resource to the cache.
        """
        handler.database = self
        handler.key = key
        # Folders are not stored in the cache
        if isinstance(handler, Folder):
            return
        # Store in the cache
        self.cache[key] = handler

    def push_phantom(self, key, handler):
        handler.database = self
        handler.key = key

    def make_room(self):
        """Remove handlers from the cache until it fits the defined size.

        Use with caution.  If the handlers we are about to discard are still
        used outside the database, and one of them (or more) are modified,
        then there will be an error.
        """
        # Find out how many handlers should be removed
        size = len(self.cache)
        if size < self.cache.size_max:
            return

        # Discard as many handlers as needed
        n = size - self.cache.size_min
        for key, handler in self.cache.iteritems():
            # Skip externally referenced handlers (refcount should be 3:
            # one for the cache, one for the local variable and one for
            # the argument passed to getrefcount).
            refcount = getrefcount(handler)
            if refcount > 3:
                continue
            # Skip modified (not new) handlers
            if handler.dirty is not None:
                continue
            # Discard this handler
            self._discard_handler(key)
            # Check whether we are done
            n -= 1
            if n == 0:
                return

    def _has_changed(self):
        return False

    #######################################################################
    # Database API
    def is_phantom(self, handler):
        return handler.timestamp is None and handler.dirty is not None

    def has_handler(self, key):
        key = self._resolve_key(key)

        # Synchronize
        handler = self._sync_filesystem(key)
        if handler is not None:
            return True

        # Ask vfs
        return self.fs.exists(key)

    def get_handler_names(self, key):
        key = self._resolve_key(key)

        if self.fs.exists(key):
            names = self.fs.get_names(key)
            return list(names)

        return []

    def get_handler_class(self, key):
        fs = self.fs
        mimetype = fs.get_mimetype(key)
        try:
            return get_handler_class_by_mimetype(mimetype)
        except ValueError:
            if fs.is_file(key):
                from file import File
                return File
            elif fs.is_folder(key):
                from folder import Folder
                return Folder

        raise ValueError

    def get_handler(self, key, cls=None):
        key = self._resolve_key(key)

        # Synchronize
        handler = self._sync_filesystem(key)
        if handler is not None:
            # Check the class matches
            if cls is not None and not isinstance(handler, cls):
                error = "expected '%s' class, '%s' found"
                raise LookupError, error % (cls, handler.__class__)
            # Cache hit
            self.cache.touch(key)
            return handler

        # Check the resource exists
        if not self.fs.exists(key):
            raise LookupError, 'the resource "%s" does not exist' % key

        # Folders are not cached
        if self.fs.is_folder(key):
            if cls is None:
                cls = Folder
            folder = cls(key, database=self)
            return folder

        # Cache miss
        if cls is None:
            cls = self.get_handler_class(key)
        # Build the handler and update the cache
        handler = object.__new__(cls)
        self.push_handler(key, handler)

        return handler

    def get_handlers(self, key):
        base = self._resolve_key(key)
        fs = self.fs
        for name in fs.get_names(base):
            key = fs.resolve2(base, name)
            yield self.get_handler(key)

    #######################################################################
    # Write API
    def set_handler(self, key, handler):
        raise ReadOnlyError, 'cannot set handler'

    def del_handler(self, key):
        raise ReadOnlyError, 'cannot del handler'

    def copy_handler(self, source, target):
        raise ReadOnlyError, 'cannot copy handler'

    def move_handler(self, source, target):
        raise ReadOnlyError, 'cannot move handler'

    def safe_make_file(self, key):
        raise ReadOnlyError, 'cannot make file'

    def safe_remove(self, key):
        raise ReadOnlyError, 'cannot remove'

    def safe_open(self, key, mode=None):
        if mode in (WRITE, READ_WRITE, APPEND):
            raise ReadOnlyError, 'cannot open file for writing'

        return self.fs.open(key, READ)

    def _cleanup(self):
        # import gc
        # from itools.core import vmsize
        # print 'RODatabase._cleanup (0): % 4d %s' % (len(self.cache), vmsize())
        # print gc.get_count()
        self.make_room()

        ]
        for name in names:
            if getattr(self, name) != getattr(other, name):
                return False
        return True

    def __str__(self):
        output = ['"%s"' % self.value]
        if self.path is not None:
            output.append('$Path="%s"' % self.path)
        if self.domain is not None:
            output.append('$Domain="%s"' % self.domain)
        return '; '.join(output)


CACHE_COOKIES = LRUCache(200)

class CookieDataType(DataType):
    """TODO: Performance can be improved.  For now we use a cache of decoded
    cookie strings.
    """

    @staticmethod
    def decode(data):
        base_data = data
        if CACHE_COOKIES.get(base_data):
            CACHE_COOKIES.touch(base_data)
            return CACHE_COOKIES[base_data]
        # Parse the cookie string
        parameters = []
        while data:

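# Generic sketch of the cache-aside pattern used by CookieDataType.decode
# above (assumption: the truncated parsing loop ends by storing the decoded
# value, e.g. CACHE_COOKIES[base_data] = result, before returning it).
from itools.core import LRUCache

CACHE_EXAMPLE = LRUCache(200)

def cached_decode(data, decode):
    # Hit: refresh the entry's position in the LRU order and return it
    if CACHE_EXAMPLE.get(data) is not None:
        CACHE_EXAMPLE.touch(data)
        return CACHE_EXAMPLE[data]
    # Miss: do the expensive work once and remember the result
    result = decode(data)
    CACHE_EXAMPLE[data] = result
    return result
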
class RODatabase(object):

    read_only = True
    backend_cls = None

    def __init__(self, path=None, size_min=4800, size_max=5200, backend='lfs'):
        # Init path
        self.path = path
        # Init backend
        self.backend_cls = backends_registry[backend]
        # The "git add" arguments
        self.added = set()
        self.changed = set()
        self.removed = set()
        self.has_changed = False
        # Fields
        self.fields = get_register_fields()
        # init backend
        self.backend = self.backend_cls(self.path, self.fields, self.read_only)
        # A mapping from key to handler
        self.cache = LRUCache(size_min, size_max, automatic=False)

    @property
    def catalog(self):
        print('WARNING: Use of context.database.catalog is obsolete')
        return self.backend.catalog

    def close(self):
        self.backend.close()

    def check_database(self):
        """This function checks whether the database is in a consistent
        state, that is to say whether a transaction was not brutally aborted
        and left the working directory with changes not committed.

        This is meant to be used by scripts, like 'icms-start.py'
        """
        # TODO Check if bare repository is OK
        print('Checking database...')
        return True

    #######################################################################
    # With statement
    #######################################################################
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    #######################################################################
    # Private API
    #######################################################################
    def _discard_handler(self, key):
        """Unconditionally remove the handler identified by the given key
        from the cache, and invalidate it (and free memory at the same time).
        """
        handler = self.cache.pop(key)
        # Invalidate the handler
        handler.__dict__.clear()

    def _abort_changes(self):
        """To be called to abandon the transaction.
        """
        raise ReadonlyError

    def _cleanup(self):
        """For maintenance operations, this method is automatically called
        after a transaction is committed or aborted.
        """
        #import gc
        #from itools.core import vmsize
        #print 'RODatabase._cleanup (0): % 4d %s' % (len(self.cache), vmsize())
        #print gc.get_count()
        self.make_room()
        #print 'RODatabase._cleanup (1): % 4d %s' % (len(self.cache), vmsize())
        #print gc.get_count()

    #######################################################################
    # Public API
    #######################################################################
    def normalize_key(self, path, __root=Path('/')):
        return self.backend.normalize_key(path, __root)

    def push_handler(self, key, handler):
        """Adds the given resource to the cache.
        """
        handler.database = self
        handler.key = key
        # Folders are not stored in the cache
        if type(handler) is Folder:
            return
        # Store in the cache
        self.cache[key] = handler

    def make_room(self):
        """Remove handlers from the cache until it fits the defined size.

        Use with caution.  If the handlers we are about to discard are still
        used outside the database, and one of them (or more) are modified,
        then there will be an error.
        """
        # Find out how many handlers should be removed
        size = len(self.cache)
        if size < self.cache.size_max:
            return

        # Discard as many handlers as needed
        n = size - self.cache.size_min
        for key, handler in self.cache.iteritems():
            # Skip externally referenced handlers (refcount should be 3:
            # one for the cache, one for the local variable and one for
            # the argument passed to getrefcount).
            refcount = getrefcount(handler)
            if refcount > 3:
                continue
            # Skip modified (not new) handlers
            if handler.dirty is not None:
                continue
            # Discard this handler
            self._discard_handler(key)
            # Check whether we are done
            n -= 1
            if n == 0:
                return

    def has_handler(self, key):
        key = self.normalize_key(key)

        # Synchronize
        handler = self.cache.get(key)
        if handler is not None:
            return True

        # Ask backend
        return self.backend.handler_exists(key)

    def save_handler(self, key, handler):
        self.backend.save_handler(key, handler)

    def get_handler_names(self, key):
        key = self.normalize_key(key)
        return self.backend.get_handler_names(key)

    def get_handler_data(self, key):
        return self.backend.get_handler_data(key)

    def get_handler_mtime(self, key):
        return self.backend.get_handler_mtime(key)

    def get_mimetype(self, key):
        return self.backend.get_handler_mimetype(key)

    def get_handler_class(self, key):
        mimetype = self.get_mimetype(key)
        return get_handler_class_by_mimetype(mimetype)

    def _get_handler(self, key, cls=None, soft=False):
        # Get resource
        if key in self.removed:
            return None
        # Folders are not cached
        if cls is Folder:
            return Folder(key, database=self)
        # Synchronize
        handler = self.cache.get(key)
        if handler is not None:
            # Check the class matches
            if cls is not None and not isinstance(handler, cls):
                error = "expected '%s' class, '%s' found"
                raise LookupError, error % (cls, handler.__class__)
            # Cache hit
            self.cache.touch(key)
            return handler
        # Check the resource exists
        try:
            data = self.backend.get_handler_data(key)
        except:
            # Does not exist
            if soft:
                return None
            raise LookupError('the resource "{0}" does not exist'.format(key))
        # Cache miss
        if cls is None:
            cls = self.get_handler_class(key)
        # Build the handler and update the cache
        handler = object.__new__(cls)
        # Put handler in cache
        self.push_handler(key, handler)
        # Load handler data
        # FIXME We should reset handler state on errors
        try:
            handler.load_state_from_string(data)
        except Exception:
            # Remove handler from cache if we cannot load it
            self._discard_handler(key)
            raise
        # Ok
        return handler

    def traverse_resources(self):
        return self.backend.traverse_resources()

    def get_handler(self, key, cls=None, soft=False):
        key = self.normalize_key(key)
        return self._get_handler(key, cls, soft)

    def get_handlers(self, key):
        base = self.normalize_key(key)
        for name in self.get_handler_names(base):
            yield self._get_handler(base + '/' + name)

    def touch_handler(self, key, handler=None):
        """Report a modification of the key/handler to the database.
        """
        # FIXME touch_handler is called at handler loading
        # ro_database is also a rw_database, so it can save data
        # raise ReadonlyError, 'cannot set handler'
        key = self.normalize_key(key)
        # Do some checks
        if handler is None:
            raise ValueError
        if key in self.removed:
            raise ValueError
        # Mark the handler as dirty
        handler.dirty = datetime.now()
        # Set database has changed
        self.has_changed = True
        # Set in changed list
        self.changed.add(key)

    def set_handler(self, key, handler):
        raise ReadonlyError, 'cannot set handler'

    def del_handler(self, key):
        raise ReadonlyError, 'cannot del handler'

    def copy_handler(self, source, target, exclude_patterns=None):
        raise ReadonlyError, 'cannot copy handler'

    def move_handler(self, source, target):
        raise ReadonlyError, 'cannot move handler'

    #######################################################################
    # Layer 1: resources
    #######################################################################
    _resources_registry = {}

    @classmethod
    def register_resource_class(self, resource_class, format=None):
        if format is None:
            format = resource_class.class_id
        self._resources_registry[format] = resource_class

    @classmethod
    def unregister_resource_class(self, resource_class):
        registry = self._resources_registry
        for class_id, cls in registry.items():
            if resource_class is cls:
                del registry[class_id]

    def get_resource_class(self, class_id):
        if type(class_id) is not str:
            raise TypeError, 'expected byte string, got %s' % class_id

        # Check dynamic models are not broken
        registry = self._resources_registry
        if class_id[0] == '/':
            model = self.get_resource(class_id, soft=True)
            if model is None:
                registry.pop(class_id, None)
                err = 'the resource "%s" does not exist' % class_id
                raise LookupError, err

        # Cache hit
        cls = registry.get(class_id)
        if cls:
            return cls

        # Cache miss: dynamic model
        if class_id[0] == '/':
            cls = model.build_resource_class()
            registry[class_id] = cls
            return cls

        # Cache miss: fallback on mimetype
        if '/' in class_id:
            class_id = class_id.split('/')[0]
            cls = registry.get(class_id)
            if cls:
                return cls

        # Default
        return self._resources_registry['application/octet-stream']

    def get_resource_classes(self):
        registry = self._resources_registry
        for class_id, cls in self._resources_registry.items():
            if class_id[0] == '/':
                model = self.get_resource(class_id, soft=True)
                if model is None:
                    registry.pop(class_id, None)
                    continue
            yield cls

    def get_metadata(self, abspath, soft=False):
        if type(abspath) is str:
            path = abspath[1:]
            abspath = Path(abspath)
        else:
            path = str(abspath)[1:]

        path_to_metadata = '%s.metadata' % path
        return self.get_handler(path_to_metadata, Metadata, soft=soft)

    def get_cls(self, class_id):
        cls = self.get_resource_class(class_id)
        return cls or self.get_resource_class('application/octet-stream')

    def get_resource(self, abspath, soft=False):
        abspath = Path(abspath)
        # Get metadata
        metadata = self.get_metadata(abspath, soft)
        if metadata is None:
            return None
        # Get associated class
        class_id = metadata.format
        cls = self.get_cls(class_id)
        # Ok
        return cls(abspath=abspath, database=self, metadata=metadata)

    def get_resource_from_brain(self, brain):
        cls = self.get_cls(brain.format)
        return cls(abspath=Path(brain.abspath), database=self, brain=brain)

    def remove_resource(self, resource):
        raise ReadonlyError

    def add_resource(self, resource):
        raise ReadonlyError

    def change_resource(self, resource):
        raise ReadonlyError

    def move_resource(self, source, new_path):
        raise ReadonlyError

    def save_changes(self):
        return

    def create_tag(self, tag_name, message=None):
        raise ReadonlyError

    def reset_to_tag(self, tag_name):
        raise ReadonlyError

    def abort_changes(self):
        return

    #######################################################################
    # API for path
    #######################################################################
    @staticmethod
    def get_basename(path):
        if type(path) is not Path:
            path = Path(path)
        return path.get_name()

    @staticmethod
    def get_path(path):
        if type(path) is not Path:
            path = Path(path)
        return str(path)

    @staticmethod
    def resolve(base, path):
        if type(base) is not Path:
            base = Path(base)
        path = base.resolve(path)
        return str(path)

    @staticmethod
    def resolve2(base, path):
        if type(base) is not Path:
            base = Path(base)
        path = base.resolve2(path)
        return str(path)

    #######################################################################
    # Search
    #######################################################################
    def search(self, query=None, **kw):
        results = self.backend.search(query, **kw)
        return SearchResults(database=self, results=results)

    def reindex_catalog(self, base_abspath, recursif=True):
        raise ReadonlyError

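# Usage sketch for the backend-based read-only database above (hypothetical
# database path; assumes an 'lfs' backend is registered in backends_registry
# and that 'format' is one of the fields registered for the catalog).
with RODatabase(path='/path/to/database', backend='lfs') as database:
    root = database.get_resource('/', soft=True)       # None if no metadata
    results = database.search(format='webpage')        # backend/catalog query
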
class RODatabase(object):

    def __init__(self, path, size_min=4800, size_max=5200):
        # 1. Keep the path
        if not lfs.is_folder(path):
            error = '"%s" should be a folder, but it is not' % path
            raise ValueError, error
        folder = lfs.open(path)
        self.path = str(folder.path)

        # 2. Keep the path to the data
        self.path_data = '%s/database/' % self.path
        if not lfs.is_folder(self.path_data):
            error = '"%s" should be a folder, but it is not' % self.path_data
            raise ValueError, error

        # 3. Initialize the database, but chrooted
        self.fs = lfs.open(self.path_data)

        # 4. New interface to Git
        self.worktree = open_worktree(self.path_data)

        # 5. A mapping from key to handler
        self.cache = LRUCache(size_min, size_max, automatic=False)

        # 6. The git cache
        self.git_cache = LRUCache(900, 1100)

    #######################################################################
    # Private API
    #######################################################################
    def _sync_filesystem(self, key):
        """This method checks the state of the key in the cache against the
        filesystem.  Synchronizes the state if needed by discarding the
        handler, or raises an error if there is a conflict.

        Returns the handler for the given key if it is still in the cache
        after all the tests, or None otherwise.
        """
        # If the key is not in the cache nothing can be wrong
        handler = self.cache.get(key)
        if handler is None:
            return None
        # (1) Not yet loaded
        if handler.timestamp is None and handler.dirty is None:
            # Removed from the filesystem
            if not self.fs.exists(key):
                self._discard_handler(key)
                return None
            # Everything looks fine
            # FIXME There will be a bug if the file in the filesystem has
            # changed to a different type, so the handler class may not match.
            return handler
        # (2) New handler
        if handler.timestamp is None and handler.dirty is not None:
            # Everything looks fine
            if not self.fs.exists(key):
                return handler
            # Conflict
            error = 'new file in the filesystem and new handler in the cache'
            raise RuntimeError, error
        # (3) Loaded but not changed
        if handler.timestamp is not None and handler.dirty is None:
            # Removed from the filesystem
            if not self.fs.exists(key):
                self._discard_handler(key)
                return None
            # Modified in the filesystem
            mtime = self.fs.get_mtime(key)
            if mtime > handler.timestamp:
                self._discard_handler(key)
                return None
            # Everything looks fine
            return handler
        # (4) Loaded and changed
        if handler.timestamp is not None and handler.dirty is not None:
            # Removed from the filesystem
            if not self.fs.exists(key):
                error = 'a modified handler was removed from the filesystem'
                raise RuntimeError, error
            # Modified in the filesystem
            mtime = self.fs.get_mtime(key)
            if mtime > handler.timestamp:
                error = 'modified in the cache and in the filesystem'
                raise RuntimeError, error
            # Everything looks fine
            return handler

    def _discard_handler(self, key):
        """Unconditionally remove the handler identified by the given key
        from the cache, and invalidate it (and free memory at the same time).
        """
        handler = self.cache.pop(key)
        # Invalidate the handler
        handler.__dict__.clear()

    def _abort_changes(self):
        """To be called to abandon the transaction.
        """
        raise ReadonlyError

    def _cleanup(self):
        """For maintenance operations, this method is automatically called
        after a transaction is committed or aborted.
        """
        # import gc
        # from itools.core import vmsize
        # print 'RODatabase._cleanup (0): % 4d %s' % (len(self.cache), vmsize())
        # print gc.get_count()
        self.make_room()
        # print 'RODatabase._cleanup (1): % 4d %s' % (len(self.cache), vmsize())
        # print gc.get_count()

    #######################################################################
    # Public API
    #######################################################################
    def normalize_key(self, path, __root=Path('/')):
        # Performance is critical so assume the path is already relative to
        # the repository.
        key = __root.resolve(path)
        if key and key[0] == '.git':
            err = "bad '%s' path, access to the '.git' folder is denied"
            raise ValueError, err % path

        return '/'.join(key)

    def push_handler(self, key, handler):
        """Adds the given resource to the cache.
        """
        handler.database = self
        handler.key = key
        # Folders are not stored in the cache
        if type(handler) is Folder:
            return
        # Store in the cache
        self.cache[key] = handler

    def make_room(self):
        """Remove handlers from the cache until it fits the defined size.

        Use with caution.  If the handlers we are about to discard are still
        used outside the database, and one of them (or more) are modified,
        then there will be an error.
        """
        # Find out how many handlers should be removed
        size = len(self.cache)
        if size < self.cache.size_max:
            return

        # Discard as many handlers as needed
        n = size - self.cache.size_min
        for key, handler in self.cache.iteritems():
            # Skip externally referenced handlers (refcount should be 3:
            # one for the cache, one for the local variable and one for
            # the argument passed to getrefcount).
            refcount = getrefcount(handler)
            if refcount > 3:
                continue
            # Skip modified (not new) handlers
            if handler.dirty is not None:
                continue
            # Discard this handler
            self._discard_handler(key)
            # Check whether we are done
            n -= 1
            if n == 0:
                return

    def has_handler(self, key):
        key = self.normalize_key(key)

        # Synchronize
        handler = self._sync_filesystem(key)
        if handler is not None:
            return True

        # Ask vfs
        return self.fs.exists(key)

    def get_handler_names(self, key):
        key = self.normalize_key(key)
        return self.fs.get_names(key)

    def get_mimetype(self, key):
        fs = self.fs
        abspath = fs._resolve_path(key)
        return magic_from_file(abspath)

    def get_handler_class(self, key):
        mimetype = self.get_mimetype(key)
        try:
            return get_handler_class_by_mimetype(mimetype)
        except ValueError:
            log_warning('unknown handler class "{0}"'.format(mimetype))
            fs = self.fs
            if fs.is_file(key):
                from itools.handlers import File
                return File
            elif fs.is_folder(key):
                from itools.handlers import Folder
                return Folder

        raise ValueError

    def _get_handler(self, key, cls=None, soft=False):
        # Synchronize
        handler = self._sync_filesystem(key)
        if handler is not None:
            # Check the class matches
            if cls is not None and not isinstance(handler, cls):
                error = "expected '%s' class, '%s' found"
                raise LookupError, error % (cls, handler.__class__)
            # Cache hit
            self.cache.touch(key)
            return handler

        # Check the resource exists
        if not self.fs.exists(key):
            if soft:
                return None
            raise LookupError, 'the resource "%s" does not exist' % key

        # Folders are not cached
        if self.fs.is_folder(key):
            return Folder(key, database=self)

        # Cache miss
        if cls is None:
            cls = self.get_handler_class(key)
        # Build the handler and update the cache
        handler = object.__new__(cls)
        self.push_handler(key, handler)

        return handler

    def get_handler(self, key, cls=None, soft=False):
        key = self.normalize_key(key)
        return self._get_handler(key, cls, soft)

    def get_handlers(self, key):
        base = self.normalize_key(key)
        for name in self.get_handler_names(base):
            key = self.fs.resolve2(base, name)
            yield self._get_handler(key)

    def touch_handler(self, key, handler=None):
        """Report a modification of the key/handler to the database.

        We must pass the handler because of phantoms.
        """
        raise ReadonlyError, 'cannot set handler'

    def set_handler(self, key, handler):
        raise ReadonlyError, 'cannot set handler'

    def del_handler(self, key):
        raise ReadonlyError, 'cannot del handler'

    def copy_handler(self, source, target, exclude_patterns=None):
        raise ReadonlyError, 'cannot copy handler'

    def move_handler(self, source, target):
        raise ReadonlyError, 'cannot move handler'

    #######################################################################
    # Layer 1: resources
    #######################################################################
    _resources_registry = {}

    @classmethod
    def register_resource_class(self, resource_class, format=None):
        if format is None:
            format = resource_class.class_id
        self._resources_registry[format] = resource_class

    @classmethod
    def unregister_resource_class(self, resource_class):
        registry = self._resources_registry
        for class_id, cls in registry.items():
            if resource_class is cls:
                del registry[class_id]

    def get_resource_class(self, class_id):
        if type(class_id) is not str:
            raise TypeError, 'expected byte string, got %s' % class_id

        # Check dynamic models are not broken
        registry = self._resources_registry
        if class_id[0] == '/':
            model = self.get_resource(class_id, soft=True)
            if model is None:
                registry.pop(class_id, None)
                err = 'the resource "%s" does not exist' % class_id
                raise LookupError, err

        # Cache hit
        cls = registry.get(class_id)
        if cls:
            return cls

        # Cache miss: dynamic model
        if class_id[0] == '/':
            cls = model.build_resource_class()
            registry[class_id] = cls
            return cls

        # Cache miss: fallback on mimetype
        if '/' in class_id:
            class_id = class_id.split('/')[0]
            cls = registry.get(class_id)
            if cls:
                return cls

        # Default
        return self._resources_registry['application/octet-stream']

    def get_resource_classes(self):
        registry = self._resources_registry
        for class_id, cls in self._resources_registry.items():
            if class_id[0] == '/':
                model = self.get_resource(class_id, soft=True)
                if model is None:
                    registry.pop(class_id, None)
                    continue
            yield cls

    def get_resource(self, abspath, soft=False):
        if type(abspath) is str:
            path = abspath[1:]
            abspath = Path(abspath)
        else:
            path = str(abspath)[1:]

        path_to_metadata = '%s.metadata' % path
        metadata = self.get_handler(path_to_metadata, Metadata, soft=soft)
        if metadata is None:
            return None

        # 2. Class
        class_id = metadata.format
        cls = self.get_resource_class(class_id)
        if cls is None:
            if self.fs.exists(path):
                is_file = self.fs.is_file(path)
            else:
                # FIXME This is just a guess, it may fail.
                is_file = '/' in class_id
            if is_file:
                cls = self.get_resource_class('application/octet-stream')
            else:
                cls = self.get_resource_class('application/x-not-regular-file')

        # Ok
        resource = cls(metadata)
        resource.abspath = abspath
        return resource

    def remove_resource(self, resource):
        raise ReadonlyError

    def add_resource(self, resource):
        raise ReadonlyError

    def change_resource(self, resource):
        raise ReadonlyError

    def move_resource(self, source, new_path):
        raise ReadonlyError

    def save_changes(self):
        return

    def create_tag(self, tag_name, message=None):
        raise ReadonlyError

    def reset_to_tag(self, tag_name):
        raise ReadonlyError

    def abort_changes(self):
        return

    def push_phantom(self, key, handler):
        handler.database = self
        handler.key = key

    def is_phantom(self, handler):
        return handler.timestamp is None and handler.dirty is not None

    #######################################################################
    # Git
    #######################################################################
    def get_blob(self, sha, cls):
        if sha in self.git_cache:
            return self.git_cache[sha]

        blob = self.worktree.lookup(sha)
        blob = cls(string=blob.data)
        self.git_cache[sha] = blob
        return blob

    def get_blob_by_revision_and_path(self, sha, path, cls):
        """Get the file contents located at the given path after the given
        commit revision has been committed.
        """
        worktree = self.worktree
        commit = worktree.lookup(sha)
        obj = worktree.lookup_from_commit_by_path(commit, path)
        return self.get_blob(obj.sha, cls)

    #######################################################################
    # Search
    #######################################################################
    @lazy
    def catalog(self):
        path = '%s/catalog' % self.path
        fields = get_register_fields()
        try:
            return Catalog(path, fields, read_only=True)
        except (DatabaseError, DatabaseOpeningError):
            return None

    def search(self, query=None, **kw):
        """Launch a search in the catalog.
        """
        xquery = _get_xquery(self.catalog, query, **kw)
        return SearchResults(self, xquery)

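# Sketch of reading a file as it was at a given commit, using the Git helpers
# above (hypothetical commit sha and path; assumes the database was opened on
# a valid instance folder and that the blob loads as an itools File handler).
from itools.handlers import File

database = RODatabase('/path/to/instance')
old_version = database.get_blob_by_revision_and_path(
    'a1b2c3d4e5f6a7b8c9d0a1b2c3d4e5f6a7b8c9d0',   # commit sha (hypothetical)
    'webpages/index.xhtml',                       # path inside the worktree
    File)
print old_version.to_str()
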
class RODatabase(object):

    def __init__(self, path, size_min=4800, size_max=5200):
        # 1. Keep the path
        if not lfs.is_folder(path):
            error = '"%s" should be a folder, but it is not' % path
            raise ValueError, error
        folder = lfs.open(path)
        self.path = str(folder.path)

        # 2. Keep the path to the data
        self.path_data = '%s/database/' % self.path
        if not lfs.is_folder(self.path_data):
            error = '"%s" should be a folder, but it is not' % self.path_data
            raise ValueError, error

        # 3. Initialize the database, but chrooted
        self.fs = lfs.open(self.path_data)

        # 4. New interface to Git
        self.worktree = open_worktree(self.path_data)

        # 5. A mapping from key to handler
        self.cache = LRUCache(size_min, size_max, automatic=False)

        # 6. The git cache
        self.git_cache = LRUCache(900, 1100)

    #######################################################################
    # Private API
    #######################################################################
    def _sync_filesystem(self, key):
        """This method checks the state of the key in the cache against the
        filesystem.  Synchronizes the state if needed by discarding the
        handler, or raises an error if there is a conflict.

        Returns the handler for the given key if it is still in the cache
        after all the tests, or None otherwise.
        """
        # If the key is not in the cache nothing can be wrong
        handler = self.cache.get(key)
        if handler is None:
            return None
        # (1) Not yet loaded
        if handler.timestamp is None and handler.dirty is None:
            # Removed from the filesystem
            if not self.fs.exists(key):
                self._discard_handler(key)
                return None
            # Everything looks fine
            # FIXME There will be a bug if the file in the filesystem has
            # changed to a different type, so the handler class may not match.
            return handler
        # (2) New handler
        if handler.timestamp is None and handler.dirty is not None:
            # Everything looks fine
            if not self.fs.exists(key):
                return handler
            # Conflict
            error = 'new file in the filesystem and new handler in the cache'
            raise RuntimeError, error
        # (3) Loaded but not changed
        if handler.timestamp is not None and handler.dirty is None:
            # Removed from the filesystem
            if not self.fs.exists(key):
                self._discard_handler(key)
                return None
            # Modified in the filesystem
            mtime = self.fs.get_mtime(key)
            if mtime > handler.timestamp:
                self._discard_handler(key)
                return None
            # Everything looks fine
            return handler
        # (4) Loaded and changed
        if handler.timestamp is not None and handler.dirty is not None:
            # Removed from the filesystem
            if not self.fs.exists(key):
                error = 'a modified handler was removed from the filesystem'
                raise RuntimeError, error
            # Modified in the filesystem
            mtime = self.fs.get_mtime(key)
            if mtime > handler.timestamp:
                error = 'modified in the cache and in the filesystem'
                raise RuntimeError, error
            # Everything looks fine
            return handler

    def _discard_handler(self, key):
        """Unconditionally remove the handler identified by the given key
        from the cache, and invalidate it (and free memory at the same time).
        """
        handler = self.cache.pop(key)
        # Invalidate the handler
        handler.__dict__.clear()

    def _abort_changes(self):
        """To be called to abandon the transaction.
        """
        raise ReadonlyError

    def _cleanup(self):
        """For maintenance operations, this method is automatically called
        after a transaction is committed or aborted.
        """
        # import gc
        # from itools.core import vmsize
        # print 'RODatabase._cleanup (0): % 4d %s' % (len(self.cache), vmsize())
        # print gc.get_count()
        self.make_room()
        # print 'RODatabase._cleanup (1): % 4d %s' % (len(self.cache), vmsize())
        # print gc.get_count()

    #######################################################################
    # Public API
    #######################################################################
    def normalize_key(self, path, __root=Path('/')):
        # Performance is critical so assume the path is already relative to
        # the repository.
        key = __root.resolve(path)
        if key and key[0] == '.git':
            err = "bad '%s' path, access to the '.git' folder is denied"
            raise ValueError, err % path

        return '/'.join(key)

    def push_handler(self, key, handler):
        """Adds the given resource to the cache.
        """
        handler.database = self
        handler.key = key
        # Folders are not stored in the cache
        if type(handler) is Folder:
            return
        # Store in the cache
        self.cache[key] = handler

    def make_room(self):
        """Remove handlers from the cache until it fits the defined size.

        Use with caution.  If the handlers we are about to discard are still
        used outside the database, and one of them (or more) are modified,
        then there will be an error.
        """
        # Find out how many handlers should be removed
        size = len(self.cache)
        if size < self.cache.size_max:
            return

        # Discard as many handlers as needed
        n = size - self.cache.size_min
        for key, handler in self.cache.iteritems():
            # Skip externally referenced handlers (refcount should be 3:
            # one for the cache, one for the local variable and one for
            # the argument passed to getrefcount).
            refcount = getrefcount(handler)
            if refcount > 3:
                continue
            # Skip modified (not new) handlers
            if handler.dirty is not None:
                continue
            # Discard this handler
            self._discard_handler(key)
            # Check whether we are done
            n -= 1
            if n == 0:
                return

    def has_handler(self, key):
        key = self.normalize_key(key)

        # Synchronize
        handler = self._sync_filesystem(key)
        if handler is not None:
            return True

        # Ask vfs
        return self.fs.exists(key)

    def get_handler_names(self, key):
        key = self.normalize_key(key)
        return self.fs.get_names(key)

    def get_mimetype(self, key):
        fs = self.fs
        abspath = fs._resolve_path(key)
        return magic_from_file(abspath)

    def get_handler_class(self, key):
        mimetype = self.get_mimetype(key)
        try:
            return get_handler_class_by_mimetype(mimetype)
        except ValueError:
            log_warning('unknown handler class "{0}"'.format(mimetype))
            fs = self.fs
            if fs.is_file(key):
                from itools.handlers import File
                return File
            elif fs.is_folder(key):
                from itools.handlers import Folder
                return Folder

        raise ValueError

    def _get_handler(self, key, cls=None, soft=False):
        # Synchronize
        handler = self._sync_filesystem(key)
        if handler is not None:
            # Check the class matches
            if cls is not None and not isinstance(handler, cls):
                error = "expected '%s' class, '%s' found"
                raise LookupError, error % (cls, handler.__class__)
            # Cache hit
            self.cache.touch(key)
            return handler

        # Check the resource exists
        if not self.fs.exists(key):
            if soft:
                return None
            raise LookupError, 'the resource "%s" does not exist' % key

        # Folders are not cached
        if self.fs.is_folder(key):
            return Folder(key, database=self)

        # Cache miss
        if cls is None:
            cls = self.get_handler_class(key)
        # Build the handler and update the cache
        handler = object.__new__(cls)
        self.push_handler(key, handler)

        return handler

    def get_handler(self, key, cls=None, soft=False):
        key = self.normalize_key(key)
        return self._get_handler(key, cls, soft)

    def get_handlers(self, key):
        base = self.normalize_key(key)
        for name in self.get_handler_names(base):
            key = self.fs.resolve2(base, name)
            yield self._get_handler(key)

    def touch_handler(self, key, handler=None):
        """Report a modification of the key/handler to the database.

        We must pass the handler because of phantoms.
        """
        raise ReadonlyError, 'cannot set handler'

    def set_handler(self, key, handler):
        raise ReadonlyError, 'cannot set handler'

    def del_handler(self, key):
        raise ReadonlyError, 'cannot del handler'

    def copy_handler(self, source, target):
        raise ReadonlyError, 'cannot copy handler'

    def move_handler(self, source, target):
        raise ReadonlyError, 'cannot move handler'

    #######################################################################
    # Layer 1: resources
    #######################################################################
    _resources_registry = {}

    @classmethod
    def register_resource_class(self, resource_class, format=None):
        if format is None:
            format = resource_class.class_id
        self._resources_registry[format] = resource_class

    @classmethod
    def unregister_resource_class(self, resource_class):
        registry = self._resources_registry
        for class_id, cls in registry.items():
            if resource_class is cls:
                del registry[class_id]

    def get_resource_class(self, class_id):
        if type(class_id) is not str:
            raise TypeError, 'expected byte string, got %s' % class_id

        # Check dynamic models are not broken
        registry = self._resources_registry
        if class_id[0] == '/':
            model = self.get_resource(class_id, soft=True)
            if model is None:
                registry.pop(class_id, None)
                err = 'the resource "%s" does not exist' % class_id
                raise LookupError, err

        # Cache hit
        cls = registry.get(class_id)
        if cls:
            return cls

        # Cache miss: dynamic model
        if class_id[0] == '/':
            cls = model.build_resource_class()
            registry[class_id] = cls
            return cls

        # Cache miss: fallback on mimetype
        if '/' in class_id:
            class_id = class_id.split('/')[0]
            cls = registry.get(class_id)
            if cls:
                return cls

        # Default
        return self._resources_registry['application/octet-stream']

    def get_resource_classes(self):
        registry = self._resources_registry
        for class_id, cls in self._resources_registry.items():
            if class_id[0] == '/':
                model = self.get_resource(class_id, soft=True)
                if model is None:
                    registry.pop(class_id, None)
                    continue
            yield cls

    def get_resource(self, abspath, soft=False):
        if type(abspath) is str:
            path = abspath[1:]
            abspath = Path(abspath)
        else:
            path = str(abspath)[1:]

        path_to_metadata = '%s.metadata' % path
        metadata = self.get_handler(path_to_metadata, Metadata, soft=soft)
        if metadata is None:
            return None

        # 2. Class
        class_id = metadata.format
        cls = self.get_resource_class(class_id)
        if cls is None:
            if self.fs.exists(path):
                is_file = self.fs.is_file(path)
            else:
                # FIXME This is just a guess, it may fail.
                is_file = '/' in class_id
            if is_file:
                cls = self.get_resource_class('application/octet-stream')
            else:
                cls = self.get_resource_class('application/x-not-regular-file')

        # Ok
        resource = cls(metadata)
        resource.abspath = abspath
        return resource

    def remove_resource(self, resource):
        raise ReadonlyError

    def add_resource(self, resource):
        raise ReadonlyError

    def change_resource(self, resource):
        raise ReadonlyError

    def move_resource(self, source, new_path):
        raise ReadonlyError

    def save_changes(self):
        return

    def abort_changes(self):
        return

    def push_phantom(self, key, handler):
        handler.database = self
        handler.key = key

    def is_phantom(self, handler):
        return handler.timestamp is None and handler.dirty is not None

    #######################################################################
    # Git
    #######################################################################
    def get_blob(self, sha, cls):
        if sha in self.git_cache:
            return self.git_cache[sha]

        blob = self.worktree.lookup(sha)
        blob = cls(string=blob.data)
        self.git_cache[sha] = blob
        return blob

    def get_blob_by_revision_and_path(self, sha, path, cls):
        """Get the file contents located at the given path after the given
        commit revision has been committed.
        """
        worktree = self.worktree
        commit = worktree.lookup(sha)
        obj = worktree.lookup_from_commit_by_path(commit, path)
        return self.get_blob(obj.sha, cls)

    #######################################################################
    # Search
    #######################################################################
    @lazy
    def catalog(self):
        path = '%s/catalog' % self.path
        fields = get_register_fields()
        try:
            return Catalog(path, fields, read_only=True)
        except (DatabaseError, DatabaseOpeningError):
            return None

    def search(self, query=None, **kw):
        """Launch a search in the catalog.
        """
        xquery = _get_xquery(self.catalog, query, **kw)
        return SearchResults(self, xquery)

def setUp(self):
    self.cache = LRUCache(3)
    for c in lowercase:
        self.cache[c] = c.upper()

class CacheTestCase(TestCase):

    def setUp(self):
        self.cache = LRUCache(3)
        for c in lowercase:
            self.cache[c] = c.upper()

    def tearDown(self):
        self.cache._check_integrity()

    #######################################################################
    # Dict API
    def test_init(self):
        self.assertRaises(TypeError, LRUCache)
        self.assertRaises(TypeError, LRUCache, 'aa')
        self.assertRaises(TypeError, LRUCache, 5, 'aa')
        self.assertRaises(ValueError, LRUCache, 5, 3)
        self.assertRaises(TypeError, LRUCache, 5, 5, 'aa')
        self.assertRaises(TypeError, LRUCache, 5, True, 4)

    def test_len(self):
        cache = self.cache
        self.assertEqual(len(cache), cache.size_min)

    def test_setitem(self):
        cache = self.cache
        key, value = 'c', 'C'
        cache[key] = value
        self.assertEqual(cache[key], value)

    def test_delitem(self):
        cache = self.cache
        self.assertEqual(len(cache), cache.size_min)
        key = 'y'
        del cache[key]
        self.assertEqual(len(cache), cache.size_min - 1)
        self.assertRaises(KeyError, cache.__getitem__, key)

    def test_in(self):
        cache = self.cache
        self.assert_('x' in cache)
        self.assert_('n' not in cache)

    def test_clear(self):
        cache = self.cache
        self.assertEqual(len(cache), cache.size_min)
        cache.clear()
        self.assertEqual(len(cache), 0)

    def test_copy(self):
        cache = self.cache
        self.assertRaises(NotImplementedError, cache.copy)

    def test_fromkeys(self):
        cache = self.cache
        self.assertRaises(NotImplementedError, cache.fromkeys, 'abc')

    def test_get(self):
        cache = self.cache
        self.assertEqual(cache.get('y'), 'Y')
        self.assertEqual(cache.get('c'), None)
        self.assertEqual(cache.get('c', 69), 69)

    def test_items(self):
        cache = self.cache
        items = cache.items()
        self.assertEqual(items, [('x', 'X'), ('y', 'Y'), ('z', 'Z')])

    def test_iteritems(self):
        cache = self.cache
        items = cache.iteritems()
        items = list(items)
        self.assertEqual(items, [('x', 'X'), ('y', 'Y'), ('z', 'Z')])

    def test_iterkeys(self):
        cache = self.cache
        keys = cache.iterkeys()
        keys = list(keys)
        self.assertEqual(keys, list('xyz'))

    def test_itervalues(self):
        cache = self.cache
        values = cache.itervalues()
        values = list(values)
        self.assertEqual(values, list('XYZ'))

    def test_keys(self):
        cache = self.cache
        keys = cache.keys()
        self.assertEqual(keys, list('xyz'))

    def test_pop(self):
        cache = self.cache
        self.assertEqual(len(cache), cache.size_min)
        key = 'y'
        value = cache.pop(key)
        self.assertEqual(value, key.upper())
        self.assertEqual(len(cache), cache.size_min - 1)
        self.assertRaises(KeyError, cache.__getitem__, key)

    def test_popitem(self):
        cache = self.cache
        self.assertEqual(len(cache), cache.size_min)
        item = cache.popitem()
        self.assertEqual(len(cache), cache.size_min - 1)
        self.assertEqual(item, ('x', 'X'))

    def test_setdefault(self):
        cache = self.cache
        self.assertRaises(NotImplementedError, cache.setdefault, 7)

    def test_update(self):
        cache = self.cache
        self.assertRaises(NotImplementedError, cache.update)

    def test_values(self):
        cache = self.cache
        values = cache.values()
        self.assertEqual(values, list('XYZ'))

    #######################################################################
    # Specific API
    def test_touch(self):
        cache = self.cache
        self.assertRaises(KeyError, cache.touch, 'r')
        cache.touch('x')
        keys = cache.keys()
        self.assertEqual(keys, list('yzx'))

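# Behaviour illustrated by the tests above: with size 3, inserting the 26
# lowercase letters keeps only the three most recent entries, ordered oldest
# first; touch() moves a key to the most-recent end.  Assumption (consistent
# with test_popitem): popitem() always evicts the oldest entry.
from string import lowercase
from itools.core import LRUCache

cache = LRUCache(3)
for c in lowercase:
    cache[c] = c.upper()
assert cache.keys() == list('xyz')
cache.touch('x')
assert cache.keys() == list('yzx')
assert cache.popitem() == ('y', 'Y')
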
class RODatabase(object): read_only = True backend_cls = None def __init__(self, path=None, size_min=4800, size_max=5200, catalog=None, backend='lfs'): # Init path self.path = path # Init backend self.backend_cls = backends_registry[backend] # The "git add" arguments self.added = set() self.changed = set() self.removed = set() self.has_changed = False # init backend self.backend = self.backend_cls(self.path) # A mapping from key to handler self.cache = LRUCache(size_min, size_max, automatic=False) # TODO FIXME Catalog should be moved into backend # 7. Get the catalog if catalog: self.catalog = catalog else: if self.path: self.catalog = self.get_catalog() # Log catalog_log = '{}/database.log'.format(self.path) self.logger = Logger(catalog_log) register_logger(self.logger, 'itools.database') def check_catalog(self): pass def close(self): self.catalog.close() def check_database(self): """This function checks whether the database is in a consisitent state, this is to say whether a transaction was not brutally aborted and left the working directory with changes not committed. This is meant to be used by scripts, like 'icms-start.py' """ # TODO Check if bare repository is OK print('Checking database...') return True ####################################################################### # With statement ####################################################################### def __enter__(self): return self def __exit__(self, exc_type, exc_value, traceback): self.close() ####################################################################### # Private API ####################################################################### def _discard_handler(self, key): """Unconditionally remove the handler identified by the given key from the cache, and invalidate it (and free memory at the same time). """ handler = self.cache.pop(key) # Invalidate the handler handler.__dict__.clear() def _abort_changes(self): """To be called to abandon the transaction. """ raise ReadonlyError def _cleanup(self): """For maintenance operations, this method is automatically called after a transaction is committed or aborted. """ # import gc # from itools.core import vmsize # print 'RODatabase._cleanup (0): % 4d %s' % (len(self.cache), vmsize()) # print gc.get_count() self.make_room() # print 'RODatabase._cleanup (1): % 4d %s' % (len(self.cache), vmsize()) # print gc.get_count() ####################################################################### # Public API ####################################################################### def normalize_key(self, path, __root=Path('/')): return self.backend.normalize_key(path, __root) def push_handler(self, key, handler): """Adds the given resource to the cache. """ handler.database = self handler.key = key # Folders are not stored in the cache if type(handler) is Folder: return # Store in the cache self.cache[key] = handler def make_room(self): """Remove handlers from the cache until it fits the defined size. Use with caution. If the handlers we are about to discard are still used outside the database, and one of them (or more) are modified, then there will be an error. """ # Find out how many handlers should be removed size = len(self.cache) if size < self.cache.size_max: return # Discard as many handlers as needed n = size - self.cache.size_min for key, handler in self.cache.iteritems(): # Skip externally referenced handlers (refcount should be 3: # one for the cache, one for the local variable and one for # the argument passed to getrefcount). 
            refcount = getrefcount(handler)
            if refcount > 3:
                continue
            # Skip modified (not new) handlers
            if handler.dirty is not None:
                continue
            # Discard this handler
            self._discard_handler(key)

            # Check whether we are done
            n -= 1
            if n == 0:
                return

    def has_handler(self, key):
        key = self.normalize_key(key)

        # Synchronize
        handler = self.cache.get(key)
        if handler is not None:
            return True

        # Ask the backend
        return self.backend.handler_exists(key)

    def save_handler(self, key, handler):
        self.backend.save_handler(key, handler)

    def get_handler_names(self, key):
        key = self.normalize_key(key)
        return self.backend.get_handler_names(key)

    def get_handler_data(self, key):
        return self.backend.get_handler_data(key)

    def get_handler_mtime(self, key):
        return self.backend.get_handler_mtime(key)

    def get_mimetype(self, key):
        return self.backend.get_handler_mimetype(key)

    def get_handler_class(self, key):
        mimetype = self.get_mimetype(key)
        return get_handler_class_by_mimetype(mimetype)

    def _get_handler(self, key, cls=None, soft=False):
        # Removed resources do not exist anymore
        if key in self.removed:
            return None
        # Folders are not cached
        if cls is Folder:
            return Folder(key, database=self)
        # Synchronize
        handler = self.cache.get(key)
        if handler is not None:
            # Check the class matches
            if cls is not None and not isinstance(handler, cls):
                error = "expected '%s' class, '%s' found"
                raise LookupError, error % (cls, handler.__class__)
            # Cache hit
            self.cache.touch(key)
            return handler

        # Check the resource exists
        try:
            data = self.backend.get_handler_data(key)
        except Exception:
            # Does not exist
            if soft:
                return None
            raise LookupError('the resource "{0}" does not exist'.format(key))

        # Cache miss
        if cls is None:
            cls = self.get_handler_class(key)
        # Build the handler and update the cache
        handler = object.__new__(cls)
        # Put the handler in the cache
        self.push_handler(key, handler)
        # Load the handler data
        # FIXME We should reset the handler state on errors
        try:
            handler.load_state_from_string(data)
        except Exception:
            # Remove the handler from the cache if it cannot be loaded
            self._discard_handler(key)
            raise
        # Ok
        return handler

    def traverse_resources(self):
        return self.backend.traverse_resources()

    def get_handler(self, key, cls=None, soft=False):
        key = self.normalize_key(key)
        return self._get_handler(key, cls, soft)

    def get_handlers(self, key):
        base = self.normalize_key(key)
        for name in self.get_handler_names(base):
            yield self._get_handler(base + '/' + name)

    def touch_handler(self, key, handler=None):
        """Report a modification of the key/handler to the database.
""" # FIXME touch_handler is called at handler loading # ro_database is also a rw_database, so it can save data # raise ReadonlyError, 'cannot set handler' key = self.normalize_key(key) # Mark the handler as dirty handler.dirty = datetime.now() # Do some checks if handler is None: raise ValueError if key in self.removed: raise ValueError # Set database has changed self.has_changed = True # Set in changed list self.changed.add(key) def set_handler(self, key, handler): raise ReadonlyError, 'cannot set handler' def del_handler(self, key): raise ReadonlyError, 'cannot del handler' def copy_handler(self, source, target, exclude_patterns=None): raise ReadonlyError, 'cannot copy handler' def move_handler(self, source, target): raise ReadonlyError, 'cannot move handler' ####################################################################### # Layer 1: resources ####################################################################### _resources_registry = {} @classmethod def register_resource_class(self, resource_class, format=None): if format is None: format = resource_class.class_id self._resources_registry[format] = resource_class @classmethod def unregister_resource_class(self, resource_class): registry = self._resources_registry for class_id, cls in registry.items(): if resource_class is cls: del registry[class_id] def get_resource_class(self, class_id): if type(class_id) is not str: raise TypeError, 'expected byte string, got %s' % class_id # Check dynamic models are not broken registry = self._resources_registry if class_id[0] == '/': model = self.get_resource(class_id, soft=True) if model is None: registry.pop(class_id, None) err = 'the resource "%s" does not exist' % class_id raise LookupError, err # Cache hit cls = registry.get(class_id) if cls: return cls # Cache miss: dynamic model if class_id[0] == '/': cls = model.build_resource_class() registry[class_id] = cls return cls # Cache miss: fallback on mimetype if '/' in class_id: class_id = class_id.split('/')[0] cls = registry.get(class_id) if cls: return cls # Default return self._resources_registry['application/octet-stream'] def get_resource_classes(self): registry = self._resources_registry for class_id, cls in self._resources_registry.items(): if class_id[0] == '/': model = self.get_resource(class_id, soft=True) if model is None: registry.pop(class_id, None) continue yield cls def get_metadata(self, abspath, soft=False): if type(abspath) is str: path = abspath[1:] abspath = Path(abspath) else: path = str(abspath)[1:] path_to_metadata = '%s.metadata' % path return self.get_handler(path_to_metadata, Metadata, soft=soft) def get_cls(self, class_id): cls = self.get_resource_class(class_id) return cls or self.get_resource_class('application/octet-stream') def get_resource(self, abspath, soft=False): abspath = Path(abspath) # Get metadata metadata = self.get_metadata(abspath, soft) if metadata is None: return None # Get associated class class_id = metadata.format cls = self.get_cls(class_id) # Ok return cls(abspath=abspath, database=self, metadata=metadata) def get_resource_from_brain(self, brain): cls = self.get_cls(brain.format) return cls(abspath=Path(brain.abspath), database=self, brain=brain) def remove_resource(self, resource): raise ReadonlyError def add_resource(self, resource): raise ReadonlyError def change_resource(self, resource): raise ReadonlyError def move_resource(self, source, new_path): raise ReadonlyError def save_changes(self): return def create_tag(self, tag_name, message=None): raise ReadonlyError def 
    def reset_to_tag(self, tag_name):
        raise ReadonlyError

    def abort_changes(self):
        return

    #######################################################################
    # API for path
    #######################################################################
    @staticmethod
    def get_basename(path):
        if type(path) is not Path:
            path = Path(path)
        return path.get_name()

    @staticmethod
    def get_path(path):
        if type(path) is not Path:
            path = Path(path)
        return str(path)

    @staticmethod
    def resolve(base, path):
        if type(base) is not Path:
            base = Path(base)
        path = base.resolve(path)
        return str(path)

    @staticmethod
    def resolve2(base, path):
        if type(base) is not Path:
            base = Path(base)
        path = base.resolve2(path)
        return str(path)

    #######################################################################
    # Search
    #######################################################################
    def get_catalog(self):
        path = '%s/catalog' % self.path
        fields = get_register_fields()
        root = self.get_resource('/', soft=True)
        return Catalog(path, fields, read_only=self.read_only, root=root)

    def search(self, query=None, **kw):
        """Launch a search in the catalog.
        """
        xquery = _get_xquery(self.catalog, query, **kw)
        return SearchResults(self, xquery)

    def reindex_catalog(self, base_abspath, recursif=True):
        raise ReadonlyError
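# A minimal read-only usage sketch, not part of the library. It assumes an
# existing itools database under 'database_path' and that RODatabase is
# exported from the itools.database package; the handler key, the 'format'
# catalog field and the 'webpage' value are illustrative only.

def _ro_database_example(database_path='/path/to/instance'):
    from itools.database import RODatabase

    # The context manager closes the catalog on exit (see __exit__ above)
    with RODatabase(path=database_path, backend='lfs') as database:
        # Layer 1: resources, addressed by absolute path
        root = database.get_resource('/', soft=True)

        # Layer 0: handlers, addressed by key (illustrative key)
        if database.has_handler('.metadata'):
            metadata = database.get_handler('.metadata')

        # Catalog search; keyword arguments map to registered catalog fields
        results = database.search(format='webpage')

    # Any attempt to write raises ReadonlyError, e.g.:
    # database.set_handler('some/key', metadata)
    return root, results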