def testSplitPath(self):
  """Verify utils.split_path returns the list of dirs containing a path."""
  # No containing paths of '/'.
  self.assertEqual([], utils.split_path('/'))
  # / contains /file
  self.assertEqual(['/'], utils.split_path('/file'))
  # / and /foo contain /foo/bar
  self.assertEqual(['/', '/foo'], utils.split_path('/foo/bar'))
  expected = ['/', '/path', '/path/to', '/path/to/some']
  # The trailing path component (with or without an extension) is itself
  # excluded from the returned containing-dir list.
  self.assertEqual(expected, utils.split_path('/path/to/some/file.txt'))
  self.assertEqual(expected, utils.split_path('/path/to/some/file'))
  expected = []
  # NOTE(review): this fragment appears truncated -- 'expected' is reset to []
  # but no further assertion is visible in this chunk; confirm against the
  # full test file.
def compute_affected_dirs(self, modified_paths):
  """Compute which dirs are affected by path modifications.

  Args:
    modified_paths: A list of ModifiedPath objects.
  Raises:
    NamespaceMismatchError: If mixing namespaces.
  Returns:
    A dictionary containing 'namespace', 'dirs_with_adds' and
    'dirs_with_deletes'; the latter two are sets of strings containing the
    affected dir paths.
  """
  # Robustness fix: previously 'namespace' was only bound when modified_paths
  # was non-empty, so an empty list raised NameError when building the result.
  # For an empty input we now return a well-formed result with no namespace.
  namespace = modified_paths[0].namespace if modified_paths else None

  # First, merge file path modifications.
  # Perform an in-order pass to get the final modified state of each file,
  # so that only the last modification of each path survives.
  sorted_paths = sorted(modified_paths, key=lambda path: path.modified)
  new_modified_paths = {}
  for modified_path in sorted_paths:
    if modified_path.namespace != namespace:
      raise NamespaceMismatchError(
          'Namespace "{}" does not match namespace "{}".'.format(
              modified_path.namespace, namespace))
    new_modified_paths[modified_path.path] = modified_path
  sorted_paths = sorted(new_modified_paths.values(),
                        key=lambda path: path.modified)

  # Second, generate the set of affected directory paths.
  # This does not need to collapse dirs which are added and then deleted;
  # the dir should be present in both lists if it is affected by both an
  # add and a delete.
  dirs_with_adds = set()
  dirs_with_deletes = set()
  for modified_path in sorted_paths:
    current_dirs = utils.split_path(modified_path.path)
    if modified_path.action == ModifiedPath.WRITE:
      dirs_with_adds.update(current_dirs)
    elif modified_path.action == ModifiedPath.DELETE:
      dirs_with_deletes.update(current_dirs)

  # Ignore root dir; it's hard-coded elsewhere to always exist.
  dirs_with_adds.discard('/')
  dirs_with_deletes.discard('/')

  affected_dirs = {
      'namespace': namespace,
      'dirs_with_adds': dirs_with_adds,
      'dirs_with_deletes': dirs_with_deletes,
  }
  return affected_dirs
def write(self, content=None, blob=None, mime_type=None, meta=None,
          encoding=None, created=None, modified=None, created_by=None,
          modified_by=None, _delete_old_blob=True):
  """Write or update a File.

  Updates: if the File already exists, write will accept any of the given
  args and only perform an update of the given data, without affecting
  other data.

  Args:
    content: File contents, either as a str or unicode object.
        This will handle content greater than the 1MB limit by storing it
        in blobstore. However, this technique should only be used for
        relatively small files; it is much less efficient than directly
        uploading to blobstore and passing the resulting BlobKeys to the
        blobs argument.
    blob: If content is not provided, a BlobKey pointing to the file.
    mime_type: Content type of the file; will be guessed if not given.
    meta: A dictionary of properties to be added to the file.
    encoding: The optional encoding of the content if given a bytestring.
        The encoding will be automatically determined if "content" is passed
        a unicode string.
    created: Optional datetime.datetime to override the created property.
    modified: Optional datetime.datetime to override the modified property.
    created_by: Optional TitanUser to override the created_by property.
    modified_by: Optional TitanUser to override the modified_by property.
    _delete_old_blob: Whether or not to delete the old blob if it changed.
  Raises:
    TypeError: For missing arguments.
    ValueError: For invalid arguments.
    BadFileError: If updating meta information on a non-existent file.
  Returns:
    Self-reference.
  """
  logging.info('Writing Titan file: %s', self.real_path)

  # Argument sanity checks.
  _TitanFile.validate_meta_properties(meta)
  is_content_update = content is not None or blob is not None
  is_meta_update = (mime_type is not None or meta is not None
                    or created is not None or modified is not None
                    or created_by is not None or modified_by is not None)
  if not is_content_update and not is_meta_update:
    raise TypeError('Arguments expected, but none given.')
  if not self.exists and is_meta_update and not is_content_update:
    # Meta-only updates are only valid for files that already exist.
    raise BadFileError('File does not exist: %s' % self.real_path)
  # Duck-typed datetime check: any object with a timetuple() is accepted.
  if created is not None and not hasattr(created, 'timetuple'):
    raise ValueError('"created" must be a datetime.datetime instance.')
  if modified is not None and not hasattr(modified, 'timetuple'):
    raise ValueError('"modified" must be a datetime.datetime instance.')
  if created_by is not None and not isinstance(created_by, users.TitanUser):
    raise ValueError('"created_by" must be a users.TitanUser instance.')
  if modified_by is not None and not isinstance(modified_by, users.TitanUser):
    raise ValueError('"modified_by" must be a users.TitanUser instance.')
  if encoding is not None and content is None and blob is None:
    raise TypeError(
        '"content" or "blob" must be passed if "encoding" is passed.')

  # If given unicode, encode it as UTF-8 and flag it for future decoding.
  content, encoding = self._maybe_encode_content(content, encoding)

  # If big enough, store content in blobstore. Must come after encoding.
  content, blob = self._maybe_write_to_blobstore(content, blob)

  now = datetime.datetime.now()
  # Remember whether the caller explicitly overrode created_by before the
  # fallback below replaces None with the current user.
  override_created_by = created_by is not None
  created_by = created_by or users.get_current_user()
  modified_by = modified_by or users.get_current_user()

  if not self.exists:
    # Create new _File entity.
    # Guess the MIME type if not given.
    if not mime_type:
      mime_type = utils.guess_mime_type(self.real_path)

    # Create a new _File.
    paths = utils.split_path(self.real_path)
    file_ent = _TitanFile(
        # NDB args:
        id=self.real_path,
        namespace=self.namespace,
        # Model:
        name=os.path.basename(self.real_path),
        dir_path=paths[-1],
        paths=paths,
        # Root files are at depth 0.
        depth=len(paths) - 1,
        mime_type=mime_type,
        encoding=encoding,
        created=created or now,
        modified=modified or now,
        content=content,
        blob=blob,
        # Backwards-compatibility with deprecated "blobs" property:
        blobs=[],
        created_by=created_by,
        modified_by=modified_by,
        # md5_hash is only maintained for inline content; blob-backed files
        # leave it unset here.
        md5_hash=None if blob else hashlib.md5(content).hexdigest(),
    )
    # Add meta attributes.
    if meta:
      for key, value in meta.iteritems():
        setattr(file_ent, key, value)
    self._file_ent = file_ent
    self._file_ent.put()
    return self

  # Updating an existing _File.
  file_ent = self._file

  blob_to_delete = None
  if override_created_by:
    file_ent.created_by = created_by
  # modified_by is always refreshed (explicit override or current user).
  file_ent.modified_by = modified_by
  if mime_type and file_ent.mime_type != mime_type:
    file_ent.mime_type = mime_type
  if created:
    file_ent.created = created
  file_ent.modified = modified or now
  # Auto-migrate entities from old "blobs" to new "blob" property on write:
  if file_ent.blobs:
    file_ent.blob = file_ent.blobs[0]
    file_ent.blobs = []
  if content is not None and file_ent.content != content:
    file_ent.content = content
    file_ent.md5_hash = hashlib.md5(content).hexdigest()
    if file_ent.blob and _delete_old_blob:
      # Inline content replaces blob-backed content; remember the old blob
      # so its blobstore data can be deleted after the entity is saved.
      blob_to_delete = self.blob
    # Clear the current blob association for this file.
    file_ent.blob = None
  if blob is not None and file_ent.blob != blob:
    if file_ent.blob and _delete_old_blob:
      blob_to_delete = self.blob
    # Associate the new blob to this file.
    file_ent.blob = blob
    file_ent.md5_hash = None
    file_ent.content = None
  if encoding != file_ent.encoding:
    file_ent.encoding = encoding

  # Update meta attributes.
  if meta is not None:
    for key, value in meta.iteritems():
      # Only touch properties that are new or actually changed.
      if not hasattr(file_ent, key) or getattr(file_ent, key) != value:
        setattr(file_ent, key, value)
  self._file_ent = file_ent
  self._file_ent.put()

  if blob_to_delete and _delete_old_blob:
    # Delete the actual blobstore data after the file write to avoid
    # orphaned files.
    _delete_blobs(blobs=[blob_to_delete], file_paths=[self.real_path])
  return self
class DirService(object):
  """Service for managing directory entities."""

  def compute_affected_dirs(self, modified_paths):
    """Compute which dirs are affected by path modifications.

    Args:
      modified_paths: A list of ModifiedPath objects.
    Raises:
      NamespaceMismatchError: If mixing namespaces.
    Returns:
      A dictionary containing 'dirs_with_adds' and 'dirs_with_deletes',
      both of which are sets of strings containing the affected dir paths.
    """
    # NOTE(review): 'namespace' is only bound when modified_paths is
    # non-empty; an empty list raises NameError below. Confirm whether
    # callers guarantee non-empty input.
    if modified_paths:
      namespace = modified_paths[0].namespace
    # First, merge file path modifications.
    # Perform an in-order pass to get the final modified state of each file;
    # later modifications of the same path overwrite earlier ones.
    sorted_paths = sorted(modified_paths, key=lambda path: path.modified)
    new_modified_paths = {}
    for modified_path in sorted_paths:
      if modified_path.namespace != namespace:
        raise NamespaceMismatchError(
            'Namespace "{}" does not match namespace "{}".'.format(
                modified_path.namespace, namespace))
      new_modified_paths[modified_path.path] = modified_path
    sorted_paths = sorted(new_modified_paths.values(),
                          key=lambda path: path.modified)

    # Second, generate the set of affected directory paths.
    # This does not need to collapse dirs which are added and then deleted,
    # the dir should be present in both lists if it is affected by both an
    # add and a delete.
    dirs_with_adds = set()
    dirs_with_deletes = set()
    for modified_path in sorted_paths:
      current_dirs = utils.split_path(modified_path.path)
      if modified_path.action == ModifiedPath.WRITE:
        dirs_with_adds = dirs_with_adds.union(set(current_dirs))
      elif modified_path.action == ModifiedPath.DELETE:
        dirs_with_deletes = dirs_with_deletes.union(set(current_dirs))

    # Ignore root dir; it's hard-coded elsewhere to always exist.
    dirs_with_adds.discard('/')
    dirs_with_deletes.discard('/')

    affected_dirs = {
        'namespace': namespace,
        'dirs_with_adds': dirs_with_adds,
        'dirs_with_deletes': dirs_with_deletes,
    }
    return affected_dirs

  @ndb.toplevel
  def update_affected_dirs(self, dirs_with_adds, dirs_with_deletes,
                           namespace=None, async=False):
    """Manage changes to _TitanDir entities computed by compute_affected_dirs.

    Args:
      dirs_with_adds: Set of dir paths affected by file writes.
      dirs_with_deletes: Set of dir paths affected by file deletes.
      namespace: Optional datastore namespace for the dir entities.
      async: If True, datastore puts are issued asynchronously (awaited by
          the @ndb.toplevel decorator).  NOTE(review): 'async' is a reserved
          keyword in Python 3; this parameter must be renamed before porting.
    """
    # Order deletes by depth first. This isn't actually by depth, but all we
    # need to guarantee here is that paths with common subdirs are deleted
    # depth-first, which can be accomplished by sorting in reverse
    # alphabetical order.
    dirs_with_deletes = sorted(list(dirs_with_deletes), reverse=True)

    # For every directory which contained a deleted file (including children),
    # check if the directory should disappear. It should disappear if:
    #   1. There are no files in the directory, and...
    #   2. There are no child directories, and...
    #   3. The directory path is not present in dirs_with_adds.
    dirs_paths_to_delete = []
    for path in dirs_with_deletes:
      if path in dirs_with_adds or files.Files.list(
          path, namespace=namespace, limit=1, _internal=True):
        # The directory is marked for addition, or files still exist in it.
        continue
      # limit=2 is enough to distinguish "none", "one", and "several".
      subdirs = Dirs.list(path, limit=2)
      if len(subdirs) > 1:
        # Multiple subdirs exist, cannot delete dir.
        continue
      elif len(subdirs) == 1:
        # Handle the case where the only remaining subdir is marked for delete.
        if subdirs.values()[0].path not in dirs_paths_to_delete:
          continue
      dirs_paths_to_delete.append(path)

    # Batch get all directory entities, both added and deleted.
    ns = namespace
    dir_keys = [
        ndb.Key(_TitanDir, path, namespace=ns) for path in dirs_paths_to_delete
    ]
    dir_keys += [
        ndb.Key(_TitanDir, path, namespace=ns) for path in dirs_with_adds
    ]
    existing_dir_ents = ndb.get_multi(dir_keys)
    # Transform into a dictionary mapping paths to existing entities:
    existing_dirs = {}
    for ent in existing_dir_ents:
      if ent:
        existing_dirs[ent.path] = ent

    changed_dir_ents = []
    for path in dirs_paths_to_delete:
      if path in existing_dirs:
        # Existing directory, mark as deleted.
        ent = existing_dirs[path]
        if ent.status == _STATUS_DELETED:
          # Skip this entity entirely if it's already correct.
          continue
        ent.status = _STATUS_DELETED
      else:
        # Missing directory entity, create a new one and mark as deleted.
        ent = _TitanDir(
            # NDB properties:
            id=path,
            namespace=namespace,
            # Model properties:
            name=os.path.basename(path),
            parent_path=os.path.dirname(path),
            parent_paths=utils.split_path(path),
            status=_STATUS_DELETED,
        )
      # Whitespace. Important.
      changed_dir_ents.append(ent)

    for path in dirs_with_adds:
      if path in existing_dirs:
        # Existing directory, make sure it's marked as available.
        ent = existing_dirs[path]
        if ent.status == _STATUS_AVAILABLE:
          # Skip this entity entirely if it's already correct.
          continue
        ent.status = _STATUS_AVAILABLE
      else:
        # Missing directory entity, create a new one and mark as available.
        ent = _TitanDir(
            # NDB properties:
            id=path,
            namespace=namespace,
            # Model properties:
            name=os.path.basename(path),
            parent_path=os.path.dirname(path),
            parent_paths=utils.split_path(path),
            status=_STATUS_AVAILABLE,
        )
      # Whitespace. Important.
      changed_dir_ents.append(ent)

    # Persist in chunks of 100 to stay within datastore batch limits.
    for dir_ents in utils.chunk_generator(changed_dir_ents, chunk_size=100):
      if not async:
        ndb.put_multi(dir_ents)
      else:
        ndb.put_multi_async(dir_ents)