Example #1
    def testSplitPath(self):
        # No containing paths of '/'.
        self.assertEqual([], utils.split_path('/'))
        # / contains /file
        self.assertEqual(['/'], utils.split_path('/file'))
        # / and /foo contain /foo/bar
        self.assertEqual(['/', '/foo'], utils.split_path('/foo/bar'))

        expected = ['/', '/path', '/path/to', '/path/to/some']
        self.assertEqual(expected, utils.split_path('/path/to/some/file.txt'))
        self.assertEqual(expected, utils.split_path('/path/to/some/file'))

        expected = []
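
The assertions above pin down the contract of utils.split_path: it returns every ancestor directory of a path ordered from the root down, excluding the path itself, so '/' has no ancestors. Below is a minimal sketch that satisfies these tests; treat it as an illustration, since the real utils.split_path in Titan may be implemented differently.

import posixpath

def split_path(path):
  """Return all ancestor dir paths of an absolute path, from '/' downward."""
  if path == '/':
    return []
  # Walk upward via dirname until the root is reached, then reverse the order.
  ancestors = []
  current = posixpath.dirname(path)
  while current != '/':
    ancestors.append(current)
    current = posixpath.dirname(current)
  ancestors.append('/')
  return ancestors[::-1]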
Example #3
  def compute_affected_dirs(self, modified_paths):
    """Compute which dirs are affected by path modifications.

    Args:
      modified_paths: A list of ModifiedPath objects.
    Raises:
      NamespaceMismatchError: If mixing namespaces.
    Returns:
      A dictionary containing 'namespace', 'dirs_with_adds', and
      'dirs_with_deletes'; the latter two are sets of strings containing the
      affected dir paths.
    """
    # Default to None so an empty modified_paths list is handled gracefully.
    namespace = modified_paths[0].namespace if modified_paths else None
    # First, merge file path modifications.
    # Perform an in-order pass to get the final modified state of each file.
    sorted_paths = sorted(modified_paths, key=lambda path: path.modified)
    new_modified_paths = {}
    for modified_path in sorted_paths:
      if modified_path.namespace != namespace:
        raise NamespaceMismatchError(
            'Namespace "{}" does not match namespace "{}".'.format(
                modified_path.namespace, namespace))
      new_modified_paths[modified_path.path] = modified_path
    sorted_paths = sorted(new_modified_paths.values(),
                          key=lambda path: path.modified)

    # Second, generate the set of affected directory paths.
    # This does not need to collapse dirs which are added and then deleted;
    # the dir should be present in both lists if it is affected by both an
    # add and a delete.
    dirs_with_adds = set()
    dirs_with_deletes = set()
    for modified_path in sorted_paths:
      current_dirs = utils.split_path(modified_path.path)
      if modified_path.action == ModifiedPath.WRITE:
        dirs_with_adds = dirs_with_adds.union(set(current_dirs))
      elif modified_path.action == ModifiedPath.DELETE:
        dirs_with_deletes = dirs_with_deletes.union(set(current_dirs))

    # Ignore root dir; it's hard-coded elsewhere to always exist.
    dirs_with_adds.discard('/')
    dirs_with_deletes.discard('/')

    affected_dirs = {
        'namespace': namespace,
        'dirs_with_adds': dirs_with_adds,
        'dirs_with_deletes': dirs_with_deletes,
    }
    return affected_dirs
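
For illustration, a hedged usage sketch of compute_affected_dirs: the ModifiedPath constructor shown is hypothetical (inferred from the attributes the method reads: path, namespace, modified, and action), and dir_service stands in for whatever object exposes the method.

import datetime

# Hypothetical construction; the real ModifiedPath signature may differ.
modified_paths = [
    ModifiedPath(path='/foo/bar.txt', namespace=None,
                 modified=datetime.datetime(2013, 1, 1, 12, 0, 0),
                 action=ModifiedPath.WRITE),
    ModifiedPath(path='/foo/qux/baz.txt', namespace=None,
                 modified=datetime.datetime(2013, 1, 1, 12, 0, 1),
                 action=ModifiedPath.DELETE),
]
affected_dirs = dir_service.compute_affected_dirs(modified_paths)
# affected_dirs['dirs_with_adds'] == set(['/foo'])  ('/' is discarded)
# affected_dirs['dirs_with_deletes'] == set(['/foo', '/foo/qux'])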
Example #5
  def write(self, content=None, blob=None, mime_type=None, meta=None,
            encoding=None, created=None, modified=None, created_by=None,
            modified_by=None, _delete_old_blob=True):
    """Write or update a File.

    Updates: if the File already exists, write accepts any subset of the args
    below and updates only the given data, without affecting other data.

    Args:
      content: File contents, either as a str or unicode object.
          This will handle content greater than the 1MB limit by storing it in
          blobstore. However, this technique should only be used for relatively
          small files; it is much less efficient than directly uploading
          to blobstore and passing the resulting BlobKey to the blob argument.
      blob: If content is not provided, a BlobKey pointing to the file.
      mime_type: Content type of the file; will be guessed if not given.
      meta: A dictionary of properties to be added to the file.
      encoding: The optional encoding of the content if given a bytestring.
          The encoding will be automatically determined if "content" is passed
          a unicode string.
      created: Optional datetime.datetime to override the created property.
      modified: Optional datetime.datetime to override the modified property.
      created_by: Optional TitanUser to override the created_by property.
      modified_by: Optional TitanUser to override the modified_by property.
      _delete_old_blob: Whether or not to delete the old blob if it changed.
    Raises:
      TypeError: For missing arguments.
      ValueError: For invalid arguments.
      BadFileError: If updating meta information on a non-existent file.
    Returns:
      Self-reference.
    """
    logging.info('Writing Titan file: %s', self.real_path)

    # Argument sanity checks.
    _TitanFile.validate_meta_properties(meta)
    is_content_update = content is not None or blob is not None
    is_meta_update = (mime_type is not None or meta is not None
                      or created is not None or modified is not None
                      or created_by is not None or modified_by is not None)
    if not is_content_update and not is_meta_update:
      raise TypeError('Arguments expected, but none given.')
    if not self.exists and is_meta_update and not is_content_update:
      raise BadFileError('File does not exist: %s' % self.real_path)
    if created is not None and not hasattr(created, 'timetuple'):
      raise ValueError('"created" must be a datetime.datetime instance.')
    if modified is not None and not hasattr(modified, 'timetuple'):
      raise ValueError('"modified" must be a datetime.datetime instance.')
    if created_by is not None and not isinstance(created_by, users.TitanUser):
      raise ValueError('"created_by" must be a users.TitanUser instance.')
    if modified_by is not None and not isinstance(modified_by, users.TitanUser):
      raise ValueError('"modified_by" must be a users.TitanUser instance.')
    if encoding is not None and content is None and blob is None:
      raise TypeError(
          '"content" or "blob" must be passed if "encoding" is passed.')

    # If given unicode, encode it as UTF-8 and flag it for future decoding.
    content, encoding = self._maybe_encode_content(content, encoding)

    # If big enough, store content in blobstore. Must come after encoding.
    content, blob = self._maybe_write_to_blobstore(content, blob)

    now = datetime.datetime.now()
    override_created_by = created_by is not None
    created_by = created_by or users.get_current_user()
    modified_by = modified_by or users.get_current_user()
    if not self.exists:
      # Create new _File entity.
      # Guess the MIME type if not given.
      if not mime_type:
        mime_type = utils.guess_mime_type(self.real_path)

      paths = utils.split_path(self.real_path)
      file_ent = _TitanFile(
          # NDB args:
          id=self.real_path,
          namespace=self.namespace,
          # Model:
          name=os.path.basename(self.real_path),
          dir_path=paths[-1],
          paths=paths,
          # Root files are at depth 0.
          depth=len(paths) - 1,
          mime_type=mime_type,
          encoding=encoding,
          created=created or now,
          modified=modified or now,
          content=content,
          blob=blob,
          # Backwards-compatibility with deprecated "blobs" property:
          blobs=[],
          created_by=created_by,
          modified_by=modified_by,
          md5_hash=None if blob else hashlib.md5(content).hexdigest(),
      )
      # Add meta attributes.
      if meta:
        for key, value in meta.iteritems():
          setattr(file_ent, key, value)
      self._file_ent = file_ent
      self._file_ent.put()
      return self

    # Updating an existing _File.
    file_ent = self._file

    blob_to_delete = None

    if override_created_by:
      file_ent.created_by = created_by
    file_ent.modified_by = modified_by

    if mime_type and file_ent.mime_type != mime_type:
      file_ent.mime_type = mime_type

    if created:
      file_ent.created = created
    file_ent.modified = modified or now

    # Auto-migrate entities from old "blobs" to new "blob" property on write:
    if file_ent.blobs:
      file_ent.blob = file_ent.blobs[0]
      file_ent.blobs = []

    if content is not None and file_ent.content != content:
      file_ent.content = content
      file_ent.md5_hash = hashlib.md5(content).hexdigest()
      if file_ent.blob and _delete_old_blob:
        blob_to_delete = self.blob
      # Clear the current blob association for this file.
      file_ent.blob = None

    if blob is not None and file_ent.blob != blob:
      if file_ent.blob and _delete_old_blob:
        blob_to_delete = self.blob
      # Associate the new blob to this file.
      file_ent.blob = blob
      file_ent.md5_hash = None
      file_ent.content = None

    if encoding != file_ent.encoding:
      file_ent.encoding = encoding

    # Update meta attributes.
    if meta is not None:
      for key, value in meta.iteritems():
        if not hasattr(file_ent, key) or getattr(file_ent, key) != value:
          setattr(file_ent, key, value)
    self._file_ent = file_ent
    self._file_ent.put()

    if blob_to_delete and _delete_old_blob:
      # Delete the actual blobstore data after the file write to avoid
      # orphaned files.
      _delete_blobs(blobs=[blob_to_delete], file_paths=[self.real_path])

    return self
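
A short usage sketch of write, assuming the public files.File class wraps this method; the paths, meta values, and blob_key below are made up for illustration.

# Create a new file; the MIME type is guessed from the extension.
titan_file = files.File('/path/to/file.txt')
titan_file.write(content='hello world')

# Update only metadata on the existing file, leaving content untouched.
titan_file.write(meta={'color': 'blue'})

# Point the file at already-uploaded blobstore data instead of inline content.
# blob_key is assumed to be a valid blobstore BlobKey.
titan_file.write(blob=blob_key, mime_type='text/plain')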
Example #6
class DirService(object):
    """Service for managing directory entities."""
    def compute_affected_dirs(self, modified_paths):
        """Compute which dirs are affected by path modifications.

        Args:
          modified_paths: A list of ModifiedPath objects.
        Raises:
          NamespaceMismatchError: If mixing namespaces.
        Returns:
          A dictionary containing 'namespace', 'dirs_with_adds', and
          'dirs_with_deletes'; the latter two are sets of strings containing
          the affected dir paths.
        """
        # Default to None so an empty modified_paths list is handled gracefully.
        namespace = modified_paths[0].namespace if modified_paths else None
        # First, merge file path modifications.
        # Perform an in-order pass to get the final modified state of each file.
        sorted_paths = sorted(modified_paths, key=lambda path: path.modified)
        new_modified_paths = {}
        for modified_path in sorted_paths:
            if modified_path.namespace != namespace:
                raise NamespaceMismatchError(
                    'Namespace "{}" does not match namespace "{}".'.format(
                        modified_path.namespace, namespace))
            new_modified_paths[modified_path.path] = modified_path
        sorted_paths = sorted(new_modified_paths.values(),
                              key=lambda path: path.modified)

        # Second, generate the set of affected directory paths.
        # This does not need to collapse dirs which are added and then deleted;
        # the dir should be present in both lists if it is affected by both an
        # add and a delete.
        dirs_with_adds = set()
        dirs_with_deletes = set()
        for modified_path in sorted_paths:
            current_dirs = utils.split_path(modified_path.path)
            if modified_path.action == ModifiedPath.WRITE:
                dirs_with_adds = dirs_with_adds.union(set(current_dirs))
            elif modified_path.action == ModifiedPath.DELETE:
                dirs_with_deletes = dirs_with_deletes.union(set(current_dirs))

        # Ignore root dir; it's hard-coded elsewhere to always exist.
        dirs_with_adds.discard('/')
        dirs_with_deletes.discard('/')

        affected_dirs = {
            'namespace': namespace,
            'dirs_with_adds': dirs_with_adds,
            'dirs_with_deletes': dirs_with_deletes,
        }
        return affected_dirs

    @ndb.toplevel
    def update_affected_dirs(self,
                             dirs_with_adds,
                             dirs_with_deletes,
                             namespace=None,
                             async=False):
        """Manage changes to _TitanDir entities computed by compute_affected_dirs."""
        # Order deletes depth-first. Reverse alphabetical order is not strictly
        # depth order, but it guarantees that paths sharing common subdirs are
        # deleted children-first, which is all that is needed here.
        dirs_with_deletes = sorted(list(dirs_with_deletes), reverse=True)

        # For every directory which contained a deleted file (including children),
        # check if the directory should disappear. It should disappear if:
        #   1. There are no files in the directory, and...
        #   2. There are no child directories, and...
        #   3. The directory path is not present in dirs_with_adds.
        dirs_paths_to_delete = []
        for path in dirs_with_deletes:
            if path in dirs_with_adds or files.Files.list(
                    path, namespace=namespace, limit=1, _internal=True):
                # The directory is marked for addition, or files still exist in it.
                continue
            subdirs = Dirs.list(path, limit=2)
            if len(subdirs) > 1:
                # Multiple subdirs exist, cannot delete dir.
                continue
            elif len(subdirs) == 1:
                # Handle the case where the only remaining subdir is marked for delete.
                if subdirs.values()[0].path not in dirs_paths_to_delete:
                    continue
            dirs_paths_to_delete.append(path)

        # Batch get all directory entities, both added and deleted.
        ns = namespace
        dir_keys = [
            ndb.Key(_TitanDir, path, namespace=ns)
            for path in dirs_paths_to_delete
        ]
        dir_keys += [
            ndb.Key(_TitanDir, path, namespace=ns) for path in dirs_with_adds
        ]
        existing_dir_ents = ndb.get_multi(dir_keys)
        # Transform into a dictionary mapping paths to existing entities:
        existing_dirs = {}
        for ent in existing_dir_ents:
            if ent:
                existing_dirs[ent.path] = ent

        changed_dir_ents = []
        for path in dirs_paths_to_delete:
            if path in existing_dirs:
                # Existing directory, mark as deleted.
                ent = existing_dirs[path]
                if ent.status == _STATUS_DELETED:
                    # Skip this entity entirely if it's already correct.
                    continue
                ent.status = _STATUS_DELETED
            else:
                # Missing directory entity, create a new one and mark as deleted.
                ent = _TitanDir(
                    # NDB properties:
                    id=path,
                    namespace=namespace,
                    # Model properties:
                    name=os.path.basename(path),
                    parent_path=os.path.dirname(path),
                    parent_paths=utils.split_path(path),
                    status=_STATUS_DELETED,
                )
            # Queue the entity for the batched writes below.
            changed_dir_ents.append(ent)

        for path in dirs_with_adds:
            if path in existing_dirs:
                # Existing directory, make sure it's marked as available.
                ent = existing_dirs[path]
                if ent.status == _STATUS_AVAILABLE:
                    # Skip this entity entirely if it's already correct.
                    continue
                ent.status = _STATUS_AVAILABLE
            else:
                # Missing directory entity, create a new one and mark as available.
                ent = _TitanDir(
                    # NDB properties:
                    id=path,
                    namespace=namespace,
                    # Model properties:
                    name=os.path.basename(path),
                    parent_path=os.path.dirname(path),
                    parent_paths=utils.split_path(path),
                    status=_STATUS_AVAILABLE,
                )
            # Queue the entity for the batched writes below.
            changed_dir_ents.append(ent)

        for dir_ents in utils.chunk_generator(changed_dir_ents,
                                              chunk_size=100):
            if not async:
                ndb.put_multi(dir_ents)
            else:
                ndb.put_multi_async(dir_ents)
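
The two methods are designed to run back to back: compute_affected_dirs folds raw file modifications into per-directory add and delete sets, and update_affected_dirs persists the matching _TitanDir entities. A minimal sketch, assuming modified_paths is a list of ModifiedPath objects produced elsewhere:

dir_service = DirService()
affected_dirs = dir_service.compute_affected_dirs(modified_paths)
dir_service.update_affected_dirs(
    dirs_with_adds=affected_dirs['dirs_with_adds'],
    dirs_with_deletes=affected_dirs['dirs_with_deletes'],
    namespace=affected_dirs['namespace'])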