Example #1
0
class VFSHandler(IO[bytes], metaclass=abc.ABCMeta):
    """Base class for handling objects in the VFS."""
    supported_pathtype = rdf_paths.PathSpec.PathType.UNSET

    # Should this handler be auto-registered?
    auto_register = False

    # Total size of the underlying object and the current read offset, in
    # bytes. Subclasses are expected to keep these up to date.
    size = 0
    offset = 0

    # This is the VFS path to this specific handler.
    # TODO: "/" is a problematic default value because it is not
    # guaranteed that path is set correctly (e.g. by TSK). None would be a better
    # default and to guarantee a valid value would be best.
    path = "/"

    # This will be set by the VFSOpen factory to the pathspec of the final
    # destination of this handler. This pathspec will be case corrected and
    # updated to reflect any potential recursion.
    pathspec = None
    base_fd = None

    def __init__(self,
                 base_fd,
                 handlers,
                 pathspec=None,
                 progress_callback=None):
        """Constructor.

    Args:
      base_fd: A handler to the predecessor handler.
      handlers: A mapping from rdf_paths.PathSpec.PathType to classes
        implementing VFSHandler.
      pathspec: The pathspec to open.
      progress_callback: A callback to indicate that the open call is still
        working but needs more time.

    Raises:
      IOError: if this handler can not be instantiated over the
      requested path.
    """
        del pathspec  # Unused.
        self.base_fd = base_fd
        self.progress_callback = progress_callback
        self._handlers = handlers
        if base_fd is None:
            self.pathspec = rdf_paths.PathSpec()
        else:
            # Make a copy of the base pathspec.
            self.pathspec = base_fd.pathspec.Copy()
        self.metadata = {}

    def __enter__(self):
        # Context-manager support so handlers can be used in `with` blocks.
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.Close()
        # Returning False propagates any exception raised inside the block.
        return False

    def Seek(self, offset, whence=os.SEEK_SET):
        """Seek to an offset in the file."""
        if whence == os.SEEK_SET:
            self.offset = offset
        elif whence == os.SEEK_CUR:
            self.offset += offset
        elif whence == os.SEEK_END:
            # Negative offsets seek backwards from the end of the file.
            self.offset = self.size + offset
        else:
            raise ValueError("Illegal whence value %s" % whence)

    @abc.abstractmethod
    def Read(self, length):
        """Reads some data from the file."""
        raise NotImplementedError

    @abc.abstractmethod
    def Stat(
        self,
        ext_attrs: bool = False,
        follow_symlink: bool = True,
    ) -> rdf_client_fs.StatEntry:
        """Collects stat information about the file.

    If links resolving is enabled, if called on a symlink, the function will
    return information about the file the symlink is pointing to. Otherwise,
    information about the link itself is going to be returned.

    Args:
      ext_attrs: Whether to collect extended attributes of the file as well.
      follow_symlink: Whether links should be resolved.

    Returns:
      A stat entry corresponding to the file.
    """
        raise NotImplementedError

    @abc.abstractmethod
    def IsDirectory(self):
        """Returns true if this object can contain other objects."""
        raise NotImplementedError

    def Tell(self):
        """Returns the current read offset in bytes."""
        return self.offset

    def Close(self):
        """Close internal file descriptors."""
        # Default implementation is a no-op; subclasses holding OS-level
        # resources override this.

    def OpenAsContainer(self, pathtype):
        """Guesses a container from the current object."""
        if self.IsDirectory():
            return self

        # TODO(user): Add support for more containers here (e.g. registries, zip
        # files etc).
        else:
            if pathtype != rdf_paths.PathSpec.PathType.NTFS:
                # For now just guess TSK.
                pathtype = rdf_paths.PathSpec.PathType.TSK
            handler = self._handlers[pathtype]
            # Stack a new handler on top of this one, rooted at "/".
            pathspec = rdf_paths.PathSpec(path="/", pathtype=pathtype)
            return handler(base_fd=self,
                           handlers=self._handlers,
                           pathspec=pathspec,
                           progress_callback=self.progress_callback)

    def MatchBestComponentName(self, component, pathtype):
        """Returns a pathspec for the component which matches best our base listing.

    In order to do the best case insensitive matching we list the files in the
    base handler and return the base match for this component.

    Args:
      component: A component name which should be present in this directory.
      pathtype: Pathtype of the component.

    Returns:
      A PathSpec pointing at the case-corrected component.
    """
        fd = self.OpenAsContainer(pathtype)

        # Adjust the component casing
        file_listing = set(fd.ListNames())

        # First try an exact match
        if component not in file_listing:
            # Now try to match lower case
            lower_component = component.lower()
            for x in file_listing:
                if lower_component == x.lower():
                    component = x
                    break

        # If opening as a container switched pathtypes (e.g. fell through to
        # TSK), start a fresh pathspec; otherwise extend the current one.
        if fd.supported_pathtype != self.pathspec.pathtype:
            new_pathspec = rdf_paths.PathSpec(path=component,
                                              pathtype=fd.supported_pathtype)
        else:
            new_pathspec = self.pathspec.last.Copy()
            new_pathspec.path = component

        return new_pathspec

    def ListFiles(self, ext_attrs=False):
        """Returns an iterator over all VFS files contained in this directory.

    Generates a StatEntry for each file or directory.

    Args:
      ext_attrs: Whether stat entries should contain extended attributes.

    Raises:
      IOError: if this fails.
    """
        del ext_attrs  # Unused.
        return []

    def ListNames(self):
        """A generator for all names in this directory."""
        return []

    # These are file object conformant namings for library functions that
    # grr uses, and that expect to interact with 'real' file objects.
    read = utils.Proxy("Read")
    seek = utils.Proxy("Seek")
    stat = utils.Proxy("Stat")
    tell = utils.Proxy("Tell")
    close = utils.Proxy("Close")

    def seekable(self) -> bool:
        return True

    def readable(self) -> bool:
        return True

    def writable(self) -> bool:
        # The VFS is read-only.
        return False

    def closed(self) -> bool:
        # TODO(hanuszczak): `Close` is actually implemented only for the Windows
        # registry handler. Otherwise it just uses default implementation that does
        # nothing. It might make sense to implement this `closed` logic in the base
        # class or simply always return `False`.
        raise NotImplementedError()

    def isatty(self) -> bool:
        return False

    def fileno(self) -> int:
        # There is no OS-level descriptor backing a VFS handler.
        raise io.UnsupportedOperation()

    @property
    def name(self) -> str:
        raise io.UnsupportedOperation()

    @property
    def mode(self) -> str:
        # Handlers behave like binary read-only file objects.
        return "rb"

    def readline(self, limit: int = 0) -> bytes:
        raise io.UnsupportedOperation()

    def readlines(self, hint: int = 0) -> List[bytes]:
        raise io.UnsupportedOperation()

    def write(self, s: bytes) -> int:
        raise io.UnsupportedOperation()

    def writelines(self, lines: Iterable[bytes]) -> None:
        raise io.UnsupportedOperation()

    def truncate(self, size: Optional[int] = None) -> int:
        raise io.UnsupportedOperation()

    def flush(self) -> None:
        raise io.UnsupportedOperation()

    @classmethod
    def Open(cls,
             fd,
             component,
             handlers,
             pathspec=None,
             progress_callback=None):
        """Try to correct the casing of component.

    This method is called when we failed to open the component directly. We try
    to transform the component into something which is likely to work.

    In this implementation, we correct the case of the component until we can
    not open the path any more.

    Note: on failure partway through, the remaining path components are
    inserted back at the front of `pathspec` (mutating the caller's object)
    with pathtype TSK.

    Args:
      fd: The base fd we will use.
      component: The component we should open.
      handlers: A mapping from rdf_paths.PathSpec.PathType to classes
        implementing VFSHandler.
      pathspec: The rest of the pathspec object.
      progress_callback: A callback to indicate that the open call is still
        working but needs more time.

    Returns:
      A file object.

    Raises:
      IOError: If nothing could be opened still.
    """
        # The handler for this component
        try:
            handler = handlers[component.pathtype]
        except KeyError:
            raise UnsupportedHandlerError(component.pathtype)

        # We will not do any case folding unless requested.
        if component.path_options == rdf_paths.PathSpec.Options.CASE_LITERAL:
            return handler(base_fd=fd, pathspec=component, handlers=handlers)

        path_components = client_utils.LocalPathToCanonicalPath(component.path)
        # filter(None, ...) drops empty strings produced by leading or
        # duplicated slashes; "/" is kept as the explicit root component.
        path_components = ["/"] + list(filter(None,
                                              path_components.split("/")))

        for i, path_component in enumerate(path_components):
            try:
                if fd:
                    new_pathspec = fd.MatchBestComponentName(
                        path_component, component.pathtype)
                else:
                    new_pathspec = component.Copy()
                    new_pathspec.path = path_component

                # If the component has a stream_name (NTFS alternate data stream) set,
                # set it on the last path_component.
                if i == len(path_components) - 1 and component.HasField(
                        "stream_name"):
                    new_pathspec.stream_name = component.stream_name

                # The handler for this component
                try:
                    handler = handlers[new_pathspec.pathtype]
                except KeyError:
                    raise UnsupportedHandlerError(new_pathspec.pathtype)

                fd = handler(base_fd=fd,
                             handlers=handlers,
                             pathspec=new_pathspec,
                             progress_callback=progress_callback)
            except IOError as e:
                # Can not open the first component, we must raise here.
                # NOTE(review): `i <= 1` treats both the root component and the
                # first real component as fatal — presumably intentional, but
                # worth confirming against callers.
                if i <= 1:
                    raise IOError("File not found: {}".format(component))

                # Do not try to use TSK to open a not-found registry entry, fail
                # instead. Using TSK would lead to confusing error messages, hiding
                # the fact that the Registry entry is simply not there.
                if component.pathtype == rdf_paths.PathSpec.PathType.REGISTRY:
                    raise IOError("Registry entry not found: {}".format(e))

                # Insert the remaining path at the front of the pathspec.
                pathspec.Insert(0,
                                path=utils.JoinPath(*path_components[i:]),
                                pathtype=rdf_paths.PathSpec.PathType.TSK)
                break

        return fd

    def GetMetadata(self):
        """Returns the free-form metadata dict attached to this handler."""
        return self.metadata

    @property
    def native_path(self) -> Optional[str]:
        """Returns the path to a native file this handler corresponds to.

    Returns None if this handler doesn't correspond to a native file.
    """
        return None
Example #2
0
class BlobStream(object):
  """File-like object for reading from blobs."""

  def __init__(self, client_path, blob_refs, hash_id):
    self._client_path = client_path
    self._blob_refs = blob_refs
    self._hash_id = hash_id

    # Upper bound for reads with no explicit length, to avoid unbounded
    # memory usage.
    self._max_unbound_read = config.CONFIG["Server.max_unbound_read_size"]

    self._offset = 0
    # Total length is the end of the last (highest-offset) blob reference.
    self._length = self._blob_refs[-1].offset + self._blob_refs[-1].size

    # Cache of the most recently fetched blob, keyed by its reference, so
    # sequential reads within one blob hit the blob store only once.
    self._current_ref = None
    self._current_chunk = None

  def _GetChunk(self):
    """Fetches a chunk corresponding to the current offset."""

    found_ref = None
    for ref in self._blob_refs:
      if self._offset >= ref.offset and self._offset < (ref.offset + ref.size):
        found_ref = ref
        break

    if not found_ref:
      return None, None

    # If self._current_ref == found_ref, then simply return previously found
    # chunk. Otherwise, update self._current_chunk value.
    if self._current_ref != found_ref:
      self._current_ref = found_ref

      data = data_store.BLOBS.ReadBlobs([found_ref.blob_id])
      self._current_chunk = data[found_ref.blob_id]

    return self._current_chunk, self._current_ref

  def Read(self, length=None):
    """Reads up to `length` bytes starting at the current offset.

    Args:
      length: Number of bytes to read. If None, reads to the end of the
        hashed data.

    Returns:
      The bytes read. May be shorter than `length` if the end of the data
      is reached.

    Raises:
      OversizedReadError: If the requested length exceeds
        Server.max_unbound_read_size.
    """

    if length is None:
      length = self._length - self._offset

    if length > self._max_unbound_read:
      raise OversizedReadError("Attempted to read %d bytes when "
                               "Server.max_unbound_read_size is %d" %
                               (length, self._max_unbound_read))

    result = io.BytesIO()
    while result.tell() < length:
      chunk, ref = self._GetChunk()
      if not chunk:
        break

      part = chunk[self._offset - ref.offset:]
      if not part:
        break

      # Only consume as many bytes as are still needed. The previous code
      # advanced the offset by min(length, len(part)), which over-advanced
      # when a chunk straddled the read boundary (e.g. two 10-byte blobs,
      # Read(15) left the offset at 20 instead of 15), desynchronizing
      # Tell() from the bytes actually returned.
      needed = length - result.tell()
      result.write(part[:needed])
      self._offset += min(needed, len(part))

    return result.getvalue()

  def Tell(self):
    """Returns current reading cursor position."""

    return self._offset

  def Seek(self, offset, whence=os.SEEK_SET):
    """Moves the reading cursor."""

    if whence == os.SEEK_SET:
      self._offset = offset
    elif whence == os.SEEK_CUR:
      self._offset += offset
    elif whence == os.SEEK_END:
      self._offset = self._length + offset
    else:
      raise ValueError("Invalid whence argument: %s" % whence)

  # File-object conformant aliases for libraries expecting lowercase names.
  read = utils.Proxy("Read")
  tell = utils.Proxy("Tell")
  seek = utils.Proxy("Seek")

  @property
  def size(self):
    """Size of the hashed data."""
    return self._length

  @property
  def hash_id(self):
    """Hash ID identifying hashed data."""
    return self._hash_id

  def Path(self):
    """Returns the VFS path of the underlying client path."""
    return self._client_path.Path()
Example #3
0
class Dict(rdf_structs.RDFProtoStruct):
    """A high level interface for protobuf Dict objects.

  This effectively converts from a dict to a proto and back.
  The dict may contain strings (python unicode objects), int64,
  or binary blobs (python string objects) as keys and values.

  Invariant: `_values` (a plain dict mapping keys to KeyValue protos) is the
  authoritative store; the proto field `dat` is refreshed from it before any
  raw-data access or serialization.
  """
    protobuf = jobs_pb2.Dict
    rdf_deps = [
        KeyValue,
    ]

    # Authoritative mapping from plain keys to KeyValue protos.
    _values = None

    def __init__(self, initializer=None, **kwargs):
        super(Dict, self).__init__(initializer=None)

        self.dat = None  # type: Union[List[KeyValue], rdf_structs.RepeatedFieldHelper]

        # Support initializing from a mapping
        if isinstance(initializer, dict):
            self.FromDict(initializer)

        # Can be initialized from kwargs (like a dict).
        elif initializer is None:
            self.FromDict(kwargs)

        # Initialize from another Dict.
        elif isinstance(initializer, Dict):
            self.FromDict(initializer.ToDict())

        else:
            raise rdfvalue.InitializeError(
                "Invalid initializer for ProtoDict.")

    def ToDict(self):
        """Converts this proto dict back into a plain Python dict."""
        result = {}
        for x in itervalues(self._values):
            key = x.k.GetValue()
            result[key] = x.v.GetValue()
            try:
                # Try to unpack nested AttributedDicts
                result[key] = result[key].ToDict()
            except AttributeError:
                pass

        return result

    def FromDict(self, dictionary, raise_on_error=True):
        """Replaces the contents of this Dict with `dictionary`'s entries."""
        # First clear and then set the dictionary.
        self._values = {}
        for key, value in iteritems(dictionary):
            self._values[key] = KeyValue(
                k=DataBlob().SetValue(key, raise_on_error=raise_on_error),
                v=DataBlob().SetValue(value, raise_on_error=raise_on_error))
        # Re-point the proto field at the fresh values.
        self.dat = itervalues(self._values)
        return self

    def __getitem__(self, key):
        return self._values[key].v.GetValue()

    def __contains__(self, key):
        return key in self._values

    # TODO: This implementation is flawed. It returns a new instance
    # on each invocation, effectively preventing changes to mutable
    # datastructures, e.g. `dct["key"] = []; dct["key"].append(5)`.
    def GetItem(self, key, default=None):
        """Returns the value for `key`, or `default` if it is missing."""
        if key in self._values:
            return self._values[key].v.GetValue()
        return default

    def Items(self):
        """Yields (key, value) pairs, like dict.items()."""
        for x in itervalues(self._values):
            yield x.k.GetValue(), x.v.GetValue()

    def Values(self):
        """Yields the values, like dict.values()."""
        for x in itervalues(self._values):
            yield x.v.GetValue()

    def Keys(self):
        """Yields the keys, like dict.keys()."""
        for x in itervalues(self._values):
            yield x.k.GetValue()

    # dict-conformant lowercase aliases.
    get = utils.Proxy("GetItem")
    items = utils.Proxy("Items")
    keys = utils.Proxy("Keys")
    values = utils.Proxy("Values")

    def __delitem__(self, key):
        # TODO(user):pytype: assigning "dirty" here is a hack. The assumption
        # that self.dat is RepeatedFieldHelper may not hold. For some reason the
        # type checker doesn not respect the isinstance check below and explicit
        # cast is required.
        if not isinstance(self.dat, rdf_structs.RepeatedFieldHelper):
            raise TypeError("self.dat has an unexpected type %s" %
                            self.dat.__class__)
        cast(rdf_structs.RepeatedFieldHelper, self.dat).dirty = True
        del self._values[key]

    def __len__(self):
        return len(self._values)

    def SetItem(self, key, value, raise_on_error=True):
        """Alternative to __setitem__ that can ignore errors.

    Sometimes we want to serialize a structure that contains some simple
    objects, and some that can't be serialized.  This method gives the caller a
    way to specify that they don't care about values that can't be
    serialized.

    Args:
      key: dict key
      value: dict value
      raise_on_error: if True, raise if we can't serialize.  If False, set the
        key to an error string.
    """
        # TODO(user):pytype: assigning "dirty" here is a hack. The assumption
        # that self.dat is RepeatedFieldHelper may not hold. For some reason the
        # type checker doesn not respect the isinstance check below and explicit
        # cast is required.
        if not isinstance(self.dat, rdf_structs.RepeatedFieldHelper):
            raise TypeError("self.dat has an unexpected type %s" %
                            self.dat.__class__)
        cast(rdf_structs.RepeatedFieldHelper, self.dat).dirty = True
        self._values[key] = KeyValue(
            k=DataBlob().SetValue(key, raise_on_error=raise_on_error),
            v=DataBlob().SetValue(value, raise_on_error=raise_on_error))

    def __setitem__(self, key, value):
        # TODO(user):pytype: assigning "dirty" here is a hack. The assumption
        # that self.dat is RepeatedFieldHelper may not hold. For some reason the
        # type checker doesn not respect the isinstance check below and explicit
        # cast is required.
        if not isinstance(self.dat, rdf_structs.RepeatedFieldHelper):
            raise TypeError("self.dat has an unexpected type %s" %
                            self.dat.__class__)
        cast(rdf_structs.RepeatedFieldHelper, self.dat).dirty = True
        self._values[key] = KeyValue(k=DataBlob().SetValue(key),
                                     v=DataBlob().SetValue(value))

    def __iter__(self):
        # Iterating a Dict yields its keys, like a plain dict.
        for x in itervalues(self._values):
            yield x.k.GetValue()

    # Required, because in Python 3 overriding `__eq__` nullifies `__hash__`.
    __hash__ = rdf_structs.RDFProtoStruct.__hash__

    def __eq__(self, other):
        if isinstance(other, dict):
            return self.ToDict() == other
        elif isinstance(other, Dict):
            return self.ToDict() == other.ToDict()
        else:
            return False

    def GetRawData(self):
        # Refresh the proto field from the authoritative store first.
        self.dat = itervalues(self._values)
        return super(Dict, self).GetRawData()

    def _CopyRawData(self):
        # Refresh the proto field from the authoritative store first.
        self.dat = itervalues(self._values)
        return super(Dict, self)._CopyRawData()

    def SetRawData(self, raw_data):
        super(Dict, self).SetRawData(raw_data)
        # Rebuild the authoritative store from the freshly-set proto field.
        self._values = {}
        for d in self.dat:
            self._values[d.k.GetValue()] = d

    def SerializeToBytes(self):
        # Refresh the proto field from the authoritative store first.
        self.dat = itervalues(self._values)
        return super(Dict, self).SerializeToBytes()

    def __str__(self):
        return str(self.ToDict())
Example #4
0
class Dict(rdf_structs.RDFProtoStruct):
    """A high level interface for protobuf Dict objects.

  This effectively converts from a dict to a proto and back.
  The dict may contain strings (python unicode objects), int64,
  or binary blobs (python string objects) as keys and values.

  Invariant: `_values` (a plain dict mapping keys to KeyValue protos) is the
  authoritative store; the proto field `dat` is refreshed from it before any
  raw-data access or serialization.
  """
    protobuf = jobs_pb2.Dict
    rdf_deps = [
        KeyValue,
    ]

    # Authoritative mapping from plain keys to KeyValue protos.
    _values = None

    def __init__(self, initializer=None, age=None, **kwarg):
        super(Dict, self).__init__(initializer=None, age=age)

        # Support initializing from a mapping
        if isinstance(initializer, dict):
            self.FromDict(initializer)

        # Can be initialized from kwargs (like a dict).
        elif initializer is None:
            self.FromDict(kwarg)

        # Initialize from another Dict.
        elif isinstance(initializer, Dict):
            self.FromDict(initializer.ToDict())
            self.age = initializer.age

        else:
            raise rdfvalue.InitializeError(
                "Invalid initializer for ProtoDict.")

    def ToDict(self):
        """Converts this proto dict back into a plain Python dict."""
        result = {}
        for x in itervalues(self._values):
            key = x.k.GetValue()
            result[key] = x.v.GetValue()
            try:
                # Try to unpack nested AttributedDicts
                result[key] = result[key].ToDict()
            except AttributeError:
                pass

        return result

    def FromDict(self, dictionary, raise_on_error=True):
        """Replaces the contents of this Dict with `dictionary`'s entries.

    Args:
      dictionary: A mapping to load key/value pairs from.
      raise_on_error: If True, raise when a value can not be serialized;
        otherwise store an error string for it.

    Returns:
      self, for call chaining.
    """
        # First clear and then set the dictionary.
        self._values = {}
        # Use the iteritems() compatibility helper rather than the
        # Python-2-only dict.iteritems() method, consistently with the
        # itervalues() helper used throughout this class.
        for key, value in iteritems(dictionary):
            self._values[key] = KeyValue(
                k=DataBlob().SetValue(key, raise_on_error=raise_on_error),
                v=DataBlob().SetValue(value, raise_on_error=raise_on_error))
        # Re-point the proto field at the fresh values.
        self.dat = itervalues(self._values)
        return self

    def __getitem__(self, key):
        return self._values[key].v.GetValue()

    def __contains__(self, key):
        return key in self._values

    def GetItem(self, key, default=None):
        """Returns the value for `key`, or `default` if it is missing."""
        if key in self._values:
            return self._values[key].v.GetValue()
        return default

    def Items(self):
        """Yields (key, value) pairs, like dict.items()."""
        for x in itervalues(self._values):
            yield x.k.GetValue(), x.v.GetValue()

    def Values(self):
        """Yields the values, like dict.values()."""
        for x in itervalues(self._values):
            yield x.v.GetValue()

    def Keys(self):
        """Yields the keys, like dict.keys()."""
        for x in itervalues(self._values):
            yield x.k.GetValue()

    # dict-conformant lowercase aliases.
    get = utils.Proxy("GetItem")
    items = utils.Proxy("Items")
    keys = utils.Proxy("Keys")
    values = utils.Proxy("Values")

    def __delitem__(self, key):
        # Mark the proto field dirty so the deletion is re-serialized.
        self.dat.dirty = True
        del self._values[key]

    def __len__(self):
        return len(self._values)

    def SetItem(self, key, value, raise_on_error=True):
        """Alternative to __setitem__ that can ignore errors.

    Sometimes we want to serialize a structure that contains some simple
    objects, and some that can't be serialized.  This method gives the caller a
    way to specify that they don't care about values that can't be
    serialized.

    Args:
      key: dict key
      value: dict value
      raise_on_error: if True, raise if we can't serialize.  If False, set the
        key to an error string.
    """
        self.dat.dirty = True
        self._values[key] = KeyValue(
            k=DataBlob().SetValue(key, raise_on_error=raise_on_error),
            v=DataBlob().SetValue(value, raise_on_error=raise_on_error))

    def __setitem__(self, key, value):
        self.dat.dirty = True
        self._values[key] = KeyValue(k=DataBlob().SetValue(key),
                                     v=DataBlob().SetValue(value))

    def __iter__(self):
        # Iterating a Dict yields its keys, like a plain dict.
        for x in itervalues(self._values):
            yield x.k.GetValue()

    def __eq__(self, other):
        if isinstance(other, dict):
            return self.ToDict() == other
        elif isinstance(other, Dict):
            return self.ToDict() == other.ToDict()
        else:
            return False

    def GetRawData(self):
        # Refresh the proto field from the authoritative store first.
        self.dat = itervalues(self._values)
        return super(Dict, self).GetRawData()

    def _CopyRawData(self):
        # Refresh the proto field from the authoritative store first.
        self.dat = itervalues(self._values)
        return super(Dict, self)._CopyRawData()

    def SetRawData(self, raw_data):
        super(Dict, self).SetRawData(raw_data)
        # Rebuild the authoritative store from the freshly-set proto field.
        self._values = {}
        for d in self.dat:
            self._values[d.k.GetValue()] = d

    def SerializeToString(self):
        # Refresh the proto field from the authoritative store first.
        self.dat = itervalues(self._values)
        return super(Dict, self).SerializeToString()

    def ParseFromString(self, value):
        super(Dict, self).ParseFromString(value)
        # Rebuild the authoritative store from the parsed proto field.
        self._values = {}
        for d in self.dat:
            self._values[d.k.GetValue()] = d

    def __str__(self):
        return str(self.ToDict())
Example #5
0
class GRRFuseDatastoreOnly(object):
    """We implement the FUSE methods in this class."""

    # Directories to hide. Readdir will not return them.
    ignored_dirs = [
        # We don't want to show AFF4Index objects.
        "/index/client"
    ]

    def __init__(self, root="/", token=None):
        """Initializes the filesystem rooted at the given aff4 path.

    Args:
      root: The aff4 path to use as the filesystem root.
      token: Access token used for all datastore operations.

    Raises:
      IOError: If the supplied aff4 path does not exist.
    """
        self.root = rdfvalue.RDFURN(root)
        self.token = token
        self.default_file_mode = _DEFAULT_MODE_FILE
        self.default_dir_mode = _DEFAULT_MODE_DIRECTORY

        try:
            logging.info("Making sure supplied aff4path actually exists....")
            self.getattr(root)
            logging.info("OK")
        except fuse.FuseOSError:
            logging.info("Supplied aff4path didn't exist!")
            raise IOError("Supplied aff4 path '%s' does not exist." %
                          self.root)

    def MakePartialStat(self, fd):
        """Try and give a 'stat' for something not in the data store.

    Args:
      fd: The object with no stat.

    Returns:
      A dictionary corresponding to what we'll say the 'stat' is
      for objects which are not actually files, so have no OS level stat.

    """

        is_dir = "Container" in fd.behaviours

        return {
            "pathspec": fd.Get(fd.Schema.PATHSPEC, ""),
            "st_atime": fd.Get(fd.Schema.LAST, 0),
            "st_blksize": 0,
            "st_blocks": 0,
            "st_ctime": 0,
            "st_dev": 0,
            "st_gid": 0,
            "st_ino": 0,
            "st_mode":
            self.default_dir_mode if is_dir else self.default_file_mode,
            "st_mtime": 0,
            "st_nlink": 0,
            "st_rdev": 0,
            "st_size": fd.Get(fd.Schema.SIZE, 0),
            "st_uid": 0
        }

    def _IsDir(self, path):
        """True if and only if the path has the directory bit set in its mode."""
        return stat.S_ISDIR(int(self.getattr(path)["st_mode"]))

    def Readdir(self, path, fh=None):
        """Reads a directory given by path.

    Args:
      path: The path to list children of.
      fh: A file handler. Not used.

    Yields:
      A generator of filenames.

    Raises:
      FuseOSError: If we try and list a file.

    """
        del fh

        # We can't read a path if it's a file.
        if not self._IsDir(path):
            raise fuse.FuseOSError(errno.ENOTDIR)

        fd = aff4.FACTORY.Open(self.root.Add(path), token=self.token)

        children = fd.ListChildren()

        # Make these special directories unicode to be consistent with the rest of
        # aff4.
        for directory in [u".", u".."]:
            yield directory

        # ListChildren returns a generator, so we do the same.
        for child in children:
            # Filter out any directories we've chosen to ignore.
            if child.Path() not in self.ignored_dirs:
                yield child.Basename()

    def Getattr(self, path, fh=None):
        """Performs a stat on a file or directory.

    Args:
      path: The path to stat.
      fh: A file handler. Not used.

    Returns:
      A dictionary mapping st_ names to their values.

    Raises:
      FuseOSError: When a path is supplied that grr doesn't know about, ie an
      invalid file path.
      ValueError: If an empty path is passed. (The empty string, when passed to
      self.root.Add, returns a path for aff4:/, the root directory, which is not
      the behaviour we want.)
    """
        del fh

        if not path:
            raise fuse.FuseOSError(errno.ENOENT)

        if path != self.root:
            full_path = self.root.Add(path)
        else:
            full_path = path

        # Open the object once and reuse it below; the previous code opened
        # the same path twice, costing an extra datastore round-trip.
        fd = aff4.FACTORY.Open(full_path, token=self.token)

        # The root aff4 path technically doesn't exist in the data store, so
        # it is a special case.
        if full_path == "/":
            return self.MakePartialStat(fd)

        # Grab the stat according to aff4.
        aff4_stat = fd.Get(fd.Schema.STAT)

        # If the Schema for the object has a STAT attribute, go ahead and return
        # it as a dictionary.
        if aff4_stat:
            return aff4_stat.AsDict()

        # If the object didn't have a stored stat, we figure out if it is a special
        # grr object, or just doesn't exist.

        # We now check if the aff4 object actually has a row in the data store.
        # This prevents us from being able to cd to directories that don't exist,
        # since such directories have a newly-created empty AFF4Object,
        # but no row in the data store. Anything that is a
        # row in the data store will have a LAST attribute, so we check that.
        elif fd.Get(fd.Schema.LAST) is None:
            # We raise the "no such file or directory" error.
            raise fuse.FuseOSError(errno.ENOENT)
        else:
            # This is an object that exists in the datastore, but has no STAT, so we
            # don't know how to handle it.
            pass

        # If the object was in the data store, but didn't have a stat, we just
        # try and guess some sensible values.
        return self.MakePartialStat(fd)

    def Read(self, path, length=None, offset=0, fh=None):
        """Reads data from a file.

    Args:
      path: The path to the file to read.
      length: How many bytes to read.
      offset: Offset in bytes from which reading should start.
      fh: A file handler. Not used.

    Returns:
      A string containing the file contents requested.

    Raises:
      FuseOSError: If we try and read a directory or if we try and read an
      object that doesn't support reading.

    """
        del fh

        if self._IsDir(path):
            raise fuse.FuseOSError(errno.EISDIR)

        fd = aff4.FACTORY.Open(self.root.Add(path), token=self.token)

        # If the object has Read() and Seek() methods, let's use them.
        if all((hasattr(fd, "Read"), hasattr(fd, "Seek"), callable(fd.Read),
                callable(fd.Seek))):
            # By default, read the whole file.
            if length is None:
                length = fd.Get(fd.Schema.SIZE)

            fd.Seek(offset)
            return fd.Read(length)
        else:
            # If we don't have Read/Seek methods, we probably can't read this object.
            raise fuse.FuseOSError(errno.EIO)

    def RaiseReadOnlyError(self):
        """Raise an error complaining that the file system is read-only."""
        raise fuse.FuseOSError(errno.EROFS)

    # pylint: disable=invalid-name
    def mkdir(self, *unused_args, **unused_kwargs):
        """Unimplemented on purpose. File system is read-only."""
        self.RaiseReadOnlyError()

    def symlink(self, *unused_args, **unused_kwargs):
        """Unimplemented on purpose. File system is read-only."""
        self.RaiseReadOnlyError()

    def rename(self, *unused_args, **unused_kwargs):
        """Unimplemented on purpose. File system is read-only."""
        self.RaiseReadOnlyError()

    def link(self, *unused_args, **unused_kwargs):
        """Unimplemented on purpose. File system is read-only."""
        self.RaiseReadOnlyError()

    def write(self, *unused_args, **unused_kwargs):
        """Unimplemented on purpose. File system is read-only."""
        self.RaiseReadOnlyError()

    def truncate(self, *unused_args, **unused_kwargs):
        """Unimplemented on purpose. File system is read-only."""
        self.RaiseReadOnlyError()

    def create(self, *unused_args, **unused_kwargs):
        """Unimplemented on purpose. File system is read-only."""
        self.RaiseReadOnlyError()

    # pylint: enable=unused-argument,invalid-name

    # FUSE expects the names of the functions to be standard
    # filesystem function style (all lower case), so we set them so here.

    read = utils.Proxy("Read")
    readdir = utils.Proxy("Readdir")
    getattr = utils.Proxy("Getattr")
Example #6
0
File: vfs.py Project: x35029/grr
class VFSHandler(with_metaclass(registry.MetaclassRegistry, object)):
    """Base class for handling objects in the VFS."""
    # Pathtype this handler serves; -1 acts as an "unset" sentinel here
    # (subclasses set a concrete rdf_paths.PathSpec.PathType value).
    supported_pathtype = -1

    # Should this handler be auto-registered?
    auto_register = False

    # Total size of the object and current read offset, maintained by Seek().
    size = 0
    offset = 0

    # This is the VFS path to this specific handler.
    path = "/"

    # This will be set by the VFSOpen factory to the pathspec of the final
    # destination of this handler. This pathspec will be case corrected and
    # updated to reflect any potential recursion.
    pathspec = None
    base_fd = None

    def __init__(self, base_fd, pathspec=None, progress_callback=None):
        """Initializes the handler on top of an optional predecessor handler.

    Args:
      base_fd: A handler to the predecessor handler.
      pathspec: The pathspec to open.
      progress_callback: A callback to indicate that the open call is still
        working but needs more time.

    Raises:
      IOError: if this handler can not be instantiated over the
      requested path.
    """
        del pathspec  # Unused in the base class; subclasses consume it.
        self.base_fd = base_fd
        self.progress_callback = progress_callback
        # Either start a fresh pathspec or continue from the predecessor's.
        if base_fd is None:
            self.pathspec = rdf_paths.PathSpec()
        else:
            self.pathspec = base_fd.pathspec.Copy()
        self.metadata = {}

    def __enter__(self):
        """Supports use of the handler as a context manager."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Closes the handler on context exit; never suppresses exceptions."""
        self.Close()
        return False

    def Seek(self, offset, whence=os.SEEK_SET):
        """Seek to an offset in the file.

    Args:
      offset: Byte offset, interpreted relative to `whence`.
      whence: One of os.SEEK_SET, os.SEEK_CUR or os.SEEK_END.

    Raises:
      ValueError: if `whence` is not one of the three standard values.
    """
        if whence == os.SEEK_SET:
            new_position = offset
        elif whence == os.SEEK_CUR:
            new_position = self.offset + offset
        elif whence == os.SEEK_END:
            new_position = self.size + offset
        else:
            raise ValueError("Illegal whence value %s" % whence)
        self.offset = new_position

    @abc.abstractmethod
    def Read(self, length):
        """Reads some data from the file.

    Args:
      length: Number of bytes to read.
    """
        raise NotImplementedError

    @abc.abstractmethod
    def Stat(self, ext_attrs=False):
        """Returns a StatEntry about this file.

    Args:
      ext_attrs: Whether the result should include extended attributes.
    """
        raise NotImplementedError

    @abc.abstractmethod
    def IsDirectory(self):
        """Returns true if this object can contain other objects."""
        raise NotImplementedError

    def Tell(self):
        """Returns the current read offset within the file."""
        return self.offset

    def Close(self):
        """Close internal file descriptors."""
        # The base implementation holds no descriptors; subclasses override.

    def OpenAsContainer(self):
        """Guesses a container from the current object."""
        if self.IsDirectory():
            return self

        # TODO(user): Add support for more containers here (e.g. registries,
        # zip files etc). For now just guess TSK and open ourselves with it.
        tsk = rdf_paths.PathSpec.PathType.TSK
        return VFS_HANDLERS[tsk](
            self,
            rdf_paths.PathSpec(path="/", pathtype=tsk),
            progress_callback=self.progress_callback)

    def MatchBestComponentName(self, component):
        """Returns a pathspec for the component matching our base listing best.

    In order to do the best case insensitive matching we list the files in the
    base handler and pick the entry that matches this component.

    Args:
      component: A component name which should be present in this directory.

    Returns:
      A pathspec pointing at the best matching component name.
    """
        fd = self.OpenAsContainer()

        # List the directory so the component casing can be corrected.
        names = set(fd.ListNames())

        if component not in names:
            # No exact hit - fall back to a case-insensitive match.
            folded = component.lower()
            match = next(
                (name for name in names if folded == name.lower()), None)
            if match is not None:
                component = match

        # Build the pathspec for the (possibly case-corrected) component.
        if fd.supported_pathtype != self.pathspec.pathtype:
            return rdf_paths.PathSpec(path=component,
                                      pathtype=fd.supported_pathtype)

        new_pathspec = self.pathspec.last.Copy()
        new_pathspec.path = component
        return new_pathspec

    def ListFiles(self, ext_attrs=False):
        """An iterator over all VFS files contained in this directory.

    Generates a StatEntry for each file or directory.

    Args:
      ext_attrs: Whether stat entries should contain extended attributes.

    Raises:
      IOError: if this fails.
    """
        # The base implementation yields nothing; subclasses override.
        _ = ext_attrs

    def ListNames(self):
        """A generator for all names in this directory.

    Returns:
      An iterable of names; empty in the base implementation.
    """
        return []

    # These are file object conformant namings for library functions that
    # grr uses, and that expect to interact with 'real' file objects.
    # NOTE(review): utils.Proxy presumably forwards attribute access to the
    # CamelCase methods named here -- confirm against the utils module.
    read = utils.Proxy("Read")
    seek = utils.Proxy("Seek")
    stat = utils.Proxy("Stat")
    tell = utils.Proxy("Tell")
    close = utils.Proxy("Close")

    @classmethod
    def Open(cls, fd, component, pathspec=None, progress_callback=None):
        """Try to correct the casing of component.

    This method is called when we failed to open the component directly. We try
    to transform the component into something which is likely to work.

    In this implementation, we correct the case of the component until we can
    not open the path any more.

    Args:
      fd: The base fd we will use.
      component: The component we should open.
      pathspec: The rest of the pathspec object.
      progress_callback: A callback to indicate that the open call is still
        working but needs more time.

    Returns:
      A file object.

    Raises:
      IOError: If nothing could be opened still.
    """
        # The handler for this component
        try:
            handler = VFS_HANDLERS[component.pathtype]
        except KeyError:
            raise IOError("VFS handler %d not supported." % component.pathtype)

        # We will not do any case folding unless requested.
        if component.path_options == rdf_paths.PathSpec.Options.CASE_LITERAL:
            return handler(base_fd=fd, pathspec=component)

        path_components = client_utils.LocalPathToCanonicalPath(component.path)
        # Prepend "/" so the loop opens the filesystem root first, then
        # descends one component at a time.
        path_components = ["/"] + list(filter(None,
                                              path_components.split("/")))
        for i, path_component in enumerate(path_components):
            try:
                if fd:
                    # Case-correct this component against the parent listing.
                    new_pathspec = fd.MatchBestComponentName(path_component)
                else:
                    # NOTE(review): this aliases (and mutates) the caller's
                    # `component` pathspec rather than copying it -- confirm
                    # callers do not reuse `component` afterwards.
                    new_pathspec = component
                    new_pathspec.path = path_component

                # The handler for this component
                try:
                    handler = VFS_HANDLERS[new_pathspec.pathtype]
                except KeyError:
                    raise IOError("VFS handler %d not supported." %
                                  new_pathspec.pathtype)

                fd = handler(base_fd=fd,
                             pathspec=new_pathspec,
                             progress_callback=progress_callback)
            except IOError as e:
                # Can not open the first component, we must raise here.
                # (i == 0 is the synthetic "/" root; i == 1 the first real
                # component.)
                if i <= 1:
                    raise IOError("File not found: {}".format(component))

                # Do not try to use TSK to open a not-found registry entry, fail
                # instead. Using TSK would lead to confusing error messages, hiding
                # the fact that the Registry entry is simply not there.
                if component.pathtype == rdf_paths.PathSpec.PathType.REGISTRY:
                    raise IOError("Registry entry not found: {}".format(e))

                # Insert the remaining path at the front of the pathspec so a
                # later open can fall back to raw device access (TSK).
                pathspec.Insert(0,
                                path=utils.JoinPath(*path_components[i:]),
                                pathtype=rdf_paths.PathSpec.PathType.TSK)
                break

        return fd

    def GetMetadata(self):
        """Returns the metadata dict collected for this handler."""
        return self.metadata