Example #1
File: vfs.py Project: qsdj/grr
    def Handle(self, args, token=None):
        ValidateVfsPath(args.file_path)

        if args.timestamp:
            age = args.timestamp
        else:
            age = aff4.ALL_TIMES

        file_obj = aff4.FACTORY.Open(args.client_id.ToClientURN().Add(
            args.file_path),
                                     mode="r",
                                     age=age,
                                     token=token)

        if data_store.RelationalDBReadEnabled(category="vfs"):
            # These are not really "files", so they cannot be stored in the
            # database, but they can still be queried, so we need to return
            # something. Sometimes they contain a trailing slash, so we need to
            # take care of that.
            #
            # TODO(hanuszczak): Require VFS paths to be normalized so that trailing
            # slash is either forbidden or mandatory.
            if args.file_path.endswith("/"):
                args.file_path = args.file_path[:-1]
            if args.file_path in ["fs", "registry", "temp", "fs/os", "fs/tsk"]:
                api_file = ApiFile()
                api_file.name = api_file.path = args.file_path
                api_file.is_directory = True
                return ApiGetFileDetailsResult(file=api_file)

            path_type, components = rdf_objects.ParseCategorizedPath(
                args.file_path)

            # TODO(hanuszczak): The tests passed even without support for timestamp
            # filtering. The test suite should probably be improved in that regard.
            path_id = rdf_objects.PathID(components)
            path_info = data_store.REL_DB.FindPathInfoByPathID(
                str(args.client_id),
                path_type,
                path_id,
                timestamp=args.timestamp)

            if path_info:
                stat_entry = path_info.stat_entry
                hash_entry = path_info.hash_entry
            else:
                stat_entry = rdf_client.StatEntry()
                hash_entry = rdf_crypto.Hash()
        else:
            stat_entry = None
            hash_entry = None

        return ApiGetFileDetailsResult(
            file=ApiFile().InitFromAff4Object(file_obj,
                                              stat_entry=stat_entry,
                                              hash_entry=hash_entry,
                                              with_details=True))
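
Example #1 validates the requested path before opening anything. The actual `ValidateVfsPath` is not included in these examples; a minimal sketch of such a validator, assuming the allowed VFS roots are the same ones the root-directory check above lists ("fs", "registry", "temp"):

def ValidateVfsPath(path):
    """Raises ValueError if `path` does not start with a known VFS root."""
    # Hypothetical allowlist, inferred from the root-directory check above.
    roots = ("fs", "registry", "temp")
    components = [c for c in path.split("/") if c]
    if not components or components[0] not in roots:
        raise ValueError("Invalid VFS path: %r" % path)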
Example #2
def GetUrnHashEntry(urn, token=None):
    """Returns an `rdf_crypto.Hash` instance for given URN of an AFF4 file."""
    if data_store.RelationalDBReadEnabled(category="vfs"):
        client_id, vfs_path = urn.Split(2)
        path_type, components = rdf_objects.ParseCategorizedPath(vfs_path)
        path_id = rdf_objects.PathID(components)

        path_info = data_store.REL_DB.FindPathInfoByPathID(
            client_id, path_type, path_id)
        return path_info.hash_entry
    else:
        with aff4.FACTORY.Open(urn, token=token) as fd:
            return GetFileHashEntry(fd)
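
`urn.Split(2)` separates the client id from the categorized VFS path. A hypothetical call, borrowing the URN layout from the `AddFile` docstring in Example #8 and assuming the project's `rdfvalue` module:

urn = rdfvalue.RDFURN("aff4:/C.123123123/fs/os/usr/local/blah")
client_id, vfs_path = urn.Split(2)
# client_id == "C.123123123", vfs_path == "fs/os/usr/local/blah"
hash_entry = GetUrnHashEntry(urn)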
Example #3
def MigrateClientVfs(client_urn):
    """Migrates entire VFS of given client to the relational data store."""
    vfs = ListVfs(client_urn)

    path_infos = []

    for vfs_urn in vfs:
        _, vfs_path = vfs_urn.Split(2)
        path_type, components = rdf_objects.ParseCategorizedPath(vfs_path)

        path_info = rdf_objects.PathInfo(path_type=path_type,
                                         components=components)
        path_infos.append(path_info)

    data_store.REL_DB.WritePathInfos(client_urn.Basename(), path_infos)

    for vfs_group in utils.Grouper(vfs, _VFS_GROUP_SIZE):
        stat_entries = dict()
        hash_entries = dict()

        for fd in aff4.FACTORY.MultiOpen(vfs_group, age=aff4.ALL_TIMES):
            _, vfs_path = fd.urn.Split(2)
            path_type, components = rdf_objects.ParseCategorizedPath(vfs_path)
            path_info = rdf_objects.PathInfo(path_type=path_type,
                                             components=components)

            for stat_entry in fd.GetValuesForAttribute(fd.Schema.STAT):
                stat_path_info = path_info.Copy()
                stat_path_info.timestamp = stat_entry.age
                stat_entries[stat_path_info] = stat_entry

            for hash_entry in fd.GetValuesForAttribute(fd.Schema.HASH):
                hash_path_info = path_info.Copy()
                hash_path_info.timestamp = hash_entry.age
                hash_entries[hash_path_info] = hash_entry

        data_store.REL_DB.MultiWritePathHistory(client_urn.Basename(),
                                                stat_entries, hash_entries)
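
`utils.Grouper` batches the URN list so that at most `_VFS_GROUP_SIZE` AFF4 objects are opened per `MultiOpen` call. The helper itself is not shown in these examples; a minimal sketch of such a chunking generator (the real `utils.Grouper` may differ):

def Grouper(iterable, size):
    """Yields consecutive chunks of at most `size` items."""
    chunk = []
    for item in iterable:
        chunk.append(item)
        if len(chunk) == size:
            yield chunk
            chunk = []
    if chunk:
        yield chunk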
Example #4
def CreateFolder(client_id, path, timestamp, token=None):
    """Creates a VFS folder."""
    with test_lib.FakeTime(timestamp):
        with aff4.FACTORY.Create(client_id.Add(path),
                                 aff4_type=aff4_standard.VFSDirectory,
                                 mode="w",
                                 token=token) as _:
            pass

        if data_store.RelationalDBWriteEnabled():
            path_type, components = rdf_objects.ParseCategorizedPath(path)

            path_info = rdf_objects.PathInfo()
            path_info.path_type = path_type
            path_info.components = components
            path_info.directory = True

            data_store.REL_DB.WritePathInfos(client_id.Basename(), [path_info])
Example #5
    def _CreateFile(self, path, content, hashing=False):
        with aff4.FACTORY.Create(path, aff4.AFF4MemoryStream,
                                 token=self.token) as fd:
            fd.Write(content)

            if hashing:
                digest = hashlib.sha256(content).digest()
                fd.Set(fd.Schema.HASH, rdf_crypto.Hash(sha256=digest))

                if data_store.RelationalDBWriteEnabled():
                    client_id, vfs_path = path.Split(2)
                    path_type, components = rdf_objects.ParseCategorizedPath(
                        vfs_path)

                    path_info = rdf_objects.PathInfo()
                    path_info.path_type = path_type
                    path_info.components = components
                    path_info.hash_entry.sha256 = digest
                    data_store.REL_DB.WritePathInfos(client_id, [path_info])
Example #6
def CreateFileVersion(client_id, path, content="", timestamp=None, token=None):
    """Add a new version for a file."""
    if timestamp is None:
        timestamp = rdfvalue.RDFDatetime.Now()

    with test_lib.FakeTime(timestamp):
        with aff4.FACTORY.Create(client_id.Add(path),
                                 aff4_type=aff4_grr.VFSFile,
                                 mode="w",
                                 token=token) as fd:
            fd.Write(content)
            fd.Set(fd.Schema.CONTENT_LAST, rdfvalue.RDFDatetime.Now())

        if data_store.RelationalDBWriteEnabled():
            path_type, components = rdf_objects.ParseCategorizedPath(path)

            path_info = rdf_objects.PathInfo()
            path_info.path_type = path_type
            path_info.components = components
            path_info.directory = False

            data_store.REL_DB.WritePathInfos(client_id.Basename(), [path_info])
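
A hypothetical usage (path and contents chosen for illustration): each call stamps its version with `rdfvalue.RDFDatetime.Now()` when no timestamp is given, so both versions remain retrievable later, e.g. with `age=aff4.ALL_TIMES` as in Example #1.

CreateFileVersion(client_id, "fs/os/foo/bar", content="v1", token=token)
CreateFileVersion(client_id, "fs/os/foo/bar", content="v2", token=token)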
Example #7
    def CreateFileVersions(self, client_id, file_path):
        """Add a new version for a file."""
        path_type, components = rdf_objects.ParseCategorizedPath(file_path)

        with test_lib.FakeTime(self.time_1):
            token = access_control.ACLToken(username="******")
            fd = aff4.FACTORY.Create(client_id.Add(file_path),
                                     aff4.AFF4MemoryStream,
                                     mode="w",
                                     token=token)
            fd.Write("Hello World")
            fd.Close()

            if data_store.RelationalDBWriteEnabled():
                path_info = rdf_objects.PathInfo()
                path_info.path_type = path_type
                path_info.components = components
                path_info.directory = False

                data_store.REL_DB.WritePathInfos(client_id.Basename(),
                                                 [path_info])

        with test_lib.FakeTime(self.time_2):
            fd = aff4.FACTORY.Create(client_id.Add(file_path),
                                     aff4.AFF4MemoryStream,
                                     mode="w",
                                     token=token)
            fd.Write("Goodbye World")
            fd.Close()

            if data_store.RelationalDBWriteEnabled():
                path_info = rdf_objects.PathInfo()
                path_info.path_type = path_type
                path_info.components = components
                path_info.directory = False

                data_store.REL_DB.WritePathInfos(client_id.Basename(),
                                                 [path_info])
Example #8
  def AddFile(self, fd):
    """Adds a file to the hash file store.

    We take a file in the client space:
      aff4:/C.123123123/fs/os/usr/local/blah

    Hash it, update the hash in the original file if it's different from the
    one calculated on the client, and copy the original AFF4 object to

      aff4:/files/hash/generic/sha256/123123123 (canonical reference)

    We then create symlinks for all other hash types:

      aff4:/files/hash/generic/sha1/345345345
      aff4:/files/hash/generic/md5/456456456
      aff4:/files/hash/pecoff/md5/aaaaaaaa (only for PEs)
      aff4:/files/hash/pecoff/sha1/bbbbbbbb (only for PEs)

    When present in PE files, the signing data (revision, cert_type,
    certificate) is added to the original object.

    This can't be done simply in the FileStore.Write() method with fixed hash
    buffer sizes because the authenticode hashes need to track hashing of
    different-sized regions based on the signature information.

    Args:
      fd: File open for reading.

    Raises:
      IOError: If there was an error writing the file.
    """
    hashes = self._HashFile(fd)

    # The empty file is very common; we don't keep back references for it in
    # the DB since they would just take up too much space.
    empty_hash = ("e3b0c44298fc1c149afbf4c8996fb924"
                  "27ae41e4649b934ca495991b7852b855")
    if hashes.sha256 == empty_hash:
      return

    # Update the hashes field now that we have calculated them all.
    fd.Set(fd.Schema.HASH, hashes)
    fd.Flush()

    if data_store.RelationalDBWriteEnabled():
      client_id, vfs_path = fd.urn.Split(2)
      path_type, components = rdf_objects.ParseCategorizedPath(vfs_path)
      path_info = rdf_objects.PathInfo(
          path_type=path_type, components=components, hash_entry=hashes)
      data_store.REL_DB.WritePathInfos(client_id, [path_info])

    # sha256 is the canonical location.
    canonical_urn = self.PATH.Add("generic/sha256").Add(str(hashes.sha256))
    if not list(aff4.FACTORY.Stat(canonical_urn)):
      aff4.FACTORY.Copy(fd.urn, canonical_urn)
      # Remove the STAT entry, it makes no sense to copy it between clients.
      with aff4.FACTORY.Open(
          canonical_urn, mode="rw", token=self.token) as new_fd:
        new_fd.Set(new_fd.Schema.STAT(None))

    self._AddToIndex(canonical_urn, fd.urn)

    for hash_type, hash_digest in hashes.ListSetFields():
      # Determine fingerprint type.
      hash_type = hash_type.name
      # No need to create a symlink for sha256, it's the canonical location.
      if hash_type == "sha256":
        continue
      hash_digest = str(hash_digest)
      fingerprint_type = "generic"
      if hash_type.startswith("pecoff_"):
        fingerprint_type = "pecoff"
        hash_type = hash_type[len("pecoff_"):]
      if hash_type not in self.HASH_TYPES[fingerprint_type]:
        continue

      file_store_urn = self.PATH.Add(fingerprint_type).Add(hash_type).Add(
          hash_digest)

      with aff4.FACTORY.Create(
          file_store_urn, aff4.AFF4Symlink, token=self.token) as symlink:
        symlink.Set(symlink.Schema.SYMLINK_TARGET, canonical_urn)

    # We do not want to be externally written here.
    return None
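
The `empty_hash` constant above is simply the SHA-256 digest of zero bytes, which is easy to verify:

import hashlib

assert hashlib.sha256(b"").hexdigest() == (
    "e3b0c44298fc1c149afbf4c8996fb924"
    "27ae41e4649b934ca495991b7852b855")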
Example #9
  def _HandleRelational(self, args, token=None):
    client_id = args.client_id.ToClientURN()

    if not args.file_path or args.file_path == "/":
      return self._GetRootChildren(args, token=token)

    if args.file_path == "fs":
      return self._GetFilesystemChildren(args)

    path_type, components = rdf_objects.ParseCategorizedPath(args.file_path)
    path_id = rdf_objects.PathID(components)

    child_path_ids = data_store.REL_DB.FindDescendentPathIDs(
        client_id=client_id.Basename(),
        path_type=path_type,
        path_id=path_id,
        max_depth=1)

    child_path_infos = data_store.REL_DB.FindPathInfosByPathIDs(
        client_id=client_id.Basename(),
        path_type=path_type,
        path_ids=child_path_ids).values()

    items = []

    for child_path_info in child_path_infos:
      if args.directories_only and not child_path_info.directory:
        continue

      child_item = ApiFile()
      child_item.name = child_path_info.basename

      if path_type == rdf_objects.PathInfo.PathType.OS:
        prefix = "fs/os/"
      elif path_type == rdf_objects.PathInfo.PathType.TSK:
        prefix = "fs/tsk/"
      elif path_type == rdf_objects.PathInfo.PathType.REGISTRY:
        prefix = "registry/"
      elif path_type == rdf_objects.PathInfo.PathType.TEMP:
        prefix = "temp/"

      child_item.path = prefix + "/".join(child_path_info.components)

      # TODO(hanuszczak): `PathInfo#directory` tells us whether a given path has
      # ever been observed as a directory. Is this what we want here, or should
      # we use `st_mode` information instead?
      child_item.is_directory = child_path_info.directory
      child_item.stat = child_path_info.stat_entry

      # The `age` field collides with the RDF `age` pseudo-property, so we use
      # `Set` to assign the right attribute.
      child_item.Set("age", child_path_info.timestamp)

      items.append(child_item)

    # TODO(hanuszczak): Instead of getting the whole list from the database and
    # then filtering the results we should do the filtering directly in the
    # database query.
    if args.filter:
      pattern = re.compile(args.filter, re.IGNORECASE)
      is_matching = lambda item: pattern.search(item.name)
      items = list(filter(is_matching, items))

    items.sort(key=lambda item: item.path)

    if args.count:
      items = items[args.offset:args.offset + args.count]
    else:
      items = items[args.offset:]

    return ApiListFilesResult(items=items)
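
Note that the `if/elif` chain above leaves `prefix` unbound if a path type outside the four known ones ever appears. A table-driven sketch that makes the unhandled case explicit, using the same names as the code above:

_PREFIX_BY_PATH_TYPE = {
    rdf_objects.PathInfo.PathType.OS: "fs/os/",
    rdf_objects.PathInfo.PathType.TSK: "fs/tsk/",
    rdf_objects.PathInfo.PathType.REGISTRY: "registry/",
    rdf_objects.PathInfo.PathType.TEMP: "temp/",
}

try:
    prefix = _PREFIX_BY_PATH_TYPE[path_type]
except KeyError:
    raise ValueError("Unexpected path type: %s" % path_type)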
Example #10
    def testParseIncorrect(self):
        with self.assertRaisesRegexp(ValueError, "path"):
            rdf_objects.ParseCategorizedPath("foo/bar")

        with self.assertRaisesRegexp(ValueError, "path"):
            rdf_objects.ParseCategorizedPath("fs")
Example #11
    def testParseTskExtraSlashes(self):
        path_type, components = rdf_objects.ParseCategorizedPath(
            "/fs///tsk/foo///bar")
        self.assertEqual(path_type, rdf_objects.PathInfo.PathType.TSK)
        self.assertEqual(components, ["foo", "bar"])
Example #12
    def testParseOsRoot(self):
        path_type, components = rdf_objects.ParseCategorizedPath("fs/os")
        self.assertEqual(path_type, rdf_objects.PathInfo.PathType.OS)
        self.assertEqual(components, [])
Example #13
    def testParseTemp(self):
        path_type, components = rdf_objects.ParseCategorizedPath(
            "temp/os/registry")
        self.assertEqual(path_type, rdf_objects.PathInfo.PathType.TEMP)
        self.assertEqual(components, ["os", "registry"])
Example #14
    def testParseRegistry(self):
        path_type, components = rdf_objects.ParseCategorizedPath(
            "registry/thud/blargh")
        self.assertEqual(path_type, rdf_objects.PathInfo.PathType.REGISTRY)
        self.assertEqual(components, ["thud", "blargh"])
Example #15
    def testParseTsk(self):
        path_type, components = rdf_objects.ParseCategorizedPath(
            "fs/tsk/quux/norf")
        self.assertEqual(path_type, rdf_objects.PathInfo.PathType.TSK)
        self.assertEqual(components, ["quux", "norf"])
Example #16
    def testParseOs(self):
        path_type, components = rdf_objects.ParseCategorizedPath(
            "fs/os/foo/bar")
        self.assertEqual(path_type, rdf_objects.PathInfo.PathType.OS)
        self.assertEqual(components, ["foo", "bar"])
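
Taken together, Examples #10 through #16 pin down the contract of `ParseCategorizedPath`: it ignores repeated slashes, maps the "fs/os", "fs/tsk", "registry" and "temp" prefixes to the corresponding `PathType`, and raises ValueError for anything else. A sketch that satisfies these tests (the real implementation may differ):

def ParseCategorizedPath(path):
    """Splits a categorized VFS path into a path type and components."""
    components = [c for c in path.split("/") if c]  # Drop empty pieces.
    if components[:2] == ["fs", "os"]:
        return rdf_objects.PathInfo.PathType.OS, components[2:]
    elif components[:2] == ["fs", "tsk"]:
        return rdf_objects.PathInfo.PathType.TSK, components[2:]
    elif components[:1] == ["registry"]:
        return rdf_objects.PathInfo.PathType.REGISTRY, components[1:]
    elif components[:1] == ["temp"]:
        return rdf_objects.PathInfo.PathType.TEMP, components[1:]
    else:
        raise ValueError("Incorrect path: '%s'" % path)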