Beispiel #1
0
    def testFindActionCrossDev(self):
        """Check the number of Find hits with and without crossing devices.

        The same search below /mock2/ is run three times: with cross_devs=True
        (9 hits expected), with cross_devs=False (7 hits expected) and with
        cross_devs left unset, which must give the same 7 hits.
        """
        mock_root = rdfvalue.PathSpec(path="/mock2/",
                                      pathtype=rdfvalue.PathSpec.PathType.OS)

        def CountHits(**extra_args):
            """Run Find below mock_root and return how many hits came back."""
            spec = rdfvalue.FindSpec(pathspec=mock_root,
                                     path_regex=".",
                                     **extra_args)
            spec.iterator.number = 200
            hits = []
            for reply in self.RunAction("Find", spec):
                # The action also returns non-FindSpec replies; skip those.
                if isinstance(reply, rdfvalue.FindSpec):
                    hits.append(reply.hit)
            return len(hits)

        self.assertEqual(CountHits(cross_devs=True), 9)
        self.assertEqual(CountHits(cross_devs=False), 7)
        # No cross_devs argument at all: must match the cross_devs=False count.
        self.assertEqual(CountHits(), 7)
Beispiel #2
0
  def testFindAction(self):
    """Test that iterating Find one entry at a time matches a bulk Find.

    The files below /mock2/ are first collected in a single call (iterator
    limit 200) and then again with an iterator limit of 1, feeding the
    returned iterator back into the next request. Both runs must produce the
    same hits in the same order, and the iterator must end up finished with
    no client state left over.
    """
    # First get all the files at once
    pathspec = rdfvalue.PathSpec(path="/mock2/",
                                 pathtype=rdfvalue.PathSpec.PathType.OS)
    request = rdfvalue.FindSpec(pathspec=pathspec, path_regex=".")
    request.iterator.number = 200
    result = self.RunAction("Find", request)
    all_files = [x.hit for x in result if isinstance(x, rdfvalue.FindSpec)]

    # Ask for the files one at the time
    files = []
    request = rdfvalue.FindSpec(pathspec=pathspec, path_regex=".")
    request.iterator.number = 1

    while True:
      result = self.RunAction("Find", request)
      if request.iterator.state == rdfvalue.Iterator.State.FINISHED:
        break

      # Every unfinished round must return exactly one hit plus the iterator.
      self.assertEqual(len(result), 2)
      self.assertIsInstance(result[0], rdfvalue.FindSpec)
      self.assertIsInstance(result[1], rdfvalue.Iterator)
      files.append(result[0].hit)

      # Continue the next round from where this one stopped.
      request.iterator = result[1].Copy()

    # zip() stops at the shorter sequence, so without this check missing (or
    # extra) hits in the incremental run would go unnoticed.
    self.assertEqual(len(all_files), len(files))
    for x, y in zip(all_files, files):
      self.assertRDFValueEqual(x, y)

    # Make sure the iterator is finished
    self.assertEqual(request.iterator.state, rdfvalue.Iterator.State.FINISHED)

    # Ensure we remove old states from client_state
    self.assertEqual(len(request.iterator.client_state.dat), 0)
Beispiel #3
0
  def GetFindSpecs(self):
    """Generate the Find specifications for the Sophos AV locations.

    Yields:
      Two rdfvalue.FindSpec objects, both limited to max_depth=1: first one
      for the infected-files path that matches every name (".*"), then one
      for the logs path restricted to the logs path regex.

    Raises:
      OSError: Presumably raised by the path helpers when the client
        operating system is not supported (not visible in this method).
    """
    infected_dir = self.GetSophosAVInfectedPath()
    infected_location = rdfvalue.PathSpec(path=infected_dir,
                                          pathtype=self.args.pathtype)
    yield rdfvalue.FindSpec(pathspec=infected_location,
                            path_regex=".*",
                            max_depth=1)

    # The logs helpers are only consulted once the consumer asks for the
    # second item.
    logs_dir = self.GetSophosAVLogsPath()
    logs_location = rdfvalue.PathSpec(path=logs_dir,
                                      pathtype=self.args.pathtype)
    logs_regex = self.GetSophosAVLogsPathRegex()
    yield rdfvalue.FindSpec(pathspec=logs_location,
                            path_regex=logs_regex,
                            max_depth=1)
Beispiel #4
0
  def Start(self):
    """Determine the Firefox history directories and fetch places.sqlite.

    Uses args.history_path when it is set; otherwise the paths are guessed
    from args.username. For every history path a "FetchFiles" flow is started
    that looks for a file named exactly "places.sqlite" up to two levels
    deep; results are handled in the "ParseFiles" state.

    Raises:
      flow.FlowError: If no history path was given and none could be guessed.
    """
    self.state.Register("hist_count", 0)
    self.state.Register("history_paths", [])

    if self.args.history_path:
      self.state.history_paths.append(self.args.history_path)
    else:
      self.state.history_paths = self.GuessHistoryPaths(self.args.username)

      if not self.state.history_paths:
        raise flow.FlowError("Could not find valid History paths.")

    if self.runner.output:
      # Replace the configured output with a VFSAnalysisFile at the same URN.
      self.runner.output = aff4.FACTORY.Create(
          self.runner.output.urn, "VFSAnalysisFile", token=self.token)

    # The dot is escaped so that it only matches a literal "."; an unescaped
    # dot would also accept names such as "placesXsqlite".
    history_file_regex = r"^places\.sqlite$"
    for path in self.state.history_paths:
      findspec = rdfvalue.FindSpec(max_depth=2,
                                   path_regex=history_file_regex)

      findspec.pathspec.path = path
      findspec.pathspec.pathtype = self.args.pathtype

      self.CallFlow("FetchFiles", findspec=findspec,
                    next_state="ParseFiles")
Beispiel #5
0
    def testFindWithMaxFiles(self):
        """Test that FindFiles with max_results=7 outputs exactly 7 results."""
        client_mock = action_mocks.ActionMock("Find")
        output_path = "analysis/FindFlowTest4"

        # Match every name, starting at the root of the OS filesystem.
        root = rdfvalue.PathSpec(path="/",
                                 pathtype=rdfvalue.PathSpec.PathType.OS)
        findspec = rdfvalue.FindSpec(path_regex=".*", pathspec=root)

        flow_args = dict(client_id=self.client_id,
                         token=self.token,
                         findspec=findspec,
                         iteration_count=3,
                         output=output_path,
                         max_results=7)
        # Drain the helper so the flow runs to completion.
        for _ in test_lib.TestFlowHelper("FindFiles", client_mock,
                                         **flow_args):
            pass

        # The output object must exist and contain exactly max_results items.
        output_urn = self.client_id.Add(output_path)
        fd = aff4.FACTORY.Open(output_urn, token=self.token)
        self.assertEqual(len(fd), 7)
Beispiel #6
0
    def testFindDirectories(self):
        """Test the FindFiles flow with a path regex of "bin"."""
        client_mock = action_mocks.ActionMock("Find")
        output_path = "analysis/FindFlowTest2"

        # Search from the root of the OS filesystem for names matching "bin".
        root = rdfvalue.PathSpec(path="/",
                                 pathtype=rdfvalue.PathSpec.PathType.OS)
        findspec = rdfvalue.FindSpec(path_regex="bin", pathspec=root)

        flow_args = dict(client_id=self.client_id,
                         token=self.token,
                         output=output_path,
                         findspec=findspec)
        # Drain the helper so the flow runs to completion.
        for _ in test_lib.TestFlowHelper("FindFiles", client_mock,
                                         **flow_args):
            pass

        # Open the collection the flow wrote its results to.
        output_urn = self.client_id.Add(output_path)
        fd = aff4.FACTORY.Open(output_urn, token=self.token)

        # Two results are expected; each is a StatEntry whose path has "bin".
        self.assertEqual(len(fd), 2)
        for entry in fd:
            entry_path = utils.SmartStr(entry.aff4path)
            self.assertTrue("bin" in entry_path)
            self.assertEqual(entry.__class__.__name__, "StatEntry")
Beispiel #7
0
    def runTest(self):
        """Run ListDirectory and FindFiles on the registry path, then check.

        Both flows are started one after the other; self.session_id is
        overwritten each time, so it refers to the FindFiles flow when
        CheckFlow is finally called.
        """
        list_directory_args = {
            "pathspec": rdfvalue.PathSpec(
                pathtype=rdfvalue.PathSpec.PathType.REGISTRY,
                path=self.reg_path),
        }
        find_files_args = {
            "findspec": rdfvalue.FindSpec(
                pathspec=rdfvalue.PathSpec(
                    path=self.reg_path,
                    pathtype=rdfvalue.PathSpec.PathType.REGISTRY),
                path_regex="ProfileImagePath"),
            "output": self.output_path,
        }
        launches = (
            ("ListDirectory", list_directory_args),
            ("FindFiles", find_files_args),
        )

        for flow_name, flow_args in launches:
            if not self.local_worker:
                self.session_id = flow_utils.StartFlowAndWait(
                    self.client_id,
                    flow_name=flow_name,
                    token=self.token,
                    **flow_args)
            else:
                # Run the flow with a worker in this process.
                self.session_id = debugging.StartFlowAndWorker(
                    self.client_id, flow_name, **flow_args)

        self.CheckFlow()
Beispiel #8
0
  def Start(self):
    """Determine the Chrome history directories and fetch the history files.

    Uses args.history_path when it is set; otherwise the paths are guessed
    from args.username. For every history path one "FetchFiles" flow is
    started per wanted file name ("History", plus "Archived History" when
    args.get_archive is set); results are handled in the "ParseFiles" state.

    Raises:
      flow.FlowError: If no history path was given and none could be guessed.
    """
    self.state.Register("hist_count", 0)
    # Directories that are searched for the history files.
    self.state.Register("history_paths", [])

    args = self.state.args
    explicit_path = args.history_path
    if explicit_path:
      self.state.history_paths.append(explicit_path)

    current_output = self.runner.output
    if current_output:
      # Replace the configured output with a VFSAnalysisFile at the same URN.
      self.runner.output = aff4.FACTORY.Create(
          current_output.urn, "VFSAnalysisFile", token=self.token)

    if not self.state.history_paths:
      # Nothing was given explicitly - fall back to guessing.
      self.state.history_paths = self.GuessHistoryPaths(args.username)
      if not self.state.history_paths:
        raise flow.FlowError("Could not find valid History paths.")

    wanted_names = ["History"]
    if args.get_archive:
      wanted_names.append("Archived History")
    # Anchor each name so only exact file name matches are returned.
    name_regexes = ["^{0}$".format(name) for name in wanted_names]

    for history_dir in self.state.history_paths:
      for name_regex in name_regexes:
        location = rdfvalue.PathSpec(pathtype=args.pathtype,
                                     path=history_dir)
        findspec = rdfvalue.FindSpec(max_depth=1,
                                     path_regex=name_regex,
                                     pathspec=location)

        self.CallFlow("FetchFiles", findspec=findspec,
                      next_state="ParseFiles")
Beispiel #9
0
    def testFindFilesWithGlob(self):
        """Test the FindFiles flow with a path glob of "bash*"."""
        client_mock = action_mocks.ActionMock("Find")
        output_path = "analysis/FindFlowTest1"

        # Search from the root of the OS filesystem for names matching bash*.
        root = rdfvalue.PathSpec(path="/",
                                 pathtype=rdfvalue.PathSpec.PathType.OS)
        findspec = rdfvalue.FindSpec(path_glob="bash*", pathspec=root)

        flow_args = dict(client_id=self.client_id,
                         token=self.token,
                         output=output_path,
                         findspec=findspec)
        # Drain the helper so the flow runs to completion.
        for _ in test_lib.TestFlowHelper("FindFiles", client_mock,
                                         **flow_args):
            pass

        # Open the collection the flow wrote its results to.
        output_urn = self.client_id.Add(output_path)
        fd = aff4.FACTORY.Open(output_urn, token=self.token)

        # Every result must be named exactly "bash".
        basenames = {entry.aff4path.Basename() for entry in fd}
        self.assertEqual(sorted(basenames), ["bash"])

        # Two results are expected, each a StatEntry whose path ends in bash.
        self.assertEqual(len(fd), 2)
        for entry in fd:
            entry_path = utils.SmartStr(entry.aff4path)
            self.assertTrue(entry_path.endswith("bash"))
            self.assertEqual(entry.__class__.__name__, "StatEntry")
Beispiel #10
0
 def testNoFilters(self):
   """Test that we get all files when no filters are in place."""
   # Only the location and cross_devs are set; no regex or size filters.
   spec = rdfvalue.FindSpec(
       pathspec=rdfvalue.PathSpec(path="/mock2/",
                                  pathtype=rdfvalue.PathSpec.PathType.OS),
       cross_devs=True)
   spec.iterator.number = 200

   hits = []
   for reply in self.RunAction("Find", spec):
     if isinstance(reply, rdfvalue.FindSpec):
       hits.append(reply.hit)

   self.assertEqual(len(hits), 9)
Beispiel #11
0
class TestFindTSKLinux(TestListDirectoryTSKLinux):
    """Tests if the find flow works on Linux and Darwin using Sleuthkit.

    Reuses TestListDirectoryTSKLinux, overriding only the flow name and its
    arguments: a FindFiles search for any name below /bin/ through TSK.
    """
    flow = "FindFiles"

    args = dict(
        findspec=rdfvalue.FindSpec(
            path_regex=".",
            pathspec=rdfvalue.PathSpec(
                path="/bin/",
                pathtype=rdfvalue.PathSpec.PathType.TSK)))
Beispiel #12
0
class TestFindOSLinuxDarwin(TestListDirectoryOSLinuxDarwin):
    """Tests if the find flow works on Linux and Darwin.

    Reuses TestListDirectoryOSLinuxDarwin, overriding only the flow name and
    its arguments: a FindFiles search for any name below /bin/ through the
    OS path type.
    """
    flow = "FindFiles"

    args = dict(
        findspec=rdfvalue.FindSpec(
            path_regex=".",
            pathspec=rdfvalue.PathSpec(
                path="/bin/",
                pathtype=rdfvalue.PathSpec.PathType.OS)))
Beispiel #13
0
    def testFindAction2(self):
        """Test that the path regex restricts hits to matching names."""
        spec = rdfvalue.FindSpec(
            pathspec=rdfvalue.PathSpec(
                path="/mock2/",
                pathtype=rdfvalue.PathSpec.PathType.OS),
            path_regex=".*mp3")
        spec.iterator.number = 200

        hits = []
        for reply in self.RunAction("Find", spec):
            if isinstance(reply, rdfvalue.FindSpec):
                hits.append(reply.hit)

        # Exactly one file is expected to match: file.mp3.
        self.assertEqual(len(hits), 1)
        self.assertEqual(hits[0].pathspec.Basename(), "file.mp3")
Beispiel #14
0
    def ProcessEntry(self, responses):
        """Handle client responses and continue walking the component tree.

        Unsuccessful responses are ignored. If the response iterator is not
        finished, the same Find request is re-issued with the updated
        iterator. Every response is then passed to _ProcessResponse, either
        for the single component path stored in the request data or, for a
        combined regex request, for each child of the base node that the
        response matches.

        Args:
          responses: The responses object delivered to this state.
        """
        if not responses.success:
            return

        # An unfinished iterator means the client has more entries for us:
        # repeat the request, continuing from the returned iterator.
        iterator = responses.iterator
        if iterator and iterator.state != iterator.State.FINISHED:
            followup = rdfvalue.FindSpec(responses.request.request.args)
            followup.iterator = iterator
            self.CallClient("Find",
                            followup,
                            next_state="ProcessEntry",
                            request_data=responses.request_data)

        # Find replies wrap the stat entry in a FindSpec; unwrap those and
        # keep everything else as it is.
        stat_entries = []
        for reply in responses:
            if isinstance(reply, rdfvalue.FindSpec):
                reply = reply.hit
            stat_entries.append(reply)

        # A plain path match records the exact tree node to continue from.
        component_path = responses.request_data.get("component_path")
        if component_path is not None:
            for entry in stat_entries:
                self._ProcessResponse(entry, [component_path])
            return

        # Otherwise this was a combined regex covering all children of the
        # base node, so each response has to be checked against every child.
        base_path = responses.request_data["base_path"]
        base_node = self.FindNode(base_path)
        for entry in stat_entries:
            matching_components = [
                base_path + [child_key]
                for child_key in base_node.keys()
                if self._MatchPath(rdfvalue.PathSpec(child_key), entry)
            ]

            if matching_components:
                self._ProcessResponse(entry,
                                      matching_components,
                                      base_wildcard=True)
Beispiel #15
0
 def testFindAction3(self):
     """Test that the data regex restricts hits to files containing it."""
     spec = rdfvalue.FindSpec(
         pathspec=rdfvalue.PathSpec(path="/mock2/",
                                    pathtype=rdfvalue.PathSpec.PathType.OS),
         data_regex="Secret",
         cross_devs=True)
     spec.iterator.number = 200

     hits = []
     for reply in self.RunAction("Find", spec):
         if isinstance(reply, rdfvalue.FindSpec):
             hits.append(reply.hit)

     # Two files are expected, in this order.
     self.assertEqual(len(hits), 2)
     first_hit, second_hit = hits[0], hits[1]
     self.assertEqual(first_hit.pathspec.Basename(), "file1.txt")
     self.assertEqual(second_hit.pathspec.Basename(), "long_file.text")
Beispiel #16
0
  def Start(self):
    """Start one FindFiles flow per user on that user's OpenSavePidlMRU key.

    The users are read from the client object. Each flow searches the
    registry up to two levels below the key and reports to the "StoreMRUs"
    state, with the username attached as request data.
    """
    mru_key_template = ("HKEY_USERS/%s"
                        "/Software/Microsoft/Windows/CurrentVersion"
                        "/Explorer/ComDlg32/OpenSavePidlMRU")

    client = aff4.FACTORY.Open(self.client_id, mode="r", token=self.token)
    for user in client.Get(client.Schema.USER):
      findspec = rdfvalue.FindSpec(max_depth=2, path_regex=".")
      findspec.pathspec.pathtype = rdfvalue.PathSpec.PathType.REGISTRY
      findspec.pathspec.path = mru_key_template % user.sid
      findspec.iterator.number = 1000

      self.CallFlow("FindFiles",
                    findspec=findspec,
                    output=None,
                    next_state="StoreMRUs",
                    request_data={"username": user.username})
Beispiel #17
0
    def testDownloadDirectorySub(self):
        """Test FetchFiles on /c/Downloads with max_depth=5 and glob "*".

        Afterwards fs/os/c/Downloads must contain a.txt, b.txt, c.txt, d.txt
        and sub1, and sub1 must in turn contain a.txt, b.txt, c.txt and d.txt.
        """
        os_pathtype = rdfvalue.PathSpec.PathType.OS
        # Serve OS paths from the test fixture.
        vfs.VFS_HANDLERS[os_pathtype] = test_lib.ClientVFSHandlerFixture

        # The client actions that the FetchFiles flow relies on.
        client_mock = test_lib.ActionMock("HashFile", "HashBuffer", "StatFile",
                                          "Find", "TransferBuffer")

        downloads = rdfvalue.PathSpec(path="/c/Downloads",
                                      pathtype=os_pathtype)
        findspec = rdfvalue.FindSpec(max_depth=5,
                                     pathspec=downloads,
                                     path_glob="*")

        # Drain the helper so the flow runs to completion.
        for _ in test_lib.TestFlowHelper("FetchFiles",
                                         client_mock,
                                         client_id=self.client_id,
                                         findspec=findspec,
                                         token=self.token):
            pass

        # The base directory must now exist in the client's VFS.
        base_urn = self.client_id.Add("fs/os/c/Downloads")
        base_fd = aff4.FACTORY.Open(base_urn, token=self.token)
        top_level = list(base_fd.OpenChildren())

        # Five entries are expected: four text files and the sub1 directory.
        self.assertEqual(len(top_level), 5)
        self.assertEqual(["a.txt", "b.txt", "c.txt", "d.txt", "sub1"],
                         sorted(entry.urn.Basename() for entry in top_level))

        # Pick out sub1; the loop variable keeps pointing at it after break.
        for subdir in top_level:
            if subdir.urn.Basename() == "sub1":
                break

        nested = list(subdir.OpenChildren())

        # sub1 holds the same four text files.
        self.assertEqual(len(nested), 4)
        self.assertEqual(["a.txt", "b.txt", "c.txt", "d.txt"],
                         sorted(entry.urn.Basename() for entry in nested))
Beispiel #18
0
    def Start(self, responses):
        """Send one registry Find request per path stored in the state.

        Args:
          responses: The responses that triggered this state; the client id
            is taken from its request.
        """
        target_client = responses.request.client_id
        registry_type = rdfvalue.PathSpec.PathType.REGISTRY

        for registry_path in self.state.paths:
            find_request = rdfvalue.FindSpec()
            find_request.pathspec.path = registry_path
            find_request.pathspec.pathtype = registry_type

            # Only restrict the depth when a non-zero limit is configured.
            depth_limit = self.state.max_depth
            if depth_limit:
                find_request.max_depth = depth_limit

            # Hard coded limit so this does not get too big.
            find_request.iterator.number = 10000

            self.CallClient("Find",
                            find_request,
                            client_id=target_client,
                            next_state="StoreResults")
Beispiel #19
0
  def testFindSizeLimits(self):
    """Test that min_file_size and max_file_size restrict the hits."""
    spec = rdfvalue.FindSpec(min_file_size=4, max_file_size=15,
                             cross_devs=True)
    spec.pathspec.Append(path="/mock2/",
                         pathtype=rdfvalue.PathSpec.PathType.OS)
    spec.iterator.number = 200

    basenames = [
        reply.hit.pathspec.Basename()
        for reply in self.RunAction("Find", spec)
        if isinstance(reply, rdfvalue.FindSpec)
    ]
    self.assertEqual(len(basenames), 5)

    # Our mock filesize is the length of the base filename, so every name we
    # got back must itself be between 4 and 15 characters long.
    for basename in basenames:
      self.assertTrue(4 <= len(basename) <= 15)
Beispiel #20
0
  def StartRequests(self):
    """Start a FetchFiles flow for every interpolated cache path.

    Each configured path is expanded for the known users and searched for
    files whose content matches args.data_regex; results are handled in the
    "HandleResults" state.
    """
    client = aff4.FACTORY.Open(self.client_id, token=self.token)

    output = self.runner.output
    if output:
      description = "CacheGrep for {0}".format(self.args.data_regex)
      output.Set(output.Schema.DESCRIPTION(description))

    # Build "domain\username"; a leading backslash left over from an empty
    # domain is stripped.
    usernames = [
        ("%s\\%s" % (user.domain, user.username)).lstrip("\\")
        for user in self.state.users
    ]

    for path_template in self.state.all_paths:
      expanded_paths = flow_utils.InterpolatePath(
          path_template, client, users=usernames)
      for expanded_path in expanded_paths:
        findspec = rdfvalue.FindSpec(data_regex=self.args.data_regex)
        findspec.pathspec.pathtype = self.args.pathtype
        findspec.pathspec.path = expanded_path
        findspec.iterator.number = 800

        self.CallFlow("FetchFiles",
                      findspec=findspec,
                      next_state="HandleResults")
Beispiel #21
0
    def testCollectionOverwriting(self):
        """Test that a second FindFiles run replaces the earlier output."""
        client_mock = action_mocks.ActionMock("Find")
        output_path = "analysis/FindFlowTest5"

        # Search from the root of the OS filesystem for names matching "bin".
        findspec = rdfvalue.FindSpec()
        findspec.path_regex = "bin"
        findspec.pathspec.path = "/"
        findspec.pathspec.pathtype = rdfvalue.PathSpec.PathType.OS

        def RunFindFiles(**extra_flow_args):
            """Run FindFiles to completion and return the opened output."""
            for _ in test_lib.TestFlowHelper("FindFiles",
                                             client_mock,
                                             client_id=self.client_id,
                                             token=self.token,
                                             findspec=findspec,
                                             output=output_path,
                                             **extra_flow_args):
                pass
            return aff4.FACTORY.Open(self.client_id.Add(output_path),
                                     token=self.token)

        # The first run is expected to produce two results.
        first_output = RunFindFiles()
        self.assertEqual(len(first_output), 2)

        # A second run with a different regex and max_results=1 must leave
        # only its own single result behind, not 2 + 1.
        findspec.path_regex = "dd"
        second_output = RunFindFiles(max_results=1)
        self.assertEqual(len(second_output), 1)
Beispiel #22
0
    def Iterate(self, request, client_state):
        """Process one batch of directory entries for a Find request.

        Entries are taken from self.ListDirectory and a FindSpec reply is
        sent for each one accepted by self.FilterFile. At most
        request.iterator.number entries are examined per call (always at
        least one if any is available); when the listing is exhausted the
        iterator state is set to FINISHED.

        Args:
          request: The Find request; provides pathspec and iterator.
          client_state: Iterator state passed through to ListDirectory.
        """
        self.request = request

        limit = request.iterator.number

        # TODO(user): What is a reasonable measure of work here?
        # Counting starts at 1 so that `processed` is the number of entries
        # examined so far, which is also what gets logged below.
        for processed, f in enumerate(
                self.ListDirectory(request.pathspec, client_state), 1):

            # Only send the reply if the file matches all criteria
            if self.FilterFile(f):
                self.SendReply(rdfvalue.FindSpec(hit=f))

            # We only check a limited number of files in each iteration. This might
            # result in returning an empty response - but the iterator is not yet
            # complete. Flows must check the state of the iterator explicitly.
            if processed >= limit:
                logging.debug("Processed %s entries, quitting", processed)
                return

        # End this iterator
        request.iterator.state = rdfvalue.Iterator.State.FINISHED
Beispiel #23
0
  def testPermissionFilter(self):
    """Test that perm_mode and perm_mask select the expected entries.

    Four searches are run below /mock2/: an exact permission match, two
    masked matches on files and one masked match on directories. For each
    the number, order and names of the hits are checked.
    """
    mock_root = rdfvalue.PathSpec(path="/mock2/",
                                  pathtype=rdfvalue.PathSpec.PathType.OS)

    def FindHits(**perm_args):
      """Run Find below mock_root with the given permission arguments."""
      spec = rdfvalue.FindSpec(pathspec=mock_root, path_regex=".",
                               cross_devs=True, **perm_args)
      spec.iterator.number = 200
      return [reply.hit for reply in self.RunAction("Find", spec)
              if isinstance(reply, rdfvalue.FindSpec)]

    def CheckFiles(hits, directory, basenames):
      """Check hits are exactly the named files inside directory, in order."""
      self.assertEqual(len(hits), len(basenames))
      for hit, basename in zip(hits, basenames):
        self.assertEqual(hit.pathspec.Dirname().Basename(), directory)
        self.assertEqual(hit.pathspec.Basename(), basename)

    # Look for files that match exact permissions
    CheckFiles(FindHits(perm_mode=0o644),
               "directory2", ["file.jpg", "file.mp3"])

    # Look for files/folders where 'others' have 'write' permission. All other
    # attributes don't matter. Setuid bit must also be set and guid or sticky
    # bit must not be set.
    CheckFiles(FindHits(perm_mode=0o4002, perm_mask=0o7002),
               "directory1", ["file1.txt", "file2.txt"])

    # Look for files where 'others' have 'execute' permission. All other
    # attributes don't matter. Only look for 'regular' files.
    CheckFiles(FindHits(perm_mode=0o10001, perm_mask=0o10001),
               "directory3", ["file1.txt", "long_file.text"])

    # Look for folders where 'group' have 'execute' permission. All other
    # attributes don't matter. Only look for folders.
    folders = FindHits(perm_mode=0o40010, perm_mask=0o40010)
    expected_folders = ["directory2", "directory1", "directory3"]
    self.assertEqual(len(folders), 3)
    for folder, expected_name in zip(folders, expected_folders):
      self.assertEqual(folder.pathspec.Basename(), expected_name)
Beispiel #24
0
  def _ProcessResponse(self, response, component_paths, base_wildcard=False):
    """Schedule the follow-up work for one response.

    For every path in component_paths the corresponding node is looked up
    with self.FindNode. An empty node is reported as a hit through
    self.GlobReportMatch. Otherwise each child component of the node is
    handled according to its path_options:

    - RECURSIVE components are grouped by recursion depth and REGEX
      components are collected in a list; both are then sent to the client
      as combined "Find" requests.
    - CASE_INSENSITIVE components are appended to the base pathspec and
      either checked with a "StatFile" client call (when the component has
      no further children) or passed inline to the "ProcessEntry" state.

    Args:
      response: The entry being processed; its st_mode is inspected
        (presumably a StatEntry - TODO confirm). May be None.
      component_paths: The component paths to process; each one is a list
        that gets extended with the child component string.
      base_wildcard: When False, the directory check on response is never
        applied and child components are always processed.
    """
    for component_path in component_paths:
      # Collected per component path and sent as combined requests below.
      regexes_to_get = []
      recursions_to_get = {}

      node = self.FindNode(component_path)

      if not node:
        # Node is empty representing a leaf node - we found a hit - report it.
        self.GlobReportMatch(response)
        # NOTE(review): this returns from the whole method, so any remaining
        # entries in component_paths are not processed - confirm intended.
        return

      # There are further components in the tree - iterate over them.
      for component_str, next_node in node.items():
        component = rdfvalue.PathSpec(component_str)
        next_component = component_path + [component_str]

        # If we reach this point, we are instructed to go deeper into the
        # directory structure. We only want to actually do this if
        # - the last response was a proper directory,
        # - or it was a file (an image) that was explicitly given meaning
        #   no wildcards or groupings,
        # - or no_file_type_check was set.
        #
        # This reduces the number of TSK opens on the client that may
        # sometimes lead to instabilities due to bugs in the library.

        if response and (
            not (stat.S_ISDIR(response.st_mode) or
                 not base_wildcard or
                 self.state.no_file_type_check)):
          continue

        if component.path_options == component.Options.RECURSIVE:
          recursions_to_get.setdefault(
              component.recursion_depth, []).append(component)
        elif component.path_options == component.Options.REGEX:
          regexes_to_get.append(component)

        elif component.path_options == component.Options.CASE_INSENSITIVE:
          # Here we need to create the next pathspec by appending the current
          # component to what we already have. If we don't have anything yet, we
          # fall back to the root path. If there is no root path either, the
          # current component becomes the new base.
          base_pathspec = self._GetBasePathspec(response)
          if base_pathspec:
            pathspec = base_pathspec.Append(component)
          else:
            pathspec = component

          if not next_node:
            # Check for the existence of the last node.
            request = rdfvalue.ListDirRequest(pathspec=pathspec)

            # The stat is skipped only when the response is truthy and its
            # st_mode is non-zero and marks a regular file.
            if (response is None or
                (response and
                 (response.st_mode == 0 or
                  not stat.S_ISREG(response.st_mode)))):
              # If next node is empty, this node is a leaf node, we therefore
              # must stat it to check that it is there. There is a special case
              # here where this pathspec points to a file/directory in the root
              # directory. In this case, response will be None but we still need
              # to stat it.
              self.CallClient(
                  "StatFile", request, next_state="ProcessEntry",
                  request_data=dict(component_path=next_component))
          else:
            # There is no need to go back to the client for intermediate
            # paths in the prefix tree, just emulate this by recursively
            # calling this state inline.
            self.CallStateInline(
                [rdfvalue.StatEntry(pathspec=pathspec)],
                next_state="ProcessEntry",
                request_data=dict(component_path=next_component))

      if recursions_to_get or regexes_to_get:
        # Recursions or regexes need a base pathspec to operate on. If we
        # have neither a response or a root path, we send a default pathspec
        # that opens the root with pathtype "OS".
        base_pathspec = self._GetBasePathspec(response)
        if not base_pathspec:
          base_pathspec = rdfvalue.PathSpec(path="/", pathtype="OS")

        # One Find request per recursion depth; the regex is a
        # case-insensitive alternation of all component paths at that depth,
        # each anchored with ^ and $.
        for depth, recursions in recursions_to_get.iteritems():
          path_regex = "(?i)^" + "$|^".join(
              set([c.path for c in recursions])) + "$"

          findspec = rdfvalue.FindSpec(pathspec=base_pathspec,
                                       cross_devs=True,
                                       max_depth=depth,
                                       path_regex=path_regex)

          findspec.iterator.number = self.FILE_MAX_PER_DIR
          self.CallClient("Find", findspec,
                          next_state="ProcessEntry",
                          request_data=dict(base_path=component_path))

        # All plain regex components share a single Find request that is
        # limited to one directory level.
        if regexes_to_get:
          path_regex = "(?i)^" + "$|^".join(
              set([c.path for c in regexes_to_get])) + "$"
          findspec = rdfvalue.FindSpec(pathspec=base_pathspec,
                                       max_depth=1,
                                       path_regex=path_regex)

          findspec.iterator.number = self.FILE_MAX_PER_DIR
          self.CallClient("Find", findspec,
                          next_state="ProcessEntry",
                          request_data=dict(base_path=component_path))
Beispiel #25
0
    def ProcessEntry(self, responses):
        """Process the responses from the client.

        The node to process is looked up (via FindNode) from the
        "component_path" entry of the request data.  A response for an empty
        node is a leaf hit and is reported with ReportMatch.  Otherwise a
        follow-up request is issued for each child component of the node,
        chosen by the child's path_options:

          RECURSIVE:        a "Find" limited to the component's recursion_depth.
          REGEX:            a "Find" limited to depth 1.
          CASE_INSENSITIVE: a "StatFile" when the child is a leaf (skipped if
                            the response is a regular file), or an inline call
                            of this state when the child has children itself.

        Each RECURSIVE / REGEX child gets its own "Find" request carrying that
        child's component path.  A single combined regex for several children
        cannot be used here because the hits would all be continued under one
        child's subtree, whichever component they actually matched.

        Args:
          responses: The responses for the request that scheduled this state.
            Nothing is done if responses.success is false.
        """
        if not responses.success:
            return

        component_path = responses.request_data["component_path"]
        node = self.FindNode(component_path)

        # If we get a response with an unfinished iterator then we missed some
        # files. Call Find on the client until we're done.
        if (responses.iterator and
                responses.iterator.state != responses.iterator.State.FINISHED):
            findspec = rdfvalue.FindSpec(responses.request.request.args)
            findspec.iterator = responses.iterator
            self.CallClient("Find",
                            findspec,
                            next_state="ProcessEntry",
                            request_data=responses.request_data)

        # Sort the children of this node by the kind of request they need.
        # This depends only on the node, not on the individual response, so
        # it is done once instead of once per response.
        recursive_children = []
        regex_children = []
        literal_children = []
        if node:
            for component_str, next_node in node.items():
                component = rdfvalue.PathSpec(component_str)
                # Use the pathtype from the flow args.
                component.pathtype = self.state.args.pathtype
                child_path = component_path + [component_str]

                options = component.path_options
                if options == component.Options.RECURSIVE:
                    recursive_children.append((component, child_path))
                elif options == component.Options.REGEX:
                    regex_children.append((component, child_path))
                elif options == component.Options.CASE_INSENSITIVE:
                    literal_children.append(
                        (component, child_path, next_node))

        for response in responses:
            # The Find client action does not return a StatEntry but a
            # FindSpec. Normalize to a StatEntry.
            if isinstance(response, rdfvalue.FindSpec):
                response = response.hit

            if not node:
                # Node is empty representing a leaf node - we found a hit -
                # report it.
                self.ReportMatch(response)
                continue

            for component, child_path, next_node in literal_children:
                pathspec = response.pathspec.Copy().AppendPath(component.path)

                if next_node:
                    # There is no need to go back to the client for
                    # intermediate paths in the prefix tree, just emulate this
                    # by recursively calling this state inline.
                    self.CallStateInline(
                        [rdfvalue.StatEntry(pathspec=pathspec)],
                        next_state="ProcessEntry",
                        request_data=dict(component_path=child_path))

                elif response.st_mode == 0 or not stat.S_ISREG(
                        response.st_mode):
                    # The child is a leaf node, we therefore must stat it to
                    # check that it is there. A regular file cannot contain
                    # further entries, so nothing is requested below one.
                    self.CallClient(
                        "StatFile",
                        rdfvalue.ListDirRequest(pathspec=pathspec),
                        next_state="ProcessEntry",
                        request_data=dict(component_path=child_path))

            for component, child_path in recursive_children:
                findspec = rdfvalue.FindSpec(
                    pathspec=response.pathspec,
                    cross_devs=True,
                    max_depth=component.recursion_depth,
                    path_regex="(?i)^" + component.path + "$")

                findspec.iterator.number = self.FILE_MAX_PER_DIR
                self.CallClient(
                    "Find",
                    findspec,
                    next_state="ProcessEntry",
                    request_data=dict(component_path=child_path))

            for component, child_path in regex_children:
                findspec = rdfvalue.FindSpec(
                    pathspec=response.pathspec,
                    max_depth=1,
                    path_regex="(?i)^" + component.path + "$")

                findspec.iterator.number = self.FILE_MAX_PER_DIR
                self.CallClient(
                    "Find",
                    findspec,
                    next_state="ProcessEntry",
                    request_data=dict(component_path=child_path))
Beispiel #26
0
    def testFetchFilesFlow(self):
        """Run the FetchFiles flow on winexec_img.dd and check what it stored.

        MultiGetFile.CHUNK_SIZE and MultiGetFile.MIN_CALL_TO_FILE_STORE are
        stubbed for the duration of the test and FileStore.CheckHashes is
        instrumented, so that the number of CheckHashes calls and the size of
        their second argument can be asserted afterwards.
        """

        # Very small chunks to stress test this flow.
        with test_lib.MultiStubber(
            (transfer.MultiGetFile, "CHUNK_SIZE", self.chunk_size),
            (transfer.MultiGetFile, "MIN_CALL_TO_FILE_STORE", 10)):
            with test_lib.Instrument(filestore.FileStore,
                                     "CheckHashes") as check_hashes_instrument:

                # Search for paths ending in .exe or .sys, starting at "/"
                # (pathtype TSK) nested inside the winexec_img.dd file
                # (pathtype OS).
                path = os.path.join(self.base_path, "winexec_img.dd")
                self.findspec = rdfvalue.FindSpec(path_regex=r"\.(exe|sys)$")
                self.findspec.pathspec.path = path
                self.findspec.pathspec.pathtype = rdfvalue.PathSpec.PathType.OS
                self.findspec.pathspec.Append(
                    path="/", pathtype=rdfvalue.PathSpec.PathType.TSK)

                # Keep an independent copy of the search root; it is extended
                # below to build the path of the pre-existing file.
                self.base_pathspec = self.findspec.pathspec.Copy()

                # First create some existing files in the VFS so we can ensure they get
                # updated.
                inspect_path = self.base_pathspec.Copy()
                inspect_path.AppendPath("Ext2IFS_1_10b.exe")

                urn = aff4.AFF4Object.VFSGRRClient.PathspecToURN(
                    inspect_path, self.client_id)

                # Placeholder content written at that URN before the flow runs.
                fd = aff4.FACTORY.Create(urn,
                                         "AFF4MemoryStream",
                                         token=self.token)
                fd.Write("hello")
                fd.Close()

                # Now run the fetch all files.
                client_mock = test_lib.ActionMock("TransferBuffer", "StatFile",
                                                  "Find", "HashFile",
                                                  "HashBuffer")

                # Iterate the helper to exhaustion; the yielded values are not
                # used (presumably each step advances the flow - TODO confirm).
                for _ in test_lib.TestFlowHelper("FetchFiles",
                                                 client_mock,
                                                 token=self.token,
                                                 client_id=self.client_id,
                                                 findspec=self.findspec):
                    pass

                # Checks implemented by helper methods of this test class.
                self.CheckFindExeFiles()
                self.CheckPresenceOfSignedData()
                self.CheckIndexLookup()
                pathlist = [
                    "/a/b/c/g/f/pciide.sys", "pciide.sys",
                    "/a/b/c/g/h/pciide.sys", "/a/b/c/g/pciide.sys"
                ]
                self.CheckExistingFile(pathlist)

                # In this test we limit the maximum number of times the filestore check
                # hashes is called to 10. There are 23 hits in the test data, so we
                # expect 3 calls, of 10, 10, and 3:
                self.assertEqual(len(check_hashes_instrument.args), 3)

                # args[i][1] is the second recorded argument of call i
                # (presumably the batch of hashes to check - TODO confirm).
                self.assertEqual(len(check_hashes_instrument.args[0][1]), 10)
                self.assertEqual(len(check_hashes_instrument.args[1][1]), 10)
                self.assertEqual(len(check_hashes_instrument.args[2][1]), 3)

                # The first child opened under analysis/FetchFiles must hold
                # one entry per hit.
                fd = aff4.FACTORY.Open(
                    self.client_id.Add("analysis/FetchFiles"),
                    token=self.token)
                collection = list(fd.OpenChildren())[0]
                self.assertEqual(len(collection), 23)