Example #1
 def ParseFiles(self, responses):
     """Take each file we retrieved and get the history from it."""
     if responses:
         for response in responses:
             client_path = db.ClientPath.FromPathSpec(
                 self.client_id, response.stat_entry.pathspec)
             fd = file_store.OpenFile(client_path)
             hist = firefox3_history.Firefox3History()
             count = 0
             for epoch64, dtype, url, dat1 in hist.Parse(fd):
                 count += 1
                 str_entry = "%s %s %s %s" % (
                     datetime.datetime.utcfromtimestamp(
                         epoch64 / 1e6), url, dat1, dtype)
                 self.SendReply(rdfvalue.RDFString(str_entry))
             self.Log(
                 "Wrote %d Firefox History entries for user %s from %s",
                 count, self.args.username,
                 response.stat_entry.pathspec.Basename())
             self.state.hist_count += count
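Note the epoch64 / 1e6 division above: the history parser yields timestamps in microseconds since the UNIX epoch, so they must be scaled to seconds before conversion. A standalone sketch of that conversion (the sample value is made up):

import datetime

epoch64 = 1393839121000000  # microseconds since the epoch (sample value)
print(datetime.datetime.utcfromtimestamp(epoch64 / 1e6).isoformat())
# -> 2014-03-03T09:32:01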
Example #2
    def Handle(self, args, context=None):
        result = ApiGetFileDecodersResult()

        path_type, components = rdf_objects.ParseCategorizedPath(
            args.file_path)
        client_path = db.ClientPath(client_id=str(args.client_id),
                                    path_type=path_type,
                                    components=components)

        for decoder_name in decoders.FACTORY.Names():
            decoder = decoders.FACTORY.Create(decoder_name)

            filedesc = file_store.OpenFile(client_path)
            filectx = context_lib.NullContext(filedesc)

            with filectx as filedesc:
                if decoder.Check(filedesc):
                    result.decoder_names.append(decoder_name)

        return result
Example #3
    def testDownloadDirectory(self):
        """Test a FileFinder flow with depth=1."""
        # Mock the client actions FileFinder uses.
        client_mock = action_mocks.FileFinderClientMock()

        test_dir = self._SetupTestDir("testDownloadDirectory")

        flow_test_lib.TestFlowHelper(
            compatibility.GetName(file_finder.FileFinder),
            client_mock,
            client_id=self.client_id,
            paths=[test_dir + "/*"],
            action=rdf_file_finder.FileFinderAction.Download(),
            token=self.token)

        # There should be 5 children:
        expected_filenames = ["a.txt", "b.txt", "c.txt", "d.txt", "sub1"]

        if data_store.AFF4Enabled():
            output_path = self.client_id.Add("fs/os").Add(test_dir)

            output_fd = aff4.FACTORY.Open(output_path, token=self.token)
            children = list(output_fd.OpenChildren())

            filenames = [child.urn.Basename() for child in children]

            self.assertCountEqual(filenames, expected_filenames)

            fd = aff4.FACTORY.Open(output_path.Add("a.txt"))
            self.assertEqual(fd.read(), "Hello World!\n")
        else:
            children = data_store.REL_DB.ListChildPathInfos(
                self.client_id.Basename(), rdf_objects.PathInfo.PathType.OS,
                test_dir.strip("/").split("/"))

            filenames = [child.components[-1] for child in children]
            self.assertCountEqual(filenames, expected_filenames)
            fd = file_store.OpenFile(
                db.ClientPath.FromPathInfo(self.client_id.Basename(),
                                           children[0]))
            self.assertEqual(fd.read(), "Hello World!\n")
Example #4
  def testMultiGetFileSetsFileHashAttributeWhenMultipleChunksDownloaded(self):
    client_mock = action_mocks.MultiGetFileClientMock()
    pathspec = rdf_paths.PathSpec(
        pathtype=rdf_paths.PathSpec.PathType.OS,
        path=os.path.join(self.base_path, "test_img.dd"))

    args = transfer.MultiGetFileArgs(pathspecs=[pathspec])
    flow_test_lib.TestFlowHelper(
        transfer.MultiGetFile.__name__,
        client_mock,
        token=self.token,
        client_id=self.client_id,
        args=args)

    h = hashlib.sha256()
    with open(os.path.join(self.base_path, "test_img.dd"), "rb") as model_fd:
      h.update(model_fd.read())

    if not data_store.RelationalDBReadEnabled(category="filestore"):
      # Fix path for Windows testing.
      pathspec.path = pathspec.path.replace("\\", "/")
      # Test the AFF4 file that was created.
      urn = pathspec.AFF4Path(self.client_id)
      fd_hash = data_store_utils.GetUrnHashEntry(urn)

      self.assertTrue(fd_hash)
      self.assertEqual(fd_hash.sha256, h.digest())

    if data_store.RelationalDBReadEnabled(category="filestore"):
      cp = db.ClientPath.FromPathSpec(self.client_id.Basename(), pathspec)
      fd_rel_db = file_store.OpenFile(cp)
      self.assertEqual(fd_rel_db.hash_id.AsBytes(), h.digest())

      # Check that SHA256 hash of the file matches the contents
      # hash and that MD5 and SHA1 are set.
      history = data_store.REL_DB.ReadPathInfoHistory(
          cp.client_id, cp.path_type, cp.components)
      self.assertEqual(history[-1].hash_entry.sha256,
                       fd_rel_db.hash_id.AsBytes())
      self.assertIsNotNone(history[-1].hash_entry.sha1)
      self.assertIsNotNone(history[-1].hash_entry.md5)
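The last four assertions are a verification pattern that recurs throughout the transfer tests below: fetch the newest PathInfo with ReadPathInfoHistory, check that its hash_entry.sha256 equals the hash ID under which the content is stored in the file store, and check that SHA-1 and MD5 are merely present.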
Example #5
    def testMultiGetFileDeduplication(self):
        client_mock = action_mocks.MultiGetFileClientMock()

        pathspecs = []
        # Make 10 files to download.
        for i in range(10):
            path = os.path.join(self.temp_dir, "test_%s.txt" % i)
            with open(path, "wb") as fd:
                fd.write("Hello")

            pathspecs.append(
                rdf_paths.PathSpec(pathtype=rdf_paths.PathSpec.PathType.OS,
                                   path=path))

        # All those files are the same so the individual chunks should
        # only be downloaded once. By forcing maximum_pending_files=1,
        # there should only be a single TransferBuffer call.
        args = transfer.MultiGetFileArgs(pathspecs=pathspecs,
                                         maximum_pending_files=1)
        flow_test_lib.TestFlowHelper(transfer.MultiGetFile.__name__,
                                     client_mock,
                                     token=self.token,
                                     client_id=self.client_id,
                                     args=args)

        self.assertEqual(client_mock.action_counts["TransferBuffer"], 1)

        for pathspec in pathspecs:
            # Check that each referenced file can be read.
            cp = db.ClientPath.FromPathSpec(self.client_id, pathspec)
            fd_rel_db = file_store.OpenFile(cp)
            self.assertEqual("Hello", fd_rel_db.read())

            # Check that SHA256 hash of the file matches the contents
            # hash and that MD5 and SHA1 are set.
            history = data_store.REL_DB.ReadPathInfoHistory(
                cp.client_id, cp.path_type, cp.components)
            self.assertEqual(history[-1].hash_entry.sha256,
                             fd_rel_db.hash_id.AsBytes())
            self.assertIsNotNone(history[-1].hash_entry.sha1)
            self.assertIsNotNone(history[-1].hash_entry.md5)
Example #6
  def ParseFiles(self, responses):
    """Take each file we retrieved and get the history from it."""
    # Note that some of these Find requests will fail because some paths don't
    # exist, e.g. Chromium on most machines, so we don't check for success.
    if responses:
      for response in responses:
        client_path = db.ClientPath.FromPathSpec(self.client_id,
                                                 response.stat_entry.pathspec)
        filepath = response.stat_entry.pathspec.CollapsePath()
        fd = file_store.OpenFile(client_path)
        hist = chrome_history.ChromeParser()
        count = 0
        for epoch64, dtype, url, dat1, dat2, dat3 in hist.Parse(filepath, fd):
          count += 1
          str_entry = "%s %s %s %s %s %s" % (datetime.datetime.utcfromtimestamp(
              epoch64 / 1e6), url, dat1, dat2, dat3, dtype)
          self.SendReply(rdfvalue.RDFString(str_entry))

        self.Log("Wrote %d Chrome History entries for user %s from %s", count,
                 self.args.username, response.stat_entry.pathspec.Basename())
        self.state.hist_count += count
Example #7
 def ParseFiles(self, responses):
   """Take each file we retrieved and get the history from it."""
   if responses:
     for response in responses:
       if data_store.RelationalDBReadEnabled("filestore"):
         client_path = db.ClientPath.FromPathSpec(self.client_id,
                                                  response.stat_entry.pathspec)
         fd = file_store.OpenFile(client_path)
       else:
         fd = aff4.FACTORY.Open(
             response.stat_entry.AFF4Path(self.client_urn), token=self.token)
       hist = firefox3_history.Firefox3History(fd)
       count = 0
       for epoch64, dtype, url, dat1 in hist.Parse():
         count += 1
         str_entry = "%s %s %s %s" % (datetime.datetime.utcfromtimestamp(
             epoch64 / 1e6), url, dat1, dtype)
         self.SendReply(rdfvalue.RDFString(utils.SmartStr(str_entry)))
       self.Log("Wrote %d Firefox History entries for user %s from %s", count,
                self.args.username, response.stat_entry.pathspec.Basename())
       self.state.hist_count += count
Example #8
    def CheckFilesDownloaded(self, fnames):
        for fname in fnames:
            path_info = data_store.REL_DB.ReadPathInfo(
                self.client_id,
                rdf_objects.PathInfo.PathType.OS,
                components=self.FilenameToPathComponents(fname))
            size = path_info.stat_entry.st_size

            filepath = os.path.join(self.base_path, "searching", fname)
            with io.open(filepath, mode="rb") as fd:
                test_data = fd.read()

            self.assertEqual(size, len(test_data))

            fd = file_store.OpenFile(
                db.ClientPath(self.client_id,
                              rdf_objects.PathInfo.PathType.OS,
                              components=self.FilenameToPathComponents(fname)))

            # Make sure we can actually read the file.
            self.assertEqual(fd.read(), test_data)
Example #9
    def CheckFilesNotDownloaded(self, fnames):
        for fname in fnames:
            if data_store.RelationalDBReadEnabled(category="filestore"):
                try:
                    file_store.OpenFile(
                        db.ClientPath(
                            self.client_id.Basename(),
                            rdf_objects.PathInfo.PathType.OS,
                            components=self.FilenameToPathComponents(fname)))
                    self.fail("Found downloaded file: %s" % fname)
                except file_store.FileHasNoContentError:
                    pass
            else:
                file_urn = self.FileNameToURN(fname)
                with aff4.FACTORY.Open(file_urn, token=self.token) as fd:
                    # Directories have no size attribute.
                    if fd.Get(fd.Schema.TYPE
                              ) == aff4_standard.VFSDirectory.__name__:
                        continue

                    self.assertEqual(fd.Get(fd.Schema.SIZE), 0)
Example #10
    def testMultiGetFile(self):
        """Test MultiGetFile."""

        client_mock = action_mocks.MultiGetFileClientMock()
        pathspec = rdf_paths.PathSpec(pathtype=rdf_paths.PathSpec.PathType.OS,
                                      path=os.path.join(
                                          self.base_path, "test_img.dd"))
        expected_size = os.path.getsize(pathspec.path)

        args = transfer.MultiGetFileArgs(pathspecs=[pathspec, pathspec])
        with test_lib.Instrument(transfer.MultiGetFile,
                                 "_StoreStat") as storestat_instrument:
            flow_test_lib.TestFlowHelper(transfer.MultiGetFile.__name__,
                                         client_mock,
                                         token=self.token,
                                         client_id=self.client_id,
                                         args=args)

            # We should only have called StoreStat once because the two paths
            # requested were identical.
            self.assertLen(storestat_instrument.args, 1)

        # Fix path for Windows testing.
        pathspec.path = pathspec.path.replace("\\", "/")

        with open(pathspec.path, "rb") as fd2:
            # Test the file that was created.
            cp = db.ClientPath.FromPathSpec(self.client_id, pathspec)
            fd_rel_db = file_store.OpenFile(cp)
            self.CompareFDs(fd2, fd_rel_db)

        # Check that SHA256 hash of the file matches the contents
        # hash and that MD5 and SHA1 are set.
        history = data_store.REL_DB.ReadPathInfoHistory(
            cp.client_id, cp.path_type, cp.components)
        self.assertEqual(history[-1].hash_entry.sha256,
                         fd_rel_db.hash_id.AsBytes())
        self.assertEqual(history[-1].hash_entry.num_bytes, expected_size)
        self.assertIsNotNone(history[-1].hash_entry.sha1)
        self.assertIsNotNone(history[-1].hash_entry.md5)
Example #11
    def testMultiGetFileSizeLimit(self):
        client_mock = action_mocks.MultiGetFileClientMock()
        image_path = os.path.join(self.base_path, "test_img.dd")
        pathspec = rdf_paths.PathSpec(pathtype=rdf_paths.PathSpec.PathType.OS,
                                      path=image_path)

        # Read a bit more than one chunk (600 * 1024).
        expected_size = 750 * 1024
        args = transfer.MultiGetFileArgs(pathspecs=[pathspec],
                                         file_size=expected_size)
        flow_test_lib.TestFlowHelper(transfer.MultiGetFile.__name__,
                                     client_mock,
                                     token=self.token,
                                     client_id=self.client_id,
                                     args=args)

        with open(image_path, "rb") as fd:
            expected_data = fd.read(expected_size)

        cp = db.ClientPath.FromPathSpec(self.client_id, pathspec)
        fd_rel_db = file_store.OpenFile(cp)

        self.assertEqual(fd_rel_db.size, expected_size)

        data = fd_rel_db.read(2 * expected_size)
        self.assertLen(data, expected_size)

        d = hashlib.sha256()
        d.update(expected_data)
        self.assertEqual(fd_rel_db.hash_id.AsBytes(), d.digest())

        # Check that SHA256 hash of the file matches the contents
        # hash and that MD5 and SHA1 are set.
        history = data_store.REL_DB.ReadPathInfoHistory(
            cp.client_id, cp.path_type, cp.components)
        self.assertEqual(history[-1].hash_entry.sha256,
                         fd_rel_db.hash_id.AsBytes())
        self.assertEqual(history[-1].hash_entry.num_bytes, expected_size)
        self.assertIsNotNone(history[-1].hash_entry.sha1)
        self.assertIsNotNone(history[-1].hash_entry.md5)
Example #12
    def testGetFilePathCorrection(self):
        """Tests that the pathspec returned is used for the aff4path."""
        client_mock = action_mocks.GetFileClientMock()
        # Deliberately using the wrong casing.
        pathspec = rdf_paths.PathSpec(pathtype=rdf_paths.PathSpec.PathType.OS,
                                      path=os.path.join(
                                          self.base_path, "TEST_IMG.dd"))
        expected_size = os.path.getsize(
            os.path.join(self.base_path, "test_img.dd"))

        session_id = flow_test_lib.TestFlowHelper(transfer.GetFile.__name__,
                                                  client_mock,
                                                  token=self.token,
                                                  client_id=self.client_id,
                                                  pathspec=pathspec)

        results = flow_test_lib.GetFlowResults(self.client_id, session_id)
        self.assertLen(results, 1)
        res_pathspec = results[0].pathspec

        # Fix path for Windows testing.
        pathspec.path = pathspec.path.replace("\\", "/")
        with open(res_pathspec.path, "rb") as fd2:
            fd2.seek(0, 2)

            cp = db.ClientPath.FromPathSpec(self.client_id, res_pathspec)

            fd_rel_db = file_store.OpenFile(cp)
            self.CompareFDs(fd2, fd_rel_db)

        # Only the SHA-256 hash of the contents should have been calculated,
        # since that is all that is needed to put the contents into the
        # file store.
        history = data_store.REL_DB.ReadPathInfoHistory(
            cp.client_id, cp.path_type, cp.components)
        self.assertEqual(history[-1].hash_entry.sha256,
                         fd_rel_db.hash_id.AsBytes())
        self.assertEqual(history[-1].hash_entry.num_bytes, expected_size)
        self.assertIsNone(history[-1].hash_entry.sha1)
        self.assertIsNone(history[-1].hash_entry.md5)
Example #13
    def testClientFileFinderUpload(self):
        paths = [os.path.join(self.base_path, "{**,.}/*.plist")]
        action = rdf_file_finder.FileFinderAction.Download()

        session_id = self._RunClientFileFinder(paths, action)
        collection = flow.GRRFlow.ResultCollectionForFID(session_id)
        results = list(collection)
        self.assertEqual(len(results), 5)
        relpaths = [
            os.path.relpath(p.stat_entry.pathspec.path, self.base_path)
            for p in results
        ]
        self.assertItemsEqual(relpaths, [
            "History.plist", "History.xml.plist", "test.plist",
            "parser_test/com.google.code.grr.plist",
            "parser_test/InstallHistory.plist"
        ])

        for r in results:
            aff4_obj = aff4.FACTORY.Open(
                r.stat_entry.pathspec.AFF4Path(self.client_id),
                token=self.token)
            with open(r.stat_entry.pathspec.path, "rb") as local_fd:
                data = local_fd.read()
            self.assertEqual(aff4_obj.Read(100), data[:100])

            if data_store.RelationalDBReadEnabled(category="filestore"):
                fd = file_store.OpenFile(
                    db.ClientPath.FromPathSpec(self.client_id.Basename(),
                                               r.stat_entry.pathspec))
                self.assertEqual(fd.read(100), data[:100])

                self.assertEqual(fd.hash_id.AsBytes(),
                                 hashlib.sha256(data).digest())
            else:
                hash_obj = data_store_utils.GetFileHashEntry(aff4_obj)
                self.assertEqual(hash_obj.sha1, hashlib.sha1(data).hexdigest())
                self.assertEqual(hash_obj.sha256,
                                 hashlib.sha256(data).hexdigest())
                self.assertEqual(hash_obj.md5, hashlib.md5(data).hexdigest())
Example #14
  def testDownloadsSingleHuntFileIfAuthorizationIsPresent(self):
    hunt = self._CreateHuntWithDownloadedFile()
    results = self.GetHuntResults(hunt)

    self.RequestAndGrantHuntApproval(hunt.Basename())

    self.Open("/")
    self.Click("css=a[grrtarget=hunts]")
    self.Click("css=td:contains('GenericHunt')")
    self.Click("css=li[heading=Results]")

    if data_store.RelationalDBEnabled():
      fd = file_store.OpenFile(
          flow_export.CollectionItemToClientPath(results[0]))
    else:
      fd = aff4.FACTORY.Open(
          flow_export.CollectionItemToAff4Path(results[0]), token=self.token)

    with mock.patch.object(fd.__class__, "Read") as mock_obj:
      self.Click(
          "css=grr-results-collection button:has(span.glyphicon-download)")
      self.WaitUntil(lambda: mock_obj.called)
Example #15
    def testGetFile(self):
        """Test that the GetFile flow works."""

        client_mock = action_mocks.GetFileClientMock()
        pathspec = rdf_paths.PathSpec(pathtype=rdf_paths.PathSpec.PathType.OS,
                                      path=os.path.join(
                                          self.base_path, "test_img.dd"))

        flow_test_lib.TestFlowHelper(transfer.GetFile.__name__,
                                     client_mock,
                                     token=self.token,
                                     client_id=self.client_id,
                                     pathspec=pathspec)

        # Fix path for Windows testing.
        pathspec.path = pathspec.path.replace("\\", "/")
        fd2 = open(pathspec.path, "rb")

        if data_store.RelationalDBReadEnabled():
            cp = db.ClientPath.FromPathSpec(self.client_id.Basename(),
                                            pathspec)
            fd_rel_db = file_store.OpenFile(cp)
            self.CompareFDs(fd2, fd_rel_db)

            # Only the SHA-256 hash of the contents should have been calculated,
            # since that is all that is needed to put the contents into the
            # file store.
            history = data_store.REL_DB.ReadPathInfoHistory(
                cp.client_id, cp.path_type, cp.components)
            self.assertEqual(history[-1].hash_entry.sha256,
                             fd_rel_db.hash_id.AsBytes())
            self.assertIsNone(history[-1].hash_entry.sha1)
            self.assertIsNone(history[-1].hash_entry.md5)
        else:
            # Test the AFF4 file that was created.
            urn = pathspec.AFF4Path(self.client_id)
            fd1 = aff4.FACTORY.Open(urn, token=self.token)
            fd2.seek(0, 2)
            self.assertEqual(fd2.tell(), int(fd1.Get(fd1.Schema.SIZE)))
            self.CompareFDs(fd1, fd2)
Example #16
    def testMultiGetFileMultiFiles(self):
        """Test MultiGetFile downloading many files at once."""
        client_mock = action_mocks.MultiGetFileClientMock()

        pathspecs = []
        # Make 30 files to download.
        for i in range(30):
            path = os.path.join(self.temp_dir, "test_%s.txt" % i)
            with io.open(path, "wb") as fd:
                fd.write(b"Hello")

            pathspecs.append(
                rdf_paths.PathSpec(pathtype=rdf_paths.PathSpec.PathType.OS,
                                   path=path))

        args = transfer.MultiGetFileArgs(pathspecs=pathspecs,
                                         maximum_pending_files=10)
        flow_test_lib.TestFlowHelper(transfer.MultiGetFile.__name__,
                                     client_mock,
                                     token=self.token,
                                     client_id=self.client_id,
                                     args=args)

        # Now open each file and make sure the data is there.
        for pathspec in pathspecs:
            cp = db.ClientPath.FromPathSpec(self.client_id, pathspec)
            fd_rel_db = file_store.OpenFile(cp)
            self.assertEqual(b"Hello", fd_rel_db.read())

            # Check that SHA256 hash of the file matches the contents
            # hash and that MD5 and SHA1 are set.
            history = data_store.REL_DB.ReadPathInfoHistory(
                cp.client_id, cp.path_type, cp.components)
            self.assertEqual(history[-1].hash_entry.sha256,
                             fd_rel_db.hash_id.AsBytes())
            self.assertEqual(history[-1].hash_entry.num_bytes, 5)
            self.assertIsNotNone(history[-1].hash_entry.sha1)
            self.assertIsNotNone(history[-1].hash_entry.md5)
Example #17
    def testFlowCollectFile(self):
        with temp.AutoTempFilePath() as temp_file_path:
            with io.open(temp_file_path, mode="w", encoding="utf-8") as fd:
                fd.write("Just sample text to put in the file.")

            table = f"""
      [
        {{ "collect_column": "{temp_file_path}" }}
      ]
      """

            with osquery_test_lib.FakeOsqueryiOutput(stdout=table, stderr=""):
                results = self._RunFlow("Doesn't matter", ["collect_column"])

        self.assertLen(results, 2)
        self.assertIsInstance(results[0], rdf_osquery.OsqueryResult)
        self.assertIsInstance(results[1], rdf_client_fs.StatEntry)

        pathspec = results[1].pathspec
        client_path = db.ClientPath.FromPathSpec(self.client_id, pathspec)
        fd_rel_db = file_store.OpenFile(client_path)
        file_text = fd_rel_db.read().decode("utf-8")
        self.assertEqual(file_text, "Just sample text to put in the file.")
Example #18
  def testFirefoxHistoryFetch(self):
    """Test that downloading the Firefox history works."""
    with self.MockClientRawDevWithImage():
      # Run the flow in the simulated way
      session_id = flow_test_lib.TestFlowHelper(
          webhistory.FirefoxHistory.__name__,
          self.client_mock,
          check_flow_errors=False,
          client_id=self.client_id,
          username="******",
          token=self.token,
          pathtype=rdf_paths.PathSpec.PathType.TSK)

    # Now check that the right files were downloaded.
    fs_path = "/home/test/.mozilla/firefox/adts404t.default/places.sqlite"

    output_path = self.client_id.Add("fs/tsk").Add("/".join(
        [self.base_path.replace("\\", "/"), "test_img.dd"])).Add(
            fs_path.replace("\\", "/"))

    # Check if the History file is created.
    if data_store.RelationalDBReadEnabled("filestore"):
      cp = db.ClientPath.TSK(self.client_id.Basename(),
                             tuple(output_path.Split()[3:]))
      rel_fd = file_store.OpenFile(cp)
      self.assertEqual(rel_fd.read(15), b"SQLite format 3")
    else:
      fd = aff4.FACTORY.Open(output_path, token=self.token)
      self.assertGreater(fd.size, 20000)
      self.assertEqual(fd.read(15), "SQLite format 3")

    # Check for analysis file.
    results = flow_test_lib.GetFlowResults(self.client_id, session_id)
    self.assertGreater(len(results), 3)
    data = "\n".join(map(str, results))
    self.assertTrue(data.find("Welcome to Firefox") != -1)
    self.assertTrue(data.find("sport.orf.at") != -1)
Example #19
    def testMultiGetFileMultiFiles(self):
        """Test MultiGetFile downloading many files at once."""
        client_mock = action_mocks.MultiGetFileClientMock()

        pathspecs = []
        # Make 30 files to download.
        for i in range(30):
            path = os.path.join(self.temp_dir, "test_%s.txt" % i)
            with open(path, "wb") as fd:
                fd.write("Hello")

            pathspecs.append(
                rdf_paths.PathSpec(pathtype=rdf_paths.PathSpec.PathType.OS,
                                   path=path))

        args = transfer.MultiGetFileArgs(pathspecs=pathspecs,
                                         maximum_pending_files=10)
        session_id = flow_test_lib.TestFlowHelper(
            transfer.MultiGetFile.__name__,
            client_mock,
            token=self.token,
            client_id=self.client_id,
            args=args)

        if data_store.RelationalDBReadEnabled():
            # Now open each file and make sure the data is there.
            for pathspec in pathspecs:
                cp = db.ClientPath.FromPathSpec(self.client_id.Basename(),
                                                pathspec)
                fd_rel_db = file_store.OpenFile(cp)
                self.assertEqual("Hello", fd_rel_db.read())

                # Check that SHA256 hash of the file matches the contents
                # hash and that MD5 and SHA1 are set.
                history = data_store.REL_DB.ReadPathInfoHistory(
                    cp.client_id, cp.path_type, cp.components)
                self.assertEqual(history[-1].hash_entry.sha256,
                                 fd_rel_db.hash_id.AsBytes())
                self.assertIsNotNone(history[-1].hash_entry.sha1)
                self.assertIsNotNone(history[-1].hash_entry.md5)
        else:
            # Check up on the internal flow state.
            flow_state = flow_test_lib.GetFlowState(self.client_id,
                                                    session_id,
                                                    token=self.token)
            # All the pathspecs should be in this list.
            self.assertLen(flow_state.indexed_pathspecs, 30)

            # At any one time, there should not be more than 10 files or hashes
            # pending.
            self.assertLessEqual(len(flow_state.pending_files), 10)
            self.assertLessEqual(len(flow_state.pending_hashes), 10)

            # When we finish there should be no pathspecs stored in the flow state.
            for flow_pathspec in flow_state.indexed_pathspecs:
                self.assertIsNone(flow_pathspec)
            for flow_request_data in flow_state.request_data_list:
                self.assertIsNone(flow_request_data)

            for pathspec in pathspecs:
                urn = pathspec.AFF4Path(self.client_id)
                fd = aff4.FACTORY.Open(urn, token=self.token)
                self.assertEqual("Hello", fd.read())
Example #20
    def testMultiGetFileOfSpecialFiles(self):
        """Test that special /proc/ files are handled correctly.

    /proc/ files have the property that they are non-seekable from their end
    (i.e. seeking them relative to the end is not supported). They also return
    an st_size of 0. For example:

    $ stat /proc/self/maps
    File: '/proc/self/maps'
    Size: 0   Blocks: 0   IO Block: 1024 regular empty file

    $ head /proc/self/maps
    00400000-00409000 r-xp 00000000 fc:01 9180740 /usr/bin/head
    00608000-00609000 r--p 00008000 fc:01 9180740 /usr/bin/head
    ...

    When we try to use the MultiGetFile flow, it deduplicates the files and,
    since it thinks the file has a zero size, it will not download the file
    but will instead copy the zero-sized file into it.
    """
        client_mock = action_mocks.MultiGetFileClientMock()

        # Create a zero-sized file.
        zero_sized_filename = os.path.join(self.temp_dir, "zero_size")
        with open(zero_sized_filename, "wb"):
            pass

        pathspec = rdf_paths.PathSpec(pathtype=rdf_paths.PathSpec.PathType.OS,
                                      path=zero_sized_filename)

        flow_test_lib.TestFlowHelper(transfer.MultiGetFile.__name__,
                                     client_mock,
                                     token=self.token,
                                     file_size="1MiB",
                                     client_id=self.client_id,
                                     pathspecs=[pathspec])

        # Now if we try to fetch a real /proc/ filename this will fail because the
        # filestore already contains the zero length file
        # aff4:/files/nsrl/da39a3ee5e6b4b0d3255bfef95601890afd80709.
        pathspec = rdf_paths.PathSpec(pathtype=rdf_paths.PathSpec.PathType.OS,
                                      path="/proc/self/environ")

        flow_test_lib.TestFlowHelper(transfer.MultiGetFile.__name__,
                                     client_mock,
                                     token=self.token,
                                     file_size=1024 * 1024,
                                     client_id=self.client_id,
                                     pathspecs=[pathspec])

        with open(pathspec.last.path, "rb") as fd:
            data = fd.read()

        if data_store.RelationalDBReadEnabled():
            cp = db.ClientPath.FromPathSpec(self.client_id.Basename(),
                                            pathspec)
            fd_rel_db = file_store.OpenFile(cp)
            self.assertEqual(fd_rel_db.size, len(data))
            self.assertEqual(fd_rel_db.read(), data)

            # Check that SHA256 hash of the file matches the contents
            # hash and that MD5 and SHA1 are set.
            history = data_store.REL_DB.ReadPathInfoHistory(
                cp.client_id, cp.path_type, cp.components)
            self.assertEqual(history[-1].hash_entry.sha256,
                             fd_rel_db.hash_id.AsBytes())
            self.assertIsNotNone(history[-1].hash_entry.sha1)
            self.assertIsNotNone(history[-1].hash_entry.md5)
        else:
            # Test the AFF4 file that was created - it should be empty since by
            # default we judge the file size based on its stat.st_size.
            urn = pathspec.AFF4Path(self.client_id)
            fd = aff4.FACTORY.Open(urn, token=self.token)
            self.assertEqual(fd.size, len(data))
            self.assertMultiLineEqual(fd.read(len(data)), data)
Example #21
 def _ReadFromPathspec(self, pathspec, num_bytes):
     fd = file_store.OpenFile(
         db.ClientPath.FromPathSpec(self.client_id, pathspec))
     return fd.read(num_bytes)
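A hypothetical caller for a helper like this one, reusing the SQLite magic bytes checked in Example 18 (the constant and method name are illustrative, not from the source):

 SQLITE_MAGIC = b"SQLite format 3"

 def _LooksLikeSqlite(self, pathspec):
     # Sniff the leading magic bytes of the collected file.
     return self._ReadFromPathspec(pathspec, len(SQLITE_MAGIC)) == SQLITE_MAGIC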
Example #22
    def Generate(self, items, token=None):
        """Generates archive from a given collection.

    Iterates the collection and generates an archive by yielding contents
    of every referenced AFF4Stream.

    Args:
      items: Iterable of rdf_client_fs.StatEntry objects
      token: User's ACLToken.

    Yields:
      Binary chunks comprising the generated archive.
    """

        del token  # unused, to be removed with AFF4 code

        client_ids = set()
        for item_batch in collection.Batch(items, self.BATCH_SIZE):

            fds_to_write = {}
            for item in item_batch:
                try:
                    urn = flow_export.CollectionItemToAff4Path(
                        item, self.client_id)
                    client_path = flow_export.CollectionItemToClientPath(
                        item, self.client_id)
                except flow_export.ItemNotExportableError:
                    continue

                fd = file_store.OpenFile(client_path)
                self.total_files += 1

                if not self.predicate(client_path):
                    self.ignored_files.append(utils.SmartUnicode(urn))
                    continue

                client_ids.add(client_path.client_id)

                # content_path = os.path.join(self.prefix, *urn_components)
                self.archived_files += 1

                # Make sure size of the original file is passed. It's required
                # when output_writer is StreamingTarWriter.
                st = os.stat_result((0o644, 0, 0, 0, 0, 0, fd.size, 0, 0, 0))
                fds_to_write[fd] = (client_path, urn, st)

            if fds_to_write:
                for fd, (client_path, urn, st) in iteritems(fds_to_write):
                    try:
                        for i, chunk in enumerate(
                                file_store.StreamFilesChunks([client_path])):
                            if i == 0:
                                target_path = os.path.join(
                                    self.prefix,
                                    urn.Path()[1:])
                                yield self.archive_generator.WriteFileHeader(
                                    target_path, st=st)

                            yield self.archive_generator.WriteFileChunk(
                                chunk.data)

                        yield self.archive_generator.WriteFileFooter()
                    except Exception as exception:  # pylint: disable=broad-except
                        logging.exception(exception)

                        self.archived_files -= 1
                        self.failed_files.append(unicode(urn))

                if self.archive_generator.is_file_write_in_progress:
                    yield self.archive_generator.WriteFileFooter()

        if client_ids:
            for client_id, client_info in iteritems(
                    data_store.REL_DB.MultiReadClientFullInfo(client_ids)):
                client = api_client.ApiClient().InitFromClientInfo(client_info)
                for chunk in self._GenerateClientInfo(client_id, client):
                    yield chunk

        for chunk in self._GenerateDescription():
            yield chunk

        yield self.archive_generator.Close()
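One detail above deserves a note: os.stat_result is built by hand so that a streaming tar writer sees the right st_size (and a 0644 mode) for a file that exists only in the file store. The constructor takes the standard ten stat fields in order, which this small stdlib-only sketch demonstrates:

import os

# Field order: st_mode, st_ino, st_dev, st_nlink, st_uid,
#              st_gid, st_size, st_atime, st_mtime, st_ctime.
st = os.stat_result((0o644, 0, 0, 0, 0, 0, 1234, 0, 0, 0))
assert st.st_size == 1234 and st.st_mode == 0o644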
Example #23
    def testMultiGetFileOfSpecialFiles(self):
        """Test that special /proc/ files are handled correctly.

    /proc/ files have the property that they are non-seekable from their end
    (i.e. seeking them relative to the end is not supported). They also return
    an st_size of 0. For example:

    $ stat /proc/self/maps
    File: '/proc/self/maps'
    Size: 0   Blocks: 0   IO Block: 1024 regular empty file

    $ head /proc/self/maps
    00400000-00409000 r-xp 00000000 fc:01 9180740 /usr/bin/head
    00608000-00609000 r--p 00008000 fc:01 9180740 /usr/bin/head
    ...

    When we try to use the MultiGetFile flow, it deduplicates the files and,
    since it thinks the file has a zero size, it will not download the file
    but will instead copy the zero-sized file into it.
    """
        client_mock = action_mocks.MultiGetFileClientMock()

        # Create a zero-sized file.
        zero_sized_filename = os.path.join(self.temp_dir, "zero_size")
        with open(zero_sized_filename, "wb"):
            pass

        pathspec = rdf_paths.PathSpec(pathtype=rdf_paths.PathSpec.PathType.OS,
                                      path=zero_sized_filename)

        flow_test_lib.TestFlowHelper(transfer.MultiGetFile.__name__,
                                     client_mock,
                                     creator=self.test_username,
                                     file_size="1MiB",
                                     client_id=self.client_id,
                                     pathspecs=[pathspec])

        # Now if we try to fetch a real /proc/ filename this will fail because the
        # filestore already contains the zero length file
        pathspec = rdf_paths.PathSpec(pathtype=rdf_paths.PathSpec.PathType.OS,
                                      path="/proc/self/environ")

        flow_test_lib.TestFlowHelper(transfer.MultiGetFile.__name__,
                                     client_mock,
                                     creator=self.test_username,
                                     file_size=1024 * 1024,
                                     client_id=self.client_id,
                                     pathspecs=[pathspec])

        with open(pathspec.last.path, "rb") as fd:
            data = fd.read()

        cp = db.ClientPath.FromPathSpec(self.client_id, pathspec)
        fd_rel_db = file_store.OpenFile(cp)
        self.assertEqual(fd_rel_db.size, len(data))
        self.assertEqual(fd_rel_db.read(), data)

        # Check that SHA256 hash of the file matches the contents
        # hash and that MD5 and SHA1 are set.
        history = data_store.REL_DB.ReadPathInfoHistory(
            cp.client_id, cp.path_type, cp.components)
        self.assertEqual(history[-1].hash_entry.sha256,
                         fd_rel_db.hash_id.AsBytes())
        self.assertEqual(history[-1].hash_entry.num_bytes, len(data))
        self.assertIsNotNone(history[-1].hash_entry.sha1)
        self.assertIsNotNone(history[-1].hash_entry.md5)
Example #24
 def testRaisesForNonExistentFile(self):
     with self.assertRaises(file_store.FileNotFoundError):
         file_store.OpenFile(self.client_path)
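Examples 24, 26 and 29 together show the two failure modes of file_store.OpenFile: FileNotFoundError when no path info exists at all, and FileHasNoContentError when the path is known but no usable content was ever collected. A minimal defensive wrapper, assuming the import paths of recent GRR releases (these have moved between versions):

from grr_response_server import file_store
from grr_response_server.databases import db
from grr_response_server.rdfvalues import objects as rdf_objects


def ReadCollectedFileOrNone(client_id, components):
    # Sketch only: return the file content, or None when nothing was stored.
    client_path = db.ClientPath(
        client_id=client_id,
        path_type=rdf_objects.PathInfo.PathType.OS,
        components=components)
    try:
        return file_store.OpenFile(client_path).read()
    except file_store.FileNotFoundError:
        return None  # No PathInfo was ever written for this path.
    except file_store.FileHasNoContentError:
        return None  # The path is known, but no content was collected.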
Example #25
 def _OpenFile(self, pathspec: rdf_paths.PathSpec) -> file_store.BlobStream:
     # TODO(amoser): This is not super efficient, AFF4 provided an api to open
     # all pathspecs at the same time, investigate if optimizing this is worth
     # it.
     client_path = db.ClientPath.FromPathSpec(self._client_id, pathspec)
     return file_store.OpenFile(client_path, max_timestamp=self._timestamp)
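max_timestamp is the only optional argument of file_store.OpenFile exercised in these examples. Judging from this helper, it pins the read to the newest content collected at or before the given time; every other example omits it and therefore reads the latest version.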
Example #26
 def testRaisesForFileWithSinglePathInfoWithUnknownHash(self):
     data_store.REL_DB.WritePathInfos(
         self.client_id, [self._PathInfo(self.invalid_hash_id)])
     with self.assertRaises(file_store.FileHasNoContentError):
         file_store.OpenFile(self.client_path)
Example #27
 def testOpensFileWithSinglePathInfoWithHash(self):
     data_store.REL_DB.WritePathInfos(self.client_id,
                                      [self._PathInfo(self.hash_id)])
     fd = file_store.OpenFile(self.client_path)
     self.assertEqual(fd.read(), self.data)
Example #28
def ApplyParsersToResponses(parser_factory, responses, flow_obj):
  """Parse responses with applicable parsers.

  Args:
    parser_factory: A parser factory for specific artifact.
    responses: A list of responses from the client.
    flow_obj: An artifact collection flow.

  Returns:
    A list of (possibly parsed) responses.
  """
  if not parser_factory.HasParsers():
    # If we don't have any parsers, we expect to use the unparsed responses.
    return responses

  # We have some processors to run.
  knowledge_base = flow_obj.state.knowledge_base

  @contextlib.contextmanager
  def ParseErrorHandler():
    try:
      yield
    except parsers.ParseError as error:
      flow_obj.Log("Error encountered when parsing responses: %s", error)

  parsed_responses = []

  if parser_factory.HasSingleResponseParsers():
    for response in responses:
      for parser in parser_factory.SingleResponseParsers():
        with ParseErrorHandler():
          parsed_responses.extend(
              parser.ParseResponse(knowledge_base, response,
                                   flow_obj.args.path_type))

  for parser in parser_factory.MultiResponseParsers():
    with ParseErrorHandler():
      parsed_responses.extend(parser.ParseResponses(knowledge_base, responses))

  has_single_file_parsers = parser_factory.HasSingleFileParsers()
  has_multi_file_parsers = parser_factory.HasMultiFileParsers()

  if has_single_file_parsers or has_multi_file_parsers:
    precondition.AssertIterableType(responses, rdf_client_fs.StatEntry)
    pathspecs = [response.pathspec for response in responses]
    # TODO(amoser): This is not super efficient, AFF4 provided an api to open
    # all pathspecs at the same time, investigate if optimizing this is worth
    # it.
    filedescs = []
    for pathspec in pathspecs:
      client_path = db.ClientPath.FromPathSpec(flow_obj.client_id, pathspec)
      filedescs.append(file_store.OpenFile(client_path))

  if has_single_file_parsers:
    for response, filedesc in zip(responses, filedescs):
      for parser in parser_factory.SingleFileParsers():
        with ParseErrorHandler():
          parsed_responses.extend(
              parser.ParseFile(knowledge_base, response.pathspec, filedesc))

  if has_multi_file_parsers:
    for parser in parser_factory.MultiFileParsers():
      with ParseErrorHandler():
        parsed_responses.extend(
            parser.ParseFiles(knowledge_base, pathspecs, filedescs))

  return parsed_responses
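The nested ParseErrorHandler is a pattern worth isolating: a context manager lets each parser invocation fail independently without aborting the surrounding loop. A self-contained sketch of the same idea, with a generic logger standing in for flow_obj.Log:

import contextlib
import logging


@contextlib.contextmanager
def SuppressAndLog(*exceptions):
  try:
    yield
  except exceptions as error:
    logging.warning("Error encountered when parsing: %s", error)


# Each failing iteration is logged and skipped instead of raising.
parsed = []
for raw in ["1", "2", "oops", "4"]:
  with SuppressAndLog(ValueError):
    parsed.append(int(raw))
assert parsed == [1, 2, 4]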
Example #29
 def testRaisesForFileWithSinglePathInfoWithoutHash(self):
   data_store.REL_DB.WritePathInfos(self.client_id, [self._PathInfo()])
   with self.assertRaises(file_store.FileHasNoContentError):
     file_store.OpenFile(self.client_path)