Example #1
0
    def testMissingFilesAreListedInManifest(self):
        """Checks that a path that cannot be archived is listed as missing.

        Two mappings are passed to the generator: one for `self.path1` and
        one for the OS path ("non", "existing"). The archive is expected to
        hold a single file plus the MANIFEST, and the manifest is expected to
        report the second path under `missing_files`.
        """
        generator = archive_generator.FlowArchiveGenerator(
            self.flow, archive_generator.ArchiveFormat.ZIP)
        mappings = [
            flow_base.ClientPathArchiveMapping(self.path1, "foo/file"),
            flow_base.ClientPathArchiveMapping(
                db.ClientPath.OS(self.client_id, ["non", "existing"]),
                "foo/bar/file"),
        ]
        fd_path = self._GenerateArchive(generator.Generate(mappings))

        # Open the archive in a `with` block so that the underlying file
        # handle is released even when one of the assertions below fails.
        with zipfile.ZipFile(fd_path) as zip_fd:
            names = [str(s) for s in sorted(zip_fd.namelist())]

            # Expecting in the archive: 1 file (the other shouldn't be found)
            # and a manifest.
            self.assertLen(names, 2)

            contents = zip_fd.read(
                os.path.join(generator.prefix, "foo", "file"))
            self.assertEqual(contents, b"hello1")

            manifest = yaml.safe_load(
                zip_fd.read(os.path.join(generator.prefix, "MANIFEST")))

        self.assertCountEqual(manifest["processed_files"].items(),
                              [(self.path1.vfs_path, "foo/file")])
        self.assertCountEqual(manifest["missing_files"],
                              ["fs/os/non/existing"])
Example #2
0
    def testCreatesZipContainingTwoMappedFilesAndManifest(self):
        """Checks the contents of a ZIP archive built from two mappings.

        The archive is expected to contain both mapped files (with contents
        b"hello1" and b"hello2") and a MANIFEST that lists both files as
        processed, no missing files, and the client and flow ids of the test
        fixture.
        """
        generator = archive_generator.FlowArchiveGenerator(
            self.flow, archive_generator.ArchiveFormat.ZIP)
        mappings = [
            flow_base.ClientPathArchiveMapping(self.path1, "foo/file"),
            flow_base.ClientPathArchiveMapping(self.path2, "foo/bar/file"),
        ]
        fd_path = self._GenerateArchive(generator.Generate(mappings))

        # Open the archive in a `with` block so that the underlying file
        # handle is released even when one of the assertions below fails.
        with zipfile.ZipFile(fd_path) as zip_fd:
            names = [str(s) for s in sorted(zip_fd.namelist())]

            # Expecting in the archive: 2 files and a manifest.
            self.assertLen(names, 3)

            contents = zip_fd.read(
                os.path.join(generator.prefix, "foo", "file"))
            self.assertEqual(contents, b"hello1")

            contents = zip_fd.read(
                os.path.join(generator.prefix, "foo", "bar", "file"))
            self.assertEqual(contents, b"hello2")

            manifest = yaml.safe_load(
                zip_fd.read(os.path.join(generator.prefix, "MANIFEST")))

        self.assertCountEqual(manifest["processed_files"].items(),
                              [(self.path1.vfs_path, "foo/file"),
                               (self.path2.vfs_path, "foo/bar/file")])
        self.assertCountEqual(manifest["missing_files"], [])
        self.assertEqual(manifest["client_id"], self.client_id)
        self.assertEqual(manifest["flow_id"], self.flow_id)
Example #3
0
    def testCreatesTarContainingTwoMappedFilesAndManifest(self):
        """Checks the contents of a TAR_GZ archive built from two mappings.

        The archive is expected to hold three members: both mapped files and
        a MANIFEST describing them together with the client and flow ids.
        """
        tar_generator = archive_generator.FlowArchiveGenerator(
            self.flow, archive_generator.ArchiveFormat.TAR_GZ)
        archive_path = self._GenerateArchive(
            tar_generator.Generate([
                flow_base.ClientPathArchiveMapping(self.path1, "foo/file"),
                flow_base.ClientPathArchiveMapping(self.path2,
                                                   "foo/bar/file"),
            ]))

        with tarfile.open(archive_path, encoding="utf-8") as archive:

            def read_member(*relative_parts):
                # Members are looked up under the generator's prefix.
                member_name = os.path.join(tar_generator.prefix,
                                           *relative_parts)
                return archive.extractfile(member_name).read()

            self.assertLen(archive.getnames(), 3)

            self.assertEqual(read_member("foo", "file"), b"hello1")
            self.assertEqual(read_member("foo", "bar", "file"), b"hello2")

            manifest = yaml.safe_load(read_member("MANIFEST"))
            expected_processed = [
                (self.path1.vfs_path, "foo/file"),
                (self.path2.vfs_path, "foo/bar/file"),
            ]
            self.assertCountEqual(manifest["processed_files"].items(),
                                  expected_processed)
            self.assertCountEqual(manifest["missing_files"], [])
            self.assertEqual(manifest["client_id"], self.client_id)
            self.assertEqual(manifest["flow_id"], self.flow_id)
Example #4
0
    def testCorrectlyGeneratesArchiveMappings(self):
        """Checks archive mappings produced for Chrome and Safari results.

        The flow is run with `ArtifactCollectorFlow` replaced by
        `MockArtifactCollectorFlow`; the resulting mappings are expected to
        place each history file under a directory named after its browser.
        """
        requested_browsers = [
            webhistory.Browser.CHROME,
            webhistory.Browser.SAFARI,
        ]
        collector_patch = mock.patch.object(collectors,
                                            "ArtifactCollectorFlow",
                                            MockArtifactCollectorFlow)

        with collector_patch:
            flow_id, _, _ = self._RunCollectBrowserHistory(
                browsers=requested_browsers)
            flow_obj = flow_test_lib.GetFlowObj(self.client_id, flow_id)
            flow = flow_base.FlowBase.CreateFlowInstance(flow_obj)
            raw_results = flow_test_lib.GetRawFlowResults(
                self.client_id, flow_id)

            mappings = flow.GetFilesArchiveMappings(raw_results)

        chrome_mapping = flow_base.ClientPathArchiveMapping(
            db.ClientPath.OS(self.client_id,
                             ("home", "foo", "ChromeHistory")),
            "chrome/ChromeHistory",
        )
        safari_mapping = flow_base.ClientPathArchiveMapping(
            db.ClientPath.OS(self.client_id,
                             ("home", "foo", "SafariHistory")),
            "safari/SafariHistory",
        )
        self.assertCountEqual(mappings, [chrome_mapping, safari_mapping])
Example #5
0
  def testPropagatesStreamingExceptions(self):
    """Checks that an error from StreamFilesChunks reaches the caller.

    `file_store.StreamFilesChunks` is patched to raise Exception("foobar");
    generating the archive is expected to raise an exception with that same
    message.
    """
    generator = archive_generator.FlowArchiveGenerator(
        self.flow, archive_generator.ArchiveFormat.TAR_GZ)
    mappings = [
        flow_base.ClientPathArchiveMapping(self.path1, "foo/file"),
        flow_base.ClientPathArchiveMapping(self.path2, "foo/bar/file"),
    ]

    failing_stream = mock.patch.object(
        file_store, "StreamFilesChunks", side_effect=Exception("foobar"))
    with failing_stream:
      with self.assertRaises(Exception) as raised:
        self._GenerateArchive(generator.Generate(mappings))
      error_message = str(raised.exception)
      self.assertEqual(error_message, "foobar")
Example #6
0
    def testArchiveMappingsForDuplicateFilesInResult(self):
        """Checks that repeated results get distinct archive paths.

        A single collected file is reported three times; the second and third
        mappings are expected to carry "-1" and "-2" suffixes.
        """
        with temp.AutoTempFilePath() as temp_file_path:
            with io.open(temp_file_path, mode="w", encoding="utf-8") as fd:
                fd.write("Just sample text to put in the file.")

            table = f"""
      [
        {{ "collect_column": "{temp_file_path}" }}
      ]
      """

            with osquery_test_lib.FakeOsqueryiOutput(stdout=table, stderr=""):
                flow_id = self._InitializeFlow(
                    file_collection_columns=["collect_column"])

        flow_obj = flow_test_lib.GetFlowObj(self.client_id, flow_id)
        flow = flow_base.FlowBase.CreateFlowInstance(flow_obj)
        results = list(
            flow_test_lib.GetRawFlowResults(self.client_id, flow_id))

        # Repeating the same results three times emulates duplicate filenames.
        tripled_results = results * 3
        mappings = list(flow.GetFilesArchiveMappings(iter(tripled_results)))

        def expected_mapping(archive_path):
            # Every expected mapping points at the same client-side file.
            return flow_base.ClientPathArchiveMapping(
                db.ClientPath.OS(self.client_id,
                                 temp_file_path.split("/")[1:]),
                archive_path,
            )

        self.assertCountEqual(mappings, [
            expected_mapping(f"osquery_collected_files{temp_file_path}"),
            expected_mapping(f"osquery_collected_files{temp_file_path}-1"),
            expected_mapping(f"osquery_collected_files{temp_file_path}-2"),
        ])
Example #7
0
    def testArchiveMappingsForMultipleFiles(self):
        """Checks archive mappings when two different files are collected.

        Two files, "foo" and "bar", are created in a temporary directory and
        reported through the fake osquery output; one mapping per file is
        expected, each under "osquery_collected_files".
        """
        with temp.AutoTempDirPath(remove_non_empty=True) as temp_dir_path:
            foo_path = os.path.join(temp_dir_path, "foo")
            bar_path = os.path.join(temp_dir_path, "bar")

            file_texts = (
                (foo_path, "Just sample text to put in the file 1."),
                (bar_path, "Just sample text to put in the file 2."),
            )
            for file_path, text in file_texts:
                with io.open(file_path, mode="w", encoding="utf-8") as fd:
                    fd.write(text)

            table = f"""
      [
        {{ "collect_column": "{foo_path}" }},
        {{ "collect_column": "{bar_path}" }}
      ]
      """

            with osquery_test_lib.FakeOsqueryiOutput(stdout=table, stderr=""):
                flow_id = self._InitializeFlow(
                    file_collection_columns=["collect_column"])

        flow_obj = flow_test_lib.GetFlowObj(self.client_id, flow_id)
        flow = flow_base.FlowBase.CreateFlowInstance(flow_obj)
        results = flow_test_lib.GetRawFlowResults(self.client_id, flow_id)

        def expected_mapping(file_path):
            # Client path components are the file path without the root "/".
            return flow_base.ClientPathArchiveMapping(
                db.ClientPath.OS(self.client_id, file_path.split("/")[1:]),
                f"osquery_collected_files{file_path}",
            )

        mappings = list(flow.GetFilesArchiveMappings(iter(results)))
        self.assertCountEqual(mappings, [
            expected_mapping(foo_path),
            expected_mapping(bar_path),
        ])
Example #8
0
    def GetFilesArchiveMappings(
        self, flow_results: Iterator[rdf_flow_objects.FlowResult]
    ) -> Iterator[flow_base.ClientPathArchiveMapping]:
        """Yields one archive mapping per flow result, with unique paths.

        Each result payload is treated as a `CollectBrowserHistoryResult`.
        The archive path is the lowercased browser name joined with the
        basename of the result's pathspec. If that path was already yielded,
        a `_<n>` suffix is inserted before the file extension, with `n`
        increased until the resulting path has not been used yet.

        Args:
          flow_results: Flow results to generate archive mappings for.

        Yields:
          A `ClientPathArchiveMapping` for every flow result.
        """
        # Last suffix number handed out for each un-suffixed base path.
        path_counters = collections.Counter()
        # Every archive path yielded so far. Keeping the counter under the
        # base path (rather than under the renamed path) guarantees that a
        # third and later duplicate does not reuse the "_1" suffix.
        used_paths = set()

        for r in flow_results:
            p = cast(CollectBrowserHistoryResult, r.payload)
            client_path = db.ClientPath.FromPathSpec(self.client_id,
                                                     p.stat_entry.pathspec)

            base_path = os.path.join(p.browser.name.lower(),
                                     p.stat_entry.pathspec.Basename())
            target_path = base_path
            if target_path in used_paths:
                fname, ext = os.path.splitext(base_path)
                # Skip suffixes that collide with a path already in use,
                # e.g. a file literally named "<fname>_1<ext>".
                while target_path in used_paths:
                    path_counters[base_path] += 1
                    target_path = f"{fname}_{path_counters[base_path]}{ext}"

            used_paths.add(target_path)

            yield flow_base.ClientPathArchiveMapping(client_path, target_path)
Example #9
0
    def GetFilesArchiveMappings(
        self,
        flow_results: Iterator[rdf_flow_objects.FlowResult],
    ) -> Iterator[flow_base.ClientPathArchiveMapping]:
        """Yields archive mappings for the flow results that describe files.

        Results for which `_ExtractFileInfo` raises `_ResultNotRelevantError`
        are skipped. For the remaining ones the archive path is obtained from
        a `_UniquePathGenerator` shared across the whole iteration.

        Args:
          flow_results: Flow results to generate archive mappings for.

        Yields:
          A `ClientPathArchiveMapping` for every relevant flow result.
        """
        unique_paths = _UniquePathGenerator()

        for flow_result in flow_results:
            try:
                file_info = _ExtractFileInfo(flow_result)
            except _ResultNotRelevantError:
                # Nothing to archive for this result.
                continue

            pathspec = file_info.stat_entry.pathspec
            yield flow_base.ClientPathArchiveMapping(
                client_path=db.ClientPath.FromPathSpec(
                    self.client_id, pathspec),
                archive_path=unique_paths.GeneratePath(pathspec))
Example #10
0
    def testGeneratesTarGzArchiveForFlowWithCustomMappings(self):
        """Checks the TAR_GZ manifest when the flow supplies custom mappings.

        `FileFinder.GetFilesArchiveMappings` is patched to return a single
        mapping of "test.plist" to "foo/file"; the manifest is expected to
        list exactly that mapping and no missing files.
        """
        components = self.base_path.lstrip("/").split("/") + ["test.plist"]
        client_path = abstract_db.ClientPath.OS(self.client_id, components)
        custom_mappings = [
            flow_base.ClientPathArchiveMapping(client_path, "foo/file"),
        ]

        mappings_patch = mock.patch.object(file_finder.FileFinder,
                                           "GetFilesArchiveMappings",
                                           return_value=custom_mappings)
        with mappings_patch:
            archive_args = flow_plugin.ApiGetFlowFilesArchiveArgs(
                client_id=self.client_id,
                flow_id=self.flow_id,
                archive_format="TAR_GZ")
            result = self.handler.Handle(archive_args, token=self.token)

        manifest = self._GetTarGzManifest(result)
        self.assertEqual(manifest["client_id"], self.client_id)
        self.assertEqual(manifest["flow_id"], self.flow_id)
        expected_processed = {client_path.vfs_path: "foo/file"}
        self.assertEqual(manifest["processed_files"], expected_processed)
        self.assertEmpty(manifest["missing_files"])