def test_update_file_list(self):
        """Check update_file_list after a third version is appended to the version list.

        The file list is first built from versions 1.0 and 1.1, then version 1.2 (same
        "file" hash, different "readme" hash) is appended and the file list is updated.
        """
        unchanged = Signature(path="file", hash="12345")
        readme_old = Signature(path="readme", hash="54321")
        readme_new = Signature(path="readme", hash="56789")
        initial_versions = [
            VersionDefinition(version="1.0", signatures=[unchanged, readme_old]),
            VersionDefinition(version="1.1", signatures=[unchanged, readme_old]),
        ]
        added_version = VersionDefinition(version="1.2", signatures=[unchanged, readme_new])
        version_list = VersionList(producer="producer", key="key", versions=initial_versions)
        file_list = self.version_builder.create_file_list_from_version_list(version_list)
        version_list.versions.append(added_version)

        self.version_builder.update_file_list(file_list, version_list)

        def first_entry(path):
            # First entry of the file list with this path; IndexError if there is none.
            return [entry for entry in file_list.files if entry.path == path][0]

        # "file" has the same hash in all three versions: one signature, three versions.
        tracked = first_entry("file")
        self.assertEqual(tracked.path, "file")
        self.assertEqual(len(tracked.signatures), 1)
        only_signature = tracked.signatures[0]
        self.assertEqual(only_signature.hash, "12345")
        self.assertEqual(len(only_signature.versions), 3)

        # "readme" changed in 1.2: the old hash keeps 1.0/1.1, the new hash gets 1.2 only.
        readme = first_entry("readme")
        self.assertEqual(readme.path, "readme")
        self.assertEqual(len(readme.signatures), 2)
        old_entry = readme.signatures[0]
        self.assertEqual(old_entry.hash, "54321")
        self.assertEqual(len(old_entry.versions), 2)
        for expected_version in ("1.0", "1.1"):
            self.assertIn(expected_version, old_entry.versions)
        new_entry = readme.signatures[1]
        self.assertEqual(new_entry.hash, "56789")
        self.assertEqual(new_entry.versions, ["1.2"])
    def test_flag_as_containing_version(self):
        """Check the ``contains_version`` flag on collected signatures.

        The collector is created with ``lookup_version="1.2.3"``. The fake hasher feeds the
        readme a chunk containing "1.2.3" and the license a chunk that does not; the readme
        signature is expected with ``contains_version=True`` and the license one without it.
        """
        class FakeHasher:
            algo = "CUST"

            # Chunks passed to the callback for each known file.
            chunks_by_path = {
                "/some/path/random1234/readme.txt": (
                    b"Readme for version 1.2.3 test",
                    b"weird chunk",
                ),
                "/some/path/random1234/license.txt": (b"MIT...",),
            }

            def hash(hasher, file_path, chunk_cb):
                # Any path without prepared chunks is reported as missing.
                if file_path not in hasher.chunks_by_path:
                    raise FileNotFoundError()
                for chunk in hasher.chunks_by_path[file_path]:
                    chunk_cb(chunk)
                return "12345"

        tree = [
            ("/some/path/random1234", [],
             ["readme.txt", "license.txt", "index.php"]),
        ]
        with patch('openwebvulndb.common.hash.walk') as walk:
            walk.return_value = tree
            collector = HashCollector(
                path="/some/path/random1234",
                hasher=FakeHasher(),
                prefix="wp-content/plugins/my-plugin",
                lookup_version="1.2.3",
            )

            signatures = list(collector.collect())

            flagged_readme = Signature(
                path="wp-content/plugins/my-plugin/readme.txt",
                hash="12345",
                algo="CUST",
                contains_version=True,
            )
            self.assertIn(flagged_readme, signatures)
            plain_license = Signature(
                path="wp-content/plugins/my-plugin/license.txt",
                hash="12345",
                algo="CUST",
            )
            self.assertIn(plain_license, signatures)
    def test_exclude_php_files(self):
        """Check that no signature is produced for the ``index.php`` files of the walked tree."""
        tree = [
            ("/some/path/random1234", ["js", "css"],
             ["readme.txt", "license.txt", "index.php"]),
            ("/some/path/random1234/js", [], ["index.js", "index.php"]),
            ("/some/path/random1234/css", [], ["custom.css", "index.php"]),
        ]
        with patch('openwebvulndb.common.hash.walk') as walk:
            walk.return_value = tree
            collector = HashCollector(
                path="/some/path/random1234",
                hasher=MagicMock(),
                prefix="wp-content/plugins/my-plugin",
            )
            # Configure the mock only after the collector has been built.
            collector.hasher.algo = "CONST"
            collector.hasher.hash.return_value = "12345"

            signatures = list(collector.collect())

            walk.assert_called_with("/some/path/random1234")

            excluded_paths = (
                "wp-content/plugins/my-plugin/index.php",
                "wp-content/plugins/my-plugin/js/index.php",
                "wp-content/plugins/my-plugin/css/index.php",
            )
            for php_path in excluded_paths:
                unexpected = Signature(path=php_path, hash="12345", algo="CONST")
                self.assertNotIn(unexpected, signatures)
    def test_exclude_empty_files(self):
        """Check that collection still yields the other files when hashing one raises ValueError.

        The fake hasher raises ``ValueError("File is empty")`` for ``/path/empty``; the
        readme and license signatures must still be collected.
        """
        class FakeHasher:

            algo = "sha256"

            def hash(hasher, file_path, chunk_cb):
                if file_path != "/path/empty":
                    return "12345"
                raise ValueError("File is empty")

        tree = [
            ("/path", [], ["readme.txt", "empty", "license.txt"]),
        ]
        with patch('openwebvulndb.common.hash.walk') as walk:
            walk.return_value = tree
            collector = HashCollector(
                path="/path",
                hasher=FakeHasher(),
                prefix="wp-content/plugins/my-plugin",
                lookup_version="1.2.3",
            )

            signatures = list(collector.collect())

            expected_paths = (
                "wp-content/plugins/my-plugin/readme.txt",
                "wp-content/plugins/my-plugin/license.txt",
            )
            for kept_path in expected_paths:
                expected = Signature(path=kept_path, hash="12345", algo="sha256")
                self.assertIn(expected, signatures)
    def test_compare_signature_dont_return_files_that_are_removed_in_current_version(self):
        """A file present in 1.0 but absent from 1.1 must not show up in the comparison result."""
        removed = Signature(path="file0", hash="0")
        kept = Signature(path="file1", hash="1")
        older = VersionDefinition(version="1.0", signatures=[removed, kept])
        newer = VersionDefinition(version="1.1", signatures=[kept])

        differences = self.version_builder._compare_versions_signatures(older, newer)

        self.assertEqual(len(differences), 0)
    def test_get_signature_return_signature_with_specified_file_path_in_version_definition(self):
        """_get_signature(path, version) returns the signature of the version with that path."""
        expected_signatures = [
            Signature(path="file0", hash="1"),
            Signature(path="file1", hash="2"),
            Signature(path="file2", hash="3"),
        ]
        version = VersionDefinition(version="1.0", signatures=list(expected_signatures))

        # Perform every lookup first, then compare, as a failed lookup should not hide another.
        found_signatures = [
            self.version_builder._get_signature(path, version)
            for path in ("file0", "file1", "file2")
        ]

        for found, expected in zip(found_signatures, expected_signatures):
            self.assertEqual(found, expected)
    def test_exclude_files_removes_files_beginning_with_branches(self):
        """_exclude_files drops the two signatures located under the plugin's ``branches`` folder."""
        plugin_root = "wp-content/plugins/my-plugin"
        branch_file0 = Signature(path="wp-content/plugins/my-plugin/branches/file0", hash="1")
        kept_file1 = Signature(path="wp-content/plugins/my-plugin/file1", hash="2")
        kept_file2 = Signature(path="wp-content/plugins/my-plugin/file2", hash="3")
        branch_file3 = Signature(path="wp-content/plugins/my-plugin/branches/file3", hash="4")
        version = VersionDefinition(
            version="1.2",
            signatures=[branch_file0, kept_file1, kept_file2, branch_file3],
        )
        self.version_builder.version_list = VersionList(
            producer="producer", key="plugins/my-plugin", versions=[version])

        self.version_builder._exclude_files()

        remaining = version.signatures
        self.assertEqual(len(remaining), 2)
        for kept in (kept_file1, kept_file2):
            self.assertTrue(kept.path.startswith(plugin_root))  # sanity check on the fixture itself
            self.assertIn(kept, remaining)
    def test_export_plugins_regroup_plugins_in_one_file(self):
        """export_plugins passes a single group holding one file list per plugin to ``_dump``.

        The second and third positional arguments of the last ``_dump`` call are inspected:
        the group (key "plugins", producer "Vane2Export") and its schema.
        """
        shared_definition = VersionDefinition(
            version="1.0", signatures=[Signature(path="file")])
        version_lists = [
            VersionList(key=plugin_key, producer="unittest", versions=[shared_definition])
            for plugin_key in ("plugin0", "plugin1")
        ]
        self.exporter._list_keys = MagicMock(return_value=["plugin0", "plugin1"])
        self.storage.version_list = version_lists

        self.exporter.export_plugins(export_path="path")

        dump_args, _ = self.exporter._dump.call_args
        group = dump_args[1]
        group_schema = dump_args[2]
        self.assertIsInstance(group_schema, FileListGroupSchema)
        self.assertEqual(group.key, "plugins")
        self.assertEqual(group.producer, "Vane2Export")
        expected_attributes = (
            ("key", "plugin0"),
            ("producer", "Vane2Export"),
            ("key", "plugin1"),
        )
        for attribute, value in expected_attributes:
            self.assert_object_with_attribute_value_in_container(
                attribute, value, group.file_lists)
    def test_get_differences_between_versions_return_all_files_that_differ_or_are_added_between_versions(self):
        """Check the number of differing or added files reported for versions 1.1 and 1.2.

        Fixture layout (file index -> behaviour):
          0-4   identical in all three versions
          5-9   different hash in every version
          10-14 changed in 1.1, unchanged in 1.2
          15-19 unchanged in 1.1, changed in 1.2
          20-24 present in 1.2 only
        Expected: 10 entries for 1.1 (5-14) and 15 entries for 1.2 (5-9 and 15-24).
        """
        v1_0 = VersionDefinition(version="1.0")
        v1_1 = VersionDefinition(version="1.1")
        v1_2 = VersionDefinition(version="1.2")

        for index in range(0, 5):
            # The very same Signature object is shared by the three versions.
            shared = Signature(path="file%d" % index, hash=str(index))
            for version in (v1_0, v1_1, v1_2):
                version.signatures.append(shared)

        for index in range(5, 10):
            # Hash differs in each version: counts for both 1.1 and 1.2.
            v1_0.add_signature(path="file%d" % index, hash=str(index))
            v1_1.add_signature(path="file%d" % index, hash="A%d" % index)
            v1_2.add_signature(path="file%d" % index, hash="B%d" % index)

        for index in range(10, 15):
            # Changed between 1.0 and 1.1 only: brings the 1.1 total to 10.
            v1_0.add_signature(path="file%d" % index, hash=str(index))
            v1_1.add_signature(path="file%d" % index, hash="A%d" % index)
            v1_2.add_signature(path="file%d" % index, hash="A%d" % index)

        for index in range(15, 20):
            # Changed between 1.1 and 1.2 only: brings the 1.2 total to 10.
            v1_0.add_signature(path="file%d" % index, hash=str(index))
            v1_1.add_signature(path="file%d" % index, hash=str(index))
            v1_2.add_signature(path="file%d" % index, hash="A%d" % index)

        for index in range(20, 25):
            # Added in 1.2: brings the 1.2 total to 15.
            v1_2.add_signature(path="file%d" % index, hash=str(index))

        self.version_builder.version_list = VersionList(
            producer="producer", key="key", versions=[v1_0, v1_1, v1_2])

        differences = self.version_builder._get_differences_between_versions()

        self.assertEqual(len(differences["1.1"]), 10)
        self.assertEqual(len(differences["1.2"]), 15)
    async def test_skip_loaded_versions(self, fake_future):
        """Versions already present in storage are not collected again.

        The workspace lists 1.0, 10.1 and 2.0 while storage already holds 1.0 and 2.0, so
        ``collect_for_version`` must be called exactly once, for 10.1.
        """
        plugin_key = "plugins/a-plugin"
        plugin_prefix = "wp-content/plugins/a-plugin"

        workspace = MagicMock()
        workspace.list_versions.return_value = fake_future(
            ["1.0", "10.1", "2.0"])

        hasher = RepositoryHasher(storage=MagicMock(), hasher=MagicMock())
        hasher.collect_for_version = MagicMock()
        collected = [
            Signature(path="wp-content/plugins/a-plugin/readme.txt",
                      hash="12345")
        ]
        hasher.collect_for_version.return_value = fake_future(collected)

        # Versions that storage reports as already known.
        already_stored = VersionList(producer="RepositoryHasher",
                                     key="plugins/a-plugin")
        for known_version in ("1.0", "2.0"):
            already_stored.get_version(known_version, create_missing=True)

        hasher.storage.read_versions.return_value = already_stored

        await hasher.collect_for_workspace(
            plugin_key, workspace, prefix=plugin_prefix)

        hasher.storage.read_versions.assert_called_with(plugin_key)
        hasher.collect_for_version.assert_called_once_with(
            workspace, "10.1", prefix=plugin_prefix)
    def test_collect_files_has_issues_with_os(self):
        """Collection carries on when hashing one of the files raises OSError.

        Ex: OSError: [Errno 40] Too many levels of symbolic links

        The mocked hasher succeeds twice, raises OSError on its third call, then succeeds
        again; four files are walked and three signatures are expected.
        """
        root = "/some/path/random1234"
        tree = [
            ("/some/path/random1234", ["js", "css"],
             ["readme.txt", "license.txt"]),
            ("/some/path/random1234/js", [], ["index.js"]),
            ("/some/path/random1234/css", [], ["custom.css"]),
        ]
        with patch('openwebvulndb.common.hash.walk') as walk:
            walk.return_value = tree
            collector = HashCollector(
                path=root,
                hasher=MagicMock(),
                prefix="wp-content/plugins/my-plugin",
            )
            collector.hasher.algo = "CONST"
            # One outcome per hash() call, in call order.
            collector.hasher.hash.side_effect = [
                "12345",
                "12345",
                OSError(),
                "12345",
            ]

            signatures = list(collector.collect())

            walk.assert_called_with(root)

            hashed_files = (
                "/some/path/random1234/readme.txt",
                "/some/path/random1234/license.txt",
                "/some/path/random1234/css/custom.css",
                "/some/path/random1234/js/index.js",
            )
            expected_calls = [
                call(file_path, chunk_cb=collector.version_checker)
                for file_path in hashed_files
            ]
            collector.hasher.hash.assert_has_calls(expected_calls, any_order=True)

            for collected_path in ("wp-content/plugins/my-plugin/readme.txt",
                                   "wp-content/plugins/my-plugin/css/custom.css"):
                self.assertIn(
                    Signature(path=collected_path, hash="12345", algo="CONST"),
                    signatures)
            self.assertEqual(3, len(signatures))
            self.assertTrue(signatures[0].dirty)
    def test_export_wordpress_dump_wordpress_versions(self):
        """export_wordpress dumps the "wordpress" version list converted to a file list.

        Checks the dump file name, the schema type, and that each hash of each file is
        associated with the version(s) it came from.
        """
        first_version = VersionDefinition(
            version="1.0",
            signatures=[Signature(path="file.html", hash="a1b2c3")])
        second_version = VersionDefinition(
            version="2.0",
            signatures=[
                Signature(path="file.html", hash="d4e5f6"),
                Signature(path="style.css", hash="12345"),
            ])
        wordpress_version_list = VersionList(
            key="wordpress",
            producer="unittest",
            versions=[first_version, second_version])
        self.exporter._list_keys = MagicMock(return_value=["wordpress"])
        self.storage.version_list = [wordpress_version_list]

        self.exporter.export_wordpress("path")

        dump_args, _ = self.exporter._dump.call_args
        file_name = dump_args[0]
        exported = dump_args[1]
        schema = dump_args[2]
        self.assertEqual(file_name, "path/vane2_wordpress_versions.json")
        self.assertIsInstance(schema, FileListSchema)
        self.assertEqual(exported.key, "wordpress")
        for expected_path in ("file.html", "style.css"):
            self.assert_object_with_attribute_value_in_container(
                "path", expected_path, exported.files)

        find = self.get_object_with_attribute_value_in_container
        html_file = find("path", "file.html", exported.files)
        css_file = find("path", "style.css", exported.files)
        html_signature_v1 = find("hash", "a1b2c3", html_file.signatures)
        html_signature_v2 = find("hash", "d4e5f6", html_file.signatures)
        css_signature = css_file.signatures[0]
        self.assertEqual(html_signature_v1.versions, ["1.0"])
        self.assertEqual(html_signature_v2.versions, ["2.0"])
        self.assertEqual(css_signature.versions, ["2.0"])
    def test_create_file_list_from_version_list_shrink_version_list_if_too_many_files_per_version(self):
        """With 100 signatures in a version and a limit of 50, the version list is shrunk once."""
        shrink_mock = MagicMock()
        self.version_builder._shrink_version_list = shrink_mock
        self.version_builder._is_version_list_empty = MagicMock(return_value=False)
        # NOTE(review): hashes are ints here while other tests use strings; kept as is.
        signatures = [Signature(path=str(index), hash=index) for index in range(100)]
        oversized_version = VersionDefinition(version="1.0", signatures=signatures)
        version_list = VersionList(key="key", producer="producer", versions=[oversized_version])

        self.version_builder.create_file_list_from_version_list(version_list, 50)

        self.version_builder._shrink_version_list.assert_called_once_with()
    def test_get_file_paths_from_version_list(self):
        """Each path is reported once, even when it appears in several versions or hashes."""
        file0 = Signature(path="file0", hash="1")
        file1 = Signature(path="file1", hash="2")
        file2 = Signature(path="file2", hash="3")
        file3 = Signature(path="file3", hash="4")
        file0_changed = Signature(path="file0", hash="5")  # same path as file0, other hash
        versions = [
            VersionDefinition(version="1.0", signatures=[file0, file1, file2]),
            VersionDefinition(version="1.1", signatures=[file0, file1, file3]),
            VersionDefinition(version="1.2", signatures=[file0_changed, file2]),
        ]
        self.version_builder.version_list = VersionList(
            producer="producer", key="key", versions=versions)

        collected_paths = self.version_builder._get_file_paths_from_version_list()

        self.assertEqual(len(collected_paths), 4)
        for expected_path in ("file0", "file1", "file2", "file3"):
            self.assertIn(expected_path, collected_paths)
    def test_export_plugins_create_file_list_from_version_list_for_plugin(
            self):
        """export_plugins converts a plugin's version list into a file list inside the group.

        The first file list of the dumped group is inspected: its key, its files, and the
        version(s) attached to each hash.
        """
        first_version = VersionDefinition(
            version="1.0",
            signatures=[Signature(path="file.html", hash="a1b2c3")])
        second_version = VersionDefinition(
            version="2.0",
            signatures=[
                Signature(path="file.html", hash="d4e5f6"),
                Signature(path="style.css", hash="12345"),
            ])
        plugin_version_list = VersionList(
            key="my-plugin",
            producer="unittest",
            versions=[first_version, second_version])
        self.exporter._list_keys = MagicMock(return_value=["my-plugin"])
        self.storage.version_list = [plugin_version_list]

        self.exporter.export_plugins("path")

        dump_args, _ = self.exporter._dump.call_args
        group = dump_args[1]
        exported_plugin = group.file_lists[0]
        self.assertEqual(exported_plugin.key, "my-plugin")
        for expected_path in ("file.html", "style.css"):
            self.assert_object_with_attribute_value_in_container(
                "path", expected_path, exported_plugin.files)

        find = self.get_object_with_attribute_value_in_container
        html_file = find("path", "file.html", exported_plugin.files)
        css_file = find("path", "style.css", exported_plugin.files)
        html_signature_v1 = find("hash", "a1b2c3", html_file.signatures)
        html_signature_v2 = find("hash", "d4e5f6", html_file.signatures)
        css_signature = css_file.signatures[0]
        self.assertEqual(html_signature_v1.versions, ["1.0"])
        self.assertEqual(html_signature_v2.versions, ["2.0"])
        self.assertEqual(css_signature.versions, ["2.0"])
    def test_create_file_from_version_list_regroup_all_versions_with_equal_hash_for_file_in_same_file_signature(self):
        """A file whose hash is the same in every version ends up with a single file signature."""
        file_sig = Signature(path="file", hash="12345")
        readme_sig = Signature(path="readme", hash="54321")
        definitions = [
            VersionDefinition(version=number, signatures=[file_sig, readme_sig])
            for number in ("1.0", "1.1", "1.2")
        ]
        version_list = VersionList(producer="producer", key="key", versions=definitions)
        self.version_builder.version_list = version_list

        built_file = self.version_builder._create_file_from_version_list("file")
        built_readme = self.version_builder._create_file_from_version_list("readme")

        file_entry = built_file.signatures[0]
        readme_entry = built_readme.signatures[0]
        self.assertEqual(len(built_file.signatures), 1)
        self.assertEqual(len(built_readme.signatures), 1)
        self.assertEqual(file_entry.hash, file_sig.hash)
        self.assertEqual(readme_entry.hash, readme_sig.hash)
        known_versions = [definition.version for definition in version_list.versions]
        # NOTE(review): this only checks that the reported versions are a subset of the known
        # ones (it also passes for an empty list) -- confirm whether equality was intended.
        for entry in (file_entry, readme_entry):
            self.assertTrue(all(reported in known_versions for reported in entry.versions))
    def test_collect_files(self):
        """Every file of the walked tree is hashed and returned under the configured prefix."""
        root = "/some/path/random1234"
        tree = [
            ("/some/path/random1234", ["js", "css"],
             ["readme.txt", "license.txt"]),
            ("/some/path/random1234/js", [], ["index.js"]),
            ("/some/path/random1234/css", [], ["custom.css"]),
        ]
        with patch('openwebvulndb.common.hash.walk') as walk:
            walk.return_value = tree
            collector = HashCollector(
                path=root,
                hasher=MagicMock(),
                prefix="wp-content/plugins/my-plugin",
            )
            collector.hasher.algo = "CONST"
            collector.hasher.hash.return_value = "12345"

            signatures = list(collector.collect())

            walk.assert_called_with(root)

            # Each file is hashed with the collector's version checker as chunk callback.
            hashed_files = (
                "/some/path/random1234/readme.txt",
                "/some/path/random1234/license.txt",
                "/some/path/random1234/css/custom.css",
                "/some/path/random1234/js/index.js",
            )
            expected_calls = [
                call(file_path, chunk_cb=collector.version_checker)
                for file_path in hashed_files
            ]
            collector.hasher.hash.assert_has_calls(expected_calls, any_order=True)

            for collected_path in ("wp-content/plugins/my-plugin/readme.txt",
                                   "wp-content/plugins/my-plugin/css/custom.css"):
                self.assertIn(
                    Signature(path=collected_path, hash="12345", algo="CONST"),
                    signatures)
            self.assertTrue(signatures[0].dirty)
    def test_strip_filenames(self):
        """File names reported with a trailing space yield signature paths without it."""
        tree = [("/some/path/random1234", [],
                 ["readme.txt ", "license.txt "])]
        with patch('openwebvulndb.common.hash.walk') as walk:
            walk.return_value = tree
            collector = HashCollector(
                path="/some/path/random1234",
                hasher=MagicMock(),
                prefix="wp-content/plugins/my-plugin",
            )
            collector.hasher.algo = "sha256"
            collector.hasher.hash.return_value = "12345"

            signatures = list(collector.collect())

            walk.assert_called_with("/some/path/random1234")

            for stripped_path in ("wp-content/plugins/my-plugin/readme.txt",
                                  "wp-content/plugins/my-plugin/license.txt"):
                self.assertIn(
                    Signature(path=stripped_path, hash="12345", algo="sha256"),
                    signatures)
    def test_shrink_version_list_keep_max_files_from_most_common_files_if_no_changes_between_version(self):
        """With two identical versions of 10 files and a limit of 10, all 10 files are kept."""
        older = VersionDefinition(version="1.0")
        newer = VersionDefinition(version="1.1")
        for index in range(10):
            # Both versions share the very same Signature object.
            shared = Signature(path="file%d" % index, hash=str(index))
            older.signatures.append(shared)
            newer.signatures.append(shared)
        self.version_builder.version_list = VersionList(
            producer="producer", key="key", versions=[older, newer])
        self.version_builder.files_per_version = 10

        self.version_builder._shrink_version_list()

        self.assertEqual(len(older.signatures), 10)
        remaining_paths = [signature.path for signature in older.signatures]
        for index in range(10):
            self.assertIn("file%d" % index, remaining_paths)
    async def test_brand_new_file(self, fake_future):
        """When storage has nothing for the key, every listed version is collected and written.

        ``read_versions`` raises FileNotFoundError; the versions are expected to be collected
        in the order 1.0, 2.0, 10.1 and the resulting version list written to storage.
        """
        plugin_key = "plugins/a-plugin"
        plugin_prefix = "wp-content/plugins/a-plugin"
        ordered_versions = ("1.0", "2.0", "10.1")

        workspace = MagicMock()
        workspace.list_versions.return_value = fake_future(
            ["1.0", "10.1", "2.0"])

        hasher = RepositoryHasher(storage=MagicMock(), hasher=MagicMock())
        hasher.collect_for_version = MagicMock()
        collected = [
            Signature(path="wp-content/plugins/a-plugin/readme.txt",
                      hash="12345")
        ]
        hasher.collect_for_version.return_value = fake_future(collected)

        hasher.storage.read_versions.side_effect = FileNotFoundError()

        await hasher.collect_for_workspace(
            plugin_key, workspace, prefix=plugin_prefix)

        hasher.storage.read_versions.assert_called_with(plugin_key)
        expected_calls = [
            call(workspace, version, prefix=plugin_prefix)
            for version in ordered_versions
        ]
        hasher.collect_for_version.assert_has_calls(expected_calls, any_order=False)

        # Build the version list that is expected to be handed to storage.
        expected = VersionList(producer="RepositoryHasher",
                               key="plugins/a-plugin")
        for version in ordered_versions:
            definition = expected.get_version(version, create_missing=True)
            definition.add_signature("wp-content/plugins/a-plugin/readme.txt",
                                     hash="12345")

        hasher.storage.write_versions.assert_called_with(expected)
    def test_update_file_list_keep_file_order_and_append_new_file_at_end(self):
        """Updating a file list keeps the existing file order and puts the new file last.

        Version 1.1 brings one new path (style/style.css) and new hashes for two paths
        that the file list built from 1.0 already contains.
        """
        self.version_builder._shrink_version_list = MagicMock()
        abc_html = Signature(path="path/to/files/abc.html", hash="12345")
        file_js = Signature(path="path/to/files/file.js", hash="12345")
        nested_file_js = Signature(path="path/to/files/js/file.js", hash="12345")
        readme = Signature(path="path/to/files/readme.txt", hash="12345")
        color_css = Signature(path="path/to/files/style/color.css", hash="12345")
        style_css = Signature(path="path/to/files/style/style.css", hash="12345")
        color_css_changed = Signature(path="path/to/files/style/color.css", hash="23456")
        readme_changed = Signature(path="path/to/files/readme.txt", hash="23456")
        first_version = VersionDefinition(
            version="1.0",
            signatures=[abc_html, file_js, nested_file_js, readme, color_css])
        version_list = VersionList(key="key", producer="producer", versions=[first_version])
        file_list = self.version_builder.create_file_list_from_version_list(version_list, 50)
        paths_before_update = [entry.path for entry in file_list.files]
        second_version = version_list.get_version("1.1", create_missing=True)
        second_version.signatures = [style_css, color_css_changed, readme_changed]

        self.version_builder.update_file_list(file_list, version_list, 50)

        for position, expected_path in enumerate(paths_before_update):
            self.assertEqual(file_list.files[position].path, expected_path)
        self.assertEqual(style_css.path, file_list.files[-1].path)