Exemple #1
0
 def test_dir_identifier(self):
     """Check ``Directory.from_dict(...).id`` against the reference id.

     The comparison is done twice: once on the reference dict as-is, and
     once on the result of ``remove_id()`` applied to it.
     """
     expected_id = self.directory["id"]

     from_full_dict = Directory.from_dict(self.directory)
     self.assertEqual(from_full_dict.id, expected_id)

     from_stripped_dict = Directory.from_dict(remove_id(self.directory))
     self.assertEqual(from_stripped_dict.id, expected_id)
Exemple #2
0
def test_lookup_directory_with_revision_unknown_content(
        archive_data, new_revision):
    """Looking up a path whose target content is unknown raises NotFoundExc."""
    missing_content = random_content()
    path = "README.md"

    # Single-entry directory whose only file targets the unknown content.
    readme_entry = DirectoryEntry(
        name=path.encode("utf-8"),
        type="file",
        target=hash_to_bytes(missing_content["sha1_git"]),
        perms=DentryPerms.content,
    )
    directory = Directory(entries=(readme_entry, ))

    # Rebuild the revision from its dict form so that it points to that
    # directory; the "id" key is dropped before calling Revision.from_dict.
    rev_dict = new_revision.to_dict()
    rev_dict["directory"] = directory.id
    del rev_dict["id"]
    revision = Revision.from_dict(rev_dict)

    # Register both objects in the archive test data.
    archive_data.directory_add([directory])
    archive_data.revision_add([revision])

    revision_hex = hash_to_hex(revision.id)
    with pytest.raises(NotFoundExc) as exc_info:
        archive.lookup_directory_with_revision(revision_hex, path)
    assert exc_info.match("Content not found for revision %s" % revision_hex)
Exemple #3
0
    def test_directory_revision_data(self, swh_storage):
        """A "rev" entry must be extracted as a symlink to its target id."""
        target_rev = "0e8a3ad980ec179856012b7eecf4327e99cd44cd"

        submodule_entry = DirectoryEntry(
            name=b"submodule",
            type="rev",
            target=hashutil.hash_to_bytes(target_rev),
            perms=0o100644,
        )
        directory = Directory(entries=(submodule_entry, ))
        swh_storage.directory_add([directory])

        cooked = cook_extract_directory_dircooker(swh_storage,
                                                  directory.swhid(),
                                                  fsck=False)
        with cooked as root:
            link = root / "submodule"
            assert link.is_symlink()
            # The link target is the hex id of the revision.
            assert os.readlink(str(link)) == target_rev
Exemple #4
0
    def load_directory(self, obj_id: Sha1Git,
                       raw_manifest: Optional[bytes]) -> None:
        """Write the git object of a directory and queue its children.

        Args:
            obj_id: id of the directory whose entries are fetched from
                ``self.storage``.
            raw_manifest: when truthy, written as the git object in place of
                the one built from the directory's entries.

        If the storage has no entries for ``obj_id``, an error is logged and
        nothing is written.
        """
        entries: Optional[Iterable[DirectoryEntry]] = stream_results_optional(
            self.storage.directory_get_entries, obj_id)
        if entries is None:
            logger.error("Missing swh:1:dir:%s, ignoring.",
                         hash_to_hex(obj_id))
            return

        directory = Directory(id=obj_id,
                              entries=tuple(entries),
                              raw_manifest=raw_manifest)
        if raw_manifest:
            git_object = raw_manifest
        else:
            git_object = git_objects.directory_git_object(directory)
        self.write_object(obj_id, git_object)

        # Stack receiving the target of each entry, keyed by entry type.
        stack_by_type: Dict[str, Optional[List[Sha1Git]]] = {
            "file": self._cnt_stack,
            "dir": self._dir_stack,
            # Do not include submodule targets (rejected by git-fsck)
            "rev": None,
        }
        for entry in directory.entries:
            target_stack = stack_by_type[entry.type]
            if target_stack is None:
                continue
            self._push(target_stack, [entry.target])
Exemple #5
0
    def test_revision_metadata_indexer_single_root_dir(self):
        """Index a revision whose root holds a single directory.

        The directory of ``REVISION`` is wrapped in a parent directory that
        becomes the root of a new revision; the indexer is then run on that
        new revision and its stored result is compared to the expected row.
        """
        indexer = RevisionMetadataIndexer(config=REVISION_METADATA_CONFIG)
        fill_obj_storage(indexer.objstorage)
        fill_storage(indexer.storage)

        base_rev = REVISION
        assert base_rev.directory == DIRECTORY2.id

        # Parent directory: the only entry at the root of the new revision.
        wrapper_entry = DirectoryEntry(
            name=b"foobar-1.0.0", type="dir", target=base_rev.directory, perms=16384,
        )
        wrapper_dir = Directory(entries=(wrapper_entry,))
        assert wrapper_dir.id is not None
        indexer.storage.directory_add([wrapper_dir])

        # Same revision, re-rooted on the wrapper directory (id dropped from
        # the dict before rebuilding the object).
        rev_dict = {**base_rev.to_dict(), "directory": wrapper_dir.id}
        rev_dict.pop("id")
        wrapped_rev = Revision.from_dict(rev_dict)
        indexer.storage.revision_add([wrapped_rev])

        tool_query = {f"tool_{k}": v for (k, v) in TRANSLATOR_TOOL.items()}
        tool = indexer.idx_storage.indexer_configuration_get(tool_query)
        assert tool is not None

        content_row = ContentMetadataRow(
            id=DIRECTORY2.entries[0].target,
            indexer_configuration_id=tool["id"],
            metadata=YARN_PARSER_METADATA,
        )
        indexer.idx_storage.content_metadata_add([content_row])

        indexer.run([wrapped_rev.id])

        actual = list(
            indexer.idx_storage.revision_intrinsic_metadata_get([wrapped_rev.id])
        )

        expected = [
            RevisionIntrinsicMetadataRow(
                id=wrapped_rev.id,
                tool=TRANSLATOR_TOOL,
                metadata=YARN_PARSER_METADATA,
                mappings=["npm"],
            )
        ]

        # The tool id is removed from the fetched rows before comparing.
        for row in actual:
            del row.tool["id"]

        self.assertEqual(actual, expected)
Exemple #6
0
 def test_dir_identifier_entry_order(self):
     """The directory id must stay the same when entries are reversed."""
     # Same entries as the reference directory, in reverse order.
     reordered = {"entries": reversed(self.directory["entries"])}
     computed_id = Directory.from_dict(remove_id(reordered)).id
     self.assertEqual(computed_id, self.directory["id"])
Exemple #7
0
def dulwich_tree_to_directory(obj: ShaFile) -> Directory:
    """Format a tree as a directory.

    Args:
        obj: object whose ``type_name`` must be ``b"tree"``.

    Returns:
        A ``Directory`` whose id is the digest of the tree's sha and whose
        entries mirror the tree items. When the hash computed from those
        entries differs from that id, the tree's raw string (prefixed with a
        git object header) is recorded as ``raw_manifest``.

    Raises:
        ValueError: if ``obj`` is not a tree.
    """
    if obj.type_name != b"tree":
        raise ValueError("Argument is not a tree.")
    tree = cast(Tree, obj)

    entries = []

    for entry in tree.iteritems():
        # The entry type is derived from the mode bits; the commit mask is
        # tested before the tree mask.
        if entry.mode & COMMIT_MODE_MASK == COMMIT_MODE_MASK:
            type_ = "rev"
        elif entry.mode & TREE_MODE_MASK == TREE_MODE_MASK:
            type_ = "dir"
        else:
            type_ = "file"

        entries.append(
            DirectoryEntry(
                type=type_,
                perms=entry.mode,
                name=entry.path,
                target=hash_to_bytes(entry.sha.decode("ascii")),
            ))

    dir_ = Directory(
        id=tree.sha().digest(),
        entries=tuple(entries),
    )

    # Hash the entries once and reuse the result for both the comparison
    # and the log message.
    expected_id = dir_.id
    actual_id = dir_.compute_hash()
    if actual_id != expected_id:
        logger.warning(
            "Expected directory to have id %s, but got %s. Recording raw_manifest.",
            hash_to_hex(expected_id),
            hash_to_hex(actual_id),
        )
        raw_string = tree.as_raw_string()
        dir_ = attr.evolve(
            dir_,
            raw_manifest=git_object_header("tree", len(raw_string)) +
            raw_string)

    check_id(dir_)
    return dir_
 def _directory_with_entries(self, sample_data, nb_entries):
     """Build a directory of ``nb_entries`` entries, every one of them
     targeting ``sample_data.content``."""
     entries = []
     for i in range(nb_entries):
         entries.append(
             DirectoryEntry(
                 name=f"file{i:10}".encode(),
                 type="file",
                 target=sample_data.content.sha1_git,
                 perms=from_disk.DentryPerms.directory,
             ))
     return Directory(entries=tuple(entries))
Exemple #9
0
    def test_revision_submodule(self, swh_storage, cook_extract_revision,
                                ingest_target_revision):
        """Cook a revision whose directory holds a "rev" (submodule) entry.

        ``ingest_target_revision`` controls whether the revision targeted by
        the submodule entry is added to the storage beforehand. The tree of
        the cooked HEAD must contain a ``160000 submodule`` entry pointing to
        that revision's id.
        """
        utc_now = datetime.datetime.now(datetime.timezone.utc)
        date = TimestampWithTimezone.from_datetime(
            utc_now.replace(microsecond=0))

        # Revision the submodule entry points to.
        submodule_rev = Revision(
            message=b"target_rev",
            author=Person.from_fullname(b"me <*****@*****.**>"),
            date=date,
            committer=Person.from_fullname(b"me <*****@*****.**>"),
            committer_date=date,
            parents=(),
            type=RevisionType.GIT,
            directory=bytes.fromhex(
                "3333333333333333333333333333333333333333"),
            metadata={},
            synthetic=True,
        )
        if ingest_target_revision:
            swh_storage.revision_add([submodule_rev])

        submodule_entry = DirectoryEntry(
            name=b"submodule",
            type="rev",
            target=submodule_rev.id,
            perms=0o160000,
        )
        root_dir = Directory(entries=(submodule_entry, ))
        swh_storage.directory_add([root_dir])

        # Revision being cooked, rooted on the directory built above.
        root_rev = Revision(
            message=b"msg",
            author=Person.from_fullname(b"me <*****@*****.**>"),
            date=date,
            committer=Person.from_fullname(b"me <*****@*****.**>"),
            committer_date=date,
            parents=(),
            type=RevisionType.GIT,
            directory=root_dir.id,
            metadata={},
            synthetic=True,
        )
        swh_storage.revision_add([root_rev])

        with cook_extract_revision(swh_storage, root_rev.swhid()) as (ert, _):
            ert.checkout(b"HEAD")
            expected_entry = b"160000 submodule\x00%s" % submodule_rev.id
            head_tree = ert.repo[b"HEAD"].tree
            assert expected_entry in ert.repo[head_tree].as_raw_string()
 def get(ids):
     """Return a one-element list holding the directory ``ids[0]``.

     Entries are rebuilt from the rows of ``swh_storage.directory_ls``;
     only the first id of ``ids`` is used.
     """
     dir_id = ids[0]
     entries = tuple(
         DirectoryEntry(
             name=row["name"],
             type=row["type"],
             target=row["sha1_git"],
             perms=row["perms"],
         ) for row in swh_storage.directory_ls(dir_id))
     return [Directory(id=dir_id, entries=entries)]
Exemple #11
0
 def __getitem__(self, item):
     """Return ``self.data[item]``, except for the computed "target" key.

     For ``"target"``, the value is the id of a ``Directory`` built from the
     name/target/type/perms fields of ``self.data["entries"]``.
     """
     if item != "target":
         return self.data[item]

     entry_dicts = [
         {
             "name": entry["name"],
             "target": entry["target"],
             "type": entry["type"],
             "perms": entry["perms"],
         }
         for entry in self.data["entries"]
     ]
     return Directory.from_dict({"entries": entry_dicts}).id
def directory_converter(db: BaseDb, directory_d: Dict[str, Any]) -> Directory:
    """Convert directory from the flat representation to swh model
    compatible objects.

    Args:
        db: database handle providing ``cursor()``.
        directory_d: flat directory dict; its ``file_entries``,
            ``dir_entries`` and ``rev_entries`` keys are popped (the dict is
            modified in place), while ``id`` and ``raw_manifest`` are read.

    Returns:
        The ``Directory`` with one entry per row fetched from the
        ``directory_entry_<type>`` tables.
    """
    columns = ["target", "name", "perms"]
    query_template = """
    select %(columns)s
    from directory_entry_%(type)s
    where id in %%s
    """

    entries = []
    with db.cursor() as cur:
        for entry_type in ("file", "dir", "rev"):
            entry_ids = directory_d.pop("%s_entries" % entry_type)
            if not entry_ids:
                # Nothing to fetch for this entry type.
                continue
            query = query_template % {
                "columns": ",".join(columns),
                "type": entry_type,
            }
            cur.execute(query, (tuple(entry_ids), ))
            for row in cur:
                fields = dict(zip(columns, row))
                entries.append(
                    DirectoryEntry(
                        name=fields["name"],
                        type=entry_type,
                        target=fields["target"],
                        perms=fields["perms"],
                    ))

    return Directory(
        id=directory_d["id"],
        entries=tuple(entries),
        raw_manifest=directory_d["raw_manifest"],
    )
Exemple #13
0
def test_api_revision_directory_ok_returns_revision(api_client, archive_data,
                                                    revision, person, date):
    """The revision-directory endpoint returns a "rev" entry's revision.

    A new revision is rooted on a directory whose single entry, ``foo``, is of
    type "rev" and targets the ``revision`` fixture; browsing ``foo`` in that
    new revision must return the enriched data of the targeted revision.
    """
    rev_path = "foo"
    rev_entry = DirectoryEntry(
        name=rev_path.encode(),
        type="rev",
        target=hash_to_bytes(revision),
        perms=DentryPerms.revision,
    )
    root_dir = Directory(entries=(rev_entry, ))
    archive_data.directory_add([root_dir])

    new_rev = Revision(
        directory=root_dir.id,
        author=person,
        committer=person,
        message=b"commit message",
        date=TimestampWithTimezone.from_datetime(date),
        committer_date=TimestampWithTimezone.from_datetime(date),
        synthetic=False,
        type=RevisionType.GIT,
    )
    archive_data.revision_add([new_rev])

    revision_id = hash_to_hex(new_rev.id)
    # Data of the revision targeted by the entry, not of the new revision.
    target_rev_data = archive_data.revision_get(revision)
    url_args = {
        "sha1_git": revision_id,
        "dir_path": rev_path
    }
    url = reverse("api-1-revision-directory", url_args)
    response = check_api_get_responses(api_client, url, status_code=200)

    expected = {
        "content": enrich_revision(target_rev_data,
                                   request=response.wsgi_request),
        "path": rev_path,
        "type": "rev",
        "revision": revision_id,
    }
    assert response.data == expected
Exemple #14
0
    def test_original_malformed_objects(self, swh_storage,
                                        cook_extract_snapshot):
        """Tests that objects that were originally malformed:

        * are still interpreted somewhat correctly (if the loader could make sense of
          them), especially that they still have links to children
        * have their original manifest in the bundle

        A directory, a revision and a release are each stored with a
        ``raw_manifest`` that deviates from the usual layout; the objects read
        back from the cooked repository must serialize to those manifests.
        """
        date = TimestampWithTimezone.from_numeric_offset(
            Timestamp(1643819927, 0), 0, False)

        content = Content.from_data(b"foo")
        swh_storage.content_add([content])

        # Directory manifest with its entries disordered (file2 before file1).
        # fmt: off
        malformed_dir_manifest = b"".join((
            b"",
            b"100644 file2\x00", content.sha1_git,
            b"100644 file1\x00", content.sha1_git,
        ))
        # fmt: on
        file1_entry = DirectoryEntry(name=b"file1",
                                     type="file",
                                     perms=0o100644,
                                     target=content.sha1_git)
        file2_entry = DirectoryEntry(name=b"file2",
                                     type="file",
                                     perms=0o100644,
                                     target=content.sha1_git)
        dir_header = f"tree {len(malformed_dir_manifest)}\x00".encode()
        directory = Directory(
            entries=(file1_entry, file2_entry),
            raw_manifest=dir_header + malformed_dir_manifest,
        )
        swh_storage.directory_add([directory])

        # Revision manifest with the 'committer' and 'author' lines swapped.
        # fmt: off
        malformed_rev_manifest = (
            b"tree " + hashutil.hash_to_bytehex(directory.id) + b"\n"
            + b"committer me <*****@*****.**> 1643819927 +0000\n"
            + b"author me <*****@*****.**> 1643819927 +0000\n"
            + b"\n"
            + b"rev"
        )
        # fmt: on
        rev_header = f"commit {len(malformed_rev_manifest)}\x00".encode()
        revision = Revision(
            message=b"rev",
            author=Person.from_fullname(b"me <*****@*****.**>"),
            date=date,
            committer=Person.from_fullname(b"me <*****@*****.**>"),
            committer_date=date,
            parents=(),
            type=RevisionType.GIT,
            directory=directory.id,
            synthetic=True,
            raw_manifest=rev_header + malformed_rev_manifest,
        )
        swh_storage.revision_add([revision])

        # Release manifest with the 'tag' and 'tagger' lines swapped.
        # fmt: off
        malformed_rel_manifest = (
            b"object " + hashutil.hash_to_bytehex(revision.id) + b"\n"
            + b"type commit\n"
            + b"tagger me <*****@*****.**> 1643819927 +0000\n"
            + b"tag v1.1.0\n"
        )
        # fmt: on
        rel_header = f"tag {len(malformed_rel_manifest)}\x00".encode()
        release = Release(
            name=b"v1.1.0",
            message=None,
            author=Person.from_fullname(b"me <*****@*****.**>"),
            date=date,
            target=revision.id,
            target_type=ModelObjectType.REVISION,
            synthetic=True,
            raw_manifest=rel_header + malformed_rel_manifest,
        )
        swh_storage.release_add([release])

        # Snapshot exposing the release as a tag and the revision as HEAD.
        branches = {
            b"refs/tags/v1.1.0":
            SnapshotBranch(target=release.id,
                           target_type=TargetType.RELEASE),
            b"HEAD":
            SnapshotBranch(target=revision.id,
                           target_type=TargetType.REVISION),
        }
        snapshot = Snapshot(branches=branches)
        swh_storage.snapshot_add([snapshot])

        with cook_extract_snapshot(swh_storage, snapshot.swhid()) as (ert, _):
            # Walk tag -> commit -> tree, comparing each raw string with the
            # manifest it was stored with.
            cooked_tag = ert.repo[b"refs/tags/v1.1.0"]
            assert cooked_tag.as_raw_string() == malformed_rel_manifest

            cooked_commit = ert.repo[cooked_tag.object[1]]
            assert cooked_commit.as_raw_string() == malformed_rev_manifest

            cooked_tree = ert.repo[cooked_commit.tree]
            assert cooked_tree.as_raw_string() == malformed_dir_manifest
Exemple #15
0
        snapshot=hash_to_bytes("9e78d7105c5e0f886487511e2a92377b4ee4c32a"),
        metadata=None,
    ),
    OriginVisitStatus(
        origin=ORIGINS[1].url,
        date=datetime.datetime(2015, 11, 27, 17, 22, 18, tzinfo=UTC),
        visit=2,
        type="hg",
        status="partial",
        snapshot=hash_to_bytes("0e7f84ede9a254f2cd55649ad5240783f557e65f"),
        metadata=None,
    ),
]

DIRECTORIES = [
    Directory(id=hash_to_bytes("4b825dc642cb6eb9a060e54bf8d69288fbee4904"),
              entries=()),
    Directory(
        id=hash_to_bytes("87b339104f7dc2a8163dec988445e3987995545f"),
        entries=(
            DirectoryEntry(
                name=b"file1.ext",
                perms=0o644,
                type="file",
                target=CONTENTS[0].sha1_git,
            ),
            DirectoryEntry(
                name=b"dir1",
                perms=0o755,
                type="dir",
                target=hash_to_bytes(
                    "4b825dc642cb6eb9a060e54bf8d69288fbee4904"),
Exemple #16
0
    {"type": "git", "origin": ORIGINS[6].url},
]


# Fixture directory with an explicit id and three entries: two files
# (index.js, package.json) and one sub-directory (.github) that targets
# the id of an empty Directory.
DIRECTORY = Directory(
    id=hash_to_bytes("34f335a750111ca0a8b64d8034faec9eedc396be"),
    entries=(
        DirectoryEntry(
            name=b"index.js",
            type="file",
            perms=0o100644,
            target=hash_to_bytes("01c9379dfc33803963d07c1ccc748d3fe4c96bb5"),
        ),
        DirectoryEntry(
            name=b"package.json",
            type="file",
            perms=0o100644,
            target=hash_to_bytes("26a9f72a7c87cc9205725cfd879f514ff4f3d8d5"),
        ),
        DirectoryEntry(
            name=b".github",
            type="dir",
            perms=0o040000,
            target=Directory(entries=()).id,
        ),
    ),
)

DIRECTORY2 = Directory(
    id=b"\xf8zz\xa1\x12`<1$\xfav\xf9\x01\xfd5\x85F`\xf2\xb6",
    entries=(