def test_microsecond_insensitive(self):
    """The microseconds field of discovery_date must not change the
    serialized manifest nor the computed id."""
    # Same instant as self.minimal's discovery_date, with microseconds added.
    with_microseconds = dict(
        self.minimal,
        discovery_date=datetime.datetime(
            2021, 1, 25, 11, 27, 51, 123456,
            tzinfo=datetime.timezone.utc,
        ),
    )
    baseline = RawExtrinsicMetadata.from_dict(self.minimal)
    candidate = RawExtrinsicMetadata.from_dict(with_microseconds)
    # The serialized git objects must be identical...
    self.assertEqual(
        git_objects.raw_extrinsic_metadata_git_object(baseline),
        git_objects.raw_extrinsic_metadata_git_object(candidate),
    )
    # ...and therefore so must the ids.
    self.assertEqual(baseline.id, candidate.id)
    self.assertEqual(
        candidate.id,
        _x("5c13f20ba336e44549baf3d7b9305b027ec9f43d"),
    )
def test_nonascii_path(self):
    """Non-ASCII bytes, NUL, and a newline in `path` survive serialization;
    the newline becomes a git-style continuation line."""
    metadata = dict(self.minimal)
    metadata["path"] = b"/ab\nc/d\xf0\x9f\xa4\xb7e\x00f"
    expected = (
        b"raw_extrinsic_metadata 231\0"
        b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n"
        b"discovery_date 1611574071\n"
        b"authority forge https://forge.softwareheritage.org/\n"
        b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n"
        b"format json\n"
        b"path /ab\n"
        b" c/d\xf0\x9f\xa4\xb7e\x00f\n"
        b"\n"
        b'{"foo": "bar"}'
    )
    rem = RawExtrinsicMetadata.from_dict(metadata)
    self.assertEqual(
        git_objects.raw_extrinsic_metadata_git_object(rem),
        expected,
    )
    # The id is the sha1 of the serialized git object.
    self.assertEqual(rem.id, hashlib.sha1(expected).digest())
    self.assertEqual(rem.id, _x("7cc83fd1912176510c083f5df43f01b09af4b333"))
def test_timezone_insensitive(self):
    """The timezone of discovery_date must not change the serialized
    git_object nor the computed id."""
    # 12:27:51+01:00 is the same instant as 11:27:51 UTC in self.minimal.
    plus_one = datetime.timezone(datetime.timedelta(hours=1))
    shifted = dict(
        self.minimal,
        discovery_date=datetime.datetime(
            2021, 1, 25, 12, 27, 51, tzinfo=plus_one),
    )
    baseline = RawExtrinsicMetadata.from_dict(self.minimal)
    candidate = RawExtrinsicMetadata.from_dict(shifted)
    self.assertEqual(
        git_objects.raw_extrinsic_metadata_git_object(baseline),
        git_objects.raw_extrinsic_metadata_git_object(candidate),
    )
    self.assertEqual(baseline.id, candidate.id)
    self.assertEqual(
        candidate.id,
        _x("5c13f20ba336e44549baf3d7b9305b027ec9f43d"),
    )
def test_maximal(self):
    """A fully-populated object serializes every optional context field
    (origin, visit, snapshot, release, revision, path, directory) and
    hashes to the expected id."""
    expected = (
        b"raw_extrinsic_metadata 533\0"
        b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n"
        b"discovery_date 1611574071\n"
        b"authority forge https://forge.softwareheritage.org/\n"
        b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n"
        b"format json\n"
        b"origin https://forge.softwareheritage.org/source/swh-model/\n"
        b"visit 42\n"
        b"snapshot swh:1:snp:0000000000000000000000000000000000000000\n"
        b"release swh:1:rel:0101010101010101010101010101010101010101\n"
        b"revision swh:1:rev:0202020202020202020202020202020202020202\n"
        b"path /abc/def\n"
        b"directory swh:1:dir:0303030303030303030303030303030303030303\n"
        b"\n"
        b'{"foo": "bar"}'
    )
    rem = RawExtrinsicMetadata.from_dict(self.maximal)
    self.assertEqual(
        git_objects.raw_extrinsic_metadata_git_object(rem),
        expected,
    )
    # The id is the sha1 of the serialized git object.
    self.assertEqual(rem.id, hashlib.sha1(expected).digest())
    self.assertEqual(rem.id, _x("f96966e1093d15236a31fde07e47d5b1c9428049"))
def test_maven_loader_extrinsic_metadata(swh_storage, expected_releases,
                                         expected_json_metadata,
                                         expected_pom_metadata):
    """With no prior visit, loading a jar ends up with 1 snapshot.
    Extrinsic metadata is the pom file associated to the source jar.
    """
    loader = MavenLoader(swh_storage, MVN_ORIGIN_URL, artifacts=MVN_ARTIFACTS)
    actual_load_status = loader.load()
    assert actual_load_status["status"] == "eventful"

    # Authority and fetcher are the same for every release.
    authority = MetadataAuthority(
        type=MetadataAuthorityType.FORGE,
        url=REPO_BASE_URL,
    )
    fetcher = MetadataFetcher(
        name="swh.loader.package.maven.loader.MavenLoader",
        version=__version__,
    )

    for idx, expected_release in enumerate(expected_releases):
        release = swh_storage.release_get([expected_release.id])[0]
        assert release is not None

        release_swhid = CoreSWHID(object_type=ObjectType.RELEASE,
                                  object_id=expected_release.id)
        directory_swhid = ExtendedSWHID(
            object_type=ExtendedObjectType.DIRECTORY,
            object_id=release.target)

        def expected_rem(fmt, payload):
            # Both expected objects differ only by format and payload.
            return RawExtrinsicMetadata(
                target=directory_swhid,
                authority=authority,
                fetcher=fetcher,
                discovery_date=loader.visit_date,
                format=fmt,
                metadata=payload,
                origin=MVN_ORIGIN_URL,
                release=release_swhid,
            )

        expected_metadata = [
            expected_rem("maven-pom", expected_pom_metadata[idx]),
            expected_rem("maven-json",
                         json.dumps(expected_json_metadata[idx]).encode()),
        ]

        res = swh_storage.raw_extrinsic_metadata_get(directory_swhid,
                                                     authority)
        assert res.next_page_token is None
        assert set(res.results) == set(expected_metadata)
def db_to_raw_extrinsic_metadata(row) -> RawExtrinsicMetadata:
    """Convert a database row (dict-like, with dotted column names) into a
    :class:`RawExtrinsicMetadata` object.

    Legacy rows may store a bare origin URL in ``raw_extrinsic_metadata.target``
    instead of an extended SWHID; these are converted on the fly (with a
    DeprecationWarning) to the origin's SWHID.
    """
    target = row["raw_extrinsic_metadata.target"]
    if not target.startswith("swh:1:"):
        # stacklevel=2 attributes the warning to the caller fetching the row,
        # not to this conversion helper.
        warnings.warn(
            "Fetching raw_extrinsic_metadata row with URL target",
            DeprecationWarning,
            stacklevel=2,
        )
        target = str(Origin(url=target).swhid())
    return RawExtrinsicMetadata(
        target=ExtendedSWHID.from_string(target),
        authority=MetadataAuthority(
            type=MetadataAuthorityType(row["metadata_authority.type"]),
            url=row["metadata_authority.url"],
        ),
        fetcher=MetadataFetcher(
            name=row["metadata_fetcher.name"],
            version=row["metadata_fetcher.version"],
        ),
        discovery_date=row["discovery_date"],
        format=row["format"],
        metadata=row["raw_extrinsic_metadata.metadata"],
        origin=row["origin"],
        visit=row["visit"],
        # Context SWHIDs are nullable; only parse them when present.
        snapshot=map_optional(CoreSWHID.from_string, row["snapshot"]),
        release=map_optional(CoreSWHID.from_string, row["release"]),
        revision=map_optional(CoreSWHID.from_string, row["revision"]),
        path=row["path"],
        directory=map_optional(CoreSWHID.from_string, row["directory"]),
    )
def _assert_deposit_info_on_metadata(swh_storage, metadata_swhid, deposit,
                                     metadata_fetcher):
    # Checks that the storage holds exactly one "xml-deposit-info" metadata
    # object attached to `metadata_swhid` under the registry authority, and
    # that its payload describes `deposit`.
    swh_authority = MetadataAuthority(
        MetadataAuthorityType.REGISTRY,
        "http://deposit.softwareheritage.example/",
    )
    page_results = swh_storage.raw_extrinsic_metadata_get(
        metadata_swhid, swh_authority)

    assert len(page_results.results) == 1
    assert page_results.next_page_token is None

    # NOTE(review): the original line layout of this dedented literal was lost
    # in formatting; a flat (uniform-indent) layout is assumed — confirm
    # against VCS history before relying on the exact bytes.
    expected_xml_data = textwrap.dedent(f"""\
        <deposit xmlns="https://www.softwareheritage.org/schema/2018/deposit">
        <deposit_id>{deposit.id}</deposit_id>
        <deposit_client>https://hal-test.archives-ouvertes.fr/</deposit_client>
        <deposit_collection>test</deposit_collection>
        </deposit>
        """)

    assert page_results == PagedResult(
        results=[
            RawExtrinsicMetadata(
                target=metadata_swhid,
                discovery_date=deposit.complete_date,
                authority=swh_authority,
                fetcher=metadata_fetcher,
                format="xml-deposit-info",
                metadata=expected_xml_data.encode(),
            )
        ],
        next_page_token=None,
    )
def test_load_artifact_metadata(swh_storage, caplog):
    """Loading stores exactly one 'original-artifacts-json' metadata object
    on the directory, under the Software Heritage registry authority."""
    loader = MetadataTestLoader(swh_storage, ORIGIN_URL)
    assert loader.load() == {
        "status": "eventful",
        "snapshot_id": FULL_SNAPSHOT_ID,
    }

    registry = MetadataAuthority(
        type=MetadataAuthorityType.REGISTRY,
        url="https://softwareheritage.org/",
    )
    page = swh_storage.raw_extrinsic_metadata_get(
        DIRECTORY_SWHID,
        registry,
    )
    assert page.next_page_token is None
    assert len(page.results) == 1
    (stored,) = page.results
    assert stored == RawExtrinsicMetadata(
        target=DIRECTORY_SWHID,
        # discovery_date is set by the loader at run time; compare against
        # the stored value rather than a fixed timestamp.
        discovery_date=stored.discovery_date,
        authority=registry,
        fetcher=FETCHER,
        format="original-artifacts-json",
        metadata=b'[{"artifact_key": "value", "length": 0}]',
        origin=ORIGIN_URL,
        release=RELEASE_SWHID,
    )
def row_to_raw_extrinsic_metadata(
        row: RawExtrinsicMetadataRow) -> RawExtrinsicMetadata:
    """Build a :class:`RawExtrinsicMetadata` object from a storage row.

    The row's (naive) discovery_date is re-tagged as UTC before use.
    """
    return RawExtrinsicMetadata(
        target=ExtendedSWHID.from_string(row.target),
        authority=MetadataAuthority(
            type=MetadataAuthorityType(row.authority_type),
            url=row.authority_url,
        ),
        fetcher=MetadataFetcher(name=row.fetcher_name,
                                version=row.fetcher_version),
        discovery_date=row.discovery_date.replace(
            tzinfo=datetime.timezone.utc),
        format=row.format,
        metadata=row.metadata,
        origin=row.origin,
        visit=row.visit,
        # Optional context SWHIDs: parse only when present.
        snapshot=map_optional(CoreSWHID.from_string, row.snapshot),
        release=map_optional(CoreSWHID.from_string, row.release),
        revision=map_optional(CoreSWHID.from_string, row.revision),
        path=row.path,
        directory=map_optional(CoreSWHID.from_string, row.directory),
    )
def load_metadata(
    storage,
    revision_id,
    directory_id,
    discovery_date: datetime.datetime,
    metadata: Dict[str, Any],
    format: str,
    authority: MetadataAuthority,
    origin: Optional[str],
    dry_run: bool,
):
    """Does the actual loading to swh-storage.

    Builds a RawExtrinsicMetadata object targeting the directory, with the
    originating revision as context, and adds it to `storage` unless
    `dry_run` is set.
    """
    rem = RawExtrinsicMetadata(
        target=ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY,
                             object_id=directory_id),
        discovery_date=discovery_date,
        authority=authority,
        fetcher=FETCHER,
        format=format,
        metadata=json.dumps(metadata).encode(),
        origin=origin,
        revision=CoreSWHID(object_type=ObjectType.REVISION,
                           object_id=revision_id),
    )
    if dry_run:
        return
    storage.raw_extrinsic_metadata_add([rem])
def test_negative_epoch(self):
    """A date a fraction of a second before the epoch serializes as
    timestamp -1."""
    metadata = dict(
        self.minimal,
        discovery_date=datetime.datetime(
            1969, 12, 31, 23, 59, 59, 1,
            tzinfo=datetime.timezone.utc),
    )
    expected = (
        b"raw_extrinsic_metadata 202\0"
        b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n"
        b"discovery_date -1\n"
        b"authority forge https://forge.softwareheritage.org/\n"
        b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n"
        b"format json\n"
        b"\n"
        b'{"foo": "bar"}'
    )
    rem = RawExtrinsicMetadata.from_dict(metadata)
    self.assertEqual(
        git_objects.raw_extrinsic_metadata_git_object(rem),
        expected,
    )
    self.assertEqual(rem.id, hashlib.sha1(expected).digest())
    self.assertEqual(rem.id, _x("be7154a8fd49d87f81547ea634d1e2152907d089"))
def test_negative_timestamp(self):
    """A pre-epoch date serializes as a negative Unix timestamp."""
    metadata = dict(
        self.minimal,
        discovery_date=datetime.datetime(
            1960, 1, 25, 11, 27, 51,
            tzinfo=datetime.timezone.utc),
    )
    expected = (
        b"raw_extrinsic_metadata 210\0"
        b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n"
        b"discovery_date -313504329\n"
        b"authority forge https://forge.softwareheritage.org/\n"
        b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n"
        b"format json\n"
        b"\n"
        b'{"foo": "bar"}'
    )
    rem = RawExtrinsicMetadata.from_dict(metadata)
    self.assertEqual(
        git_objects.raw_extrinsic_metadata_git_object(rem),
        expected,
    )
    self.assertEqual(rem.id, hashlib.sha1(expected).digest())
    self.assertEqual(rem.id, _x("895d0821a2991dd376ddc303424aceb7c68280f9"))
def test_epoch(self):
    """The Unix epoch itself serializes as timestamp 0."""
    metadata = dict(
        self.minimal,
        discovery_date=datetime.datetime(
            1970, 1, 1, 0, 0, 0,
            tzinfo=datetime.timezone.utc),
    )
    expected = (
        b"raw_extrinsic_metadata 201\0"
        b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n"
        b"discovery_date 0\n"
        b"authority forge https://forge.softwareheritage.org/\n"
        b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n"
        b"format json\n"
        b"\n"
        b'{"foo": "bar"}'
    )
    rem = RawExtrinsicMetadata.from_dict(metadata)
    self.assertEqual(
        git_objects.raw_extrinsic_metadata_git_object(rem),
        expected,
    )
    self.assertEqual(rem.id, hashlib.sha1(expected).digest())
    self.assertEqual(rem.id, _x("27a53df54ace35ebd910493cdc70b334d6b7cb88"))
def test_minimal(self):
    """A minimal object (no optional context fields) serializes to the
    expected manifest and id."""
    expected = (
        b"raw_extrinsic_metadata 210\0"
        b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n"
        b"discovery_date 1611574071\n"
        b"authority forge https://forge.softwareheritage.org/\n"
        b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n"
        b"format json\n"
        b"\n"
        b'{"foo": "bar"}'
    )
    rem = RawExtrinsicMetadata.from_dict(self.minimal)
    self.assertEqual(
        git_objects.raw_extrinsic_metadata_git_object(rem),
        expected,
    )
    # The id is the sha1 of the serialized git object.
    self.assertEqual(rem.id, hashlib.sha1(expected).digest())
    self.assertEqual(rem.id, _x("5c13f20ba336e44549baf3d7b9305b027ec9f43d"))
# Directory used as the metadata target throughout these fixtures.
DIRECTORY_ID = hash_to_bytes("aa" * 20)
DIRECTORY_SWHID = ExtendedSWHID.from_string(f"swh:1:dir:{DIRECTORY_ID.hex()}")

# Fetcher identifying the test loader class itself.
FETCHER = MetadataFetcher(
    name="swh.loader.package.tests.test_loader_metadata.MetadataTestLoader",
    version=__version__,
)

# Captured once at import time so both fixture objects share the same base date.
DISCOVERY_DATE = datetime.datetime.now(tz=datetime.timezone.utc)

# Two metadata objects on the same directory; the second is one second later
# and uses a different format/payload so the pair is distinguishable.
DIRECTORY_METADATA = [
    RawExtrinsicMetadata(
        target=DIRECTORY_SWHID,
        discovery_date=DISCOVERY_DATE,
        authority=AUTHORITY,
        fetcher=FETCHER,
        format="test-format1",
        metadata=b"foo bar",
        origin=ORIGIN_URL,
        release=RELEASE_SWHID,
    ),
    RawExtrinsicMetadata(
        target=DIRECTORY_SWHID,
        discovery_date=DISCOVERY_DATE + datetime.timedelta(seconds=1),
        authority=AUTHORITY,
        fetcher=FETCHER,
        format="test-format2",
        metadata=b"bar baz",
        origin=ORIGIN_URL,
        release=RELEASE_SWHID,
    ),
]
def test_put_update_metadata_done_deposit_nominal(
    tmp_path,
    authenticated_client,
    complete_deposit,
    deposit_collection,
    atom_dataset,
    sample_data,
    swh_storage,
):
    """Nominal scenario, client send an update of metadata on a deposit with
    status "done" with an existing swhid. Such swhid has its metadata updated
    accordingly both in the deposit backend and in the metadata storage.

    Response: 204
    """
    deposit_swhid = CoreSWHID.from_string(complete_deposit.swhid)
    assert deposit_swhid.object_type == ObjectType.DIRECTORY
    directory_id = hash_to_bytes(deposit_swhid.object_id)

    # directory targeted by the complete_deposit does not exist in the storage
    assert list(swh_storage.directory_missing([directory_id
                                               ])) == [directory_id]

    # so let's create a directory reference in the storage (current deposit
    # targets an unknown swhid)
    existing_directory = sample_data.directory
    swh_storage.directory_add([existing_directory])
    assert list(swh_storage.directory_missing([existing_directory.id])) == []

    # and patch one complete deposit swhid so it targets said reference
    complete_deposit.swhid = str(existing_directory.swhid())
    complete_deposit.save()

    # Snapshot pre-existing request counts so we can check deltas afterwards.
    actual_existing_requests_archive = DepositRequest.objects.filter(
        deposit=complete_deposit, type="archive")
    nb_archives = len(actual_existing_requests_archive)
    actual_existing_requests_metadata = DepositRequest.objects.filter(
        deposit=complete_deposit, type="metadata")
    nb_metadata = len(actual_existing_requests_metadata)

    update_uri = reverse(EDIT_IRI,
                         args=[deposit_collection.name, complete_deposit.id])
    response = put_atom(
        authenticated_client,
        update_uri,
        data=atom_dataset["entry-data1"],
        HTTP_X_CHECK_SWHID=complete_deposit.swhid,
    )
    assert response.status_code == status.HTTP_204_NO_CONTENT

    # Exactly one new metadata request, carrying the submitted atom entry.
    new_requests_meta = DepositRequest.objects.filter(deposit=complete_deposit,
                                                      type="metadata")
    assert len(new_requests_meta) == nb_metadata + 1
    request_meta1 = new_requests_meta[0]
    raw_metadata1 = request_meta1.raw_metadata
    assert raw_metadata1 == atom_dataset["entry-data1"]

    # check we did not touch the other parts
    requests_archive1 = DepositRequest.objects.filter(deposit=complete_deposit,
                                                      type="archive")
    assert len(requests_archive1) == nb_archives
    assert set(actual_existing_requests_archive) == set(requests_archive1)

    # Ensure metadata stored in the metadata storage is consistent
    metadata_authority = MetadataAuthority(
        type=MetadataAuthorityType.DEPOSIT_CLIENT,
        url=complete_deposit.client.provider_url,
    )
    actual_authority = swh_storage.metadata_authority_get(
        MetadataAuthorityType.DEPOSIT_CLIENT,
        url=complete_deposit.client.provider_url)
    assert actual_authority == metadata_authority
    config = APIConfig()
    metadata_fetcher = MetadataFetcher(
        name=config.tool["name"],
        version=config.tool["version"],
    )
    actual_fetcher = swh_storage.metadata_fetcher_get(config.tool["name"],
                                                      config.tool["version"])
    assert actual_fetcher == metadata_fetcher

    # The metadata object targets the directory SWHID of the deposit.
    directory_swhid = ExtendedSWHID.from_string(complete_deposit.swhid)
    page_results = swh_storage.raw_extrinsic_metadata_get(
        directory_swhid, metadata_authority)
    assert page_results == PagedResult(
        results=[
            RawExtrinsicMetadata(
                target=directory_swhid,
                discovery_date=request_meta1.date,
                authority=metadata_authority,
                fetcher=metadata_fetcher,
                format="sword-v2-atom-codemeta",
                metadata=raw_metadata1.encode(),
                origin=complete_deposit.origin_url,
            )
        ],
        next_page_token=None,
    )
def test_gnu():
    """A GNU tarball revision row yields one 'original-artifacts-json'
    metadata object on the directory, under the SWH authority."""
    original_artifacts = [{
        "length": 842501,
        "filename": "gperf-3.0.1.tar.gz",
        "checksums": {
            "sha1": "c4453ee492032b369006ee464f4dd4e2c0c0e650",
            "sha256": "5be283ef62e1bd26abdaaf88b416dbea4b14c360b09befcda2f055656dc43f87",
            "sha1_git": "bf1d5bb57d571101dd7b6acab2b78ae11bb861de",
            "blake2s256": "661f84afeb1e0b914defe2b249d424af1dfe380a96016b3282ae758c70e19a70",
        },
    }]

    row = {
        "id": b"\x00\x1cqE\x8e@[%\xba\xcc\xc8\x0b\x99\xf6cM\xff\x9d+\x18",
        "directory": DIRECTORY_ID,
        "date": datetime.datetime(2003, 6, 13, 0, 11,
                                  tzinfo=datetime.timezone.utc),
        "committer_date": datetime.datetime(2003, 6, 13, 0, 11,
                                            tzinfo=datetime.timezone.utc),
        "type": "tar",
        "message": b"swh-loader-package: synthetic revision message",
        "metadata": {
            "extrinsic": {
                "raw": {
                    "url": "https://ftp.gnu.org/gnu/gperf/gperf-3.0.1.tar.gz",
                    "time": "2003-06-13T00:11:00+00:00",
                    "length": 842501,
                    "version": "3.0.1",
                    "filename": "gperf-3.0.1.tar.gz",
                },
                "when": "2019-11-27T11:17:38.318997+00:00",
                "provider": "https://ftp.gnu.org/gnu/gperf/",
            },
            "intrinsic": {},
            "original_artifact": original_artifacts,
        },
    }

    origin_url = "https://ftp.gnu.org/gnu/gperf/"

    # Mocked storage: we only check the calls handle_row makes.
    storage = Mock()

    def origin_get(urls):
        assert urls == [origin_url]
        return [Origin(url=origin_url)]

    storage.origin_get.side_effect = origin_get
    deposit_cur = None
    handle_row(row, storage, deposit_cur, dry_run=False)

    assert storage.method_calls == [
        call.origin_get([origin_url]),
        call.raw_extrinsic_metadata_add([
            RawExtrinsicMetadata(
                target=DIRECTORY_SWHID,
                # discovery_date comes from the row's extrinsic "when" field.
                discovery_date=datetime.datetime(
                    2019, 11, 27, 11, 17, 38, 318997,
                    tzinfo=datetime.timezone.utc),
                authority=SWH_AUTHORITY,
                fetcher=FETCHER,
                format="original-artifacts-json",
                metadata=json.dumps(original_artifacts).encode(),
                origin=origin_url,
                revision=CoreSWHID.from_string(
                    "swh:1:rev:001c71458e405b25baccc80b99f6634dff9d2b18"),
            ),
        ]),
    ]
def test_deposit_metadata_origin(
    url,
    authenticated_client,
    deposit_collection,
    atom_dataset,
    swh_storage,
):
    """Posting a swhid reference is stored on raw extrinsic metadata storage"""
    xml_data = atom_dataset["entry-data-with-origin-reference"].format(url=url)
    origin_swhid = Origin(url).swhid()
    deposit_client = authenticated_client.deposit_client
    swh_storage.origin_add([Origin(url)])
    response = post_atom(
        authenticated_client,
        reverse(COL_IRI, args=[deposit_collection.name]),
        data=xml_data,
    )

    assert response.status_code == status.HTTP_201_CREATED, response.content.decode(
    )
    response_content = ElementTree.fromstring(response.content)

    # Ensure the deposit is finalized
    deposit_id = int(
        response_content.findtext("swh:deposit_id", namespaces=NAMESPACES))
    deposit = Deposit.objects.get(pk=deposit_id)

    # we got no swhid as input so we cannot have those
    assert deposit.swhid is None
    assert deposit.swhid_context is None
    assert deposit.complete_date == deposit.reception_date
    assert deposit.complete_date is not None
    assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS

    # Ensure metadata stored in the metadata storage is consistent
    metadata_authority = MetadataAuthority(
        type=MetadataAuthorityType.DEPOSIT_CLIENT,
        url=deposit_client.provider_url,
    )
    actual_authority = swh_storage.metadata_authority_get(
        MetadataAuthorityType.DEPOSIT_CLIENT, url=deposit_client.provider_url)
    assert actual_authority == metadata_authority
    config = APIConfig()
    metadata_fetcher = MetadataFetcher(
        name=config.tool["name"],
        version=config.tool["version"],
    )
    actual_fetcher = swh_storage.metadata_fetcher_get(config.tool["name"],
                                                      config.tool["version"])
    assert actual_fetcher == metadata_fetcher

    # Get the deposited metadata object and check it:
    page_results = swh_storage.raw_extrinsic_metadata_get(
        origin_swhid, metadata_authority)

    assert len(page_results.results) == 1
    assert page_results.next_page_token is None

    metadata = RawExtrinsicMetadata(
        target=origin_swhid,
        discovery_date=deposit.complete_date,
        authority=metadata_authority,
        fetcher=metadata_fetcher,
        format="sword-v2-atom-codemeta",
        metadata=xml_data.encode(),
    )
    assert page_results == PagedResult(
        results=[metadata],
        next_page_token=None,
    )

    # Get metadata about the deposited metadata object and check it:
    _assert_deposit_info_on_metadata(swh_storage, metadata.swhid(), deposit,
                                     metadata_fetcher)
def test_deposit_metadata_swhid(
    swhid,
    authenticated_client,
    deposit_collection,
    atom_dataset,
    swh_storage,
):
    """Posting a swhid reference is stored on raw extrinsic metadata storage"""
    swhid_reference = QualifiedSWHID.from_string(swhid)
    # The metadata target is the extended (unqualified) form of the reference.
    swhid_target = extended_swhid_from_qualified(swhid_reference)

    xml_data = atom_dataset["entry-data-with-swhid"].format(
        swhid=swhid,
        metadata_provenance_url=
        "https://hal-test.archives-ouvertes.fr/hal-abcdefgh",
    )
    deposit_client = authenticated_client.deposit_client

    # make the referenced object known to the storage (see _insert_object)
    _insert_object(swh_storage, swhid_reference)

    response = post_atom(
        authenticated_client,
        reverse(COL_IRI, args=[deposit_collection.name]),
        data=xml_data,
    )

    assert response.status_code == status.HTTP_201_CREATED, response.content.decode(
    )
    response_content = ElementTree.fromstring(response.content)

    # Ensure the deposit is finalized
    deposit_id = int(
        response_content.findtext("swh:deposit_id", namespaces=NAMESPACES))
    deposit = Deposit.objects.get(pk=deposit_id)
    assert deposit.swhid == str(swhid_target)
    assert deposit.swhid_context == str(swhid_reference)
    assert deposit.complete_date == deposit.reception_date
    assert deposit.complete_date is not None
    assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS

    # Ensure metadata stored in the metadata storage is consistent
    metadata_authority = MetadataAuthority(
        type=MetadataAuthorityType.DEPOSIT_CLIENT,
        url=deposit_client.provider_url,
    )
    actual_authority = swh_storage.metadata_authority_get(
        MetadataAuthorityType.DEPOSIT_CLIENT, url=deposit_client.provider_url)
    assert actual_authority == metadata_authority
    config = APIConfig()
    metadata_fetcher = MetadataFetcher(
        name=config.tool["name"],
        version=config.tool["version"],
    )
    actual_fetcher = swh_storage.metadata_fetcher_get(config.tool["name"],
                                                      config.tool["version"])
    assert actual_fetcher == metadata_fetcher

    # Get the deposited metadata object and check it:
    page_results = swh_storage.raw_extrinsic_metadata_get(
        swhid_target, metadata_authority)

    assert len(page_results.results) == 1
    assert page_results.next_page_token is None

    metadata_context = compute_metadata_context(swhid_reference)
    metadata = RawExtrinsicMetadata(
        target=swhid_target,
        discovery_date=deposit.complete_date,
        authority=metadata_authority,
        fetcher=metadata_fetcher,
        format="sword-v2-atom-codemeta",
        metadata=xml_data.encode(),
        **metadata_context,
    )
    assert page_results == PagedResult(
        results=[metadata],
        next_page_token=None,
    )

    # Get metadata about the deposited metadata object and check it:
    _assert_deposit_info_on_metadata(swh_storage, metadata.swhid(), deposit,
                                     metadata_fetcher)
def test_nixguix():
    """A nixguix revision row yields two metadata objects: the raw source
    entry under the nixpkgs-unstable authority, then the original artifacts
    under the SWH authority."""
    extrinsic_metadata = {
        "url":
        "https://files.pythonhosted.org/packages/source/a/alerta/alerta-7.4.5.tar.gz",
        "integrity": "sha256-km8RAaG1ep+tYR8eHVr3UWk+/MNEqdsBr1Di/g02LYQ=",
    }
    original_artifacts = [{
        "length": 34903,
        "filename": "alerta-7.4.5.tar.gz",
        "checksums": {
            "sha1": "66db4398b664de272fd5aa6610caa776b5e64651",
            "sha256": "926f1101a1b57a9fad611f1e1d5af751693efcc344a9db01af50e2fe0d362d84",
        },
    }]

    row = {
        "id": b"\x00\x01\xbaM\xd0S\x94\x85\x02\x11\xd7\xb3\x85M\x99\x13\xd2:\xe3y",
        "directory": DIRECTORY_ID,
        "date": None,
        "committer_date": None,
        "type": "tar",
        "message": b"",
        "metadata": {
            "extrinsic": {
                "raw": extrinsic_metadata,
                "when": "2020-06-03T11:25:05.259341+00:00",
                "provider":
                "https://nix-community.github.io/nixpkgs-swh/sources-unstable.json",
            },
            "original_artifact": original_artifacts,
        },
    }

    origin_url = "https://nix-community.github.io/nixpkgs-swh/sources-unstable.json"

    # Mocked storage: we only check the calls handle_row makes.
    storage = Mock()

    def origin_get(urls):
        assert urls == [origin_url]
        return [Origin(url=origin_url)]

    storage.origin_get.side_effect = origin_get
    deposit_cur = None
    handle_row(row, storage, deposit_cur, dry_run=False)

    assert storage.method_calls == [
        call.origin_get([origin_url]),
        call.raw_extrinsic_metadata_add([
            RawExtrinsicMetadata(
                target=DIRECTORY_SWHID,
                discovery_date=datetime.datetime(
                    2020, 6, 3, 11, 25, 5, 259341,
                    tzinfo=datetime.timezone.utc),
                authority=NIX_UNSTABLE_AUTHORITY,
                fetcher=FETCHER,
                format="nixguix-sources-json",
                metadata=json.dumps(extrinsic_metadata).encode(),
                origin=origin_url,
                revision=CoreSWHID.from_string(
                    "swh:1:rev:0001ba4dd05394850211d7b3854d9913d23ae379"),
            ),
        ]),
        call.raw_extrinsic_metadata_add([
            RawExtrinsicMetadata(
                target=DIRECTORY_SWHID,
                discovery_date=datetime.datetime(
                    2020, 6, 3, 11, 25, 5, 259341,
                    tzinfo=datetime.timezone.utc),
                authority=SWH_AUTHORITY,
                fetcher=FETCHER,
                format="original-artifacts-json",
                metadata=json.dumps(original_artifacts).encode(),
                origin=origin_url,
                revision=CoreSWHID.from_string(
                    "swh:1:rev:0001ba4dd05394850211d7b3854d9913d23ae379"),
            ),
        ]),
    ]
def test_pypi_good_origin():
    """Tests loading a revision whose origin we can find"""
    source_original_artifact = {
        "url":
        "https://files.pythonhosted.org/packages/34/4f/30087f22eaae8ad7077a28ce157342745a2977e264b8a8e4e7f804a8aa5e/PyPDFLite-0.1.32.tar.gz",
        "date": "2014-05-07T22:03:00",
        "sha1": "3289269f75b4111dd00eaea53e00330db9a1db12",
        "size": 46644,
        "sha256": "911497d655cf7ef6530c5b57773dad7da97e21cf4d608ad9ad1e38bd7bec7824",
        "filename": "PyPDFLite-0.1.32.tar.gz",
        "sha1_git": "1e5c38014731242cfa8594839bcba8a0c4e158c5",
        "blake2s256": "45792e57873f56d385c694e36c98a580cbba60d5ea91eb6fd0a2d1c71c1fb385",
        "archive_type": "tar",
    }

    # Same artifact, in the layout the test expects after handling the row
    # (see the final metadata assertion below).
    dest_original_artifacts = [{
        "url":
        "https://files.pythonhosted.org/packages/34/4f/30087f22eaae8ad7077a28ce157342745a2977e264b8a8e4e7f804a8aa5e/PyPDFLite-0.1.32.tar.gz",
        "filename": "PyPDFLite-0.1.32.tar.gz",
        "archive_type": "tar",
        "length": 46644,
        "checksums": {
            "sha1": "3289269f75b4111dd00eaea53e00330db9a1db12",
            "sha256": "911497d655cf7ef6530c5b57773dad7da97e21cf4d608ad9ad1e38bd7bec7824",
            "sha1_git": "1e5c38014731242cfa8594839bcba8a0c4e158c5",
            "blake2s256": "45792e57873f56d385c694e36c98a580cbba60d5ea91eb6fd0a2d1c71c1fb385",
        },
    }]

    revision_id = b"N\xa9\x91|\xdfS\xcd\x13SJ\x04.N\xb3x{\x86\xc84\xd2"
    row = {
        "id": revision_id,
        "directory": DIRECTORY_ID,
        "date": datetime.datetime(2014, 5, 7, 22, 3,
                                  tzinfo=datetime.timezone.utc),
        "committer_date": datetime.datetime(2014, 5, 7, 22, 3,
                                            tzinfo=datetime.timezone.utc),
        "type": "tar",
        "message": b"0.1.32",
        "metadata": {
            "original_artifact": source_original_artifact
        },
    }

    origin_url = "https://pypi.org/project/PyPDFLite/"

    storage = get_storage("memory")

    # Populate the in-memory storage with an origin whose latest visit has a
    # snapshot pointing at the revision, so the origin can be found.
    snapshot_id = b"42" * 10
    storage.origin_add([Origin(url=origin_url)])
    storage.origin_visit_add(
        [OriginVisit(origin=origin_url, visit=1, date=now(), type="pypi")])
    storage.origin_visit_status_add([
        OriginVisitStatus(
            origin=origin_url,
            visit=1,
            date=now(),
            status="partial",
            snapshot=snapshot_id,
        )
    ])
    storage.snapshot_add([
        Snapshot(
            id=snapshot_id,
            branches={
                b"foo": SnapshotBranch(
                    target_type=TargetType.REVISION,
                    target=revision_id,
                )
            },
        )
    ])
    storage.metadata_authority_add([
        attr.evolve(PYPI_AUTHORITY, metadata={}),
        attr.evolve(SWH_AUTHORITY, metadata={}),
    ])
    storage.metadata_fetcher_add([FETCHER])
    deposit_cur = None
    handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False)

    revision_swhid = CoreSWHID.from_string(
        "swh:1:rev:4ea9917cdf53cd13534a042e4eb3787b86c834d2")

    # Nothing stored under the PyPI authority for this row...
    assert storage.raw_extrinsic_metadata_get(
        DIRECTORY_SWHID,
        authority=PYPI_AUTHORITY,
    ) == PagedResult(
        results=[],
        next_page_token=None,
    )
    # ...but the original artifacts end up under the SWH authority.
    assert storage.raw_extrinsic_metadata_get(
        DIRECTORY_SWHID,
        authority=SWH_AUTHORITY,
    ) == PagedResult(
        results=[
            RawExtrinsicMetadata(
                target=DIRECTORY_SWHID,
                discovery_date=datetime.datetime(
                    2014,
                    5,
                    7,
                    22,
                    3,
                    tzinfo=datetime.timezone.utc,
                ),
                authority=SWH_AUTHORITY,
                fetcher=FETCHER,
                format="original-artifacts-json",
                metadata=json.dumps(dest_original_artifacts).encode(),
                origin=origin_url,
                revision=revision_swhid,
            ),
        ],
        next_page_token=None,
    )
def test_pypi_3(mocker):
    """Tests loading a revision generated by a very old PyPI loader that
    does not have a provider or has 'project' metadata."""
    # The PyPI API lookup fails, so no origin can be attached.
    mocker.patch(
        "swh.storage.migrate_extrinsic_metadata.urlopen",
        side_effect=urllib.error.HTTPError(None, 404, "Not Found", None, None),
    )

    source_original_artifact = {
        "url":
        "https://files.pythonhosted.org/packages/34/4f/30087f22eaae8ad7077a28ce157342745a2977e264b8a8e4e7f804a8aa5e/PyPDFLite-0.1.32.tar.gz",
        "date": "2014-05-07T22:03:00",
        "sha1": "3289269f75b4111dd00eaea53e00330db9a1db12",
        "size": 46644,
        "sha256": "911497d655cf7ef6530c5b57773dad7da97e21cf4d608ad9ad1e38bd7bec7824",
        "filename": "PyPDFLite-0.1.32.tar.gz",
        "sha1_git": "1e5c38014731242cfa8594839bcba8a0c4e158c5",
        "blake2s256": "45792e57873f56d385c694e36c98a580cbba60d5ea91eb6fd0a2d1c71c1fb385",
        "archive_type": "tar",
    }

    # Same artifact, in the layout the test expects after handling the row.
    dest_original_artifacts = [{
        "url":
        "https://files.pythonhosted.org/packages/34/4f/30087f22eaae8ad7077a28ce157342745a2977e264b8a8e4e7f804a8aa5e/PyPDFLite-0.1.32.tar.gz",
        "filename": "PyPDFLite-0.1.32.tar.gz",
        "archive_type": "tar",
        "length": 46644,
        "checksums": {
            "sha1": "3289269f75b4111dd00eaea53e00330db9a1db12",
            "sha256": "911497d655cf7ef6530c5b57773dad7da97e21cf4d608ad9ad1e38bd7bec7824",
            "sha1_git": "1e5c38014731242cfa8594839bcba8a0c4e158c5",
            "blake2s256": "45792e57873f56d385c694e36c98a580cbba60d5ea91eb6fd0a2d1c71c1fb385",
        },
    }]

    row = {
        "id": b"N\xa9\x91|\xdfS\xcd\x13SJ\x04.N\xb3x{\x86\xc84\xd2",
        "directory": DIRECTORY_ID,
        "date": datetime.datetime(2014, 5, 7, 22, 3,
                                  tzinfo=datetime.timezone.utc),
        "committer_date": datetime.datetime(2014, 5, 7, 22, 3,
                                            tzinfo=datetime.timezone.utc),
        "type": "tar",
        "message": b"0.1.32",
        "metadata": {
            "original_artifact": source_original_artifact
        },
    }

    origin_url = "https://pypi.org/project/PyPDFLite/"

    storage = get_storage("memory")

    storage.origin_add([Origin(url=origin_url)])
    storage.metadata_authority_add([
        attr.evolve(PYPI_AUTHORITY, metadata={}),
        attr.evolve(SWH_AUTHORITY, metadata={}),
    ])
    storage.metadata_fetcher_add([FETCHER])
    deposit_cur = None
    handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False)

    revision_swhid = CoreSWHID.from_string(
        "swh:1:rev:4ea9917cdf53cd13534a042e4eb3787b86c834d2")

    # Nothing stored under the PyPI authority for this row...
    assert storage.raw_extrinsic_metadata_get(
        DIRECTORY_SWHID,
        authority=PYPI_AUTHORITY,
    ) == PagedResult(
        results=[],
        next_page_token=None,
    )
    # ...the artifacts go to the SWH authority, with origin=None since the
    # PyPI lookup 404'd.
    assert storage.raw_extrinsic_metadata_get(
        DIRECTORY_SWHID,
        authority=SWH_AUTHORITY,
    ) == PagedResult(
        results=[
            RawExtrinsicMetadata(
                target=DIRECTORY_SWHID,
                discovery_date=datetime.datetime(
                    2014,
                    5,
                    7,
                    22,
                    3,
                    tzinfo=datetime.timezone.utc,
                ),
                authority=SWH_AUTHORITY,
                fetcher=FETCHER,
                format="original-artifacts-json",
                metadata=json.dumps(dest_original_artifacts).encode(),
                origin=None,
                revision=revision_swhid,
            ),
        ],
        next_page_token=None,
    )
def test_pypi_2(mocker):
    """Tests loading a revision generated by an old PyPI loader that
    does not have a provider, but has 'project' metadata."""
    # The PyPI API lookup fails with a 404; the 'project' metadata from the
    # row itself is what gets attributed to the PyPI authority.
    mocker.patch(
        "swh.storage.migrate_extrinsic_metadata.urlopen",
        side_effect=urllib.error.HTTPError(None, 404, "Not Found", None, None),
    )

    # Subset of the 'project' dict stored by the old loader.
    extrinsic_metadata = {
        "name": "jupyterhub-simx",
        "author": "Jupyter Development Team",
        "license": "BSD",
        "summary": "JupyterHub: A multi-user server for Jupyter notebooks",
        "version": "1.0.5",
        # ...
    }

    # Legacy flat artifact format (top-level checksums, 'date'/'size').
    source_original_artifacts = [{
        "url": "https://files.pythonhosted.org/packages/72/28/a8098763d78e2c4607cb67602c0d726a97ac38d4c1f531aac28f49de2e1a/jupyterhub-simx-1.0.5.tar.gz",
        "date": "2019-01-23T22:10:55",
        "sha1": "ede3eadd5a06e70912e3ba7cfccef789c4ad3168",
        "size": 2346538,
        "sha256": "0399d7f5f0d90c525d369f0507ad0e8ef8729c1c7fa63aadfc46a27514d14a46",
        "filename": "jupyterhub-simx-1.0.5.tar.gz",
        "sha1_git": "734301124712182eb30fc90e97cc18cef5432f02",
        "blake2s256": "bb4aa82ffb5891a05dcf6d4dce3ad56fd2c18e9abdba9d20972910649d869322",
        "archive_type": "tar",
    }]

    # Expected normalized form ('length' + nested 'checksums').
    dest_original_artifacts = [{
        "url": "https://files.pythonhosted.org/packages/72/28/a8098763d78e2c4607cb67602c0d726a97ac38d4c1f531aac28f49de2e1a/jupyterhub-simx-1.0.5.tar.gz",
        "filename": "jupyterhub-simx-1.0.5.tar.gz",
        "archive_type": "tar",
        "length": 2346538,
        "checksums": {
            "sha1": "ede3eadd5a06e70912e3ba7cfccef789c4ad3168",
            "sha256": "0399d7f5f0d90c525d369f0507ad0e8ef8729c1c7fa63aadfc46a27514d14a46",
            "sha1_git": "734301124712182eb30fc90e97cc18cef5432f02",
            "blake2s256": "bb4aa82ffb5891a05dcf6d4dce3ad56fd2c18e9abdba9d20972910649d869322",
        },
    }]

    # Legacy revision row: metadata holds both 'project' and
    # 'original_artifact', but no 'extrinsic' provider info.
    row = {
        "id": b"\x00\x00\x04\xd68,J\xd4\xc0Q\x92fbl6U\x1f\x0eQ\xca",
        "directory": DIRECTORY_ID,
        "date": datetime.datetime(2019, 1, 23, 22, 10, 55, tzinfo=datetime.timezone.utc),
        "committer_date": datetime.datetime(2019, 1, 23, 22, 10, 55, tzinfo=datetime.timezone.utc),
        "type": "tar",
        "message": b"1.0.5",
        "metadata": {
            "project": extrinsic_metadata,
            "original_artifact": source_original_artifacts,
        },
    }

    origin_url = "https://pypi.org/project/jupyterhub-simx/"

    storage = get_storage("memory")
    storage.origin_add([Origin(url=origin_url)])
    storage.metadata_authority_add([
        attr.evolve(PYPI_AUTHORITY, metadata={}),
        attr.evolve(SWH_AUTHORITY, metadata={}),
    ])
    storage.metadata_fetcher_add([FETCHER])
    deposit_cur = None
    handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False)

    revision_swhid = CoreSWHID.from_string(
        "swh:1:rev:000004d6382c4ad4c0519266626c36551f0e51ca")

    # The 'project' dict is recorded as pypi-project-json under the PyPI
    # authority; no origin is attached (no provider info in the row).
    assert storage.raw_extrinsic_metadata_get(
        DIRECTORY_SWHID,
        authority=PYPI_AUTHORITY,
    ) == PagedResult(
        results=[
            RawExtrinsicMetadata(
                target=DIRECTORY_SWHID,
                discovery_date=datetime.datetime(
                    2019, 1, 23, 22, 10, 55,
                    tzinfo=datetime.timezone.utc,
                ),
                authority=PYPI_AUTHORITY,
                fetcher=FETCHER,
                format="pypi-project-json",
                metadata=json.dumps(extrinsic_metadata).encode(),
                origin=None,
                revision=revision_swhid,
            ),
        ],
        next_page_token=None,
    )
    # The normalized original artifacts go under the SWH authority.
    assert storage.raw_extrinsic_metadata_get(
        DIRECTORY_SWHID,
        authority=SWH_AUTHORITY,
    ) == PagedResult(
        results=[
            RawExtrinsicMetadata(
                target=DIRECTORY_SWHID,
                discovery_date=datetime.datetime(
                    2019, 1, 23, 22, 10, 55,
                    tzinfo=datetime.timezone.utc,
                ),
                authority=SWH_AUTHORITY,
                fetcher=FETCHER,
                format="original-artifacts-json",
                metadata=json.dumps(dest_original_artifacts).encode(),
                origin=None,
                revision=revision_swhid,
            ),
        ],
        next_page_token=None,
    )
def test_pypi_1():
    """Tests loading a revision generated by a new PyPI loader that
    has a provider."""
    # Raw PyPI API payload stored by the new loader under
    # metadata['extrinsic']['raw'].
    extrinsic_metadata = {
        "url": "https://files.pythonhosted.org/packages/70/89/a498245baf1bf3dde73d3da00b4b067a8aa7c7378ad83472078803ea3e43/m3-ui-2.2.73.tar.gz",
        "size": 3933168,
        "digests": {
            "md5": "a374ac3f655e97df5db5335e2142d344",
            "sha256": "1bc2756f7d0d2e15cf5880ca697682ff35e8b58116bf73eb9c78b3db358c5b7d",
        },
        "has_sig": False,
        "filename": "m3-ui-2.2.73.tar.gz",
        "downloads": -1,
        "md5_digest": "a374ac3f655e97df5db5335e2142d344",
        "packagetype": "sdist",
        "upload_time": "2019-11-11T06:21:20",
        "comment_text": "",
        "python_version": "source",
        "requires_python": None,
        "upload_time_iso_8601": "2019-11-11T06:21:20.073082Z",
    }

    # Already in the normalized format; expected to be passed through as-is.
    original_artifacts = [{
        "length": 3933168,
        "filename": "m3-ui-2.2.73.tar.gz",
        "checksums": {
            "sha1": "9f4ec7ce64b7fea4b122e85d47ea31146c367b03",
            "sha256": "1bc2756f7d0d2e15cf5880ca697682ff35e8b58116bf73eb9c78b3db358c5b7d",
        },
    }]

    # Revision row in the new format: 'extrinsic' carries the provider URL
    # and the 'when' timestamp used as discovery date below.
    row = {
        "id": b"\x00\x00\x07a{S\xe7\xb1E\x8fi]\xd0}\xe4\xceU\xaf\x15\x17",
        "directory": DIRECTORY_ID,
        "date": datetime.datetime(
            2019, 11, 11, 6, 21, 20,
            tzinfo=datetime.timezone.utc,
        ),
        "committer_date": datetime.datetime(
            2019, 11, 11, 6, 21, 20,
            tzinfo=datetime.timezone.utc,
        ),
        "type": "tar",
        "message": b"2.2.73",
        "metadata": {
            "extrinsic": {
                "raw": extrinsic_metadata,
                "when": "2020-01-23T18:43:09.109407+00:00",
                "provider": "https://pypi.org/pypi/m3-ui/json",
            },
            "intrinsic": {
                "raw": {
                    "name": "m3-ui",
                    "summary": "======",
                    "version": "2.2.73",
                    # ...
                    "metadata_version": "1.1",
                },
                "tool": "PKG-INFO",
            },
            "original_artifact": original_artifacts,
        },
    }

    origin_url = "https://pypi.org/project/m3-ui/"

    storage = get_storage("memory")
    storage.origin_add([Origin(url=origin_url)])
    storage.metadata_authority_add([
        attr.evolve(PYPI_AUTHORITY, metadata={}),
        attr.evolve(SWH_AUTHORITY, metadata={}),
    ])
    storage.metadata_fetcher_add([FETCHER])
    deposit_cur = None
    handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False)

    revision_swhid = CoreSWHID.from_string(
        "swh:1:rev:000007617b53e7b1458f695dd07de4ce55af1517")

    # With a provider present, both records carry the origin URL and use the
    # 'extrinsic.when' timestamp (not the revision date) as discovery date.
    assert storage.raw_extrinsic_metadata_get(
        DIRECTORY_SWHID,
        authority=PYPI_AUTHORITY,
    ) == PagedResult(
        results=[
            RawExtrinsicMetadata(
                target=DIRECTORY_SWHID,
                discovery_date=datetime.datetime(
                    2020, 1, 23, 18, 43, 9, 109407,
                    tzinfo=datetime.timezone.utc,
                ),
                authority=PYPI_AUTHORITY,
                fetcher=FETCHER,
                format="pypi-project-json",
                metadata=json.dumps(extrinsic_metadata).encode(),
                origin=origin_url,
                revision=revision_swhid,
            ),
        ],
        next_page_token=None,
    )
    assert storage.raw_extrinsic_metadata_get(
        DIRECTORY_SWHID,
        authority=SWH_AUTHORITY,
    ) == PagedResult(
        results=[
            RawExtrinsicMetadata(
                target=DIRECTORY_SWHID,
                discovery_date=datetime.datetime(
                    2020, 1, 23, 18, 43, 9, 109407,
                    tzinfo=datetime.timezone.utc,
                ),
                authority=SWH_AUTHORITY,
                fetcher=FETCHER,
                format="original-artifacts-json",
                metadata=json.dumps(original_artifacts).encode(),
                origin=origin_url,
                revision=revision_swhid,
            ),
        ],
        next_page_token=None,
    )
def test_opam_metadata(tmpdir, requests_mock_datadir, fake_opam_root,
                       swh_storage, datadir):
    """Loading an opam package attaches its opam definition file as raw
    extrinsic metadata on the released directory."""
    repo_url = f"file://{datadir}/fake_opam_repo"
    package_name = "ocb"
    origin_url = f"opam+{repo_url}/packages/{package_name}"

    opam_loader = OpamLoader(
        swh_storage,
        origin_url,
        fake_opam_root,
        "loadertest",
        repo_url,
        package_name,
        initialize_opam_root=True,
    )
    load_status = opam_loader.load()
    assert load_status["status"] == "eventful"

    # The snapshot has a single release branch plus a HEAD alias to it.
    release_id = hash_to_bytes("c231e541eb29c712635ada394b04127ac69e9fb0")
    expected_snapshot = Snapshot(
        id=hash_to_bytes(load_status["snapshot_id"]),
        branches={
            b"HEAD": SnapshotBranch(
                target=b"ocb.0.1",
                target_type=TargetType.ALIAS,
            ),
            b"ocb.0.1": SnapshotBranch(
                target=release_id,
                target_type=TargetType.RELEASE,
            ),
        },
    )
    assert_last_visit_matches(swh_storage, origin_url, status="full",
                              type="opam", snapshot=expected_snapshot.id)
    check_snapshot(expected_snapshot, swh_storage)

    release = swh_storage.release_get([release_id])[0]
    assert release is not None

    # Metadata is recorded on the directory the release points to,
    # attributed to the opam repository as a forge authority.
    release_swhid = CoreSWHID(object_type=ObjectType.RELEASE,
                              object_id=release_id)
    directory_swhid = ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY,
                                    object_id=release.target)
    authority = MetadataAuthority(
        type=MetadataAuthorityType.FORGE,
        url=repo_url,
    )
    expected_metadata = [
        RawExtrinsicMetadata(
            target=directory_swhid,
            authority=authority,
            fetcher=MetadataFetcher(
                name="swh.loader.package.opam.loader.OpamLoader",
                version=__version__,
            ),
            discovery_date=opam_loader.visit_date,
            format="opam-package-definition",
            metadata=OCB_METADATA,
            origin=origin_url,
            release=release_swhid,
        )
    ]
    assert swh_storage.raw_extrinsic_metadata_get(
        directory_swhid,
        authority,
    ) == PagedResult(
        next_page_token=None,
        results=expected_metadata,
    )
# (closes the list literal opened above this chunk)
    ),
]

# Fetchers referenced by the metadata fixtures below.
METADATA_FETCHERS = [
    MetadataFetcher(
        name="test-fetcher",
        version="1.0.0",
        metadata={},
    )
]

# One origin-targeting and one content-targeting metadata record; the
# authority/fetcher are embedded with their 'metadata' field set to None.
RAW_EXTRINSIC_METADATA = [
    RawExtrinsicMetadata(
        target=Origin("http://example.org/foo.git").swhid(),
        discovery_date=datetime.datetime(2020, 7, 30, 17, 8, 20, tzinfo=UTC),
        authority=attr.evolve(METADATA_AUTHORITIES[0], metadata=None),
        fetcher=attr.evolve(METADATA_FETCHERS[0], metadata=None),
        format="json",
        metadata=b'{"foo": "bar"}',
    ),
    RawExtrinsicMetadata(
        target=ExtendedSWHID.from_string(str(CONTENTS[0].swhid())),
        discovery_date=datetime.datetime(2020, 7, 30, 17, 8, 20, tzinfo=UTC),
        authority=attr.evolve(METADATA_AUTHORITIES[0], metadata=None),
        fetcher=attr.evolve(METADATA_FETCHERS[0], metadata=None),
        format="json",
        metadata=b'{"foo": "bar"}',
    ),
]

# Mapping of object type name -> fixture objects (continues below this chunk).
TEST_OBJECTS: Dict[str, Sequence[BaseModel]] = {
    "content": CONTENTS,
class StorageData:
    """Data model objects to use within tests.

    The objects are interrelated: directories reference contents and each
    other, revisions reference directories and previous revisions, releases
    and snapshots reference revisions, and the metadata objects reference
    the authorities/fetchers/origins defined further down.
    """

    # -- contents ---------------------------------------------------------
    content = Content(
        data=b"42\n",
        length=3,
        sha1=hash_to_bytes("34973274ccef6ab4dfaaf86599792fa9c3fe4689"),
        sha1_git=hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"),
        sha256=hash_to_bytes(
            "084c799cd551dd1d8d5c5f9a5d593b2e931f5e36122ee5c793c1d08a19839cc0"
        ),
        blake2s256=hash_to_bytes(
            "d5fe1939576527e42cfd76a9455a2432fe7f56669564577dd93c4280e76d661d"
        ),
        status="visible",
    )
    content2 = Content(
        data=b"4242\n",
        length=5,
        sha1=hash_to_bytes("61c2b3a30496d329e21af70dd2d7e097046d07b7"),
        sha1_git=hash_to_bytes("36fade77193cb6d2bd826161a0979d64c28ab4fa"),
        sha256=hash_to_bytes(
            "859f0b154fdb2d630f45e1ecae4a862915435e663248bb8461d914696fc047cd"
        ),
        blake2s256=hash_to_bytes(
            "849c20fad132b7c2d62c15de310adfe87be94a379941bed295e8141c6219810d"
        ),
        status="visible",
    )
    # content3 is the only content with a ctime set.
    content3 = Content(
        data=b"424242\n",
        length=7,
        sha1=hash_to_bytes("3e21cc4942a4234c9e5edd8a9cacd1670fe59f13"),
        sha1_git=hash_to_bytes("c932c7649c6dfa4b82327d121215116909eb3bea"),
        sha256=hash_to_bytes(
            "92fb72daf8c6818288a35137b72155f507e5de8d892712ab96277aaed8cf8a36"
        ),
        blake2s256=hash_to_bytes(
            "76d0346f44e5a27f6bafdd9c2befd304aff83780f93121d801ab6a1d4769db11"
        ),
        status="visible",
        ctime=datetime.datetime(2019, 12, 1, tzinfo=datetime.timezone.utc),
    )
    contents: Tuple[Content, ...] = (content, content2, content3)

    # -- skipped contents -------------------------------------------------
    skipped_content = SkippedContent(
        length=1024 * 1024 * 200,
        sha1_git=hash_to_bytes("33e45d56f88993aae6a0198013efa80716fd8920"),
        sha1=hash_to_bytes("43e45d56f88993aae6a0198013efa80716fd8920"),
        sha256=hash_to_bytes(
            "7bbd052ab054ef222c1c87be60cd191addedd24cc882d1f5f7f7be61dc61bb3a"
        ),
        blake2s256=hash_to_bytes(
            "ade18b1adecb33f891ca36664da676e12c772cc193778aac9a137b8dc5834b9b"
        ),
        reason="Content too long",
        status="absent",
        origin="file:///dev/zero",
    )
    skipped_content2 = SkippedContent(
        length=1024 * 1024 * 300,
        sha1_git=hash_to_bytes("44e45d56f88993aae6a0198013efa80716fd8921"),
        sha1=hash_to_bytes("54e45d56f88993aae6a0198013efa80716fd8920"),
        sha256=hash_to_bytes(
            "8cbd052ab054ef222c1c87be60cd191addedd24cc882d1f5f7f7be61dc61bb3a"
        ),
        blake2s256=hash_to_bytes(
            "9ce18b1adecb33f891ca36664da676e12c772cc193778aac9a137b8dc5834b9b"
        ),
        reason="Content too long",
        status="absent",
    )
    skipped_contents: Tuple[SkippedContent, ...] = (
        skipped_content,
        skipped_content2,
    )

    # -- directories ------------------------------------------------------
    # directory5 has no entries at all.
    directory5 = Directory(
        id=hash_to_bytes("4b825dc642cb6eb9a060e54bf8d69288fbee4904"),
        entries=(),
    )
    directory = Directory(
        id=hash_to_bytes("5256e856a0a0898966d6ba14feb4388b8b82d302"),
        entries=tuple(
            [
                DirectoryEntry(
                    name=b"foo",
                    type="file",
                    target=content.sha1_git,
                    perms=from_disk.DentryPerms.content,
                ),
                # Entry name deliberately contains a non-UTF-8 byte.
                DirectoryEntry(
                    name=b"bar\xc3",
                    type="dir",
                    target=directory5.id,
                    perms=from_disk.DentryPerms.directory,
                ),
            ],
        ),
    )
    directory2 = Directory(
        id=hash_to_bytes("8505808532953da7d2581741f01b29c04b1cb9ab"),
        entries=tuple(
            [
                DirectoryEntry(
                    name=b"oof",
                    type="file",
                    target=content2.sha1_git,
                    perms=from_disk.DentryPerms.content,
                )
            ],
        ),
    )
    directory3 = Directory(
        id=hash_to_bytes("13089e6e544f78df7c9a40a3059050d10dee686a"),
        entries=tuple(
            [
                DirectoryEntry(
                    name=b"foo",
                    type="file",
                    target=content.sha1_git,
                    perms=from_disk.DentryPerms.content,
                ),
                DirectoryEntry(
                    name=b"subdir",
                    type="dir",
                    target=directory.id,
                    perms=from_disk.DentryPerms.directory,
                ),
                DirectoryEntry(
                    name=b"hello",
                    type="file",
                    target=content2.sha1_git,
                    perms=from_disk.DentryPerms.content,
                ),
            ],
        ),
    )
    directory4 = Directory(
        id=hash_to_bytes("cd5dfd9c09d9e99ed123bc7937a0d5fddc3cd531"),
        entries=tuple(
            [
                DirectoryEntry(
                    name=b"subdir1",
                    type="dir",
                    target=directory3.id,
                    perms=from_disk.DentryPerms.directory,
                )
            ],
        ),
    )
    # directory6 carries an explicit raw_manifest; its entry targets are
    # placeholder ids (all-zero / all-one bytes) matching the manifest bytes.
    directory6 = Directory(
        id=hash_to_bytes("afa0105cfcaa14fdbacee344e96659170bb1bda5"),
        entries=tuple(
            [
                DirectoryEntry(
                    name=b"foo",
                    type="file",
                    target=b"\x00" * 20,
                    perms=from_disk.DentryPerms.content,
                ),
                DirectoryEntry(
                    name=b"bar",
                    type="dir",
                    target=b"\x01" * 20,
                    perms=from_disk.DentryPerms.directory,
                ),
            ],
        ),
        raw_manifest=(
            b"tree 61\x00"
            b"100644 foo\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"  # noqa
            b"40000 bar\x00\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01"  # noqa
        ),
    )
    directories: Tuple[Directory, ...] = (
        directory2,
        directory,
        directory3,
        directory4,
        directory5,
        directory6,
    )

    # -- git revisions ----------------------------------------------------
    revision = Revision(
        id=hash_to_bytes("01a7114f36fddd5ef2511b2cadda237a68adbb12"),
        message=b"hello",
        author=Person(
            name=b"Nicolas Dandrimont",
            email=b"*****@*****.**",
            # NOTE: trailing space in fullname is intentional fixture data.
            fullname=b"Nicolas Dandrimont <*****@*****.**> ",
        ),
        date=TimestampWithTimezone(
            timestamp=Timestamp(seconds=1234567890, microseconds=0),
            offset_bytes=b"+0200",
        ),
        committer=Person(
            name=b"St\xc3fano Zacchiroli",
            email=b"*****@*****.**",
            fullname=b"St\xc3fano Zacchiroli <*****@*****.**>",
        ),
        committer_date=TimestampWithTimezone(
            timestamp=Timestamp(seconds=1123456789, microseconds=0),
            offset_bytes=b"+0200",
        ),
        parents=(),
        type=RevisionType.GIT,
        directory=directory.id,
        metadata={
            "checksums": {
                "sha1": "tarball-sha1",
                "sha256": "tarball-sha256",
            },
            "signed-off-by": "some-dude",
        },
        extra_headers=(
            (b"gpgsig", b"test123"),
            (b"mergetag", b"foo\\bar"),
            (b"mergetag", b"\x22\xaf\x89\x80\x01\x00"),
        ),
        synthetic=True,
    )
    revision2 = Revision(
        id=hash_to_bytes("a646dd94c912829659b22a1e7e143d2fa5ebde1b"),
        message=b"hello again",
        author=Person(
            name=b"Roberto Dicosmo",
            email=b"*****@*****.**",
            fullname=b"Roberto Dicosmo <*****@*****.**>",
        ),
        date=TimestampWithTimezone(
            timestamp=Timestamp(
                seconds=1234567843,
                microseconds=220000,
            ),
            offset_bytes=b"-1200",
        ),
        committer=Person(
            name=b"tony",
            email=b"*****@*****.**",
            fullname=b"tony <*****@*****.**>",
        ),
        committer_date=TimestampWithTimezone(
            timestamp=Timestamp(
                seconds=1123456789,
                microseconds=220000,
            ),
            offset_bytes=b"+0000",
        ),
        parents=tuple([revision.id]),
        type=RevisionType.GIT,
        directory=directory2.id,
        metadata=None,
        extra_headers=(),
        synthetic=False,
    )
    revision3 = Revision(
        id=hash_to_bytes("beb2844dff30658e27573cb46eb55980e974b391"),
        message=b"a simple revision with no parents this time",
        author=Person(
            name=b"Roberto Dicosmo",
            email=b"*****@*****.**",
            fullname=b"Roberto Dicosmo <*****@*****.**>",
        ),
        date=TimestampWithTimezone(
            timestamp=Timestamp(
                seconds=1234567843,
                microseconds=220000,
            ),
            offset_bytes=b"-1200",
        ),
        committer=Person(
            name=b"tony",
            email=b"*****@*****.**",
            fullname=b"tony <*****@*****.**>",
        ),
        committer_date=TimestampWithTimezone(
            timestamp=Timestamp(
                seconds=1127351742,
                microseconds=220000,
            ),
            offset_bytes=b"+0000",
        ),
        parents=tuple([revision.id, revision2.id]),
        type=RevisionType.GIT,
        directory=directory2.id,
        metadata=None,
        extra_headers=(),
        synthetic=True,
    )
    revision4 = Revision(
        id=hash_to_bytes("ae860aec43700c7f5a295e2ef47e2ae41b535dfe"),
        message=b"parent of self.revision2",
        author=Person(
            name=b"me",
            email=b"*****@*****.**",
            fullname=b"me <*****@*****.**>",
        ),
        date=TimestampWithTimezone(
            timestamp=Timestamp(
                seconds=1234567843,
                microseconds=220000,
            ),
            offset_bytes=b"-1200",
        ),
        committer=Person(
            name=b"committer-dude",
            email=b"*****@*****.**",
            fullname=b"committer-dude <*****@*****.**>",
        ),
        committer_date=TimestampWithTimezone(
            timestamp=Timestamp(
                seconds=1244567843,
                microseconds=220000,
            ),
            offset_bytes=b"-1200",
        ),
        parents=tuple([revision3.id]),
        type=RevisionType.GIT,
        directory=directory.id,
        metadata=None,
        extra_headers=(),
        synthetic=False,
    )
    git_revisions: Tuple[Revision, ...] = (
        revision,
        revision2,
        revision3,
        revision4,
    )

    # -- mercurial revisions ----------------------------------------------
    # Mirrors the git revisions above; the hg node id is carried either in
    # metadata['node'] or in an extra 'node' header.
    hg_revision = Revision(
        id=hash_to_bytes("951c9503541e7beaf002d7aebf2abd1629084c68"),
        message=b"hello",
        author=Person(
            name=b"Nicolas Dandrimont",
            email=b"*****@*****.**",
            fullname=b"Nicolas Dandrimont <*****@*****.**> ",
        ),
        date=TimestampWithTimezone(
            timestamp=Timestamp(seconds=1234567890, microseconds=0),
            offset_bytes=b"+0200",
        ),
        committer=Person(
            name=b"St\xc3fano Zacchiroli",
            email=b"*****@*****.**",
            fullname=b"St\xc3fano Zacchiroli <*****@*****.**>",
        ),
        committer_date=TimestampWithTimezone(
            timestamp=Timestamp(seconds=1123456789, microseconds=0),
            offset_bytes=b"+0200",
        ),
        parents=(),
        type=RevisionType.MERCURIAL,
        directory=directory.id,
        metadata={
            "checksums": {
                "sha1": "tarball-sha1",
                "sha256": "tarball-sha256",
            },
            "signed-off-by": "some-dude",
            "node": "a316dfb434af2b451c1f393496b7eaeda343f543",
        },
        extra_headers=(),
        synthetic=True,
    )
    hg_revision2 = Revision(
        id=hash_to_bytes("df4afb063236300eb13b96a0d7fff03f7b7cbbaf"),
        message=b"hello again",
        author=Person(
            name=b"Roberto Dicosmo",
            email=b"*****@*****.**",
            fullname=b"Roberto Dicosmo <*****@*****.**>",
        ),
        date=TimestampWithTimezone(
            timestamp=Timestamp(
                seconds=1234567843,
                microseconds=220000,
            ),
            offset_bytes=b"-1200",
        ),
        committer=Person(
            name=b"tony",
            email=b"*****@*****.**",
            fullname=b"tony <*****@*****.**>",
        ),
        committer_date=TimestampWithTimezone(
            timestamp=Timestamp(
                seconds=1123456789,
                microseconds=220000,
            ),
            offset_bytes=b"+0000",
        ),
        parents=tuple([hg_revision.id]),
        type=RevisionType.MERCURIAL,
        directory=directory2.id,
        metadata=None,
        extra_headers=(
            (b"node", hash_to_bytes("fa1b7c84a9b40605b67653700f268349a6d6aca1")),
        ),
        synthetic=False,
    )
    hg_revision3 = Revision(
        id=hash_to_bytes("84d8e7081b47ebb88cad9fa1f25de5f330872a37"),
        message=b"a simple revision with no parents this time",
        author=Person(
            name=b"Roberto Dicosmo",
            email=b"*****@*****.**",
            fullname=b"Roberto Dicosmo <*****@*****.**>",
        ),
        date=TimestampWithTimezone(
            timestamp=Timestamp(
                seconds=1234567843,
                microseconds=220000,
            ),
            offset_bytes=b"-1200",
        ),
        committer=Person(
            name=b"tony",
            email=b"*****@*****.**",
            fullname=b"tony <*****@*****.**>",
        ),
        committer_date=TimestampWithTimezone(
            timestamp=Timestamp(
                seconds=1127351742,
                microseconds=220000,
            ),
            offset_bytes=b"+0000",
        ),
        parents=tuple([hg_revision.id, hg_revision2.id]),
        type=RevisionType.MERCURIAL,
        directory=directory2.id,
        metadata=None,
        extra_headers=(
            (b"node", hash_to_bytes("7f294a01c49065a90b3fe8b4ad49f08ce9656ef6")),
        ),
        synthetic=True,
    )
    hg_revision4 = Revision(
        id=hash_to_bytes("4683324ba26dfe941a72cc7552e86eaaf7c27fe3"),
        message=b"parent of self.revision2",
        author=Person(
            name=b"me",
            email=b"*****@*****.**",
            fullname=b"me <*****@*****.**>",
        ),
        date=TimestampWithTimezone(
            timestamp=Timestamp(
                seconds=1234567843,
                microseconds=220000,
            ),
            offset_bytes=b"-1200",
        ),
        committer=Person(
            name=b"committer-dude",
            email=b"*****@*****.**",
            fullname=b"committer-dude <*****@*****.**>",
        ),
        committer_date=TimestampWithTimezone(
            timestamp=Timestamp(
                seconds=1244567843,
                microseconds=220000,
            ),
            offset_bytes=b"-1200",
        ),
        parents=tuple([hg_revision3.id]),
        type=RevisionType.MERCURIAL,
        directory=directory.id,
        metadata=None,
        extra_headers=(
            (b"node", hash_to_bytes("f4160af0485c85823d9e829bae2c00b00a2e6297")),
        ),
        synthetic=False,
    )
    hg_revisions: Tuple[Revision, ...] = (
        hg_revision,
        hg_revision2,
        hg_revision3,
        hg_revision4,
    )
    revisions: Tuple[Revision, ...] = git_revisions + hg_revisions

    # -- origins, authorities, fetchers, visits ---------------------------
    origins: Tuple[Origin, ...] = (
        Origin(url="https://github.com/user1/repo1"),
        Origin(url="https://github.com/user2/repo1"),
        Origin(url="https://github.com/user3/repo1"),
        Origin(url="https://gitlab.com/user1/repo1"),
        Origin(url="https://gitlab.com/user2/repo1"),
        Origin(url="https://forge.softwareheritage.org/source/repo1"),
        # Non-ASCII URL, to exercise IDN/unicode handling.
        Origin(url="https://example.рф/🏛️.txt"),
    )
    origin, origin2 = origins[:2]
    metadata_authority = MetadataAuthority(
        type=MetadataAuthorityType.DEPOSIT_CLIENT,
        url="http://hal.inria.example.com/",
    )
    metadata_authority2 = MetadataAuthority(
        type=MetadataAuthorityType.REGISTRY,
        url="http://wikidata.example.com/",
    )
    authorities: Tuple[MetadataAuthority, ...] = (
        metadata_authority,
        metadata_authority2,
    )
    metadata_fetcher = MetadataFetcher(
        name="swh-deposit",
        version="0.0.1",
    )
    metadata_fetcher2 = MetadataFetcher(
        name="swh-example",
        version="0.0.1",
    )
    fetchers: Tuple[MetadataFetcher, ...] = (metadata_fetcher, metadata_fetcher2)
    date_visit1 = datetime.datetime(2015, 1, 1, 23, 0, 0, tzinfo=datetime.timezone.utc)
    date_visit2 = datetime.datetime(2017, 1, 1, 23, 0, 0, tzinfo=datetime.timezone.utc)
    date_visit3 = datetime.datetime(2018, 1, 1, 23, 0, 0, tzinfo=datetime.timezone.utc)
    type_visit1 = "git"
    type_visit2 = "hg"
    type_visit3 = "deb"
    origin_visit = OriginVisit(
        origin=origin.url,
        visit=1,
        date=date_visit1,
        type=type_visit1,
    )
    origin_visit2 = OriginVisit(
        origin=origin.url,
        visit=2,
        date=date_visit2,
        type=type_visit1,
    )
    origin_visit3 = OriginVisit(
        origin=origin2.url,
        visit=1,
        date=date_visit1,
        type=type_visit2,
    )
    origin_visits: Tuple[OriginVisit, ...] = (
        origin_visit,
        origin_visit2,
        origin_visit3,
    )

    # -- releases ---------------------------------------------------------
    release = Release(
        id=hash_to_bytes("f7f222093a18ec60d781070abec4a630c850b837"),
        name=b"v0.0.1",
        author=Person(
            name=b"olasd",
            email=b"*****@*****.**",
            fullname=b"olasd <*****@*****.**>",
        ),
        date=TimestampWithTimezone(
            timestamp=Timestamp(seconds=1234567890, microseconds=0),
            # Unusual offset, intentional fixture data.
            offset_bytes=b"+0042",
        ),
        target=revision.id,
        target_type=ObjectType.REVISION,
        message=b"synthetic release",
        synthetic=True,
    )
    release2 = Release(
        id=hash_to_bytes("db81a26783a3f4a9db07b4759ffc37621f159bb2"),
        name=b"v0.0.2",
        author=Person(
            name=b"tony",
            email=b"*****@*****.**",
            fullname=b"tony <*****@*****.**>",
        ),
        date=TimestampWithTimezone(
            timestamp=Timestamp(seconds=1634366813, microseconds=0),
            offset_bytes=b"-0200",
        ),
        target=revision2.id,
        target_type=ObjectType.REVISION,
        message=b"v0.0.2\nMisc performance improvements + bug fixes",
        synthetic=False,
    )
    release3 = Release(
        id=hash_to_bytes("1c5d42e603ce2eea44917fadca76c78bad76aeb9"),
        name=b"v0.0.2",
        author=Person(
            name=b"tony",
            email=b"*****@*****.**",
            fullname=b"tony <*****@*****.**>",
        ),
        date=TimestampWithTimezone(
            timestamp=Timestamp(seconds=1634366813, microseconds=0),
            offset_bytes=b"-0200",
        ),
        target=revision3.id,
        target_type=ObjectType.REVISION,
        message=b"yet another synthetic release",
        synthetic=True,
    )
    releases: Tuple[Release, ...] = (release, release2, release3)

    # -- snapshots --------------------------------------------------------
    snapshot = Snapshot(
        id=hash_to_bytes("9b922e6d8d5b803c1582aabe5525b7b91150788e"),
        branches={
            b"master": SnapshotBranch(
                target=revision.id,
                target_type=TargetType.REVISION,
            ),
        },
    )
    empty_snapshot = Snapshot(
        id=hash_to_bytes("1a8893e6a86f444e8be8e7bda6cb34fb1735a00e"),
        branches={},
    )
    # complete_snapshot covers every branch target type, an alias, and a
    # dangling (None) branch.
    complete_snapshot = Snapshot(
        id=hash_to_bytes("db99fda25b43dc5cd90625ee4b0744751799c917"),
        branches={
            b"directory": SnapshotBranch(
                target=directory.id,
                target_type=TargetType.DIRECTORY,
            ),
            b"directory2": SnapshotBranch(
                target=directory2.id,
                target_type=TargetType.DIRECTORY,
            ),
            b"content": SnapshotBranch(
                target=content.sha1_git,
                target_type=TargetType.CONTENT,
            ),
            b"alias": SnapshotBranch(
                target=b"revision",
                target_type=TargetType.ALIAS,
            ),
            b"revision": SnapshotBranch(
                target=revision.id,
                target_type=TargetType.REVISION,
            ),
            b"release": SnapshotBranch(
                target=release.id,
                target_type=TargetType.RELEASE,
            ),
            b"snapshot": SnapshotBranch(
                target=empty_snapshot.id,
                target_type=TargetType.SNAPSHOT,
            ),
            b"dangling": None,
        },
    )
    snapshots: Tuple[Snapshot, ...] = (snapshot, empty_snapshot, complete_snapshot)

    # -- raw extrinsic metadata on contents -------------------------------
    content_metadata1 = RawExtrinsicMetadata(
        target=ExtendedSWHID(object_type=ExtendedObjectType.CONTENT,
                             object_id=content.sha1_git),
        origin=origin.url,
        discovery_date=datetime.datetime(2015, 1, 1, 21, 0, 0,
                                         tzinfo=datetime.timezone.utc),
        authority=metadata_authority,
        fetcher=metadata_fetcher,
        format="json",
        metadata=b'{"foo": "bar"}',
    )
    content_metadata2 = RawExtrinsicMetadata(
        target=ExtendedSWHID(object_type=ExtendedObjectType.CONTENT,
                             object_id=content.sha1_git),
        origin=origin2.url,
        discovery_date=datetime.datetime(2017, 1, 1, 22, 0, 0,
                                         tzinfo=datetime.timezone.utc),
        authority=metadata_authority,
        fetcher=metadata_fetcher,
        format="yaml",
        metadata=b"foo: bar",
    )
    # content_metadata3 fills in every optional context field
    # (visit/snapshot/release/revision/directory/path).
    content_metadata3 = RawExtrinsicMetadata(
        target=ExtendedSWHID(object_type=ExtendedObjectType.CONTENT,
                             object_id=content.sha1_git),
        discovery_date=datetime.datetime(2017, 1, 1, 22, 0, 0,
                                         tzinfo=datetime.timezone.utc),
        authority=attr.evolve(metadata_authority2, metadata=None),
        fetcher=attr.evolve(metadata_fetcher2, metadata=None),
        format="yaml",
        metadata=b"foo: bar",
        origin=origin.url,
        visit=42,
        snapshot=snapshot.swhid(),
        release=release.swhid(),
        revision=revision.swhid(),
        directory=directory.swhid(),
        path=b"/foo/bar",
    )
    content_metadata: Tuple[RawExtrinsicMetadata, ...] = (
        content_metadata1,
        content_metadata2,
        content_metadata3,
    )

    # -- raw extrinsic metadata on origins --------------------------------
    origin_metadata1 = RawExtrinsicMetadata(
        target=Origin(origin.url).swhid(),
        discovery_date=datetime.datetime(2015, 1, 1, 21, 0, 0,
                                         tzinfo=datetime.timezone.utc),
        authority=attr.evolve(metadata_authority, metadata=None),
        fetcher=attr.evolve(metadata_fetcher, metadata=None),
        format="json",
        metadata=b'{"foo": "bar"}',
    )
    origin_metadata2 = RawExtrinsicMetadata(
        target=Origin(origin.url).swhid(),
        discovery_date=datetime.datetime(2017, 1, 1, 22, 0, 0,
                                         tzinfo=datetime.timezone.utc),
        authority=attr.evolve(metadata_authority, metadata=None),
        fetcher=attr.evolve(metadata_fetcher, metadata=None),
        format="yaml",
        metadata=b"foo: bar",
    )
    origin_metadata3 = RawExtrinsicMetadata(
        target=Origin(origin.url).swhid(),
        discovery_date=datetime.datetime(2017, 1, 1, 22, 0, 0,
                                         tzinfo=datetime.timezone.utc),
        authority=attr.evolve(metadata_authority2, metadata=None),
        fetcher=attr.evolve(metadata_fetcher2, metadata=None),
        format="yaml",
        metadata=b"foo: bar",
    )
    origin_metadata: Tuple[RawExtrinsicMetadata, ...] = (
        origin_metadata1,
        origin_metadata2,
        origin_metadata3,
    )

    # -- external identifiers ---------------------------------------------
    extid1 = ExtID(
        target=CoreSWHID(object_type=SwhidObjectType.REVISION,
                         object_id=revision.id),
        extid_type="git",
        extid=revision.id,
    )
    extid2 = ExtID(
        target=CoreSWHID(object_type=SwhidObjectType.REVISION,
                         object_id=hg_revision.id),
        extid_type="mercurial",
        extid=hash_to_bytes("a316dfb434af2b451c1f393496b7eaeda343f543"),
    )
    extid3 = ExtID(
        target=CoreSWHID(object_type=SwhidObjectType.DIRECTORY,
                         object_id=directory.id),
        extid_type="directory",
        extid=b"something",
    )
    # Same extid_type/extid as extid3 but a different version and target.
    extid4 = ExtID(
        target=CoreSWHID(object_type=SwhidObjectType.DIRECTORY,
                         object_id=directory2.id),
        extid_type="directory",
        extid=b"something",
        extid_version=2,
    )
    extids: Tuple[ExtID, ...] = (
        extid1,
        extid2,
        extid3,
        extid4,
    )
def test_pypi_release_metadata_structure(
    swh_storage, requests_mock_datadir, _0805nexter_api_info
):
    """The PyPI loader attaches the project's release JSON from the PyPI API
    as raw extrinsic metadata on the released directory."""
    origin = "https://pypi.org/project/0805nexter"

    loader = PyPILoader(swh_storage, origin)
    status = loader.load()

    assert status["status"] == "eventful"
    assert status["snapshot_id"] is not None

    # Two release branches, with HEAD aliasing the latest (1.2.0).
    head_release_id = hash_to_bytes("fbbcb817f01111b06442cdcc93140ab3cc777d68")
    snapshot = Snapshot(
        id=hash_to_bytes(status["snapshot_id"]),
        branches={
            b"HEAD": SnapshotBranch(
                target=b"releases/1.2.0",
                target_type=TargetType.ALIAS,
            ),
            b"releases/1.1.0": SnapshotBranch(
                target=hash_to_bytes("f8789ff3ed70a5f570c35d885c7bcfda7b23b091"),
                target_type=TargetType.RELEASE,
            ),
            b"releases/1.2.0": SnapshotBranch(
                target=head_release_id,
                target_type=TargetType.RELEASE,
            ),
        },
    )
    assert_last_visit_matches(
        swh_storage, origin, status="full", type="pypi", snapshot=snapshot.id
    )
    check_snapshot(snapshot, swh_storage)

    release = swh_storage.release_get([head_release_id])[0]
    assert release is not None

    directory_swhid = ExtendedSWHID(
        object_type=ExtendedObjectType.DIRECTORY, object_id=release.target
    )
    authority = MetadataAuthority(
        type=MetadataAuthorityType.FORGE,
        url="https://pypi.org/",
    )
    # The recorded metadata is exactly the 1.2.0 entry of the API response,
    # re-serialized.
    api_release_entry = json.loads(_0805nexter_api_info)["releases"]["1.2.0"][0]
    expected = [
        RawExtrinsicMetadata(
            target=directory_swhid,
            authority=authority,
            fetcher=MetadataFetcher(
                name="swh.loader.package.pypi.loader.PyPILoader",
                version=__version__,
            ),
            discovery_date=loader.visit_date,
            format="pypi-project-json",
            metadata=json.dumps(api_release_entry).encode(),
            origin=origin,
            release=CoreSWHID(
                object_type=ObjectType.RELEASE, object_id=head_release_id
            ),
        )
    ]
    assert swh_storage.raw_extrinsic_metadata_get(
        directory_swhid,
        authority,
    ) == PagedResult(
        next_page_token=None,
        results=expected,
    )
def test_npm_loader_first_visit(swh_storage, requests_mock_datadir, org_api_info):
    """First visit of an npm package: checks the snapshot/release structure,
    the loaded objects, the per-version extrinsic metadata, and the stats."""
    package = "org"
    url = package_url(package)
    loader = NpmLoader(swh_storage, url)

    actual_load_status = loader.load()
    expected_snapshot_id = hash_to_bytes("0996ca28d6280499abcf485b51c4e3941b057249")
    assert actual_load_status == {
        "status": "eventful",
        "snapshot_id": expected_snapshot_id.hex(),
    }

    assert_last_visit_matches(
        swh_storage, url, status="full", type="npm", snapshot=expected_snapshot_id
    )

    # NOTE: 'release_id' is rebound by the for-loop further down; here it is
    # the 0.0.2 release, used for the detailed release_get check below.
    release_id = "d38cc0b571cd41f3c85513864e049766b42032a7"
    versions = [
        ("0.0.2", release_id),
        ("0.0.3", "62bf7076bae9aa2cb4d6cb3bf7ce0ea4fdd5b295"),
        ("0.0.4", "6e976db82f6c310596b21fb0ed8b11f507631434"),
    ]

    # One branch per version plus a HEAD alias to the latest.
    expected_snapshot = Snapshot(
        id=expected_snapshot_id,
        branches={
            b"HEAD": SnapshotBranch(
                target=b"releases/0.0.4", target_type=TargetType.ALIAS
            ),
            **{
                b"releases/" + version_name.encode(): SnapshotBranch(
                    target=hash_to_bytes(version_id),
                    target_type=TargetType.RELEASE,
                )
                for (version_name, version_id) in versions
            },
        },
    )
    check_snapshot(expected_snapshot, swh_storage)

    assert swh_storage.release_get([hash_to_bytes(release_id)])[0] == Release(
        name=b"0.0.2",
        message=b"Synthetic release for NPM source package org version 0.0.2\n",
        target=hash_to_bytes("42753c0c2ab00c4501b552ac4671c68f3cf5aece"),
        target_type=ModelObjectType.DIRECTORY,
        synthetic=True,
        author=Person(
            fullname=b"mooz <*****@*****.**>",
            name=b"mooz",
            email=b"*****@*****.**",
        ),
        date=TimestampWithTimezone.from_datetime(
            datetime.datetime(2014, 1, 1, 15, 40, 33, tzinfo=datetime.timezone.utc)
        ),
        id=hash_to_bytes(release_id),
    )

    # All expected contents/directories/releases must be present in storage.
    contents = swh_storage.content_get(_expected_new_contents_first_visit)
    count = sum(0 if content is None else 1 for content in contents)
    assert count == len(_expected_new_contents_first_visit)

    assert (
        list(swh_storage.directory_missing(_expected_new_directories_first_visit))
        == []
    )

    assert list(swh_storage.release_missing(_expected_new_releases_first_visit)) == []

    metadata_authority = MetadataAuthority(
        type=MetadataAuthorityType.FORGE,
        url="https://npmjs.com/",
    )

    # Each version's release directory carries the corresponding entry of the
    # npm registry response as replicate-npm-package-json metadata.
    for (version_name, release_id) in versions:
        release = swh_storage.release_get([hash_to_bytes(release_id)])[0]
        assert release.target_type == ModelObjectType.DIRECTORY
        directory_id = release.target
        directory_swhid = ExtendedSWHID(
            object_type=ExtendedObjectType.DIRECTORY,
            object_id=directory_id,
        )
        release_swhid = CoreSWHID(
            object_type=ObjectType.RELEASE,
            object_id=hash_to_bytes(release_id),
        )
        expected_metadata = [
            RawExtrinsicMetadata(
                target=directory_swhid,
                authority=metadata_authority,
                fetcher=MetadataFetcher(
                    name="swh.loader.package.npm.loader.NpmLoader",
                    version=__version__,
                ),
                discovery_date=loader.visit_date,
                format="replicate-npm-package-json",
                metadata=json.dumps(
                    json.loads(org_api_info)["versions"][version_name]
                ).encode(),
                origin="https://www.npmjs.com/package/org",
                release=release_swhid,
            )
        ]
        assert swh_storage.raw_extrinsic_metadata_get(
            directory_swhid,
            metadata_authority,
        ) == PagedResult(
            next_page_token=None,
            results=expected_metadata,
        )

    stats = get_stats(swh_storage)

    assert {
        "content": len(_expected_new_contents_first_visit),
        "directory": len(_expected_new_directories_first_visit),
        "origin": 1,
        "origin_visit": 1,
        "release": len(_expected_new_releases_first_visit),
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1,
    } == stats
def test_loader_one_visit(swh_storage, requests_mock_datadir, raw_sources):
    """A single NixGuix visit loads the tarball source, records a partial
    visit, and attaches the raw sources.json to the snapshot as extrinsic
    metadata."""
    loader = NixGuixLoader(swh_storage, sources_url)
    load_status = loader.load()

    assert load_status == {
        "status": "eventful",
        "snapshot_id": SNAPSHOT1.id.hex(),
    }

    check_snapshot(SNAPSHOT1, storage=swh_storage)

    # The single handled source produced a synthetic, anonymous release.
    branch_name = b"https://github.com/owner-1/repository-1/revision-1.tgz"
    release_id = SNAPSHOT1.branches[branch_name].target
    assert swh_storage.release_get([release_id])[0] == Release(
        id=release_id,
        name=b"https://github.com/owner-1/repository-1/revision-1.tgz",
        message=None,
        target=hash_to_bytes("4de2e07d3742718d928e974b8a4c721b9f7b33bf"),
        target_type=ObjectType.DIRECTORY,
        synthetic=True,
        author=Person.from_fullname(b""),
        date=None,
    )

    assert get_stats(swh_storage) == {
        "content": 1,
        "directory": 3,
        "origin": 1,
        "origin_visit": 1,
        "release": 2,
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1,
    }

    # The visit is partial because urls pointing to non tarball file
    # are not handled yet
    assert_last_visit_matches(swh_storage, sources_url, status="partial",
                              type="nixguix")

    visit_status = origin_get_latest_visit_status(swh_storage, sources_url)
    snapshot_swhid = ExtendedSWHID(
        object_type=ExtendedObjectType.SNAPSHOT,
        object_id=visit_status.snapshot,
    )
    authority = MetadataAuthority(
        type=MetadataAuthorityType.FORGE,
        url=sources_url,
    )
    assert swh_storage.raw_extrinsic_metadata_get(
        snapshot_swhid,
        authority,
    ) == PagedResult(
        next_page_token=None,
        results=[
            RawExtrinsicMetadata(
                target=snapshot_swhid,
                authority=authority,
                fetcher=MetadataFetcher(
                    name="swh.loader.package.nixguix.loader.NixGuixLoader",
                    version=__version__,
                ),
                discovery_date=loader.visit_date,
                format="nixguix-sources-json",
                metadata=raw_sources,
                origin=sources_url,
            )
        ],
    )