def test_npm_artifact_use_mtime_if_no_time(swh_storage, requests_mock_datadir):
    """An artifact without an upload time is still loaded, not skipped.

    The resulting snapshot holds the single ``0.0.1`` release, with ``HEAD``
    aliased to it.
    """
    url = package_url("jammit-express")
    load_status = NpmLoader(swh_storage, url).load()

    snapshot_id = hash_to_bytes("33b8f105d48ce16b6c59158af660e0cc78bcbef4")
    assert load_status == {
        "status": "eventful",
        "snapshot_id": snapshot_id.hex(),
    }

    # The artifact is used: its release shows up as a branch of the snapshot.
    head_branch = SnapshotBranch(
        target_type=TargetType.ALIAS, target=b"releases/0.0.1"
    )
    release_branch = SnapshotBranch(
        target_type=TargetType.RELEASE,
        target=hash_to_bytes("3e3b800570869fa9b3dbc302500553e62400cc06"),
    )
    expected_snapshot = Snapshot(
        id=snapshot_id,
        branches={b"HEAD": head_branch, b"releases/0.0.1": release_branch},
    )

    assert_last_visit_matches(
        swh_storage, url, status="full", type="npm", snapshot=expected_snapshot.id
    )
    check_snapshot(expected_snapshot, swh_storage)
def test_npm_loader_incremental_visit(swh_storage, requests_mock_datadir_visits):
    """Load the ``org`` package twice with the same loader instance.

    The second load must be eventful, produce a different snapshot, add only
    the expected number of new objects, and never request the same registry
    URL twice.
    """
    url = package_url("org")
    loader = NpmLoader(swh_storage, url)

    first_snapshot_id = hash_to_bytes("0996ca28d6280499abcf485b51c4e3941b057249")
    first_status = loader.load()
    assert first_status == {
        "status": "eventful",
        "snapshot_id": first_snapshot_id.hex(),
    }
    assert_last_visit_matches(
        swh_storage, url, status="full", type="npm", snapshot=first_snapshot_id
    )

    # Object counts expected after the first visit.
    n_contents = len(_expected_new_contents_first_visit)
    n_directories = len(_expected_new_directories_first_visit)
    n_releases = len(_expected_new_releases_first_visit)

    stats_after_first = get_stats(swh_storage)
    assert {
        "content": n_contents,
        "directory": n_directories,
        "origin": 1,
        "origin_visit": 1,
        "release": n_releases,
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1,
    } == stats_after_first

    # Reset the loader's internal cached state before loading again.
    del loader._cached_info, loader._cached__raw_info

    second_status = loader.load()
    assert second_status["status"] == "eventful"
    second_snapshot_id = second_status["snapshot_id"]
    assert second_snapshot_id is not None
    assert second_snapshot_id != first_status["snapshot_id"]

    assert_last_visit_matches(swh_storage, url, status="full", type="npm")

    stats_after_second = get_stats(swh_storage)
    # The second visit brings in 3 new release artifacts.
    assert {
        "content": n_contents + 14,
        "directory": n_directories + 15,
        "origin": 1,
        "origin_visit": 2,
        "release": n_releases + 3,
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 2,
    } == stats_after_second

    registry_urls = []
    for request in requests_mock_datadir_visits.request_history:
        if request.url.startswith("https://registry.npmjs.org"):
            registry_urls.append(request.url)

    # No registry URL appears twice in the request history of the whole test.
    assert len(registry_urls) == len(set(registry_urls))
def test_npm_origin_not_found(swh_storage, requests_mock_datadir):
    """A package unknown to the registry fails to load with a ``not_found`` visit."""
    url = package_url("non-existent-url")

    load_status = NpmLoader(swh_storage, url).load()
    assert load_status == {"status": "failed"}

    # The visit is recorded as "not_found" and has no snapshot attached.
    assert_last_visit_matches(
        swh_storage, url, status="not_found", type="npm", snapshot=None
    )
def test_npm_no_artifact(swh_storage, requests_mock_datadir):
    """When the origin exposes no artifact at all, the whole visit fails."""
    url = package_url("catify")

    load_status = NpmLoader(swh_storage, url).load()

    expected_status = {"status": "failed"}
    assert load_status == expected_status
    assert_last_visit_matches(swh_storage, url, status="failed", type="npm")
def test_npm_loader_version_divergence(swh_storage, requests_mock_datadir):
    """Load the scoped ``@aller/shared`` package and check its two releases.

    The ``requests_mock_datadir`` fixture is requested explicitly, like in the
    other loader tests of this module, so that HTTP requests issued by the
    loader are answered from the test data directory rather than the network.
    The hard-coded object counts below depend on that canned data.
    """
    package = "@aller/shared"
    url = package_url(package)
    loader = NpmLoader(swh_storage, url)

    actual_load_status = loader.load()
    expected_snapshot_id = hash_to_bytes("68eed3d3bc852e7f435a84f18ee77e23f6884be2")
    assert actual_load_status == {
        "status": "eventful",
        "snapshot_id": expected_snapshot_id.hex(),
    }
    assert_last_visit_matches(
        swh_storage, url, status="full", type="npm", snapshot=expected_snapshot_id
    )

    expected_snapshot = Snapshot(
        id=expected_snapshot_id,
        branches={
            b"HEAD": SnapshotBranch(
                target_type=TargetType.ALIAS, target=b"releases/0.1.0"
            ),
            b"releases/0.1.0": SnapshotBranch(
                target_type=TargetType.RELEASE,
                target=hash_to_bytes("0c486b50b407f847ef7581f595c2b6c2062f1089"),
            ),
            b"releases/0.1.1-alpha.14": SnapshotBranch(
                target_type=TargetType.RELEASE,
                target=hash_to_bytes("79d80c87c0a8d104a216cc539baad962a454802a"),
            ),
        },
    )
    check_snapshot(expected_snapshot, swh_storage)

    stats = get_stats(swh_storage)

    # A single visit yielding the two releases of the snapshot above.
    assert {
        "content": 534,
        "directory": 153,
        "origin": 1,
        "origin_visit": 1,
        "release": 2,
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1,
    } == stats
def test_npm_artifact_with_no_upload_time(swh_storage, requests_mock_datadir):
    """An artifact with no upload time is skipped, leaving an empty snapshot."""
    url = package_url("jammit-no-time")
    load_status = NpmLoader(swh_storage, url).load()

    # No branch: the only artifact was skipped, so the snapshot is empty.
    empty_snapshot = Snapshot(
        id=hash_to_bytes("1a8893e6a86f444e8be8e7bda6cb34fb1735a00e"),
        branches={},
    )

    expected_status = {
        "status": "uneventful",
        "snapshot_id": empty_snapshot.id.hex(),
    }
    assert load_status == expected_status

    assert_last_visit_matches(
        swh_storage, url, status="partial", type="npm", snapshot=empty_snapshot.id
    )
    check_snapshot(empty_snapshot, swh_storage)
def test_npm_artifact_with_no_intrinsic_metadata(swh_storage, requests_mock_datadir):
    """An artifact lacking intrinsic metadata is skipped during ingestion."""
    url = package_url("nativescript-telerik-analytics")
    load_status = NpmLoader(swh_storage, url).load()

    # The single artifact has no intrinsic metadata, hence no branch at all.
    empty_snapshot = Snapshot(
        id=hash_to_bytes("1a8893e6a86f444e8be8e7bda6cb34fb1735a00e"),
        branches={},
    )

    expected_status = {
        "status": "eventful",
        "snapshot_id": empty_snapshot.id.hex(),
    }
    assert load_status == expected_status

    assert_last_visit_matches(
        swh_storage, url, status="full", type="npm", snapshot=empty_snapshot.id
    )
    check_snapshot(empty_snapshot, swh_storage)
def test_npm_loader_duplicate_shasum(swh_storage, requests_mock_datadir):
    """Two versions sharing the very same tarball each get their own release.

    Both releases target the same directory; a second load of the same origin
    is then expected to be uneventful and keep the snapshot unchanged.
    """
    url = package_url("org_version_mismatch")
    first_status = NpmLoader(swh_storage, url).load()

    snapshot_id = hash_to_bytes("ac867a4c22ba4e22a022d319f309714477412a5a")
    assert first_status == {
        "status": "eventful",
        "snapshot_id": snapshot_id.hex(),
    }
    assert_last_visit_matches(
        swh_storage, url, status="full", type="npm", snapshot=snapshot_id
    )

    beta_release_id = "e6d5490a02ac2a8dcd49702f9ccd5a64c90a46f1"
    release_id = "f6985f437e28db6eb1b7533230e05ed99f2c91f0"
    versions = [
        ("0.0.3-beta", beta_release_id),
        ("0.0.3", release_id),
    ]

    # HEAD first, then one release branch per version, in declaration order.
    branches = {
        b"HEAD": SnapshotBranch(
            target=b"releases/0.0.3", target_type=TargetType.ALIAS
        ),
    }
    for version_name, version_id in versions:
        branches[b"releases/" + version_name.encode()] = SnapshotBranch(
            target=hash_to_bytes(version_id),
            target_type=TargetType.RELEASE,
        )
    expected_snapshot = Snapshot(id=snapshot_id, branches=branches)
    check_snapshot(expected_snapshot, swh_storage)

    # Both releases point at the same directory and share the same author.
    shared_directory = hash_to_bytes("3370d20d6f96dc1c9e50f083e2134881db110f4f")
    author = Person.from_fullname(b"Masafumi Oyamada <*****@*****.**>")
    utc = datetime.timezone.utc

    stored_beta = swh_storage.release_get([hash_to_bytes(beta_release_id)])[0]
    assert stored_beta == Release(
        name=b"0.0.3-beta",
        message=(
            b"Synthetic release for NPM source package org_version_mismatch "
            b"version 0.0.3-beta\n"
        ),
        target=shared_directory,
        target_type=ModelObjectType.DIRECTORY,
        synthetic=True,
        author=author,
        date=TimestampWithTimezone.from_datetime(
            datetime.datetime(2014, 1, 1, 15, 40, 33, tzinfo=utc)
        ),
        id=hash_to_bytes(beta_release_id),
    )

    stored_final = swh_storage.release_get([hash_to_bytes(release_id)])[0]
    assert stored_final == Release(
        name=b"0.0.3",
        message=(
            b"Synthetic release for NPM source package org_version_mismatch "
            b"version 0.0.3\n"
        ),
        target=shared_directory,
        target_type=ModelObjectType.DIRECTORY,
        synthetic=True,
        author=author,
        date=TimestampWithTimezone.from_datetime(
            datetime.datetime(2014, 1, 1, 15, 55, 45, tzinfo=utc)
        ),
        id=hash_to_bytes(release_id),
    )

    # Loading again with a fresh loader must leave the snapshot unchanged.
    second_status = NpmLoader(swh_storage, url).load()
    assert second_status == {
        "status": "uneventful",
        "snapshot_id": snapshot_id.hex(),
    }
    assert_last_visit_matches(
        swh_storage, url, status="full", type="npm", snapshot=snapshot_id
    )
def test_npm_loader_first_visit(swh_storage, requests_mock_datadir, org_api_info):
    """First visit of the ``org`` package: snapshot, releases, metadata, stats.

    Checks the load status and snapshot, the content of the ``0.0.2`` release,
    the presence of every expected content/directory/release, the raw
    extrinsic metadata attached to each release's directory, and finally the
    storage object counts.
    """
    url = package_url("org")
    loader = NpmLoader(swh_storage, url)

    load_status = loader.load()
    snapshot_id = hash_to_bytes("0996ca28d6280499abcf485b51c4e3941b057249")
    assert load_status == {
        "status": "eventful",
        "snapshot_id": snapshot_id.hex(),
    }
    assert_last_visit_matches(
        swh_storage, url, status="full", type="npm", snapshot=snapshot_id
    )

    release_id = "d38cc0b571cd41f3c85513864e049766b42032a7"
    versions = [
        ("0.0.2", release_id),
        ("0.0.3", "62bf7076bae9aa2cb4d6cb3bf7ce0ea4fdd5b295"),
        ("0.0.4", "6e976db82f6c310596b21fb0ed8b11f507631434"),
    ]

    # HEAD first, then one release branch per version, in declaration order.
    branches = {
        b"HEAD": SnapshotBranch(
            target=b"releases/0.0.4", target_type=TargetType.ALIAS
        ),
    }
    for version_name, version_id in versions:
        branches[b"releases/" + version_name.encode()] = SnapshotBranch(
            target=hash_to_bytes(version_id),
            target_type=TargetType.RELEASE,
        )
    expected_snapshot = Snapshot(id=snapshot_id, branches=branches)
    check_snapshot(expected_snapshot, swh_storage)

    stored_release = swh_storage.release_get([hash_to_bytes(release_id)])[0]
    assert stored_release == Release(
        name=b"0.0.2",
        message=b"Synthetic release for NPM source package org version 0.0.2\n",
        target=hash_to_bytes("42753c0c2ab00c4501b552ac4671c68f3cf5aece"),
        target_type=ModelObjectType.DIRECTORY,
        synthetic=True,
        author=Person(
            fullname=b"mooz <*****@*****.**>",
            name=b"mooz",
            email=b"*****@*****.**",
        ),
        date=TimestampWithTimezone.from_datetime(
            datetime.datetime(2014, 1, 1, 15, 40, 33, tzinfo=datetime.timezone.utc)
        ),
        id=hash_to_bytes(release_id),
    )

    # Every expected content, directory and release must now be in storage.
    contents = swh_storage.content_get(_expected_new_contents_first_visit)
    found_contents = sum(1 for content in contents if content is not None)
    assert found_contents == len(_expected_new_contents_first_visit)

    missing_directories = list(
        swh_storage.directory_missing(_expected_new_directories_first_visit)
    )
    assert missing_directories == []

    missing_releases = list(
        swh_storage.release_missing(_expected_new_releases_first_visit)
    )
    assert missing_releases == []

    metadata_authority = MetadataAuthority(
        type=MetadataAuthorityType.FORGE,
        url="https://npmjs.com/",
    )
    metadata_fetcher = MetadataFetcher(
        name="swh.loader.package.npm.loader.NpmLoader",
        version=__version__,
    )
    # Per-version registry metadata, parsed once from the API fixture.
    api_versions = json.loads(org_api_info)["versions"]

    for version_name, version_release_id in versions:
        release = swh_storage.release_get([hash_to_bytes(version_release_id)])[0]
        assert release.target_type == ModelObjectType.DIRECTORY

        directory_swhid = ExtendedSWHID(
            object_type=ExtendedObjectType.DIRECTORY,
            object_id=release.target,
        )
        release_swhid = CoreSWHID(
            object_type=ObjectType.RELEASE,
            object_id=hash_to_bytes(version_release_id),
        )

        expected_metadata = [
            RawExtrinsicMetadata(
                target=directory_swhid,
                authority=metadata_authority,
                fetcher=metadata_fetcher,
                discovery_date=loader.visit_date,
                format="replicate-npm-package-json",
                metadata=json.dumps(api_versions[version_name]).encode(),
                origin="https://www.npmjs.com/package/org",
                release=release_swhid,
            )
        ]

        actual_page = swh_storage.raw_extrinsic_metadata_get(
            directory_swhid,
            metadata_authority,
        )
        assert actual_page == PagedResult(
            next_page_token=None,
            results=expected_metadata,
        )

    stats = get_stats(swh_storage)
    assert {
        "content": len(_expected_new_contents_first_visit),
        "directory": len(_expected_new_directories_first_visit),
        "origin": 1,
        "origin_visit": 1,
        "release": len(_expected_new_releases_first_visit),
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1,
    } == stats
def load_npm(**kwargs):
    """Build an ``NpmLoader`` from its configuration file and run it.

    Keyword arguments are forwarded unchanged to
    ``NpmLoader.from_configfile``; whatever the loader's ``load()`` returns is
    handed back to the caller as-is.
    """
    loader = NpmLoader.from_configfile(**kwargs)
    return loader.load()