from datetime import datetime
from unittest.mock import MagicMock, call

from cidc_api.models import DownloadableFiles, Permissions, TrialMetadata

# `setup_user`, `setup_downloadable_files`, `make_admin`, `trial_id_1`, and
# `upload_types` are helpers and constants from the surrounding test suite;
# `cidc_api` and `clean_db` are pytest fixtures defined in its conftest.


def test_create_compressed_batch(cidc_api, clean_db, monkeypatch):
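    """Exercise the compressed-batch endpoint: request validation, permission filtering, the batch size limit, and the signed URL returned for the resulting archive."""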
    user_id = setup_user(cidc_api, monkeypatch)
    file_id_1, file_id_2 = setup_downloadable_files(cidc_api)
    with cidc_api.app_context():
        url_1 = DownloadableFiles.find_by_id(file_id_1).object_url
        url_2 = DownloadableFiles.find_by_id(file_id_2).object_url

    client = cidc_api.test_client()

    url = "/downloadable_files/compressed_batch"

    # A JSON body containing a file ID list must be provided
    res = client.post(url)
    assert res.status_code == 422

    # User has no permissions, so no files should be found
    short_file_list = {"file_ids": [file_id_1, file_id_2]}
    res = client.post(url, json=short_file_list)
    assert res.status_code == 404

    # Give the user one permission
    with cidc_api.app_context():
        perm = Permissions(
            granted_to_user=user_id,
            trial_id=trial_id_1,
            upload_type=upload_types[0],
            granted_by_user=user_id,
        )
        perm.insert()

    # Mock GCS client
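    # The endpoint is expected to read each accessible source file via
    # `bucket.get_blob` and write the batch archive to a new blob, so both
    # interactions can be asserted on below without touching real GCS.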
    blob = MagicMock()
    bucket = MagicMock()
    bucket.blob.return_value = blob
    monkeypatch.setattr(
        "cidc_api.resources.downloadable_files.gcloud_client._get_bucket",
        lambda _: bucket,
    )
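    # Pin signed-URL generation to a known value so the response body can be
    # asserted exactly.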
    signed_url = "fake/signed/url"
    monkeypatch.setattr(
        "cidc_api.resources.downloadable_files.gcloud_client.get_signed_url",
        lambda *_: signed_url,
    )

    # User has one permission, so the endpoint should try to create
    # a compressed batch file with the single file the user has
    # access to in it.
    res = client.post(url, json=short_file_list)
    assert res.status_code == 200
    assert res.json == signed_url
    bucket.get_blob.assert_called_with(url_1)
    blob.upload_from_filename.assert_called_once()

    bucket.reset_mock()
    blob.reset_mock()

    make_admin(user_id, cidc_api)

    # Admin has access to both files, but together they are too large
    res = client.post(url, json=short_file_list)
    assert res.status_code == 400
    assert "batch too large" in res.json["_error"]["message"]
    bucket.get_blob.assert_not_called()
    blob.upload_from_filename.assert_not_called()

    # Decrease the size of one of the files and try again
    with cidc_api.app_context():
        df = DownloadableFiles.find_by_id(file_id_1)
        df.file_size_bytes = 1
        df.update()

    res = client.post(url, json=short_file_list)
    assert res.status_code == 200
    assert res.json == signed_url
    assert call(url_1) in bucket.get_blob.call_args_list
    assert call(url_2) in bucket.get_blob.call_args_list
    blob.upload_from_filename.assert_called_once()


def test_create_downloadable_file_from_blob(clean_db, monkeypatch):
    """Try to create a downloadable file from a GCS blob"""
    fake_blob = MagicMock()
    fake_blob.name = "name"
    fake_blob.md5_hash = "12345"
    fake_blob.crc32c = "54321"
    fake_blob.size = 5
    fake_blob.time_created = datetime.now()

    clean_db.add(
        TrialMetadata(
            trial_id="id",
            metadata_json={
                "protocol_identifier": "id",
                "allowed_collection_event_names": [],
                "allowed_cohort_names": [],
                "participants": [],
            },
        )
    )
    # Mock artifact upload publishing up front, so the assertions below
    # capture any publish triggered by either create_from_blob call
    publisher = MagicMock()
    monkeypatch.setattr("cidc_api.models.models.publish_artifact_upload", publisher)

    df = DownloadableFiles.create_from_blob(
        "id", "pbmc", "Shipping Manifest", "pbmc/shipping", fake_blob
    )

    # Check that the file was created
    assert 1 == clean_db.query(DownloadableFiles).count()
    df_lookup = DownloadableFiles.find_by_id(df.id)
    assert df_lookup.object_url == fake_blob.name
    assert df_lookup.data_format == "Shipping Manifest"
    assert df_lookup.file_size_bytes == fake_blob.size
    assert df_lookup.md5_hash == fake_blob.md5_hash
    assert df_lookup.crc32c_hash == fake_blob.crc32c

    # Upload a second time to check that the existing entry is updated, not duplicated
    fake_blob.size = 6
    fake_blob.md5_hash = "6"
    df = DownloadableFiles.create_from_blob(
        "id", "pbmc", "Shipping Manifest", "pbmc/shipping", fake_blob
    )

    # Check that the existing record was updated rather than duplicated
    assert 1 == clean_db.query(DownloadableFiles).count()
    df_lookup = DownloadableFiles.find_by_id(df.id)
    assert df_lookup.file_size_bytes == 6
    assert df_lookup.md5_hash == "6"

    # Check that no artifact upload event was published
    publisher.assert_not_called()

    # Check that artifact upload publishes
    DownloadableFiles.create_from_blob(
        "id",
        "pbmc",
        "Shipping Manifest",
        "pbmc/shipping",
        fake_blob,
        alert_artifact_upload=True,
    )
    publisher.assert_called_once_with(fake_blob.name)