Exemple #1
0
    def setUpClass(cls):
        """
        Build a SwaggerClient for the tests from a local swagger JSON file.

        The file named by ``cls.swagger_filename`` is read from the ``res`` directory under ``TEST_DIR``. Its
        parsed form is stored as ``cls.test_swagger_json`` and the API base URL derived from it as
        ``cls.url_base``. The client itself is constructed while ``requests.Session.get``, ``builtins.open``,
        ``hca.util.fs.atomic_write`` and ``SwaggerClient.load_swagger_json`` are patched.
        """
        # Canned HTTP 200 response whose body will be the raw swagger document.
        canned_response = requests.models.Response()
        canned_response.status_code = 200

        spec_path = os.path.join(TEST_DIR, "res", cls.swagger_filename)
        with open(spec_path, 'rb') as spec_file:
            raw_spec = spec_file.read()
            canned_response._content = raw_spec
            cls.test_swagger_json = json.loads(raw_spec.decode("utf-8"))

        # <scheme>://<host><basePath>, taken from the first scheme the spec lists.
        spec = cls.test_swagger_json
        cls.url_base = "".join((spec['schemes'][0], "://", spec['host'], spec['basePath']))

        with mock.patch('requests.Session.get') as fake_get, \
                mock.patch("builtins.open", mock_open()), \
                mock.patch('hca.util.fs.atomic_write'), \
                mock.patch('hca.dss.SwaggerClient.load_swagger_json') as fake_load_spec:
            # Both ways of obtaining the spec yield the test document.
            fake_get.return_value = canned_response
            fake_load_spec.return_value = json.loads(canned_response._content.decode("utf-8"))

            test_config = HCAConfig(save_on_exit=False)
            test_config['SwaggerClient'] = {}
            test_config['SwaggerClient'].swagger_url = cls.swagger_url
            cls.client = hca.util.SwaggerClient(test_config)
            cls.client.build_argparse_subparsers(cls.subparsers)
    def setUpClass(cls):
        """
        Prepare the DSS endpoint, staging bucket and DSS client shared by the tests.

        The endpoint and bucket come from the ``TEST_DSS_ENDPOINT`` and ``DSS_S3_STAGING_BUCKET`` environment
        variables, with defaults used when they are unset.
        """
        super().setUpClass()
        cls.dss_endpoint = os.environ.get("TEST_DSS_ENDPOINT", "https://hca-dss-4.ucsc-cgp-dev.org/v1")
        cls.staging_bucket = os.environ.get('DSS_S3_STAGING_BUCKET', 'commons-dss-upload')

        # DSSClient initialization misbehaves when HCA configuration already
        # exists, so patch HCAConfig and use a separate config home. Tracked as
        # "Problems accessing an alternate DSS from user scripts or unit tests #170":
        # https://github.com/HumanCellAtlas/dcp-cli/issues/170
        monkey_patch_hca_config()
        HCAConfig._user_config_home = '/tmp/'

        loader_config = HCAConfig(name='loader-test', save_on_exit=False, autosave=False)
        loader_config['DSSClient'].swagger_url = f'{cls.dss_endpoint}/swagger.json'
        cls.dss_client = DSSClient(config=loader_config)
Exemple #3
0
def dss_client(deployment: Optional[str] = None) -> DSSClient:
    """
    Create a DSS client for production or for a named DSS deployment.

    :param deployment: The name of a DSS deployment like `dev`, `integration` or `staging`. If None (or empty),
                       the production deployment (`prod`) is used.
    :return: A DSSClient whose timeout policy is set to a 10 second connect and 40 second read timeout.
    """
    # Work around https://github.com/HumanCellAtlas/dcp-cli/issues/142
    config = HCAConfig()

    # Production has no deployment component in its host name; every other deployment contributes "<name>.".
    subdomain = ""
    if deployment:
        subdomain = deployment + "."
    config['DSSClient'].swagger_url = f'https://dss.{subdomain}data.humancellatlas.org/v1/swagger.json'

    # Drop any cached swagger spec, since it may belong to a different deployment. This work-around isn't
    # thread safe, but neither is the caching itself.
    DSSClient._swagger_spec = None

    new_client = DSSClient(config=config)
    new_client.timeout_policy = Timeout(connect=10, read=40)
    return new_client
Exemple #4
0
def main():
    """
    Upload the bundle of every target project directory to the dev DSS.

    Each project directory must be named with a canonical UUID string, which is used as the bundle UUID.
    Projects without a ``bundle`` sub-directory are skipped with a warning.
    """
    logging.basicConfig(level=logging.INFO)
    config = HCAConfig()
    config["DSSClient"].swagger_url = "https://dss.dev.data.humancellatlas.org/v1/swagger.json"
    dss = DSSClient(config=config)

    for project in get_target_project_dirs(follow_links=True):
        log.info('Uploading %s', project)
        bundle_uuid = project.name
        # The directory name must already be the canonical string form of a UUID.
        assert str(UUID(bundle_uuid)) == bundle_uuid
        bundle = project / 'bundle'

        if not bundle.is_dir():
            log.warning('Skipping %s because metadata is missing', project)
            continue

        def file_uuid_callback(file_path: str):
            """Allocate the UUID for one file and check it against the document's provenance."""
            path = Path(file_path)
            name = path.name
            file_uuid = generate_file_uuid(bundle_uuid, name)
            log.info('Allocated UUID %s for file %s', file_uuid, path)
            if name.endswith('.json'):
                with path.open('rt') as f:
                    document = json.load(f)
                    # links.json is only parsed, never checked. The project document must carry the
                    # bundle UUID; every other document must carry its own file UUID.
                    if name != 'links.json':
                        expected_id = bundle_uuid if name == 'project_0.json' else file_uuid
                        assert document['provenance']['document_id'] == expected_id
            return file_uuid

        response = dss.upload(src_dir=str(bundle),
                              replica='aws',
                              staging_bucket='lon-test-data',
                              bundle_uuid=bundle_uuid,
                              file_uuid_callback=file_uuid_callback)
        print(f'Successful upload.  Bundle information is:\n{json.dumps(response, indent=4)}')
    def __init__(self, dss_endpoint: str, staging_bucket: str,
                 google_project_id: str, dry_run: bool) -> None:
        """
        Set up the clients needed for uploading files to a given DSS.

        :param dss_endpoint: URL of a Swagger DSS API, e.g. "https://commons-dss.ucsc-cgp-dev.org/v1"
        :param staging_bucket: Name of the AWS S3 bucket used to stage files for upload to the DSS,
        e.g. 'commons-dss-upload'. Local files are first uploaded to the staging bucket, then tagged with
        the file metadata the DSS requires, then loaded into the DSS (by copy). The bucket must be
        accessible by the DSS.
        :param google_project_id: A Google `Project ID` to use when accessing GCP requester pays buckets,
        e.g. "platform-dev-178517". One way to find a `Project ID` is provided here:
        https://console.cloud.google.com/cloud-resource-manager
        :param dry_run: If True, log the actions that would be performed without executing them.
        Otherwise, actually perform the operations.
        """
        self.dss_endpoint = dss_endpoint
        self.staging_bucket = staging_bucket
        self.google_project_id = google_project_id
        self.dry_run = dry_run

        # Storage clients: S3 (raw client plus blob store wrapper) and Google Storage.
        self.s3_client = boto3.client("s3")
        self.s3_blobstore = s3.S3BlobStore(self.s3_client)
        self.gs_client = Client()

        # DSSClient initialization misbehaves when HCA configuration already
        # exists, so patch HCAConfig and use a separate config home. Tracked as
        # "Problems accessing an alternate DSS from user scripts or unit tests #170":
        # https://github.com/HumanCellAtlas/dcp-cli/issues/170
        monkey_patch_hca_config()
        HCAConfig._user_config_home = '/tmp/'

        loader_config = HCAConfig(name='loader', save_on_exit=False, autosave=False)
        loader_config['DSSClient'].swagger_url = f'{self.dss_endpoint}/swagger.json'
        self.dss_client = DSSClient(config=loader_config)
    def setUpClass(cls):
        """
        Build a SwaggerClient for the tests from ``res/test_swagger.json``.

        The client is constructed while ``requests.Session.get``, the open function named by
        ``cls.open_fn_name``, ``hca.util.fs.atomic_write`` and ``SwaggerClient.load_swagger_json`` are patched.
        """
        cls.swagger_url = "test_swagger_url"
        cls.open_fn_name = "builtins.open"

        # Canned HTTP 200 response whose body is the raw test swagger document.
        canned = cls.test_response = requests.models.Response()
        canned.status_code = 200
        fixture_path = os.path.join(TEST_DIR, "res", "test_swagger.json")
        with open(fixture_path, 'rb') as fixture:
            canned._content = fixture.read()

        with mock.patch('requests.Session.get') as fake_get, \
                mock.patch(cls.open_fn_name, mock_open()), \
                mock.patch('hca.util.fs.atomic_write'), \
                mock.patch('hca.dss.SwaggerClient.load_swagger_json') as fake_load_spec:
            # Both ways of obtaining the spec yield the test document.
            fake_get.return_value = cls.test_response
            fake_load_spec.return_value = json.loads(cls.test_response._content.decode("utf-8"))

            test_config = HCAConfig(save_on_exit=False)
            test_config['SwaggerClient'] = {}
            test_config['SwaggerClient'].swagger_url = cls.swagger_url
            cls.client = hca.util.SwaggerClient(test_config)
Exemple #7
0
from hca import HCAConfig
from hca.dss import DSSClient

# Point a DSS client at the dev deployment.
hca_config = HCAConfig()
hca_config['DSSClient'].swagger_url = 'https://dss.dev.data.humancellatlas.org/v1/swagger.json'
dss = DSSClient(config=hca_config)

# Walk the search results and print the first bundle whose file listing can be
# retrieved, together with the FQID of each of its files, then stop.
for hit in dss.post_search.iterate(replica='aws', es_query={}):
    bundle_uuid, bundle_version = hit['bundle_fqid'].split('.', 1)
    try:
        bundle_files = dss.get_bundle(replica='aws',
                                      uuid=bundle_uuid,
                                      version=bundle_version)['bundle']['files']
        lines = [f'Bundle: {bundle_uuid}.{bundle_version}']
        lines.extend(f"    File: {entry['uuid']}.{entry['version']}" for entry in bundle_files)
    except Exception:
        # Best effort: a search hit whose bundle cannot be fetched or read
        # (e.g. it does not exist) is skipped silently. Catching Exception
        # rather than using a bare `except` lets KeyboardInterrupt and
        # SystemExit still terminate the script.
        continue
    print('\n'.join(lines))
    break