def setUpClass(cls): """ Initialize SwaggerClient using a test HCAConfig. """ swagger_response = requests.models.Response() swagger_response.status_code = 200 with open(os.path.join(TEST_DIR, "res", cls.swagger_filename), 'rb') as fh: # load test swagger JSON file content = fh.read() swagger_response._content = content cls.test_swagger_json = json.loads(content.decode("utf-8")) cls.url_base = (cls.test_swagger_json['schemes'][0] + "://" + cls.test_swagger_json['host'] + cls.test_swagger_json['basePath']) with mock.patch('requests.Session.get') as mock_get, \ mock.patch("builtins.open", mock_open()), \ mock.patch('hca.util.fs.atomic_write'), \ mock.patch('hca.dss.SwaggerClient.load_swagger_json') as mock_load_swagger_json: # init SwaggerClient with test swagger JSON file mock_get.return_value = swagger_response mock_load_swagger_json.return_value = json.loads(swagger_response._content.decode("utf-8")) config = HCAConfig(save_on_exit=False) config['SwaggerClient'] = {} config['SwaggerClient'].swagger_url = cls.swagger_url cls.client = hca.util.SwaggerClient(config) cls.client.build_argparse_subparsers(cls.subparsers)
@classmethod
def setUpClass(cls):
    super().setUpClass()
    cls.dss_endpoint = os.getenv("TEST_DSS_ENDPOINT", "https://hca-dss-4.ucsc-cgp-dev.org/v1")
    cls.staging_bucket = os.getenv('DSS_S3_STAGING_BUCKET', 'commons-dss-upload')
    # Work around problems with DSSClient initialization when there is
    # existing HCA configuration. The following issue has been submitted:
    # "Problems accessing an alternate DSS from user scripts or unit tests" #170
    # https://github.com/HumanCellAtlas/dcp-cli/issues/170
    monkey_patch_hca_config()
    HCAConfig._user_config_home = '/tmp/'
    dss_config = HCAConfig(name='loader-test', save_on_exit=False, autosave=False)
    dss_config['DSSClient'].swagger_url = f'{cls.dss_endpoint}/swagger.json'
    cls.dss_client = DSSClient(config=dss_config)
def dss_client(deployment: Optional[str] = None) -> DSSClient:
    """
    Return a DSS client for the production DSS or the specified DSS deployment.

    :param deployment: The name of a DSS deployment like `dev`, `integration` or `staging`.
                       If None, the production deployment (`prod`) will be used.
    """
    # Work around https://github.com/HumanCellAtlas/dcp-cli/issues/142
    hca_config = HCAConfig()
    deployment = deployment + "." if deployment else ""
    hca_config['DSSClient'].swagger_url = f'https://dss.{deployment}data.humancellatlas.org/v1/swagger.json'
    # Clear the cached Swagger specs, which may come from a different deployment.
    # This workaround isn't thread-safe, but neither is the caching itself.
    DSSClient._swagger_spec = None
    client = DSSClient(config=hca_config)
    client.timeout_policy = Timeout(connect=10, read=40)
    return client
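# A minimal usage sketch for dss_client() above, assuming it is defined alongside its
# dependencies (HCAConfig, DSSClient, Timeout, Optional). The deployment name 'staging'
# is only an illustrative choice; any of `dev`, `integration` or `staging` fits the URL
# pattern in the function, and passing nothing selects production.
prod_client = dss_client()               # https://dss.data.humancellatlas.org/v1
staging_client = dss_client('staging')   # https://dss.staging.data.humancellatlas.org/v1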
def main():
    logging.basicConfig(level=logging.INFO)
    hca_config = HCAConfig()
    hca_config["DSSClient"].swagger_url = "https://dss.dev.data.humancellatlas.org/v1/swagger.json"
    dss = DSSClient(config=hca_config)
    projects = get_target_project_dirs(follow_links=True)
    for project in projects:
        log.info('Uploading %s', project)
        bundle_uuid = project.name
        assert str(UUID(bundle_uuid)) == bundle_uuid
        bundle = project / 'bundle'

        def file_uuid_callback(file_path: str):
            file_path = Path(file_path)
            file_name = file_path.name
            file_uuid = generate_file_uuid(bundle_uuid, file_name)
            log.info('Allocated UUID %s for file %s', file_uuid, file_path)
            if file_name.endswith('.json'):
                with file_path.open('rt') as f:
                    document = json.load(f)
                if file_name == 'links.json':
                    pass
                elif file_name == 'project_0.json':
                    assert document['provenance']['document_id'] == bundle_uuid
                else:
                    assert document['provenance']['document_id'] == file_uuid
            return file_uuid

        if bundle.is_dir():
            response = dss.upload(src_dir=str(bundle),
                                  replica='aws',
                                  staging_bucket='lon-test-data',
                                  bundle_uuid=bundle_uuid,
                                  file_uuid_callback=file_uuid_callback)
            print(f'Successful upload. Bundle information is:\n{json.dumps(response, indent=4)}')
        else:
            log.warning('Skipping %s because metadata is missing', project)
def __init__(self,
             dss_endpoint: str,
             staging_bucket: str,
             google_project_id: str,
             dry_run: bool) -> None:
    """
    Functions for uploading files to a given DSS.

    :param dss_endpoint: The URL of a Swagger DSS API,
                         e.g. "https://commons-dss.ucsc-cgp-dev.org/v1"
    :param staging_bucket: The name of the AWS S3 bucket used to stage files for upload
                           to the DSS. For example, a local file is uploaded to the
                           staging bucket, the file metadata tags required by the DSS
                           are assigned to it, and the file is then loaded into the DSS
                           (by copy). The bucket must be accessible by the DSS,
                           e.g. 'commons-dss-upload'
    :param google_project_id: A Google `Project ID` to be used when accessing GCP
                              requester-pays buckets, e.g. "platform-dev-178517".
                              One way to find a `Project ID` is provided here:
                              https://console.cloud.google.com/cloud-resource-manager
    :param dry_run: If True, log the actions that would be performed without actually
                    executing them; otherwise, perform the operations.
    """
    self.dss_endpoint = dss_endpoint
    self.staging_bucket = staging_bucket
    self.google_project_id = google_project_id
    self.dry_run = dry_run
    self.s3_client = boto3.client("s3")
    self.s3_blobstore = s3.S3BlobStore(self.s3_client)
    self.gs_client = Client()
    # Work around problems with DSSClient initialization when there is
    # existing HCA configuration. The following issue has been submitted:
    # "Problems accessing an alternate DSS from user scripts or unit tests" #170
    # https://github.com/HumanCellAtlas/dcp-cli/issues/170
    monkey_patch_hca_config()
    HCAConfig._user_config_home = '/tmp/'
    dss_config = HCAConfig(name='loader', save_on_exit=False, autosave=False)
    dss_config['DSSClient'].swagger_url = f'{self.dss_endpoint}/swagger.json'
    self.dss_client = DSSClient(config=dss_config)
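# A minimal construction sketch for the uploader class whose __init__ is shown above.
# The class name `DssUploader` is an assumption; the endpoint, bucket and project ID
# values are the examples given in the docstring.
uploader = DssUploader(
    dss_endpoint="https://commons-dss.ucsc-cgp-dev.org/v1",
    staging_bucket="commons-dss-upload",
    google_project_id="platform-dev-178517",
    dry_run=True,  # log the planned actions without executing them
)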
def setUpClass(cls): """ Initialize SwaggerClient with a test HCAConfig. """ cls.swagger_url = "test_swagger_url" cls.open_fn_name = "builtins.open" cls.test_response = requests.models.Response() cls.test_response.status_code = 200 with open(os.path.join(TEST_DIR, "res", "test_swagger.json"), 'rb') as fh: cls.test_response._content = fh.read() with mock.patch('requests.Session.get') as mock_get, \ mock.patch(cls.open_fn_name, mock_open()), \ mock.patch('hca.util.fs.atomic_write'), \ mock.patch('hca.dss.SwaggerClient.load_swagger_json') as mock_load_swagger_json: mock_get.return_value = cls.test_response mock_load_swagger_json.return_value = json.loads( cls.test_response._content.decode("utf-8")) config = HCAConfig(save_on_exit=False) config['SwaggerClient'] = {} config['SwaggerClient'].swagger_url = cls.swagger_url cls.client = hca.util.SwaggerClient(config)
from hca import HCAConfig
from hca.dss import DSSClient

hca_config = HCAConfig()
hca_config['DSSClient'].swagger_url = 'https://dss.dev.data.humancellatlas.org/v1/swagger.json'
dss = DSSClient(config=hca_config)

# Print the files of the first bundle that can be fetched successfully.
for i in dss.post_search.iterate(replica='aws', es_query={}):
    uuid, version = i['bundle_fqid'].split('.', 1)
    try:
        s = f'Bundle: {uuid}.{version}\n'
        for j in dss.get_bundle(replica='aws', uuid=uuid, version=version)['bundle']['files']:
            file_version = j['version']
            file_uuid = j['uuid']
            s += f'  File: {file_uuid}.{file_version}\n'
        print(s[:-1])
        break
    except Exception:
        pass
        # print(f'Does not exist: {uuid}.{version}')