Beispiel #1
0
 def update(self, plugin: RepositoryPlugin) -> None:
     """
     Resolve this file's DRS path to a bucket object and store a signed
     download URL for that object in ``self._location``.

     The signed URL is requested with an expiration one hour after the time
     of the call and with a ``Content-Disposition`` that names the download
     after ``self.file_name``.

     :param plugin: The repository plugin that supplies the DRS URI and the
                    DRS client.
     """
     require(self.replica is None or self.replica == 'gcp')
     assert self.drs_path is not None
     drs_uri = plugin.drs_uri(self.drs_path)
     drs_client = plugin.drs_client()
     access = drs_client.get_object(drs_uri, access_method=AccessMethod.gs)
     assert access.headers is None
     url = furl(access.url)
     # The bucket is the host part of the URL, the blob name is its path
     blob_name = '/'.join(url.path.segments)
     # https://github.com/databiosphere/azul/issues/2479#issuecomment-733410253
     if url.fragmentstr:
         blob_name += '#' + unquote(url.fragmentstr)
     elif access.url.endswith('#'):
         # furl does not differentiate between no fragment and empty
         # fragment
         blob_name += '#'
     blob = self._get_blob(bucket_name=url.netloc, blob_name=blob_name)
     # Absolute expiration, in seconds since the epoch
     expiration = int(time.time() + 3600)
     # Backslash escapes are only meaningful inside a quoted-string, so the
     # file name has to be wrapped in double quotes. Without the quotes, a
     # name containing a space or a semicolon yields a malformed header.
     # Backslashes must be escaped before quotes so that the escapes added
     # for the quotes are not doubled.
     file_name = self.file_name.replace('\\', '\\\\').replace('"', r'\"')
     # Only printable ASCII can be represented in the plain `filename`
     # parameter
     assert all(0x1f < ord(c) < 0x80 for c in file_name)
     disposition = f'attachment; filename="{file_name}"'
     signed_url = blob.generate_signed_url(expiration=expiration,
                                           response_disposition=disposition)
     self._location = signed_url
Beispiel #2
0
 def update(self, plugin: RepositoryPlugin,
            authentication: Optional[Authentication]) -> None:
     """
     Resolve this file's DRS path via the plugin's DRS client and store the
     resulting signed URL in ``self._location``.

     :param plugin: The repository plugin that supplies the DRS URI and the
                    DRS client.

     :param authentication: Passed to the plugin when creating the DRS
                            client; may be None.
     """
     replica = self.replica
     require(replica is None or replica == 'gcp')
     assert self.drs_path is not None
     uri = plugin.drs_uri(self.drs_path)
     client = plugin.drs_client(authentication)
     access = client.get_object(uri, access_method=AccessMethod.gs)
     # The returned access must be a plain HTTPS URL that needs no headers …
     require(access.method is AccessMethod.https, access.method)
     require(access.headers is None, access.headers)
     location = access.url
     # … and that URL must carry a signature in its query string
     query_args = furl(location).args
     require('X-Goog-Signature' in query_args, query_args)
     self._location = location
Beispiel #3
0
 def update(self,
            plugin: RepositoryPlugin,
            authentication: Optional[Authentication]
            ) -> None:
     """
     Set ``self._location`` to the plugin's direct URL for this file.

     :param plugin: The repository plugin; must be an instance of `Plugin`.

     :param authentication: Not used by this implementation.
     """
     assert isinstance(plugin, Plugin)
     # No replica is requested
     self._location = plugin.direct_file_url(file_uuid=self.file_uuid,
                                             file_version=self.file_version,
                                             replica=None)
Beispiel #4
0
 def _create_db(self) -> Tuple[JSONs, str]:
     """
     Write hardcoded portal integrations DB to S3.

     The DB is obtained from the repository plugin of the default catalog
     and passed through `demultiplex` before it is written.

     :return: Newly created DB and accompanying version.
     """
     default_catalog = config.default_catalog
     plugin_cls = RepositoryPlugin.load(default_catalog)
     repository = plugin_cls.create(default_catalog)
     portal_db = self.demultiplex(repository.portal_db())
     # None is passed as the second argument of _write_db
     # NOTE(review): presumably that means "no prior version" — confirm
     new_version = self._write_db(portal_db, None)
     return portal_db, new_version
Beispiel #5
0
    def update(self, plugin: RepositoryPlugin,
               authentication: Optional[Authentication]) -> None:
        """
        Request this file from the plugin's direct file URL without following
        redirects and record the outcome on this instance.

        A 301 response updates ``token``, ``replica``, ``file_version`` and
        ``_retry_after`` from the response. A 302 response sets
        ``_location`` to a freshly presigned S3 URL for the object that the
        redirect points at. Any other response raises.

        :param plugin: The repository plugin; must be an instance of `Plugin`.

        :param authentication: Not used by this implementation.
        """
        self.drs_path = None  # to shorten the retry URLs
        if self.replica is None:
            self.replica = 'aws'
        assert isinstance(plugin, Plugin)
        request_url = plugin.direct_file_url(file_uuid=self.file_uuid,
                                             file_version=self.file_version,
                                             replica=self.replica,
                                             token=self.token)
        response = requests.get(request_url, allow_redirects=False)
        status = response.status_code
        if status == 301:
            delay = int(response.headers.get('Retry-After'))
            redirect = urllib.parse.urlparse(response.headers['Location'])
            params = urllib.parse.parse_qs(redirect.query, strict_parsing=True)
            # The redirect's query string holds the values to remember
            self.token = one(params['token'])
            self.replica = one(params['replica'])
            self.file_version = one(params['version'])
            self._retry_after = delay
        elif status == 302:
            # Remove once https://github.com/HumanCellAtlas/data-store/issues/1837 is resolved
            #
            # The redirect is not used as is. Instead, a new URL is signed for
            # the same bucket and key, with the same expiration, and with a
            # content disposition that carries the file name.
            redirect = urllib.parse.urlparse(response.headers['Location'])
            params = urllib.parse.parse_qs(redirect.query,
                                           strict_parsing=True)
            expires = int(one(params['Expires']))
            # The bucket name is the first label of the redirect's host
            bucket, _, _ = redirect.netloc.partition('.')
            dss_endpoint = one(plugin.sources).name
            assert bucket == aws.dss_checkout_bucket(dss_endpoint), bucket
            with aws.direct_access_credentials(dss_endpoint,
                                               lambda_name='service'):
                # FIXME: make region configurable (https://github.com/DataBiosphere/azul/issues/1560)
                s3 = aws.client('s3', region_name='us-east-1')
                disposition = 'attachment;filename=' + self.file_name
                signed_url = s3.generate_presigned_url(
                    ClientMethod=s3.get_object.__name__,
                    ExpiresIn=round(expires - time.time()),
                    Params={
                        'Bucket': bucket,
                        # Strip the leading slash from the path
                        'Key': redirect.path[1:],
                        'ResponseContentDisposition': disposition,
                    })
            self._location = signed_url
        else:
            # Neither of the two expected redirects
            response.raise_for_status()
            assert False
Beispiel #6
0
def verify_sources():
    """
    Run all access checks against every source of every catalog that uses
    the 'tdr' repository plugin.

    The checks run concurrently on a thread pool. The first exception raised
    by any check is re-raised, after an attempt is made to cancel all checks
    that have not completed yet.
    """
    tdr_catalogs = set()
    for catalog in config.catalogs.values():
        if catalog.plugins[RepositoryPlugin.type_name()].name == 'tdr':
            tdr_catalogs.add(catalog.name)
    assert tdr_catalogs, tdr_catalogs
    # Sources shared between catalogs are only checked once
    unique_sources = {
        source
        for catalog_name in tdr_catalogs
        for source in config.sources(catalog_name)
    }
    checks = (tdr.check_api_access, tdr.check_bigquery_access, verify_source)
    pending = []
    with ThreadPoolExecutor(max_workers=16) as executor:
        for unparsed in unique_sources:
            spec = TDRSourceSpec.parse(unparsed)
            pending.extend(executor.submit(check, spec) for check in checks)
        for done in as_completed(pending):
            pending.remove(done)
            error = done.exception()
            if error is None:
                continue
            # Fail fast: cancel whatever has not completed, then propagate
            for other in pending:
                other.cancel()
            raise error
Beispiel #7
0
def main(argv):
    """
    Subscribe or unsubscribe, depending on the command line, for every
    catalog whose repository plugin is a DSS plugin.

    :param argv: The command line arguments, without the program name.
    """
    configure_script_logging(logger)
    import argparse
    personal_help = (
        "Do not use the shared credentials of the Google service account that represents the "
        "current deployment, but instead use personal credentials for authenticating to the DSS. "
        "When specifying this option you will need to a) run `hca dss login` prior to running "
        "this script or b) set GOOGLE_APPLICATION_CREDENTIALS to point to another service "
        "account's credentials. Note that this implies that the resulting DSS subscription will "
        "be owned by a) you or b) the other service account and that only a) you or b) someone "
        "in possession of those credentials can modify the subscription in the future. This is "
        "typically not what you'd want."
    )
    parser = argparse.ArgumentParser(
        description='Subscribe indexer lambda to bundle events from DSS')
    # Both options are inverted flags: passing them stores False
    parser.add_argument('--unsubscribe', '-U',
                        dest='subscribe',
                        action='store_false',
                        default=True)
    parser.add_argument('--personal', '-p',
                        dest='shared',
                        action='store_false',
                        default=True,
                        help=personal_help)
    options = parser.parse_args(argv)
    dss_client = azul.dss.client()
    for catalog in config.catalogs:
        plugin = RepositoryPlugin.load(catalog)
        # NOTE(review): confirm that load() yields a plugin instance here;
        # if it yields a class or factory, this check never succeeds.
        if not isinstance(plugin, dss.Plugin):
            continue
        if options.shared:
            # Act as the indexer's service account
            with aws.service_account_credentials(config.ServiceAccount.indexer):
                subscription.manage_subscriptions(plugin,
                                                  dss_client,
                                                  subscribe=options.subscribe)
        else:
            # Act with whatever credentials the environment provides
            subscription.manage_subscriptions(plugin,
                                              dss_client,
                                              subscribe=options.subscribe)
Beispiel #8
0
def main(argv):
    """
    Parse the command line, fetch the requested bundle and save it to the
    output directory.

    :param argv: The command line arguments, without the program name.
    """
    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=AzulArgumentHelpFormatter)
    catalog = config.default_catalog
    plugin = RepositoryPlugin.load(catalog).create(catalog)
    sources = plugin.sources
    # With exactly one configured source, that source becomes the default.
    # Otherwise the source has to be given explicitly.
    if len(sources) == 1:
        source_options = dict(default=str(one(sources)))
    else:
        source_options = dict(required=True)
    default_output_dir = os.path.join(config.project_root, 'test', 'indexer', 'data')
    parser.add_argument('--source', '-s',
                        help='The repository source containing the bundle',
                        **source_options)
    parser.add_argument('--uuid', '-b',
                        required=True,
                        help='The UUID of the bundle to can.')
    parser.add_argument('--version', '-v',
                        help='The version of the bundle to can  (default: the latest version).')
    parser.add_argument('--output-dir', '-O',
                        default=default_output_dir,
                        help='The path to the output directory (default: %(default)s).')
    options = parser.parse_args(argv)
    save_bundle(fetch_bundle(options.source, options.uuid, options.version),
                options.output_dir)
Beispiel #9
0
 def repository_plugin(self, catalog: CatalogName) -> RepositoryPlugin:
     """
     Load the repository plugin for the given catalog and create an
     instance of it for that catalog.

     :param catalog: The name of the catalog.

     :return: The newly created plugin.
     """
     plugin_cls = RepositoryPlugin.load(catalog)
     return plugin_cls.create(catalog)
Beispiel #10
0
def plugin_for(catalog):
    """
    Load the repository plugin for the given catalog and create an instance
    of it for that catalog.

    :param catalog: The name of the catalog.

    :return: The newly created plugin.
    """
    plugin_cls = RepositoryPlugin.load(catalog)
    return plugin_cls.create(catalog)
 def plugin_db(self) -> JSONs:
     """
     Return the portal DB provided by the default catalog's repository
     plugin.

     :return: The result of the plugin's `portal_db` method.
     """
     # Must be lazy so the mock catalog's repository plugin is used
     default_catalog = config.default_catalog
     plugin_cls = RepositoryPlugin.load(default_catalog)
     return plugin_cls.create(default_catalog).portal_db()
Beispiel #12
0
 def default_db(self) -> JSONs:
     """
     Return the demultiplexed portal DB provided by the default catalog's
     repository plugin.

     :return: The plugin's portal DB after it was passed through
              `demultiplex`.
     """
     # FIXME: Parameterize PortalService instances with current catalog
     #        https://github.com/DataBiosphere/azul/issues/2716
     default_catalog = config.default_catalog
     plugin_cls = RepositoryPlugin.load(default_catalog)
     portal_db = plugin_cls.create(default_catalog).portal_db()
     return self.demultiplex(portal_db)
Beispiel #13
0
 def repository_plugin(self) -> RepositoryPlugin:
     """
     Load the repository plugin for this instance's catalog and create an
     instance of it for that catalog.

     :return: The newly created plugin.
     """
     plugin_cls = RepositoryPlugin.load(self.catalog)
     return plugin_cls.create(self.catalog)