def test_url_base():
    # Giving both a full URL string and an explicit field is ambiguous -> error
    assert_raises(ValueError, URL, "http://example.com", hostname='example.com')

    parsed = URL("http://example.com")
    eq_(parsed.hostname, 'example.com')
    eq_(parsed.scheme, 'http')
    # fields absent from the URL come back as empty strings, not None
    eq_(parsed.port, '')
    eq_(parsed.username, '')
    eq_(repr(parsed), "URL(hostname='example.com', scheme='http')")
    # __eq__ coerces a plain string operand into a URL before comparing
    eq_(parsed, "http://example.com")
    neq_(URL(), URL(hostname='x'))

    # a bare word parses with no hostname but is still truthy
    bare = URL('smth')
    eq_(bare.hostname, '')
    ok_(bool(bare))
    # a completely empty URL is falsy
    nok_(bool(URL()))

    # unknown field names are rejected
    assert_raises(ValueError, parsed._set_from_fields, unknown='1')

    with swallow_logs(new_level=logging.WARNING) as log_cm:
        # params are not modeled ATM, so parsing one warns...
        with_params = URL("http://example.com/;param")
        # ...but the original string form is kept intact
        eq_(str(with_params), 'http://example.com/;param')
        assert_in('ParseResults contains params', log_cm.out)
        eq_(with_params.as_str(), 'http://example.com/')
def get_repo_url(repo, access_protocol, github_login):
    """Report the repository access URL for Git matching the protocol"""
    # pick the URL flavor the caller asked for; unknown protocols raise KeyError
    candidates = {'https': repo.clone_url, 'ssh': repo.ssh_url}
    chosen = candidates[access_protocol]
    if github_login and access_protocol == 'https':
        # we were provided explicit github login. For ssh access it is
        # impossible to specify different login within ssh RI, but it is
        # possible to do so for https logins
        parsed = URL(chosen)
        assert parsed.scheme in ('http', 'https')
        parsed.username = github_login
        chosen = parsed.as_str()
    return chosen
def get_repo_url(repo, access_protocol, github_login):
    """Report the repository access URL for Git matching the protocol"""
    if access_protocol == 'https':
        result = repo.clone_url
    elif access_protocol == 'ssh':
        result = repo.ssh_url
    else:
        # mirror the original mapping lookup: unsupported protocol -> KeyError
        raise KeyError(access_protocol)
    if access_protocol == 'https' and github_login:
        # we were provided explicit github login. For ssh access it is
        # impossible to specify different login within ssh RI, but it is
        # possible to do so for https logins
        rewritten = URL(result)
        assert rewritten.scheme in ('http', 'https')
        rewritten.username = github_login
        result = rewritten.as_str()
    return result
def get_versioned_url(url, guarantee_versioned=False, return_all=False,
                      verify=False, s3conn=None, update=False):
    """Given a url return a versioned URL

    Originally targeting AWS S3 buckets with versioning enabled

    Parameters
    ----------
    url : string
    guarantee_versioned : bool, optional
      Would fail if buckets is determined to have no versioning enabled.
      It will not fail if we fail to determine if bucket is versioned or
      not
    return_all: bool, optional
      If True, would return a list with URLs for all the versions of this
      file, sorted chronologically with latest first (when possible, e.g.
      for S3).  Remove markers get ignored
    verify: bool, optional
      Verify that URL is accessible. As discovered some versioned keys might
      be denied access to
    update : bool, optional
      If the URL already contains a version ID, update it to the latest
      version ID.  This option has no effect if return_all is true.

    Returns
    -------
    string or list of string
    """
    url_rec = URL(url)
    # bucket name stays None unless the URL matches a known S3 form;
    # fpath is the key path with the leading slash removed
    s3_bucket, fpath = None, url_rec.path.lstrip('/')

    if url_rec.hostname.endswith('.s3.amazonaws.com'):
        if url_rec.scheme not in ('http', 'https'):
            raise ValueError("Do not know how to handle %s scheme" %
                             url_rec.scheme)
        # we know how to slice this cat
        # virtual-hosted style: BUCKET.s3.amazonaws.com/KEY
        s3_bucket = url_rec.hostname.split('.', 1)[0]
    elif url_rec.hostname == 's3.amazonaws.com':
        if url_rec.scheme not in ('http', 'https'):
            raise ValueError("Do not know how to handle %s scheme" %
                             url_rec.scheme)
        # url is s3.amazonaws.com/bucket/PATH (path style)
        s3_bucket, fpath = fpath.split('/', 1)
    elif url_rec.scheme == 's3':
        s3_bucket = url_rec.hostname  # must be
        # and for now implement magical conversion to URL
        # TODO: wouldn't work if needs special permissions etc
        # actually for now
        raise NotImplementedError

    was_versioned = False
    all_versions = []
    # non-S3 URLs skip this whole branch and fall through to the
    # "return as-is" handling at the bottom
    if s3_bucket:
        # TODO: cache
        if s3conn is None:
            # we need to reuse our providers
            from ..downloaders.providers import Providers
            providers = Providers.from_config_files()
            s3url = "s3://%s/" % s3_bucket
            s3provider = providers.get_provider(s3url)
            if s3provider.authenticator.bucket is not None \
                    and s3provider.authenticator.bucket.name == s3_bucket:
                # we have established connection before, so let's just reuse
                bucket = s3provider.authenticator.bucket
            else:
                bucket = s3provider.authenticator.authenticate(
                    s3_bucket,
                    s3provider.credential
                )  # s3conn or _get_bucket_connection(S3_TEST_CREDENTIAL)
        else:
            # caller supplied a ready S3 connection (boto-style API assumed)
            bucket = s3conn.get_bucket(s3_bucket)

        supports_versioning = True  # assume that it does
        try:
            supports_versioning = bucket.get_versioning_status()  # TODO cache
        except S3ResponseError as e:
            # might be forbidden, i.e. "403 Forbidden" so we try then anyways
            supports_versioning = 'maybe'

        if supports_versioning:
            all_keys = bucket.list_versions(fpath)
            # Filter and sort them so the newest one on top
            all_keys = [
                x for x in sorted(all_keys,
                                  key=lambda x: (x.last_modified, x.is_latest))
                if ((x.name == fpath)  # match exact name, not just prefix
                    )
            ][::-1]
            # our current assumptions
            # NOTE(review): raises IndexError (not AssertionError) if no key
            # matched fpath exactly
            assert (all_keys[0].is_latest)
            # and now filter out delete markers etc
            all_keys = [x for x in all_keys if isinstance(x, Key)]  # ignore DeleteMarkers
            assert (all_keys)

            for key in all_keys:
                # replace=... rewrites an existing versionId only in the
                # single-result update mode
                url_versioned = add_version_to_url(
                    url_rec, key.version_id,
                    replace=update and not return_all)

                all_versions.append(url_versioned)
                if verify:
                    # it would throw HTTPError exception if not accessible
                    _ = urlopen(Request(url_versioned))
                was_versioned = True
                if not return_all:
                    break

    if guarantee_versioned and not was_versioned:
        raise RuntimeError("Could not version %s" % url)

    if not all_versions:
        # we didn't get a chance
        all_versions = [url_rec.as_str()]

    if return_all:
        return all_versions
    else:
        return all_versions[0]
def get_versioned_url(url, guarantee_versioned=False, return_all=False,
                      verify=False, s3conn=None, update=False):
    """Given a url return a versioned URL

    Originally targeting AWS S3 buckets with versioning enabled

    Parameters
    ----------
    url : string
    guarantee_versioned : bool, optional
      Would fail if buckets is determined to have no versioning enabled.
      It will not fail if we fail to determine if bucket is versioned or
      not
    return_all: bool, optional
      If True, would return a list with URLs for all the versions of this
      file, sorted chronologically with latest first (when possible, e.g.
      for S3).  Remove markers get ignored
    verify: bool, optional
      Verify that URL is accessible. As discovered some versioned keys might
      be denied access to
    update : bool, optional
      If the URL already contains a version ID, update it to the latest
      version ID.  This option has no effect if return_all is true.

    Returns
    -------
    string or list of string
    """
    url_rec = URL(url)
    # detect the S3 bucket (if any) from the three supported URL shapes;
    # fpath is the object key with the leading '/' stripped
    s3_bucket, fpath = None, url_rec.path.lstrip('/')

    if url_rec.hostname.endswith('.s3.amazonaws.com'):
        if url_rec.scheme not in ('http', 'https'):
            raise ValueError("Do not know how to handle %s scheme" %
                             url_rec.scheme)
        # we know how to slice this cat
        # BUCKET.s3.amazonaws.com -> first dotted component is the bucket
        s3_bucket = url_rec.hostname.split('.', 1)[0]
    elif url_rec.hostname == 's3.amazonaws.com':
        if url_rec.scheme not in ('http', 'https'):
            raise ValueError("Do not know how to handle %s scheme" %
                             url_rec.scheme)
        # url is s3.amazonaws.com/bucket/PATH
        s3_bucket, fpath = fpath.split('/', 1)
    elif url_rec.scheme == 's3':
        s3_bucket = url_rec.hostname  # must be
        # and for now implement magical conversion to URL
        # TODO: wouldn't work if needs special permissions etc
        # actually for now
        raise NotImplementedError

    was_versioned = False
    all_versions = []
    # URLs that are not recognized as S3 skip straight to the fallback
    # return of the original URL below
    if s3_bucket:
        # TODO: cache
        if s3conn is None:
            # we need to reuse our providers
            from ..downloaders.providers import Providers
            providers = Providers.from_config_files()
            s3url = "s3://%s/" % s3_bucket
            s3provider = providers.get_provider(s3url)
            if s3provider.authenticator.bucket is not None \
                    and s3provider.authenticator.bucket.name == s3_bucket:
                # we have established connection before, so let's just reuse
                bucket = s3provider.authenticator.bucket
            else:
                bucket = s3provider.authenticator.authenticate(
                    s3_bucket, s3provider.credential)
                # s3conn or _get_bucket_connection(S3_TEST_CREDENTIAL)
        else:
            # explicit connection provided by the caller (boto-style API
            # presumed)
            bucket = s3conn.get_bucket(s3_bucket)

        supports_versioning = True  # assume that it does
        try:
            supports_versioning = bucket.get_versioning_status()  # TODO cache
        except S3ResponseError as e:
            # might be forbidden, i.e. "403 Forbidden" so we try then anyways
            supports_versioning = 'maybe'

        if supports_versioning:
            all_keys = bucket.list_versions(fpath)
            # Filter and sort them so the newest one on top
            all_keys = [
                x for x in sorted(all_keys,
                                  key=lambda x: (x.last_modified, x.is_latest))
                if ((x.name == fpath)  # match exact name, not just prefix
                    )
            ][::-1]
            # our current assumptions
            # NOTE(review): an empty match here raises IndexError rather than
            # AssertionError
            assert(all_keys[0].is_latest)
            # and now filter out delete markers etc
            all_keys = [x for x in all_keys if isinstance(x, Key)]  # ignore DeleteMarkers
            assert(all_keys)

            for key in all_keys:
                # only rewrite a pre-existing versionId when updating a
                # single result
                url_versioned = add_version_to_url(
                    url_rec, key.version_id,
                    replace=update and not return_all)

                all_versions.append(url_versioned)
                if verify:
                    # it would throw HTTPError exception if not accessible
                    _ = urlopen(Request(url_versioned))
                was_versioned = True
                if not return_all:
                    break

    if guarantee_versioned and not was_versioned:
        raise RuntimeError("Could not version %s" % url)

    if not all_versions:
        # we didn't get a chance
        all_versions = [url_rec.as_str()]

    if return_all:
        return all_versions
    else:
        return all_versions[0]