def get_repo_url(repo, access_protocol, github_login):
    """Report the repository access URL for Git matching the protocol"""
    prop = {
        'https': repo.clone_url,
        'ssh': repo.ssh_url
    }[access_protocol]
    if access_protocol == 'https' and github_login:
        # we were provided explicit github login.  For ssh access it is
        # impossible to specify different login within ssh RI, but it is
        # possible to do so for https logins
        url = URL(prop)
        assert url.scheme in ('http', 'https')
        url.username = github_login
        prop = url.as_str()
    return prop

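# A minimal usage sketch for get_repo_url() above, not part of the original
# code. `_FakeRepo` is a hypothetical stand-in for the PyGithub repository
# object the function expects; only the two URL attributes the function
# reads are modeled here.
class _FakeRepo:
    clone_url = 'https://github.com/org/proj.git'
    ssh_url = 'git@github.com:org/proj.git'

# ssh access returns the ssh URL unchanged; an explicit login cannot be
# embedded in an ssh RI
assert get_repo_url(_FakeRepo(), 'ssh', 'myuser') == 'git@github.com:org/proj.git'
# https access with a login injects the username into the URL; assuming
# URL.as_str() serializes it as user@host, this should yield
# 'https://myuser@github.com/org/proj.git'
print(get_repo_url(_FakeRepo(), 'https', 'myuser'))
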
def test_url_base():
    # Basic checks
    assert_raises(ValueError, URL, "http://example.com", hostname='example.com')
    url = URL("http://example.com")
    eq_(url.hostname, 'example.com')
    eq_(url.scheme, 'http')
    eq_(url.port, '')  # not specified -- empty strings
    eq_(url.username, '')  # not specified -- empty strings
    eq_(repr(url), "URL(hostname='example.com', scheme='http')")
    eq_(url, "http://example.com")  # automagic coercion in __eq__

    neq_(URL(), URL(hostname='x'))

    smth = URL('smth')
    eq_(smth.hostname, '')
    ok_(bool(smth))
    nok_(bool(URL()))

    assert_raises(ValueError, url._set_from_fields, unknown='1')

    with swallow_logs(new_level=logging.WARNING) as cml:
        # we don't "care" about params ATM so there is a warning if there are
        # any
        purl = URL("http://example.com/;param")
        # but we do maintain the original string
        eq_(str(purl), 'http://example.com/;param')
        assert_in('ParseResults contains params', cml.out)
        eq_(purl.as_str(), 'http://example.com/')

def test_url_fragments_and_query():
    url = URL(hostname="host", query=OrderedDict((('a', 'x/b'), ('b', 'y'))))
    eq_(str(url), '//host?a=x%2Fb&b=y')
    eq_(url.query, 'a=x%2Fb&b=y')
    eq_(url.query_dict, {'a': 'x/b', 'b': 'y'})

    url = URL(hostname="host", fragment=OrderedDict((('b', 'x/b'), ('a', 'y'))))
    eq_(str(url), '//host#b=x/b&a=y')
    eq_(url.fragment, 'b=x/b&a=y')
    eq_(url.fragment_dict, {'a': 'y', 'b': 'x/b'})

    fname = get_most_obscure_supported_name()
    url = URL(hostname="host", fragment={'a': fname})
    eq_(url.fragment_dict, {'a': fname})

def test_url_compose_archive_one():
    url = URL(scheme='dl+archive', path='KEY',
              fragment=OrderedDict((('path', 'f/p/ s+'), ('size', 30))))
    # funny - space is encoded as + but + is %2B
    eq_(str(url), 'dl+archive:KEY#path=f/p/+s%2B&size=30')
    eq_(url.fragment_dict, {'path': 'f/p/ s+', 'size': '30'})

def add_version_to_url(url, version, replace=False):
    """Add a version ID to `url`.

    Parameters
    ----------
    url : datalad.support.network.URL
        A URL.
    version : str
        The value of 'versionId='.
    replace : boolean, optional
        If a versionId is already present in `url`, replace it.

    Returns
    -------
    A versioned URL (str)
    """
    version_id = "versionId={}".format(version)
    if not url.query:
        query = version_id
    else:
        ver_match = re.match("(?P<pre>.*&)?"
                             "(?P<vers>versionId=[^&]+)"
                             "(?P<post>&.*)?",
                             url.query)
        if ver_match:
            if replace:
                query = "".join([ver_match.group("pre") or "",
                                 version_id,
                                 ver_match.group("post") or ""])
            else:
                query = url.query
        else:
            query = url.query + "&" + version_id
    return URL(**dict(url.fields, query=query)).as_str()

def get_file_url(self, archive_file=None, archive_key=None, file=None,
                 size=None):
    """Given archive (file or a key) and a file -- compose URL for access

    Examples
    --------

    dl+archive:SHA256E-s176--69...3e.tar.gz#path=1/d2/2d&size=123
        when size of file within archive was known to be 123
    dl+archive:SHA256E-s176--69...3e.tar.gz#path=1/d2/2d
        when size of file within archive was not provided

    Parameters
    ----------
    size: int, optional
        Size of the file.  If not provided, will simply be empty
    """
    assert file is not None
    if archive_file is not None:
        if archive_key is not None:
            raise ValueError(
                "Provide archive_file or archive_key - not both")
        archive_key = self.repo.get_file_annexinfo(archive_file)['key']
    assert archive_key is not None
    attrs = OrderedDict()  # looking forward for more
    if file:
        attrs['path'] = file.lstrip('/')
    if size is not None:
        attrs['size'] = size
    return str(URL(scheme=self.URL_SCHEME, path=archive_key, fragment=attrs))

def fix_url(data, keys=['url']):
    """Given data, get the value within the 'url' key and fix it up so it is
    a legit URL -- e.g. replace spaces with %20
    """
    data = data.copy()
    for key in keys:
        if key in data:  # avoids a KeyError if the dictionary lacks the key
            data[key] = URL(data[key]).as_str()
    yield data

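# A minimal sketch of fix_url() above, not from the original code. It assumes
# URL(...).as_str() percent-encodes the space, as the docstring describes;
# the record dict is made up.
records = list(fix_url({'url': 'http://example.com/my file.txt', 'size': 10}))
# expected: [{'url': 'http://example.com/my%20file.txt', 'size': 10}]
print(records)
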
def test_url_eq():
    eq_(URL(), URL())
    # it doesn't make sense to ask what kind of a URL an empty one is
    #eq_(RI(), RI())
    neq_(URL(), URL(hostname='x'))
    # Different types aren't equal even if they have the same field values
    neq_(URL(path='x'), PathRI(path='x'))
    neq_(URL(hostname='x'), SSHRI(hostname='x'))
    neq_(str(URL(hostname='x')), str(SSHRI(hostname='x')))

def test_add_version_to_url():
    base_url = "http://ex.com/f.txt"
    base_url_query = "http://ex.com/f.txt?k=v"
    for replace in True, False:
        eq_(add_version_to_url(URL(base_url), "new.id", replace=replace),
            base_url + "?versionId=new.id")

        eq_(add_version_to_url(URL(base_url_query), "new.id", replace=replace),
            base_url_query + "&versionId=new.id")

        expected = "new.id" if replace else "orig.id"
        eq_(add_version_to_url(URL(base_url + "?versionId=orig.id"),
                               "new.id", replace=replace),
            base_url + "?versionId=" + expected)

        eq_(add_version_to_url(URL(base_url_query + "&versionId=orig.id"),
                               "new.id", replace=replace),
            base_url_query + "&versionId=" + expected)

def _parse_url(self, url):
    """Parse url and return archive key, file within archive and
    additional attributes (such as size)"""
    url = URL(url)
    assert url.scheme == self.URL_SCHEME
    fdict = url.fragment_dict
    if 'path' not in fdict:
        # must be old-style key/path#size=
        assert '/' in url.path, "must be of key/path format"
        key, path = url.path.split('/', 1)
    else:
        key, path = url.path, fdict.pop('path')
    if 'size' in fdict:
        fdict['size'] = int(fdict['size'])
    return key, path, fdict

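# A sketch of the two dl+archive URL styles _parse_url() above accepts.
# Since the method only uses `self.URL_SCHEME`, a hypothetical stub object
# is enough to call it here; results assume URL parsing behaves as in the
# tests above.
class _SchemeStub:
    URL_SCHEME = 'dl+archive'

# new style: the path is the archive key; file path and size live in the
# fragment -- expected: ('SOMEKEY', 'd/f', {'size': 123})
print(_parse_url(_SchemeStub(), 'dl+archive:SOMEKEY#path=d/f&size=123'))
# old style: key/path packed into the path itself, no fragment -- expected:
# ('SOMEKEY', 'd/f', {})
print(_parse_url(_SchemeStub(), 'dl+archive:SOMEKEY/d/f'))
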
def verify_ria_url(url, cfg):
    """Verify and decode ria url

    Expects a ria-URL pointing to a RIA store, applies rewrites and tries to
    decode potential host and base path for the store from it. Additionally
    raises if `url` is considered invalid.

    ria+ssh://somehost:/path/to/store
    ria+file:///path/to/store

    Parameters
    ----------
    url : str
        URL to verify and decode.
    cfg : dict-like
        Configuration settings for rewrite_url()

    Raises
    ------
    ValueError

    Returns
    -------
    tuple
        (host, base-path, rewritten url)
    """
    from datalad.config import rewrite_url
    from datalad.support.network import URL

    if not url:
        raise ValueError("Got no URL")

    url = rewrite_url(cfg, url)
    url_ri = URL(url)
    if not url_ri.scheme.startswith('ria+'):
        raise ValueError("Missing ria+ prefix in final URL: %s" % url)
    if url_ri.fragment:
        raise ValueError(
            "Unexpected fragment in RIA-store URL: %s" % url_ri.fragment)
    protocol = url_ri.scheme[4:]
    if protocol not in ['ssh', 'file', 'http', 'https']:
        raise ValueError("Unsupported protocol: %s. "
                         "Supported: ssh, file, http(s)" % protocol)

    return url_ri.hostname if protocol != 'file' else None, \
           url_ri.path if url_ri.path else '/', \
           url

def verify_ria_url(url, cfg):
    """Verify and decode ria url

    Expects a ria-URL pointing to a RIA store, applies rewrites and tries to
    decode potential host and base path for the store from it. Additionally
    raises if `url` is considered invalid.

    ria+ssh://somehost:/path/to/store
    ria+file:///path/to/store

    Parameters
    ----------
    url : str
        URL to verify and decode.
    cfg : dict-like
        Configuration settings for rewrite_url()

    Raises
    ------
    ValueError

    Returns
    -------
    tuple
        (host, base-path, rewritten url)
        `host` is not just a hostname, but is a stub URL that may also contain
        username, password, and port, if specified in a given URL.
    """
    from datalad.config import rewrite_url
    from datalad.support.network import URL

    if not url:
        raise ValueError("Got no URL")

    url = rewrite_url(cfg, url)
    url_ri = URL(url)
    if not url_ri.scheme.startswith('ria+'):
        raise ValueError("Missing ria+ prefix in final URL: %s" % url)
    if url_ri.fragment:
        raise ValueError(
            "Unexpected fragment in RIA-store URL: %s" % url_ri.fragment)
    protocol = url_ri.scheme[4:]
    if protocol not in ['ssh', 'file', 'http', 'https']:
        raise ValueError("Unsupported protocol: %s. "
                         "Supported: ssh, file, http(s)" % protocol)

    host = '{proto}://{user}{pdlm}{passwd}{udlm}{host}{portdlm}{port}'.format(
        proto=protocol,
        user=url_ri.username or '',
        pdlm=':' if url_ri.password else '',
        passwd=url_ri.password or '',
        udlm='@' if url_ri.username else '',
        host=url_ri.hostname or '',
        portdlm=':' if url_ri.port else '',
        port=url_ri.port or '',
    )
    # this != file is critical behavior; if removed, it will ruin the IO
    # selection in RIARemote!!
    return host if protocol != 'file' else None, \
           url_ri.path if url_ri.path else '/', \
           url

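# A hedged usage sketch for the verify_ria_url() variant directly above (the
# one returning a stub URL for host). The store URL is made up, and an empty
# mapping stands in for the config; rewrite_url() is assumed to pass the URL
# through unchanged when no rewrite rules are configured.
host, base_path, rewritten = verify_ria_url(
    'ria+ssh://user@store.example.org:2222/data/store', cfg={})
# expected: host == 'ssh://user@store.example.org:2222'
#           base_path == '/data/store'
#           rewritten is the input URL, unchanged
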
def postclonecfg_ria(ds, props):
    """Configure a dataset freshly cloned from a RIA store"""
    repo = ds.repo
    # RIA uses hashdir mixed, copying data to it via git-annex (if cloned via
    # ssh) would make it see a bare repo and establish a hashdir lower annex
    # object tree.
    # Moreover, we want the ORA remote to receive all data for the store, so
    # its objects could be moved into archives (the main point of a RIA store)
    RIA_REMOTE_NAME = 'origin'  # don't hardcode everywhere
    ds.config.set(
        'remote.{}.annex-ignore'.format(RIA_REMOTE_NAME), 'true',
        where='local')

    # chances are that if this dataset came from a RIA store, its subdatasets
    # may live there too. Place a subdataset source candidate config that
    # makes get probe this RIA store when obtaining subdatasets
    ds.config.set(
        # we use the label 'origin' for this candidate in order to not have to
        # generate a complicated name from the actual source specification.
        # we pick a cost of 200 to sort it before datalad's default candidates
        # for non-RIA URLs, because they prioritize hierarchical layouts that
        # cannot be found in a RIA store
        'datalad.get.subdataset-source-candidate-200origin',
        # use the entire original URL, up to the fragment, plus a dataset ID
        # placeholder; this should make things work with any store setup we
        # support (paths, ports, ...)
        props['source'].split('#', maxsplit=1)[0] + '#{id}',
        where='local')

    # setup publication dependency, if a corresponding special remote exists
    # and was enabled (there could be RIA stores that actually only have
    # repos)
    # make this function be a generator
    ora_remotes = [s for s in ds.siblings('query', result_renderer='disabled')
                   if s.get('annex-externaltype') == 'ora']
    if not ora_remotes and any(
            r.get('externaltype') == 'ora'
            for r in (repo.get_special_remotes().values()
                      if hasattr(repo, 'get_special_remotes')
                      else [])):
        # no ORA remote autoenabled, but configuration known about at least
        # one. Let's check origin's config for datalad.ora-remote.uuid as
        # stored by create-sibling-ria and try enabling that one.
        lgr.debug("Found no autoenabled ORA special remote. Trying to look "
                  "it up in source config ...")

        # First figure whether we cloned via SSH, HTTP or local path and then
        # get that config file the same way:
        config_content = None
        scheme = props['giturl'].split(':', 1)[0]
        if scheme in ['http', 'https']:
            try:
                config_content = download_url(
                    "{}{}config".format(
                        props['giturl'],
                        '/' if not props['giturl'].endswith('/') else ''))
            except DownloadError as e:
                lgr.debug("Failed to get config file from source:\n%s",
                          exc_str(e))
        elif scheme == 'ssh':
            # TODO: switch the following to proper command abstraction:
            # SSHRemoteIO ignores the path part ATM. No remote CWD! (To be
            # changed with command abstractions). So we need to get that part
            # to have a valid path to origin's config file:
            cfg_path = PurePosixPath(URL(props['giturl']).path) / 'config'
            op = SSHRemoteIO(props['giturl'])
            try:
                config_content = op.read_file(cfg_path)
            except RIARemoteError as e:
                lgr.debug("Failed to get config file from source: %s",
                          exc_str(e))
        elif scheme == 'file':
            # TODO: switch the following to proper command abstraction:
            op = LocalIO()
            cfg_path = Path(URL(props['giturl']).localpath) / 'config'
            try:
                config_content = op.read_file(cfg_path)
            except (RIARemoteError, OSError) as e:
                lgr.debug("Failed to get config file from source: %s",
                          exc_str(e))
        else:
            lgr.debug("Unknown URL-Scheme %s in %s. Can handle SSH, HTTP or "
                      "FILE scheme URLs.", scheme, props['source'])

        # 3. And read it
        org_uuid = None
        if config_content:
            # TODO: We might be able to spare the saving to a file.
            #       "git config -f -" is not explicitly documented but happens
            #       to work and would read from stdin. Make sure we know this
            #       works for required git versions and on all platforms.
            with make_tempfile(content=config_content) as cfg_file:
                runner = GitWitlessRunner()
                try:
                    result = runner.run(
                        ['git', 'config', '-f', cfg_file,
                         'datalad.ora-remote.uuid'],
                        protocol=StdOutCapture
                    )
                    org_uuid = result['stdout'].strip()
                except CommandError as e:
                    # doesn't contain what we are looking for
                    lgr.debug("Found no UUID for ORA special remote at "
                              "'%s' (%s)", RIA_REMOTE_NAME, exc_str(e))

        # Now, enable it. If annex-init didn't fail to enable it as stored,
        # we wouldn't end up here, so enable with the store URL as suggested
        # by the URL we cloned from.
        if org_uuid:
            srs = repo.get_special_remotes()
            if org_uuid in srs.keys():
                # TODO: - Double-check autoenable value and only do this when
                #         true?
                #       - What if it still fails? -> Annex shouldn't change
                #         config in that case

                # we only need the store:
                new_url = props['source'].split('#')[0]
                try:
                    repo.enable_remote(srs[org_uuid]['name'],
                                       options=['url={}'.format(new_url)])
                    lgr.info("Reconfigured %s for %s",
                             srs[org_uuid]['name'], new_url)
                    # update ora_remotes for considering publication
                    # dependency below
                    ora_remotes = [
                        s for s in ds.siblings('query',
                                               result_renderer='disabled')
                        if s.get('annex-externaltype', None) == 'ora']
                except CommandError as e:
                    lgr.debug("Failed to reconfigure ORA special remote: %s",
                              exc_str(e))
            else:
                lgr.debug("Unknown ORA special remote uuid at '%s': %s",
                          RIA_REMOTE_NAME, org_uuid)

    if ora_remotes:
        if len(ora_remotes) == 1:
            yield from ds.siblings(
                'configure',
                name=RIA_REMOTE_NAME,
                publish_depends=ora_remotes[0]['name'],
                result_filter=None,
                result_renderer='disabled')
        else:
            lgr.warning(
                "Found multiple ORA remotes. Couldn't decide which "
                "publishing to 'origin' should depend on: %s. Consider "
                "running 'datalad siblings configure -s origin "
                "--publish-depends ORAREMOTENAME' to set publication "
                "dependency manually.",
                [r['name'] for r in ora_remotes])

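# Illustration (with made-up values) of the subdataset source candidate
# template set up in postclonecfg_ria() above: the original clone URL is
# truncated at the fragment and a literal '{id}' placeholder is appended,
# to be filled in with a subdataset's ID when `get` probes the store.
source = ('ria+ssh://store.example.org/data/store'
          '#6d69ca68-7e85-11e6-904b-002590f97d84')
candidate = source.split('#', maxsplit=1)[0] + '#{id}'
assert candidate == 'ria+ssh://store.example.org/data/store#{id}'
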
def get_versioned_url(url, guarantee_versioned=False, return_all=False,
                      verify=False, s3conn=None, update=False):
    """Given a url return a versioned URL

    Originally targeting AWS S3 buckets with versioning enabled

    Parameters
    ----------
    url : string
    guarantee_versioned : bool, optional
        Would fail if the bucket is determined to have no versioning enabled.
        It will not fail if we fail to determine whether the bucket is
        versioned or not
    return_all: bool, optional
        If True, would return a list with URLs for all the versions of this
        file, sorted chronologically with latest first (when possible, e.g.
        for S3).  Delete markers get ignored
    verify: bool, optional
        Verify that the URL is accessible. As discovered, access to some
        versioned keys might be denied
    update : bool, optional
        If the URL already contains a version ID, update it to the latest
        version ID.  This option has no effect if return_all is true.

    Returns
    -------
    string or list of string
    """
    url_rec = URL(url)

    s3_bucket, fpath = None, url_rec.path.lstrip('/')

    if url_rec.hostname.endswith('.s3.amazonaws.com'):
        if url_rec.scheme not in ('http', 'https'):
            raise ValueError("Do not know how to handle %s scheme" %
                             url_rec.scheme)
        # we know how to slice this cat
        s3_bucket = url_rec.hostname.split('.', 1)[0]
    elif url_rec.hostname == 's3.amazonaws.com':
        if url_rec.scheme not in ('http', 'https'):
            raise ValueError("Do not know how to handle %s scheme" %
                             url_rec.scheme)
        # url is s3.amazonaws.com/bucket/PATH
        s3_bucket, fpath = fpath.split('/', 1)
    elif url_rec.scheme == 's3':
        s3_bucket = url_rec.hostname  # must be
        # and for now implement magical conversion to URL
        # TODO: wouldn't work if needs special permissions etc
        # actually for now
        raise NotImplementedError

    was_versioned = False
    all_versions = []
    if s3_bucket:
        # TODO: cache
        if s3conn is None:
            # we need to reuse our providers
            from ..downloaders.providers import Providers
            providers = Providers.from_config_files()
            s3url = "s3://%s/" % s3_bucket
            s3provider = providers.get_provider(s3url)
            if s3provider.authenticator.bucket is not None \
                    and s3provider.authenticator.bucket.name == s3_bucket:
                # we have established the connection before, so let's reuse
                bucket = s3provider.authenticator.bucket
            else:
                bucket = s3provider.authenticator.authenticate(
                    s3_bucket,
                    s3provider.credential)  # s3conn or _get_bucket_connection(S3_TEST_CREDENTIAL)
        else:
            bucket = s3conn.get_bucket(s3_bucket)

        supports_versioning = True  # assume that it does
        try:
            supports_versioning = bucket.get_versioning_status()  # TODO cache
        except S3ResponseError as e:
            # might be forbidden, i.e. "403 Forbidden", so we try anyway
            supports_versioning = 'maybe'

        if supports_versioning:
            all_keys = bucket.list_versions(fpath)
            # Filter and sort them so the newest one is on top
            all_keys = [
                x for x in sorted(all_keys,
                                  key=lambda x: (x.last_modified, x.is_latest))
                if (x.name == fpath)  # match exact name, not just prefix
            ][::-1]
            # our current assumptions
            assert all_keys[0].is_latest
            # and now filter out delete markers etc
            all_keys = [x for x in all_keys
                        if isinstance(x, Key)]  # ignore DeleteMarkers
            assert all_keys

            for key in all_keys:
                url_versioned = add_version_to_url(
                    url_rec, key.version_id,
                    replace=update and not return_all)

                all_versions.append(url_versioned)
                if verify:
                    # it would throw HTTPError exception if not accessible
                    _ = urlopen(Request(url_versioned))
                was_versioned = True
                if not return_all:
                    break

    if guarantee_versioned and not was_versioned:
        raise RuntimeError("Could not version %s" % url)

    if not all_versions:
        # we didn't get a chance
        all_versions = [url_rec.as_str()]

    if return_all:
        return all_versions
    else:
        return all_versions[0]

def __call__(urls, dataset=None, path=None, overwrite=False,
             archive=False, save=True, message=None):
    from ..downloaders.providers import Providers

    ds = None
    if save or dataset:
        try:
            ds = require_dataset(
                dataset, check_installed=True,
                purpose='downloading urls')
        except NoDatasetFound:
            pass

    common_report = {"action": "download_url",
                     "ds": ds}

    got_ds_instance = isinstance(dataset, Dataset)
    dir_is_target = not path or path.endswith(op.sep)
    path = str(resolve_path(path or op.curdir, ds=dataset))
    if dir_is_target:
        # resolve_path() doesn't preserve trailing separators. Add one for
        # the download() call.
        path = path + op.sep
    urls = ensure_list_from_str(urls)

    if not dir_is_target:
        if len(urls) > 1:
            yield get_status_dict(
                status="error",
                message=(
                    "When specifying multiple urls, --path should point to "
                    "a directory target (with a trailing separator). Got %r",
                    path),
                type="file",
                path=path,
                **common_report)
            return
        if archive:
            # make sure the file suffix indicated by a URL is preserved
            # so that any further archive processing doesn't have to
            # employ mime type inspection in order to determine the archive
            # type
            from datalad.support.network import URL
            suffixes = PurePosixPath(URL(urls[0]).path).suffixes
            if Path(path).suffixes != suffixes:
                path += ''.join(suffixes)
        # we know that we have a single URL
        # download() would be fine getting an existing directory and
        # downloading the URL underneath it, but let's enforce a trailing
        # slash here for consistency.
        if op.isdir(path):
            yield get_status_dict(
                status="error",
                message=(
                    "Non-directory path given (no trailing separator) "
                    "but a directory with that name (after adding archive "
                    "suffix) exists"),
                type="file",
                path=path,
                **common_report)
            return

    # TODO setup fancy ui.progressbars doing this in parallel and reporting
    # overall progress in % of urls which were already downloaded
    providers = Providers.from_config_files()
    downloaded_paths = []
    path_urls = {}
    for url in urls:
        # somewhat "ugly"
        # providers.get_provider(url).get_downloader(url).download(url, path=path)
        # for now -- via sugaring
        try:
            downloaded_path = providers.download(url, path=path,
                                                 overwrite=overwrite)
        except Exception as e:
            yield get_status_dict(
                status="error",
                message=exc_str(e),
                type="file",
                path=path,
                **common_report)
        else:
            downloaded_paths.append(downloaded_path)
            path_urls[downloaded_path] = url
            yield get_status_dict(
                status="ok",
                type="file",
                path=downloaded_path,
                **common_report)

    if downloaded_paths and save and ds is not None:
        msg = message or """\
[DATALAD] Download URLs

URLs:
  {}""".format("\n  ".join(urls))

        for r in Save()(downloaded_paths, message=msg,
                        # ATTN: Pass the original dataset argument to
                        # preserve relative path handling semantics.
                        dataset=dataset,
                        return_type="generator",
                        result_xfm=None,
                        result_filter=None,
                        on_failure="ignore"):
            yield r

        if isinstance(ds.repo, AnnexRepo):
            if got_ds_instance:
                # Paths in `downloaded_paths` are already relative to the
                # dataset.
                rpaths = dict(zip(downloaded_paths, downloaded_paths))
            else:
                # Paths in `downloaded_paths` are already relative to the
                # current working directory. Take these relative to the
                # dataset for use with the AnnexRepo method calls.
                rpaths = {}
                for orig_path, resolved in zip(
                        downloaded_paths,
                        resolve_path(downloaded_paths, ds=dataset)):
                    rpath = path_under_rev_dataset(ds, resolved)
                    if rpath:
                        rpaths[str(rpath)] = orig_path
                    else:
                        lgr.warning("Path %s not under dataset %s",
                                    orig_path, ds)
            annex_paths = [
                p for p, annexed in
                zip(rpaths, ds.repo.is_under_annex(list(rpaths.keys())))
                if annexed]
            if annex_paths:
                for path in annex_paths:
                    url = path_urls[rpaths[path]]
                    try:
                        # The file is already present. This is just to
                        # register the URL.
                        ds.repo.add_url_to_file(
                            path,
                            url,
                            # avoid batch mode for single files
                            # https://github.com/datalad/datalad/issues/2849
                            batch=len(annex_paths) > 1,
                            # bypass URL size check, we already have the file
                            options=['--relaxed'])
                    except CommandError as exc:
                        lgr.warning("Registering %s with %s failed: %s",
                                    path, url, exc_str(exc))

                if archive:
                    from datalad.api import add_archive_content
                    for path in annex_paths:
                        add_archive_content(path, annex=ds.repo, delete=True)

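# A hedged sketch of driving the command above through datalad's Python API
# (download_url is assumed to be exposed via datalad.api, as for other
# commands of this kind; the URL and target path are made up). With a
# trailing separator, --path is treated as a directory target, which also
# permits multiple URLs.
# from datalad.api import download_url
# download_url(['http://example.com/data.csv'], path='inputs/', save=True)
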
def test_get_url_path_on_fileurls():
    eq_(URL('file:///a').path, '/a')
    eq_(URL('file:///a/b').path, '/a/b')
    eq_(URL('file:///a/b').localpath, '/a/b')
    eq_(URL('file:///a/b#id').path, '/a/b')
    eq_(URL('file:///a/b?whatever').path, '/a/b')

def test_url_dicts():
    eq_(URL("http://host").query_dict, {})
