def url_exists(url):
    url = url_util.parse(url)
    local_path = url_util.local_file_path(url)
    if local_path:
        return os.path.exists(local_path)

    if url.scheme == 's3':
        s3 = s3_util.create_s3_session(url)
        try:
            s3.get_object(Bucket=url.netloc, Key=url.path.lstrip('/'))
            return True
        except s3.exceptions.ClientError as err:
            # boto3 surfaces request failures through the client's exceptions
            # factory; a missing key is reported as 'NoSuchKey'.
            if err.response['Error']['Code'] == 'NoSuchKey':
                return False
            raise err

    elif url.scheme == 'gs':
        gcs = gcs_util.GCSBlob(url)
        return gcs.exists()

    # otherwise, just try to "read" from the URL, and assume that *any*
    # non-throwing response contains the resource represented by the URL
    try:
        read_from_url(url)
        return True
    except (SpackWebError, URLError):
        return False
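
# Usage sketch (illustrative addition, not part of the original code): how
# url_exists() is typically called.  The URLs are placeholders, the s3://
# case additionally needs boto3 credentials, and the helper name
# _example_url_exists is hypothetical.
def _example_url_exists():
    for candidate in ('file:///tmp/example.txt',
                      's3://example-bucket/build_cache/index.json'):
        # Local paths go through os.path.exists(), s3:// goes through boto3,
        # and any other scheme falls back to attempting a read of the URL.
        print(candidate, url_exists(candidate))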
def push_to_url(local_file_path, remote_path, keep_original=True,
                extra_args=None):
    if sys.platform == "win32":
        # A ':' in the second position means a drive-letter path
        # (e.g. C:\...), which should be treated as a local file URL.
        if remote_path[1] == ':':
            remote_path = "file://" + remote_path

    remote_url = url_util.parse(remote_path)
    verify_ssl = spack.config.get('config:verify_ssl')

    if __UNABLE_TO_VERIFY_SSL and verify_ssl and uses_ssl(remote_url):
        warn_no_ssl_cert_checking()

    remote_file_path = url_util.local_file_path(remote_url)
    if remote_file_path is not None:
        mkdirp(os.path.dirname(remote_file_path))
        if keep_original:
            shutil.copy(local_file_path, remote_file_path)
        else:
            try:
                rename(local_file_path, remote_file_path)
            except OSError as e:
                if e.errno == errno.EXDEV:
                    # NOTE(opadron): The above move failed because it crosses
                    # filesystem boundaries.  Copy the file (plus original
                    # metadata), and then delete the original.  This operation
                    # needs to be done in separate steps.
                    shutil.copy2(local_file_path, remote_file_path)
                    os.remove(local_file_path)
                else:
                    raise

    elif remote_url.scheme == 's3':
        if extra_args is None:
            extra_args = {}

        remote_path = remote_url.path
        while remote_path.startswith('/'):
            remote_path = remote_path[1:]

        s3 = s3_util.create_s3_session(
            remote_url,
            connection=s3_util.get_mirror_connection(remote_url))  # noqa: E501
        s3.upload_file(local_file_path, remote_url.netloc,
                       remote_path, ExtraArgs=extra_args)

        if not keep_original:
            os.remove(local_file_path)

    elif remote_url.scheme == 'gs':
        gcs = gcs_util.GCSBlob(remote_url)
        gcs.upload_to_blob(local_file_path)
        if not keep_original:
            os.remove(local_file_path)

    else:
        raise NotImplementedError(
            'Unrecognized URL scheme: {SCHEME}'.format(
                SCHEME=remote_url.scheme))
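
# Usage sketch (illustrative addition, not part of the original code):
# mirroring a local tarball to a plain directory and to S3.  Paths, the
# bucket name, and the helper name _example_push are placeholders.
def _example_push(local_tarball='/tmp/archive.tar.gz'):
    # Copy into a local mirror directory, keeping the original file around.
    push_to_url(local_tarball, 'file:///tmp/mirror/archive.tar.gz',
                keep_original=True)
    # Upload to S3; extra_args is passed straight through to boto3's
    # upload_file() as ExtraArgs (e.g. to set the object's content type).
    push_to_url(local_tarball, 's3://example-bucket/mirror/archive.tar.gz',
                keep_original=True,
                extra_args={'ContentType': 'application/gzip'})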
def list_url(url):
    url = url_util.parse(url)

    local_path = url_util.local_file_path(url)
    if local_path:
        return os.listdir(local_path)

    if url.scheme == 's3':
        s3 = s3_util.create_s3_session(url)
        return list(set(
            key.split('/', 1)[0]
            for key in _iter_s3_prefix(s3, url)))
def remove_url(url):
    url = url_util.parse(url)

    local_path = url_util.local_file_path(url)
    if local_path:
        os.remove(local_path)
        return

    if url.scheme == 's3':
        s3 = s3_util.create_s3_session(url)
        # S3 keys are stored without the leading slash (see push_to_url).
        s3.delete_object(Bucket=url.netloc, Key=url.path.lstrip('/'))
        return
def remove_url(url, recursive=False):
    url = url_util.parse(url)

    local_path = url_util.local_file_path(url)
    if local_path:
        if recursive:
            shutil.rmtree(local_path)
        else:
            os.remove(local_path)
        return

    if url.scheme == 's3':
        # Try to find a mirror for potential connection information
        s3 = s3_util.create_s3_session(
            url, connection=s3_util.get_mirror_connection(url))  # noqa: E501
        bucket = url.netloc
        if recursive:
            # Because list_objects_v2 can only return up to 1000 items
            # at a time, we have to paginate to make sure we get it all
            prefix = url.path.strip('/')
            paginator = s3.get_paginator('list_objects_v2')
            pages = paginator.paginate(Bucket=bucket, Prefix=prefix)

            delete_request = {'Objects': []}
            for item in pages.search('Contents'):
                if not item:
                    continue

                delete_request['Objects'].append({'Key': item['Key']})

                # Make sure we do not try to hit S3 with a list of more
                # than 1000 items
                if len(delete_request['Objects']) >= 1000:
                    r = s3.delete_objects(Bucket=bucket,
                                          Delete=delete_request)
                    _debug_print_delete_results(r)
                    delete_request = {'Objects': []}

            # Delete any items that remain
            if len(delete_request['Objects']):
                r = s3.delete_objects(Bucket=bucket, Delete=delete_request)
                _debug_print_delete_results(r)
        else:
            s3.delete_object(Bucket=bucket, Key=url.path.lstrip('/'))
        return

    elif url.scheme == 'gs':
        if recursive:
            bucket = gcs_util.GCSBucket(url)
            bucket.destroy(recursive=recursive)
        else:
            blob = gcs_util.GCSBlob(url)
            blob.delete_blob()
        return
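
# Usage sketch (illustrative addition, not part of the original code):
# removing a single object versus an entire prefix.  The bucket and paths
# are placeholders, and the helper name _example_remove is hypothetical.
def _example_remove():
    # Delete one object (or one local file for file:// URLs).
    remove_url('s3://example-bucket/mirror/old-archive.tar.gz')
    # Delete everything under a prefix; on S3 this pages through
    # list_objects_v2 and issues batched delete_objects calls.
    remove_url('s3://example-bucket/mirror/stale-prefix', recursive=True)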
def _s3_open(url):
    parsed = url_util.parse(url)
    s3 = s3_util.create_s3_session(parsed)

    bucket = parsed.netloc
    key = parsed.path

    if key.startswith('/'):
        key = key[1:]

    obj = s3.get_object(Bucket=bucket, Key=key)

    # NOTE(opadron): Apply workaround here (see above)
    stream = WrapStream(obj['Body'])
    headers = obj['ResponseMetadata']['HTTPHeaders']

    return url, headers, stream
def push_to_url(local_file_path, remote_path, **kwargs):
    keep_original = kwargs.get('keep_original', True)

    remote_url = url_util.parse(remote_path)
    verify_ssl = spack.config.get('config:verify_ssl')

    if __UNABLE_TO_VERIFY_SSL and verify_ssl and uses_ssl(remote_url):
        warn_no_ssl_cert_checking()

    remote_file_path = url_util.local_file_path(remote_url)
    if remote_file_path is not None:
        mkdirp(os.path.dirname(remote_file_path))
        if keep_original:
            shutil.copy(local_file_path, remote_file_path)
        else:
            try:
                os.rename(local_file_path, remote_file_path)
            except OSError as e:
                if e.errno == errno.EXDEV:
                    # NOTE(opadron): The above move failed because it crosses
                    # filesystem boundaries.  Copy the file (plus original
                    # metadata), and then delete the original.  This operation
                    # needs to be done in separate steps.
                    shutil.copy2(local_file_path, remote_file_path)
                    os.remove(local_file_path)
                else:
                    raise

    elif remote_url.scheme == 's3':
        extra_args = kwargs.get('extra_args', {})

        remote_path = remote_url.path
        while remote_path.startswith('/'):
            remote_path = remote_path[1:]

        s3 = s3_util.create_s3_session(remote_url)
        s3.upload_file(local_file_path, remote_url.netloc,
                       remote_path, ExtraArgs=extra_args)

        if not keep_original:
            os.remove(local_file_path)

    else:
        raise NotImplementedError(
            'Unrecognized URL scheme: {SCHEME}'.format(
                SCHEME=remote_url.scheme))
def list_url(url, recursive=False):
    url = url_util.parse(url)

    local_path = url_util.local_file_path(url)
    if local_path:
        if recursive:
            return list(_iter_local_prefix(local_path))
        return [subpath for subpath in os.listdir(local_path)
                if os.path.isfile(os.path.join(local_path, subpath))]

    if url.scheme == 's3':
        s3 = s3_util.create_s3_session(url)
        if recursive:
            return list(_iter_s3_prefix(s3, url))
        return list(set(
            key.split('/', 1)[0]
            for key in _iter_s3_prefix(s3, url)))
def url_exists(url):
    url = url_util.parse(url)
    local_path = url_util.local_file_path(url)
    if local_path:
        return os.path.exists(local_path)

    if url.scheme == 's3':
        s3 = s3_util.create_s3_session(url)
        from botocore.exceptions import ClientError
        try:
            # S3 keys are stored without the leading slash (see push_to_url).
            s3.get_object(Bucket=url.netloc, Key=url.path.lstrip('/'))
            return True
        except ClientError as err:
            if err.response['Error']['Code'] == 'NoSuchKey':
                return False
            raise err

    # otherwise, just try to "read" from the URL, and assume that *any*
    # non-throwing response contains the resource represented by the URL
    try:
        read_from_url(url)
        return True
    except URLError:
        return False
def list_url(url, recursive=False):
    url = url_util.parse(url)

    local_path = url_util.local_file_path(url)
    if local_path:
        if recursive:
            return list(_iter_local_prefix(local_path))
        return [
            subpath for subpath in os.listdir(local_path)
            if os.path.isfile(os.path.join(local_path, subpath))
        ]

    if url.scheme == 's3':
        s3 = s3_util.create_s3_session(
            url, connection=s3_util.get_mirror_connection(url))  # noqa: E501
        if recursive:
            return list(_iter_s3_prefix(s3, url))
        return list(set(
            key.split('/', 1)[0]
            for key in _iter_s3_prefix(s3, url)))

    elif url.scheme == 'gs':
        gcs = gcs_util.GCSBucket(url)
        return gcs.get_all_blobs(recursive=recursive)
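
# Usage sketch (illustrative addition, not part of the original code):
# listing the top-level entries of a mirror versus walking it recursively.
# The URL is a placeholder and the helper name _example_list is hypothetical.
def _example_list(mirror='s3://example-bucket/mirror'):
    # Non-recursive: one entry per immediate child of the prefix.
    print(list_url(mirror))
    # Recursive: every key under the prefix (or every file under a local
    # directory for file:// URLs).
    print(list_url(mirror, recursive=True))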