def url_exists(url):
    """Return True if the resource at ``url`` exists, False otherwise."""
    url = url_util.parse(url)
    local_path = url_util.local_file_path(url)
    if local_path:
        return os.path.exists(local_path)

    if url.scheme == 's3':
        s3 = s3_util.create_s3_session(url)
        try:
            s3.get_object(Bucket=url.netloc, Key=url.path.lstrip('/'))
            return True
        except s3.exceptions.ClientError as err:
            # boto3 exposes client exceptions under ``.exceptions``; a bare
            # ``s3.ClientError`` attribute does not exist on the client.
            if err.response['Error']['Code'] == 'NoSuchKey':
                return False
            raise

    elif url.scheme == 'gs':
        gcs = gcs_util.GCSBlob(url)
        return gcs.exists()

    # otherwise, just try to "read" from the URL, and assume that *any*
    # non-throwing response contains the resource represented by the URL
    try:
        read_from_url(url)
        return True
    except (SpackWebError, URLError):
        return False
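
# A minimal usage sketch for url_exists. The URLs below are hypothetical,
# and the s3:// and gs:// branches additionally assume that credentials
# and any mirror connection info are already configured:
#
#     url_exists('/etc/hosts')                  # local path: os.path.exists
#     url_exists('s3://my-bucket/some/key')     # S3: probed with get_object
#     url_exists('gs://my-bucket/some/blob')    # GCS: GCSBlob.exists()
#     url_exists('https://example.com/file')    # fallback: read_from_url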
def push_to_url(local_file_path, remote_path, keep_original=True,
                extra_args=None):
    """Copy ``local_file_path`` to ``remote_path``; if ``keep_original`` is
    False, remove the local file after a successful push."""
    if sys.platform == "win32":
        # Guard the index so a short path cannot raise IndexError; a drive
        # letter (e.g. "C:") marks a local Windows path.
        if len(remote_path) > 1 and remote_path[1] == ':':
            remote_path = "file://" + remote_path

    remote_url = url_util.parse(remote_path)
    verify_ssl = spack.config.get('config:verify_ssl')
    if __UNABLE_TO_VERIFY_SSL and verify_ssl and uses_ssl(remote_url):
        warn_no_ssl_cert_checking()

    remote_file_path = url_util.local_file_path(remote_url)
    if remote_file_path is not None:
        mkdirp(os.path.dirname(remote_file_path))
        if keep_original:
            shutil.copy(local_file_path, remote_file_path)
        else:
            try:
                rename(local_file_path, remote_file_path)
            except OSError as e:
                if e.errno == errno.EXDEV:
                    # NOTE(opadron): The above move failed because it crosses
                    # filesystem boundaries.  Copy the file (plus original
                    # metadata), and then delete the original.  This operation
                    # needs to be done in separate steps.
                    shutil.copy2(local_file_path, remote_file_path)
                    os.remove(local_file_path)
                else:
                    raise

    elif remote_url.scheme == 's3':
        if extra_args is None:
            extra_args = {}

        # Strip leading slashes so the key is relative to the bucket.
        remote_path = remote_url.path.lstrip('/')

        s3 = s3_util.create_s3_session(
            remote_url, connection=s3_util.get_mirror_connection(remote_url))
        s3.upload_file(local_file_path, remote_url.netloc,
                       remote_path, ExtraArgs=extra_args)

        if not keep_original:
            os.remove(local_file_path)

    elif remote_url.scheme == 'gs':
        gcs = gcs_util.GCSBlob(remote_url)
        gcs.upload_to_blob(local_file_path)
        if not keep_original:
            os.remove(local_file_path)

    else:
        raise NotImplementedError(
            'Unrecognized URL scheme: {SCHEME}'.format(
                SCHEME=remote_url.scheme))
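
# A minimal usage sketch for push_to_url. The paths and bucket names are
# hypothetical; the S3 case assumes boto3 credentials are available, and
# 'ACL' is a standard boto3 upload_file ExtraArgs key:
#
#     # copy into a file:// mirror, keeping the local original
#     push_to_url('/tmp/pkg.tar.gz', 'file:///mirror/pkg.tar.gz')
#
#     # move into an S3 mirror, making the object publicly readable
#     push_to_url('/tmp/pkg.tar.gz', 's3://my-mirror/pkg.tar.gz',
#                 keep_original=False, extra_args={'ACL': 'public-read'})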
def remove_url(url, recursive=False):
    """Remove the resource at ``url``; with ``recursive=True``, remove an
    entire local tree, S3 prefix, or GCS prefix."""
    url = url_util.parse(url)

    local_path = url_util.local_file_path(url)
    if local_path:
        if recursive:
            shutil.rmtree(local_path)
        else:
            os.remove(local_path)
        return

    if url.scheme == 's3':
        # Try to find a mirror for potential connection information
        s3 = s3_util.create_s3_session(
            url, connection=s3_util.get_mirror_connection(url))
        bucket = url.netloc
        if recursive:
            # Because list_objects_v2 can only return up to 1000 items
            # at a time, we have to paginate to make sure we get it all
            prefix = url.path.strip('/')
            paginator = s3.get_paginator('list_objects_v2')
            pages = paginator.paginate(Bucket=bucket, Prefix=prefix)

            delete_request = {'Objects': []}
            for item in pages.search('Contents'):
                if not item:
                    continue

                delete_request['Objects'].append({'Key': item['Key']})

                # Make sure we do not try to hit S3 with a list of more
                # than 1000 items
                if len(delete_request['Objects']) >= 1000:
                    r = s3.delete_objects(Bucket=bucket,
                                          Delete=delete_request)
                    _debug_print_delete_results(r)
                    delete_request = {'Objects': []}

            # Delete any items that remain
            if len(delete_request['Objects']):
                r = s3.delete_objects(Bucket=bucket, Delete=delete_request)
                _debug_print_delete_results(r)
        else:
            s3.delete_object(Bucket=bucket, Key=url.path.lstrip('/'))
        return

    elif url.scheme == 'gs':
        if recursive:
            bucket = gcs_util.GCSBucket(url)
            bucket.destroy(recursive=recursive)
        else:
            blob = gcs_util.GCSBlob(url)
            blob.delete_blob()
        return
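
# A minimal usage sketch for remove_url (hypothetical URLs). With
# recursive=True the S3 branch walks the whole prefix via the paginator
# above and deletes in batches of at most 1000 keys:
#
#     remove_url('file:///mirror/pkg.tar.gz')              # one local file
#     remove_url('s3://my-mirror/build_cache', recursive=True)
#     remove_url('gs://my-mirror/build_cache', recursive=True)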
def gcs_open(req, *args, **kwargs):
    """Open a reader stream to a blob object on GCS."""
    import spack.util.gcs as gcs_util

    url = url_util.parse(req.get_full_url())
    gcsblob = gcs_util.GCSBlob(url)

    if not gcsblob.exists():
        # Use the name as it is available in this module's namespace
        # (url_exists above catches SpackWebError bare).
        raise SpackWebError('GCS blob {0} does not exist'.format(
            gcsblob.blob_path))
    stream = gcsblob.get_blob_byte_stream()
    headers = gcsblob.get_blob_headers()

    return urllib_response.addinfourl(stream, headers, url)
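
# A sketch of how gcs_open can serve as a urllib handler for gs:// URLs.
# urllib.request dispatches a request to a method named '<scheme>_open' on
# each installed handler, so a tiny handler class (illustrative, not part
# of this module) is enough to route gs:// requests here:
#
#     import urllib.request
#
#     class GCSHandler(urllib.request.BaseHandler):
#         def gs_open(self, req):
#             return gcs_open(req)
#
#     opener = urllib.request.build_opener(GCSHandler)
#     response = opener.open('gs://my-bucket/some/blob')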