def _parse_s3cmd_attrs(raw: str) -> typing.Dict[str, str]:
    """Split an s3cmd ``key:value/key:value`` attribute string into a dict.

    Only the first ":" of each segment separates key from value, so values
    that themselves contain ":" are kept intact. Empty segments (for example
    from a trailing "/") are ignored, and a segment without ":" maps to "".
    """
    attrs = {}
    for segment in raw.split("/"):
        if not segment:
            continue
        key, _, value = segment.partition(":")
        attrs[key] = value
    return attrs


async def get_object_metadata(
    conn: "SwiftService",
    meta_cont: str,
    meta_obj: typing.Union[typing.List[str], None],
) -> typing.List[list]:
    """Get object metadata.

    :param conn: service object whose ``stat(container, objects)`` yields one
        result dict per requested object.
    :param meta_cont: name of the container holding the objects.
    :param meta_obj: names of the objects to query.
    :returns: a list of ``[object_name, metadata]`` pairs, where ``metadata``
        maps header names (with the ``x-object-meta-`` prefix stripped) to
        their values. An ``s3cmd-attrs`` entry, when present, is expanded
        into its own dict.
    :raises aiohttp.web.HTTPNotFound: if the stat call raises ``SwiftError``
        or any of the returned results is unsuccessful.
    """
    try:
        stats = list(conn.stat(meta_cont, meta_obj))
    except SwiftError:
        # Fail if container wasn't found
        raise aiohttp.web.HTTPNotFound()

    # Fail if an object wasn't usable
    if any(not item["success"] for item in stats):
        raise aiohttp.web.HTTPNotFound()

    result = []
    for item in stats:
        # Keep only the metadata not already served with the list request and
        # strip the header prefix so that it doesn't have to be done in the
        # browser.
        meta = {
            name.replace("x-object-meta-", ""): value
            for name, value in item["headers"].items()
            if "x-object-meta" in name
        }
        if "s3cmd-attrs" in meta:
            meta["s3cmd-attrs"] = _parse_s3cmd_attrs(meta["s3cmd-attrs"])
        result.append([item["object"], meta])
    return result
def main(global_config, **settings):
    """Build and return the Pyramid WSGI application.

    Registers the WPS and web routes and makes sure the configured server
    directories exist. When the ``SwiftStorage`` backend is selected it also
    makes sure the Swift container exists and carries the expected temp-url
    key.

    :param global_config: passed in by the caller; not used here.
    :param settings: deployment settings handed to the ``Configurator``.
    :returns: the application created by ``config.make_wsgi_app()``.
    """
    import logging  # local import so the block does not depend on file-level imports

    log = logging.getLogger(__name__)
    config = Configurator(settings=settings)

    # app specific stuff
    config.add_route(name='wps', pattern='/wps')
    config.add_route(name='outputs', pattern='/outputs/*filename')
    config.add_route(name='status', pattern='/status/*filename')

    # web routes
    config.add_static_view('static', 'static', cache_max_age=3600)
    config.add_route('home', '/')
    config.scan('.views')

    # ensure paths exist
    for name in ('workdir', 'statuspath', 'outputpath'):
        dirname = os.path.abspath(wpsconfig.get_config_value('server', name))
        if not os.path.exists(dirname):
            # exist_ok: another process may create the directory between the
            # check above and this call.
            os.makedirs(dirname, exist_ok=True)

    # initialize swift storage container if active
    if wpsconfig.get_config_value('server', 'storage') == 'SwiftStorage':
        from swiftclient.service import SwiftService, SwiftError

        swift = SwiftService()
        container = wpsconfig.get_config_value('SwiftStorage', 'container')
        try:
            stat = swift.stat(container)
        except SwiftError:
            # The container could not be stat'ed (expected cause: 404), so
            # create it and stat again.
            created = swift.post(container)
            if not created.get('success'):
                log.error('Creating swift container %r failed: %s',
                          container, created.get('error'))
            stat = swift.stat(container)

        # A failed stat result carries no headers; surface the real error
        # instead of failing with a KeyError below.
        if not stat.get('success'):
            raise stat.get('error') or RuntimeError(
                'Cannot stat swift container %r' % container)

        from ecocloud_wps_demo.pywps.swiftstorage import get_temp_url_key
        cur_key = stat['headers'].get('x-container-meta-temp-url-key')
        temp_url_key = get_temp_url_key()
        if cur_key != temp_url_key:
            # setting temp_url_key
            updated = swift.post(
                container, options={'meta': {'temp-url-key': temp_url_key}})
            if not updated.get('success'):
                log.error('Setting temp-url-key on swift container %r failed: %s',
                          container, updated.get('error'))

    return config.make_wsgi_app()
class DiracStore(object):
    """Upload files to a Swift container and announce them on a Kafka topic."""

    def __init__(self, container, topic, kafka):
        """Set up the Swift service, the Kafka producer and the auth data.

        :param container: name of the Swift container to upload into.
        :param topic: Kafka topic that resource messages are sent to.
        :param kafka: bootstrap server(s) handed to ``KafkaProducer``.
        """
        self.container = container
        self.swift = SwiftService()
        self.out = OutputManager()
        self.kafka = kafka
        auth = get_auth()
        self.url = auth[0]
        self.token = auth[1]
        self.topic = topic
        self.producer = KafkaProducer(bootstrap_servers=kafka)

    def send(self, resource):
        """Serialize ``resource`` to JSON and send it to the Kafka topic."""
        payload = message_to_json(resource)
        print("sending " + payload)
        self.producer.send(self.topic, payload.encode('utf-8'))

    def make_resource(self, stat):
        """Build a ``schema.Resource`` from one object's stat items.

        :param stat: mapping of stat item names (``'Object'``,
            ``'Content Type'``, ``'Content Length'``, ``'Last Modified'``)
            to their values, as produced by :meth:`stat`.
        """
        resource = schema.Resource()
        resource.type = 'file'
        resource.name = stat['Object']
        resource.location = self.url + '/' + self.container + '/' + stat['Object']
        resource.mimeType = stat['Content Type']
        # int() instead of long(): long does not exist on Python 3.
        resource.size = int(stat['Content Length'])
        resource.created = stat['Last Modified']
        return resource

    def stat(self, paths):
        """Return ``{object_name: {item_name: value}}`` for each successful stat.

        Objects whose stat was not successful are left out of the result.
        """
        found = {}
        for response in self.swift.stat(container=self.container, objects=paths):
            if response['success']:
                found[response['object']] = {
                    item[0]: item[1] for item in response['items']
                }
        return found

    def store(self, path, source):
        """Upload ``source`` below ``path`` and announce each new object.

        ``source`` may be a single file or a directory, in which case its
        direct entries are uploaded. Objects that can already be stat'ed in
        the container are skipped.
        """
        isdir = os.path.isdir(source)
        base = source if isdir else os.path.dirname(source)
        sources = os.listdir(source) if isdir else [source]
        locations = [os.path.join(path, os.path.basename(entry))
                     for entry in sources]
        print(str(len(locations)) + " locations!")

        known = self.stat(locations)
        objs = [
            SwiftUploadObject(
                os.path.join(base, os.path.basename(location)),
                object_name=location)
            for location in locations
            if location not in known
        ]
        print(str(len(objs)) + " previously unseen!")

        for response in self.swift.upload(self.container, objs):
            if not response['success'] or 'object' not in response:
                continue
            name = response['object']
            print('uploading ' + name)
            uploaded = self.stat([name])
            if not uploaded:
                # The follow-up stat failed, so there is nothing to announce.
                print('could not stat ' + name)
                continue
            # next(iter(...)) works on Python 2 and 3; indexing
            # dict.values() does not work on Python 3.
            self.send(self.make_resource(next(iter(uploaded.values()))))
def ck_cdn(self, secrets):
    """Check the client's CDN settings and try to reach its bucket.

    Reports ``client.rax_id`` and ``client.bucket_id``, logs in through
    ``swift_uploader`` and stats the bucket with ``SwiftService``. Returns
    early, after reporting, when either setting is missing.

    :param secrets: not used by this check.
    """
    rax_id = self.client.rax_id
    if not rax_id:
        p_warn("client.rax_id not set.")
        return
    p_okg("client.rax_id: {}".format(rax_id))

    bucket_id = self.client.bucket_id
    if not bucket_id:
        p_fail("client.bucket_id not set.")
        # Without a bucket there is nothing left to check. Falling through
        # used to hit an unbound ``bucket_id`` below.
        return
    p_okg("client.bucket_id: {}".format(bucket_id))

    print("logging in...")
    uploader = swift_uploader.Uploader()
    uploader.user = rax_id
    uploader.auth()

    print("checking for valid bucket...")
    from swiftclient.service import SwiftService
    swift = SwiftService()
    # NOTE(review): the stat result is not inspected; presumably a missing
    # container surfaces as an exception from stat() - confirm. An earlier
    # version listed all containers and reported whether bucket_id was
    # among them.
    swift.stat(container=bucket_id)
    return
def ck_cdn(self, secrets):
    """Check the client's CDN settings and try to reach its bucket.

    Reports ``client.rax_id`` and ``client.bucket_id``, logs in through
    ``swift_uploader`` and stats the bucket with ``SwiftService``. Returns
    early, after reporting, when either setting is missing.

    :param secrets: not used by this check.
    """
    rax_id = self.client.rax_id
    if not rax_id:
        p_warn("client.rax_id not set.")
        return
    p_okg("client.rax_id: {}".format(rax_id))

    bucket_id = self.client.bucket_id
    if not bucket_id:
        p_fail("client.bucket_id not set.")
        # Without a bucket there is nothing left to check. Falling through
        # used to hit an unbound ``bucket_id`` below.
        return
    p_okg("client.bucket_id: {}".format(bucket_id))

    print("logging in...")
    uploader = swift_uploader.Uploader()
    uploader.user = rax_id
    uploader.auth()

    print("checking for valid bucket...")
    from swiftclient.service import SwiftService
    swift = SwiftService()
    # NOTE(review): the stat result is not inspected; presumably a missing
    # container surfaces as an exception from stat() - confirm. An earlier
    # version listed all containers and reported whether bucket_id was
    # among them.
    swift.stat(container=bucket_id)
    return
class SwiftStorage(Storage):
    """Storage on OpenStack swift service."""

    def __init__(self, storage_id, container_name, auth_config=None, transfer_config=None):
        """Create the Swift client for one container.

        :param storage_id: identifier handed to the ``Storage`` base class.
        :param container_name: Swift container all paths are relative to.
        :param auth_config: optional mapping of auth options; its entries
            override those of ``transfer_config``.
        :param transfer_config: optional mapping of base client options.
        """
        super(SwiftStorage, self).__init__(storage_id)
        # Work on a copy so the caller's transfer_config is not modified.
        opts = dict(transfer_config or {})
        opts["auth_version"] = "2.0"
        if auth_config:
            opts.update(auth_config)
        self._client = SwiftService(opts)
        self._container = container_name

    def _get_file_safe(self, remote_path, local_path):
        """Download ``remote_path`` into a temp dir, then move it to ``local_path``.

        The downloaded file's access/modification times are set from the
        object's ``x-timestamp`` header.

        :raises RuntimeError: if the download fails or yields no result.
        """
        tmpdir = tempfile.mkdtemp()
        try:
            results = self._client.download(container=self._container,
                                            objects=[remote_path],
                                            options={"out_directory": tmpdir})
            has_results = False
            for r in results:
                has_results = True
                if not r["success"]:
                    raise RuntimeError("Cannot download [%s]: %s" % (remote_path, r["error"]))
                timestamp = float(r["response_dict"]["headers"]["x-timestamp"])
                os.utime(os.path.join(tmpdir, remote_path), (timestamp, timestamp))
            if not has_results:
                raise RuntimeError("Cannot download [%s]: %s" % (remote_path, "NO RESULT"))
            shutil.move(os.path.join(tmpdir, remote_path), local_path)
        finally:
            # Remove the temp dir on failures too.
            shutil.rmtree(tmpdir, ignore_errors=True)

    def _check_existing_file(self, remote_path, local_path):
        """Return True if ``local_path`` matches the remote object's size and mtime.

        Returns False when the local file is missing, the sizes differ, or no
        successful stat result has a matching timestamp.
        """
        if not os.path.exists(local_path):
            return False
        results = self._client.stat(self._container, objects=[remote_path])
        local_stat = os.stat(local_path)
        for r in results:
            if not r['success']:
                LOGGER.debug('Cannot find %s or %s', local_path, remote_path)
                continue
            if int(r['headers']['content-length']) != local_stat.st_size:
                return False
            timestamp = float(r["headers"]["x-timestamp"])
            if int(local_stat.st_mtime) == int(timestamp):
                return True
        return False

    def stat(self, remote_path):
        """Describe ``remote_path`` as a file or a directory.

        :returns: ``{'is_dir': False, 'size': ..., 'last_modified': ...}``
            when an object of that name can be stat'ed, ``{'is_dir': True}``
            when listing the path as a prefix succeeds, otherwise ``False``.
        """
        if not remote_path.endswith('/'):
            results = self._client.stat(self._container, objects=[remote_path])
            for r in results:
                if r['success']:
                    return {'is_dir': False,
                            'size': r['headers']['content-length'],
                            'last_modified': r['headers']['x-timestamp']}
            remote_path += '/'
        results = self._client.list(container=self._container,
                                    options={"prefix": remote_path, "delimiter": "/"})
        for r in results:
            if r['success']:
                return {'is_dir': True}
        return False

    def push_file(self, local_path, remote_path):
        """Upload ``local_path`` as object ``remote_path``.

        :raises RuntimeError: if the upload fails or yields no result.
        """
        obj = SwiftUploadObject(local_path, object_name=remote_path)
        results = self._client.upload(self._container, [obj])
        has_results = False
        for r in results:
            has_results = True
            if not r["success"]:
                raise RuntimeError("Cannot push file [%s]>[%s]: %s" % (local_path, remote_path, r["error"]))
        if not has_results:
            raise RuntimeError("Cannot push file [%s]>[%s]: %s" % (local_path, remote_path, "NO RESULTS"))

    def stream(self, remote_path, buffer_size=1024):
        """Return a generator yielding the object's content in chunks.

        The object is first downloaded to a temp dir (when the generator is
        first advanced) and then read back ``buffer_size`` bytes at a time.

        :raises RuntimeError: (from the generator) if the download fails or
            yields no result.
        """
        def generate():
            tmpdir = tempfile.mkdtemp()
            try:
                results = self._client.download(container=self._container,
                                                objects=[remote_path],
                                                options={"out_directory": tmpdir})
                has_results = False
                for r in results:
                    has_results = True
                    if not r["success"]:
                        raise RuntimeError("Cannot download file [%s]: %s" % (remote_path, r["error"]))
                if not has_results:
                    raise RuntimeError("Cannot download file [%s]: NO RESULTS" % (remote_path,))

                with open(os.path.join(tmpdir, remote_path), "rb") as f:
                    for chunk in iter(lambda: f.read(buffer_size), b''):
                        yield chunk
            finally:
                # Also runs when the consumer stops early or an error occurs.
                shutil.rmtree(tmpdir, ignore_errors=True)

        return generate()

    def listdir(self, remote_path, recursive=False):
        """List the entries below the prefix ``remote_path``.

        :param recursive: when False, a "/" delimiter is used so that only
            one level is returned and deeper levels show up as sub-directories.
        :returns: dict mapping each path to ``{'is_dir': True}`` for
            sub-directories or ``{'size', 'last_modified'}`` for objects.
        """
        options = {"prefix": remote_path}
        if not recursive:
            options["delimiter"] = "/"
        list_parts_gen = self._client.list(container=self._container, options=options)
        lsdir = {}
        for page in list_parts_gen:
            if not page["success"]:
                continue
            for item in page["listing"]:
                if "subdir" in item:
                    lsdir[item["subdir"]] = {'is_dir': True}
                    continue
                last_modified = datetime.strptime(item["last_modified"],
                                                  '%Y-%m-%dT%H:%M:%S.%f')
                # The listing timestamp has no UTC offset; it is taken to be
                # UTC (as Swift documents) rather than host-local time, so
                # the epoch value does not depend on the host's timezone.
                epoch = (last_modified - datetime(1970, 1, 1)).total_seconds()
                lsdir[item["name"]] = {'size': item["bytes"],
                                       'last_modified': epoch}
        return lsdir

    def mkdir(self, remote_path):
        """No-op: nothing is created for a directory."""
        pass

    def _delete_single(self, remote_path, isdir):
        """Delete the object ``remote_path``; does nothing when ``isdir`` is true.

        :raises RuntimeError: if the delete fails or yields no result.
        """
        if isdir:
            return
        results = self._client.delete(container=self._container, objects=[remote_path])
        has_results = False
        for r in results:
            has_results = True
            if not r["success"]:
                raise RuntimeError("Cannot delete file [%s]: %s" % (remote_path, r["error"]))
        if not has_results:
            raise RuntimeError("Cannot delete file [%s]: NO RESULT" % (remote_path))

    def rename(self, old_remote_path, new_remote_path):
        """Rename by copying every object under the old prefix, then deleting it.

        :raises RuntimeError: if a copy fails or yields no result.
        """
        listfiles = self.listdir(old_remote_path, True)
        for f in listfiles:
            assert f[:len(old_remote_path)] == old_remote_path, "inconsistent listdir result"
            destination = "/%s/%s%s" % (self._container,
                                        new_remote_path,
                                        f[len(old_remote_path):])
            obj = SwiftCopyObject(f, {"destination": destination})
            results = self._client.copy(self._container, [obj])
            has_results = False
            for r in results:
                has_results = True
                if not r["success"]:
                    raise RuntimeError("Cannot copy file [%s]: %s" % (old_remote_path, r["error"]))
            if not has_results:
                raise RuntimeError("Cannot copy file [%s]: NO RESULT" % (old_remote_path))
            # Only drop the source once its copy succeeded.
            self._delete_single(f, False)

    def exists(self, remote_path):
        """Return True if listing ``remote_path`` as a prefix finds a match."""
        result = self._client.list(container=self._container,
                                   options={"prefix": remote_path, "delimiter": "/"})
        for page in result:
            if not page["success"]:
                continue
            for item in page["listing"]:
                if "subdir" in item:
                    return True
                if (item["name"] == remote_path or
                        remote_path == '' or
                        remote_path.endswith('/') or
                        item["name"].startswith(remote_path + '/')):
                    return True
        return False

    def isdir(self, remote_path):
        """Return True if ``remote_path``, taken as a "/"-terminated prefix, exists."""
        if not remote_path.endswith('/'):
            return self.exists(remote_path + '/')
        return self.exists(remote_path)

    def _internal_path(self, path):
        """Strip one leading "/" from ``path``.

        OpenStack does not work with paths but keys. This function possibly
        adapts a path-like representation to a OpenStack key.
        """
        if path.startswith('/'):
            return path[1:]
        return path
class Swift(object):
    """Per-user file operations on Swift, one container per user."""

    def __init__(self, settings):
        """Create the Swift service and read the swift-specific settings.

        :param settings: application settings passed to ``get_os_settings``
            and ``get_swift_settings``.
        """
        options = get_os_settings(settings)
        self.swift = SwiftService(options)
        options = get_swift_settings(settings)
        self.temp_url_key = options['temp_url_key']
        # TODO: hard coded template
        self.name_template = options['container'] + '_{name}'

    # TODO: this should be a module level method
    def build_object_name(self, user_id, path='', name=None):
        """Return ``(container, object_name)`` for a user's path and file name.

        - if ``name`` is None, the full folder path with a trailing slash is
          returned
        - the returned object name does not have a leading slash

        :raises ValueError: for an empty user id or one containing "/", a
            path containing "..", or a name that contains "/" or is "." or
            "..".
        """
        if not user_id or '/' in user_id:
            raise ValueError('Invalid userid', user_id)
        container = self.name_template.format(name=user_id)
        parts = []
        if path:
            # disallow '..'
            if '..' in path:
                raise ValueError('Invalid path', path)
            # strip all leading trailing slashes from path
            # deduplicate double slashes
            path = '/'.join(x for x in path.split('/') if x)
            if path:
                parts.append(path)
        if name:
            # build a file path
            if '/' in name or name in ('..', '.'):
                raise ValueError('Invalid name', name)
            parts.append(name)
        else:
            # ensure we get a trailing slash if there is no name
            # -> it is a folder
            parts.append('')
        return container, '/'.join(parts)

    def _create_container(self, container):
        """POST the user container together with its metadata.

        :returns: the result of ``SwiftService.post``.
        """
        return self.swift.post(
            container=container,
            options={
                # swiftservice converts this to X-Container-Meta-Temp-Url-Key
                'meta': {
                    'temp-url-key': self.temp_url_key,
                    # TODO: hard coded 10G quota
                    'quota-bytes': str(int(10e9)),
                    'access-control-allow-origin': '*',
                }
            })

    def stat(self, user_id, path=''):
        """Return usage information for the user's container.

        The container is created first if the stat reports 404.

        :returns: dict with ``used``, ``quota``, ``count`` and ``created``.
        :raises NotImplementedError: if ``path`` is given. Object stat was
            never implemented; this branch used to fail with an
            ``UnboundLocalError``.
        """
        container, object_prefix = self.build_object_name(user_id, path)
        if path:
            # object stat requested
            raise NotImplementedError('stat is only supported for containers')

        # container stat requested
        try:
            stat = self.swift.stat(container=container)
        except SwiftError as e:
            if getattr(e.exception, 'http_status', None) != 404:
                raise
            # container does not exists
            self._create_container(container)
            stat = self.swift.stat(container=container)
        headers = stat['headers']
        return {
            'used': safe_int(headers.get('x-container-bytes-used', None)),
            'quota': safe_int(headers.get('x-container-meta-quota-bytes', None)),
            'count': safe_int(headers.get('x-container-object-count', None)),
            'created': safe_isodate(headers.get('x-timestamp', None)),
        }

    def list(self, user_id, path=''):
        """Yield the entries directly below ``path`` in the user's container.

        Pseudo directories are yielded as dicts with ``name``, ``bytes`` (0)
        and ``content_type`` ('application/directory'). Objects are yielded
        as returned by Swift with ``name`` made relative to ``path``. The
        entry for ``path`` itself is skipped.

        A 404 ``SwiftError`` for an empty ``path`` ends the listing silently
        (the user container does not exist yet). Any other failure is raised.
        """
        container, object_prefix = self.build_object_name(user_id, path)
        for data in self.swift.list(container=container,
                                    options={
                                        'delimiter': '/',
                                        'prefix': object_prefix
                                    }):
            # NOTE(review): a container auto-create step used to sit here,
            # guarded by ``data['action'] == ['list_container_part']``. That
            # compares a str with a list and was never true, so it is left
            # out. Decide whether list() should create the container.
            if data['success']:
                for item in data['listing']:
                    # filter current folder
                    if item.get('subdir', None) == object_prefix:
                        continue
                    if item.get('name', None) == object_prefix:
                        continue
                    if item.get('subdir', None):
                        # it is a pseudo dir
                        yield {
                            'name': item.get('subdir')[len(object_prefix):].strip('/'),
                            'bytes': 0,
                            'content_type': 'application/directory',
                        }
                    else:
                        item['name'] = item['name'][len(object_prefix):]
                        yield item
                # skip error handling below
                continue
            # TODO: we are raising an exception here... jumping out of the
            #       generator.... should be fine for this method, but
            #       does this have the potential to leak threads?
            #       SwiftService uses threads to generate results
            ex = data['error']
            if isinstance(ex, SwiftError):
                status = getattr(ex.exception, 'http_status', None)
                if not path and status == 404:
                    # if path is empty, we ignore it, it means, the
                    # user container does not exist yet.
                    break
            raise ex

    def create_folder(self, user_id, path='', description=None):
        """Create a directory marker for ``path`` and post its metadata.

        :returns: list of results from posting the created markers.
        :raises: the ``error`` of any unsuccessful upload or post result.
        """
        container, object_path = self.build_object_name(user_id, path)
        # create upload object
        marker = SwiftUploadObject(None,
                                   object_name=object_path,
                                   options={
                                       'dir_marker': True,
                                       'meta': {
                                           'description': description or '',
                                       },
                                   })
        folders = []
        for res in self.swift.upload(container, [marker]):
            if not res['success']:
                raise res['error']
            if res['action'] == 'create_container':
                # status will be 202 if container already existed
                if res['response_dict']['status'] == 201:
                    # set up metadata for user container
                    res = self._create_container(container)
            # TODO: project only:
            if res['action'] == 'create_dir_marker':
                meta = {}
                if description:
                    meta['description'] = description
                folders.append(SwiftPostObject(object_name=res['object'],
                                               options={
                                                   'header': res['headers'],
                                                   'meta': meta,
                                               }))
        # TODO: check whether we should use post above instead of upload
        #       maybe we can avoid calling swift twice?
        #       also make sure container gets created in case of post
        if not folders:
            # Nothing to post. Posting an empty object list would address
            # the container itself rather than any object.
            return []
        ret = []
        for res in self.swift.post(container, folders):
            if not res['success']:
                raise res['error']
            ret.append(res)
        return ret

    def delete_folder(self, user_id, path=''):
        """Delete everything below ``path``; yield each deleted relative name."""
        container, object_path = self.build_object_name(user_id, path)
        # don't use delimiter here, otherwise swift.delete will only see
        # one level of subfolders and won't be able to delete everything
        # TODO: can this delete the container as well?
        # NOTE(review): with an empty path the prefix is '' - verify what
        # swift.delete does then, and whether every result has 'object'.
        for res in self.swift.delete(container=container,
                                     options={'prefix': object_path}):
            yield res['object'][len(object_path):]

    def upload_file(self, user_id, path, name, file,
                    content_type='application/octet-stream',
                    content_length=-1):
        """Upload ``file`` as ``path``/``name`` into the user's container.

        :param file: readable source wrapped in a ``LengthWrapper``.
        :param content_type: value of the Content-Type header; falls back to
            'application/octet-stream' when empty.
        :param content_length: length handed to the ``LengthWrapper``.
        :raises: the ``error`` of any result that carries one.
        """
        container, object_name = self.build_object_name(user_id, path, name)
        headers = {'Content-Type': content_type or 'application/octet-stream'}
        upload_obj = SwiftUploadObject(source=LengthWrapper(file, content_length, True),
                                       object_name=object_name,
                                       options={'header': headers})
        log = logging.getLogger(__name__)
        log.info('Tool Upload %s', upload_obj)
        # The response iterator yields two actions:
        #  1. action: create_container
        #  2. action: upload_object
        for res in self.swift.upload(container, [upload_obj]):
            if res['action'] == 'create_container':
                res = self._create_container(container)
            log.info('Tool Result %s', res)
            if res.get('error', None):
                # res['error'].http_status == 413:
                #   -> Request Entity Too Large
                raise res['error']

    def delete_file(self, user_id, path, name):
        """Delete the object ``path``/``name`` from the user's container.

        :raises: the ``error`` of any result that carries one.
        """
        container, object_name = self.build_object_name(user_id, path, name)
        # TODO: could set options['prefix'] to make sure we don't delete
        #       anything outside project/folder
        # TODO: could this delete the container?
        for res in self.swift.delete(container=container, objects=[object_name]):
            if res.get('error', None):
                raise res['error']

    def generate_temp_url(self, user_id, path, name):
        """Return a temporary GET URL (valid 300 seconds) for ``path``/``name``."""
        container, object_name = self.build_object_name(user_id, path, name)
        # discover swift endpoint urls
        conn = get_conn(self.swift._options)
        url, token = conn.get_auth()
        urlparts = urlparse(url)
        # generate swift path /v1/<account>/<container>/<userid>/path
        path = '/'.join((urlparts.path, container, object_name))
        # TODO: valid for 5 minutes
        temp_url = generate_temp_url(path, 300, self.temp_url_key, method='GET')
        return urljoin(url, temp_url)
class SwiftObjectStorageDriver(ObjectStorageDriver):
    """
    Archive driver using swift-api as backing store.

    Buckets presented as part of object lookup in this API are mapped to object key prefixes in the backing swift
    container so that a single container can be used since namespaces are limited.
    """
    __config_name__ = 'swift'
    __driver_version__ = '1'
    __uri_scheme__ = 'swift'

    _key_format = '{prefix}{userid}/{container}/{key}'
    DEFAULT_AUTH_TIMEOUT = 10

    def __init__(self, config):
        """Configure the client and verify credentials and container.

        :param config: driver configuration. ``container`` and
            ``create_container`` are consumed here; everything else is passed
            to ``SwiftService`` as options.
        :raises ValueError: if no container is configured.
        """
        super(SwiftObjectStorageDriver, self).__init__(config)

        # Initialize the client
        self.client_config = config
        self.container_name = self.config.get('container')
        self.can_create_container = self.config.get('create_container', False)

        # The driver-only keys are not swift client options.
        self.auth_options = copy.copy(self.client_config)
        self.auth_options.pop('container', None)
        self.auth_options.pop('create_container', None)
        self.client = SwiftService(options=self.auth_options)

        if not self.container_name:
            raise ValueError('Cannot configure swift driver with out a provided container to use')

        self.prefix = self.config.get('anchore_key_prefix', '')

        self._check_creds()
        self._check_container()

    def _check_creds(self):
        """
        Simple operation to verify creds work without state change

        :return: True on success
        :raises BadCredentialsError: if the stat fails with status 401 or 403
        :raises DriverConfigurationError: for any other failure
        """
        try:
            resp = self.client.stat()
            if resp['success']:
                return True

            error = resp.get('error')
            if error and error.http_status in [401, 403]:
                raise BadCredentialsError(self.auth_options, endpoint=None, cause=error)
            if error:
                raise DriverConfigurationError(cause=error)
            raise DriverConfigurationError(
                Exception('Got unsuccessful response from stat operation against service: {}'.format(resp)))
        except SwiftError as e:
            raise DriverConfigurationError(e)

    def _check_container(self):
        """Verify the configured container exists, creating it when allowed.

        :raises DriverConfigurationError: if the stat raises ``SwiftError``
            and the container cannot (or may not) be created.
        """
        try:
            resp = self.client.stat(container=self.container_name)
        except SwiftError as e:
            status = getattr(e.exception, 'http_status', None)
            if not (status == 404 and self.can_create_container):
                raise DriverConfigurationError(e)
            try:
                created = self.client.post(container=self.container_name)
            except Exception as create_err:
                logger.exception(create_err)
                raise
            # post() reports failures in its result instead of raising.
            if not created.get('success'):
                raise DriverConfigurationError(
                    cause=created.get('error') or Exception(
                        'Could not create container: {}'.format(created)))
        else:
            if not resp.get('success'):
                # Surface the failure without changing startup behaviour.
                logger.warn('Unsuccessful stat of container {}: {}'.format(
                    self.container_name, resp.get('error')))

    def _build_key(self, userId, usrBucket, key):
        """Return the object key for a user's bucket and key."""
        return self._key_format.format(prefix=self.prefix, userid=userId, container=usrBucket, key=key)

    def _parse_uri(self, uri):
        """Split a driver URI into ``(container, key)``."""
        parsed = urllib.parse.urlparse(uri, scheme=self.__uri_scheme__)
        container = parsed.hostname
        key = parsed.path[1:]  # Strip leading '/'
        return container, key

    def get_by_uri(self, uri):
        """Return the content of the object addressed by ``uri`` as bytes.

        :raises ObjectKeyNotFoundError: if the download is unsuccessful
        :raises ObjectStorageDriverError: if the client raises ``SwiftError``
        """
        try:
            container, key = self._parse_uri(uri)
            if container != self.container_name:
                logger.warn(
                    'Container mismatch between content_uri and configured cotnainer name: {} in db record, but {} in config'
                    .format(container, self.container_name))

            resp = self.client.download(container=container, objects=[key], options={'out_file': '-'})
            for obj in resp:
                if 'contents' in obj and obj['action'] == 'download_object':
                    content = b''.join(obj['contents'])
                    return utils.ensure_bytes(content)
                if obj['action'] == 'download_object' and not obj['success']:
                    # Name the missing object and keep the underlying error.
                    raise ObjectKeyNotFoundError(bucket=container, key=key, userId='',
                                                 caused_by=obj.get('error'))
                raise Exception('Unexpected operation/action from swift: {}'.format(obj['action']))
        except SwiftError as e:
            raise ObjectStorageDriverError(cause=e)

    def delete_by_uri(self, uri):
        """Delete the object addressed by ``uri``.

        :return: True if a successful ``delete_object`` result was seen,
            otherwise None
        """
        container, key = self._parse_uri(uri)
        if container != self.container_name:
            logger.warn(
                'Container mismatch between content_uri and configured bucket name: {} in db record, but {} in config'
                .format(container, self.container_name))

        resp = self.client.delete(container=container, objects=[key])
        for r in resp:
            if r['success'] and r['action'] == 'delete_object':
                return True

    def exists(self, uri):
        """Return whether the object addressed by ``uri`` exists.

        Uses a download with ``no_download`` set rather than fetching content.

        :raises ObjectStorageDriverError: if the client raises ``SwiftError``
        """
        try:
            container, key = self._parse_uri(uri)
            if container != self.container_name:
                logger.warn(
                    'Bucket mismatch between content_uri and configured bucket name: {} in db record, but {} in config'
                    .format(container, self.container_name))

            resp = self.client.download(container=container, objects=[key],
                                        options={'out_file': '-', 'no_download': True})
            for obj in resp:
                if 'success' in obj and obj['success'] and obj['action'] == 'download_object':
                    return True
                if obj['action'] == 'download_object' and not obj['success']:
                    return False
                raise Exception('Unexpected operation/action from swift: {}'.format(obj['action']))
        except SwiftError as e:
            raise ObjectStorageDriverError(cause=e)

    def get(self, userId, bucket, key):
        """Return the content stored for ``userId``/``bucket``/``key``."""
        return self.get_by_uri(self.uri_for(userId, bucket, key))

    def put(self, userId, bucket, key, data):
        """Store ``data`` (bytes) and return the URI it was stored under.

        :raises Exception: if no successful ``upload_object`` result is seen
        """
        uri = self.uri_for(userId, bucket, key)
        swift_bucket, swift_key = self._parse_uri(uri)
        obj = SwiftUploadObject(object_name=swift_key, source=io.BytesIO(data))
        resp = self.client.upload(container=swift_bucket, objects=[obj])
        # Results for other actions may precede the upload result, so scan
        # all of them before giving up.
        for upload in resp:
            if upload['action'] == 'upload_object' and upload['success']:
                return uri
        raise Exception('Failed uploading object to swift')

    def delete(self, userId, bucket, key):
        """Delete the object stored for ``userId``/``bucket``/``key``."""
        return self.delete_by_uri(self.uri_for(userId, bucket, key))

    def uri_for(self, userId, bucket, key):
        """Return the ``swift://<container>/<key>`` URI for an object."""
        return '{}://{}/{}'.format(self.__uri_scheme__, self.container_name,
                                   self._build_key(userId, bucket, key))
class SwiftBackend(duplicity.backend.Backend):
    u"""
    Backend for Swift
    """

    def __init__(self, parsed_url):
        u"""Connect to Swift using settings from ``SWIFT_*`` environment variables.

        The first path component of ``parsed_url`` is the container and the
        rest becomes the object name prefix. The container is created if its
        metadata cannot be read.

        Raises BackendException if python-swiftclient is missing or a
        required environment variable is not set.
        """
        duplicity.backend.Backend.__init__(self, parsed_url)

        try:
            from swiftclient.service import SwiftService
            from swiftclient import Connection
            from swiftclient import ClientException
        except ImportError as e:
            raise BackendException(
                u"Swift backend requires the python-swiftclient library.\n"
                u"Exception: %s" % str(e))

        self.resp_exc = ClientException
        conn_kwargs = {}
        os_options = {}
        svc_options = {}

        # if the user has already authenticated
        if u'SWIFT_PREAUTHURL' in os.environ and u'SWIFT_PREAUTHTOKEN' in os.environ:
            conn_kwargs[u'preauthurl'] = os.environ[u'SWIFT_PREAUTHURL']
            conn_kwargs[u'preauthtoken'] = os.environ[u'SWIFT_PREAUTHTOKEN']
        else:
            if u'SWIFT_USERNAME' not in os.environ:
                raise BackendException(u'SWIFT_USERNAME environment variable '
                                       u'not set.')

            if u'SWIFT_PASSWORD' not in os.environ:
                raise BackendException(u'SWIFT_PASSWORD environment variable '
                                       u'not set.')

            if u'SWIFT_AUTHURL' not in os.environ:
                raise BackendException(u'SWIFT_AUTHURL environment variable '
                                       u'not set.')

            svc_options[u'os_username'] = conn_kwargs[u'user'] = os.environ[u'SWIFT_USERNAME']
            svc_options[u'os_password'] = conn_kwargs[u'key'] = os.environ[u'SWIFT_PASSWORD']
            svc_options[u'os_auth_url'] = conn_kwargs[u'authurl'] = os.environ[u'SWIFT_AUTHURL']

        if u'SWIFT_AUTHVERSION' in os.environ:
            svc_options[u'auth_version'] = conn_kwargs[u'auth_version'] = os.environ[u'SWIFT_AUTHVERSION']
            if os.environ[u'SWIFT_AUTHVERSION'] == u'3':
                # environment variable -> os_options key
                v3_options = (
                    (u'SWIFT_USER_DOMAIN_NAME', u'user_domain_name'),
                    (u'SWIFT_USER_DOMAIN_ID', u'user_domain_id'),
                    (u'SWIFT_PROJECT_DOMAIN_NAME', u'project_domain_name'),
                    (u'SWIFT_PROJECT_DOMAIN_ID', u'project_domain_id'),
                    (u'SWIFT_TENANTNAME', u'tenant_name'),
                    (u'SWIFT_ENDPOINT_TYPE', u'endpoint_type'),
                    (u'SWIFT_USERID', u'user_id'),
                    (u'SWIFT_TENANTID', u'tenant_id'),
                    (u'SWIFT_REGIONNAME', u'region_name'),
                )
                for env_name, option_name in v3_options:
                    if env_name in os.environ:
                        os_options[option_name] = os.environ[env_name]
        else:
            conn_kwargs[u'auth_version'] = u'1'

        if u'SWIFT_TENANTNAME' in os.environ:
            conn_kwargs[u'tenant_name'] = os.environ[u'SWIFT_TENANTNAME']
        if u'SWIFT_REGIONNAME' in os.environ:
            os_options[u'region_name'] = os.environ[u'SWIFT_REGIONNAME']

        # formatting options for swiftclient.SwiftService
        for key in os_options.keys():
            svc_options[u'os_' + key] = os_options[key]

        conn_kwargs[u'os_options'] = os_options

        # This folds the null prefix and all null parts, which means that:
        #  //MyContainer/ and //MyContainer are equivalent.
        #  //MyContainer//My/Prefix/ and //MyContainer/My/Prefix are equivalent.
        url_parts = [x for x in parsed_url.path.split(u'/') if x != u'']

        self.container = url_parts.pop(0)
        if url_parts:
            self.prefix = u'%s/' % u'/'.join(url_parts)
        else:
            self.prefix = u''

        policy = config.swift_storage_policy
        policy_header = u'X-Storage-Policy'

        container_metadata = None
        try:
            log.Debug(u"Starting connection with arguments:'%s'" % conn_kwargs)
            self.conn = Connection(**conn_kwargs)
            container_metadata = self.conn.head_container(self.container)
        except ClientException as e:
            # Not fatal: container_metadata stays None and the container is
            # created below.
            log.Debug(u"Connection failed: %s %s"
                      % (e.__class__.__name__, str(e)))
        except Exception as e:
            log.FatalError(u"Connection failed: %s %s"
                           % (e.__class__.__name__, str(e)),
                           log.ErrorCode.connection_failed)

        if container_metadata is None:
            log.Info(u"Creating container %s" % self.container)
            try:
                headers = dict([[policy_header, policy]]) if policy else None
                self.conn.put_container(self.container, headers=headers)
            except Exception as e:
                log.FatalError(u"Container creation failed: %s %s"
                               % (e.__class__.__name__, str(e)),
                               log.ErrorCode.connection_failed)
        elif policy and container_metadata[policy_header.lower()] != policy:
            log.FatalError(u"Container '%s' exists but its storage policy is '%s' not '%s'."
                           % (self.container, container_metadata[policy_header.lower()], policy))
        else:
            log.Debug(u"Container already created: %s" % container_metadata)

        # checking service connection
        try:
            log.Debug(u"Starting Swiftservice: '%s'" % svc_options)
            self.svc = SwiftService(options=svc_options)
            container_stat = self.svc.stat(self.container)
        except ClientException as e:
            log.FatalError(u"Connection failed: %s %s"
                           % (e.__class__.__name__, str(e)),
                           log.ErrorCode.connection_failed)
        log.Debug(u"Container stats: %s" % container_stat)

    def _error_code(self, operation, e):  # pylint: disable=unused-argument
        u"""Map a swift client 404 to ``backend_not_found``; otherwise return None."""
        if isinstance(e, self.resp_exc):
            if e.http_status == 404:
                return log.ErrorCode.backend_not_found

    def _put(self, source_path, remote_filename):
        u"""Upload ``source_path`` as ``remote_filename`` below the prefix.

        Files at least ``config.mp_segment_size`` bytes large (when that
        setting is positive) are uploaded in segments through SwiftService;
        all others through a plain ``put_object``.

        Raises BackendException if a segmented upload result is unsuccessful.
        """
        lp = util.fsdecode(source_path.name)
        rp = self.prefix + util.fsdecode(remote_filename)

        if config.mp_segment_size > 0:
            from swiftclient.service import SwiftUploadObject
            st = os.stat(lp)
            # only upload using Dynamic Large Object if mpvolsize is triggered
            if st.st_size >= config.mp_segment_size:
                log.Debug(u"Uploading Dynamic Large Object")
                mp = self.svc.upload(
                    self.container,
                    [SwiftUploadObject(lp, object_name=rp)],
                    options={u'segment_size': config.mp_segment_size})
                # container-level results are not about the object itself
                uploads = [a for a in mp if u'container' not in a[u'action']]
                for upload in uploads:
                    if not upload[u'success']:
                        raise BackendException(upload[u'traceback'])
                return

        log.Debug(u"Uploading '%s' to '%s' in remote container '%s'" % (lp, rp, self.container))
        # Close the source file once the upload call returns; it used to be
        # left open.
        with open(lp, u'rb') as source:
            self.conn.put_object(container=self.container,
                                 obj=rp,
                                 contents=source)

    def _get(self, remote_filename, local_path):
        u"""Download ``remote_filename`` into ``local_path`` in 1024-byte chunks."""
        headers, body = self.conn.get_object(self.container,
                                             self.prefix + util.fsdecode(remote_filename),
                                             resp_chunk_size=1024)
        with open(local_path.name, u'wb') as f:
            for chunk in body:
                f.write(chunk)

    def _list(self):
        u"""Return the names of the objects under the prefix, prefix removed."""
        headers, objs = self.conn.get_container(self.container, full_listing=True, path=self.prefix)
        # removes prefix from return values. should check for the prefix ?
        return [o[u'name'][len(self.prefix):] for o in objs]

    def _delete(self, filename):
        u"""Delete ``filename`` below the prefix."""
        # use swiftservice to correctly delete all segments in case of multipart uploads
        # The results are consumed only to drive the delete; they are not inspected.
        for _ in self.svc.delete(self.container, [self.prefix + util.fsdecode(filename)]):
            pass

    def _query(self, filename):
        u"""Return ``{'size': <content-length>}`` for ``filename``."""
        # use swiftservice to correctly report filesize in case of multipart uploads
        sobject = [a for a in self.svc.stat(self.container, [self.prefix + util.fsdecode(filename)])][0]
        sobj = {u'size': int(sobject[u'headers'][u'content-length'])}
        log.Debug(u"Objectquery: '%s' has size %s." % (util.fsdecode(filename), sobj[u'size']))
        return sobj
class SwiftFS(HasTraits):
    """File-system style access to the objects of one Swift container.

    Paths are object names inside ``container``.  A name ending with
    ``delimiter`` is treated as a directory.  Problems reported through
    :meth:`do_error` are raised as ``HTTPError``.
    """

    # Name of the container, taken from the CONTAINER environment variable
    # (falls back to 'demo').
    container = Unicode(os.environ.get('CONTAINER', 'demo'))
    storage_url = Unicode(help="The base URL for containers",
                          default_value='http://example.com',
                          config=True)
    delimiter = Unicode("/", help="Path delimiter", config=True)
    root_dir = Unicode("/", config=True)

    log = logging.getLogger('SwiftFS')

    def __init__(self, **kwargs):
        """Create the Swift service and make sure the container exists.

        Raises
        ------
        HTTPError (404) if the container POST raises ``SwiftError`` or
        reports ``success`` as false.
        """
        # Name the class explicitly: super(self.__class__, self) recurses
        # forever as soon as this class is subclassed.
        super(SwiftFS, self).__init__(**kwargs)

        # With the python swift client, the connection is automagically
        # created using environment variables (I know... horrible or what?)
        self.log.info("using swift container `%s`", self.container)

        # open connection to swift container
        self.swift = SwiftService()

        # make sure container exists
        try:
            result = self.swift.post(container=self.container)
        except SwiftError as e:
            self.log.error("creating container %s", e.value)
            raise HTTPError(404, e.value)

        if not result["success"]:
            msg = "could not create container %s" % self.container
            self.log.error(msg)
            raise HTTPError(404, msg)

    # see 'list' at
    # https://docs.openstack.org/developer/python-swiftclient/service-api.html
    # Swift returns every object whose name starts with the given prefix.
    # In a proper hierarchical file-system, list-dir only returns the files
    # in that directory, so the listing is filtered down to ONLY those
    # objects whose 'hierarchical' part of the name stops at the given path.
    @LogMethodResults()
    def listdir(self, path="", with_prefix=False, this_dir_only=True):
        """List the objects stored under ``path``.

        Parameters
        ----------
        path: string
            Prefix to list; normalised with :meth:`clean_path` first.
        with_prefix: bool
            Currently unused by this method.
        this_dir_only: bool
            When True (the default) only names matching the directory
            pattern built below are returned: entries directly under
            ``path``, or ``path`` itself without a trailing delimiter.
            When False every object returned for the prefix is kept
            (needed for a rename, for example).

        Returns
        -------
        A list of the listing dictionaries, one per object, e.g.
            {'bytes': 11,
             'hash': '3e25960a79dbc69b674cd4ec67a72c62',
             'last_modified': '2017-06-06T08:55:36.473Z',
             'name': 'foo/bar/thingamy.bob'}

        A ``SwiftError`` while listing is logged and whatever was collected
        up to that point is returned.
        """
        files = []

        # Get all objects that match the known path
        path = self.clean_path(path)
        try:
            pages = self.swift.list(container=self.container,
                                    options={'prefix': path})
            for page in pages:  # each page is up to 10,000 items
                if not page["success"]:
                    raise page["error"]
                files.extend(page["listing"])  # page is returning a list
        except SwiftError as e:
            self.log.error("SwiftFS.listdir %s", e.value)

        if not this_dir_only:
            return files

        # make up the pattern to compile into our regex engine
        regex_delim = re.escape(self.delimiter)
        if path:
            regex_path = re.escape(path.rstrip(self.delimiter))
            pattern = '^({0}{1}[^{1}]+{1}?|{0})$'.format(
                regex_path, regex_delim)
        else:
            pattern = '^[^{0}]+{0}?$'.format(regex_delim)
        self.log.debug("restrict directory pattern is: `%s`", pattern)
        regex = re.compile(pattern, re.UNICODE)

        return [f for f in files if regex.match(f['name'])]

    # We can 'stat' files, but not directories
    @LogMethodResults()
    def isfile(self, path):
        """Return True when the first stat result for ``path`` succeeds.

        Empty/None paths and paths ending with the delimiter are never
        files.  Any exception raised by the stat call is logged and treated
        as "not a file".
        """
        if path is None or path == '':
            self.log.debug("SwiftFS.isfile has no path, returning False")
            return False

        if path.endswith(self.delimiter):
            return False

        path = self.clean_path(path)
        try:
            response = self.swift.stat(container=self.container,
                                       objects=[path])
        except Exception as e:
            # Not every exception carries `.value` (SwiftError does), and
            # without a response there is nothing to inspect below.
            self.log.error("SwiftFS.isfile %s", getattr(e, 'value', e))
            return False

        # Only the first result is looked at.
        for r in response:
            if r['success']:
                return True
            self.log.error('Failed to retrieve stats for %s' % r['object'])
            break
        return False

    # We can 'list' directories, but not 'stat' them
    @LogMethodResults()
    def isdir(self, path):
        """Return True when ``path`` is the root or its listing succeeds.

        The root (the bare delimiter) is always a directory.  For any other
        path the first result of listing it as a prefix decides; no result,
        or a ``SwiftError``, means "not a directory".
        """
        # directories must have a trailing slash on them.
        # The core code seems to remove any trailing slash, so lets add it
        # back on
        if not path.endswith(self.delimiter):
            path = path + self.delimiter

        # Root directory checks
        if path == self.delimiter:  # effectively root directory
            self.log.debug("SwiftFS.isdir found root dir - returning True")
            return True

        path = self.clean_path(path)
        _opts = {'prefix': path} if re.search(r'\w', path) else {}
        try:
            self.log.debug("SwiftFS.isdir setting prefix to '%s'", path)
            response = self.swift.list(container=self.container,
                                       options=_opts)
        except SwiftError as e:
            self.log.error("SwiftFS.isdir %s", e.value)
            # No response to inspect: treat as "not a directory".
            return False

        # Only the first result is looked at.
        for r in response:
            if r['success']:
                return True
            self.log.error('Failed to retrieve stats for %s' % path)
            break
        return False

    @LogMethod()
    def cp(self, old_path, new_path):
        """Copy ``old_path`` to ``new_path`` (see :meth:`_copymove`)."""
        self._copymove(old_path, new_path, with_delete=False)

    @LogMethod()
    def mv(self, old_path, new_path):
        """Copy ``old_path`` to ``new_path``, then remove ``old_path``."""
        self._copymove(old_path, new_path, with_delete=True)

    @LogMethod()
    def remove_container(self):
        """Delete the container if a stat of it succeeds.

        ``SwiftError`` from either call is logged and ends the operation.
        """
        try:
            stat = self.swift.stat(container=self.container)
        except SwiftError as e:
            self.log.error("SwiftFS.remove_container %s", e.value)
            return

        if not ('success' in stat and stat['success']):
            return

        try:
            response = self.swift.delete(container=self.container)
        except SwiftError as e:
            self.log.error("SwiftFS.remove_container %s", e.value)
            # Nothing to report; the stat result must not be iterated as
            # if it were the delete response.
            return

        for r in response:
            self.log.debug("SwiftFS.rm action: `%s` success: `%s`",
                           r['action'], r['success'])

    @LogMethod()
    def rm(self, path, recursive=False):
        """Remove the object at ``path``.

        Parameters
        ----------
        path: string
        recursive: bool
            When True everything yielded by :meth:`_walk_path` is removed
            (children first), then ``path`` itself.

        Returns
        -------
        False when ``path`` is neither a file nor a directory or when the
        delete call raises ``SwiftError``; True otherwise.

        Raises
        ------
        HTTPError (400) for the root directory, or for a non-recursive
        removal of a directory that still has entries.
        """
        if path in ["", self.delimiter]:
            self.do_error('Cannot delete root directory', code=400)
            return False  # not reached: do_error raises
        if not (self.isdir(path) or self.isfile(path)):
            return False

        if recursive:
            for f in self._walk_path(path, dir_first=True):
                self.log.debug("SwiftFS.rm recurse into `%s`", f)
                self.rm(f)
            self.log.info("SwiftFS.rm and now remove `%s`", path)
            self.rm(path)
            return True

        self.log.info("SwiftFS.rm not recursing for `%s`", path)
        files = self.listdir(path)

        # Listing names come back in cleaned form, so compare against the
        # cleaned path; otherwise a leading delimiter makes a single file
        # look like a non-empty directory.
        target = self.clean_path(path)
        is_empty = not files or (len(files) == 1
                                 and files[0]['name'] == target)
        if not is_empty:
            self.do_error("directory %s not empty" % path, code=400)

        try:
            response = self.swift.delete(container=self.container,
                                         objects=[target])
        except SwiftError as e:
            self.log.error("SwiftFS.rm %s", e.value)
            return False
        for r in response:
            self.log.debug("SwiftFS.rm action: `%s` success: `%s`",
                           r['action'], r['success'])
        return True

    @LogMethod()
    def _walk_path(self, path, dir_first=False):
        """Yield ``path`` and the names found beneath it.

        With ``dir_first`` False each name is yielded before the names
        below it; with ``dir_first`` True it is yielded after them, and
        ``path`` comes last (the order :meth:`rm` relies on).
        A directory entry is yielded by this loop and again by the
        recursive call made for it.
        """
        if not dir_first:
            yield path
        for entry in self.listdir(path):
            name = entry['name']
            if not dir_first:
                yield name
            if self.guess_type(name) == 'directory':
                for sub in self._walk_path(name, dir_first=dir_first):
                    yield sub
            if dir_first:
                yield name
        if dir_first:
            yield path

    # core function to copy or move file-objects
    # does clever recursive stuff for directory trees
    @LogMethod()
    def _copymove(self, old_path, new_path, with_delete=False):
        """Copy everything under ``old_path`` to ``new_path``.

        Directories are re-created with :meth:`mkdir`; other objects are
        copied with the Swift copy call.  When ``with_delete`` is True the
        source is removed recursively afterwards.

        Raises
        ------
        HTTPError (400) if the parent of ``new_path`` does not exist;
        ``SwiftError`` from the copy call; or the exception stored in a
        failed copy result.
        """
        # check parent directory exists
        self.checkParentDirExists(new_path)

        for f in self._walk_path(old_path):
            new_f = f.replace(old_path, new_path, 1)
            if self.guess_type(f) == 'directory':
                self.mkdir(new_f)
            else:
                # NOTE: both paths are normalised only here, after new_f
                # was computed; later iterations use the cleaned values.
                # The statement order is kept as is on purpose.
                old_path = self.clean_path(old_path)
                new_path = self.clean_path(new_path)
                destination = (self.delimiter + self.container +
                               self.delimiter + new_f)
                try:
                    response = self.swift.copy(
                        self.container, [f], {'destination': destination})
                except SwiftError as e:
                    self.log.error(e.value)
                    raise
                for r in response:
                    if r["success"]:
                        if r["action"] == "copy_object":
                            self.log.debug(
                                "object %s copied from /%s/%s" %
                                (r["destination"], r["container"],
                                 r["object"]))
                        if r["action"] == "create_container":
                            self.log.debug(
                                "container %s created" % r["container"])
                    elif "error" in r and isinstance(r["error"], Exception):
                        raise r["error"]

        # we always test for delete: file or directory...
        if with_delete:
            self.rm(old_path, recursive=True)

    # Directories are just objects that have a trailing '/'
    @LogMethod()
    def mkdir(self, path):
        """Create a directory object: ``path`` with one trailing delimiter."""
        path = path.rstrip(self.delimiter) + self.delimiter
        self._do_write(path, None)

    # This works by downloading the file to disk then reading the contents of
    # that file into memory, before deleting the file
    # NOTE this is reading text files!
    # NOTE this really only works with files in the local directory, but given
    # local filestore will disappear when the docker ends, I'm not too bothered.
    @LogMethod()
    def read(self, path):
        """Return the text content of the object at ``path``.

        Returns '' when the download raises ``SwiftError`` or no result
        succeeds.  Raises HTTPError (400) if ``path`` is a directory.
        """
        if self.guess_type(path) == "directory":
            msg = "cannot read from path %s: it is a directory" % path
            self.do_error(msg, code=400)

        content = ''
        fhandle, local_file = tempfile.mkstemp(prefix="swiftfs_")
        os.close(fhandle)
        try:
            path = self.clean_path(path)
            try:
                response = self.swift.download(
                    container=self.container,
                    objects=[path],
                    options={"out_file": local_file})
            except SwiftError as e:
                self.log.error("SwiftFS.read %s", e.value)
                return ''

            for r in response:
                if r['success']:
                    self.log.debug("SwiftFS.read: using local file %s",
                                   local_file)
                    with open(local_file) as lf:
                        content = lf.read()
        finally:
            # Always drop the temporary file, including on the early
            # return above.
            os.remove(local_file)
        return content

    # Write is 'upload' and 'upload' needs a "file" it can read from
    # We use an in-memory io.BytesIO for this
    @LogMethod()
    def write(self, path, content):
        """Upload ``content`` as the object at ``path``.

        Raises HTTPError (400) if ``path`` is a directory.  Intermediate
        directories are not created here; :meth:`_do_write` requires the
        parent directory to exist.
        """
        if self.guess_type(path) == "directory":
            msg = "cannot write to path %s: it is a directory" % path
            self.do_error(msg, code=400)

        self._do_write(path, content)

    @LogMethod()
    def _make_intermedate_dirs(self, path):
        """Create every missing directory level above the last path part.

        Returns False as soon as :meth:`isfile` reports an object at one of
        the intermediate levels, True otherwise.
        """
        # we loop over the path, checking for an object at every level
        # of the hierarchy, except the last item (which may be a file,
        # or a directory itself)
        # Plain split: the delimiter is configurable and must not be
        # interpreted as a regular expression.
        path_parts = path.split(self.delimiter)
        current_path = ''
        for p in path_parts[:-1]:
            this_path = current_path + p + self.delimiter
            if self.isfile(this_path):
                self.log.error(
                    "SwiftFS._make_intermedate_dirs failure: dir exists at path `%s`"
                    % this_path)
                return False
            if not self.isdir(this_path):
                self.log.debug(
                    "SwiftFS._make_intermedate_dirs making directory")
                self._do_write(this_path, None)
            current_path = this_path

        return True

    @LogMethod()
    def _do_write(self, path, content):
        """Upload one object: an empty directory marker or encoded text.

        Parameters
        ----------
        path: string
            Object name; a directory path gets an upload with no source.
        content: string
            Text to store (UTF-8 encoded); ignored for directories.

        Raises
        ------
        HTTPError (400) if the parent directory does not exist;
        ``SwiftError`` / ``ClientException`` from the upload call.
        """
        # check parent directory exists
        self.checkParentDirExists(path)

        kind = self.guess_type(path)
        things = []
        if kind == "directory":
            self.log.debug("SwiftFS._do_write create directory")
            things.append(SwiftUploadObject(None, object_name=path))
        else:
            self.log.debug("SwiftFS._do_write create file/notebook from '%s'",
                           content)
            output = io.BytesIO(content.encode('utf-8'))
            things.append(SwiftUploadObject(output, object_name=path))

        # Now do the upload
        # NOTE(review): the cleaned path is computed after the upload
        # objects were built and is not used below - confirm whether the
        # object name was meant to be the cleaned one.
        path = self.clean_path(path)
        try:
            response = self.swift.upload(self.container, things)
        except SwiftError as e:
            self.log.error("SwiftFS._do_write swift-error: %s", e.value)
            raise
        except ClientException as e:
            # ClientException does not define `.value`; log the exception
            # itself so the original error is the one that propagates.
            self.log.error("SwiftFS._do_write client-error: %s", e)
            raise
        for r in response:
            self.log.debug("SwiftFS._do_write action: '%s', response: '%s'",
                           r['action'], r['success'])

    @LogMethodResults()
    def guess_type(self, path, allow_directory=True):
        """
        Guess the type of a file.

        If allow_directory is False, don't consider the possibility that the
        file is a directory.

        Parameters
        ----------
        path: string

        Returns
        -------
        "notebook" for names ending in ".ipynb", "directory" when allowed
        and the name ends with the delimiter or :meth:`isdir` is true,
        otherwise "file".
        """
        if path.endswith(".ipynb"):
            return "notebook"
        if allow_directory and (path.endswith(self.delimiter)
                                or self.isdir(path)):
            return "directory"
        return "file"

    @LogMethod()
    def clean_path(self, path):
        """Normalise ``path``: no leading delimiter, and exactly one
        trailing delimiter for directories.  The root becomes ''."""
        # strip off any leading '/'
        path = path.lstrip(self.delimiter)
        if self.guess_type(path) == 'directory':
            # ensure we have a / at the end of directory paths
            path = path.rstrip(self.delimiter) + self.delimiter
            if path == self.delimiter:
                path = ''
        return path

    @LogMethodResults()
    def checkParentDirExists(self, path):
        """checks if the parent directory of a path exists

        Raises HTTPError (400) when :meth:`isdir` is false for the parent.
        """
        parts = path.strip(self.delimiter).split(self.delimiter)[:-1]
        p = self.delimiter.join(parts)
        self.log.debug("SwiftFS.checkDirExists: directory name %s", p)
        if not self.isdir(p):
            self.do_error('parent directory does not exist %s' % p, code=400)

    @LogMethod()
    def do_error(self, msg, code=500):
        """Log ``msg`` and raise ``HTTPError(code, msg)``."""
        self.log.error(msg)
        raise HTTPError(code, msg)