def saveObjsBackend(objs, backend, config):
    """Copy every entry of *objs* to the selected storage backend.

    Each entry is indexed as ``entry[0]`` (remote path / object name) and
    ``entry[1]`` (local filesystem path).

    * ``'hdfs'``  -- runs ``hdfs dfs -copyFromLocal``; ``-f`` (overwrite) is
      added unless the local path contains ``'.py'``.
    * ``'swift'`` -- uploads all entries to the ``containerModules``
      container, authenticating with the ``OS_*`` environment variables.
    * ``'nfs'``   -- copies each file to ``config['MODULES_DIR'] + entry[0]``.

    Any other *backend* value uploads nothing. The closing message is printed
    whenever no exception was raised.

    Raises:
        RuntimeError: an hdfs command or a swift upload failed. Before it is
            raised, ``config['BACKUP_METADATA_LOCAL_PATH']`` is copied back
            over ``config['METADATA_LOCAL_PATH']``.
    """

    def _restore_metadata():
        # Roll the local metadata file back to its backup copy.
        shutil.copyfile(config['BACKUP_METADATA_LOCAL_PATH'],
                        config['METADATA_LOCAL_PATH'])

    if backend == 'hdfs':
        for entry in objs:
            try:
                command = ['hdfs', 'dfs', '-copyFromLocal']
                # Modules must never be overwritten; the metadata has to be
                # overwritten every time, hence the forced copy.
                if '.py' not in entry[1]:
                    command.append('-f')
                command += [entry[1], entry[0]]
                subprocess.check_call(command)
            except Exception as e:
                _restore_metadata()
                raise RuntimeError(e)
    elif backend == 'swift':
        option_names = ('os_auth_url', 'os_username', 'os_password',
                        'os_tenant_id', 'os_tenant_name')
        options = {name: os.environ[name.upper()] for name in option_names}
        swiftService = SwiftService(options=options)
        objects = [SwiftUploadObject(entry[1], object_name=entry[0])
                   for entry in objs]
        for uploaded in swiftService.upload(container='containerModules',
                                            objects=objects):
            if "error" in uploaded:
                _restore_metadata()
                raise RuntimeError(uploaded['error'])
    elif backend == 'nfs':
        for entry in objs:
            shutil.copyfile(entry[1], config['MODULES_DIR'] + entry[0])

    print('Metadata/Module changed and uploaded')
class SwiftClient(object):
    """Client for the Swift object/blob store of OpenStack.

    See http://swift.openstack.org

    Swift requires environment variables (OS_*) for the authentication and
    configuration.

    Every object name handled by this client is built as
    ``prefix + '/' + <name>`` inside ``container``.
    """

    def __init__(self, container, prefix=''):
        self.container = container
        self.prefix = prefix
        self.client = SwiftService()

    def download(self, source, target):
        """Download object *source* into the local file *target*.

        Returns the list of result dicts produced by ``SwiftService.download``.
        """
        objects = [self.prefix + '/' + source]
        options = {'out_file': target}
        return list(self.client.download(self.container, objects, options))

    def upload(self, source, target):
        """Upload the local file *source* as object *target*.

        Returns the list of result dicts produced by ``SwiftService.upload``.
        """
        object_name = self.prefix + '/' + target
        objects = [SwiftUploadObject(source, object_name=object_name)]
        return list(self.client.upload(self.container, objects))

    def ls(self, path):
        """Return the names of all objects below *path*, relative to it.

        Every page yielded by ``SwiftService.list`` is read, so long listings
        are not cut off after the first page. Only the leading
        ``prefix/path/`` is removed from each name; later occurrences of the
        same text inside an object name are left untouched.

        Raises:
            KeyError: a listing page carries no ``'listing'`` entry.
        """
        fpath = self.prefix + '/' + path + '/'
        names = []
        for page in self.client.list(self.container, {'prefix': fpath}):
            for entry in page['listing']:
                name = entry['name']
                if name.startswith(fpath):
                    name = name[len(fpath):]
                names.append(name)
        return names

    def url(self, path=''):
        """Return ``container/prefix/path`` as a plain string."""
        return self.container + '/' + self.prefix + '/' + path
def upload(source, dest):
    """
    Upload a file to a remote SWIFT store, retrying on failure.

    The upload is attempted up to 5 times. After a failed attempt the function
    sleeps (30s, then 60s, 120s, 210s) before trying again.

    @param source: Description of the local file. ``source['url']`` is the
        local path to upload from; the optional ``source['content_type']`` is
        sent as the ``Content-Type`` header.
    @type source: Dictionary
    @param dest: The destination information. ``dest['url']`` must have a
        path of the form ``/<version>/<account>/<container>/<object name>``;
        a ``swift+`` scheme prefix is stripped. Optional ``os_auth_url``,
        ``os_username``, ``os_password``, ``os_tenant_name`` and
        ``os_storage_url`` entries are passed on to SwiftService.
    @type dest: Dictionary

    @return: None. Failure is reported by raising, not by a return value.
    @raise Exception: the last attempt failed; the error is logged first.
    """
    url = urlsplit(dest['url'])

    _, ver, account, container, object_name = url.path.split('/', 4)

    swift_opts = {
        'os_storage_url': '{scheme}://{netloc}/{ver}/{account}'.format(
            scheme=re.sub(r'^swift\+', '', url.scheme),
            netloc=url.netloc,
            ver=ver,
            account=account)
    }
    # SwiftService knows about environment variables
    for opt in ('os_auth_url', 'os_username', 'os_password',
                'os_tenant_name', 'os_storage_url'):
        if opt in dest:
            swift_opts[opt] = dest[opt]

    try:
        swift = SwiftService(swift_opts)
        headers = []
        if 'content_type' in source:
            headers.append('Content-Type: {}'.format(source['content_type']))

        retries = 5  # number of retries left
        backoff = 30  # wait time between retries
        backoff_inc = 30  # increase in wait time per retry

        while retries:
            retries -= 1
            try:
                upload_object = SwiftUploadObject(
                    source['url'],
                    object_name=object_name,
                    options={'header': headers})
                for result in swift.upload(container, [upload_object]):
                    # TODO: we may get result['action'] = 'create_container'
                    #       as well as result['action'] = 'upload_object'
                    #       with result['path'] = source['url']
                    if not result['success']:
                        # Use .get() so that a result without these keys
                        # cannot hide the real failure behind a KeyError.
                        raise Exception(
                            'Upload to Swift {container}/{object_name} failed with {error}'.format(
                                container=result.get('container', container),
                                object_name=object_name,
                                error=result.get('error')))
                # no exception we can continue
                retries = 0
            except Exception as e:
                if not retries:
                    # reraise if no retries left
                    raise
                LOG.warning('Upload to Swift failed: %s - %d retries left',
                            e, retries)
                time.sleep(backoff)
                backoff += backoff_inc
                backoff_inc += 30
    except Exception as e:
        LOG.error("Upload to swift failed: %s", e, exc_info=True)
        raise
def test_upload_with_bad_segment_size(self):
    """A non-integer ``segment_size`` makes ``upload`` raise SwiftError."""
    expected_message = 'Segment size should be an integer value'
    for bad in ('ten', '1234X', '100.3'):
        try:
            service = SwiftService({'segment_size': bad})
            # upload() is lazy; the first next() triggers the validation.
            next(service.upload('c', 'o'))
        except SwiftError as exc:
            self.assertEqual(expected_message, exc.value)
        else:
            self.fail('Expected SwiftError when segment_size=%s' % bad)
class DiracStore(object):
    """Upload files to a Swift container and announce each one on Kafka.

    For every newly uploaded object a ``schema.Resource`` describing it is
    serialised with ``message_to_json`` and sent to ``topic``.
    """

    def __init__(self, container, topic, kafka):
        self.container = container
        self.swift = SwiftService()
        self.out = OutputManager()
        self.kafka = kafka
        auth = get_auth()
        self.url = auth[0]
        self.token = auth[1]
        self.topic = topic
        self.producer = KafkaProducer(bootstrap_servers=kafka)

    def send(self, resource):
        """Serialise *resource* to JSON and publish it on the Kafka topic."""
        payload = message_to_json(resource)
        print("sending " + payload)
        self.producer.send(self.topic, payload.encode('utf-8'))

    def make_resource(self, stat):
        """Build a ``schema.Resource`` from one entry returned by ``stat``.

        *stat* must provide the keys ``'Object'``, ``'Content Type'``,
        ``'Content Length'`` and ``'Last Modified'``.
        """
        resource = schema.Resource()
        resource.type = 'file'
        resource.name = stat['Object']
        resource.location = self.url + '/' + self.container + '/' + stat['Object']
        resource.mimeType = stat['Content Type']
        # int() works on Python 2 (auto-promotes to long) and on Python 3,
        # where the ``long`` builtin no longer exists.
        resource.size = int(stat['Content Length'])
        resource.created = stat['Last Modified']
        return resource

    def stat(self, paths):
        """Return ``{object name: {item name: item value}}`` for *paths*.

        Objects whose stat request was not successful are left out.
        """
        found = {}
        for response in self.swift.stat(container=self.container, objects=paths):
            if response['success']:
                found[response['object']] = {
                    item[0]: item[1] for item in response['items']}
        return found

    def store(self, path, source):
        """Upload *source* below the remote *path* and announce new objects.

        *source* may be a single file or a directory; for a directory its
        direct entries are uploaded. Objects that already exist in the
        container (according to ``stat``) are skipped.
        """
        isdir = os.path.isdir(source)
        base = source if isdir else os.path.dirname(source)
        sources = os.listdir(source) if isdir else [source]
        locations = [os.path.join(path, os.path.basename(entry))
                     for entry in sources]
        print(str(len(locations)) + " locations!")
        stats = self.stat(locations)
        objs = [SwiftUploadObject(os.path.join(base, os.path.basename(location)),
                                  object_name=location)
                for location in locations if location not in stats]
        print(str(len(objs)) + " previously unseen!")
        for response in self.swift.upload(self.container, objs):
            if response['success']:
                if 'object' in response:
                    print('uploading ' + response['object'])
                    uploaded = self.stat([response['object']])
                    # dict.values() is a view on Python 3 and cannot be
                    # indexed; materialise it first.
                    resource = self.make_resource(list(uploaded.values())[0])
                    self.send(resource)
def _do_store(self, output):
    """Upload ``output.file`` to Swift and return where it was stored.

    The object is named ``<uuid>/<file name>``: the uuid is ``output.uuid``
    (or a fresh ``uuid1`` when that is falsy) and the file name is the base
    name of ``output.file``, with ``output.data_format.extension`` appended
    when the file has no extension of its own. The object is uploaded with an
    ``X-Delete-After`` header of 7 days.

    Failed upload results are logged but do not abort the call.

    Returns:
        tuple: ``(10, object_name, url)`` where *url* is ``self.output_url``
        joined with the object name.
    """
    log = logging.getLogger(__name__)
    source_file = output.file
    request_uuid = str(output.uuid or uuid.uuid1())

    # build output name
    stem, extension = os.path.splitext(source_file)
    if not extension:
        extension = output.data_format.extension
    output_name = os.path.basename(stem) + extension

    service_options = {
        'use_slo': True,
        'segment_size': 5 * 1024 ** 3,
    }
    swift = SwiftService(service_options)

    object_name = '/'.join((request_uuid, output_name))
    week_in_seconds = 7 * 24 * 60 * 60
    upload = SwiftUploadObject(
        output.file,
        object_name,
        options={'header': {'X-Delete-After': str(week_in_seconds)}})

    log.info('Storing file output to %s', object_name)
    # We have to consume the response otherwise the object won't get uploaded
    for res in swift.upload(self.container, [upload]):
        # res['action'] is one of ('create_container', 'upload_object')
        if not res['success']:
            log.error('FAIL: %s', res)

    location = self.output_url.rstrip('/') + '/' + object_name
    return (10, object_name, location)
class SwiftStorage(Storage):
    """Storage on OpenStack swift service."""

    def __init__(self, storage_id, container_name, auth_config=None, transfer_config=None):
        """Create the Swift client for *container_name*.

        *transfer_config* and *auth_config* are merged (auth entries win) into
        the SwiftService options; ``auth_version`` is forced to ``"2.0"``
        unless *auth_config* overrides it.
        """
        super(SwiftStorage, self).__init__(storage_id)
        # Work on a copy so the caller's transfer_config is not modified.
        opts = dict(transfer_config or {})
        opts["auth_version"] = "2.0"
        if auth_config:
            opts.update(auth_config)
        self._client = SwiftService(opts)
        self._container = container_name

    def _get_file_safe(self, remote_path, local_path):
        """Download *remote_path* via a temporary directory to *local_path*.

        The downloaded file gets the object's ``x-timestamp`` as access and
        modification time before it is moved into place.

        Raises:
            RuntimeError: the download failed or produced no result.
        """
        tmpdir = tempfile.mkdtemp()
        try:
            results = self._client.download(container=self._container,
                                            objects=[remote_path],
                                            options={"out_directory": tmpdir})
            has_results = False
            for r in results:
                has_results = True
                if not r["success"]:
                    raise RuntimeError("Cannot download [%s]: %s" % (remote_path, r["error"]))
                timestamp = float(r["response_dict"]["headers"]["x-timestamp"])
                os.utime(os.path.join(tmpdir, remote_path), (timestamp, timestamp))
            if not has_results:
                raise RuntimeError("Cannot download [%s]: %s" % (remote_path, "NO RESULT"))
            shutil.move(os.path.join(tmpdir, remote_path), local_path)
        finally:
            # Also clean up when the download fails.
            shutil.rmtree(tmpdir, ignore_errors=True)

    def _check_existing_file(self, remote_path, local_path):
        """Return True when *local_path* matches the remote object.

        The two match when the sizes are equal and the local mtime equals the
        object's ``x-timestamp`` (both truncated to whole seconds).
        """
        if os.path.exists(local_path):
            results = self._client.stat(self._container, objects=[remote_path])
            local_stat = os.stat(local_path)
            for r in results:
                if r['success']:
                    if int(r['headers']['content-length']) != local_stat.st_size:
                        return False
                    timestamp = float(r["headers"]["x-timestamp"])
                    if int(local_stat.st_mtime) == int(timestamp):
                        return True
        else:
            LOGGER.debug('Cannot find %s or %s', local_path, remote_path)
        return False

    def stat(self, remote_path):
        """Describe *remote_path*.

        Returns a dict with ``is_dir`` False plus ``size`` and
        ``last_modified`` for an object, ``{'is_dir': True}`` when listing the
        path as a prefix succeeds, and False otherwise.
        """
        if not remote_path.endswith('/'):
            results = self._client.stat(self._container, objects=[remote_path])
            for r in results:
                if r['success']:
                    return {'is_dir': False,
                            'size': r['headers']['content-length'],
                            'last_modified': r['headers']['x-timestamp']}
            remote_path += '/'
        results = self._client.list(container=self._container,
                                    options={"prefix": remote_path, "delimiter": "/"})
        for r in results:
            if r['success']:
                return {'is_dir': True}
        return False

    def push_file(self, local_path, remote_path):
        """Upload *local_path* as object *remote_path*.

        Raises:
            RuntimeError: the upload failed or produced no result.
        """
        obj = SwiftUploadObject(local_path, object_name=remote_path)
        results = self._client.upload(self._container, [obj])
        has_results = False
        for r in results:
            has_results = True
            if not r["success"]:
                raise RuntimeError("Cannot push file [%s]>[%s]: %s" % (local_path, remote_path, r["error"]))
        if not has_results:
            raise RuntimeError("Cannot push file [%s]>[%s]: %s" % (local_path, remote_path, "NO RESULTS"))

    def stream(self, remote_path, buffer_size=1024):
        """Return a generator yielding *remote_path* in *buffer_size* chunks.

        The object is first downloaded to a temporary directory, which is
        removed once the generator finishes, fails or is closed.
        """
        def generate():
            tmpdir = tempfile.mkdtemp()
            try:
                results = self._client.download(container=self._container,
                                                objects=[remote_path],
                                                options={"out_directory": tmpdir})
                has_results = False
                for r in results:
                    has_results = True
                    if not r["success"]:
                        raise RuntimeError("Cannot download file [%s]: %s" % (remote_path, r["error"]))
                if not has_results:
                    raise RuntimeError("Cannot download file [%s]: NO RESULTS" % (remote_path,))

                with open(os.path.join(tmpdir, remote_path), "rb") as f:
                    for chunk in iter(lambda: f.read(buffer_size), b''):
                        yield chunk
            finally:
                shutil.rmtree(tmpdir, ignore_errors=True)

        return generate()

    def listdir(self, remote_path, recursive=False):
        """List what is stored below the prefix *remote_path*.

        Returns a dict mapping each name to ``{'is_dir': True}`` for
        pseudo-directories, or ``{'size', 'last_modified'}`` for objects.
        Without *recursive* only one level (delimiter ``/``) is listed.
        """
        options = {"prefix": remote_path}
        if not recursive:
            options["delimiter"] = "/"
        list_parts_gen = self._client.list(container=self._container,
                                           options=options)
        lsdir = {}
        for page in list_parts_gen:
            if page["success"]:
                for item in page["listing"]:
                    if "subdir" in item:
                        lsdir[item["subdir"]] = {'is_dir': True}
                    else:
                        path = item["name"]
                        last_modified = datetime.strptime(item["last_modified"], '%Y-%m-%dT%H:%M:%S.%f')
                        lsdir[path] = {'size': item["bytes"],
                                       'last_modified': datetime.timestamp(last_modified)}
        return lsdir

    def mkdir(self, remote_path):
        """Do nothing: this implementation creates no directory object."""
        pass

    def _delete_single(self, remote_path, isdir):
        """Delete the object *remote_path*; a no-op when *isdir* is true.

        Raises:
            RuntimeError: the delete failed or produced no result.
        """
        if not isdir:
            results = self._client.delete(container=self._container, objects=[remote_path])
            has_results = False
            for r in results:
                has_results = True
                if not r["success"]:
                    raise RuntimeError("Cannot delete file [%s]: %s" % (remote_path, r["error"]))
            if not has_results:
                raise RuntimeError("Cannot delete file [%s]: NO RESULT" % (remote_path))

    def rename(self, old_remote_path, new_remote_path):
        """Move every object below *old_remote_path* to *new_remote_path*.

        Each object is copied to its new name and the original is then
        deleted.

        Raises:
            RuntimeError: a copy or delete failed or produced no result.
        """
        listfiles = self.listdir(old_remote_path, True)
        for f in listfiles:
            assert f[:len(old_remote_path)] == old_remote_path, "inconsistent listdir result"
            obj = SwiftCopyObject(f, {"destination": "/%s/%s%s" % (
                self._container,
                new_remote_path,
                f[len(old_remote_path):])})
            results = self._client.copy(self._container, [obj])
            has_results = False
            for r in results:
                has_results = True
                if not r["success"]:
                    raise RuntimeError("Cannot copy file [%s]: %s" % (old_remote_path, r["error"]))
            if not has_results:
                raise RuntimeError("Cannot copy file [%s]: NO RESULT" % (old_remote_path))
            self._delete_single(f, False)

    def exists(self, remote_path):
        """Return True when listing *remote_path* as a prefix finds a match."""
        result = self._client.list(container=self._container,
                                   options={"prefix": remote_path, "delimiter": "/"})
        for page in result:
            if page["success"]:
                for item in page["listing"]:
                    if "subdir" in item:
                        return True
                    if (item["name"] == remote_path or
                            remote_path == '' or
                            remote_path.endswith('/') or
                            item["name"].startswith(remote_path + '/')):
                        return True
        return False

    def isdir(self, remote_path):
        """Return True when *remote_path*, taken as ``path/``, exists."""
        if not remote_path.endswith('/'):
            return self.exists(remote_path + '/')
        return self.exists(remote_path)

    def _internal_path(self, path):
        # OpenStack does not work with paths but keys. This function possibly adapts a
        # path-like representation to a OpenStack key.
        if path.startswith('/'):
            return path[1:]
        return path
class Swift(object):
    """Per-user file storage on top of Swift.

    Every user gets a container named ``<configured container>_<user id>``;
    folders are emulated with object-name prefixes ending in ``/``.
    """

    def __init__(self, settings):
        options = get_os_settings(settings)
        self.swift = SwiftService(options)
        options = get_swift_settings(settings)
        self.temp_url_key = options['temp_url_key']
        # TODO: hard coded template
        self.name_template = options['container'] + '_{name}'

    # TODO: this should be a module level method
    def build_object_name(self, user_id, path='', name=None):
        """Return ``(container, object_name)`` for a user's path and name.

        - if name is None, the full folder path with a trailing slash is
          returned (an empty string for the container root)
        - the returned object name does not have a leading slash

        Raises:
            ValueError: *user_id* is empty or contains ``/``, *path* contains
                ``..``, or *name* contains ``/`` or is ``.`` / ``..``.
        """
        if not user_id or '/' in user_id:
            raise ValueError('Invalid userid', user_id)
        container = self.name_template.format(name=user_id)
        parts = []
        if path:
            # disallow '..'
            if '..' in path:
                raise ValueError('Invalid path', path)
            # strip all leading trailing slashes from path
            # deduplicate double slashes
            path = '/'.join(x for x in path.split('/') if x)
            if path:
                parts.append(path)
        if name:
            # build a file path
            if '/' in name or name in ('..', '.'):
                raise ValueError('Invalid name', name)
            parts.append(name)
        else:
            # ensure we get a trailing slash if there is no name
            # -> it is a folder
            parts.append('')
        return container, '/'.join(parts)

    def _create_container(self, container):
        """Post the temp-url key, quota and CORS metadata to *container*."""
        return self.swift.post(
            container=container,
            options={
                # swiftservice converts this to X-Container-Meta-Temp-Url-Key
                'meta': {
                    'temp-url-key': self.temp_url_key,
                    # TODO: hard coded 10G quota
                    'quota-bytes': str(int(10e9)),
                    'access-control-allow-origin': '*',
                }
            })

    def stat(self, user_id, path=''):
        """Return usage information for the user's container.

        The container is created when it does not exist yet (HTTP 404).

        Returns:
            dict: ``used``, ``quota``, ``count`` and ``created``.

        Raises:
            NotImplementedError: a non-empty *path* was given; object stat is
                not implemented.
        """
        container, object_prefix = self.build_object_name(user_id, path)
        if path:
            # object stat requested -- never implemented; fail explicitly
            # instead of tripping over an unbound local variable.
            raise NotImplementedError('object stat is not implemented')
        # container stat requested
        try:
            stat = self.swift.stat(container=container)
        except SwiftError as e:
            if e.exception.http_status == 404:
                # container does not exist
                self._create_container(container)
                stat = self.swift.stat(container=container)
            else:
                raise
        headers = stat['headers']
        return {
            'used': safe_int(headers.get('x-container-bytes-used', None)),
            'quota': safe_int(headers.get('x-container-meta-quota-bytes', None)),
            'count': safe_int(headers.get('x-container-object-count', None)),
            'created': safe_isodate(headers.get('x-timestamp', None)),
        }

    def list(self, user_id, path=''):
        """Yield the direct children of the user's folder *path*.

        Pseudo-directories are yielded as dicts with ``name``, ``bytes`` (0)
        and ``content_type`` ``application/directory``; objects are yielded as
        the listing item with ``name`` made relative to the folder. The folder
        itself is skipped.

        A 404 on the container root ends the listing silently; any other
        error is raised.
        """
        container, object_prefix = self.build_object_name(user_id, path)
        for data in self.swift.list(container=container,
                                    options={'delimiter': '/',
                                             'prefix': object_prefix}):
            # NOTE(review): 'action' is compared with a list here, so this
            # branch looks like it can never run -- confirm the intent before
            # changing it; a post result carries no 'listing'.
            if data['action'] == ['list_container_part'] and not data['success']:
                data = self._create_container(container)
            if data['success']:
                for item in data['listing']:
                    # filter current folder
                    if item.get('subdir', None) == object_prefix:
                        # ignore current directory
                        continue
                    elif item.get('name', None) == object_prefix:
                        # ignore the current directory
                        continue
                    else:
                        if item.get('subdir', None):
                            # it is a pseudo dir
                            yield {
                                'name': item.get('subdir')[len(object_prefix):].strip('/'),
                                'bytes': 0,
                                'content_type': 'application/directory',
                            }
                        else:
                            item['name'] = item['name'][len(object_prefix):]
                            yield item
                # skip error handling below
                continue
            # TODO: we are raising an exception here... jumping out of the
            #       generator.... should be fine for this method, but
            #       does this have the potential to leak threads?
            #       SwiftService uses threads to generate results
            ex = data['error']
            if isinstance(ex, SwiftError):
                if not path and ex.exception.http_status == 404:
                    # ex.exception should be a ClientException, not found
                    # if path is empty, we ignore it, it means, the
                    # user container does not exist yet.
                    break
            raise ex

    def create_folder(self, user_id, path='', description=None):
        """Create a directory marker for *path* and set its metadata.

        Returns the list of results of the metadata post.
        """
        container, object_path = self.build_object_name(user_id, path)

        # create upload object
        object_path = SwiftUploadObject(
            None,
            object_name=object_path,
            options={
                'dir_marker': True,
                'meta': {
                    'description': description or '',
                },
            })

        folders = []
        for res in self.swift.upload(container, [object_path]):
            if not res['success']:
                raise res['error']
            if res['action'] == 'create_container':
                # if res['response_dict']['reason'] == 'Created'
                # status will be 202 if container already existed
                if res['response_dict']['status'] == 201:
                    # set up metadata for user container
                    res = self._create_container(container)
            # TODO: project only:
            if res['action'] == 'create_dir_marker':
                meta = {}
                if description:
                    meta['description'] = description
                folder = SwiftPostObject(
                    object_name=res['object'],
                    options={
                        'header': res['headers'],
                        'meta': meta,
                    })
                folders.append(folder)
        # TODO: check whether we should use post above instead of upload
        #       maybe we can avoid calling swift twice?
        #       also make sure the container gets created in case of post
        ret = []
        for res in self.swift.post(container, folders):
            if not res['success']:
                raise res['error']
            ret.append(res)
        return ret

    def delete_folder(self, user_id, path=''):
        """Delete everything below *path*, yielding each relative name."""
        container, object_path = self.build_object_name(user_id, path)
        # don't use delimiter here, otherwise swift.delete will only see
        # one level of subfolders and won't be able to delete everything
        # TODO: can this delete the container as well?
        for res in self.swift.delete(container=container,
                                     options={'prefix': object_path}):
            yield res['object'][len(object_path):]

    def upload_file(self, user_id, path, name, file,
                    content_type='application/octet-stream',
                    content_length=-1):
        """Upload the file object *file* as ``path/name`` for the user.

        Raises whatever error a result carries (for example when the upload
        exceeds the quota).
        """
        container, object_name = self.build_object_name(user_id, path, name)
        # prepend account and container to path
        headers = {'Content-Type': content_type or 'application/octet-stream'}
        # if content_length >= 0:
        #     headers['Content-Length'] = str(content_length)
        upload_obj = SwiftUploadObject(
            source=LengthWrapper(file, content_length, True),
            object_name=object_name,
            options={'header': headers})
        log = logging.getLogger(__name__)
        log.info('Tool Upload %s', upload_obj)
        for res in self.swift.upload(container, [upload_obj]):
            if res['action'] == 'create_container':
                res = self._create_container(container)
            # Getting a funny response iterator here
            # 1. action: create_container
            # 2. action: upload_object
            log.info('Tool Result %s', res)
            if res.get('error', None):
                # res['error'].http_status == 413:
                # -> Request Entity Too Large
                # res['error'].http_response_content == 'Upload exceeds quota'
                raise res['error']

    def delete_file(self, user_id, path, name):
        """Delete the object ``path/name`` of the user.

        Raises the error carried by any result of the delete.
        """
        container, object_name = self.build_object_name(user_id, path, name)
        # TODO: could set options['prefix'] to make sure we don't delete
        #       anything outside project/folder
        # TODO: could this delete the container?
        for res in self.swift.delete(container=container, objects=[object_name]):
            if res.get('error', None):
                raise res['error']

    def generate_temp_url(self, user_id, path, name):
        """Return a GET URL for ``path/name``, signed with the temp-url key."""
        container, object_name = self.build_object_name(user_id, path, name)
        # discover swift endpoint urls
        conn = get_conn(self.swift._options)
        url, token = conn.get_auth()
        urlparts = urlparse(url)
        # generate swift path /v1/<account>/<container>/<userid>/path
        swift_path = '/'.join((urlparts.path, container, object_name))
        # TODO: valid for 5 minutes
        temp_url = generate_temp_url(swift_path, 300, self.temp_url_key,
                                     method='GET')
        return urljoin(url, temp_url)
class SwiftObjectStorageDriver(ObjectStorageDriver):
    """
    Archive driver using swift-api as backing store.

    Buckets presented as part of object lookup in this API are mapped to
    object key prefixes in the backing Swift container so that a single
    container can be used since namespaces are limited.
    """
    __config_name__ = 'swift'
    __driver_version__ = '1'
    __uri_scheme__ = 'swift'

    _key_format = '{prefix}{userid}/{container}/{key}'
    DEFAULT_AUTH_TIMEOUT = 10

    def __init__(self, config):
        """Build the Swift client from *config* and verify it works.

        ``container`` and ``create_container`` are driver settings; every
        other entry of *config* is handed to SwiftService as an option.

        Raises:
            ValueError: no ``container`` is configured.
        """
        super(SwiftObjectStorageDriver, self).__init__(config)

        # Initialize the client
        self.client_config = config
        self.container_name = self.config.get('container')
        self.can_create_container = self.config.get('create_container', False)
        self.auth_options = copy.copy(self.client_config)
        # These two are driver settings, not SwiftService options.
        self.auth_options.pop('container', None)
        self.auth_options.pop('create_container', None)
        self.client = SwiftService(options=self.auth_options)

        if not self.container_name:
            raise ValueError(
                'Cannot configure swift driver with out a provided container to use')

        self.prefix = self.config.get('anchore_key_prefix', '')

        self._check_creds()
        self._check_container()

    def _check_creds(self):
        """
        Simple operation to verify creds work without state change

        :return: True on success
        """
        try:
            resp = self.client.stat()
            if resp['success']:
                return True
            elif resp.get('error') and resp.get('error').http_status in [401, 403]:
                raise BadCredentialsError(self.auth_options, endpoint=None,
                                          cause=resp.get('error'))
            elif resp.get('error'):
                raise DriverConfigurationError(cause=resp.get('error'))
            else:
                raise DriverConfigurationError(
                    Exception(
                        'Got unsuccessful response from stat operation against service: {}'
                        .format(resp)))
        except SwiftError as e:
            raise DriverConfigurationError(e)

    def _check_container(self):
        """Verify the configured container exists, creating it if allowed.

        Raises:
            DriverConfigurationError: the stat failed and the container may
                not be created, or the failure was not a 404.
        """
        try:
            self.client.stat(container=self.container_name)
        except SwiftError as e:
            if e.exception.http_status == 404 and self.can_create_container:
                try:
                    self.client.post(container=self.container_name)
                except Exception as create_err:
                    logger.exception(create_err)
                    raise
            else:
                raise DriverConfigurationError(e)

    def _build_key(self, userId, usrBucket, key):
        """Return the object key for a user/bucket/key triple."""
        return self._key_format.format(prefix=self.prefix, userid=userId,
                                       container=usrBucket, key=key)

    def _parse_uri(self, uri):
        """Split a ``swift://container/key`` uri into (container, key)."""
        parsed = urllib.parse.urlparse(uri, scheme=self.__uri_scheme__)
        container = parsed.hostname
        key = parsed.path[1:]  # Strip leading '/'
        return container, key

    def get_by_uri(self, uri):
        """Return the content stored at *uri* as bytes.

        Raises:
            ObjectKeyNotFoundError: the download was not successful.
            ObjectStorageDriverError: the swift client raised SwiftError.
            Exception: swift reported an unexpected action.
        """
        try:
            container, key = self._parse_uri(uri)
            if container != self.container_name:
                logger.warning(
                    'Container mismatch between content_uri and configured cotnainer name: {} in db record, but {} in config'
                    .format(container, self.container_name))

            resp = self.client.download(container=container, objects=[key],
                                        options={'out_file': '-'})
            for obj in resp:
                if 'contents' in obj and obj['action'] == 'download_object':
                    content = b''.join([x for x in obj['contents']])
                    ret = utils.ensure_bytes(content)
                    return (ret)
                elif obj['action'] == 'download_object' and not obj['success']:
                    # Report which object was missing instead of blanks.
                    raise ObjectKeyNotFoundError(bucket=container, key=key,
                                                 userId='', caused_by=None)
                raise Exception(
                    'Unexpected operation/action from swift: {}'.format(
                        obj['action']))
        except SwiftError as e:
            raise ObjectStorageDriverError(cause=e)

    def delete_by_uri(self, uri):
        """Delete the object at *uri*.

        Returns True once a successful ``delete_object`` result is seen,
        otherwise None.
        """
        container, key = self._parse_uri(uri)
        if container != self.container_name:
            logger.warning(
                'Container mismatch between content_uri and configured bucket name: {} in db record, but {} in config'
                .format(container, self.container_name))

        resp = self.client.delete(container=container, objects=[key])
        for r in resp:
            if r['success'] and r['action'] == 'delete_object':
                return True

    def exists(self, uri):
        """Return whether the object at *uri* exists, without downloading it.

        Raises:
            ObjectStorageDriverError: the swift client raised SwiftError.
            Exception: swift reported an unexpected action.
        """
        try:
            container, key = self._parse_uri(uri)
            if container != self.container_name:
                logger.warning(
                    'Bucket mismatch between content_uri and configured bucket name: {} in db record, but {} in config'
                    .format(container, self.container_name))

            resp = self.client.download(container=container, objects=[key],
                                        options={'out_file': '-',
                                                 'no_download': True})
            for obj in resp:
                if 'success' in obj and obj['success'] and obj['action'] == 'download_object':
                    return True
                elif obj['action'] == 'download_object' and not obj['success']:
                    return False
                raise Exception(
                    'Unexpected operation/action from swift: {}'.format(
                        obj['action']))
        except SwiftError as e:
            raise ObjectStorageDriverError(cause=e)

    def get(self, userId, bucket, key):
        """Return the content stored for the user/bucket/key triple."""
        return self.get_by_uri(self.uri_for(userId, bucket, key))

    def put(self, userId, bucket, key, data):
        """Store *data* (bytes) for the user/bucket/key triple.

        Returns the uri of the stored object.

        Raises:
            Exception: no successful ``upload_object`` result was reported.
        """
        uri = self.uri_for(userId, bucket, key)
        swift_bucket, swift_key = self._parse_uri(uri)
        obj = SwiftUploadObject(object_name=swift_key, source=io.BytesIO(data))
        resp = self.client.upload(container=swift_bucket, objects=[obj])
        # Results for other actions (e.g. container creation) may come
        # first, so keep scanning until the upload itself is confirmed.
        for upload in resp:
            if upload['action'] == 'upload_object' and upload['success']:
                return uri
        raise Exception('Failed uploading object to swift')

    def delete(self, userId, bucket, key):
        """Delete the object stored for the user/bucket/key triple."""
        return self.delete_by_uri(self.uri_for(userId, bucket, key))

    def uri_for(self, userId, bucket, key):
        """Return the ``swift://<container>/<key>`` uri for the triple."""
        return '{}://{}/{}'.format(self.__uri_scheme__, self.container_name,
                                   self._build_key(userId, bucket, key))
class SwiftBackend(duplicity.backend.Backend):
    u"""
    Backend for Swift

    Credentials are read from the ``SWIFT_*`` environment variables. Two
    clients are kept: ``self.conn`` (a swiftclient ``Connection``) for plain
    transfers and listings, and ``self.svc`` (a ``SwiftService``) for
    segmented uploads, deletes and size queries.
    """

    def __init__(self, parsed_url):
        duplicity.backend.Backend.__init__(self, parsed_url)

        try:
            from swiftclient.service import SwiftService
            from swiftclient import Connection
            from swiftclient import ClientException
        except ImportError as e:
            raise BackendException(u"""\
Swift backend requires the python-swiftclient library.
Exception: %s""" % str(e))

        self.resp_exc = ClientException
        conn_kwargs = {}
        os_options = {}
        svc_options = {}

        # if the user has already authenticated
        if u'SWIFT_PREAUTHURL' in os.environ and u'SWIFT_PREAUTHTOKEN' in os.environ:
            conn_kwargs[u'preauthurl'] = os.environ[u'SWIFT_PREAUTHURL']
            conn_kwargs[u'preauthtoken'] = os.environ[u'SWIFT_PREAUTHTOKEN']
        else:
            if u'SWIFT_USERNAME' not in os.environ:
                raise BackendException(u'SWIFT_USERNAME environment variable '
                                       u'not set.')

            if u'SWIFT_PASSWORD' not in os.environ:
                raise BackendException(u'SWIFT_PASSWORD environment variable '
                                       u'not set.')

            if u'SWIFT_AUTHURL' not in os.environ:
                raise BackendException(u'SWIFT_AUTHURL environment variable '
                                       u'not set.')

            svc_options[u'os_username'] = conn_kwargs[u'user'] = os.environ[u'SWIFT_USERNAME']
            svc_options[u'os_password'] = conn_kwargs[u'key'] = os.environ[u'SWIFT_PASSWORD']
            svc_options[u'os_auth_url'] = conn_kwargs[u'authurl'] = os.environ[u'SWIFT_AUTHURL']

        if u'SWIFT_AUTHVERSION' in os.environ:
            svc_options[u'auth_version'] = conn_kwargs[u'auth_version'] = \
                os.environ[u'SWIFT_AUTHVERSION']
            if os.environ[u'SWIFT_AUTHVERSION'] == u'3':
                # environment variable -> os_options key, keystone v3 only
                v3_options = (
                    (u'SWIFT_USER_DOMAIN_NAME', u'user_domain_name'),
                    (u'SWIFT_USER_DOMAIN_ID', u'user_domain_id'),
                    (u'SWIFT_PROJECT_DOMAIN_NAME', u'project_domain_name'),
                    (u'SWIFT_PROJECT_DOMAIN_ID', u'project_domain_id'),
                    (u'SWIFT_TENANTNAME', u'tenant_name'),
                    (u'SWIFT_ENDPOINT_TYPE', u'endpoint_type'),
                    (u'SWIFT_USERID', u'user_id'),
                    (u'SWIFT_TENANTID', u'tenant_id'),
                    (u'SWIFT_REGIONNAME', u'region_name'),
                )
                for env_name, option_name in v3_options:
                    if env_name in os.environ:
                        os_options[option_name] = os.environ[env_name]
        else:
            conn_kwargs[u'auth_version'] = u'1'
        if u'SWIFT_TENANTNAME' in os.environ:
            conn_kwargs[u'tenant_name'] = os.environ[u'SWIFT_TENANTNAME']
        if u'SWIFT_REGIONNAME' in os.environ:
            os_options.update(
                {u'region_name': os.environ[u'SWIFT_REGIONNAME']})

        # formatting options for swiftclient.SwiftService
        for key in os_options.keys():
            svc_options[u'os_' + key] = os_options[key]

        conn_kwargs[u'os_options'] = os_options

        # This folds the null prefix and all null parts, which means that:
        #  //MyContainer/ and //MyContainer are equivalent.
        #  //MyContainer//My/Prefix/ and //MyContainer/My/Prefix are equivalent.
        url_parts = [x for x in parsed_url.path.split(u'/') if x != u'']

        self.container = url_parts.pop(0)
        if url_parts:
            self.prefix = u'%s/' % u'/'.join(url_parts)
        else:
            self.prefix = u''

        policy = config.swift_storage_policy
        policy_header = u'X-Storage-Policy'

        container_metadata = None
        try:
            log.Debug(u"Starting connection with arguments:'%s'" % conn_kwargs)
            self.conn = Connection(**conn_kwargs)
            container_metadata = self.conn.head_container(self.container)
        except ClientException as e:
            # Not fatal: container_metadata stays None and the container is
            # created below.
            log.Debug(u"Connection failed: %s %s" % (e.__class__.__name__, str(e)))
        except Exception as e:
            log.FatalError(
                u"Connection failed: %s %s" % (e.__class__.__name__, str(e)),
                log.ErrorCode.connection_failed)

        if container_metadata is None:
            log.Info(u"Creating container %s" % self.container)
            try:
                headers = dict([[policy_header, policy]]) if policy else None
                self.conn.put_container(self.container, headers=headers)
            except Exception as e:
                log.FatalError(
                    u"Container creation failed: %s %s" % (e.__class__.__name__, str(e)),
                    log.ErrorCode.connection_failed)
        elif policy and container_metadata[policy_header.lower()] != policy:
            log.FatalError(
                u"Container '%s' exists but its storage policy is '%s' not '%s'."
                % (self.container, container_metadata[policy_header.lower()], policy))
        else:
            log.Debug(u"Container already created: %s" % container_metadata)

        # checking service connection
        try:
            log.Debug(u"Starting Swiftservice: '%s'" % svc_options)
            self.svc = SwiftService(options=svc_options)
            container_stat = self.svc.stat(self.container)
        except ClientException as e:
            log.FatalError(
                u"Connection failed: %s %s" % (e.__class__.__name__, str(e)),
                log.ErrorCode.connection_failed)
        log.Debug(u"Container stats: %s" % container_stat)

    def _error_code(self, operation, e):  # pylint: disable=unused-argument
        """Map a swiftclient 404 to ``backend_not_found``; otherwise None."""
        if isinstance(e, self.resp_exc):
            if e.http_status == 404:
                return log.ErrorCode.backend_not_found

    def _put(self, source_path, remote_filename):
        """Upload *source_path* as ``prefix + remote_filename``.

        Files of at least ``config.mp_segment_size`` bytes (when that setting
        is positive) are uploaded in segments through SwiftService; all other
        files go through a single ``put_object``.

        Raises:
            BackendException: a segmented upload reported a failure.
        """
        lp = util.fsdecode(source_path.name)
        rp = self.prefix + util.fsdecode(remote_filename)
        if config.mp_segment_size > 0:
            from swiftclient.service import SwiftUploadObject
            st = os.stat(lp)
            # only upload using Dynamic Large Object if mpvolsize is triggered
            if st.st_size >= config.mp_segment_size:
                log.Debug(u"Uploading Dynamic Large Object")
                mp = self.svc.upload(
                    self.container,
                    [SwiftUploadObject(lp, object_name=rp)],
                    options={u'segment_size': config.mp_segment_size})
                uploads = [a for a in mp if u'container' not in a[u'action']]
                for upload in uploads:
                    if not upload[u'success']:
                        raise BackendException(upload[u'traceback'])
                return
        log.Debug(u"Uploading '%s' to '%s' in remote container '%s'" % (lp, rp, self.container))
        # Close the local file as soon as the upload is done.
        with open(lp, u'rb') as contents:
            self.conn.put_object(container=self.container,
                                 obj=rp,
                                 contents=contents)

    def _get(self, remote_filename, local_path):
        """Download ``prefix + remote_filename`` into *local_path*."""
        headers, body = self.conn.get_object(self.container,
                                             self.prefix + util.fsdecode(remote_filename),
                                             resp_chunk_size=1024)
        with open(local_path.name, u'wb') as f:
            for chunk in body:
                f.write(chunk)

    def _list(self):
        """Return the names below the prefix, with the prefix removed."""
        headers, objs = self.conn.get_container(self.container,
                                                full_listing=True,
                                                path=self.prefix)
        # removes prefix from return values. should check for the prefix ?
        return [o[u'name'][len(self.prefix):] for o in objs]

    def _delete(self, filename):
        """Delete ``prefix + filename``."""
        # use swiftservice to correctly delete all segments in case of multipart uploads
        # The result generator has to be consumed for the delete to happen.
        list(self.svc.delete(self.container,
                             [self.prefix + util.fsdecode(filename)]))

    def _query(self, filename):
        """Return ``{'size': <bytes>}`` for ``prefix + filename``."""
        # use swiftservice to correctly report filesize in case of multipart uploads
        sobject = list(self.svc.stat(self.container,
                                     [self.prefix + util.fsdecode(filename)]))[0]
        sobj = {u'size': int(sobject[u'headers'][u'content-length'])}
        log.Debug(u"Objectquery: '%s' has size %s." % (util.fsdecode(filename), sobj[u'size']))
        return sobj
class Swift(object):
    """Convenience wrapper around ``SwiftService`` for one configured container.

    ``config`` is read for two keys: ``config["swift"]`` (a mapping of
    SwiftService option names to string values) and
    ``config["swift_container"]`` (the container every method operates on).
    """

    def __init__(self, config):
        """Open the service and make sure the configured container exists.

        A ``SwiftError`` raised while listing or creating containers is
        logged and not propagated.
        """
        self.config = config

        options = self._init_swift_options()
        options['object_uu_threads'] = 20
        self.swift = SwiftService(options=options)

        container = self.config["swift_container"]
        container_names = []
        try:
            for page in self.swift.list():
                if not page["success"]:
                    logging.error(
                        "error listing SWIFT object storage containers")
                    continue
                for item in page["listing"]:
                    container_names.append(item["name"])
                    if item["name"] == container:
                        print("using SWIFT", container, "container:", item)
        except SwiftError:
            logging.exception("error listing containers")

        if container in container_names:
            logging.debug(
                "container already exists on SWIFT object storage: " +
                container)
            return

        # The container was not found in the account listing: create it.
        try:
            self.swift.post(container=container)
        except SwiftError:
            logging.exception(
                "error creating SWIFT object storage container " + container)

    def _init_swift_options(self):
        """Return the entries of ``config["swift"]`` whose value is non-blank."""
        swift_config = self.config["swift"]
        return {
            key: swift_config[key]
            for key in swift_config
            if swift_config[key].strip()
        }

    def _log_failure(self, result, object_action, message):
        """Log one unsuccessful SwiftService result dictionary.

        ``message`` takes three ``%s`` arguments in the order object name,
        container name, error. It is used only when the failed action is
        ``object_action``; any other failed action logs just the error.
        """
        error = result['error']
        if result['action'] == object_action:
            logging.error(message, result['object'],
                          self.config["swift_container"], error)
        else:
            logging.error("%s", error)

    def upload_file_to_swift(self, file_path, dest_path=None):
        """
        Upload the given file to current SWIFT object storage container
        """
        # A single upload is the bulk upload of a one-element list.
        self.upload_files_to_swift([file_path], dest_path=dest_path)

    def upload_files_to_swift(self, file_paths, dest_path=None):
        """
        Bulk upload of a list of files to current SWIFT object storage container
        under the same destination path

        Each object is named after the file's base name, prefixed with
        ``dest_path + "/"`` when ``dest_path`` is given. Failures are logged,
        not raised.
        """
        objs = []
        for file_path in file_paths:
            object_name = os.path.basename(file_path)
            if dest_path is not None:
                object_name = dest_path + "/" + object_name
            objs.append(SwiftUploadObject(file_path, object_name=object_name))

        try:
            for result in self.swift.upload(self.config["swift_container"],
                                            objs):
                if not result['success']:
                    self._log_failure(
                        result, "upload_object",
                        "Failed to upload object %s to container %s: %s")
        except SwiftError:
            logging.exception("error uploading file to SWIFT container")

    def download_file(self, file_path, dest_path):
        """
        Download a file given a path and returns the download destination
        file path.

        The object is downloaded by SwiftService and the resulting local file
        is then moved to ``dest_path``. Returns the value of ``shutil.move``
        for the downloaded file, or ``None`` when the download failed
        (failures are logged, not raised).
        """
        destination = None
        try:
            for down_res in self.swift.download(
                    container=self.config["swift_container"],
                    objects=[file_path]):
                if down_res['success']:
                    destination = shutil.move(down_res['path'], dest_path)
                else:
                    logging.error("'%s' download failed" % down_res['object'])
        except SwiftError:
            logging.exception("error downloading file from SWIFT container")
        return destination

    def get_swift_list(self, dir_name=None):
        """
        Return all contents of a given dir in SWIFT object storage.
        Goes through the pagination to obtain all file names.

        The whole container is listed and names are filtered locally with
        ``str.startswith(dir_name)``; ``dir_name=None`` keeps every name.
        """
        result = []
        try:
            for page in self.swift.list(
                    container=self.config["swift_container"]):
                if not page["success"]:
                    logging.error(page["error"])
                    continue
                result.extend(
                    item["name"] for item in page["listing"]
                    if dir_name is None or item["name"].startswith(dir_name))
        except SwiftError:
            logging.exception("error listing SWIFT container")
        return result

    def remove_file(self, file_path):
        """
        Remove an existing file on the SWIFT object storage
        """
        try:
            for result in self.swift.delete(self.config["swift_container"],
                                            [file_path]):
                if not result['success']:
                    self._log_failure(
                        result, "delete_object",
                        "Failed to delete object %s from container %s: %s")
        except SwiftError:
            logging.exception("error removing file from SWIFT container")

    def remove_all_files(self):
        """
        Remove all the existing files on the SWIFT object storage

        Objects are deleted one listing page at a time.
        """
        container = self.config["swift_container"]
        try:
            for page in self.swift.list(container=container):
                if not page["success"]:
                    continue
                to_delete = [item["name"] for item in page["listing"]]
                for del_res in self.swift.delete(container=container,
                                                 objects=to_delete):
                    if not del_res['success']:
                        self._log_failure(
                            del_res, "delete_object",
                            "Failed to delete object %s from container %s: %s")
        except SwiftError:
            logging.exception("error removing all files from SWIFT container")
class SwiftFS(HasTraits):
    """Filesystem-style operations on top of a single Swift container.

    Object names are treated as paths split on ``delimiter``; a name that
    ends with the delimiter is treated as a directory.
    """

    container = Unicode(os.environ.get('CONTAINER', 'demo'))

    storage_url = Unicode(help="The base URL for containers",
                          default_value='http://example.com',
                          config=True)

    delimiter = Unicode("/", help="Path delimiter", config=True)

    root_dir = Unicode("/", config=True)

    log = logging.getLogger('SwiftFS')

    def __init__(self, **kwargs):
        """Open the Swift service and make sure the container exists.

        Raises HTTPError(404) when the container cannot be created.
        """
        # Name the class explicitly: super(self.__class__, self) recurses
        # forever as soon as this class is subclassed.
        super(SwiftFS, self).__init__(**kwargs)

        # With the python swift client, the connection is automagically
        # created using environment variables (I know... horrible or what?)
        self.log.info("using swift container `%s`", self.container)

        # open connection to swift container
        self.swift = SwiftService()

        # make sure container exists
        try:
            result = self.swift.post(container=self.container)
        except SwiftError as e:
            self.log.error("creating container %s", e.value)
            raise HTTPError(404, e.value)

        if not result["success"]:
            msg = "could not create container %s" % self.container
            self.log.error(msg)
            raise HTTPError(404, msg)

    # see 'list' at https://docs.openstack.org/developer/python-swiftclient/service-api.html
    # Returns a list of all objects that start with the prefix given.
    # Of course, in a proper hierarchical file-system, list-dir only returns
    # the files in that dir, so we need to filter the list to be ONLY those
    # objects where the 'hierarchical' bit of the name stops at the path given.
    @LogMethodResults()
    def listdir(self, path="", with_prefix=False, this_dir_only=True):
        """
        list all the "files" in the "directory" for the given path.

        If the 'this_dir_only' is False (it is True by default), then
        the full list of all objects in that path are returned (needed for a
        rename, for example)

        'with_prefix' is accepted but not used by this implementation.

        A SwiftError raised while listing is logged and whatever was
        collected so far is returned.

        returns a list of dictionaries for each object:
            {'bytes': 11,
             'hash': '3e25960a79dbc69b674cd4ec67a72c62',
             'last_modified': '2017-06-06T08:55:36.473Z',
             'name': 'foo/bar/thingamy.bob'}
        """
        files = []

        # Get all objects that match the known path
        path = self.clean_path(path)
        _opts = {'prefix': path}
        try:
            dir_listing = self.swift.list(container=self.container,
                                          options=_opts)
            for page in dir_listing:  # each page is up to 10,000 items
                if page["success"]:
                    files.extend(page["listing"])  # page is returning a list
                else:
                    raise page["error"]
        except SwiftError as e:
            self.log.error("SwiftFS.listdir %s", e.value)

        if this_dir_only:
            # make up the pattern to compile into our regex engine
            regex_delim = re.escape(self.delimiter)
            if len(path) > 0:
                regex_path = re.escape(path.rstrip(self.delimiter))
                pattern = '^({0}{1}[^{1}]+{1}?|{0})$'.format(
                    regex_path, regex_delim)
            else:
                pattern = '^[^{0}]+{0}?$'.format(regex_delim)
            self.log.debug("restrict directory pattern is: `%s`", pattern)
            regex = re.compile(pattern, re.UNICODE)

            files = [f for f in files if regex.match(f['name'])]

        return files

    # We can 'stat' files, but not directories
    @LogMethodResults()
    def isfile(self, path):
        """Return True when the first stat result for `path` is successful.

        Empty paths and paths ending with the delimiter are never files.
        Any exception raised by the stat call is logged and reported as
        False.
        """
        if path is None or path == '':
            self.log.debug("SwiftFS.isfile has no path, returning False")
            return False

        if path.endswith(self.delimiter):
            return False

        path = self.clean_path(path)
        try:
            response = self.swift.stat(container=self.container,
                                       objects=[path])
        except Exception as e:
            # Only SwiftError carries `.value`; fall back to the exception
            # itself, and stop here because there is no response to inspect.
            self.log.error("SwiftFS.isfile %s", getattr(e, 'value', e))
            return False

        _isfile = False
        for r in response:
            if r['success']:
                _isfile = True
            else:
                self.log.error('Failed to retrieve stats for %s' % r['object'])
            break  # only the first result is relevant
        return _isfile

    # We can 'list' directories, but not 'stat' them
    @LogMethodResults()
    def isdir(self, path):
        """Return True when listing with `path` as prefix succeeds.

        The root path (just the delimiter) is always a directory. A
        SwiftError raised by the list call is logged and reported as False.
        """
        # directories must have a trailing slash on them.
        # The core code seems to remove any trailing slash, so lets add it back
        # on
        if not path.endswith(self.delimiter):
            path = path + self.delimiter

        # Root directory checks
        if path == self.delimiter:  # effectively root directory
            self.log.debug("SwiftFS.isdir found root dir - returning True")
            return True

        path = self.clean_path(path)
        _opts = {}
        if re.search(r'\w', path):
            _opts = {'prefix': path}
        try:
            self.log.debug("SwiftFS.isdir setting prefix to '%s'", path)
            response = self.swift.list(container=self.container,
                                       options=_opts)
        except SwiftError as e:
            self.log.error("SwiftFS.isdir %s", e.value)
            return False  # no response to inspect

        _isdir = False
        for r in response:
            if r['success']:
                _isdir = True
            else:
                self.log.error('Failed to retrieve stats for %s' % path)
            break  # only the first page is relevant
        return _isdir

    @LogMethod()
    def cp(self, old_path, new_path):
        """Copy `old_path` to `new_path`."""
        self._copymove(old_path, new_path, with_delete=False)

    @LogMethod()
    def mv(self, old_path, new_path):
        """Copy `old_path` to `new_path`, then remove `old_path`."""
        self._copymove(old_path, new_path, with_delete=True)

    @LogMethod()
    def remove_container(self):
        """Delete the container when a stat of it reports success.

        SwiftErrors are logged and not propagated.
        """
        response = {}
        try:
            response = self.swift.stat(container=self.container)
        except SwiftError as e:
            self.log.error("SwiftFS.remove_container %s", e.value)

        if 'success' in response and response['success'] == True:
            try:
                response = self.swift.delete(container=self.container)
            except SwiftError as e:
                self.log.error("SwiftFS.remove_container %s", e.value)
                # `response` is still the stat dictionary here; iterating
                # it as delete results would fail.
                return
            for r in response:
                self.log.debug("SwiftFS.rm action: `%s` success: `%s`",
                               r['action'], r['success'])

    @LogMethod()
    def rm(self, path, recursive=False):
        """Remove the object at `path`.

        Returns False when `path` is neither a file nor a directory, or when
        the delete call raises SwiftError; True otherwise. Raises (through
        do_error) for the root directory and, when not recursive, for a
        directory that is not empty.
        """
        if path in ["", self.delimiter]:
            self.do_error('Cannot delete root directory', code=400)
            return False
        if not (self.isdir(path) or self.isfile(path)):
            return False

        if recursive:
            for f in self._walk_path(path, dir_first=True):
                self.log.debug("SwiftFS.rm recurse into `%s`", f)
                self.rm(f)
            self.log.info("SwiftFS.rm and now remove `%s`", path)
            self.rm(path)
        else:
            self.log.info("SwiftFS.rm not recursing for `%s`", path)
            files = self.listdir(path)
            # The listing may contain the entry for `path` itself; anything
            # else means the directory still has content.
            isEmpty = True
            if len(files) > 1:
                isEmpty = False
            if len(files) == 1 and files[0]['name'] != path:
                isEmpty = False
            if not isEmpty:
                self.do_error("directory %s not empty" % path, code=400)

            path = self.clean_path(path)
            try:
                response = self.swift.delete(container=self.container,
                                             objects=[path])
            except SwiftError as e:
                self.log.error("SwiftFS.rm %s", e.value)
                return False
            for r in response:
                self.log.debug("SwiftFS.rm action: `%s` success: `%s`",
                               r['action'], r['success'])
        return True

    @LogMethod()
    def _walk_path(self, path, dir_first=False):
        """Yield `path` and the names listed under it, recursively.

        With dir_first=False a path is yielded before what is listed under
        it; with dir_first=True it is yielded after. Entries that guess_type
        classifies as directories are walked recursively.
        """
        if not dir_first:
            yield path
        for f in self.listdir(path):
            if not dir_first:
                yield f['name']
            if self.guess_type(f['name']) == 'directory':
                for ff in self._walk_path(f['name'], dir_first=dir_first):
                    yield ff
            if dir_first:
                yield f['name']
        if dir_first:
            yield path

    # core function to copy or move file-objects
    # does clever recursive stuff for directory trees
    @LogMethod()
    def _copymove(self, old_path, new_path, with_delete=False):
        """Copy everything under `old_path` to `new_path`.

        Directories are re-created with mkdir, other objects are copied with
        the Swift copy call. With with_delete=True, `old_path` is removed
        recursively afterwards. SwiftErrors and error results carrying an
        exception are re-raised.
        """
        # check parent directory exists
        self.checkParentDirExists(new_path)

        for f in self._walk_path(old_path):
            new_f = f.replace(old_path, new_path, 1)
            if self.guess_type(f) == 'directory':
                self.mkdir(new_f)
            else:
                old_path = self.clean_path(old_path)
                new_path = self.clean_path(new_path)
                try:
                    response = self.swift.copy(
                        self.container, [f], {
                            'destination':
                            self.delimiter + self.container +
                            self.delimiter + new_f
                        })
                except SwiftError as e:
                    self.log.error(e.value)
                    raise
                for r in response:
                    if r["success"]:
                        if r["action"] == "copy_object":
                            self.log.debug("object %s copied from /%s/%s" %
                                           (r["destination"], r["container"],
                                            r["object"]))
                        if r["action"] == "create_container":
                            self.log.debug("container %s created" %
                                           r["container"])
                    else:
                        if "error" in r and isinstance(r["error"], Exception):
                            raise r["error"]

        # we always test for delete: file or directory...
        if with_delete:
            self.rm(old_path, recursive=True)

    # Directories are just objects that have a trailing '/'
    @LogMethod()
    def mkdir(self, path):
        """Create a directory object: `path` with exactly one trailing delimiter."""
        path = path.rstrip(self.delimiter)
        path = path + self.delimiter
        self._do_write(path, None)

    # This works by downloading the file to disk then reading the contents of
    # that file into memory, before deleting the file
    # NOTE this is reading text files!
    # NOTE this really only works with files in the local directory, but given
    # local filestore will disappear when the docker ends, I'm not too bothered.
    @LogMethod()
    def read(self, path):
        """Return the text content of the object at `path`.

        Returns '' when the download call raises SwiftError or no result is
        successful. Raises (through do_error) when `path` is a directory.
        """
        if self.guess_type(path) == "directory":
            msg = "cannot read from path %s: it is a directory" % path
            self.do_error(msg, code=400)

        content = ''
        fhandle, localFile = tempfile.mkstemp(prefix="swiftfs_")
        os.close(fhandle)
        try:
            path = self.clean_path(path)
            try:
                response = self.swift.download(container=self.container,
                                               objects=[path],
                                               options={"out_file": localFile})
            except SwiftError as e:
                self.log.error("SwiftFS.read %s", e.value)
                return ''

            for r in response:
                if r['success']:
                    self.log.debug("SwiftFS.read: using local file %s",
                                   localFile)
                    with open(localFile) as lf:
                        content = lf.read()
        finally:
            # Remove the temp file on every exit path, including the
            # early return above.
            try:
                os.remove(localFile)
            except OSError:
                pass
        return content

    # Write is 'upload' and 'upload' needs a "file" it can read from
    # We use io.BytesIO for this
    @LogMethod()
    def write(self, path, content):
        """Upload `content` as the object at `path`.

        Raises (through do_error) when `path` is a directory.
        """
        if self.guess_type(path) == "directory":
            msg = "cannot write to path %s: it is a directory" % path
            self.do_error(msg, code=400)

        self._do_write(path, content)

    @LogMethod()
    def _make_intermedate_dirs(self, path):
        """Create every missing directory level above the last part of `path`.

        Returns False when one of the levels is reported as a file by
        isfile, True otherwise.
        """
        # we loop over the path, checking for an object at every level
        # of the hierarchy, except the last item (which may be a file,
        # or a directory itself)
        # Plain str.split: the delimiter is a literal, not a regex.
        path_parts = path.split(self.delimiter)
        current_path = ''
        for p in path_parts[:-1]:
            this_path = current_path + p + self.delimiter
            if self.isfile(this_path):
                self.log.error(
                    "SwiftFS._make_intermedate_dirs failure: dir exists at path `%s`"
                    % this_path)
                return False
            if not self.isdir(this_path):
                self.log.debug(
                    "SwiftFS._make_intermedate_dirs making directory")
                self._do_write(this_path, None)
            current_path = this_path

        return True

    @LogMethod()
    def _do_write(self, path, content):
        """Upload one object: an empty directory marker or `content` as UTF-8.

        Raises (through do_error) when the parent directory does not exist;
        SwiftError and ClientException from the upload are logged and
        re-raised.
        """
        # check parent directory exists
        self.checkParentDirExists(path)

        kind = self.guess_type(path)
        things = []
        if kind == "directory":
            self.log.debug("SwiftFS._do_write create directory")
            things.append(SwiftUploadObject(None, object_name=path))
        else:
            self.log.debug("SwiftFS._do_write create file/notebook from '%s'",
                           content)
            output = io.BytesIO(content.encode('utf-8'))
            things.append(SwiftUploadObject(output, object_name=path))

        # Now do the upload
        # NOTE(review): the cleaned path is computed after the upload objects
        # were built, so it does not affect the uploaded object name --
        # confirm whether the name was meant to be cleaned.
        path = self.clean_path(path)
        try:
            response = self.swift.upload(self.container, things)
        except SwiftError as e:
            self.log.error("SwiftFS._do_write swift-error: %s", e.value)
            raise
        except ClientException as e:
            # ClientException has no `.value`; log the exception itself.
            self.log.error("SwiftFS._do_write client-error: %s", e)
            raise
        for r in response:
            self.log.debug("SwiftFS._do_write action: '%s', response: '%s'",
                           r['action'], r['success'])

    @LogMethodResults()
    def guess_type(self, path, allow_directory=True):
        """
        Guess the type of a file.

        Returns "notebook" for names ending in ".ipynb", "directory" for
        names ending with the delimiter or reported as directories by isdir,
        and "file" otherwise. If allow_directory is False, don't consider the
        possibility that the file is a directory.

        Parameters
        ----------
            path: string
        """
        _type = ''
        if path.endswith(".ipynb"):
            _type = "notebook"
        elif allow_directory and path.endswith(self.delimiter):
            _type = "directory"
        elif allow_directory and self.isdir(path):
            _type = "directory"
        else:
            _type = "file"
        return _type

    @LogMethod()
    def clean_path(self, path):
        """Normalise `path`: no leading delimiter, one trailing delimiter on
        directories, and '' for the root."""
        # strip off any leading '/'
        path = path.lstrip(self.delimiter)
        if self.guess_type(path) == 'directory':
            # ensure we have a / at the end of directory paths
            path = path.rstrip(self.delimiter) + self.delimiter
            if path == self.delimiter:
                path = ''
        return path

    @LogMethodResults()
    def checkParentDirExists(self, path):
        """checks if the parent directory of a path exists"""
        p = path.strip(self.delimiter)
        p = p.split(self.delimiter)[:-1]
        p = self.delimiter.join(p)
        self.log.debug("SwiftFS.checkDirExists: directory name %s", p)
        if not self.isdir(p):
            self.do_error('parent directory does not exist %s' % p, code=400)

    @LogMethod()
    def do_error(self, msg, code=500):
        """Log `msg` and raise HTTPError(code, msg)."""
        self.log.error(msg)
        raise HTTPError(code, msg)