def info(self, key_or_file):
    ''' Get info about a file '''
    from datetime import datetime
    parsed = path.parse(key_or_file)
    result = {
        'uri': '%s://%s%s' % (parsed.scheme, parsed.netloc, parsed.path),
    }
    if parsed.scheme == 'file':
        if not os.path.exists(parsed.path):
            raise KeyNotFound("Error getting info on %s: File doesn't exist" % (key_or_file, ))
        file_stat = os.stat(parsed.path)
        result['size'] = file_stat.st_size
        result['last_modified'] = datetime.fromtimestamp(file_stat.st_mtime)
    elif parsed.scheme == 's3':
        remote_object = self._lookup(parsed.netloc, parsed.path)
        if remote_object is None:
            raise KeyNotFound("Error getting info on %s: Key doesn't exist" % (key_or_file, ))
        result.update({
            'size': remote_object.size,
            'last_modified': datetime.strptime(
                remote_object.last_modified, "%a, %d %b %Y %H:%M:%S GMT"),
            'content_type': remote_object.content_type,
            'content_encoding': remote_object.content_encoding,
            'encrypted': bool(remote_object.encrypted),
            'acl': remote_object.get_acl(),
            'owner': remote_object.owner,
            'version_id': remote_object.version_id,
        })
    else:
        raise InvalidSchemeException("URI Scheme %s is not implemented" % parsed.scheme)
    return result
def test_s3_cp_download_lookup_recover_in_one_retry(
        self, ensure_integrity_mock):
    from baiji.exceptions import KeyNotFound
    # First integrity check raises, the second passes: cp should retry
    # exactly once and complete without error.
    ensure_integrity_mock.side_effect = [KeyNotFound('key not found'), None]
    s3.cp(self.existing_remote_file, self.tmp_dir, force=True)
def __init__(self, key, mode='r', connection=None, encrypt=True, version_id=None):
    '''
    Open `key` (a local path or an s3 URI) as a file-like object.

    key: local path or s3 uri to open.
    mode: file mode string (r/w/a/x, with b/t/+ as allowed per scheme).
    connection: existing S3Connection to reuse; a new one is made if None.
    encrypt: request server-side encryption for keys written to s3.
    version_id: specific s3 object version to read.

    Raises KeyExists for exclusive-create ('x') on an existing target and
    KeyNotFound when a local file to read is missing.
    '''
    from baiji.connection import S3Connection
    self.encrypt = encrypt
    self.key = key
    if path.islocal(key):
        # Local file: no upload on close; just open it on disk.
        self.should_upload_on_close = False
        self.mode = FileMode(mode, allowed_modes='arwxb+t')
        from six.moves import builtins
        local_path = path.parse(key).path
        if self.mode.is_output and not os.path.exists(
                os.path.dirname(local_path)):
            from baiji.util.shutillib import mkdir_p
            mkdir_p(os.path.dirname(local_path))
        try:
            # Use os.open to catch exclusive access to the file, but use open to get a nice, useful file object
            self.fd = os.open(local_path, self.mode.flags)
            self.f = builtins.open(local_path, self.mode.mode.replace('x', 'w'))
            os.close(self.fd)
        except OSError as e:
            import errno
            # BUG FIX: errno values are ints and must be compared with `==`;
            # `is` only worked by accident via CPython small-int caching.
            if e.errno == errno.EEXIST:
                raise KeyExists("Local file exists: %s" % local_path)
            elif e.errno == errno.ENOENT:
                raise KeyNotFound("Local file does not exist: %s" % local_path)
            else:
                raise IOError(e.errno, "%s: %s" % (e.strerror, e.filename))
    else:
        # Remote key: stage contents in a named temp file and upload on close
        # when the mode writes.
        if connection is None:
            connection = S3Connection()
        self.connection = connection
        self.mode = FileMode(mode, allowed_modes='rwxbt')
        self.should_upload_on_close = self.mode.is_output
        if self.mode.creating_exclusively:
            if self.connection.exists(self.key):
                raise KeyExists("Key exists in bucket: %s" % self.key)
            else:
                # Reserve the key immediately so concurrent writers see it.
                self.connection.touch(self.key, encrypt=self.encrypt)
        # Use w+ so we can read back the contents in upload()
        new_mode = ('w+' +
                    (self.mode.binary and 'b' or '') +
                    (self.mode.text and 't' or ''))
        from baiji.util import tempfile
        self.f = tempfile.NamedTemporaryFile(
            mode=new_mode,
            suffix=os.path.splitext(path.parse(self.key).path)[1])
        self.name = self.f.name
        self.remotename = key  # Used by some serialization code to find files which sit along side the file in question, like textures which sit next to a mesh file
        if self.mode.reading:
            self.connection.cp(self.key, self.name, force=True, version_id=version_id)
def rm(self, key_or_file, version_id=None):
    ''' Remove a key from AWS S3 '''
    import shutil
    from baiji.util.munging import _strip_initial_slashes
    parsed = path.parse(key_or_file)
    if parsed.scheme == 'file':
        # Directories are removed recursively; plain files individually.
        if os.path.isdir(parsed.path):
            shutil.rmtree(parsed.path)
        elif os.path.exists(parsed.path):
            return os.remove(parsed.path)
        else:
            raise KeyNotFound("%s does not exist" % key_or_file)
    elif parsed.scheme == 's3':
        if not self.exists(key_or_file, version_id=version_id):
            raise KeyNotFound("%s does not exist" % key_or_file)
        key_name = _strip_initial_slashes(parsed.path)
        return self._bucket(parsed.netloc).delete_key(key_name, version_id=version_id)
    else:
        raise InvalidSchemeException("URI Scheme %s is not implemented" % parsed.scheme)
def sync_file(self, src, dst, update=True, delete=False, progress=False,
              policy=None, encoding=None, encrypt=True, guess_content_type=False):
    '''
    Sync a file from src to dst.

    update: When True, update dst if it exists but contents do not match.
    delete: When True, remove dst if src does not exist. When False, raise
       an error if src does not exist.

    As this function is a file by file sync, not applicable to directories
    nor recursive, src being a directory is best treated as mkdir_p(dst).
    '''
    from baiji.util.console import create_conditional_print
    print_verbose = create_conditional_print(progress)

    if path.isdirlike(src):
        print_verbose('{} is a directory'.format(src))
        if path.islocal(dst):
            # for remote paths, don't bother creating dirs; they don't really exist.
            from baiji.util.shutillib import mkdir_p
            mkdir_p(dst)
        return

    src_exists = self.exists(src)
    if not delete and not src_exists:
        raise KeyNotFound(
            "Error syncing {} to {}: Source doesn't exist".format(src, dst))
    dst_exists = self.exists(dst)

    needs_delete = dst_exists and not src_exists
    needs_fresh_copy = src_exists and not dst_exists
    needs_update = dst_exists and src_exists and self.etag(src) != self.etag(dst)

    if not (needs_delete or needs_fresh_copy or needs_update):
        print_verbose('{} is up to date'.format(dst))
        return

    # At this point, exactly one of these should be true.
    assert needs_delete ^ needs_fresh_copy ^ needs_update

    copy_kwargs = dict(progress=progress, policy=policy, encoding=encoding,
                       encrypt=encrypt, guess_content_type=guess_content_type)
    if needs_fresh_copy:
        print_verbose('copying {} to {}'.format(src, dst))
        self.cp(src, dst, force=False, **copy_kwargs)
    elif needs_update:
        print_verbose('file is out of date: {}'.format(dst))
        if update:
            print_verbose('copying {} to {}'.format(src, dst))
            self.cp(src, dst, force=True, **copy_kwargs)
    elif needs_delete:
        print_verbose('source file does not exist: {}'.format(src))
        if delete:
            print_verbose('removing {}'.format(dst))
            self.rm(dst)
def size(self, key_or_file, version_id=None):
    '''
    Return the size of a file. If it's on s3, don't download it.

    Raises KeyNotFound when the s3 key does not exist and
    InvalidSchemeException for unsupported URI schemes.
    '''
    k = path.parse(key_or_file)
    if k.scheme == 'file':
        return os.path.getsize(k.path)
    elif k.scheme == 's3':
        # BUG FIX: the original rebound `k` to the lookup result, so when the
        # key was missing, building the error message dereferenced
        # `None.netloc` and raised AttributeError instead of KeyNotFound.
        remote_object = self._lookup(k.netloc, k.path, version_id=version_id)
        if remote_object is None:
            raise KeyNotFound("s3://%s/%s not found on s3" % (k.netloc, k.path))
        return remote_object.size
    else:
        raise InvalidSchemeException("URI Scheme %s is not implemented" % k.scheme)
def lookup(self, version_id=None):
    '''Return the (optionally versioned) remote key object for this s3 path.'''
    from baiji.util.lookup import get_versioned_key_remote
    if self.is_file:
        raise ValueError(
            "S3CopyOperation.CopyableKey.lookup called for local file")
    remote_key = get_versioned_key_remote(
        self.bucket, self.remote_path, version_id=version_id)
    if remote_key:
        return remote_key
    raise KeyNotFound("Error finding %s on s3: doesn't exist" % (self.uri))
def encrypt_at_rest(self, key):
    '''
    This method takes a key on s3 and encrypts it.
    Note that calling this method on a local file is an error
    and that calling it on an s3 key that is already encrypted,
    while allowed, is a no-op.
    '''
    k = path.parse(key)
    if k.scheme != 's3':
        raise InvalidSchemeException("URI Scheme %s is not implemented" % k.scheme)
    remote_object = self._lookup(k.netloc, k.path)
    if remote_object is None:
        raise KeyNotFound("Error encrypting %s: Key doesn't exist" % (key, ))
    if remote_object.encrypted:
        return  # already encrypted: nothing to do
    bucket = self._bucket(k.netloc)
    src = k.path
    if src.startswith(path.sep):
        # NB: copy_key is failing with absolute src keys...
        src = src[len(path.sep):]
    # Copy the key onto itself with encrypt_key=True to encrypt in place.
    bucket.copy_key(src, k.netloc, src, preserve_acl=True, metadata=None, encrypt_key=True)
def execute(self):
    '''
    Perform the copy described by this operation (src -> dst), dispatching
    on the (src scheme, dst scheme) pair in self.task and translating
    low-level errors into baiji exceptions.

    Raises KeyExists when dst exists and neither force nor skip is set,
    KeyNotFound when the source, destination, or destination bucket is
    missing, S3Exception for IO errors and HTTP 403, and
    InvalidSchemeException for unsupported scheme pairs.
    '''
    from boto.s3.connection import S3ResponseError
    if not self.force and self.dst.exists():
        if self.skip:
            import warnings
            # NOTE(review): "Destinaton" typo is in the runtime message;
            # preserved here since callers/tests may match on it.
            warnings.warn(
                "Skipping existing destination copying %s to %s: Destinaton exists"
                % (self.src.uri, self.dst.uri))
            return
        else:
            raise KeyExists("Error copying %s to %s: Destinaton exists" %
                            (self.src.uri, self.dst.uri))
    if self.dst.is_file:
        self.prep_local_destination()
    try:
        # self.task is a (src_scheme, dst_scheme) tuple.
        if self.task == ('file', 'file'):
            self.local_copy()
        elif self.task == ('file', 's3'):
            self.upload()
        elif self.task == ('s3', 'file'):
            self.download()
        elif self.task == ('s3', 's3'):
            self.remote_copy()
        else:
            raise InvalidSchemeException(
                "Copy for URI Scheme %s to %s is not implemented" % self.task)
    except KeyNotFound:
        # Work out which end is actually missing so the error is precise:
        # first the destination bucket, then the source, else the destination.
        if self.dst.is_s3:
            try:
                _ = self.dst.bucket
            except KeyNotFound:
                raise KeyNotFound(
                    "Error copying {} to {}: Destination bucket doesn't exist"
                    .format(self.src.uri, self.dst.uri))
        if not self.src.exists():
            raise KeyNotFound(
                "Error copying {} to {}: Source doesn't exist".format(
                    self.src.uri, self.dst.uri))
        else:
            raise KeyNotFound(
                "Error copying {} to {}: Destination doesn't exist".format(
                    self.src.uri, self.dst.uri))
    except IOError as e:
        import errno
        # Missing local source surfaces as ENOENT; anything else is wrapped.
        if e.errno == errno.ENOENT:
            raise KeyNotFound(
                "Error copying {} to {}: Source doesn't exist".format(
                    self.src.uri, self.dst.uri))
        else:
            raise S3Exception("Error copying {} to {}: {}".format(
                self.src.uri, self.dst.uri, e))
    except S3ResponseError as e:
        if e.status == 403:
            # Name only the s3 side(s) in the permission error.
            raise S3Exception(
                "HTTP Error 403: Permission Denied on {}".format(
                    " or ".join(
                        [x.uri for x in [self.src, self.dst] if x.is_s3])))
        else:
            raise
def _get_etag(self, netloc, remote_path):
    '''Return the etag of an s3 key, with s3's wrapping quotes removed.'''
    remote_key = self._lookup(netloc, remote_path)
    if remote_key is None:
        raise KeyNotFound("s3://%s/%s not found on s3" % (netloc, remote_path))
    # because s3 seriously gives the md5sum back wrapped in an extra set of double quotes...
    return remote_key.etag.strip("\"")