Example #1
 def info(self, key_or_file):
     '''
     Get info about a file
     '''
     from datetime import datetime
     k = path.parse(key_or_file)
     result = {
         'uri': '%s://%s%s' % (k.scheme, k.netloc, k.path),
     }
     if k.scheme == 'file':
         if not os.path.exists(k.path):
             raise KeyNotFound("Error getting info on %s: File doesn't exist" % (key_or_file, ))
         stat = os.stat(k.path)
         result['size'] = stat.st_size
         result['last_modified'] = datetime.fromtimestamp(stat.st_mtime)
     elif k.scheme == 's3':
         remote_object = self._lookup(k.netloc, k.path)
         if remote_object is None:
             raise KeyNotFound("Error getting info on %s: Key doesn't exist" % (key_or_file, ))
         result['size'] = remote_object.size
         result['last_modified'] = datetime.strptime(remote_object.last_modified, "%a, %d %b %Y %H:%M:%S GMT")
         result['content_type'] = remote_object.content_type
         result['content_encoding'] = remote_object.content_encoding
         result['encrypted'] = bool(remote_object.encrypted)
         result['acl'] = remote_object.get_acl()
         result['owner'] = remote_object.owner
         result['version_id'] = remote_object.version_id
     else:
         raise InvalidSchemeException("URI Scheme %s is not implemented" % k.scheme)
     return result
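
A minimal usage sketch for info, assuming baiji exposes it as a module-level helper on baiji.s3 the same way the test in Example #2 uses s3.cp (the bucket and key here are hypothetical):

from baiji import s3
from baiji.exceptions import KeyNotFound

try:
    meta = s3.info('s3://my-bucket/some/key.txt')  # hypothetical key
    print(meta['uri'], meta['size'], meta['last_modified'])
except KeyNotFound:
    print('no such file or key')
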
Example #2
 def test_s3_cp_download_lookup_recover_in_one_retry(
         self, ensure_integrity_mock):
     from baiji.exceptions import KeyNotFound
     ensure_integrity_mock.side_effect = [
         KeyNotFound('key not found'), None
     ]
     s3.cp(self.existing_remote_file, self.tmp_dir, force=True)
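
The test above exercises cp's ability to recover when the post-download integrity check fails once. A sketch of that retry-once pattern with stand-in callables (download and ensure_integrity are hypothetical names, not baiji internals):

from baiji.exceptions import KeyNotFound

def download_with_retry(download, ensure_integrity, retries=1):
    # Attempt the download; if the integrity check raises KeyNotFound,
    # retry up to `retries` more times before letting the error propagate.
    for attempt in range(retries + 1):
        try:
            download()
            ensure_integrity()
            return
        except KeyNotFound:
            if attempt == retries:
                raise
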
Example #3
    def __init__(self,
                 key,
                 mode='r',
                 connection=None,
                 encrypt=True,
                 version_id=None):
        from baiji.connection import S3Connection
        self.encrypt = encrypt
        self.key = key
        if path.islocal(key):
            self.should_upload_on_close = False
            self.mode = FileMode(mode, allowed_modes='arwxb+t')
            from six.moves import builtins
            local_path = path.parse(key).path
            if self.mode.is_output and not os.path.exists(
                    os.path.dirname(local_path)):
                from baiji.util.shutillib import mkdir_p
                mkdir_p(os.path.dirname(local_path))
            try:
                # Use os.open to catch exclusive access to the file, but use open to get a nice, useful file object
                self.fd = os.open(local_path, self.mode.flags)
                self.f = builtins.open(local_path,
                                       self.mode.mode.replace('x', 'w'))
                os.close(self.fd)
            except OSError as e:
                import errno
                if e.errno == errno.EEXIST:
                    raise KeyExists("Local file exists: %s" % local_path)
                elif e.errno == errno.ENOENT:
                    raise KeyNotFound("Local file does not exist: %s" %
                                      local_path)
                else:
                    raise IOError(e.errno, "%s: %s" % (e.strerror, e.filename))
        else:
            if connection is None:
                connection = S3Connection()
            self.connection = connection

            self.mode = FileMode(mode, allowed_modes='rwxbt')
            self.should_upload_on_close = self.mode.is_output
            if self.mode.creating_exclusively:
                if self.connection.exists(self.key):
                    raise KeyExists("Key exists in bucket: %s" % self.key)
                else:
                    self.connection.touch(self.key, encrypt=self.encrypt)
            # Use w+ so we can read back the contents in upload()
            new_mode = ('w+' + ('b' if self.mode.binary else '') +
                        ('t' if self.mode.text else ''))
            from baiji.util import tempfile
            self.f = tempfile.NamedTemporaryFile(
                mode=new_mode,
                suffix=os.path.splitext(path.parse(self.key).path)[1])
            self.name = self.f.name
            self.remotename = key  # Used by some serialization code to find files which sit along side the file in question, like textures which sit next to a mesh file
            if self.mode.reading:
                self.connection.cp(self.key,
                                   self.name,
                                   force=True,
                                   version_id=version_id)
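
A sketch of how this constructor is usually reached, assuming baiji exposes it through a module-level s3.open and that the returned file-like object supports the context-manager protocol (the key is hypothetical; the mode letters follow the allowed_modes strings above):

from baiji import s3

# 'r' stages the key in a local temp file first; 'x' would raise KeyExists
# if the key already existed, and output modes upload the temp file on close.
with s3.open('s3://my-bucket/mesh.obj', 'r') as f:  # hypothetical key
    data = f.read()
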
Example #4
 def rm(self, key_or_file, version_id=None):
     '''
     Remove a key from AWS S3
     '''
     import shutil
     from baiji.util.munging import _strip_initial_slashes
     k = path.parse(key_or_file)
     if k.scheme == 'file':
         if os.path.isdir(k.path):
             shutil.rmtree(k.path)
         elif os.path.exists(k.path):
             return os.remove(k.path)
         else:
             raise KeyNotFound("%s does not exist" % key_or_file)
     elif k.scheme == 's3':
         if not self.exists(key_or_file, version_id=version_id):
             raise KeyNotFound("%s does not exist" % key_or_file)
         return self._bucket(k.netloc).delete_key(_strip_initial_slashes(k.path), version_id=version_id)
     else:
         raise InvalidSchemeException("URI Scheme %s is not implemented" % k.scheme)
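
Usage sketch, assuming a module-level s3.rm wrapper; the scheme dispatch above means it accepts both local paths and s3 URIs (the key is hypothetical):

from baiji import s3
from baiji.exceptions import KeyNotFound

try:
    s3.rm('s3://my-bucket/old/key.txt')  # hypothetical key
except KeyNotFound:
    pass  # already gone, which the caller may be happy with
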
Example #5
    def sync_file(self, src, dst, update=True, delete=False, progress=False, policy=None, encoding=None, encrypt=True, guess_content_type=False):
        '''
        Sync a file from src to dst.

        update: When True, update dst if it exists but contents do not match.
        delete: When True, remove dst if src does not exist. When False, raise
          an error if src does not exist.

        Because this is a file-by-file sync (not recursive, and not meant for
        directories), a src that is a directory is simply treated as mkdir_p(dst).
        '''
        from baiji.util.console import create_conditional_print
        print_verbose = create_conditional_print(progress)

        if path.isdirlike(src):
            print_verbose('{} is a directory'.format(src))
            if path.islocal(dst): # for remote paths, don't bother creating dirs; they don't really exist.
                from baiji.util.shutillib import mkdir_p
                mkdir_p(dst)
            return

        src_exists = self.exists(src)
        if not delete and not src_exists:
            raise KeyNotFound(
                "Error syncing {} to {}: Source doesn't exist".format(src, dst))

        dst_exists = self.exists(dst)

        needs_delete = dst_exists and not src_exists
        needs_fresh_copy = src_exists and not dst_exists
        needs_update = dst_exists and src_exists and self.etag(src) != self.etag(dst)

        if not needs_delete and not needs_fresh_copy and not needs_update:
            print_verbose('{} is up to date'.format(dst))
            return

        # At this point, exactly one of these should be true.
        assert needs_delete ^ needs_fresh_copy ^ needs_update

        if needs_fresh_copy:
            print_verbose('copying {} to {}'.format(src, dst))
            self.cp(src, dst, force=False, progress=progress, policy=policy, encoding=encoding, encrypt=encrypt, guess_content_type=guess_content_type)
        elif needs_update:
            print_verbose('file is out of date: {}'.format(dst))
            if update:
                print_verbose('copying {} to {}'.format(src, dst))
                self.cp(src, dst, force=True, progress=progress, policy=policy, encoding=encoding, encrypt=encrypt, guess_content_type=guess_content_type)
        elif needs_delete:
            print_verbose('source file does not exist: {}'.format(src))
            if delete:
                print_verbose('removing {}'.format(dst))
                self.rm(dst)
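
A sketch of the three outcomes, assuming a module-level s3.sync_file wrapper (all paths hypothetical):

from baiji import s3

# Fresh copy if dst is missing, etag-based update if it is stale, no-op if identical.
s3.sync_file('s3://my-bucket/data.csv', '/tmp/data.csv', update=True, progress=True)

# With delete=True, a missing src removes dst instead of raising KeyNotFound.
s3.sync_file('s3://my-bucket/gone.csv', '/tmp/gone.csv', delete=True)
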
Example #6
 def size(self, key_or_file, version_id=None):
     '''
     Return the size of a file. If it's on s3, don't download it.
     '''
     k = path.parse(key_or_file)
     if k.scheme == 'file':
         return os.path.getsize(k.path)
     elif k.scheme == 's3':
         remote_object = self._lookup(k.netloc, k.path, version_id=version_id)
         if remote_object is None:
             raise KeyNotFound("s3://%s/%s not found on s3" % (k.netloc, k.path))
         return remote_object.size
     else:
         raise InvalidSchemeException("URI Scheme %s is not implemented" % k.scheme)
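
Usage sketch, assuming a module-level s3.size; for s3 URIs it stats the key remotely rather than downloading it (the key is hypothetical):

from baiji import s3
from baiji.exceptions import KeyNotFound

try:
    nbytes = s3.size('s3://my-bucket/big.bin')  # no download happens
except KeyNotFound:
    nbytes = None
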
Example #7
        def lookup(self, version_id=None):
            from baiji.util.lookup import get_versioned_key_remote

            if self.is_file:
                raise ValueError(
                    "S3CopyOperation.CopyableKey.lookup called for local file")

            key = get_versioned_key_remote(self.bucket,
                                           self.remote_path,
                                           version_id=version_id)

            if not key:
                raise KeyNotFound("Error finding %s on s3: doesn't exist" %
                                  (self.uri))
            return key
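
A self-contained sketch of the raise-on-missing contract this helper enforces (size_of_version is hypothetical, for illustration only):

from baiji.exceptions import KeyNotFound

def size_of_version(copyable_key, version_id):
    # lookup raises KeyNotFound for a missing key or version, so callers
    # can treat "missing" as a normal outcome rather than an error.
    try:
        return copyable_key.lookup(version_id=version_id).size
    except KeyNotFound:
        return None
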
Example #8
 def encrypt_at_rest(self, key):
     '''
     This method takes a key on s3 and encrypts it.
     Note that calling this method on a local file is an error
     and that calling it on an s3 key that is already encrypted,
     while allowed, is a no-op.
     '''
     k = path.parse(key)
     if k.scheme != 's3':
         raise InvalidSchemeException("URI Scheme %s is not implemented" % k.scheme)
     remote_object = self._lookup(k.netloc, k.path)
     if remote_object is None:
         raise KeyNotFound("Error encrypting %s: Key doesn't exist" % (key, ))
     if not bool(remote_object.encrypted):
         bucket = self._bucket(k.netloc)
         src = k.path
         if src.startswith(path.sep):
             src = src[len(path.sep):] # NB: copy_key is failing with absolute src keys...
         bucket.copy_key(src, k.netloc, src, preserve_acl=True, metadata=None, encrypt_key=True)
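
Usage sketch, assuming a module-level s3.encrypt_at_rest; per the docstring, re-running it on an already-encrypted key is a harmless no-op (the key is hypothetical):

from baiji import s3
from baiji.exceptions import KeyNotFound

try:
    s3.encrypt_at_rest('s3://my-bucket/secret.bin')  # hypothetical key
except KeyNotFound:
    print('nothing to encrypt: key does not exist')
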
Example #9
    def execute(self):
        from boto.s3.connection import S3ResponseError
        if not self.force and self.dst.exists():
            if self.skip:
                import warnings
                warnings.warn(
                    "Skipping existing destination copying %s to %s: Destinaton exists"
                    % (self.src.uri, self.dst.uri))
                return
            else:
                raise KeyExists("Error copying %s to %s: Destinaton exists" %
                                (self.src.uri, self.dst.uri))

        if self.dst.is_file:
            self.prep_local_destination()

        try:
            if self.task == ('file', 'file'):
                self.local_copy()
            elif self.task == ('file', 's3'):
                self.upload()
            elif self.task == ('s3', 'file'):
                self.download()
            elif self.task == ('s3', 's3'):
                self.remote_copy()
            else:
                raise InvalidSchemeException(
                    "Copy for URI Scheme %s to %s is not implemented" %
                    self.task)
        except KeyNotFound:
            if self.dst.is_s3:
                try:
                    _ = self.dst.bucket
                except KeyNotFound:
                    raise KeyNotFound(
                        "Error copying {} to {}: Destination bucket doesn't exist"
                        .format(self.src.uri, self.dst.uri))
            if not self.src.exists():
                raise KeyNotFound(
                    "Error copying {} to {}: Source doesn't exist".format(
                        self.src.uri, self.dst.uri))
            else:
                raise KeyNotFound(
                    "Error copying {} to {}: Destination doesn't exist".format(
                        self.src.uri, self.dst.uri))
        except IOError as e:
            import errno
            if e.errno == errno.ENOENT:
                raise KeyNotFound(
                    "Error copying {} to {}: Source doesn't exist".format(
                        self.src.uri, self.dst.uri))
            else:
                raise S3Exception("Error copying {} to {}: {}".format(
                    self.src.uri, self.dst.uri, e))
        except S3ResponseError as e:
            if e.status == 403:
                raise S3Exception(
                    "HTTP Error 403: Permission Denied on {}".format(
                        " or ".join(
                            [x.uri for x in [self.src, self.dst] if x.is_s3])))
            else:
                raise
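
The test in Example #2 reaches this method through s3.cp. A sketch of handling the exceptions execute can surface, assuming KeyExists lives alongside KeyNotFound in baiji.exceptions (paths hypothetical):

from baiji import s3
from baiji.exceptions import KeyExists, KeyNotFound

try:
    s3.cp('s3://my-bucket/a.txt', '/tmp/a.txt')
except KeyExists:
    # Destination exists and force=False; retry with force=True to overwrite.
    s3.cp('s3://my-bucket/a.txt', '/tmp/a.txt', force=True)
except KeyNotFound:
    print('source, destination directory, or bucket does not exist')
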
Example #10
 def _get_etag(self, netloc, remote_path):
     k = self._lookup(netloc, remote_path)
     if k is None:
         raise KeyNotFound("s3://%s/%s not found on s3" % (netloc, remote_path))
     return k.etag.strip("\"") # because s3 seriously gives the md5sum back wrapped in an extra set of double quotes...
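
This stripped etag is what sync_file compares in Example #5. A sketch assuming a public s3.etag that, like sync_file's self.etag calls, accepts both local paths and s3 URIs (paths hypothetical):

from baiji import s3

# For non-multipart uploads the stripped etag is the key's md5 hexdigest,
# so an etag mismatch means the copies differ.
if s3.etag('s3://my-bucket/data.csv') != s3.etag('/tmp/data.csv'):
    s3.cp('s3://my-bucket/data.csv', '/tmp/data.csv', force=True)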