Example #1
    def import_bucket_to_volume(self, bucket, volume_name):
        '''
        Import the bucket to a volume named in 'volume_name'.

        The 'bucket' argument can be either a boto.s3.bucket.Bucket object
        or a string.

        Before creating the volume, check whether there will be enough
        space available for it.

        Returns True if the bucket has been successfully imported; returns
        False (or raises an exception) otherwise.
        '''

        if isinstance(bucket, basestring):
            bucket = self.s3.get_bucket(bucket)
        volume_name = toutf8(volume_name)

        logger.info("Importing bucket '%s' to volume '%s'" %
                    (toutf8(bucket.name), volume_name))

        required_space = self.calculate_required_space(bucket, volume_name)
        self.check_quota(required_space, volume_name)
        if required_space == 0:
            logger.info("Nothing to import for bucket '%s'" %
                        toutf8(bucket.name))
            return False
        self.check_size(required_space, volume_name)

        size = self.volume_size or calculate_volume_size(required_space)
        self.create_volume(volume_name, size)
        self.copy_keys_parallelly(bucket, volume_name)

        return True
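
The method calls a module-level helper calculate_volume_size that is not shown; a minimal sketch of what it plausibly does (the block size and rounding policy below are assumptions, not the project's actual logic):

def calculate_volume_size(required_space, block_size=4096):
    # Assumed helper: round the required space up to a whole number of
    # blocks so the new volume is never created too small. The block
    # size and rounding policy are guesses.
    blocks = (required_space + block_size - 1) // block_size
    return blocks * block_size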
Example #2
    def calculate_required_space(self, bucket, volume_name):
        '''
        Return the total space (in bytes) needed on the volume to import
        the bucket: for every key whose size differs from its destination
        file, count the key's size plus the length of its file name.
        '''
        required_space = 0
        for key in bucket.list():
            dest = toutf8(key.name)
            if self.subdir:
                dest = join_posix_paths(toutf8(self.subdir), dest)
            if self._file_sizes_differ(key, volume_name, dest):
                required_space += key.size + len(toutf8(key.name))
        return required_space
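
join_posix_paths is likewise not shown; since SX volume paths are POSIX-style, it is presumably a thin wrapper over the standard library's posixpath.join (an assumption):

import posixpath

def join_posix_paths(*parts):
    # Assumed helper: join path components with forward slashes
    # regardless of the local OS, since SX volume paths are POSIX-style.
    return posixpath.join(*parts)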
Example #3
    def get_remaining_quota(self, volume_name):
        '''
        Return the remaining quota of the SX volume's owner (the existing
        owner if the volume exists, otherwise the designated owner).
        Return None if the owner has no quota set.
        '''

        volume_list = self.sx.listVolumes.json_call()[u'volumeList']
        if volume_name in volume_list:
            owner = volume_list[volume_name]['owner']
        else:
            owner = self.volume_owner

        user_list = self.sx.listUsers.json_call()
        try:
            user_quota = user_list[owner]['userQuota']
            user_quota_used = user_list[owner]['userQuotaUsed']
        except KeyError:
            raise S3FatalImportError('%s: no such user.' % toutf8(owner))

        if not user_quota:
            remaining = None
        else:
            remaining = user_quota - user_quota_used
            if remaining < 0:
                raise S3FatalImportError(
                    "The owner of volume '%s' has already used more than "
                    "their user quota." % volume_name)
        return remaining
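
For reference, the dictionary accesses above imply response shapes roughly like the following (the field names come from the code; the values and any omitted fields are illustrative only):

# Shape implied by self.sx.listVolumes.json_call() (illustrative values):
volume_list_response = {
    u'volumeList': {
        u'vol-photos': {u'owner': u'alice'},
    },
}

# Shape implied by self.sx.listUsers.json_call() (illustrative values):
user_list_response = {
    u'alice': {u'userQuota': 1073741824, u'userQuotaUsed': 524288},
}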
Example #4
    def _copy_key(self, key, volume_name, uploader):
        bucket_name = toutf8(key.bucket.name)
        source_filename = toutf8(key.name)
        file_size = key.size

        dest_filename = toutf8(key.name)
        if self.subdir:
            dest_filename = join_posix_paths(toutf8(self.subdir),
                                             dest_filename)

        logger.debug(
            "Attempting to copy 's3://%(bucket)s/%(source)s' "
            "to 'sx://%(cluster)s/%(volume)s/%(dest)s'." % {
                'source': source_filename,
                'bucket': bucket_name,
                'cluster': self.sx.cluster.name,
                'volume': volume_name,
                'dest': dest_filename
            })

        if key.version_id is not None:
            logger.warning(
                "Key '%s' is versioned; copying only the latest version" %
                source_filename)

        if self._file_sizes_differ(key, volume_name, dest_filename):
            with closing(self.stream_type(key)) as file_stream:
                uploader.upload_stream(volume_name, file_size, dest_filename,
                                       file_stream)
                logger.debug(
                    "'s3://%(bucket)s/%(source)s' successfully copied "
                    "to 'sx://%(cluster)s/%(volume)s/%(dest)s'." % {
                        'source': source_filename,
                        'bucket': bucket_name,
                        'cluster': self.sx.cluster.name,
                        'volume': volume_name,
                        'dest': dest_filename
                    })
        else:
            logger.debug("Source and destination of '%s' have the same size. "
                         "Key will not be copied." % source_filename)
Example #5
    def import_bucket(self, bucket):
        '''
        Import the bucket to a volume with a default name (volume_prefix
        concatenated with bucket name).

        The argument can be either a boto.s3.bucket.Bucket object or a
        string.

        Returns True if the bucket has been successfully imported; returns
        False (or raises an exception) otherwise.
        '''

        if isinstance(bucket, basestring):
            bucket_name = toutf8(bucket)
        else:
            bucket_name = toutf8(bucket.name)

        if self.volume_prefix:
            volume_name = toutf8(self.volume_prefix) + bucket_name
        else:
            volume_name = bucket_name

        return self.import_bucket_to_volume(bucket, volume_name)
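
A hypothetical call site, showing both accepted argument types (the importer instance and the s3 connection are assumptions; the snippets only show the importer's methods):

# Hypothetical usage -- 'importer' and 's3' are assumed to exist.
importer.import_bucket('my-bucket')              # bucket given as a string
importer.import_bucket(s3.get_bucket('backups')) # bucket given as a Bucket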
Example #6
    def copy_keys_parallelly(self, bucket, volume_name):
        '''
        Copy the keys of a given bucket in parallel, using threads.

        At this point it is assumed that the destination volume exists and
        contains enough free space to receive the keys.
        '''
        logger.debug("Attempting to copy keys from bucket '%s'." %
                     toutf8(bucket.name))
        # All workers share a single iterator over the bucket's keys.
        keyiter = iter(bucket.list())

        try:
            threads = []
            for i in range(self.worker_num):
                t = threading.Thread(target=self._copy_keys,
                                     args=(keyiter, volume_name))
                t.start()
                threads.append(t)
                logger.debug("Thread %s started." % t.name)

            # Block until a worker signals completion or failure.
            while not self._stopping_event.is_set():
                self._stopping_event.wait(self._event_timeout)
        except KeyboardInterrupt as exc:
            self._stopping_event.set()
            raise exc.__class__('Transfer terminated.')

        # Fetch the first exception raised by any worker, if there was one.
        try:
            exc_info = self._exception_queue.get(block=False)
        except Queue.Empty:
            exc_info = None

        # Reap the worker threads as they finish.
        while threads:
            for t in threads:
                t.join(self._join_timeout)
                if not t.is_alive():
                    threads.remove(t)
                    break

        # Reset the shared state so the importer can be reused.
        self._exception_queue = Queue.Queue()
        self._stopping_event.clear()

        if exc_info:
            # Re-raise the worker's exception with its original traceback
            # (Python 2 three-argument raise).
            raise exc_info[0], exc_info[1], exc_info[2]
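
The worker target _copy_keys is not shown. Given how it is started (a shared key iterator, the exception queue, and the stopping event), a plausible sketch looks like this; the uploader handling and any locking around the shared iterator are assumptions:

    def _copy_keys(self, keyiter, volume_name):
        # Sketch of the assumed worker loop: pull keys from the shared
        # iterator until it is exhausted, report failures through the
        # exception queue, and wake the main thread via the stopping event.
        # 'self.uploader' is hypothetical; needs 'import sys' at module level.
        try:
            for key in keyiter:
                if self._stopping_event.is_set():
                    break
                self._copy_key(key, volume_name, self.uploader)
        except Exception:
            self._exception_queue.put(sys.exc_info())
        finally:
            self._stopping_event.set()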
Example #7
    def import_all(self):
        '''Import all buckets from S3 to SX.'''

        buckets = self.s3.get_all_buckets()
        for bucket in buckets:
            try:
                self.import_bucket(bucket)

            except (boto.exception.S3ResponseError,
                    S3NonFatalImportError) as err:
                if isinstance(err, boto.exception.S3ResponseError):
                    err_info = '%s: %s %s' % (err.__class__.__name__,
                                              err.status, err.reason)
                else:
                    err_info = '%s: %s' % (err.__class__.__name__, str(err))
                logger.warning(
                    "Bucket '%s' import failed due to an error: %s" %
                    (toutf8(bucket.name), err_info))

            except BaseException as err:
                logger.error('Finishing import due to %s' %
                             err.__class__.__name__)
                raise
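
The exception classes used across these examples are not defined here; their usage suggests a small hierarchy along these lines (a sketch; the real definitions may differ):

class S3ImportError(Exception):
    '''Assumed common base class for import errors.'''

class S3NonFatalImportError(S3ImportError):
    '''Failure of a single bucket; caught and logged in import_all.'''

class S3FatalImportError(S3ImportError):
    '''Aborts the whole run; deliberately not caught in import_all.'''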