Example #1
0
 def __init__(self, *args, **kwargs):
     """Set up auditor state: counters, disk-file manager, rate limits."""
     super(KineticAuditor, self).__init__(*args, **kwargs)
     self.reset_stats()
     self.mgr = DiskFileManager(self.conf, self.logger)
     self.swift_dir = self.conf.get('swift_dir', '/etc/swift')
     # throttle knobs; config values arrive as strings, coerce to float
     files_per_sec = self.conf.get('files_per_second', 20)
     bytes_per_sec = self.conf.get('bytes_per_second', 10000000)
     self.max_files_per_second = float(files_per_sec)
     self.max_bytes_per_second = float(bytes_per_sec)
Example #2
0
 def __init__(self, *args, **kwargs):
     """Initialize the updater and create its DiskFileManager.

     The manager supplies the per-device connections used by the
     sweep helpers.
     """
     super(KineticUpdater, self).__init__(*args, **kwargs)
     self.mgr = DiskFileManager(self.conf, self.logger)
Example #3
0
class KineticUpdater(ObjectUpdater):
    """Object updater for kinetic-backed devices.

    Scans each device's 'async_pending' key range for queued container
    updates, replays them against the container ring, and deletes each
    entry once every container server has accepted the update.
    """

    def __init__(self, *args, **kwargs):
        super(KineticUpdater, self).__init__(*args, **kwargs)
        # DiskFileManager provides the per-device connections used below
        self.mgr = DiskFileManager(self.conf, self.logger)

    def run_forever(self, *args, **kwargs):
        """Run the updater continuously."""
        # random initial delay spreads sweep start times across daemons
        time.sleep(random.random() * self.interval)
        while True:
            begin = time.time()
            self.logger.info(_('Begin object update sweep'))
            self.run_once(*args, **kwargs)
            elapsed = time.time() - begin
            self.logger.info(_('Object update sweep completed: %.02fs'),
                             elapsed)
            dump_recon_cache({'object_updater_sweep': elapsed},
                             self.rcache, self.logger)
            if elapsed < self.interval:
                time.sleep(self.interval - elapsed)

    def _get_devices(self):
        """Return the set of device names across all storage policies.

        The ``if d`` guard skips None placeholders in a ring's dev list.
        """
        return set([
            d['device'] for policy in POLICIES for d in
            POLICIES.get_object_ring(int(policy), self.swift_dir).devs
            if d
        ])

    def run_once(self, *args, **kwargs):
        """Sweep every device once, tracking per-device success/failure.

        :param kwargs: may contain 'devices', a CSV string that
                       overrides the ring-derived device list
        """
        self.stats = defaultdict(int)
        override_devices = list_from_csv(kwargs.get('devices'))
        devices = override_devices or self._get_devices()
        for device in devices:
            success = False
            try:
                self.object_sweep(device)
            except DiskFileDeviceUnavailable:
                self.logger.warning('Unable to connect to %s', device)
            except Exception:
                self.logger.exception('Unhandled exception trying to '
                                      'sweep object updates on %s', device)
            else:
                success = True
            if success:
                self.stats['device.success'] += 1
            else:
                self.stats['device.failures'] += 1

    def _find_updates_entries(self, device):
        """Yield every async-pending key stored on *device*.

        *device* is split on ':' before being handed to the manager —
        presumably a host:port style identifier; confirm with callers.
        """
        conn = self.mgr.get_connection(*device.split(':'))
        start_key = 'async_pending'
        end_key = 'async_pending/'
        for async_key in conn.getKeyRange(start_key, end_key).wait():
            yield async_key

    def object_sweep(self, device):
        """Process all pending updates found on a single device."""
        self.logger.debug('Search async_pending on %r', device)
        for update_entry in self._find_updates_entries(device):
            self.stats['found_updates'] += 1
            success = self.process_object_update(device, update_entry)
            if success:
                self.stats['success'] += 1
            else:
                self.stats['failures'] += 1

    def _load_update(self, device, async_key):
        """Fetch and msgpack-decode the update stored under *async_key*."""
        # load update
        conn = self.mgr.get_connection(*device.split(':'))
        resp = conn.get(async_key)
        entry = resp.wait()
        update = msgpack.unpackb(entry.value)
        return update

    def _unlink_update(self, device, async_key):
        """Delete a processed async-pending entry; always returns True."""
        conn = self.mgr.get_connection(*device.split(':'))
        conn.delete(async_key).wait()
        return True

    def _save_update(self, device, async_key, update):
        """Re-serialize *update* and write it back under *async_key*."""
        conn = self.mgr.get_connection(*device.split(':'))
        blob = msgpack.packb(update)
        conn.put(async_key, blob).wait()
        return True

    def process_object_update(self, device, update_entry):
        """Replay one queued container update.

        Sends the update to each container node not already recorded in
        the entry's 'successes' list.  On full success the entry is
        deleted; on partial success the entry is rewritten with the
        nodes that did succeed so they are skipped next sweep.

        :returns: True if every node accepted the update
        """
        update = self._load_update(device, update_entry)

        # process update
        headers = HeaderKeyDict(update['headers'])
        # NOTE(review): the queued user-agent header is dropped before
        # forwarding — presumably so the updater supplies its own;
        # confirm against the enqueue path.
        del headers['user-agent']
        successes = update.get('successes', [])
        part, nodes = self.get_container_ring().get_nodes(
            update['account'], update['container'])
        obj = '/%s/%s/%s' % \
              (update['account'], update['container'], update['obj'])
        success = True
        new_successes = False
        for node in nodes:
            if node['id'] not in successes:
                new_success, node_id = self.object_update(
                    node, part, update['op'], obj, headers)
                if new_success:
                    successes.append(node['id'])
                    new_successes = True
                else:
                    success = False
        if success:
            self.successes += 1
            self.logger.increment('successes')
            self.logger.debug('Update sent for %(obj)s %(path)s',
                              {'obj': obj, 'path': update_entry})
            self.logger.increment("unlinks")
            return self._unlink_update(device, update_entry)
        else:
            self.failures += 1
            self.logger.increment('failures')
            self.logger.debug('Update failed for %(obj)s %(path)s',
                              {'obj': obj, 'path': update_entry})
            if new_successes:
                # remember partial progress so successful nodes are not
                # re-contacted on the next sweep
                update['successes'] = successes
                self._save_update(device, update_entry, update)
        return success
Example #4
0
class KineticAuditor(ObjectAuditor):
    """Object auditor for kinetic-backed devices.

    Walks every object key on each device, re-reads the data, and
    quarantines objects whose size or MD5 etag disagree with their
    stored metadata.
    """

    def __init__(self, *args, **kwargs):
        super(KineticAuditor, self).__init__(*args, **kwargs)
        self.reset_stats()
        self.mgr = DiskFileManager(self.conf, self.logger)
        self.swift_dir = self.conf.get('swift_dir', '/etc/swift')
        # throttle knobs; config values arrive as strings
        self.max_files_per_second = float(
            self.conf.get('files_per_second', 20))
        self.max_bytes_per_second = float(
            self.conf.get('bytes_per_second', 10000000))

    def reset_stats(self):
        """Zero all audit counters (called at the start of each sweep)."""
        self.stats = defaultdict(int)
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_files_processed = 0
        self.passes = 0
        self.quarantines = 0
        self.errors = 0

    def run_forever(self, *args, **kwargs):
        """Run the auditor continuously."""
        # random initial delay spreads sweep start times across daemons
        time.sleep(random.random() * self.interval)
        while True:
            begin = time.time()
            self.logger.info(_('Begin object audit sweep'))
            self.run_once(*args, **kwargs)
            elapsed = time.time() - begin
            self.logger.info(_('Object audit sweep completed: %.02fs'),
                             elapsed)
            dump_recon_cache({'object_audit_sweep': elapsed},
                             self.rcache, self.logger)
            if elapsed < self.interval:
                time.sleep(self.interval - elapsed)
            self.reset_stats()

    def _get_devices(self):
        """Return the set of device names across all storage policies.

        Rings may carry None placeholders for removed devices, so
        filter them out before dereferencing ``d['device']``.
        """
        return set([
            d['device'] for policy in POLICIES for d in
            POLICIES.get_object_ring(int(policy), self.swift_dir).devs
            if d
        ])

    def _find_objects(self, device):
        """Yield every object head key stored on *device*.

        *device* is split on ':' before being handed to the manager —
        presumably a host:port style identifier; confirm with callers.
        """
        conn = self.mgr.get_connection(*device.split(':'))
        start_key = 'objects'
        end_key = 'objects/'
        for head_key in conn.getKeyRange(start_key, end_key).wait():
            yield head_key

    def _audit_object(self, device, head_key):
        """Verify one object's size and etag against its metadata.

        :returns: True if the object is intact, False if it was
                  quarantined
        """
        df = self.mgr.get_diskfile_from_audit_location(
            device, head_key)
        etag = hashlib.md5()
        size = 0
        with df.open():
            metadata = df.get_metadata()
            for chunk in df:
                chunk_len = len(chunk)
                etag.update(chunk)
                size += chunk_len
                # rate-limit on bytes read to bound audit I/O impact
                self.bytes_running_time = ratelimit_sleep(
                    self.bytes_running_time,
                    self.max_bytes_per_second,
                    incr_by=chunk_len)
                self.bytes_processed += chunk_len
                self.total_bytes_processed += chunk_len
            if size != int(metadata.get('Content-Length')):
                self.logger.warning(
                    'found object %r with size %r instead of %r',
                    head_key, size, metadata.get('Content-Length'))
                df.quarantine()
                return False
            got_etag = etag.hexdigest()
            expected_etag = metadata.get('ETag')
            if got_etag != expected_etag:
                self.logger.warning(
                    'found object %r with etag %r instead of %r',
                    head_key, got_etag, expected_etag)
                df.quarantine()
                return False
        return True

    def run_once(self, *args, **kwargs):
        """Audit every object on every device once.

        :param kwargs: may contain 'devices', a CSV string that
                       overrides the ring-derived device list
        """
        self.reset_stats()
        override_devices = list_from_csv(kwargs.get('devices'))
        devices = override_devices or self._get_devices()
        self.logger.info('Starting sweep of %r', devices)
        start = time.time()
        for device in devices:
            for location in self._find_objects(device):
                self.stats['found_objects'] += 1
                try:
                    success = self._audit_object(device, location)
                except Exception:
                    # one broken object must not abort the whole sweep
                    self.logger.exception(
                        'Unhandled exception auditing %r on %r',
                        location, device)
                    success = False
                if success:
                    self.stats['success'] += 1
                else:
                    self.stats['failures'] += 1
        self.logger.info('Finished sweep of %r (%ds) => %r', devices,
                         time.time() - start, self.stats)
Example #5
0
 def __init__(self, *args, **kwargs):
     """Initialize the updater: sweep counters and disk-file manager."""
     super(KineticUpdater, self).__init__(*args, **kwargs)
     self.stats = defaultdict(int)
     self.mgr = DiskFileManager(self.conf, self.logger)
Example #6
0
class KineticUpdater(ObjectUpdater):
    """Object updater for kinetic-backed devices.

    Drains queued 'async_pending' container updates from each device,
    replays them against the container ring, and deletes each entry
    once every container server has accepted the update.
    """

    def __init__(self, *args, **kwargs):
        super(KineticUpdater, self).__init__(*args, **kwargs)
        self.stats = defaultdict(int)
        self.mgr = DiskFileManager(self.conf, self.logger)

    def run_forever(self, *args, **kwargs):
        """Run the updater continuously."""
        # random initial delay spreads sweep start times across daemons
        time.sleep(random.random() * self.interval)
        while True:
            begin = time.time()
            self.logger.info(_('Begin object update sweep'))
            self.run_once(*args, **kwargs)
            elapsed = time.time() - begin
            self.logger.info(_('Object update sweep completed: %.02fs'),
                             elapsed)
            dump_recon_cache({'object_updater_sweep': elapsed}, self.rcache,
                             self.logger)
            if elapsed < self.interval:
                time.sleep(self.interval - elapsed)
            self.stats = defaultdict(int)

    def _get_devices(self):
        """Return the set of device names across all storage policies.

        Rings may carry None placeholders for removed devices, so
        filter them out before dereferencing ``d['device']``.
        """
        return set([
            d['device'] for policy in POLICIES
            for d in POLICIES.get_object_ring(int(policy), self.swift_dir).devs
            if d
        ])

    def run_once(self, *args, **kwargs):
        """Sweep every device once.

        A failure on one device is logged and counted rather than
        allowed to abort the sweep of the remaining devices.

        :param kwargs: may contain 'devices', a CSV string that
                       overrides the ring-derived device list
        """
        override_devices = list_from_csv(kwargs.get('devices'))
        devices = override_devices or self._get_devices()
        for device in devices:
            try:
                self.object_sweep(device)
            except Exception:
                self.logger.exception('Unhandled exception trying to '
                                      'sweep object updates on %s', device)
                self.stats['device.failures'] += 1
            else:
                self.stats['device.success'] += 1

    def _find_updates_entries(self, device):
        """Yield every async-pending key stored on *device*.

        *device* is split on ':' before being handed to the manager —
        presumably a host:port style identifier; confirm with callers.
        """
        conn = self.mgr.get_connection(*device.split(':'))
        start_key = 'async_pending'
        end_key = 'async_pending/'
        for async_key in conn.getKeyRange(start_key, end_key).wait():
            yield async_key

    def object_sweep(self, device):
        """Process all pending updates found on a single device."""
        self.logger.debug('Search async_pending on %r', device)
        for update_entry in self._find_updates_entries(device):
            self.stats['found_updates'] += 1
            success = self.process_object_update(device, update_entry)
            if success:
                self.stats['success'] += 1
            else:
                self.stats['failures'] += 1

    def _load_update(self, device, async_key):
        """Fetch and msgpack-decode the update stored under *async_key*."""
        # load update
        conn = self.mgr.get_connection(*device.split(':'))
        resp = conn.get(async_key)
        entry = resp.wait()
        update = msgpack.unpackb(entry.value)
        return update

    def _unlink_update(self, device, async_key):
        """Delete a processed async-pending entry; always returns True."""
        conn = self.mgr.get_connection(*device.split(':'))
        conn.delete(async_key).wait()
        return True

    def _save_update(self, device, async_key, update):
        """Re-serialize *update* and write it back under *async_key*."""
        conn = self.mgr.get_connection(*device.split(':'))
        blob = msgpack.packb(update)
        conn.put(async_key, blob).wait()
        return True

    def process_object_update(self, device, update_entry):
        """Replay one queued container update.

        Sends the update to each container node not already recorded in
        the entry's 'successes' list.  On full success the entry is
        deleted; on partial success the entry is rewritten with the
        nodes that did succeed so they are skipped next sweep.

        :returns: True if every node accepted the update
        """
        update = self._load_update(device, update_entry)

        # process update
        headers = HeaderKeyDict(update['headers'])
        # NOTE(review): the queued user-agent header is dropped before
        # forwarding — presumably so the updater supplies its own;
        # confirm against the enqueue path.
        del headers['user-agent']
        successes = update.get('successes', [])
        part, nodes = self.get_container_ring().get_nodes(
            update['account'], update['container'])
        obj = '/%s/%s/%s' % \
              (update['account'], update['container'], update['obj'])
        success = True
        new_successes = False
        for node in nodes:
            if node['id'] not in successes:
                status = self.object_update(node, part, update['op'], obj,
                                            headers)
                # 404 counts as success: the container no longer exists
                if not is_success(status) and status != HTTP_NOT_FOUND:
                    success = False
                else:
                    successes.append(node['id'])
                    new_successes = True
        if success:
            self.successes += 1
            self.logger.increment('successes')
            self.logger.debug('Update sent for %(obj)s %(path)s', {
                'obj': obj,
                'path': update_entry
            })
            self.logger.increment("unlinks")
            return self._unlink_update(device, update_entry)
        else:
            self.failures += 1
            self.logger.increment('failures')
            self.logger.debug('Update failed for %(obj)s %(path)s', {
                'obj': obj,
                'path': update_entry
            })
            if new_successes:
                # remember partial progress so successful nodes are not
                # re-contacted on the next sweep
                update['successes'] = successes
                self._save_update(device, update_entry, update)
        return success