def __init__(self, *args, **kwargs):
    """Set up auditor state: counters, diskfile manager, and rate limits.

    Reads ``swift_dir``, ``files_per_second`` and ``bytes_per_second``
    from the daemon configuration (with defaults).
    """
    super(KineticAuditor, self).__init__(*args, **kwargs)
    # start every process with zeroed counters
    self.reset_stats()
    self.mgr = DiskFileManager(self.conf, self.logger)
    self.swift_dir = self.conf.get('swift_dir', '/etc/swift')
    # rate-limit knobs; stored as floats so ratelimiting math is exact
    self.max_files_per_second = float(
        self.conf.get('files_per_second', 20))
    self.max_bytes_per_second = float(
        self.conf.get('bytes_per_second', 10000000))
def __init__(self, *args, **kwargs):
    """Initialize the updater and attach a kinetic DiskFileManager."""
    super(KineticUpdater, self).__init__(*args, **kwargs)
    self.mgr = DiskFileManager(self.conf, self.logger)
class KineticUpdater(ObjectUpdater):
    """Object updater for kinetic devices.

    Walks the ``async_pending`` key range on each device, replays each
    pending container update, and deletes entries once every container
    replica has been told about the object.
    """

    def __init__(self, *args, **kwargs):
        super(KineticUpdater, self).__init__(*args, **kwargs)
        self.mgr = DiskFileManager(self.conf, self.logger)

    def run_forever(self, *args, **kwargs):
        """Run the updater continuously."""
        # random initial delay so a fleet of updaters doesn't sweep in sync
        time.sleep(random.random() * self.interval)
        while True:
            begin = time.time()
            self.logger.info(_('Begin object update sweep'))
            self.run_once(*args, **kwargs)
            elapsed = time.time() - begin
            self.logger.info(_('Object update sweep completed: %.02fs'),
                             elapsed)
            dump_recon_cache({'object_updater_sweep': elapsed},
                             self.rcache, self.logger)
            if elapsed < self.interval:
                time.sleep(self.interval - elapsed)

    def _get_devices(self):
        """Return the set of device identifiers across all policy rings."""
        # `if d` skips placeholder (None) entries in the ring's dev list
        return set([
            d['device']
            for policy in POLICIES
            for d in POLICIES.get_object_ring(int(policy),
                                              self.swift_dir).devs
            if d
        ])

    def run_once(self, *args, **kwargs):
        """Sweep every device once, tracking per-device success/failure."""
        self.stats = defaultdict(int)
        override_devices = list_from_csv(kwargs.get('devices'))
        devices = override_devices or self._get_devices()
        for device in devices:
            swept_ok = False
            try:
                self.object_sweep(device)
            except DiskFileDeviceUnavailable:
                self.logger.warning('Unable to connect to %s', device)
            except Exception:
                self.logger.exception('Unhandled exception trying to '
                                      'sweep object updates on %s', device)
            else:
                swept_ok = True
            if swept_ok:
                self.stats['device.success'] += 1
            else:
                self.stats['device.failures'] += 1

    def _find_updates_entries(self, device):
        """Yield pending-update keys stored on *device*.

        NOTE(review): *device* appears to be a ':'-separated connection
        locator (e.g. host:port) — confirm against DiskFileManager.
        """
        conn = self.mgr.get_connection(*device.split(':'))
        start_key = 'async_pending'
        end_key = 'async_pending/'
        for async_key in conn.getKeyRange(start_key, end_key).wait():
            yield async_key

    def object_sweep(self, device):
        """Process every async_pending entry found on one device."""
        self.logger.debug('Search async_pending on %r', device)
        for update_entry in self._find_updates_entries(device):
            self.stats['found_updates'] += 1
            if self.process_object_update(device, update_entry):
                self.stats['success'] += 1
            else:
                self.stats['failures'] += 1

    def _load_update(self, device, async_key):
        """Fetch and unpack (msgpack) one pending update blob."""
        conn = self.mgr.get_connection(*device.split(':'))
        entry = conn.get(async_key).wait()
        return msgpack.unpackb(entry.value)

    def _unlink_update(self, device, async_key):
        """Remove a fully-processed pending update; always returns True."""
        conn = self.mgr.get_connection(*device.split(':'))
        conn.delete(async_key).wait()
        return True

    def _save_update(self, device, async_key, update):
        """Re-pack and store a partially-processed update for retry."""
        conn = self.mgr.get_connection(*device.split(':'))
        conn.put(async_key, msgpack.packb(update)).wait()
        return True

    def process_object_update(self, device, update_entry):
        """Replay one container update against all container replicas.

        Returns True (and unlinks the entry) when every replica has
        acknowledged; otherwise records partial progress and returns False.
        """
        update = self._load_update(device, update_entry)
        headers = HeaderKeyDict(update['headers'])
        del headers['user-agent']
        successes = update.get('successes', [])
        part, nodes = self.get_container_ring().get_nodes(
            update['account'], update['container'])
        obj = '/%s/%s/%s' % (
            update['account'], update['container'], update['obj'])
        all_done = True
        made_progress = False
        for node in nodes:
            # skip replicas already updated on a previous pass
            if node['id'] in successes:
                continue
            node_ok, _ = self.object_update(
                node, part, update['op'], obj, headers)
            if node_ok:
                successes.append(node['id'])
                made_progress = True
            else:
                all_done = False
        if all_done:
            self.successes += 1
            self.logger.increment('successes')
            self.logger.debug('Update sent for %(obj)s %(path)s',
                              {'obj': obj, 'path': update_entry})
            self.logger.increment("unlinks")
            return self._unlink_update(device, update_entry)
        self.failures += 1
        self.logger.increment('failures')
        self.logger.debug('Update failed for %(obj)s %(path)s',
                          {'obj': obj, 'path': update_entry})
        if made_progress:
            # remember which replicas succeeded so retries skip them
            update['successes'] = successes
            self._save_update(device, update_entry, update)
        return all_done
class KineticAuditor(ObjectAuditor):
    """Object auditor for kinetic devices.

    Walks the ``objects`` key range on each device, re-reads every
    object, and quarantines any whose on-disk size or ETag disagrees
    with its stored metadata.
    """

    def __init__(self, *args, **kwargs):
        super(KineticAuditor, self).__init__(*args, **kwargs)
        self.reset_stats()
        self.mgr = DiskFileManager(self.conf, self.logger)
        self.swift_dir = self.conf.get('swift_dir', '/etc/swift')
        self.max_files_per_second = float(
            self.conf.get('files_per_second', 20))
        self.max_bytes_per_second = float(
            self.conf.get('bytes_per_second', 10000000))

    def reset_stats(self):
        """Zero all per-sweep counters and rate-limit accumulators."""
        self.stats = defaultdict(int)
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_files_processed = 0
        self.passes = 0
        self.quarantines = 0
        self.errors = 0

    def run_forever(self, *args, **kwargs):
        """Run the auditor continuously."""
        # random initial delay so a fleet of auditors doesn't sweep in sync
        time.sleep(random.random() * self.interval)
        while True:
            begin = time.time()
            self.logger.info(_('Begin object audit sweep'))
            self.run_once(*args, **kwargs)
            elapsed = time.time() - begin
            self.logger.info(_('Object audit sweep completed: %.02fs'),
                             elapsed)
            dump_recon_cache({'object_audit_sweep': elapsed},
                             self.rcache, self.logger)
            if elapsed < self.interval:
                time.sleep(self.interval - elapsed)
            self.reset_stats()

    def _get_devices(self):
        """Return the set of device identifiers across all policy rings."""
        # Ring dev lists can contain None placeholders for removed
        # devices; filter them out (as the updater's _get_devices does)
        # so d['device'] never subscripts None.
        return set([
            d['device']
            for policy in POLICIES
            for d in POLICIES.get_object_ring(int(policy),
                                              self.swift_dir).devs
            if d
        ])

    def _find_objects(self, device):
        """Yield object head keys stored on *device*.

        NOTE(review): *device* appears to be a ':'-separated connection
        locator (e.g. host:port) — confirm against DiskFileManager.
        """
        conn = self.mgr.get_connection(*device.split(':'))
        start_key = 'objects'
        end_key = 'objects/'
        for head_key in conn.getKeyRange(start_key, end_key).wait():
            yield head_key

    def _audit_object(self, device, head_key):
        """Re-read one object; quarantine and return False on mismatch.

        Verifies byte count against Content-Length and MD5 digest
        against the stored ETag, rate-limiting reads via
        ``max_bytes_per_second``.
        """
        df = self.mgr.get_diskfile_from_audit_location(device, head_key)
        etag = hashlib.md5()
        size = 0
        with df.open():
            metadata = df.get_metadata()
            for chunk in df:
                chunk_len = len(chunk)
                etag.update(chunk)
                size += chunk_len
                self.bytes_running_time = ratelimit_sleep(
                    self.bytes_running_time,
                    self.max_bytes_per_second,
                    incr_by=chunk_len)
                self.bytes_processed += chunk_len
                self.total_bytes_processed += chunk_len
            if size != int(metadata.get('Content-Length')):
                self.logger.warning(
                    'found object %r with size %r instead of %r',
                    head_key, size, metadata.get('Content-Length'))
                df.quarantine()
                return False
            got_etag = etag.hexdigest()
            expected_etag = metadata.get('ETag')
            if got_etag != expected_etag:
                self.logger.warning(
                    'found object %r with etag %r instead of %r',
                    head_key, got_etag, expected_etag)
                df.quarantine()
                return False
        return True

    def run_once(self, *args, **kwargs):
        """Audit every object on every device once, logging totals."""
        self.reset_stats()
        override_devices = list_from_csv(kwargs.get('devices'))
        devices = override_devices or self._get_devices()
        self.logger.info('Starting sweep of %r', devices)
        start = time.time()
        for device in devices:
            for location in self._find_objects(device):
                self.stats['found_objects'] += 1
                success = self._audit_object(device, location)
                if success:
                    self.stats['success'] += 1
                else:
                    self.stats['failures'] += 1
        self.logger.info('Finished sweep of %r (%ds) => %r',
                         devices, time.time() - start, self.stats)
def __init__(self, *args, **kwargs):
    """Initialize the updater: zeroed stat counters plus a kinetic
    DiskFileManager."""
    super(KineticUpdater, self).__init__(*args, **kwargs)
    self.stats = defaultdict(int)
    self.mgr = DiskFileManager(self.conf, self.logger)
class KineticUpdater(ObjectUpdater):
    """Object updater for kinetic devices.

    Walks the ``async_pending`` key range on each device, replays each
    pending container update, and deletes entries once every container
    replica has acknowledged (or returned 404).
    """

    def __init__(self, *args, **kwargs):
        super(KineticUpdater, self).__init__(*args, **kwargs)
        self.stats = defaultdict(int)
        self.mgr = DiskFileManager(self.conf, self.logger)

    def run_forever(self, *args, **kwargs):
        """Run the updater continuously."""
        # random initial delay so a fleet of updaters doesn't sweep in sync
        time.sleep(random.random() * self.interval)
        while True:
            begin = time.time()
            self.logger.info(_('Begin object update sweep'))
            self.run_once(*args, **kwargs)
            elapsed = time.time() - begin
            self.logger.info(_('Object update sweep completed: %.02fs'),
                             elapsed)
            dump_recon_cache({'object_updater_sweep': elapsed},
                             self.rcache, self.logger)
            if elapsed < self.interval:
                time.sleep(self.interval - elapsed)
            # fresh counters for the next sweep
            self.stats = defaultdict(int)

    def _get_devices(self):
        """Return the set of device identifiers across all policy rings."""
        # Ring dev lists can contain None placeholders for removed
        # devices; filter them out (as the other updater variant's
        # _get_devices does) so d['device'] never subscripts None.
        return set([
            d['device']
            for policy in POLICIES
            for d in POLICIES.get_object_ring(int(policy),
                                              self.swift_dir).devs
            if d
        ])

    def run_once(self, *args, **kwargs):
        """Sweep each device once; honors a 'devices' CSV override."""
        override_devices = list_from_csv(kwargs.get('devices'))
        devices = override_devices or self._get_devices()
        for device in devices:
            self.object_sweep(device)

    def _find_updates_entries(self, device):
        """Yield pending-update keys stored on *device*.

        NOTE(review): *device* appears to be a ':'-separated connection
        locator (e.g. host:port) — confirm against DiskFileManager.
        """
        conn = self.mgr.get_connection(*device.split(':'))
        start_key = 'async_pending'
        end_key = 'async_pending/'
        for async_key in conn.getKeyRange(start_key, end_key).wait():
            yield async_key

    def object_sweep(self, device):
        """Process every async_pending entry found on one device."""
        self.logger.debug('Search async_pending on %r', device)
        for update_entry in self._find_updates_entries(device):
            self.stats['found_updates'] += 1
            success = self.process_object_update(device, update_entry)
            if success:
                self.stats['success'] += 1
            else:
                self.stats['failures'] += 1

    def _load_update(self, device, async_key):
        """Fetch and unpack (msgpack) one pending update blob."""
        conn = self.mgr.get_connection(*device.split(':'))
        resp = conn.get(async_key)
        entry = resp.wait()
        return msgpack.unpackb(entry.value)

    def _unlink_update(self, device, async_key):
        """Remove a fully-processed pending update; always returns True."""
        conn = self.mgr.get_connection(*device.split(':'))
        conn.delete(async_key).wait()
        return True

    def _save_update(self, device, async_key, update):
        """Re-pack and store a partially-processed update for retry."""
        conn = self.mgr.get_connection(*device.split(':'))
        blob = msgpack.packb(update)
        conn.put(async_key, blob).wait()
        return True

    def process_object_update(self, device, update_entry):
        """Replay one container update against all container replicas.

        A replica counts as updated on any 2xx response or a 404.
        Returns True (and unlinks the entry) when every replica is done;
        otherwise records partial progress and returns False.
        """
        update = self._load_update(device, update_entry)
        headers = HeaderKeyDict(update['headers'])
        del headers['user-agent']
        successes = update.get('successes', [])
        part, nodes = self.get_container_ring().get_nodes(
            update['account'], update['container'])
        obj = '/%s/%s/%s' % (
            update['account'], update['container'], update['obj'])
        success = True
        new_successes = False
        for node in nodes:
            # skip replicas already updated on a previous pass
            if node['id'] not in successes:
                status = self.object_update(node, part, update['op'], obj,
                                            headers)
                if not is_success(status) and status != HTTP_NOT_FOUND:
                    success = False
                else:
                    successes.append(node['id'])
                    new_successes = True
        if success:
            self.successes += 1
            self.logger.increment('successes')
            self.logger.debug('Update sent for %(obj)s %(path)s',
                              {'obj': obj, 'path': update_entry})
            self.logger.increment("unlinks")
            return self._unlink_update(device, update_entry)
        else:
            self.failures += 1
            self.logger.increment('failures')
            self.logger.debug('Update failed for %(obj)s %(path)s',
                              {'obj': obj, 'path': update_entry})
            if new_successes:
                # remember which replicas succeeded so retries skip them
                update['successes'] = successes
                self._save_update(device, update_entry, update)
            return success