def test_split_policy_string(self):
    """
    split_policy_string() must map a policy-suffixed name to its
    (base, policy) pair, raise PolicyError for malformed suffixes,
    and act as the inverse of get_policy_string().
    """
    expectations = {
        'something': ('something', POLICIES[0]),
        'something-1': ('something', POLICIES[1]),
        'tmp': ('tmp', POLICIES[0]),
        'objects': ('objects', POLICIES[0]),
        'tmp-1': ('tmp', POLICIES[1]),
        'objects-1': ('objects', POLICIES[1]),
        'objects-': PolicyError,
        'objects-0': PolicyError,
        'objects--1': ('objects-', POLICIES[1]),
        'objects-+1': PolicyError,
        'objects--': PolicyError,
        'objects-foo': PolicyError,
        'objects--bar': PolicyError,
        'objects-+bar': PolicyError,
        # questionable, demonstrated as inverse of get_policy_string
        'objects+0': ('objects+0', POLICIES[0]),
        '': ('', POLICIES[0]),
        '0': ('0', POLICIES[0]),
        '-1': ('', POLICIES[1]),
    }
    for policy_string, expected in expectations.items():
        # 'is', not '==': PolicyError is a class used as a sentinel
        # value here, so identity is the correct comparison
        if expected is PolicyError:
            try:
                invalid = split_policy_string(policy_string)
            except PolicyError:
                continue  # good
            else:
                self.fail('The string %r returned %r '
                          'instead of raising a PolicyError' %
                          (policy_string, invalid))
        self.assertEqual(expected, split_policy_string(policy_string))
        # should be inverse of get_policy_string
        self.assertEqual(policy_string, get_policy_string(*expected))
def test_split_policy_string(self):
    """
    Check that split_policy_string() parses well-formed directory
    names, rejects malformed policy suffixes with PolicyError, and
    round-trips through get_policy_string().
    """
    good = {
        "something": ("something", POLICIES[0]),
        "something-1": ("something", POLICIES[1]),
        "tmp": ("tmp", POLICIES[0]),
        "objects": ("objects", POLICIES[0]),
        "tmp-1": ("tmp", POLICIES[1]),
        "objects-1": ("objects", POLICIES[1]),
        "objects--1": ("objects-", POLICIES[1]),
        # questionable, demonstrated as inverse of get_policy_string
        "objects+0": ("objects+0", POLICIES[0]),
        "": ("", POLICIES[0]),
        "0": ("0", POLICIES[0]),
        "-1": ("", POLICIES[1]),
    }
    bad = [
        "objects-",
        "objects-0",
        "objects-+1",
        "objects--",
        "objects-foo",
        "objects--bar",
        "objects-+bar",
    ]
    for policy_string in bad:
        try:
            invalid = split_policy_string(policy_string)
        except PolicyError:
            pass  # expected
        else:
            self.fail(
                "The string %r returned %r "
                "instead of raising a PolicyError"
                % (policy_string, invalid)
            )
    for policy_string, parsed in good.items():
        self.assertEqual(parsed, split_policy_string(policy_string))
        # round-trip: get_policy_string() must reproduce the input
        self.assertEqual(policy_string, get_policy_string(*parsed))
def object_sweep(self, device):
    """
    If there are async pendings on the device, walk each one and update.

    :param device: path to device
    """
    start_time = time.time()
    # loop through async pending dirs for all policies
    for asyncdir in self._listdir(device):
        # we only care about directories
        async_pending = os.path.join(device, asyncdir)
        if not os.path.isdir(async_pending):
            continue
        if not asyncdir.startswith(ASYNCDIR_BASE):
            # skip stuff like "accounts", "containers", etc.
            continue
        try:
            # the directory name encodes the storage policy index
            base, policy = split_policy_string(asyncdir)
        except PolicyError as e:
            # unknown policy suffix -- warn and skip the directory
            self.logger.warning(
                _('Directory %(directory)r does not map '
                  'to a valid policy (%(error)s)') % {
                      'directory': asyncdir, 'error': e})
            continue
        for prefix in self._listdir(async_pending):
            prefix_path = os.path.join(async_pending, prefix)
            if not os.path.isdir(prefix_path):
                continue
            last_obj_hash = None
            # reverse-sorted so the newest update for an object is
            # seen first; older entries for the same hash are unlinked
            for update in sorted(self._listdir(prefix_path),
                                 reverse=True):
                update_path = os.path.join(prefix_path, update)
                if not os.path.isfile(update_path):
                    continue
                try:
                    # async pending files are named <hash>-<timestamp>
                    obj_hash, timestamp = update.split('-')
                except ValueError:
                    self.logger.increment('errors')
                    self.logger.error(
                        _('ERROR async pending file with unexpected '
                          'name %s') % (update_path))
                    continue
                if obj_hash == last_obj_hash:
                    # superseded by a newer update already processed
                    self.logger.increment("unlinks")
                    os.unlink(update_path)
                else:
                    self.process_object_update(update_path, device,
                                               policy)
                    last_obj_hash = obj_hash
                self.objects_running_time = ratelimit_sleep(
                    self.objects_running_time,
                    self.max_objects_per_second)
            try:
                os.rmdir(prefix_path)
            except OSError:
                # directory not empty (or already gone) -- leave it
                pass
    self.logger.timing_since('timing', start_time)
def _iter_async_pendings(self, device): """ Locate and yield all the async pendings on the device. Multiple updates for the same object will come out in reverse-chronological order (i.e. newest first) so that callers can skip stale async_pendings. Tries to clean up empty directories as it goes. """ # loop through async pending dirs for all policies for asyncdir in self._listdir(device): # we only care about directories async_pending = os.path.join(device, asyncdir) if not os.path.isdir(async_pending): continue if not asyncdir.startswith(ASYNCDIR_BASE): # skip stuff like "accounts", "containers", etc. continue try: base, policy = split_policy_string(asyncdir) except PolicyError as e: # This isn't an error, but a misconfiguration. Logging a # warning should be sufficient. self.logger.warning( _('Directory %(directory)r does not map ' 'to a valid policy (%(error)s)') % { 'directory': asyncdir, 'error': e }) continue for prefix in self._listdir(async_pending): prefix_path = os.path.join(async_pending, prefix) if not os.path.isdir(prefix_path): continue for update in sorted(self._listdir(prefix_path), reverse=True): update_path = os.path.join(prefix_path, update) if not os.path.isfile(update_path): continue try: obj_hash, timestamp = update.split('-') except ValueError: self.stats.errors += 1 self.logger.increment('errors') self.logger.error( _('ERROR async pending file with unexpected ' 'name %s') % (update_path)) continue yield { 'device': device, 'policy': policy, 'path': update_path, 'obj_hash': obj_hash, 'timestamp': timestamp } try: os.rmdir(prefix_path) except OSError: pass
def object_sweep(self, device):
    """
    Walk every async pending on the device and send its update,
    unlinking superseded updates and pruning empty directories.

    :param device: path to device
    """
    sweep_started = time.time()
    for entry in self._listdir(device):
        entry_path = os.path.join(device, entry)
        # only async-pending directories matter; skip plain files and
        # things like "accounts", "containers", etc.
        if not os.path.isdir(entry_path) or \
                not entry.startswith(ASYNCDIR_BASE):
            continue
        try:
            base, policy = split_policy_string(entry)
        except PolicyError as e:
            self.logger.warning(_('Directory %(directory)r does not map '
                                  'to a valid policy (%(error)s)') % {
                                      'directory': entry, 'error': e})
            continue
        for suffix in self._listdir(entry_path):
            suffix_path = os.path.join(entry_path, suffix)
            if not os.path.isdir(suffix_path):
                continue
            previous_hash = None
            # newest-first ordering lets us process only the latest
            # update per object and discard the rest
            for update in sorted(self._listdir(suffix_path),
                                 reverse=True):
                update_path = os.path.join(suffix_path, update)
                if not os.path.isfile(update_path):
                    continue
                try:
                    obj_hash, timestamp = update.split('-')
                except ValueError:
                    self.logger.increment('errors')
                    self.logger.error(
                        _('ERROR async pending file with unexpected '
                          'name %s') % (update_path))
                    continue
                if obj_hash == previous_hash:
                    # a newer update for this object was just handled
                    self.logger.increment("unlinks")
                    os.unlink(update_path)
                else:
                    self.process_object_update(update_path, device,
                                               policy)
                    previous_hash = obj_hash
                self.objects_running_time = ratelimit_sleep(
                    self.objects_running_time,
                    self.max_objects_per_second)
            try:
                os.rmdir(suffix_path)
            except OSError:
                pass
    self.logger.timing_since('timing', sweep_started)
def get_ring_and_datadir(path):
    """
    :param path: path to ring
    :returns: a tuple, (ring, datadir)
    """
    # the segment before the first '.' of the ring filename encodes
    # the base name plus an optional policy suffix
    name = os.path.basename(path).split('.')[0]
    base, policy = split_policy_string(name)
    datadir = get_data_dir(policy) if base == 'object' else base + 's'
    return Ring(path), datadir
def split_key(key):
    """
    Decompose a dotted key into its components.

    The leading segment must parse to an 'objects' base (optionally
    policy-suffixed); otherwise False is returned.  The remaining
    segments supply the hash path, a two-part timestamp, and a final
    nonce whose optional sixth dash-separated field is a fragment
    index.
    """
    fields = key.split('.')
    base, policy = split_policy_string(fields[0])
    if base != 'objects':
        return False
    nonce_fields = fields[-1].split('-')
    frag_index = int(nonce_fields[5]) if len(nonce_fields) > 5 else None
    return {
        'policy': policy,
        'hashpath': fields[1],
        'nonce': '-'.join(nonce_fields[:5]),
        'frag_index': frag_index,
        'timestamp': '.'.join(fields[2:4]),
    }
def get_diskfile_from_audit_location(self, device, head_key):
    """
    Build a DiskFile for the object named by an audit location.

    :param device: "host:port" string locating the object server
    :param head_key: on-disk key; its leading "objects[-N]." segment
                     encodes the storage policy and its second
                     dot-separated field is the datadir
    :returns: a DiskFile; falls back to the legacy policy when the
              key does not carry a valid policy index
    """
    host, port = device.split(":")
    # raw string: in a non-raw literal "\." is an invalid escape
    # sequence (SyntaxWarning on modern Pythons)
    policy_match = re.match(r"objects([-]?[0-9]?)\.", head_key)
    # default to the legacy policy; an unexpected key name
    # (policy_match is None) previously crashed with AttributeError
    policy = POLICIES.legacy
    if policy_match is not None:
        try:
            # _junk instead of _ to avoid shadowing the gettext alias
            _junk, policy = split_policy_string(policy_match.group(1))
        except PolicyError:
            pass
    datadir = head_key.split(".", 3)[1]
    return DiskFile(
        self,
        host,
        port,
        self.threadpools[device],
        None,
        policy=policy,
        _datadir=datadir,
        unlink_wait=self.unlink_wait,
    )
def _iter_async_pendings(self, device): """ Locate and yield all the async pendings on the device. Multiple updates for the same object will come out in reverse-chronological order (i.e. newest first) so that callers can skip stale async_pendings. Tries to clean up empty directories as it goes. """ # loop through async pending dirs for all policies for asyncdir in self._listdir(device): # we only care about directories async_pending = os.path.join(device, asyncdir) if not asyncdir.startswith(ASYNCDIR_BASE): # skip stuff like "accounts", "containers", etc. continue if not os.path.isdir(async_pending): continue try: base, policy = split_policy_string(asyncdir) except PolicyError as e: # This isn't an error, but a misconfiguration. Logging a # warning should be sufficient. self.logger.warning( _('Directory %(directory)r does not map ' 'to a valid policy (%(error)s)') % { 'directory': asyncdir, 'error': e }) continue prefix_dirs = self._listdir(async_pending) shuffle(prefix_dirs) for prefix in prefix_dirs: prefix_path = os.path.join(async_pending, prefix) if not os.path.isdir(prefix_path): continue last_obj_hash = None for update in sorted(self._listdir(prefix_path), reverse=True): update_path = os.path.join(prefix_path, update) if not os.path.isfile(update_path): continue try: obj_hash, timestamp = update.split('-') except ValueError: self.stats.errors += 1 self.logger.increment('errors') self.logger.error( _('ERROR async pending file with unexpected ' 'name %s') % (update_path)) continue # Async pendings are stored on disk like this: # # <device>/async_pending/<suffix>/<obj_hash>-<timestamp> # # If there are multiple updates for a given object, # they'll look like this: # # <device>/async_pending/<obj_suffix>/<obj_hash>-<timestamp1> # <device>/async_pending/<obj_suffix>/<obj_hash>-<timestamp2> # <device>/async_pending/<obj_suffix>/<obj_hash>-<timestamp3> # # Async updates also have the property that newer # updates contain all the information in older updates. 
# Since we sorted the directory listing in reverse # order, we'll see timestamp3 first, yield it, and then # unlink timestamp2 and timestamp1 since we know they # are obsolete. # # This way, our caller only gets useful async_pendings. if obj_hash == last_obj_hash: self.stats.unlinks += 1 self.logger.increment('unlinks') try: os.unlink(update_path) except OSError as e: if e.errno != errno.ENOENT: raise else: last_obj_hash = obj_hash yield { 'device': device, 'policy': policy, 'path': update_path, 'obj_hash': obj_hash, 'timestamp': timestamp }
def object_sweep(self, device):
    """
    If there are async pendings on the device, walk each one and update.

    :param device: path to device
    """
    start_time = time.time()
    last_status_update = start_time
    # snapshot the counters so per-sweep deltas can be reported
    start_stats = self.stats.copy()
    my_pid = os.getpid()
    self.logger.info("Object update sweep starting on %s (pid: %d)",
                     device, my_pid)
    # loop through async pending dirs for all policies
    for asyncdir in self._listdir(device):
        # we only care about directories
        async_pending = os.path.join(device, asyncdir)
        if not os.path.isdir(async_pending):
            continue
        if not asyncdir.startswith(ASYNCDIR_BASE):
            # skip stuff like "accounts", "containers", etc.
            continue
        try:
            # the directory name encodes the storage policy index
            base, policy = split_policy_string(asyncdir)
        except PolicyError as e:
            # This isn't an error, but a misconfiguration. Logging a
            # warning should be sufficient.
            self.logger.warning(_('Directory %(directory)r does not map '
                                  'to a valid policy (%(error)s)') % {
                                      'directory': asyncdir, 'error': e})
            continue
        for prefix in self._listdir(async_pending):
            prefix_path = os.path.join(async_pending, prefix)
            if not os.path.isdir(prefix_path):
                continue
            last_obj_hash = None
            # reverse-sorted so the newest update for an object is
            # processed first and older duplicates are unlinked
            for update in sorted(self._listdir(prefix_path),
                                 reverse=True):
                update_path = os.path.join(prefix_path, update)
                if not os.path.isfile(update_path):
                    continue
                try:
                    # async pending files are named <hash>-<timestamp>
                    obj_hash, timestamp = update.split('-')
                except ValueError:
                    self.stats.errors += 1
                    self.logger.increment('errors')
                    self.logger.error(
                        _('ERROR async pending file with unexpected '
                          'name %s') % (update_path))
                    continue
                if obj_hash == last_obj_hash:
                    # superseded by a newer update already processed
                    self.stats.unlinks += 1
                    self.logger.increment('unlinks')
                    os.unlink(update_path)
                else:
                    self.process_object_update(update_path, device,
                                               policy)
                    last_obj_hash = obj_hash
                self.objects_running_time = ratelimit_sleep(
                    self.objects_running_time,
                    self.max_objects_per_second)
                now = time.time()
                # periodic progress line, throttled by report_interval
                if now - last_status_update >= self.report_interval:
                    this_sweep = self.stats.since(start_stats)
                    self.logger.info(
                        ('Object update sweep progress on %(device)s: '
                         '%(elapsed).02fs, %(stats)s (pid: %(pid)d)'),
                        {'device': device,
                         'elapsed': now - start_time,
                         'pid': my_pid,
                         'stats': this_sweep})
                    last_status_update = now
            try:
                os.rmdir(prefix_path)
            except OSError:
                # directory not empty (or already gone) -- leave it
                pass
    self.logger.timing_since('timing', start_time)
    sweep_totals = self.stats.since(start_stats)
    # NOTE(review): '%(elapsed).02fs seconds:,' reads oddly ("s
    # seconds" plus a stray ':,') -- confirm intended wording before
    # changing the log format
    self.logger.info(
        ('Object update sweep completed on %(device)s '
         'in %(elapsed).02fs seconds:, '
         '%(successes)d successes, %(failures)d failures, '
         '%(quarantines)d quarantines, '
         '%(unlinks)d unlinks, %(errors)d errors '
         '(pid: %(pid)d)'),
        {'device': device,
         'elapsed': time.time() - start_time,
         'pid': my_pid,
         'successes': sweep_totals.successes,
         'failures': sweep_totals.failures,
         'quarantines': sweep_totals.quarantines,
         'unlinks': sweep_totals.unlinks,
         'errors': sweep_totals.errors})
def object_sweep(self, device):
    """
    If there are async pendings on the device, walk each one and update.

    :param device: path to device
    """
    start_time = time.time()
    last_status_update = start_time
    # snapshot the counters so per-sweep deltas can be reported
    start_stats = self.stats.copy()
    my_pid = os.getpid()
    self.logger.info("Object update sweep starting on %s (pid: %d)",
                     device, my_pid)
    # loop through async pending dirs for all policies
    for asyncdir in self._listdir(device):
        # we only care about directories
        async_pending = os.path.join(device, asyncdir)
        if not os.path.isdir(async_pending):
            continue
        if not asyncdir.startswith(ASYNCDIR_BASE):
            # skip stuff like "accounts", "containers", etc.
            continue
        try:
            # the directory name encodes the storage policy index
            base, policy = split_policy_string(asyncdir)
        except PolicyError as e:
            # This isn't an error, but a misconfiguration. Logging a
            # warning should be sufficient.
            self.logger.warning(_('Directory %(directory)r does not map '
                                  'to a valid policy (%(error)s)') % {
                                      'directory': asyncdir, 'error': e})
            continue
        for prefix in self._listdir(async_pending):
            prefix_path = os.path.join(async_pending, prefix)
            if not os.path.isdir(prefix_path):
                continue
            last_obj_hash = None
            # reverse-sorted so the newest update for an object is
            # processed first and older duplicates are unlinked
            for update in sorted(self._listdir(prefix_path),
                                 reverse=True):
                update_path = os.path.join(prefix_path, update)
                if not os.path.isfile(update_path):
                    continue
                try:
                    # async pending files are named <hash>-<timestamp>
                    obj_hash, timestamp = update.split('-')
                except ValueError:
                    self.stats.errors += 1
                    self.logger.increment('errors')
                    self.logger.error(
                        _('ERROR async pending file with unexpected '
                          'name %s') % (update_path))
                    continue
                if obj_hash == last_obj_hash:
                    # superseded by a newer update already processed
                    self.stats.unlinks += 1
                    self.logger.increment('unlinks')
                    os.unlink(update_path)
                else:
                    self.process_object_update(update_path, device,
                                               policy)
                    last_obj_hash = obj_hash
                self.objects_running_time = ratelimit_sleep(
                    self.objects_running_time,
                    self.max_objects_per_second)
                now = time.time()
                # periodic progress line, throttled by report_interval
                if now - last_status_update >= self.report_interval:
                    this_sweep = self.stats.since(start_stats)
                    self.logger.info(
                        ('Object update sweep progress on %(device)s: '
                         '%(elapsed).02fs, %(stats)s (pid: %(pid)d)'),
                        {'device': device,
                         'elapsed': now - start_time,
                         'pid': my_pid,
                         'stats': this_sweep})
                    last_status_update = now
            try:
                os.rmdir(prefix_path)
            except OSError:
                # directory not empty (or already gone) -- leave it
                pass
    self.logger.timing_since('timing', start_time)
    sweep_totals = self.stats.since(start_stats)
    # NOTE(review): '%(elapsed).02fs seconds:,' reads oddly ("s
    # seconds" plus a stray ':,') -- confirm intended wording before
    # changing the log format
    self.logger.info(
        ('Object update sweep completed on %(device)s '
         'in %(elapsed).02fs seconds:, '
         '%(successes)d successes, %(failures)d failures, '
         '%(quarantines)d quarantines, '
         '%(unlinks)d unlinks, %(errors)d errors, '
         '%(redirects)d redirects '
         '(pid: %(pid)d)'),
        {'device': device,
         'elapsed': time.time() - start_time,
         'pid': my_pid,
         'successes': sweep_totals.successes,
         'failures': sweep_totals.failures,
         'quarantines': sweep_totals.quarantines,
         'unlinks': sweep_totals.unlinks,
         'errors': sweep_totals.errors,
         'redirects': sweep_totals.redirects})