def object_sweep(self, device):
    """
    Walk every async pending on a device and apply it.

    An entry that repeats the object hash of the entry handled immediately
    before it is unlinked instead of processed. A progress line is logged
    whenever ``self.report_interval`` seconds have elapsed since the
    previous one, and a summary line is logged once the sweep finishes.

    :param device: path to device
    """
    sweep_began = time.time()
    last_report = sweep_began
    baseline = self.stats.copy()
    pid = os.getpid()
    self.logger.info("Object update sweep starting on %s (pid: %d)",
                     device, pid)

    previous_hash = None
    pendings = RateLimitedIterator(
        self._iter_async_pendings(device),
        elements_per_second=self.max_objects_per_second)
    for pending in pendings:
        if pending['obj_hash'] != previous_hash:
            self.process_object_update(
                pending['path'], pending['device'], pending['policy'])
            previous_hash = pending['obj_hash']
        else:
            # Same object hash as the entry just handled: remove the file
            # rather than processing it again.
            self.stats.unlinks += 1
            self.logger.increment('unlinks')
            os.unlink(pending['path'])

        now = time.time()
        if now - last_report < self.report_interval:
            continue
        progress = {
            'device': device,
            'elapsed': now - sweep_began,
            'pid': pid,
            'stats': self.stats.since(baseline),
        }
        self.logger.info(
            ('Object update sweep progress on %(device)s: '
             '%(elapsed).02fs, %(stats)s (pid: %(pid)d)'),
            progress)
        last_report = now

    self.logger.timing_since('timing', sweep_began)
    totals = self.stats.since(baseline)
    summary = {
        'device': device,
        'elapsed': time.time() - sweep_began,
        'pid': pid,
    }
    # Copy each per-sweep counter into the mapping used by the log format.
    for counter in ('successes', 'failures', 'quarantines', 'unlinks',
                    'errors', 'redirects'):
        summary[counter] = getattr(totals, counter)
    self.logger.info(
        ('Object update sweep completed on %(device)s '
         'in %(elapsed).02fs seconds:, '
         '%(successes)d successes, %(failures)d failures, '
         '%(quarantines)d quarantines, '
         '%(unlinks)d unlinks, %(errors)d errors, '
         '%(redirects)d redirects '
         '(pid: %(pid)d)'),
        summary)
def _manifest_get_response(self, req, content_length, response_headers,
                           segments):
    """
    Build the response for a manifest GET by streaming its segments.

    When exactly one byte range is satisfiable it is stored on
    ``self.first_byte`` / ``self.last_byte``; when several are satisfiable
    the Range header is removed from the request.

    :param req: the incoming request
    :param content_length: length given to the Response and used to
                           resolve the Range header
    :param response_headers: headers given to the Response
    :param segments: segment listing handed to
                     ``self._segment_listing_iterator``
    :returns: HTTPRequestedRangeNotSatisfiable when no requested range is
              satisfiable, HTTPConflict when validating the first segment
              fails, otherwise a Response wrapping the SegmentedIterable
    """
    self.first_byte = self.last_byte = None
    if req.range:
        satisfiable = req.range.ranges_for_length(content_length)
        range_count = len(satisfiable)
        if range_count == 0:
            return HTTPRequestedRangeNotSatisfiable(request=req)
        if range_count == 1:
            range_start, range_stop = satisfiable[0]
            self.first_byte = range_start
            # swob.Range.ranges_for_length reports the last byte's position
            # plus one, so step back to get an inclusive index.
            self.last_byte = range_stop - 1
        else:
            req.range = None

    ver, account, _junk = req.split_path(3, 3, rest_with_last=True)

    def segment_path(seg_dict):
        # Absolute path of the object backing this segment.
        return "/{ver}/{acc}/{conobj}".format(
            ver=ver, acc=account, conobj=seg_dict['name'].lstrip('/'))

    limited_listing = RateLimitedIterator(
        self._segment_listing_iterator(req, ver, account, segments),
        self.slo.rate_limit_segments_per_sec,
        limit_after=self.slo.rate_limit_after_segment)

    # The listing iterator yields (segment dict, start byte, end byte);
    # SegmentedIterable is fed (obj path, etag, size, start byte, end byte).
    segment_tuples = (
        (segment_path(seg), seg['hash'], int(seg['bytes']), start, end)
        for seg, start, end in limited_listing)

    body_iter = SegmentedIterable(
        req, self.slo.app, segment_tuples,
        name=req.path,
        logger=self.slo.logger,
        ua_suffix="SLO MultipartGET",
        swift_source="SLO",
        max_get_time=self.slo.max_get_time)

    try:
        body_iter.validate_first_segment()
    except (ListingIterError, SegmentError):
        # The first segment could not be validated, so answer with a
        # 409 Conflict instead of starting a body that cannot be completed.
        return HTTPConflict(request=req)

    response = Response(
        request=req,
        content_length=content_length,
        headers=response_headers,
        conditional_response=True,
        app_iter=body_iter)
    if req.range:
        response.headers.pop('Etag')
    return response
def process_policy(self, policy):
    """
    Process every hash directory found for one storage policy.

    Resets the per-policy attributes on ``self``, walks the locations
    produced by ``audit_location_generator`` (rate limited when
    ``files_per_second`` is positive) and hands each hash path whose
    recomputed path differs to ``self.process_location``. Afterwards,
    records a stat for each entry the generator put under the ``unmounted``
    and ``unlistable_partitions`` keys of its error counter.

    :param policy: the storage policy to process
    """
    self.logger.info(
        'Processing files for policy %s under %s (cleanup=%s)',
        policy.name, self.root, self.do_cleanup)
    self.part_power = policy.object_ring.part_power
    self.next_part_power = policy.object_ring.next_part_power
    self.diskfile_mgr = self.diskfile_router[policy]
    self.datadir = diskfile.get_data_dir(policy)
    self.states = {
        "part_power": self.part_power,
        "next_part_power": self.next_part_power,
        "state": {},
    }

    devices_root = self.conf['devices']
    walk_errors = {}
    locations = audit_location_generator(
        devices_root,
        self.datadir,
        mount_check=self.conf['mount_check'],
        devices_filter=self.devices_filter,
        hook_pre_device=self.hook_pre_device,
        hook_post_device=self.hook_post_device,
        partitions_filter=self.partitions_filter,
        hook_pre_partition=self.hook_pre_partition,
        hook_post_partition=self.hook_post_partition,
        hashes_filter=self.hashes_filter,
        logger=self.logger,
        error_counter=walk_errors,
        yield_hash_dirs=True)
    if self.conf['files_per_second'] > 0:
        locations = RateLimitedIterator(
            locations, self.conf['files_per_second'])

    for hash_path, _device, _partition in locations:
        # note, in cleanup step next_part_power == part_power
        new_hash_path = replace_partition_in_path(
            devices_root, hash_path, self.next_part_power)
        if new_hash_path != hash_path:
            self.process_location(hash_path, new_hash_path)

    # Unmounted devices never reach the pre_device hook, so they are
    # accounted for here.
    for unmounted_dev in walk_errors.get('unmounted', []):
        self.place_policy_stat(unmounted_dev, policy, 'unmounted', 1)

    # Unlistable partitions never reach the post_device hook either; the
    # device name is the last component of the data dir's parent path.
    for unlistable in walk_errors.get('unlistable_partitions', []):
        device_name = os.path.basename(os.path.dirname(unlistable))
        self.place_policy_stat(
            device_name, policy, 'unlistable_partitions', 1)
def _manifest_get_response(self, req, content_length, response_headers,
                           segments):
    """
    Build the response for a manifest GET by streaming its segments.

    When exactly one byte range is satisfiable it is stored on
    ``self.first_byte`` / ``self.last_byte``; when several are satisfiable
    the Range header is removed from the request.

    :param req: the incoming request
    :param content_length: length given to the Response and used to
                           resolve the Range header
    :param response_headers: headers given to the Response
    :param segments: segment listing handed to
                     ``self._segment_listing_iterator``
    :returns: HTTPRequestedRangeNotSatisfiable when no requested range is
              satisfiable, otherwise a Response wrapping a
              SegmentedIterable
    """
    self.first_byte = self.last_byte = None
    if req.range:
        satisfiable = req.range.ranges_for_length(content_length)
        range_count = len(satisfiable)
        if range_count == 0:
            return HTTPRequestedRangeNotSatisfiable(request=req)
        if range_count == 1:
            range_start, range_stop = satisfiable[0]
            self.first_byte = range_start
            # swob.Range.ranges_for_length reports the last byte's position
            # plus one, so step back to get an inclusive index.
            self.last_byte = range_stop - 1
        else:
            req.range = None

    ver, account, _junk = req.split_path(3, 3, rest_with_last=True)

    def segment_path(seg_dict):
        # Absolute path of the object backing this segment.
        return "/{ver}/{acc}/{conobj}".format(
            ver=ver, acc=account, conobj=seg_dict['name'].lstrip('/'))

    limited_listing = RateLimitedIterator(
        self._segment_listing_iterator(req, ver, account, segments),
        self.slo.rate_limit_segments_per_sec,
        limit_after=self.slo.rate_limit_after_segment)

    # The listing iterator yields (segment dict, start byte, end byte);
    # SegmentedIterable is fed (obj path, etag, size, start byte, end byte).
    segment_tuples = (
        (segment_path(seg), seg['hash'], int(seg['bytes']), start, end)
        for seg, start, end in limited_listing)

    body_iter = SegmentedIterable(
        req, self.slo.app, segment_tuples,
        name=req.path,
        logger=self.slo.logger,
        ua_suffix="SLO MultipartGET",
        swift_source="SLO",
        max_get_time=self.slo.max_get_time)
    response = Response(
        request=req,
        content_length=content_length,
        headers=response_headers,
        conditional_response=True,
        app_iter=body_iter)
    if req.range:
        response.headers.pop('Etag')
    return response
def process_policy(self, policy):
    """
    Process every hash directory found for one storage policy.

    Resets the per-policy attributes on ``self``, walks the locations
    produced by ``audit_location_generator`` (rate limited when
    ``files_per_second`` is positive) and hands each hash path whose
    recomputed path differs to ``self.process_location``. Errors met by
    the generator are counted in ``self.stats``.

    :param policy: the storage policy to process
    """
    self.logger.info(
        'Processing files for policy %s under %s (cleanup=%s)',
        policy.name, self.root, self.do_cleanup)
    self.part_power = policy.object_ring.part_power
    self.next_part_power = policy.object_ring.next_part_power
    self.diskfile_mgr = self.diskfile_router[policy]
    self.datadir = diskfile.get_data_dir(policy)
    self.states = {
        "part_power": self.part_power,
        "next_part_power": self.next_part_power,
        "state": {},
    }

    devices_root = self.conf['devices']
    locations = audit_location_generator(
        devices_root,
        self.datadir,
        mount_check=self.conf['mount_check'],
        devices_filter=self.devices_filter,
        hook_pre_device=self.hook_pre_device,
        hook_post_device=self.hook_post_device,
        partitions_filter=self.partitions_filter,
        hook_post_partition=self.hook_post_partition,
        hashes_filter=self.hashes_filter,
        logger=self.logger,
        error_counter=self.stats,
        yield_hash_dirs=True)
    if self.conf['files_per_second'] > 0:
        locations = RateLimitedIterator(
            locations, self.conf['files_per_second'])

    for hash_path, _device, _partition in locations:
        # note, in cleanup step next_part_power == part_power
        new_hash_path = replace_partition_in_path(
            devices_root, hash_path, self.next_part_power)
        if new_hash_path != hash_path:
            self.process_location(hash_path, new_hash_path)
def get_or_head_response(self, req, x_object_manifest,
                         response_headers=None):
    """
    Build the GET or HEAD response for a DLO manifest.

    Lists the segments under the manifest's container/prefix, works out
    Content-Length and Etag headers where the listing allows it, and, for
    GET requests, attaches a rate-limited SegmentedIterable as the body.

    :param req: the incoming request; its Range header is cleared when the
                range cannot be judged from the first page of the listing
    :param x_object_manifest: ``<container>/<object prefix>`` string, each
                              part URL-quoted
    :param response_headers: iterable of ``(name, value)`` header pairs;
                             defaults to ``self._response_headers``
    :returns: the error response from the container listing if there is
              one, HTTPRequestedRangeNotSatisfiable for an unsatisfiable
              single range, HTTPConflict if the first segment fails
              validation, otherwise the Response
    """
    if response_headers is None:
        response_headers = self._response_headers

    container, obj_prefix = x_object_manifest.split('/', 1)
    container = unquote(container)
    obj_prefix = unquote(obj_prefix)

    version, account, _junk = req.split_path(2, 3, True)
    error_response, segments = self._get_container_listing(
        req, version, account, container, obj_prefix)
    if error_response:
        return error_response
    have_complete_listing = (
        len(segments) < constraints.CONTAINER_LISTING_LIMIT)

    first_byte = last_byte = None
    actual_content_length = None
    content_length_for_swob_range = None
    if req.range and len(req.range.ranges) == 1:
        content_length_for_swob_range = sum(o['bytes'] for o in segments)

        # This is a hack to handle suffix byte ranges (e.g. "bytes=-5"),
        # which we can't honor unless we have a complete listing.
        _junk, range_end = req.range.ranges_for_length(float("inf"))[0]

        # If this is all the segments, we know whether or not this
        # range request is satisfiable.
        #
        # Alternately, we may not have all the segments, but this range
        # falls entirely within the first page's segments, so we know
        # that it is satisfiable.
        if (have_complete_listing
                or range_end < content_length_for_swob_range):
            byteranges = req.range.ranges_for_length(
                content_length_for_swob_range)
            if not byteranges:
                headers = {'Accept-Ranges': 'bytes'}
                if have_complete_listing:
                    headers['Content-Range'] = 'bytes */%d' % (
                        content_length_for_swob_range, )
                return HTTPRequestedRangeNotSatisfiable(
                    request=req, headers=headers)
            first_byte, last_byte = byteranges[0]
            # For some reason, swob.Range.ranges_for_length adds 1 to the
            # last byte's position.
            last_byte -= 1
            actual_content_length = last_byte - first_byte + 1
        else:
            # The range may or may not be satisfiable, but we can't tell
            # based on just one page of listing, and we're not going to go
            # get more pages because that would use up too many resources,
            # so we ignore the Range header and return the whole object.
            actual_content_length = None
            content_length_for_swob_range = None
            req.range = None

    response_headers = [
        (h, v) for h, v in response_headers
        if h.lower() not in ("content-length", "content-range")]

    if content_length_for_swob_range is not None:
        # Here, we have to give swob a big-enough content length so that
        # it can compute the actual content length based on the Range
        # header. This value will not be visible to the client; swob will
        # substitute its own Content-Length.
        #
        # Note: if the manifest points to at least CONTAINER_LISTING_LIMIT
        # segments, this may be less than the sum of all the segments'
        # sizes. However, it'll still be greater than the last byte in the
        # Range header, so it's good enough for swob.
        response_headers.append(
            ('Content-Length', str(content_length_for_swob_range)))
    elif have_complete_listing:
        actual_content_length = sum(o['bytes'] for o in segments)
        response_headers.append(
            ('Content-Length', str(actual_content_length)))

    if have_complete_listing:
        # Replace any existing Etag with the digest of the segment hashes.
        response_headers = [(h, v) for h, v in response_headers
                            if h.lower() != "etag"]
        etag = md5()
        for seg_dict in segments:
            seg_hash = seg_dict['hash'].strip('"')
            # Hash objects only accept bytes; a text hash (always the case
            # on Python 3) must be encoded before it is fed to the digest.
            if not isinstance(seg_hash, bytes):
                seg_hash = seg_hash.encode('utf8')
            etag.update(seg_hash)
        response_headers.append(('Etag', '"%s"' % etag.hexdigest()))

    app_iter = None
    if req.method == 'GET':
        listing_iter = RateLimitedIterator(
            self._segment_listing_iterator(
                req, version, account, container, obj_prefix, segments,
                first_byte=first_byte, last_byte=last_byte),
            self.dlo.rate_limit_segments_per_sec,
            limit_after=self.dlo.rate_limit_after_segment)

        app_iter = SegmentedIterable(
            req, self.dlo.app, listing_iter,
            ua_suffix="DLO MultipartGET",
            swift_source="DLO",
            name=req.path,
            logger=self.logger,
            max_get_time=self.dlo.max_get_time,
            response_body_length=actual_content_length)

        try:
            app_iter.validate_first_segment()
        except (SegmentError, ListingIterError):
            return HTTPConflict(request=req)

    resp = Response(request=req, headers=response_headers,
                    conditional_response=True,
                    app_iter=app_iter)

    return resp
def roundrobin_datadirs(self, dirs):
    """
    Return a rate-limited iterator over the given data directories.

    Delegates to the ``roundrobin_datadirs`` callable visible from the
    enclosing scope and caps the result at ``self.databases_per_second``
    elements per second.

    :param dirs: passed unchanged to ``roundrobin_datadirs``
    :returns: a RateLimitedIterator wrapping the underlying iterator
    """
    datadir_iter = roundrobin_datadirs(dirs)
    return RateLimitedIterator(
        datadir_iter,
        elements_per_second=self.databases_per_second)
def cleanup(conf, logger, device):
    """
    Remove object files left in their old partition location.

    Only policies whose ring reports ``next_part_power`` equal to
    ``part_power`` are handled. A file is removed only after a diskfile can
    be opened at its expected new location (or is reported deleted there).

    :param conf: configuration dict; reads ``swift_dir``, ``devices``,
                 ``mount_check`` and ``files_per_second``
    :param logger: logger used for progress and error messages
    :param device: value bound into ``devices_filter`` for every policy
                   (presumably limits the run to one device -- confirm
                   against ``devices_filter``)
    :returns: the result of ``determine_exit_code``
    """
    diskfile_router = diskfile.DiskFileRouter(conf, logger)
    errors = cleaned_up = 0
    error_counter = {}
    found_policy = False
    for policy in POLICIES:
        diskfile_mgr = diskfile_router[policy]
        policy.object_ring = None  # Ensure it will be reloaded
        policy.load_ring(conf['swift_dir'])
        part_power = policy.object_ring.part_power
        next_part_power = policy.object_ring.next_part_power
        if not next_part_power or next_part_power != part_power:
            continue
        logger.info('Cleaning up files for policy %s under %s',
                    policy.name, conf['devices'])
        found_policy = True
        datadir = diskfile.get_data_dir(policy)

        locks = [None]
        states = {
            "part_power": part_power,
            "next_part_power": next_part_power,
            "state": {},
        }
        cleanup_devices_filter = partial(devices_filter, device)
        cleanup_hook_pre_device = partial(hook_pre_device, locks, states,
                                          datadir)
        cleanup_hook_post_device = partial(hook_post_device, locks)
        cleanup_partition_filter = partial(partitions_filter, states,
                                           part_power, next_part_power)
        cleanup_hook_post_partition = partial(hook_post_partition, states,
                                              STEP_CLEANUP, policy,
                                              diskfile_mgr)
        cleanup_hashes_filter = partial(hashes_filter, next_part_power)

        locations = audit_location_generator(
            conf['devices'],
            datadir,
            mount_check=conf['mount_check'],
            devices_filter=cleanup_devices_filter,
            hook_pre_device=cleanup_hook_pre_device,
            hook_post_device=cleanup_hook_post_device,
            partitions_filter=cleanup_partition_filter,
            hook_post_partition=cleanup_hook_post_partition,
            hashes_filter=cleanup_hashes_filter,
            logger=logger,
            error_counter=error_counter)
        if conf['files_per_second'] > 0:
            locations = RateLimitedIterator(
                locations, conf['files_per_second'])

        # The loop targets must not reuse the name ``device``: rebinding
        # the parameter here would make the next policy's devices filter
        # receive the last device yielded instead of the caller's value.
        for fname, loc_device, loc_partition in locations:
            expected_fname = replace_partition_in_path(fname, part_power)
            if fname == expected_fname:
                continue
            # Make sure there is a valid object file in the expected new
            # location. Note that this could be newer than the original one
            # (which happens if there is another PUT after partition power
            # has been increased, but cleanup did not yet run)
            loc = diskfile.AuditLocation(
                os.path.dirname(expected_fname), loc_device, loc_partition,
                policy)
            df = diskfile_mgr.get_diskfile_from_audit_location(loc)
            try:
                with df.open():
                    pass
            except DiskFileQuarantined as exc:
                logger.warning('ERROR Object %(obj)s failed audit and was'
                               ' quarantined: %(err)r',
                               {'obj': loc, 'err': exc})
                errors += 1
                continue
            except DiskFileDeleted:
                pass
            except DiskFileNotExist as exc:
                # For erasure coding this might be a non-durable fragment:
                # it is acceptable as long as a fragment file exists in the
                # new path (the reconstructor will fix it up later). Any
                # other policy type, or a missing file, is an error.
                if (policy.policy_type != 'erasure_coding'
                        or not os.path.isfile(expected_fname)):
                    logger.warning(
                        'Error cleaning up %s: %r', fname, exc)
                    errors += 1
                    continue
            try:
                os.remove(fname)
                cleaned_up += 1
                logger.debug("Removed %s", fname)
                suffix_dir = os.path.dirname(os.path.dirname(fname))
                diskfile.invalidate_hash(suffix_dir)
            except OSError as exc:
                logger.warning('Error cleaning up %s: %r', fname, exc)
                errors += 1
    return determine_exit_code(
        logger=logger,
        found_policy=found_policy,
        processed=cleaned_up,
        action='cleaned up',
        action_errors=errors,
        error_counter=error_counter,
    )
def relink(conf, logger, device):
    """
    Link object files into the location for the next partition power.

    Only policies whose ring reports a ``next_part_power`` that differs
    from ``part_power`` are handled. Each file found is relinked to the
    path computed for ``next_part_power`` and the hash of the new suffix
    directory is invalidated.

    :param conf: configuration dict; reads ``swift_dir``, ``devices``,
                 ``mount_check`` and ``files_per_second``
    :param logger: logger used for progress and error messages
    :param device: value bound into ``devices_filter`` for every policy
    :returns: the result of ``determine_exit_code``
    """
    diskfile_router = diskfile.DiskFileRouter(conf, logger)
    found_policy = False
    relinked = errors = 0
    error_counter = {}
    for policy in POLICIES:
        diskfile_mgr = diskfile_router[policy]
        policy.object_ring = None  # Ensure it will be reloaded
        policy.load_ring(conf['swift_dir'])
        part_power = policy.object_ring.part_power
        next_part_power = policy.object_ring.next_part_power
        increase_pending = (next_part_power
                            and next_part_power != part_power)
        if not increase_pending:
            continue
        logger.info('Relinking files for policy %s under %s',
                    policy.name, conf['devices'])
        found_policy = True
        datadir = diskfile.get_data_dir(policy)

        locks = [None]
        states = {
            "part_power": part_power,
            "next_part_power": next_part_power,
            "state": {},
        }
        # Bind the per-policy context into each filter and hook.
        locations = audit_location_generator(
            conf['devices'],
            datadir,
            mount_check=conf['mount_check'],
            devices_filter=partial(devices_filter, device),
            hook_pre_device=partial(hook_pre_device, locks, states,
                                    datadir),
            hook_post_device=partial(hook_post_device, locks),
            partitions_filter=partial(partitions_filter, states,
                                      part_power, next_part_power),
            hook_post_partition=partial(hook_post_partition, states,
                                        STEP_RELINK, policy, diskfile_mgr),
            hashes_filter=partial(hashes_filter, next_part_power),
            logger=logger,
            error_counter=error_counter)
        if conf['files_per_second'] > 0:
            locations = RateLimitedIterator(
                locations, conf['files_per_second'])

        for old_path, _dev, _part in locations:
            new_path = replace_partition_in_path(old_path, next_part_power)
            try:
                diskfile.relink_paths(old_path, new_path,
                                      check_existing=True)
                relinked += 1
                new_suffix_dir = os.path.dirname(os.path.dirname(new_path))
                diskfile.invalidate_hash(new_suffix_dir)
            except OSError as exc:
                errors += 1
                logger.warning("Relinking %s to %s failed: %s",
                               old_path, new_path, exc)
    return determine_exit_code(
        logger=logger,
        found_policy=found_policy,
        processed=relinked,
        action='relinked',
        action_errors=errors,
        error_counter=error_counter,
    )
def run_once(self, *args, **kwargs):
    """
    Executes a single pass, looking for objects to expire.

    Delete tasks for every task account are spawned on a green pool at a
    rate capped by ``self.tasks_per_second``; once the pool has drained,
    the task containers that were visited are deleted.

    :param args: Extra args to fulfill the Daemon interface; this daemon
                 has no additional args.
    :param kwargs: Extra keyword args to fulfill the Daemon interface; this
                   daemon accepts processes and process keyword args.
                   These will override the values from the config file if
                   provided.
    """
    # This if-clause will be removed when general task queue feature is
    # implemented.
    if not self.dequeue_from_legacy:
        self.logger.info('This node is not configured to dequeue tasks '
                         'from the legacy queue.  This node will '
                         'not process any expiration tasks.  At least '
                         'one node in your cluster must be configured '
                         'with dequeue_from_legacy == true.')
        return

    self.get_process_values(kwargs)
    workers = GreenPool(self.concurrency)
    self.report_first_time = self.report_last_time = time()
    self.report_objects = 0
    try:
        self.logger.debug('Run begin')
        containers_to_delete = []
        account_iter = self.iter_task_accounts_to_expire()
        for task_account, my_index, divisor in account_iter:
            container_count, obj_count = self.swift.get_account_info(
                task_account)

            # the task account is skipped if there are no task container
            if not container_count:
                continue

            self.logger.info(
                _('Pass beginning for task account %(account)s; '
                  '%(container_count)s possible containers; '
                  '%(obj_count)s possible objects') % {
                    'account': task_account,
                    'container_count': container_count,
                    'obj_count': obj_count})

            account_containers = []
            for task_container in self.iter_task_containers_to_expire(
                    task_account):
                account_containers.append((task_account, task_container))
            containers_to_delete.extend(account_containers)

            # Each delete task is a dict of task_account, task_container,
            # task_object, delete_timestamp and target_path: everything
            # needed to delete the actual object and pop the task from the
            # queue.
            ordered_tasks = self.round_robin_order(
                self.iter_task_to_expire(
                    account_containers, my_index, divisor))
            throttled_tasks = RateLimitedIterator(
                ordered_tasks, elements_per_second=self.tasks_per_second)
            for delete_task in throttled_tasks:
                workers.spawn_n(self.delete_object, **delete_task)

        workers.waitall()
        for task_account, task_container in containers_to_delete:
            try:
                self.swift.delete_container(
                    task_account, task_container,
                    acceptable_statuses=(2, HTTP_NOT_FOUND, HTTP_CONFLICT))
            except (Exception, Timeout) as err:
                self.logger.exception(
                    _('Exception while deleting container %(account)s '
                      '%(container)s %(err)s') % {
                        'account': task_account,
                        'container': task_container,
                        'err': str(err)})
        self.logger.debug('Run end')
        self.report(final=True)
    except (Exception, Timeout):
        self.logger.exception(_('Unhandled exception'))
def _manifest_get_response(self, req, content_length, response_headers,
                           segments):
    """
    Build the response for a manifest GET by streaming its segments.

    :param req: the incoming request
    :param content_length: length given to the Response and used to
                           resolve the Range header
    :param response_headers: headers given to the Response and to
                             ``resolve_etag_is_at_header``
    :param segments: segment listing handed to
                     ``self._segment_listing_iterator``
    :returns: HTTPConflict when validating the first segment fails,
              otherwise a conditional Response wrapping the
              SegmentedIterable
    """
    byteranges = []
    if req.range:
        # swob.Range.ranges_for_length reports the last byte's position
        # plus one, so convert each pair to an inclusive end.
        for range_start, range_stop in req.range.ranges_for_length(
                content_length):
            byteranges.append((range_start, range_stop - 1))

    ver, account, _junk = req.split_path(3, 3, rest_with_last=True)
    raw_listing = self._segment_listing_iterator(
        req, ver, account, segments, byteranges)

    def is_small_segment(seg_dict):
        # Inline data segments are already in memory, so never throttle
        # on their account.
        if 'raw_data' in seg_dict:
            return False
        first = seg_dict.get('start_byte') or 0
        last = seg_dict.get('end_byte')
        if last is None:
            last = int(seg_dict['bytes']) - 1
        requested_length = last - first + 1
        return requested_length < self.slo.rate_limit_under_size

    limited_listing = RateLimitedIterator(
        raw_listing,
        self.slo.rate_limit_segments_per_sec,
        limit_after=self.slo.rate_limit_after_segment,
        ratelimit_if=is_small_segment)

    def with_path(seg_dict):
        # Data segments are passed through untouched; object-backed
        # segments get a copy of the dict with a path key added.
        if 'raw_data' in seg_dict:
            return seg_dict
        return dict(seg_dict,
                    path=self._segment_path(ver, account, seg_dict))

    segment_dicts = (with_path(seg) for seg in limited_listing)

    body_iter = SegmentedIterable(
        req, self.slo.app, segment_dicts,
        name=req.path,
        logger=self.slo.logger,
        ua_suffix="SLO MultipartGET",
        swift_source="SLO",
        max_get_time=self.slo.max_get_time)

    try:
        body_iter.validate_first_segment()
    except (ListingIterError, SegmentError):
        # The first segment could not be validated, so answer with a
        # 409 Conflict instead of starting a body that cannot be completed.
        return HTTPConflict(request=req)

    return Response(
        request=req,
        content_length=content_length,
        headers=response_headers,
        conditional_response=True,
        conditional_etag=resolve_etag_is_at_header(req, response_headers),
        app_iter=body_iter)