def direct_get_container_policy_index(container_ring, account_name,
                                      container_name):
    """
    Query the primary container servers directly and work out which
    storage policy index a container has.

    :param container_ring: ring in which to look up the container locations
    :param account_name: name of the container's account
    :param container_name: name of the container
    :returns: storage policy index, or None if it couldn't get a majority
    """
    def _head_or_none(*head_args):
        # A 404 still hands back response headers, and those are counted;
        # any other client error, timeout or socket error yields None.
        try:
            return direct_head_container(*head_args)
        except ClientException as exc:
            if exc.http_status == 404:
                return exc.http_headers
            return None
        except (Timeout, socket.error):
            return None

    pool = GreenPile()
    partition, primaries = container_ring.get_nodes(account_name,
                                                    container_name)
    for primary in primaries:
        pool.spawn(_head_or_none, primary, partition, account_name,
                   container_name)

    # Keep only the servers that produced headers.
    responses = [hdrs for hdrs in pool if hdrs is not None]
    if len(responses) >= majority_size(len(primaries)):
        return best_policy_index(responses)
    return None
def process_container(self, dbfile):
    """
    Process a container, and update the information in the account.

    A report is only sent when the container's timestamps or usage
    figures differ from the values that were last reported.

    :param dbfile: container DB to process
    """
    started = time.time()
    broker = ContainerBroker(dbfile, logger=self.logger)
    info = broker.get_info()
    # Don't send updates if the container was auto-created since it
    # definitely doesn't have up to date statistics.
    if Timestamp(info['put_timestamp']) <= 0:
        return
    # The account is still inside its suppression window.
    if self.account_suppressions.get(info['account'], 0) > time.time():
        return

    needs_report = (
        info['put_timestamp'] > info['reported_put_timestamp'] or
        info['delete_timestamp'] > info['reported_delete_timestamp'] or
        info['object_count'] != info['reported_object_count'] or
        info['bytes_used'] != info['reported_bytes_used'])
    if not needs_report:
        self.logger.increment('no_changes')
        self.no_changes += 1
        return

    container = '/%s/%s' % (info['account'], info['container'])
    part, nodes = self.get_account_ring().get_nodes(info['account'])
    # One concurrent report per account node.
    events = []
    for node in nodes:
        events.append(spawn(
            self.container_report, node, part, container,
            info['put_timestamp'], info['delete_timestamp'],
            info['object_count'], info['bytes_used'],
            info['storage_policy_index']))
    successes = sum(1 for event in events if is_success(event.wait()))

    log_args = {'container': container, 'dbfile': dbfile}
    if successes >= majority_size(len(events)):
        self.logger.increment('successes')
        self.successes += 1
        self.logger.debug(
            _('Update report sent for %(container)s %(dbfile)s'),
            log_args)
        broker.reported(info['put_timestamp'], info['delete_timestamp'],
                        info['object_count'], info['bytes_used'])
    else:
        self.logger.increment('failures')
        self.failures += 1
        self.logger.debug(
            _('Update report failed for %(container)s %(dbfile)s'),
            log_args)
        # Hold off further reports for this account for a while.
        until = time.time() + self.account_suppression_time
        self.account_suppressions[info['account']] = until
        if self.new_account_suppressions:
            print(info['account'], until,
                  file=self.new_account_suppressions)
    # Only track timing data for attempted updates:
    self.logger.timing_since('timing', started)
def _post_replicate_hook(self, broker, info, responses):
    """
    Update the sync store and the broker's reconciler sync point.

    :param broker: broker handed to the sync store and to
                   ``dump_to_reconciler``
    :param info: mapping from which ``account`` and ``max_row`` are read
    :param responses: sequence of results; entries equal to True are
                      counted towards the majority
    """
    # Nothing is done for containers in the misplaced objects account.
    if info['account'] == MISPLACED_OBJECTS_ACCOUNT:
        return

    try:
        self.sync_store.update_sync_store(broker)
    except Exception:
        # Logged only; the reconciler bookkeeping below still runs.
        self.logger.exception('Failed to update sync_store %s' %
                              broker.db_file)

    sync_point = broker.get_reconciler_sync()
    if not broker.has_multiple_policies():
        if info['max_row'] != sync_point:
            # Single policy: move the sync point straight to max_row.
            broker.update_reconciler_sync(info['max_row'])
            return

    dumped_through = self.dump_to_reconciler(broker, sync_point)
    acks = responses.count(True)
    have_majority = acks >= majority_size(len(responses))
    if dumped_through > sync_point and have_majority:
        # to be safe, only slide up the sync point with a majority on
        # replication
        broker.update_reconciler_sync(dumped_through)
def add_to_reconciler_queue(container_ring, account, container, obj,
                            obj_policy_index, obj_timestamp, op,
                            force=False, conn_timeout=5, response_timeout=15):
    """
    Add an object to the container reconciler's queue. This will cause the
    container reconciler to move it from its current storage policy index to
    the correct storage policy index.

    :param container_ring: container ring
    :param account: the misplaced object's account
    :param container: the misplaced object's container
    :param obj: the misplaced object
    :param obj_policy_index: the policy index where the misplaced object
                             currently is
    :param obj_timestamp: the misplaced object's X-Timestamp. We need this
                          to ensure that the reconciler doesn't overwrite a
                          newer object with an older one.
    :param op: the method of the operation (DELETE or PUT)
    :param force: over-write queue entries newer than obj_timestamp
    :param conn_timeout: max time to wait for connection to container server
    :param response_timeout: max time to wait for response from container
                             server

    :returns: .misplaced_object container name, False on failure. "Success"
              means a majority of containers got the update.
    """
    container_name = get_reconciler_container_name(obj_timestamp)
    object_name = get_reconciler_obj_name(obj_policy_index, account,
                                          container, obj)
    # Forcing stamps the entry with the current time; this allows an
    # operator to re-enqueue an object that has already been popped from
    # the queue to be reprocessed, but could potentially prevent out of
    # order updates from making it into the queue.
    x_timestamp = Timestamp.now().internal if force else obj_timestamp
    entry_headers = {
        'X-Size': 0,
        'X-Etag': obj_timestamp,
        'X-Timestamp': x_timestamp,
        'X-Content-Type': get_reconciler_content_type(op),
    }

    def _put_ok(*put_args, **put_kwargs):
        # 1 when the direct PUT went through, 0 on any expected failure.
        try:
            direct_put_container_object(*put_args, **put_kwargs)
        except (ClientException, Timeout, socket.error):
            return 0
        return 1

    put_options = {
        'headers': entry_headers,
        'conn_timeout': conn_timeout,
        'response_timeout': response_timeout,
    }
    pool = GreenPile()
    part, nodes = container_ring.get_nodes(MISPLACED_OBJECTS_ACCOUNT,
                                           container_name)
    for node in nodes:
        pool.spawn(_put_ok, node, part, MISPLACED_OBJECTS_ACCOUNT,
                   container_name, object_name, **put_options)

    acked = sum(pool)
    if acked < majority_size(len(nodes)):
        return False
    return container_name
def add_to_reconciler_queue(container_ring, account, container, obj,
                            obj_policy_index, obj_timestamp, op,
                            force=False, conn_timeout=5, response_timeout=15):
    """
    Add an object to the container reconciler's queue. This will cause the
    container reconciler to move it from its current storage policy index to
    the correct storage policy index.

    :param container_ring: container ring
    :param account: the misplaced object's account
    :param container: the misplaced object's container
    :param obj: the misplaced object
    :param obj_policy_index: the policy index where the misplaced object
                             currently is
    :param obj_timestamp: the misplaced object's X-Timestamp. We need this
                          to ensure that the reconciler doesn't overwrite a
                          newer object with an older one.
    :param op: the method of the operation (DELETE or PUT)
    :param force: over-write queue entries newer than obj_timestamp
    :param conn_timeout: max time to wait for connection to container server
    :param response_timeout: max time to wait for response from container
                             server

    :returns: .misplaced_object container name, False on failure. "Success"
              means a majority of containers got the update.
    """
    queue_container = get_reconciler_container_name(obj_timestamp)
    queue_object = get_reconciler_obj_name(obj_policy_index, account,
                                           container, obj)
    if not force:
        x_timestamp = obj_timestamp
    else:
        # this allows an operator to re-enqueue an object that has
        # already been popped from the queue to be reprocessed, but
        # could potentially prevent out of order updates from making it
        # into the queue
        x_timestamp = Timestamp(time.time()).internal
    content_type = get_reconciler_content_type(op)
    headers = dict([
        ('X-Size', 0),
        ('X-Etag', obj_timestamp),
        ('X-Timestamp', x_timestamp),
        ('X-Content-Type', content_type),
    ])

    def _attempt_put(*args, **kwargs):
        # Report each node's outcome as 1 (stored) or 0 (expected failure)
        # so the results can simply be summed.
        try:
            direct_put_container_object(*args, **kwargs)
        except (ClientException, Timeout, socket.error):
            return 0
        else:
            return 1

    pile = GreenPile()
    part, nodes = container_ring.get_nodes(MISPLACED_OBJECTS_ACCOUNT,
                                           queue_container)
    for node in nodes:
        pile.spawn(_attempt_put, node, part, MISPLACED_OBJECTS_ACCOUNT,
                   queue_container, queue_object, headers=headers,
                   conn_timeout=conn_timeout,
                   response_timeout=response_timeout)

    stored = sum(pile)
    needed = majority_size(len(nodes))
    return queue_container if stored >= needed else False
def process_container(self, dbfile):
    """
    Process a container, and update the information in the account.

    Nothing is sent when the container info cannot be read because of a
    lock timeout, when the container's put timestamp is not positive,
    while the account is suppressed, or when the stats already match what
    was last reported.

    When every account node that was asked answers 404 the container DB
    is quarantined. This requires at least one answer: an empty node list
    is handled as an ordinary failed update instead of quarantining the
    DB.

    :param dbfile: container DB to process
    """
    start_time = time.time()
    broker = ContainerBroker(dbfile, logger=self.logger)
    try:
        info = broker.get_info()
    except LockTimeout as e:
        self.logger.info(
            "Failed to get container info (Lock timeout: %s); skipping.",
            str(e))
        return
    # Don't send updates if the container was auto-created since it
    # definitely doesn't have up to date statistics.
    if Timestamp(info['put_timestamp']) <= 0:
        return
    if self.account_suppressions.get(info['account'], 0) > time.time():
        return

    if not broker.is_root_container():
        # Don't double-up account stats.
        # The sharder should get these stats to the root container,
        # and the root's updater will get them to the right account.
        info['object_count'] = info['bytes_used'] = 0

    if info['put_timestamp'] > info['reported_put_timestamp'] or \
            info['delete_timestamp'] > info['reported_delete_timestamp'] \
            or info['object_count'] != info['reported_object_count'] or \
            info['bytes_used'] != info['reported_bytes_used']:
        container = '/%s/%s' % (info['account'], info['container'])
        part, nodes = self.get_account_ring().get_nodes(info['account'])
        events = [
            spawn(self.container_report, node, part, container,
                  info['put_timestamp'], info['delete_timestamp'],
                  info['object_count'], info['bytes_used'],
                  info['storage_policy_index'])
            for node in nodes
        ]
        successes = 0
        stub404s = 0
        for event in events:
            result = event.wait()
            if is_success(result):
                successes += 1
            if result == 404:
                stub404s += 1

        log_args = {'container': container, 'dbfile': dbfile}
        if successes >= majority_size(len(events)):
            self.logger.increment('successes')
            self.successes += 1
            self.logger.debug(
                _('Update report sent for %(container)s %(dbfile)s'),
                log_args)
            broker.reported(info['put_timestamp'],
                            info['delete_timestamp'], info['object_count'],
                            info['bytes_used'])
        elif events and stub404s == len(events):
            # `events` must be non-empty: with no account nodes at all,
            # 0 == len(events) would otherwise quarantine the DB without
            # a single server having answered 404.
            self.logger.increment('failures')
            self.failures += 1
            self.logger.debug(
                _('Update report stub for %(container)s %(dbfile)s'),
                log_args)
            broker.quarantine('no account replicas exist')
            # All that's left at this point is a few sacks of Gnocchi,
            # easily collected by the dark data watcher in object auditor.
        else:
            self.logger.increment('failures')
            self.failures += 1
            self.logger.debug(
                _('Update report failed for %(container)s %(dbfile)s'),
                log_args)
            # Hold off further reports for this account for a while.
            self.account_suppressions[info['account']] = until = \
                time.time() + self.account_suppression_time
            if self.new_account_suppressions:
                print(info['account'], until,
                      file=self.new_account_suppressions)
        # Only track timing data for attempted updates:
        self.logger.timing_since('timing', start_time)
    else:
        self.logger.increment('no_changes')
        self.no_changes += 1