def test_is_local_device(self):
    my_ips = ["127.0.0.1",
              "0000:0000:0000:0000:0000:0000:0000:0001"]
    my_port = 6000
    self.assertTrue(is_local_device(my_ips, my_port,
                                    "localhost", my_port))
    self.assertFalse(is_local_device(my_ips, my_port,
                                     "localhost", my_port + 1))
    self.assertFalse(is_local_device(my_ips, my_port,
                                     "127.0.0.2", my_port))
    # for those that don't have a local port
    self.assertTrue(is_local_device(my_ips, None, my_ips[0], None))
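# The tests in this section pin down the locality contract the call
# sites below rely on: hostnames are resolved before comparison, IPv6
# addresses match regardless of spelling ("::1" vs. the fully expanded
# form), and a None port on either side disables the port check (the
# servers_per_port case). The sketch below illustrates that contract
# for this document only; it is NOT Swift's actual
# swift.common.utils.is_local_device, and _normalize_ipv6 is a
# hypothetical helper name.
import socket


def _normalize_ipv6(ip):
    # The inet_pton/inet_ntop round trip maps every spelling of an
    # IPv6 address ("::1", "0000:...:0001") to one canonical string,
    # so plain string comparison works afterwards.
    packed = socket.inet_pton(socket.AF_INET6, ip)
    return socket.inet_ntop(socket.AF_INET6, packed)


def is_local_device(my_ips, my_port, dev_ip, dev_port):
    """Return True if dev_ip/dev_port identify this server; a None
    port on either side means "ignore ports entirely"."""
    try:
        # getaddrinfo resolves hostnames and handles IPv4/IPv6
        # uniformly; numeric addresses pass straight through.
        addrinfo = socket.getaddrinfo(dev_ip, dev_port)
    except socket.gaierror:
        return False
    candidates = set()
    for family, _, _, _, sockaddr in addrinfo:
        ip = sockaddr[0]
        if family == socket.AF_INET6:
            ip = _normalize_ipv6(ip)
        candidates.add(ip)
    mine = {_normalize_ipv6(ip) if ':' in ip else ip for ip in my_ips}
    ports_match = (my_port is None or dev_port is None or
                   my_port == dev_port)
    return ports_match and any(ip in mine for ip in candidates)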
def run_once(self, *args, **kwargs):
    """Run a replication pass once."""
    self._zero_stats()
    dirs = []
    ips = whataremyips(self.bind_ip)
    if not ips:
        self.logger.error(_('ERROR Failed to get my own IPs?'))
        return
    self._local_device_ids = set()
    for node in self.ring.devs:
        if node and is_local_device(ips, self.port,
                                    node['replication_ip'],
                                    node['replication_port']):
            if self.mount_check and not ismount(
                    os.path.join(self.root, node['device'])):
                self._add_failure_stats(
                    [(failure_dev['replication_ip'],
                      failure_dev['device'])
                     for failure_dev in self.ring.devs
                     if failure_dev])
                self.logger.warn(
                    _('Skipping %(device)s as it is not mounted') % node)
                continue
            unlink_older_than(
                os.path.join(self.root, node['device'], 'tmp'),
                time.time() - self.reclaim_age)
            datadir = os.path.join(self.root, node['device'], self.datadir)
            if os.path.isdir(datadir):
                self._local_device_ids.add(node['id'])
                dirs.append((datadir, node['id']))
    self.logger.info(_('Beginning replication run'))
    for part, object_file, node_id in roundrobin_datadirs(dirs):
        self.cpool.spawn_n(
            self._replicate_object, part, object_file, node_id)
    self.cpool.waitall()
    self.logger.info(_('Replication run OVER'))
    self._report_stats()
def handle_container(self, handler, job):
    part, container_nodes = self.container_ring.get_nodes(
        handler._account.encode('utf-8'),
        handler._container.encode('utf-8'))
    nodes_count = len(container_nodes)
    for index, node in enumerate(container_nodes):
        if not is_local_device(self.myips, None,
                               node['ip'], node['port']):
            continue
        broker = self.get_broker(handler._account,
                                 handler._container,
                                 part, node)
        broker_info = broker.get_info()
        last_row = handler.get_last_row(broker_info['id'])
        if not last_row:
            last_row = 0
        try:
            items = broker.get_items_since(last_row, self.items_chunk)
        except DatabaseConnectionError:
            continue
        if not items:
            return (0, 0, None, broker_info['id'])
        self.log(
            'info', 'Processing %d rows since row %d for %s/%s' % (
                len(items), last_row, handler._account,
                handler._container))
        owned_count, verified_count = self.process_items(
            handler, items, nodes_count, index, job)
        return (owned_count, verified_count,
                items[-1]['ROWID'], broker_info['id'])
    return (0, 0, None, None)
def run_once(self, *args, **kwargs):
    """Run a replication pass once."""
    self._zero_stats()
    dirs = []
    ips = whataremyips()
    if not ips:
        self.logger.error(_('ERROR Failed to get my own IPs?'))
        return
    self._local_device_ids = set()
    for node in self.ring.devs:
        if node and is_local_device(ips, self.port,
                                    node['replication_ip'],
                                    node['replication_port']):
            if self.mount_check and not ismount(
                    os.path.join(self.root, node['device'])):
                self.logger.warn(
                    _('Skipping %(device)s as it is not mounted') % node)
                continue
            unlink_older_than(
                os.path.join(self.root, node['device'], 'tmp'),
                time.time() - self.reclaim_age)
            datadir = os.path.join(self.root, node['device'], self.datadir)
            if os.path.isdir(datadir):
                self._local_device_ids.add(node['id'])
                dirs.append((datadir, node['id']))
    self.logger.info(_('Beginning replication run'))
    for part, object_file, node_id in roundrobin_datadirs(dirs):
        self.cpool.spawn_n(
            self._replicate_object, part, object_file, node_id)
    self.cpool.waitall()
    self.logger.info(_('Replication run OVER'))
    self._report_stats()
def collect_parts(self, override_devices=None, override_partitions=None):
    """
    Helper for yielding partitions in the top level reconstructor
    """
    override_devices = override_devices or []
    override_partitions = override_partitions or []
    ips = whataremyips()
    for policy in POLICIES:
        if policy.policy_type != EC_POLICY:
            continue
        self._diskfile_mgr = self._df_router[policy]
        self.load_object_ring(policy)
        data_dir = get_data_dir(policy)
        local_devices = itertools.ifilter(
            lambda dev: dev and is_local_device(
                ips, self.port,
                dev['replication_ip'], dev['replication_port']),
            policy.object_ring.devs)
        for local_dev in local_devices:
            if override_devices and (local_dev['device'] not in
                                     override_devices):
                continue
            dev_path = join(self.devices_dir, local_dev['device'])
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(int(policy)))
            if self.mount_check and not ismount(dev_path):
                self.logger.warn(_('%s is not mounted'),
                                 local_dev['device'])
                continue
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception(
                        'Unable to create %s' % obj_path)
                continue
            try:
                partitions = os.listdir(obj_path)
            except OSError:
                self.logger.exception(
                    'Unable to list partitions in %r' % obj_path)
                continue
            for partition in partitions:
                part_path = join(obj_path, partition)
                if not (partition.isdigit() and
                        os.path.isdir(part_path)):
                    self.logger.warning(
                        'Unexpected entity in data dir: %r' % part_path)
                    remove_file(part_path)
                    continue
                partition = int(partition)
                if override_partitions and (partition not in
                                            override_partitions):
                    continue
                part_info = {
                    'local_dev': local_dev,
                    'policy': policy,
                    'partition': partition,
                    'part_path': part_path,
                }
                yield part_info
def reap_device(self, device):
    """
    Called once per pass for each device on the server. This will scan
    the accounts directory for the device, looking for partitions this
    device is the primary for, then looking for account databases that
    are marked status=DELETED and still have containers and calling
    :func:`reap_account`. Account databases marked status=DELETED that
    no longer have containers will eventually be permanently removed
    by the reclaim process within the account replicator (see
    :mod:`swift.db_replicator`).

    :param device: The device to look for accounts to be deleted.
    """
    datadir = os.path.join(self.devices, device, DATADIR)
    if not os.path.exists(datadir):
        return
    for partition in os.listdir(datadir):
        partition_path = os.path.join(datadir, partition)
        if not partition.isdigit():
            continue
        nodes = self.get_account_ring().get_part_nodes(int(partition))
        if not os.path.isdir(partition_path):
            continue
        container_shard = None
        for container_shard, node in enumerate(nodes):
            if is_local_device(self.myips, None, node['ip'], None) and \
                    (not self.bind_port or
                     self.bind_port == node['port']) and \
                    (device == node['device']):
                break
        else:
            continue
        for suffix in os.listdir(partition_path):
            suffix_path = os.path.join(partition_path, suffix)
            if not os.path.isdir(suffix_path):
                continue
            for hsh in os.listdir(suffix_path):
                hsh_path = os.path.join(suffix_path, hsh)
                if not os.path.isdir(hsh_path):
                    continue
                for fname in sorted(os.listdir(hsh_path), reverse=True):
                    if fname.endswith('.ts'):
                        break
                    elif fname.endswith('.db'):
                        self.start_time = time()
                        broker = \
                            AccountBroker(os.path.join(hsh_path, fname),
                                          logger=self.logger)
                        if broker.is_status_deleted() and \
                                not broker.empty():
                            self.reap_account(
                                broker, partition, nodes,
                                container_shard=container_shard)
def collect_parts(self, override_devices=None, override_partitions=None):
    """
    Helper for yielding partitions in the top level reconstructor
    """
    override_devices = override_devices or []
    override_partitions = override_partitions or []
    ips = whataremyips(self.bind_ip)
    for policy in POLICIES:
        if policy.policy_type != EC_POLICY:
            continue
        self._diskfile_mgr = self._df_router[policy]
        self.load_object_ring(policy)
        data_dir = get_data_dir(policy)
        local_devices = itertools.ifilter(
            lambda dev: dev and is_local_device(
                ips, self.port,
                dev["replication_ip"], dev["replication_port"]),
            policy.object_ring.devs,
        )
        for local_dev in local_devices:
            if override_devices and (
                    local_dev["device"] not in override_devices):
                continue
            dev_path = self._df_router[policy].get_dev_path(
                local_dev["device"])
            if not dev_path:
                self.logger.warn(_("%s is not mounted"),
                                 local_dev["device"])
                continue
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(int(policy)))
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception("Unable to create %s" % obj_path)
                continue
            try:
                partitions = os.listdir(obj_path)
            except OSError:
                self.logger.exception(
                    "Unable to list partitions in %r" % obj_path)
                continue
            for partition in partitions:
                part_path = join(obj_path, partition)
                if not (partition.isdigit() and
                        os.path.isdir(part_path)):
                    self.logger.warning(
                        "Unexpected entity in data dir: %r" % part_path)
                    remove_file(part_path)
                    continue
                partition = int(partition)
                if override_partitions and (
                        partition not in override_partitions):
                    continue
                part_info = {
                    "local_dev": local_dev,
                    "policy": policy,
                    "partition": partition,
                    "part_path": part_path,
                }
                yield part_info
def build_replication_jobs(self, policy, ips, override_devices=None,
                           override_partitions=None):
    """
    Helper function for collect_jobs to build jobs for replication
    using replication style storage policy
    """
    jobs = []
    data_dir = get_data_dir(policy)
    for local_dev in [dev for dev in policy.object_ring.devs
                      if (dev
                          and is_local_device(ips, self.port,
                                              dev['replication_ip'],
                                              dev['replication_port'])
                          and (override_devices is None
                               or dev['device'] in override_devices))]:
        dev_path = join(self.devices_dir, local_dev['device'])
        obj_path = join(dev_path, data_dir)
        tmp_path = join(dev_path, get_tmp_dir(policy))
        if self.mount_check and not ismount(dev_path):
            self.logger.warn(_('%s is not mounted'), local_dev['device'])
            continue
        unlink_older_than(tmp_path, time.time() - self.reclaim_age)
        if not os.path.exists(obj_path):
            try:
                mkdirs(obj_path)
            except Exception:
                self.logger.exception('ERROR creating %s' % obj_path)
                continue
        for partition in os.listdir(obj_path):
            if (override_partitions is not None
                    and partition not in override_partitions):
                continue
            try:
                job_path = join(obj_path, partition)
                part_nodes = policy.object_ring.get_part_nodes(
                    int(partition))
                nodes = [node for node in part_nodes
                         if node['id'] != local_dev['id']]
                jobs.append(
                    dict(path=job_path,
                         device=local_dev['device'],
                         obj_path=obj_path,
                         nodes=nodes,
                         delete=len(nodes) > len(part_nodes) - 1,
                         policy=policy,
                         partition=partition,
                         region=local_dev['region']))
            except ValueError:
                continue
    return jobs
def run_once(self, *args, **kwargs):
    """Run a replication pass once."""
    self._zero_stats()
    dirs = []
    ips = whataremyips(self.bind_ip)
    if not ips:
        self.logger.error(_('ERROR Failed to get my own IPs?'))
        return
    if self.handoffs_only:
        self.logger.warning(
            'Starting replication pass with handoffs_only enabled. '
            'This mode is not intended for normal '
            'operation; use handoffs_only with care.')
    self._local_device_ids = set()
    found_local = False
    for node in self.ring.devs:
        if node and is_local_device(ips, self.port,
                                    node['replication_ip'],
                                    node['replication_port']):
            found_local = True
            if not check_drive(self.root, node['device'],
                               self.mount_check):
                self._add_failure_stats(
                    [(failure_dev['replication_ip'],
                      failure_dev['device'])
                     for failure_dev in self.ring.devs
                     if failure_dev])
                self.logger.warning(
                    _('Skipping %(device)s as it is not mounted') % node)
                continue
            unlink_older_than(
                os.path.join(self.root, node['device'], 'tmp'),
                time.time() - self.reclaim_age)
            datadir = os.path.join(self.root, node['device'], self.datadir)
            if os.path.isdir(datadir):
                self._local_device_ids.add(node['id'])
                filt = (self.handoffs_only_filter(node['id'])
                        if self.handoffs_only else None)
                dirs.append((datadir, node['id'], filt))
    if not found_local:
        self.logger.error("Can't find itself %s with port %s in ring "
                          "file, not replicating",
                          ", ".join(ips), self.port)
    self.logger.info(_('Beginning replication run'))
    for part, object_file, node_id in roundrobin_datadirs(dirs):
        self.cpool.spawn_n(
            self._replicate_object, part, object_file, node_id)
    self.cpool.waitall()
    self.logger.info(_('Replication run OVER'))
    if self.handoffs_only:
        self.logger.warning(
            'Finished replication pass with handoffs_only enabled. '
            'If handoffs_only is no longer required, disable it.')
    self._report_stats()
def process_repl(self, policy, ips, override_devices=None,
                 override_partitions=None):
    """
    Helper function for collect_jobs to build jobs for replication
    using replication style storage policy
    """
    jobs = []
    obj_ring = self.get_object_ring(policy.idx)
    data_dir = get_data_dir(policy.idx)
    for local_dev in [dev for dev in obj_ring.devs
                      if (dev
                          and is_local_device(ips, self.port,
                                              dev['replication_ip'],
                                              dev['replication_port'])
                          and (override_devices is None
                               or dev['device'] in override_devices))]:
        dev_path = join(self.devices_dir, local_dev['device'])
        obj_path = join(dev_path, data_dir)
        tmp_path = join(dev_path, get_tmp_dir(int(policy)))
        if self.mount_check and not ismount(dev_path):
            self.logger.warn(_('%s is not mounted'), local_dev['device'])
            continue
        unlink_older_than(tmp_path, time.time() - self.reclaim_age)
        if not os.path.exists(obj_path):
            try:
                mkdirs(obj_path)
            except Exception:
                self.logger.exception('ERROR creating %s' % obj_path)
                continue
        for partition in os.listdir(obj_path):
            if (override_partitions is not None
                    and partition not in override_partitions):
                continue
            try:
                job_path = join(obj_path, partition)
                part_nodes = obj_ring.get_part_nodes(int(partition))
                nodes = [node for node in part_nodes
                         if node['id'] != local_dev['id']]
                jobs.append(
                    dict(path=job_path,
                         device=local_dev['device'],
                         obj_path=obj_path,
                         nodes=nodes,
                         delete=len(nodes) > len(part_nodes) - 1,
                         policy_idx=policy.idx,
                         partition=partition,
                         object_ring=obj_ring,
                         region=local_dev['region']))
            except ValueError:
                continue
    return jobs
def run_once(self, *args, **kwargs):
    """Run a replication pass once."""
    self._zero_stats()
    dirs = []
    ips = whataremyips(self.bind_ip)
    if not ips:
        self.logger.error(_('ERROR Failed to get my own IPs?'))
        return
    self._local_device_ids = set()
    found_local = False
    for node in self.ring.devs:
        if node and is_local_device(ips, self.port,
                                    node['replication_ip'],
                                    node['replication_port']):
            found_local = True
            if not check_drive(self.root, node['device'],
                               self.mount_check):
                self._add_failure_stats(
                    [(failure_dev['replication_ip'],
                      failure_dev['device'])
                     for failure_dev in self.ring.devs
                     if failure_dev])
                self.logger.warning(
                    _('Skipping %(device)s as it is not mounted') % node)
                continue
            unlink_older_than(
                os.path.join(self.root, node['device'], 'tmp'),
                time.time() - self.reclaim_age)
            datadir = os.path.join(self.root, node['device'], self.datadir)
            if os.path.isdir(datadir):
                self._local_device_ids.add(node['id'])
                dirs.append((datadir, node['id']))
    if not found_local:
        self.logger.error("Can't find itself %s with port %s in ring "
                          "file, not replicating",
                          ", ".join(ips), self.port)
    self.logger.info(_('Beginning replication run'))
    for part, object_file, node_id in roundrobin_datadirs(dirs):
        self.cpool.spawn_n(
            self._replicate_object, part, object_file, node_id)
    self.cpool.waitall()
    self.logger.info(_('Replication run OVER'))
    self._report_stats()
def get_local_devices(self):
    """
    Returns a set of all local devices in all replication-type storage
    policies.

    This is the device names, e.g. "sdq" or "d1234" or something, not
    the full ring entries.
    """
    ips = whataremyips(self.bind_ip)
    local_devices = set()
    for policy in POLICIES:
        if policy.policy_type != REPL_POLICY:
            continue
        self.load_object_ring(policy)
        for device in policy.object_ring.devs:
            if device and is_local_device(
                    ips, self.port,
                    device['replication_ip'],
                    device['replication_port']):
                local_devices.add(device['device'])
    return local_devices
def _get_db_info(self, account, container):
    """
    Returns the database path of the container

    :param account: UTF-8 encoded account name
    :param container: UTF-8 encoded container name
    :returns: a tuple of (db path, nodes count, index of replica)
    """
    part, container_nodes = self.container_ring.get_nodes(
        account, container)
    nodes_count = len(container_nodes)
    db_hash = hash_path(account, container)
    db_dir = storage_directory(DATADIR, part, db_hash)
    for index, node in enumerate(container_nodes):
        if not is_local_device(self.myips, None,
                               node['ip'], node['port']):
            continue
        db_path = os.path.join(self.root, node['device'], db_dir,
                               db_hash + '.db')
        return db_path, nodes_count, index
    return None, None, None
def handle_container(self, settings):
    part, container_nodes = self.container_ring.get_nodes(
        settings['account'], settings['container'])
    nodes_count = len(container_nodes)
    handler = self.handler_class(self.status_dir, settings)
    for index, node in enumerate(container_nodes):
        if not is_local_device(self.myips, None,
                               node['ip'], node['port']):
            continue
        broker = self.get_broker(settings['account'],
                                 settings['container'],
                                 part, node)
        broker_info = broker.get_info()
        last_row = handler.get_last_row(broker_info['id'])
        if not last_row:
            last_row = 0
        try:
            items = broker.get_items_since(last_row, self.items_chunk)
        except DatabaseConnectionError:
            continue
        if items:
            self.process_items(handler, items, nodes_count, index)
            handler.save_last_row(items[-1]['ROWID'], broker_info['id'])
        return
def build_replication_jobs(self, policy, ips, override_devices=None,
                           override_partitions=None):
    """
    Helper function for collect_jobs to build jobs for replication
    using replication style storage policy
    """
    jobs = []
    df_mgr = self._df_router[policy]
    self.all_devs_info.update(
        [(dev['replication_ip'], dev['device'])
         for dev in policy.object_ring.devs if dev])
    data_dir = get_data_dir(policy)
    found_local = False
    for local_dev in [dev for dev in policy.object_ring.devs
                      if (dev
                          and is_local_device(ips, self.port,
                                              dev['replication_ip'],
                                              dev['replication_port'])
                          and (override_devices is None
                               or dev['device'] in override_devices))]:
        found_local = True
        dev_path = check_drive(self.devices_dir, local_dev['device'],
                               self.mount_check)
        if not dev_path:
            self._add_failure_stats(
                [(failure_dev['replication_ip'],
                  failure_dev['device'])
                 for failure_dev in policy.object_ring.devs
                 if failure_dev])
            self.logger.warning(
                _('%s is not mounted'), local_dev['device'])
            continue
        obj_path = join(dev_path, data_dir)
        tmp_path = join(dev_path, get_tmp_dir(policy))
        unlink_older_than(tmp_path, time.time() - df_mgr.reclaim_age)
        if not os.path.exists(obj_path):
            try:
                mkdirs(obj_path)
            except Exception:
                self.logger.exception('ERROR creating %s' % obj_path)
                continue
        for partition in os.listdir(obj_path):
            if (override_partitions is not None
                    and partition not in override_partitions):
                continue
            if (partition.startswith('auditor_status_')
                    and partition.endswith('.json')):
                # ignore auditor status files
                continue
            part_nodes = None
            try:
                job_path = join(obj_path, partition)
                part_nodes = policy.object_ring.get_part_nodes(
                    int(partition))
                nodes = [node for node in part_nodes
                         if node['id'] != local_dev['id']]
                jobs.append(
                    dict(path=job_path,
                         device=local_dev['device'],
                         obj_path=obj_path,
                         nodes=nodes,
                         delete=len(nodes) > len(part_nodes) - 1,
                         policy=policy,
                         partition=partition,
                         region=local_dev['region']))
            except ValueError:
                if part_nodes:
                    self._add_failure_stats(
                        [(failure_dev['replication_ip'],
                          failure_dev['device'])
                         for failure_dev in nodes])
                else:
                    self._add_failure_stats(
                        [(failure_dev['replication_ip'],
                          failure_dev['device'])
                         for failure_dev in policy.object_ring.devs
                         if failure_dev])
                continue
    if not found_local:
        self.logger.error("Can't find itself in policy with index %d "
                          "with ips %s and with port %s in ring file, "
                          "not replicating",
                          int(policy), ", ".join(ips), self.port)
    return jobs
def container_sync(self, path):
    """
    Checks the given path for a container database, determines if
    syncing is turned on for that database and, if so, sends any
    updates to the other container.

    :param path: the path to a container db
    """
    broker = None
    try:
        broker = ContainerBroker(path)
        info = broker.get_info()
        x, nodes = self.container_ring.get_nodes(info['account'],
                                                 info['container'])
        for ordinal, node in enumerate(nodes):
            if is_local_device(self._myips, self._myport,
                               node['ip'], node['port']):
                break
        else:
            return
        if not broker.is_deleted():
            sync_to = None
            user_key = None
            sync_point1 = info['x_container_sync_point1']
            sync_point2 = info['x_container_sync_point2']
            for key, (value, timestamp) in broker.metadata.iteritems():
                if key.lower() == 'x-container-sync-to':
                    sync_to = value
                elif key.lower() == 'x-container-sync-key':
                    user_key = value
            if not sync_to or not user_key:
                self.container_skips += 1
                self.logger.increment('skips')
                return
            err, sync_to, realm, realm_key = validate_sync_to(
                sync_to, self.allowed_sync_hosts, self.realms_conf)
            if err:
                self.logger.info(
                    _('ERROR %(db_file)s: %(validate_sync_to_err)s'),
                    {'db_file': str(broker),
                     'validate_sync_to_err': err})
                self.container_failures += 1
                self.logger.increment('failures')
                return
            stop_at = time() + self.container_time
            next_sync_point = None
            while time() < stop_at and sync_point2 < sync_point1:
                rows = broker.get_items_since(sync_point2, 1)
                if not rows:
                    break
                row = rows[0]
                if row['ROWID'] > sync_point1:
                    break
                key = hash_path(info['account'], info['container'],
                                row['name'], raw_digest=True)
                # This node will only initially sync out one third of
                # the objects (if 3 replicas, 1/4 if 4, etc.) and will
                # skip problematic rows as needed in case of faults.
                # This section will attempt to sync previously skipped
                # rows in case the previous attempts by any of the
                # nodes didn't succeed.
                if not self.container_sync_row(row, sync_to, user_key,
                                               broker, info, realm,
                                               realm_key):
                    if not next_sync_point:
                        next_sync_point = sync_point2
                sync_point2 = row['ROWID']
                broker.set_x_container_sync_points(None, sync_point2)
            if next_sync_point:
                broker.set_x_container_sync_points(None, next_sync_point)
            while time() < stop_at:
                rows = broker.get_items_since(sync_point1, 1)
                if not rows:
                    break
                row = rows[0]
                key = hash_path(info['account'], info['container'],
                                row['name'], raw_digest=True)
                # This node will only initially sync out one third of
                # the objects (if 3 replicas, 1/4 if 4, etc.). It'll
                # come back around to the section above and attempt to
                # sync previously skipped rows in case the other nodes
                # didn't succeed or in case it failed to do so the
                # first time.
                if unpack_from('>I', key)[0] % \
                        len(nodes) == ordinal:
                    self.container_sync_row(row, sync_to, user_key,
                                            broker, info, realm,
                                            realm_key)
                sync_point1 = row['ROWID']
                broker.set_x_container_sync_points(sync_point1, None)
            self.container_syncs += 1
            self.logger.increment('syncs')
    except (Exception, Timeout) as err:
        self.container_failures += 1
        self.logger.increment('failures')
        self.logger.exception(_('ERROR Syncing %s'),
                              broker if broker else path)
def container_sync(self, path):
    """
    Checks the given path for a container database, determines if
    syncing is turned on for that database and, if so, sends any
    updates to the other container.

    :param path: the path to a container db
    """
    broker = None
    try:
        broker = ContainerBroker(path)
        # The path we pass to the ContainerBroker is a real path of
        # a container DB. If we get here, however, it means that this
        # path is linked from the sync_containers dir. In rare cases
        # of races or process failures the link can be stale and the
        # get_info below will raise a "DB doesn't exist" exception.
        # In this case we remove the stale link and raise an error
        # since in most cases the db should be there.
        try:
            info = broker.get_info()
        except DatabaseConnectionError as db_err:
            if str(db_err).endswith("DB doesn't exist"):
                self.sync_store.remove_synced_container(broker)
            raise
        x, nodes = self.container_ring.get_nodes(info['account'],
                                                 info['container'])
        for ordinal, node in enumerate(nodes):
            if is_local_device(self._myips, self._myport,
                               node['ip'], node['port']):
                break
        else:
            return
        if not broker.is_deleted():
            sync_to = None
            user_key = None
            sync_point1 = info['x_container_sync_point1']
            sync_point2 = info['x_container_sync_point2']
            for key, (value, timestamp) in broker.metadata.items():
                if key.lower() == 'x-container-sync-to':
                    sync_to = value
                elif key.lower() == 'x-container-sync-key':
                    user_key = value
            if not sync_to or not user_key:
                self.container_skips += 1
                self.logger.increment('skips')
                return
            err, sync_to, realm, realm_key = validate_sync_to(
                sync_to, self.allowed_sync_hosts, self.realms_conf)
            if err:
                self.logger.info(
                    _('ERROR %(db_file)s: %(validate_sync_to_err)s'),
                    {'db_file': str(broker),
                     'validate_sync_to_err': err})
                self.container_failures += 1
                self.logger.increment('failures')
                return
            stop_at = time() + self.container_time
            next_sync_point = None
            while time() < stop_at and sync_point2 < sync_point1:
                rows = broker.get_items_since(sync_point2, 1)
                if not rows:
                    break
                row = rows[0]
                if row['ROWID'] > sync_point1:
                    break
                key = hash_path(info['account'], info['container'],
                                row['name'], raw_digest=True)
                # This node will only initially sync out one third of
                # the objects (if 3 replicas, 1/4 if 4, etc.) and will
                # skip problematic rows as needed in case of faults.
                # This section will attempt to sync previously skipped
                # rows in case the previous attempts by any of the
                # nodes didn't succeed.
                if not self.container_sync_row(
                        row, sync_to, user_key, broker,
                        info, realm, realm_key):
                    if not next_sync_point:
                        next_sync_point = sync_point2
                sync_point2 = row['ROWID']
                broker.set_x_container_sync_points(None, sync_point2)
            if next_sync_point:
                broker.set_x_container_sync_points(None, next_sync_point)
            while time() < stop_at:
                rows = broker.get_items_since(sync_point1, 1)
                if not rows:
                    break
                row = rows[0]
                key = hash_path(info['account'], info['container'],
                                row['name'], raw_digest=True)
                # This node will only initially sync out one third of
                # the objects (if 3 replicas, 1/4 if 4, etc.). It'll
                # come back around to the section above and attempt to
                # sync previously skipped rows in case the other nodes
                # didn't succeed or in case it failed to do so the
                # first time.
                if unpack_from('>I', key)[0] % \
                        len(nodes) == ordinal:
                    self.container_sync_row(
                        row, sync_to, user_key, broker,
                        info, realm, realm_key)
                sync_point1 = row['ROWID']
                broker.set_x_container_sync_points(sync_point1, None)
            self.container_syncs += 1
            self.logger.increment('syncs')
    except (Exception, Timeout):
        self.container_failures += 1
        self.logger.increment('failures')
        self.logger.exception(_('ERROR Syncing %s'),
                              broker if broker else path)
def container_sync(self, path):
    """
    Checks the given path for a container database, determines if
    syncing is turned on for that database and, if so, sends any
    updates to the other container.

    :param path: the path to a container db
    """
    broker = None
    try:
        broker = ContainerBroker(path, logger=self.logger)
        # The path we pass to the ContainerBroker is a real path of
        # a container DB. If we get here, however, it means that this
        # path is linked from the sync_containers dir. In rare cases
        # of races or process failures the link can be stale and the
        # get_info below will raise a "DB doesn't exist" exception.
        # In this case we remove the stale link and raise an error
        # since in most cases the db should be there.
        try:
            info = broker.get_info()
        except DatabaseConnectionError as db_err:
            if str(db_err).endswith("DB doesn't exist"):
                self.sync_store.remove_synced_container(broker)
            raise
        x, nodes = self.container_ring.get_nodes(info['account'],
                                                 info['container'])
        for ordinal, node in enumerate(nodes):
            if is_local_device(self._myips, self._myport,
                               node['ip'], node['port']):
                break
        else:
            return
        if not broker.is_deleted():
            sync_to = None
            user_key = None
            sync_point1 = info['x_container_sync_point1']
            sync_point2 = info['x_container_sync_point2']
            for key, (value, timestamp) in broker.metadata.items():
                if key.lower() == 'x-container-sync-to':
                    sync_to = value
                elif key.lower() == 'x-container-sync-key':
                    user_key = value
            if not sync_to or not user_key:
                self.container_skips += 1
                self.logger.increment('skips')
                return
            err, sync_to, realm, realm_key = validate_sync_to(
                sync_to, self.allowed_sync_hosts, self.realms_conf)
            if err:
                self.logger.info(
                    _('ERROR %(db_file)s: %(validate_sync_to_err)s'),
                    {'db_file': str(broker),
                     'validate_sync_to_err': err})
                self.container_failures += 1
                self.logger.increment('failures')
                return
            start_at = time()
            stop_at = start_at + self.container_time
            next_sync_point = None
            sync_stage_time = start_at
            try:
                while time() < stop_at and sync_point2 < sync_point1:
                    rows = broker.get_items_since(sync_point2, 1)
                    if not rows:
                        break
                    row = rows[0]
                    if row['ROWID'] > sync_point1:
                        break
                    # This node will only initially sync out one third
                    # of the objects (if 3 replicas, 1/4 if 4, etc.)
                    # and will skip problematic rows as needed in case
                    # of faults.
                    # This section will attempt to sync previously
                    # skipped rows in case the previous attempts by any
                    # of the nodes didn't succeed.
                    if not self.container_sync_row(
                            row, sync_to, user_key, broker,
                            info, realm, realm_key):
                        if not next_sync_point:
                            next_sync_point = sync_point2
                    sync_point2 = row['ROWID']
                    broker.set_x_container_sync_points(None, sync_point2)
                if next_sync_point:
                    broker.set_x_container_sync_points(
                        None, next_sync_point)
                else:
                    next_sync_point = sync_point2
                sync_stage_time = time()
                while sync_stage_time < stop_at:
                    rows = broker.get_items_since(sync_point1, 1)
                    if not rows:
                        break
                    row = rows[0]
                    key = hash_path(info['account'], info['container'],
                                    row['name'], raw_digest=True)
                    # This node will only initially sync out one third
                    # of the objects (if 3 replicas, 1/4 if 4, etc.).
                    # It'll come back around to the section above and
                    # attempt to sync previously skipped rows in case
                    # the other nodes didn't succeed or in case it
                    # failed to do so the first time.
                    if unpack_from('>I', key)[0] % \
                            len(nodes) == ordinal:
                        self.container_sync_row(
                            row, sync_to, user_key, broker,
                            info, realm, realm_key)
                    sync_point1 = row['ROWID']
                    broker.set_x_container_sync_points(sync_point1, None)
                    sync_stage_time = time()
                self.container_syncs += 1
                self.logger.increment('syncs')
            finally:
                self.container_report(start_at, sync_stage_time,
                                      sync_point1, next_sync_point,
                                      info, broker.get_max_row())
    except (Exception, Timeout):
        self.container_failures += 1
        self.logger.increment('failures')
        self.logger.exception(_('ERROR Syncing %s'),
                              broker if broker else path)
def collect_parts(self, override_devices=None, override_partitions=None):
    """
    Helper for getting partitions in the top level reconstructor

    In handoffs_only mode primary partitions will not be included in
    the returned (possibly empty) list.
    """
    override_devices = override_devices or []
    override_partitions = override_partitions or []
    ips = whataremyips(self.bind_ip)
    ec_policies = (policy for policy in POLICIES
                   if policy.policy_type == EC_POLICY)

    policy2devices = {}

    for policy in ec_policies:
        self.load_object_ring(policy)
        local_devices = list(six.moves.filter(
            lambda dev: dev and is_local_device(
                ips, self.port,
                dev['replication_ip'], dev['replication_port']),
            policy.object_ring.devs))

        if override_devices:
            local_devices = list(six.moves.filter(
                lambda dev_info: dev_info['device'] in override_devices,
                local_devices))

        policy2devices[policy] = local_devices
        self.device_count += len(local_devices)

    all_parts = []

    for policy, local_devices in policy2devices.items():
        # Skip replication if next_part_power is set. In this case
        # every object is hard-linked twice, but the replicator
        # can't detect them and would create a second copy of the
        # file if not yet existing - and this might double the
        # actual transferred and stored data
        next_part_power = getattr(
            policy.object_ring, 'next_part_power', None)
        if next_part_power is not None:
            self.logger.warning(
                _("next_part_power set in policy '%s'. Skipping"),
                policy.name)
            continue

        df_mgr = self._df_router[policy]
        for local_dev in local_devices:
            dev_path = df_mgr.get_dev_path(local_dev['device'])
            if not dev_path:
                self.logger.warning(_('%s is not mounted'),
                                    local_dev['device'])
                continue
            data_dir = get_data_dir(policy)
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(int(policy)))
            unlink_older_than(tmp_path,
                              time.time() - df_mgr.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception(
                        'Unable to create %s' % obj_path)
                continue
            try:
                partitions = os.listdir(obj_path)
            except OSError:
                self.logger.exception(
                    'Unable to list partitions in %r' % obj_path)
                continue

            self.part_count += len(partitions)
            for partition in partitions:
                part_path = join(obj_path, partition)
                if partition in ('auditor_status_ALL.json',
                                 'auditor_status_ZBF.json'):
                    continue
                if not partition.isdigit():
                    self.logger.warning(
                        'Unexpected entity in data dir: %r' % part_path)
                    self.delete_partition(part_path)
                    self.reconstruction_part_count += 1
                    continue
                partition = int(partition)
                if override_partitions and (partition not in
                                            override_partitions):
                    continue
                # N.B. A primary node in handoffs_only mode may skip
                # syncing misplaced (handoff) fragments in its primary
                # partition. That can happen repeatedly while
                # rebalancing (e.g. a node holding a handoff fragment
                # becomes a new primary). Those fragments will be
                # synced (and reverted) once handoffs_only mode is
                # turned off.
                if self.handoffs_only and any(
                        local_dev['id'] == n['id'] for n in
                        policy.object_ring.get_part_nodes(partition)):
                    self.logger.debug('Skipping %s job for %s '
                                      'while in handoffs_only mode.',
                                      SYNC, part_path)
                    continue
                part_info = {
                    'local_dev': local_dev,
                    'policy': policy,
                    'partition': partition,
                    'part_path': part_path,
                }
                all_parts.append(part_info)
    random.shuffle(all_parts)
    return all_parts
def collect_parts(self, override_devices=None, override_partitions=None):
    """
    Helper for getting partitions in the top level reconstructor
    """
    override_devices = override_devices or []
    override_partitions = override_partitions or []
    ips = whataremyips(self.bind_ip)
    ec_policies = (policy for policy in POLICIES
                   if policy.policy_type == EC_POLICY)

    policy2devices = {}

    for policy in ec_policies:
        self.load_object_ring(policy)
        local_devices = list(six.moves.filter(
            lambda dev: dev and is_local_device(
                ips, self.port,
                dev['replication_ip'], dev['replication_port']),
            policy.object_ring.devs))

        if override_devices:
            local_devices = list(six.moves.filter(
                lambda dev_info: dev_info['device'] in override_devices,
                local_devices))

        policy2devices[policy] = local_devices
        self.device_count += len(local_devices)

    all_parts = []

    for policy, local_devices in policy2devices.items():
        df_mgr = self._df_router[policy]
        for local_dev in local_devices:
            dev_path = df_mgr.get_dev_path(local_dev['device'])
            if not dev_path:
                self.logger.warning(_('%s is not mounted'),
                                    local_dev['device'])
                continue
            data_dir = get_data_dir(policy)
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(int(policy)))
            unlink_older_than(tmp_path,
                              time.time() - df_mgr.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception(
                        'Unable to create %s' % obj_path)
                continue
            try:
                partitions = os.listdir(obj_path)
            except OSError:
                self.logger.exception(
                    'Unable to list partitions in %r' % obj_path)
                continue

            self.part_count += len(partitions)
            for partition in partitions:
                part_path = join(obj_path, partition)
                if partition in ('auditor_status_ALL.json',
                                 'auditor_status_ZBF.json'):
                    continue
                if not partition.isdigit():
                    self.logger.warning(
                        'Unexpected entity in data dir: %r' % part_path)
                    self.delete_partition(part_path)
                    self.reconstruction_part_count += 1
                    continue
                partition = int(partition)
                if override_partitions and (partition not in
                                            override_partitions):
                    continue
                part_info = {
                    'local_dev': local_dev,
                    'policy': policy,
                    'partition': partition,
                    'part_path': part_path,
                }
                all_parts.append(part_info)
    random.shuffle(all_parts)
    return all_parts
def collect_parts(self, override_devices=None, override_partitions=None):
    """
    Helper for yielding partitions in the top level reconstructor
    """
    override_devices = override_devices or []
    override_partitions = override_partitions or []
    ips = whataremyips(self.bind_ip)
    for policy in POLICIES:
        if policy.policy_type != EC_POLICY:
            continue
        self._diskfile_mgr = self._df_router[policy]
        self.load_object_ring(policy)
        data_dir = get_data_dir(policy)
        local_devices = list(six.moves.filter(
            lambda dev: dev and is_local_device(
                ips, self.port,
                dev['replication_ip'], dev['replication_port']),
            policy.object_ring.devs))

        if override_devices:
            self.device_count = len(override_devices)
        else:
            self.device_count = len(local_devices)

        for local_dev in local_devices:
            if override_devices and (local_dev['device'] not in
                                     override_devices):
                continue
            self.reconstruction_device_count += 1
            dev_path = self._df_router[policy].get_dev_path(
                local_dev['device'])
            if not dev_path:
                self.logger.warning(_('%s is not mounted'),
                                    local_dev['device'])
                continue
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(int(policy)))
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception(
                        'Unable to create %s' % obj_path)
                continue
            try:
                partitions = os.listdir(obj_path)
            except OSError:
                self.logger.exception(
                    'Unable to list partitions in %r' % obj_path)
                continue

            self.part_count += len(partitions)
            for partition in partitions:
                part_path = join(obj_path, partition)
                if not (partition.isdigit() and
                        os.path.isdir(part_path)):
                    self.logger.warning(
                        'Unexpected entity in data dir: %r' % part_path)
                    remove_file(part_path)
                    self.reconstruction_part_count += 1
                    continue
                partition = int(partition)
                if override_partitions and (partition not in
                                            override_partitions):
                    continue
                part_info = {
                    'local_dev': local_dev,
                    'policy': policy,
                    'partition': partition,
                    'part_path': part_path,
                }
                yield part_info
                self.reconstruction_part_count += 1
def test_is_local_device(self):
    # localhost shows up in whataremyips() output as "::1" for IPv6
    my_ips = ["127.0.0.1", "::1"]
    my_port = 6000
    self.assertTrue(is_local_device(my_ips, my_port,
                                    "127.0.0.1", my_port))
    self.assertTrue(is_local_device(my_ips, my_port,
                                    "::1", my_port))
    self.assertTrue(is_local_device(
        my_ips, my_port,
        "0000:0000:0000:0000:0000:0000:0000:0001", my_port))
    self.assertTrue(is_local_device(my_ips, my_port,
                                    "localhost", my_port))
    self.assertFalse(is_local_device(my_ips, my_port,
                                     "localhost", my_port + 1))
    self.assertFalse(is_local_device(my_ips, my_port,
                                     "127.0.0.2", my_port))
    # for those that don't have a local port
    self.assertTrue(is_local_device(my_ips, None, my_ips[0], None))
    # When servers_per_port is active, the "my_port" passed in is None
    # which means "don't include port in the determination of locality
    # because it's not reliable in this deployment scenario"
    self.assertTrue(is_local_device(my_ips, None, "127.0.0.1", 6666))
    self.assertTrue(is_local_device(my_ips, None, "::1", 6666))
    self.assertTrue(is_local_device(
        my_ips, None,
        "0000:0000:0000:0000:0000:0000:0000:0001", 6666))
    self.assertTrue(is_local_device(my_ips, None, "localhost", 6666))
    self.assertFalse(is_local_device(my_ips, None, "127.0.0.2", my_port))
def build_replication_jobs(self, policy, ips, old_dict,
                           new_dict, moving_map):
    """
    Helper function for collect_jobs to build jobs for replication
    using replication style storage policy

    :param policy: swift policy object
    :param ips: the local server ips
    :param old_dict: dictionary with devices from old ring
    :param new_dict: dictionary with devices from new ring
    :param moving_map: the dictionary that contains all the partitions
        that should be moved, their sources and destinations
    """
    jobs = []
    data_dir = get_data_dir(policy)
    devices = Set(map(lambda x: x[1], moving_map.values()))
    partitions = Set(map(lambda x: x[0], moving_map.values()))

    for local_dev in [dev for dev in policy.object_ring.devs
                      if (dev
                          and is_local_device(ips, self.port,
                                              dev['replication_ip'],
                                              dev['replication_port']))]:
        if self.test:
            print local_dev['id']
        if unicode(local_dev['id']) not in devices:
            continue

        dev_path = join(self.devices_dir, local_dev['device'])
        obj_path = join(dev_path, data_dir)
        tmp_path = join(dev_path, get_tmp_dir(policy))
        if self.mount_check and not ismount(dev_path):
            self.logger.warn('%s is not mounted' % local_dev['device'])
            continue
        unlink_older_than(tmp_path, time.time() - self.reclaim_age)

        for partition in os.listdir(obj_path):
            partition = unicode(partition)
            if partition not in partitions:
                continue

            try:
                key = "%s_%s" % (local_dev['id'], partition)
                if key not in moving_map:
                    continue

                job_path = join(obj_path, partition)
                _, source_id, dest_id = moving_map[key]

                if source_id != unicode(local_dev['id']):
                    continue

                node = {}
                replication_ip, replication_device = new_dict[dest_id]
                node['replication_ip'] = replication_ip
                node['device'] = replication_device
                remote_path = os.path.join(self.devices_dir,
                                           node['device'],
                                           self.mover_tmp_dir)
                jobs.append(
                    dict(path=job_path,
                         device=local_dev['device'],
                         obj_path=obj_path,
                         node=node,
                         policy=policy,
                         partition=partition,
                         remote_path=remote_path))
            except ValueError:
                continue
            except Exception as e:
                self.logger.exception(
                    "an %s exception occurred at "
                    "build_replication_jobs" % e)
                if self.test:
                    print e
    return jobs
def process_repl(self, policy, ips, override_devices=None,
                 override_partitions=None):
    """
    Helper function for collect_jobs to build jobs for replication
    using replication style storage policy
    """
    jobs = []
    obj_ring = self.get_object_ring(policy.idx)
    data_dir = get_data_dir(policy.idx)
    for local_dev in [dev for dev in obj_ring.devs
                      if (dev
                          and is_local_device(ips, self.port,
                                              dev['replication_ip'],
                                              dev['replication_port'])
                          and (override_devices is None
                               or dev['device'] in override_devices))]:
        dev_path = join(self.devices_dir, local_dev['device'])
        obj_path = join(dev_path, data_dir)
        tmp_path = join(dev_path, get_tmp_dir(int(policy)))
        if self.mount_check and not ismount(dev_path):
            self.logger.warn(_('%s is not mounted'), local_dev['device'])
            continue
        unlink_older_than(tmp_path, time.time() - self.reclaim_age)
        if not os.path.exists(obj_path):
            try:
                mkdirs(obj_path)
            except Exception:
                self.logger.exception('ERROR creating %s' % obj_path)
                continue
        for partition in os.listdir(obj_path):
            if (override_partitions is not None
                    and partition not in override_partitions):
                continue
            try:
                job_path = join(obj_path, partition)
                part_nodes = obj_ring.get_part_nodes(int(partition))

                # #################### CHANGED_CODE ####################
                f = open("/home/hduser/swift/swift/proxy/controllers/"
                         "spindowndevices")
                downlist = f.read().split("\n")
                f.close()

                nodes = [node for node in part_nodes
                         if node['id'] != local_dev['id']
                         and node['device'] not in downlist]
                print("===Replication nodes===", nodes)
                # #################### CHANGED_CODE ####################

                jobs.append(
                    dict(path=job_path,
                         device=local_dev['device'],
                         obj_path=obj_path,
                         nodes=nodes,
                         delete=len(nodes) > len(part_nodes) - 1,
                         policy_idx=policy.idx,
                         partition=partition,
                         object_ring=obj_ring,
                         region=local_dev['region']))
            except ValueError:
                continue
    return jobs
def run_once(self, *args, **kwargs):
    """Run a replication pass once."""
    override_options = parse_override_options(once=True, **kwargs)

    devices_to_replicate = override_options.devices or Everything()
    partitions_to_replicate = override_options.partitions or Everything()

    self._zero_stats()
    dirs = []
    ips = whataremyips(self.bind_ip)
    if not ips:
        self.logger.error(_('ERROR Failed to get my own IPs?'))
        return

    if self.handoffs_only:
        self.logger.warning(
            'Starting replication pass with handoffs_only enabled. '
            'This mode is not intended for normal '
            'operation; use handoffs_only with care.')

    self._local_device_ids = set()
    found_local = False
    for node in self.ring.devs:
        if node and is_local_device(ips, self.port,
                                    node['replication_ip'],
                                    node['replication_port']):
            found_local = True
            try:
                dev_path = check_drive(self.root, node['device'],
                                       self.mount_check)
            except ValueError as err:
                self._add_failure_stats(
                    [(failure_dev['replication_ip'],
                      failure_dev['device'])
                     for failure_dev in self.ring.devs
                     if failure_dev])
                self.logger.warning('Skipping: %s', err)
                continue
            if node['device'] not in devices_to_replicate:
                self.logger.debug(
                    'Skipping device %s due to given arguments',
                    node['device'])
                continue
            unlink_older_than(
                os.path.join(dev_path, 'tmp'),
                time.time() - self.reclaim_age)
            datadir = os.path.join(self.root, node['device'], self.datadir)
            if os.path.isdir(datadir):
                self._local_device_ids.add(node['id'])
                part_filt = self._partition_dir_filter(
                    node['id'], partitions_to_replicate)
                dirs.append((datadir, node['id'], part_filt))
    if not found_local:
        self.logger.error("Can't find itself %s with port %s in ring "
                          "file, not replicating",
                          ", ".join(ips), self.port)
    self.logger.info(_('Beginning replication run'))
    for part, object_file, node_id in self.roundrobin_datadirs(dirs):
        self.cpool.spawn_n(
            self._replicate_object, part, object_file, node_id)
    self.cpool.waitall()
    self.logger.info(_('Replication run OVER'))
    if self.handoffs_only:
        self.logger.warning(
            'Finished replication pass with handoffs_only enabled. '
            'If handoffs_only is no longer required, disable it.')
    self._report_stats()
def build_replication_jobs(self, policy, ips, override_devices=None,
                           override_partitions=None):
    """
    Helper function for collect_jobs to build jobs for replication
    using replication style storage policy
    """
    jobs = []
    self.all_devs_info.update(
        [(dev['replication_ip'], dev['device'])
         for dev in policy.object_ring.devs if dev])
    data_dir = get_data_dir(policy)
    found_local = False
    for local_dev in [dev for dev in policy.object_ring.devs
                      if (dev
                          and is_local_device(ips, self.port,
                                              dev['replication_ip'],
                                              dev['replication_port'])
                          and (override_devices is None
                               or dev['device'] in override_devices))]:
        found_local = True
        dev_path = join(self.devices_dir, local_dev['device'])
        obj_path = join(dev_path, data_dir)
        tmp_path = join(dev_path, get_tmp_dir(policy))
        if self.mount_check and not ismount(dev_path):
            self._add_failure_stats(
                [(failure_dev['replication_ip'],
                  failure_dev['device'])
                 for failure_dev in policy.object_ring.devs
                 if failure_dev])
            self.logger.warning(
                _('%s is not mounted'), local_dev['device'])
            continue
        unlink_older_than(tmp_path, time.time() - self.reclaim_age)
        if not os.path.exists(obj_path):
            try:
                mkdirs(obj_path)
            except Exception:
                self.logger.exception('ERROR creating %s' % obj_path)
                continue
        for partition in os.listdir(obj_path):
            if (override_partitions is not None
                    and partition not in override_partitions):
                continue
            if (partition.startswith('auditor_status_')
                    and partition.endswith('.json')):
                # ignore auditor status files
                continue
            part_nodes = None
            try:
                job_path = join(obj_path, partition)
                part_nodes = policy.object_ring.get_part_nodes(
                    int(partition))
                nodes = [node for node in part_nodes
                         if node['id'] != local_dev['id']]
                jobs.append(
                    dict(path=job_path,
                         device=local_dev['device'],
                         obj_path=obj_path,
                         nodes=nodes,
                         delete=len(nodes) > len(part_nodes) - 1,
                         policy=policy,
                         partition=partition,
                         region=local_dev['region']))
            except ValueError:
                if part_nodes:
                    self._add_failure_stats(
                        [(failure_dev['replication_ip'],
                          failure_dev['device'])
                         for failure_dev in nodes])
                else:
                    self._add_failure_stats(
                        [(failure_dev['replication_ip'],
                          failure_dev['device'])
                         for failure_dev in policy.object_ring.devs
                         if failure_dev])
                continue
    if not found_local:
        self.logger.error("Can't find itself in policy with index %d "
                          "with ips %s and with port %s in ring file, "
                          "not replicating",
                          int(policy), ", ".join(ips), self.port)
    return jobs
def test_is_local_device(self):
    # localhost shows up in whataremyips() output as "::1" for IPv6
    my_ips = ["127.0.0.1", "::1"]
    my_port = 6200
    self.assertTrue(is_local_device(my_ips, my_port,
                                    "127.0.0.1", my_port))
    self.assertTrue(is_local_device(my_ips, my_port,
                                    "::1", my_port))
    self.assertTrue(is_local_device(
        my_ips, my_port,
        "0000:0000:0000:0000:0000:0000:0000:0001", my_port))
    self.assertTrue(is_local_device(my_ips, my_port,
                                    "localhost", my_port))
    self.assertFalse(is_local_device(my_ips, my_port,
                                     "localhost", my_port + 1))
    self.assertFalse(is_local_device(my_ips, my_port,
                                     "127.0.0.2", my_port))
    # for those that don't have a local port
    self.assertTrue(is_local_device(my_ips, None, my_ips[0], None))
    # When servers_per_port is active, the "my_port" passed in is None
    # which means "don't include port in the determination of locality
    # because it's not reliable in this deployment scenario"
    self.assertTrue(is_local_device(my_ips, None, "127.0.0.1", 6666))
    self.assertTrue(is_local_device(my_ips, None, "::1", 6666))
    self.assertTrue(is_local_device(
        my_ips, None,
        "0000:0000:0000:0000:0000:0000:0000:0001", 6666))
    self.assertTrue(is_local_device(my_ips, None, "localhost", 6666))
    self.assertFalse(is_local_device(my_ips, None, "127.0.0.2", my_port))
def build_replication_jobs(self, policy, ips, override_devices=None,
                           override_partitions=None):
    """
    Helper function for collect_jobs to build jobs for replication
    using replication style storage policy
    """
    jobs = []
    self.all_devs_info.update(
        [(dev["replication_ip"], dev["device"])
         for dev in policy.object_ring.devs if dev])
    data_dir = get_data_dir(policy)
    found_local = False
    for local_dev in [
            dev for dev in policy.object_ring.devs
            if (dev
                and is_local_device(ips, self.port,
                                    dev["replication_ip"],
                                    dev["replication_port"])
                and (override_devices is None
                     or dev["device"] in override_devices))]:
        found_local = True
        dev_path = join(self.devices_dir, local_dev["device"])
        obj_path = join(dev_path, data_dir)
        tmp_path = join(dev_path, get_tmp_dir(policy))
        if self.mount_check and not ismount(dev_path):
            self._add_failure_stats(
                [(failure_dev["replication_ip"],
                  failure_dev["device"])
                 for failure_dev in policy.object_ring.devs
                 if failure_dev])
            self.logger.warning(_("%s is not mounted"),
                                local_dev["device"])
            continue
        unlink_older_than(tmp_path, time.time() - self.reclaim_age)
        if not os.path.exists(obj_path):
            try:
                mkdirs(obj_path)
            except Exception:
                self.logger.exception("ERROR creating %s" % obj_path)
                continue
        for partition in os.listdir(obj_path):
            if (override_partitions is not None
                    and partition not in override_partitions):
                continue
            part_nodes = None
            try:
                job_path = join(obj_path, partition)
                part_nodes = policy.object_ring.get_part_nodes(
                    int(partition))
                nodes = [node for node in part_nodes
                         if node["id"] != local_dev["id"]]
                jobs.append(
                    dict(
                        path=job_path,
                        device=local_dev["device"],
                        obj_path=obj_path,
                        nodes=nodes,
                        delete=len(nodes) > len(part_nodes) - 1,
                        policy=policy,
                        partition=partition,
                        region=local_dev["region"],
                    ))
            except ValueError:
                if part_nodes:
                    self._add_failure_stats(
                        [(failure_dev["replication_ip"],
                          failure_dev["device"])
                         for failure_dev in nodes])
                else:
                    self._add_failure_stats(
                        [(failure_dev["replication_ip"],
                          failure_dev["device"])
                         for failure_dev in policy.object_ring.devs
                         if failure_dev])
                continue
    if not found_local:
        self.logger.error(
            "Can't find itself %s with port %s in ring "
            "file, not replicating",
            ", ".join(ips), self.port)
    return jobs
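# Every pass above opens with the same device-selection step: fetch
# this server's addresses with whataremyips(), keep only the ring
# devices whose replication endpoint is_local_device() says are ours,
# then apply a mount check and scan each device's data directory. The
# condensed sketch below captures that shared skeleton; the function
# name collect_local_datadirs and its argument layout are illustrative
# conveniences for this document, not part of Swift's API.
import os

from swift.common.utils import is_local_device, whataremyips


def collect_local_datadirs(ring, root, datadir, bind_ip=None,
                           my_port=None, mount_check=True):
    """Yield (datadir_path, device_id) for each ring device that lives
    on this server, mirroring the run_once() variants above."""
    my_ips = whataremyips(bind_ip)
    for dev in ring.devs:
        if not (dev and is_local_device(my_ips, my_port,
                                        dev['replication_ip'],
                                        dev['replication_port'])):
            continue
        dev_root = os.path.join(root, dev['device'])
        # Skip unmounted drives, as the passes above do, so a dead
        # disk is never written to through the bare mount point.
        if mount_check and not os.path.ismount(dev_root):
            continue
        path = os.path.join(dev_root, datadir)
        if os.path.isdir(path):
            yield path, dev['id']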