def setUp(self):
    """
    Prepare the per-test fixture.

    Pins the hash path prefix/suffix, writes a gzipped, pickled RingData
    to ``container.ring.gz`` inside a fresh temp dir, creates
    ``devices/sda1`` with one tmp dir per storage policy, and sets up a
    debug logger and a timestamp iterator.
    """
    utils.HASH_PATH_SUFFIX = 'endcap'
    utils.HASH_PATH_PREFIX = ''
    self.testdir = mkdtemp()

    # NOTE(review): presumably one row per replica, each entry being the
    # id of the device that holds that partition -- confirm against
    # RingData.
    replica_rows = [[0, 1, 2, 0, 1, 2],
                    [1, 2, 0, 1, 2, 0],
                    [2, 3, 1, 2, 3, 1]]
    # Four device entries that differ only in id and zone (zone == 2 * id).
    ring_devs = [{'id': dev_id, 'ip': '127.0.0.1', 'port': 1,
                  'device': 'sda1', 'zone': 2 * dev_id}
                 for dev_id in range(4)]
    ring_path = os.path.join(self.testdir, 'container.ring.gz')
    with closing(GzipFile(ring_path, 'wb')) as ring_gz:
        pickle.dump(RingData(replica_rows, ring_devs, 30), ring_gz)

    self.devices_dir = os.path.join(self.testdir, 'devices')
    os.mkdir(self.devices_dir)
    self.sda1 = os.path.join(self.devices_dir, 'sda1')
    os.mkdir(self.sda1)
    for storage_policy in POLICIES:
        policy_tmp = os.path.join(self.sda1, get_tmp_dir(storage_policy))
        os.mkdir(policy_tmp)
    self.logger = debug_logger()
    self.ts_iter = make_timestamp_iter()
def setUp(self):
    """
    Prepare the per-test fixture.

    Pins the hash path prefix/suffix, writes a gzipped, pickled RingData
    to ``container.ring.gz`` inside a fresh temp dir, creates
    ``devices/sda1`` with one tmp dir per storage policy, and sets up a
    debug logger.
    """
    utils.HASH_PATH_SUFFIX = "endcap"
    utils.HASH_PATH_PREFIX = ""
    self.testdir = mkdtemp()
    ring_file = os.path.join(self.testdir, "container.ring.gz")
    with closing(GzipFile(ring_file, "wb")) as f:
        pickle.dump(
            RingData(
                [[0, 1, 2, 0, 1, 2], [1, 2, 0, 1, 2, 0], [2, 3, 1, 2, 3, 1]],
                [
                    {"id": 0, "ip": "127.0.0.1", "port": 1,
                     "device": "sda1", "zone": 0},
                    {"id": 1, "ip": "127.0.0.1", "port": 1,
                     "device": "sda1", "zone": 2},
                    {"id": 2, "ip": "127.0.0.1", "port": 1,
                     "device": "sda1", "zone": 4},
                    # The third row of the table above refers to device
                    # id 3, so that device has to exist; without it any
                    # lookup landing on that entry would index past the
                    # end of the device list.
                    {"id": 3, "ip": "127.0.0.1", "port": 1,
                     "device": "sda1", "zone": 6},
                ],
                30,
            ),
            f,
        )
    self.devices_dir = os.path.join(self.testdir, "devices")
    os.mkdir(self.devices_dir)
    self.sda1 = os.path.join(self.devices_dir, "sda1")
    os.mkdir(self.sda1)
    for policy in POLICIES:
        os.mkdir(os.path.join(self.sda1, get_tmp_dir(int(policy))))
    self.logger = debug_logger()
def process_object_update(self, update_path, device, policy):
    """
    Process the object information to be updated and update.

    The pickled update is loaded from ``update_path``; a file that cannot
    be loaded is moved to ``<device>/quarantined/objects``. Otherwise one
    ``self.object_update`` call is spawned for every container node whose
    id is not already in the update's ``successes`` list. If every call
    succeeds the update file is unlinked; if not, and at least one new
    node succeeded, the file is rewritten with the extended list.

    :param update_path: path to pickled object update file
    :param device: path to device
    :param policy: storage policy of object update
    """
    try:
        # Close the handle deterministically instead of leaving it open
        # until garbage collection.
        # NOTE(review): pickle.load can execute arbitrary code; this relies
        # on update files being written only by trusted processes.
        with open(update_path, 'rb') as update_file:
            update = pickle.load(update_file)
    except Exception:
        self.logger.exception(_('ERROR Pickle problem, quarantining %s'),
                              update_path)
        self.logger.increment('quarantines')
        target_path = os.path.join(device, 'quarantined', 'objects',
                                   os.path.basename(update_path))
        renamer(update_path, target_path, fsync=False)
        return
    successes = update.get('successes', [])
    part, nodes = self.get_container_ring().get_nodes(
        update['account'], update['container'])
    obj = '/%s/%s/%s' % \
        (update['account'], update['container'], update['obj'])
    headers_out = HeaderKeyDict(update['headers'])
    headers_out['user-agent'] = 'object-updater %s' % os.getpid()
    # Only fill in the policy index when the stored headers lack one.
    headers_out.setdefault('X-Backend-Storage-Policy-Index',
                           str(int(policy)))
    # Nodes that already acknowledged this update are skipped.
    events = [
        spawn(self.object_update, node, part, update['op'], obj,
              headers_out)
        for node in nodes if node['id'] not in successes
    ]
    success = True
    new_successes = False
    for event in events:
        event_success, node_id = event.wait()
        if event_success is True:
            successes.append(node_id)
            new_successes = True
        else:
            success = False
    if success:
        self.successes += 1
        self.logger.increment('successes')
        self.logger.debug('Update sent for %(obj)s %(path)s',
                          {'obj': obj, 'path': update_path})
        self.logger.increment("unlinks")
        os.unlink(update_path)
    else:
        self.failures += 1
        self.logger.increment('failures')
        self.logger.debug('Update failed for %(obj)s %(path)s',
                          {'obj': obj, 'path': update_path})
        if new_successes:
            # Persist partial progress so the next pass only retries the
            # nodes that have not succeeded yet.
            update['successes'] = successes
            write_pickle(update, update_path,
                         os.path.join(device, get_tmp_dir(policy)))
def setUp(self):
    """
    Prepare the per-test fixture.

    Pins the hash path prefix/suffix, writes a gzipped, pickled RingData
    to ``container.ring.gz`` inside a fresh temp dir, creates
    ``devices/sda1`` with one tmp dir per storage policy, and sets up a
    debug logger.
    """
    utils.HASH_PATH_SUFFIX = 'endcap'
    utils.HASH_PATH_PREFIX = ''
    self.testdir = mkdtemp()
    ring_file = os.path.join(self.testdir, 'container.ring.gz')
    with closing(GzipFile(ring_file, 'wb')) as f:
        pickle.dump(
            RingData([[0, 1, 2, 0, 1, 2],
                      [1, 2, 0, 1, 2, 0],
                      [2, 3, 1, 2, 3, 1]],
                     [{'id': 0, 'ip': '127.0.0.1', 'port': 1,
                       'device': 'sda1', 'zone': 0},
                      {'id': 1, 'ip': '127.0.0.1', 'port': 1,
                       'device': 'sda1', 'zone': 2},
                      {'id': 2, 'ip': '127.0.0.1', 'port': 1,
                       'device': 'sda1', 'zone': 4},
                      # The third row of the table above refers to device
                      # id 3, so that device has to exist; without it any
                      # lookup landing on that entry would index past the
                      # end of the device list.
                      {'id': 3, 'ip': '127.0.0.1', 'port': 1,
                       'device': 'sda1', 'zone': 6}],
                     30),
            f)
    self.devices_dir = os.path.join(self.testdir, 'devices')
    os.mkdir(self.devices_dir)
    self.sda1 = os.path.join(self.devices_dir, 'sda1')
    os.mkdir(self.sda1)
    for policy in POLICIES:
        os.mkdir(os.path.join(self.sda1, get_tmp_dir(policy)))
    self.logger = debug_logger()
def collect_parts(self, override_devices=None, override_partitions=None):
    """
    Helper for yielding partitions in the top level reconstructor

    Walks the local ring devices of every EC policy and yields one dict
    per numeric partition directory found under the policy's data dir,
    with keys ``local_dev``, ``policy``, ``partition`` (int) and
    ``part_path``. Entries that are not numeric directories are logged
    and removed.

    :param override_devices: optional list of device names; when non-empty
                             only these devices are visited
    :param override_partitions: optional list of partition numbers; when
                                non-empty only these are yielded
    """
    override_devices = override_devices or []
    override_partitions = override_partitions or []
    ips = whataremyips()
    for policy in POLICIES:
        if policy.policy_type != EC_POLICY:
            continue
        self._diskfile_mgr = self._df_router[policy]
        self.load_object_ring(policy)
        data_dir = get_data_dir(policy)
        # A generator expression is as lazy as itertools.ifilter but also
        # exists on Python 3, where ifilter was removed.
        local_devices = (
            dev for dev in policy.object_ring.devs
            if dev and is_local_device(ips, self.port,
                                       dev['replication_ip'],
                                       dev['replication_port']))
        for local_dev in local_devices:
            if override_devices and (local_dev['device'] not in
                                     override_devices):
                continue
            dev_path = join(self.devices_dir, local_dev['device'])
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(int(policy)))
            if self.mount_check and not ismount(dev_path):
                # Logger.warn is a deprecated alias of Logger.warning.
                self.logger.warning(_('%s is not mounted'),
                                    local_dev['device'])
                continue
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception(
                        'Unable to create %s' % obj_path)
                # A data dir that was missing holds no partitions yet.
                continue
            try:
                partitions = os.listdir(obj_path)
            except OSError:
                self.logger.exception(
                    'Unable to list partitions in %r' % obj_path)
                continue
            for partition in partitions:
                part_path = join(obj_path, partition)
                if not (partition.isdigit() and
                        os.path.isdir(part_path)):
                    self.logger.warning(
                        'Unexpected entity in data dir: %r' % part_path)
                    remove_file(part_path)
                    continue
                partition = int(partition)
                if override_partitions and (partition not in
                                            override_partitions):
                    continue
                part_info = {
                    'local_dev': local_dev,
                    'policy': policy,
                    'partition': partition,
                    'part_path': part_path,
                }
                yield part_info
def process_object_update(self, update_path, device, policy_idx):
    """
    Process the object information to be updated and update.

    The pickled update is loaded from ``update_path``; a file that cannot
    be loaded is moved to ``<device>/quarantined/objects``. Otherwise the
    update is sent, one node at a time, to every container node whose id
    is not already in the update's ``successes`` list. A node counts as
    successful when the returned status is a success or HTTP_NOT_FOUND.
    If every node succeeds the update file is unlinked; if not, and at
    least one new node succeeded, the file is rewritten with the extended
    list.

    :param update_path: path to pickled object update file
    :param device: path to device
    :param policy_idx: storage policy index of object update
    """
    try:
        # Close the handle deterministically instead of leaving it open
        # until garbage collection.
        # NOTE(review): pickle.load can execute arbitrary code; this relies
        # on update files being written only by trusted processes.
        with open(update_path, 'rb') as update_file:
            update = pickle.load(update_file)
    except Exception:
        self.logger.exception(_('ERROR Pickle problem, quarantining %s'),
                              update_path)
        self.logger.increment('quarantines')
        renamer(
            update_path,
            os.path.join(device, 'quarantined', 'objects',
                         os.path.basename(update_path)))
        return
    successes = update.get('successes', [])
    part, nodes = self.get_container_ring().get_nodes(
        update['account'], update['container'])
    obj = '/%s/%s/%s' % \
        (update['account'], update['container'], update['obj'])
    success = True
    new_successes = False
    for node in nodes:
        if node['id'] not in successes:
            # Work on a copy so the stored headers are never modified.
            headers = update['headers'].copy()
            headers.setdefault('X-Backend-Storage-Policy-Index',
                               str(policy_idx))
            status = self.object_update(node, part, update['op'], obj,
                                        headers)
            if not is_success(status) and status != HTTP_NOT_FOUND:
                success = False
            else:
                successes.append(node['id'])
                new_successes = True
    if success:
        self.successes += 1
        self.logger.increment('successes')
        self.logger.debug('Update sent for %(obj)s %(path)s',
                          {'obj': obj, 'path': update_path})
        self.logger.increment("unlinks")
        os.unlink(update_path)
    else:
        self.failures += 1
        self.logger.increment('failures')
        self.logger.debug('Update failed for %(obj)s %(path)s',
                          {'obj': obj, 'path': update_path})
        if new_successes:
            # Persist partial progress so the next pass only retries the
            # nodes that have not succeeded yet.
            update['successes'] = successes
            write_pickle(update, update_path,
                         os.path.join(device, get_tmp_dir(policy_idx)))
def process_object_update(self, update_path, device, policy):
    """
    Process the object information to be updated and update.

    The pickled update is loaded from ``update_path``; a file that cannot
    be loaded is moved to ``<device>/quarantined/objects``. Otherwise one
    ``self.object_update`` call is spawned for every container node whose
    id is not already in the update's ``successes`` list. If every call
    succeeds the update file is unlinked; if not, and at least one new
    node succeeded, the file is rewritten with the extended list.
    Outcomes are counted both on ``self.stats`` and via the logger.

    :param update_path: path to pickled object update file
    :param device: path to device
    :param policy: storage policy of object update
    """
    try:
        # Close the handle deterministically instead of leaving it open
        # until garbage collection.
        # NOTE(review): pickle.load can execute arbitrary code; this relies
        # on update files being written only by trusted processes.
        with open(update_path, 'rb') as update_file:
            update = pickle.load(update_file)
    except Exception:
        self.logger.exception(
            _('ERROR Pickle problem, quarantining %s'), update_path)
        self.stats.quarantines += 1
        self.logger.increment('quarantines')
        target_path = os.path.join(device, 'quarantined', 'objects',
                                   os.path.basename(update_path))
        renamer(update_path, target_path, fsync=False)
        return
    successes = update.get('successes', [])
    part, nodes = self.get_container_ring().get_nodes(
        update['account'], update['container'])
    obj = '/%s/%s/%s' % \
        (update['account'], update['container'], update['obj'])
    headers_out = HeaderKeyDict(update['headers'])
    headers_out['user-agent'] = 'object-updater %s' % os.getpid()
    # Only fill in the policy index when the stored headers lack one.
    headers_out.setdefault('X-Backend-Storage-Policy-Index',
                           str(int(policy)))
    # Nodes that already acknowledged this update are skipped.
    events = [spawn(self.object_update,
                    node, part, update['op'], obj, headers_out)
              for node in nodes if node['id'] not in successes]
    success = True
    new_successes = False
    for event in events:
        event_success, node_id = event.wait()
        if event_success is True:
            successes.append(node_id)
            new_successes = True
        else:
            success = False
    if success:
        self.stats.successes += 1
        self.logger.increment('successes')
        self.logger.debug('Update sent for %(obj)s %(path)s',
                          {'obj': obj, 'path': update_path})
        self.stats.unlinks += 1
        self.logger.increment('unlinks')
        os.unlink(update_path)
    else:
        self.stats.failures += 1
        self.logger.increment('failures')
        self.logger.debug('Update failed for %(obj)s %(path)s',
                          {'obj': obj, 'path': update_path})
        if new_successes:
            # Persist partial progress so the next pass only retries the
            # nodes that have not succeeded yet.
            update['successes'] = successes
            write_pickle(update, update_path, os.path.join(
                device, get_tmp_dir(policy)))
def collect_parts(self, override_devices=None, override_partitions=None):
    """
    Helper for yielding partitions in the top level reconstructor

    Walks the local ring devices of every EC policy and yields one dict
    per numeric partition directory found under the policy's data dir,
    with keys "local_dev", "policy", "partition" (int) and "part_path".
    Entries that are not numeric directories are logged and removed.

    :param override_devices: optional list of device names; when non-empty
                             only these devices are visited
    :param override_partitions: optional list of partition numbers; when
                                non-empty only these are yielded
    """
    override_devices = override_devices or []
    override_partitions = override_partitions or []
    ips = whataremyips(self.bind_ip)
    for policy in POLICIES:
        if policy.policy_type != EC_POLICY:
            continue
        self._diskfile_mgr = self._df_router[policy]
        self.load_object_ring(policy)
        data_dir = get_data_dir(policy)
        # A generator expression is as lazy as itertools.ifilter but also
        # exists on Python 3, where ifilter was removed.
        local_devices = (
            dev for dev in policy.object_ring.devs
            if dev and is_local_device(ips, self.port,
                                       dev["replication_ip"],
                                       dev["replication_port"])
        )
        for local_dev in local_devices:
            if override_devices and (
                    local_dev["device"] not in override_devices):
                continue
            dev_path = self._df_router[policy].get_dev_path(
                local_dev["device"])
            if not dev_path:
                # Logger.warn is a deprecated alias of Logger.warning.
                self.logger.warning(_("%s is not mounted"),
                                    local_dev["device"])
                continue
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(int(policy)))
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception("Unable to create %s" % obj_path)
                # A data dir that was missing holds no partitions yet.
                continue
            try:
                partitions = os.listdir(obj_path)
            except OSError:
                self.logger.exception(
                    "Unable to list partitions in %r" % obj_path)
                continue
            for partition in partitions:
                part_path = join(obj_path, partition)
                if not (partition.isdigit() and os.path.isdir(part_path)):
                    self.logger.warning(
                        "Unexpected entity in data dir: %r" % part_path)
                    remove_file(part_path)
                    continue
                partition = int(partition)
                if override_partitions and (
                        partition not in override_partitions):
                    continue
                part_info = {
                    "local_dev": local_dev,
                    "policy": policy,
                    "partition": partition,
                    "part_path": part_path,
                }
                yield part_info
def build_replication_jobs(self, policy, ips, override_devices=None,
                           override_partitions=None):
    """
    Helper function for collect_jobs to build jobs for replication
    using replication style storage policy

    :param policy: storage policy whose object ring is scanned
    :param ips: local ips, passed to is_local_device
    :param override_devices: when not None, only devices named here are
                             considered
    :param override_partitions: when not None, only partition directory
                                names listed here are considered
    :returns: list of job dicts, one per partition directory
    """
    jobs = []
    data_dir = get_data_dir(policy)
    for local_dev in [dev for dev in policy.object_ring.devs
                      if (dev
                          and is_local_device(ips,
                                              self.port,
                                              dev['replication_ip'],
                                              dev['replication_port'])
                          and (override_devices is None
                               or dev['device'] in override_devices))]:
        dev_path = join(self.devices_dir, local_dev['device'])
        obj_path = join(dev_path, data_dir)
        tmp_path = join(dev_path, get_tmp_dir(policy))
        if self.mount_check and not ismount(dev_path):
            # Logger.warn is a deprecated alias of Logger.warning.
            self.logger.warning(_('%s is not mounted'),
                                local_dev['device'])
            continue
        unlink_older_than(tmp_path, time.time() - self.reclaim_age)
        if not os.path.exists(obj_path):
            try:
                mkdirs(obj_path)
            except Exception:
                self.logger.exception('ERROR creating %s' % obj_path)
            # A data dir that was missing holds no partitions yet.
            continue
        for partition in os.listdir(obj_path):
            if (override_partitions is not None
                    and partition not in override_partitions):
                continue
            try:
                job_path = join(obj_path, partition)
                part_nodes = policy.object_ring.get_part_nodes(
                    int(partition))
                nodes = [node for node in part_nodes
                         if node['id'] != local_dev['id']]
                jobs.append(
                    dict(path=job_path,
                         device=local_dev['device'],
                         obj_path=obj_path,
                         nodes=nodes,
                         # True only when no node was filtered out, i.e.
                         # the local device is not one of the partition's
                         # nodes.
                         delete=len(nodes) > len(part_nodes) - 1,
                         policy=policy,
                         partition=partition,
                         region=local_dev['region']))
            except ValueError:
                # e.g. int() on a directory entry that is not a number
                continue
    return jobs
def process_object_update(self, update_path, device, policy_idx):
    """
    Process the object information to be updated and update.

    The pickled update is loaded from ``update_path``; a file that cannot
    be loaded is moved to ``<device>/quarantined/objects``. Otherwise the
    update is sent, one node at a time, to every container node whose id
    is not already in the update's ``successes`` list. A node counts as
    successful when the returned status is a success or HTTP_NOT_FOUND.
    If every node succeeds the update file is unlinked; if not, and at
    least one new node succeeded, the file is rewritten with the extended
    list.

    :param update_path: path to pickled object update file
    :param device: path to device
    :param policy_idx: storage policy index of object update
    """
    try:
        # Close the handle deterministically instead of leaving it open
        # until garbage collection.
        # NOTE(review): pickle.load can execute arbitrary code; this relies
        # on update files being written only by trusted processes.
        with open(update_path, 'rb') as update_file:
            update = pickle.load(update_file)
    except Exception:
        self.logger.exception(
            _('ERROR Pickle problem, quarantining %s'), update_path)
        self.logger.increment('quarantines')
        renamer(update_path, os.path.join(
            device, 'quarantined', 'objects',
            os.path.basename(update_path)))
        return
    successes = update.get('successes', [])
    part, nodes = self.get_container_ring().get_nodes(
        update['account'], update['container'])
    obj = '/%s/%s/%s' % \
        (update['account'], update['container'], update['obj'])
    success = True
    new_successes = False
    for node in nodes:
        if node['id'] not in successes:
            # Work on a copy so the stored headers are never modified.
            headers = update['headers'].copy()
            headers.setdefault('X-Backend-Storage-Policy-Index',
                               str(policy_idx))
            status = self.object_update(node, part, update['op'], obj,
                                        headers)
            if not is_success(status) and status != HTTP_NOT_FOUND:
                success = False
            else:
                successes.append(node['id'])
                new_successes = True
    if success:
        self.successes += 1
        self.logger.increment('successes')
        self.logger.debug('Update sent for %(obj)s %(path)s',
                          {'obj': obj, 'path': update_path})
        self.logger.increment("unlinks")
        os.unlink(update_path)
    else:
        self.failures += 1
        self.logger.increment('failures')
        self.logger.debug('Update failed for %(obj)s %(path)s',
                          {'obj': obj, 'path': update_path})
        if new_successes:
            # Persist partial progress so the next pass only retries the
            # nodes that have not succeeded yet.
            update['successes'] = successes
            write_pickle(update, update_path, os.path.join(
                device, get_tmp_dir(policy_idx)))
def process_repl(self, policy, ips, override_devices=None,
                 override_partitions=None):
    """
    Helper function for collect_jobs to build jobs for replication
    using replication style storage policy

    :param policy: storage policy; its ``idx`` selects the object ring
                   and data dir
    :param ips: local ips, passed to is_local_device
    :param override_devices: when not None, only devices named here are
                             considered
    :param override_partitions: when not None, only partition directory
                                names listed here are considered
    :returns: list of job dicts, one per partition directory
    """
    jobs = []
    obj_ring = self.get_object_ring(policy.idx)
    data_dir = get_data_dir(policy.idx)
    for local_dev in [dev for dev in obj_ring.devs
                      if (dev
                          and is_local_device(ips,
                                              self.port,
                                              dev['replication_ip'],
                                              dev['replication_port'])
                          and (override_devices is None
                               or dev['device'] in override_devices))]:
        dev_path = join(self.devices_dir, local_dev['device'])
        obj_path = join(dev_path, data_dir)
        tmp_path = join(dev_path, get_tmp_dir(int(policy)))
        if self.mount_check and not ismount(dev_path):
            # Logger.warn is a deprecated alias of Logger.warning.
            self.logger.warning(_('%s is not mounted'),
                                local_dev['device'])
            continue
        unlink_older_than(tmp_path, time.time() - self.reclaim_age)
        if not os.path.exists(obj_path):
            try:
                mkdirs(obj_path)
            except Exception:
                self.logger.exception('ERROR creating %s' % obj_path)
            # A data dir that was missing holds no partitions yet.
            continue
        for partition in os.listdir(obj_path):
            if (override_partitions is not None
                    and partition not in override_partitions):
                continue
            try:
                job_path = join(obj_path, partition)
                part_nodes = obj_ring.get_part_nodes(int(partition))
                nodes = [node for node in part_nodes
                         if node['id'] != local_dev['id']]
                jobs.append(
                    dict(path=job_path,
                         device=local_dev['device'],
                         obj_path=obj_path,
                         nodes=nodes,
                         # True only when no node was filtered out, i.e.
                         # the local device is not one of the partition's
                         # nodes.
                         delete=len(nodes) > len(part_nodes) - 1,
                         policy_idx=policy.idx,
                         partition=partition,
                         object_ring=obj_ring,
                         region=local_dev['region']))
            except ValueError:
                # e.g. int() on a directory entry that is not a number
                continue
    return jobs
def process_repl(self, policy, jobs, ips):
    """
    Helper function for collect_jobs to build jobs for replication
    using replication style storage policy

    :param policy: storage policy; its ``idx`` selects the object ring
                   and data dir
    :param jobs: list that the new job dicts are appended to in place
    :param ips: local ips; a ring device is local when its
                replication_ip is in this collection and its
                replication_port equals ``self.port``
    """
    obj_ring = self.get_object_ring(policy.idx)
    data_dir = get_data_dir(policy.idx)
    for local_dev in [
            dev for dev in obj_ring.devs
            if dev and dev['replication_ip'] in ips
            and dev['replication_port'] == self.port
    ]:
        dev_path = join(self.devices_dir, local_dev['device'])
        obj_path = join(dev_path, data_dir)
        tmp_path = join(dev_path, get_tmp_dir(int(policy)))
        if self.mount_check and not ismount(dev_path):
            # Logger.warn is a deprecated alias of Logger.warning.
            self.logger.warning(_('%s is not mounted'),
                                local_dev['device'])
            continue
        unlink_older_than(tmp_path, time.time() - self.reclaim_age)
        if not os.path.exists(obj_path):
            try:
                mkdirs(obj_path)
            except Exception:
                self.logger.exception('ERROR creating %s' % obj_path)
            # A data dir that was missing holds no partitions yet.
            continue
        for partition in os.listdir(obj_path):
            try:
                job_path = join(obj_path, partition)
                if isfile(job_path):
                    # Clean up any (probably zero-byte) files where a
                    # partition should be.
                    self.logger.warning(
                        'Removing partition directory '
                        'which was a file: %s', job_path)
                    os.remove(job_path)
                    continue
                part_nodes = obj_ring.get_part_nodes(int(partition))
                nodes = [
                    node for node in part_nodes
                    if node['id'] != local_dev['id']
                ]
                jobs.append(
                    dict(path=job_path,
                         device=local_dev['device'],
                         nodes=nodes,
                         # True only when no node was filtered out, i.e.
                         # the local device is not one of the partition's
                         # nodes.
                         delete=len(nodes) > len(part_nodes) - 1,
                         policy_idx=policy.idx,
                         partition=partition,
                         object_ring=obj_ring))
            except (ValueError, OSError):
                # e.g. a non-numeric entry name, or a failed os.remove
                continue
def process_repl(self, policy, jobs, ips):
    """
    Helper function for collect_jobs to build jobs for replication
    using replication style storage policy

    :param policy: storage policy; its ``idx`` selects the object ring
                   and data dir
    :param jobs: list that the new job dicts are appended to in place
    :param ips: local ips; a ring device is local when its
                replication_ip is in this collection and its
                replication_port equals ``self.port``
    """
    obj_ring = self.get_object_ring(policy.idx)
    data_dir = get_data_dir(policy.idx)
    for local_dev in [dev for dev in obj_ring.devs
                      if dev and dev['replication_ip'] in ips and
                      dev['replication_port'] == self.port]:
        dev_path = join(self.devices_dir, local_dev['device'])
        obj_path = join(dev_path, data_dir)
        tmp_path = join(dev_path, get_tmp_dir(int(policy)))
        if self.mount_check and not ismount(dev_path):
            # Logger.warn is a deprecated alias of Logger.warning.
            self.logger.warning(_('%s is not mounted'),
                                local_dev['device'])
            continue
        unlink_older_than(tmp_path, time.time() - self.reclaim_age)
        if not os.path.exists(obj_path):
            try:
                mkdirs(obj_path)
            except Exception:
                self.logger.exception('ERROR creating %s' % obj_path)
            # A data dir that was missing holds no partitions yet.
            continue
        for partition in os.listdir(obj_path):
            try:
                job_path = join(obj_path, partition)
                if isfile(job_path):
                    # Clean up any (probably zero-byte) files where a
                    # partition should be.
                    self.logger.warning('Removing partition directory '
                                        'which was a file: %s', job_path)
                    os.remove(job_path)
                    continue
                part_nodes = obj_ring.get_part_nodes(int(partition))
                nodes = [node for node in part_nodes
                         if node['id'] != local_dev['id']]
                jobs.append(
                    dict(path=job_path,
                         device=local_dev['device'],
                         nodes=nodes,
                         # True only when no node was filtered out, i.e.
                         # the local device is not one of the partition's
                         # nodes.
                         delete=len(nodes) > len(part_nodes) - 1,
                         policy_idx=policy.idx,
                         partition=partition,
                         object_ring=obj_ring))
            except (ValueError, OSError):
                # e.g. a non-numeric entry name, or a failed os.remove
                continue
def process_repl(self, policy, jobs, ips):
    """
    Helper function for collect_jobs to build jobs for replication
    using replication style storage policy

    :param policy: storage policy; its ``idx`` selects the object ring
                   and data dir
    :param jobs: list that the new job dicts are appended to in place
    :param ips: local ips; a ring device is local when its
                replication_ip is in this collection and its
                replication_port equals ``self.port``
    """
    obj_ring = self.get_object_ring(policy.idx)
    data_dir = get_data_dir(policy.idx)
    for local_dev in [
        dev
        for dev in obj_ring.devs
        if dev and dev["replication_ip"] in ips
        and dev["replication_port"] == self.port
    ]:
        dev_path = join(self.devices_dir, local_dev["device"])
        obj_path = join(dev_path, data_dir)
        tmp_path = join(dev_path, get_tmp_dir(int(policy)))
        if self.mount_check and not ismount(dev_path):
            # Logger.warn is a deprecated alias of Logger.warning.
            self.logger.warning(_("%s is not mounted"),
                                local_dev["device"])
            continue
        unlink_older_than(tmp_path, time.time() - self.reclaim_age)
        if not os.path.exists(obj_path):
            try:
                mkdirs(obj_path)
            except Exception:
                self.logger.exception("ERROR creating %s" % obj_path)
            # A data dir that was missing holds no partitions yet.
            continue
        for partition in os.listdir(obj_path):
            try:
                job_path = join(obj_path, partition)
                part_nodes = obj_ring.get_part_nodes(int(partition))
                nodes = [node for node in part_nodes
                         if node["id"] != local_dev["id"]]
                jobs.append(
                    dict(
                        path=job_path,
                        device=local_dev["device"],
                        nodes=nodes,
                        # True only when no node was filtered out, i.e.
                        # the local device is not one of the partition's
                        # nodes.
                        delete=len(nodes) > len(part_nodes) - 1,
                        policy_idx=policy.idx,
                        partition=partition,
                        object_ring=obj_ring,
                    )
                )
            except ValueError:
                # e.g. int() on a directory entry that is not a number
                continue
def collect_parts(self, override_devices=None, override_partitions=None):
    """
    Helper for getting partitions in the top level reconstructor

    Works in two passes: first the local ring devices of every EC policy
    are determined (and counted into ``self.device_count``), then each
    device's data dir is listed. The result is a randomly shuffled list of
    dicts with keys ``local_dev``, ``policy``, ``partition`` (int) and
    ``part_path``.

    :param override_devices: optional list of device names; when non-empty
                             only these devices are kept
    :param override_partitions: optional list of partition numbers; when
                                non-empty only these are returned
    """
    override_devices = override_devices or []
    override_partitions = override_partitions or []
    ips = whataremyips(self.bind_ip)

    # Pass 1: map each EC policy to its local devices.
    policy2devices = {}
    for policy in POLICIES:
        if policy.policy_type != EC_POLICY:
            continue
        self.load_object_ring(policy)
        devs_here = [
            dev for dev in policy.object_ring.devs
            if dev and is_local_device(ips, self.port,
                                       dev['replication_ip'],
                                       dev['replication_port'])]
        if override_devices:
            devs_here = [dev_info for dev_info in devs_here
                         if dev_info['device'] in override_devices]
        policy2devices[policy] = devs_here
        self.device_count += len(devs_here)

    # Pass 2: list the partitions found on every local device.
    all_parts = []
    for policy, devs_here in policy2devices.items():
        df_mgr = self._df_router[policy]
        for local_dev in devs_here:
            dev_path = df_mgr.get_dev_path(local_dev['device'])
            if not dev_path:
                self.logger.warning(_('%s is not mounted'),
                                    local_dev['device'])
                continue
            data_dir = get_data_dir(policy)
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(int(policy)))
            unlink_older_than(tmp_path, time.time() - df_mgr.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception(
                        'Unable to create %s' % obj_path)
                # A data dir that was missing holds no partitions yet.
                continue
            try:
                entries = os.listdir(obj_path)
            except OSError:
                self.logger.exception(
                    'Unable to list partitions in %r' % obj_path)
                continue
            self.part_count += len(entries)
            for entry in entries:
                part_path = join(obj_path, entry)
                # These two status files are skipped without any cleanup.
                if entry in ('auditor_status_ALL.json',
                             'auditor_status_ZBF.json'):
                    continue
                if not entry.isdigit():
                    self.logger.warning(
                        'Unexpected entity in data dir: %r' % part_path)
                    self.delete_partition(part_path)
                    self.reconstruction_part_count += 1
                    continue
                part_num = int(entry)
                if override_partitions and (part_num not in
                                            override_partitions):
                    continue
                all_parts.append({
                    'local_dev': local_dev,
                    'policy': policy,
                    'partition': part_num,
                    'part_path': part_path,
                })
    random.shuffle(all_parts)
    return all_parts
def collect_parts(self, override_devices=None, override_partitions=None):
    """
    Helper for yielding partitions in the top level reconstructor

    Walks the local ring devices of every EC policy and yields one dict
    per numeric partition directory found under the policy's data dir,
    with keys ``local_dev``, ``policy``, ``partition`` (int) and
    ``part_path``. Device and partition counters on ``self`` are updated
    along the way; entries that are not numeric directories are logged
    and removed.

    :param override_devices: optional list of device names; when non-empty
                             only these devices are visited
    :param override_partitions: optional list of partition numbers; when
                                non-empty only these are yielded
    """
    override_devices = override_devices or []
    override_partitions = override_partitions or []
    ips = whataremyips(self.bind_ip)
    for policy in POLICIES:
        if policy.policy_type != EC_POLICY:
            continue
        self._diskfile_mgr = self._df_router[policy]
        self.load_object_ring(policy)
        data_dir = get_data_dir(policy)
        devs_here = [
            dev for dev in policy.object_ring.devs
            if dev and is_local_device(ips, self.port,
                                       dev['replication_ip'],
                                       dev['replication_port'])]
        # The count is overwritten (not accumulated) for each policy.
        self.device_count = len(override_devices if override_devices
                                else devs_here)

        for local_dev in devs_here:
            if override_devices and (local_dev['device'] not in
                                     override_devices):
                continue
            self.reconstruction_device_count += 1
            dev_path = self._df_router[policy].get_dev_path(
                local_dev['device'])
            if not dev_path:
                self.logger.warning(_('%s is not mounted'),
                                    local_dev['device'])
                continue
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(int(policy)))
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception(
                        'Unable to create %s' % obj_path)
                # A data dir that was missing holds no partitions yet.
                continue
            try:
                entries = os.listdir(obj_path)
            except OSError:
                self.logger.exception(
                    'Unable to list partitions in %r' % obj_path)
                continue
            self.part_count += len(entries)
            for entry in entries:
                part_path = join(obj_path, entry)
                is_partition_dir = (entry.isdigit() and
                                    os.path.isdir(part_path))
                if not is_partition_dir:
                    self.logger.warning(
                        'Unexpected entity in data dir: %r' % part_path)
                    remove_file(part_path)
                    self.reconstruction_part_count += 1
                    continue
                part_num = int(entry)
                if override_partitions and (part_num not in
                                            override_partitions):
                    continue
                yield {
                    'local_dev': local_dev,
                    'policy': policy,
                    'partition': part_num,
                    'part_path': part_path,
                }
                # Counted only once the consumer resumes the generator.
                self.reconstruction_part_count += 1
def build_replication_jobs(self, policy, ips, old_dict,
                           new_dict, moving_map):
    """
    Helper function for collect_jobs to build jobs for replication
    using replication style storage policy

    :param policy: swift policy object
    :param ips: the local server ips
    :param old_dict: dictionary with devices from old ring
                     (not read by this function)
    :param new_dict: dictionary with devices from new ring
    :param moving_map: the dictionary that contains all the partitions
                       that should be moved, their sources and
                       destinations
    :returns: list of job dicts, one per partition this node must move
    """
    jobs = []
    data_dir = get_data_dir(policy)
    # Each moving_map value unpacks below as
    # (partition, source id, destination id).
    moves = moving_map.values()
    devices = Set([move[1] for move in moves])
    partitions = Set([move[0] for move in moves])

    local_devs = [dev for dev in policy.object_ring.devs
                  if (dev and is_local_device(ips,
                                              self.port,
                                              dev['replication_ip'],
                                              dev['replication_port']))]
    for local_dev in local_devs:
        if self.test:
            print(local_dev['id'])
        # Skip devices that are not the source of any move.
        if unicode(local_dev['id']) not in devices:
            continue

        dev_path = join(self.devices_dir, local_dev['device'])
        obj_path = join(dev_path, data_dir)
        tmp_path = join(dev_path, get_tmp_dir(policy))
        if self.mount_check and not ismount(dev_path):
            self.logger.warn('%s is not mounted' % local_dev['device'])
            continue
        unlink_older_than(tmp_path, time.time() - self.reclaim_age)

        for entry in os.listdir(obj_path):
            partition = unicode(entry)
            if partition not in partitions:
                continue
            try:
                key = "%s_%s" % (local_dev['id'], partition)
                if key not in moving_map:
                    continue
                job_path = join(obj_path, partition)
                _part, source_id, dest_id = moving_map[key]
                if source_id != unicode(local_dev['id']):
                    continue
                replication_ip, replication_device = new_dict[dest_id]
                node = {'replication_ip': replication_ip,
                        'device': replication_device}
                remote_path = os.path.join(self.devices_dir,
                                           node['device'],
                                           self.mover_tmp_dir)
                jobs.append(dict(path=job_path,
                                 device=local_dev['device'],
                                 obj_path=obj_path,
                                 node=node,
                                 policy=policy,
                                 partition=partition,
                                 remote_path=remote_path))
            except ValueError:
                continue
            except Exception as e:
                # Any other failure is logged and the scan carries on.
                self.logger.exception(
                    "an %s exception accure at build_replication_jobs" % e)
                if self.test:
                    print(e)
    return jobs
def process_object_update(self, update_path, device, policy):
    """
    Process the object information to be updated and update.

    The pickled update is loaded from ``update_path``. A file that has
    disappeared (ENOENT) is ignored; any other load failure moves the
    file to ``<device>/quarantined/objects``. The update is then sent to
    the container nodes, and if a redirect comes back one immediate retry
    is made. The update file is rewritten whenever new successes or a
    redirect need to be remembered.

    :param update_path: path to pickled object update file
    :param device: path to device
    :param policy: storage policy of object update
    """
    try:
        # Close the handle deterministically instead of leaving it open
        # until garbage collection.
        # NOTE(review): pickle.load can execute arbitrary code; this relies
        # on update files being written only by trusted processes.
        with open(update_path, 'rb') as update_file:
            update = pickle.load(update_file)
    except Exception as e:
        if getattr(e, 'errno', None) == errno.ENOENT:
            return
        self.logger.exception(_('ERROR Pickle problem, quarantining %s'),
                              update_path)
        self.stats.quarantines += 1
        self.logger.increment('quarantines')
        target_path = os.path.join(device, 'quarantined', 'objects',
                                   os.path.basename(update_path))
        renamer(update_path, target_path, fsync=False)
        try:
            # If this was the last async_pending in the directory,
            # then this will succeed. Otherwise, it'll fail, and
            # that's okay.
            os.rmdir(os.path.dirname(update_path))
        except OSError:
            pass
        return

    def do_update():
        # Send the update once; returns (rewrite_pickle, redirect).
        successes = update.get('successes', [])
        headers_out = HeaderKeyDict(update['headers'].copy())
        headers_out['user-agent'] = 'object-updater %s' % os.getpid()
        headers_out.setdefault('X-Backend-Storage-Policy-Index',
                               str(int(policy)))
        headers_out.setdefault('X-Backend-Accept-Redirect', 'true')
        headers_out.setdefault('X-Backend-Accept-Quoted-Location', 'true')
        # A stored container_path (set by an earlier redirect) takes
        # precedence over the update's own account/container.
        container_path = update.get('container_path')
        if container_path:
            acct, cont = split_path('/' + container_path, minsegs=2)
        else:
            acct, cont = update['account'], update['container']
        part, nodes = self.get_container_ring().get_nodes(acct, cont)
        obj = '/%s/%s/%s' % (acct, cont, update['obj'])
        events = [
            spawn(self.object_update, node, part, update['op'], obj,
                  headers_out)
            for node in nodes if node['id'] not in successes
        ]
        success = True
        new_successes = rewrite_pickle = False
        redirect = None
        redirects = set()
        for event in events:
            event_success, node_id, redirect = event.wait()
            if event_success is True:
                successes.append(node_id)
                new_successes = True
            else:
                success = False
            if redirect:
                redirects.add(redirect)

        if success:
            self.stats.successes += 1
            self.logger.increment('successes')
            self.logger.debug('Update sent for %(obj)s %(path)s',
                              {'obj': obj, 'path': update_path})
            self.stats.unlinks += 1
            self.logger.increment('unlinks')
            os.unlink(update_path)
            try:
                # If this was the last async_pending in the directory,
                # then this will succeed. Otherwise, it'll fail, and
                # that's okay.
                os.rmdir(os.path.dirname(update_path))
            except OSError:
                pass
        elif redirects:
            # erase any previous successes
            update.pop('successes', None)
            # Pick the redirect whose last tuple element is greatest.
            redirect = max(redirects, key=lambda x: x[-1])[0]
            redirect_history = update.setdefault('redirect_history', [])
            if redirect in redirect_history:
                # force next update to be sent to root, reset history
                update['container_path'] = None
                update['redirect_history'] = []
            else:
                update['container_path'] = redirect
                redirect_history.append(redirect)
            self.stats.redirects += 1
            self.logger.increment("redirects")
            self.logger.debug(
                'Update redirected for %(obj)s %(path)s to %(shard)s',
                {'obj': obj, 'path': update_path,
                 'shard': update['container_path']})
            rewrite_pickle = True
        else:
            self.stats.failures += 1
            self.logger.increment('failures')
            self.logger.debug('Update failed for %(obj)s %(path)s',
                              {'obj': obj, 'path': update_path})
        if new_successes:
            update['successes'] = successes
            rewrite_pickle = True

        return rewrite_pickle, redirect

    rewrite_pickle, redirect = do_update()
    if redirect:
        # make one immediate retry to the redirect location
        rewrite_pickle, redirect = do_update()
    if rewrite_pickle:
        write_pickle(update, update_path,
                     os.path.join(device, get_tmp_dir(policy)))
def build_replication_jobs(self, policy, ips, old_dict, new_dict,
                           moving_map):
    """
    Helper function for collect_jobs to build jobs for replication
    using replication style storage policy

    :param policy: swift policy object
    :param ips: the local server ips
    :param old_dict: dictionary with devices from old ring
                     (not read by this function)
    :param new_dict: dictionary with devices from new ring
    :param moving_map: the dictionary that contains all the partitions
                       that should be moved, their sources and
                       destinations
    :returns: list of job dicts, one per partition this node must move
    """
    jobs = []
    data_dir = get_data_dir(policy)
    # Each moving_map value unpacks below as
    # (partition, source id, destination id).
    planned_moves = moving_map.values()
    devices = Set([planned[1] for planned in planned_moves])
    partitions = Set([planned[0] for planned in planned_moves])

    candidates = [
        dev for dev in policy.object_ring.devs
        if (dev and is_local_device(ips, self.port,
                                    dev['replication_ip'],
                                    dev['replication_port']))
    ]
    for local_dev in candidates:
        if self.test:
            print(local_dev['id'])
        # Skip devices that are not the source of any move.
        if unicode(local_dev['id']) not in devices:
            continue

        dev_path = join(self.devices_dir, local_dev['device'])
        obj_path = join(dev_path, data_dir)
        tmp_path = join(dev_path, get_tmp_dir(policy))
        if self.mount_check and not ismount(dev_path):
            self.logger.warn('%s is not mounted' % local_dev['device'])
            continue
        unlink_older_than(tmp_path, time.time() - self.reclaim_age)

        for dir_name in os.listdir(obj_path):
            partition = unicode(dir_name)
            if partition not in partitions:
                continue
            try:
                key = "%s_%s" % (local_dev['id'], partition)
                if key not in moving_map:
                    continue
                job_path = join(obj_path, partition)
                _part, source_id, dest_id = moving_map[key]
                if source_id != unicode(local_dev['id']):
                    continue
                replication_ip, replication_device = new_dict[dest_id]
                node = dict(replication_ip=replication_ip,
                            device=replication_device)
                remote_path = os.path.join(
                    self.devices_dir, node['device'], self.mover_tmp_dir)
                job = dict(path=job_path,
                           device=local_dev['device'],
                           obj_path=obj_path,
                           node=node,
                           policy=policy,
                           partition=partition,
                           remote_path=remote_path)
                jobs.append(job)
            except ValueError:
                continue
            except Exception as e:
                # Any other failure is logged and the scan carries on.
                self.logger.exception(
                    "an %s exception accure at build_replication_jobs" % e)
                if self.test:
                    print(e)
    return jobs
def build_replication_jobs(self, policy, ips, override_devices=None, override_partitions=None):
    """
    Helper function for collect_jobs to build jobs for replication
    using replication style storage policy

    :param policy: storage policy whose ``object_ring`` is scanned
    :param ips: the local server ips
    :param override_devices: when not None, only local devices whose
                             ``device`` name is in this collection are used
    :param override_partitions: when not None, only partition directory
                                names in this collection are used
    :returns: list of job dicts (keys: path, device, obj_path, nodes,
              delete, policy, partition, region)
    """

    def all_ring_devs():
        # (ip, device) pair for every populated slot of the ring.
        return [(dev["replication_ip"], dev["device"]) for dev in policy.object_ring.devs if dev]

    jobs = []
    self.all_devs_info.update(all_ring_devs())
    data_dir = get_data_dir(policy)
    found_local = False
    for local_dev in [
        dev
        for dev in policy.object_ring.devs
        if (
            dev
            and is_local_device(ips, self.port, dev["replication_ip"], dev["replication_port"])
            and (override_devices is None or dev["device"] in override_devices)
        )
    ]:
        found_local = True
        dev_path = join(self.devices_dir, local_dev["device"])
        obj_path = join(dev_path, data_dir)
        tmp_path = join(dev_path, get_tmp_dir(policy))
        if self.mount_check and not ismount(dev_path):
            self._add_failure_stats(all_ring_devs())
            self.logger.warning(_("%s is not mounted"), local_dev["device"])
            continue
        unlink_older_than(tmp_path, time.time() - self.reclaim_age)
        if not os.path.exists(obj_path):
            try:
                mkdirs(obj_path)
            except Exception:
                self.logger.exception("ERROR creating %s", obj_path)
            continue
        for partition in os.listdir(obj_path):
            if override_partitions is not None and partition not in override_partitions:
                continue
            if partition.startswith("auditor_status_") and partition.endswith(".json"):
                # Auditor status files live next to the partition dirs; they
                # are not partitions and must not count as failures.
                continue
            part_nodes = None
            try:
                job_path = join(obj_path, partition)
                part_nodes = policy.object_ring.get_part_nodes(int(partition))
                nodes = [node for node in part_nodes if node["id"] != local_dev["id"]]
                jobs.append(
                    dict(
                        path=job_path,
                        device=local_dev["device"],
                        obj_path=obj_path,
                        nodes=nodes,
                        # True when this device is not one of the
                        # partition's assigned nodes.
                        delete=len(nodes) > len(part_nodes) - 1,
                        policy=policy,
                        partition=partition,
                        region=local_dev["region"],
                    )
                )
            except ValueError:
                if part_nodes:
                    self._add_failure_stats(
                        [(failure_dev["replication_ip"], failure_dev["device"]) for failure_dev in nodes]
                    )
                else:
                    # Partition name was not an integer; the failure is
                    # recorded against every device in the ring.
                    self._add_failure_stats(all_ring_devs())
                continue
    if not found_local:
        self.logger.error(
            "Can't find itself %s with port %s in ring " "file, not replicating", ", ".join(ips), self.port
        )
    return jobs
def temp_key(policy, hashpath, nonce, timestamp=None, **kwargs):
    """
    Build a dotted key of the form
    ``<tmp dir>.<hashpath>.<nonce>.<timestamp>``.

    :param policy: passed to ``diskfile.get_tmp_dir`` to obtain the first
                   component of the key
    :param hashpath: second component of the key
    :param nonce: third component of the key
    :param timestamp: time to embed in the key; any falsy value means
                      "now" (``time.time()``)
    :param kwargs: accepted and ignored
    :returns: the key string
    """
    tmp_dir_name = diskfile.get_tmp_dir(policy)
    # the timestamp roughly indicates when this key was created
    if not timestamp:
        timestamp = time.time()
    internal_ts = diskfile.Timestamp(timestamp).internal
    return "%s.%s.%s.%s" % (tmp_dir_name, hashpath, nonce, internal_ts)
def build_replication_jobs(self, policy, ips, override_devices=None,
                           override_partitions=None):
    """
    Helper function for collect_jobs to build jobs for replication
    using replication style storage policy

    :param policy: storage policy whose ``object_ring`` is scanned
    :param ips: the local server ips
    :param override_devices: when not None, only local devices whose
                             ``device`` name is in this collection are used
    :param override_partitions: when not None, only partition directory
                                names in this collection are used
    :returns: list of job dicts (keys: path, device, obj_path, nodes,
              delete, policy, partition, region)
    """

    def ring_dev_keys():
        # (ip, device) pair for every populated slot of the ring.
        return [(ring_dev['replication_ip'], ring_dev['device'])
                for ring_dev in policy.object_ring.devs if ring_dev]

    def is_candidate(dev):
        # Local to this server and, if an override is given, selected by it.
        if not dev:
            return False
        if not is_local_device(ips, self.port, dev['replication_ip'],
                               dev['replication_port']):
            return False
        return override_devices is None or dev['device'] in override_devices

    jobs = []
    df_mgr = self._df_router[policy]
    self.all_devs_info.update(ring_dev_keys())
    data_dir = get_data_dir(policy)
    candidates = [dev for dev in policy.object_ring.devs
                  if is_candidate(dev)]

    for local_dev in candidates:
        dev_path = check_drive(self.devices_dir, local_dev['device'],
                               self.mount_check)
        if not dev_path:
            self._add_failure_stats(ring_dev_keys())
            self.logger.warning(
                _('%s is not mounted'), local_dev['device'])
            continue

        obj_path = join(dev_path, data_dir)
        tmp_path = join(dev_path, get_tmp_dir(policy))
        unlink_older_than(tmp_path, time.time() - df_mgr.reclaim_age)

        if not os.path.exists(obj_path):
            # Nothing to replicate yet; try to create the dir and move on.
            try:
                mkdirs(obj_path)
            except Exception:
                self.logger.exception('ERROR creating %s' % obj_path)
            continue

        for partition in os.listdir(obj_path):
            if override_partitions is not None:
                if partition not in override_partitions:
                    continue
            if partition.startswith('auditor_status_'):
                if partition.endswith('.json'):
                    # ignore auditor status files
                    continue

            part_nodes = None
            try:
                job_path = join(obj_path, partition)
                part_nodes = policy.object_ring.get_part_nodes(
                    int(partition))
                nodes = [peer for peer in part_nodes
                         if peer['id'] != local_dev['id']]
                job = dict(path=job_path,
                           device=local_dev['device'],
                           obj_path=obj_path,
                           nodes=nodes,
                           delete=len(nodes) > len(part_nodes) - 1,
                           policy=policy,
                           partition=partition,
                           region=local_dev['region'])
                jobs.append(job)
            except ValueError:
                if part_nodes:
                    failed = [(peer['replication_ip'], peer['device'])
                              for peer in nodes]
                else:
                    failed = ring_dev_keys()
                self._add_failure_stats(failed)

    if not candidates:
        self.logger.error("Can't find itself in policy with index %d with"
                          " ips %s and with port %s in ring file, not"
                          " replicating",
                          int(policy), ", ".join(ips), self.port)
    return jobs
def collect_parts(self, override_devices=None, override_partitions=None):
    """
    Helper for getting partitions in the top level reconstructor

    In handoffs_only mode, partitions for which the local device is a
    primary node are left out of the returned (possibly empty) list.

    :param override_devices: when non-empty, only local devices whose
                             ``device`` name is in this collection are used
    :param override_partitions: when non-empty, only these (integer)
                                partitions are returned
    :returns: shuffled list of dicts with keys ``local_dev``, ``policy``,
              ``partition`` and ``part_path``
    """
    override_devices = override_devices or []
    override_partitions = override_partitions or []
    ips = whataremyips(self.bind_ip)

    # First pass: load the ring of each EC policy and find its local devices.
    policy2devices = {}
    for policy in POLICIES:
        if policy.policy_type != EC_POLICY:
            continue
        self.load_object_ring(policy)
        local_devices = [
            dev for dev in policy.object_ring.devs
            if dev and is_local_device(ips, self.port,
                                       dev['replication_ip'],
                                       dev['replication_port'])]
        if override_devices:
            local_devices = [dev_info for dev_info in local_devices
                             if dev_info['device'] in override_devices]
        policy2devices[policy] = local_devices
        self.device_count += len(local_devices)

    # Second pass: walk the data dir of every local device.
    all_parts = []
    for policy, local_devices in policy2devices.items():
        # Skip replication if next_part_power is set. In this case
        # every object is hard-linked twice, but the replicator
        # can't detect them and would create a second copy of the
        # file if not yet existing - and this might double the
        # actual transferred and stored data
        if getattr(policy.object_ring, 'next_part_power',
                   None) is not None:
            self.logger.warning(
                _("next_part_power set in policy '%s'. Skipping"),
                policy.name)
            continue

        df_mgr = self._df_router[policy]
        for local_dev in local_devices:
            dev_path = df_mgr.get_dev_path(local_dev['device'])
            if not dev_path:
                self.logger.warning(_('%s is not mounted'),
                                    local_dev['device'])
                continue

            data_dir = get_data_dir(policy)
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(int(policy)))
            unlink_older_than(tmp_path, time.time() - df_mgr.reclaim_age)

            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception(
                        'Unable to create %s' % obj_path)
                continue

            try:
                entries = os.listdir(obj_path)
            except OSError:
                self.logger.exception(
                    'Unable to list partitions in %r' % obj_path)
                continue

            self.part_count += len(entries)
            for entry in entries:
                part_path = join(obj_path, entry)
                if entry in ('auditor_status_ALL.json',
                             'auditor_status_ZBF.json'):
                    continue
                if not entry.isdigit():
                    # Anything else that is not a partition number is
                    # removed from the data dir.
                    self.logger.warning(
                        'Unexpected entity in data dir: %r' % part_path)
                    self.delete_partition(part_path)
                    self.reconstruction_part_count += 1
                    continue

                partition = int(entry)
                if override_partitions and \
                        partition not in override_partitions:
                    continue

                # N.B. At a primary node in handoffs_only mode may skip to
                # sync misplaced (handoff) fragments in the primary
                # partition. That may happen while rebalancing several
                # times. (e.g. a node holding handoff fragment being a new
                # primary) Those fragments will be synced (and revert) once
                # handoffs_only mode turned off.
                if self.handoffs_only:
                    primaries = policy.object_ring.get_part_nodes(partition)
                    if any(local_dev['id'] == n['id'] for n in primaries):
                        self.logger.debug(
                            'Skipping %s job for %s '
                            'while in handoffs_only mode.',
                            SYNC, part_path)
                        continue

                all_parts.append({
                    'local_dev': local_dev,
                    'policy': policy,
                    'partition': partition,
                    'part_path': part_path,
                })

    random.shuffle(all_parts)
    return all_parts
def build_replication_jobs(self, policy, ips, override_devices=None,
                           override_partitions=None):
    """
    Helper function for collect_jobs to build jobs for replication
    using replication style storage policy

    :param policy: storage policy whose ``object_ring`` is scanned
    :param ips: the local server ips
    :param override_devices: when not None, only local devices whose
                             ``device`` name is in this collection are used
    :param override_partitions: when not None, only partition directory
                                names in this collection are used
    :returns: list of job dicts (keys: path, device, obj_path, nodes,
              delete, policy, partition, region)
    """

    def ring_dev_keys():
        # (ip, device) pair for every populated slot of the ring.
        return [(ring_dev['replication_ip'], ring_dev['device'])
                for ring_dev in policy.object_ring.devs if ring_dev]

    def is_candidate(dev):
        # Local to this server and, if an override is given, selected by it.
        if not dev:
            return False
        if not is_local_device(ips, self.port, dev['replication_ip'],
                               dev['replication_port']):
            return False
        return override_devices is None or dev['device'] in override_devices

    jobs = []
    self.all_devs_info.update(ring_dev_keys())
    data_dir = get_data_dir(policy)
    candidates = [dev for dev in policy.object_ring.devs
                  if is_candidate(dev)]

    for local_dev in candidates:
        dev_path = join(self.devices_dir, local_dev['device'])
        obj_path = join(dev_path, data_dir)
        tmp_path = join(dev_path, get_tmp_dir(policy))

        if self.mount_check and not ismount(dev_path):
            self._add_failure_stats(ring_dev_keys())
            self.logger.warning(_('%s is not mounted'),
                                local_dev['device'])
            continue

        unlink_older_than(tmp_path, time.time() - self.reclaim_age)

        if not os.path.exists(obj_path):
            # Nothing to replicate yet; try to create the dir and move on.
            try:
                mkdirs(obj_path)
            except Exception:
                self.logger.exception('ERROR creating %s' % obj_path)
            continue

        for partition in os.listdir(obj_path):
            if override_partitions is not None:
                if partition not in override_partitions:
                    continue
            if partition.startswith('auditor_status_'):
                if partition.endswith('.json'):
                    # ignore auditor status files
                    continue

            part_nodes = None
            try:
                job_path = join(obj_path, partition)
                part_nodes = policy.object_ring.get_part_nodes(
                    int(partition))
                nodes = [peer for peer in part_nodes
                         if peer['id'] != local_dev['id']]
                job = dict(path=job_path,
                           device=local_dev['device'],
                           obj_path=obj_path,
                           nodes=nodes,
                           delete=len(nodes) > len(part_nodes) - 1,
                           policy=policy,
                           partition=partition,
                           region=local_dev['region'])
                jobs.append(job)
            except ValueError:
                if part_nodes:
                    failed = [(peer['replication_ip'], peer['device'])
                              for peer in nodes]
                else:
                    failed = ring_dev_keys()
                self._add_failure_stats(failed)

    if not candidates:
        self.logger.error(
            "Can't find itself in policy with index %d with"
            " ips %s and with port %s in ring file, not"
            " replicating", int(policy), ", ".join(ips), self.port)
    return jobs
def process_repl(self, policy, ips, override_devices=None,
                 override_partitions=None):
    """
    Helper function for collect_jobs to build jobs for replication
    using replication style storage policy

    Peer nodes whose device name is listed in the spin-down devices file
    are left out of each job's ``nodes`` list.

    :param policy: storage policy; ``policy.idx`` selects the object ring
                   and the data dir
    :param ips: the local server ips
    :param override_devices: when not None, only local devices whose
                             ``device`` name is in this collection are used
    :param override_partitions: when not None, only partition directory
                                names in this collection are used
    :returns: list of job dicts (keys: path, device, obj_path, nodes,
              delete, policy_idx, partition, object_ring, region)
    """
    # One device name per line.
    spindown_file = \
        "/home/hduser/swift/swift/proxy/controllers/spindowndevices"
    jobs = []
    obj_ring = self.get_object_ring(policy.idx)
    data_dir = get_data_dir(policy.idx)
    # Read on first use, so the file is only touched when there is a
    # partition to process, and at most once per call.
    downlist = None
    for local_dev in [dev for dev in obj_ring.devs
                      if (dev
                          and is_local_device(ips,
                                              self.port,
                                              dev['replication_ip'],
                                              dev['replication_port'])
                          and (override_devices is None
                               or dev['device'] in override_devices))]:
        dev_path = join(self.devices_dir, local_dev['device'])
        obj_path = join(dev_path, data_dir)
        tmp_path = join(dev_path, get_tmp_dir(int(policy)))
        if self.mount_check and not ismount(dev_path):
            self.logger.warning(_('%s is not mounted'),
                                local_dev['device'])
            continue
        unlink_older_than(tmp_path, time.time() - self.reclaim_age)
        if not os.path.exists(obj_path):
            try:
                mkdirs(obj_path)
            except Exception:
                self.logger.exception('ERROR creating %s', obj_path)
            continue
        for partition in os.listdir(obj_path):
            if (override_partitions is not None
                    and partition not in override_partitions):
                continue
            try:
                job_path = join(obj_path, partition)
                part_nodes = obj_ring.get_part_nodes(int(partition))
                if downlist is None:
                    # Context manager closes the handle even if read fails.
                    with open(spindown_file) as f:
                        downlist = set(f.read().split("\n"))
                nodes = [node for node in part_nodes
                         if node['id'] != local_dev['id']
                         and node['device'] not in downlist]
                self.logger.debug('===Replication nodes=== %s', nodes)
                jobs.append(
                    dict(path=job_path,
                         device=local_dev['device'],
                         obj_path=obj_path,
                         nodes=nodes,
                         delete=len(nodes) > len(part_nodes) - 1,
                         policy_idx=policy.idx,
                         partition=partition,
                         object_ring=obj_ring,
                         region=local_dev['region']))
            except ValueError:
                # Directory entry is not a partition number; skip it.
                continue
    return jobs