Example no. 1
 def collect_jobs(self):
     """
     Returns a sorted list of jobs (dictionaries) that specify the
     partitions, nodes, etc to be rsynced.
     """
     jobs = []
     ips = whataremyips()
     for local_dev in [dev for dev in self.object_ring.devs
             if dev and dev['ip'] in ips and dev['port'] == self.port]:
         dev_path = join(self.devices_dir, local_dev['device'])
         obj_path = join(dev_path, 'objects')
         tmp_path = join(dev_path, 'tmp')
         if self.mount_check and not os.path.ismount(dev_path):
             self.logger.warn(_('%s is not mounted'), local_dev['device'])
             continue
         unlink_older_than(tmp_path, time.time() - self.reclaim_age)
         if not os.path.exists(obj_path):
             continue
         for partition in os.listdir(obj_path):
             try:
                 nodes = [node for node in
                     self.object_ring.get_part_nodes(int(partition))
                          if node['id'] != local_dev['id']]
                 jobs.append(dict(path=join(obj_path, partition),
                     nodes=nodes,
                     delete=len(nodes) > self.object_ring.replica_count - 1,
                     partition=partition))
             except ValueError:
                 continue
     random.shuffle(jobs)
     # Partitions that need to be deleted take priority
     jobs.sort(key=lambda job: not job['delete'])
     self.job_count = len(jobs)
     return jobs
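A minimal, self-contained sketch (the job dicts below are hypothetical, not taken from Swift) of why sorting on not job['delete'] puts handoff partitions first: False sorts before True in Python, so jobs whose delete flag is True move to the front while the shuffled order is kept within each group.

    import random

    # Hypothetical jobs; delete=True marks handoff partitions to process first.
    jobs = [{'partition': '1', 'delete': False},
            {'partition': '2', 'delete': True},
            {'partition': '3', 'delete': False},
            {'partition': '4', 'delete': True}]
    random.shuffle(jobs)
    jobs.sort(key=lambda job: not job['delete'])  # not True == False, sorts first
    assert [job['delete'] for job in jobs] == [True, True, False, False]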
Example no. 3
 def run_once(self, *args, **kwargs):
     """Run a replication pass once."""
     self._zero_stats()
     dirs = []
     ips = whataremyips()
     if not ips:
         self.logger.error(_('ERROR Failed to get my own IPs?'))
         return
     for node in self.ring.devs:
         if (node and node['replication_ip'] in ips
                 and node['replication_port'] == self.port):
             if self.mount_check and not os.path.ismount(
                     os.path.join(self.root, node['device'])):
                 self.logger.warn(
                     _('Skipping %(device)s as it is not mounted') % node)
                 continue
             unlink_older_than(
                 os.path.join(self.root, node['device'], 'tmp'),
                 time.time() - self.reclaim_age)
             datadir = os.path.join(self.root, node['device'], self.datadir)
             if os.path.isdir(datadir):
                 dirs.append((datadir, node['id']))
     self.logger.info(_('Beginning replication run'))
     for part, object_file, node_id in roundrobin_datadirs(dirs):
         self.cpool.spawn_n(self._replicate_object, part, object_file,
                            node_id)
     self.cpool.waitall()
     self.logger.info(_('Replication run OVER'))
     self._report_stats()
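The roundrobin_datadirs helper used above is not shown in these examples; as a rough, hypothetical sketch of the round-robin idea it relies on (interleaving work across devices instead of draining one device at a time):

    from itertools import zip_longest  # izip_longest on Python 2

    # Hypothetical partition lists found under three device datadirs.
    per_device = [['p1', 'p2'], ['p3'], ['p4', 'p5', 'p6']]
    interleaved = [part for batch in zip_longest(*per_device)
                   for part in batch if part is not None]
    # One partition from each device in turn:
    assert interleaved == ['p1', 'p3', 'p4', 'p2', 'p5', 'p6']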
Example no. 4
 def run_once(self, *args, **kwargs):
     """Run a replication pass once."""
     self._zero_stats()
     dirs = []
     ips = whataremyips()
     if not ips:
         self.logger.error(_('ERROR Failed to get my own IPs?'))
         return
     for node in self.ring.devs:
         if node and node['ip'] in ips and node['port'] == self.port:
             if self.mount_check and not os.path.ismount(
                     os.path.join(self.root, node['device'])):
                 self.logger.warn(
                     _('Skipping %(device)s as it is not mounted') % node)
                 continue
             unlink_older_than(
                 os.path.join(self.root, node['device'], 'tmp'),
                 time.time() - self.reclaim_age)
             datadir = os.path.join(self.root, node['device'], self.datadir)
             if os.path.isdir(datadir):
                 dirs.append((datadir, node['id']))
     self.logger.info(_('Beginning replication run'))
     for part, object_file, node_id in self.roundrobin_datadirs(dirs):
         self.cpool.spawn_n(
             self._replicate_object, part, object_file, node_id)
     self.cpool.waitall()
     self.logger.info(_('Replication run OVER'))
     self._report_stats()
Example no. 5
 def collect_parts(self, override_devices=None, override_partitions=None):
     """
     Helper for yielding partitions in the top level reconstructor
     """
     override_devices = override_devices or []
     override_partitions = override_partitions or []
     ips = whataremyips()
     for policy in POLICIES:
         if policy.policy_type != EC_POLICY:
             continue
         self._diskfile_mgr = self._df_router[policy]
         self.load_object_ring(policy)
         data_dir = get_data_dir(policy)
         local_devices = itertools.ifilter(
             lambda dev: dev and is_local_device(ips, self.port, dev[
                 'replication_ip'], dev['replication_port']),
             policy.object_ring.devs)
         for local_dev in local_devices:
             if override_devices and (local_dev['device']
                                      not in override_devices):
                 continue
             dev_path = join(self.devices_dir, local_dev['device'])
             obj_path = join(dev_path, data_dir)
             tmp_path = join(dev_path, get_tmp_dir(int(policy)))
             if self.mount_check and not ismount(dev_path):
                 self.logger.warn(_('%s is not mounted'),
                                  local_dev['device'])
                 continue
             unlink_older_than(tmp_path, time.time() - self.reclaim_age)
             if not os.path.exists(obj_path):
                 try:
                     mkdirs(obj_path)
                 except Exception:
                     self.logger.exception('Unable to create %s' % obj_path)
                 continue
             try:
                 partitions = os.listdir(obj_path)
             except OSError:
                 self.logger.exception('Unable to list partitions in %r' %
                                       obj_path)
                 continue
             for partition in partitions:
                 part_path = join(obj_path, partition)
                 if not (partition.isdigit() and os.path.isdir(part_path)):
                     self.logger.warning(
                         'Unexpected entity in data dir: %r' % part_path)
                     remove_file(part_path)
                     continue
                 partition = int(partition)
                 if override_partitions and (partition
                                             not in override_partitions):
                     continue
                 part_info = {
                     'local_dev': local_dev,
                     'policy': policy,
                     'partition': partition,
                     'part_path': part_path,
                 }
                 yield part_info
Example no. 6
 def collect_jobs(self):
     """
     Returns a sorted list of jobs (dictionaries) that specify the
     partitions, nodes, etc to be synced.
     """
     jobs = []
     ips = whataremyips()
     for local_dev in [
             dev for dev in self.object_ring.devs
             if dev and dev['replication_ip'] in ips
             and dev['replication_port'] == self.port
     ]:
         dev_path = join(self.devices_dir, local_dev['device'])
         obj_path = join(dev_path, 'objects')
         tmp_path = join(dev_path, 'tmp')
         if self.mount_check and not ismount(dev_path):
             self.logger.warn(_('%s is not mounted'), local_dev['device'])
             continue
         unlink_older_than(tmp_path, time.time() - self.reclaim_age)
         if not os.path.exists(obj_path):
             try:
                 mkdirs(obj_path)
             except Exception:
                 self.logger.exception('ERROR creating %s' % obj_path)
             continue
         for partition in os.listdir(obj_path):
             try:
                 job_path = join(obj_path, partition)
                 if isfile(job_path):
                     # Clean up any (probably zero-byte) files where a
                     # partition should be.
                     self.logger.warning(
                         'Removing partition directory '
                         'which was a file: %s', job_path)
                     os.remove(job_path)
                     continue
                 part_nodes = \
                     self.object_ring.get_part_nodes(int(partition))
                 nodes = [
                     node for node in part_nodes
                     if node['id'] != local_dev['id']
                 ]
                 jobs.append(
                     dict(path=job_path,
                          device=local_dev['device'],
                          nodes=nodes,
                          delete=len(nodes) > len(part_nodes) - 1,
                          partition=partition))
             except (ValueError, OSError):
                 continue
     random.shuffle(jobs)
     if self.handoffs_first:
         # Move the handoff parts to the front of the list
         jobs.sort(key=lambda job: not job['delete'])
     self.job_count = len(jobs)
     return jobs
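A hedged illustration (the ring and node ids below are faked) of the delete flag computed above: nodes excludes the local device, so len(nodes) > len(part_nodes) - 1 holds only when the local device is not one of the partition's primary nodes, i.e. the partition is a handoff that may be removed once it has been pushed to its primaries.

    part_nodes = [{'id': 10}, {'id': 11}, {'id': 12}]  # hypothetical primaries

    def delete_flag(local_id):
        nodes = [node for node in part_nodes if node['id'] != local_id]
        return len(nodes) > len(part_nodes) - 1

    assert delete_flag(11) is False  # local device is a primary: keep the partition
    assert delete_flag(99) is True   # local device only holds a handoff copy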
Example no. 7
    def collect_parts(self, override_devices=None, override_partitions=None):
        """
        Helper for yielding partitions in the top level reconstructor
        """
        override_devices = override_devices or []
        override_partitions = override_partitions or []
        ips = whataremyips(self.bind_ip)
        for policy in POLICIES:
            if policy.policy_type != EC_POLICY:
                continue
            self._diskfile_mgr = self._df_router[policy]
            self.load_object_ring(policy)
            data_dir = get_data_dir(policy)
            local_devices = itertools.ifilter(
                lambda dev: dev and is_local_device(ips, self.port, dev["replication_ip"], dev["replication_port"]),
                policy.object_ring.devs,
            )

            for local_dev in local_devices:
                if override_devices and (local_dev["device"] not in override_devices):
                    continue
                dev_path = self._df_router[policy].get_dev_path(local_dev["device"])
                if not dev_path:
                    self.logger.warn(_("%s is not mounted"), local_dev["device"])
                    continue
                obj_path = join(dev_path, data_dir)
                tmp_path = join(dev_path, get_tmp_dir(int(policy)))
                unlink_older_than(tmp_path, time.time() - self.reclaim_age)
                if not os.path.exists(obj_path):
                    try:
                        mkdirs(obj_path)
                    except Exception:
                        self.logger.exception("Unable to create %s" % obj_path)
                    continue
                try:
                    partitions = os.listdir(obj_path)
                except OSError:
                    self.logger.exception("Unable to list partitions in %r" % obj_path)
                    continue
                for partition in partitions:
                    part_path = join(obj_path, partition)
                    if not (partition.isdigit() and os.path.isdir(part_path)):
                        self.logger.warning("Unexpected entity in data dir: %r" % part_path)
                        remove_file(part_path)
                        continue
                    partition = int(partition)
                    if override_partitions and (partition not in override_partitions):
                        continue
                    part_info = {
                        "local_dev": local_dev,
                        "policy": policy,
                        "partition": partition,
                        "part_path": part_path,
                    }
                    yield part_info
Example no. 8
    def build_replication_jobs(self,
                               policy,
                               ips,
                               override_devices=None,
                               override_partitions=None):
        """
        Helper function for collect_jobs to build jobs for replication
        using replication style storage policy
        """
        jobs = []
        data_dir = get_data_dir(policy)
        for local_dev in [
                dev for dev in policy.object_ring.devs if
            (dev and is_local_device(ips, self.port, dev['replication_ip'],
                                     dev['replication_port']) and
             (override_devices is None or dev['device'] in override_devices))
        ]:
            dev_path = join(self.devices_dir, local_dev['device'])
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(policy))
            if self.mount_check and not ismount(dev_path):
                self.logger.warn(_('%s is not mounted'), local_dev['device'])
                continue
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception('ERROR creating %s' % obj_path)
                continue
            for partition in os.listdir(obj_path):
                if (override_partitions is not None
                        and partition not in override_partitions):
                    continue

                try:
                    job_path = join(obj_path, partition)
                    part_nodes = policy.object_ring.get_part_nodes(
                        int(partition))
                    nodes = [
                        node for node in part_nodes
                        if node['id'] != local_dev['id']
                    ]
                    jobs.append(
                        dict(path=job_path,
                             device=local_dev['device'],
                             obj_path=obj_path,
                             nodes=nodes,
                             delete=len(nodes) > len(part_nodes) - 1,
                             policy=policy,
                             partition=partition,
                             region=local_dev['region']))
                except ValueError:
                    continue
        return jobs
Example no. 9
    def collect_jobs(self):
        """
        Returns a sorted list of jobs (dictionaries) that specify the
        partitions, nodes, etc to be synced.
        """
        jobs = []
        ips = whataremyips()
        for local_dev in [dev for dev in self.object_ring.devs
                          if dev and dev['replication_ip'] in ips and
                          dev['replication_port'] == self.port]:
            dev_path = join(self.devices_dir, local_dev['device'])
            obj_path = join(dev_path, 'objects')
            tmp_path = join(dev_path, 'tmp')
            if self.mount_check and not ismount(dev_path):
                self.logger.warn(_('%s is not mounted'), local_dev['device'])
                continue
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception('ERROR creating %s' % obj_path)
                continue
            for partition in os.listdir(obj_path):
                try:
                    job_path = join(obj_path, partition)
                    if isfile(job_path):
                        # Clean up any (probably zero-byte) files where a
                        # partition should be.
                        self.logger.warning('Removing partition directory '
                                            'which was a file: %s', job_path)
                        os.remove(job_path)
                        continue
                    part_nodes = \
                        self.object_ring.get_part_nodes(int(partition))
                    # MODIFIED LightSync
                    for mypos in range(len(part_nodes)):
                        if part_nodes[mypos]['id'] == local_dev['id']:
                            break
                    nodes = part_nodes[mypos + 1:] + part_nodes[:mypos]
                    ##
                    jobs.append(
                        dict(path=job_path,
                             device=local_dev['device'],
                             nodes=nodes,
                             delete=len(nodes) > len(part_nodes) - 1,
                             partition=partition))
                except (ValueError, OSError):
                    continue
        random.shuffle(jobs)
        if self.handoffs_first:
            # Move the handoff parts to the front of the list
            jobs.sort(key=lambda job: not job['delete'])
        self.job_count = len(jobs)
        return jobs
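A small standalone sketch (device ids are hypothetical) of what the LightSync modification above computes: rather than simply dropping the local device from the primary node list, it rotates the list so the sync targets start with the node that follows the local device in ring order.

    # Hypothetical primary nodes for a partition; id 2 is the local device.
    part_nodes = [{'id': 0}, {'id': 1}, {'id': 2}, {'id': 3}]
    local_id = 2
    for mypos in range(len(part_nodes)):
        if part_nodes[mypos]['id'] == local_id:
            break
    nodes = part_nodes[mypos + 1:] + part_nodes[:mypos]  # rotate past the local node
    assert [node['id'] for node in nodes] == [3, 0, 1]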
Example no. 10
    def run_once(self, *args, **kwargs):
        """Run a replication pass once."""
        self._zero_stats()
        dirs = []
        ips = whataremyips(self.bind_ip)
        if not ips:
            self.logger.error(_('ERROR Failed to get my own IPs?'))
            return

        if self.handoffs_only:
            self.logger.warning(
                'Starting replication pass with handoffs_only enabled. '
                'This mode is not intended for normal '
                'operation; use handoffs_only with care.')

        self._local_device_ids = set()
        found_local = False
        for node in self.ring.devs:
            if node and is_local_device(ips, self.port, node['replication_ip'],
                                        node['replication_port']):
                found_local = True
                if not check_drive(self.root, node['device'],
                                   self.mount_check):
                    self._add_failure_stats([
                        (failure_dev['replication_ip'], failure_dev['device'])
                        for failure_dev in self.ring.devs if failure_dev
                    ])
                    self.logger.warning(
                        _('Skipping %(device)s as it is not mounted') % node)
                    continue
                unlink_older_than(
                    os.path.join(self.root, node['device'], 'tmp'),
                    time.time() - self.reclaim_age)
                datadir = os.path.join(self.root, node['device'], self.datadir)
                if os.path.isdir(datadir):
                    self._local_device_ids.add(node['id'])
                    filt = (self.handoffs_only_filter(node['id'])
                            if self.handoffs_only else None)
                    dirs.append((datadir, node['id'], filt))
        if not found_local:
            self.logger.error(
                "Can't find itself %s with port %s in ring "
                "file, not replicating", ", ".join(ips), self.port)
        self.logger.info(_('Beginning replication run'))
        for part, object_file, node_id in roundrobin_datadirs(dirs):
            self.cpool.spawn_n(self._replicate_object, part, object_file,
                               node_id)
        self.cpool.waitall()
        self.logger.info(_('Replication run OVER'))
        if self.handoffs_only:
            self.logger.warning(
                'Finished replication pass with handoffs_only enabled. '
                'If handoffs_only is no longer required, disable it.')
        self._report_stats()
Example no. 11
 def collect_jobs(self):
     """
     Returns a sorted list of jobs (dictionaries) that specify the
     partitions, nodes, etc to be synced.
     """
     jobs = []
     ips = whataremyips()
     for local_dev in [
         dev
         for dev in self.object_ring.devs
         if dev and dev["replication_ip"] in ips and dev["replication_port"] == self.port
     ]:
         dev_path = join(self.devices_dir, local_dev["device"])
         obj_path = join(dev_path, "objects")
         tmp_path = join(dev_path, "tmp")
         if self.mount_check and not os.path.ismount(dev_path):
             self.logger.warn(_("%s is not mounted"), local_dev["device"])
             continue
         unlink_older_than(tmp_path, time.time() - self.reclaim_age)
         if not os.path.exists(obj_path):
             try:
                 mkdirs(obj_path)
             except Exception:
                 self.logger.exception("ERROR creating %s" % obj_path)
             continue
         for partition in os.listdir(obj_path):
             try:
                 job_path = join(obj_path, partition)
                 if isfile(job_path):
                     # Clean up any (probably zero-byte) files where a
                     # partition should be.
                     self.logger.warning("Removing partition directory " "which was a file: %s", job_path)
                     os.remove(job_path)
                     continue
                 part_nodes = self.object_ring.get_part_nodes(int(partition))
                 nodes = [node for node in part_nodes if node["id"] != local_dev["id"]]
                 jobs.append(
                     dict(
                         path=job_path,
                         device=local_dev["device"],
                         nodes=nodes,
                         delete=len(nodes) > len(part_nodes) - 1,
                         partition=partition,
                     )
                 )
             except (ValueError, OSError):
                 continue
     random.shuffle(jobs)
     if self.handoffs_first:
         # Move the handoff parts to the front of the list
         jobs.sort(key=lambda job: not job["delete"])
     self.job_count = len(jobs)
     return jobs
Example no. 12
    def process_repl(self, policy, ips, override_devices=None,
                     override_partitions=None):
        """
        Helper function for collect_jobs to build jobs for replication
        using replication style storage policy
        """
        jobs = []
        obj_ring = self.get_object_ring(policy.idx)
        data_dir = get_data_dir(policy.idx)
        for local_dev in [dev for dev in obj_ring.devs
                          if (dev
                              and is_local_device(ips,
                                                  self.port,
                                                  dev['replication_ip'],
                                                  dev['replication_port'])
                              and (override_devices is None
                                   or dev['device'] in override_devices))]:
            dev_path = join(self.devices_dir, local_dev['device'])
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(int(policy)))
            if self.mount_check and not ismount(dev_path):
                self.logger.warn(_('%s is not mounted'), local_dev['device'])
                continue
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception('ERROR creating %s' % obj_path)
                continue
            for partition in os.listdir(obj_path):
                if (override_partitions is not None
                        and partition not in override_partitions):
                    continue

                try:
                    job_path = join(obj_path, partition)
                    part_nodes = obj_ring.get_part_nodes(int(partition))
                    nodes = [node for node in part_nodes
                             if node['id'] != local_dev['id']]
                    jobs.append(
                        dict(path=job_path,
                             device=local_dev['device'],
                             obj_path=obj_path,
                             nodes=nodes,
                             delete=len(nodes) > len(part_nodes) - 1,
                             policy_idx=policy.idx,
                             partition=partition,
                             object_ring=obj_ring,
                             region=local_dev['region']))
                except ValueError:
                    continue
        return jobs
Example no. 13
    def process_repl(self, policy, jobs, ips):
        """
        Helper function for collect_jobs to build jobs for replication
        using replication style storage policy
        """
        obj_ring = self.get_object_ring(policy.idx)
        data_dir = get_data_dir(policy.idx)
        for local_dev in [
                dev for dev in obj_ring.devs
                if dev and dev['replication_ip'] in ips
                and dev['replication_port'] == self.port
        ]:
            dev_path = join(self.devices_dir, local_dev['device'])
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, 'tmp')
            if self.mount_check and not ismount(dev_path):
                self.logger.warn(_('%s is not mounted'), local_dev['device'])
                continue
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception('ERROR creating %s' % obj_path)
                continue
            for partition in os.listdir(obj_path):
                try:
                    job_path = join(obj_path, partition)
                    if isfile(job_path):
                        # Clean up any (probably zero-byte) files where a
                        # partition should be.
                        self.logger.warning(
                            'Removing partition directory '
                            'which was a file: %s', job_path)
                        os.remove(job_path)
                        continue
                    part_nodes = obj_ring.get_part_nodes(int(partition))
                    nodes = [
                        node for node in part_nodes
                        if node['id'] != local_dev['id']
                    ]
                    jobs.append(
                        dict(path=job_path,
                             device=local_dev['device'],
                             nodes=nodes,
                             delete=len(nodes) > len(part_nodes) - 1,
                             policy_idx=policy.idx,
                             partition=partition,
                             object_ring=obj_ring))

                except (ValueError, OSError):
                    continue
Example no. 14
 def collect_jobs(self):
     """
     Returns a sorted list of jobs (dictionaries) that specify the
     partitions, nodes, etc to be rsynced.
     """
     jobs = []
     ips = whataremyips()
     for local_dev in [dev for dev in self.object_ring.devs
                       if dev and dev['ip'] in ips and
                       dev['port'] == self.port]:
         dev_path = join(self.devices_dir, local_dev['device'])
         obj_path = join(dev_path, 'objects')
         tmp_path = join(dev_path, 'tmp')
         if self.mount_check and not os.path.ismount(dev_path):
             self.logger.warn(_('%s is not mounted'), local_dev['device'])
             continue
         unlink_older_than(tmp_path, time.time() - self.reclaim_age)
         if not os.path.exists(obj_path):
             try:
                 mkdirs(obj_path)
             except Exception:
                 self.logger.exception('ERROR creating %s' % obj_path)
             continue
         for partition in os.listdir(obj_path):
             try:
                 job_path = join(obj_path, partition)
                 if isfile(job_path):
                     # Clean up any (probably zero-byte) files where a
                     # partition should be.
                     self.logger.warning('Removing partition directory '
                                         'which was a file: %s', job_path)
                     os.remove(job_path)
                     continue
                 part_nodes = \
                     self.object_ring.get_part_nodes(int(partition))
                 nodes = [node for node in part_nodes
                          if node['id'] != local_dev['id']]
                 jobs.append(
                     dict(path=job_path,
                          device=local_dev['device'],
                          nodes=nodes,
                          delete=len(nodes) > len(part_nodes) - 1,
                          partition=partition))
             except (ValueError, OSError):
                 continue
     random.shuffle(jobs)
     self.job_count = len(jobs)
     return jobs
Example no. 15
    def process_repl(self, policy, jobs, ips):
        """
        Helper function for collect_jobs to build jobs for replication
        using replication style storage policy
        """
        obj_ring = self.get_object_ring(policy.idx)
        data_dir = get_data_dir(policy.idx)
        for local_dev in [dev for dev in obj_ring.devs
                          if dev and dev['replication_ip'] in ips and
                          dev['replication_port'] == self.port]:
            dev_path = join(self.devices_dir, local_dev['device'])
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(int(policy)))
            if self.mount_check and not ismount(dev_path):
                self.logger.warn(_('%s is not mounted'), local_dev['device'])
                continue
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception('ERROR creating %s' % obj_path)
                continue
            for partition in os.listdir(obj_path):
                try:
                    job_path = join(obj_path, partition)
                    if isfile(job_path):
                        # Clean up any (probably zero-byte) files where a
                        # partition should be.
                        self.logger.warning(
                            'Removing partition directory '
                            'which was a file: %s', job_path)
                        os.remove(job_path)
                        continue
                    part_nodes = obj_ring.get_part_nodes(int(partition))
                    nodes = [node for node in part_nodes
                             if node['id'] != local_dev['id']]
                    jobs.append(
                        dict(path=job_path,
                             device=local_dev['device'],
                             nodes=nodes,
                             delete=len(nodes) > len(part_nodes) - 1,
                             policy_idx=policy.idx,
                             partition=partition,
                             object_ring=obj_ring))

                except (ValueError, OSError):
                    continue
Example no. 16
 def process_repl(self, policy, jobs, ips):
     """
     Helper function for collect_jobs to build jobs for replication
     using replication style storage policy
     """
     obj_ring = self.get_object_ring(policy.idx)
     data_dir = get_data_dir(policy.idx)
     for local_dev in [
         dev
         for dev in obj_ring.devs
         if dev and dev["replication_ip"] in ips and dev["replication_port"] == self.port
     ]:
         dev_path = join(self.devices_dir, local_dev["device"])
         obj_path = join(dev_path, data_dir)
         tmp_path = join(dev_path, get_tmp_dir(int(policy)))
         if self.mount_check and not ismount(dev_path):
             self.logger.warn(_("%s is not mounted"), local_dev["device"])
             continue
         unlink_older_than(tmp_path, time.time() - self.reclaim_age)
         if not os.path.exists(obj_path):
             try:
                 mkdirs(obj_path)
             except Exception:
                 self.logger.exception("ERROR creating %s" % obj_path)
             continue
         for partition in os.listdir(obj_path):
             try:
                 job_path = join(obj_path, partition)
                 part_nodes = obj_ring.get_part_nodes(int(partition))
                 nodes = [node for node in part_nodes if node["id"] != local_dev["id"]]
                 jobs.append(
                     dict(
                         path=job_path,
                         device=local_dev["device"],
                         nodes=nodes,
                         delete=len(nodes) > len(part_nodes) - 1,
                         policy_idx=policy.idx,
                         partition=partition,
                         object_ring=obj_ring,
                     )
                 )
             except ValueError:
                 continue
Example no. 17
 def run_once(self, *args, **kwargs):
     """Run a replication pass once."""
     self._zero_stats()
     dirs = []
     ips = whataremyips(self.bind_ip)
     if not ips:
         self.logger.error(_('ERROR Failed to get my own IPs?'))
         return
     self._local_device_ids = set()
     found_local = False
      ### iterate over the nodes in the ring
     for node in self.ring.devs:
         if node and is_local_device(ips, self.port,
                                     node['replication_ip'],
                                     node['replication_port']):
             found_local = True
             if self.mount_check and not ismount(
                     os.path.join(self.root, node['device'])):
                 self._add_failure_stats(
                     [(failure_dev['replication_ip'],
                       failure_dev['device'])
                      for failure_dev in self.ring.devs if failure_dev])
                 self.logger.warning(
                     _('Skipping %(device)s as it is not mounted') % node)
                 continue
             unlink_older_than(
                 os.path.join(self.root, node['device'], 'tmp'),
                 time.time() - self.reclaim_age)
             datadir = os.path.join(self.root, node['device'], self.datadir)
             if os.path.isdir(datadir):
                 self._local_device_ids.add(node['id'])
                 dirs.append((datadir, node['id']))
     if not found_local:
         self.logger.error("Can't find itself %s with port %s in ring "
                           "file, not replicating",
                           ", ".join(ips), self.port)
     self.logger.info(_('Beginning replication run'))
     for part, object_file, node_id in roundrobin_datadirs(dirs):
         self.cpool.spawn_n(
             self._replicate_object, part, object_file, node_id)
     self.cpool.waitall()
     self.logger.info(_('Replication run OVER'))
     self._report_stats()
Example no. 18
 def run_once(self, *args, **kwargs):
     """Run a replication pass once."""
     self._zero_stats()
     dirs = []
     ips = whataremyips(self.bind_ip)
     if not ips:
         self.logger.error(_('ERROR Failed to get my own IPs?'))
         return
     self._local_device_ids = set()
     found_local = False
     for node in self.ring.devs:
         if node and is_local_device(ips, self.port,
                                     node['replication_ip'],
                                     node['replication_port']):
             found_local = True
             if self.mount_check and not ismount(
                     os.path.join(self.root, node['device'])):
                 self._add_failure_stats(
                     [(failure_dev['replication_ip'],
                       failure_dev['device'])
                      for failure_dev in self.ring.devs if failure_dev])
                 self.logger.warning(
                     _('Skipping %(device)s as it is not mounted') % node)
                 continue
             unlink_older_than(
                 os.path.join(self.root, node['device'], 'tmp'),
                 time.time() - self.reclaim_age)
             datadir = os.path.join(self.root, node['device'], self.datadir)
             if os.path.isdir(datadir):
                 self._local_device_ids.add(node['id'])
                 dirs.append((datadir, node['id']))
     if not found_local:
         self.logger.error("Can't find itself %s with port %s in ring "
                           "file, not replicating",
                           ", ".join(ips), self.port)
     self.logger.info(_('Beginning replication run'))
     for part, object_file, node_id in roundrobin_datadirs(dirs):
         self.cpool.spawn_n(
             self._replicate_object, part, object_file, node_id)
     self.cpool.waitall()
     self.logger.info(_('Replication run OVER'))
     self._report_stats()
Example no. 19
    def build_replication_jobs(self, policy, ips, override_devices=None, override_partitions=None):
        """
        Helper function for collect_jobs to build jobs for replication
        using replication style storage policy
        """
        jobs = []
        self.all_devs_info.update([(dev["replication_ip"], dev["device"]) for dev in policy.object_ring.devs if dev])
        data_dir = get_data_dir(policy)
        found_local = False
        for local_dev in [
            dev
            for dev in policy.object_ring.devs
            if (
                dev
                and is_local_device(ips, self.port, dev["replication_ip"], dev["replication_port"])
                and (override_devices is None or dev["device"] in override_devices)
            )
        ]:
            found_local = True
            dev_path = join(self.devices_dir, local_dev["device"])
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(policy))
            if self.mount_check and not ismount(dev_path):
                self._add_failure_stats(
                    [
                        (failure_dev["replication_ip"], failure_dev["device"])
                        for failure_dev in policy.object_ring.devs
                        if failure_dev
                    ]
                )
                self.logger.warning(_("%s is not mounted"), local_dev["device"])
                continue
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception("ERROR creating %s" % obj_path)
                continue
            for partition in os.listdir(obj_path):
                if override_partitions is not None and partition not in override_partitions:
                    continue

                part_nodes = None
                try:
                    job_path = join(obj_path, partition)
                    part_nodes = policy.object_ring.get_part_nodes(int(partition))
                    nodes = [node for node in part_nodes if node["id"] != local_dev["id"]]
                    jobs.append(
                        dict(
                            path=job_path,
                            device=local_dev["device"],
                            obj_path=obj_path,
                            nodes=nodes,
                            delete=len(nodes) > len(part_nodes) - 1,
                            policy=policy,
                            partition=partition,
                            region=local_dev["region"],
                        )
                    )
                except ValueError:
                    if part_nodes:
                        self._add_failure_stats(
                            [(failure_dev["replication_ip"], failure_dev["device"]) for failure_dev in nodes]
                        )
                    else:
                        self._add_failure_stats(
                            [
                                (failure_dev["replication_ip"], failure_dev["device"])
                                for failure_dev in policy.object_ring.devs
                                if failure_dev
                            ]
                        )
                    continue
        if not found_local:
            self.logger.error(
                "Can't find itself %s with port %s in ring " "file, not replicating", ", ".join(ips), self.port
            )
        return jobs
Example no. 20
    def build_replication_jobs(self,
                               policy,
                               ips,
                               override_devices=None,
                               override_partitions=None):
        """
        Helper function for collect_jobs to build jobs for replication
        using replication style storage policy
        """
        jobs = []
        self.all_devs_info.update([(dev['replication_ip'], dev['device'])
                                   for dev in policy.object_ring.devs if dev])
        data_dir = get_data_dir(policy)
        found_local = False
        for local_dev in [
                dev for dev in policy.object_ring.devs if
            (dev and is_local_device(ips, self.port, dev['replication_ip'],
                                     dev['replication_port']) and
             (override_devices is None or dev['device'] in override_devices))
        ]:
            found_local = True
            dev_path = join(self.devices_dir, local_dev['device'])
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(policy))
            if self.mount_check and not ismount(dev_path):
                self._add_failure_stats([
                    (failure_dev['replication_ip'], failure_dev['device'])
                    for failure_dev in policy.object_ring.devs if failure_dev
                ])
                self.logger.warning(_('%s is not mounted'),
                                    local_dev['device'])
                continue
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception('ERROR creating %s' % obj_path)
                continue
            for partition in os.listdir(obj_path):
                if (override_partitions is not None
                        and partition not in override_partitions):
                    continue

                if (partition.startswith('auditor_status_')
                        and partition.endswith('.json')):
                    # ignore auditor status files
                    continue

                part_nodes = None
                try:
                    job_path = join(obj_path, partition)
                    part_nodes = policy.object_ring.get_part_nodes(
                        int(partition))
                    nodes = [
                        node for node in part_nodes
                        if node['id'] != local_dev['id']
                    ]
                    jobs.append(
                        dict(path=job_path,
                             device=local_dev['device'],
                             obj_path=obj_path,
                             nodes=nodes,
                             delete=len(nodes) > len(part_nodes) - 1,
                             policy=policy,
                             partition=partition,
                             region=local_dev['region']))
                except ValueError:
                    if part_nodes:
                        self._add_failure_stats([
                            (failure_dev['replication_ip'],
                             failure_dev['device']) for failure_dev in nodes
                        ])
                    else:
                        self._add_failure_stats([
                            (failure_dev['replication_ip'],
                             failure_dev['device'])
                            for failure_dev in policy.object_ring.devs
                            if failure_dev
                        ])
                    continue
        if not found_local:
            self.logger.error(
                "Can't find itself in policy with index %d with"
                " ips %s and with port %s in ring file, not"
                " replicating", int(policy), ", ".join(ips), self.port)
        return jobs
Example no. 21
    def build_replication_jobs(self, policy, ips, override_devices=None,
                               override_partitions=None):
        """
        Helper function for collect_jobs to build jobs for replication
        using replication style storage policy
        """
        jobs = []
        df_mgr = self._df_router[policy]
        self.all_devs_info.update(
            [(dev['replication_ip'], dev['device'])
             for dev in policy.object_ring.devs if dev])
        data_dir = get_data_dir(policy)
        found_local = False
        for local_dev in [dev for dev in policy.object_ring.devs
                          if (dev
                              and is_local_device(ips,
                                                  self.port,
                                                  dev['replication_ip'],
                                                  dev['replication_port'])
                              and (override_devices is None
                                   or dev['device'] in override_devices))]:
            found_local = True
            dev_path = check_drive(self.devices_dir, local_dev['device'],
                                   self.mount_check)
            if not dev_path:
                self._add_failure_stats(
                    [(failure_dev['replication_ip'],
                      failure_dev['device'])
                     for failure_dev in policy.object_ring.devs
                     if failure_dev])
                self.logger.warning(
                    _('%s is not mounted'), local_dev['device'])
                continue
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(policy))
            unlink_older_than(tmp_path, time.time() -
                              df_mgr.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception('ERROR creating %s' % obj_path)
                continue
            for partition in os.listdir(obj_path):
                if (override_partitions is not None
                        and partition not in override_partitions):
                    continue

                if (partition.startswith('auditor_status_') and
                        partition.endswith('.json')):
                    # ignore auditor status files
                    continue

                part_nodes = None
                try:
                    job_path = join(obj_path, partition)
                    part_nodes = policy.object_ring.get_part_nodes(
                        int(partition))
                    nodes = [node for node in part_nodes
                             if node['id'] != local_dev['id']]
                    jobs.append(
                        dict(path=job_path,
                             device=local_dev['device'],
                             obj_path=obj_path,
                             nodes=nodes,
                             delete=len(nodes) > len(part_nodes) - 1,
                             policy=policy,
                             partition=partition,
                             region=local_dev['region']))
                except ValueError:
                    if part_nodes:
                        self._add_failure_stats(
                            [(failure_dev['replication_ip'],
                              failure_dev['device'])
                             for failure_dev in nodes])
                    else:
                        self._add_failure_stats(
                            [(failure_dev['replication_ip'],
                              failure_dev['device'])
                             for failure_dev in policy.object_ring.devs
                             if failure_dev])
                    continue
        if not found_local:
            self.logger.error("Can't find itself in policy with index %d with"
                              " ips %s and with port %s in ring file, not"
                              " replicating",
                              int(policy), ", ".join(ips), self.port)
        return jobs
Example no. 22
    def run_once(self, *args, **kwargs):
        """Run a replication pass once."""
        override_options = parse_override_options(once=True, **kwargs)

        devices_to_replicate = override_options.devices or Everything()
        partitions_to_replicate = override_options.partitions or Everything()

        self._zero_stats()
        dirs = []
        ips = whataremyips(self.bind_ip)
        if not ips:
            self.logger.error(_('ERROR Failed to get my own IPs?'))
            return

        if self.handoffs_only:
            self.logger.warning(
                'Starting replication pass with handoffs_only enabled. '
                'This mode is not intended for normal '
                'operation; use handoffs_only with care.')

        self._local_device_ids = set()
        found_local = False
        for node in self.ring.devs:
            if node and is_local_device(ips, self.port,
                                        node['replication_ip'],
                                        node['replication_port']):
                found_local = True
                try:
                    dev_path = check_drive(self.root, node['device'],
                                           self.mount_check)
                except ValueError as err:
                    self._add_failure_stats(
                        [(failure_dev['replication_ip'],
                          failure_dev['device'])
                         for failure_dev in self.ring.devs if failure_dev])
                    self.logger.warning('Skipping: %s', err)
                    continue
                if node['device'] not in devices_to_replicate:
                    self.logger.debug(
                        'Skipping device %s due to given arguments',
                        node['device'])
                    continue
                unlink_older_than(
                    os.path.join(dev_path, 'tmp'),
                    time.time() - self.reclaim_age)
                datadir = os.path.join(self.root, node['device'], self.datadir)
                if os.path.isdir(datadir):
                    self._local_device_ids.add(node['id'])
                    part_filt = self._partition_dir_filter(
                        node['id'], partitions_to_replicate)
                    dirs.append((datadir, node['id'], part_filt))
        if not found_local:
            self.logger.error("Can't find itself %s with port %s in ring "
                              "file, not replicating",
                              ", ".join(ips), self.port)
        self.logger.info(_('Beginning replication run'))
        for part, object_file, node_id in self.roundrobin_datadirs(dirs):
            self.cpool.spawn_n(
                self._replicate_object, part, object_file, node_id)
        self.cpool.waitall()
        self.logger.info(_('Replication run OVER'))
        if self.handoffs_only:
            self.logger.warning(
                'Finished replication pass with handoffs_only enabled. '
                'If handoffs_only is no longer required, disable it.')
        self._report_stats()
Example no. 23
    def collect_parts(self, override_devices=None, override_partitions=None):
        """
        Helper for getting partitions in the top level reconstructor
        """
        override_devices = override_devices or []
        override_partitions = override_partitions or []
        ips = whataremyips(self.bind_ip)
        ec_policies = (policy for policy in POLICIES
                       if policy.policy_type == EC_POLICY)

        policy2devices = {}

        for policy in ec_policies:
            self.load_object_ring(policy)
            local_devices = list(
                six.moves.filter(
                    lambda dev: dev and is_local_device(
                        ips, self.port, dev['replication_ip'], dev[
                            'replication_port']), policy.object_ring.devs))

            if override_devices:
                local_devices = list(
                    six.moves.filter(
                        lambda dev_info: dev_info['device'] in
                        override_devices, local_devices))

            policy2devices[policy] = local_devices
            self.device_count += len(local_devices)

        all_parts = []

        for policy, local_devices in policy2devices.items():
            df_mgr = self._df_router[policy]
            for local_dev in local_devices:
                dev_path = df_mgr.get_dev_path(local_dev['device'])
                if not dev_path:
                    self.logger.warning(_('%s is not mounted'),
                                        local_dev['device'])
                    continue
                data_dir = get_data_dir(policy)
                obj_path = join(dev_path, data_dir)
                tmp_path = join(dev_path, get_tmp_dir(int(policy)))
                unlink_older_than(tmp_path, time.time() - df_mgr.reclaim_age)
                if not os.path.exists(obj_path):
                    try:
                        mkdirs(obj_path)
                    except Exception:
                        self.logger.exception('Unable to create %s' % obj_path)
                    continue
                try:
                    partitions = os.listdir(obj_path)
                except OSError:
                    self.logger.exception('Unable to list partitions in %r' %
                                          obj_path)
                    continue

                self.part_count += len(partitions)
                for partition in partitions:
                    part_path = join(obj_path, partition)
                    if partition in ('auditor_status_ALL.json',
                                     'auditor_status_ZBF.json'):
                        continue
                    if not partition.isdigit():
                        self.logger.warning(
                            'Unexpected entity in data dir: %r' % part_path)
                        self.delete_partition(part_path)
                        self.reconstruction_part_count += 1
                        continue
                    partition = int(partition)
                    if override_partitions and (partition
                                                not in override_partitions):
                        continue
                    part_info = {
                        'local_dev': local_dev,
                        'policy': policy,
                        'partition': partition,
                        'part_path': part_path,
                    }
                    all_parts.append(part_info)
        random.shuffle(all_parts)
        return all_parts
Example no. 24
    def collect_parts(self, override_devices=None,
                      override_partitions=None):
        """
        Helper for yielding partitions in the top level reconstructor
        """
        override_devices = override_devices or []
        override_partitions = override_partitions or []
        ips = whataremyips(self.bind_ip)
        for policy in POLICIES:
            if policy.policy_type != EC_POLICY:
                continue
            self._diskfile_mgr = self._df_router[policy]
            self.load_object_ring(policy)
            data_dir = get_data_dir(policy)
            local_devices = list(six.moves.filter(
                lambda dev: dev and is_local_device(
                    ips, self.port,
                    dev['replication_ip'], dev['replication_port']),
                policy.object_ring.devs))

            if override_devices:
                self.device_count = len(override_devices)
            else:
                self.device_count = len(local_devices)

            for local_dev in local_devices:
                if override_devices and (local_dev['device'] not in
                                         override_devices):
                    continue
                self.reconstruction_device_count += 1
                dev_path = self._df_router[policy].get_dev_path(
                    local_dev['device'])
                if not dev_path:
                    self.logger.warning(_('%s is not mounted'),
                                        local_dev['device'])
                    continue
                obj_path = join(dev_path, data_dir)
                tmp_path = join(dev_path, get_tmp_dir(int(policy)))
                unlink_older_than(tmp_path, time.time() -
                                  self.reclaim_age)
                if not os.path.exists(obj_path):
                    try:
                        mkdirs(obj_path)
                    except Exception:
                        self.logger.exception(
                            'Unable to create %s' % obj_path)
                    continue
                try:
                    partitions = os.listdir(obj_path)
                except OSError:
                    self.logger.exception(
                        'Unable to list partitions in %r' % obj_path)
                    continue

                self.part_count += len(partitions)
                for partition in partitions:
                    part_path = join(obj_path, partition)
                    if not (partition.isdigit() and
                            os.path.isdir(part_path)):
                        self.logger.warning(
                            'Unexpected entity in data dir: %r' % part_path)
                        remove_file(part_path)
                        self.reconstruction_part_count += 1
                        continue
                    partition = int(partition)
                    if override_partitions and (partition not in
                                                override_partitions):
                        continue
                    part_info = {
                        'local_dev': local_dev,
                        'policy': policy,
                        'partition': partition,
                        'part_path': part_path,
                    }
                    yield part_info
                    self.reconstruction_part_count += 1
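
Unlike the previous example, this variant of collect_parts() is a generator, so partitions are yielded one at a time instead of being gathered and shuffled. A small consumption sketch under that assumption; drain_parts() and handle_partition() are hypothetical placeholders for the caller and the per-partition work.

def drain_parts(reconstructor, handle_partition):
    # The generator yields part_info dicts lazily, so work can start on the
    # first partition before the walk of the remaining devices finishes.
    for part_info in reconstructor.collect_parts(
            override_devices=['sdb1'], override_partitions=[1017]):
        handle_partition(part_info)
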
Esempio n. 25
0
    def build_replication_jobs(self, policy, ips, old_dict,
                               new_dict, moving_map):
        """
        Helper function for collect_jobs to build jobs for replication
        using replication style storage policy

        :param policy: swift policy object
        :param ips: the local server ips
        :param old_dict: dictionary with devices from old ring
        :param new_dict: dictionary with devices from new ring
        :param moving_map: the dictionary that contains all the partitions
            that should be moved, their sources and destinations
        """

        jobs = []
        data_dir = get_data_dir(policy)
        devices = Set(map(lambda x: x[1], moving_map.values()))
        partitions = Set(map(lambda x: x[0], moving_map.values()))

        for local_dev in [dev for dev in policy.object_ring.devs
                          if (dev
                              and is_local_device(ips,
                                                  self.port,
                                                  dev['replication_ip'],
                                                  dev['replication_port'])
                              )]:

            if self.test:
                print local_dev['id']

            if unicode(local_dev['id']) not in devices:
                continue

            dev_path = join(self.devices_dir, local_dev['device'])
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(policy))
            if self.mount_check and not ismount(dev_path):
                self.logger.warn('%s is not mounted' % local_dev['device'])
                continue
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)

            for partition in os.listdir(obj_path):
                partition = unicode(partition)

                if (partition not in partitions):
                    continue

                try:

                    key = "%s_%s" % (local_dev['id'], partition)
                    if key not in moving_map:
                        continue

                    job_path = join(obj_path, partition)

                    _, source_id, dest_id = moving_map[key]

                    if source_id != unicode(local_dev['id']):
                        continue

                    node = {}
                    replication_ip, replication_device = new_dict[dest_id]
                    node['replication_ip'] = replication_ip
                    node['device'] = replication_device

                    remote_path = os.path.join(self.devices_dir,
                                               node['device'],
                                               self.mover_tmp_dir)

                    jobs.append(
                        dict(path=job_path,
                             device=local_dev['device'],
                             obj_path=obj_path,
                             node=node,
                             policy=policy,
                             partition=partition,
                             remote_path=remote_path))

                except ValueError:
                    continue
                except Exception as e:
                    self.logger.exception(
                        "an %s exception accure at build_replication_jobs" % e)
                    if self.test:
                        print e
        return jobs
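
The snippet does not show how moving_map and new_dict are built; the unpacking above (the devices/partitions sets, the "%s_%s" key, and the three-tuple value) implies shapes like the following. The concrete values are purely illustrative, not taken from the source.

# Assumed from the code above, not from the snippet's surrounding module:
#   moving_map key:   "<source_device_id>_<partition>"
#   moving_map value: (partition, source_device_id, destination_device_id)
moving_map = {
    u'2_1017': (u'1017', u'2', u'5'),
    u'7_2048': (u'2048', u'7', u'3'),
}
# new_dict maps a device id in the new ring to its replication endpoint.
new_dict = {
    u'5': ('10.0.0.12', 'sdb'),
    u'3': ('10.0.0.14', 'sdc'),
}
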
Esempio n. 26
0
    def build_replication_jobs(self, policy, ips, old_dict, new_dict,
                               moving_map):
        """
        Helper function for collect_jobs to build jobs for replication
        using replication style storage policy

        :param policy: swift policy object
        :param ips: the local server ips
        :param old_dict: dictionary with devices from old ring
        :param new_dict: dictionary with devices from new ring
        :param moving_map: the dictionary that contains all the partitions
            that should be moved, their sources and destinations
        """

        jobs = []
        data_dir = get_data_dir(policy)
        devices = Set(map(lambda x: x[1], moving_map.values()))
        partitions = Set(map(lambda x: x[0], moving_map.values()))

        for local_dev in [
                dev for dev in policy.object_ring.devs if
            (dev and is_local_device(ips, self.port, dev['replication_ip'],
                                     dev['replication_port']))
        ]:

            if self.test:
                print local_dev['id']

            if unicode(local_dev['id']) not in devices:
                continue

            dev_path = join(self.devices_dir, local_dev['device'])
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(policy))
            if self.mount_check and not ismount(dev_path):
                self.logger.warn('%s is not mounted' % local_dev['device'])
                continue
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)

            for partition in os.listdir(obj_path):
                partition = unicode(partition)

                if (partition not in partitions):
                    continue

                try:

                    key = "%s_%s" % (local_dev['id'], partition)
                    if key not in moving_map:
                        continue

                    job_path = join(obj_path, partition)

                    _, source_id, dest_id = moving_map[key]

                    if source_id != unicode(local_dev['id']):
                        continue

                    node = {}
                    replication_ip, replication_device = new_dict[dest_id]
                    node['replication_ip'] = replication_ip
                    node['device'] = replication_device

                    remote_path = os.path.join(self.devices_dir,
                                               node['device'],
                                               self.mover_tmp_dir)

                    jobs.append(
                        dict(path=job_path,
                             device=local_dev['device'],
                             obj_path=obj_path,
                             node=node,
                             policy=policy,
                             partition=partition,
                             remote_path=remote_path))

                except ValueError:
                    continue
                except Exception as e:
                    self.logger.exception(
                        "an %s exception accure at build_replication_jobs" % e)
                    if self.test:
                        print e
        return jobs
Esempio n. 27
0
    def process_repl(self, policy, ips, override_devices=None,
                     override_partitions=None):
        """
        Helper function for collect_jobs to build jobs for replication
        using replication style storage policy
        """
        jobs = []
        obj_ring = self.get_object_ring(policy.idx)
        data_dir = get_data_dir(policy.idx)
        for local_dev in [dev for dev in obj_ring.devs
                          if (dev
                              and is_local_device(ips,
                                                  self.port,
                                                  dev['replication_ip'],
                                                  dev['replication_port'])
                              and (override_devices is None
                                   or dev['device'] in override_devices))]:
            dev_path = join(self.devices_dir, local_dev['device'])
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(int(policy)))
            if self.mount_check and not ismount(dev_path):
                self.logger.warn(_('%s is not mounted'), local_dev['device'])
                continue
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception('ERROR creating %s' % obj_path)
                continue
            for partition in os.listdir(obj_path):
                if (override_partitions is not None
                        and partition not in override_partitions):
                    continue

                try:
                    job_path = join(obj_path, partition)
                    part_nodes = obj_ring.get_part_nodes(int(partition))

######################################  CHANGED_CODE  ########################################################
                    f = open("/home/hduser/swift/swift/proxy/controllers/spindowndevices")
                    downlist = f.read().split("\n")
                    f.close()

                    nodes = [node for node in part_nodes
                             if node['id'] != local_dev['id'] and node['device'] not in downlist]
                    print("===Replication nodes===",nodes)

######################################  CHANGED_CODE  ########################################################

                    jobs.append(
                        dict(path=job_path,
                             device=local_dev['device'],
                             obj_path=obj_path,
                             nodes=nodes,
                             delete=len(nodes) > len(part_nodes) - 1,
                             policy_idx=policy.idx,
                             partition=partition,
                             object_ring=obj_ring,
                             region=local_dev['region']))
                except ValueError:
                    continue
        return jobs
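
The CHANGED_CODE block above drops replicas whose device name appears in a newline-separated "spun-down devices" file. A standalone sketch of that filter; the function name and default path are illustrative only.

def filter_spun_down(part_nodes, local_dev_id,
                     downlist_path='/etc/swift/spindowndevices'):
    # One device name per line; a missing or unreadable file is treated as
    # "nothing is spun down".
    try:
        with open(downlist_path) as f:
            downlist = set(f.read().split('\n'))
    except IOError:
        downlist = set()
    # Keep every node that is neither the local device nor spun down.
    return [node for node in part_nodes
            if node['id'] != local_dev_id and
            node['device'] not in downlist]
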
Esempio n. 28
0
    def collect_parts(self, override_devices=None, override_partitions=None):
        """
        Helper for getting partitions in the top level reconstructor

        In handoffs_only mode primary partitions will not be included in the
        returned (possibly empty) list.
        """
        override_devices = override_devices or []
        override_partitions = override_partitions or []
        ips = whataremyips(self.bind_ip)
        ec_policies = (policy for policy in POLICIES
                       if policy.policy_type == EC_POLICY)

        policy2devices = {}

        for policy in ec_policies:
            self.load_object_ring(policy)
            local_devices = list(
                six.moves.filter(
                    lambda dev: dev and is_local_device(
                        ips, self.port, dev['replication_ip'], dev[
                            'replication_port']), policy.object_ring.devs))

            if override_devices:
                local_devices = list(
                    six.moves.filter(
                        lambda dev_info: dev_info['device'] in
                        override_devices, local_devices))

            policy2devices[policy] = local_devices
            self.device_count += len(local_devices)

        all_parts = []

        for policy, local_devices in policy2devices.items():
            # Skip replication if next_part_power is set. In this case
            # every object is hard-linked twice, but the replicator
            # can't detect them and would create a second copy of the
            # file if not yet existing - and this might double the
            # actual transferred and stored data
            next_part_power = getattr(policy.object_ring, 'next_part_power',
                                      None)
            if next_part_power is not None:
                self.logger.warning(
                    _("next_part_power set in policy '%s'. Skipping"),
                    policy.name)
                continue

            df_mgr = self._df_router[policy]
            for local_dev in local_devices:
                dev_path = df_mgr.get_dev_path(local_dev['device'])
                if not dev_path:
                    self.logger.warning(_('%s is not mounted'),
                                        local_dev['device'])
                    continue
                data_dir = get_data_dir(policy)
                obj_path = join(dev_path, data_dir)
                tmp_path = join(dev_path, get_tmp_dir(int(policy)))
                unlink_older_than(tmp_path, time.time() - df_mgr.reclaim_age)
                if not os.path.exists(obj_path):
                    try:
                        mkdirs(obj_path)
                    except Exception:
                        self.logger.exception('Unable to create %s' % obj_path)
                    continue
                try:
                    partitions = os.listdir(obj_path)
                except OSError:
                    self.logger.exception('Unable to list partitions in %r' %
                                          obj_path)
                    continue

                self.part_count += len(partitions)
                for partition in partitions:
                    part_path = join(obj_path, partition)
                    if partition in ('auditor_status_ALL.json',
                                     'auditor_status_ZBF.json'):
                        continue
                    if not partition.isdigit():
                        self.logger.warning(
                            'Unexpected entity in data dir: %r' % part_path)
                        self.delete_partition(part_path)
                        self.reconstruction_part_count += 1
                        continue
                    partition = int(partition)
                    if override_partitions and (partition
                                                not in override_partitions):
                        continue
                    # N.B. A primary node in handoffs_only mode may skip
                    # syncing misplaced (handoff) fragments that sit in one
                    # of its primary partitions. That can happen across
                    # several rebalances (e.g. a node holding a handoff
                    # fragment becomes a new primary). Those fragments will
                    # be synced (and reverted) once handoffs_only mode is
                    # turned off.
                    if self.handoffs_only and any(local_dev['id'] == n['id']
                                                  for n in policy.object_ring.
                                                  get_part_nodes(partition)):
                        self.logger.debug(
                            'Skipping %s job for %s '
                            'while in handoffs_only mode.', SYNC, part_path)
                        continue
                    part_info = {
                        'local_dev': local_dev,
                        'policy': policy,
                        'partition': partition,
                        'part_path': part_path,
                    }
                    all_parts.append(part_info)
        random.shuffle(all_parts)
        return all_parts
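
The handoffs_only branch above skips a partition whenever the local device is one of its primaries, so only handoff partitions are processed. The same test, pulled out as a self-contained predicate (the function name is illustrative):

def is_handoff_partition(object_ring, partition, local_dev):
    # A partition is a handoff for this device when the device does not
    # appear among the partition's primary nodes in the ring.
    return all(node['id'] != local_dev['id']
               for node in object_ring.get_part_nodes(partition))
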
Esempio n. 29
0
    def collect_jobs(self):
        """
        Returns a sorted list of jobs (dictionaries) that specify the
        partitions, nodes, etc to be synced.
        """
        jobs = []
        ips = whataremyips()
        for local_dev in [dev for dev in self.object_ring.devs
                          if dev and dev['replication_ip'] in ips and
                          dev['replication_port'] == self.port]:
            dev_path = join(self.devices_dir, local_dev['device'])
            obj_path = join(dev_path, 'objects')
            tmp_path = join(dev_path, 'tmp')
            if self.mount_check and not ismount(dev_path):
                self.logger.warn(_('%s is not mounted'), local_dev['device'])
                continue
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception('ERROR creating %s' % obj_path)
                continue
            for partition in os.listdir(obj_path):
                try:
                    job_path = join(obj_path, partition)
                    if isfile(job_path):
                        # Clean up any (probably zero-byte) files where a
                        # partition should be.
                        self.logger.warning('Removing partition directory '
                                            'which was a file: %s', job_path)
                        os.remove(job_path)
                        continue
                    part_nodes = \
                        self.object_ring.get_part_nodes(int(partition))
                    #### CHANGED CODE ####
                    #f = open("/home/swift/spindowndevices","r")
                    #sdlist = f.read().strip().split("\n")
                    #logging.info("===Spun down devices===:%s",str(sdlist))
                    #f.close()
                    #sddict =dict()
                    #for i in sdlist:
                    #    logging.info("===sdditc===%s",sddict)
                    #    if(i.split(":")[0] in sddict):
                    #        sddict[i.split(":")[0]].append(i.split(":")[1])
                    #    else:
                    #        sddict[i.split(":")[0]] = []
                    #        sddict[i.split(":")[0]].append(i.split(":")[1])
                    #nodes = []
                    #for node in part_nodes:
                    #    if(node['ip'] not in sddict and node['id']!= local_dev['id']):
                    #        nodes.append(node)
                    #    else:
                    #        if(node['device'] not in sddict[node['ip']] and node['id']!=local_dev['id']):
                    #            nodes.append(node)
                    nodes = [node for node in part_nodes
                            if node['id'] != local_dev['id']]

                    logging.info("===Replication nodes===%s",str(nodes))
#                    logging.info("===sddict===%s",str(sddict))
                    #### END CHANGED CODE ####
                    jobs.append(
                        dict(path=job_path,
                             device=local_dev['device'],
                             nodes=nodes,
                             delete=len(nodes) > len(part_nodes) - 1,
                             partition=partition))
                except (ValueError, OSError):
                    continue
        random.shuffle(jobs)
        if self.handoffs_first:
            # Move the handoff parts to the front of the list
            jobs.sort(key=lambda job: not job['delete'])
        self.job_count = len(jobs)
        return jobs
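
The shuffle-then-sort at the end works because False orders before True and Python's sort is stable: with handoffs_first set, every delete=True (handoff) job moves to the front while each group keeps its shuffled relative order. A tiny self-contained demonstration:

import random

jobs = [{'partition': str(p), 'delete': p % 3 == 0} for p in range(9)]
random.shuffle(jobs)
jobs.sort(key=lambda job: not job['delete'])

# Every handoff (delete=True) job now precedes every delete=False job.
deletes = [job['delete'] for job in jobs]
assert deletes == sorted(deletes, reverse=True)
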