Code example #1
File: db_replicator.py  Project: stephdgenius/swift
def roundrobin_datadirs(datadirs):
    """
    Generator to walk the data dirs in a round robin manner, evenly
    hitting each device on the system, and yielding any .db files
    found (in their proper places). The partitions within each data
    dir are walked randomly, however.

    :param datadirs: a list of tuples of (path, context, partition_filter) to
                     walk. The context may be any object; the context is not
                     used by this function but is included with each yielded
                     tuple.
    :returns: A generator of (partition, path_to_db_file, context)
    """

    def walk_datadir(datadir, context, part_filter):
        partitions = [pd for pd in os.listdir(datadir)
                      if looks_like_partition(pd) and part_filter(pd)]
        random.shuffle(partitions)
        for partition in partitions:
            part_dir = os.path.join(datadir, partition)
            if not os.path.isdir(part_dir):
                continue
            suffixes = os.listdir(part_dir)
            if not suffixes:
                os.rmdir(part_dir)
                continue
            for suffix in suffixes:
                suff_dir = os.path.join(part_dir, suffix)
                if not os.path.isdir(suff_dir):
                    continue
                hashes = os.listdir(suff_dir)
                if not hashes:
                    os.rmdir(suff_dir)
                    continue
                for hsh in hashes:
                    hash_dir = os.path.join(suff_dir, hsh)
                    if not os.path.isdir(hash_dir):
                        continue
                    object_file = os.path.join(hash_dir, hsh + '.db')
                    # common case
                    if os.path.exists(object_file):
                        yield (partition, object_file, context)
                        continue
                    # look for any alternate db filenames
                    db_files = get_db_files(object_file)
                    if db_files:
                        yield (partition, db_files[-1], context)
                        continue
                    try:
                        os.rmdir(hash_dir)
                    except OSError as e:
                        if e.errno != errno.ENOTEMPTY:
                            raise

    its = [walk_datadir(datadir, context, filt)
           for datadir, context, filt in datadirs]

    rr_its = round_robin_iter(its)
    for datadir in rr_its:
        yield datadir
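
All of the examples on this page rely on round_robin_iter, a helper from Swift's common utilities that is not shown here. As a rough sketch of what it does (a minimal reimplementation for illustration, not the project's exact code), it takes a list of iterators and yields one item from each in turn until every iterator is exhausted:

def round_robin_iter(its):
    # Yield one item from each iterator in turn, removing an
    # iterator once it is exhausted, until none remain.
    while its:
        for it in list(its):  # iterate over a copy so removal is safe
            try:
                yield next(it)
            except StopIteration:
                its.remove(it)

# The streams are interleaved: 1, 'a', 2, 'b', 3, 'c', 4
print(list(round_robin_iter([iter([1, 2, 3, 4]), iter('abc')])))
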
Code example #2
File: db_replicator.py  Project: isPlusir/swift
def roundrobin_datadirs(datadirs):
    """
    Generator to walk the data dirs in a round robin manner, evenly
    hitting each device on the system, and yielding any .db files
    found (in their proper places). The partitions within each data
    dir are walked randomly, however.

    :param datadirs: a list of (path, node_id, partition_filter) to walk
    :returns: A generator of (partition, path_to_db_file, node_id)
    """
    def walk_datadir(datadir, node_id, part_filter):
        partitions = [
            pd for pd in os.listdir(datadir)
            if looks_like_partition(pd) and part_filter(pd)
        ]
        random.shuffle(partitions)
        for partition in partitions:
            part_dir = os.path.join(datadir, partition)
            if not os.path.isdir(part_dir):
                continue
            suffixes = os.listdir(part_dir)
            if not suffixes:
                os.rmdir(part_dir)
                continue
            for suffix in suffixes:
                suff_dir = os.path.join(part_dir, suffix)
                if not os.path.isdir(suff_dir):
                    continue
                hashes = os.listdir(suff_dir)
                if not hashes:
                    os.rmdir(suff_dir)
                    continue
                for hsh in hashes:
                    hash_dir = os.path.join(suff_dir, hsh)
                    if not os.path.isdir(hash_dir):
                        continue
                    object_file = os.path.join(hash_dir, hsh + '.db')
                    if os.path.exists(object_file):
                        yield (partition, object_file, node_id)
                    else:
                        try:
                            os.rmdir(hash_dir)
                        except OSError as e:
                            if e.errno != errno.ENOTEMPTY:
                                raise

    its = [
        walk_datadir(datadir, node_id, filt)
        for datadir, node_id, filt in datadirs
    ]

    rr_its = round_robin_iter(its)
    for datadir in rr_its:
        yield datadir
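
This variant differs from example #1 in two ways: it yields a fixed node_id rather than an opaque context, and it does not fall back to get_db_files when hsh + '.db' is absent. A call site might look roughly like the following (the paths, node ids, and accept-all filter are made up for illustration):

# Hypothetical usage; the paths, node ids, and filter are
# illustrative only.
datadirs = [
    ('/srv/node/sda/containers', 1, lambda part: True),
    ('/srv/node/sdb/containers', 2, lambda part: True),
]
for partition, db_file, node_id in roundrobin_datadirs(datadirs):
    print(partition, db_file, node_id)
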
Code example #3
File: db_replicator.py  Project: matthewoliver/swift
def roundrobin_datadirs(datadirs):
    """
    Generator to walk the data dirs in a round robin manner, evenly
    hitting each device on the system, and yielding any .db files
    found (in their proper places). The partitions within each data
    dir are walked randomly, however.

    :param datadirs: a list of (path, node_id, partition_filter) to walk
    :returns: A generator of (partition, path_to_db_file, node_id)
    """

    def walk_datadir(datadir, node_id, part_filter):
        partitions = [pd for pd in os.listdir(datadir)
                      if looks_like_partition(pd) and part_filter(pd)]
        random.shuffle(partitions)
        for partition in partitions:
            part_dir = os.path.join(datadir, partition)
            if not os.path.isdir(part_dir):
                continue
            suffixes = os.listdir(part_dir)
            if not suffixes:
                os.rmdir(part_dir)
                continue
            for suffix in suffixes:
                suff_dir = os.path.join(part_dir, suffix)
                if not os.path.isdir(suff_dir):
                    continue
                hashes = os.listdir(suff_dir)
                if not hashes:
                    os.rmdir(suff_dir)
                    continue
                for hsh in hashes:
                    hash_dir = os.path.join(suff_dir, hsh)
                    if not os.path.isdir(hash_dir):
                        continue
                    object_file = os.path.join(hash_dir, hsh + '.db')
                    if os.path.exists(object_file):
                        yield (partition, object_file, node_id)
                    else:
                        try:
                            os.rmdir(hash_dir)
                        except OSError as e:
                            if e.errno != errno.ENOTEMPTY:
                                raise

    its = [walk_datadir(datadir, node_id, filt)
           for datadir, node_id, filt in datadirs]

    rr_its = round_robin_iter(its)
    for datadir in rr_its:
        yield datadir
Code example #4
File: auditor.py  Project: kaisimmons81/swift-2
    def audit_all_objects(self, mode='once', device_dirs=None):
        description = ''
        if device_dirs:
            device_dir_str = ','.join(sorted(device_dirs))
            if self.auditor_type == 'ALL':
                description = _(' - parallel, %s') % device_dir_str
            else:
                description = _(' - %s') % device_dir_str
        self.logger.info(
            _('Begin object audit "%(mode)s" mode (%(audi_type)s'
              '%(description)s)') % {
                  'mode': mode,
                  'audi_type': self.auditor_type,
                  'description': description
              })
        begin = reported = time.time()
        self.total_bytes_processed = 0
        self.total_files_processed = 0
        total_quarantines = 0
        total_errors = 0
        time_auditing = 0

        # get AuditLocations for each policy
        loc_generators = []
        for policy in POLICIES:
            loc_generators.append(
                self.diskfile_router[policy].object_audit_location_generator(
                    policy,
                    device_dirs=device_dirs,
                    auditor_type=self.auditor_type))

        all_locs = round_robin_iter(loc_generators)
        for location in all_locs:
            loop_time = time.time()
            self.failsafe_object_audit(location)
            self.logger.timing_since('timing', loop_time)
            self.files_running_time = ratelimit_sleep(
                self.files_running_time, self.max_files_per_second)
            self.total_files_processed += 1
            now = time.time()
            if now - self.last_logged >= self.log_time:
                self.logger.info(
                    _('Object audit (%(type)s). '
                      'Since %(start_time)s: Locally: %(passes)d passed, '
                      '%(quars)d quarantined, %(errors)d errors, '
                      'files/sec: %(frate).2f, bytes/sec: %(brate).2f, '
                      'Total time: %(total).2f, Auditing time: %(audit).2f, '
                      'Rate: %(audit_rate).2f') % {
                          'type': '%s%s' % (self.auditor_type, description),
                          'start_time': time.ctime(reported),
                          'passes': self.passes,
                          'quars': self.quarantines,
                          'errors': self.errors,
                          'frate': self.passes / (now - reported),
                          'brate': self.bytes_processed / (now - reported),
                          'total': (now - begin),
                          'audit': time_auditing,
                          'audit_rate': time_auditing / (now - begin)
                      })
                cache_entry = self.create_recon_nested_dict(
                    'object_auditor_stats_%s' % (self.auditor_type),
                    device_dirs, {
                        'errors': self.errors,
                        'passes': self.passes,
                        'quarantined': self.quarantines,
                        'bytes_processed': self.bytes_processed,
                        'start_time': reported,
                        'audit_time': time_auditing
                    })
                dump_recon_cache(cache_entry, self.rcache, self.logger)
                reported = now
                total_quarantines += self.quarantines
                total_errors += self.errors
                self.passes = 0
                self.quarantines = 0
                self.errors = 0
                self.bytes_processed = 0
                self.last_logged = now
            time_auditing += (now - loop_time)
        # Avoid divide by zero during very short runs
        elapsed = (time.time() - begin) or 0.000001
        self.logger.info(
            _('Object audit (%(type)s) "%(mode)s" mode '
              'completed: %(elapsed).02fs. Total quarantined: %(quars)d, '
              'Total errors: %(errors)d, Total files/sec: %(frate).2f, '
              'Total bytes/sec: %(brate).2f, Auditing time: %(audit).2f, '
              'Rate: %(audit_rate).2f') % {
                  'type': '%s%s' % (self.auditor_type, description),
                  'mode': mode,
                  'elapsed': elapsed,
                  'quars': total_quarantines + self.quarantines,
                  'errors': total_errors + self.errors,
                  'frate': self.total_files_processed / elapsed,
                  'brate': self.total_bytes_processed / elapsed,
                  'audit': time_auditing,
                  'audit_rate': time_auditing / elapsed
              })
        if self.stats_sizes:
            self.logger.info(
                _('Object audit stats: %s') % json.dumps(self.stats_buckets))

        for policy in POLICIES:
            # Unset remaining partitions so they are not skipped
            # in the next run
            self.diskfile_router[policy].clear_auditor_status(
                policy, self.auditor_type)
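
The ratelimit_sleep call above throttles how quickly audit locations are consumed. Swift's real helper is more elaborate (it tracks a running time and allows short bursts); the toy version below only conveys the core idea and is not Swift's actual code:

import time

def simple_ratelimit(last_time, max_per_second):
    # Toy stand-in for a rate limiter: guarantee at least
    # 1 / max_per_second seconds between calls by sleeping off any
    # remainder, then return the new reference timestamp.
    if max_per_second <= 0:
        return time.time()
    min_interval = 1.0 / max_per_second
    now = time.time()
    wait = last_time + min_interval - now
    if wait > 0:
        time.sleep(wait)
    return max(now, last_time + min_interval)
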
Code example #5
File: auditor.py  Project: jgmerritt/swift
    def audit_all_objects(self, mode='once', device_dirs=None):
        description = ''
        if device_dirs:
            device_dir_str = ','.join(sorted(device_dirs))
            if self.auditor_type == 'ALL':
                description = _(' - parallel, %s') % device_dir_str
            else:
                description = _(' - %s') % device_dir_str
        self.logger.info(_('Begin object audit "%(mode)s" mode (%(audi_type)s'
                           '%(description)s)') %
                         {'mode': mode, 'audi_type': self.auditor_type,
                          'description': description})
        begin = reported = time.time()
        self.total_bytes_processed = 0
        self.total_files_processed = 0
        total_quarantines = 0
        total_errors = 0
        time_auditing = 0

        # get AuditLocations for each policy
        loc_generators = []
        for policy in POLICIES:
            loc_generators.append(
                self.diskfile_router[policy]
                    .object_audit_location_generator(
                        policy, device_dirs=device_dirs,
                        auditor_type=self.auditor_type))

        all_locs = round_robin_iter(loc_generators)
        for location in all_locs:
            loop_time = time.time()
            self.failsafe_object_audit(location)
            self.logger.timing_since('timing', loop_time)
            self.files_running_time = ratelimit_sleep(
                self.files_running_time, self.max_files_per_second)
            self.total_files_processed += 1
            now = time.time()
            if now - self.last_logged >= self.log_time:
                self.logger.info(_(
                    'Object audit (%(type)s). '
                    'Since %(start_time)s: Locally: %(passes)d passed, '
                    '%(quars)d quarantined, %(errors)d errors, '
                    'files/sec: %(frate).2f, bytes/sec: %(brate).2f, '
                    'Total time: %(total).2f, Auditing time: %(audit).2f, '
                    'Rate: %(audit_rate).2f') % {
                        'type': '%s%s' % (self.auditor_type, description),
                        'start_time': time.ctime(reported),
                        'passes': self.passes, 'quars': self.quarantines,
                        'errors': self.errors,
                        'frate': self.passes / (now - reported),
                        'brate': self.bytes_processed / (now - reported),
                        'total': (now - begin), 'audit': time_auditing,
                        'audit_rate': time_auditing / (now - begin)})
                cache_entry = self.create_recon_nested_dict(
                    'object_auditor_stats_%s' % (self.auditor_type),
                    device_dirs,
                    {'errors': self.errors, 'passes': self.passes,
                     'quarantined': self.quarantines,
                     'bytes_processed': self.bytes_processed,
                     'start_time': reported, 'audit_time': time_auditing})
                dump_recon_cache(cache_entry, self.rcache, self.logger)
                reported = now
                total_quarantines += self.quarantines
                total_errors += self.errors
                self.passes = 0
                self.quarantines = 0
                self.errors = 0
                self.bytes_processed = 0
                self.last_logged = now
            time_auditing += (now - loop_time)
        # Avoid divide by zero during very short runs
        elapsed = (time.time() - begin) or 0.000001
        self.logger.info(_(
            'Object audit (%(type)s) "%(mode)s" mode '
            'completed: %(elapsed).02fs. Total quarantined: %(quars)d, '
            'Total errors: %(errors)d, Total files/sec: %(frate).2f, '
            'Total bytes/sec: %(brate).2f, Auditing time: %(audit).2f, '
            'Rate: %(audit_rate).2f') % {
                'type': '%s%s' % (self.auditor_type, description),
                'mode': mode, 'elapsed': elapsed,
                'quars': total_quarantines + self.quarantines,
                'errors': total_errors + self.errors,
                'frate': self.total_files_processed / elapsed,
                'brate': self.total_bytes_processed / elapsed,
                'audit': time_auditing, 'audit_rate': time_auditing / elapsed})
        if self.stats_sizes:
            self.logger.info(
                _('Object audit stats: %s') % json.dumps(self.stats_buckets))

        for policy in POLICIES:
            # Unset remaining partitions so they are not skipped
            # in the next run
            self.diskfile_router[policy].clear_auditor_status(
                policy,
                self.auditor_type)
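
Across both files the overall pattern is identical: build one generator per source (per data dir for the replicator, per storage policy for the auditor), wrap the list in round_robin_iter, and drain the combined stream so that no single source monopolizes the loop. A stripped-down illustration of that structure, with all names hypothetical:

def fake_locations(prefix, count):
    # Stand-in for a per-source location generator.
    for i in range(count):
        yield '%s-%d' % (prefix, i)

streams = [fake_locations('policy0', 3), fake_locations('policy1', 2)]
for location in round_robin_iter(streams):
    print(location)
# prints: policy0-0, policy1-0, policy0-1, policy1-1, policy0-2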