def test_split_policy_string(self):
    """
    Check split_policy_string against a table of inputs.

    Each key of the table maps either to the (base, policy) tuple the
    split is expected to produce, or to PolicyError when the string is
    expected to be rejected. Every accepted string must also be rebuilt
    unchanged by get_policy_string.
    """
    policy_0 = POLICIES[0]
    policy_1 = POLICIES[1]
    cases = {
        'something': ('something', policy_0),
        'something-1': ('something', policy_1),
        'tmp': ('tmp', policy_0),
        'objects': ('objects', policy_0),
        'tmp-1': ('tmp', policy_1),
        'objects-1': ('objects', policy_1),
        'objects-': PolicyError,
        'objects-0': PolicyError,
        'objects--1': ('objects-', policy_1),
        'objects-+1': PolicyError,
        'objects--': PolicyError,
        'objects-foo': PolicyError,
        'objects--bar': PolicyError,
        'objects-+bar': PolicyError,
        # questionable, demonstrated as inverse of get_policy_string
        'objects+0': ('objects+0', policy_0),
        '': ('', policy_0),
        '0': ('0', policy_0),
        '-1': ('', policy_1),
    }
    for raw, outcome in cases.items():
        if outcome == PolicyError:
            try:
                unexpected = split_policy_string(raw)
            except PolicyError:
                # rejected, exactly as the table demands
                continue
            self.fail('The string %r returned %r '
                      'instead of raising a PolicyError' %
                      (raw, unexpected))
        actual = split_policy_string(raw)
        self.assertEqual(outcome, actual)
        # the split must be the inverse of get_policy_string
        rebuilt = get_policy_string(*outcome)
        self.assertEqual(raw, rebuilt)
Beispiel #2
0
 def test_split_policy_string(self):
     """
     Check split_policy_string against a table of inputs.

     Each key of the table maps either to the (base, policy) tuple the
     split is expected to produce, or to PolicyError when the string is
     expected to be rejected. Every accepted string must also be rebuilt
     unchanged by get_policy_string.
     """
     policy_0 = POLICIES[0]
     policy_1 = POLICIES[1]
     cases = {
         'something': ('something', policy_0),
         'something-1': ('something', policy_1),
         'tmp': ('tmp', policy_0),
         'objects': ('objects', policy_0),
         'tmp-1': ('tmp', policy_1),
         'objects-1': ('objects', policy_1),
         'objects-': PolicyError,
         'objects-0': PolicyError,
         'objects--1': ('objects-', policy_1),
         'objects-+1': PolicyError,
         'objects--': PolicyError,
         'objects-foo': PolicyError,
         'objects--bar': PolicyError,
         'objects-+bar': PolicyError,
         # questionable, demonstrated as inverse of get_policy_string
         'objects+0': ('objects+0', policy_0),
         '': ('', policy_0),
         '0': ('0', policy_0),
         '-1': ('', policy_1),
     }
     for raw, outcome in cases.items():
         if outcome == PolicyError:
             try:
                 unexpected = split_policy_string(raw)
             except PolicyError:
                 # rejected, exactly as the table demands
                 continue
             self.fail('The string %r returned %r '
                       'instead of raising a PolicyError' %
                       (raw, unexpected))
         actual = split_policy_string(raw)
         self.assertEqual(outcome, actual)
         # the split must be the inverse of get_policy_string
         rebuilt = get_policy_string(*outcome)
         self.assertEqual(raw, rebuilt)
Beispiel #3
0
 def test_split_policy_string(self):
     """
     Check split_policy_string against a table of inputs.

     Each key of the table maps either to the (base, policy) tuple the
     split is expected to produce, or to PolicyError when the string is
     expected to be rejected. Every accepted string must also be rebuilt
     unchanged by get_policy_string.
     """
     policy_0 = POLICIES[0]
     policy_1 = POLICIES[1]
     cases = {
         "something": ("something", policy_0),
         "something-1": ("something", policy_1),
         "tmp": ("tmp", policy_0),
         "objects": ("objects", policy_0),
         "tmp-1": ("tmp", policy_1),
         "objects-1": ("objects", policy_1),
         "objects-": PolicyError,
         "objects-0": PolicyError,
         "objects--1": ("objects-", policy_1),
         "objects-+1": PolicyError,
         "objects--": PolicyError,
         "objects-foo": PolicyError,
         "objects--bar": PolicyError,
         "objects-+bar": PolicyError,
         # questionable, demonstrated as inverse of get_policy_string
         "objects+0": ("objects+0", policy_0),
         "": ("", policy_0),
         "0": ("0", policy_0),
         "-1": ("", policy_1),
     }
     for raw, outcome in cases.items():
         if outcome == PolicyError:
             try:
                 unexpected = split_policy_string(raw)
             except PolicyError:
                 # rejected, exactly as the table demands
                 continue
             self.fail(
                 "The string %r returned %r "
                 "instead of raising a PolicyError" % (raw, unexpected)
             )
         actual = split_policy_string(raw)
         self.assertEqual(outcome, actual)
         # the split must be the inverse of get_policy_string
         rebuilt = get_policy_string(*outcome)
         self.assertEqual(raw, rebuilt)
Beispiel #4
0
    def object_sweep(self, device):
        """
        If there are async pendings on the device, walk each one and update.

        Within each prefix directory the pending files are visited in
        reverse sorted order. The first file seen for an object hash is
        handed to process_object_update(); any further file with the same
        hash is unlinked. Prefix directories are removed afterwards on a
        best-effort basis (a failing rmdir is ignored).

        The 'timing' metric is reported exactly once per call, after all
        async pending directories have been walked.

        :param device: path to device
        """
        start_time = time.time()
        # loop through async pending dirs for all policies
        for asyncdir in self._listdir(device):
            # we only care about directories
            async_pending = os.path.join(device, asyncdir)
            if not os.path.isdir(async_pending):
                continue
            if not asyncdir.startswith(ASYNCDIR_BASE):
                # skip stuff like "accounts", "containers", etc.
                continue
            try:
                base, policy = split_policy_string(asyncdir)
            except PolicyError as e:
                self.logger.warning(
                    _('Directory %(directory)r does not map '
                      'to a valid policy (%(error)s)') % {
                          'directory': asyncdir,
                          'error': e
                      })
                continue
            for prefix in self._listdir(async_pending):
                prefix_path = os.path.join(async_pending, prefix)
                if not os.path.isdir(prefix_path):
                    continue
                # hash of the most recently processed update in this prefix
                last_obj_hash = None
                for update in sorted(self._listdir(prefix_path), reverse=True):
                    update_path = os.path.join(prefix_path, update)
                    if not os.path.isfile(update_path):
                        continue
                    try:
                        obj_hash, timestamp = update.split('-')
                    except ValueError:
                        self.logger.increment('errors')
                        self.logger.error(
                            _('ERROR async pending file with unexpected '
                              'name %s') % (update_path))
                        continue
                    if obj_hash == last_obj_hash:
                        # an update for this object was already processed
                        self.logger.increment("unlinks")
                        os.unlink(update_path)
                    else:
                        self.process_object_update(update_path, device, policy)
                        last_obj_hash = obj_hash

                    self.objects_running_time = ratelimit_sleep(
                        self.objects_running_time, self.max_objects_per_second)
                try:
                    os.rmdir(prefix_path)
                except OSError:
                    # best effort: the directory may not be empty
                    pass
        # Report after the loop so the metric covers the whole sweep and is
        # emitted once, even when no async pending directory was walked.
        self.logger.timing_since('timing', start_time)
Beispiel #5
0
    def _iter_async_pendings(self, device):
        """
        Generate one record per async pending file found on the device.

        Files inside a prefix directory are walked in reverse sorted order,
        so several updates for the same object come out newest first and
        callers can skip the stale ones.

        Prefix directories are removed along the way when that is possible.
        """
        for asyncdir in self._listdir(device):
            async_pending = os.path.join(device, asyncdir)
            # only directories named like an async pending dir matter;
            # stuff like "accounts", "containers", etc. is skipped
            if not (os.path.isdir(async_pending) and
                    asyncdir.startswith(ASYNCDIR_BASE)):
                continue
            try:
                _base, policy = split_policy_string(asyncdir)
            except PolicyError as err:
                # A misconfiguration rather than an error, so a warning
                # is enough.
                self.logger.warning(
                    _('Directory %(directory)r does not map '
                      'to a valid policy (%(error)s)') %
                    {'directory': asyncdir, 'error': err})
                continue
            for prefix in self._listdir(async_pending):
                prefix_path = os.path.join(async_pending, prefix)
                if not os.path.isdir(prefix_path):
                    continue
                pending_names = sorted(self._listdir(prefix_path),
                                       reverse=True)
                for update in pending_names:
                    update_path = os.path.join(prefix_path, update)
                    if not os.path.isfile(update_path):
                        continue
                    # expected file name: <obj_hash>-<timestamp>
                    pieces = update.split('-')
                    if len(pieces) != 2:
                        self.stats.errors += 1
                        self.logger.increment('errors')
                        self.logger.error(
                            _('ERROR async pending file with unexpected '
                              'name %s') % update_path)
                        continue
                    obj_hash, timestamp = pieces
                    yield dict(device=device,
                               policy=policy,
                               path=update_path,
                               obj_hash=obj_hash,
                               timestamp=timestamp)
                try:
                    os.rmdir(prefix_path)
                except OSError:
                    # best effort: leave the directory if it cannot go
                    pass
Beispiel #6
0
    def object_sweep(self, device):
        """
        If there are async pendings on the device, walk each one and update.

        Within each prefix directory the pending files are visited in
        reverse sorted order. The first file seen for an object hash is
        handed to process_object_update(); any further file with the same
        hash is unlinked. Prefix directories are removed afterwards on a
        best-effort basis (a failing rmdir is ignored).

        The 'timing' metric is reported exactly once per call, after all
        async pending directories have been walked.

        :param device: path to device
        """
        start_time = time.time()
        # loop through async pending dirs for all policies
        for asyncdir in self._listdir(device):
            # we only care about directories
            async_pending = os.path.join(device, asyncdir)
            if not os.path.isdir(async_pending):
                continue
            if not asyncdir.startswith(ASYNCDIR_BASE):
                # skip stuff like "accounts", "containers", etc.
                continue
            try:
                base, policy = split_policy_string(asyncdir)
            except PolicyError as e:
                self.logger.warning(_('Directory %(directory)r does not map '
                                      'to a valid policy (%(error)s)') % {
                                    'directory': asyncdir, 'error': e})
                continue
            for prefix in self._listdir(async_pending):
                prefix_path = os.path.join(async_pending, prefix)
                if not os.path.isdir(prefix_path):
                    continue
                # hash of the most recently processed update in this prefix
                last_obj_hash = None
                for update in sorted(self._listdir(prefix_path), reverse=True):
                    update_path = os.path.join(prefix_path, update)
                    if not os.path.isfile(update_path):
                        continue
                    try:
                        obj_hash, timestamp = update.split('-')
                    except ValueError:
                        self.logger.increment('errors')
                        self.logger.error(
                            _('ERROR async pending file with unexpected '
                              'name %s')
                            % (update_path))
                        continue
                    if obj_hash == last_obj_hash:
                        # an update for this object was already processed
                        self.logger.increment("unlinks")
                        os.unlink(update_path)
                    else:
                        self.process_object_update(update_path, device,
                                                   policy)
                        last_obj_hash = obj_hash

                    self.objects_running_time = ratelimit_sleep(
                        self.objects_running_time,
                        self.max_objects_per_second)
                try:
                    os.rmdir(prefix_path)
                except OSError:
                    # best effort: the directory may not be empty
                    pass
        # Report after the loop so the metric covers the whole sweep and is
        # emitted once, even when no async pending directory was walked.
        self.logger.timing_since('timing', start_time)
Beispiel #7
0
def get_ring_and_datadir(path):
    """
    Load the ring stored at ``path`` and pick the matching data dir name.

    The ring name is the file name up to its first dot. For an 'object'
    ring the data dir comes from get_data_dir() for the policy encoded in
    the name; for any other ring it is the base name with an 's' appended.

    :param path: path to ring
    :returns: a tuple, (ring, datadir)
    """
    filename = os.path.basename(path)
    ring_name = filename.partition('.')[0]
    base, policy = split_policy_string(ring_name)
    datadir = get_data_dir(policy) if base == 'object' else base + 's'
    return Ring(path), datadir
def split_key(key):
    """
    Break a dot-separated key into its named components.

    The first dot-separated field is handed to split_policy_string(); the
    last field is split on '-' into a nonce (its first five pieces) and an
    optional integer fragment index (the sixth piece, when present).

    :param key: the key string to split
    :returns: False when the base of the first field is not 'objects',
              otherwise a dict with the keys 'policy', 'hashpath',
              'nonce', 'frag_index' and 'timestamp'
    """
    fields = key.split('.')
    base, policy = split_policy_string(fields[0])
    if base != 'objects':
        return False
    hashpath = fields[1]
    nonce_fields = fields[-1].split('-')
    frag_index = int(nonce_fields[5]) if len(nonce_fields) > 5 else None
    return {
        'policy': policy,
        'hashpath': hashpath,
        'nonce': '-'.join(nonce_fields[:5]),
        'frag_index': frag_index,
        # the timestamp itself contains a dot, so it spans two fields
        'timestamp': '.'.join(fields[2:4]),
    }
Beispiel #9
0
    def get_diskfile_from_audit_location(self, device, head_key):
        """
        Build the DiskFile that an audit location refers to.

        The policy is taken from the optional suffix that follows "objects"
        at the start of ``head_key`` (for example "objects-1."). When the
        key does not have that shape, or the suffix does not map to a valid
        policy, POLICIES.legacy is used instead.

        :param device: "host:port" string; also the key into
                       self.threadpools
        :param head_key: dot-separated key; its second field is used as the
                         DiskFile's _datadir
        :returns: a DiskFile for the given location
        """
        host, port = device.split(":")
        # Raw string so that "\." reaches the regex engine as an escaped dot
        # instead of being an invalid string escape. "[0-9]*" (not "?")
        # lets policy indexes with more than one digit match as well.
        policy_match = re.match(r"objects([-]?[0-9]*)\.", head_key)

        if policy_match is None:
            # Unrecognised key shape: use the same fallback as an unknown
            # policy rather than failing on None.group().
            policy = POLICIES.legacy
        else:
            try:
                _, policy = split_policy_string(policy_match.group(1))
            except PolicyError:
                policy = POLICIES.legacy
        datadir = head_key.split(".", 3)[1]
        return DiskFile(
            self,
            host,
            port,
            self.threadpools[device],
            None,
            policy=policy,
            _datadir=datadir,
            unlink_wait=self.unlink_wait,
        )
Beispiel #10
0
    def _iter_async_pendings(self, device):
        """
        Generate one record per current async pending on the device.

        Prefix directories are visited in shuffled order. Within a prefix
        directory the files are walked in reverse sorted order, so when an
        object has several updates only the first one seen (the newest) is
        yielded; the older ones are unlinked instead, which lets callers
        receive only useful async_pendings.
        """
        for asyncdir in self._listdir(device):
            async_pending = os.path.join(device, asyncdir)
            # skip stuff like "accounts", "containers", etc., as well as
            # anything that is not a directory
            if not (asyncdir.startswith(ASYNCDIR_BASE) and
                    os.path.isdir(async_pending)):
                continue
            try:
                _base, policy = split_policy_string(asyncdir)
            except PolicyError as err:
                # A misconfiguration rather than an error, so a warning
                # is enough.
                self.logger.warning(
                    _('Directory %(directory)r does not map '
                      'to a valid policy (%(error)s)') %
                    {'directory': asyncdir, 'error': err})
                continue
            prefix_dirs = self._listdir(async_pending)
            shuffle(prefix_dirs)
            for prefix in prefix_dirs:
                prefix_path = os.path.join(async_pending, prefix)
                if not os.path.isdir(prefix_path):
                    continue
                last_obj_hash = None
                for update in sorted(self._listdir(prefix_path), reverse=True):
                    update_path = os.path.join(prefix_path, update)
                    if not os.path.isfile(update_path):
                        continue
                    pieces = update.split('-')
                    if len(pieces) != 2:
                        self.stats.errors += 1
                        self.logger.increment('errors')
                        self.logger.error(
                            _('ERROR async pending file with unexpected '
                              'name %s') % update_path)
                        continue
                    obj_hash, timestamp = pieces
                    # On-disk layout of async pendings:
                    #
                    # <device>/async_pending/<suffix>/<obj_hash>-<timestamp>
                    #
                    # Several updates for one object therefore share the
                    # <obj_hash> part and differ only in <timestamp>. A
                    # newer update carries everything the older ones do,
                    # and the reverse-sorted listing puts the newest
                    # first: it is yielded, and every later file with the
                    # same hash is obsolete and gets unlinked.
                    if obj_hash != last_obj_hash:
                        last_obj_hash = obj_hash
                        yield dict(device=device,
                                   policy=policy,
                                   path=update_path,
                                   obj_hash=obj_hash,
                                   timestamp=timestamp)
                        continue
                    self.stats.unlinks += 1
                    self.logger.increment('unlinks')
                    try:
                        os.unlink(update_path)
                    except OSError as unlink_err:
                        # a file that is already gone is fine
                        if unlink_err.errno != errno.ENOENT:
                            raise
Beispiel #11
0
    def object_sweep(self, device):
        """
        If there are async pendings on the device, walk each one and update.

        Within each prefix directory the pending files are visited in
        reverse sorted order. The first file seen for an object hash is
        handed to process_object_update(); any further file with the same
        hash is unlinked. Prefix directories are removed afterwards on a
        best-effort basis (a failing rmdir is ignored).

        A progress line is logged whenever at least self.report_interval
        seconds have passed since the previous one. The 'timing' metric and
        the "sweep completed" summary are emitted exactly once per call,
        after all async pending directories have been walked.

        :param device: path to device
        """
        start_time = time.time()
        last_status_update = start_time
        # snapshot, so that only this sweep's share of the stats is reported
        start_stats = self.stats.copy()
        my_pid = os.getpid()
        self.logger.info("Object update sweep starting on %s (pid: %d)",
                         device, my_pid)

        # loop through async pending dirs for all policies
        for asyncdir in self._listdir(device):
            # we only care about directories
            async_pending = os.path.join(device, asyncdir)
            if not os.path.isdir(async_pending):
                continue
            if not asyncdir.startswith(ASYNCDIR_BASE):
                # skip stuff like "accounts", "containers", etc.
                continue
            try:
                base, policy = split_policy_string(asyncdir)
            except PolicyError as e:
                # This isn't an error, but a misconfiguration. Logging a
                # warning should be sufficient.
                self.logger.warning(_('Directory %(directory)r does not map '
                                      'to a valid policy (%(error)s)') % {
                                    'directory': asyncdir, 'error': e})
                continue
            for prefix in self._listdir(async_pending):
                prefix_path = os.path.join(async_pending, prefix)
                if not os.path.isdir(prefix_path):
                    continue
                # hash of the most recently processed update in this prefix
                last_obj_hash = None
                for update in sorted(self._listdir(prefix_path), reverse=True):
                    update_path = os.path.join(prefix_path, update)
                    if not os.path.isfile(update_path):
                        continue
                    try:
                        obj_hash, timestamp = update.split('-')
                    except ValueError:
                        self.stats.errors += 1
                        self.logger.increment('errors')
                        self.logger.error(
                            _('ERROR async pending file with unexpected '
                              'name %s')
                            % (update_path))
                        continue
                    if obj_hash == last_obj_hash:
                        # an update for this object was already processed
                        self.stats.unlinks += 1
                        self.logger.increment('unlinks')
                        os.unlink(update_path)
                    else:
                        self.process_object_update(update_path, device,
                                                   policy)
                        last_obj_hash = obj_hash

                    self.objects_running_time = ratelimit_sleep(
                        self.objects_running_time,
                        self.max_objects_per_second)

                    now = time.time()
                    if now - last_status_update >= self.report_interval:
                        this_sweep = self.stats.since(start_stats)
                        self.logger.info(
                            ('Object update sweep progress on %(device)s: '
                             '%(elapsed).02fs, %(stats)s (pid: %(pid)d)'),
                            {'device': device,
                             'elapsed': now - start_time,
                             'pid': my_pid,
                             'stats': this_sweep})
                        last_status_update = now
                try:
                    os.rmdir(prefix_path)
                except OSError:
                    # best effort: the directory may not be empty
                    pass
        # Completion reporting belongs after the loop: inside it, the summary
        # was logged once per async pending directory and never for a device
        # without one.
        self.logger.timing_since('timing', start_time)
        sweep_totals = self.stats.since(start_stats)
        self.logger.info(
            ('Object update sweep completed on %(device)s '
             'in %(elapsed).02fs seconds:, '
             '%(successes)d successes, %(failures)d failures, '
             '%(quarantines)d quarantines, '
             '%(unlinks)d unlinks, %(errors)d errors '
             '(pid: %(pid)d)'),
            {'device': device,
             'elapsed': time.time() - start_time,
             'pid': my_pid,
             'successes': sweep_totals.successes,
             'failures': sweep_totals.failures,
             'quarantines': sweep_totals.quarantines,
             'unlinks': sweep_totals.unlinks,
             'errors': sweep_totals.errors})
Beispiel #12
0
    def object_sweep(self, device):
        """
        Walk every async pending on the device and process it.

        Within a prefix directory the files are visited in reverse sorted
        order: the first file seen for an object hash goes to
        process_object_update(), later files with the same hash are
        unlinked. A progress line is logged whenever at least
        self.report_interval seconds have passed since the previous one,
        and the 'timing' metric plus a summary line are emitted once the
        whole device has been walked.

        :param device: path to device
        """
        sweep_started = time.time()
        last_report = sweep_started
        baseline_stats = self.stats.copy()
        pid = os.getpid()
        self.logger.info("Object update sweep starting on %s (pid: %d)",
                         device, pid)

        # one async pending directory per policy
        for asyncdir in self._listdir(device):
            async_pending = os.path.join(device, asyncdir)
            # only directories matter, and of those only the async pending
            # ones -- not stuff like "accounts", "containers", etc.
            if (not os.path.isdir(async_pending) or
                    not asyncdir.startswith(ASYNCDIR_BASE)):
                continue
            try:
                _base, policy = split_policy_string(asyncdir)
            except PolicyError as err:
                # A misconfiguration rather than an error, so a warning
                # is enough.
                self.logger.warning(
                    _('Directory %(directory)r does not map '
                      'to a valid policy (%(error)s)') %
                    {'directory': asyncdir, 'error': err})
                continue
            for prefix in self._listdir(async_pending):
                prefix_path = os.path.join(async_pending, prefix)
                if not os.path.isdir(prefix_path):
                    continue
                last_obj_hash = None
                for update in sorted(self._listdir(prefix_path), reverse=True):
                    update_path = os.path.join(prefix_path, update)
                    if not os.path.isfile(update_path):
                        continue
                    # expected file name: <obj_hash>-<timestamp>
                    pieces = update.split('-')
                    if len(pieces) != 2:
                        self.stats.errors += 1
                        self.logger.increment('errors')
                        self.logger.error(
                            _('ERROR async pending file with unexpected '
                              'name %s') % update_path)
                        continue
                    obj_hash, timestamp = pieces
                    if obj_hash != last_obj_hash:
                        self.process_object_update(update_path, device,
                                                   policy)
                        last_obj_hash = obj_hash
                    else:
                        # an update for this object was already processed
                        self.stats.unlinks += 1
                        self.logger.increment('unlinks')
                        os.unlink(update_path)

                    self.objects_running_time = ratelimit_sleep(
                        self.objects_running_time,
                        self.max_objects_per_second)

                    now = time.time()
                    if now - last_report < self.report_interval:
                        continue
                    progress = self.stats.since(baseline_stats)
                    self.logger.info(
                        ('Object update sweep progress on %(device)s: '
                         '%(elapsed).02fs, %(stats)s (pid: %(pid)d)'),
                        {'device': device,
                         'elapsed': now - sweep_started,
                         'pid': pid,
                         'stats': progress})
                    last_report = now
                try:
                    os.rmdir(prefix_path)
                except OSError:
                    # best effort: the directory may not be empty
                    pass

        self.logger.timing_since('timing', sweep_started)
        totals = self.stats.since(baseline_stats)
        summary = {
            'device': device,
            'elapsed': time.time() - sweep_started,
            'pid': pid,
            'successes': totals.successes,
            'failures': totals.failures,
            'quarantines': totals.quarantines,
            'unlinks': totals.unlinks,
            'errors': totals.errors,
            'redirects': totals.redirects,
        }
        self.logger.info(
            ('Object update sweep completed on %(device)s '
             'in %(elapsed).02fs seconds:, '
             '%(successes)d successes, %(failures)d failures, '
             '%(quarantines)d quarantines, '
             '%(unlinks)d unlinks, %(errors)d errors, '
             '%(redirects)d redirects '
             '(pid: %(pid)d)'),
            summary)