Example #1
    def test_invalidate_hash(self):

        def assertFileData(file_path, data):
            with open(file_path, 'rb') as fp:
                fdata = fp.read()
                self.assertEquals(pickle.loads(fdata), pickle.loads(data))

        df = diskfile.DiskFile(self.devices, 'sda', '0', 'a', 'c', 'o',
                               FakeLogger())
        mkdirs(df.datadir)
        ohash = hash_path('a', 'c', 'o')
        data_dir = ohash[-3:]
        whole_path_from = os.path.join(self.objects, '0', data_dir)
        hashes_file = os.path.join(self.objects, '0',
                                   diskfile.HASH_FILE)
        # test that a missing hashes file is tolerated (exception caught)
        self.assertEquals(diskfile.invalidate_hash(whole_path_from),
                          None)
        # test that hashes get cleared
        check_pickle_data = pickle.dumps({data_dir: None},
                                         diskfile.PICKLE_PROTOCOL)
        for data_hash in [{data_dir: None}, {data_dir: 'abcdefg'}]:
            with open(hashes_file, 'wb') as fp:
                pickle.dump(data_hash, fp, diskfile.PICKLE_PROTOCOL)
            diskfile.invalidate_hash(whole_path_from)
            assertFileData(hashes_file, check_pickle_data)
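
For context, the test above exercises two behaviours of
diskfile.invalidate_hash(): a partition with no hashes.pkl is tolerated (the
call simply returns None), and an existing entry for the suffix is reset to
None so the next rehash recomputes it. The following is a minimal,
single-process sketch of that contract only; it is not Swift's implementation,
which also takes a partition lock and handles corrupt pickles. The
PICKLE_PROTOCOL value is an assumption.

import os
import pickle

HASH_FILE = 'hashes.pkl'  # mirrors diskfile.HASH_FILE as used in the test
PICKLE_PROTOCOL = 2       # assumed value for diskfile.PICKLE_PROTOCOL


def invalidate_hash_sketch(suffix_dir):
    # suffix_dir is <device>/objects/<partition>/<suffix>
    suffix = os.path.basename(suffix_dir)
    partition_dir = os.path.dirname(suffix_dir)
    hashes_file = os.path.join(partition_dir, HASH_FILE)
    if not os.path.exists(hashes_file):
        return None  # nothing cached yet; matches the first assertion above
    with open(hashes_file, 'rb') as fp:
        hashes = pickle.load(fp)
    # Clear the cached suffix hash so the next rehash recomputes it.
    hashes[suffix] = None
    with open(hashes_file, 'wb') as fp:
        pickle.dump(hashes, fp, PICKLE_PROTOCOL)
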
Example #2
 def test_object_audit_no_meta(self):
     timestamp = str(normalize_timestamp(time.time()))
     path = os.path.join(self.disk_file._datadir, timestamp + ".data")
     mkdirs(self.disk_file._datadir)
     fp = open(path, "w")
     fp.write("0" * 1024)
     fp.close()
     invalidate_hash(os.path.dirname(self.disk_file._datadir))
     auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)
     pre_quarantines = auditor_worker.quarantines
     auditor_worker.object_audit(AuditLocation(self.disk_file._datadir, "sda", "0"))
     self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)
Example #3
 def test_object_audit_no_meta(self):
     timestamp = str(normalize_timestamp(time.time()))
     path = os.path.join(self.disk_file._datadir, timestamp + '.data')
     mkdirs(self.disk_file._datadir)
     fp = open(path, 'w')
     fp.write('0' * 1024)
     fp.close()
     invalidate_hash(os.path.dirname(self.disk_file._datadir))
     auditor_worker = auditor.AuditorWorker(self.conf, self.logger)
     pre_quarantines = auditor_worker.quarantines
     auditor_worker.object_audit(
         AuditLocation(self.disk_file._datadir, 'sda', '0'))
     self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1)
Example #4
    def test_invalidate_hash(self):
        def assertFileData(file_path, data):
            with open(file_path, 'rb') as fp:
                fdata = fp.read()
                self.assertEquals(pickle.loads(fdata), pickle.loads(data))

        df = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o')
        mkdirs(df._datadir)
        ohash = hash_path('a', 'c', 'o')
        data_dir = ohash[-3:]
        whole_path_from = os.path.join(self.objects, '0', data_dir)
        hashes_file = os.path.join(self.objects, '0', diskfile.HASH_FILE)
        # test that a missing hashes file is tolerated (exception caught)
        self.assertEquals(diskfile.invalidate_hash(whole_path_from), None)
        # test that hashes get cleared
        check_pickle_data = pickle.dumps({data_dir: None},
                                         diskfile.PICKLE_PROTOCOL)
        for data_hash in [{data_dir: None}, {data_dir: 'abcdefg'}]:
            with open(hashes_file, 'wb') as fp:
                pickle.dump(data_hash, fp, diskfile.PICKLE_PROTOCOL)
            diskfile.invalidate_hash(whole_path_from)
            assertFileData(hashes_file, check_pickle_data)
Example #5
    def process_location(self, hash_path, new_hash_path):
        # Compare the contents of each hash dir with contents of same hash
        # dir in its new partition to verify that the new location has the
        # most up to date set of files. The new location may have newer
        # files if it has been updated since relinked.
        self.stats['hash_dirs'] += 1

        # Get on disk data for new and old locations, cleaning up any
        # reclaimable or obsolete files in each. The new location is
        # cleaned up *before* the old location to prevent false negatives
        # where the old still has a file that has been cleaned up in the
        # new; cleaning up the new location first ensures that the old will
        # always be 'cleaner' than the new.
        new_df_data = self.diskfile_mgr.cleanup_ondisk_files(new_hash_path)
        old_df_data = self.diskfile_mgr.cleanup_ondisk_files(hash_path)
        # Now determine what the most up-to-date set of on-disk files would
        # be, given the content of the old and new locations...
        new_files = set(new_df_data['files'])
        old_files = set(old_df_data['files'])
        union_files = new_files.union(old_files)
        union_data = self.diskfile_mgr.get_ondisk_files(union_files,
                                                        '',
                                                        verify=False)
        obsolete_files = set(info['filename']
                             for info in union_data.get('obsolete', []))
        # drop 'obsolete' files but retain 'unexpected' files which might
        # be misplaced diskfiles from another policy
        required_files = union_files.difference(obsolete_files)
        required_links = required_files.intersection(old_files)

        missing_links = 0
        created_links = 0
        unwanted_files = []
        for filename in required_links:
            # Before removing old files, be sure that the corresponding
            # required new files exist by calling relink_paths again. There
            # are several possible outcomes:
            #  - The common case is that the new file exists, in which case
            #    relink_paths checks that the new file has the same inode
            #    as the old file. An exception is raised if the inode of
            #    the new file is not the same as the old file.
            #  - The new file may not exist because the relinker failed to
            #    create the link to the old file and has erroneously moved
            #    on to cleanup. In this case relink_paths will create
            #    the link now or raise an exception if that fails.
            #  - The new file may not exist because some other process,
            #    such as an object server handling a request, has cleaned
            #    it up since we called cleanup_ondisk_files(new_hash_path).
            #    In this case a new link will be created to the old file.
            #    This is unnecessary but simpler than repeating the
            #    evaluation of what links are now required and safer than
            #    assuming that a non-existent file that *was* required is
            #    no longer required. The new file will eventually be
            #    cleaned up again.
            self.stats['files'] += 1
            old_file = os.path.join(hash_path, filename)
            new_file = os.path.join(new_hash_path, filename)
            try:
                if diskfile.relink_paths(old_file, new_file):
                    self.logger.debug("Relinking%s created link: %s to %s",
                                      ' (cleanup)' if self.do_cleanup else '',
                                      old_file, new_file)
                    created_links += 1
                    self.stats['linked'] += 1
            except OSError as exc:
                if exc.errno == errno.EEXIST and filename.endswith('.ts'):
                    # special case for duplicate tombstones, see:
                    # https://bugs.launchpad.net/swift/+bug/1921718
                    # https://bugs.launchpad.net/swift/+bug/1934142
                    self.logger.debug(
                        "Relinking%s: tolerating different inodes for "
                        "tombstone with same timestamp: %s to %s",
                        ' (cleanup)' if self.do_cleanup else '', old_file,
                        new_file)
                else:
                    self.logger.warning(
                        "Error relinking%s: failed to relink %s to %s: %s",
                        ' (cleanup)' if self.do_cleanup else '', old_file,
                        new_file, exc)
                    self.stats['errors'] += 1
                    missing_links += 1
        if created_links:
            self.linked_into_partitions.add(
                get_partition_from_path(self.conf['devices'], new_hash_path))
            try:
                diskfile.invalidate_hash(os.path.dirname(new_hash_path))
            except (Exception, LockTimeout) as exc:
                # at this point, the link's created. even if we counted it as
                # an error, a subsequent run wouldn't find any work to do. so,
                # don't bother; instead, wait for replication to be re-enabled
                # so post-replication rehashing or periodic rehashing can
                # eventually pick up the change
                self.logger.warning('Error invalidating suffix for %s: %r',
                                    new_hash_path, exc)

        if self.do_cleanup and not missing_links:
            # use the sorted list to help unit testing
            unwanted_files = old_df_data['files']

        # the new partition hash dir has the most up to date set of on
        # disk files so it is safe to delete the old location...
        rehash = False
        for filename in unwanted_files:
            old_file = os.path.join(hash_path, filename)
            try:
                os.remove(old_file)
            except OSError as exc:
                self.logger.warning('Error cleaning up %s: %r', old_file, exc)
                self.stats['errors'] += 1
            else:
                rehash = True
                self.stats['removed'] += 1
                self.logger.debug("Removed %s", old_file)

        if rehash:
            # Even though we're invalidating the suffix, don't update
            # self.linked_into_partitions -- we only care about them for
            # relinking into the new part-power space
            try:
                diskfile.invalidate_hash(os.path.dirname(hash_path))
            except (Exception, LockTimeout) as exc:
                # note: not counted as an error
                self.logger.warning('Error invalidating suffix for %s: %r',
                                    hash_path, exc)
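
The set arithmetic near the top of process_location() is easier to follow
with concrete names. The sketch below uses invented timestamps and simply
assumes that get_ondisk_files() would report nothing as obsolete for this
pair of listings; it only illustrates the union/difference/intersection
steps, not the real on-disk file rules.

# Hypothetical contents of the old and new hash dirs (timestamps invented).
old_files = {'1618000000.00000.data', '1618000456.00000.meta'}
new_files = {'1618000000.00000.data'}  # relinked before the .meta was written

union_files = new_files | old_files
obsolete_files = set()                 # assumed: nothing is superseded here

required_files = union_files - obsolete_files
required_links = required_files & old_files

# relink_paths() is then called for both names: a no-op for the .data (the
# link already exists with the same inode) and a new link for the .meta.
print(sorted(required_links))
# ['1618000000.00000.data', '1618000456.00000.meta']
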
Example #6
def cleanup(conf, logger, device):
    diskfile_router = diskfile.DiskFileRouter(conf, logger)
    errors = cleaned_up = 0
    error_counter = {}
    found_policy = False
    for policy in POLICIES:
        diskfile_mgr = diskfile_router[policy]
        policy.object_ring = None  # Ensure it will be reloaded
        policy.load_ring(conf['swift_dir'])
        part_power = policy.object_ring.part_power
        next_part_power = policy.object_ring.next_part_power
        if not next_part_power or next_part_power != part_power:
            continue
        logger.info('Cleaning up files for policy %s under %s', policy.name,
                    conf['devices'])
        found_policy = True
        datadir = diskfile.get_data_dir(policy)

        locks = [None]
        states = {
            "part_power": part_power,
            "next_part_power": next_part_power,
            "state": {},
        }
        cleanup_devices_filter = partial(devices_filter, device)
        cleanup_hook_pre_device = partial(hook_pre_device, locks, states,
                                          datadir)
        cleanup_hook_post_device = partial(hook_post_device, locks)
        cleanup_partition_filter = partial(partitions_filter, states,
                                           part_power, next_part_power)
        cleanup_hook_post_partition = partial(hook_post_partition, states,
                                              STEP_CLEANUP, policy,
                                              diskfile_mgr)
        cleanup_hashes_filter = partial(hashes_filter, next_part_power)

        locations = audit_location_generator(
            conf['devices'],
            datadir,
            mount_check=conf['mount_check'],
            devices_filter=cleanup_devices_filter,
            hook_pre_device=cleanup_hook_pre_device,
            hook_post_device=cleanup_hook_post_device,
            partitions_filter=cleanup_partition_filter,
            hook_post_partition=cleanup_hook_post_partition,
            hashes_filter=cleanup_hashes_filter,
            logger=logger,
            error_counter=error_counter)
        if conf['files_per_second'] > 0:
            locations = RateLimitedIterator(locations,
                                            conf['files_per_second'])
        for fname, device, partition in locations:
            expected_fname = replace_partition_in_path(fname, part_power)
            if fname == expected_fname:
                continue
            # Make sure there is a valid object file in the expected new
            # location. Note that this could be newer than the original one
            # (which happens if there is another PUT after the partition
            # power has been increased but cleanup has not yet run)
            loc = diskfile.AuditLocation(os.path.dirname(expected_fname),
                                         device, partition, policy)
            df = diskfile_mgr.get_diskfile_from_audit_location(loc)
            try:
                with df.open():
                    pass
            except DiskFileQuarantined as exc:
                logger.warning(
                    'ERROR Object %(obj)s failed audit and was'
                    ' quarantined: %(err)r', {
                        'obj': loc,
                        'err': exc
                    })
                errors += 1
                continue
            except DiskFileDeleted:
                pass
            except DiskFileNotExist as exc:
                err = False
                if policy.policy_type == 'erasure_coding':
                    # Might be a non-durable fragment - check that there is
                    # a fragment in the new path; the reconstructor will fix
                    # it up later
                    if not os.path.isfile(expected_fname):
                        err = True
                else:
                    err = True
                if err:
                    logger.warning('Error cleaning up %s: %r', fname, exc)
                    errors += 1
                    continue
            try:
                os.remove(fname)
                cleaned_up += 1
                logger.debug("Removed %s", fname)
                suffix_dir = os.path.dirname(os.path.dirname(fname))
                diskfile.invalidate_hash(suffix_dir)
            except OSError as exc:
                logger.warning('Error cleaning up %s: %r', fname, exc)
                errors += 1
    return determine_exit_code(
        logger=logger,
        found_policy=found_policy,
        processed=cleaned_up,
        action='cleaned up',
        action_errors=errors,
        error_counter=error_counter,
    )
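
The cleanup loop leans on replace_partition_in_path() to predict where each
file should now live. The underlying idea is that a partition is the top
part_power bits of the first four bytes of the object hash, so raising the
part power by one splits every partition in two. The sketch below shows that
derivation under that assumption; it is an illustration, not the Swift helper
itself.

import binascii
import struct


def partition_for_hash(hex_hash, part_power):
    # Top `part_power` bits of the first four bytes of the object hash.
    raw = binascii.unhexlify(hex_hash)
    return struct.unpack_from('>I', raw)[0] >> (32 - part_power)


ohash = 'd41d8cd98f00b204e9800998ecf8427e'  # hypothetical object hash
old_part = partition_for_hash(ohash, 10)    # partition under the old power
new_part = partition_for_hash(ohash, 11)    # partition under the next power
# new_part is always 2 * old_part or 2 * old_part + 1
print(old_part, new_part)                   # 848 1696 for this hash
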
Example #7
def relink(conf, logger, device):
    diskfile_router = diskfile.DiskFileRouter(conf, logger)
    found_policy = False
    relinked = errors = 0
    error_counter = {}
    for policy in POLICIES:
        diskfile_mgr = diskfile_router[policy]
        policy.object_ring = None  # Ensure it will be reloaded
        policy.load_ring(conf['swift_dir'])
        part_power = policy.object_ring.part_power
        next_part_power = policy.object_ring.next_part_power
        if not next_part_power or next_part_power == part_power:
            continue
        logger.info('Relinking files for policy %s under %s', policy.name,
                    conf['devices'])
        found_policy = True
        datadir = diskfile.get_data_dir(policy)

        locks = [None]
        states = {
            "part_power": part_power,
            "next_part_power": next_part_power,
            "state": {},
        }
        relink_devices_filter = partial(devices_filter, device)
        relink_hook_pre_device = partial(hook_pre_device, locks, states,
                                         datadir)
        relink_hook_post_device = partial(hook_post_device, locks)
        relink_partition_filter = partial(partitions_filter, states,
                                          part_power, next_part_power)
        relink_hook_post_partition = partial(hook_post_partition, states,
                                             STEP_RELINK, policy, diskfile_mgr)
        relink_hashes_filter = partial(hashes_filter, next_part_power)

        locations = audit_location_generator(
            conf['devices'],
            datadir,
            mount_check=conf['mount_check'],
            devices_filter=relink_devices_filter,
            hook_pre_device=relink_hook_pre_device,
            hook_post_device=relink_hook_post_device,
            partitions_filter=relink_partition_filter,
            hook_post_partition=relink_hook_post_partition,
            hashes_filter=relink_hashes_filter,
            logger=logger,
            error_counter=error_counter)
        if conf['files_per_second'] > 0:
            locations = RateLimitedIterator(locations,
                                            conf['files_per_second'])
        for fname, _, _ in locations:
            newfname = replace_partition_in_path(fname, next_part_power)
            try:
                diskfile.relink_paths(fname, newfname, check_existing=True)
                relinked += 1
                suffix_dir = os.path.dirname(os.path.dirname(newfname))
                diskfile.invalidate_hash(suffix_dir)
            except OSError as exc:
                errors += 1
                logger.warning("Relinking %s to %s failed: %s", fname,
                               newfname, exc)
    return determine_exit_code(
        logger=logger,
        found_policy=found_policy,
        processed=relinked,
        action='relinked',
        action_errors=errors,
        error_counter=error_counter,
    )
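
relink() depends on diskfile.relink_paths() to hard-link each file into its
future partition without copying any data. Below is a minimal sketch of that
idea, ignoring the real helper's check_existing and inode bookkeeping; it is
an illustration, not Swift's code.

import errno
import os


def relink_paths_sketch(target_path, new_target_path):
    # Create the destination hash dir if needed, then hard-link the file.
    os.makedirs(os.path.dirname(new_target_path), exist_ok=True)
    try:
        os.link(target_path, new_target_path)
        return True  # a new link was created
    except OSError as exc:
        if exc.errno == errno.EEXIST and os.path.samefile(
                target_path, new_target_path):
            return False  # already linked to the same inode
        raise
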
Example #8
    def process_location(self, hash_path, new_hash_path):
        # Compare the contents of each hash dir with contents of same hash
        # dir in its new partition to verify that the new location has the
        # most up to date set of files. The new location may have newer
        # files if it has been updated since relinked.
        self.stats['hash_dirs'] += 1

        # Get on disk data for new and old locations, cleaning up any
        # reclaimable or obsolete files in each. The new location is
        # cleaned up *before* the old location to prevent false negatives
        # where the old still has a file that has been cleaned up in the
        # new; cleaning up the new location first ensures that the old will
        # always be 'cleaner' than the new.
        new_df_data = self.diskfile_mgr.cleanup_ondisk_files(new_hash_path)
        old_df_data = self.diskfile_mgr.cleanup_ondisk_files(hash_path)
        # Now determine what the most up-to-date set of on-disk files would
        # be, given the content of the old and new locations...
        new_files = set(new_df_data['files'])
        old_files = set(old_df_data['files'])
        union_files = new_files.union(old_files)
        union_data = self.diskfile_mgr.get_ondisk_files(
            union_files, '', verify=False)
        obsolete_files = set(info['filename']
                             for info in union_data.get('obsolete', []))
        # drop 'obsolete' files but retain 'unexpected' files which might
        # be misplaced diskfiles from another policy
        required_files = union_files.difference(obsolete_files)
        required_links = required_files.intersection(old_files)

        missing_links = 0
        created_links = 0
        unwanted_files = []
        for filename in required_links:
            # Before removing old files, be sure that the corresponding
            # required new files exist by calling relink_paths again. There
            # are several possible outcomes:
            #  - The common case is that the new file exists, in which case
            #    relink_paths checks that the new file has the same inode
            #    as the old file. An exception is raised if the inode of
            #    the new file is not the same as the old file.
            #  - The new file may not exist because the relinker failed to
            #    create the link to the old file and has erroneously moved
            #    on to cleanup. In this case relink_paths will create
            #    the link now or raise an exception if that fails.
            #  - The new file may not exist because some other process,
            #    such as an object server handling a request, has cleaned
            #    it up since we called cleanup_ondisk_files(new_hash_path).
            #    In this case a new link will be created to the old file.
            #    This is unnecessary but simpler than repeating the
            #    evaluation of what links are now required and safer than
            #    assuming that a non-existent file that *was* required is
            #    no longer required. The new file will eventually be
            #    cleaned up again.
            self.stats['files'] += 1
            old_file = os.path.join(hash_path, filename)
            new_file = os.path.join(new_hash_path, filename)
            try:
                if diskfile.relink_paths(old_file, new_file):
                    self.logger.debug(
                        "Relinking%s created link: %s to %s",
                        ' (cleanup)' if self.do_cleanup else '',
                        old_file, new_file)
                    created_links += 1
                    self.stats['linked'] += 1
            except OSError as exc:
                existing_link = None
                link_created = False
                if exc.errno == errno.EEXIST and filename.endswith('.ts'):
                    # special case for duplicate tombstones in older partition
                    # power locations
                    # (https://bugs.launchpad.net/swift/+bug/1921718)
                    existing_link, link_created = self.check_existing(new_file)
                if existing_link:
                    self.logger.debug(
                        "Relinking%s: link not needed: %s to %s due to "
                        "existing %s", ' (cleanup)' if self.do_cleanup else '',
                        old_file, new_file, existing_link)
                    if link_created:
                        # uncommon case: the retry succeeded in creating a link
                        created_links += 1
                        self.stats['linked'] += 1
                    wanted_file = replace_partition_in_path(
                        self.conf['devices'], old_file, self.part_power)
                    if old_file not in (existing_link, wanted_file):
                        # A link exists to a different file and this file
                        # is not the current target for client requests. If
                        # this file is visited again it is possible that
                        # the existing_link will have been cleaned up and
                        # the check will fail, so clean it up now.
                        self.logger.info(
                            "Relinking%s: cleaning up unwanted file: %s",
                            ' (cleanup)' if self.do_cleanup else '', old_file)
                        unwanted_files.append(filename)
                else:
                    self.logger.warning(
                        "Error relinking%s: failed to relink %s to %s: %s",
                        ' (cleanup)' if self.do_cleanup else '',
                        old_file, new_file, exc)
                    self.stats['errors'] += 1
                    missing_links += 1
        if created_links:
            diskfile.invalidate_hash(os.path.dirname(new_hash_path))

        if self.do_cleanup and not missing_links:
            # use the sorted list to help unit testing
            unwanted_files = old_df_data['files']

        # the new partition hash dir has the most up to date set of on
        # disk files so it is safe to delete the old location...
        rehash = False
        for filename in unwanted_files:
            old_file = os.path.join(hash_path, filename)
            try:
                os.remove(old_file)
            except OSError as exc:
                self.logger.warning('Error cleaning up %s: %r', old_file, exc)
                self.stats['errors'] += 1
            else:
                rehash = True
                self.stats['removed'] += 1
                self.logger.debug("Removed %s", old_file)

        if rehash:
            try:
                diskfile.invalidate_hash(os.path.dirname(hash_path))
            except Exception as exc:
                # note: not counted as an error
                self.logger.warning(
                    'Error invalidating suffix for %s: %r',
                    hash_path, exc)
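
The errno.EEXIST branch above exists because two tombstones with the same
timestamp can legitimately end up with different inodes, for example when
separate DELETE requests land in the old and the new partition during the
part power increase (see the linked Launchpad bugs). The snippet below
reproduces the raw condition the relinker has to tolerate; the paths and
timestamp are made up.

import errno
import os
import tempfile

tmp = tempfile.mkdtemp()
old = os.path.join(tmp, 'old_hash_dir', '1618000000.00000.ts')
new = os.path.join(tmp, 'new_hash_dir', '1618000000.00000.ts')
os.makedirs(os.path.dirname(old))
os.makedirs(os.path.dirname(new))
open(old, 'w').close()  # tombstone written to the old location
open(new, 'w').close()  # "same" tombstone written independently to the new

try:
    os.link(old, new)   # what relink_paths effectively attempts
except OSError as exc:
    assert exc.errno == errno.EEXIST
    assert not os.path.samefile(old, new)  # same name, different inodes
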