Example #1
def __init__(self, conf, **kwargs):
    super(BlobIndexer, self).__init__(conf)
    self.logger = get_logger(conf)
    volume = conf.get('volume')
    if not volume:
        raise exc.ConfigurationException('No volume specified for indexer')
    self.volume = volume
    self.passes = 0
    self.errors = 0
    self.successes = 0
    self.last_reported = 0
    self.total_since_last_reported = 0
    self.chunks_run_time = 0
    self.interval = int_value(
        conf.get('interval'), 300)
    self.report_interval = int_value(
        conf.get('report_interval'), 3600)
    self.max_chunks_per_second = int_value(
        conf.get('chunks_per_second'), 30)
    pm = get_pool_manager(pool_connections=10)
    self.index_client = RdirClient(conf, logger=self.logger,
                                   pool_manager=pm)
    self.namespace, self.volume_id = check_volume(self.volume)
    self.convert_chunks = true_value(conf.get('convert_chunks'))
    if self.convert_chunks:
        converter_conf = self.conf.copy()
        converter_conf['no_backup'] = True
        self.converter = BlobConverter(converter_conf, logger=self.logger,
                                       pool_manager=pm)
    else:
        self.converter = None
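A minimal sketch of the flat configuration dictionary this constructor expects, assuming the usual oio.blob.indexer import path; the namespace name and volume path below are hypothetical, and 'volume' must point at a real rawx volume (check_volume reads its extended attributes):

from oio.blob.indexer import BlobIndexer

# Hypothetical values; the numeric defaults match the int_value()
# fallbacks shown above.
conf = {
    'namespace': 'OPENIO',
    'volume': '/var/lib/oio/sds/OPENIO/rawx-1',
    'interval': 300,               # seconds between two passes
    'report_interval': 3600,       # seconds between two progress reports
    'chunks_per_second': 30,       # rate limit applied by ratelimit()
    'convert_chunks': 'true',      # also build a BlobConverter (no_backup)
}
indexer = BlobIndexer(conf)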
Example #2
    def _convert_and_check(self,
                           chunk_volume,
                           chunk_path,
                           chunk_id_info,
                           expected_raw_meta=None,
                           expected_errors=0):
        conf = self.conf
        conf['volume'] = self.rawx_volumes[chunk_volume]
        converter = BlobConverter(conf, logger=self.logger)
        converter.safe_convert_chunk(chunk_path)
        self.assertEqual(1, converter.total_chunks_processed)
        self.assertEqual(1, converter.passes)
        self.assertEqual(expected_errors, converter.errors)

        checker = Checker(self.ns)
        for chunk_id, info in chunk_id_info.items():
            account, container, path, version, content_id = info
            fullpath = encode_fullpath(account, container, path, version,
                                       content_id)
            cid = cid_from_name(account, container)
            meta, raw_meta = read_chunk_metadata(chunk_path, chunk_id)

            self.assertEqual(meta.get('chunk_id'), chunk_id)
            self.assertEqual(meta.get('container_id'), cid)
            self.assertEqual(meta.get('content_path'), path)
            self.assertEqual(meta.get('content_version'), version)
            self.assertEqual(meta.get('content_id'), content_id)
            self.assertEqual(meta.get('full_path'), fullpath)

            checker.check(
                Target(account,
                       container=container,
                       obj=path,
                       chunk='http://' + converter.volume_id + '/' + chunk_id))
            for _ in checker.run():
                pass
            self.assertTrue(checker.report())

            if expected_raw_meta:
                self.assertDictEqual(expected_raw_meta, raw_meta)
                continue

            self.assertNotIn(CHUNK_XATTR_KEYS['chunk_id'], raw_meta)
            self.assertNotIn(CHUNK_XATTR_KEYS['container_id'], raw_meta)
            self.assertNotIn(CHUNK_XATTR_KEYS['content_path'], raw_meta)
            self.assertNotIn(CHUNK_XATTR_KEYS['content_version'], raw_meta)
            self.assertNotIn(CHUNK_XATTR_KEYS['content_id'], raw_meta)
            self.assertIn(CHUNK_XATTR_CONTENT_FULLPATH_PREFIX + chunk_id,
                          raw_meta)
            for key in raw_meta:
                if key.startswith('oio:'):
                    self.fail('legacy fullpath xattr "%s" still present'
                              % key)
            self.assertEqual(raw_meta[CHUNK_XATTR_KEYS['oio_version']],
                             OIO_VERSION)
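For reference, chunk_id_info maps each chunk ID to the (account, container, path, version, content_id) tuple unpacked above. A hypothetical call from a test method of the same class might look like this; chunk_path and chunk_id are assumed to describe an existing chunk on the 'rawx-1' volume:

# Hypothetical values; the helper converts the chunk, then verifies that
# only the new-style fullpath xattr remains.
self._convert_and_check(
    'rawx-1', chunk_path,
    {chunk_id: (self.account, self.container, self.path,
                self.version, self.content_id)},
    expected_errors=0)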
Example #3
def test_recover_missing_fullpath_not_indexed(self):
    """
    Test what happens when the BlobConverter encounters a chunk with
    neither a fullpath extended attribute, nor any of the legacy
    attributes, and the chunk does not appear in rdir.
    """
    victim = random.choice(self.chunks)
    path = self._chunk_path(victim)
    remove_fullpath_xattr(path)
    self._deindex_chunk(victim)
    conf = dict(self.conf)
    conf['volume'] = self.rawx_volumes[self._chunk_volume_id(victim)]
    converter = BlobConverter(conf)
    self.assertRaises(KeyError, converter.recover_chunk_fullpath, path)
Example #4
def test_recover_missing_fullpath_orphan_chunk(self):
    """
    Test what happens when the BlobConverter encounters a chunk with
    neither a fullpath extended attribute, nor any of the legacy
    attributes, and the chunk does not appear in the object description.
    """
    victim = random.choice(self.chunks)
    path = self._chunk_path(victim)
    remove_fullpath_xattr(path)
    cbean = {
        'content': self.content_id,
        'hash': victim['hash'],
        'id': victim['url'],
        'size': victim['size'],
        'pos': victim['pos'],
        'type': 'chunk'
    }
    self.api.container.container_raw_delete(
        self.account, self.container, data=[cbean])
    conf = dict(self.conf)
    conf['volume'] = self.rawx_volumes[self._chunk_volume_id(victim)]
    converter = BlobConverter(conf)
    self.assertRaises(OrphanChunk, converter.recover_chunk_fullpath, path)
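Contrast with Example #3: here the chunk is still indexed in rdir, but the object no longer references it, so recovery fails with OrphanChunk rather than KeyError.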
Example #5
class BlobIndexer(Daemon):
    def __init__(self, conf, **kwargs):
        super(BlobIndexer, self).__init__(conf)
        self.logger = get_logger(conf)
        volume = conf.get('volume')
        if not volume:
            raise exc.ConfigurationException('No volume specified for indexer')
        self.volume = volume
        self.passes = 0
        self.errors = 0
        self.successes = 0
        self.last_reported = 0
        self.total_since_last_reported = 0
        self.chunks_run_time = 0
        self.interval = int_value(
            conf.get('interval'), 300)
        self.report_interval = int_value(
            conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(
            conf.get('chunks_per_second'), 30)
        pm = get_pool_manager(pool_connections=10)
        self.index_client = RdirClient(conf, logger=self.logger,
                                       pool_manager=pm)
        self.namespace, self.volume_id = check_volume(self.volume)
        self.convert_chunks = true_value(conf.get('convert_chunks'))
        if self.convert_chunks:
            converter_conf = self.conf.copy()
            converter_conf['no_backup'] = True
            self.converter = BlobConverter(converter_conf, logger=self.logger,
                                           pool_manager=pm)
        else:
            self.converter = None

    def safe_recover_fullpath(self, path):
        try:
            return self.converter.recover_chunk_fullpath(path)
        except Exception as err:
            self.logger.error('Could not recover fullpath xattr of %s: %s',
                              path, err)
        return False

    def safe_update_index(self, path):
        chunk_id = path.rsplit('/', 1)[-1]
        if len(chunk_id) != STRLEN_CHUNKID:
            if chunk_id.endswith(CHUNK_SUFFIX_PENDING):
                self.logger.info('Skipping pending chunk %s', path)
            else:
                self.logger.warn('WARN Not a chunk %s', path)
            return
        for char in chunk_id:
            if char not in hexdigits:
                self.logger.warn('WARN Not a chunk %s', path)
                return
        try:
            self.update_index(path, chunk_id)
            self.successes += 1
            self.logger.debug('Updated %s', path)
        except exc.OioNetworkException as err:
            self.errors += 1
            self.logger.warn('ERROR while updating %s: %s', path, err)
        except exc.VolumeException as err:
            self.errors += 1
            self.logger.error('Cannot index %s: %s', path, err)
            # All chunks of this volume are indexed in the same service,
            # no need to try another chunk, it will generate the same
            # error. Let the upper level retry later.
            raise
        except (exc.ChunkException, exc.MissingAttribute) as err:
            if (self.convert_chunks and self.converter and
                    self.converter.is_fullpath_error(err)):
                self.logger.warn(
                    'Could not update %s: %s, will try to recover', path, err)
                if self.safe_recover_fullpath(path):
                    self.successes += 1
                    self.logger.info(
                        'Fullpath xattr of %s was recovered', path)
                else:
                    self.errors += 1
                    # Logging already done by safe_recover_fullpath
            else:
                self.errors += 1
                self.logger.error('ERROR while updating %s: %s', path, err)
        except Exception as err:
            # We cannot compare errno in the 'except' line.
            # pylint: disable=no-member
            if isinstance(err, IOError) and err.errno == errno.ENOENT:
                self.logger.debug('Chunk %s disappeared before indexing', path)
                # Neither an error nor a success, do not touch counters.
            else:
                self.errors += 1
                self.logger.exception('ERROR while updating %s', path)
        self.total_since_last_reported += 1

    def report(self, tag, start_time):
        total = self.errors + self.successes
        now = time.time()
        elapsed = (now - start_time) or 0.000001
        self.logger.info(
            '%(tag)s=%(current_time)s '
            'elapsed=%(elapsed).02f '
            'pass=%(pass)d '
            'errors=%(errors)d '
            'chunks=%(nb_chunks)d %(c_rate).2f/s' % {
                'tag': tag,
                'current_time': datetime.fromtimestamp(
                    int(now)).isoformat(),
                'pass': self.passes,
                'errors': self.errors,
                'nb_chunks': total,
                'c_rate': self.total_since_last_reported /
                (now - self.last_reported),
                'elapsed': elapsed
            }
        )
        self.last_reported = now
        self.total_since_last_reported = 0

    def index_pass(self):
        start_time = time.time()
        self.last_reported = start_time
        self.errors = 0
        self.successes = 0

        paths = paths_gen(self.volume)
        self.report('started', start_time)
        for path in paths:
            self.safe_update_index(path)
            self.chunks_run_time = ratelimit(
                self.chunks_run_time,
                self.max_chunks_per_second
            )
            now = time.time()
            if now - self.last_reported >= self.report_interval:
                self.report('running', start_time)
        self.report('ended', start_time)

    def update_index(self, path, chunk_id):
        with open(path) as file_:
            try:
                meta = None
                if self.convert_chunks and self.converter:
                    _, meta = self.converter.convert_chunk(file_, chunk_id)
                if meta is None:
                    meta, _ = read_chunk_metadata(file_, chunk_id)
            except exc.MissingAttribute as err:
                raise exc.FaultyChunk(err)

            data = {'mtime': int(time.time())}
            headers = {REQID_HEADER: request_id('blob-indexer-')}
            self.index_client.chunk_push(self.volume_id,
                                         meta['container_id'],
                                         meta['content_id'],
                                         meta['chunk_id'],
                                         headers=headers,
                                         **data)

    def run(self, *args, **kwargs):
        time.sleep(random() * self.interval)
        while True:
            pre = time.time()
            try:
                self.index_pass()
            except exc.VolumeException as err:
                self.logger.error('Cannot index chunks, will retry later: %s',
                                  err)
            except Exception as err:
                self.logger.exception('ERROR during indexing: %s', err)
            else:
                self.passes += 1
            elapsed = (time.time() - pre) or 0.000001
            if elapsed < self.interval:
                time.sleep(self.interval - elapsed)
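A minimal usage sketch, reusing the hypothetical conf dictionary from Example #1; index_pass() and run() are the methods defined above:

indexer = BlobIndexer(conf)
indexer.index_pass()   # one full pass over the volume, with progress reports
indexer.run()          # daemon loop: random initial sleep, then one pass
                       # per 'interval' seconds, forever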
Example #6
class BlobIndexer(Daemon):
    def __init__(self, conf, **kwargs):
        super(BlobIndexer, self).__init__(conf)
        self.logger = get_logger(conf)
        volume = conf.get('volume')
        if not volume:
            raise exc.ConfigurationException('No volume specified for indexer')
        self.volume = volume
        self.passes = 0
        self.errors = 0
        self.successes = 0
        self.last_reported = 0
        self.total_since_last_reported = 0
        self.chunks_run_time = 0
        self.interval = int_value(conf.get('interval'), 300)
        self.report_interval = int_value(conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(conf.get('chunks_per_second'),
                                               30)
        pm = get_pool_manager(pool_connections=10)
        self.index_client = RdirClient(conf,
                                       logger=self.logger,
                                       pool_manager=pm)
        self.namespace, self.volume_id = check_volume(self.volume)
        self.convert_chunks = true_value(conf.get('convert_chunks'))
        if self.convert_chunks:
            self.converter = BlobConverter(self.conf,
                                           logger=self.logger,
                                           pool_manager=pm)
        else:
            self.converter = None

    def index_pass(self):
        def safe_update_index(path):
            chunk_id = path.rsplit('/', 1)[-1]
            if len(chunk_id) != STRLEN_CHUNKID:
                self.logger.warn('WARN Not a chunk %s' % path)
                return
            for c in chunk_id:
                if c not in hexdigits:
                    self.logger.warn('WARN Not a chunk %s' % path)
                    return
            try:
                self.update_index(path, chunk_id)
                self.successes += 1
                self.logger.debug('Updated %s', path)
            except OioNetworkException as exc:
                self.errors += 1
                self.logger.warn('ERROR while updating %s: %s', path, exc)
            except VolumeException as exc:
                self.errors += 1
                self.logger.error('Cannot index %s: %s', path, exc)
                # All chunks of this volume are indexed in the same service,
                # no need to try another chunk, it will generate the same
                # error. Let the upper level retry later.
                raise
            except Exception:
                self.errors += 1
                self.logger.exception('ERROR while updating %s', path)
            self.total_since_last_reported += 1

        def report(tag):
            total = self.errors + self.successes
            now = time.time()
            elapsed = (now - start_time) or 0.000001
            self.logger.info(
                '%(tag)s=%(current_time)s '
                'elapsed=%(elapsed).02f '
                'pass=%(pass)d '
                'errors=%(errors)d '
                'chunks=%(nb_chunks)d %(c_rate).2f/s' % {
                    'tag': tag,
                    'current_time': datetime.fromtimestamp(
                        int(now)).isoformat(),
                    'pass': self.passes,
                    'errors': self.errors,
                    'nb_chunks': total,
                    'c_rate': self.total_since_last_reported /
                    (now - self.last_reported),
                    'elapsed': elapsed
                })
            self.last_reported = now
            self.total_since_last_reported = 0

        start_time = time.time()
        self.last_reported = start_time
        self.errors = 0
        self.successes = 0

        paths = paths_gen(self.volume)
        report('started')
        for path in paths:
            safe_update_index(path)
            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
            now = time.time()
            if now - self.last_reported >= self.report_interval:
                report('running')
        report('ended')

    def update_index(self, path, chunk_id):
        with open(path) as f:
            try:
                meta = None
                if self.convert_chunks and self.converter:
                    _, meta = self.converter.convert_chunk(f, chunk_id)
                if meta is None:
                    meta, _ = read_chunk_metadata(f, chunk_id)
            except exc.MissingAttribute as e:
                raise exc.FaultyChunk('Missing extended attribute %s' % e)

            data = {'mtime': int(time.time())}
            headers = {'X-oio-req-id': 'blob-indexer-' + request_id()[:-13]}
            self.index_client.chunk_push(self.volume_id,
                                         meta['container_id'],
                                         meta['content_id'],
                                         meta['chunk_id'],
                                         headers=headers,
                                         **data)

    def run(self, *args, **kwargs):
        time.sleep(random() * self.interval)
        while True:
            pre = time.time()
            try:
                self.index_pass()
            except VolumeException as exc:
                self.logger.error('Cannot index chunks, will retry later: %s',
                                  exc)
            except Exception as exc:
                self.logger.exception('ERROR during indexing: %s', exc)
            else:
                self.passes += 1
            elapsed = (time.time() - pre) or 0.000001
            if elapsed < self.interval:
                time.sleep(self.interval - elapsed)