Example #1
class Harasser(object):
    def __init__(self, ns, max_containers=256, max_contents=256):
        conf = {'namespace': ns}
        self.cs = ConscienceClient(conf)
        self.rdir = RdirClient(conf)
        self.rawx_list = [x['addr'] for x in self.cs.all_services('rawx')]
        self.sent = set()
        self.max_containers = max_containers
        self.max_contents = max_contents

    def harass_put(self, loops=None):
        if loops is None:
            loops = random.randint(1000, 2000)
        print "Pushing %d fake chunks" % loops
        loop = loops
        count_start_container = random.randrange(2**20)
        count_start_content = random.randrange(2**20)
        start = time.time()
        nb_rawx = len(self.rawx_list)
        while loop > 0:
            args = {'mtime': int(start)}
            # vol_id = random.choice(self.rawx_list)
            # container_id = "%064X" % (random.randrange(self.max_containers))
            # content_id = "%032X" % (random.randrange(self.max_contents))
            vol_id = self.rawx_list[loop % nb_rawx]
            container_id = "%064X" % (loop + count_start_container)
            content_id = "%032X" % (loop + count_start_content)
            chunk_id = "http://%s/%064X" \
                % (vol_id, random.randrange(2**128))
            self.rdir.chunk_push(
                vol_id, container_id, content_id, chunk_id, **args)
            self.sent.add((vol_id, container_id, content_id, chunk_id))
            loop -= 1
        end = time.time()
        print "%d pushed in %.3fs, %d req/s" \
            % (loops, end-start, loops/(end-start))

    def harass_del(self, min_loops=0):
        min_loops = min(min_loops, len(self.sent))
        loops = random.randint(min_loops, len(self.sent))
        print "Removing %d fake chunks" % loops
        loop = loops
        start = time.time()
        while loop > 0:
            args = self.sent.pop()
            self.rdir.chunk_delete(*args)
            loop -= 1
        end = time.time()
        print "%d removed in %.3fs, %d req/s" \
            % (loops, end-start, loops/(end-start))

    def __call__(self):
        try:
            while True:
                self.harass_put()
                self.harass_del()
        except KeyboardInterrupt:
            print "Cleaning..."
            self.harass_del(len(self.sent))
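
For context, a minimal driver for this Harasser might look like the sketch below. It relies only on what the class itself shows: a namespace name passed to __init__ and the __call__ loop that cleans up on Ctrl+C. The 'OPENIO' namespace name is an assumption for illustration.

# Hypothetical usage sketch -- nothing here beyond the Harasser API itself
# is confirmed by the example above.
import sys

ns = sys.argv[1] if len(sys.argv) > 1 else 'OPENIO'  # assumed namespace name
harasser = Harasser(ns)
# Pushes then removes batches of fake chunk records until interrupted,
# and finally deletes everything still recorded in self.sent.
harasser()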
Example #2
class BlobIndexer(Daemon):
    def __init__(self, conf, **kwargs):
        super(BlobIndexer, self).__init__(conf)
        self.logger = get_logger(conf)
        volume = conf.get('volume')
        if not volume:
            raise exc.ConfigurationException('No volume specified for indexer')
        self.volume = volume
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.chunks_run_time = 0
        self.total_chunks_processed = 0
        self.interval = int_value(
            conf.get('interval'), 300)
        self.report_interval = int_value(
            conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(
            conf.get('chunks_per_second'), 30)
        self.index_client = RdirClient(conf)
        self.namespace, self.volume_id = check_volume(self.volume)

    def index_pass(self):
        start_time = report_time = time.time()

        total_errors = 0

        paths = paths_gen(self.volume)

        for path in paths:
            self.safe_update_index(path)
            self.chunks_run_time = ratelimit(
                self.chunks_run_time,
                self.max_chunks_per_second
            )
            self.total_chunks_processed += 1
            now = time.time()

            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(total).2f ' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / (now - report_time),
                        'total': (now - start_time)
                    }
                )
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.errors = 0
                self.last_reported = now
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(chunk_rate).2f ' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'chunk_rate': self.total_chunks_processed / elapsed
            }
        )
        if elapsed < self.interval:
            time.sleep(self.interval - elapsed)

    def safe_update_index(self, path):
        try:
            self.logger.debug('Updating index: %s' % path)
            self.update_index(path)
        except Exception:
            self.errors += 1
            self.logger.exception('ERROR while updating index for chunk %s',
                                  path)
        self.passes += 1

    def update_index(self, path):
        with open(path) as f:
            try:
                meta = read_chunk_metadata(f)
            except exc.MissingAttribute as e:
                raise exc.FaultyChunk(
                    'Missing extended attribute %s' % e)
            data = {
                'content_version': meta['content_version'],
                'content_nbchunks': meta['content_chunksnb'],
                'content_path': meta['content_path'],
                'content_size': meta['content_size'],
                'chunk_hash': meta['chunk_hash'],
                'chunk_position': meta['chunk_pos'],
                'chunk_size': meta['chunk_size'],
                'mtime': int(time.time())
            }
            self.index_client.chunk_push(self.volume_id,
                                         meta['content_cid'],
                                         meta['content_id'],
                                         meta['chunk_id'],
                                         **data)

    def run(self, *args, **kwargs):
        time.sleep(random() * self.interval)
        while True:
            try:
                self.index_pass()
            except Exception as e:
                self.logger.exception('ERROR during indexing: %s' % e)
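
This daemon is driven entirely by its conf dict. A plausible configuration, inferred from the conf.get() calls in __init__() above (volume is mandatory; interval, report_interval and chunks_per_second default to 300, 3600 and 30), follows; the namespace key and the volume path are illustrative assumptions.

# Hypothetical configuration -- key names come from __init__() above,
# the values are assumptions for illustration.
conf = {
    'namespace': 'OPENIO',                 # assumed: RdirClient is built from this dict
    'volume': '/var/lib/oio/sds/rawx-1',   # required, else ConfigurationException
    'interval': 300,                       # seconds between two index passes
    'report_interval': 3600,               # seconds between two progress reports
    'chunks_per_second': 30,               # cap enforced through ratelimit()
}
indexer = BlobIndexer(conf)
indexer.run()  # sleeps a random fraction of `interval`, then loops forever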
Example #3
class BlobIndexer(Daemon):
    def __init__(self, conf, **kwargs):
        super(BlobIndexer, self).__init__(conf)
        self.logger = get_logger(conf)
        volume = conf.get('volume')
        if not volume:
            raise exc.ConfigurationException('No volume specified for indexer')
        self.volume = volume
        self.passes = 0
        self.errors = 0
        self.successes = 0
        self.last_reported = 0
        self.chunks_run_time = 0
        self.interval = int_value(conf.get('interval'), 300)
        self.report_interval = int_value(conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(conf.get('chunks_per_second'),
                                               30)
        self.index_client = RdirClient(conf, logger=self.logger)
        self.namespace, self.volume_id = check_volume(self.volume)

    def index_pass(self):
        def safe_update_index(path):
            try:
                self.update_index(path)
                self.successes += 1
                self.logger.debug('Updated %s', path)
            except Exception:
                self.errors += 1
                self.logger.exception('ERROR while updating %s', path)

        def report(tag):
            total = self.errors + self.successes
            now = time.time()
            elapsed = (now - start_time) or 0.000001
            self.logger.info(
                '%(tag)s=%(current_time)s '
                'elapsed=%(elapsed).02f '
                'pass=%(pass)d '
                'errors=%(errors)d '
                'chunks=%(nb_chunks)d %(c_rate).2f/s' % {
                    'tag': tag,
                    'current_time': datetime.fromtimestamp(
                        int(now)).isoformat(),
                    'pass': self.passes,
                    'errors': self.errors,
                    'nb_chunks': total,
                    'c_rate': total / (now - self.last_reported),
                    'elapsed': elapsed
                })
            self.last_reported = now

        start_time = time.time()
        self.last_reported = start_time
        self.errors = 0
        self.successes = 0

        paths = paths_gen(self.volume)
        report('started')
        for path in paths:
            safe_update_index(path)
            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
            now = time.time()
            if now - self.last_reported >= self.report_interval:
                report('running')
        report('ended')

    def update_index(self, path):
        with open(path) as f:
            try:
                meta = read_chunk_metadata(f)
            except exc.MissingAttribute as e:
                raise exc.FaultyChunk('Missing extended attribute %s' % e)
            data = {'mtime': int(time.time())}
            self.index_client.chunk_push(self.volume_id, meta['container_id'],
                                         meta['content_id'], meta['chunk_id'],
                                         **data)

    def run(self, *args, **kwargs):
        time.sleep(random() * self.interval)
        while True:
            pre = time.time()
            try:
                self.index_pass()
            except Exception as e:
                self.logger.exception('ERROR during indexing: %s' % e)
            else:
                self.passes += 1
            elapsed = (time.time() - pre) or 0.000001
            if elapsed < self.interval:
                time.sleep(self.interval - elapsed)
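
For reference, each call to the report() closure above emits one log line in the format its template defines; the 'started' call right after index_pass() begins would produce something like this (timestamp invented):

started=2016-07-05T14:29:13 elapsed=0.00 pass=0 errors=0 chunks=0 0.00/s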
Example #4
class BlobIndexer(Daemon):
    def __init__(self, conf, **kwargs):
        super(BlobIndexer, self).__init__(conf)
        self.logger = get_logger(conf)
        volume = conf.get('volume')
        if not volume:
            raise exc.ConfigurationException('No volume specified for indexer')
        self.volume = volume
        self.passes = 0
        self.errors = 0
        self.successes = 0
        self.last_reported = 0
        self.total_since_last_reported = 0
        self.chunks_run_time = 0
        self.interval = int_value(
            conf.get('interval'), 300)
        self.report_interval = int_value(
            conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(
            conf.get('chunks_per_second'), 30)
        pm = get_pool_manager(pool_connections=10)
        self.index_client = RdirClient(conf, logger=self.logger,
                                       pool_manager=pm)
        self.namespace, self.volume_id = check_volume(self.volume)
        self.convert_chunks = true_value(conf.get('convert_chunks'))
        if self.convert_chunks:
            converter_conf = self.conf.copy()
            converter_conf['no_backup'] = True
            self.converter = BlobConverter(converter_conf, logger=self.logger,
                                           pool_manager=pm)
        else:
            self.converter = None

    def safe_recover_fullpath(self, path):
        try:
            return self.converter.recover_chunk_fullpath(path)
        except Exception as err:
            self.logger.error('Could not recover fullpath xattr of %s: %s',
                              path, err)
        return False

    def safe_update_index(self, path):
        chunk_id = path.rsplit('/', 1)[-1]
        if len(chunk_id) != STRLEN_CHUNKID:
            if chunk_id.endswith(CHUNK_SUFFIX_PENDING):
                self.logger.info('Skipping pending chunk %s', path)
            else:
                self.logger.warn('WARN Not a chunk %s', path)
            return
        for char in chunk_id:
            if char not in hexdigits:
                self.logger.warn('WARN Not a chunk %s', path)
                return
        try:
            self.update_index(path, chunk_id)
            self.successes += 1
            self.logger.debug('Updated %s', path)
        except exc.OioNetworkException as err:
            self.errors += 1
            self.logger.warn('ERROR while updating %s: %s', path, err)
        except exc.VolumeException as err:
            self.errors += 1
            self.logger.error('Cannot index %s: %s', path, err)
            # All chunks of this volume are indexed in the same service,
            # no need to try another chunk, it will generate the same
            # error. Let the upper level retry later.
            raise
        except (exc.ChunkException, exc.MissingAttribute) as err:
            if (self.convert_chunks and self.converter and
                    self.converter.is_fullpath_error(err)):
                self.logger.warn(
                    'Could not update %s: %s, will try to recover', path, err)
                if self.safe_recover_fullpath(path):
                    self.successes += 1
                    self.logger.info(
                        'Fullpath xattr of %s was recovered', path)
                else:
                    self.errors += 1
                    # Logging already done by safe_recover_fullpath
            else:
                self.errors += 1
                self.logger.error('ERROR while updating %s: %s', path, err)
        except Exception as err:
            # We cannot compare errno in the 'except' line.
            # pylint: disable=no-member
            if isinstance(err, IOError) and err.errno == errno.ENOENT:
                self.logger.debug('Chunk %s disappeared before indexing', path)
                # Neither an error nor a success, do not touch counters.
            else:
                self.errors += 1
                self.logger.exception('ERROR while updating %s', path)
        self.total_since_last_reported += 1

    def report(self, tag, start_time):
        total = self.errors + self.successes
        now = time.time()
        elapsed = (now - start_time) or 0.000001
        self.logger.info(
            '%(tag)s=%(current_time)s '
            'elapsed=%(elapsed).02f '
            'pass=%(pass)d '
            'errors=%(errors)d '
            'chunks=%(nb_chunks)d %(c_rate).2f/s' % {
                'tag': tag,
                'current_time': datetime.fromtimestamp(
                    int(now)).isoformat(),
                'pass': self.passes,
                'errors': self.errors,
                'nb_chunks': total,
                'c_rate': self.total_since_last_reported /
                (now - self.last_reported),
                'elapsed': elapsed
            }
        )
        self.last_reported = now
        self.total_since_last_reported = 0

    def index_pass(self):
        start_time = time.time()
        self.last_reported = start_time
        self.errors = 0
        self.successes = 0

        paths = paths_gen(self.volume)
        self.report('started', start_time)
        for path in paths:
            self.safe_update_index(path)
            self.chunks_run_time = ratelimit(
                self.chunks_run_time,
                self.max_chunks_per_second
            )
            now = time.time()
            if now - self.last_reported >= self.report_interval:
                self.report('running', start_time)
        self.report('ended', start_time)

    def update_index(self, path, chunk_id):
        with open(path) as file_:
            try:
                meta = None
                if self.convert_chunks and self.converter:
                    _, meta = self.converter.convert_chunk(file_, chunk_id)
                if meta is None:
                    meta, _ = read_chunk_metadata(file_, chunk_id)
            except exc.MissingAttribute as err:
                raise exc.FaultyChunk(err)

            data = {'mtime': int(time.time())}
            headers = {REQID_HEADER: request_id('blob-indexer-')}
            self.index_client.chunk_push(self.volume_id,
                                         meta['container_id'],
                                         meta['content_id'],
                                         meta['chunk_id'],
                                         headers=headers,
                                         **data)

    def run(self, *args, **kwargs):
        time.sleep(random() * self.interval)
        while True:
            pre = time.time()
            try:
                self.index_pass()
            except exc.VolumeException as err:
                self.logger.error('Cannot index chunks, will retry later: %s',
                                  err)
            except Exception as err:
                self.logger.exception('ERROR during indexing: %s', err)
            else:
                self.passes += 1
            elapsed = (time.time() - pre) or 0.000001
            if elapsed < self.interval:
                time.sleep(self.interval - elapsed)
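
This variant adds fullpath-xattr conversion on top of indexing. Below is a sketch of a configuration that turns it on, derived from the keys read in __init__() above; convert_chunks goes through true_value(), so a truthy string is enough, and the resulting BlobConverter is created with no_backup forced to True. All values are illustrative assumptions.

# Hypothetical conf enabling chunk conversion; values are made up.
conf = {
    'namespace': 'OPENIO',                 # assumed namespace name
    'volume': '/var/lib/oio/sds/rawx-1',   # volume crawled by paths_gen()
    'convert_chunks': 'yes',               # parsed by true_value()
}
indexer = BlobIndexer(conf)
indexer.index_pass()  # one full crawl; run() adds pacing and retries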
Example #5
class BlobRebuilderWorker(object):
    def __init__(self, conf, logger, volume):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        self.volume = volume
        self.run_time = 0
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.chunks_run_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_chunks_processed = 0
        self.dry_run = true_value(
            conf.get('dry_run', False))
        self.report_interval = int_value(
            conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(
            conf.get('chunks_per_second'), 30)
        self.max_bytes_per_second = int_value(
            conf.get('bytes_per_second'), 10000000)
        self.rdir_fetch_limit = int_value(
            conf.get('rdir_fetch_limit'), 100)
        self.rdir_client = RdirClient(conf)
        self.content_factory = ContentFactory(conf)

    def rebuilder_pass_with_lock(self):
        self.rdir_client.admin_lock(self.volume,
                                    "rebuilder on %s" % gethostname())
        try:
            self.rebuilder_pass()
        finally:
            self.rdir_client.admin_unlock(self.volume)

    def rebuilder_pass(self):
        start_time = report_time = time.time()

        total_errors = 0
        rebuilder_time = 0

        chunks = self.rdir_client.chunk_fetch(self.volume,
                                              limit=self.rdir_fetch_limit,
                                              rebuild=True)
        for container_id, content_id, chunk_id, data in chunks:
            loop_time = time.time()

            if self.dry_run:
                self.dryrun_chunk_rebuild(container_id, content_id, chunk_id)
            else:
                self.safe_chunk_rebuild(container_id, content_id, chunk_id)

            self.chunks_run_time = ratelimit(
                self.chunks_run_time,
                self.max_chunks_per_second
            )
            self.total_chunks_processed += 1
            now = time.time()

            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(b_rate).2f '
                    '%(total).2f '
                    '%(rebuilder_time).2f '
                    '%(rebuilder_rate).2f' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / (now - report_time),
                        'b_rate': self.bytes_processed / (now - report_time),
                        'total': (now - start_time),
                        'rebuilder_time': rebuilder_time,
                        'rebuilder_rate': rebuilder_time / (now - start_time)
                    }
                )
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.bytes_processed = 0
                self.last_reported = now
            rebuilder_time += (now - loop_time)
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(chunk_rate).2f '
            '%(bytes_rate).2f '
            '%(rebuilder_time).2f '
            '%(rebuilder_rate).2f' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'chunk_rate': self.total_chunks_processed / elapsed,
                'bytes_rate': self.total_bytes_processed / elapsed,
                'rebuilder_time': rebuilder_time,
                'rebuilder_rate': rebuilder_time / elapsed
            }
        )

    def dryrun_chunk_rebuild(self, container_id, content_id, chunk_id):
        self.logger.info("[dryrun] Rebuilding "
                         "container %s, content %s, chunk %s",
                         container_id, content_id, chunk_id)
        self.passes += 1

    def safe_chunk_rebuild(self, container_id, content_id, chunk_id):
        try:
            self.chunk_rebuild(container_id, content_id, chunk_id)
        except Exception as e:
            self.errors += 1
            self.logger.error('ERROR while rebuilding chunk %s|%s|%s: %s',
                              container_id, content_id, chunk_id, e)

        self.passes += 1

    def chunk_rebuild(self, container_id, content_id, chunk_id):
        self.logger.info('Rebuilding (container %s, content %s, chunk %s)',
                         container_id, content_id, chunk_id)

        try:
            content = self.content_factory.get(container_id, content_id)
        except ContentNotFound:
            raise exc.OrphanChunk('Content not found')

        chunk = content.chunks.filter(id=chunk_id).one()
        if chunk is None:
            raise exc.OrphanChunk("Chunk not found in content")
        chunk_size = chunk.size

        content.rebuild_chunk(chunk_id)

        self.rdir_client.chunk_push(self.volume, container_id, content_id,
                                    chunk_id, rtime=int(time.time()))

        self.bytes_processed += chunk_size
        self.total_bytes_processed += chunk_size
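
A minimal dry-run driver for this worker, assuming only what the class shows: conf keys parsed in __init__(), a logger argument that may be None, and a volume identifier handed to the rdir admin lock. The volume value below is an assumption.

# Hypothetical dry-run invocation; the volume ID is made up.
conf = {
    'namespace': 'OPENIO',     # assumed: RdirClient/ContentFactory built from it
    'dry_run': 'true',         # log what would be rebuilt, touch nothing
    'rdir_fetch_limit': 100,   # chunks fetched per rdir request
}
worker = BlobRebuilderWorker(conf, None, '127.0.0.1:6004')  # logger=None -> get_logger(conf)
# Takes the volume's admin lock in rdir so two rebuilders cannot run
# concurrently, and releases it even if the pass raises.
worker.rebuilder_pass_with_lock()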
Example #6
class EventWorker(object):
    def __init__(self, conf, name, context, **kwargs):
        self.conf = conf
        self.name = name
        verbose = kwargs.pop('verbose', False)
        self.logger = get_logger(self.conf, verbose=verbose)
        self.init_zmq(context)
        self.cs = ConscienceClient(self.conf)
        self.rdir = RdirClient(self.conf)
        self._acct_addr = None
        self._acct_addr_update = 0  # timestamp of the last account-service lookup
        self.acct_refresh_interval = int_value(
            conf.get('acct_refresh_interval'), 60
        )
        self.acct_update = true_value(
            conf.get('acct_update', True))
        self.session = requests.Session()
        self.failed = False

    def start(self):
        self.logger.info('worker "%s" starting', self.name)
        self.running = True
        self.run()

    def stop(self):
        self.logger.info('worker "%s" stopping', self.name)
        self.running = False

    def init_zmq(self, context):
        socket = context.socket(zmq.REP)
        socket.connect('inproc://event-front')
        self.socket = socket

    def safe_ack(self, msg):
        try:
            self.socket.send_multipart(msg)
        except Exception:
            self.logger.warn('Unable to ack event')

    def run(self):
        try:
            while self.running:
                msg = self.socket.recv_multipart()
                self.logger.debug("msg received: %s" % msg)
                event = decode_msg(msg)
                success = self.process_event(event)
                f = "0" if success else ""
                self.safe_ack([msg[0], f])
        except Exception as e:
            self.logger.warn('ERROR in worker "%s": %s', self.name, e)
            self.failed = True
            raise
        finally:
            self.logger.info('worker "%s" stopped', self.name)

    def process_event(self, event):
        handler = self.get_handler(event)
        if not handler:
            self.logger.warn("No handler found")
            # mark as success
            return True
        success = True
        try:
            handler(event)
        except Exception:
            success = False
        finally:
            return success

    def get_handler(self, event):
        event_type = event.get('event')
        if not event_type:
            return None

        if event_type == EventType.CONTAINER_PUT:
            return self.handle_container_put
        elif event_type == EventType.CONTAINER_DESTROY:
            return self.handle_container_destroy
        elif event_type == EventType.CONTAINER_UPDATE:
            return self.handle_container_update
        elif event_type == EventType.OBJECT_PUT:
            return self.handle_object_put
        elif event_type == EventType.OBJECT_DELETE:
            return self.handle_object_delete
        elif event_type == EventType.REFERENCE_UPDATE:
            return self.handle_reference_update
        elif event_type == EventType.CHUNK_PUT:
            return self.handle_chunk_put
        elif event_type == EventType.CHUNK_DELETE:
            return self.handle_chunk_delete
        elif event_type == EventType.PING:
            return self.handle_ping
        else:
            return None

    @property
    def acct_addr(self):
        if not self._acct_addr or self.acct_refresh():
            try:
                acct_instance = self.cs.next_instance(ACCOUNT_SERVICE)
                self._acct_addr = acct_instance.get('addr')
                self._acct_addr_update = time.time()
            except Exception:
                self.logger.warn('Unable to find account instance')
        return self._acct_addr

    def acct_refresh(self):
        return ((time.time() - self._acct_addr_update) >
                self.acct_refresh_interval)

    def handle_container_put(self, event):
        """
        Handle container creation.
        :param event:
        """
        self.logger.debug('worker "%s" handle container put', self.name)
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')

        event = {'mtime': mtime, 'name': name}
        self.session.post(uri, params={'id': account}, data=json.dumps(event))

    def handle_container_update(self, event):
        """
        Handle container update.
        :param event:
        """
        self.logger.debug('worker "%s" handle container update', self.name)
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        name = event.get('url').get('user')
        account = event.get('url').get('account')
        bytes_count = data.get('bytes-count', 0)
        object_count = data.get('object-count', 0)

        event = {
            'mtime': mtime,
            'name': name,
            'bytes': bytes_count,
            'objects': object_count
        }
        self.session.post(uri, params={'id': account}, data=json.dumps(event))

    def handle_container_destroy(self, event):
        """
        Handle container destroy.
        :param event:
        """
        self.logger.debug('worker "%s" handle container destroy', self.name)
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        dtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')

        event = {'dtime': dtime, 'name': name}
        self.session.post(uri, params={'id': account}, data=json.dumps(event))

    def handle_object_delete(self, event):
        """
        Handle object deletion.
        Delete the chunks of the object.
        :param event:
        """
        self.logger.debug('worker "%s" handle object delete', self.name)
        pile = GreenPile(PARALLEL_CHUNKS_DELETE)

        chunks = []

        for item in event.get('data'):
            if item.get('type') == 'chunks':
                chunks.append(item)
        if not len(chunks):
            self.logger.warn('No chunks found in event data')
            return

        def delete_chunk(chunk):
            resp = None
            try:
                with Timeout(CHUNK_TIMEOUT):
                    resp = self.session.delete(chunk['id'])
            except (Exception, Timeout) as e:
                self.logger.exception(e)
            return resp

        for chunk in chunks:
            pile.spawn(delete_chunk, chunk)

        resps = [resp for resp in pile if resp]

        for resp in resps:
            if resp.status_code == 204:
                self.logger.info('deleted chunk %s' % resp.url)
            else:
                self.logger.warn('failed to delete chunk %s' % resp.url)

    def handle_object_put(self, event):
        """
        Handle object creation.
        TODO
        :param event:
        """
        self.logger.debug('worker "%s" handle object put', self.name)

    def handle_reference_update(self, event):
        """
        Handle reference update.
        TODO
        :param event
        """
        self.logger.debug('worker "%s" handle reference update', self.name)

    def handle_chunk_put(self, event):
        """
        Handle chunk creation.
        :param event
        """
        self.logger.debug('worker "%s" handle chunk creation', self.name)

        when = event.get('when')
        data = event.get('data')
        volume_id = data.get('volume_id')
        del data['volume_id']
        container_id = data.get('container_id')
        del data['container_id']
        content_id = data.get('content_id')
        del data['content_id']
        chunk_id = data.get('chunk_id')
        del data['chunk_id']
        data['mtime'] = when
        self.rdir.chunk_push(volume_id, container_id, content_id, chunk_id,
                             **data)

    def handle_chunk_delete(self, event):
        """
        Handle chunk deletion.
        :param event
        """
        self.logger.debug('worker "%s" handle chunk deletion', self.name)

        data = event.get('data')
        volume_id = data.get('volume_id')
        container_id = data.get('container_id')
        content_id = data.get('content_id')
        chunk_id = data.get('chunk_id')
        self.rdir.chunk_delete(volume_id, container_id, content_id, chunk_id)

    def handle_ping(self, event):
        """
        Handle ping
        :param event
        """
        self.logger.debug('worker "%s" handle ping', self.name)
Example #7
class BlobIndexer(Daemon):
    def __init__(self, conf, **kwargs):
        super(BlobIndexer, self).__init__(conf)
        self.logger = get_logger(conf)
        volume = conf.get('volume')
        if not volume:
            raise exc.ConfigurationException('No volume specified for indexer')
        self.volume = volume
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.chunks_run_time = 0
        self.total_chunks_processed = 0
        self.interval = int_value(conf.get('interval'), 300)
        self.report_interval = int_value(conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(conf.get('chunks_per_second'),
                                               30)
        self.index_client = RdirClient(conf)
        self.namespace, self.volume_id = check_volume(self.volume)

    def index_pass(self):
        start_time = report_time = time.time()

        total_errors = 0

        paths = paths_gen(self.volume)

        for path in paths:
            self.safe_update_index(path)
            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
            self.total_chunks_processed += 1
            now = time.time()

            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(total).2f ' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / (now - report_time),
                        'total': (now - start_time)
                    })
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.errors = 0
                self.last_reported = now
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(chunk_rate).2f ' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'chunk_rate': self.total_chunks_processed / elapsed
            })
        if elapsed < self.interval:
            time.sleep(self.interval - elapsed)

    def safe_update_index(self, path):
        try:
            self.logger.debug('Updating index: %s', path)
            self.update_index(path)
        except Exception:
            self.errors += 1
            self.logger.exception('ERROR while updating index for chunk %s',
                                  path)
        self.passes += 1

    def update_index(self, path):
        with open(path) as f:
            try:
                meta = read_chunk_metadata(f)
            except exc.MissingAttribute as e:
                raise exc.FaultyChunk('Missing extended attribute %s' % e)
            data = {'mtime': int(time.time())}
            self.index_client.chunk_push(self.volume_id, meta['container_id'],
                                         meta['content_id'], meta['chunk_id'],
                                         **data)

    def run(self, *args, **kwargs):
        time.sleep(random() * self.interval)
        while True:
            try:
                self.index_pass()
            except Exception as e:
                self.logger.exception('ERROR during indexing: %s' % e)
Example #8
class Harasser(object):
    def __init__(self, ns, max_containers=256, max_contents=256):
        conf = {'namespace': ns}
        self.cs = ConscienceClient(conf)
        self.rdir = RdirClient(conf)
        self.rawx_list = [x['addr'] for x in self.cs.all_services('rawx')]
        self.sent = set()
        self.max_containers = max_containers
        self.max_contents = max_contents
        self.pushed_count = 0
        self.pushed_time = 0
        self.removed_count = 0
        self.removed_time = 0

    def harass_put(self, loops=None):
        if loops is None:
            loops = random.randint(1000, 2000)
        print("Pushing %d fake chunks" % loops)
        loop = loops
        count_start_container = random.randrange(2**20)
        count_start_content = random.randrange(2**20)
        start = time.time()
        nb_rawx = len(self.rawx_list)
        while loop > 0:
            args = {'mtime': int(start)}
            # vol_id = random.choice(self.rawx_list)
            # container_id = "%064X" % (random.randrange(self.max_containers))
            # content_id = "%032X" % (random.randrange(self.max_contents))
            vol_id = self.rawx_list[loop % nb_rawx]
            container_id = "%064X" % (loop + count_start_container)
            content_id = "%032X" % (loop + count_start_content)
            chunk_id = "http://%s/%064X" \
                % (vol_id, random.randrange(2**128))
            self.rdir.chunk_push(vol_id, container_id, content_id, chunk_id,
                                 **args)
            self.sent.add((vol_id, container_id, content_id, chunk_id))
            loop -= 1
        end = time.time()
        self.pushed_count += loops
        self.pushed_time += end - start
        print("%d pushed in %.3fs, %d req/s" % (loops, end - start, loops /
                                                (end - start)))

    def harass_del(self, min_loops=0):
        min_loops = min(min_loops, len(self.sent))
        loops = random.randint(min_loops, len(self.sent))
        print("Removing %d fake chunks" % loops)
        loop = loops
        start = time.time()
        while loop > 0:
            args = self.sent.pop()
            self.rdir.chunk_delete(*args)
            loop -= 1
        end = time.time()
        self.removed_count += loops
        self.removed_time += end - start
        print("%d removed in %.3fs, %d req/s" % (loops, end - start, loops /
                                                 (end - start)))

    def __call__(self):
        try:
            while True:
                self.harass_put()
                self.harass_del()
        except KeyboardInterrupt:
            print("Cleaning...")
            self.harass_del(len(self.sent))
            print("Stats:")
            print("Pushed %d in %.3fs, %d req/s" %
                  (self.pushed_count, self.pushed_time,
                   self.pushed_count / self.pushed_time))
            print("Removed %d in %.3fs, %d req/s" %
                  (self.removed_count, self.removed_time,
                   self.removed_count / self.removed_time))
Example #9
class EventWorker(Worker):
    def init(self):
        eventlet.monkey_patch(os=False)
        self.session = requests.Session()
        self.cs = ConscienceClient(self.conf)
        self.rdir = RdirClient(self.conf)
        self._acct_addr = None
        self._acct_addr_update = 0  # timestamp of the last account-service lookup
        self.graceful_timeout = 1
        self.acct_refresh_interval = int_value(
            self.conf.get('acct_refresh_interval'), 60)
        self.concurrency = int_value(self.conf.get('concurrency'), 1000)
        self.acct_update = true_value(self.conf.get('acct_update', True))
        self.rdir_update = true_value(self.conf.get('rdir_update', True))
        super(EventWorker, self).init()

    def notify(self):
        """TODO"""
        pass

    def safe_decode_job(self, job):
        try:
            return json.loads(job)
        except Exception as e:
            self.logger.warn('ERROR decoding job "%s"', str(e))
            return None

    def run(self):
        queue_url = self.conf.get('queue_url', 'tcp://127.0.0.1:11300')
        self.beanstalk = Beanstalk.from_url(queue_url)

        gt = eventlet.spawn(self.handle)

        while self.alive:
            self.notify()
            try:
                eventlet.sleep(1.0)
            except AssertionError:
                self.alive = False
                break

        self.notify()
        try:
            with Timeout(self.graceful_timeout) as t:
                gt.kill(StopServe())
                gt.wait()
        except Timeout as te:
            if te != t:
                raise
            gt.kill()

    def handle(self):
        try:
            while True:
                job_id, data = self.beanstalk.reserve()
                try:
                    event = self.safe_decode_job(data)
                    if event:
                        self.process_event(event)
                    self.beanstalk.delete(job_id)
                except Exception:
                    self.logger.exception("ERROR handling event %s", job_id)
        except StopServe:
            self.logger.info('Stopping event handler')

    def process_event(self, event):
        handler = self.get_handler(event)
        if not handler:
            self.logger.warn("ERROR no handler found for event")
            # mark as success
            return True
        success = True
        try:
            handler(event)
        except Exception:
            success = False
        finally:
            return success

    def get_handler(self, event):
        event_type = event.get('event')
        if not event_type:
            return None

        if event_type == EventType.CONTAINER_PUT:
            return self.handle_container_put
        elif event_type == EventType.CONTAINER_DESTROY:
            return self.handle_container_destroy
        elif event_type == EventType.CONTAINER_UPDATE:
            return self.handle_container_update
        elif event_type == EventType.OBJECT_PUT:
            return self.handle_object_put
        elif event_type == EventType.OBJECT_DELETE:
            return self.handle_object_delete
        elif event_type == EventType.REFERENCE_UPDATE:
            return self.handle_reference_update
        elif event_type == EventType.CHUNK_PUT:
            return self.handle_chunk_put
        elif event_type == EventType.CHUNK_DELETE:
            return self.handle_chunk_delete
        elif event_type == EventType.PING:
            return self.handle_ping
        else:
            return None

    @property
    def acct_addr(self):
        if not self._acct_addr or self.acct_refresh():
            try:
                acct_instance = self.cs.next_instance(ACCOUNT_SERVICE)
                self._acct_addr = acct_instance.get('addr')
                self._acct_addr_update = time.time()
            except Exception:
                self.logger.warn('Unable to find account instance')
        return self._acct_addr

    def acct_refresh(self):
        return ((time.time() - self._acct_addr_update) >
                self.acct_refresh_interval)

    def handle_container_put(self, event):
        """
        Handle container creation.
        :param event:
        """
        self.logger.debug('worker handle container put')
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')

        event = {'mtime': mtime, 'name': name}
        self.session.post(uri, params={'id': account}, json=event)

    def handle_container_update(self, event):
        """
        Handle container update.
        :param event:
        """
        self.logger.debug('worker handle container update')
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        name = event.get('url').get('user')
        account = event.get('url').get('account')
        bytes_count = data.get('bytes-count', 0)
        object_count = data.get('object-count', 0)

        event = {
            'mtime': mtime,
            'name': name,
            'bytes': bytes_count,
            'objects': object_count
        }
        self.session.post(uri, params={'id': account}, json=event)

    def handle_container_destroy(self, event):
        """
        Handle container destroy.
        :param event:
        """
        self.logger.debug('worker handle container destroy')
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        dtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')

        event = {'dtime': dtime, 'name': name}
        self.session.post(uri, params={'id': account}, data=json.dumps(event))

    def handle_object_delete(self, event):
        """
        Handle object deletion.
        Delete the chunks of the object.
        :param event:
        """
        self.logger.debug('worker handle object delete')
        pile = GreenPile(PARALLEL_CHUNKS_DELETE)

        chunks = []

        for item in event.get('data'):
            if item.get('type') == 'chunks':
                chunks.append(item)
        if not len(chunks):
            self.logger.warn('No chunks found in event data')
            return

        def delete_chunk(chunk):
            resp = None
            try:
                with Timeout(CHUNK_TIMEOUT):
                    resp = self.session.delete(chunk['id'])
            except (Exception, Timeout) as e:
                self.logger.warn('error while deleting chunk %s "%s"',
                                 chunk['id'], str(e))
            return resp

        for chunk in chunks:
            pile.spawn(delete_chunk, chunk)

        resps = [resp for resp in pile if resp]

        for resp in resps:
            if resp.status_code == 204:
                self.logger.debug('deleted chunk %s' % resp.url)
            else:
                self.logger.warn('failed to delete chunk %s' % resp.url)

    def handle_object_put(self, event):
        """
        Handle object creation.
        TODO
        :param event:
        """
        self.logger.debug('worker handle object put')

    def handle_reference_update(self, event):
        """
        Handle reference update.
        TODO
        :param event
        """
        self.logger.debug('worker handle reference update')

    def handle_chunk_put(self, event):
        """
        Handle chunk creation.
        :param event
        """
        if not self.rdir_update:
            self.logger.debug('worker skip chunk creation')
            return

        self.logger.debug('worker handle chunk creation')

        when = event.get('when')
        data = event.get('data')
        volume_id = data.get('volume_id')
        del data['volume_id']
        container_id = data.get('container_id')
        del data['container_id']
        content_id = data.get('content_id')
        del data['content_id']
        chunk_id = data.get('chunk_id')
        del data['chunk_id']
        data['mtime'] = when
        self.rdir.chunk_push(volume_id, container_id, content_id, chunk_id,
                             **data)

    def handle_chunk_delete(self, event):
        """
        Handle chunk deletion.
        :param event
        """
        if not self.rdir_update:
            self.logger.debug('worker skip chunk deletion')
            return

        self.logger.debug('worker handle chunk deletion')

        data = event.get('data')
        volume_id = data.get('volume_id')
        container_id = data.get('container_id')
        content_id = data.get('content_id')
        chunk_id = data.get('chunk_id')
        self.rdir.chunk_delete(volume_id, container_id, content_id, chunk_id)

    def handle_ping(self, event):
        """
        Handle ping
        :param event
        """
        self.logger.debug('worker handle ping')
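
On the queue side, jobs are plain JSON strings reserved from beanstalkd. Only from_url(), reserve() and delete() appear above; the put() used below is an assumption about the same Beanstalk class. A sketch of feeding the worker a ping:

# Hypothetical producer; put() is assumed to exist alongside reserve().
beanstalk = Beanstalk.from_url('tcp://127.0.0.1:11300')
beanstalk.put(json.dumps({'event': EventType.PING}))
# The worker loop then effectively does:
#   job_id, data = self.beanstalk.reserve()
#   event = self.safe_decode_job(data)   # json.loads() with error logging
#   self.process_event(event)            # -> handle_ping()
#   self.beanstalk.delete(job_id)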
Example #10
class EventWorker(Worker):
    def init(self):
        eventlet.monkey_patch(os=False)
        self.session = requests.Session()
        self.cs = ConscienceClient(self.conf)
        self.rdir = RdirClient(self.conf)
        self._acct_addr = None
        self._acct_addr_update = 0  # timestamp of the last account-service lookup
        self.graceful_timeout = 1
        self.acct_refresh_interval = int_value(
            self.conf.get('acct_refresh_interval'), 60
        )
        self.concurrency = int_value(self.conf.get('concurrency'), 1000)
        self.acct_update = true_value(self.conf.get('acct_update', True))
        self.rdir_update = true_value(self.conf.get('rdir_update', True))
        super(EventWorker, self).init()

    def notify(self):
        """TODO"""
        pass

    def safe_decode_job(self, job):
        try:
            return json.loads(job)
        except Exception as e:
            self.logger.warn('ERROR decoding job "%s"', str(e))
            return None

    def run(self):
        queue_url = self.conf.get('queue_url', 'tcp://127.0.0.1:11300')
        self.beanstalk = Beanstalk.from_url(queue_url)

        gt = eventlet.spawn(self.handle)

        while self.alive:
            self.notify()
            try:
                eventlet.sleep(1.0)
            except AssertionError:
                self.alive = False
                break

        self.notify()
        try:
            with Timeout(self.graceful_timeout) as t:
                gt.kill(StopServe())
                gt.wait()
        except Timeout as te:
            if te != t:
                raise
            gt.kill()

    def handle(self):
        try:
            while True:
                job_id, data = self.beanstalk.reserve()
                try:
                    event = self.safe_decode_job(data)
                    if event:
                        self.process_event(event)
                    self.beanstalk.delete(job_id)
                except Exception:
                    self.logger.exception("ERROR handling event %s", job_id)
        except StopServe:
            self.logger.info('Stopping event handler')

    def process_event(self, event):
        handler = self.get_handler(event)
        if not handler:
            self.logger.warn("ERROR no handler found for event")
            # mark as success
            return True
        success = True
        try:
            handler(event)
        except Exception:
            success = False
        finally:
            return success

    def get_handler(self, event):
        event_type = event.get('event')
        if not event_type:
            return None

        if event_type == EventType.CONTAINER_PUT:
            return self.handle_container_put
        elif event_type == EventType.CONTAINER_DESTROY:
            return self.handle_container_destroy
        elif event_type == EventType.CONTAINER_UPDATE:
            return self.handle_container_update
        elif event_type == EventType.OBJECT_PUT:
            return self.handle_object_put
        elif event_type == EventType.OBJECT_DELETE:
            return self.handle_object_delete
        elif event_type == EventType.REFERENCE_UPDATE:
            return self.handle_reference_update
        elif event_type == EventType.CHUNK_PUT:
            return self.handle_chunk_put
        elif event_type == EventType.CHUNK_DELETE:
            return self.handle_chunk_delete
        elif event_type == EventType.PING:
            return self.handle_ping
        else:
            return None

    @property
    def acct_addr(self):
        if not self._acct_addr or self.acct_refresh():
            try:
                acct_instance = self.cs.next_instance(ACCOUNT_SERVICE)
                self._acct_addr = acct_instance.get('addr')
                self._acct_addr_update = time.time()
            except Exception:
                self.logger.warn('Unable to find account instance')
        return self._acct_addr

    def acct_refresh(self):
        return ((time.time() - self._acct_addr_update) >
                self.acct_refresh_interval)

    def handle_container_put(self, event):
        """
        Handle container creation.
        :param event:
        """
        self.logger.debug('worker handle container put')
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')

        event = {'mtime': mtime, 'name': name}
        self.session.post(uri, params={'id': account}, json=event)

    def handle_container_update(self, event):
        """
        Handle container update.
        :param event:
        """
        self.logger.debug('worker handle container update')
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        name = event.get('url').get('user')
        account = event.get('url').get('account')
        bytes_count = data.get('bytes-count', 0)
        object_count = data.get('object-count', 0)

        event = {
            'mtime': mtime,
            'name': name,
            'bytes': bytes_count,
            'objects': object_count
        }
        self.session.post(uri, params={'id': account}, json=event)

    def handle_container_destroy(self, event):
        """
        Handle container destroy.
        :param event:
        """
        self.logger.debug('worker handle container destroy')
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        dtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')

        event = {'dtime': dtime, 'name': name}
        self.session.post(uri, params={'id': account}, data=json.dumps(event))

    def handle_object_delete(self, event):
        """
        Handle object deletion.
        Delete the chunks of the object.
        :param event:
        """
        self.logger.debug('worker handle object delete')
        pile = GreenPile(PARALLEL_CHUNKS_DELETE)

        chunks = []

        for item in event.get('data'):
            if item.get('type') == 'chunks':
                chunks.append(item)
        if not len(chunks):
            self.logger.warn('No chunks found in event data')
            return

        def delete_chunk(chunk):
            resp = None
            try:
                with Timeout(CHUNK_TIMEOUT):
                    resp = self.session.delete(chunk['id'])
            except (Exception, Timeout) as e:
                self.logger.warn('error while deleting chunk %s "%s"',
                                 chunk['id'], str(e))
            return resp

        for chunk in chunks:
            pile.spawn(delete_chunk, chunk)

        resps = [resp for resp in pile if resp]

        for resp in resps:
            if resp.status_code == 204:
                self.logger.debug('deleted chunk %s' % resp.url)
            else:
                self.logger.warn('failed to delete chunk %s' % resp.url)

    def handle_object_put(self, event):
        """
        Handle object creation.
        TODO
        :param event:
        """
        self.logger.debug('worker handle object put')

    def handle_reference_update(self, event):
        """
        Handle reference update.
        TODO
        :param event
        """
        self.logger.debug('worker handle reference update')

    def handle_chunk_put(self, event):
        """
        Handle chunk creation.
        :param event
        """
        if not self.rdir_update:
            self.logger.debug('worker skip chunk creation')
            return

        self.logger.debug('worker handle chunk creation')

        when = event.get('when')
        data = event.get('data')
        volume_id = data.get('volume_id')
        del data['volume_id']
        container_id = data.get('container_id')
        del data['container_id']
        content_id = data.get('content_id')
        del data['content_id']
        chunk_id = data.get('chunk_id')
        del data['chunk_id']
        data['mtime'] = when
        self.rdir.chunk_push(volume_id, container_id, content_id, chunk_id,
                             **data)

    def handle_chunk_delete(self, event):
        """
        Handle chunk deletion.
        :param event
        """
        if not self.rdir_update:
            self.logger.debug('worker skip chunk deletion')
            return

        self.logger.debug('worker handle chunk deletion')

        data = event.get('data')
        volume_id = data.get('volume_id')
        container_id = data.get('container_id')
        content_id = data.get('content_id')
        chunk_id = data.get('chunk_id')
        self.rdir.chunk_delete(volume_id, container_id, content_id, chunk_id)

    def handle_ping(self, event):
        """
        Handle ping
        :param event
        """
        self.logger.debug('worker handle ping')
Example #11
class BlobRebuilderWorker(object):
    def __init__(self, conf, logger, volume):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        self.volume = volume
        self.run_time = 0
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.chunks_run_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_chunks_processed = 0
        self.dry_run = true_value(conf.get('dry_run', False))
        self.report_interval = int_value(conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(conf.get('chunks_per_second'),
                                               30)
        self.max_bytes_per_second = int_value(conf.get('bytes_per_second'),
                                              10000000)
        self.rdir_fetch_limit = int_value(conf.get('rdir_fetch_limit'), 100)
        self.rdir_client = RdirClient(conf)
        self.content_factory = ContentFactory(conf)

    def rebuilder_pass_with_lock(self):
        self.rdir_client.admin_lock(self.volume,
                                    "rebuilder on %s" % gethostname())
        try:
            self.rebuilder_pass()
        finally:
            self.rdir_client.admin_unlock(self.volume)

    def rebuilder_pass(self):
        start_time = report_time = time.time()

        total_errors = 0
        rebuilder_time = 0

        chunks = self.rdir_client.chunk_fetch(self.volume,
                                              limit=self.rdir_fetch_limit,
                                              rebuild=True)
        for container_id, content_id, chunk_id, data in chunks:
            loop_time = time.time()

            if self.dry_run:
                self.dryrun_chunk_rebuild(container_id, content_id, chunk_id)
            else:
                self.safe_chunk_rebuild(container_id, content_id, chunk_id)

            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
            self.total_chunks_processed += 1
            now = time.time()

            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(b_rate).2f '
                    '%(total).2f '
                    '%(rebuilder_time).2f '
                    '%(rebuilder_rate).2f' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / (now - report_time),
                        'b_rate': self.bytes_processed / (now - report_time),
                        'total': (now - start_time),
                        'rebuilder_time': rebuilder_time,
                        'rebuilder_rate': rebuilder_time / (now - start_time)
                    })
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.bytes_processed = 0
                self.last_reported = now
            rebuilder_time += (now - loop_time)
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(chunk_rate).2f '
            '%(bytes_rate).2f '
            '%(rebuilder_time).2f '
            '%(rebuilder_rate).2f' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'chunk_rate': self.total_chunks_processed / elapsed,
                'bytes_rate': self.total_bytes_processed / elapsed,
                'rebuilder_time': rebuilder_time,
                'rebuilder_rate': rebuilder_time / elapsed
            })

    def dryrun_chunk_rebuild(self, container_id, content_id, chunk_id):
        self.logger.info("[dryrun] Rebuilding "
                         "container %s, content %s, chunk %s" %
                         (container_id, content_id, chunk_id))
        self.passes += 1

    def safe_chunk_rebuild(self, container_id, content_id, chunk_id):
        try:
            self.chunk_rebuild(container_id, content_id, chunk_id)
        except Exception as e:
            self.errors += 1
            self.logger.error('ERROR while rebuilding chunk %s|%s|%s: %s',
                              container_id, content_id, chunk_id, e)

        self.passes += 1

    def chunk_rebuild(self, container_id, content_id, chunk_id):
        self.logger.info('Rebuilding (container %s, content %s, chunk %s)' %
                         (container_id, content_id, chunk_id))

        try:
            content = self.content_factory.get(container_id, content_id)
        except ContentNotFound:
            raise exc.OrphanChunk('Content not found')

        chunk = content.chunks.filter(id=chunk_id).one()
        if chunk is None:
            raise exc.OrphanChunk('Chunk not found in content')
        chunk_size = chunk.size

        content.rebuild_chunk(chunk_id)

        self.rdir_client.chunk_push(self.volume,
                                    container_id,
                                    content_id,
                                    chunk_id,
                                    rtime=int(time.time()))

        self.bytes_processed += chunk_size
        self.total_bytes_processed += chunk_size
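
A minimal usage sketch for this worker; the namespace, volume address, and conf keys are assumptions mirroring what __init__ reads above:

# Placeholder configuration; dry_run exercises dryrun_chunk_rebuild only.
conf = {
    'namespace': 'OPENIO',
    'dry_run': True,
    'rdir_fetch_limit': 10,
}
worker = BlobRebuilderWorker(conf, None, '127.0.0.1:6004')
worker.rebuilder_pass_with_lock()   # admin-lock the volume for the pass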
Beispiel #12
0
class EventWorker(object):
    def __init__(self, conf, name, context, **kwargs):
        self.conf = conf
        self.name = name
        verbose = kwargs.pop('verbose', False)
        self.logger = get_logger(self.conf, verbose=verbose)
        self.init_zmq(context)
        self.cs = ConscienceClient(self.conf)
        self.rdir = RdirClient(self.conf)
        self._acct_addr = None
        # Timestamp of the last account-service lookup; kept separate so
        # the acct_update boolean below is never clobbered by a timestamp.
        self.acct_last_refresh = 0
        self.acct_refresh_interval = int_value(
            conf.get('acct_refresh_interval'), 60)
        self.acct_update = true_value(conf.get('acct_update', True))
        self.rdir_update = true_value(conf.get('rdir_update', True))
        self.session = requests.Session()
        self.failed = False

    def start(self):
        self.logger.info('worker "%s" starting', self.name)
        self.running = True
        self.run()

    def stop(self):
        self.logger.info('worker "%s" stopping', self.name)
        self.running = False

    def init_zmq(self, context):
        socket = context.socket(zmq.REP)
        socket.connect('inproc://event-front')
        self.socket = socket

    def safe_ack(self, msg):
        try:
            self.socket.send_multipart(msg)
        except Exception:
            self.logger.warn('Unable to ack event')

    def run(self):
        try:
            while self.running:
                msg = self.socket.recv_multipart()
                self.logger.debug("msg received: %s" % msg)
                event = decode_msg(msg)
                success = self.process_event(event)
                f = "0" if success else ""
                self.safe_ack([msg[0], f])
        except Exception as e:
            self.logger.warn('ERROR in worker "%s": %s', self.name, e)
            self.failed = True
            raise
        finally:
            self.logger.info('worker "%s" stopped', self.name)

    def process_event(self, event):
        handler = self.get_handler(event)
        if not handler:
            self.logger.warn("No handler found")
            # mark as success
            return True
        success = True
        try:
            handler(event)
        except Exception:
            # Do not let a handler failure kill the worker loop.
            self.logger.exception('Error while handling event')
            success = False
        return success

    def get_handler(self, event):
        event_type = event.get('event')
        if not event_type:
            return None

        if event_type == EventType.CONTAINER_PUT:
            return self.handle_container_put
        elif event_type == EventType.CONTAINER_DESTROY:
            return self.handle_container_destroy
        elif event_type == EventType.CONTAINER_UPDATE:
            return self.handle_container_update
        elif event_type == EventType.OBJECT_PUT:
            return self.handle_object_put
        elif event_type == EventType.OBJECT_DELETE:
            return self.handle_object_delete
        elif event_type == EventType.REFERENCE_UPDATE:
            return self.handle_reference_update
        elif event_type == EventType.CHUNK_PUT:
            return self.handle_chunk_put
        elif event_type == EventType.CHUNK_DELETE:
            return self.handle_chunk_delete
        elif event_type == EventType.PING:
            return self.handle_ping
        else:
            return None

    @property
    def acct_addr(self):
        if not self._acct_addr or self.acct_refresh():
            try:
                acct_instance = self.cs.next_instance(ACCOUNT_SERVICE)
                self._acct_addr = acct_instance.get('addr')
                self.acct_last_refresh = time.time()
            except Exception:
                self.logger.warn('Unable to find account instance')
        return self._acct_addr

    def acct_refresh(self):
        return (time.time() - self.acct_last_refresh) > \
            self.acct_refresh_interval

    def handle_container_put(self, event):
        """
        Handle container creation.
        :param event:
        """
        self.logger.debug('worker "%s" handle container put', self.name)
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')

        event = {'mtime': mtime, 'name': name}
        self.session.post(uri, params={'id': account}, data=json.dumps(event))

    def handle_container_update(self, event):
        """
        Handle container update.
        :param event:
        """
        self.logger.debug('worker "%s" handle container update', self.name)
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')
        bytes_count = data.get('bytes-count', 0)
        object_count = data.get('object-count', 0)

        event = {
            'mtime': mtime,
            'name': name,
            'bytes': bytes_count,
            'objects': object_count
        }
        self.session.post(uri, params={'id': account}, data=json.dumps(event))

    def handle_container_destroy(self, event):
        """
        Handle container destroy.
        :param event:
        """
        self.logger.debug('worker "%s" handle container destroy', self.name)
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        dtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')

        event = {'dtime': dtime, 'name': name}
        self.session.post(uri, params={'id': account}, data=json.dumps(event))

    def handle_object_delete(self, event):
        """
        Handle object deletion.
        Delete the chunks of the object.
        :param event:
        """
        self.logger.debug('worker "%s" handle object delete', self.name)
        pile = GreenPile(PARALLEL_CHUNKS_DELETE)

        chunks = []

        for item in event.get('data'):
            if item.get('type') == 'chunks':
                chunks.append(item)
        if not chunks:
            self.logger.warn('No chunks found in event data')
            return

        def delete_chunk(chunk):
            resp = None
            try:
                with Timeout(CHUNK_TIMEOUT):
                    resp = self.session.delete(chunk['id'])
            except (Exception, Timeout) as e:
                self.logger.exception(e)
            return resp

        for chunk in chunks:
            pile.spawn(delete_chunk, chunk)

        resps = [resp for resp in pile if resp]

        for resp in resps:
            if resp.status_code == 204:
                self.logger.info('deleted chunk %s' % resp.url)
            else:
                self.logger.warn('failed to delete chunk %s' % resp.url)

    def handle_object_put(self, event):
        """
        Handle object creation.
        TODO
        :param event:
        """
        self.logger.debug('worker "%s" handle object put', self.name)

    def handle_reference_update(self, event):
        """
        Handle reference update.
        TODO
        :param event:
        """
        self.logger.debug('worker "%s" handle reference update', self.name)

    def handle_chunk_put(self, event):
        """
        Handle chunk creation.
        :param event:
        """
        if not self.rdir_update:
            self.logger.debug('worker "%s" skip chunk creation', self.name)
            return

        self.logger.debug('worker "%s" handle chunk creation', self.name)

        when = event.get('when')
        data = event.get('data')
        # Pop the addressing fields; whatever remains in 'data' is
        # forwarded verbatim to the reverse directory.
        volume_id = data.pop('volume_id')
        container_id = data.pop('container_id')
        content_id = data.pop('content_id')
        chunk_id = data.pop('chunk_id')
        data['mtime'] = when
        self.rdir.chunk_push(volume_id, container_id, content_id, chunk_id,
                             **data)

    def handle_chunk_delete(self, event):
        """
        Handle chunk deletion.
        :param event:
        """
        if not self.rdir_update:
            self.logger.debug('worker "%s" skip chunk deletion', self.name)
            return

        self.logger.debug('worker "%s" handle chunk deletion', self.name)

        data = event.get('data')
        volume_id = data.get('volume_id')
        container_id = data.get('container_id')
        content_id = data.get('content_id')
        chunk_id = data.get('chunk_id')
        self.rdir.chunk_delete(volume_id, container_id, content_id, chunk_id)

    def handle_ping(self, event):
        """
        Handle ping.
        :param event:
        """
        self.logger.debug('worker "%s" handle ping', self.name)
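
The worker's REP socket connects to inproc://event-front, so some front end must bind there and speak the same multipart framing. A hedged test harness follows; `encode_msg` is hypothetical (the assumed inverse of decode_msg), and the framing is inferred from the run() loop above:

import zmq
from eventlet import spawn_n   # the workers above already rely on eventlet

# NB: with eventlet, the zmq.green flavour of pyzmq may be required for
# the REP loop to yield cooperatively.
context = zmq.Context()
front = context.socket(zmq.REQ)
front.bind('inproc://event-front')       # workers connect() to this name

worker = EventWorker(conf, 'worker-0', context)   # conf as assumed elsewhere
spawn_n(worker.start)                    # run the blocking REP loop aside

front.send_multipart([b'42', encode_msg({'event': EventType.PING})])
print front.recv_multipart()             # expect [b'42', b'0'] on success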
Beispiel #13
0
class BlobRebuilderWorker(object):
    def __init__(self, conf, logger, volume):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        self.volume = volume
        self.run_time = 0
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.chunks_run_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_chunks_processed = 0
        self.dry_run = true_value(
            conf.get('dry_run', False))
        self.report_interval = int_value(
            conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(
            conf.get('chunks_per_second'), 30)
        self.max_bytes_per_second = int_value(
            conf.get('bytes_per_second'), 10000000)
        self.rdir_fetch_limit = int_value(
            conf.get('rdir_fetch_limit'), 100)
        self.blob_client = BlobClient()
        self.container_client = ContainerClient(conf)
        self.rdir_client = RdirClient(conf)

    def rebuilder_pass_with_lock(self):
        self.rdir_client.admin_lock(self.volume,
                                    "rebuilder on %s" % gethostname())
        try:
            self.rebuilder_pass()
        finally:
            self.rdir_client.admin_unlock(self.volume)

    def rebuilder_pass(self):
        start_time = report_time = time.time()

        total_errors = 0
        rebuilder_time = 0

        chunks = self.rdir_client.chunk_fetch(self.volume,
                                              limit=self.rdir_fetch_limit,
                                              rebuild=True)
        for container_id, content_id, chunk_id, data in chunks:
            loop_time = time.time()

            if self.dry_run:
                self.dryrun_chunk_rebuild(container_id, content_id, chunk_id)
            else:
                self.safe_chunk_rebuild(container_id, content_id, chunk_id)

            self.chunks_run_time = ratelimit(
                self.chunks_run_time,
                self.max_chunks_per_second
            )
            self.total_chunks_processed += 1
            now = time.time()

            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(b_rate).2f '
                    '%(total).2f '
                    '%(rebuilder_time).2f '
                    '%(rebuilder_rate).2f' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / (now - report_time),
                        'b_rate': self.bytes_processed / (now - report_time),
                        'total': (now - start_time),
                        'rebuilder_time': rebuilder_time,
                        'rebuilder_rate': rebuilder_time / (now - start_time)
                    }
                )
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.bytes_processed = 0
                self.last_reported = now
            rebuilder_time += (now - loop_time)
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(chunk_rate).2f '
            '%(bytes_rate).2f '
            '%(rebuilder_time).2f '
            '%(rebuilder_rate).2f' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'chunk_rate': self.total_chunks_processed / elapsed,
                'bytes_rate': self.total_bytes_processed / elapsed,
                'rebuilder_time': rebuilder_time,
                'rebuilder_rate': rebuilder_time / elapsed
            }
        )

    def dryrun_chunk_rebuild(self, container_id, content_id, chunk_id):
        self.logger.info("[dryrun] Rebuilding "
                         "container %s, content %s, chunk %s"
                         % (container_id, content_id, chunk_id))
        self.passes += 1

    def safe_chunk_rebuild(self, container_id, content_id, chunk_id):
        self.logger.info('Rebuilding (container %s, content %s, chunk %s)'
                         % (container_id, content_id, chunk_id))
        try:
            self.chunk_rebuild(container_id, content_id, chunk_id)
        except Exception as e:
            self.errors += 1
            self.logger.error('ERROR while rebuilding chunk %s|%s|%s: %s',
                              container_id, content_id, chunk_id, e)

        self.passes += 1

    def _meta2_get_chunks_at_pos(self, container_id, content_id, chunk_id):
        current_chunk_url = 'http://%s/%s' % (self.volume, chunk_id)

        try:
            data = self.container_client.content_show(
                cid=container_id, content=content_id)
        except exc.NotFound:
            raise exc.OrphanChunk('Content not found')

        current_chunk = None
        for c in data:
            if c['url'] == current_chunk_url:
                current_chunk = c
                break
        if not current_chunk:
            raise exc.OrphanChunk('Chunk not found in content')

        duplicate_chunks = []
        for c in data:
            if c['pos'] == current_chunk['pos'] \
                    and c['url'] != current_chunk['url']:
                duplicate_chunks.append(c)
        if not duplicate_chunks:
            raise exc.UnrecoverableContent('No copy of missing chunk')

        return current_chunk, duplicate_chunks

    def _meta2_get_spare_chunk(self, container_id, content_id, notin, broken):
        spare_data = {'notin': notin,
                      'broken': [broken],
                      'size': 0}
        try:
            spare_resp = self.container_client.content_spare(
                cid=container_id, content=content_id, data=spare_data)
        except ClientException as e:
            raise exc.SpareChunkException('No spare chunk (%s)' % e)

        return spare_resp['chunks'][0]

    def _meta2_replace_chunk(self, container_id, content_id,
                             current_chunk, new_chunk):
        old = [{'type': 'chunk',
                'id': current_chunk['url'],
                'hash': current_chunk['hash'],
                'size': current_chunk['size'],
                'pos': current_chunk['pos'],
                'content': content_id}]
        new = [{'type': 'chunk',
                'id': new_chunk['id'],
                'hash': current_chunk['hash'],
                'size': current_chunk['size'],
                'pos': current_chunk['pos'],
                'content': content_id}]
        update_data = {'old': old, 'new': new}

        self.container_client.container_raw_update(
            cid=container_id, data=update_data)

    # TODO rain support
    def chunk_rebuild(self, container_id, content_id, chunk_id):

        current_chunk, duplicate_chunks = self._meta2_get_chunks_at_pos(
            container_id, content_id, chunk_id)

        spare_chunk = self._meta2_get_spare_chunk(
            container_id, content_id, duplicate_chunks, current_chunk)

        uploaded = False
        for src in duplicate_chunks:
            try:
                self.blob_client.chunk_copy(src['url'], spare_chunk['id'])
                self.logger.debug('copy chunk from %s to %s',
                                  src['url'], spare_chunk['id'])
                uploaded = True
                break
            except Exception as e:
                self.logger.debug('Failed to copy chunk from %s to %s: %s',
                                  src['url'], spare_chunk['id'], type(e))
        if not uploaded:
            raise exc.UnrecoverableContent('No copy available '
                                           'of missing chunk')

        self._meta2_replace_chunk(container_id, content_id,
                                  current_chunk, spare_chunk)

        self.rdir_client.chunk_push(self.volume, container_id, content_id,
                                    chunk_id, rtime=int(time.time()))

        self.bytes_processed += current_chunk['size']
        self.total_bytes_processed += current_chunk['size']
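
A sketch of invoking one rebuild by hand; conf, volume, and the three IDs are placeholders. The worker resolves the duplicates at the chunk's position, copies one onto a spare rawx, rewrites the meta2 entry, then refreshes the rdir entry via chunk_push:

worker = BlobRebuilderWorker(conf, None, volume)
try:
    worker.chunk_rebuild(container_id, content_id, chunk_id)
except exc.OrphanChunk:
    pass    # the chunk no longer belongs to any known content
except exc.UnrecoverableContent:
    pass    # no duplicate at that position could be copied to a spare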
Beispiel #14
0
class BlobIndexer(Daemon):
    def __init__(self, conf, **kwargs):
        super(BlobIndexer, self).__init__(conf)
        self.logger = get_logger(conf)
        volume = conf.get('volume')
        if not volume:
            raise exc.ConfigurationException('No volume specified for indexer')
        self.volume = volume
        self.passes = 0
        self.errors = 0
        self.successes = 0
        self.last_reported = 0
        self.total_since_last_reported = 0
        self.chunks_run_time = 0
        self.interval = int_value(conf.get('interval'), 300)
        self.report_interval = int_value(conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(conf.get('chunks_per_second'),
                                               30)
        pm = get_pool_manager(pool_connections=10)
        self.index_client = RdirClient(conf,
                                       logger=self.logger,
                                       pool_manager=pm)
        self.namespace, self.volume_id = check_volume(self.volume)
        self.convert_chunks = true_value(conf.get('convert_chunks'))
        if self.convert_chunks:
            self.converter = BlobConverter(self.conf,
                                           logger=self.logger,
                                           pool_manager=pm)
        else:
            self.converter = None

    def index_pass(self):
        def safe_update_index(path):
            chunk_id = path.rsplit('/', 1)[-1]
            if len(chunk_id) != STRLEN_CHUNKID:
                self.logger.warn('Not a chunk: %s', path)
                return
            for c in chunk_id:
                if c not in hexdigits:
                    self.logger.warn('Not a chunk: %s', path)
                    return
            try:
                self.update_index(path, chunk_id)
                self.successes += 1
                self.logger.debug('Updated %s', path)
            except OioNetworkException as exc:
                self.errors += 1
                self.logger.warn('ERROR while updating %s: %s', path, exc)
            except VolumeException as exc:
                self.errors += 1
                self.logger.error('Cannot index %s: %s', path, exc)
                # All chunks of this volume are indexed in the same service,
                # no need to try another chunk, it will generate the same
                # error. Let the upper level retry later.
                raise
            except Exception:
                self.errors += 1
                self.logger.exception('ERROR while updating %s', path)
            self.total_since_last_reported += 1

        def report(tag):
            total = self.errors + self.successes
            now = time.time()
            elapsed = (now - start_time) or 0.000001
            self.logger.info(
                '%(tag)s=%(current_time)s '
                'elapsed=%(elapsed).02f '
                'pass=%(pass)d '
                'errors=%(errors)d '
                'chunks=%(nb_chunks)d %(c_rate).2f/s' % {
                    'tag': tag,
                    'current_time': datetime.fromtimestamp(
                        int(now)).isoformat(),
                    'pass': self.passes,
                    'errors': self.errors,
                    'nb_chunks': total,
                    'c_rate': self.total_since_last_reported / (
                        (now - self.last_reported) or 0.000001),
                    'elapsed': elapsed
                })
            self.last_reported = now
            self.total_since_last_reported = 0

        start_time = time.time()
        self.last_reported = start_time
        self.errors = 0
        self.successes = 0

        paths = paths_gen(self.volume)
        report('started')
        for path in paths:
            safe_update_index(path)
            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
            now = time.time()
            if now - self.last_reported >= self.report_interval:
                report('running')
        report('ended')

    def update_index(self, path, chunk_id):
        with open(path) as f:
            try:
                meta = None
                if self.convert_chunks and self.converter:
                    _, meta = self.converter.convert_chunk(f, chunk_id)
                if meta is None:
                    meta, _ = read_chunk_metadata(f, chunk_id)
            except exc.MissingAttribute as e:
                raise exc.FaultyChunk('Missing extended attribute %s' % e)

            data = {'mtime': int(time.time())}
            headers = {'X-oio-req-id': 'blob-indexer-' + request_id()[:-13]}
            self.index_client.chunk_push(self.volume_id,
                                         meta['container_id'],
                                         meta['content_id'],
                                         meta['chunk_id'],
                                         headers=headers,
                                         **data)

    def run(self, *args, **kwargs):
        time.sleep(random() * self.interval)
        while True:
            pre = time.time()
            try:
                self.index_pass()
            except VolumeException as exc:
                self.logger.error('Cannot index chunks, will retry later: %s',
                                  exc)
            except Exception as exc:
                self.logger.exception('ERROR during indexing: %s', exc)
            else:
                self.passes += 1
            elapsed = (time.time() - pre) or 0.000001
            if elapsed < self.interval:
                time.sleep(self.interval - elapsed)
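
A hedged sketch of wiring this daemon up; the conf keys mirror the __init__ reads above, and the volume path is a placeholder that check_volume must be able to resolve into a namespace and volume id:

# Placeholder configuration for a local rawx volume.
conf = {
    'volume': '/var/lib/oio/sds/OPENIO/rawx-1',
    'interval': 300,
    'report_interval': 3600,
    'chunks_per_second': 30,
}
indexer = BlobIndexer(conf)
indexer.run()   # sleeps a random slice of interval, then loops forever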
Beispiel #15
0
class BlobIndexerWorker(object):

    def __init__(self, conf, logger, volume):
        self.conf = conf
        self.logger = logger
        self.volume = volume
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.chunks_run_time = 0
        self.total_chunks_processed = 0
        self.report_interval = int_value(
            conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(
            conf.get('chunks_per_second'), 30)
        self.index_client = RdirClient(conf)
        self.namespace, self.volume_id = check_volume(self.volume)

    def index_pass(self):
        start_time = report_time = time.time()

        total_errors = 0

        paths = paths_gen(self.volume)

        for path in paths:
            self.safe_update_index(path)
            self.chunks_run_time = ratelimit(
                self.chunks_run_time,
                self.max_chunks_per_second
            )
            self.total_chunks_processed += 1
            now = time.time()

            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(total).2f ' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / (now - report_time),
                        'total': (now - start_time)
                    }
                )
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.errors = 0
                self.last_reported = now
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(chunk_rate).2f ' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'chunk_rate': self.total_chunks_processed / elapsed
            }
        )

    def safe_update_index(self, path):
        try:
            self.logger.debug('Updating index: %s' % path)
            self.update_index(path)
        except Exception:
            self.errors += 1
            self.logger.exception('ERROR while updating index for chunk %s',
                                  path)
        self.passes += 1

    def update_index(self, path):
        with open(path) as f:
            try:
                meta = read_chunk_metadata(f)
            except exc.MissingAttribute as e:
                raise exc.FaultyChunk(
                    'Missing extended attribute %s' % e)
            data = {
                'content_version': meta['content_version'],
                'content_nbchunks': meta['content_chunksnb'],
                'content_path': meta['content_path'],
                'content_size': meta['content_size'],
                'chunk_hash': meta['chunk_hash'],
                'chunk_position': meta['chunk_pos'],
                'chunk_size': meta['chunk_size'],
                'mtime': int(time.time())
            }
            self.index_client.chunk_push(self.volume_id,
                                         meta['content_cid'],
                                         meta['content_id'],
                                         meta['chunk_id'],
                                         **data)
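
For completeness, a sketch of a driver loop for this worker variant, with placeholder values throughout; unlike the Daemon above, pacing between passes is left to the caller:

conf = {'namespace': 'OPENIO', 'chunks_per_second': 30}
worker = BlobIndexerWorker(conf, get_logger(conf), '/var/lib/oio/rawx-1')
while True:
    worker.index_pass()
    time.sleep(300)   # the caller chooses the pacing between passes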