Beispiel #1
0
 def _deindex_chunk(self, chunk):
     """
     Remove a chunk's entry from the rdir service.

     The volume id and the chunk id are taken from the third and fourth
     '/'-separated fields of ``chunk['url']``; the container and content
     ids come from this object.

     :param chunk: mapping holding at least a 'url' key.
     """
     client = RdirClient(self.conf,
                         pool_manager=self.conscience.pool_manager)
     # Split once and index the fields, instead of splitting per field.
     url_fields = chunk['url'].split('/', 3)
     volume_id = url_fields[2]
     chunk_id = url_fields[3]
     client.chunk_delete(volume_id, self.container_id,
                         self.content_id, chunk_id)
Beispiel #2
0
class ChunkOperator(object):
    """
    Execute maintenance operations on chunks.
    """

    def __init__(self, conf, logger=None):
        """
        :param conf: configuration handed to the logger factory, the rdir
            client and the content factory.
        :param logger: optional logger; when omitted (or falsy) one is
            created from `conf` with `get_logger`.
        """
        self.conf = conf
        self.logger = logger or get_logger(conf)
        self.rdir_client = RdirClient(conf, logger=self.logger)
        self.content_factory = ContentFactory(conf, logger=self.logger)

    def rebuild(self, container_id, content_id, chunk_id_or_pos,
                rawx_id=None, try_chunk_delete=False, allow_same_rawx=True):
        """
        Try to find the chunk in the metadata of the specified object,
        then rebuild it.

        :param container_id: id of the container holding the object.
        :param content_id: id of the object the chunk belongs to.
        :param chunk_id_or_pos: either a chunk position (any value shorter
            than 32 characters, e.g. "0" or "0.1"), or a chunk id,
            optionally given as a URL whose last path element is the id.
        :param rawx_id: when set and a chunk id is given, the chunk must
            be hosted by this rawx.
        :param try_chunk_delete: after the rebuild, try to delete the
            original chunk. Only possible when a chunk id was given.
        :param allow_same_rawx: forwarded to `content.rebuild_chunk`.
        :returns: the size of the chunk, as found in the object metadata.
        :raises OrphanChunk: if the object does not exist, or if the chunk
            id is not referenced by the object.
        :raises ValueError: if the chunk is not hosted by `rawx_id`.
        """
        try:
            content = self.content_factory.get(container_id, content_id)
        except ContentNotFound:
            raise OrphanChunk('Content not found: possible orphan chunk')

        # `chunk` stays None when only a position is known: in that case
        # there is no original chunk to delete or to deindex afterwards.
        chunk = None
        chunk_id = None
        chunk_pos = None
        chunk_size = 0
        if len(chunk_id_or_pos) < 32:
            chunk_pos = chunk_id_or_pos
            # The metachunk position is the part before the first dot.
            metapos = int(chunk_pos.split('.', 1)[0])
            chunk_size = content.chunks.filter(metapos=metapos).all()[0].size
        else:
            if '/' in chunk_id_or_pos:
                chunk_id = chunk_id_or_pos.rsplit('/', 1)[-1]
            else:
                chunk_id = chunk_id_or_pos

            chunk = content.chunks.filter(id=chunk_id).one()
            if chunk is None:
                raise OrphanChunk(
                    'Chunk not found in content: possible orphan chunk')
            elif rawx_id and chunk.host != rawx_id:
                raise ValueError('Chunk does not belong to this rawx')
            chunk_size = chunk.size

        content.rebuild_chunk(
            chunk_id, allow_same_rawx=allow_same_rawx,
            chunk_pos=chunk_pos)

        # Guard on `chunk`: referencing it unconditionally used to raise
        # UnboundLocalError (after a successful rebuild) whenever the
        # caller passed a position together with try_chunk_delete=True.
        if try_chunk_delete and chunk is not None:
            try:
                content.blob_client.chunk_delete(chunk.url)
                self.logger.info("Chunk %s deleted", chunk.url)
            except NotFound as exc:
                self.logger.debug("Chunk %s: %s", chunk.url, exc)

        # This call does not raise exception if chunk is not referenced
        if chunk_id is not None:
            self.rdir_client.chunk_delete(
                chunk.host, container_id, content_id, chunk_id)

        return chunk_size
Beispiel #3
0
class Harasser(object):
    """
    Stress an rdir service by pushing, then removing, fake chunk records.
    """

    def __init__(self, ns, max_containers=256, max_contents=256):
        """
        :param ns: namespace name, used to build the client configuration.
        :param max_containers: stored on the instance; not used by the
            methods of this class.
        :param max_contents: stored on the instance; not used by the
            methods of this class.
        """
        conf = {'namespace': ns}
        self.cs = ConscienceClient(conf)
        self.rdir = RdirClient(conf)
        self.rawx_list = [x['addr'] for x in self.cs.all_services('rawx')]
        # Every (volume, container, content, chunk) tuple pushed so far
        # and not yet removed.
        self.sent = set()
        self.max_containers = max_containers
        self.max_contents = max_contents

    def harass_put(self, loops=None):
        """
        Push `loops` fake chunk records to rdir and remember them.

        :param loops: number of records to push; a random number between
            1000 and 2000 when None.
        """
        if loops is None:
            loops = random.randint(1000, 2000)
        # Single-argument print(...) behaves the same with the Python 2
        # print statement and the Python 3 print function.
        print("Pushing %d fake chunks" % loops)
        count_start_container = random.randrange(2**20)
        count_start_content = random.randrange(2**20)
        start = time.time()
        nb_rawx = len(self.rawx_list)
        # Same metadata for every record of this run.
        args = {'mtime': int(start)}
        # Count down from `loops` to 1: volumes are picked round-robin and
        # container/content ids are derived from the counter, offset by a
        # random base chosen once per run.
        for loop in range(loops, 0, -1):
            vol_id = self.rawx_list[loop % nb_rawx]
            container_id = "%064X" % (loop + count_start_container)
            content_id = "%032X" % (loop + count_start_content)
            chunk_id = "http://%s/%064X" \
                % (vol_id, random.randrange(2**128))
            self.rdir.chunk_push(
                vol_id, container_id, content_id, chunk_id, **args)
            self.sent.add((vol_id, container_id, content_id, chunk_id))
        end = time.time()
        # Avoid ZeroDivisionError when the clock reports no elapsed time.
        elapsed = (end - start) or 0.000001
        print("%d pushed in %.3fs, %d req/s"
              % (loops, end - start, loops / elapsed))

    def harass_del(self, min_loops=0):
        """
        Remove a random number of the previously pushed records.

        :param min_loops: minimum number of records to remove; capped to
            the number of records currently known.
        """
        min_loops = min(min_loops, len(self.sent))
        loops = random.randint(min_loops, len(self.sent))
        print("Removing %d fake chunks" % loops)
        start = time.time()
        for _ in range(loops):
            args = self.sent.pop()
            self.rdir.chunk_delete(*args)
        end = time.time()
        # Removing zero records can take no measurable time at all.
        elapsed = (end - start) or 0.000001
        print("%d removed in %.3fs, %d req/s"
              % (loops, end - start, loops / elapsed))

    def __call__(self):
        """
        Alternate push and removal rounds until interrupted, then remove
        every record still registered.
        """
        try:
            while True:
                self.harass_put()
                self.harass_del()
        except KeyboardInterrupt:
            print("Cleaning...")
            self.harass_del(len(self.sent))
Beispiel #4
0
class BlobRebuilderWorker(object):
    """
    Rebuild the chunks of a volume, one chunk at a time.

    The chunks to rebuild are read from an input file when one is given,
    else from a beanstalkd tube when an address is given, else from the
    rdir service of the volume.
    """

    def __init__(self,
                 conf,
                 logger,
                 volume,
                 input_file=None,
                 try_chunk_delete=False,
                 beanstalkd_addr=None):
        """
        :param conf: configuration; read through its `get` method.
        :param logger: logger to use; when falsy, one is created from
            `conf` with `get_logger`.
        :param volume: id of the volume whose chunks are rebuilt.
        :param input_file: optional path of a file listing the chunks to
            rebuild, one "container|content|chunk" entry per line.
        :param try_chunk_delete: after a rebuild, try to delete the
            original chunk.
        :param beanstalkd_addr: optional URL of a beanstalkd server to
            read rebuild events from.
        """
        self.conf = conf
        self.logger = logger or get_logger(conf)
        self.volume = volume
        self.run_time = 0
        # `passes` and `bytes_processed` are reset at each periodic report,
        # the `total_*` counters are never reset.
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.chunks_run_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_chunks_processed = 0
        self.dry_run = true_value(conf.get('dry_run', False))
        self.report_interval = int_value(conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(conf.get('chunks_per_second'),
                                               30)
        self.max_bytes_per_second = int_value(conf.get('bytes_per_second'),
                                              10000000)
        self.rdir_fetch_limit = int_value(conf.get('rdir_fetch_limit'), 100)
        self.allow_same_rawx = true_value(conf.get('allow_same_rawx'))
        self.input_file = input_file
        self.rdir_client = RdirClient(conf, logger=self.logger)
        self.content_factory = ContentFactory(conf)
        self.try_chunk_delete = try_chunk_delete
        self.beanstalkd_addr = beanstalkd_addr
        self.beanstalkd_tube = conf.get('beanstalkd_tube', 'rebuild')
        self.beanstalk = None

    def _fetch_chunks_from_event(self, job_id, data):
        """
        Yield one [container_id, content_id, position, None] entry per
        missing chunk listed in a JSON-encoded event.

        :param job_id: id of the beanstalkd job (not used here).
        :param data: JSON text with 'url' ('id' and 'content' keys) and
            'data' ('missing_chunks' key) members.
        """
        env = json.loads(data)
        for chunk_pos in env['data']['missing_chunks']:
            yield [
                env['url']['id'], env['url']['content'],
                str(chunk_pos), None
            ]

    def _connect_to_beanstalk(self):
        """Open a beanstalkd connection and select the configured tube."""
        self.beanstalk = Beanstalk.from_url(self.beanstalkd_addr)
        self.beanstalk.use(self.beanstalkd_tube)
        self.beanstalk.watch(self.beanstalkd_tube)

    def _handle_beanstalk_event(self, conn_error):
        """
        Reserve one job and yield the chunks it describes.

        The job is deleted once all its chunks have been consumed, and
        buried if anything fails in between.

        :param conn_error: True if the previous attempt ended with a
            connection error; only used to avoid repeating warnings.
        :raises ConnectionError: if reserving the job fails.
        """
        try:
            job_id, data = self.beanstalk.reserve()
            if conn_error:
                self.logger.warn("beanstalk reconnected")
        except ConnectionError:
            if not conn_error:
                self.logger.warn("beanstalk connection error")
            raise
        try:
            for chunk in self._fetch_chunks_from_event(job_id, data):
                yield chunk
            self.beanstalk.delete(job_id)
        except Exception:
            self.logger.exception("handling event %s (bury)", job_id)
            self.beanstalk.bury(job_id)

    def _fetch_chunks_from_beanstalk(self):
        """
        Endlessly yield the chunks described by beanstalkd jobs.

        After a connection error, wait one second and try again.
        """
        conn_error = False
        while 1:
            try:
                # NOTE(review): a new connection is opened before each
                # reserved job, not only after an error -- confirm that
                # the previous one is released by the Beanstalk client.
                self._connect_to_beanstalk()
                for chunk in self._handle_beanstalk_event(conn_error):
                    conn_error = False
                    yield chunk
            except ConnectionError:
                conn_error = True
                time.sleep(1.0)

    def _fetch_chunks_from_file(self):
        """
        Yield [container_id, content_id, chunk_id_or_pos, None] entries
        read from the input file.

        Blank lines and lines starting with '#' are ignored. Fields are
        separated by '|'; anything after the third field is dropped.
        """
        with open(self.input_file, 'r') as ifile:
            for line in ifile:
                stripped = line.strip()
                if stripped and not stripped.startswith('#'):
                    yield stripped.split('|', 3)[:3] + [None]

    def _fetch_chunks(self):
        """
        Return an iterable of the chunks to rebuild, from the first
        configured source: input file, beanstalkd, then rdir.
        """
        if self.input_file:
            return self._fetch_chunks_from_file()
        elif self.beanstalkd_addr:
            return self._fetch_chunks_from_beanstalk()
        else:
            return self.rdir_client.chunk_fetch(self.volume,
                                                limit=self.rdir_fetch_limit,
                                                rebuild=True)

    def rebuilder_pass_with_lock(self):
        """
        Run `rebuilder_pass` while holding the rdir admin lock of the
        volume; the lock is released even if the pass fails.
        """
        self.rdir_client.admin_lock(self.volume,
                                    "rebuilder on %s" % gethostname())
        try:
            self.rebuilder_pass()
        finally:
            self.rdir_client.admin_unlock(self.volume)

    def rebuilder_pass(self):
        """
        Rebuild (or, in dry-run mode, only log) every chunk returned by
        `_fetch_chunks`, rate-limited by `max_chunks_per_second`.

        A 'RUN' line is logged whenever at least `report_interval`
        seconds have passed since the last report, and a 'DONE' line is
        logged when the source of chunks is exhausted.
        """
        start_time = report_time = time.time()

        rebuilder_time = 0

        chunks = self._fetch_chunks()
        for cid, content_id, chunk_id_or_pos, _ in chunks:
            loop_time = time.time()
            if self.dry_run:
                self.dryrun_chunk_rebuild(cid, content_id, chunk_id_or_pos)
            else:
                self.safe_chunk_rebuild(cid, content_id, chunk_id_or_pos)

            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
            self.total_chunks_processed += 1
            now = time.time()

            if now - self.last_reported >= self.report_interval:
                # Same guard as for the final report: the very first
                # report may come before the clock has moved.
                interval = (now - report_time) or 0.000001
                self.logger.info(
                    'RUN  %(volume)s '
                    'started=%(start_time)s '
                    'passes=%(passes)d '
                    'errors=%(errors)d '
                    'chunks=%(nb_chunks)d %(c_rate).2f/s '
                    'bytes=%(nb_bytes)d %(b_rate).2fB/s '
                    'elapsed=%(total).2f '
                    '(rebuilder: %(success_rate).2f%%)' % {
                        'volume':
                        self.volume,
                        'start_time':
                        datetime.fromtimestamp(int(report_time)).isoformat(),
                        'passes':
                        self.passes,
                        'errors':
                        self.errors,
                        'nb_chunks':
                        self.total_chunks_processed,
                        'nb_bytes':
                        self.total_bytes_processed,
                        'c_rate':
                        self.passes / interval,
                        'b_rate':
                        self.bytes_processed / interval,
                        'total': (now - start_time),
                        'rebuilder_time':
                        rebuilder_time,
                        'success_rate':
                        100 * ((self.total_chunks_processed - self.errors) /
                               float(self.total_chunks_processed))
                    })
                report_time = now
                self.passes = 0
                self.bytes_processed = 0
                self.last_reported = now
            rebuilder_time += (now - loop_time)
        end_time = time.time()
        elapsed = (end_time - start_time) or 0.000001
        self.logger.info(
            'DONE %(volume)s '
            'started=%(start_time)s '
            'ended=%(end_time)s '
            'passes=%(passes)d '
            'elapsed=%(elapsed).02f '
            'errors=%(errors)d '
            'chunks=%(nb_chunks)d %(c_rate).2f/s '
            'bytes=%(nb_bytes)d %(b_rate).2fB/s '
            'elapsed=%(rebuilder_time).2f '
            '(rebuilder: %(success_rate).2f%%)' % {
                'volume':
                self.volume,
                'start_time':
                datetime.fromtimestamp(int(start_time)).isoformat(),
                'end_time':
                datetime.fromtimestamp(int(end_time)).isoformat(),
                'passes':
                self.passes,
                'elapsed':
                elapsed,
                'errors':
                self.errors,
                'nb_chunks':
                self.total_chunks_processed,
                'nb_bytes':
                self.total_bytes_processed,
                'c_rate':
                self.total_chunks_processed / elapsed,
                'b_rate':
                self.total_bytes_processed / elapsed,
                'rebuilder_time':
                rebuilder_time,
                'success_rate':
                100 * ((self.total_chunks_processed - self.errors) /
                       float(self.total_chunks_processed or 1))
            })

    def dryrun_chunk_rebuild(self, container_id, content_id, chunk_id_or_pos):
        """Log what would be rebuilt and count one pass."""
        self.logger.info(
            "[dryrun] Rebuilding "
            "container %s, content %s, chunk %s", container_id, content_id,
            chunk_id_or_pos)
        self.passes += 1

    def safe_chunk_rebuild(self, container_id, content_id, chunk_id_or_pos):
        """
        Rebuild one chunk, logging and counting any error instead of
        raising it. One pass is counted whatever the outcome.
        """
        try:
            self.chunk_rebuild(container_id, content_id, chunk_id_or_pos)
        except Exception as e:
            self.errors += 1
            self.logger.error('ERROR while rebuilding chunk %s|%s|%s: %s',
                              container_id, content_id, chunk_id_or_pos, e)

        self.passes += 1

    def chunk_rebuild(self, container_id, content_id, chunk_id_or_pos):
        """
        Find the chunk in the metadata of the object, rebuild it, then
        update the byte counters with the size of the chunk.

        :param container_id: id of the container holding the object.
        :param content_id: id of the object the chunk belongs to.
        :param chunk_id_or_pos: either a chunk position (any value shorter
            than 32 characters), or a chunk id, optionally given as a URL
            whose last path element is the id.
        :raises OrphanChunk: if the object does not exist, or if the chunk
            id is not referenced by the object.
        :raises ValueError: if the chunk is not hosted by `self.volume`.
        """
        self.logger.info('Rebuilding (container %s, content %s, chunk %s)',
                         container_id, content_id, chunk_id_or_pos)
        try:
            content = self.content_factory.get(container_id, content_id)
        except ContentNotFound:
            raise OrphanChunk('Content not found: possible orphan chunk')

        # `chunk` stays None when only a position is known: in that case
        # there is no original chunk to delete or to deindex afterwards.
        chunk = None
        chunk_id = None
        chunk_pos = None
        chunk_size = 0
        if len(chunk_id_or_pos) < 32:
            chunk_pos = chunk_id_or_pos
            # The metachunk position is the part before the first dot.
            metapos = int(chunk_pos.split('.', 1)[0])
            chunk_size = content.chunks.filter(metapos=metapos).all()[0].size
        else:
            if '/' in chunk_id_or_pos:
                chunk_id = chunk_id_or_pos.rsplit('/', 1)[-1]
            else:
                chunk_id = chunk_id_or_pos

            chunk = content.chunks.filter(id=chunk_id).one()
            if chunk is None:
                raise OrphanChunk(
                    "Chunk not found in content: possible orphan chunk")
            elif self.volume and chunk.host != self.volume:
                raise ValueError("Chunk does not belong to this volume")
            chunk_size = chunk.size

        content.rebuild_chunk(chunk_id,
                              allow_same_rawx=self.allow_same_rawx,
                              chunk_pos=chunk_pos)

        # Guard on `chunk`: referencing it unconditionally used to raise
        # UnboundLocalError (after a successful rebuild) for every chunk
        # designated by its position when try_chunk_delete was enabled.
        if self.try_chunk_delete and chunk is not None:
            try:
                content.blob_client.chunk_delete(chunk.url)
                self.logger.info("Chunk %s deleted", chunk.url)
            except NotFound as exc:
                self.logger.debug("Chunk %s: %s", chunk.url, exc)

        # This call does not raise exception if chunk is not referenced
        if chunk_id is not None:
            self.rdir_client.chunk_delete(chunk.host, container_id, content_id,
                                          chunk_id)

        self.bytes_processed += chunk_size
        self.total_bytes_processed += chunk_size
Beispiel #5
0
class BlobRebuilderWorker(RebuilderWorker):
    """
    Rebuilder worker specialized in the chunks of one volume.
    """

    def __init__(self, conf, logger, volume, try_chunk_delete=False, **kwargs):
        """
        :param conf: configuration; read through its `get` method.
        :param logger: logger handed to the parent class.
        :param volume: id of the volume whose chunks are rebuilt.
        :param try_chunk_delete: after a rebuild, try to delete the
            original chunk.
        :param kwargs: forwarded untouched to the parent constructor.
        """
        super(BlobRebuilderWorker, self).__init__(conf, logger, **kwargs)
        self.volume = volume
        # `bytes_processed` is the counter used for the byte rate of the
        # reports, `total_bytes_processed` the overall one.
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.dry_run = true_value(conf.get('dry_run', False))
        self.allow_same_rawx = true_value(conf.get('allow_same_rawx'))
        self.rdir_client = RdirClient(conf, logger=self.logger)
        self.content_factory = ContentFactory(conf, logger=self.logger)
        self.try_chunk_delete = try_chunk_delete

    def _rebuild_one(self, chunk, **kwargs):
        """
        Rebuild (or, in dry-run mode, only log) one chunk.

        :param chunk: 4-item sequence: container id, content id, chunk id
            or position, and a last item which is ignored.
        """
        cid, content_id, chunk_id_or_pos, _ = chunk
        if self.dry_run:
            self.dryrun_chunk_rebuild(cid, content_id, chunk_id_or_pos)
        else:
            self.safe_chunk_rebuild(cid, content_id, chunk_id_or_pos)

    def _get_report(self, num, start_time, end_time, total_time, report_time,
                    **kwargs):
        """
        Build the 'RUN' progress line of this worker.

        :param num: number of the worker.
        :param start_time: timestamp used to compute 'total_time'.
        :param end_time: timestamp of the end of the reported period.
        :param total_time: accepted for interface compatibility, unused.
        :param report_time: timestamp of the start of the reported period.
        :returns: the formatted report line.
        """
        # NOTE(review): passes, errors, total_items_processed,
        # waiting_time and rebuilder_time are not set in this class;
        # presumably they are maintained by RebuilderWorker -- confirm.
        # Guard both divisions: a report may be requested before the
        # clock has moved, or before any item has been processed.
        interval = (end_time - report_time) or 0.000001
        return ('RUN  %(volume)s '
                'worker=%(num)d '
                'started=%(start_time)s '
                'passes=%(passes)d '
                'errors=%(errors)d '
                'chunks=%(nb_chunks)d %(c_rate).2f/s '
                'bytes=%(nb_bytes)d %(b_rate).2fB/s '
                'waiting_time=%(waiting_time).2f '
                'rebuilder_time=%(rebuilder_time).2f '
                'total_time=%(total_time).2f '
                '(rebuilder: %(success_rate).2f%%)' % {
                    'volume':
                    self.volume,
                    'num':
                    num,
                    'start_time':
                    datetime.fromtimestamp(int(report_time)).isoformat(),
                    'passes':
                    self.passes,
                    'errors':
                    self.errors,
                    'nb_chunks':
                    self.total_items_processed,
                    'nb_bytes':
                    self.total_bytes_processed,
                    'c_rate':
                    self.passes / interval,
                    'b_rate':
                    self.bytes_processed / interval,
                    'waiting_time':
                    self.waiting_time,
                    'rebuilder_time':
                    self.rebuilder_time,
                    'total_time': (end_time - start_time),
                    'success_rate':
                    100 * ((self.total_items_processed - self.errors) /
                           float(self.total_items_processed or 1))
                })

    def dryrun_chunk_rebuild(self, container_id, content_id, chunk_id_or_pos):
        """Log what would be rebuilt and count one pass."""
        self.logger.info(
            "[dryrun] Rebuilding "
            "container %s, content %s, chunk %s", container_id, content_id,
            chunk_id_or_pos)
        self.passes += 1

    def safe_chunk_rebuild(self, container_id, content_id, chunk_id_or_pos):
        """
        Rebuild one chunk, logging and counting any error instead of
        raising it. One pass is counted whatever the outcome.
        """
        try:
            self.chunk_rebuild(container_id, content_id, chunk_id_or_pos)
        except Exception as e:
            self.errors += 1
            self.logger.error('ERROR while rebuilding chunk %s|%s|%s: %s',
                              container_id, content_id, chunk_id_or_pos, e)

        self.passes += 1

    def chunk_rebuild(self, container_id, content_id, chunk_id_or_pos):
        """
        Find the chunk in the metadata of the object, rebuild it, then
        update the byte counters with the size of the chunk.

        :param container_id: id of the container holding the object.
        :param content_id: id of the object the chunk belongs to.
        :param chunk_id_or_pos: either a chunk position (any value shorter
            than 32 characters), or a chunk id, optionally given as a URL
            whose last path element is the id.
        :raises OrphanChunk: if the object does not exist, or if the chunk
            id is not referenced by the object.
        :raises ValueError: if the chunk is not hosted by `self.volume`.
        """
        self.logger.info('Rebuilding (container %s, content %s, chunk %s)',
                         container_id, content_id, chunk_id_or_pos)
        try:
            content = self.content_factory.get(container_id, content_id)
        except ContentNotFound:
            raise OrphanChunk('Content not found: possible orphan chunk')

        # `chunk` stays None when only a position is known: in that case
        # there is no original chunk to delete or to deindex afterwards.
        chunk = None
        chunk_id = None
        chunk_pos = None
        chunk_size = 0
        if len(chunk_id_or_pos) < 32:
            chunk_pos = chunk_id_or_pos
            # The metachunk position is the part before the first dot.
            metapos = int(chunk_pos.split('.', 1)[0])
            chunk_size = content.chunks.filter(metapos=metapos).all()[0].size
        else:
            if '/' in chunk_id_or_pos:
                chunk_id = chunk_id_or_pos.rsplit('/', 1)[-1]
            else:
                chunk_id = chunk_id_or_pos

            chunk = content.chunks.filter(id=chunk_id).one()
            if chunk is None:
                raise OrphanChunk(
                    "Chunk not found in content: possible orphan chunk")
            elif self.volume and chunk.host != self.volume:
                raise ValueError("Chunk does not belong to this volume")
            chunk_size = chunk.size

        content.rebuild_chunk(chunk_id,
                              allow_same_rawx=self.allow_same_rawx,
                              chunk_pos=chunk_pos)

        # Guard on `chunk`: referencing it unconditionally used to raise
        # UnboundLocalError (after a successful rebuild) for every chunk
        # designated by its position when try_chunk_delete was enabled.
        if self.try_chunk_delete and chunk is not None:
            try:
                content.blob_client.chunk_delete(chunk.url)
                self.logger.info("Chunk %s deleted", chunk.url)
            except NotFound as exc:
                self.logger.debug("Chunk %s: %s", chunk.url, exc)

        # This call does not raise exception if chunk is not referenced
        if chunk_id is not None:
            self.rdir_client.chunk_delete(chunk.host, container_id, content_id,
                                          chunk_id)

        self.bytes_processed += chunk_size
        self.total_bytes_processed += chunk_size
Beispiel #6
0
class EventWorker(object):
    """
    Worker receiving events on a ZeroMQ REP socket and dispatching each
    of them to the matching ``handle_*`` method.
    """

    def __init__(self, conf, name, context, **kwargs):
        """
        :param conf: configuration; read through its `get` method and
            handed to the logger factory and service clients.
        :param name: name of the worker, used in log messages.
        :param context: ZeroMQ context used to create the socket.
        :param kwargs: only 'verbose' is read (defaults to False).
        """
        self.conf = conf
        self.name = name
        verbose = kwargs.pop('verbose', False)
        self.logger = get_logger(self.conf, verbose=verbose)
        self.init_zmq(context)
        self.cs = ConscienceClient(self.conf)
        self.rdir = RdirClient(self.conf)
        self._acct_addr = None
        # Time of the last successful lookup of the account service.
        # Kept apart from `acct_update`, which used to hold both the
        # configuration flag and this timestamp.
        self._acct_last_refresh = 0
        self.acct_refresh_interval = int_value(
            conf.get('acct_refresh_interval'), 60
        )
        # Configuration flag: when false, container events are not
        # forwarded to the account service.
        self.acct_update = true_value(
            conf.get('acct_update', True))
        self.session = requests.Session()
        self.failed = False

    def start(self):
        """Mark the worker as running and enter the processing loop."""
        self.logger.info('worker "%s" starting', self.name)
        self.running = True
        self.run()

    def stop(self):
        """Ask the processing loop to exit after the current event."""
        self.logger.info('worker "%s" stopping', self.name)
        self.running = False

    def init_zmq(self, context):
        """Create the REP socket and connect it to the event frontend."""
        socket = context.socket(zmq.REP)
        socket.connect('inproc://event-front')
        self.socket = socket

    def safe_ack(self, msg):
        """Send a reply, logging a warning instead of raising on error."""
        try:
            self.socket.send_multipart(msg)
        except Exception:
            self.logger.warn('Unable to ack event')

    def run(self):
        """
        Receive, process and acknowledge events while `running` is true.

        The reply carries the first frame of the request followed by "0"
        on success or an empty string on failure. Any exception marks the
        worker as failed and is re-raised.
        """
        try:
            while self.running:
                msg = self.socket.recv_multipart()
                self.logger.debug("msg received: %s", msg)
                event = decode_msg(msg)
                success = self.process_event(event)
                f = "0" if success else ""
                self.safe_ack([msg[0], f])
        except Exception as e:
            self.logger.warn('ERROR in worker "%s"', e)
            self.failed = True
            # Bare raise keeps the original traceback.
            raise
        finally:
            self.logger.info('worker "%s" stopped', self.name)

    def process_event(self, event):
        """
        Run the handler matching the event.

        :returns: True if the event was handled or has no handler,
            False if the handler raised an exception.
        """
        handler = self.get_handler(event)
        if not handler:
            self.logger.warn("No handler found")
            # mark as success
            return True
        try:
            handler(event)
        except Exception:
            # Not returning from a `finally` clause: that used to swallow
            # KeyboardInterrupt/SystemExit too, and report them as success.
            self.logger.exception('Failed to handle event')
            return False
        return True

    def get_handler(self, event):
        """
        Return the bound method handling the 'event' type of `event`,
        or None if the type is missing or unknown.
        """
        event_type = event.get('event')
        if not event_type:
            return None

        # Compared with == (not looked up in a dict) so that an unhashable
        # event type yields None instead of raising.
        handlers = (
            (EventType.CONTAINER_PUT, self.handle_container_put),
            (EventType.CONTAINER_DESTROY, self.handle_container_destroy),
            (EventType.CONTAINER_UPDATE, self.handle_container_update),
            (EventType.OBJECT_PUT, self.handle_object_put),
            (EventType.OBJECT_DELETE, self.handle_object_delete),
            (EventType.REFERENCE_UPDATE, self.handle_reference_update),
            (EventType.CHUNK_PUT, self.handle_chunk_put),
            (EventType.CHUNK_DELETE, self.handle_chunk_delete),
            (EventType.PING, self.handle_ping),
        )
        for known_type, handler in handlers:
            if event_type == known_type:
                return handler
        return None

    @property
    def acct_addr(self):
        """
        Address of an account service instance.

        Looked up through conscience when unknown or older than
        `acct_refresh_interval` seconds. If the lookup fails, a warning is
        logged and the previous value (possibly None) is returned.
        """
        if not self._acct_addr or self.acct_refresh():
            try:
                acct_instance = self.cs.next_instance(ACCOUNT_SERVICE)
                self._acct_addr = acct_instance.get('addr')
                self._acct_last_refresh = time.time()
            except Exception:
                self.logger.warn('Unable to find account instance')
        return self._acct_addr

    def acct_refresh(self):
        """Tell if the account service address is due for a refresh."""
        elapsed = time.time() - self._acct_last_refresh
        return elapsed > self.acct_refresh_interval

    def handle_container_put(self, event):
        """
        Handle container creation.
        :param event:
        """
        self.logger.debug('worker "%s" handle container put', self.name)
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')

        event = {'mtime': mtime, 'name': name}
        self.session.post(uri, params={'id': account}, data=json.dumps(event))

    def handle_container_update(self, event):
        """
        Handle container update.
        :param event:
        """
        self.logger.debug('worker "%s" handle container update', self.name)
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        # NOTE(review): 'url' is read from the event itself here, but from
        # event['data'] in the put/destroy handlers -- confirm which one
        # matches the event schema.
        name = event.get('url').get('user')
        account = event.get('url').get('account')
        bytes_count = data.get('bytes-count', 0)
        object_count = data.get('object-count', 0)

        event = {
            'mtime': mtime,
            'name': name,
            'bytes': bytes_count,
            'objects': object_count
        }
        self.session.post(uri, params={'id': account}, data=json.dumps(event))

    def handle_container_destroy(self, event):
        """
        Handle container destroy.
        :param event:
        """
        self.logger.debug('worker "%s" handle container destroy', self.name)
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        dtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')

        event = {'dtime': dtime, 'name': name}
        self.session.post(uri, params={'id': account}, data=json.dumps(event))

    def handle_object_delete(self, event):
        """
        Handle object deletion.
        Delete the chunks of the object.
        :param event:
        """
        self.logger.debug('worker "%s" handle object delete', self.name)
        pile = GreenPile(PARALLEL_CHUNKS_DELETE)

        chunks = [item for item in event.get('data')
                  if item.get('type') == 'chunks']
        if not chunks:
            self.logger.warn('No chunks found in event data')
            return

        def delete_chunk(chunk):
            # The 'id' of a chunk is the URL the DELETE request is sent to.
            resp = None
            try:
                with Timeout(CHUNK_TIMEOUT):
                    resp = self.session.delete(chunk['id'])
            except (Exception, Timeout) as e:
                self.logger.exception(e)
            return resp

        for chunk in chunks:
            pile.spawn(delete_chunk, chunk)

        # Explicit None test: a requests Response is falsy when its status
        # is an error, so `if resp` silently dropped the failed deletions
        # that the warning below is meant to report.
        resps = [resp for resp in pile if resp is not None]

        for resp in resps:
            if resp.status_code == 204:
                self.logger.info('deleted chunk %s', resp.url)
            else:
                self.logger.warn('failed to delete chunk %s', resp.url)

    def handle_object_put(self, event):
        """
        Handle object creation.
        TODO
        :param event:
        """
        self.logger.debug('worker "%s" handle object put', self.name)

    def handle_reference_update(self, event):
        """
        Handle reference update.
        TODO
        :param event
        """
        self.logger.debug('worker "%s" handle reference update', self.name)

    def handle_chunk_put(self, event):
        """
        Handle chunk creation.

        The four identifiers are removed from the 'data' member of the
        event (which is modified in place); what remains, plus the event
        time as 'mtime', is pushed to rdir as chunk metadata.
        :param event
        :raises KeyError: if one of the identifiers is missing.
        """
        self.logger.debug('worker "%s" handle chunk creation', self.name)

        when = event.get('when')
        data = event.get('data')
        volume_id = data.pop('volume_id')
        container_id = data.pop('container_id')
        content_id = data.pop('content_id')
        chunk_id = data.pop('chunk_id')
        data['mtime'] = when
        self.rdir.chunk_push(volume_id, container_id, content_id, chunk_id,
                             **data)

    def handle_chunk_delete(self, event):
        """
        Handle chunk deletion.
        :param event
        """
        self.logger.debug('worker "%s" handle chunk deletion', self.name)

        data = event.get('data')
        volume_id = data.get('volume_id')
        container_id = data.get('container_id')
        content_id = data.get('content_id')
        chunk_id = data.get('chunk_id')
        self.rdir.chunk_delete(volume_id, container_id, content_id, chunk_id)

    def handle_ping(self, event):
        """
        Handle ping
        :param event
        """
        self.logger.debug('worker "%s" handle ping', self.name)
Beispiel #7
0
class BlobRebuilderWorker(object):
    """
    Rebuild the chunks of one volume that the rdir client returns when
    asked for chunks to rebuild, and log throughput statistics.
    """

    def __init__(self, conf, logger, volume):
        """
        :param conf: configuration dictionary; the keys `dry_run`,
            `report_interval`, `chunks_per_second`, `bytes_per_second`,
            `rdir_fetch_limit` and `allow_same_rawx` are read
        :param logger: logger to use; when falsy, one is built from `conf`
        :param volume: identifier of the volume to rebuild, passed to
            every rdir client call
        """
        self.conf = conf
        self.logger = logger or get_logger(conf)
        self.volume = volume
        self.run_time = 0
        # Counters reset at each progress report
        self.passes = 0
        self.bytes_processed = 0
        # Counters never reset by this class
        self.errors = 0
        self.total_bytes_processed = 0
        self.total_chunks_processed = 0
        self.last_reported = 0
        self.chunks_run_time = 0
        self.bytes_running_time = 0
        self.dry_run = true_value(conf.get('dry_run', False))
        self.report_interval = int_value(conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(conf.get('chunks_per_second'),
                                               30)
        # Read from the configuration but not used by this class
        self.max_bytes_per_second = int_value(conf.get('bytes_per_second'),
                                              10000000)
        self.rdir_fetch_limit = int_value(conf.get('rdir_fetch_limit'), 100)
        self.allow_same_rawx = true_value(conf.get('allow_same_rawx'))
        self.rdir_client = RdirClient(conf)
        self.content_factory = ContentFactory(conf)

    def rebuilder_pass_with_lock(self):
        """
        Run one rebuild pass while holding the rdir admin lock on the
        volume. The lock is released even if the pass raises.
        """
        self.rdir_client.admin_lock(self.volume,
                                    "rebuilder on %s" % gethostname())
        try:
            self.rebuilder_pass()
        finally:
            self.rdir_client.admin_unlock(self.volume)

    def rebuilder_pass(self):
        """
        Fetch the chunks to rebuild from the rdir and process them one by
        one (or only log them in dry-run mode), rate-limited by
        `max_chunks_per_second`.

        A 'RUN' line is logged each time `report_interval` seconds have
        elapsed since the last report, and a 'DONE' line at the end.
        """
        start_time = report_time = time.time()
        rebuilder_time = 0

        chunks = self.rdir_client.chunk_fetch(self.volume,
                                              limit=self.rdir_fetch_limit,
                                              rebuild=True)
        for container_id, content_id, chunk_id, _data in chunks:
            loop_time = time.time()

            if self.dry_run:
                self.dryrun_chunk_rebuild(container_id, content_id, chunk_id)
            else:
                self.safe_chunk_rebuild(container_id, content_id, chunk_id)

            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
            self.total_chunks_processed += 1
            now = time.time()

            if now - self.last_reported >= self.report_interval:
                # Same guard as the final report: two clock reads may
                # return the same value, which must not crash the pass.
                since_report = (now - report_time) or 0.000001
                since_start = (now - start_time) or 0.000001
                self.logger.info(
                    'RUN  %(volume)s '
                    'started=%(start_time)s '
                    'passes=%(passes)d '
                    'errors=%(errors)d '
                    'chunks=%(nb_chunks)d %(c_rate).2f/s '
                    'bytes=%(nb_bytes)d %(b_rate).2fB/s '
                    'elapsed=%(total).2f '
                    '(rebuilder: %(rebuilder_rate).2f%%)' % {
                        'volume': self.volume,
                        'start_time': datetime.fromtimestamp(
                            int(report_time)).isoformat(),
                        'passes': self.passes,
                        'errors': self.errors,
                        'nb_chunks': self.total_chunks_processed,
                        'nb_bytes': self.total_bytes_processed,
                        'c_rate': self.passes / since_report,
                        'b_rate': self.bytes_processed / since_report,
                        'total': (now - start_time),
                        'rebuilder_time': rebuilder_time,
                        'rebuilder_rate':
                        100.0 * rebuilder_time / float(since_start)
                    })
                report_time = now
                self.passes = 0
                self.bytes_processed = 0
                self.last_reported = now
            rebuilder_time += (now - loop_time)
        end_time = time.time()
        elapsed = (end_time - start_time) or 0.000001
        self.logger.info(
            'DONE %(volume)s '
            'started=%(start_time)s '
            'ended=%(end_time)s '
            'elapsed=%(elapsed).02f '
            'errors=%(errors)d '
            'chunks=%(nb_chunks)d %(c_rate).2f/s '
            'bytes=%(nb_bytes)d %(b_rate).2fB/s '
            'elapsed=%(rebuilder_time).2f '
            '(rebuilder: %(rebuilder_rate).2f%%)' % {
                'volume': self.volume,
                'start_time': datetime.fromtimestamp(
                    int(start_time)).isoformat(),
                'end_time': datetime.fromtimestamp(int(end_time)).isoformat(),
                'elapsed': elapsed,
                # `self.errors` is never reset, so it already holds the
                # whole count; adding per-report snapshots to it would
                # count the same errors several times.
                'errors': self.errors,
                'nb_chunks': self.total_chunks_processed,
                'nb_bytes': self.total_bytes_processed,
                'c_rate': self.total_chunks_processed / elapsed,
                'b_rate': self.total_bytes_processed / elapsed,
                'rebuilder_time': rebuilder_time,
                'rebuilder_rate': 100.0 * rebuilder_time / float(elapsed)
            })

    def dryrun_chunk_rebuild(self, container_id, content_id, chunk_id):
        """Log the chunk that would be rebuilt and count it as a pass."""
        self.logger.info(
            "[dryrun] Rebuilding "
            "container %s, content %s, chunk %s", container_id, content_id,
            chunk_id)
        self.passes += 1

    def safe_chunk_rebuild(self, container_id, content_id, chunk_id):
        """
        Rebuild one chunk; any exception is logged and counted in
        `errors` instead of being propagated. Always counts as a pass.
        """
        try:
            self.chunk_rebuild(container_id, content_id, chunk_id)
        except Exception as e:
            self.errors += 1
            self.logger.error('ERROR while rebuilding chunk %s|%s|%s) : %s',
                              container_id, content_id, chunk_id, e)

        self.passes += 1

    def chunk_rebuild(self, container_id, content_id, chunk_id):
        """
        Rebuild one chunk, then remove its entry from the rdir of the
        volume and add its size to the byte counters.

        :raises OrphanChunk: if the content cannot be found, or if the
            chunk is not listed in the content
        """
        self.logger.info('Rebuilding (container %s, content %s, chunk %s)',
                         container_id, content_id, chunk_id)

        try:
            content = self.content_factory.get(container_id, content_id)
        except ContentNotFound:
            # Same exception name as below, so that both error paths
            # depend on a single imported name.
            raise OrphanChunk('Content not found')

        chunk = content.chunks.filter(id=chunk_id).one()
        if chunk is None:
            raise OrphanChunk("Chunk not found in content")
        chunk_size = chunk.size

        content.rebuild_chunk(chunk_id, allow_same_rawx=self.allow_same_rawx)

        self.rdir_client.chunk_delete(self.volume, container_id, content_id,
                                      chunk_id)

        self.bytes_processed += chunk_size
        self.total_bytes_processed += chunk_size
Beispiel #8
0
class ChunkOperator(object):
    """
    Execute maintenance operations on chunks.
    """
    def __init__(self, conf, logger=None):
        """
        :param conf: configuration dictionary, passed to the clients
        :param logger: logger to use; when falsy, one is built from `conf`
        """
        self.conf = conf
        self.logger = logger or get_logger(conf)
        self.rdir_client = RdirClient(conf, logger=self.logger)
        self.content_factory = ContentFactory(conf, logger=self.logger)

    def rebuild(self,
                container_id,
                content_id,
                chunk_id_or_pos,
                rawx_id=None,
                try_chunk_delete=False,
                allow_frozen_container=True,
                allow_same_rawx=True):
        """
        Try to find the chunk in the metadata of the specified object,
        then rebuild it.

        :param container_id: identifier of the container of the object
        :param content_id: identifier of the object
        :param chunk_id_or_pos: a chunk position, a chunk ID, or a chunk
            URL (in which case the host of the URL replaces `rawx_id`)
        :param rawx_id: when set, only a chunk hosted there is accepted
        :param try_chunk_delete: if true, try to delete the old chunk
            after the rebuild; a failure is only logged. Has no effect
            when the chunk was designated by its position, since no old
            chunk is looked up in that case.
        :param allow_frozen_container: forwarded to `rebuild_chunk`
        :param allow_same_rawx: forwarded to `rebuild_chunk`
        :returns: whatever `content.rebuild_chunk` returns
        :raises OrphanChunk: if the object does not exist, or if no chunk
            of the object matches
        :raises ValueError: if the chunk found is not hosted by `rawx_id`
        """
        try:
            content = self.content_factory.get(container_id, content_id)
        except ContentNotFound:
            raise OrphanChunk('Content not found: possible orphan chunk')

        # Stays None when the chunk is designated by its position: the
        # cleanup steps below need a known chunk and must be skipped.
        chunk = None
        chunk_pos = None
        if looks_like_chunk_position(chunk_id_or_pos):
            chunk_pos = chunk_id_or_pos
            chunk_id = None
        else:
            if '/' in chunk_id_or_pos:
                parsed = urlparse(chunk_id_or_pos)
                chunk_id = parsed.path.lstrip('/')
                rawx_id = parsed.netloc
            else:
                chunk_id = chunk_id_or_pos

            candidates = content.chunks.filter(id=chunk_id)
            # FIXME(FVE): if for some reason the chunks have been registered
            # with an IP address and port instead of an ID, this won't work.
            if rawx_id:
                candidates = candidates.filter(host=rawx_id)
            chunk = candidates.one()
            if chunk is None:
                raise OrphanChunk(
                    'Chunk not found in content: possible orphan chunk: ' +
                    '%s' % (candidates.all(), ))
            elif rawx_id and chunk.host != rawx_id:
                raise ValueError('Chunk does not belong to this rawx')

        rebuilt_bytes = content.rebuild_chunk(
            chunk_id,
            service_id=rawx_id,
            allow_frozen_container=allow_frozen_container,
            allow_same_rawx=allow_same_rawx,
            chunk_pos=chunk_pos)

        if try_chunk_delete and chunk is not None:
            try:
                content.blob_client.chunk_delete(chunk.url)
                self.logger.info("Old chunk %s deleted", chunk.url)
            except Exception as exc:
                self.logger.warn('Failed to delete old chunk %s: %s',
                                 chunk.url, exc)

        # This call does not raise exception if chunk is not referenced
        if chunk is not None:
            try:
                self.rdir_client.chunk_delete(chunk.host, container_id,
                                              content_id, chunk_id)
            except Exception as exc:
                self.logger.warn(
                    'Failed to delete chunk entry (%s) from the rdir (%s): %s',
                    chunk_id, chunk.host, exc)

        return rebuilt_bytes
Beispiel #9
0
class Harasser(object):
    """
    Load an rdir service by pushing fake chunk entries to it, then
    removing them, while measuring the request rate.
    """

    def __init__(self, ns, max_containers=256, max_contents=256):
        """
        :param ns: name of the namespace to work on
        :param max_containers: stored on the instance; only referenced
            by commented-out code
        :param max_contents: stored on the instance; only referenced by
            commented-out code
        """
        conf = {'namespace': ns}
        self.cs = ConscienceClient(conf)
        self.rdir = RdirClient(conf)
        self.rawx_list = [x['addr'] for x in self.cs.all_services('rawx')]
        # Entries pushed and not removed yet, as tuples of
        # (volume, container, content, chunk)
        self.sent = set()
        self.max_containers = max_containers
        self.max_contents = max_contents
        self.pushed_count = 0
        self.pushed_time = 0
        self.removed_count = 0
        self.removed_time = 0

    @staticmethod
    def _rate(count, elapsed):
        """Return `count / elapsed`, or 0 when no time has been measured."""
        return count / elapsed if elapsed > 0 else 0

    def harass_put(self, loops=None):
        """
        Push fake chunk entries to the rdir and remember them in `sent`.

        :param loops: number of entries to push; a random number between
            1000 and 2000 when None
        """
        if loops is None:
            loops = random.randint(1000, 2000)
        print("Pushing %d fake chunks" % loops)
        loop = loops
        count_start_container = random.randrange(2**20)
        count_start_content = random.randrange(2**20)
        start = time.time()
        nb_rawx = len(self.rawx_list)
        while loop > 0:
            args = {'mtime': int(start)}
            # vol_id = random.choice(self.rawx_list)
            # container_id = "%064X" % (random.randrange(self.max_containers))
            # content_id = "%032X" % (random.randrange(self.max_contents))
            vol_id = self.rawx_list[loop % nb_rawx]
            container_id = "%064X" % (loop + count_start_container)
            content_id = "%032X" % (loop + count_start_content)
            chunk_id = "http://%s/%064X" \
                % (vol_id, random.randrange(2**128))
            self.rdir.chunk_push(vol_id, container_id, content_id, chunk_id,
                                 **args)
            self.sent.add((vol_id, container_id, content_id, chunk_id))
            loop -= 1
        elapsed = time.time() - start
        self.pushed_count += loops
        self.pushed_time += elapsed
        print("%d pushed in %.3fs, %d req/s" % (loops, elapsed,
                                                self._rate(loops, elapsed)))

    def harass_del(self, min_loops=0):
        """
        Remove from the rdir a random number of the entries in `sent`.

        :param min_loops: minimum number of entries to remove, capped to
            the number of entries available
        """
        min_loops = min(min_loops, len(self.sent))
        loops = random.randint(min_loops, len(self.sent))
        print("Removing %d fake chunks" % loops)
        loop = loops
        start = time.time()
        while loop > 0:
            args = self.sent.pop()
            self.rdir.chunk_delete(*args)
            loop -= 1
        elapsed = time.time() - start
        self.removed_count += loops
        self.removed_time += elapsed
        print("%d removed in %.3fs, %d req/s" % (loops, elapsed,
                                                 self._rate(loops, elapsed)))

    def __call__(self):
        """
        Alternate push and removal phases until interrupted from the
        keyboard, then remove what is left and print overall statistics.
        """
        try:
            while True:
                self.harass_put()
                self.harass_del()
        except KeyboardInterrupt:
            print("Cleaning...")
            self.harass_del(len(self.sent))
            print("Stats:")
            # The timers are still 0 when the interruption comes before
            # the end of the first phase: the rates must not divide by 0.
            print("Pushed %d in %.3fs, %d req/s" %
                  (self.pushed_count, self.pushed_time,
                   self._rate(self.pushed_count, self.pushed_time)))
            print("Removed %d in %.3fs, %d req/s" %
                  (self.removed_count, self.removed_time,
                   self._rate(self.removed_count, self.removed_time)))
Beispiel #10
0
class EventWorker(Worker):
    """
    Worker reading events from a beanstalk queue and dispatching each of
    them to the handler matching its type.
    """

    def init(self):
        """
        Create the HTTP session and the service clients, read the
        settings from the configuration, then run the parent `init`.
        """
        eventlet.monkey_patch(os=False)
        self.session = requests.Session()
        self.cs = ConscienceClient(self.conf)
        self.rdir = RdirClient(self.conf)
        self._acct_addr = None
        self.graceful_timeout = 1
        self.acct_refresh_interval = int_value(
            self.conf.get('acct_refresh_interval'), 60)
        self.concurrency = int_value(self.conf.get('concurrency'), 1000)
        # NOTE(review): `acct_update` starts as the "account updates
        # enabled" flag, but the `acct_addr` property overwrites it with
        # the time of the last address lookup.
        self.acct_update = true_value(self.conf.get('acct_update', True))
        self.rdir_update = true_value(self.conf.get('rdir_update', True))
        super(EventWorker, self).init()

    def notify(self):
        """TODO"""
        pass

    def safe_decode_job(self, job):
        """
        Decode a JSON job body.

        :returns: the decoded object, or None (after logging a warning)
            if the body cannot be decoded
        """
        try:
            return json.loads(job)
        except Exception as e:
            # str(e) rather than e.message: exceptions have no `message`
            # attribute on Python 3.
            self.logger.warn('ERROR decoding job "%s"', str(e))
            return None

    def run(self):
        """
        Connect to the queue, spawn the event handling green thread and
        wait until the worker is no longer alive; then ask the green
        thread to stop, and kill it if it has not finished within
        `graceful_timeout` seconds.
        """
        queue_url = self.conf.get('queue_url', 'tcp://127.0.0.1:11300')
        self.beanstalk = Beanstalk.from_url(queue_url)

        gt = eventlet.spawn(self.handle)

        while self.alive:
            self.notify()
            try:
                eventlet.sleep(1.0)
            except AssertionError:
                self.alive = False
                break

        self.notify()
        try:
            with Timeout(self.graceful_timeout) as t:
                gt.kill(StopServe())
                gt.wait()
        except Timeout as te:
            # Only our own timeout means "the green thread did not stop"
            if te != t:
                raise
            gt.kill()

    def handle(self):
        """
        Reserve jobs from the queue forever and process them, until
        `StopServe` is raised.

        A job is deleted once it has gone through `process_event`
        (whatever its result) or when its body cannot be decoded. If an
        exception is raised in between, it is logged and the job is not
        deleted.
        """
        try:
            while True:
                job_id, data = self.beanstalk.reserve()
                try:
                    event = self.safe_decode_job(data)
                    if event:
                        self.process_event(event)
                    self.beanstalk.delete(job_id)
                except Exception:
                    self.logger.exception("ERROR handling event %s", job_id)
        except StopServe:
            self.logger.info('Stopping event handler')

    def process_event(self, event):
        """
        Run the handler matching the event.

        :returns: False if the handler raised an `Exception`, True
            otherwise (including when there is no handler for the event)
        """
        handler = self.get_handler(event)
        if not handler:
            self.logger.warn("ERROR no handler found for event")
            # mark as success
            return True
        try:
            handler(event)
        except Exception:
            # No `return` inside a `finally` here: it would also discard
            # the exceptions which do not derive from Exception, such as
            # the ones used to stop a green thread.
            self.logger.exception('ERROR while handling event "%s"',
                                  event.get('event'))
            return False
        return True

    def get_handler(self, event):
        """
        :returns: the bound method handling the type found in the 'event'
            field, or None if the type is missing or unknown
        """
        event_type = event.get('event')
        if not event_type:
            return None

        handlers = (
            (EventType.CONTAINER_PUT, self.handle_container_put),
            (EventType.CONTAINER_DESTROY, self.handle_container_destroy),
            (EventType.CONTAINER_UPDATE, self.handle_container_update),
            (EventType.OBJECT_PUT, self.handle_object_put),
            (EventType.OBJECT_DELETE, self.handle_object_delete),
            (EventType.REFERENCE_UPDATE, self.handle_reference_update),
            (EventType.CHUNK_PUT, self.handle_chunk_put),
            (EventType.CHUNK_DELETE, self.handle_chunk_delete),
            (EventType.PING, self.handle_ping),
        )
        # Compared with == (not hashed) so that any decoded JSON value
        # is accepted as an event type.
        for known_type, handler in handlers:
            if event_type == known_type:
                return handler
        return None

    @property
    def acct_addr(self):
        """
        Address of an account service instance.

        Looked up through the conscience client when not known yet or
        when `acct_refresh` says so. If the lookup fails, a warning is
        logged and the previous value (possibly None) is returned.
        """
        if not self._acct_addr or self.acct_refresh():
            try:
                acct_instance = self.cs.next_instance(ACCOUNT_SERVICE)
                self._acct_addr = acct_instance.get('addr')
                self.acct_update = time.time()
            except Exception:
                self.logger.warn('Unable to find account instance')
        return self._acct_addr

    def acct_refresh(self):
        """
        :returns: True if more than `acct_refresh_interval` seconds
            separate the current time from the value of `acct_update`
        """
        return (time.time() - self.acct_update) > self.acct_refresh_interval

    def handle_container_put(self, event):
        """
        Handle container creation: post the container name and its
        modification time to the account service.
        Does nothing when `acct_update` is false.
        :param event:
        """
        self.logger.debug('worker handle container put')
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')

        event = {'mtime': mtime, 'name': name}
        self.session.post(uri, params={'id': account}, json=event)

    def handle_container_update(self, event):
        """
        Handle container update: post the container name, modification
        time and usage counters to the account service.
        Does nothing when `acct_update` is false.
        :param event:
        """
        self.logger.debug('worker handle container update')
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        # NOTE(review): 'url' is read from the event itself here, but
        # from its 'data' field in the put and destroy handlers --
        # confirm against the event schema.
        name = event.get('url').get('user')
        account = event.get('url').get('account')
        bytes_count = data.get('bytes-count', 0)
        object_count = data.get('object-count', 0)

        event = {
            'mtime': mtime,
            'name': name,
            'bytes': bytes_count,
            'objects': object_count
        }
        self.session.post(uri, params={'id': account}, json=event)

    def handle_container_destroy(self, event):
        """
        Handle container destroy: post the container name and its
        deletion time to the account service.
        Does nothing when `acct_update` is false.
        :param event:
        """
        self.logger.debug('worker handle container destroy')
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        dtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')

        event = {'dtime': dtime, 'name': name}
        self.session.post(uri, params={'id': account}, data=json.dumps(event))

    def handle_object_delete(self, event):
        """
        Handle object deletion.
        Delete the chunks of the object, in parallel, and log the
        outcome of each request which got a response.
        :param event:
        """
        self.logger.debug('worker handle object delete')
        pile = GreenPile(PARALLEL_CHUNKS_DELETE)

        chunks = [item for item in event.get('data')
                  if item.get('type') == 'chunks']
        if not chunks:
            self.logger.warn('No chunks found in event data')
            return

        def delete_chunk(chunk):
            # Returns the response, or None if the request failed or
            # timed out.
            resp = None
            try:
                with Timeout(CHUNK_TIMEOUT):
                    resp = self.session.delete(chunk['id'])
            except (Exception, Timeout) as e:
                self.logger.warn('error while deleting chunk %s "%s"',
                                 chunk['id'], str(e))
            return resp

        for chunk in chunks:
            pile.spawn(delete_chunk, chunk)

        resps = [resp for resp in pile if resp]

        for resp in resps:
            if resp.status_code == 204:
                self.logger.debug('deleted chunk %s', resp.url)
            else:
                self.logger.warn('failed to delete chunk %s', resp.url)

    def handle_object_put(self, event):
        """
        Handle object creation.
        TODO
        :param event:
        """
        self.logger.debug('worker handle object put')

    def handle_reference_update(self, event):
        """
        Handle reference update.
        TODO
        :param event
        """
        self.logger.debug('worker handle reference update')

    def handle_chunk_put(self, event):
        """
        Handle chunk creation: push the chunk to the rdir, unless
        `rdir_update` is false.

        The four identifiers are removed from the 'data' field of the
        event; what remains of it, plus the event time as 'mtime', is
        sent along.
        :param event
        """
        if not self.rdir_update:
            self.logger.debug('worker skip chunk creation')
            return

        self.logger.debug('worker handle chunk creation')

        when = event.get('when')
        data = event.get('data')
        volume_id = data.get('volume_id')
        del data['volume_id']
        container_id = data.get('container_id')
        del data['container_id']
        content_id = data.get('content_id')
        del data['content_id']
        chunk_id = data.get('chunk_id')
        del data['chunk_id']
        data['mtime'] = when
        self.rdir.chunk_push(volume_id, container_id, content_id, chunk_id,
                             **data)

    def handle_chunk_delete(self, event):
        """
        Handle chunk deletion: remove the chunk from the rdir, unless
        `rdir_update` is false.
        :param event
        """
        if not self.rdir_update:
            self.logger.debug('worker skip chunk deletion')
            return

        self.logger.debug('worker handle chunk deletion')

        data = event.get('data')
        volume_id = data.get('volume_id')
        container_id = data.get('container_id')
        content_id = data.get('content_id')
        chunk_id = data.get('chunk_id')
        self.rdir.chunk_delete(volume_id, container_id, content_id, chunk_id)

    def handle_ping(self, event):
        """
        Handle ping
        :param event
        """
        self.logger.debug('worker handle ping')
Beispiel #11
0
class EventWorker(Worker):
    """
    Beanstalk-backed event worker: jobs are reserved from the queue,
    decoded, and passed to the handler registered for their event type.
    """

    def init(self):
        """
        Prepare the HTTP session, the conscience and rdir clients and
        the configuration-driven settings, then call the parent `init`.
        """
        eventlet.monkey_patch(os=False)
        self.session = requests.Session()
        self.cs = ConscienceClient(self.conf)
        self.rdir = RdirClient(self.conf)
        self._acct_addr = None
        self.graceful_timeout = 1
        self.acct_refresh_interval = int_value(
            self.conf.get('acct_refresh_interval'), 60
        )
        self.concurrency = int_value(self.conf.get('concurrency'), 1000)
        # NOTE(review): this attribute is first the flag enabling the
        # account updates; the `acct_addr` property later stores the
        # time of the last address lookup in it.
        self.acct_update = true_value(self.conf.get('acct_update', True))
        self.rdir_update = true_value(self.conf.get('rdir_update', True))
        super(EventWorker, self).init()

    def notify(self):
        """TODO"""
        pass

    def safe_decode_job(self, job):
        """
        Parse the JSON body of a job.

        :returns: the parsed object; None, after a warning has been
            logged, when parsing fails
        """
        try:
            return json.loads(job)
        except Exception as e:
            # `e.message` does not exist on Python 3; str(e) works on
            # both major versions.
            self.logger.warn('ERROR decoding job "%s"', str(e))
            return None

    def run(self):
        """
        Open the queue, start the green thread running `handle` and loop
        until the worker is not alive anymore. The green thread is then
        sent `StopServe`, and killed if it is still running after
        `graceful_timeout` seconds.
        """
        queue_url = self.conf.get('queue_url', 'tcp://127.0.0.1:11300')
        self.beanstalk = Beanstalk.from_url(queue_url)

        gt = eventlet.spawn(
            self.handle)

        while self.alive:
            self.notify()
            try:
                eventlet.sleep(1.0)
            except AssertionError:
                self.alive = False
                break

        self.notify()
        try:
            with Timeout(self.graceful_timeout) as t:
                gt.kill(StopServe())
                gt.wait()
        except Timeout as te:
            # A timeout which is not ours is not handled here
            if te != t:
                raise
            gt.kill()

    def handle(self):
        """
        Main loop of the green thread: reserve a job, decode it, process
        it, delete it; repeat until `StopServe` is raised.

        The job is deleted whatever `process_event` returned, and also
        when its body could not be decoded. It is left in place only
        when an exception is raised, which is logged.
        """
        try:
            while True:
                job_id, data = self.beanstalk.reserve()
                try:
                    event = self.safe_decode_job(data)
                    if event:
                        self.process_event(event)
                    self.beanstalk.delete(job_id)
                except Exception:
                    self.logger.exception("ERROR handling event %s", job_id)
        except StopServe:
            self.logger.info('Stopping event handler')

    def process_event(self, event):
        """
        Call the handler of the event, if any.

        :returns: True if the handler completed or if no handler exists
            for the event, False if the handler raised an `Exception`
        """
        handler = self.get_handler(event)
        if not handler:
            self.logger.warn("ERROR no handler found for event")
            # mark as success
            return True
        success = True
        try:
            handler(event)
        except Exception:
            # The failure is logged instead of being silently dropped.
            self.logger.exception('ERROR while handling event "%s"',
                                  event.get('event'))
            success = False
        # Returning from a `finally` clause would discard every pending
        # exception, including those not deriving from Exception.
        return success

    def get_handler(self, event):
        """
        Find the method in charge of an event.

        :returns: the bound handler, or None when the 'event' field is
            empty or holds an unknown type
        """
        event_type = event.get('event')
        if not event_type:
            return None

        dispatch = (
            (EventType.CONTAINER_PUT, self.handle_container_put),
            (EventType.CONTAINER_DESTROY, self.handle_container_destroy),
            (EventType.CONTAINER_UPDATE, self.handle_container_update),
            (EventType.OBJECT_PUT, self.handle_object_put),
            (EventType.OBJECT_DELETE, self.handle_object_delete),
            (EventType.REFERENCE_UPDATE, self.handle_reference_update),
            (EventType.CHUNK_PUT, self.handle_chunk_put),
            (EventType.CHUNK_DELETE, self.handle_chunk_delete),
            (EventType.PING, self.handle_ping),
        )
        # Plain equality test, first match wins; unhashable event types
        # are thus tolerated.
        for candidate, handler in dispatch:
            if event_type == candidate:
                return handler
        return None

    @property
    def acct_addr(self):
        """
        Address of an account service instance.

        A new instance is requested from the conscience client when no
        address is known or when `acct_refresh` returns True. A failed
        request is logged and the previous address (possibly None) is
        returned.
        """
        if not self._acct_addr or self.acct_refresh():
            try:
                acct_instance = self.cs.next_instance(ACCOUNT_SERVICE)
                self._acct_addr = acct_instance.get('addr')
                self.acct_update = time.time()
            except Exception:
                self.logger.warn('Unable to find account instance')
        return self._acct_addr

    def acct_refresh(self):
        """
        :returns: True if the current time exceeds the value of
            `acct_update` by more than `acct_refresh_interval` seconds
        """
        return (time.time() - self.acct_update) > self.acct_refresh_interval

    def handle_container_put(self, event):
        """
        Handle container creation: send the name and modification time
        of the container to the account service.
        Skipped when `acct_update` is false.
        :param event:
        """
        self.logger.debug('worker handle container put')
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')

        event = {'mtime': mtime, 'name': name}
        self.session.post(uri, params={'id': account}, json=event)

    def handle_container_update(self, event):
        """
        Handle container update: send the name, modification time, byte
        count and object count of the container to the account service.
        Skipped when `acct_update` is false.
        :param event:
        """
        self.logger.debug('worker handle container update')
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        # NOTE(review): the put and destroy handlers read 'url' from
        # the 'data' field, this one reads it from the event itself --
        # to be checked against the event schema.
        name = event.get('url').get('user')
        account = event.get('url').get('account')
        bytes_count = data.get('bytes-count', 0)
        object_count = data.get('object-count', 0)

        event = {
            'mtime': mtime,
            'name': name,
            'bytes': bytes_count,
            'objects': object_count
        }
        self.session.post(uri, params={'id': account}, json=event)

    def handle_container_destroy(self, event):
        """
        Handle container destroy: send the name and deletion time of the
        container to the account service.
        Skipped when `acct_update` is false.
        :param event:
        """
        self.logger.debug('worker handle container destroy')
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        dtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')

        event = {'dtime': dtime, 'name': name}
        self.session.post(uri, params={'id': account}, data=json.dumps(event))

    def handle_object_delete(self, event):
        """
        Handle object deletion.
        Delete the chunks of the object with parallel HTTP requests,
        then log the outcome of each request which got a response.
        :param event:
        """
        self.logger.debug('worker handle object delete')
        pile = GreenPile(PARALLEL_CHUNKS_DELETE)

        chunks = [item for item in event.get('data')
                  if item.get('type') == 'chunks']
        if not chunks:
            self.logger.warn('No chunks found in event data')
            return

        def delete_chunk(chunk):
            # None is returned when the request failed or timed out
            resp = None
            try:
                with Timeout(CHUNK_TIMEOUT):
                    resp = self.session.delete(chunk['id'])
            except (Exception, Timeout) as e:
                self.logger.warn('error while deleting chunk %s "%s"',
                                 chunk['id'], str(e))
            return resp

        for chunk in chunks:
            pile.spawn(delete_chunk, chunk)

        resps = [resp for resp in pile if resp]

        for resp in resps:
            if resp.status_code == 204:
                self.logger.debug('deleted chunk %s', resp.url)
            else:
                self.logger.warn('failed to delete chunk %s', resp.url)

    def handle_object_put(self, event):
        """
        Handle object creation.
        TODO
        :param event:
        """
        self.logger.debug('worker handle object put')

    def handle_reference_update(self, event):
        """
        Handle reference update.
        TODO
        :param event
        """
        self.logger.debug('worker handle reference update')

    def handle_chunk_put(self, event):
        """
        Handle chunk creation: register the chunk in the rdir, unless
        `rdir_update` is false.

        The identifiers are taken out of the 'data' field of the event;
        the rest of that field, completed with the event time as
        'mtime', is passed as keyword arguments.
        :param event
        """
        if not self.rdir_update:
            self.logger.debug('worker skip chunk creation')
            return

        self.logger.debug('worker handle chunk creation')

        when = event.get('when')
        data = event.get('data')
        volume_id = data.get('volume_id')
        del data['volume_id']
        container_id = data.get('container_id')
        del data['container_id']
        content_id = data.get('content_id')
        del data['content_id']
        chunk_id = data.get('chunk_id')
        del data['chunk_id']
        data['mtime'] = when
        self.rdir.chunk_push(volume_id, container_id, content_id, chunk_id,
                             **data)

    def handle_chunk_delete(self, event):
        """
        Handle chunk deletion: unregister the chunk from the rdir,
        unless `rdir_update` is false.
        :param event
        """
        if not self.rdir_update:
            self.logger.debug('worker skip chunk deletion')
            return

        self.logger.debug('worker handle chunk deletion')

        data = event.get('data')
        volume_id = data.get('volume_id')
        container_id = data.get('container_id')
        content_id = data.get('content_id')
        chunk_id = data.get('chunk_id')
        self.rdir.chunk_delete(volume_id, container_id, content_id, chunk_id)

    def handle_ping(self, event):
        """
        Handle ping
        :param event
        """
        self.logger.debug('worker handle ping')
Beispiel #12
0
class EventWorker(object):
    """
    Worker receiving events on a ZeroMQ reply socket and dispatching each
    of them to the matching ``handle_*`` method.
    """

    def __init__(self, conf, name, context, **kwargs):
        """
        :param conf: configuration mapping; ``acct_refresh_interval``,
            ``acct_update`` and ``rdir_update`` are read from it
        :param name: worker name, used in log messages
        :param context: ZeroMQ context used to create the reply socket
        :keyword verbose: forwarded to the logger factory (default False)
        """
        self.conf = conf
        self.name = name
        verbose = kwargs.pop('verbose', False)
        self.logger = get_logger(self.conf, verbose=verbose)
        self.init_zmq(context)
        self.cs = ConscienceClient(self.conf)
        self.rdir = RdirClient(self.conf)
        self._acct_addr = None
        # Time of the last successful account address lookup. Kept apart
        # from `acct_update`, which is only the on/off feature flag.
        self._acct_last_refresh = 0
        self.acct_refresh_interval = int_value(
            conf.get('acct_refresh_interval'), 60)
        self.acct_update = true_value(conf.get('acct_update', True))
        self.rdir_update = true_value(conf.get('rdir_update', True))
        self.session = requests.Session()
        self.running = False
        self.failed = False

    def start(self):
        """Mark the worker as running and enter the event loop."""
        self.logger.info('worker "%s" starting', self.name)
        self.running = True
        self.run()

    def stop(self):
        """Ask the event loop to exit before its next iteration."""
        self.logger.info('worker "%s" stopping', self.name)
        self.running = False

    def init_zmq(self, context):
        """
        Create the reply socket and connect it to the in-process front.

        :param context: ZeroMQ context
        """
        socket = context.socket(zmq.REP)
        socket.connect('inproc://event-front')
        self.socket = socket

    def safe_ack(self, msg):
        """
        Send the acknowledgement; a failure is logged, never raised.

        :param msg: list of message parts to send
        """
        try:
            self.socket.send_multipart(msg)
        except Exception:
            self.logger.warn('Unable to ack event')

    def run(self):
        """
        Receive, process and acknowledge events while the worker is running.

        The acknowledgement echoes the first part of the received message,
        followed by "0" on success or an empty string on failure. Any
        exception escaping the loop flags the worker as failed and is
        re-raised.
        """
        try:
            while self.running:
                msg = self.socket.recv_multipart()
                self.logger.debug("msg received: %s", msg)
                event = decode_msg(msg)
                success = self.process_event(event)
                status = "0" if success else ""
                self.safe_ack([msg[0], status])
        except Exception as e:
            self.logger.warn('ERROR in worker "%s": %s', self.name, e)
            self.failed = True
            # Bare raise keeps the original traceback.
            raise
        finally:
            self.logger.info('worker "%s" stopped', self.name)

    def process_event(self, event):
        """
        Dispatch the event to its handler.

        :param event: decoded event
        :returns: False if the handler raised an exception, True otherwise
            (an event without handler counts as a success)
        """
        handler = self.get_handler(event)
        if not handler:
            self.logger.warn("No handler found")
            # mark as success
            return True
        try:
            handler(event)
        except Exception:
            # Only regular errors are turned into a failed status;
            # KeyboardInterrupt, SystemExit and the like propagate.
            self.logger.exception('Failed to handle event')
            return False
        return True

    def get_handler(self, event):
        """
        Find the method in charge of the event.

        :param event: decoded event; its ``event`` entry holds the type
        :returns: the bound handler, or None if the type is missing or
            unknown
        """
        event_type = event.get('event')
        if not event_type:
            return None

        routes = (
            (EventType.CONTAINER_PUT, self.handle_container_put),
            (EventType.CONTAINER_DESTROY, self.handle_container_destroy),
            (EventType.CONTAINER_UPDATE, self.handle_container_update),
            (EventType.OBJECT_PUT, self.handle_object_put),
            (EventType.OBJECT_DELETE, self.handle_object_delete),
            (EventType.REFERENCE_UPDATE, self.handle_reference_update),
            (EventType.CHUNK_PUT, self.handle_chunk_put),
            (EventType.CHUNK_DELETE, self.handle_chunk_delete),
            (EventType.PING, self.handle_ping),
        )
        for known_type, handler in routes:
            if event_type == known_type:
                return handler
        return None

    @property
    def acct_addr(self):
        """
        Address of an account service instance.

        The address is looked up when none is cached or when the cached one
        is older than ``acct_refresh_interval`` seconds. A failed lookup is
        logged and the cached value (possibly None) is returned.
        """
        if not self._acct_addr or self.acct_refresh():
            try:
                acct_instance = self.cs.next_instance(ACCOUNT_SERVICE)
                self._acct_addr = acct_instance.get('addr')
                self._acct_last_refresh = time.time()
            except Exception:
                self.logger.warn('Unable to find account instance')
        return self._acct_addr

    def acct_refresh(self):
        """
        :returns: True if the last successful account address lookup is
            older than ``acct_refresh_interval`` seconds
        """
        elapsed = time.time() - self._acct_last_refresh
        return elapsed > self.acct_refresh_interval

    def _post_container_update(self, account, payload):
        """POST a JSON container update for `account` to the account service."""
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        self.session.post(uri, params={'id': account},
                          data=json.dumps(payload))

    def handle_container_put(self, event):
        """
        Handle container creation.

        Report the container name and creation time to the account service,
        unless ``acct_update`` is disabled.

        :param event: dict with ``when`` and ``data``; the account and
            container name are read from ``data['url']``
        """
        self.logger.debug('worker "%s" handle container put', self.name)
        if not self.acct_update:
            return
        url = event.get('data').get('url')
        payload = {'mtime': event.get('when'), 'name': url.get('user')}
        self._post_container_update(url.get('account'), payload)

    def handle_container_update(self, event):
        """
        Handle container update.

        Report the container name, update time, byte count and object count
        to the account service, unless ``acct_update`` is disabled.

        :param event: dict with ``when``, ``url`` and ``data``; the counters
            are read from ``data`` (``bytes-count``, ``object-count``,
            both defaulting to 0)
        """
        self.logger.debug('worker "%s" handle container update', self.name)
        if not self.acct_update:
            return
        data = event.get('data')
        # NOTE(review): 'url' is read from the event itself here, whereas
        # the put/destroy handlers read it from event['data'] -- confirm
        # against the event emitter.
        url = event.get('url')
        payload = {
            'mtime': event.get('when'),
            'name': url.get('user'),
            'bytes': data.get('bytes-count', 0),
            'objects': data.get('object-count', 0)
        }
        self._post_container_update(url.get('account'), payload)

    def handle_container_destroy(self, event):
        """
        Handle container destroy.

        Report the container name and deletion time to the account service,
        unless ``acct_update`` is disabled.

        :param event: dict with ``when`` and ``data``; the account and
            container name are read from ``data['url']``
        """
        self.logger.debug('worker "%s" handle container destroy', self.name)
        if not self.acct_update:
            return
        url = event.get('data').get('url')
        payload = {'dtime': event.get('when'), 'name': url.get('user')}
        self._post_container_update(url.get('account'), payload)

    def handle_object_delete(self, event):
        """
        Handle object deletion.
        Delete the chunks of the object.

        One DELETE request is sent per item of the event data whose type is
        ``chunks``, using the item's ``id`` as URL. The requests are spawned
        on a green pile and the outcome of each one is logged.

        :param event: dict whose ``data`` entry is a list of items
        """
        self.logger.debug('worker "%s" handle object delete', self.name)

        chunks = [item for item in event.get('data')
                  if item.get('type') == 'chunks']
        if not chunks:
            self.logger.warn('No chunks found in event data')
            return

        def delete_chunk(chunk):
            try:
                with Timeout(CHUNK_TIMEOUT):
                    return self.session.delete(chunk['id'])
            except (Exception, Timeout) as e:
                self.logger.exception(e)
            return None

        pile = GreenPile(PARALLEL_CHUNKS_DELETE)
        for chunk in chunks:
            pile.spawn(delete_chunk, chunk)

        for resp in pile:
            # Compare with None explicitly: a requests Response is falsy
            # for 4xx/5xx statuses, and those must reach the warning below.
            if resp is None:
                continue
            if resp.status_code == 204:
                self.logger.info('deleted chunk %s', resp.url)
            else:
                self.logger.warn('failed to delete chunk %s', resp.url)

    def handle_object_put(self, event):
        """
        Handle object creation.
        TODO

        :param event: the object creation event (currently unused)
        """
        self.logger.debug('worker "%s" handle object put', self.name)

    def handle_reference_update(self, event):
        """
        Handle reference update.
        TODO

        :param event: the reference update event (currently unused)
        """
        self.logger.debug('worker "%s" handle reference update', self.name)

    def handle_chunk_put(self, event):
        """
        Handle chunk creation.

        Push the chunk described by the event to the rdir service, unless
        ``rdir_update`` is disabled. The four identifiers are passed
        positionally; every other entry of the event data, plus ``mtime``
        (taken from the event's ``when`` field), is forwarded as keyword
        arguments.

        :param event: dict with a ``when`` entry and a ``data`` dict holding
            ``volume_id``, ``container_id``, ``content_id`` and ``chunk_id``
        :raises KeyError: if one of the four identifiers is missing
        """
        if not self.rdir_update:
            self.logger.debug('worker "%s" skip chunk creation', self.name)
            return

        self.logger.debug('worker "%s" handle chunk creation', self.name)

        # Work on a shallow copy so the caller's event is left untouched.
        extra = dict(event.get('data'))
        volume_id = extra.pop('volume_id')
        container_id = extra.pop('container_id')
        content_id = extra.pop('content_id')
        chunk_id = extra.pop('chunk_id')
        extra['mtime'] = event.get('when')
        self.rdir.chunk_push(volume_id, container_id, content_id, chunk_id,
                             **extra)

    def handle_chunk_delete(self, event):
        """
        Handle chunk deletion.

        Ask the rdir service to delete the chunk identified by the event
        data, unless ``rdir_update`` is disabled.

        :param event: dict whose ``data`` entry holds ``volume_id``,
            ``container_id``, ``content_id`` and ``chunk_id``
        """
        if not self.rdir_update:
            self.logger.debug('worker "%s" skip chunk deletion', self.name)
            return

        self.logger.debug('worker "%s" handle chunk deletion', self.name)

        data = event.get('data')
        volume_id = data.get('volume_id')
        container_id = data.get('container_id')
        content_id = data.get('content_id')
        chunk_id = data.get('chunk_id')
        self.rdir.chunk_delete(volume_id, container_id, content_id, chunk_id)

    def handle_ping(self, event):
        """
        Handle ping.

        :param event: the ping event (unused)
        """
        self.logger.debug('worker "%s" handle ping', self.name)