Example 1
 def wait_for_score(self, types, timeout=20.0, score_threshold=35):
     """Wait for services to have a score greater than the threshold."""
     deadline = time.time() + timeout
     while time.time() < deadline:
         wait = False
         for type_ in types:
             try:
                 all_svcs = self.conscience.all_services(type_)
                 if not all_svcs:
                     # No service registered yet, must wait.
                     wait = True
                 for service in all_svcs:
                     if int(service['score']) < score_threshold:
                         wait = True
                         break
             except Exception as err:
                 logging.warning('Could not check service score: %s', err)
                 wait = True
             if wait:
                 # No need to check other types, we have to wait anyway
                 break
         if not wait:
             return
         time.sleep(1)
     logging.info('Service(s) failed to reach score %d (timeout %ds)',
                  score_threshold, timeout)
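Several of these examples repeat the same deadline-and-poll idiom: compute the deadline once, then loop, test, and sleep. A minimal generic sketch of the pattern (the helper name and parameters are illustrative, not part of the original code):

import time

def poll_until(predicate, timeout=20.0, interval=1.0):
    """Call predicate() until it returns True or the timeout expires.

    :returns: True if the predicate succeeded, False on timeout.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        if predicate():
            return True
        time.sleep(interval)
    return False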
Example 2
    def converter_pass(self, input_file=None):
        def report(tag, now=None):
            if now is None:
                now = time.time()
            total_time = now - self.start_time
            self.logger.info(
                '%(tag)s  %(volume)s '
                'started=%(start_time)s '
                'passes=%(passes)d '
                'errors=%(errors)d '
                'chunks=%(nb_chunks)d %(c_rate).2f/s '
                'total_time=%(total_time).2f '
                '(converter: %(success_rate).2f%%)' % {
                    'tag': tag,
                    'volume': self.volume_id,
                    'start_time': datetime.fromtimestamp(
                        int(self.start_time)).isoformat(),
                    'passes': self.passes,
                    'errors': self.errors,
                    'nb_chunks': self.total_chunks_processed,
                    'c_rate': self.total_chunks_processed / total_time,
                    'total_time': total_time,
                    'success_rate': 100 * (
                        (self.total_chunks_processed - self.errors) /
                        (float(self.total_chunks_processed) or 1.0)),
                })
            self.passes = 0
            self.last_reported = now

        self.start_time = time.time()
        self.errors = 0
        self.passes = 0

        self.backup_name = 'backup_%s_%f' % (self.volume_id, self.start_time)

        paths = self.paths_gen(input_file=input_file)
        for path in paths:
            self.safe_convert_chunk(path)

            now = time.time()
            if now - self.last_reported >= self.report_interval:
                report('RUN', now=now)

            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
        report('DONE')

        return self.errors == 0
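The loop above paces itself with ratelimit(), passing the returned timestamp back on each iteration. A rough approximation of such a helper, for illustration only (a sketch, not the actual oio implementation):

import time

def ratelimit(run_time, max_rate):
    """Sleep just enough to stay under max_rate calls per second.

    :param run_time: value returned by the previous call (0 at first)
    :returns: the value to pass back on the next call
    """
    if max_rate <= 0:
        return run_time
    clock = time.time()
    if run_time < clock:
        run_time = clock
    else:
        time.sleep(run_time - clock)
    return run_time + 1.0 / max_rate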
Example 3
 def wait_for_ready_job(self, tube, timeout=float('inf'),
                        poll_interval=0.2):
     """
     Wait until the specified tube has a ready job,
     or the timeout expires.
     """
     self.use(tube)
     job_id, data = self.peek_ready()
     deadline = time.time() + timeout
     while job_id is None and time.time() < deadline:
         time.sleep(poll_interval)
         job_id, data = self.peek_ready()
     return job_id, data
Example 4
 def run(self):
     """
     Main worker loop
     """
     self.start_time = time.time()
     while not self._stop:
         try:
             self.crawl_volume()
             self.last_scan_time = time.time()
             time.sleep(self.scans_interval)
         except exc.OioException as exception:
             self.logger.exception("ERROR during indexing meta2: %s",
                                   exception)
Example 5
    def run(self):
        self.tool.start_time = self.tool.last_report = time.time()
        self.tool.log_report('START', force=True)
        reply_loc = {
            'addr': self.beanstalkd_reply.addr,
            'tube': self.beanstalkd_reply.tube
        }
        # pylint: disable=no-member
        thread = threading.Thread(target=self._distribute_events,
                                  args=[reply_loc])
        thread.start()

        # Wait until the thread has started sending events
        while self.sending is None:
            sleep(0.1)

        # Retrieve responses until all events are processed
        try:
            while not self._all_events_are_processed():
                tasks_res = self.beanstalkd_reply.fetch_job(
                    self._tasks_res_from_res_event,
                    timeout=DISTRIBUTED_DISPATCHER_TIMEOUT)
                for task_res in tasks_res:
                    self.tool.update_counters(task_res)
                    yield task_res
                self.tool.log_report('RUN')
        except OioTimeout:
            self.logger.error('No response for %d seconds',
                              DISTRIBUTED_DISPATCHER_TIMEOUT)
            self.tool.success = False
        except Exception:  # pylint: disable=broad-except
            self.logger.exception('ERROR in distributed dispatcher')
            self.tool.success = False

        self.tool.log_report('DONE', force=True)
Example 6
    def report(self, tag):
        """
        Log the status of indexer

        :param tag: One of three: starting, running, ended.
        """
        total = self.success_nb + self.failed_nb
        now = time.time()
        elapsed = (now - self.start_time) or 0.00001
        since_last_rprt = (now - self.last_report_time) or 0.00001
        self.logger.info(
            'volume_id=%(volume_id)s %(tag)s=%(current_time)s '
            'elapsed=%(elapsed).02f '
            'pass=%(pass)d '
            'errors=%(errors)d '
            'containers_indexed=%(total_indexed)d %(index_rate).2f/s',
            {
                'volume_id': self.volume_id,
                'tag': tag,
                'current_time': datetime.fromtimestamp(
                    int(now)).isoformat(),
                'pass': self.full_scan_nb,
                'errors': self.failed_nb,
                'total_indexed': total,
                'index_rate': self.indexed_since_last_report / since_last_rprt,
                'elapsed': elapsed
            }
        )
        self.last_report_time = now
        self.indexed_since_last_report = 0
Example 7
 def run(self, *args, **kwargs):
     time.sleep(random() * self.interval)
     while True:
         pre = time.time()
         try:
             self.index_pass()
         except exc.VolumeException as err:
             self.logger.error('Cannot index chunks, will retry later: %s',
                               err)
         except Exception as err:
             self.logger.exception('ERROR during indexing: %s', err)
         else:
             self.passes += 1
         elapsed = (time.time() - pre) or 0.000001
         if elapsed < self.interval:
             time.sleep(self.interval - elapsed)
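Two details of this loop are worth noting: the initial random sleep spreads several workers over the interval so they do not start in lock step, and the elapsed-time subtraction keeps the period fixed regardless of how long index_pass() takes. A condensed sketch of the same scheduling pattern (names are illustrative):

import random
import time

def run_periodic(task, interval):
    """Run task() roughly every `interval` seconds, with initial jitter."""
    time.sleep(random.random() * interval)
    while True:
        start = time.time()
        try:
            task()
        except Exception as err:
            print('task failed, will retry later:', err)
        elapsed = time.time() - start
        if elapsed < interval:
            time.sleep(interval - elapsed)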
Example 8
 def __init__(self, conf, logger=None, **kwargs):
     self.conf = conf
     self.logger = logger or get_logger(conf)
     volume = conf.get('volume')
     if not volume:
         raise ConfigurationException('No volume specified for converter')
     self.volume = volume
     self.namespace, self.volume_id = check_volume(self.volume)
     # cache
     self.name_by_cid = CacheDict()
     self.content_id_by_name = CacheDict()
     # client
     self.container_client = ContainerClient(conf, **kwargs)
     self.content_factory = ContentFactory(conf,
                                           self.container_client,
                                           logger=self.logger)
     self._rdir = None  # we may never need it
     # stats/logs
     self.errors = 0
     self.passes = 0
     self.total_chunks_processed = 0
     self.start_time = 0
     self.last_reported = 0
     self.report_interval = int_value(conf.get('report_interval'), 3600)
     # speed
     self.chunks_run_time = 0
     self.max_chunks_per_second = int_value(conf.get('chunks_per_second'),
                                            30)
     # backup
     self.no_backup = true_value(conf.get('no_backup', False))
     self.backup_dir = conf.get('backup_dir') or tempfile.gettempdir()
     self.backup_name = 'backup_%s_%f' \
         % (self.volume_id, time.time())
     # dry run
     self.dry_run = true_value(conf.get('dry_run', False))
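int_value() and true_value() convert raw configuration strings, falling back to a default when the key is absent. An approximation of what they do, for context (assumption: the real oio helpers handle more edge cases than shown here):

def int_value(value, default):
    """Return value as an int, or default when value is None or empty."""
    if value is None or value == '':
        return default
    return int(value)

def true_value(value):
    """Interpret common truthy strings."""
    return str(value).lower() in ('true', '1', 'yes', 'on')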
Example 9
 def wait_until_empty(self, tube, timeout=float('inf'), poll_interval=0.2,
                      initial_delay=0.0):
     """
     Wait until the specified tube is empty, or the timeout expires.
     """
     # TODO(FVE): check tube stats to ensure some jobs have passed through
     # and then get rid of the initial_delay
     # peek-ready requires "use", not "watch"
     self.use(tube)
     if initial_delay > 0.0:
         time.sleep(initial_delay)
     job_id, _ = self.peek_ready()
     deadline = time.time() + timeout
     while job_id is not None and time.time() < deadline:
         time.sleep(poll_interval)
         job_id, _ = self.peek_ready()
Example 10
 def report(tag, now=None):
     if now is None:
         now = time.time()
     total_time = now - self.start_time
     self.logger.info(
         '%(tag)s  %(volume)s '
         'started=%(start_time)s '
         'passes=%(passes)d '
         'errors=%(errors)d '
         'chunks=%(nb_chunks)d %(c_rate).2f/s '
         'total_time=%(total_time).2f '
         '(converter: %(success_rate).2f%%)' % {
             'tag': tag,
             'volume': self.volume_id,
             'start_time': datetime.fromtimestamp(
                 int(self.start_time)).isoformat(),
             'passes': self.passes,
             'errors': self.errors,
             'nb_chunks': self.total_chunks_processed,
             'c_rate': self.total_chunks_processed / total_time,
             'total_time': total_time,
             'success_rate': 100 * (
                 (self.total_chunks_processed - self.errors) /
                 (float(self.total_chunks_processed) or 1.0)),
         })
     self.passes = 0
     self.last_reported = now
Example 11
    def run(self):
        self.tool.start_time = self.tool.last_report = time.time()
        self.tool.log_report('START', force=True)

        try:
            with ContextPool(len(self.workers) + 1) as pool:
                # spawn workers
                for worker in self.workers:
                    pool.spawn(worker.run)

                # spawn one worker to fill the queue
                pool.spawn(self._fill_queue_and_wait_all_items)

                # consume task results with the main thread
                while True:
                    task_res = self.queue_reply.get()
                    if task_res is None:  # end signal
                        break
                    self.tool.update_counters(task_res)
                    yield task_res
                    self.tool.log_report('RUN')
        except Exception:  # pylint: disable=broad-except
            self.logger.exception('ERROR in local dispatcher')
            self.tool.success = False

        self.tool.log_report('DONE', force=True)
Example 12
    def rebuilder_pass(self, **kwargs):
        self.start_time = self.last_report = time.time()
        self.log_report('START', force=True)

        workers = list()
        with ContextPool(self.nworkers + 1) as pool:
            # spawn one worker for the retry queue
            rqueue = eventlet.Queue(self.nworkers)
            pool.spawn(self._read_retry_queue, rqueue, **kwargs)

            # spawn workers to rebuild
            queue = eventlet.Queue(self.nworkers * 10)
            for i in range(self.nworkers):
                worker = self._create_worker(**kwargs)
                workers.append(worker)
                pool.spawn(worker.rebuilder_pass,
                           i,
                           queue,
                           retry_queue=rqueue,
                           **kwargs)

            # fill the queue (with the main thread)
            self._fill_queue(queue, **kwargs)

            # block until all items are rebuilt
            queue.join()
            # block until the retry queue is empty
            rqueue.join()

        self.log_report('DONE', force=True)
        return self.total_errors == 0
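The queue.join() and rqueue.join() calls rely on an implicit contract: every worker must call task_done() exactly once per item taken with get(), otherwise join() never returns. A minimal sketch of that contract with eventlet (the worker body is illustrative):

import eventlet

def worker(queue):
    while True:
        item = queue.get()
        try:
            pass  # the real worker would rebuild the item here
        finally:
            # Mandatory: queue.join() only returns once every item
            # fetched with get() has been acknowledged with task_done().
            queue.task_done()

pool = eventlet.GreenPool()
queue = eventlet.Queue(10)
for _ in range(4):
    pool.spawn(worker, queue)
for item in range(20):
    queue.put(item)
queue.join()  # blocks until all 20 items have been processed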
Example 13
 def log_report(self, status, force=False):
     """
     Log a report with a fixed interval.
     """
     end_time = time.time()
     if force or (end_time - self.last_report >= self.report_interval):
         counters = self._update_total_counters()
         self.logger.info(self._get_report(status, end_time, counters))
         self.last_report = end_time
Example 14
 def access_log_wrapper(self, req, *args, **kwargs):
     code = -1
     pre = time.time()
     try:
         rc = func(self, req, *args, **kwargs)
         code = rc._status_code
         return rc
     except HTTPException as exc:
         code = exc.code
         raise
     finally:
         post = time.time()
         # remote method code time size user reqid uri
         self.logger.info('%s %s %d %d %s %s %s %s',
                          req.environ['HTTP_HOST'],
                          req.environ['REQUEST_METHOD'], code,
                          int((post - pre) * 1000000), '-', '-', '-',
                          req.environ['RAW_URI'])
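This wrapper only makes sense inside a decorator that binds func; the snippet shows the inner function alone. A self-contained sketch of the enclosing decorator (the HTTPException branch is left out here so the sketch needs no extra imports):

import functools
import time

def access_log(func):
    """Wrap a request handler so each call emits one access-log line."""
    @functools.wraps(func)
    def access_log_wrapper(self, req, *args, **kwargs):
        code = -1
        pre = time.time()
        try:
            rc = func(self, req, *args, **kwargs)
            code = rc._status_code
            return rc
        finally:
            # method, status, duration (microseconds), uri
            self.logger.info('%s %d %d %s',
                             req.environ.get('REQUEST_METHOD'),
                             code,
                             int((time.time() - pre) * 1000000),
                             req.environ.get('RAW_URI'))
    return access_log_wrapper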
Example 15
    def index_pass(self):
        start_time = time.time()
        self.last_reported = start_time
        self.errors = 0
        self.successes = 0

        paths = paths_gen(self.volume)
        self.report('started', start_time)
        for path in paths:
            self.safe_update_index(path)
            self.chunks_run_time = ratelimit(
                self.chunks_run_time,
                self.max_chunks_per_second
            )
            now = time.time()
            if now - self.last_reported >= self.report_interval:
                self.report('running', start_time)
        self.report('ended', start_time)
Example 16
    def wait_for_event(self,
                       tube,
                       reqid=None,
                       types=None,
                       fields=None,
                       timeout=30.0):
        """
        Wait for an event in the specified tube.
        If reqid, types and/or fields are specified, drain events until the
        specified event is found.

        :param fields: dict of fields to look for in the event's URL
        :param types: list of types of events the method should look for
        """
        self.beanstalkd0.wait_for_ready_job(tube, timeout=timeout)
        self.beanstalkd0.watch(tube)
        now = time.time()
        deadline = now + timeout
        try:
            while now < deadline:
                to = max(0.0, deadline - now)
                job_id, data = self.beanstalkd0.reserve(timeout=to)
                event = Event(jsonlib.loads(data))
                self.beanstalkd0.delete(job_id)
                now = time.time()
                if types and event.event_type not in types:
                    logging.debug("ignore event %s (event mismatch)", data)
                    continue
                if reqid and event.reqid != reqid:
                    logging.info("ignore event %s (request_id mismatch)", data)
                    continue
                if fields and any(fields[k] != event.url.get(k)
                                  for k in fields):
                    logging.info("ignore event %s (filter mismatch)", data)
                    continue
                logging.info("event %s", data)
                return event
            logging.warning(
                ('wait_for_event(reqid=%s, types=%s, fields=%s, timeout=%s) '
                 'reached its timeout'), reqid, types, fields, timeout)
        except ResponseError as err:
            logging.warning('%s', err)
        return None
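The three continue branches implement an event filter: type first, then request ID, then URL fields. The same logic extracted for clarity (a sketch; `event` is assumed to expose the same event_type, reqid and url attributes as the Event objects above):

def event_matches(event, reqid=None, types=None, fields=None):
    """Return True if the event passes all the requested filters."""
    if types and event.event_type not in types:
        return False
    if reqid and event.reqid != reqid:
        return False
    if fields and any(fields[k] != event.url.get(k) for k in fields):
        return False
    return True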
Example 17
    def __init__(self, conf, logger, volume, container_ids):
        self.conf = conf
        self.logger = logger
        self.volume = volume
        self.volume_ns, self.volume_id = check_volume(self.volume)
        self.container_ids = container_ids or list()
        self.container_ids = [
            container_id.upper() for container_id in self.container_ids
        ]

        self.namespace = self.conf['namespace']
        if self.namespace != self.volume_ns:
            raise ValueError(
                'Namespace (%s) mismatch with volume namespace (%s)' %
                (self.namespace, self.volume_ns))

        # action
        self.action_name = self.conf['action'].lower()
        if self.action_name == 'insert':
            self.action = self._insert_bean
        elif self.action_name == 'update':
            self.action = self._update_bean
        elif self.action_name == 'check':
            self.action = self._check_bean
        else:
            raise ValueError('Unknown action (%s)' % self.action_name)

        # speed
        self.chunks_run_time = 0
        self.max_chunks_per_second = int_value(
            self.conf.get('chunks_per_second'), self.DEFAULT_CHUNKS_PER_SECOND)

        # counters
        self.chunks_processed = 0
        self.chunk_errors = 0
        self.beans_processed = dict()
        self.bean_successes = dict()
        self.bean_already_exists = dict()
        self.bean_orphans = dict()
        self.bean_errors = dict()
        for bean_type in self.BEAN_TYPES:
            self.beans_processed[bean_type] = 0
            self.bean_successes[bean_type] = 0
            self.bean_already_exists[bean_type] = 0
            self.bean_orphans[bean_type] = 0
            self.bean_errors[bean_type] = 0

        # report
        self.start_time = 0
        self.last_report = 0
        self.report_interval = int_value(conf.get('report_interval'),
                                         self.DEFAULT_REPORT_INTERVAL)

        self.client = ContainerClient({'namespace': self.namespace},
                                      logger=self.logger)
        self.ctime = int(time.time())
Example 18
 def wait_for_event(self,
                    tube,
                    reqid=None,
                    type_=None,
                    fields=None,
                    timeout=30.0):
     """
     Wait for an event in the specified tube.
     If reqid, type_ and/or fields are specified, drain events until the
     specified event is found.
     """
     self.beanstalkd0.wait_for_ready_job(tube, timeout=timeout)
     self.beanstalkd0.watch(tube)
     now = time.time()
     deadline = now + timeout
     try:
         while now < deadline:
             to = max(0.0, deadline - now)
             job_id, data = self.beanstalkd0.reserve(timeout=to)
             edata = jsonlib.loads(data)
             self.beanstalkd0.delete(job_id)
             now = time.time()
             if type_ and edata['event'] != type_:
                 logging.debug("ignore event %s (event mismatch)", data)
                 continue
             if reqid and edata.get('request_id') != reqid:
                 logging.info("ignore event %s (request_id mismatch)", data)
                 continue
             if fields and any(fields[k] != edata.get('url', {}).get(k)
                               for k in fields):
                 logging.info("ignore event %s (filter mismatch)", data)
                 continue
             logging.info("event %s", data)
             return edata
         logging.warning(
             ('wait_for_event(reqid=%s, type_=%s, fields=%s, timeout=%s) '
              'reached its timeout'), reqid, type_, fields, timeout)
     except ResponseError as err:
         logging.info('%s', err)
     return None
Example 19
 def log_report(self, status, force=False, **kwargs):
     end_time = time.time()
     if (force and self.lock_report.acquire()) \
         or (end_time - self.last_report >= self.report_interval
             and self.lock_report.acquire(False)):
         try:
             counters = self.update_totals()
             self.logger.info(
                 self._get_report(status, end_time, counters, **kwargs))
             self.last_report = end_time
         finally:
             self.lock_report.release()
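The condition deserves a close read: a forced report blocks on acquire(), while a periodic report uses acquire(False) so that, if another thread is already reporting, this one simply skips its turn instead of queueing up. The same pattern in isolation (a sketch with illustrative names):

import threading
import time

class Reporter(object):
    def __init__(self, interval):
        self.interval = interval
        self.last_report = 0
        self.lock = threading.Lock()

    def maybe_report(self, emit, force=False):
        now = time.time()
        # Forced: wait for the lock. Periodic: try it without blocking
        # and give up if another thread is already reporting.
        if (force and self.lock.acquire()) or (
                now - self.last_report >= self.interval
                and self.lock.acquire(False)):
            try:
                emit()
                self.last_report = now
            finally:
                self.lock.release()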
Example 20
    def resolve_service_id(self, service_type, service_id, check_format=True):
        """
        :returns: Service address corresponding to the service ID
        """
        if check_format:
            url = "http://" + service_id
            parsed = urlparse(url)
            if parsed.port is not None:
                return service_id

        cached_service_id = self._service_ids.get(service_id)
        if cached_service_id \
                and (time.time() - cached_service_id['mtime']
                     < self._service_id_max_age):
            return cached_service_id['addr']
        result = self.resolve(srv_type=service_type, service_id=service_id)
        service_addr = result['addr']
        self._service_ids[service_id] = {
            'addr': service_addr,
            'mtime': time.time()
        }
        return service_addr
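The _service_ids dictionary is a small time-based cache: entries older than _service_id_max_age are resolved again. The same idea as a standalone structure (a sketch, not the oio API):

import time

class TTLCache(object):
    """Map keys to values, forgetting entries older than max_age seconds."""

    def __init__(self, max_age):
        self.max_age = max_age
        self._entries = {}

    def get(self, key):
        entry = self._entries.get(key)
        if entry and time.time() - entry['mtime'] < self.max_age:
            return entry['value']
        return None  # missing or expired

    def put(self, key, value):
        self._entries[key] = {'value': value, 'mtime': time.time()}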
Example 21
    def crawl_volume(self):
        """
        Crawl the volume assigned to this worker, and index every database.
        """
        paths = paths_gen(self.volume)
        self.full_scan_nb += 1
        self.success_nb = 0
        self.failed_nb = 0
        now = time.time()
        self.last_report_time = now

        self.report("starting")

        for db_path in paths:

            # Graceful exit, hopefully
            if self._stop:
                break

            db_id = db_path.rsplit("/")[-1].rsplit(".")

            if len(db_id) != 3:
                self.warn("Malformed db file name !", db_path)
                continue

            db_id = ".".join(db_id[:2])
            self.index_meta2_database(db_id)

            self.last_index_time = ratelimit(
                self.last_index_time,
                self.max_indexed_per_second
            )

            now = time.time()
            if now - self.last_report_time >= self.report_interval:
                self.report("running")

        self.report("ended")
Example 22
    def get_tasks(self, job_params, marker=None):
        service_id = job_params['service_id']
        usage_target = job_params['usage_target']
        usage_check_interval = job_params['usage_check_interval']

        if usage_target > 0:
            now = time.time()
            current_usage = self.get_usage(service_id)
            if current_usage <= usage_target:
                self.logger.info(
                    'current usage %.2f%%: target already reached (%.2f%%)',
                    current_usage, usage_target)
                return
            last_usage_check = now

        chunk_infos = self.get_chunk_infos(job_params, marker=marker)
        for container_id, content_id, chunk_id, _ in chunk_infos:
            task_id = '|'.join((container_id, content_id, chunk_id))
            yield task_id, {
                'container_id': container_id,
                'content_id': content_id,
                'chunk_id': chunk_id
            }

            if usage_target <= 0:
                continue
            now = time.time()
            if now - last_usage_check < usage_check_interval:
                continue
            current_usage = self.get_usage(service_id)
            if current_usage > usage_target:
                last_usage_check = now
                continue
            self.logger.info('current usage %.2f%%: target reached (%.2f%%)',
                             current_usage, usage_target)
            return
Example 23
 def wait_for_event(self, tube, reqid=None, type_=None, timeout=30.0):
     """
     Wait for an event in the specified tube.
     If reqid and/or type_ are specified, drain events until the specified
     event is found.
     """
     self.beanstalkd0.wait_for_ready_job(tube, timeout=timeout)
     self.beanstalkd0.watch(tube)
     now = time.time()
     deadline = now + timeout
     try:
         while now < deadline:
             to = max(0.0, deadline - now)
             job_id, data = self.beanstalkd0.reserve(timeout=to)
             edata = jsonlib.loads(data)
             self.beanstalkd0.delete(job_id)
             if not type_ or edata['event'] == type_:
                 if not reqid or edata.get('request_id') == reqid:
                     return edata
             now = time.time()
     except ResponseError as err:
         logging.info('%s', err)
     return None
Example 24
    def index_meta2_database(self, db_id):
        """
        Add a meta2 database to the rdir index. Fails if the database isn't
        handled by the current volume.

        :param db_id: The ContentID representing the reference to the database.
        """
        if len(db_id) < STRLEN_REFERENCEID:
            self.warn('Not a valid container ID', db_id)
            return
        try:
            srvcs = self.dir_client.list(cid=db_id)
            account, container = srvcs['account'], srvcs['name']
            is_peer = self.volume_id in [
                x['host'] for x in srvcs['srv'] if x['type'] == 'meta2'
            ]

            container_id = db_id.rsplit(".")[0]

            if six.PY2:
                if isinstance(account, six.text_type):
                    account = account.encode('utf-8')
                if isinstance(container, six.text_type):
                    container = container.encode('utf-8')
            cont_url = "{0}/{1}/{2}".format(self.namespace, account, container)

            if not is_peer:
                self.warn(
                    "Trying to index a container that isn't handled by "
                    "this volume", db_id)
                if self.attempt_bad_index_removal:
                    self._attempt_index_removal(cont_url, container_id)
                return

            self.index_client.meta2_index_push(volume_id=self.volume_id,
                                               container_url=cont_url,
                                               mtime=time.time(),
                                               container_id=container_id)

            self.success_nb += 1
        except exc.OioException as exception:
            self.failed_nb += 1
            self.warn("Unable to to index container: %s" % str(exception),
                      db_id)

        self.indexed_since_last_report += 1
Example 25
    def update_index(self, path, chunk_id):
        with open(path) as file_:
            try:
                meta, _ = read_chunk_metadata(file_, chunk_id)
            except exc.MissingAttribute as err:
                raise exc.FaultyChunk(err)

            data = {'mtime': int(time.time())}
            headers = {REQID_HEADER: request_id('blob-indexer-')}
            self.index_client.chunk_push(self.volume_id,
                                         meta['container_id'],
                                         meta['content_id'],
                                         meta['chunk_id'],
                                         headers=headers,
                                         **data)
Example 26
 def task_event_from_item(item):
     namespace, container_id, content_id, chunk_id_or_pos = item
     return {
         'when': time.time(),
         'event': EventTypes.CONTENT_BROKEN,
         'url': {
             'ns': namespace,
             'id': container_id,
             'content': content_id
         },
         'data': {
             'missing_chunks': [chunk_id_or_pos]
         }
     }
Example 27
 def res_event_from_task_res(task_res):
     item, bytes_processed, error = task_res
     namespace, container_id, content_id, chunk_id_or_pos = item
     return {
         'when': time.time(),
         'event': EventTypes.CONTENT_REBUILT,
         'url': {
             'ns': namespace,
             'id': container_id,
             'content': content_id
         },
         'data': {
             'chunks_rebuilt': [{
                 'chunk_id_or_pos': chunk_id_or_pos,
                 'bytes_processed': bytes_processed,
                 'error': error
             }]
         }
     }
Example 28
    def pass_volume(self):
        self.start_time = self.last_report = time.time()
        self.log_report('START', force=True)

        paths = paths_gen(self.volume)
        for path in paths:
            try:
                self.pass_chunk_file(path)
                self.chunks_processed += 1
            except Exception as exc:
                self.logger.error(
                    'Failed to pass chunk file (chunk_file=%s): %s', path, exc)
                self.chunk_errors += 1

            self.log_report('RUN')
            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)

        self.log_report('DONE', force=True)
        return self.chunk_errors == 0 \
            and all(errors == 0 for errors in self.bean_errors.values())
Example 29
 def report(self, tag, start_time):
     total = self.errors + self.successes
     now = time.time()
     elapsed = (now - start_time) or 0.000001
     self.logger.info(
         '%(tag)s=%(current_time)s '
         'elapsed=%(elapsed).02f '
         'pass=%(pass)d '
         'errors=%(errors)d '
         'chunks=%(nb_chunks)d %(c_rate).2f/s' % {
             'tag': tag,
             'current_time': datetime.fromtimestamp(
                 int(now)).isoformat(),
             'pass': self.passes,
             'errors': self.errors,
             'nb_chunks': total,
             'c_rate': self.total_since_last_reported /
             ((now - self.last_reported) or 0.000001),
             'elapsed': elapsed
         }
     )
     self.last_reported = now
     self.total_since_last_reported = 0
Example 30
    def rebuilder_pass(self, **kwargs):
        self.start_time = self.last_report = time.time()
        self.log_report('START', force=True)

        workers = list()
        with ContextPool(self.concurrency + 1) as pool:
            # spawn one worker for the retry queue
            rqueue = eventlet.Queue(self.concurrency)
            pool.spawn(self._read_retry_queue, rqueue, **kwargs)

            # spawn workers to rebuild
            queue = eventlet.Queue(self.concurrency * 10)
            for i in range(self.concurrency):
                worker = self._create_worker(**kwargs)
                workers.append(worker)
                pool.spawn(worker.rebuilder_pass,
                           i,
                           queue,
                           retry_queue=rqueue,
                           **kwargs)

            # fill the queue (with the main thread)
            try:
                self._fill_queue(queue, **kwargs)
            except Exception as exc:
                if self.running:
                    self.logger.error("Failed to fill queue: %s", exc)
                    self.success = False

            # block until all items are rebuilt
            queue.join()
            # block until the retry queue is empty
            rqueue.join()

        self.log_report('DONE', force=True)
        return self.success and self.total_errors == 0