Example #1
File: tool.py Project: newtoncorp/oio-sds
    def _distribute_events(self, reply_loc=None):
        next_worker = 0
        items_run_time = 0

        try:
            tasks_events = self._fetch_tasks_events_to_send()
            items_run_time = ratelimit(items_run_time,
                                       self.max_items_per_second)
            next_worker = self._send_task_event(next(tasks_events), reply_loc,
                                                next_worker)
            self.sending = True
            for task_event in tasks_events:
                items_run_time = ratelimit(items_run_time,
                                           self.max_items_per_second)
                next_worker = self._send_task_event(task_event, reply_loc,
                                                    next_worker)

                if not self.tool.running:
                    break
        except Exception as exc:
            if not isinstance(exc, StopIteration) and self.tool.running:
                self.logger.error("Failed to distribute events: %s", exc)
                self.tool.success = False
        finally:
            self.sending = False
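
Every snippet on this page paces its main loop the same way: `ratelimit(previous_run_time, max_rate)` is called once per processed item, sleeps when the caller is running ahead of the allowed rate, and returns the value to feed into the next call (the loops seed it with 0). For readers who want to run these snippets outside oio-sds, below is a minimal sketch of such a pacer. It follows the logic of OpenStack Swift's `ratelimit_sleep` helper and the call sites on this page; the real oio-sds implementation may differ in details such as the size of its burst buffer.

import time


def ratelimit(run_time, max_rate, increment=1, rate_buffer=5):
    # Sleep just long enough to keep the caller at or below `max_rate`
    # increments per second. `run_time` is the value returned by the
    # previous call (0 on the first call).
    if not max_rate or increment <= 0:
        return run_time
    clock_accuracy = 1000.0  # work in milliseconds
    now = time.time() * clock_accuracy
    time_per_request = clock_accuracy * (float(increment) / max_rate)
    if now - run_time > rate_buffer * clock_accuracy:
        # The caller fell far behind: restart the schedule from now
        # rather than allowing a burst to catch up.
        run_time = now
    elif run_time - now > time_per_request:
        # Ahead of schedule: sleep until the next slot opens.
        time.sleep((run_time - now) / clock_accuracy)
    return run_time + time_per_request


# Typical usage, mirroring the loops in these examples:
#     items_run_time = 0
#     for item in items:
#         items_run_time = ratelimit(items_run_time, 100)  # ~100 items/s
#         process(item)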
Example #2
File: worker.py Project: murlock/oio-sds
    def process(self, beanstalkd_job):
        job_id = beanstalkd_job['job_id']
        job_config = beanstalkd_job['job_config']

        task = self.tasks.get(job_id)
        if task is None:
            job_type = beanstalkd_job['job_type']
            task_class = JOB_TYPES[job_type].TASK_CLASS
            job_params = job_config['params']
            task = task_class(self.conf, job_params, logger=self.logger)
            self.tasks[job_id] = task

        tasks_per_second = job_config['tasks_per_second']
        tasks = beanstalkd_job['tasks']

        task_errors = Counter()
        task_results = Counter()

        tasks_run_time = 0
        for task_id, task_payload in iteritems(tasks):
            tasks_run_time = ratelimit(tasks_run_time, tasks_per_second)

            reqid = job_id + request_id('-')
            reqid = reqid[:STRLEN_REQID]
            try:
                task_result = task.process(task_id, task_payload, reqid=reqid)
                task_results.update(task_result)
            except Exception as exc:
                self.logger.warn('[job_id=%s] Failed to process task %s: %s',
                                 job_id, task_id, exc)
                task_errors[type(exc).__name__] += 1

        return job_id, list(tasks.keys()), task_results, task_errors, \
            beanstalkd_job['beanstalkd_reply']
Example #3
    def rebuilder_pass(self, num, queue, **kwargs):
        start_time = report_time = time.time()

        while True:
            item = queue.get()
            begin_time = time.time()
            self._rebuild_one(item, **kwargs)
            end_time = time.time()

            self.rebuilder_time += (end_time - begin_time)
            total_time = end_time - start_time
            self.waiting_time = total_time - self.rebuilder_time
            self.total_items_processed += 1
            queue.task_done()

            if end_time - self.last_reported >= self.report_interval:
                self.logger.info(
                    self._get_report(num, start_time, end_time, total_time,
                                     report_time, **kwargs))
                report_time = end_time
                self.last_reported = end_time
                self.passes = 0

            self.items_run_time = ratelimit(self.items_run_time,
                                            self.max_items_per_second)
Example #4
    def rebuilder_pass(self, num, queue, retry_queue=None, **kwargs):
        while True:
            info = None
            err = None
            item = queue.get()
            try:
                info = self._rebuild_one(item, **kwargs)
            except exceptions.RetryLater as exc:
                if retry_queue:
                    self.logger.warn("Putting an item in the retry queue: %s",
                                     exc.args[1])
                    retry_queue.put(exc.args[0])
                else:
                    err = str(exc)
            except Exception as exc:
                err = str(exc)
            queue.task_done()

            self.update_processed(item, info, error=err, **kwargs)
            self.log_report(**kwargs)

            self.items_run_time = ratelimit(self.items_run_time,
                                            self.max_items_per_second)
            if self.random_wait:
                eventlet.sleep(random.randint(0, self.random_wait) / 1.0e6)
Example #5
    def _distribute_events(self, reply_loc=None):
        next_worker = 0
        items_run_time = 0
        tasks_events = self._fetch_tasks_events_to_send()
        try:
            items_run_time = ratelimit(items_run_time,
                                       self.max_items_per_second)
            next_worker = self._send_task_event(next(tasks_events), reply_loc,
                                                next_worker)
            self.sending = True
        except StopIteration:
            self.sending = False
            return
        for task_event in tasks_events:
            items_run_time = ratelimit(items_run_time,
                                       self.max_items_per_second)
            next_worker = self._send_task_event(task_event, reply_loc,
                                                next_worker)
        self.sending = False
Example #6
    def converter_pass(self, input_file=None):
        def report(tag, now=None):
            if now is None:
                now = time.time()
            total_time = now - self.start_time
            self.logger.info(
                '%(tag)s  %(volume)s '
                'started=%(start_time)s '
                'passes=%(passes)d '
                'errors=%(errors)d '
                'chunks=%(nb_chunks)d %(c_rate).2f/s '
                'total_time=%(total_time).2f '
                '(converter: %(success_rate).2f%%)' % {
                    'tag': tag,
                    'volume': self.volume_id,
                    'start_time': datetime.fromtimestamp(
                        int(self.start_time)).isoformat(),
                    'passes': self.passes,
                    'errors': self.errors,
                    'nb_chunks': self.total_chunks_processed,
                    'c_rate': self.total_chunks_processed / total_time,
                    'total_time': total_time,
                    # guard against a division by zero when no chunk was seen
                    'success_rate': 100 * (
                        (self.total_chunks_processed - self.errors) /
                        float(self.total_chunks_processed or 1))
                })
            self.passes = 0
            self.last_reported = now

        self.start_time = time.time()
        self.errors = 0
        self.passes = 0

        self.backup_name = 'backup_%s_%f' % (self.volume_id, self.start_time)

        paths = self.paths_gen(input_file=input_file)
        for path in paths:
            self.safe_convert_chunk(path)

            now = time.time()
            if now - self.last_reported >= self.report_interval:
                report('RUN', now=now)

            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
        report('DONE')

        return self.errors == 0
Example #7
    def index_pass(self):
        start_time = report_time = time.time()

        total_errors = 0

        paths = paths_gen(self.volume)

        for path in paths:
            self.safe_update_index(path)
            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
            self.total_chunks_processed += 1
            now = time.time()
            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    'started=%(start_time)s '
                    'passes=%(passes)d '
                    'errors=%(errors)d '
                    'chunks=%(nb_chunks)d %(c_rate).2f/s '
                    'total=%(total).2f ' % {
                        'start_time': datetime.fromtimestamp(
                            int(report_time)).isoformat(),
                        'passes': self.passes,
                        'errors': self.errors,
                        'nb_chunks': self.total_chunks_processed,
                        'c_rate': self.passes / (now - report_time),
                        'total': (now - start_time)
                    })
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.errors = 0
                self.last_reported = now
        end_time = time.time()
        elapsed = (end_time - start_time) or 0.000001
        self.logger.info(
            'started=%(start_time)s '
            'ended=%(end_time)s '
            'elapsed=%(elapsed).02f '
            'errors=%(errors)d '
            'chunks=%(nb_chunks)d %(c_rate).2f/s ' % {
                'start_time': datetime.fromtimestamp(
                    int(start_time)).isoformat(),
                'end_time': datetime.fromtimestamp(int(end_time)).isoformat(),
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'nb_chunks': self.total_chunks_processed,
                'c_rate': self.total_chunks_processed / elapsed
            })
        if elapsed < self.interval:
            time.sleep(self.interval - elapsed)
Example #8
    def index_pass(self):
        def safe_update_index(path):
            chunk_id = path.rsplit('/', 1)[-1]
            if len(chunk_id) != STRLEN_CHUNKID:
                return
            for c in chunk_id:
                if c not in hexdigits:
                    return
            try:
                self.update_index(path)
                self.successes += 1
                self.logger.debug('Updated %s', path)
            except OioNetworkException as exc:
                self.errors += 1
                self.logger.warn('ERROR while updating %s: %s', path, exc)
            except Exception:
                self.errors += 1
                self.logger.exception('ERROR while updating %s', path)

        def report(tag):
            total = self.errors + self.successes
            now = time.time()
            elapsed = (now - start_time) or 0.000001
            self.logger.info(
                '%(tag)s=%(current_time)s '
                'elapsed=%(elapsed).02f '
                'pass=%(pass)d '
                'errors=%(errors)d '
                'chunks=%(nb_chunks)d %(c_rate).2f/s' % {
                    'tag': tag,
                    'current_time': datetime.fromtimestamp(
                        int(now)).isoformat(),
                    'pass': self.passes,
                    'errors': self.errors,
                    'nb_chunks': total,
                    'c_rate': total / (now - self.last_reported),
                    'elapsed': elapsed
                })
            self.last_reported = now

        start_time = time.time()
        self.last_reported = start_time
        self.errors = 0
        self.successes = 0

        paths = paths_gen(self.volume)
        report('started')
        for path in paths:
            safe_update_index(path)
            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
            now = time.time()
            if now - self.last_reported >= self.report_interval:
                report('running')
        report('ended')
Example #9
    def _fill_queue(self):
        """
        Fill the queue.
        """
        items_run_time = 0
        items_with_beanstalkd_reply = \
            self.tool.fetch_items_with_beanstalkd_reply()
        for item_with_beanstalkd_reply in items_with_beanstalkd_reply:
            items_run_time = ratelimit(items_run_time,
                                       self.max_items_per_second)
            self.queue_workers.put(item_with_beanstalkd_reply)
Example #10
    def chunk_file_audit(self, chunk_file, chunk_id):
        try:
            meta, _ = read_chunk_metadata(chunk_file, chunk_id)
        except exc.MissingAttribute as err:
            raise exc.FaultyChunk(err)
        size = int(meta['chunk_size'])
        md5_checksum = meta['chunk_hash'].lower()
        reader = ChunkReader(chunk_file,
                             size,
                             md5_checksum,
                             compression=meta.get("compression", ""))
        with closing(reader):
            for buf in reader:
                buf_len = len(buf)
                self.bytes_running_time = ratelimit(self.bytes_running_time,
                                                    self.max_bytes_per_second,
                                                    increment=buf_len)
                self.bytes_processed += buf_len
                self.total_bytes_processed += buf_len

        try:
            container_id = meta['container_id']
            content_id = meta['content_id']
            _obj_meta, data = self.container_client.content_locate(
                cid=container_id, content=content_id, properties=False)

            # Check chunk data
            chunk_data = None
            metachunks = set()
            for c in data:
                if c['url'].endswith(meta['chunk_id']):
                    metachunks.add(c['pos'].split('.', 2)[0])
                    chunk_data = c
            if not chunk_data:
                raise exc.OrphanChunk('Not found in content')

            metachunk_size = meta.get('metachunk_size')
            if metachunk_size is not None \
                    and chunk_data['size'] != int(metachunk_size):
                raise exc.FaultyChunk('Invalid metachunk size found')

            metachunk_hash = meta.get('metachunk_hash')
            if metachunk_hash is not None \
                    and chunk_data['hash'] != meta['metachunk_hash']:
                raise exc.FaultyChunk('Invalid metachunk hash found')

            if chunk_data['pos'] != meta['chunk_pos']:
                raise exc.FaultyChunk('Invalid chunk position found')

        except exc.NotFound:
            raise exc.OrphanChunk('Chunk not found in container')
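
Examples #10 and #12 pass `increment=buf_len`, which turns the same pacer into a bytes-per-second throttle: each call advances the schedule by `buf_len / max_bytes_per_second` seconds instead of one fixed step, so larger buffers earn proportionally longer sleeps. A hypothetical read loop capped at 1 MiB/s, using the sketch shown after Example #1 and assuming a `reader` that yields byte buffers as in Example #10:

bytes_running_time = 0
MAX_BYTES_PER_SECOND = 1024 * 1024  # hypothetical cap: 1 MiB/s

for buf in reader:
    bytes_running_time = ratelimit(bytes_running_time,
                                   MAX_BYTES_PER_SECOND,
                                   increment=len(buf))
    # ... consume buf ...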
Example #11
File: rebuilder.py Project: slpcat/oio-sds
    def rebuilder_pass(self, num, queue, **kwargs):
        while True:
            info = None
            err = None
            item = queue.get()
            try:
                info = self._rebuild_one(item, **kwargs)
            except Exception as exc:
                err = str(exc)
            queue.task_done()

            self.update_processed(item, info, error=err, **kwargs)
            self.log_report(**kwargs)

            self.items_run_time = ratelimit(self.items_run_time,
                                            self.max_items_per_second)
Example #12
    def chunk_audit(self, path):
        with open(path) as f:
            try:
                meta = read_chunk_metadata(f)
            except exc.MissingAttribute as e:
                raise exc.FaultyChunk('Missing extended attribute %s' % e)
            size = int(meta['chunk_size'])
            md5_checksum = meta['chunk_hash'].lower()
            reader = ChunkReader(f, size, md5_checksum)
            with closing(reader):
                for buf in reader:
                    buf_len = len(buf)
                    self.bytes_running_time = ratelimit(
                        self.bytes_running_time,
                        self.max_bytes_per_second,
                        increment=buf_len)
                    self.bytes_processed += buf_len
                    self.total_bytes_processed += buf_len

            try:
                container_id = meta['container_id']
                content_path = meta['content_path']
                content_attr, data = self.container_client.content_locate(
                    cid=container_id, path=content_path)

                # Check chunk data
                chunk_data = None
                metachunks = set()
                for c in data:
                    if c['url'].endswith(meta['chunk_id']):
                        metachunks.add(c['pos'].split('.', 2)[0])
                        chunk_data = c
                if not chunk_data:
                    raise exc.OrphanChunk('Not found in content')

                if chunk_data['size'] != int(meta['chunk_size']):
                    raise exc.FaultyChunk('Invalid chunk size found')

                if chunk_data['hash'] != meta['chunk_hash']:
                    raise exc.FaultyChunk('Invalid chunk hash found')

                if chunk_data['pos'] != meta['chunk_pos']:
                    raise exc.FaultyChunk('Invalid chunk position found')

            except exc.NotFound:
                raise exc.OrphanChunk('Chunk not found in container')
Example #13
    def index_pass(self):
        start_time = time.time()
        self.last_reported = start_time
        self.errors = 0
        self.successes = 0

        paths = paths_gen(self.volume)
        self.report('started', start_time)
        for path in paths:
            self.safe_update_index(path)
            self.chunks_run_time = ratelimit(
                self.chunks_run_time,
                self.max_chunks_per_second
            )
            now = time.time()
            if now - self.last_reported >= self.report_interval:
                self.report('running', start_time)
        self.report('ended', start_time)
Example #14
    def run(self):
        start_time = report_time = time.time()

        total_errors = 0

        for (container_id, content_id) in self._list_contents():
            self.safe_change_policy(container_id, content_id)

            self.contents_run_time = ratelimit(
                self.contents_run_time,
                self.max_contents_per_second
            )
            self.total_contents_processed += 1
            now = time.time()

            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(total).2f ' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / (now - report_time),
                        'total': (now - start_time)
                    }
                )
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.errors = 0
                self.last_reported = now
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(content_rate).2f ' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'content_rate': self.total_contents_processed / elapsed
            }
        )
Example #15
File: tool.py Project: newtoncorp/oio-sds
    def _fill_queue(self):
        """
        Fill the queue.
        """
        items_run_time = 0

        try:
            items_with_beanstalkd_reply = \
                self.tool.fetch_items_with_beanstalkd_reply()
            for item_with_beanstalkd_reply in items_with_beanstalkd_reply:
                items_run_time = ratelimit(items_run_time,
                                           self.max_items_per_second)
                self.queue_workers.put(item_with_beanstalkd_reply)

                if not self.tool.running:
                    break
        except Exception as exc:
            if self.tool.running:
                self.logger.error("Failed to fill queue: %s", exc)
                self.tool.success = False
Example #16
    def pass_volume(self):
        self.start_time = self.last_report = time.time()
        self.log_report('START', force=True)

        paths = paths_gen(self.volume)
        for path in paths:
            try:
                self.pass_chunk_file(path)
                self.chunks_processed += 1
            except Exception as exc:
                self.logger.error(
                    'Failed to pass chunk file (chunk_file=%s): %s', path, exc)
                self.chunk_errors += 1

            self.log_report('RUN')
            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)

        self.log_report('DONE', force=True)
        return self.chunk_errors == 0 \
            and all(errors == 0 for errors in self.bean_errors.values())
Example #17
    def crawl_volume(self):
        """
        Crawl the volume assigned to this worker, and index every database.
        """
        paths = paths_gen(self.volume)
        self.full_scan_nb += 1
        self.success_nb = 0
        self.failed_nb = 0
        now = time.time()
        self.last_report_time = now

        self.report("starting")

        for db_path in paths:

            # Graceful exit, hopefully
            if self._stop:
                break

            db_id = db_path.rsplit("/")[-1].rsplit(".")

            if len(db_id) != 3:
                self.warn("Malformed db file name !", db_path)
                continue

            db_id = ".".join(db_id[:2])
            self.index_meta2_database(db_id)

            self.last_index_time = ratelimit(
                self.last_index_time,
                self.max_indexed_per_second
            )

            now = time.time()
            if now - self.last_report_time >= self.report_interval:
                self.report("running")

        self.report("ended")
Example #18
    def audit_pass(self):
        self.namespace, self.address = check_volume(self.volume)

        start_time = report_time = time.time()

        total_errors = 0
        total_corrupted = 0
        total_orphans = 0
        total_faulty = 0
        audit_time = 0

        paths = paths_gen(self.volume)

        for path in paths:
            loop_time = time.time()
            self.safe_chunk_audit(path)
            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
            self.total_chunks_processed += 1
            now = time.time()

            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(corrupted)d '
                    '%(faulty)d '
                    '%(orphans)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(b_rate).2f '
                    '%(total).2f '
                    '%(audit_time).2f '
                    '%(audit_rate).2f' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'corrupted': self.corrupted_chunks,
                        'faulty': self.faulty_chunks,
                        'orphans': self.orphan_chunks,
                        'errors': self.errors,
                        'c_rate': self.passes / (now - report_time),
                        'b_rate': self.bytes_processed / (now - report_time),
                        'total': (now - start_time),
                        'audit_time': audit_time,
                        'audit_rate': audit_time / (now - start_time)
                    })
                report_time = now
                total_corrupted += self.corrupted_chunks
                total_orphans += self.orphan_chunks
                total_faulty += self.faulty_chunks
                total_errors += self.errors
                self.passes = 0
                self.corrupted_chunks = 0
                self.orphan_chunks = 0
                self.faulty_chunks = 0
                self.errors = 0
                self.bytes_processed = 0
                self.last_reported = now
            audit_time += (now - loop_time)
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(corrupted)d '
            '%(faulty)d '
            '%(orphans)d '
            '%(errors)d '
            '%(chunk_rate).2f '
            '%(bytes_rate).2f '
            '%(audit_time).2f '
            '%(audit_rate).2f' % {
                'elapsed': elapsed,
                'corrupted': total_corrupted + self.corrupted_chunks,
                'faulty': total_faulty + self.faulty_chunks,
                'orphans': total_orphans + self.orphan_chunks,
                'errors': total_errors + self.errors,
                'chunk_rate': self.total_chunks_processed / elapsed,
                'bytes_rate': self.total_bytes_processed / elapsed,
                'audit_time': audit_time,
                'audit_rate': audit_time / elapsed
            })
Example #19
    def index_pass(self):
        def safe_update_index(path):
            chunk_id = path.rsplit('/', 1)[-1]
            if len(chunk_id) != STRLEN_CHUNKID:
                self.logger.warn('WARN Not a chunk %s' % path)
                return
            for c in chunk_id:
                if c not in hexdigits:
                    self.logger.warn('WARN Not a chunk %s' % path)
                    return
            try:
                self.update_index(path, chunk_id)
                self.successes += 1
                self.logger.debug('Updated %s', path)
            except OioNetworkException as exc:
                self.errors += 1
                self.logger.warn('ERROR while updating %s: %s', path, exc)
            except VolumeException as exc:
                self.errors += 1
                self.logger.error('Cannot index %s: %s', path, exc)
                # All chunks of this volume are indexed in the same service,
                # no need to try another chunk, it will generate the same
                # error. Let the upper level retry later.
                raise
            except Exception:
                self.errors += 1
                self.logger.exception('ERROR while updating %s', path)
            self.total_since_last_reported += 1

        def report(tag):
            total = self.errors + self.successes
            now = time.time()
            elapsed = (now - start_time) or 0.000001
            self.logger.info(
                '%(tag)s=%(current_time)s '
                'elapsed=%(elapsed).02f '
                'pass=%(pass)d '
                'errors=%(errors)d '
                'chunks=%(nb_chunks)d %(c_rate).2f/s' % {
                    'tag': tag,
                    'current_time': datetime.fromtimestamp(
                        int(now)).isoformat(),
                    'pass': self.passes,
                    'errors': self.errors,
                    'nb_chunks': total,
                    'c_rate': self.total_since_last_reported / (
                        now - self.last_reported),
                    'elapsed': elapsed
                })
            self.last_reported = now
            self.total_since_last_reported = 0

        start_time = time.time()
        self.last_reported = start_time
        self.errors = 0
        self.successes = 0

        paths = paths_gen(self.volume)
        report('started')
        for path in paths:
            safe_update_index(path)
            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
            now = time.time()
            if now - self.last_reported >= self.report_interval:
                report('running')
        report('ended')
Example #20
    def mover_pass(self):
        self.namespace, self.address = check_volume(self.volume)

        start_time = report_time = time.time()

        total_errors = 0
        mover_time = 0

        paths = paths_gen(self.volume)

        for path in paths:
            loop_time = time.time()

            now = time.time()
            if now - self.last_usage_check >= self.usage_check_interval:
                used, total = statfs(self.volume)
                usage = (float(used) / total) * 100
                if usage <= self.usage_target:
                    self.logger.info(
                        'current usage %.2f%%: target reached (%.2f%%)', usage,
                        self.usage_target)
                    self.last_usage_check = now
                    break

            self.safe_chunk_move(path)
            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
            self.total_chunks_processed += 1
            now = time.time()

            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(b_rate).2f '
                    '%(total).2f '
                    '%(mover_time).2f '
                    '%(mover_rate).2f' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / (now - report_time),
                        'b_rate': self.bytes_processed / (now - report_time),
                        'total': (now - start_time),
                        'mover_time': mover_time,
                        'mover_rate': mover_time / (now - start_time)
                    })
                report_time = now
                total_errors += self.errors
                self.errors = 0
                self.passes = 0
                self.bytes_processed = 0
                self.last_reported = now
            mover_time += (now - loop_time)
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(chunk_rate).2f '
            '%(bytes_rate).2f '
            '%(mover_time).2f '
            '%(mover_rate).2f' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'chunk_rate': self.total_chunks_processed / elapsed,
                'bytes_rate': self.total_bytes_processed / elapsed,
                'mover_time': mover_time,
                'mover_rate': mover_time / elapsed
            })
Example #21
    def dispatch_tasks(self, job_id, job_type, job_info, job):
        job_config = job_info['config']
        job_params = job_config['params']
        tasks_per_second = job_config['tasks_per_second']
        tasks_batch_size = job_config['tasks_batch_size']
        last_task_id = job_info['tasks']['last_sent']

        job_tasks = job.get_tasks(job_params, marker=last_task_id)
        beanstalkd_workers = self.get_beanstalkd_workers()

        tasks_run_time = 0
        batch_per_second = tasks_per_second / float(tasks_batch_size)
        # The backend must have the tasks in order
        # to know the last task sent
        tasks = OrderedDict()
        for task_id, task_payload in job_tasks:
            if not self.running:
                break

            tasks[task_id] = task_payload
            if len(tasks) < tasks_batch_size:
                continue

            tasks_run_time = ratelimit(tasks_run_time, batch_per_second)

            sent = self.dispatch_tasks_batch(
                beanstalkd_workers,
                job_id, job_type, job_config, tasks)
            if sent:
                job_status, exc = self.handle_backend_errors(
                    self.backend.update_tasks_sent, job_id, tasks.keys())
                tasks.clear()
                if exc is not None:
                    self.logger.warn(
                        '[job_id=%s] Job has not been updated '
                        'with the sent tasks: %s', job_id, exc)
                    break
                if job_status == 'PAUSED':
                    self.logger.info('Job %s is paused', job_id)
                    return

            if not self.running:
                break
        else:
            sent = True
            if tasks:
                sent = self.dispatch_tasks_batch(
                    beanstalkd_workers,
                    job_id, job_type, job_config, tasks)
            if sent:
                job_status, exc = self.handle_backend_errors(
                    self.backend.update_tasks_sent, job_id, tasks.keys(),
                    all_tasks_sent=True)
                if exc is None:
                    if job_status == 'FINISHED':
                        self.logger.info('Job %s is finished', job_id)

                    self.logger.info(
                        'Finished dispatching job (job_id=%s)', job_id)
                    return
                else:
                    self.logger.warn(
                        '[job_id=%s] Job has not been updated '
                        'with the last sent tasks: %s', job_id, exc)

        _, exc = self.handle_backend_errors(self.backend.free, job_id)
        if exc is not None:
            self.logger.warn(
                '[job_id=%s] Job has not been freed: %s', job_id, exc)
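
Examples #21 and #23 rate-limit whole batches rather than individual tasks: `batch_per_second = tasks_per_second / float(tasks_batch_size)`, so with, say, `tasks_per_second = 100` and `tasks_batch_size = 25`, `ratelimit` is called with a cap of 4.0 and the dispatch loop wakes at most four times per second, each wake-up sending 25 tasks, which preserves the overall budget of 100 tasks per second. The `float()` cast matters under Python 2, where dividing two integers would otherwise truncate the rate.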
Example #22
    def rebuilder_pass(self):
        start_time = report_time = time.time()

        rebuilder_time = 0

        chunks = self._fetch_chunks()
        for cid, content_id, chunk_id_or_pos, _ in chunks:
            loop_time = time.time()
            if self.dry_run:
                self.dryrun_chunk_rebuild(cid, content_id, chunk_id_or_pos)
            else:
                self.safe_chunk_rebuild(cid, content_id, chunk_id_or_pos)

            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
            self.total_chunks_processed += 1
            now = time.time()

            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    'RUN  %(volume)s '
                    'started=%(start_time)s '
                    'passes=%(passes)d '
                    'errors=%(errors)d '
                    'chunks=%(nb_chunks)d %(c_rate).2f/s '
                    'bytes=%(nb_bytes)d %(b_rate).2fB/s '
                    'elapsed=%(total).2f '
                    '(rebuilder: %(success_rate).2f%%)' % {
                        'volume': self.volume,
                        'start_time': datetime.fromtimestamp(
                            int(report_time)).isoformat(),
                        'passes': self.passes,
                        'errors': self.errors,
                        'nb_chunks': self.total_chunks_processed,
                        'nb_bytes': self.total_bytes_processed,
                        'c_rate': self.passes / (now - report_time),
                        'b_rate': self.bytes_processed / (now - report_time),
                        'total': (now - start_time),
                        'rebuilder_time': rebuilder_time,
                        'success_rate': 100 * (
                            (self.total_chunks_processed - self.errors) /
                            float(self.total_chunks_processed))
                    })
                report_time = now
                self.passes = 0
                self.bytes_processed = 0
                self.last_reported = now
            rebuilder_time += (now - loop_time)
        end_time = time.time()
        elapsed = (end_time - start_time) or 0.000001
        self.logger.info(
            'DONE %(volume)s '
            'started=%(start_time)s '
            'ended=%(end_time)s '
            'passes=%(passes)d '
            'elapsed=%(elapsed).02f '
            'errors=%(errors)d '
            'chunks=%(nb_chunks)d %(c_rate).2f/s '
            'bytes=%(nb_bytes)d %(b_rate).2fB/s '
            'elapsed=%(rebuilder_time).2f '
            '(rebuilder: %(success_rate).2f%%)' % {
                'volume': self.volume,
                'start_time': datetime.fromtimestamp(
                    int(start_time)).isoformat(),
                'end_time': datetime.fromtimestamp(int(end_time)).isoformat(),
                'passes': self.passes,
                'elapsed': elapsed,
                'errors': self.errors,
                'nb_chunks': self.total_chunks_processed,
                'nb_bytes': self.total_bytes_processed,
                'c_rate': self.total_chunks_processed / elapsed,
                'b_rate': self.total_bytes_processed / elapsed,
                'rebuilder_time': rebuilder_time,
                'success_rate': 100 * (
                    (self.total_chunks_processed - self.errors) /
                    float(self.total_chunks_processed or 1))
            })
Example #23
    def dispatch_tasks(self, job_id, job_type, job_info, job):
        job_config = job_info['config']
        job_params = job_config['params']
        tasks_per_second = job_config['tasks_per_second']
        tasks_batch_size = job_config['tasks_batch_size']
        last_task_id = job_info['tasks']['last_sent']

        job_tasks = job.get_tasks(job_params, marker=last_task_id)
        beanstalkd_workers = self.get_beanstalkd_workers()

        tasks_run_time = 0
        batch_per_second = tasks_per_second / float(tasks_batch_size)
        # The backend must have the tasks in order
        # to know the last task sent
        tasks = OrderedDict()
        for task_id, task_payload in job_tasks:
            if not self.running:
                break

            tasks[task_id] = task_payload
            if len(tasks) < tasks_batch_size:
                continue

            tasks_run_time = ratelimit(tasks_run_time, batch_per_second)

            # Make sure that the sent tasks will be saved
            # before being processed
            exc = None
            sent = False
            while not sent:
                (job_status, old_last_sent), exc = self.handle_backend_errors(
                    self.backend.update_tasks_sent, job_id, tasks.keys())
                if exc is not None:
                    self.logger.warn(
                        '[job_id=%s] Job could not update '
                        'the sent tasks: %s', job_id, exc)
                    break
                sent = self.dispatch_tasks_batch(beanstalkd_workers, job_id,
                                                 job_type, job_config, tasks)
                if not sent:
                    self.logger.warn(
                        '[job_id=%s] Job aborting the last sent tasks', job_id)
                    job_status, exc = self.handle_backend_errors(
                        self.backend.abort_tasks_sent, job_id, tasks.keys(),
                        old_last_sent)
                    if exc is not None:
                        self.logger.warn(
                            '[job_id=%s] Job could not abort '
                            'the last sent tasks: %s', job_id, exc)
                        break
                if job_status == XcuteJobStatus.PAUSED:
                    self.logger.info('Job %s is paused', job_id)
                    return

                if not self.running:
                    break
                sleep(1)

            if exc is not None and not self.running:
                break
            tasks.clear()
        else:
            # Make sure that the sent tasks will be saved
            # before being processed
            sent = False
            while not sent:
                (job_status, old_last_sent), exc = self.handle_backend_errors(
                    self.backend.update_tasks_sent,
                    job_id,
                    tasks.keys(),
                    all_tasks_sent=True)
                if exc is not None:
                    self.logger.warn(
                        '[job_id=%s] Job could not update '
                        'the sent tasks: %s', job_id, exc)
                    break
                if tasks:
                    sent = self.dispatch_tasks_batch(beanstalkd_workers,
                                                     job_id, job_type,
                                                     job_config, tasks)
                else:
                    sent = True
                if not sent:
                    self.logger.warn(
                        '[job_id=%s] Job aborting the last sent tasks', job_id)
                    job_status, exc = self.handle_backend_errors(
                        self.backend.abort_tasks_sent, job_id, tasks.keys(),
                        old_last_sent)
                    if exc is not None:
                        self.logger.warn(
                            '[job_id=%s] Job could not abort '
                            'the last sent tasks: %s', job_id, exc)
                        break
                else:
                    if job_status == XcuteJobStatus.FINISHED:
                        self.logger.info('Job %s is finished', job_id)

                    self.logger.info('Finished dispatching job (job_id=%s)',
                                     job_id)
                    return
                if job_status == XcuteJobStatus.PAUSED:
                    self.logger.info('Job %s is paused', job_id)
                    return

                if not self.running:
                    break
                sleep(1)

        self.logger.warn('[job_id=%s] Job was stopped before it was finished',
                         job_id)

        _, exc = self.handle_backend_errors(self.backend.free, job_id)
        if exc is not None:
            self.logger.warn('[job_id=%s] Job has not been freed: %s', job_id,
                             exc)
Example #24
    def mover_pass(self, **kwargs):
        start_time = report_time = time.time()

        total_errors = 0
        mover_time = 0

        pool = GreenPool(self.concurrency)

        paths = paths_gen(self.volume)

        for path in paths:
            loop_time = time.time()

            now = time.time()
            if now - self.last_usage_check >= self.usage_check_interval:
                free_ratio = statfs(self.volume)
                usage = (1 - float(free_ratio)) * 100
                if usage <= self.usage_target:
                    self.logger.info(
                        'current usage %.2f%%: target reached (%.2f%%)', usage,
                        self.usage_target)
                    break
                self.last_usage_check = now

            # Spawn a chunk move task.
            # The call will block if no green thread is available.
            pool.spawn_n(self.safe_chunk_move, path)

            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
            self.total_chunks_processed += 1
            now = time.time()

            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(b_rate).2f '
                    '%(total).2f '
                    '%(mover_time).2f '
                    '%(mover_rate).2f' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / (now - report_time),
                        'b_rate': self.bytes_processed / (now - report_time),
                        'total': (now - start_time),
                        'mover_time': mover_time,
                        'mover_rate': mover_time / (now - start_time)
                    })
                report_time = now
                total_errors += self.errors
                self.errors = 0
                self.passes = 0
                self.bytes_processed = 0
                self.last_reported = now
            mover_time += (now - loop_time)
            if self.limit != 0 and self.total_chunks_processed >= self.limit:
                break
        pool.waitall()
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(chunk_rate).2f '
            '%(bytes_rate).2f '
            '%(mover_time).2f '
            '%(mover_rate).2f' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'chunk_rate': self.total_chunks_processed / elapsed,
                'bytes_rate': self.total_bytes_processed / elapsed,
                'mover_time': mover_time,
                'mover_rate': mover_time / elapsed
            })