class Meta2DecommissionJob(XcuteRdirJob):
    """
    Job of type 'meta2-decommission': produces one task per container
    listed by the rdir index of a given meta2 service.
    """

    JOB_TYPE = 'meta2-decommission'
    TASK_CLASS = Meta2DecommissionTask

    @classmethod
    def sanitize_params(cls, job_params):
        """
        Validate the job parameters on top of the parent class checks.

        :param job_params: raw parameters; 'service_id' is mandatory,
            'dst' is optional (stored as None when absent).
        :returns: a tuple made of the sanitized parameters and the string
            'meta2/<service_id>', which replaces the second element
            returned by the parent class.
        :raises ValueError: if 'service_id' is missing or empty.
        """
        parent = super(Meta2DecommissionJob, cls)
        params, _ = parent.sanitize_params(job_params)

        service_id = job_params.get('service_id')
        if not service_id:
            raise ValueError('Missing service ID')

        params['service_id'] = service_id
        params['dst'] = job_params.get('dst')
        return params, 'meta2/%s' % service_id

    def __init__(self, conf, logger=None):
        """
        :param conf: configuration forwarded to the parent class and to
            the rdir client.
        :param logger: optional logger shared with the rdir client.
        """
        super(Meta2DecommissionJob, self).__init__(conf, logger=logger)
        self.rdir_client = RdirClient(conf, logger=logger)

    def get_tasks(self, job_params, marker=None):
        """
        Generate the payload of each task of the job.

        :param job_params: sanitized job parameters.
        :param marker: container URL from which the listing resumes.
        :returns: a generator of (marker, task payload) tuples, the marker
            being the container URL and the payload holding the
            container ID.
        """
        for url, cid in self._containers_from_rdir(job_params, marker):
            yield url, {'container_id': cid}

    def get_total_tasks(self, job_params, marker=None):
        """
        Count the tasks of the job, reporting them by batches of 1000.

        :param job_params: sanitized job parameters.
        :param marker: container URL from which the listing resumes.
        :returns: a generator of (marker, number of tasks) tuples: one
            tuple for every 1000 containers, then a last one for the
            remainder (if any).
        """
        last_marker = marker
        count = 0
        for last_marker, _ in self._containers_from_rdir(job_params, marker):
            count += 1
            if count % 1000 == 0:
                yield last_marker, 1000

        # Report what is left after the last complete batch.
        leftover = count % 1000
        if leftover:
            yield last_marker, leftover

    def _containers_from_rdir(self, job_params, marker):
        """
        List the containers indexed in rdir for the meta2 service.

        :param job_params: job parameters providing 'service_id',
            'rdir_fetch_limit' and 'rdir_timeout'.
        :param marker: marker forwarded to the rdir listing.
        :returns: a generator of (container URL, container ID) tuples.
        """
        service_id = job_params['service_id']
        fetch_limit = job_params['rdir_fetch_limit']
        timeout = job_params['rdir_timeout']

        entries = self.rdir_client.meta2_index_fetch_all(
            service_id, marker=marker, timeout=timeout, limit=fetch_limit)
        for entry in entries:
            yield entry['container_url'], entry['container_id']
Beispiel #2
0
class Meta2Rebuilder(Tool):
    """
    Rebuild meta2 databases.

    The containers to process are read either from an input file (one
    container ID per line) or from the rdir index of a meta2 service.
    """

    # Page size used when listing the rdir index, unless 'rdir_fetch_limit'
    # is set in the configuration.
    DEFAULT_RDIR_FETCH_LIMIT = 100

    def __init__(self, conf, input_file=None, service_id=None, **kwargs):
        """
        :param conf: configuration, forwarded to the parent class.
        :param input_file: path of a file listing container IDs.
        :param service_id: ID of the meta2 service whose rdir index
            must be listed (ignored when `input_file` is set).
        :param kwargs: extra arguments forwarded to the parent class.
        """
        super(Meta2Rebuilder, self).__init__(conf, **kwargs)

        # input
        self.input_file = input_file
        self.meta2_id = service_id

        # meta2/rdir
        self.rdir_client = RdirClient(self.conf, logger=self.logger)
        self.rdir_fetch_limit = int_value(self.conf.get('rdir_fetch_limit'),
                                          self.DEFAULT_RDIR_FETCH_LIMIT)

    @staticmethod
    def string_from_item(item):
        """
        Format an item (namespace, container ID) as 'namespace|container_id'.
        """
        namespace, container_id = item
        return '%s|%s' % (namespace, container_id)

    def _fetch_items_from_input_file(self):
        """
        Yield (namespace, container ID) for each line of the input file.

        Blank lines and lines starting with '#' are skipped.
        """
        with open(self.input_file, 'r') as ifile:
            for line in ifile:
                stripped = line.strip()
                if not stripped or stripped.startswith('#'):
                    continue

                container_id = stripped
                yield self.namespace, container_id

    def _fetch_items_from_meta2_id(self):
        """
        Yield (namespace, container ID) for each container found in the
        rdir index of the meta2 service.
        """
        # Honor the configured page size: it used to be computed in
        # __init__ but never passed to the rdir client.
        containers = self.rdir_client.meta2_index_fetch_all(
            self.meta2_id, limit=self.rdir_fetch_limit)
        for container in containers:
            yield self.namespace, container['container_id']

    def _fetch_items(self):
        """
        Select the source of items: the input file takes precedence over
        the meta2 service ID. Without any of them, nothing is yielded.
        """
        if self.input_file:
            return self._fetch_items_from_input_file()
        if self.meta2_id:
            return self._fetch_items_from_meta2_id()

        def _empty_generator():
            return
            yield  # pylint: disable=unreachable

        return _empty_generator()

    def _get_report(self, status, end_time, counters):
        """
        Build a one-line progress report.

        :param status: label placed at the beginning of the report.
        :param end_time: timestamp closing the reported period.
        :param counters: tuple (references processed since the last
            report, total references processed, errors since the last
            report, total errors).
        :returns: the report, as a string.
        """
        references_processed, total_references_processed, \
            errors, total_errors = counters
        # Fall back on a tiny duration to avoid dividing by zero.
        time_since_last_report = (end_time - self.last_report) or 0.00001
        total_time = (end_time - self.start_time) or 0.00001
        report = (
            '%(status)s '
            'last_report=%(last_report)s %(time_since_last_report).2fs '
            'references=%(references)d %(references_rate).2f/s '
            'errors=%(errors)d %(errors_rate).2f%% '
            'start_time=%(start_time)s %(total_time).2fs '
            'total_references='
            '%(total_references)d %(total_references_rate).2f/s '
            'total_errors=%(total_errors)d %(total_errors_rate).2f%%' % {
                'status':
                status,
                'last_report':
                datetime.fromtimestamp(int(self.last_report)).isoformat(),
                'time_since_last_report':
                time_since_last_report,
                'references':
                references_processed,
                'references_rate':
                references_processed / time_since_last_report,
                'errors':
                errors,
                'errors_rate':
                100 * errors / float(references_processed or 1),
                'start_time':
                datetime.fromtimestamp(int(self.start_time)).isoformat(),
                'total_time':
                total_time,
                'total_references':
                total_references_processed,
                'total_references_rate':
                total_references_processed / total_time,
                'total_errors':
                total_errors,
                'total_errors_rate':
                100 * total_errors / float(total_references_processed or 1)
            })
        # The progress is only known when the total has been computed.
        if self.total_expected_items is not None:
            progress = 100 * total_references_processed / \
                float(self.total_expected_items or 1)
            report += ' progress=%d/%d %.2f%%' % \
                (total_references_processed, self.total_expected_items,
                 progress)
        return report

    def create_worker(self, queue_workers, queue_reply):
        """
        Instantiate a worker bound to this tool and to the given queues.
        """
        return ContentRepairerWorker(self, queue_workers, queue_reply)

    def _load_total_expected_items(self):
        """
        Intentionally does nothing: no total is computed here.
        """
        pass
Beispiel #3
0
class TestMeta2Indexing(BaseTestCase):
    """
    Check that the meta2 indexing worker registers in rdir the containers
    hosted by each meta2 service.
    """

    def setUp(self):
        super(TestMeta2Indexing, self).setUp()
        self.rdir_client = RdirClient(self.conf)
        self.directory_client = DirectoryClient(self.conf)
        self.container_client = ContainerClient(self.conf)
        # Between 1 and 10 random container names.
        self.containers = [random_str(14) for _ in range(0, randint(1, 10))]
        # Container name -> list of meta2 services (filled by the test).
        self.containers_svcs = {}
        self.event_agent_name = 'event-agent-1'

    def tearDown(self):
        super(TestMeta2Indexing, self).tearDown()
        try:
            self._containers_cleanup()
        finally:
            # The test stops the event agent: always restart it, even
            # when the cleanup fails, so the next tests are not affected.
            self._service(self.event_agent_name, 'start', wait=3)

    def _containers_cleanup(self):
        """
        Delete the containers of the test and their rdir index records.
        """
        for container in self.containers:
            self.container_client.container_delete(self.account, container)
            # The services may not have been collected if the test failed
            # early: in this case there is no known record to remove.
            for svc in self.containers_svcs.get(container, ()):
                self.rdir_client.meta2_index_delete(
                    volume_id=svc['host'],
                    container_path="{0}/{1}/{2}".format(
                        self.ns, self.account, container),
                    container_id=cid_from_name(self.account, container))

    def _filter_by_managing_svc(self, all_containers, svc_of_interest):
        """
        Filters through the containers returning only those that have
        svc_of_interest in their list of managing services.

        :param all_containers: dictionary mapping each container name to
            its list of services (dictionaries with a 'host' key).
        :param svc_of_interest: host of the service to look for.
        :returns: the sorted list of the matching container names.
        """
        return sorted(
            name for name, services in all_containers.items()
            if svc_of_interest in [svc['host'] for svc in services])

    def test_volume_indexing_worker(self):
        """
        Test steps:
        - Generate a list of container names and create them
        - Collect their respective meta2 servers
        - For each meta2 server:
            - Run a meta2 indexing worker
            - List all rdir index records and match them with the
              services we're expecting.
        :return:
        """
        self._service(self.event_agent_name, "stop", wait=3)

        for container in self.containers:
            self.container_client.container_create(account=self.account,
                                                   reference=container)

        for container in self.containers:
            self.containers_svcs[container] = [
                x
                for x in self.directory_client.list(account=self.account,
                                                    reference=container)['srv']
                if x['type'] == 'meta2'
            ]

        # Service ID (or address, when there is no ID) -> data path.
        meta2_data_paths = {}
        for svc in self.conf['services']['meta2']:
            svc_host = svc.get('service_id', svc['addr'])
            meta2_data_paths[svc_host] = svc['path']

        distinct_meta2_servers = set()
        for svc_list in self.containers_svcs.values():
            for svc in svc_list:
                distinct_meta2_servers.add(svc['host'])

        for svc in distinct_meta2_servers:
            expected_containers = self._filter_by_managing_svc(
                self.containers_svcs, svc)
            worker = Meta2IndexingWorker(meta2_data_paths[svc], self.conf)
            worker.crawl_volume()
            # The container name is the last part of the container URL.
            indexed_containers = sorted([
                x['container_url'].split('/')[-1]
                for x in self.rdir_client.meta2_index_fetch_all(volume_id=svc)
            ])

            for cont in expected_containers:
                self.assertIn(cont, indexed_containers)