# Example 1
class TestRdirMeta2Client(unittest.TestCase):
    """Unit tests for the meta2 indexing methods of RdirClient.

    The HTTP layer (``_rdir_request``) is replaced by a Mock in every
    test; only the parameters forwarded to it are checked.
    """

    def setUp(self):
        super(TestRdirMeta2Client, self).setUp()
        self.namespace = "dummy"
        self.volid = "e29b4c56-8522-4118-82ea"
        self.container_url = "OPENIO/testing/test1"
        self.container_id = "random833999id"
        self.mtime = 2874884.47
        self.rdir_client = RdirClient({'namespace': self.namespace},
                                      endpoint='127.0.0.0:6000')

    def tearDown(self):
        super(TestRdirMeta2Client, self).tearDown()
        del self.rdir_client

    def test_volume_create(self):
        # We should normally receive an HTTPResponse with an empty body.
        self.rdir_client._rdir_request = Mock(side_effect=(None, ''))
        self.rdir_client.meta2_index_create(self.volid)
        self.rdir_client._rdir_request.assert_called_once_with(
            self.volid, 'POST', 'create', service_type='meta2')
        del self.rdir_client._rdir_request

    def test_volume_fetch(self):
        self.rdir_client._rdir_request = Mock(
            return_value=(None, {"records": [], "truncated": False}))
        self.rdir_client.meta2_index_fetch(self.volid,
                                           prefix=self.container_url)
        self.rdir_client._rdir_request.assert_called_once_with(
            volume=self.volid,
            method='POST',
            action='fetch',
            json={
                'prefix': self.container_url,
                'limit': 4096,
            },
            service_type='meta2')
        del self.rdir_client._rdir_request

    def test_volume_push(self):
        self.rdir_client._rdir_request = Mock(side_effect=(None, ''))
        self.rdir_client.meta2_index_push(self.volid, self.container_url,
                                          self.container_id, self.mtime)
        # The mtime is expected to be truncated to an integer.
        self.rdir_client._rdir_request.assert_called_once_with(
            volume=self.volid,
            method='POST',
            action='push',
            create=True,
            json={
                'container_url': self.container_url,
                'container_id': self.container_id,
                'mtime': int(self.mtime),
            },
            headers=None,
            service_type='meta2')
        del self.rdir_client._rdir_request

    def test_volume_delete(self):
        self.rdir_client._rdir_request = Mock(side_effect=(None, ''))
        self.rdir_client.meta2_index_delete(self.volid, self.container_url,
                                            self.container_id)
        self.rdir_client._rdir_request.assert_called_once_with(
            volume=self.volid,
            method='POST',
            action='delete',
            create=False,
            json={
                'container_url': self.container_url,
                'container_id': self.container_id,
            },
            service_type='meta2')
        del self.rdir_client._rdir_request
# Example 2
class Meta2IndexingWorker(object):
    """
    Indexing worker responsible for a single volume.

    Periodically crawls every meta2 database file found under the volume
    path and (re-)registers each one in the rdir index, reporting
    progress through the logger.
    """

    def __init__(self, volume_path, conf, pool_manager=None):
        """
        Initializes an Indexing worker for indexing meta2 databases.

        Possible values of conf relating to this worker are:
        - interval: (int) in sec time between two full scans. Default: half an
                    hour.
        - report_interval: (int) in sec, time between two reports: Default: 300
        - scanned_per_second: (int) maximum number of indexed databases /s.
        - try_removing_faulty_indexes : In the event where we encounter a
            database that's not supposed to be handled by this volume, attempt
            to remove it from this volume rdir index if it exists
            WARNING: The decision is based off of a proxy response, that could
            be affected by cache inconsistencies for example, use at your own
            risk. Default: False

        :param volume_path: The volume path to be indexed
        :param conf: The configuration to be passed to the needed services
        :param pool_manager: A connection pool manager. If none is given, a
                new one with a default size of 10 will be created.
        """
        self.logger = get_logger(conf)
        self._stop = False
        self.volume = volume_path
        # Per-scan counters, reset at the start of each crawl_volume() pass.
        self.success_nb = 0
        self.failed_nb = 0
        self.full_scan_nb = 0
        self.last_report_time = 0
        self.last_scan_time = 0
        self.last_index_time = 0
        self.start_time = 0
        self.indexed_since_last_report = 0
        self.scans_interval = int_value(
            conf.get('interval'), 1800)
        self.report_interval = int_value(
            conf.get('report_interval'), 300)
        self.max_indexed_per_second = int_value(
            conf.get('scanned_per_second'), 3000)
        self.namespace, self.volume_id = check_volume_for_service_type(
            self.volume, "meta2")
        self.attempt_bad_index_removal = boolean_value(
            conf.get('try_removing_faulty_indexes'), False)

        if not pool_manager:
            pool_manager = get_pool_manager(pool_connections=10)
        self.index_client = RdirClient(conf, logger=self.logger,
                                       pool_manager=pool_manager)
        self.dir_client = DirectoryClient(conf, logger=self.logger,
                                          pool_manager=pool_manager)

    def report(self, tag):
        """
        Log the status of indexer.

        Also resets the per-report counter and records the report time.

        :param tag: One of three: starting, running, ended.
        """
        total = self.success_nb + self.failed_nb
        now = time.time()
        # Guard against division by zero on the very first report.
        elapsed = (now - self.start_time) or 0.00001
        since_last_rprt = (now - self.last_report_time) or 0.00001
        self.logger.info(
            'volume_id=%(volume_id)s %(tag)s=%(current_time)s '
            'elapsed=%(elapsed).02f '
            'pass=%(pass)d '
            'errors=%(errors)d '
            'containers_indexed=%(total_indexed)d %(index_rate).2f/s',
            {
                'volume_id': self.volume_id,
                'tag': tag,
                'current_time': datetime.fromtimestamp(
                    int(now)).isoformat(),
                'pass': self.full_scan_nb,
                'errors': self.failed_nb,
                'total_indexed': total,
                'index_rate': self.indexed_since_last_report / since_last_rprt,
                'elapsed': elapsed
            }
        )
        self.last_report_time = now
        self.indexed_since_last_report = 0

    def warn(self, msg, container_id):
        """
        Log a warning about a single container, tagged with the volume id.

        :param msg: free-form error description
        :param container_id: the container (or path) the warning is about
        """
        # logger.warning: .warn() is a deprecated alias of .warning().
        self.logger.warning(
            'volume_id=%(volume_id)s container_id=%(container_id)s %(error)s',
            {
                'volume_id': self.volume_id,
                'container_id': container_id,
                'error': msg
            }
        )

    def _attempt_index_removal(self, db_path, cid):
        """
        Fail safe removal attempt: deindex the database from this volume's
        rdir index, logging (instead of raising) on failure.
        """
        try:
            self.index_client.meta2_index_delete(self.volume_id, db_path, cid)
        except exc.OioException as exception:
            self.warn(
                container_id=cid,
                msg="Unable to remove database from the volume "
                    "index : {0}".format(str(exception))
            )

    def index_meta2_database(self, db_id):
        """
        Add a meta2 database to the rdir index. Fails if the database isn't
        handled by the current volume.

        :param db_id: The ContentID representing the reference to the database.
        """
        if len(db_id) < STRLEN_REFERENCEID:
            self.warn('Not a valid container ID', db_id)
            return
        try:
            srvcs = self.dir_client.list(cid=db_id)
            account, container = srvcs['account'], srvcs['name']
            is_peer = self.volume_id in [x['host'] for x in srvcs['srv'] if
                                         x['type'] == 'meta2']

            container_id = db_id.rsplit(".")[0]

            if six.PY2:
                # Force bytes on PY2 so the URL format below doesn't mix
                # unicode and str.
                if isinstance(account, six.text_type):
                    account = account.encode('utf-8')
                if isinstance(container, six.text_type):
                    container = container.encode('utf-8')
            cont_url = "{0}/{1}/{2}".format(self.namespace, account, container)

            if not is_peer:
                # BUGFIX: the two implicitly-concatenated fragments were
                # missing a separating space ("handled bythis volume").
                self.warn("Trying to index a container that isn't handled by "
                          "this volume", db_id)
                if self.attempt_bad_index_removal:
                    self._attempt_index_removal(cont_url, container_id)
                return

            self.index_client.meta2_index_push(volume_id=self.volume_id,
                                               container_url=cont_url,
                                               mtime=time.time(),
                                               container_id=container_id)

            self.success_nb += 1
        except exc.OioException as exception:
            self.failed_nb += 1
            # BUGFIX: message typo "Unable to to index".
            self.warn("Unable to index container: %s" % str(exception),
                      db_id)

        self.indexed_since_last_report += 1

    def crawl_volume(self):
        """
        Crawl the volume assigned to this worker, and index every database.
        """
        paths = paths_gen(self.volume)
        self.full_scan_nb += 1
        self.success_nb = 0
        self.failed_nb = 0
        now = time.time()
        self.last_report_time = now

        self.report("starting")

        for db_path in paths:

            # Graceful exit, hopefully
            if self._stop:
                break

            # File names are expected to look like "<cid>.<seq>.meta2":
            # exactly 3 dot-separated parts.
            db_id = db_path.rsplit("/")[-1].rsplit(".")

            if len(db_id) != 3:
                self.warn("Malformed db file name !", db_path)
                continue

            # Keep "<cid>.<seq>" as the database identifier.
            db_id = ".".join(db_id[:2])
            self.index_meta2_database(db_id)

            # Throttle to at most max_indexed_per_second databases/s.
            self.last_index_time = ratelimit(
                self.last_index_time,
                self.max_indexed_per_second
            )

            now = time.time()
            if now - self.last_report_time >= self.report_interval:
                self.report("running")

        self.report("ended")

    def run(self):
        """
        Main worker loop: full scan, then sleep scans_interval seconds,
        until stop() is called.
        """
        self.start_time = time.time()
        while not self._stop:
            try:
                self.crawl_volume()
                self.last_scan_time = time.time()
                time.sleep(self.scans_interval)
            except exc.OioException as exception:
                self.logger.exception("ERROR during indexing meta2: %s",
                                      exception)

    def stop(self):
        """
        Could be needed for eventually gracefully stopping.
        """
        self._stop = True
# Example 3
class TestMeta2Indexing(BaseTestCase):
    """Functional tests for the meta2 indexing worker."""

    def setUp(self):
        super(TestMeta2Indexing, self).setUp()
        self.rdir_client = RdirClient(self.conf)
        self.directory_client = DirectoryClient(self.conf)
        self.container_client = ContainerClient(self.conf)
        # A random number (1..10) of randomly named containers.
        self.containers = [random_str(14) for _ in range(0, randint(1, 10))]
        self.containers_svcs = {}
        self.event_agent_name = 'event-agent-1'

    def tearDown(self):
        super(TestMeta2Indexing, self).tearDown()
        self._containers_cleanup()
        self._service(self.event_agent_name, 'start', wait=3)

    def _containers_cleanup(self):
        # Delete each test container, then drop whatever index entries
        # the worker pushed for it on each of its meta2 services.
        # NOTE(review): meta2_index_delete is called with container_path=
        # here but container_url elsewhere — confirm the client accepts both.
        for name in self.containers:
            self.container_client.container_delete(self.account, name)
            for svc in self.containers_svcs[name]:
                self.rdir_client.meta2_index_delete(
                    volume_id=svc['host'],
                    container_path="{0}/{1}/{2}".format(
                        self.ns, self.account, name),
                    container_id=cid_from_name(self.account, name))

    def _filter_by_managing_svc(self, all_containers, svc_of_interest):
        """
        Filters through the containers returning only those that have
        svc_of_interest in their list of managing services.

        :returns: sorted list of matching container names
        """
        return sorted(
            name
            for name, services in all_containers.items()
            if svc_of_interest in (s['host'] for s in services))

    def test_volume_indexing_worker(self):
        """
        Test steps:
        - Generate a list of container names and create them
        - Collect their respective meta2 servers
        - For each meta2 server:
            - Run a meta2 indexing worker
            - List all rdir index records and match then with the
              services we're expecting.
        :return:
        """
        # Stop the event agent so nothing else updates the rdir index
        # while the worker runs.
        self._service(self.event_agent_name, "stop", wait=3)

        for name in self.containers:
            self.container_client.container_create(account=self.account,
                                                   reference=name)

        for name in self.containers:
            services = self.directory_client.list(
                account=self.account, reference=name)['srv']
            self.containers_svcs[name] = [
                s for s in services if s['type'] == 'meta2']

        # Map each meta2 service (id or address) to its data path.
        meta2_data_paths = {
            svc.get('service_id', svc['addr']): svc['path']
            for svc in self.conf['services']['meta2']
        }

        distinct_meta2_servers = {
            s['host']
            for svc_list in self.containers_svcs.values()
            for s in svc_list
        }

        for host in distinct_meta2_servers:
            expected_containers = self._filter_by_managing_svc(
                self.containers_svcs, host)
            worker = Meta2IndexingWorker(meta2_data_paths[host], self.conf)
            worker.crawl_volume()
            indexed_containers = sorted(
                rec['container_url'].split('/')[-1]
                for rec in self.rdir_client.meta2_index_fetch_all(
                    volume_id=host))

            for name in expected_containers:
                self.assertIn(name, indexed_containers)