Example #1
    def test_exception_handling(self):
        """
        Test if the scheduled job can handle exceptions
        """
        def raise_an_exception(*args, **kwargs):
            raise RuntimeError('Emulated snapshot delete error')

        structure = DalHelper.build_dal_structure(
            {'vpools': [1],
             'vdisks': [(1, 1, 1, 1), (2, 1, 1, 1)],  # (<id>, <storagedriver_id>, <vpool_id>, <mds_service_id>)
             'mds_services': [(1, 1)],
             'storagerouters': [1],
             'storagedrivers': [(1, 1, 1)]}  # (<id>, <vpool_id>, <storagerouter_id>)
        )

        vdisk_1, vdisk_2 = structure['vdisks'].values()
        storagedriver_1 = structure['storagedrivers'][1]

        vdisks = [vdisk_1, vdisk_2]

        for vdisk in vdisks:
            [dynamic for dynamic in vdisk._dynamics if dynamic.name == 'snapshots'][0].timeout = 0
            for i in xrange(0, 2):
                metadata = {'label': str(i),
                            'is_consistent': False,
                            'is_sticky': False,
                            'timestamp': str(int(time.time() - datetime.timedelta(2).total_seconds() - i))}
                snapshot_id = VDiskController.create_snapshot(vdisk.guid, metadata)
                if vdisk == vdisk_1:
                    StorageRouterClient.delete_snapshot_callbacks[vdisk.volume_id] = {snapshot_id: raise_an_exception}
        with self.assertRaises(RuntimeError):
            GenericController.delete_snapshots_storagedriver(
                storagedriver_guid=storagedriver_1.guid)
        self.assertEqual(1, len(vdisk_2.snapshot_ids),
                         'One snapshot should be removed for vdisk 2')
        self.assertEqual(2, len(vdisk_1.snapshot_ids),
                         'No snapshots should be removed for vdisk 1')
Example #2
    def _refresh_package_information():
        # Refresh updates
        UpdateController._logger.debug('Refreshing package information')
        counter = 1
        while counter < 6:
            try:
                GenericController.refresh_package_information()
                return
            except NoLockAvailableException:
                UpdateController._logger.debug('Attempt {0}: Could not refresh the update information, trying again'.format(counter))
                time.sleep(6)  # Wait 30 seconds max in total
            counter += 1
            if counter == 6:
                raise Exception('Could not refresh the update information')
Example #3
    def test_delete_snapshot_scrubbing_lock(self):
        """
        Tests the skip-if-scrubbed logic
        """
        snapshot_while_scrub_results = []

        def delete_snapshot_while_scrubbing(*args, **kwargs):
            _ = args, kwargs
            try:
                snapshot_while_scrub_results.append(VDiskController.delete_snapshot(vdisk_1.guid, vdisk_1.snapshot_ids[0]))
            except RuntimeError as ex:
                snapshot_while_scrub_results.append(ex)

        structure = DalHelper.build_dal_structure(
            {'vpools': [1],
             'vdisks': [(1, 1, 1, 1)],  # (<id>, <storagedriver_id>, <vpool_id>, <mds_service_id>)
             'mds_services': [(1, 1)],
             'storagerouters': [1],
             'storagedrivers': [(1, 1, 1)]}  # (<id>, <vpool_id>, <storagerouter_id>)
        )
        vdisks = structure['vdisks']
        vdisk_1 = vdisks[1]

        # Create automatic snapshot for both vDisks
        success, fail = GenericController.snapshot_all_vdisks()
        self.assertEqual(first=len(fail), second=0, msg='Expected 0 failed snapshots')
        self.assertEqual(first=len(success), second=1, msg='Expected 1 successful snapshot')
        self.assertEqual(first=len(vdisk_1.snapshot_ids), second=1, msg='Expected 1 snapshot ID for vDisk {0}'.format(vdisk_1.name))
        self.assertEqual(first=len(vdisk_1.snapshots), second=1, msg='Expected 1 snapshot for vDisk {0}'.format(vdisk_1.name))

        proxy_names, thread_names, vdisk_namespaces = self.generate_scrub_related_info(structure)
        LockedClient.scrub_controller = {'possible_threads': thread_names,
                                         'volumes': {},
                                         'waiter': Waiter(len(thread_names[0:1]))}  # only 1 disk -> 1 thread
        # Scrub all volumes
        for vdisk_id, vdisk in vdisks.iteritems():
            LockedClient.scrub_controller['volumes'][vdisk.volume_id] = {'success': True,
                                                                         'scrub_work': range(vdisk_id)}

        hooks = {'post_vdisk_scrub_registration': delete_snapshot_while_scrubbing}  # Make the scrubber wait
        ScrubShared._test_hooks.update(hooks)
        GenericController.execute_scrub()

        # Ensure delete snapshot fails for vdisk_1 because it is being scrubbed
        result_while_scrub = snapshot_while_scrub_results[0]
        self.assertIsInstance(result_while_scrub, Exception, 'Expected an exception to have occurred')
        self.assertEqual(str(result_while_scrub), 'VDisk is being scrubbed. Unable to remove snapshots at this time', 'Exception should be about the disk being scrubbed')
        self.assertEqual(first=len(vdisk_1.snapshot_ids), second=1, msg='Expected 1 snapshot ID for vDisk {0}'.format(vdisk_1.name))
        self.assertEqual(first=len(vdisk_1.snapshots), second=1, msg='Expected 1 snapshot for vDisk {0}'.format(vdisk_1.name))
Example #4
    def test_scrubbing_exception_handling(self):
        """
        Test if the scheduled job can handle scrub related exceptions
        """
        def raise_an_exception(*args, **kwargs):
            raise RuntimeError(SCRUB_VDISK_EXCEPTION_MESSAGE)

        structure = DalHelper.build_dal_structure(
            {'vpools': [1],
             'vdisks': [(1, 1, 1, 1)],  # (<id>, <storagedriver_id>, <vpool_id>, <mds_service_id>)
             'mds_services': [(1, 1)],
             'storagerouters': [1],
             'storagedrivers': [(1, 1, 1)]}  # (<id>, <vpool_id>, <storagerouter_id>)
        )
        vdisk_1 = structure['vdisks'][1]
        storagedriver_1 = structure['storagedrivers'][1]

        [dynamic for dynamic in vdisk_1._dynamics if dynamic.name == 'snapshots'][0].timeout = 0

        for i in xrange(0, 2):
            metadata = {'label': str(i),
                        'is_consistent': False,
                        'is_sticky': False,
                        'timestamp': str(int(time.time() - datetime.timedelta(2).total_seconds() - i))}
            snapshot_id = VDiskController.create_snapshot(vdisk_1.guid, metadata)
            StorageRouterClient.delete_snapshot_callbacks[vdisk_1.volume_id] = {snapshot_id: raise_an_exception}

        GenericController.delete_snapshots_storagedriver(
            storagedriver_guid=storagedriver_1.guid)
        self.assertEqual(2, len(vdisk_1.snapshot_ids),
                         'No snapshots should be removed for vdisk 1')
Example #5
    def execute_scrubbing():
        """
        Execute scrubbing on the cluster

        :return:
        """

        return GenericController.execute_scrub()

    def ovs_4509_validate_arakoon_collapse_test():
        """
        Validate arakoon collapse
        """
        node_ips = [sr.ip for sr in GeneralStorageRouter.get_storage_routers()]
        node_ips.sort()
        for node_ip in node_ips:
            root_client = SSHClient(node_ip, username='******')
            arakoon_clusters = []
            for service in ServiceList.get_services():
                if service.is_internal is True and service.storagerouter.ip == node_ip and \
                    service.type.name in (ServiceType.SERVICE_TYPES.ARAKOON,
                                          ServiceType.SERVICE_TYPES.NS_MGR,
                                          ServiceType.SERVICE_TYPES.ALBA_MGR):
                    arakoon_clusters.append(service.name.replace('arakoon-', ''))

            for arakoon_cluster in arakoon_clusters:
                arakoon_config_path = Configuration.get_configuration_path('/ovs/arakoon/{0}/config'.format(arakoon_cluster))
                tlog_location = '/opt/OpenvStorage/db/arakoon/{0}/tlogs'.format(arakoon_cluster)

                # read_tlog_dir
                with remote(node_ip, [Configuration]) as rem:
                    config_contents = rem.Configuration.get('/ovs/arakoon/{0}/config'.format(arakoon_cluster), raw=True)
                for line in config_contents.splitlines():
                    if 'tlog_dir' in line:
                        tlog_location = line.split()[-1]

                nr_of_tlogs = TestArakoon.get_nr_of_tlogs_in_folder(root_client, tlog_location)
                old_headdb_timestamp = 0
                if root_client.file_exists('/'.join([tlog_location, 'head.db'])):
                    old_headdb_timestamp = root_client.run(['stat', '--format=%Y', tlog_location + '/head.db'])
                if nr_of_tlogs <= 2:
                    benchmark_command = ['arakoon', '--benchmark', '-n_clients', '1', '-max_n', '5_000', '-config', arakoon_config_path]
                    root_client.run(benchmark_command)

                GenericController.collapse_arakoon()

                nr_of_tlogs = TestArakoon.get_nr_of_tlogs_in_folder(root_client, tlog_location)
                new_headdb_timestamp = root_client.run(['stat', '--format=%Y', tlog_location + '/head.db'])
                assert nr_of_tlogs <= 2,\
                    'Arakoon collapse left {0} tlogs on the environment, expecting 2 or fewer'.format(nr_of_tlogs)
                assert old_headdb_timestamp != new_headdb_timestamp,\
                    'Timestamp of the head_db file was not changed in the process of collapsing tlogs'
Example #7
    def test_snapshot_sticky(self):
        """
        is_sticky: True --> Sticky snapshots of any kind should never be deleted (Only possible to delete manually)
        """
        minute = 60
        hour = minute * 60
        structure = DalHelper.build_dal_structure(
            {'vpools': [1],
             'vdisks': [(1, 1, 1, 1)],  # (<id>, <storagedriver_id>, <vpool_id>, <mds_service_id>)
             'mds_services': [(1, 1)],
             'storagerouters': [1],
             'storagedrivers': [(1, 1, 1)]}  # (<id>, <vpool_id>, <storagerouter_id>)
        )
        base = datetime.datetime.now().date()
        vdisk_1 = structure['vdisks'][1]
        storagedriver_1 = structure['storagedrivers'][1]

        label = 'c'
        # Extra time to add to the hourly timestamps
        additional_time = minute * 30
        # Hours to create a snapshot on
        sticky_hours = [2]
        consistent_hours = [2]
        inconsistent_hours = []
        # Snapshot details
        is_sticky = len(sticky_hours) > 0
        is_consistent = len(consistent_hours) > 0
        is_automatic = False

        for day in xrange(35):
            base_timestamp = self._make_timestamp(base,
                                                  datetime.timedelta(1) * day)
            self._print_message('')
            self._print_message('Day cycle: {0}: {1}'.format(
                day,
                datetime.datetime.fromtimestamp(base_timestamp).strftime(
                    '%Y-%m-%d')))

            self._print_message('- Deleting snapshots')
            GenericController.delete_snapshots_storagedriver(
                storagedriver_guid=storagedriver_1.guid,
                timestamp=base_timestamp + (minute * 30))

            self._validate(vdisk=vdisk_1,
                           current_day=day,
                           base_date=base,
                           sticky_hours=sticky_hours,
                           consistent_hours=consistent_hours,
                           inconsistent_hours=inconsistent_hours)

            self._print_message('- Creating snapshots')
            for x in consistent_hours + inconsistent_hours:
                timestamp = base_timestamp + (hour * x) + additional_time
                VDiskController.create_snapshot(vdisk_guid=vdisk_1.guid,
                                                metadata={'label': 'ss_{0}_{1}:00'.format(label, x),
                                                          'is_sticky': is_sticky,
                                                          'timestamp': str(timestamp),
                                                          'is_automatic': is_automatic,
                                                          'is_consistent': is_consistent})
Example #8
    def test_refresh_package_information(self):
        """
        Test the refresh package information functionality
        """
        def _update_info_cluster_1(client, update_info, package_info):
            _ = package_info
            update_info[client.ip]['framework'] = {
                'packages': {
                    'package1': {
                        'candidate': 'version2',
                        'installed': 'version1'
                    }
                },
                'prerequisites': []
            }

        def _update_info_cluster_2(client, update_info, package_info):
            _ = package_info
            update_info[client.ip]['component2'] = {
                'packages': {
                    'package2': {
                        'candidate': 'version2',
                        'installed': 'version1'
                    }
                },
                'prerequisites': []
            }
            if client.ip == storagerouter_3.ip:
                update_info[client.ip]['errors'] = [
                    'Unexpected error occurred for StorageRouter {0}'.format(
                        storagerouter_3.name)
                ]

        def _update_info_plugin_1(error_information):
            _ = error_information  # get_update_info_plugin is used for Alba nodes, so not testing here

        expected_package_info = {
            'framework': {
                'packages': {
                    'package1': {
                        'candidate': 'version2',
                        'installed': 'version1'
                    }
                },
                'prerequisites': [['node_down', '2']]
            },
            'component2': {
                'packages': {
                    'package2': {
                        'candidate': 'version2',
                        'installed': 'version1'
                    }
                },
                'prerequisites': []
            }
        }

        # StorageRouter 1 successfully updates its package info
        # StorageRouter 2 is inaccessible
        # StorageRouter 3 gets error in 2nd hook --> package_information is reset to {}
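        # Because StorageRouter 2 is unreachable, the expected 'framework' info above carries the
        # ['node_down', '2'] prerequisite, while StorageRouters 2 and 3 end up with empty package information.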
        structure = DalHelper.build_dal_structure(
            structure={'storagerouters': [1, 2, 3]})
        storagerouter_1 = structure['storagerouters'][1]
        storagerouter_2 = structure['storagerouters'][2]
        storagerouter_3 = structure['storagerouters'][3]
        Toolbox._function_pointers['update-get_update_info_cluster'] = [
            _update_info_cluster_1, _update_info_cluster_2
        ]
        Toolbox._function_pointers['update-get_update_info_plugin'] = [
            _update_info_plugin_1
        ]

        SSHClient._raise_exceptions[storagerouter_2.ip] = {
            'users': ['root'],
            'exception': UnableToConnectException('No route to host')
        }

        with self.assertRaises(excClass=Exception) as raise_info:
            GenericController.refresh_package_information()

        storagerouter_1.discard()
        storagerouter_2.discard()
        storagerouter_3.discard()
        self.assertDictEqual(d1=expected_package_info,
                             d2=storagerouter_1.package_information,
                             msg='Incorrect package information found for StorageRouter {0}'.format(storagerouter_1.name))
        self.assertDictEqual(d1={},
                             d2=storagerouter_2.package_information,
                             msg='Incorrect package information found for StorageRouter {0}'.format(storagerouter_2.name))
        self.assertDictEqual(d1={},
                             d2=storagerouter_3.package_information,
                             msg='Incorrect package information found for StorageRouter {0}'.format(storagerouter_3.name))
        self.assertIn(member='Unexpected error occurred for StorageRouter {0}'.format(storagerouter_3.name),
                      container=raise_info.exception.message,
                      msg='Expected to find log message about unexpected error for StorageRouter {0}'.format(storagerouter_3.name))

    def check_scrubbing_test():
        """
        Check scrubbing of vdisks test
        """
        initial_counter = 100
        step = 5
        vdisk = None
        vpool_name = General.get_config().get('vpool', 'name')
        vpool = GeneralVPool.get_vpool_by_name(vpool_name=vpool_name)
        assert vpool, "No vpool found where one was expected"

        template_folder = GeneralVMachine.template_target_folder
        image_name = GeneralVMachine.template_image

        disk_name = "scrubdisk"
        GeneralVMachine.logger.info("Starting RAW disk creation")
        out, err, _ = General.execute_command('qemu-img convert -O raw {0}{1} /mnt/{2}/{3}.raw'.format(template_folder, image_name, vpool_name, disk_name))
        if err:
            GeneralVMachine.logger.error("Error while creating raw disk: {0}".format(err))

        def snapshot_vdisk(vdisk):
            metadata = {'label': 'snap-' + vdisk.name,
                        'is_consistent': True,
                        'timestamp': time.time(),
                        'is_automatic': False,
                        'is_sticky': False}
            VDiskController.create_snapshot(vdisk.guid, metadata)

        counter = initial_counter
        while counter and vdisk is None:
            time.sleep(step)
            vdisk = VDiskList.get_by_devicename_and_vpool('/' + disk_name + '.raw', vpool)
            counter -= step
        assert counter > 0, "Vdisk with name {0} didn't appear in the model after {1} seconds".format(disk_name, initial_counter)
        # snapshot disks for the first time
        snapshot_vdisk(vdisk)
        counter = initial_counter
        while counter > 0:
            time.sleep(step)
            out, err, _ = General.execute_command('dd if=/dev/zero of=/mnt/{0}/{1}.raw bs=10K count=1000 conv=notrunc'.format(vpool_name, disk_name))
            counter -= step
            snapshot_vdisk(vdisk)

        # saving disk 'stored' info / the only attribute that is lowered after scrubbing
        vdisk.invalidate_dynamics(['statistics'])
        disk_backend_data = vdisk.statistics['stored']

        # deleting middle snapshots
        for snapshot in vdisk.snapshots[1:-1]:
            VDiskController.delete_snapshot(vdisk.guid, snapshot['guid'])

        # starting scrubber
        try:
            GenericController.execute_scrub()
            # waiting for model to catch up
            counter = initial_counter
            while counter > 0:
                time.sleep(step)
                vdisk.invalidate_dynamics(['statistics'])
                # checking result of scrub work
                if vdisk.statistics['stored'] < disk_backend_data:
                    GeneralVMachine.logger.info("It took {0} seconds for the value to change from {1} to {2}\n".format((initial_counter - counter) * step,
                                                                                                                       disk_backend_data,
                                                                                                                       vdisk.statistics['stored']))
                    break
                counter -= step
        finally:
            # removing vdisk
            GeneralVMachine.logger.info("Removing vpool vdisks from {0} vpool".format(vpool_name))
            out, err, _ = General.execute_command("rm -rf /mnt/{0}/*.raw".format(vpool_name))
            if err:
                GeneralVMachine.logger.error("Error while removing vdisk: {0}".format(err))

        assert counter > 0, "Scrubbing didn't run as expected, backend size of vdisk remained at {0}:\n".format(disk_backend_data)
Example #10
    def test_happypath(self):
        """
        Validates the happy path; Hourly snapshots are taken with a few manual consistent
        every now and then. The delete policy is executed every day
        """
        structure = DalHelper.build_dal_structure(
            {'vpools': [1],
             'vdisks': [(1, 1, 1, 1)],  # (<id>, <storagedriver_id>, <vpool_id>, <mds_service_id>)
             'mds_services': [(1, 1)],
             'storagerouters': [1],
             'storagedrivers': [(1, 1, 1)]}  # (<id>, <vpool_id>, <storagerouter_id>)
        )
        vdisk_1 = structure['vdisks'][1]
        [dynamic for dynamic in vdisk_1._dynamics if dynamic.name == 'snapshots'][0].timeout = 0

        # Run the testing scenario
        travis = 'TRAVIS' in os.environ and os.environ['TRAVIS'] == 'true'
        if travis is True:
            self._print_message('Running in Travis, reducing output.')
        base = datetime.datetime.now().date()
        minute = 60
        hour = minute * 60
        consistent_hours = [6, 12, 18]
        inconsistent_hours = xrange(2, 23)

        for day in xrange(0, 35):
            base_timestamp = self._make_timestamp(base,
                                                  datetime.timedelta(1) * day)
            self._print_message('')
            self._print_message('Day cycle: {0}: {1}'.format(
                day,
                datetime.datetime.fromtimestamp(base_timestamp).strftime(
                    '%Y-%m-%d')))

            # At the start of the day, delete snapshot policy runs at 00:30
            self._print_message('- Deleting snapshots')
            GenericController.delete_snapshots(timestamp=base_timestamp +
                                               (minute * 30))

            # Validate snapshots
            self._print_message('- Validating snapshots')
            self._validate(vdisk=vdisk_1,
                           current_day=day,
                           base_date=base,
                           sticky_hours=[],
                           consistent_hours=consistent_hours,
                           inconsistent_hours=inconsistent_hours)

            # During the day, snapshots are taken
            # - Create non consistent snapshot every hour, between 2:00 and 22:00
            # - Create consistent snapshot at 6:30, 12:30, 18:30
            self._print_message('- Creating snapshots')
            for h in inconsistent_hours:
                timestamp = base_timestamp + (hour * h)
                VDiskController.create_snapshot(vdisk_guid=vdisk_1.guid,
                                                metadata={'label': 'ss_i_{0}:00'.format(str(h)),
                                                          'is_consistent': False,
                                                          'timestamp': str(timestamp)})
                if h in consistent_hours:
                    ts = (timestamp + (minute * 30))
                    VDiskController.create_snapshot(vdisk_guid=vdisk_1.guid,
                                                    metadata={'label': 'ss_c_{0}:30'.format(str(h)),
                                                              'is_consistent': True,
                                                              'timestamp': str(ts)})

    def test_scrubbing(self):
        """
        Validates the scrubbing workflow
        * Scenario 1: Validate disabled scrub task and single vDisk scrub logic
        * Scenario 2: 1 vPool, 10 vDisks, 1 scrub role
                      Scrubbing fails for 5 vDisks, check if scrubbing completed for all other vDisks
                      Run scrubbing a 2nd time and verify scrubbing now works for failed vDisks
        * Scenario 3: 1 vPool, 10 vDisks, 5 scrub roles
                      Check if vDisks are divided among all threads
        * Scenario 4: 3 vPools, 9 vDisks, 5 scrub roles
                      Validate 6 threads will be spawned and used out of a potential of 15 (5 scrub roles * 3 vPools)
                      We limit max amount of threads spawned per vPool to 2 in case 3 to 5 vPools are present
        """
        _ = self
        for i in xrange(1, 6):
            Configuration.set("/ovs/framework/hosts/{0}/ports".format(i), {"storagedriver": [10000, 10100]})

        ##############
        # Scenario 1 #
        ##############
        structure = Helper.build_service_structure(
            {
                "vpools": [1],
                "vdisks": [(1, 1, 1, 1)],  # (<id>, <storagedriver_id>, <vpool_id>, <mds_service_id>)
                "mds_services": [(1, 1)],  # (<id>, <storagedriver_id>)
                "storagerouters": [1],
                "storagedrivers": [(1, 1, 1)],
            }  # (<id>, <vpool_id>, <storagerouter_id>)
        )
        vdisk = structure["vdisks"][1]
        vpool = structure["vpools"][1]
        storagerouter = structure["storagerouters"][1]
        System._machine_id = {storagerouter.ip: "1"}
        Configuration.set(
            "/ovs/vpools/{0}/proxies/scrub/generic_scrub".format(vpool.guid), json.dumps({}, indent=4), raw=True
        )
        LockedClient.scrub_controller = {"possible_threads": None, "volumes": {}, "waiter": Waiter(1)}
        LockedClient.scrub_controller["volumes"][vdisk.volume_id] = {"success": False, "scrub_work": [0]}
        with self.assertRaises(Exception) as raise_info:
            VDiskController.scrub_single_vdisk(vdisk.guid, storagerouter.guid)
        self.assertIn(vdisk.name, raise_info.exception.message)
        LockedClient.scrub_controller["volumes"][vdisk.volume_id] = {"success": True, "scrub_work": [0]}
        VDiskController.scrub_single_vdisk(vdisk.guid, storagerouter.guid)
        with vdisk.storagedriver_client.make_locked_client(vdisk.volume_id) as locked_client:
            self.assertEqual(
                first=len(locked_client.get_scrubbing_workunits()),
                second=0,
                msg="Scrubbed vDisk {0} does not have the expected amount of scrubbing items: {1}".format(
                    vdisk.name, 0
                ),
            )

        ##############
        # Scenario 2 #
        ##############
        self.volatile.clean()
        self.persistent.clean()
        structure = Helper.build_service_structure(
            {
                "vpools": [1],
                "vdisks": [
                    (1, 1, 1, 1),
                    (2, 1, 1, 1),
                    (3, 1, 1, 1),
                    (4, 1, 1, 1),
                    (5, 1, 1, 1),
                    (6, 1, 1, 1),
                    (7, 1, 1, 1),
                    (8, 1, 1, 1),
                    (9, 1, 1, 1),
                    (10, 1, 1, 1),
                ],  # (<id>, <storagedriver_id>, <vpool_id>, <mds_service_id>)
                "mds_services": [(1, 1)],  # (<id>, <storagedriver_id>)
                "storagerouters": [1],
                "storagedrivers": [(1, 1, 1)],
            }  # (<id>, <vpool_id>, <storagerouter_id>)
        )
        vpool = structure["vpools"][1]
        vdisks = structure["vdisks"]
        storagerouter = structure["storagerouters"][1]
        System._machine_id = {storagerouter.ip: "1"}
        Configuration.set(
            "/ovs/vpools/{0}/proxies/scrub/generic_scrub".format(vpool.guid), json.dumps({}, indent=4), raw=True
        )
        LockedClient.scrub_controller = {
            "possible_threads": ["scrub_{0}_{1}".format(vpool.guid, storagerouter.guid)],
            "volumes": {},
            "waiter": Waiter(1),
        }
        failed_vdisks = []
        successful_vdisks = []
        for vdisk_id in sorted(vdisks):
            vdisk = vdisks[vdisk_id]
            success = vdisk_id % 2 == 0
            LockedClient.scrub_controller["volumes"][vdisk.volume_id] = {
                "success": success,
                "scrub_work": range(vdisk_id),
            }
            if success is True:
                successful_vdisks.append(vdisk)
            else:
                failed_vdisks.append(vdisk)

        # Execute scrubbing a 1st time
        with self.assertRaises(Exception) as raise_info:
            GenericController.execute_scrub()
        for vdisk in failed_vdisks:
            self.assertIn(vdisk.name, raise_info.exception.message)

        # Validate expected successful vDisks
        for vdisk in successful_vdisks:
            with vdisk.storagedriver_client.make_locked_client(vdisk.volume_id) as locked_client:
                self.assertEqual(
                    first=len(locked_client.get_scrubbing_workunits()),
                    second=0,
                    msg="Scrubbed vDisk {0} does still have scrubbing work left".format(vdisk.name),
                )
        # Validate expected failed vDisks
        for vdisk in failed_vdisks:
            with vdisk.storagedriver_client.make_locked_client(vdisk.volume_id) as locked_client:
                self.assertEqual(
                    first=len(locked_client.get_scrubbing_workunits()),
                    second=int(vdisk.name),
                    msg="Scrubbed vDisk {0} does not have the expected amount of scrubbing items: {1}".format(
                        vdisk.name, int(vdisk.name)
                    ),
                )

        # Execute scrubbing again
        for vdisk_id in sorted(vdisks):
            vdisk = vdisks[vdisk_id]
            LockedClient.scrub_controller["volumes"][vdisk.volume_id]["success"] = True
        GenericController.execute_scrub()
        for vdisk in vdisks.values():
            with vdisk.storagedriver_client.make_locked_client(vdisk.volume_id) as locked_client:
                self.assertEqual(
                    first=len(locked_client.get_scrubbing_workunits()),
                    second=0,
                    msg="Scrubbed vDisk {0} does still have scrubbing work left after scrubbing a 2nd time".format(
                        vdisk.name
                    ),
                )

        ##############
        # Scenario 3 #
        ##############
        self.volatile.clean()
        self.persistent.clean()
        structure = Helper.build_service_structure(
            {
                "vpools": [1],
                "vdisks": [
                    (1, 1, 1, 1),
                    (2, 1, 1, 1),
                    (3, 1, 1, 1),
                    (4, 1, 1, 1),
                    (5, 1, 1, 1),
                    (6, 1, 1, 1),
                    (7, 1, 1, 1),
                    (8, 1, 1, 1),
                    (9, 1, 1, 1),
                    (10, 1, 1, 1),
                ],  # (<id>, <storagedriver_id>, <vpool_id>, <mds_service_id>)
                "mds_services": [(1, 1)],  # (<id>, <storagedriver_id>)
                "storagerouters": [1, 2, 3, 4, 5],
                "storagedrivers": [(1, 1, 1)],
            }  # (<id>, <vpool_id>, <storagerouter_id>)
        )
        vpool = structure["vpools"][1]
        vdisks = structure["vdisks"]
        storagerouters = structure["storagerouters"]
        System._machine_id = dict((sr.ip, sr.machine_id) for sr in storagerouters.values())
        Configuration.set(
            "/ovs/vpools/{0}/proxies/scrub/generic_scrub".format(vpool.guid), json.dumps({}, indent=4), raw=True
        )

        thread_names = [
            "scrub_{0}_{1}".format(vpool.guid, storagerouter.guid) for storagerouter in storagerouters.values()
        ]
        LockedClient.scrub_controller = {
            "possible_threads": thread_names,
            "volumes": {},
            "waiter": Waiter(len(thread_names)),
        }
        LockedClient.thread_names = thread_names[:]
        for vdisk_id in sorted(vdisks):
            vdisk = vdisks[vdisk_id]
            LockedClient.scrub_controller["volumes"][vdisk.volume_id] = {"success": True, "scrub_work": range(vdisk_id)}
        GenericController.execute_scrub()
        self.assertEqual(
            first=len(LockedClient.thread_names), second=0, msg="Not all threads have been used in the process"
        )

        ##############
        # Scenario 4 #
        ##############
        self.volatile.clean()
        self.persistent.clean()
        structure = Helper.build_service_structure(
            {
                "vpools": [1, 2, 3],
                "vdisks": [
                    (1, 1, 1, 1),
                    (2, 1, 1, 1),
                    (3, 1, 1, 1),
                    (4, 2, 2, 2),
                    (5, 2, 2, 2),
                    (6, 2, 2, 2),
                    (7, 3, 3, 3),
                    (8, 3, 3, 3),
                    (9, 3, 3, 3),
                ],  # (<id>, <storagedriver_id>, <vpool_id>, <mds_service_id>)
                "mds_services": [(1, 1), (2, 2), (3, 3)],  # (<id>, <storagedriver_id>)
                "storagerouters": [1, 2, 3, 4, 5],
                "storagedrivers": [(1, 1, 1), (2, 2, 1), (3, 3, 1)],
            }  # (<id>, <vpool_id>, <storagerouter_id>)
        )
        vpools = structure["vpools"]
        vdisks = structure["vdisks"]
        storagerouters = structure["storagerouters"]

        thread_names = []
        for vpool in vpools.values():
            Configuration.set(
                "/ovs/vpools/{0}/proxies/scrub/generic_scrub".format(vpool.guid), json.dumps({}, indent=4), raw=True
            )
            for storagerouter in storagerouters.values():
                thread_names.append("scrub_{0}_{1}".format(vpool.guid, storagerouter.guid))
        LockedClient.scrub_controller = {
            "possible_threads": thread_names,
            "volumes": {},
            "waiter": Waiter(len(thread_names) - 9),
        }
        LockedClient.thread_names = thread_names[:]
        for vdisk_id in sorted(vdisks):
            vdisk = vdisks[vdisk_id]
            LockedClient.scrub_controller["volumes"][vdisk.volume_id] = {"success": True, "scrub_work": range(vdisk_id)}
        GenericController.execute_scrub()
        self.assertEqual(
            first=len(LockedClient.thread_names),
            second=9,  # 5 srs * 3 vps = 15 threads, but only 2 will be spawned per vPool --> 15 - 6 = 9 left
            msg="Not all threads have been used in the process",
        )

        # 3 vPools will cause the scrubber to only launch 2 threads per vPool --> 3 possible threads should be left unused per vPool
        for vpool in vpools.values():
            threads_left = [thread_name for thread_name in LockedClient.thread_names if vpool.guid in thread_name]
            self.assertEqual(
                first=len(threads_left),
                second=3,
                msg="Unexpected amount of threads left for vPool {0}".format(vpool.name),
            )
Example #12
    def test_collapse():
        """
        Test the arakoon collapsing

        :return:
        """
        ArakoonCollapse.LOGGER.info("Starting validating arakoon collapse")
        node_ips = StoragerouterHelper.get_storagerouter_ips()
        node_ips.sort()
        for node_ip in node_ips:
            ArakoonCollapse.LOGGER.info(
                "Fetching arakoons on node `{0}`".format(node_ip))
            arakoon_clusters = []
            root_client = SSHClient(node_ip, username='******')

            # fetch arakoon clusters
            for service in ServiceList.get_services():
                if service.is_internal is True and service.storagerouter.ip == node_ip and \
                    service.type.name in (ServiceType.SERVICE_TYPES.ARAKOON,
                                          ServiceType.SERVICE_TYPES.NS_MGR,
                                          ServiceType.SERVICE_TYPES.ALBA_MGR):
                    arakoon_clusters.append(
                        service.name.replace('arakoon-', ''))

            # perform collapse
            ArakoonCollapse.LOGGER.info(
                "Starting arakoon collapse on node `{0}`".format(node_ip))
            for arakoon_cluster in arakoon_clusters:
                ArakoonCollapse.LOGGER.info(
                    "Fetching `{0}` arakoon on node `{1}`".format(
                        arakoon_cluster, node_ip))
                arakoon_config_path = Configuration.get_configuration_path(
                    '/ovs/arakoon/{0}/config'.format(arakoon_cluster))
                tlog_location = '/opt/OpenvStorage/db/arakoon/{0}/tlogs'.format(
                    arakoon_cluster)

                # read_tlog_dir
                with remote(node_ip, [Configuration]) as rem:
                    config_contents = rem.Configuration.get(
                        '/ovs/arakoon/{0}/config'.format(arakoon_cluster),
                        raw=True)
                for line in config_contents.splitlines():
                    if 'tlog_dir' in line:
                        tlog_location = line.split()[-1]

                nr_of_tlogs = ArakoonCollapse.get_nr_of_tlogs_in_folder(
                    root_client, tlog_location)
                old_headdb_timestamp = 0
                if root_client.file_exists('/'.join([tlog_location,
                                                     'head.db'])):
                    old_headdb_timestamp = root_client.run([
                        'stat', '--format=%Y',
                        '{0}/{1}'.format(tlog_location, 'head.db')
                    ])
                if nr_of_tlogs <= 2:
                    benchmark_command = [
                        'arakoon', '--benchmark', '-n_clients', '1', '-max_n',
                        '5_000', '-config', arakoon_config_path
                    ]
                    root_client.run(benchmark_command)

                ArakoonCollapse.LOGGER.info(
                    "Collapsing arakoon `{0}` on node `{1}` ...".format(
                        arakoon_cluster, node_ip))
                GenericController.collapse_arakoon()

                nr_of_tlogs = ArakoonCollapse.get_nr_of_tlogs_in_folder(
                    root_client, tlog_location)
                new_headdb_timestamp = root_client.run([
                    'stat', '--format=%Y',
                    '{0}/{1}'.format(tlog_location, 'head.db')
                ])

                # perform assertion
                assert nr_of_tlogs <= 2,\
                    'Arakoon collapse left {0} tlogs on the environment, expecting 2 or fewer for `{1}` on node `{2}`'\
                    .format(nr_of_tlogs, arakoon_cluster, node_ip)
                assert old_headdb_timestamp != new_headdb_timestamp,\
                    'Timestamp of the head_db file was not changed ' \
                    'in the process of collapsing tlogs of arakoon `{0}` on node `{1}`'\
                    .format(arakoon_cluster, node_ip)

                ArakoonCollapse.LOGGER.info(
                    "Successfully collapsed arakoon `{0}` on node `{1}`".
                    format(arakoon_cluster, node_ip))

        ArakoonCollapse.LOGGER.info("Finished validating arakoon collapsing")

    def test_clone_snapshot(self):
        """
        Validates that a snapshot that has clones will not be deleted while other snapshots will be deleted
        """
        # Setup
        # There are 2 disks, second one cloned from a snapshot of the first
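        # The DAL objects are built by hand here (no DalHelper); the crucial part is linking the clone's
        # parentsnapshot to the oldest snapshot of vdisk_1_1, so the delete policy must keep that snapshot.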
        vpool = VPool()
        vpool.name = 'vpool'
        vpool.status = 'RUNNING'
        vpool.save()
        storage_router = StorageRouter()
        storage_router.name = 'storage_router'
        storage_router.ip = '127.0.0.1'
        storage_router.machine_id = System.get_my_machine_id()
        storage_router.rdma_capable = False
        storage_router.save()
        disk = Disk()
        disk.name = 'physical_disk_1'
        disk.aliases = ['/dev/non-existent']
        disk.size = 500 * 1024 ** 3
        disk.state = 'OK'
        disk.is_ssd = True
        disk.storagerouter = storage_router
        disk.save()
        disk_partition = DiskPartition()
        disk_partition.disk = disk
        disk_partition.aliases = ['/dev/disk/non-existent']
        disk_partition.size = 400 * 1024 ** 3
        disk_partition.state = 'OK'
        disk_partition.offset = 1024
        disk_partition.roles = [DiskPartition.ROLES.SCRUB]
        disk_partition.mountpoint = '/var/tmp'
        disk_partition.save()
        storage_driver = StorageDriver()
        storage_driver.vpool = vpool
        storage_driver.storagerouter = storage_router
        storage_driver.name = 'storage_driver_1'
        storage_driver.mountpoint = '/'
        storage_driver.cluster_ip = storage_router.ip
        storage_driver.storage_ip = '127.0.0.1'
        storage_driver.storagedriver_id = 'storage_driver_1'
        storage_driver.ports = {'management': 1,
                                'xmlrpc': 2,
                                'dtl': 3,
                                'edge': 4}
        storage_driver.save()
        service_type = ServiceType()
        service_type.name = 'MetadataServer'
        service_type.save()
        service = Service()
        service.name = 'service_1'
        service.storagerouter = storage_driver.storagerouter
        service.ports = [1]
        service.type = service_type
        service.save()
        mds_service = MDSService()
        mds_service.service = service
        mds_service.number = 0
        mds_service.capacity = 10
        mds_service.vpool = storage_driver.vpool
        mds_service.save()
        vdisk_1_1 = VDisk()
        vdisk_1_1.name = 'vdisk_1_1'
        vdisk_1_1.volume_id = 'vdisk_1_1'
        vdisk_1_1.vpool = vpool
        vdisk_1_1.devicename = 'dummy'
        vdisk_1_1.size = 0
        vdisk_1_1.save()
        vdisk_1_1.reload_client('storagedriver')

        [dynamic for dynamic in vdisk_1_1._dynamics if dynamic.name == 'snapshots'][0].timeout = 0

        travis = 'TRAVIS' in os.environ and os.environ['TRAVIS'] == 'true'
        if travis is True:
            print 'Running in Travis, reducing output.'

        base = datetime.datetime.now().date()
        day = datetime.timedelta(1)
        base_timestamp = self._make_timestamp(base, day)
        minute = 60
        hour = minute * 60
        for h in [6, 12, 18]:
            timestamp = base_timestamp + (hour * h)
            VDiskController.create_snapshot(vdisk_guid=vdisk_1_1.guid,
                                            metadata={'label': 'snapshot_{0}:30'.format(str(h)),
                                                      'is_consistent': True,
                                                      'timestamp': str(timestamp),
                                                      'machineguid': None})

        base_snapshot_guid = vdisk_1_1.snapshots[0]['guid']  # Oldest
        clone_vdisk = VDisk()
        clone_vdisk.name = 'clone_vdisk'
        clone_vdisk.volume_id = 'clone_vdisk'
        clone_vdisk.vpool = vpool
        clone_vdisk.devicename = 'dummy'
        clone_vdisk.parentsnapshot = base_snapshot_guid
        clone_vdisk.size = 0
        clone_vdisk.save()
        clone_vdisk.reload_client('storagedriver')

        for h in [6, 12, 18]:
            timestamp = base_timestamp + (hour * h)
            VDiskController.create_snapshot(vdisk_guid=clone_vdisk.guid,
                                            metadata={'label': 'snapshot_{0}:30'.format(str(h)),
                                                      'is_consistent': True,
                                                      'timestamp': str(timestamp),
                                                      'machineguid': None})

        base_timestamp = self._make_timestamp(base, day * 2)
        GenericController.delete_snapshots(timestamp=base_timestamp + (minute * 30))
        self.assertIn(base_snapshot_guid, [snap['guid'] for snap in vdisk_1_1.snapshots], 'Snapshot was deleted while there are still clones of it')

    def test_happypath(self):
        """
        Validates the happy path; Hourly snapshots are taken with a few manual consistent
        every now and then. The delete policy is executed every day
        """
        vpool = VPool()
        vpool.name = 'vpool'
        vpool.status = 'RUNNING'
        vpool.save()
        storage_router = StorageRouter()
        storage_router.name = 'storage_router'
        storage_router.ip = '127.0.0.1'
        storage_router.machine_id = System.get_my_machine_id()
        storage_router.rdma_capable = False
        storage_router.save()
        disk = Disk()
        disk.name = 'physical_disk_1'
        disk.aliases = ['/dev/non-existent']
        disk.size = 500 * 1024 ** 3
        disk.state = 'OK'
        disk.is_ssd = True
        disk.storagerouter = storage_router
        disk.save()
        disk_partition = DiskPartition()
        disk_partition.disk = disk
        disk_partition.aliases = ['/dev/disk/non-existent']
        disk_partition.size = 400 * 1024 ** 3
        disk_partition.state = 'OK'
        disk_partition.offset = 1024
        disk_partition.roles = [DiskPartition.ROLES.SCRUB]
        disk_partition.mountpoint = '/var/tmp'
        disk_partition.save()
        vdisk_1 = VDisk()
        vdisk_1.name = 'vdisk_1'
        vdisk_1.volume_id = 'vdisk_1'
        vdisk_1.vpool = vpool
        vdisk_1.devicename = 'dummy'
        vdisk_1.size = 0
        vdisk_1.save()
        vdisk_1.reload_client('storagedriver')

        [dynamic for dynamic in vdisk_1._dynamics if dynamic.name == 'snapshots'][0].timeout = 0

        # Run the testing scenario
        travis = 'TRAVIS' in os.environ and os.environ['TRAVIS'] == 'true'
        if travis is True:
            self._print_message('Running in Travis, reducing output.')
        debug = not travis
        amount_of_days = 50
        base = datetime.datetime.now().date()
        day = datetime.timedelta(1)
        minute = 60
        hour = minute * 60

        for d in xrange(0, amount_of_days):
            base_timestamp = self._make_timestamp(base, day * d)
            self._print_message('')
            self._print_message('Day cycle: {0}: {1}'.format(d, datetime.datetime.fromtimestamp(base_timestamp).strftime('%Y-%m-%d')))

            # At the start of the day, delete snapshot policy runs at 00:30
            self._print_message('- Deleting snapshots')
            GenericController.delete_snapshots(timestamp=base_timestamp + (minute * 30))

            # Validate snapshots
            self._print_message('- Validating snapshots')
            self._validate(vdisk_1, d, base, amount_of_days, debug)

            # During the day, snapshots are taken
            # - Create non consistent snapshot every hour, between 2:00 and 22:00
            # - Create consistent snapshot at 6:30, 12:30, 18:30
            self._print_message('- Creating snapshots')
            for h in xrange(2, 23):
                timestamp = base_timestamp + (hour * h)
                VDiskController.create_snapshot(vdisk_guid=vdisk_1.guid,
                                                metadata={'label': 'ss_i_{0}:00'.format(str(h)),
                                                          'is_consistent': False,
                                                          'timestamp': str(timestamp),
                                                          'machineguid': None})
                if h in [6, 12, 18]:
                    ts = (timestamp + (minute * 30))
                    VDiskController.create_snapshot(vdisk_guid=vdisk_1.guid,
                                                    metadata={'label': 'ss_c_{0}:30'.format(str(h)),
                                                              'is_consistent': True,
                                                              'timestamp': str(ts),
                                                              'machineguid': None})
Example #15
    def test_clone_snapshot(self):
        """
        Validates that a snapshot that has clones will not be deleted while other snapshots will be deleted
        """
        # Setup
        # There are 2 disks, second one cloned from a snapshot of the first
        structure = DalHelper.build_dal_structure(
            {'vpools': [1],
             'vdisks': [(1, 1, 1, 1), (2, 1, 1, 1)],  # (<id>, <storagedriver_id>, <vpool_id>, <mds_service_id>)
             'mds_services': [(1, 1)],  # (<id>, <storagedriver_id>)
             'storagerouters': [1],
             'storagedrivers': [(1, 1, 1)]}  # (<id>, <vpool_id>, <storagerouter_id>)
        )
        vdisk_1 = structure['vdisks'][1]
        [dynamic for dynamic in vdisk_1._dynamics if dynamic.name == 'snapshots'][0].timeout = 0

        base = datetime.datetime.now().date()
        base_timestamp = self._make_timestamp(base, datetime.timedelta(1))
        minute = 60
        hour = minute * 60
        for h in [6, 12, 18]:
            timestamp = base_timestamp + (hour * h)
            VDiskController.create_snapshot(vdisk_guid=vdisk_1.guid,
                                            metadata={'label': 'snapshot_{0}:30'.format(str(h)),
                                                      'is_consistent': True,
                                                      'timestamp': str(timestamp)})

        structure = DalHelper.build_dal_structure(
            structure={'vdisks': [(2, 1, 1, 1)]}, previous_structure=structure)
        clone_vdisk = structure['vdisks'][2]
        base_snapshot_guid = vdisk_1.snapshot_ids[0]  # Oldest
        clone_vdisk.parentsnapshot = base_snapshot_guid
        clone_vdisk.save()

        for day in range(10):
            base_timestamp = self._make_timestamp(base, datetime.timedelta(1) * day)
            for h in [6, 12, 18]:
                timestamp = base_timestamp + (hour * h)
                VDiskController.create_snapshot(vdisk_guid=clone_vdisk.guid,
                                                metadata={'label': 'snapshot_{0}:30'.format(str(h)),
                                                          'is_consistent': True,
                                                          'timestamp': str(timestamp)})
            base_timestamp = self._make_timestamp(base, datetime.timedelta(1) * 2)
            GenericController.delete_snapshots(timestamp=base_timestamp + (minute * 30))
        self.assertIn(
            base_snapshot_guid, vdisk_1.snapshot_ids,
            'Snapshot was deleted while there are still clones of it')
Example #16
    def migrate():
        """
        Executes async migrations. It doesn't matter too much when they are executed, as long as they get eventually
        executed. This code will typically contain:
        * "dangerous" migration code (it needs certain running services)
        * Migration code depending on a cluster-wide state
        * ...
        """
        MigrationController._logger.info('Preparing out of band migrations...')

        from ovs.dal.lists.storagedriverlist import StorageDriverList
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        from ovs.dal.lists.vpoollist import VPoolList
        from ovs.extensions.generic.configuration import Configuration
        from ovs.extensions.generic.sshclient import SSHClient
        from ovs_extensions.generic.toolbox import ExtensionsToolbox
        from ovs_extensions.services.interfaces.systemd import Systemd
        from ovs.extensions.services.servicefactory import ServiceFactory
        from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration
        from ovs.lib.generic import GenericController

        MigrationController._logger.info('Start out of band migrations...')
        service_manager = ServiceFactory.get_manager()

        sr_client_map = {}
        for storagerouter in StorageRouterList.get_storagerouters():
            sr_client_map[storagerouter.guid] = SSHClient(endpoint=storagerouter,
                                                          username='******')

        #########################################################
        # Addition of 'ExecReload' for AlbaProxy SystemD services
        if ServiceFactory.get_service_type() == 'systemd':
            changed_clients = set()
            for storagedriver in StorageDriverList.get_storagedrivers():
                root_client = sr_client_map[storagedriver.storagerouter_guid]
                for alba_proxy in storagedriver.alba_proxies:
                    service = alba_proxy.service
                    service_name = 'ovs-{0}'.format(service.name)
                    if not service_manager.has_service(name=service_name, client=root_client):
                        continue
                    if 'ExecReload=' in root_client.file_read(filename='/lib/systemd/system/{0}.service'.format(service_name)):
                        continue

                    try:
                        service_manager.regenerate_service(name='ovs-albaproxy', client=root_client, target_name=service_name)
                        changed_clients.add(root_client)
                    except:
                        MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
            for root_client in changed_clients:
                root_client.run(['systemctl', 'daemon-reload'])

        ##################################################################
        # Adjustment of open file descriptors for Arakoon services to 8192
        changed_clients = set()
        for storagerouter in StorageRouterList.get_storagerouters():
            root_client = sr_client_map[storagerouter.guid]
            for service_name in service_manager.list_services(client=root_client):
                if not service_name.startswith('ovs-arakoon-'):
                    continue

                if ServiceFactory.get_service_type() == 'systemd':
                    path = '/lib/systemd/system/{0}.service'.format(service_name)
                    check = 'LimitNOFILE=8192'
                else:
                    path = '/etc/init/{0}.conf'.format(service_name)
                    check = 'limit nofile 8192 8192'

                if not root_client.file_exists(path):
                    continue
                if check in root_client.file_read(path):
                    continue

                try:
                    service_manager.regenerate_service(name='ovs-arakoon', client=root_client, target_name=service_name)
                    changed_clients.add(root_client)
                    ExtensionsToolbox.edit_version_file(client=root_client, package_name='arakoon', old_service_name=service_name)
                except Exception:
                    MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
        for root_client in changed_clients:
            root_client.run(['systemctl', 'daemon-reload'])

        #############################
        # Migrate to multiple proxies
        for storagedriver in StorageDriverList.get_storagedrivers():
            vpool = storagedriver.vpool
            root_client = sr_client_map[storagedriver.storagerouter_guid]
            for alba_proxy in storagedriver.alba_proxies:
                # Rename alba_proxy service in model
                service = alba_proxy.service
                old_service_name = 'albaproxy_{0}'.format(vpool.name)
                new_service_name = 'albaproxy_{0}_0'.format(vpool.name)
                if old_service_name != service.name:
                    continue
                service.name = new_service_name
                service.save()

                if not service_manager.has_service(name=old_service_name, client=root_client):
                    continue
                old_configuration_key = '/ovs/framework/hosts/{0}/services/{1}'.format(storagedriver.storagerouter.machine_id, old_service_name)
                if not Configuration.exists(key=old_configuration_key):
                    continue

                # Add '-reboot' to alba_proxy services (because of newly created services and removal of old service)
                ExtensionsToolbox.edit_version_file(client=root_client,
                                                    package_name='alba',
                                                    old_service_name=old_service_name,
                                                    new_service_name=new_service_name)

                # Register new service and remove old service
                service_manager.add_service(name='ovs-albaproxy',
                                            client=root_client,
                                            params=Configuration.get(old_configuration_key),
                                            target_name='ovs-{0}'.format(new_service_name))

                # Update scrub proxy config
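                # Illustrative layouts: an accelerated ALBA fragment cache looks like
                # ['alba', {'cache_on_write': True, ...}], whereas the generic scrub proxy starts
                # out with ['none']. The copy made below reuses the 'alba' settings but forces
                # 'cache_on_read' to False for scrubbing.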
                proxy_config_key = '/ovs/vpools/{0}/proxies/{1}/config/main'.format(vpool.guid, alba_proxy.guid)
                proxy_config = None if Configuration.exists(key=proxy_config_key) is False else Configuration.get(proxy_config_key)
                if proxy_config is not None:
                    fragment_cache = proxy_config.get('fragment_cache', ['none', {}])
                    if fragment_cache[0] == 'alba' and fragment_cache[1].get('cache_on_write') is True:  # Accelerated ALBA configured
                        fragment_cache_scrub_info = copy.deepcopy(fragment_cache)
                        fragment_cache_scrub_info[1]['cache_on_read'] = False
                        proxy_scrub_config_key = '/ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid)
                        proxy_scrub_config = None if Configuration.exists(key=proxy_scrub_config_key) is False else Configuration.get(proxy_scrub_config_key)
                        if proxy_scrub_config is not None and proxy_scrub_config['fragment_cache'] == ['none']:
                            proxy_scrub_config['fragment_cache'] = fragment_cache_scrub_info
                            Configuration.set(proxy_scrub_config_key,
                                              json.dumps(proxy_scrub_config, indent=4),
                                              raw=True)

            # Update 'backend_connection_manager' section
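            # Target layout is a 'MULTI' backend_connection_manager in which every proxy gets its
            # own numbered sub-section and the backend_interface_* settings sit at the top level.
            # Illustrative shape only (sub-section contents depend on the existing config):
            #   {'backend_type': 'MULTI',
            #    'backend_interface_retries_on_error': 5,
            #    '0': {'alba_connection_use_rora': True, ...},
            #    '1': {...}}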
            changes = False
            storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.guid, storagedriver.storagedriver_id)
            storagedriver_config.load()
            if 'backend_connection_manager' not in storagedriver_config.configuration:
                continue

            current_config = storagedriver_config.configuration['backend_connection_manager']
            if current_config.get('backend_type') != 'MULTI':
                changes = True
                backend_connection_manager = {'backend_type': 'MULTI'}
                for index, proxy in enumerate(sorted(storagedriver.alba_proxies, key=lambda pr: pr.service.ports[0])):
                    backend_connection_manager[str(index)] = copy.deepcopy(current_config)
                    # noinspection PyUnresolvedReferences
                    backend_connection_manager[str(index)]['alba_connection_use_rora'] = True
                    # noinspection PyUnresolvedReferences
                    backend_connection_manager[str(index)]['alba_connection_rora_manifest_cache_capacity'] = 5000
                    # noinspection PyUnresolvedReferences
                    for key, value in backend_connection_manager[str(index)].items():
                        if key.startswith('backend_interface'):
                            backend_connection_manager[key] = value
                            # noinspection PyUnresolvedReferences
                            del backend_connection_manager[str(index)][key]
                for key, value in {'backend_interface_retries_on_error': 5,
                                   'backend_interface_retry_interval_secs': 1,
                                   'backend_interface_retry_backoff_multiplier': 2.0}.iteritems():
                    if key not in backend_connection_manager:
                        backend_connection_manager[key] = value
            else:
                backend_connection_manager = current_config
                for value in backend_connection_manager.values():
                    if isinstance(value, dict):
                        for key, val in value.items():
                            if key.startswith('backend_interface'):
                                backend_connection_manager[key] = val
                                changes = True
                                del value[key]
                for key, value in {'backend_interface_retries_on_error': 5,
                                   'backend_interface_retry_interval_secs': 1,
                                   'backend_interface_retry_backoff_multiplier': 2.0}.iteritems():
                    if key not in backend_connection_manager:
                        changes = True
                        backend_connection_manager[key] = value

            if changes is True:
                storagedriver_config.clear_backend_connection_manager()
                storagedriver_config.configure_backend_connection_manager(**backend_connection_manager)
                storagedriver_config.save(root_client)

                # Add '-reboot' to volumedriver services (because of updated 'backend_connection_manager' section)
                ExtensionsToolbox.edit_version_file(client=root_client,
                                                    package_name='volumedriver',
                                                    old_service_name='volumedriver_{0}'.format(vpool.name))
                if service_manager.ImplementationClass == Systemd:
                    root_client.run(['systemctl', 'daemon-reload'])

        ########################################
        # Update metadata_store_bits information
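        # The volumedriver service file is expected to expose a line such as 'METADATASTORE_BITS=5'
        # (illustrative value; the code below falls back to 5 when the extracted value is not numeric).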
        for vpool in VPoolList.get_vpools():
            bits = None
            for storagedriver in vpool.storagedrivers:
                key = '/ovs/framework/hosts/{0}/services/volumedriver_{1}'.format(storagedriver.storagerouter.machine_id, vpool.name)
                if Configuration.exists(key=key) and 'METADATASTORE_BITS' not in Configuration.get(key=key):
                    if bits is None:
                        entries = service_manager.extract_from_service_file(name='ovs-volumedriver_{0}'.format(vpool.name),
                                                                            client=sr_client_map[storagedriver.storagerouter_guid],
                                                                            entries=['METADATASTORE_BITS='])
                        if len(entries) == 1:
                            bits = entries[0].split('=')[-1]
                            bits = int(bits) if bits.isdigit() else 5
                    if bits is not None:
                        try:
                            content = Configuration.get(key=key)
                            content['METADATASTORE_BITS'] = bits
                            Configuration.set(key=key, value=content)
                        except Exception:
                            MigrationController._logger.exception('Error updating volumedriver info for vPool {0} on StorageRouter {1}'.format(vpool.name, storagedriver.storagerouter.name))

            if bits is not None:
                vpool.metadata_store_bits = bits
                vpool.save()

        MigrationController._logger.info('Finished out of band migrations')
        GenericController.refresh_package_information()
Example #17
0
    def test_different_snapshot_flags(self):
        """
        Tests the GenericController.delete_snapshots() call, but with different snapshot flags
            Scenario 1: is_automatic: True, is_consistent: True --> Automatically created consistent snapshots should be deleted
            Scenario 2: is_automatic: True, is_consistent: False --> Automatically created non-consistent snapshots should be deleted
            Scenario 3: is_automatic: False, is_consistent: True --> Manually created consistent snapshots should be deleted
            Scenario 4: is_automatic: False, is_consistent: False --> Manually created non-consistent snapshots should be deleted
            Scenario 5: is_sticky: True --> Sticky snapshots of any kind should never be deleted (Only possible to delete manually)
        """
        minute = 60
        hour = minute * 60

        for scenario in range(5):
            structure = DalHelper.build_dal_structure({
                'vpools': [1],
                'vdisks': [
                    (1, 1, 1, 1)
                ],  # (<id>, <storagedriver_id>, <vpool_id>, <mds_service_id>)
                'mds_services': [(1, 1)],
                'storagerouters': [1],
                'storagedrivers': [(1, 1, 1)]
            }  # (<id>, <vpool_id>, <storagerouter_id>)
                                                      )
            base = datetime.datetime.now().date()
            vdisk_1 = structure['vdisks'][1]
            is_sticky = False
            sticky_hours = []
            if scenario % 2 == 0:
                label = 'c'
                additional_time = minute * 30
                consistent_hours = [2]
                inconsistent_hours = []
            else:
                label = 'i'
                additional_time = 0
                consistent_hours = []
                inconsistent_hours = [2]

            if scenario == 4:
                is_sticky = True
                sticky_hours = consistent_hours

            for day in xrange(35):
                base_timestamp = self._make_timestamp(
                    base,
                    datetime.timedelta(1) * day)
                self._print_message('')
                self._print_message('Day cycle: {0}: {1}'.format(
                    day,
                    datetime.datetime.fromtimestamp(base_timestamp).strftime(
                        '%Y-%m-%d')))

                self._print_message('- Deleting snapshots')
                GenericController.delete_snapshots(timestamp=base_timestamp +
                                                   (minute * 30))

                self._validate(vdisk=vdisk_1,
                               current_day=day,
                               base_date=base,
                               sticky_hours=sticky_hours,
                               consistent_hours=consistent_hours,
                               inconsistent_hours=inconsistent_hours)

                self._print_message('- Creating snapshots')
                for x in consistent_hours + inconsistent_hours:
                    timestamp = base_timestamp + (hour * x) + additional_time
                    VDiskController.create_snapshot(
                        vdisk_guid=vdisk_1.guid,
                        metadata={
                            'label': 'ss_{0}_{1}:00'.format(label, x),
                            'is_sticky': is_sticky,
                            'timestamp': str(timestamp),
                            'is_automatic': scenario in [0, 1],
                            'is_consistent': len(consistent_hours) > 0
                        })
            self.persistent._clean()
            self.volatile._clean()
Example #18
0
    def test_arakoon_collapse(self):
        """
        Test the Arakoon collapse functionality
        """
        # Set up the test
        structure = DalHelper.build_dal_structure(
            structure={'storagerouters': [1, 2]})
        storagerouter_1 = structure['storagerouters'][1]
        storagerouter_2 = structure['storagerouters'][2]
        MockedSSHClient._run_returns[storagerouter_1.ip] = {}
        MockedSSHClient._run_returns[storagerouter_2.ip] = {}

        # Make sure we cover all Arakoon cluster types
        clusters_to_create = {
            ServiceType.ARAKOON_CLUSTER_TYPES.SD: [{
                'name': 'unittest-voldrv',
                'internal': True,
                'success': True
            }],
            ServiceType.ARAKOON_CLUSTER_TYPES.CFG: [{
                'name': 'unittest-cacc',
                'internal': True,
                'success': True
            }],
            ServiceType.ARAKOON_CLUSTER_TYPES.FWK: [{
                'name': 'unittest-ovsdb',
                'internal': True,
                'success': False
            }],
            ServiceType.ARAKOON_CLUSTER_TYPES.ABM: [{
                'name': 'unittest-cluster-1-abm',
                'internal': True,
                'success': False
            }, {
                'name': 'unittest-random-abm-name',
                'internal': False,
                'success': True
            }],
            ServiceType.ARAKOON_CLUSTER_TYPES.NSM: [{
                'name': 'unittest-cluster-1-nsm_0',
                'internal': True,
                'success': True
            }]
        }
        self.assertEqual(
            first=sorted(clusters_to_create.keys()),
            second=sorted(ServiceType.ARAKOON_CLUSTER_TYPES.keys()),
            msg='An Arakoon cluster type has been removed or added, please update this test accordingly'
        )

        # Create all Arakoon clusters and related services
        failed_clusters = []
        external_clusters = []
        successful_clusters = []
        for cluster_type, cluster_infos in clusters_to_create.iteritems():
            filesystem = cluster_type == ServiceType.ARAKOON_CLUSTER_TYPES.CFG
            for cluster_info in cluster_infos:
                internal = cluster_info['internal']
                cluster_name = cluster_info['name']

                base_dir = DalHelper.CLUSTER_DIR.format(cluster_name)
                arakoon_installer = ArakoonInstaller(cluster_name=cluster_name)
                arakoon_installer.create_cluster(cluster_type=cluster_type,
                                                 ip=storagerouter_1.ip,
                                                 base_dir=base_dir,
                                                 internal=internal)
                arakoon_installer.start_cluster()
                arakoon_installer.extend_cluster(new_ip=storagerouter_2.ip,
                                                 base_dir=base_dir)

                service_name = ArakoonInstaller.get_service_name_for_cluster(
                    cluster_name=cluster_name)
                if cluster_type == ServiceType.ARAKOON_CLUSTER_TYPES.ABM:
                    service_type = ServiceTypeList.get_by_name(
                        ServiceType.SERVICE_TYPES.ALBA_MGR)
                elif cluster_type == ServiceType.ARAKOON_CLUSTER_TYPES.NSM:
                    service_type = ServiceTypeList.get_by_name(
                        ServiceType.SERVICE_TYPES.NS_MGR)
                else:
                    service_type = ServiceTypeList.get_by_name(
                        ServiceType.SERVICE_TYPES.ARAKOON)

                if internal is True:
                    DalHelper.create_service(
                        service_name=service_name,
                        service_type=service_type,
                        storagerouter=storagerouter_1,
                        ports=arakoon_installer.ports[storagerouter_1.ip])
                    DalHelper.create_service(
                        service_name=service_name,
                        service_type=service_type,
                        storagerouter=storagerouter_2,
                        ports=arakoon_installer.ports[storagerouter_2.ip])
                else:
                    DalHelper.create_service(service_name=service_name,
                                             service_type=service_type)

                    external_clusters.append(cluster_name)
                    continue

                if cluster_info['success'] is True:
                    if filesystem is True:
                        config_path = ArakoonClusterConfig.CONFIG_FILE.format(
                            cluster_name)
                    else:
                        config_path = Configuration.get_configuration_path(
                            ArakoonClusterConfig.CONFIG_KEY.format(
                                cluster_name))
                    MockedSSHClient._run_returns[storagerouter_1.ip][
                        'arakoon --collapse-local 1 2 -config {0}'.format(
                            config_path)] = None
                    MockedSSHClient._run_returns[storagerouter_2.ip][
                        'arakoon --collapse-local 2 2 -config {0}'.format(
                            config_path)] = None
                    successful_clusters.append(cluster_name)
                else:  # For clusters with 'success': False we don't register the collapse command, so the collapse fails
                    failed_clusters.append(cluster_name)

        # Start collapse and make it fail for all clusters on StorageRouter 2
        SSHClient._raise_exceptions[storagerouter_2.ip] = {
            'users': ['ovs'],
            'exception': UnableToConnectException('No route to host')
        }
        GenericController.collapse_arakoon()

        # Verify all log messages for each type of cluster
        generic_logs = Logger._logs.get('lib', {})
        for cluster_name in successful_clusters + failed_clusters + external_clusters:
            collect_msg = (
                'DEBUG',
                'Collecting info for cluster {0}'.format(cluster_name))
            unreachable_msg = (
                'ERROR',
                'Could not collapse any cluster on {0} (not reachable)'.format(
                    storagerouter_2.name))
            end_collapse_msg = (
                'DEBUG', 'Collapsing cluster {0} on {1} completed'.format(
                    cluster_name, storagerouter_1.ip))
            start_collapse_msg = ('DEBUG',
                                  'Collapsing cluster {0} on {1}'.format(
                                      cluster_name, storagerouter_1.ip))
            failed_collapse_msg = (
                'ERROR', 'Collapsing cluster {0} on {1} failed'.format(
                    cluster_name, storagerouter_1.ip))
            messages_to_validate = []
            if cluster_name in successful_clusters:
                assert_function = self.assertIn
                messages_to_validate.append(collect_msg)
                messages_to_validate.append(unreachable_msg)
                messages_to_validate.append(start_collapse_msg)
                messages_to_validate.append(end_collapse_msg)
            elif cluster_name in failed_clusters:
                assert_function = self.assertIn
                messages_to_validate.append(collect_msg)
                messages_to_validate.append(unreachable_msg)
                messages_to_validate.append(start_collapse_msg)
                messages_to_validate.append(failed_collapse_msg)
            else:
                assert_function = self.assertNotIn
                messages_to_validate.append(collect_msg)
                messages_to_validate.append(start_collapse_msg)
                messages_to_validate.append(end_collapse_msg)

            for severity, message in messages_to_validate:
                if assert_function == self.assertIn:
                    assert_message = 'Expected to find log message: {0}'.format(
                        message)
                else:
                    assert_message = 'Did not expect to find log message: {0}'.format(
                        message)
                assert_function(member=message,
                                container=generic_logs,
                                msg=assert_message)
                if assert_function == self.assertIn:
                    self.assertEqual(
                        first=severity,
                        second=generic_logs[message],
                        msg='Log message {0} is of severity {1}, expected {2}'.
                        format(message, generic_logs[message], severity))

        # Collapse should always have a 'finished' message since each cluster should be attempted to be collapsed
        for general_message in [
                'Arakoon collapse started', 'Arakoon collapse finished'
        ]:
            self.assertIn(member=general_message,
                          container=generic_logs,
                          msg='Expected to find log message: {0}'.format(
                              general_message))
Example #19
0
    def test_snapshot_all_vdisks(self):
        """
        Tests GenericController.snapshot_all_vdisks functionality
        """
        structure = DalHelper.build_dal_structure({
            'vpools': [1],
            'vdisks': [
                (1, 1, 1, 1), (2, 1, 1, 1)
            ],  # (<id>, <storagedriver_id>, <vpool_id>, <mds_service_id>)
            'mds_services': [(1, 1)],
            'storagerouters': [1],
            'storagedrivers': [(1, 1, 1)]
        }  # (<id>, <vpool_id>, <storagerouter_id>)
                                                  )
        vdisk_1 = structure['vdisks'][1]
        vdisk_2 = structure['vdisks'][2]

        # Create automatic snapshot for both vDisks
        success, fail = GenericController.snapshot_all_vdisks()
        self.assertEqual(first=len(fail),
                         second=0,
                         msg='Expected 0 failed snapshots')
        self.assertEqual(first=len(success),
                         second=2,
                         msg='Expected 2 successful snapshots')
        self.assertEqual(first=len(vdisk_1.snapshot_ids),
                         second=1,
                         msg='Expected 1 snapshot ID for vDisk {0}'.format(
                             vdisk_1.name))
        self.assertEqual(first=len(vdisk_2.snapshot_ids),
                         second=1,
                         msg='Expected 1 snapshot ID for vDisk {0}'.format(
                             vdisk_2.name))
        self.assertEqual(first=len(vdisk_1.snapshots),
                         second=1,
                         msg='Expected 1 snapshot for vDisk {0}'.format(
                             vdisk_1.name))
        self.assertEqual(first=len(vdisk_2.snapshots),
                         second=1,
                         msg='Expected 1 snapshot for vDisk {0}'.format(
                             vdisk_2.name))

        # Ensure automatic snapshot fails for vdisk_1 and succeeds for vdisk_2
        vdisk_1.storagedriver_client._set_snapshot_in_backend(
            volume_id=vdisk_1.volume_id,
            snapshot_id=vdisk_1.snapshots[0]['guid'],
            in_backend=False)
        success, fail = GenericController.snapshot_all_vdisks()
        self.assertEqual(first=len(fail),
                         second=1,
                         msg='Expected 1 failed snapshot')
        self.assertEqual(first=fail[0],
                         second=vdisk_1.guid,
                         msg='Expected vDisk {0} to have failed'.format(
                             vdisk_1.name))
        self.assertEqual(first=len(success),
                         second=1,
                         msg='Expected 1 successful snapshot')
        self.assertEqual(first=success[0],
                         second=vdisk_2.guid,
                         msg='Expected vDisk {0} to have succeeded'.format(
                             vdisk_2.name))
        self.assertEqual(first=len(vdisk_1.snapshot_ids),
                         second=1,
                         msg='Expected 1 snapshot ID for vDisk {0}'.format(
                             vdisk_1.name))
        self.assertEqual(first=len(vdisk_2.snapshot_ids),
                         second=2,
                         msg='Expected 2 snapshot IDs for vDisk {0}'.format(
                             vdisk_2.name))
        self.assertEqual(first=len(vdisk_1.snapshots),
                         second=1,
                         msg='Expected 1 snapshot for vDisk {0}'.format(
                             vdisk_1.name))
        self.assertEqual(first=len(vdisk_2.snapshots),
                         second=2,
                         msg='Expected 2 snapshots for vDisk {0}'.format(
                             vdisk_2.name))
Example #20
0
    def _post_update_alba_plugin_alba(cls, components):
        """
        Execute some functionality after the ALBA plugin packages have been updated for the ASD manager nodes
        :param components: Update components which have been executed
        :type components: list
        :return: None
        :rtype: NoneType
        """
        if PackageFactory.COMP_ALBA not in components:
            return

        # First run post-update migrations to update services, config mgmt, ... and restart services afterwards
        for method_name in ['migrate', 'migrate_sdm']:
            try:
                # noinspection PyUnresolvedReferences
                from ovs.lib.albamigration import AlbaMigrationController
                cls._logger.debug(
                    'Executing migration code: AlbaMigrationController.{0}()'.
                    format(method_name))
                getattr(AlbaMigrationController, method_name)()
            except ImportError:
                cls._logger.error('Could not import AlbaMigrationController')
            except Exception:
                cls._logger.exception(
                    'Migration code for the ALBA plugin failed to be executed')

        # Update ALBA nodes
        method_name = inspect.currentframe().f_code.co_name
        cls._logger.info('Executing hook {0}'.format(method_name))
        alba_nodes = sorted(
            AlbaNodeList.get_albanodes_by_type(AlbaNode.NODE_TYPES.ASD),
            key=lambda an: ExtensionsToolbox.advanced_sort(element=an.ip,
                                                           separator='.'))
        for alba_node in alba_nodes:
            services_to_restart = []
            for component in components:
                if component not in alba_node.package_information:
                    continue

                component_info = alba_node.package_information[component]
                if 'services_post_update' not in component_info:
                    # Package_information still has the old format, so refresh update information
                    # This can occur when updating from a version older than 2.11.0 to 2.11.0 or later
                    try:
                        GenericController.refresh_package_information()
                    except Exception:
                        cls._logger.exception(
                            '{0}: Refreshing package information failed'.
                            format(alba_node.ip))
                    alba_node.discard()
                    component_info = alba_node.package_information.get(
                        component, {})

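                # 'services_post_update' maps a restart order to the service names to restart at
                # that position, e.g. {'1': ['<service-a>'], '2': ['<service-b>']} (illustrative
                # shape; actual service names depend on the ALBA node).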
                services_post_update = dict(
                    (int(key), value) for key, value in component_info.get(
                        'services_post_update', {}).iteritems())
                for restart_order in sorted(services_post_update):
                    for service_name in sorted(
                            services_post_update[restart_order]):
                        if service_name not in services_to_restart:
                            services_to_restart.append(service_name)

            if len(services_to_restart) > 0:
                alba_node.client.restart_services(
                    service_names=services_to_restart)

        # Renew maintenance services
        cls._logger.info('Checkup maintenance agents')
        AlbaController.checkup_maintenance_agents.delay()

        cls._logger.info('Executed hook {0}'.format(method_name))