def test_exception_handling(self):
    """
    Test if the scheduled job can handle exceptions

    Two vDisks each receive two snapshots dated roughly two days back. A delete callback raising a RuntimeError
    is registered for vDisk 1. The snapshot cleanup of the StorageDriver is then expected to raise a RuntimeError,
    while one snapshot of vDisk 2 is still removed and vDisk 1 keeps both of its snapshots.
    """
    def raise_an_exception(*args, **kwargs):
        _ = args, kwargs
        raise RuntimeError('Emulated snapshot delete error')

    structure = DalHelper.build_dal_structure(
        {'vpools': [1],
         'vdisks': [(1, 1, 1, 1), (2, 1, 1, 1)],  # (<id>, <storagedriver_id>, <vpool_id>, <mds_service_id>)
         'mds_services': [(1, 1)],
         'storagerouters': [1],
         'storagedrivers': [(1, 1, 1)]}  # (<id>, <vpool_id>, <storagerouter_id>)
    )
    # Look the vDisks up by their id instead of unpacking dict.values(), which depends on dict iteration order
    vdisk_1 = structure['vdisks'][1]
    vdisk_2 = structure['vdisks'][2]
    storagedriver_1 = structure['storagedrivers'][1]

    vdisks = [vdisk_1, vdisk_2]
    for vdisk in vdisks:
        # Disable caching of the 'snapshots' dynamic so every read reflects the current state
        [dynamic for dynamic in vdisk._dynamics if dynamic.name == 'snapshots'][0].timeout = 0
        for i in xrange(0, 2):
            metadata = {'label': str(i),
                        'is_consistent': False,
                        'is_sticky': False,
                        'timestamp': str((int(time.time() - datetime.timedelta(2).total_seconds() - i)))}
            snapshot_id = VDiskController.create_snapshot(vdisk.guid, metadata)
            if vdisk == vdisk_1:
                # The mapping is replaced on every iteration, so only the last created snapshot keeps the callback
                StorageRouterClient.delete_snapshot_callbacks[vdisk.volume_id] = {snapshot_id: raise_an_exception}

    with self.assertRaises(RuntimeError):
        GenericController.delete_snapshots_storagedriver(storagedriver_guid=storagedriver_1.guid)
    self.assertEqual(1, len(vdisk_2.snapshot_ids), 'One snapshot should be removed for vdisk 2')
    self.assertEqual(2, len(vdisk_1.snapshot_ids), 'No snapshots should be removed for vdisk 1')
def _refresh_package_information():
    """
    Refresh the package information, retrying whenever a NoLockAvailableException is raised
    At most 5 attempts are made and every failed attempt is followed by a 6 second pause
    :raises Exception: When the 5th attempt failed with a NoLockAvailableException as well
    :return: None
    """
    UpdateController._logger.debug('Refreshing package information')
    for attempt in range(1, 6):
        try:
            GenericController.refresh_package_information()
        except NoLockAvailableException:
            UpdateController._logger.debug('Attempt {0}: Could not refresh the update information, trying again'.format(attempt))
            time.sleep(6)  # 5 attempts * 6 seconds --> 30 seconds max in total
        else:
            return
    raise Exception('Could not refresh the update information')
def test_delete_snapshot_scrubbing_lock(self):
    """
    Tests the skip-if-scrubbed logic

    A snapshot delete is attempted from within the 'post_vdisk_scrub_registration' test hook of the scrubber.
    The attempt is expected to fail with a 'VDisk is being scrubbed' error and to leave the snapshot in place.
    """
    snapshot_while_scrub_results = []

    def delete_snapshot_while_scrubbing(*args, **kwargs):
        _ = args, kwargs
        # Record either the return value or the raised RuntimeError, so it can be inspected after the scrub run
        try:
            snapshot_while_scrub_results.append(VDiskController.delete_snapshot(vdisk_1.guid, vdisk_1.snapshot_ids[0]))
        except RuntimeError as ex:
            snapshot_while_scrub_results.append(ex)

    structure = DalHelper.build_dal_structure(
        {'vpools': [1],
         'vdisks': [(1, 1, 1, 1)],  # (<id>, <storagedriver_id>, <vpool_id>, <mds_service_id>)
         'mds_services': [(1, 1)],
         'storagerouters': [1],
         'storagedrivers': [(1, 1, 1)]}  # (<id>, <vpool_id>, <storagerouter_id>)
    )
    vdisks = structure['vdisks']
    vdisk_1 = vdisks[1]

    # Create an automatic snapshot for the vDisk
    success, fail = GenericController.snapshot_all_vdisks()
    self.assertEqual(first=len(fail), second=0, msg='Expected 0 failed snapshots')
    self.assertEqual(first=len(success), second=1, msg='Expected 1 successful snapshots')
    self.assertEqual(first=len(vdisk_1.snapshot_ids), second=1, msg='Expected 1 snapshot ID for vDisk {0}'.format(vdisk_1.name))
    self.assertEqual(first=len(vdisk_1.snapshots), second=1, msg='Expected 1 snapshot for vDisk {0}'.format(vdisk_1.name))

    _, thread_names, _ = self.generate_scrub_related_info(structure)
    LockedClient.scrub_controller = {'possible_threads': thread_names,
                                     'volumes': {},
                                     'waiter': Waiter(len(thread_names[0:1]))}  # only 1 disks -> 1 thread
    # Scrub all volumes
    for vdisk_id, vdisk in vdisks.iteritems():
        LockedClient.scrub_controller['volumes'][vdisk.volume_id] = {'success': True,
                                                                     'scrub_work': range(vdisk_id)}
    hooks = {'post_vdisk_scrub_registration': delete_snapshot_while_scrubbing}  # Make the scrubber wait
    ScrubShared._test_hooks.update(hooks)
    GenericController.execute_scrub()

    # Fail with a clear message instead of an IndexError when the hook was never invoked
    self.assertGreater(len(snapshot_while_scrub_results), 0, 'Expected the scrub hook to have attempted a snapshot delete')
    # Ensure delete snapshot fails for vdisk_1 because it is being scrubbed
    result_while_scrub = snapshot_while_scrub_results[0]
    self.assertIsInstance(result_while_scrub, Exception, 'Expected an exception to have occurred')
    self.assertEqual(str(result_while_scrub), 'VDisk is being scrubbed. Unable to remove snapshots at this time', 'Exception should be about disk being scrubbed')
    self.assertEqual(first=len(vdisk_1.snapshot_ids), second=1, msg='Expected 1 snapshot ID for vDisk {0}'.format(vdisk_1.name))
    self.assertEqual(first=len(vdisk_1.snapshots), second=1, msg='Expected 1 snapshot for vDisk {0}'.format(vdisk_1.name))
def test_scrubbing_exception_handling(self):
    """
    Verify the scheduled snapshot cleanup copes with scrub related exceptions

    The delete callback of a snapshot raises a RuntimeError carrying SCRUB_VDISK_EXCEPTION_MESSAGE. The cleanup
    for the StorageDriver is expected to finish without raising and both snapshots are expected to remain.
    """
    def raise_an_exception(*args, **kwargs):
        raise RuntimeError(SCRUB_VDISK_EXCEPTION_MESSAGE)

    layout = {'vpools': [1],
              'vdisks': [(1, 1, 1, 1)],  # (<id>, <storagedriver_id>, <vpool_id>, <mds_service_id>)
              'mds_services': [(1, 1)],
              'storagerouters': [1],
              'storagedrivers': [(1, 1, 1)]}  # (<id>, <vpool_id>, <storagerouter_id>)
    structure = DalHelper.build_dal_structure(layout)
    vdisk_1 = structure['vdisks'][1]
    storagedriver_1 = structure['storagedrivers'][1]

    # Set the timeout of the 'snapshots' dynamic to 0
    snapshot_dynamics = [dynamic for dynamic in vdisk_1._dynamics if dynamic.name == 'snapshots']
    snapshot_dynamics[0].timeout = 0

    for index in xrange(0, 2):
        snapshot_time = int(time.time() - datetime.timedelta(2).total_seconds() - index)
        metadata = {'label': str(index),
                    'is_consistent': False,
                    'is_sticky': False,
                    'timestamp': str(snapshot_time)}
        snapshot_id = VDiskController.create_snapshot(vdisk_1.guid, metadata)
        # Replaces the mapping of the previous iteration
        StorageRouterClient.delete_snapshot_callbacks[vdisk_1.volume_id] = {snapshot_id: raise_an_exception}

    GenericController.delete_snapshots_storagedriver(storagedriver_guid=storagedriver_1.guid)
    amount_of_snapshots = len(vdisk_1.snapshot_ids)
    self.assertEqual(2, amount_of_snapshots, 'No snapshots should be removed for vdisk 1')
def execute_scrubbing():
    """
    Trigger the scrub job through the GenericController
    :return: The value returned by GenericController.execute_scrub
    """
    scrub_result = GenericController.execute_scrub()
    return scrub_result
def ovs_4509_validate_arakoon_collapse_test():
    """
    Validate arakoon collapse

    For every internal service of type ARAKOON, NS_MGR or ALBA_MGR on every StorageRouter:
        * Determine the tlog location (the 'tlog_dir' entry of the cluster config overrides the default path)
        * Run the arakoon benchmark when 2 or fewer tlogs are present, so there is something to collapse
        * Trigger the collapse and verify at most 2 tlogs remain and the timestamp of head.db changed
    """
    node_ips = [sr.ip for sr in GeneralStorageRouter.get_storage_routers()]
    node_ips.sort()
    for node_ip in node_ips:
        root_client = SSHClient(node_ip, username='******')
        arakoon_clusters = []
        for service in ServiceList.get_services():
            if service.is_internal is True and service.storagerouter.ip == node_ip and \
                    service.type.name in (ServiceType.SERVICE_TYPES.ARAKOON,
                                          ServiceType.SERVICE_TYPES.NS_MGR,
                                          ServiceType.SERVICE_TYPES.ALBA_MGR):
                arakoon_clusters.append(service.name.replace('arakoon-', ''))

        for arakoon_cluster in arakoon_clusters:
            arakoon_config_path = Configuration.get_configuration_path('/ovs/arakoon/{0}/config'.format(arakoon_cluster))
            tlog_location = '/opt/OpenvStorage/db/arakoon/{0}/tlogs'.format(arakoon_cluster)

            # read_tlog_dir
            with remote(node_ip, [Configuration]) as rem:
                config_contents = rem.Configuration.get('/ovs/arakoon/{0}/config'.format(arakoon_cluster), raw=True)
                for line in config_contents.splitlines():
                    if 'tlog_dir' in line:
                        tlog_location = line.split()[-1]

            # Build the head.db path once, so the existence check and both stat calls use the same file
            head_db_path = '/'.join([tlog_location, 'head.db'])
            nr_of_tlogs = TestArakoon.get_nr_of_tlogs_in_folder(root_client, tlog_location)
            old_headdb_timestamp = 0
            if root_client.file_exists(head_db_path):
                old_headdb_timestamp = root_client.run(['stat', '--format=%Y', head_db_path])
            if nr_of_tlogs <= 2:
                benchmark_command = ['arakoon', '--benchmark', '-n_clients', '1', '-max_n', '5_000', '-config', arakoon_config_path]
                root_client.run(benchmark_command)

            GenericController.collapse_arakoon()

            nr_of_tlogs = TestArakoon.get_nr_of_tlogs_in_folder(root_client, tlog_location)
            new_headdb_timestamp = root_client.run(['stat', '--format=%Y', head_db_path])
            # The message now matches the asserted condition (2 tlogs are still acceptable)
            assert nr_of_tlogs <= 2,\
                'Arakoon collapse left {0} tlogs on the environment, expecting at most 2'.format(nr_of_tlogs)
            assert old_headdb_timestamp != new_headdb_timestamp,\
                'Timestamp of the head_db file was not changed in the process of collapsing tlogs'
def test_snapshot_sticky(self):
    """
    is_sticky: True --> Sticky snapshots of any kind should never be deleted (Only possible to delete manually)

    Simulates 35 days: every day the snapshot cleanup of the StorageDriver runs first, the resulting state is
    validated and afterwards a new sticky snapshot is created
    """
    minute = 60
    hour = 60 * minute

    layout = {'vpools': [1],
              'vdisks': [(1, 1, 1, 1)],  # (<id>, <storagedriver_id>, <vpool_id>, <mds_service_id>)
              'mds_services': [(1, 1)],
              'storagerouters': [1],
              'storagedrivers': [(1, 1, 1)]}  # (<id>, <vpool_id>, <storagerouter_id>)
    structure = DalHelper.build_dal_structure(layout)
    base = datetime.datetime.now().date()
    vdisk_1 = structure['vdisks'][1]
    storagedriver_1 = structure['storagedrivers'][1]

    label = 'c'
    additional_time = 30 * minute  # Extra time to add to the hourly timestamps
    # Hours to create a snapshot on
    sticky_hours = [2]
    consistent_hours = [2]
    inconsistent_hours = []
    # Snapshot details
    is_sticky = len(sticky_hours) > 0
    is_consistent = len(consistent_hours) > 0
    is_automatic = False

    for day in xrange(35):
        base_timestamp = self._make_timestamp(base, datetime.timedelta(days=day))
        day_string = datetime.datetime.fromtimestamp(base_timestamp).strftime('%Y-%m-%d')
        self._print_message('')
        self._print_message('Day cycle: {0}: {1}'.format(day, day_string))

        self._print_message('- Deleting snapshots')
        GenericController.delete_snapshots_storagedriver(storagedriver_guid=storagedriver_1.guid,
                                                         timestamp=base_timestamp + (minute * 30))
        self._validate(vdisk=vdisk_1,
                       current_day=day,
                       base_date=base,
                       sticky_hours=sticky_hours,
                       consistent_hours=consistent_hours,
                       inconsistent_hours=inconsistent_hours)

        self._print_message('- Creating snapshots')
        for snapshot_hour in consistent_hours + inconsistent_hours:
            timestamp = base_timestamp + (hour * snapshot_hour) + additional_time
            metadata = {'label': 'ss_{0}_{1}:00'.format(label, snapshot_hour),
                        'is_sticky': is_sticky,
                        'timestamp': str(timestamp),
                        'is_automatic': is_automatic,
                        'is_consistent': is_consistent}
            VDiskController.create_snapshot(vdisk_guid=vdisk_1.guid, metadata=metadata)
def test_refresh_package_information(self):
    """
    Test the refresh package information functionality

    Scenario:
        * StorageRouter 1 successfully updates its package info
        * StorageRouter 2 is inaccessible (SSHClient raises UnableToConnectException)
        * StorageRouter 3 gets an error in the 2nd hook --> package_information is reset to {}
    The refresh itself is expected to raise an exception mentioning the error of StorageRouter 3
    """
    def _update_info_cluster_1(client, update_info, package_info):
        _ = package_info
        update_info[client.ip]['framework'] = {'packages': {'package1': {'candidate': 'version2',
                                                                         'installed': 'version1'}},
                                               'prerequisites': []}

    def _update_info_cluster_2(client, update_info, package_info):
        _ = package_info
        update_info[client.ip]['component2'] = {'packages': {'package2': {'candidate': 'version2',
                                                                          'installed': 'version1'}},
                                                'prerequisites': []}
        if client.ip == storagerouter_3.ip:
            update_info[client.ip]['errors'] = ['Unexpected error occurred for StorageRouter {0}'.format(storagerouter_3.name)]

    def _update_info_plugin_1(error_information):
        _ = error_information  # get_update_info_plugin is used for Alba nodes, so not testing here

    expected_package_info = {'framework': {'packages': {'package1': {'candidate': 'version2',
                                                                     'installed': 'version1'}},
                                           'prerequisites': [['node_down', '2']]},
                             'component2': {'packages': {'package2': {'candidate': 'version2',
                                                                      'installed': 'version1'}},
                                            'prerequisites': []}}

    structure = DalHelper.build_dal_structure(structure={'storagerouters': [1, 2, 3]})
    storagerouter_1 = structure['storagerouters'][1]
    storagerouter_2 = structure['storagerouters'][2]
    storagerouter_3 = structure['storagerouters'][3]
    Toolbox._function_pointers['update-get_update_info_cluster'] = [_update_info_cluster_1,
                                                                    _update_info_cluster_2]
    Toolbox._function_pointers['update-get_update_info_plugin'] = [_update_info_plugin_1]

    SSHClient._raise_exceptions[storagerouter_2.ip] = {'users': ['root'],
                                                       'exception': UnableToConnectException('No route to host')}

    with self.assertRaises(excClass=Exception) as raise_info:
        GenericController.refresh_package_information()

    # Drop the loaded state of the StorageRouters before inspecting their package information
    storagerouter_1.discard()
    storagerouter_2.discard()
    storagerouter_3.discard()
    # The messages for StorageRouter 1 and 2 previously lacked the '{0}' placeholder, so the name was never shown
    self.assertDictEqual(d1=expected_package_info,
                         d2=storagerouter_1.package_information,
                         msg='Incorrect package information found for StorageRouter {0}'.format(storagerouter_1.name))
    self.assertDictEqual(d1={},
                         d2=storagerouter_2.package_information,
                         msg='Incorrect package information found for StorageRouter {0}'.format(storagerouter_2.name))
    self.assertDictEqual(d1={},
                         d2=storagerouter_3.package_information,
                         msg='Incorrect package information found for StorageRouter {0}'.format(storagerouter_3.name))
    self.assertIn(member='Unexpected error occurred for StorageRouter {0}'.format(storagerouter_3.name),
                  container=raise_info.exception.message,
                  msg='Expected to find log message about unexpected error for StorageRouter {0}'.format(storagerouter_3.name))
def check_scrubbing_test():
    """
    Check scrubbing of vdisks test

    Steps:
        * Create a raw disk on the vPool by converting the template image with qemu-img
        * Wait until the vDisk shows up in the model and snapshot it
        * Repeatedly overwrite the start of the disk with dd, taking a snapshot after every write
        * Record the 'stored' statistic, delete all snapshots except the first and the last and run the scrubber
        * Expect the 'stored' statistic to drop below the recorded value
    The raw disks on the vPool mountpoint are removed afterwards, whether scrubbing succeeded or not
    """
    initial_counter = 100  # Budget in seconds for every polling loop
    step = 5  # Seconds slept (and subtracted from the budget) per poll
    vdisk = None
    vpool_name = General.get_config().get('vpool', 'name')
    vpool = GeneralVPool.get_vpool_by_name(vpool_name=vpool_name)
    assert vpool, "No vpool found where one was expected"

    template_folder = GeneralVMachine.template_target_folder
    image_name = GeneralVMachine.template_image
    disk_name = "scrubdisk"

    GeneralVMachine.logger.info("Starting RAW disk creation")
    _, err, _ = General.execute_command('qemu-img convert -O raw {0}{1} /mnt/{2}/{3}.raw'.format(template_folder, image_name, vpool_name, disk_name))
    if err:
        GeneralVMachine.logger.error("Error while creating raw disk: {0}".format(err))

    def snapshot_vdisk(vdisk):
        metadata = {'label': 'snap-' + vdisk.name,
                    'is_consistent': True,
                    'timestamp': time.time(),
                    'is_automatic': False,
                    'is_sticky': False}
        VDiskController.create_snapshot(vdisk.guid, metadata)

    counter = initial_counter
    while counter > 0 and vdisk is None:
        time.sleep(step)
        vdisk = VDiskList.get_by_devicename_and_vpool('/' + disk_name + '.raw', vpool)
        counter -= step
    # Check the vDisk itself: asserting on the counter fails wrongly when the vDisk appears on the very last poll
    assert vdisk is not None, "Vdisk with name {0} didn't appear in the model after {1} seconds".format(disk_name, initial_counter)

    # snapshot disks for the first time
    snapshot_vdisk(vdisk)
    counter = initial_counter
    while counter > 0:
        time.sleep(step)
        General.execute_command('dd if=/dev/zero of=/mnt/{0}/{1}.raw bs=10K count=1000 conv=notrunc'.format(vpool_name, disk_name))
        counter -= step
        snapshot_vdisk(vdisk)

    # saving disk 'stored' info / the only attribute that is lowered after scrubbing
    vdisk.invalidate_dynamics(['statistics'])
    disk_backend_data = vdisk.statistics['stored']

    # deleting middle snapshots
    for snapshot in vdisk.snapshots[1:-1]:
        VDiskController.delete_snapshot(vdisk.guid, snapshot['guid'])

    # starting scrubber
    try:
        GenericController.execute_scrub()
        # waiting for model to catch up
        counter = initial_counter
        while counter > 0:
            time.sleep(step)
            vdisk.invalidate_dynamics(['statistics'])
            # checking result of scrub work
            if vdisk.statistics['stored'] < disk_backend_data:
                # The counter is lowered after this check, so the sleep of the current iteration is added explicitly
                elapsed = initial_counter - counter + step
                GeneralVMachine.logger.info("It took {0} seconds for the value to change from {1} to {2}\n".format(elapsed,
                                                                                                                    disk_backend_data,
                                                                                                                    vdisk.statistics['stored']))
                break
            counter -= step
    finally:
        # removing vdisk
        GeneralVMachine.logger.info("Removing vpool vdisks from {0} vpool".format(vpool_name))
        _, err, _ = General.execute_command("rm -rf /mnt/{0}/*.raw".format(vpool_name))
        if err:
            GeneralVMachine.logger.error("Error while removing vdisk: {0}".format(err))

    # The loop above only leaves a positive counter behind when it was left through the 'break'
    assert counter > 0, "Scrubbing didn't run as expected, backend size of vdisk remained at {0}:\n".format(disk_backend_data)
def test_happypath(self):
    """
    Validates the happy path; Hourly snapshots are taken with a few manual consistent every now and then.
    The delete policy is executed every day

    35 days are simulated. Every day starts with the delete policy (00:30), followed by a validation of the
    remaining snapshots, after which the snapshots for that day are created
    """
    layout = {'vpools': [1],
              'vdisks': [(1, 1, 1, 1)],  # (<id>, <storagedriver_id>, <vpool_id>, <mds_service_id>)
              'mds_services': [(1, 1)],
              'storagerouters': [1],
              'storagedrivers': [(1, 1, 1)]}  # (<id>, <vpool_id>, <storagerouter_id>)
    structure = DalHelper.build_dal_structure(layout)
    vdisk_1 = structure['vdisks'][1]
    snapshot_dynamics = [dynamic for dynamic in vdisk_1._dynamics if dynamic.name == 'snapshots']
    snapshot_dynamics[0].timeout = 0

    # Run the testing scenario
    travis = 'TRAVIS' in os.environ and os.environ['TRAVIS'] == 'true'
    if travis is True:
        self._print_message('Running in Travis, reducing output.')

    base = datetime.datetime.now().date()
    minute = 60
    hour = 60 * minute
    consistent_hours = [6, 12, 18]
    inconsistent_hours = xrange(2, 23)

    for day in xrange(0, 35):
        base_timestamp = self._make_timestamp(base, datetime.timedelta(days=day))
        day_string = datetime.datetime.fromtimestamp(base_timestamp).strftime('%Y-%m-%d')
        self._print_message('')
        self._print_message('Day cycle: {0}: {1}'.format(day, day_string))

        # At the start of the day, delete snapshot policy runs at 00:30
        self._print_message('- Deleting snapshots')
        GenericController.delete_snapshots(timestamp=base_timestamp + (minute * 30))

        # Validate snapshots
        self._print_message('- Validating snapshots')
        self._validate(vdisk=vdisk_1,
                       current_day=day,
                       base_date=base,
                       sticky_hours=[],
                       consistent_hours=consistent_hours,
                       inconsistent_hours=inconsistent_hours)

        # During the day, snapshots are taken
        # - Create non consistent snapshot every hour, between 2:00 and 22:00
        # - Create consistent snapshot at 6:30, 12:30, 18:30
        self._print_message('- Creating snapshots')
        for snapshot_hour in inconsistent_hours:
            timestamp = base_timestamp + (hour * snapshot_hour)
            inconsistent_metadata = {'label': 'ss_i_{0}:00'.format(str(snapshot_hour)),
                                     'is_consistent': False,
                                     'timestamp': str(timestamp)}
            VDiskController.create_snapshot(vdisk_guid=vdisk_1.guid, metadata=inconsistent_metadata)
            if snapshot_hour not in consistent_hours:
                continue
            half_past = timestamp + (minute * 30)
            consistent_metadata = {'label': 'ss_c_{0}:30'.format(str(snapshot_hour)),
                                   'is_consistent': True,
                                   'timestamp': str(half_past)}
            VDiskController.create_snapshot(vdisk_guid=vdisk_1.guid, metadata=consistent_metadata)
def test_scrubbing(self):
    """
    Validates the scrubbing workflow
    * Scenario 1: Validate disabled scrub task and single vDisk scrub logic
    * Scenario 2: 1 vPool, 10 vDisks, 1 scrub role
                  Scrubbing fails for 5 vDisks, check if scrubbing completed for all other vDisks
                  Run scrubbing a 2nd time and verify scrubbing now works for failed vDisks
    * Scenario 3: 1 vPool, 10 vDisks, 5 scrub roles
                  Check if vDisks are divided among all threads
    * Scenario 4: 3 vPools, 9 vDisks, 5 scrub roles
                  Validate 6 threads will be spawned and used out of a potential of 15 (5 scrub roles * 3 vPools)
                  We limit max amount of threads spawned per vPool to 2 in case 3 to 5 vPools are present
    """
    _ = self

    def _configure_scrub_proxy(pool):
        # Store an empty generic scrub proxy configuration for the given vPool
        Configuration.set('/ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(pool.guid),
                          json.dumps({}, indent=4),
                          raw=True)

    def _register_scrub_work(disk_id, disk, success):
        # Tell the mocked LockedClient how much scrub work a volume has and whether scrubbing it will succeed
        LockedClient.scrub_controller['volumes'][disk.volume_id] = {'success': success,
                                                                    'scrub_work': range(disk_id)}

    def _assert_work_units(disk, expected, message):
        # Compare the amount of scrubbing work units still reported for the volume with the expectation
        with disk.storagedriver_client.make_locked_client(disk.volume_id) as locked_client:
            self.assertEqual(first=len(locked_client.get_scrubbing_workunits()),
                             second=expected,
                             msg=message)

    for host_id in xrange(1, 6):
        Configuration.set('/ovs/framework/hosts/{0}/ports'.format(host_id), {'storagedriver': [10000, 10100]})

    ten_vdisks = [(number, 1, 1, 1) for number in range(1, 11)]

    ##############
    # Scenario 1 #
    ##############
    structure = Helper.build_service_structure(
        {'vpools': [1],
         'vdisks': [(1, 1, 1, 1)],  # (<id>, <storagedriver_id>, <vpool_id>, <mds_service_id>)
         'mds_services': [(1, 1)],  # (<id>, <storagedriver_id>)
         'storagerouters': [1],
         'storagedrivers': [(1, 1, 1)]}  # (<id>, <vpool_id>, <storagerouter_id>)
    )
    vdisk = structure['vdisks'][1]
    vpool = structure['vpools'][1]
    storagerouter = structure['storagerouters'][1]
    System._machine_id = {storagerouter.ip: '1'}
    _configure_scrub_proxy(vpool)
    LockedClient.scrub_controller = {'possible_threads': None,
                                     'volumes': {},
                                     'waiter': Waiter(1)}

    # A failing scrub of a single vDisk must raise and mention the vDisk
    LockedClient.scrub_controller['volumes'][vdisk.volume_id] = {'success': False,
                                                                 'scrub_work': [0]}
    with self.assertRaises(Exception) as raise_info:
        VDiskController.scrub_single_vdisk(vdisk.guid, storagerouter.guid)
    self.assertIn(vdisk.name, raise_info.exception.message)

    # A successful scrub must leave no work behind
    LockedClient.scrub_controller['volumes'][vdisk.volume_id] = {'success': True,
                                                                 'scrub_work': [0]}
    VDiskController.scrub_single_vdisk(vdisk.guid, storagerouter.guid)
    _assert_work_units(vdisk, 0, 'Scrubbed vDisk {0} does not have the expected amount of scrubbing items: {1}'.format(vdisk.name, 0))

    ##############
    # Scenario 2 #
    ##############
    self.volatile.clean()
    self.persistent.clean()
    structure = Helper.build_service_structure(
        {'vpools': [1],
         'vdisks': ten_vdisks,  # (<id>, <storagedriver_id>, <vpool_id>, <mds_service_id>)
         'mds_services': [(1, 1)],  # (<id>, <storagedriver_id>)
         'storagerouters': [1],
         'storagedrivers': [(1, 1, 1)]}  # (<id>, <vpool_id>, <storagerouter_id>)
    )
    vpool = structure['vpools'][1]
    vdisks = structure['vdisks']
    storagerouter = structure['storagerouters'][1]
    System._machine_id = {storagerouter.ip: '1'}
    _configure_scrub_proxy(vpool)
    LockedClient.scrub_controller = {'possible_threads': ['scrub_{0}_{1}'.format(vpool.guid, storagerouter.guid)],
                                     'volumes': {},
                                     'waiter': Waiter(1)}
    failed_vdisks = []
    successful_vdisks = []
    for vdisk_id in sorted(vdisks):
        vdisk = vdisks[vdisk_id]
        success = vdisk_id % 2 == 0  # vDisks with an even ID scrub fine, the odd ones fail
        _register_scrub_work(vdisk_id, vdisk, success)
        if success is True:
            successful_vdisks.append(vdisk)
        else:
            failed_vdisks.append(vdisk)

    # Execute scrubbing a 1st time
    with self.assertRaises(Exception) as raise_info:
        GenericController.execute_scrub()
    for vdisk in failed_vdisks:
        self.assertIn(vdisk.name, raise_info.exception.message)

    # Validate expected successful vDisks
    for vdisk in successful_vdisks:
        _assert_work_units(vdisk, 0, 'Scrubbed vDisk {0} does still have scrubbing work left'.format(vdisk.name))
    # Validate expected failed vDisks
    for vdisk in failed_vdisks:
        _assert_work_units(vdisk, int(vdisk.name), 'Scrubbed vDisk {0} does not have the expected amount of scrubbing items: {1}'.format(vdisk.name, int(vdisk.name)))

    # Execute scrubbing again
    for vdisk_id in sorted(vdisks):
        vdisk = vdisks[vdisk_id]
        LockedClient.scrub_controller['volumes'][vdisk.volume_id]['success'] = True
    GenericController.execute_scrub()
    for vdisk in vdisks.values():
        _assert_work_units(vdisk, 0, 'Scrubbed vDisk {0} does still have scrubbing work left after scrubbing a 2nd time'.format(vdisk.name))

    ##############
    # Scenario 3 #
    ##############
    self.volatile.clean()
    self.persistent.clean()
    structure = Helper.build_service_structure(
        {'vpools': [1],
         'vdisks': ten_vdisks,  # (<id>, <storagedriver_id>, <vpool_id>, <mds_service_id>)
         'mds_services': [(1, 1)],  # (<id>, <storagedriver_id>)
         'storagerouters': [1, 2, 3, 4, 5],
         'storagedrivers': [(1, 1, 1)]}  # (<id>, <vpool_id>, <storagerouter_id>)
    )
    vpool = structure['vpools'][1]
    vdisks = structure['vdisks']
    storagerouters = structure['storagerouters']
    System._machine_id = dict((sr.ip, sr.machine_id) for sr in storagerouters.values())
    _configure_scrub_proxy(vpool)
    thread_names = []
    for storagerouter in storagerouters.values():
        thread_names.append('scrub_{0}_{1}'.format(vpool.guid, storagerouter.guid))
    LockedClient.scrub_controller = {'possible_threads': thread_names,
                                     'volumes': {},
                                     'waiter': Waiter(len(thread_names))}
    LockedClient.thread_names = thread_names[:]
    for vdisk_id in sorted(vdisks):
        _register_scrub_work(vdisk_id, vdisks[vdisk_id], True)
    GenericController.execute_scrub()
    self.assertEqual(first=len(LockedClient.thread_names),
                     second=0,
                     msg='Not all threads have been used in the process')

    ##############
    # Scenario 4 #
    ##############
    self.volatile.clean()
    self.persistent.clean()
    structure = Helper.build_service_structure(
        {'vpools': [1, 2, 3],
         'vdisks': [(1, 1, 1, 1), (2, 1, 1, 1), (3, 1, 1, 1),
                    (4, 2, 2, 2), (5, 2, 2, 2), (6, 2, 2, 2),
                    (7, 3, 3, 3), (8, 3, 3, 3), (9, 3, 3, 3)],  # (<id>, <storagedriver_id>, <vpool_id>, <mds_service_id>)
         'mds_services': [(1, 1), (2, 2), (3, 3)],  # (<id>, <storagedriver_id>)
         'storagerouters': [1, 2, 3, 4, 5],
         'storagedrivers': [(1, 1, 1), (2, 2, 1), (3, 3, 1)]}  # (<id>, <vpool_id>, <storagerouter_id>)
    )
    vpools = structure['vpools']
    vdisks = structure['vdisks']
    storagerouters = structure['storagerouters']

    thread_names = []
    for vpool in vpools.values():
        _configure_scrub_proxy(vpool)
        for storagerouter in storagerouters.values():
            thread_names.append('scrub_{0}_{1}'.format(vpool.guid, storagerouter.guid))
    LockedClient.scrub_controller = {'possible_threads': thread_names,
                                     'volumes': {},
                                     'waiter': Waiter(len(thread_names) - 9)}
    LockedClient.thread_names = thread_names[:]
    for vdisk_id in sorted(vdisks):
        _register_scrub_work(vdisk_id, vdisks[vdisk_id], True)
    GenericController.execute_scrub()
    self.assertEqual(first=len(LockedClient.thread_names),
                     second=9,  # 5 srs * 3 vps = 15 threads, but only 2 will be spawned per vPool --> 15 - 6 = 9 left
                     msg='Not all threads have been used in the process')

    # 3 vPools will cause the scrubber to only launch 2 threads per vPool --> 3 of the 5 possible threads should be unused per vPool
    for vpool in vpools.values():
        threads_left = [thread_name for thread_name in LockedClient.thread_names if vpool.guid in thread_name]
        self.assertEqual(first=len(threads_left),
                         second=3,
                         msg='Unexpected amount of threads left for vPool {0}'.format(vpool.name))
def test_collapse():
    """
    Test the arakoon collapsing

    For every internal service of type ARAKOON, NS_MGR or ALBA_MGR on every StorageRouter:
        * Determine the tlog location (the 'tlog_dir' entry of the cluster config overrides the default path)
        * Run the arakoon benchmark when 2 or fewer tlogs are present, so there is something to collapse
        * Trigger the collapse and verify at most 2 tlogs remain and the timestamp of head.db changed
    :return: None
    """
    ArakoonCollapse.LOGGER.info("Starting validating arakoon collapse")
    node_ips = StoragerouterHelper.get_storagerouter_ips()
    node_ips.sort()
    for node_ip in node_ips:
        ArakoonCollapse.LOGGER.info("Fetching arakoons on node `{0}`".format(node_ip))
        arakoon_clusters = []
        root_client = SSHClient(node_ip, username='******')

        # fetch arakoon clusters
        for service in ServiceList.get_services():
            if service.is_internal is True and service.storagerouter.ip == node_ip and \
                    service.type.name in (ServiceType.SERVICE_TYPES.ARAKOON,
                                          ServiceType.SERVICE_TYPES.NS_MGR,
                                          ServiceType.SERVICE_TYPES.ALBA_MGR):
                arakoon_clusters.append(service.name.replace('arakoon-', ''))

        # perform collapse
        ArakoonCollapse.LOGGER.info("Starting arakoon collapse on node `{0}`".format(node_ip))
        for arakoon_cluster in arakoon_clusters:
            ArakoonCollapse.LOGGER.info("Fetching `{0}` arakoon on node `{1}`".format(arakoon_cluster, node_ip))
            arakoon_config_path = Configuration.get_configuration_path('/ovs/arakoon/{0}/config'.format(arakoon_cluster))
            tlog_location = '/opt/OpenvStorage/db/arakoon/{0}/tlogs'.format(arakoon_cluster)

            # read_tlog_dir
            with remote(node_ip, [Configuration]) as rem:
                config_contents = rem.Configuration.get('/ovs/arakoon/{0}/config'.format(arakoon_cluster), raw=True)
                for line in config_contents.splitlines():
                    if 'tlog_dir' in line:
                        tlog_location = line.split()[-1]

            # Build the head.db path once, so the existence check and both stat calls use the same file
            head_db_path = '{0}/{1}'.format(tlog_location, 'head.db')
            nr_of_tlogs = ArakoonCollapse.get_nr_of_tlogs_in_folder(root_client, tlog_location)
            old_headdb_timestamp = 0
            if root_client.file_exists(head_db_path):
                old_headdb_timestamp = root_client.run(['stat', '--format=%Y', head_db_path])
            if nr_of_tlogs <= 2:
                benchmark_command = ['arakoon', '--benchmark', '-n_clients', '1', '-max_n', '5_000', '-config', arakoon_config_path]
                root_client.run(benchmark_command)

            ArakoonCollapse.LOGGER.info("Collapsing arakoon `{0}` on node `{1}` ...".format(arakoon_cluster, node_ip))
            GenericController.collapse_arakoon()

            nr_of_tlogs = ArakoonCollapse.get_nr_of_tlogs_in_folder(root_client, tlog_location)
            new_headdb_timestamp = root_client.run(['stat', '--format=%Y', head_db_path])

            # perform assertion
            # The node placeholder used to be `{1}` as well, which printed the cluster name twice and never the node
            assert nr_of_tlogs <= 2,\
                'Arakoon collapse left {0} tlogs on the environment, expecting at most 2 in `{1}` on node `{2}`'\
                .format(nr_of_tlogs, arakoon_cluster, node_ip)
            assert old_headdb_timestamp != new_headdb_timestamp,\
                'Timestamp of the head_db file was not changed ' \
                'in the process of collapsing tlogs of arakoon `{0}` on node `{1}`'\
                .format(arakoon_cluster, node_ip)

            ArakoonCollapse.LOGGER.info("Successfully collapsed arakoon `{0}` on node `{1}`".format(arakoon_cluster, node_ip))

    ArakoonCollapse.LOGGER.info("Finished validating arakoon collapsing")
def test_clone_snapshot(self):
    """
    Validates that a snapshot that has clones will not be deleted while other snapshots will be deleted

    Setup: 2 vDisks, the second one being a clone of the oldest snapshot of the first one. After running the
    delete policy for the next day, the snapshot the clone is based on must still be present.
    """
    def _store(model_object, attributes):
        # Apply the (name, value) pairs in the given order, save the object and hand it back
        for attribute, value in attributes:
            setattr(model_object, attribute, value)
        model_object.save()
        return model_object

    def _create_daily_snapshots(vdisk_guid, day_timestamp):
        # Consistent snapshots at 6, 12 and 18 hours past the given day timestamp
        for snapshot_hour in [6, 12, 18]:
            timestamp = day_timestamp + (hour * snapshot_hour)
            VDiskController.create_snapshot(vdisk_guid=vdisk_guid,
                                            metadata={'label': 'snapshot_{0}:30'.format(str(snapshot_hour)),
                                                      'is_consistent': True,
                                                      'timestamp': str(timestamp),
                                                      'machineguid': None})

    # Setup
    # There are 2 disks, second one cloned from a snapshot of the first
    vpool = _store(VPool(), [('name', 'vpool'),
                             ('status', 'RUNNING')])
    storage_router = _store(StorageRouter(), [('name', 'storage_router'),
                                              ('ip', '127.0.0.1'),
                                              ('machine_id', System.get_my_machine_id()),
                                              ('rdma_capable', False)])
    disk = _store(Disk(), [('name', 'physical_disk_1'),
                           ('aliases', ['/dev/non-existent']),
                           ('size', 500 * 1024 ** 3),
                           ('state', 'OK'),
                           ('is_ssd', True),
                           ('storagerouter', storage_router)])
    _store(DiskPartition(), [('disk', disk),
                             ('aliases', ['/dev/disk/non-existent']),
                             ('size', 400 * 1024 ** 3),
                             ('state', 'OK'),
                             ('offset', 1024),
                             ('roles', [DiskPartition.ROLES.SCRUB]),
                             ('mountpoint', '/var/tmp')])
    storage_driver = _store(StorageDriver(), [('vpool', vpool),
                                              ('storagerouter', storage_router),
                                              ('name', 'storage_driver_1'),
                                              ('mountpoint', '/'),
                                              ('cluster_ip', storage_router.ip),
                                              ('storage_ip', '127.0.0.1'),
                                              ('storagedriver_id', 'storage_driver_1'),
                                              ('ports', {'management': 1, 'xmlrpc': 2, 'dtl': 3, 'edge': 4})])
    service_type = _store(ServiceType(), [('name', 'MetadataServer')])
    service = _store(Service(), [('name', 'service_1'),
                                 ('storagerouter', storage_driver.storagerouter),
                                 ('ports', [1]),
                                 ('type', service_type)])
    _store(MDSService(), [('service', service),
                          ('number', 0),
                          ('capacity', 10),
                          ('vpool', storage_driver.vpool)])
    vdisk_1_1 = _store(VDisk(), [('name', 'vdisk_1_1'),
                                 ('volume_id', 'vdisk_1_1'),
                                 ('vpool', vpool),
                                 ('devicename', 'dummy'),
                                 ('size', 0)])
    vdisk_1_1.reload_client('storagedriver')

    snapshot_dynamics = [dynamic for dynamic in vdisk_1_1._dynamics if dynamic.name == 'snapshots']
    snapshot_dynamics[0].timeout = 0

    travis = 'TRAVIS' in os.environ and os.environ['TRAVIS'] == 'true'
    if travis is True:
        print('Running in Travis, reducing output.')

    base = datetime.datetime.now().date()
    day = datetime.timedelta(1)
    base_timestamp = self._make_timestamp(base, day)
    minute = 60
    hour = 60 * minute

    _create_daily_snapshots(vdisk_1_1.guid, base_timestamp)

    base_snapshot_guid = vdisk_1_1.snapshots[0]['guid']  # Oldest
    clone_vdisk = _store(VDisk(), [('name', 'clone_vdisk'),
                                   ('volume_id', 'clone_vdisk'),
                                   ('vpool', vpool),
                                   ('devicename', 'dummy'),
                                   ('parentsnapshot', base_snapshot_guid),
                                   ('size', 0)])
    clone_vdisk.reload_client('storagedriver')

    _create_daily_snapshots(clone_vdisk.guid, base_timestamp)

    # Run the delete policy at 00:30 of the following day
    base_timestamp = self._make_timestamp(base, day * 2)
    GenericController.delete_snapshots(timestamp=base_timestamp + (minute * 30))
    remaining_guids = [snap['guid'] for snap in vdisk_1_1.snapshots]
    self.assertIn(base_snapshot_guid, remaining_guids, 'Snapshot was deleted while there are still clones of it')
def test_happypath(self):
    """
    Exercise the regular snapshot life cycle over a run of 50 simulated days.

    Every simulated day starts with the delete policy (at 00:30) followed by a validation of the remaining
    snapshots. Afterwards an inconsistent snapshot is created every hour from 02:00 up to and including 22:00,
    and an additional consistent one at 06:30, 12:30 and 18:30.
    """
    # Model: vPool
    pool = VPool()
    pool.name = 'vpool'
    pool.status = 'RUNNING'
    pool.save()

    # Model: StorageRouter
    router = StorageRouter()
    router.name = 'storage_router'
    router.ip = '127.0.0.1'
    router.machine_id = System.get_my_machine_id()
    router.rdma_capable = False
    router.save()

    # Model: physical disk with a single partition carrying the SCRUB role
    physical_disk = Disk()
    physical_disk.name = 'physical_disk_1'
    physical_disk.aliases = ['/dev/non-existent']
    physical_disk.size = 500 * 1024 ** 3
    physical_disk.state = 'OK'
    physical_disk.is_ssd = True
    physical_disk.storagerouter = router
    physical_disk.save()
    partition = DiskPartition()
    partition.disk = physical_disk
    partition.aliases = ['/dev/disk/non-existent']
    partition.size = 400 * 1024 ** 3
    partition.state = 'OK'
    partition.offset = 1024
    partition.roles = [DiskPartition.ROLES.SCRUB]
    partition.mountpoint = '/var/tmp'
    partition.save()

    # Model: the vDisk under test
    vdisk = VDisk()
    vdisk.name = 'vdisk_1'
    vdisk.volume_id = 'vdisk_1'
    vdisk.vpool = pool
    vdisk.devicename = 'dummy'
    vdisk.size = 0
    vdisk.save()
    vdisk.reload_client('storagedriver')

    # Set the timeout of the 'snapshots' dynamic property to 0
    snapshot_dynamics = [entry for entry in vdisk._dynamics if entry.name == 'snapshots']
    snapshot_dynamics[0].timeout = 0

    # Scenario parameters
    running_in_travis = os.environ.get('TRAVIS') == 'true'
    if running_in_travis:
        self._print_message('Running in Travis, reducing output.')
    verbose = not running_in_travis
    total_days = 50
    today = datetime.datetime.now().date()
    one_day = datetime.timedelta(1)
    seconds_per_minute = 60
    seconds_per_hour = seconds_per_minute * 60
    consistent_hours = [6, 12, 18]

    for day_index in xrange(0, total_days):
        day_start = self._make_timestamp(today, one_day * day_index)
        readable_day = datetime.datetime.fromtimestamp(day_start).strftime('%Y-%m-%d')
        self._print_message('')
        self._print_message('Day cycle: {0}: {1}'.format(day_index, readable_day))

        # The delete policy runs at 00:30, before any snapshot of that day exists
        self._print_message('- Deleting snapshots')
        GenericController.delete_snapshots(timestamp=day_start + (seconds_per_minute * 30))

        # Check what is left after the cleanup
        self._print_message('- Validating snapshots')
        self._validate(vdisk, day_index, today, total_days, verbose)

        # Snapshots of the day:
        # - an inconsistent snapshot on every full hour, 02:00 through 22:00
        # - a consistent snapshot at 06:30, 12:30 and 18:30
        self._print_message('- Creating snapshots')
        for hour_of_day in xrange(2, 23):
            on_the_hour = day_start + (seconds_per_hour * hour_of_day)
            VDiskController.create_snapshot(vdisk_guid=vdisk.guid,
                                            metadata={'label': 'ss_i_{0}:00'.format(str(hour_of_day)),
                                                      'is_consistent': False,
                                                      'timestamp': str(on_the_hour),
                                                      'machineguid': None})
            if hour_of_day not in consistent_hours:
                continue
            half_past = on_the_hour + (seconds_per_minute * 30)
            VDiskController.create_snapshot(vdisk_guid=vdisk.guid,
                                            metadata={'label': 'ss_c_{0}:30'.format(str(hour_of_day)),
                                                      'is_consistent': True,
                                                      'timestamp': str(half_past),
                                                      'machineguid': None})
def test_clone_snapshot(self):
    """
    Verify that a snapshot which still has a clone survives the snapshot cleanup.

    The first vDisk receives three consistent snapshots; the second vDisk is turned into a clone of the first
    entry of the first vDisk's snapshot IDs and receives three snapshots per day for 10 days. After the delete
    policy has run, the parent snapshot must still be present on the first vDisk.
    """
    structure = DalHelper.build_dal_structure(
        {'vpools': [1],
         'vdisks': [(1, 1, 1, 1), (2, 1, 1, 1)],  # (<id>, <storagedriver_id>, <vpool_id>, <mds_service_id>)
         'mds_services': [(1, 1)],  # (<id>, <storagedriver_id>)
         'storagerouters': [1],
         'storagedrivers': [(1, 1, 1)]}  # (<id>, <vpool_id>, <storagerouter_id>)
    )
    parent_vdisk = structure['vdisks'][1]

    # Set the timeout of the 'snapshots' dynamic property to 0
    snapshot_dynamics = [entry for entry in parent_vdisk._dynamics if entry.name == 'snapshots']
    snapshot_dynamics[0].timeout = 0

    today = datetime.datetime.now().date()
    one_day = datetime.timedelta(1)
    seconds_per_minute = 60
    seconds_per_hour = seconds_per_minute * 60
    snapshot_hours = [6, 12, 18]

    def _snapshot_at_fixed_hours(vdisk_guid, day_start):
        # Takes a consistent snapshot at each of the fixed hours of the day starting at 'day_start'
        for hour_of_day in snapshot_hours:
            moment = day_start + (seconds_per_hour * hour_of_day)
            VDiskController.create_snapshot(vdisk_guid=vdisk_guid,
                                            metadata={'label': 'snapshot_{0}:30'.format(str(hour_of_day)),
                                                      'is_consistent': True,
                                                      'timestamp': str(moment)})

    # Snapshots on the parent vDisk
    _snapshot_at_fixed_hours(parent_vdisk.guid, self._make_timestamp(today, one_day))

    # Model the second vDisk as a clone of the first snapshot ID of the parent (marked 'Oldest' in the original)
    structure = DalHelper.build_dal_structure(structure={'vdisks': [(2, 1, 1, 1)]},
                                              previous_structure=structure)
    cloned_vdisk = structure['vdisks'][2]
    parent_snapshot_guid = parent_vdisk.snapshot_ids[0]
    cloned_vdisk.parentsnapshot = parent_snapshot_guid
    cloned_vdisk.save()

    # Snapshots on the clone, spread over 10 days
    for day_offset in range(10):
        _snapshot_at_fixed_hours(cloned_vdisk.guid, self._make_timestamp(today, one_day * day_offset))

    # Run the delete policy at 00:30 of the second day
    cleanup_day_start = self._make_timestamp(today, one_day * 2)
    GenericController.delete_snapshots(timestamp=cleanup_day_start + (seconds_per_minute * 30))
    self.assertIn(parent_snapshot_guid,
                  parent_vdisk.snapshot_ids,
                  'Snapshot was deleted while there are still clones of it')
def migrate():
    """
    Executes async migrations. It doesn't matter too much when they are executed, as long as they get eventually
    executed. This code will typically contain:
    * "dangerous" migration code (it needs certain running services)
    * Migration code depending on a cluster-wide state
    * ...

    The migrations executed here, in order:
    1. Regenerate AlbaProxy systemd unit files which lack an 'ExecReload=' entry
    2. Regenerate Arakoon service files which lack the 8192 open file descriptor limit
    3. Rename the single 'albaproxy_<vpool>' service to 'albaproxy_<vpool>_0' and rewrite the
       'backend_connection_manager' section of the StorageDriver configuration to the 'MULTI' layout
    4. Store METADATASTORE_BITS in the volumedriver service configuration and on the vPool
    Afterwards the package information is refreshed.

    Failures while regenerating a service or updating the volumedriver info are logged and do not abort the
    remaining migrations.
    :return: None
    :rtype: NoneType
    """
    MigrationController._logger.info('Preparing out of band migrations...')

    from ovs.dal.lists.storagedriverlist import StorageDriverList
    from ovs.dal.lists.storagerouterlist import StorageRouterList
    from ovs.dal.lists.vpoollist import VPoolList
    from ovs.extensions.generic.configuration import Configuration
    from ovs.extensions.generic.sshclient import SSHClient
    from ovs_extensions.generic.toolbox import ExtensionsToolbox
    from ovs_extensions.services.interfaces.systemd import Systemd
    from ovs.extensions.services.servicefactory import ServiceFactory
    from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration
    from ovs.lib.generic import GenericController

    MigrationController._logger.info('Start out of band migrations...')
    service_manager = ServiceFactory.get_manager()

    # One SSH client per StorageRouter, shared by all migrations below
    sr_client_map = {}
    for storagerouter in StorageRouterList.get_storagerouters():
        sr_client_map[storagerouter.guid] = SSHClient(endpoint=storagerouter,
                                                      username='******')

    #########################################################
    # Addition of 'ExecReload' for AlbaProxy SystemD services
    if ServiceFactory.get_service_type() == 'systemd':
        changed_clients = set()
        for storagedriver in StorageDriverList.get_storagedrivers():
            root_client = sr_client_map[storagedriver.storagerouter_guid]
            for alba_proxy in storagedriver.alba_proxies:
                service = alba_proxy.service
                service_name = 'ovs-{0}'.format(service.name)
                if not service_manager.has_service(name=service_name, client=root_client):
                    continue
                if 'ExecReload=' in root_client.file_read(filename='/lib/systemd/system/{0}.service'.format(service_name)):
                    continue

                try:
                    service_manager.regenerate_service(name='ovs-albaproxy',
                                                       client=root_client,
                                                       target_name=service_name)
                    changed_clients.add(root_client)
                except Exception:  # Best effort: log and carry on, but let SystemExit / KeyboardInterrupt through
                    MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
        for root_client in changed_clients:
            root_client.run(['systemctl', 'daemon-reload'])

    ##################################################################
    # Adjustment of open file descriptors for Arakoon services to 8192
    changed_clients = set()
    for storagerouter in StorageRouterList.get_storagerouters():
        root_client = sr_client_map[storagerouter.guid]
        for service_name in service_manager.list_services(client=root_client):
            if not service_name.startswith('ovs-arakoon-'):
                continue

            if ServiceFactory.get_service_type() == 'systemd':
                path = '/lib/systemd/system/{0}.service'.format(service_name)
                check = 'LimitNOFILE=8192'
            else:
                path = '/etc/init/{0}.conf'.format(service_name)
                check = 'limit nofile 8192 8192'

            if not root_client.file_exists(path):
                continue
            if check in root_client.file_read(path):
                continue

            try:
                service_manager.regenerate_service(name='ovs-arakoon',
                                                   client=root_client,
                                                   target_name=service_name)
                changed_clients.add(root_client)
                ExtensionsToolbox.edit_version_file(client=root_client,
                                                    package_name='arakoon',
                                                    old_service_name=service_name)
            except Exception:  # Best effort: log and carry on, but let SystemExit / KeyboardInterrupt through
                MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
    # NOTE(review): unlike the AlbaProxy section, this reload is not guarded by a systemd check, so it is also
    # executed when the '/etc/init' branch above was taken - confirm this is intended
    for root_client in changed_clients:
        root_client.run(['systemctl', 'daemon-reload'])

    #############################
    # Migrate to multiple proxies
    # Defaults added to the 'backend_connection_manager' section when they are missing
    retry_defaults = {'backend_interface_retries_on_error': 5,
                      'backend_interface_retry_interval_secs': 1,
                      'backend_interface_retry_backoff_multiplier': 2.0}
    for storagedriver in StorageDriverList.get_storagedrivers():
        vpool = storagedriver.vpool
        root_client = sr_client_map[storagedriver.storagerouter_guid]
        for alba_proxy in storagedriver.alba_proxies:
            # Rename alba_proxy service in model
            service = alba_proxy.service
            old_service_name = 'albaproxy_{0}'.format(vpool.name)
            new_service_name = 'albaproxy_{0}_0'.format(vpool.name)
            if old_service_name != service.name:
                continue
            service.name = new_service_name
            service.save()

            if not service_manager.has_service(name=old_service_name, client=root_client):
                continue
            old_configuration_key = '/ovs/framework/hosts/{0}/services/{1}'.format(storagedriver.storagerouter.machine_id, old_service_name)
            if not Configuration.exists(key=old_configuration_key):
                continue

            # Add '-reboot' to alba_proxy services (because of newly created services and removal of old service)
            ExtensionsToolbox.edit_version_file(client=root_client,
                                                package_name='alba',
                                                old_service_name=old_service_name,
                                                new_service_name=new_service_name)

            # Register new service and remove old service
            service_manager.add_service(name='ovs-albaproxy',
                                        client=root_client,
                                        params=Configuration.get(old_configuration_key),
                                        target_name='ovs-{0}'.format(new_service_name))

            # Update scrub proxy config
            proxy_config_key = '/ovs/vpools/{0}/proxies/{1}/config/main'.format(vpool.guid, alba_proxy.guid)
            proxy_config = None if Configuration.exists(key=proxy_config_key) is False else Configuration.get(proxy_config_key)
            if proxy_config is not None:
                fragment_cache = proxy_config.get('fragment_cache', ['none', {}])
                if fragment_cache[0] == 'alba' and fragment_cache[1].get('cache_on_write') is True:  # Accelerated ALBA configured
                    fragment_cache_scrub_info = copy.deepcopy(fragment_cache)
                    fragment_cache_scrub_info[1]['cache_on_read'] = False
                    proxy_scrub_config_key = '/ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid)
                    proxy_scrub_config = None if Configuration.exists(key=proxy_scrub_config_key) is False else Configuration.get(proxy_scrub_config_key)
                    if proxy_scrub_config is not None and proxy_scrub_config['fragment_cache'] == ['none']:
                        proxy_scrub_config['fragment_cache'] = fragment_cache_scrub_info
                        Configuration.set(proxy_scrub_config_key, json.dumps(proxy_scrub_config, indent=4), raw=True)

        # Update 'backend_connection_manager' section
        changes = False
        storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.guid, storagedriver.storagedriver_id)
        storagedriver_config.load()
        if 'backend_connection_manager' not in storagedriver_config.configuration:
            continue

        current_config = storagedriver_config.configuration['backend_connection_manager']
        if current_config.get('backend_type') != 'MULTI':
            # Single-proxy layout: build one numbered sub-section per proxy (ordered by first port) and lift
            # the 'backend_interface*' keys out of the sub-sections to the top level
            changes = True
            backend_connection_manager = {'backend_type': 'MULTI'}
            for index, proxy in enumerate(sorted(storagedriver.alba_proxies, key=lambda pr: pr.service.ports[0])):
                backend_connection_manager[str(index)] = copy.deepcopy(current_config)
                # noinspection PyUnresolvedReferences
                backend_connection_manager[str(index)]['alba_connection_use_rora'] = True
                # noinspection PyUnresolvedReferences
                backend_connection_manager[str(index)]['alba_connection_rora_manifest_cache_capacity'] = 5000
                # items() returns a list on Python 2, so deleting keys while looping is safe here
                # noinspection PyUnresolvedReferences
                for key, value in backend_connection_manager[str(index)].items():
                    if key.startswith('backend_interface'):
                        backend_connection_manager[key] = value
                        # noinspection PyUnresolvedReferences
                        del backend_connection_manager[str(index)][key]
            for key, value in retry_defaults.iteritems():
                if key not in backend_connection_manager:
                    backend_connection_manager[key] = value
        else:
            # Already 'MULTI': only lift misplaced 'backend_interface*' keys and add missing defaults
            backend_connection_manager = current_config
            for value in backend_connection_manager.values():
                if isinstance(value, dict):
                    for key, val in value.items():
                        if key.startswith('backend_interface'):
                            backend_connection_manager[key] = val
                            changes = True
                            del value[key]
            for key, value in retry_defaults.iteritems():
                if key not in backend_connection_manager:
                    changes = True
                    backend_connection_manager[key] = value

        if changes is True:
            storagedriver_config.clear_backend_connection_manager()
            storagedriver_config.configure_backend_connection_manager(**backend_connection_manager)
            storagedriver_config.save(root_client)

            # Add '-reboot' to volumedriver services (because of updated 'backend_connection_manager' section)
            ExtensionsToolbox.edit_version_file(client=root_client,
                                                package_name='volumedriver',
                                                old_service_name='volumedriver_{0}'.format(vpool.name))
            if service_manager.ImplementationClass == Systemd:
                root_client.run(['systemctl', 'daemon-reload'])

    ########################################
    # Update metadata_store_bits information
    for vpool in VPoolList.get_vpools():
        bits = None
        for storagedriver in vpool.storagedrivers:
            key = '/ovs/framework/hosts/{0}/services/volumedriver_{1}'.format(storagedriver.storagerouter.machine_id, vpool.name)
            if Configuration.exists(key=key) and 'METADATASTORE_BITS' not in Configuration.get(key=key):
                if bits is None:
                    # Read the value from the service file once per vPool; a non-numeric value falls back to 5
                    entries = service_manager.extract_from_service_file(name='ovs-volumedriver_{0}'.format(vpool.name),
                                                                        client=sr_client_map[storagedriver.storagerouter_guid],
                                                                        entries=['METADATASTORE_BITS='])
                    if len(entries) == 1:
                        bits = entries[0].split('=')[-1]
                        bits = int(bits) if bits.isdigit() else 5
                if bits is not None:
                    try:
                        content = Configuration.get(key=key)
                        content['METADATASTORE_BITS'] = bits
                        Configuration.set(key=key, value=content)
                    except Exception:  # Best effort: log and carry on, but let SystemExit / KeyboardInterrupt through
                        MigrationController._logger.exception('Error updating volumedriver info for vPool {0} on StorageRouter {1}'.format(vpool.name, storagedriver.storagerouter.name))

        if bits is not None:
            vpool.metadata_store_bits = bits
            vpool.save()

    MigrationController._logger.info('Finished out of band migrations')
    GenericController.refresh_package_information()
def test_different_snapshot_flags(self):
    """
    Run GenericController.delete_snapshots() against snapshots carrying different flag combinations.

    Scenarios (loop index between brackets):
    1 (0): is_automatic: True,  is_consistent: True  --> Automatically created consistent snapshots should be deleted
    2 (1): is_automatic: True,  is_consistent: False --> Automatically created non-consistent snapshots should be deleted
    3 (2): is_automatic: False, is_consistent: True  --> Manually created consistent snapshots should be deleted
    4 (3): is_automatic: False, is_consistent: False --> Manually created non-consistent snapshots should be deleted
    5 (4): is_sticky: True --> Sticky snapshots of any kind should never be deleted (Only possible to delete manually)

    Each scenario simulates 35 days with one snapshot per day and starts from a freshly built model.
    """
    seconds_per_minute = 60
    seconds_per_hour = seconds_per_minute * 60
    one_day = datetime.timedelta(1)

    for scenario in range(5):
        structure = DalHelper.build_dal_structure(
            {'vpools': [1],
             'vdisks': [(1, 1, 1, 1)],  # (<id>, <storagedriver_id>, <vpool_id>, <mds_service_id>)
             'mds_services': [(1, 1)],
             'storagerouters': [1],
             'storagedrivers': [(1, 1, 1)]}  # (<id>, <vpool_id>, <storagerouter_id>)
        )
        today = datetime.datetime.now().date()
        vdisk = structure['vdisks'][1]

        # Even scenarios snapshot consistently at 02:30, odd ones inconsistently at 02:00
        if scenario % 2 == 0:
            label = 'c'
            additional_time = seconds_per_minute * 30
            consistent_hours, inconsistent_hours = [2], []
        else:
            label = 'i'
            additional_time = 0
            consistent_hours, inconsistent_hours = [], [2]

        # Only the last scenario produces sticky snapshots
        is_sticky = scenario == 4
        sticky_hours = consistent_hours if is_sticky else []
        snapshot_hours = consistent_hours + inconsistent_hours

        for day in xrange(35):
            day_start = self._make_timestamp(today, one_day * day)
            readable_day = datetime.datetime.fromtimestamp(day_start).strftime('%Y-%m-%d')
            self._print_message('')
            self._print_message('Day cycle: {0}: {1}'.format(day, readable_day))

            self._print_message('- Deleting snapshots')
            GenericController.delete_snapshots(timestamp=day_start + (seconds_per_minute * 30))

            self._validate(vdisk=vdisk,
                           current_day=day,
                           base_date=today,
                           sticky_hours=sticky_hours,
                           consistent_hours=consistent_hours,
                           inconsistent_hours=inconsistent_hours)

            self._print_message('- Creating snapshots')
            for hour_of_day in snapshot_hours:
                moment = day_start + (seconds_per_hour * hour_of_day) + additional_time
                VDiskController.create_snapshot(vdisk_guid=vdisk.guid,
                                                metadata={'label': 'ss_{0}_{1}:00'.format(label, hour_of_day),
                                                          'is_sticky': is_sticky,
                                                          'timestamp': str(moment),
                                                          'is_automatic': scenario in [0, 1],
                                                          'is_consistent': len(consistent_hours) > 0})

        # Wipe both stores so the next scenario starts from an empty model
        self.persistent._clean()
        self.volatile._clean()
def test_arakoon_collapse(self):
    """
    Exercise GenericController.collapse_arakoon() for every Arakoon cluster type.

    Clusters are created on 2 StorageRouters. For clusters flagged 'success' the collapse command is registered
    as a mocked SSH return value, for the others it is not, which makes their collapse fail. StorageRouter 2 is
    made unreachable for user 'ovs'. Afterwards the messages logged under 'lib' are verified per cluster.
    """
    # Set up the test
    structure = DalHelper.build_dal_structure(structure={'storagerouters': [1, 2]})
    storagerouter_1 = structure['storagerouters'][1]
    storagerouter_2 = structure['storagerouters'][2]
    for storagerouter in (storagerouter_1, storagerouter_2):
        MockedSSHClient._run_returns[storagerouter.ip] = {}

    # Make sure we cover all Arakoon cluster types
    clusters_to_create = {
        ServiceType.ARAKOON_CLUSTER_TYPES.SD: [{'name': 'unittest-voldrv', 'internal': True, 'success': True}],
        ServiceType.ARAKOON_CLUSTER_TYPES.CFG: [{'name': 'unittest-cacc', 'internal': True, 'success': True}],
        ServiceType.ARAKOON_CLUSTER_TYPES.FWK: [{'name': 'unittest-ovsdb', 'internal': True, 'success': False}],
        ServiceType.ARAKOON_CLUSTER_TYPES.ABM: [{'name': 'unittest-cluster-1-abm', 'internal': True, 'success': False},
                                                {'name': 'unittest-random-abm-name', 'internal': False, 'success': True}],
        ServiceType.ARAKOON_CLUSTER_TYPES.NSM: [{'name': 'unittest-cluster-1-nsm_0', 'internal': True, 'success': True}]
    }
    self.assertEqual(first=sorted(clusters_to_create.keys()),
                     second=sorted(ServiceType.ARAKOON_CLUSTER_TYPES.keys()),
                     msg='An Arakoon cluster type has been removed or added, please update this test accordingly')

    # Create all Arakoon clusters and related services
    failed_clusters = []
    external_clusters = []
    successful_clusters = []
    for cluster_type, cluster_infos in clusters_to_create.iteritems():
        # Only the CFG cluster uses the file based configuration path
        filesystem = cluster_type == ServiceType.ARAKOON_CLUSTER_TYPES.CFG
        for cluster_info in cluster_infos:
            internal = cluster_info['internal']
            cluster_name = cluster_info['name']

            base_dir = DalHelper.CLUSTER_DIR.format(cluster_name)
            arakoon_installer = ArakoonInstaller(cluster_name=cluster_name)
            arakoon_installer.create_cluster(cluster_type=cluster_type,
                                             ip=storagerouter_1.ip,
                                             base_dir=base_dir,
                                             internal=internal)
            arakoon_installer.start_cluster()
            arakoon_installer.extend_cluster(new_ip=storagerouter_2.ip, base_dir=base_dir)

            service_name = ArakoonInstaller.get_service_name_for_cluster(cluster_name=cluster_name)
            if cluster_type == ServiceType.ARAKOON_CLUSTER_TYPES.ABM:
                service_type_name = ServiceType.SERVICE_TYPES.ALBA_MGR
            elif cluster_type == ServiceType.ARAKOON_CLUSTER_TYPES.NSM:
                service_type_name = ServiceType.SERVICE_TYPES.NS_MGR
            else:
                service_type_name = ServiceType.SERVICE_TYPES.ARAKOON
            service_type = ServiceTypeList.get_by_name(service_type_name)

            if internal is not True:
                # External clusters get a service without StorageRouter and are only tracked by name
                DalHelper.create_service(service_name=service_name, service_type=service_type)
                external_clusters.append(cluster_name)
                continue

            for storagerouter in (storagerouter_1, storagerouter_2):
                DalHelper.create_service(service_name=service_name,
                                         service_type=service_type,
                                         storagerouter=storagerouter,
                                         ports=arakoon_installer.ports[storagerouter.ip])

            if cluster_info['success'] is not True:
                # For successful False clusters we don't emulate the collapse, thus making it fail
                failed_clusters.append(cluster_name)
                continue

            if filesystem is True:
                config_path = ArakoonClusterConfig.CONFIG_FILE.format(cluster_name)
            else:
                config_path = Configuration.get_configuration_path(ArakoonClusterConfig.CONFIG_KEY.format(cluster_name))
            MockedSSHClient._run_returns[storagerouter_1.ip]['arakoon --collapse-local 1 2 -config {0}'.format(config_path)] = None
            MockedSSHClient._run_returns[storagerouter_2.ip]['arakoon --collapse-local 2 2 -config {0}'.format(config_path)] = None
            successful_clusters.append(cluster_name)

    # Start collapse and make it fail for all clusters on StorageRouter 2
    SSHClient._raise_exceptions[storagerouter_2.ip] = {'users': ['ovs'],
                                                       'exception': UnableToConnectException('No route to host')}
    GenericController.collapse_arakoon()

    # Verify all log messages for each type of cluster
    generic_logs = Logger._logs.get('lib', {})
    for cluster_name in successful_clusters + failed_clusters + external_clusters:
        collect_msg = ('DEBUG', 'Collecting info for cluster {0}'.format(cluster_name))
        unreachable_msg = ('ERROR', 'Could not collapse any cluster on {0} (not reachable)'.format(storagerouter_2.name))
        end_collapse_msg = ('DEBUG', 'Collapsing cluster {0} on {1} completed'.format(cluster_name, storagerouter_1.ip))
        start_collapse_msg = ('DEBUG', 'Collapsing cluster {0} on {1}'.format(cluster_name, storagerouter_1.ip))
        failed_collapse_msg = ('ERROR', 'Collapsing cluster {0} on {1} failed'.format(cluster_name, storagerouter_1.ip))

        if cluster_name in successful_clusters:
            expect_logged = True
            messages_to_validate = [collect_msg, unreachable_msg, start_collapse_msg, end_collapse_msg]
        elif cluster_name in failed_clusters:
            expect_logged = True
            messages_to_validate = [collect_msg, unreachable_msg, start_collapse_msg, failed_collapse_msg]
        else:
            # External clusters: none of these messages may have been logged
            expect_logged = False
            messages_to_validate = [collect_msg, start_collapse_msg, end_collapse_msg]

        for severity, message in messages_to_validate:
            if not expect_logged:
                self.assertNotIn(member=message,
                                 container=generic_logs,
                                 msg='Did not expect to find log message: {0}'.format(message))
                continue
            self.assertIn(member=message,
                          container=generic_logs,
                          msg='Expected to find log message: {0}'.format(message))
            self.assertEqual(first=severity,
                             second=generic_logs[message],
                             msg='Log message {0} is of severity {1} expected {2}'.format(message, generic_logs[message], severity))

    # Collapse should always have a 'finished' message since each cluster should be attempted to be collapsed
    for general_message in ['Arakoon collapse started', 'Arakoon collapse finished']:
        self.assertIn(member=general_message,
                      container=generic_logs,
                      msg='Expected to find log message: {0}'.format(general_message))
def test_snapshot_all_vdisks(self):
    """
    Exercise GenericController.snapshot_all_vdisks() on a model with 2 vDisks.

    Round 1: both vDisks are snapshotted successfully.
    Round 2: the existing snapshot of vDisk 1 is flagged as not being in the backend, after which vDisk 1 is
             expected to be reported as failed while vDisk 2 receives its second snapshot.
    """
    structure = DalHelper.build_dal_structure(
        {'vpools': [1],
         'vdisks': [(1, 1, 1, 1), (2, 1, 1, 1)],  # (<id>, <storagedriver_id>, <vpool_id>, <mds_service_id>)
         'mds_services': [(1, 1)],
         'storagerouters': [1],
         'storagedrivers': [(1, 1, 1)]}  # (<id>, <vpool_id>, <storagerouter_id>)
    )
    first_vdisk = structure['vdisks'][1]
    second_vdisk = structure['vdisks'][2]
    both_vdisks = (first_vdisk, second_vdisk)

    # Round 1: create automatic snapshot for both vDisks
    succeeded, failed = GenericController.snapshot_all_vdisks()
    self.assertEqual(first=len(failed), second=0, msg='Expected 0 failed snapshots')
    self.assertEqual(first=len(succeeded), second=2, msg='Expected 2 successful snapshots')
    for vdisk in both_vdisks:
        self.assertEqual(first=len(vdisk.snapshot_ids),
                         second=1,
                         msg='Expected 1 snapshot ID for vDisk {0}'.format(vdisk.name))
    for vdisk in both_vdisks:
        self.assertEqual(first=len(vdisk.snapshots),
                         second=1,
                         msg='Expected 1 snapshot for vDisk {0}'.format(vdisk.name))

    # Round 2: ensure automatic snapshot fails for vDisk 1 and succeeds for vDisk 2
    first_vdisk.storagedriver_client._set_snapshot_in_backend(volume_id=first_vdisk.volume_id,
                                                              snapshot_id=first_vdisk.snapshots[0]['guid'],
                                                              in_backend=False)
    succeeded, failed = GenericController.snapshot_all_vdisks()
    self.assertEqual(first=len(failed), second=1, msg='Expected 1 failed snapshot')
    self.assertEqual(first=failed[0],
                     second=first_vdisk.guid,
                     msg='Expected vDisk {0} to have failed'.format(first_vdisk.name))
    self.assertEqual(first=len(succeeded), second=1, msg='Expected 1 successful snapshot')
    self.assertEqual(first=succeeded[0],
                     second=second_vdisk.guid,
                     msg='Expected vDisk {0} to have succeeded'.format(second_vdisk.name))

    # vDisk 1 keeps its single snapshot, vDisk 2 now has two
    self.assertEqual(first=len(first_vdisk.snapshot_ids),
                     second=1,
                     msg='Expected 1 snapshot ID for vDisk {0}'.format(first_vdisk.name))
    self.assertEqual(first=len(second_vdisk.snapshot_ids),
                     second=2,
                     msg='Expected 2 snapshot IDs for vDisk {0}'.format(second_vdisk.name))
    self.assertEqual(first=len(first_vdisk.snapshots),
                     second=1,
                     msg='Expected 1 snapshot for vDisk {0}'.format(first_vdisk.name))
    self.assertEqual(first=len(second_vdisk.snapshots),
                     second=2,
                     msg='Expected 2 snapshots for vDisk {0}'.format(second_vdisk.name))
def _post_update_alba_plugin_alba(cls, components):
    """
    Execute some functionality after the ALBA plugin packages have been updated for the ASD manager nodes

    Steps, executed only when the ALBA component is part of 'components':
    * Run AlbaMigrationController.migrate() and AlbaMigrationController.migrate_sdm(); failures are logged only
    * Per ASD node, collect the 'services_post_update' services of the updated components (by ascending restart
      order, without duplicates) and restart them
    * Schedule a checkup of the maintenance agents
    :param components: Update components which have been executed
    :type components: list
    :return: None
    :rtype: NoneType
    """
    if PackageFactory.COMP_ALBA not in components:
        return

    # First run post-update migrations to update services, config mgmt, ... and restart services afterwards
    for method_name in ['migrate', 'migrate_sdm']:
        try:
            # noinspection PyUnresolvedReferences
            from ovs.lib.albamigration import AlbaMigrationController
            cls._logger.debug('Executing migration code: AlbaMigrationController.{0}()'.format(method_name))
            getattr(AlbaMigrationController, method_name)()
        except ImportError:
            cls._logger.error('Could not import AlbaMigrationController')
        except Exception:
            cls._logger.exception('Migration code for the ALBA plugin failed to be executed')

    # Update ALBA nodes
    method_name = inspect.currentframe().f_code.co_name  # Name of this hook, used in the log messages below
    cls._logger.info('Executing hook {0}'.format(method_name))
    alba_nodes = sorted(AlbaNodeList.get_albanodes_by_type(AlbaNode.NODE_TYPES.ASD),
                        key=lambda an: ExtensionsToolbox.advanced_sort(element=an.ip, separator='.'))
    for alba_node in alba_nodes:
        services_to_restart = []
        for component in components:
            if component not in alba_node.package_information:
                continue

            component_info = alba_node.package_information[component]
            if 'services_post_update' not in component_info:
                # Package_information still has the old format, so refresh update information
                # (the original note relates this to updates coming from a release earlier than 2.11.0)
                try:
                    GenericController.refresh_package_information()
                except Exception:  # Best effort: log and carry on, but let SystemExit / KeyboardInterrupt through
                    cls._logger.exception('{0}: Refreshing package information failed'.format(alba_node.ip))
                # Drop the loaded state of the node so the package information is read again
                alba_node.discard()
                component_info = alba_node.package_information.get(component, {})

            # Keys are converted to integers so the restart order is sorted numerically
            services_post_update = {int(key): value
                                    for key, value in component_info.get('services_post_update', {}).iteritems()}
            for restart_order in sorted(services_post_update):
                for service_name in sorted(services_post_update[restart_order]):
                    if service_name not in services_to_restart:
                        services_to_restart.append(service_name)

        if services_to_restart:
            alba_node.client.restart_services(service_names=services_to_restart)

    # Renew maintenance services
    cls._logger.info('Checkup maintenance agents')
    AlbaController.checkup_maintenance_agents.delay()

    cls._logger.info('Executed hook {0}'.format(method_name))