def _create_vdisks_for_mds_service(self, amount, start_id, mds_service=None, vpool=None):
    """
    Generates vdisks and appends them to a given mds_service
    """
    vdisks = {}
    for i in xrange(start_id, start_id + amount):
        disk = VDisk()
        disk.name = str(i)
        disk.devicename = 'disk_{0}'.format(i)
        disk.volume_id = 'disk_{0}'.format(i)
        disk.vpool = mds_service.vpool if mds_service is not None else vpool
        disk.size = 0
        disk.save()
        disk.reload_client()
        if mds_service is not None:
            storagedriver_id = None
            for sd in mds_service.vpool.storagedrivers:
                if sd.storagerouter_guid == mds_service.service.storagerouter_guid:
                    storagedriver_id = sd.storagedriver_id
            junction = MDSServiceVDisk()
            junction.vdisk = disk
            junction.mds_service = mds_service
            junction.is_master = True
            junction.save()
            config = type('MDSNodeConfig', (),
                          {'address': self._generate_nc_function(True, mds_service),
                           'port': self._generate_nc_function(False, mds_service)})()
            mds_backend_config = type('MDSMetaDataBackendConfig', (),
                                      {'node_configs': self._generate_bc_function([config])})()
            StorageDriverClient.metadata_backend_config['disk_{0}'.format(i)] = mds_backend_config
            StorageDriverClient.catch_up['disk_{0}'.format(i)] = 50
            StorageDriverClient.vrouter_id['disk_{0}'.format(i)] = storagedriver_id
        vdisks[i] = disk
    return vdisks
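# The _generate_nc_function / _generate_bc_function helpers used above are not
# shown in this section. A minimal sketch of what they plausibly look like as
# methods on the same test class, assuming they return getters for the ad-hoc
# MDSNodeConfig / MDSMetaDataBackendConfig types built with type() above
# (names and return values here are assumptions, not the project's code):
def _generate_nc_function(self, address, mds_service):
    # address=True -> getter returning the node's IP; otherwise the MDS port
    if address is True:
        return lambda s: mds_service.service.storagerouter.ip
    return lambda s: int(mds_service.service.ports[0])

def _generate_bc_function(self, configs):
    # Getter for the node_configs list consumed by the mocked backend config
    return lambda s: configs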
def create_vdisks_for_mds_service(amount, start_id, mds_service=None, storagedriver=None):
    """
    Generates vdisks and appends them to a given mds_service
    """
    if (mds_service is None and storagedriver is None) or (mds_service is not None and storagedriver is not None):
        raise RuntimeError('Either `mds_service` or `storagedriver` should be passed')
    vdisks = {}
    storagedriver_id = None
    vpool = None
    mds_services = []
    if mds_service is not None:
        mds_services.append(mds_service)
        for sd in mds_service.vpool.storagedrivers:
            if sd.storagerouter_guid == mds_service.service.storagerouter_guid:
                storagedriver_id = sd.storagedriver_id
                vpool = sd.vpool
        if storagedriver_id is None:
            raise RuntimeError('The given MDSService is located on a node without StorageDriver')
    else:
        storagedriver_id = storagedriver.storagedriver_id
        vpool = storagedriver.vpool
    srclient = StorageRouterClient(vpool.guid, None)
    for i in xrange(start_id, start_id + amount):
        devicename = 'vdisk_{0}'.format(i)
        mds_backend_config = DalHelper.generate_mds_metadata_backend_config(mds_services)
        volume_id = srclient.create_volume(devicename, mds_backend_config, 0, str(storagedriver_id))
        if len(mds_services) == 1:
            MDSClient.set_catchup(mds_services[0], volume_id, 50)
        vdisk = VDisk()
        vdisk.name = str(i)
        vdisk.devicename = devicename
        vdisk.volume_id = volume_id
        vdisk.vpool = vpool
        vdisk.size = 0
        vdisk.save()
        vdisk.reload_client('storagedriver')
        if mds_service is not None:
            junction = MDSServiceVDisk()
            junction.vdisk = vdisk
            junction.mds_service = mds_service
            junction.is_master = True
            junction.save()
        vdisks[i] = vdisk
    return vdisks
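# Hedged usage sketch for the helper above (the mds_service is assumed to have
# been built elsewhere, e.g. via the DAL structure helpers in this section):
vdisks = create_vdisks_for_mds_service(amount=2, start_id=1, mds_service=mds_service)
# The returned dict is keyed on the numeric ids 1 and 2; each value is a saved
# VDisk registered through the StorageRouterClient and linked to the
# MDSService as master.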
def create_vdisks_for_mds_service(amount, start_id, mds_service=None, storagedriver=None):
    """
    Generates vdisks and appends them to a given mds_service
    """
    if (mds_service is None and storagedriver is None) or (mds_service is not None and storagedriver is not None):
        raise RuntimeError("Either `mds_service` or `storagedriver` should be passed")
    vdisks = {}
    storagedriver_id = None
    vpool = None
    mds_services = []
    if mds_service is not None:
        mds_services.append(mds_service)
        for sd in mds_service.vpool.storagedrivers:
            if sd.storagerouter_guid == mds_service.service.storagerouter_guid:
                storagedriver_id = sd.storagedriver_id
                vpool = sd.vpool
        if storagedriver_id is None:
            raise RuntimeError("The given MDSService is located on a node without StorageDriver")
    else:
        storagedriver_id = storagedriver.storagedriver_id
        vpool = storagedriver.vpool
    srclient = StorageRouterClient(vpool.guid, None)
    for i in xrange(start_id, start_id + amount):
        devicename = "vdisk_{0}".format(i)
        mds_backend_config = Helper._generate_mdsmetadatabackendconfig(mds_services)
        volume_id = srclient.create_volume(devicename, mds_backend_config, 0, str(storagedriver_id))
        if len(mds_services) == 1:
            MDSClient.set_catchup(mds_services[0], volume_id, 50)
        vdisk = VDisk()
        vdisk.name = str(i)
        vdisk.devicename = devicename
        vdisk.volume_id = volume_id
        vdisk.vpool = vpool
        vdisk.size = 0
        vdisk.save()
        vdisk.reload_client("storagedriver")
        if mds_service is not None:
            junction = MDSServiceVDisk()
            junction.vdisk = vdisk
            junction.mds_service = mds_service
            junction.is_master = True
            junction.save()
        vdisks[i] = vdisk
    return vdisks
def test_happypath(self):
    """
    Validates the happy path; hourly snapshots are taken with a few manual
    consistent ones every now and then. The delete policy is executed every day
    """
    # Setup
    # There are 2 machines; one with two disks, one with one disk and an additional disk
    vpool = VPool()
    vpool.name = 'vpool'
    vpool.backend_type = BackendType()
    vpool.save()
    vmachine_1 = VMachine()
    vmachine_1.name = 'vmachine_1'
    vmachine_1.devicename = 'dummy'
    vmachine_1.pmachine = PMachine()
    vmachine_1.save()
    vdisk_1_1 = VDisk()
    vdisk_1_1.name = 'vdisk_1_1'
    vdisk_1_1.volume_id = 'vdisk_1_1'
    vdisk_1_1.vmachine = vmachine_1
    vdisk_1_1.vpool = vpool
    vdisk_1_1.devicename = 'dummy'
    vdisk_1_1.size = 0
    vdisk_1_1.save()
    vdisk_1_1.reload_client()
    vdisk_1_2 = VDisk()
    vdisk_1_2.name = 'vdisk_1_2'
    vdisk_1_2.volume_id = 'vdisk_1_2'
    vdisk_1_2.vmachine = vmachine_1
    vdisk_1_2.vpool = vpool
    vdisk_1_2.devicename = 'dummy'
    vdisk_1_2.size = 0
    vdisk_1_2.save()
    vdisk_1_2.reload_client()
    vmachine_2 = VMachine()
    vmachine_2.name = 'vmachine_2'
    vmachine_2.devicename = 'dummy'
    vmachine_2.pmachine = PMachine()
    vmachine_2.save()
    vdisk_2_1 = VDisk()
    vdisk_2_1.name = 'vdisk_2_1'
    vdisk_2_1.volume_id = 'vdisk_2_1'
    vdisk_2_1.vmachine = vmachine_2
    vdisk_2_1.vpool = vpool
    vdisk_2_1.devicename = 'dummy'
    vdisk_2_1.size = 0
    vdisk_2_1.save()
    vdisk_2_1.reload_client()
    vdisk_3 = VDisk()
    vdisk_3.name = 'vdisk_3'
    vdisk_3.volume_id = 'vdisk_3'
    vdisk_3.vpool = vpool
    vdisk_3.devicename = 'dummy'
    vdisk_3.size = 0
    vdisk_3.save()
    vdisk_3.reload_client()
    for disk in [vdisk_1_1, vdisk_1_2, vdisk_2_1, vdisk_3]:
        [dynamic for dynamic in disk._dynamics if dynamic.name == 'snapshots'][0].timeout = 0

    # Run the testing scenario
    debug = True
    amount_of_days = 50
    now = int(mktime(datetime.now().date().timetuple()))  # Last night
    minute = 60
    hour = minute * 60
    day = hour * 24
    for d in xrange(0, amount_of_days):
        base_timestamp = now + (day * d)
        print ''
        print 'Day cycle: {}: {}'.format(d, datetime.fromtimestamp(base_timestamp).strftime('%Y-%m-%d'))
        # At the start of the day, delete snapshot policy runs at 00:30
        print '- Deleting snapshots'
        ScheduledTaskController.deletescrubsnapshots(timestamp=base_timestamp + (minute * 30))
        # Validate snapshots
        print '- Validating snapshots'
        for vdisk in [vdisk_3]:  # [vdisk_1_1, vdisk_1_2, vdisk_2_1, vdisk_3]:
            self._validate(vdisk, d, now, amount_of_days, debug)
        # During the day, snapshots are taken
        # - Create non consistent snapshot every hour, between 2:00 and 22:00
        # - Create consistent snapshot at 6:30, 12:30, 18:30
        print '- Creating snapshots'
        for h in xrange(2, 23):
            timestamp = base_timestamp + (hour * h)
            for vm in [vmachine_1, vmachine_2]:
                VMachineController.snapshot(machineguid=vm.guid,
                                            label='ss_i_{}:00'.format(str(h)),
                                            is_consistent=False,
                                            timestamp=timestamp)
                if h in [6, 12, 18]:
                    ts = (timestamp + (minute * 30))
                    VMachineController.snapshot(machineguid=vm.guid,
                                                label='ss_c_{}:30'.format(str(h)),
                                                is_consistent=True,
                                                timestamp=ts)
            VDiskController.create_snapshot(diskguid=vdisk_3.guid,
                                            metadata={'label': 'ss_i_{}:00'.format(str(h)),
                                                      'is_consistent': False,
                                                      'timestamp': timestamp,
                                                      'machineguid': None})
            if h in [6, 12, 18]:
                ts = (timestamp + (minute * 30))
                VDiskController.create_snapshot(diskguid=vdisk_3.guid,
                                                metadata={'label': 'ss_c_{}:30'.format(str(h)),
                                                          'is_consistent': True,
                                                          'timestamp': ts,
                                                          'machineguid': None})
def update_vmachine_config(vmachine, vm_object, pmachine=None):
    """
    Update a vMachine configuration with a given vMachine configuration
    :param vmachine: Virtual Machine to update
    :param vm_object: New virtual machine info
    :param pmachine: Physical machine of the virtual machine
    """
    try:
        vdisks_synced = 0
        if vmachine.name is None:
            MessageController.fire(MessageController.Type.EVENT,
                                   {'type': 'vmachine_created',
                                    'metadata': {'name': vm_object['name']}})
        elif vmachine.name != vm_object['name']:
            MessageController.fire(MessageController.Type.EVENT,
                                   {'type': 'vmachine_renamed',
                                    'metadata': {'old_name': vmachine.name,
                                                 'new_name': vm_object['name']}})
        if pmachine is not None:
            vmachine.pmachine = pmachine
        vmachine.name = vm_object['name']
        vmachine.hypervisor_id = vm_object['id']
        vmachine.devicename = vm_object['backing']['filename']
        vmachine.save()
        # Updating and linking disks
        storagedrivers = StorageDriverList.get_storagedrivers()
        datastores = dict([('{0}:{1}'.format(storagedriver.storage_ip, storagedriver.mountpoint), storagedriver)
                           for storagedriver in storagedrivers])
        vdisk_guids = []
        mutex = volatile_mutex('{0}_{1}'.format(vmachine.name, vmachine.devicename))
        for disk in vm_object['disks']:
            ensure_safety = False
            if disk['datastore'] in vm_object['datastores']:
                datastore = vm_object['datastores'][disk['datastore']]
                if datastore in datastores:
                    try:
                        mutex.acquire(wait=10)
                        vdisk = VDiskList.get_by_devicename_and_vpool(disk['filename'], datastores[datastore].vpool)
                        if vdisk is None:
                            # The disk couldn't be located, but is in our datastore. We might be in a recovery scenario
                            vdisk = VDisk()
                            vdisk.vpool = datastores[datastore].vpool
                            vdisk.reload_client()
                            vdisk.devicename = disk['filename']
                            vdisk.volume_id = vdisk.storagedriver_client.get_volume_id(str(disk['backingfilename']))
                            vdisk.size = vdisk.info['volume_size']
                            vdisk.metadata = {'lba_size': vdisk.info['lba_size'],
                                              'cluster_multiplier': vdisk.info['cluster_multiplier']}
                            # Create the disk in a locked context, but don't execute long-running tasks in the same context
                            vdisk.save()
                            ensure_safety = True
                    finally:
                        mutex.release()
                    if ensure_safety:
                        MDSServiceController.ensure_safety(vdisk)
                        VDiskController.dtl_checkup(vdisk_guid=vdisk.guid)
                    # Update the disk with information from the hypervisor
                    if vdisk.vmachine is None:
                        MessageController.fire(MessageController.Type.EVENT,
                                               {'type': 'vdisk_attached',
                                                'metadata': {'vmachine_name': vmachine.name,
                                                             'vdisk_name': disk['name']}})
                    vdisk.vmachine = vmachine
                    vdisk.name = disk['name']
                    vdisk.order = disk['order']
                    vdisk.save()
                    vdisk_guids.append(vdisk.guid)
                    vdisks_synced += 1
        for vdisk in vmachine.vdisks:
            if vdisk.guid not in vdisk_guids:
                MessageController.fire(MessageController.Type.EVENT,
                                       {'type': 'vdisk_detached',
                                        'metadata': {'vmachine_name': vmachine.name,
                                                     'vdisk_name': vdisk.name}})
                vdisk.vmachine = None
                vdisk.save()
        VMachineController._logger.info('Updating vMachine finished (name {0}, {1} vdisks (re)linked)'.format(
            vmachine.name, vdisks_synced
        ))
    except Exception as ex:
        VMachineController._logger.info('Error during vMachine update: {0}'.format(str(ex)))
        raise
def test_happypath(self):
    """
    Validates the happy path; hourly snapshots are taken with a few manual
    consistent ones every now and then. The delete policy is executed every day
    """
    # Setup
    # There are 2 machines; one with two disks, one with one disk and an additional disk
    failure_domain = FailureDomain()
    failure_domain.name = "Test"
    failure_domain.save()
    backend_type = BackendType()
    backend_type.name = "BackendType"
    backend_type.code = "BT"
    backend_type.save()
    vpool = VPool()
    vpool.name = "vpool"
    vpool.backend_type = backend_type
    vpool.save()
    pmachine = PMachine()
    pmachine.name = "PMachine"
    pmachine.username = "******"
    pmachine.ip = "127.0.0.1"
    pmachine.hvtype = "VMWARE"
    pmachine.save()
    storage_router = StorageRouter()
    storage_router.name = "storage_router"
    storage_router.ip = "127.0.0.1"
    storage_router.pmachine = pmachine
    storage_router.machine_id = System.get_my_machine_id()
    storage_router.rdma_capable = False
    storage_router.primary_failure_domain = failure_domain
    storage_router.save()
    disk = Disk()
    disk.name = "physical_disk_1"
    disk.path = "/dev/non-existent"
    disk.size = 500 * 1024 ** 3
    disk.state = "OK"
    disk.is_ssd = True
    disk.storagerouter = storage_router
    disk.save()
    disk_partition = DiskPartition()
    disk_partition.id = "disk_partition_id"
    disk_partition.disk = disk
    disk_partition.path = "/dev/disk/non-existent"
    disk_partition.size = 400 * 1024 ** 3
    disk_partition.state = "OK"
    disk_partition.offset = 1024
    disk_partition.roles = [DiskPartition.ROLES.SCRUB]
    disk_partition.mountpoint = "/var/tmp"
    disk_partition.save()
    vmachine_1 = VMachine()
    vmachine_1.name = "vmachine_1"
    vmachine_1.devicename = "dummy"
    vmachine_1.pmachine = pmachine
    vmachine_1.save()
    vdisk_1_1 = VDisk()
    vdisk_1_1.name = "vdisk_1_1"
    vdisk_1_1.volume_id = "vdisk_1_1"
    vdisk_1_1.vmachine = vmachine_1
    vdisk_1_1.vpool = vpool
    vdisk_1_1.devicename = "dummy"
    vdisk_1_1.size = 0
    vdisk_1_1.save()
    vdisk_1_1.reload_client()
    vdisk_1_2 = VDisk()
    vdisk_1_2.name = "vdisk_1_2"
    vdisk_1_2.volume_id = "vdisk_1_2"
    vdisk_1_2.vmachine = vmachine_1
    vdisk_1_2.vpool = vpool
    vdisk_1_2.devicename = "dummy"
    vdisk_1_2.size = 0
    vdisk_1_2.save()
    vdisk_1_2.reload_client()
    vmachine_2 = VMachine()
    vmachine_2.name = "vmachine_2"
    vmachine_2.devicename = "dummy"
    vmachine_2.pmachine = pmachine
    vmachine_2.save()
    vdisk_2_1 = VDisk()
    vdisk_2_1.name = "vdisk_2_1"
    vdisk_2_1.volume_id = "vdisk_2_1"
    vdisk_2_1.vmachine = vmachine_2
    vdisk_2_1.vpool = vpool
    vdisk_2_1.devicename = "dummy"
    vdisk_2_1.size = 0
    vdisk_2_1.save()
    vdisk_2_1.reload_client()
    vdisk_3 = VDisk()
    vdisk_3.name = "vdisk_3"
    vdisk_3.volume_id = "vdisk_3"
    vdisk_3.vpool = vpool
    vdisk_3.devicename = "dummy"
    vdisk_3.size = 0
    vdisk_3.save()
    vdisk_3.reload_client()
    for disk in [vdisk_1_1, vdisk_1_2, vdisk_2_1, vdisk_3]:
        [dynamic for dynamic in disk._dynamics if dynamic.name == "snapshots"][0].timeout = 0

    # Run the testing scenario
    debug = True
    amount_of_days = 50
    base = datetime.now().date()
    day = timedelta(1)
    minute = 60
    hour = minute * 60
    for d in xrange(0, amount_of_days):
        base_timestamp = DeleteSnapshots._make_timestamp(base, day * d)
        print ""
        print "Day cycle: {0}: {1}".format(d, datetime.fromtimestamp(base_timestamp).strftime("%Y-%m-%d"))
        # At the start of the day, delete snapshot policy runs at 00:30
        print "- Deleting snapshots"
        ScheduledTaskController.delete_snapshots(timestamp=base_timestamp + (minute * 30))
        # Validate snapshots
        print "- Validating snapshots"
        for vdisk in [vdisk_1_1, vdisk_1_2, vdisk_2_1, vdisk_3]:
            self._validate(vdisk, d, base, amount_of_days, debug)
        # During the day, snapshots are taken
        # - Create non consistent snapshot every hour, between 2:00 and 22:00
        # - Create consistent snapshot at 6:30, 12:30, 18:30
        print "- Creating snapshots"
        for h in xrange(2, 23):
            timestamp = base_timestamp + (hour * h)
            for vm in [vmachine_1, vmachine_2]:
                VMachineController.snapshot(machineguid=vm.guid,
                                            label="ss_i_{0}:00".format(str(h)),
                                            is_consistent=False,
                                            timestamp=timestamp)
                if h in [6, 12, 18]:
                    ts = timestamp + (minute * 30)
                    VMachineController.snapshot(machineguid=vm.guid,
                                                label="ss_c_{0}:30".format(str(h)),
                                                is_consistent=True,
                                                timestamp=ts)
            VDiskController.create_snapshot(diskguid=vdisk_3.guid,
                                            metadata={"label": "ss_i_{0}:00".format(str(h)),
                                                      "is_consistent": False,
                                                      "timestamp": str(timestamp),
                                                      "machineguid": None})
            if h in [6, 12, 18]:
                ts = timestamp + (minute * 30)
                VDiskController.create_snapshot(diskguid=vdisk_3.guid,
                                                metadata={"label": "ss_c_{0}:30".format(str(h)),
                                                          "is_consistent": True,
                                                          "timestamp": str(ts),
                                                          "machineguid": None})
def test_happypath(self):
    """
    Validates the happy path; hourly snapshots are taken with a few manual
    consistent ones every now and then. The delete policy is executed every day
    """
    # Setup
    # There are 2 machines; one with two disks, one with one disk and a stand-alone additional disk
    failure_domain = FailureDomain()
    failure_domain.name = 'Test'
    failure_domain.save()
    backend_type = BackendType()
    backend_type.name = 'BackendType'
    backend_type.code = 'BT'
    backend_type.save()
    vpool = VPool()
    vpool.name = 'vpool'
    vpool.status = 'RUNNING'
    vpool.backend_type = backend_type
    vpool.save()
    pmachine = PMachine()
    pmachine.name = 'PMachine'
    pmachine.username = '******'
    pmachine.ip = '127.0.0.1'
    pmachine.hvtype = 'VMWARE'
    pmachine.save()
    storage_router = StorageRouter()
    storage_router.name = 'storage_router'
    storage_router.ip = '127.0.0.1'
    storage_router.pmachine = pmachine
    storage_router.machine_id = System.get_my_machine_id()
    storage_router.rdma_capable = False
    storage_router.primary_failure_domain = failure_domain
    storage_router.save()
    disk = Disk()
    disk.name = 'physical_disk_1'
    disk.path = '/dev/non-existent'
    disk.size = 500 * 1024 ** 3
    disk.state = 'OK'
    disk.is_ssd = True
    disk.storagerouter = storage_router
    disk.save()
    disk_partition = DiskPartition()
    disk_partition.id = 'disk_partition_id'
    disk_partition.disk = disk
    disk_partition.path = '/dev/disk/non-existent'
    disk_partition.size = 400 * 1024 ** 3
    disk_partition.state = 'OK'
    disk_partition.offset = 1024
    disk_partition.roles = [DiskPartition.ROLES.SCRUB]
    disk_partition.mountpoint = '/var/tmp'
    disk_partition.save()
    vmachine_1 = VMachine()
    vmachine_1.name = 'vmachine_1'
    vmachine_1.devicename = 'dummy'
    vmachine_1.pmachine = pmachine
    vmachine_1.save()
    vdisk_1_1 = VDisk()
    vdisk_1_1.name = 'vdisk_1_1'
    vdisk_1_1.volume_id = 'vdisk_1_1'
    vdisk_1_1.vmachine = vmachine_1
    vdisk_1_1.vpool = vpool
    vdisk_1_1.devicename = 'dummy'
    vdisk_1_1.size = 0
    vdisk_1_1.save()
    vdisk_1_1.reload_client()
    vdisk_1_2 = VDisk()
    vdisk_1_2.name = 'vdisk_1_2'
    vdisk_1_2.volume_id = 'vdisk_1_2'
    vdisk_1_2.vmachine = vmachine_1
    vdisk_1_2.vpool = vpool
    vdisk_1_2.devicename = 'dummy'
    vdisk_1_2.size = 0
    vdisk_1_2.save()
    vdisk_1_2.reload_client()
    vmachine_2 = VMachine()
    vmachine_2.name = 'vmachine_2'
    vmachine_2.devicename = 'dummy'
    vmachine_2.pmachine = pmachine
    vmachine_2.save()
    vdisk_2_1 = VDisk()
    vdisk_2_1.name = 'vdisk_2_1'
    vdisk_2_1.volume_id = 'vdisk_2_1'
    vdisk_2_1.vmachine = vmachine_2
    vdisk_2_1.vpool = vpool
    vdisk_2_1.devicename = 'dummy'
    vdisk_2_1.size = 0
    vdisk_2_1.save()
    vdisk_2_1.reload_client()
    vdisk_3 = VDisk()
    vdisk_3.name = 'vdisk_3'
    vdisk_3.volume_id = 'vdisk_3'
    vdisk_3.vpool = vpool
    vdisk_3.devicename = 'dummy'
    vdisk_3.size = 0
    vdisk_3.save()
    vdisk_3.reload_client()
    for disk in [vdisk_1_1, vdisk_1_2, vdisk_2_1, vdisk_3]:
        [dynamic for dynamic in disk._dynamics if dynamic.name == 'snapshots'][0].timeout = 0

    # Run the testing scenario
    travis = 'TRAVIS' in os.environ and os.environ['TRAVIS'] == 'true'
    if travis is True:
        print 'Running in Travis, reducing output.'
    debug = not travis
    amount_of_days = 50
    base = datetime.datetime.now().date()
    day = datetime.timedelta(1)
    minute = 60
    hour = minute * 60
    for d in xrange(0, amount_of_days):
        base_timestamp = self._make_timestamp(base, day * d)
        print ''
        print 'Day cycle: {0}: {1}'.format(d, datetime.datetime.fromtimestamp(base_timestamp).strftime('%Y-%m-%d'))
        # At the start of the day, delete snapshot policy runs at 00:30
        print '- Deleting snapshots'
        ScheduledTaskController.delete_snapshots(timestamp=base_timestamp + (minute * 30))
        # Validate snapshots
        print '- Validating snapshots'
        for vdisk in [vdisk_1_1, vdisk_1_2, vdisk_2_1, vdisk_3]:
            self._validate(vdisk, d, base, amount_of_days, debug)
        # During the day, snapshots are taken
        # - Create non consistent snapshot every hour, between 2:00 and 22:00
        # - Create consistent snapshot at 6:30, 12:30, 18:30
        print '- Creating snapshots'
        for h in xrange(2, 23):
            timestamp = base_timestamp + (hour * h)
            for vm in [vmachine_1, vmachine_2]:
                VMachineController.snapshot(machineguid=vm.guid,
                                            label='ss_i_{0}:00'.format(str(h)),
                                            is_consistent=False,
                                            timestamp=timestamp)
                if h in [6, 12, 18]:
                    ts = (timestamp + (minute * 30))
                    VMachineController.snapshot(machineguid=vm.guid,
                                                label='ss_c_{0}:30'.format(str(h)),
                                                is_consistent=True,
                                                timestamp=ts)
            VDiskController.create_snapshot(diskguid=vdisk_3.guid,
                                            metadata={'label': 'ss_i_{0}:00'.format(str(h)),
                                                      'is_consistent': False,
                                                      'timestamp': str(timestamp),
                                                      'machineguid': None})
            if h in [6, 12, 18]:
                ts = (timestamp + (minute * 30))
                VDiskController.create_snapshot(diskguid=vdisk_3.guid,
                                                metadata={'label': 'ss_c_{0}:30'.format(str(h)),
                                                          'is_consistent': True,
                                                          'timestamp': str(ts),
                                                          'machineguid': None})
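# The tests in this section call a _make_timestamp helper that is not shown
# here. A minimal sketch of the assumed behaviour, given that `base` is a
# date and the second argument a timedelta: it returns epoch seconds for the
# resulting day (an assumption, not the project's actual implementation).
@staticmethod
def _make_timestamp(base, offset):
    import time
    return int(time.mktime((base + offset).timetuple()))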
def test_happypath(self):
    """
    Validates the happy path; hourly snapshots are taken with a few manual
    consistent ones every now and then. The delete policy is executed every day
    """
    vpool = VPool()
    vpool.name = 'vpool'
    vpool.status = 'RUNNING'
    vpool.save()
    storage_router = StorageRouter()
    storage_router.name = 'storage_router'
    storage_router.ip = '127.0.0.1'
    storage_router.machine_id = System.get_my_machine_id()
    storage_router.rdma_capable = False
    storage_router.save()
    disk = Disk()
    disk.name = 'physical_disk_1'
    disk.aliases = ['/dev/non-existent']
    disk.size = 500 * 1024 ** 3
    disk.state = 'OK'
    disk.is_ssd = True
    disk.storagerouter = storage_router
    disk.save()
    disk_partition = DiskPartition()
    disk_partition.disk = disk
    disk_partition.aliases = ['/dev/disk/non-existent']
    disk_partition.size = 400 * 1024 ** 3
    disk_partition.state = 'OK'
    disk_partition.offset = 1024
    disk_partition.roles = [DiskPartition.ROLES.SCRUB]
    disk_partition.mountpoint = '/var/tmp'
    disk_partition.save()
    vdisk_1 = VDisk()
    vdisk_1.name = 'vdisk_1'
    vdisk_1.volume_id = 'vdisk_1'
    vdisk_1.vpool = vpool
    vdisk_1.devicename = 'dummy'
    vdisk_1.size = 0
    vdisk_1.save()
    vdisk_1.reload_client('storagedriver')
    [dynamic for dynamic in vdisk_1._dynamics if dynamic.name == 'snapshots'][0].timeout = 0

    # Run the testing scenario
    travis = 'TRAVIS' in os.environ and os.environ['TRAVIS'] == 'true'
    if travis is True:
        self._print_message('Running in Travis, reducing output.')
    debug = not travis
    amount_of_days = 50
    base = datetime.datetime.now().date()
    day = datetime.timedelta(1)
    minute = 60
    hour = minute * 60
    for d in xrange(0, amount_of_days):
        base_timestamp = self._make_timestamp(base, day * d)
        self._print_message('')
        self._print_message('Day cycle: {0}: {1}'.format(d, datetime.datetime.fromtimestamp(base_timestamp).strftime('%Y-%m-%d')))
        # At the start of the day, delete snapshot policy runs at 00:30
        self._print_message('- Deleting snapshots')
        ScheduledTaskController.delete_snapshots(timestamp=base_timestamp + (minute * 30))
        # Validate snapshots
        self._print_message('- Validating snapshots')
        self._validate(vdisk_1, d, base, amount_of_days, debug)
        # During the day, snapshots are taken
        # - Create non consistent snapshot every hour, between 2:00 and 22:00
        # - Create consistent snapshot at 6:30, 12:30, 18:30
        self._print_message('- Creating snapshots')
        for h in xrange(2, 23):
            timestamp = base_timestamp + (hour * h)
            VDiskController.create_snapshot(vdisk_guid=vdisk_1.guid,
                                            metadata={'label': 'ss_i_{0}:00'.format(str(h)),
                                                      'is_consistent': False,
                                                      'timestamp': str(timestamp),
                                                      'machineguid': None})
            if h in [6, 12, 18]:
                ts = (timestamp + (minute * 30))
                VDiskController.create_snapshot(vdisk_guid=vdisk_1.guid,
                                                metadata={'label': 'ss_c_{0}:30'.format(str(h)),
                                                          'is_consistent': True,
                                                          'timestamp': str(ts),
                                                          'machineguid': None})
def build_dal_structure(structure, previous_structure=None):
    """
    Builds a model structure
    Example:
        structure = DalHelper.build_dal_structure(
            {'vpools': [1],
             'domains': [],
             'storagerouters': [1],
             'storagedrivers': [(1, 1, 1)],  # (<id>, <vpool_id>, <storagerouter_id>)
             'mds_services': [(1, 1)],  # (<id>, <storagedriver_id>)
             'storagerouter_domains': []}  # (<id>, <storagerouter_id>, <domain_id>)
        )
    """
    Configuration.set(key=Configuration.EDITION_KEY, value=PackageFactory.EDITION_ENTERPRISE)
    if previous_structure is None:
        previous_structure = {}
    vdisks = previous_structure.get('vdisks', {})
    vpools = previous_structure.get('vpools', {})
    domains = previous_structure.get('domains', {})
    services = previous_structure.get('services', {})
    mds_services = previous_structure.get('mds_services', {})
    storagerouters = previous_structure.get('storagerouters', {})
    storagedrivers = previous_structure.get('storagedrivers', {})
    storagerouter_domains = previous_structure.get('storagerouter_domains', {})

    service_types = {}
    for service_type_name in ServiceType.SERVICE_TYPES.values():
        service_type = ServiceTypeList.get_by_name(service_type_name)
        if service_type is None:
            service_type = ServiceType()
            service_type.name = service_type_name
            service_type.save()
        service_types[service_type_name] = service_type
    srclients = {}
    for domain_id in structure.get('domains', []):
        if domain_id not in domains:
            domain = Domain()
            domain.name = 'domain_{0}'.format(domain_id)
            domain.save()
            domains[domain_id] = domain
    for vpool_id in structure.get('vpools', []):
        if vpool_id not in vpools:
            vpool = VPool()
            vpool.name = str(vpool_id)
            vpool.status = 'RUNNING'
            vpool.metadata = {'backend': {}, 'caching_info': {}}
            vpool.metadata_store_bits = 5
            vpool.save()
            vpools[vpool_id] = vpool
        else:
            vpool = vpools[vpool_id]
        srclients[vpool_id] = StorageRouterClient(vpool.guid, None)
        Configuration.set('/ovs/vpools/{0}/mds_config|mds_tlogs'.format(vpool.guid), 100)
        Configuration.set('/ovs/vpools/{0}/mds_config|mds_safety'.format(vpool.guid), 2)
        Configuration.set('/ovs/vpools/{0}/mds_config|mds_maxload'.format(vpool.guid), 75)
        Configuration.set('/ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid),
                          json.dumps({}, indent=4), raw=True)
    for sr_id in structure.get('storagerouters', []):
        if sr_id not in storagerouters:
            storagerouter = StorageRouter()
            storagerouter.name = str(sr_id)
            storagerouter.ip = '10.0.0.{0}'.format(sr_id)
            storagerouter.rdma_capable = False
            storagerouter.node_type = 'MASTER'
            storagerouter.machine_id = str(sr_id)
            storagerouter.save()
            storagerouters[sr_id] = storagerouter
            disk = Disk()
            disk.storagerouter = storagerouter
            disk.state = 'OK'
            disk.name = '/dev/uda'
            disk.size = 1 * 1024 ** 4
            disk.is_ssd = True
            disk.aliases = ['/dev/uda']
            disk.save()
            partition = DiskPartition()
            partition.offset = 0
            partition.size = disk.size
            partition.aliases = ['/dev/uda-1']
            partition.state = 'OK'
            partition.mountpoint = '/tmp/unittest/sr_{0}/disk_1/partition_1'.format(sr_id)
            partition.disk = disk
            partition.roles = [DiskPartition.ROLES.DB, DiskPartition.ROLES.SCRUB]
            partition.save()
        else:
            storagerouter = storagerouters[sr_id]
        # noinspection PyProtectedMember
        System._machine_id[storagerouter.ip] = str(sr_id)
        mds_start = 10000 + 100 * (sr_id - 1)
        mds_end = 10000 + 100 * sr_id - 1
        arakoon_start = 20000 + 100 * (sr_id - 1)
        storagedriver_start = 30000 + 100 * (sr_id - 1)
        storagedriver_end = 30000 + 100 * sr_id - 1
        Configuration.initialize_host(host_id=sr_id,
                                      port_info={'mds': [mds_start, mds_end],
                                                 'arakoon': arakoon_start,
                                                 'storagedriver': [storagedriver_start, storagedriver_end]})
    for sd_id, vpool_id, sr_id in structure.get('storagedrivers', ()):
        if sd_id not in storagedrivers:
            storagedriver = StorageDriver()
            storagedriver.vpool = vpools[vpool_id]
            storagedriver.storagerouter = storagerouters[sr_id]
            storagedriver.name = str(sd_id)
            storagedriver.mountpoint = '/'
            storagedriver.cluster_ip = storagerouters[sr_id].ip
            storagedriver.storage_ip = '10.0.1.{0}'.format(sr_id)
            storagedriver.storagedriver_id = str(sd_id)
            storagedriver.ports = {'management': 1, 'xmlrpc': 2, 'dtl': 3, 'edge': 4}
            storagedriver.save()
            storagedrivers[sd_id] = storagedriver
            DalHelper.set_vpool_storage_driver_configuration(vpool=vpools[vpool_id], storagedriver=storagedriver)
    for mds_id, sd_id in structure.get('mds_services', ()):
        if mds_id not in mds_services:
            sd = storagedrivers[sd_id]
            s_id = '{0}-{1}'.format(sd.storagerouter.name, mds_id)
            service = Service()
            service.name = s_id
            service.storagerouter = sd.storagerouter
            service.ports = [mds_id]
            service.type = service_types['MetadataServer']
            service.save()
            services[s_id] = service
            mds_service = MDSService()
            mds_service.service = service
            mds_service.number = 0
            mds_service.capacity = 10
            mds_service.vpool = sd.vpool
            mds_service.save()
            mds_services[mds_id] = mds_service
            StorageDriverController.add_storagedriverpartition(sd, {'size': None,
                                                                    'role': DiskPartition.ROLES.DB,
                                                                    'sub_role': StorageDriverPartition.SUBROLE.MDS,
                                                                    'partition': sd.storagerouter.disks[0].partitions[0],
                                                                    'mds_service': mds_service})
    for vdisk_id, storage_driver_id, vpool_id, mds_id in structure.get('vdisks', ()):
        if vdisk_id not in vdisks:
            vpool = vpools[vpool_id]
            devicename = 'vdisk_{0}'.format(vdisk_id)
            mds_backend_config = DalHelper.generate_mds_metadata_backend_config([] if mds_id is None
                                                                                else [mds_services[mds_id]])
            volume_id = srclients[vpool_id].create_volume(devicename, mds_backend_config, 0, str(storage_driver_id))
            vdisk = VDisk()
            vdisk.name = str(vdisk_id)
            vdisk.devicename = devicename
            vdisk.volume_id = volume_id
            vdisk.vpool = vpool
            vdisk.size = 0
            vdisk.save()
            vdisk.reload_client('storagedriver')
            vdisks[vdisk_id] = vdisk
    for srd_id, sr_id, domain_id, backup in structure.get('storagerouter_domains', ()):
        if srd_id not in storagerouter_domains:
            sr_domain = StorageRouterDomain()
            sr_domain.backup = backup
            sr_domain.domain = domains[domain_id]
            sr_domain.storagerouter = storagerouters[sr_id]
            sr_domain.save()
            storagerouter_domains[srd_id] = sr_domain
    return {'vdisks': vdisks,
            'vpools': vpools,
            'domains': domains,
            'services': services,
            'mds_services': mds_services,
            'service_types': service_types,
            'storagerouters': storagerouters,
            'storagedrivers': storagedrivers,
            'storagerouter_domains': storagerouter_domains}
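# Usage sketch matching the docstring example of build_dal_structure; the
# returned dict can be passed back in as previous_structure to extend the
# model incrementally (the ids shown are arbitrary):
structure = DalHelper.build_dal_structure(
    {'vpools': [1],
     'storagerouters': [1],
     'storagedrivers': [(1, 1, 1)],  # (<id>, <vpool_id>, <storagerouter_id>)
     'mds_services': [(1, 1)]})      # (<id>, <storagedriver_id>)
vpool = structure['vpools'][1]
mds_service = structure['mds_services'][1]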
def update_vmachine_config(vmachine, vm_object, pmachine=None):
    """
    Update a vMachine configuration with a given vMachine configuration
    """
    try:
        vdisks_synced = 0
        if vmachine.name is None:
            MessageController.fire(MessageController.Type.EVENT,
                                   {'type': 'vmachine_created',
                                    'metadata': {'name': vm_object['name']}})
        elif vmachine.name != vm_object['name']:
            MessageController.fire(MessageController.Type.EVENT,
                                   {'type': 'vmachine_renamed',
                                    'metadata': {'old_name': vmachine.name,
                                                 'new_name': vm_object['name']}})
        if pmachine is not None:
            vmachine.pmachine = pmachine
        vmachine.name = vm_object['name']
        vmachine.hypervisor_id = vm_object['id']
        vmachine.devicename = vm_object['backing']['filename']
        vmachine.save()
        # Updating and linking disks
        storagedrivers = StorageDriverList.get_storagedrivers()
        datastores = dict([('{}:{}'.format(storagedriver.storage_ip, storagedriver.mountpoint), storagedriver)
                           for storagedriver in storagedrivers])
        vdisk_guids = []
        for disk in vm_object['disks']:
            if disk['datastore'] in vm_object['datastores']:
                datastore = vm_object['datastores'][disk['datastore']]
                if datastore in datastores:
                    vdisk = VDiskList.get_by_devicename_and_vpool(disk['filename'], datastores[datastore].vpool)
                    if vdisk is None:
                        # The disk couldn't be located, but is in our datastore. We might be in a recovery scenario
                        vdisk = VDisk()
                        vdisk.vpool = datastores[datastore].vpool
                        vdisk.reload_client()
                        vdisk.devicename = disk['filename']
                        vdisk.volume_id = vdisk.storagedriver_client.get_volume_id(str(disk['backingfilename']))
                        vdisk.size = vdisk.info['volume_size']
                        MDSServiceController.ensure_safety(vdisk)
                    # Update the disk with information from the hypervisor
                    if vdisk.vmachine is None:
                        MessageController.fire(MessageController.Type.EVENT,
                                               {'type': 'vdisk_attached',
                                                'metadata': {'vmachine_name': vmachine.name,
                                                             'vdisk_name': disk['name']}})
                    vdisk.vmachine = vmachine
                    vdisk.name = disk['name']
                    vdisk.order = disk['order']
                    vdisk.save()
                    vdisk_guids.append(vdisk.guid)
                    vdisks_synced += 1
        for vdisk in vmachine.vdisks:
            if vdisk.guid not in vdisk_guids:
                MessageController.fire(MessageController.Type.EVENT,
                                       {'type': 'vdisk_detached',
                                        'metadata': {'vmachine_name': vmachine.name,
                                                     'vdisk_name': vdisk.name}})
                vdisk.vmachine = None
                vdisk.save()
        logger.info('Updating vMachine finished (name {}, {} vdisks (re)linked)'.format(vmachine.name, vdisks_synced))
    except Exception as ex:
        logger.info('Error during vMachine update: {0}'.format(str(ex)))
        raise
def _prepare(self):
    # Setup
    failure_domain = FailureDomain()
    failure_domain.name = 'Test'
    failure_domain.save()
    backend_type = BackendType()
    backend_type.name = 'BackendType'
    backend_type.code = 'BT'
    backend_type.save()
    vpool = VPool()
    vpool.name = 'vpool'
    vpool.backend_type = backend_type
    vpool.save()
    pmachine = PMachine()
    pmachine.name = 'PMachine'
    pmachine.username = '******'
    pmachine.ip = '127.0.0.1'
    pmachine.hvtype = 'KVM'
    pmachine.save()
    vmachine_1 = VMachine()
    vmachine_1.name = 'vmachine_1'
    vmachine_1.devicename = 'dummy'
    vmachine_1.pmachine = pmachine
    vmachine_1.is_vtemplate = True
    vmachine_1.save()
    vdisk_1_1 = VDisk()
    vdisk_1_1.name = 'vdisk_1_1'
    vdisk_1_1.volume_id = 'vdisk_1_1'
    vdisk_1_1.vmachine = vmachine_1
    vdisk_1_1.vpool = vpool
    vdisk_1_1.devicename = 'dummy'
    vdisk_1_1.size = 0
    vdisk_1_1.save()
    vdisk_1_1.reload_client()
    storage_router = StorageRouter()
    storage_router.name = 'storage_router'
    storage_router.ip = '127.0.0.1'
    storage_router.pmachine = pmachine
    storage_router.machine_id = System.get_my_machine_id()
    storage_router.rdma_capable = False
    storage_router.primary_failure_domain = failure_domain
    storage_router.save()
    storagedriver = StorageDriver()
    storagedriver.vpool = vpool
    storagedriver.storagerouter = storage_router
    storagedriver.name = '1'
    storagedriver.mountpoint = '/'
    storagedriver.cluster_ip = storage_router.ip
    storagedriver.storage_ip = '127.0.0.1'
    storagedriver.storagedriver_id = '1'
    storagedriver.ports = [1, 2, 3]
    storagedriver.save()
    service_type = ServiceType()
    service_type.name = 'MetadataServer'
    service_type.save()
    s_id = '{0}-{1}'.format(storagedriver.storagerouter.name, '1')
    service = Service()
    service.name = s_id
    service.storagerouter = storagedriver.storagerouter
    service.ports = [1]
    service.type = service_type
    service.save()
    mds_service = MDSService()
    mds_service.service = service
    mds_service.number = 0
    mds_service.capacity = 10
    mds_service.vpool = storagedriver.vpool
    mds_service.save()

    def ensure_safety(vdisk):
        pass

    class Dtl_Checkup():
        @staticmethod
        def delay(vpool_guid=None, vdisk_guid=None, storagerouters_to_exclude=None):
            pass

    MDSServiceController.ensure_safety = staticmethod(ensure_safety)
    VDiskController.dtl_checkup = Dtl_Checkup
    return vdisk_1_1, pmachine
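# Hedged usage sketch for _prepare above: with MDSServiceController.ensure_safety
# and VDiskController.dtl_checkup stubbed out, template-related controller
# logic can be exercised without a real MDS or DTL backend:
vdisk_1_1, pmachine = self._prepare()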
def update_vmachine_config(vmachine, vm_object, pmachine=None):
    """
    Update a vMachine configuration with a given vMachine configuration
    """
    try:
        vdisks_synced = 0
        if vmachine.name is None:
            MessageController.fire(MessageController.Type.EVENT,
                                   {'type': 'vmachine_created',
                                    'metadata': {'name': vm_object['name']}})
        elif vmachine.name != vm_object['name']:
            MessageController.fire(MessageController.Type.EVENT,
                                   {'type': 'vmachine_renamed',
                                    'metadata': {'old_name': vmachine.name,
                                                 'new_name': vm_object['name']}})
        if pmachine is not None:
            vmachine.pmachine = pmachine
        vmachine.name = vm_object['name']
        vmachine.hypervisor_id = vm_object['id']
        vmachine.devicename = vm_object['backing']['filename']
        vmachine.save()
        # Updating and linking disks
        storagedrivers = StorageDriverList.get_storagedrivers()
        datastores = dict([('{}:{}'.format(storagedriver.storage_ip, storagedriver.mountpoint), storagedriver)
                           for storagedriver in storagedrivers])
        vdisk_guids = []
        for disk in vm_object['disks']:
            if disk['datastore'] in vm_object['datastores']:
                datastore = vm_object['datastores'][disk['datastore']]
                if datastore in datastores:
                    vdisk = VDiskList.get_by_devicename_and_vpool(disk['filename'], datastores[datastore].vpool)
                    if vdisk is None:
                        # The disk couldn't be located, but is in our datastore. We might be in a recovery scenario
                        vdisk = VDisk()
                        vdisk.vpool = datastores[datastore].vpool
                        vdisk.reload_client()
                        vdisk.devicename = disk['filename']
                        vdisk.volume_id = vdisk.storagedriver_client.get_volume_id(str(disk['backingfilename']))
                        vdisk.size = vdisk.info['volume_size']
                    # Update the disk with information from the hypervisor
                    if vdisk.vmachine is None:
                        MessageController.fire(MessageController.Type.EVENT,
                                               {'type': 'vdisk_attached',
                                                'metadata': {'vmachine_name': vmachine.name,
                                                             'vdisk_name': disk['name']}})
                    vdisk.vmachine = vmachine
                    vdisk.name = disk['name']
                    vdisk.order = disk['order']
                    vdisk.save()
                    vdisk_guids.append(vdisk.guid)
                    vdisks_synced += 1
        for vdisk in vmachine.vdisks:
            if vdisk.guid not in vdisk_guids:
                MessageController.fire(MessageController.Type.EVENT,
                                       {'type': 'vdisk_detached',
                                        'metadata': {'vmachine_name': vmachine.name,
                                                     'vdisk_name': vdisk.name}})
                vdisk.vmachine = None
                vdisk.save()
        logger.info('Updating vMachine finished (name {}, {} vdisks (re)linked)'.format(
            vmachine.name, vdisks_synced
        ))
    except Exception as ex:
        logger.info('Error during vMachine update: {0}'.format(str(ex)))
        raise
def build_service_structure(structure, previous_structure=None):
    """
    Builds an MDS service structure
    Example:
        structure = Helper.build_service_structure(
            {'vpools': [1],
             'domains': [],
             'storagerouters': [1],
             'storagedrivers': [(1, 1, 1)],  # (<id>, <vpool_id>, <storagerouter_id>)
             'mds_services': [(1, 1)],  # (<id>, <storagedriver_id>)
             'storagerouter_domains': []}  # (<id>, <storagerouter_id>, <domain_id>)
        )
    """
    if previous_structure is None:
        previous_structure = {}
    vdisks = previous_structure.get('vdisks', {})
    vpools = previous_structure.get('vpools', {})
    domains = previous_structure.get('domains', {})
    services = previous_structure.get('services', {})
    mds_services = previous_structure.get('mds_services', {})
    storagerouters = previous_structure.get('storagerouters', {})
    storagedrivers = previous_structure.get('storagedrivers', {})
    storagerouter_domains = previous_structure.get('storagerouter_domains', {})

    service_type = ServiceTypeList.get_by_name('MetadataServer')
    if service_type is None:
        service_type = ServiceType()
        service_type.name = 'MetadataServer'
        service_type.save()
    srclients = {}
    for domain_id in structure.get('domains', []):
        if domain_id not in domains:
            domain = Domain()
            domain.name = 'domain_{0}'.format(domain_id)
            domain.save()
            domains[domain_id] = domain
    for vpool_id in structure.get('vpools', []):
        if vpool_id not in vpools:
            vpool = VPool()
            vpool.name = str(vpool_id)
            vpool.status = 'RUNNING'
            vpool.save()
            vpools[vpool_id] = vpool
        else:
            vpool = vpools[vpool_id]
        srclients[vpool_id] = StorageRouterClient(vpool.guid, None)
    for sr_id in structure.get('storagerouters', []):
        if sr_id not in storagerouters:
            storagerouter = StorageRouter()
            storagerouter.name = str(sr_id)
            storagerouter.ip = '10.0.0.{0}'.format(sr_id)
            storagerouter.rdma_capable = False
            storagerouter.node_type = 'MASTER'
            storagerouter.machine_id = str(sr_id)
            storagerouter.save()
            storagerouters[sr_id] = storagerouter
            disk = Disk()
            disk.storagerouter = storagerouter
            disk.state = 'OK'
            disk.name = '/dev/uda'
            disk.size = 1 * 1024 ** 4
            disk.is_ssd = True
            disk.aliases = ['/dev/uda']
            disk.save()
            partition = DiskPartition()
            partition.offset = 0
            partition.size = disk.size
            partition.aliases = ['/dev/uda-1']
            partition.state = 'OK'
            partition.mountpoint = '/tmp/unittest/sr_{0}/disk_1/partition_1'.format(sr_id)
            partition.disk = disk
            partition.roles = [DiskPartition.ROLES.DB, DiskPartition.ROLES.SCRUB]
            partition.save()
    for sd_id, vpool_id, sr_id in structure.get('storagedrivers', ()):
        if sd_id not in storagedrivers:
            storagedriver = StorageDriver()
            storagedriver.vpool = vpools[vpool_id]
            storagedriver.storagerouter = storagerouters[sr_id]
            storagedriver.name = str(sd_id)
            storagedriver.mountpoint = '/'
            storagedriver.cluster_ip = storagerouters[sr_id].ip
            storagedriver.storage_ip = '10.0.1.{0}'.format(sr_id)
            storagedriver.storagedriver_id = str(sd_id)
            storagedriver.ports = {'management': 1, 'xmlrpc': 2, 'dtl': 3, 'edge': 4}
            storagedriver.save()
            storagedrivers[sd_id] = storagedriver
            Helper._set_vpool_storage_driver_configuration(vpool=vpools[vpool_id], storagedriver=storagedriver)
    for mds_id, sd_id in structure.get('mds_services', ()):
        if mds_id not in mds_services:
            sd = storagedrivers[sd_id]
            s_id = '{0}-{1}'.format(sd.storagerouter.name, mds_id)
            service = Service()
            service.name = s_id
            service.storagerouter = sd.storagerouter
            service.ports = [mds_id]
            service.type = service_type
            service.save()
            services[s_id] = service
            mds_service = MDSService()
            mds_service.service = service
            mds_service.number = 0
            mds_service.capacity = 10
            mds_service.vpool = sd.vpool
            mds_service.save()
            mds_services[mds_id] = mds_service
            StorageDriverController.add_storagedriverpartition(sd, {'size': None,
                                                                    'role': DiskPartition.ROLES.DB,
                                                                    'sub_role': StorageDriverPartition.SUBROLE.MDS,
                                                                    'partition': sd.storagerouter.disks[0].partitions[0],
                                                                    'mds_service': mds_service})
    for vdisk_id, storage_driver_id, vpool_id, mds_id in structure.get('vdisks', ()):
        if vdisk_id not in vdisks:
            vpool = vpools[vpool_id]
            devicename = 'vdisk_{0}'.format(vdisk_id)
            mds_backend_config = Helper._generate_mdsmetadatabackendconfig([] if mds_id is None
                                                                           else [mds_services[mds_id]])
            volume_id = srclients[vpool_id].create_volume(devicename, mds_backend_config, 0, str(storage_driver_id))
            vdisk = VDisk()
            vdisk.name = str(vdisk_id)
            vdisk.devicename = devicename
            vdisk.volume_id = volume_id
            vdisk.vpool = vpool
            vdisk.size = 0
            vdisk.save()
            vdisk.reload_client('storagedriver')
            vdisks[vdisk_id] = vdisk
    for srd_id, sr_id, domain_id, backup in structure.get('storagerouter_domains', ()):
        if srd_id not in storagerouter_domains:
            sr_domain = StorageRouterDomain()
            sr_domain.backup = backup
            sr_domain.domain = domains[domain_id]
            sr_domain.storagerouter = storagerouters[sr_id]
            sr_domain.save()
            storagerouter_domains[srd_id] = sr_domain
    return {'vdisks': vdisks,
            'vpools': vpools,
            'domains': domains,
            'services': services,
            'service_type': service_type,
            'mds_services': mds_services,
            'storagerouters': storagerouters,
            'storagedrivers': storagedrivers,
            'storagerouter_domains': storagerouter_domains}
def build_service_structure(structure, previous_structure=None):
    """
    Builds an MDS service structure
    Example:
        structure = Helper.build_service_structure(
            {'vpools': [1],
             'domains': [],
             'storagerouters': [1],
             'storagedrivers': [(1, 1, 1)],  # (<id>, <vpool_id>, <storagerouter_id>)
             'mds_services': [(1, 1)],  # (<id>, <storagedriver_id>)
             'storagerouter_domains': []}  # (<id>, <storagerouter_id>, <domain_id>)
        )
    """
    if previous_structure is None:
        previous_structure = {}
    vdisks = previous_structure.get("vdisks", {})
    vpools = previous_structure.get("vpools", {})
    domains = previous_structure.get("domains", {})
    services = previous_structure.get("services", {})
    mds_services = previous_structure.get("mds_services", {})
    storagerouters = previous_structure.get("storagerouters", {})
    storagedrivers = previous_structure.get("storagedrivers", {})
    storagerouter_domains = previous_structure.get("storagerouter_domains", {})

    service_type = ServiceTypeList.get_by_name("MetadataServer")
    if service_type is None:
        service_type = ServiceType()
        service_type.name = "MetadataServer"
        service_type.save()
    srclients = {}
    for domain_id in structure.get("domains", []):
        if domain_id not in domains:
            domain = Domain()
            domain.name = "domain_{0}".format(domain_id)
            domain.save()
            domains[domain_id] = domain
    for vpool_id in structure.get("vpools", []):
        if vpool_id not in vpools:
            vpool = VPool()
            vpool.name = str(vpool_id)
            vpool.status = "RUNNING"
            vpool.save()
            vpools[vpool_id] = vpool
        else:
            vpool = vpools[vpool_id]
        srclients[vpool_id] = StorageRouterClient(vpool.guid, None)
    for sr_id in structure.get("storagerouters", []):
        if sr_id not in storagerouters:
            storagerouter = StorageRouter()
            storagerouter.name = str(sr_id)
            storagerouter.ip = "10.0.0.{0}".format(sr_id)
            storagerouter.rdma_capable = False
            storagerouter.node_type = "MASTER"
            storagerouter.machine_id = str(sr_id)
            storagerouter.save()
            storagerouters[sr_id] = storagerouter
            disk = Disk()
            disk.storagerouter = storagerouter
            disk.state = "OK"
            disk.name = "/dev/uda"
            disk.size = 1 * 1024 ** 4
            disk.is_ssd = True
            disk.aliases = ["/dev/uda"]
            disk.save()
            partition = DiskPartition()
            partition.offset = 0
            partition.size = disk.size
            partition.aliases = ["/dev/uda-1"]
            partition.state = "OK"
            partition.mountpoint = "/tmp/unittest/sr_{0}/disk_1/partition_1".format(sr_id)
            partition.disk = disk
            partition.roles = [DiskPartition.ROLES.DB, DiskPartition.ROLES.SCRUB]
            partition.save()
    for sd_id, vpool_id, sr_id in structure.get("storagedrivers", ()):
        if sd_id not in storagedrivers:
            storagedriver = StorageDriver()
            storagedriver.vpool = vpools[vpool_id]
            storagedriver.storagerouter = storagerouters[sr_id]
            storagedriver.name = str(sd_id)
            storagedriver.mountpoint = "/"
            storagedriver.cluster_ip = storagerouters[sr_id].ip
            storagedriver.storage_ip = "10.0.1.{0}".format(sr_id)
            storagedriver.storagedriver_id = str(sd_id)
            storagedriver.ports = {"management": 1, "xmlrpc": 2, "dtl": 3, "edge": 4}
            storagedriver.save()
            storagedrivers[sd_id] = storagedriver
            Helper._set_vpool_storage_driver_configuration(vpool=vpools[vpool_id], storagedriver=storagedriver)
    for mds_id, sd_id in structure.get("mds_services", ()):
        if mds_id not in mds_services:
            sd = storagedrivers[sd_id]
            s_id = "{0}-{1}".format(sd.storagerouter.name, mds_id)
            service = Service()
            service.name = s_id
            service.storagerouter = sd.storagerouter
            service.ports = [mds_id]
            service.type = service_type
            service.save()
            services[s_id] = service
            mds_service = MDSService()
            mds_service.service = service
            mds_service.number = 0
            mds_service.capacity = 10
            mds_service.vpool = sd.vpool
            mds_service.save()
            mds_services[mds_id] = mds_service
            StorageDriverController.add_storagedriverpartition(
                sd,
                {
                    "size": None,
                    "role": DiskPartition.ROLES.DB,
                    "sub_role": StorageDriverPartition.SUBROLE.MDS,
                    "partition": sd.storagerouter.disks[0].partitions[0],
                    "mds_service": mds_service,
                },
            )
    for vdisk_id, storage_driver_id, vpool_id, mds_id in structure.get("vdisks", ()):
        if vdisk_id not in vdisks:
            vpool = vpools[vpool_id]
            devicename = "vdisk_{0}".format(vdisk_id)
            mds_backend_config = Helper._generate_mdsmetadatabackendconfig(
                [] if mds_id is None else [mds_services[mds_id]]
            )
            volume_id = srclients[vpool_id].create_volume(devicename, mds_backend_config, 0, str(storage_driver_id))
            vdisk = VDisk()
            vdisk.name = str(vdisk_id)
            vdisk.devicename = devicename
            vdisk.volume_id = volume_id
            vdisk.vpool = vpool
            vdisk.size = 0
            vdisk.save()
            vdisk.reload_client("storagedriver")
            vdisks[vdisk_id] = vdisk
    for srd_id, sr_id, domain_id, backup in structure.get("storagerouter_domains", ()):
        if srd_id not in storagerouter_domains:
            sr_domain = StorageRouterDomain()
            sr_domain.backup = backup
            sr_domain.domain = domains[domain_id]
            sr_domain.storagerouter = storagerouters[sr_id]
            sr_domain.save()
            storagerouter_domains[srd_id] = sr_domain
    return {
        "vdisks": vdisks,
        "vpools": vpools,
        "domains": domains,
        "services": services,
        "service_type": service_type,
        "mds_services": mds_services,
        "storagerouters": storagerouters,
        "storagedrivers": storagedrivers,
        "storagerouter_domains": storagerouter_domains,
    }
def test_happypath(self):
    """
    Validates the happy path; hourly snapshots are taken with a few manual
    consistent ones every now and then. The delete policy is executed every day
    """
    # Setup
    # There are 2 machines; one with two disks, one with one disk and an additional disk
    backend_type = BackendType()
    backend_type.name = 'BackendType'
    backend_type.code = 'BT'
    backend_type.save()
    vpool = VPool()
    vpool.name = 'vpool'
    vpool.backend_type = backend_type
    vpool.save()
    pmachine = PMachine()
    pmachine.name = 'PMachine'
    pmachine.username = '******'
    pmachine.ip = '127.0.0.1'
    pmachine.hvtype = 'VMWARE'
    pmachine.save()
    vmachine_1 = VMachine()
    vmachine_1.name = 'vmachine_1'
    vmachine_1.devicename = 'dummy'
    vmachine_1.pmachine = pmachine
    vmachine_1.save()
    vdisk_1_1 = VDisk()
    vdisk_1_1.name = 'vdisk_1_1'
    vdisk_1_1.volume_id = 'vdisk_1_1'
    vdisk_1_1.vmachine = vmachine_1
    vdisk_1_1.vpool = vpool
    vdisk_1_1.devicename = 'dummy'
    vdisk_1_1.size = 0
    vdisk_1_1.save()
    vdisk_1_1.reload_client()
    vdisk_1_2 = VDisk()
    vdisk_1_2.name = 'vdisk_1_2'
    vdisk_1_2.volume_id = 'vdisk_1_2'
    vdisk_1_2.vmachine = vmachine_1
    vdisk_1_2.vpool = vpool
    vdisk_1_2.devicename = 'dummy'
    vdisk_1_2.size = 0
    vdisk_1_2.save()
    vdisk_1_2.reload_client()
    vmachine_2 = VMachine()
    vmachine_2.name = 'vmachine_2'
    vmachine_2.devicename = 'dummy'
    vmachine_2.pmachine = pmachine
    vmachine_2.save()
    vdisk_2_1 = VDisk()
    vdisk_2_1.name = 'vdisk_2_1'
    vdisk_2_1.volume_id = 'vdisk_2_1'
    vdisk_2_1.vmachine = vmachine_2
    vdisk_2_1.vpool = vpool
    vdisk_2_1.devicename = 'dummy'
    vdisk_2_1.size = 0
    vdisk_2_1.save()
    vdisk_2_1.reload_client()
    vdisk_3 = VDisk()
    vdisk_3.name = 'vdisk_3'
    vdisk_3.volume_id = 'vdisk_3'
    vdisk_3.vpool = vpool
    vdisk_3.devicename = 'dummy'
    vdisk_3.size = 0
    vdisk_3.save()
    vdisk_3.reload_client()
    for disk in [vdisk_1_1, vdisk_1_2, vdisk_2_1, vdisk_3]:
        [dynamic for dynamic in disk._dynamics if dynamic.name == 'snapshots'][0].timeout = 0

    # Run the testing scenario
    debug = True
    amount_of_days = 50
    base = datetime.now().date()
    day = timedelta(1)
    minute = 60
    hour = minute * 60
    for d in xrange(0, amount_of_days):
        base_timestamp = DeleteSnapshots._make_timestamp(base, day * d)
        print ''
        print 'Day cycle: {}: {}'.format(d, datetime.fromtimestamp(base_timestamp).strftime('%Y-%m-%d'))
        # At the start of the day, delete snapshot policy runs at 00:30
        print '- Deleting snapshots'
        ScheduledTaskController.deletescrubsnapshots(timestamp=base_timestamp + (minute * 30))
        # Validate snapshots
        print '- Validating snapshots'
        for vdisk in [vdisk_1_1, vdisk_1_2, vdisk_2_1, vdisk_3]:
            self._validate(vdisk, d, base, amount_of_days, debug)
        # During the day, snapshots are taken
        # - Create non consistent snapshot every hour, between 2:00 and 22:00
        # - Create consistent snapshot at 6:30, 12:30, 18:30
        print '- Creating snapshots'
        for h in xrange(2, 23):
            timestamp = base_timestamp + (hour * h)
            for vm in [vmachine_1, vmachine_2]:
                VMachineController.snapshot(machineguid=vm.guid,
                                            label='ss_i_{0}:00'.format(str(h)),
                                            is_consistent=False,
                                            timestamp=timestamp)
                if h in [6, 12, 18]:
                    ts = (timestamp + (minute * 30))
                    VMachineController.snapshot(machineguid=vm.guid,
                                                label='ss_c_{0}:30'.format(str(h)),
                                                is_consistent=True,
                                                timestamp=ts)
            VDiskController.create_snapshot(diskguid=vdisk_3.guid,
                                            metadata={'label': 'ss_i_{0}:00'.format(str(h)),
                                                      'is_consistent': False,
                                                      'timestamp': str(timestamp),
                                                      'machineguid': None})
            if h in [6, 12, 18]:
                ts = (timestamp + (minute * 30))
                VDiskController.create_snapshot(diskguid=vdisk_3.guid,
                                                metadata={'label': 'ss_c_{0}:30'.format(str(h)),
                                                          'is_consistent': True,
                                                          'timestamp': str(ts),
                                                          'machineguid': None})
def test_clone_snapshot(self):
    """
    Validates that a snapshot that has clones will not be deleted while other snapshots will be deleted
    """
    # Setup
    # There are 2 disks, second one cloned from a snapshot of the first
    vpool = VPool()
    vpool.name = 'vpool'
    vpool.status = 'RUNNING'
    vpool.save()
    storage_router = StorageRouter()
    storage_router.name = 'storage_router'
    storage_router.ip = '127.0.0.1'
    storage_router.machine_id = System.get_my_machine_id()
    storage_router.rdma_capable = False
    storage_router.save()
    disk = Disk()
    disk.name = 'physical_disk_1'
    disk.aliases = ['/dev/non-existent']
    disk.size = 500 * 1024 ** 3
    disk.state = 'OK'
    disk.is_ssd = True
    disk.storagerouter = storage_router
    disk.save()
    disk_partition = DiskPartition()
    disk_partition.disk = disk
    disk_partition.aliases = ['/dev/disk/non-existent']
    disk_partition.size = 400 * 1024 ** 3
    disk_partition.state = 'OK'
    disk_partition.offset = 1024
    disk_partition.roles = [DiskPartition.ROLES.SCRUB]
    disk_partition.mountpoint = '/var/tmp'
    disk_partition.save()
    storage_driver = StorageDriver()
    storage_driver.vpool = vpool
    storage_driver.storagerouter = storage_router
    storage_driver.name = 'storage_driver_1'
    storage_driver.mountpoint = '/'
    storage_driver.cluster_ip = storage_router.ip
    storage_driver.storage_ip = '127.0.0.1'
    storage_driver.storagedriver_id = 'storage_driver_1'
    storage_driver.ports = {'management': 1, 'xmlrpc': 2, 'dtl': 3, 'edge': 4}
    storage_driver.save()
    service_type = ServiceType()
    service_type.name = 'MetadataServer'
    service_type.save()
    service = Service()
    service.name = 'service_1'
    service.storagerouter = storage_driver.storagerouter
    service.ports = [1]
    service.type = service_type
    service.save()
    mds_service = MDSService()
    mds_service.service = service
    mds_service.number = 0
    mds_service.capacity = 10
    mds_service.vpool = storage_driver.vpool
    mds_service.save()
    vdisk_1_1 = VDisk()
    vdisk_1_1.name = 'vdisk_1_1'
    vdisk_1_1.volume_id = 'vdisk_1_1'
    vdisk_1_1.vpool = vpool
    vdisk_1_1.devicename = 'dummy'
    vdisk_1_1.size = 0
    vdisk_1_1.save()
    vdisk_1_1.reload_client('storagedriver')
    [dynamic for dynamic in vdisk_1_1._dynamics if dynamic.name == 'snapshots'][0].timeout = 0

    travis = 'TRAVIS' in os.environ and os.environ['TRAVIS'] == 'true'
    if travis is True:
        print 'Running in Travis, reducing output.'
    base = datetime.datetime.now().date()
    day = datetime.timedelta(1)
    base_timestamp = self._make_timestamp(base, day)
    minute = 60
    hour = minute * 60
    for h in [6, 12, 18]:
        timestamp = base_timestamp + (hour * h)
        VDiskController.create_snapshot(vdisk_guid=vdisk_1_1.guid,
                                        metadata={'label': 'snapshot_{0}:30'.format(str(h)),
                                                  'is_consistent': True,
                                                  'timestamp': str(timestamp),
                                                  'machineguid': None})
    base_snapshot_guid = vdisk_1_1.snapshots[0]['guid']  # Oldest
    clone_vdisk = VDisk()
    clone_vdisk.name = 'clone_vdisk'
    clone_vdisk.volume_id = 'clone_vdisk'
    clone_vdisk.vpool = vpool
    clone_vdisk.devicename = 'dummy'
    clone_vdisk.parentsnapshot = base_snapshot_guid
    clone_vdisk.size = 0
    clone_vdisk.save()
    clone_vdisk.reload_client('storagedriver')
    for h in [6, 12, 18]:
        timestamp = base_timestamp + (hour * h)
        VDiskController.create_snapshot(vdisk_guid=clone_vdisk.guid,
                                        metadata={'label': 'snapshot_{0}:30'.format(str(h)),
                                                  'is_consistent': True,
                                                  'timestamp': str(timestamp),
                                                  'machineguid': None})
    base_timestamp = self._make_timestamp(base, day * 2)
    ScheduledTaskController.delete_snapshots(timestamp=base_timestamp + (minute * 30))
    self.assertIn(base_snapshot_guid,
                  [snap['guid'] for snap in vdisk_1_1.snapshots],
                  'Snapshot was deleted while there are still clones of it')
def test_happypath(self):
    """
    Validates the happy path; Hourly snapshots are taken with a few manual consistent ones
    every now and then. The delete policy is executed every day
    """
    # Setup
    # There are 2 machines; one with two disks, one with one disk and a stand-alone additional disk
    failure_domain = FailureDomain()
    failure_domain.name = 'Test'
    failure_domain.save()
    backend_type = BackendType()
    backend_type.name = 'BackendType'
    backend_type.code = 'BT'
    backend_type.save()
    vpool = VPool()
    vpool.name = 'vpool'
    vpool.status = 'RUNNING'
    vpool.backend_type = backend_type
    vpool.save()
    pmachine = PMachine()
    pmachine.name = 'PMachine'
    pmachine.username = '******'
    pmachine.ip = '127.0.0.1'
    pmachine.hvtype = 'VMWARE'
    pmachine.save()
    storage_router = StorageRouter()
    storage_router.name = 'storage_router'
    storage_router.ip = '127.0.0.1'
    storage_router.pmachine = pmachine
    storage_router.machine_id = System.get_my_machine_id()
    storage_router.rdma_capable = False
    storage_router.primary_failure_domain = failure_domain
    storage_router.save()
    disk = Disk()
    disk.name = 'physical_disk_1'
    disk.path = '/dev/non-existent'
    disk.size = 500 * 1024 ** 3
    disk.state = 'OK'
    disk.is_ssd = True
    disk.storagerouter = storage_router
    disk.save()
    disk_partition = DiskPartition()
    disk_partition.id = 'disk_partition_id'
    disk_partition.disk = disk
    disk_partition.path = '/dev/disk/non-existent'
    disk_partition.size = 400 * 1024 ** 3
    disk_partition.state = 'OK'
    disk_partition.offset = 1024
    disk_partition.roles = [DiskPartition.ROLES.SCRUB]
    disk_partition.mountpoint = '/var/tmp'
    disk_partition.save()
    vmachine_1 = VMachine()
    vmachine_1.name = 'vmachine_1'
    vmachine_1.devicename = 'dummy'
    vmachine_1.pmachine = pmachine
    vmachine_1.save()
    vdisk_1_1 = VDisk()
    vdisk_1_1.name = 'vdisk_1_1'
    vdisk_1_1.volume_id = 'vdisk_1_1'
    vdisk_1_1.vmachine = vmachine_1
    vdisk_1_1.vpool = vpool
    vdisk_1_1.devicename = 'dummy'
    vdisk_1_1.size = 0
    vdisk_1_1.save()
    vdisk_1_1.reload_client()
    vdisk_1_2 = VDisk()
    vdisk_1_2.name = 'vdisk_1_2'
    vdisk_1_2.volume_id = 'vdisk_1_2'
    vdisk_1_2.vmachine = vmachine_1
    vdisk_1_2.vpool = vpool
    vdisk_1_2.devicename = 'dummy'
    vdisk_1_2.size = 0
    vdisk_1_2.save()
    vdisk_1_2.reload_client()
    vmachine_2 = VMachine()
    vmachine_2.name = 'vmachine_2'
    vmachine_2.devicename = 'dummy'
    vmachine_2.pmachine = pmachine
    vmachine_2.save()
    vdisk_2_1 = VDisk()
    vdisk_2_1.name = 'vdisk_2_1'
    vdisk_2_1.volume_id = 'vdisk_2_1'
    vdisk_2_1.vmachine = vmachine_2
    vdisk_2_1.vpool = vpool
    vdisk_2_1.devicename = 'dummy'
    vdisk_2_1.size = 0
    vdisk_2_1.save()
    vdisk_2_1.reload_client()
    vdisk_3 = VDisk()
    vdisk_3.name = 'vdisk_3'
    vdisk_3.volume_id = 'vdisk_3'
    vdisk_3.vpool = vpool
    vdisk_3.devicename = 'dummy'
    vdisk_3.size = 0
    vdisk_3.save()
    vdisk_3.reload_client()
    for disk in [vdisk_1_1, vdisk_1_2, vdisk_2_1, vdisk_3]:
        [dynamic for dynamic in disk._dynamics if dynamic.name == 'snapshots'][0].timeout = 0

    # Run the testing scenario
    travis = 'TRAVIS' in os.environ and os.environ['TRAVIS'] == 'true'
    if travis is True:
        print 'Running in Travis, reducing output.'
    debug = not travis
    amount_of_days = 50
    base = datetime.datetime.now().date()
    day = datetime.timedelta(1)
    minute = 60
    hour = minute * 60
    for d in xrange(0, amount_of_days):
        base_timestamp = self._make_timestamp(base, day * d)
        print ''
        print 'Day cycle: {0}: {1}'.format(d, datetime.datetime.fromtimestamp(base_timestamp).strftime('%Y-%m-%d'))

        # At the start of the day, delete snapshot policy runs at 00:30
        print '- Deleting snapshots'
        ScheduledTaskController.delete_snapshots(timestamp=base_timestamp + (minute * 30))

        # Validate snapshots
        print '- Validating snapshots'
        for vdisk in [vdisk_1_1, vdisk_1_2, vdisk_2_1, vdisk_3]:
            self._validate(vdisk, d, base, amount_of_days, debug)

        # During the day, snapshots are taken
        # - Create non consistent snapshot every hour, between 2:00 and 22:00
        # - Create consistent snapshot at 6:30, 12:30, 18:30
        print '- Creating snapshots'
        for h in xrange(2, 23):
            timestamp = base_timestamp + (hour * h)
            for vm in [vmachine_1, vmachine_2]:
                VMachineController.snapshot(machineguid=vm.guid,
                                            label='ss_i_{0}:00'.format(str(h)),
                                            is_consistent=False,
                                            timestamp=timestamp)
                if h in [6, 12, 18]:
                    ts = (timestamp + (minute * 30))
                    VMachineController.snapshot(machineguid=vm.guid,
                                                label='ss_c_{0}:30'.format(str(h)),
                                                is_consistent=True,
                                                timestamp=ts)
            VDiskController.create_snapshot(diskguid=vdisk_3.guid,
                                            metadata={'label': 'ss_i_{0}:00'.format(str(h)),
                                                      'is_consistent': False,
                                                      'timestamp': str(timestamp),
                                                      'machineguid': None})
            if h in [6, 12, 18]:
                ts = (timestamp + (minute * 30))
                VDiskController.create_snapshot(diskguid=vdisk_3.guid,
                                                metadata={'label': 'ss_c_{0}:30'.format(str(h)),
                                                          'is_consistent': True,
                                                          'timestamp': str(ts),
                                                          'machineguid': None})
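# The per-day schedule driven by the loops above, written out as data for
# reference (illustrative sketch; these names are not part of the test code):
# an inconsistent snapshot every hour from 2:00 through 22:00, plus a
# consistent snapshot at 6:30, 12:30 and 18:30.
HOUR = 3600
DAILY_SNAPSHOT_SCHEDULE = sorted([(h * HOUR, False) for h in xrange(2, 23)] +
                                 [(h * HOUR + 1800, True) for h in (6, 12, 18)])  # (offset in seconds, is_consistent)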
class SafetyEnsurer(MDSShared):
    """
    Class responsible to ensure the MDS safety of a volume
    """
    _logger = Logger('lib')

    def __init__(self, vdisk_guid, excluded_storagerouter_guids=None):
        """
        :param vdisk_guid: vDisk GUID to calculate a new safety for
        :type vdisk_guid: str
        :param excluded_storagerouter_guids: GUIDs of StorageRouters to leave out of calculation (Eg: When 1 is down or unavailable)
        :type excluded_storagerouter_guids: list[str]
        """
        if excluded_storagerouter_guids is None:
            excluded_storagerouter_guids = []
        self.vdisk = VDisk(vdisk_guid)
        self.excluded_storagerouters = [StorageRouter(sr_guid) for sr_guid in excluded_storagerouter_guids]
        self.sr_client_timeout = Configuration.get('ovs/vpools/{0}/mds_config|sr_client_connection_timeout'.format(self.vdisk.vpool_guid), default=300)
        self.mds_client_timeout = Configuration.get('ovs/vpools/{0}/mds_config|mds_client_connection_timeout'.format(self.vdisk.vpool_guid), default=120)
        self.tlogs, self.safety, self.max_load = self.get_mds_config()

        # Filled in by functions
        self.metadata_backend_config_start = {}
        # Layout related
        self.mds_layout = {'primary': {'used': [], 'loads': {}, 'available': []},
                           'secondary': {'used': [], 'loads': {}, 'available': []}}
        self.services_load = {}
        self.recommended_primary = None
        self.recommended_secondary = None
        self.master_service = None
        self.slave_services = []
        self.mds_client_cache = {}

    def validate_vdisk(self):
        """
        Validates if the vDisk is ready for ensuring the MDS safety
        :raises SRCObjectNotFoundException: If the vDisk is not associated with a StorageRouter
        :raises RuntimeError: If the current host is in the excluded StorageRouters
                              If the vDisk is in a different state than running
        :return: None
        :rtype: NoneType
        """
        self.vdisk.invalidate_dynamics(['info', 'storagerouter_guid'])
        if self.vdisk.storagerouter_guid is None:
            raise SRCObjectNotFoundException('Cannot ensure MDS safety for vDisk {0} with guid {1} because vDisk is not attached to any StorageRouter'.format(self.vdisk.name, self.vdisk.guid))

        vdisk_storagerouter = StorageRouter(self.vdisk.storagerouter_guid)
        if vdisk_storagerouter in self.excluded_storagerouters:
            raise RuntimeError('Current host ({0}) of vDisk {1} is in the list of excluded StorageRouters'.format(vdisk_storagerouter.ip, self.vdisk.guid))

        if self.vdisk.info['live_status'] != VDisk.STATUSES.RUNNING:
            raise RuntimeError('vDisk {0} is not {1}, cannot update MDS configuration'.format(self.vdisk.guid, VDisk.STATUSES.RUNNING))

        self.metadata_backend_config_start = self.vdisk.info['metadata_backend_config']
        if self.vdisk.info['metadata_backend_config'] == {}:
            raise RuntimeError('Configured MDS layout for vDisk {0} could not be retrieved, cannot update MDS configuration'.format(self.vdisk.guid))

    def map_mds_services_by_socket(self):
        """
        Maps the MDS services related to the vPool by their socket
        :return: A dict with sockets as key, service as value
        :rtype: Dict[str, ovs.dal.hybrids.j_mdsservice.MDSService]
        """
        return super(SafetyEnsurer, self).map_mds_services_by_socket(self.vdisk)

    def get_primary_and_secondary_storagerouters(self):
        # type: () -> Tuple[List[StorageRouter], List[StorageRouter]]
        """
        Retrieve the primary and secondary storagerouters for MDS deployment
        :return: Both primary and secondary storagerouters
        :rtype: Tuple[List[StorageRouter], List[StorageRouter]]
        """
        # Create a pool of StorageRouters being a part of the primary and secondary domains of this StorageRouter
        vdisk = self.vdisk
        vdisk_storagerouter = StorageRouter(vdisk.storagerouter_guid)
        primary_domains = [junction.domain for junction in vdisk_storagerouter.domains if junction.backup is False]
        secondary_domains = [junction.domain for junction in vdisk_storagerouter.domains if junction.backup is True]
        primary_storagerouters = set()
        secondary_storagerouters = set()
        for domain in primary_domains:
            primary_storagerouters.update(StorageRouterList.get_primary_storagerouters_for_domain(domain))
        for domain in secondary_domains:
            secondary_storagerouters.update(StorageRouterList.get_primary_storagerouters_for_domain(domain))

        # In case no domains have been configured
        if len(primary_storagerouters) == 0:
            primary_storagerouters = set(StorageRouterList.get_storagerouters())

        # Remove all excluded StorageRouters from primary StorageRouters
        primary_storagerouters = primary_storagerouters.difference(self.excluded_storagerouters)

        # Remove all StorageRouters from secondary which are present in primary, all excluded
        secondary_storagerouters = secondary_storagerouters.difference(primary_storagerouters)
        secondary_storagerouters = secondary_storagerouters.difference(self.excluded_storagerouters)

        # Make sure to only use the StorageRouters related to the current vDisk's vPool
        related_storagerouters = [sd.storagerouter for sd in vdisk.vpool.storagedrivers if sd.storagerouter is not None]
        primary_storagerouters = list(primary_storagerouters.intersection(related_storagerouters))
        secondary_storagerouters = list(secondary_storagerouters.intersection(related_storagerouters))

        if vdisk_storagerouter not in primary_storagerouters:
            raise RuntimeError('Host of vDisk {0} ({1}) should be part of the primary domains'.format(vdisk.name, vdisk_storagerouter.name))

        primary_storagerouters.sort(key=lambda sr: ExtensionsToolbox.advanced_sort(element=sr.ip, separator='.'))
        secondary_storagerouters.sort(key=lambda sr: ExtensionsToolbox.advanced_sort(element=sr.ip, separator='.'))
        for primary_storagerouter in primary_storagerouters:
            self._logger.debug('vDisk {0} - Primary StorageRouter {1} with IP {2}'.format(vdisk.guid, primary_storagerouter.name, primary_storagerouter.ip))
        for secondary_storagerouter in secondary_storagerouters:
            self._logger.debug('vDisk {0} - Secondary StorageRouter {1} with IP {2}'.format(vdisk.guid, secondary_storagerouter.name, secondary_storagerouter.ip))
        for excluded_storagerouter in self.excluded_storagerouters:
            self._logger.debug('vDisk {0} - Excluded StorageRouter {1} with IP {2}'.format(vdisk.guid, excluded_storagerouter.name, excluded_storagerouter.ip))
        return primary_storagerouters, secondary_storagerouters

    def get_mds_config(self):
        # type: () -> Tuple[int, int, int]
        """
        Get the MDS config parameters
        :return: tlogs, safety and maxload
        :rtype: Tuple[int, int, int]
        """
        mds_config = Configuration.get('/ovs/vpools/{0}/mds_config'.format(self.vdisk.vpool_guid))
        return mds_config['mds_tlogs'], mds_config['mds_safety'], mds_config['mds_maxload']
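    # Example of the payload get_mds_config() expects at
    # /ovs/vpools/<vpool_guid>/mds_config. Only the three keys are implied by the
    # code above; the values below are illustrative assumptions:
    #     {'mds_tlogs': 100, 'mds_safety': 3, 'mds_maxload': 75}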
    def get_reconfiguration_reasons(self):
        # type: () -> List[str]
        """
        Check if reconfiguration is required
        Fill in the state of all MDSes while checking the reasons
        :return: All reconfiguration reasons
        :rtype: List[str]
        """
        services_by_socket = self.map_mds_services_by_socket()
        primary_storagerouters, secondary_storagerouters = self.get_primary_and_secondary_storagerouters()
        vdisk_storagerouter = StorageRouter(self.vdisk.storagerouter_guid)

        current_service_ips = []
        reconfigure_reasons = set()
        for index, config in enumerate(self.metadata_backend_config_start):  # Ordered MASTER, SLAVE(S)
            config_key = '{0}:{1}'.format(config['ip'], config['port'])
            service = services_by_socket.get(config_key)
            if service is None:
                self._logger.critical('vDisk {0} - Storage leak detected. Namespace {1} for service {2} will never be deleted automatically because the service no longer exists in the model'.format(self.vdisk.guid, self.vdisk.volume_id, config_key))
                reconfigure_reasons.add('{0} {1} cannot be used anymore'.format('Master' if index == 0 else 'Slave', config_key))
            else:
                if service.storagerouter.ip in current_service_ips:
                    reconfigure_reasons.add('Multiple MDS services on the same node with IP {0}'.format(service.storagerouter.ip))
                else:
                    current_service_ips.append(service.storagerouter.ip)
                if index == 0:
                    self.master_service = service
                else:
                    self.slave_services.append(service)

        nodes = set()
        for service in services_by_socket.itervalues():
            importance = None
            if service.storagerouter in primary_storagerouters:
                importance = 'primary'
            elif service.storagerouter in secondary_storagerouters:
                importance = 'secondary'

            # If MDS already in use, take current load, else take next load
            loads = self.get_mds_load(mds_service=service.mds_service)
            if service == self.master_service or service in self.slave_services:  # Service is still in use
                load = loads[0]
                if importance is not None:
                    self.mds_layout[importance]['used'].append(service)
                else:
                    reconfigure_reasons.add('Service {0} cannot be used anymore because StorageRouter with IP {1} is not part of the domains'.format(service.name, service.storagerouter.ip))
            else:  # Service is not in use, but available
                load = loads[1]
            self.services_load[service] = load

            if importance is not None:
                nodes.add(service.storagerouter.ip)
                self.mds_layout[importance]['available'].append(service)
                if load <= self.max_load:
                    self._logger.debug('vDisk {0} - Service {1}:{2} has capacity - Load: {3}%'.format(self.vdisk.guid, service.storagerouter.ip, service.ports[0], load))
                    if load not in self.mds_layout[importance]['loads']:
                        self.mds_layout[importance]['loads'][load] = []
                    self.mds_layout[importance]['loads'][load].append(service)
                else:
                    self._logger.debug('vDisk {0} - Service {1}:{2} is overloaded - Load: {3}%'.format(self.vdisk.guid, service.storagerouter.ip, service.ports[0], load))

        if len(current_service_ips) > self.safety:
            reconfigure_reasons.add('Too much safety - Current: {0} - Expected: {1}'.format(len(current_service_ips), self.safety))
        if len(current_service_ips) < self.safety and len(current_service_ips) < len(nodes):
            reconfigure_reasons.add('Not enough safety - Current: {0} - Expected: {1}'.format(len(current_service_ips), self.safety))
        if self.master_service is not None:
            if self.services_load[self.master_service] > self.max_load:
                reconfigure_reasons.add('Master overloaded - Current load: {0}% - Max load: {1}%'.format(self.services_load[self.master_service], self.max_load))
            if self.master_service.storagerouter_guid != self.vdisk.storagerouter_guid:
                reconfigure_reasons.add('Master {0}:{1} is not local - Current location: {0} - Expected location: {2}'.format(self.master_service.storagerouter.ip, self.master_service.ports[0], vdisk_storagerouter.ip))
            if self.master_service not in self.mds_layout['primary']['used']:
                reconfigure_reasons.add('Master service {0}:{1} not in primary domain'.format(self.master_service.storagerouter.ip, self.master_service.ports[0]))
        for slave_service in self.slave_services:
            if self.services_load[slave_service] > self.max_load:
                reconfigure_reasons.add('Slave {0}:{1} overloaded - Current load: {2}% - Max load: {3}%'.format(slave_service.storagerouter.ip, slave_service.ports[0], self.services_load[slave_service], self.max_load))

        # Check reconfigure required based upon domains
        self.recommended_primary = int(math.ceil(self.safety / 2.0)) if len(secondary_storagerouters) > 0 else self.safety
        self.recommended_secondary = self.safety - self.recommended_primary

        primary_services_used = len(self.mds_layout['primary']['used'])
        primary_services_available = len(self.mds_layout['primary']['available'])
        if primary_services_used < self.recommended_primary and primary_services_used < primary_services_available:
            reconfigure_reasons.add('Not enough services in use in primary domain - Current: {0} - Expected: {1}'.format(primary_services_used, self.recommended_primary))
        if primary_services_used > self.recommended_primary:
            reconfigure_reasons.add('Too many services in use in primary domain - Current: {0} - Expected: {1}'.format(primary_services_used, self.recommended_primary))

        # More services can be used in secondary domain
        secondary_services_used = len(self.mds_layout['secondary']['used'])
        secondary_services_available = len(self.mds_layout['secondary']['available'])
        if secondary_services_used < self.recommended_secondary and secondary_services_used < secondary_services_available:
            reconfigure_reasons.add('Not enough services in use in secondary domain - Current: {0} - Expected: {1}'.format(secondary_services_used, self.recommended_secondary))
        if secondary_services_used > self.recommended_secondary:
            # Too many services in secondary domain
            reconfigure_reasons.add('Too many services in use in secondary domain - Current: {0} - Expected: {1}'.format(secondary_services_used, self.recommended_secondary))

        # If secondary domain present, check order in which the slave services are configured
        secondary = False
        for slave_service in self.slave_services:
            if secondary is True and slave_service in self.mds_layout['primary']['used']:
                reconfigure_reasons.add('A slave in secondary domain has priority over a slave in primary domain')
                break
            if slave_service in self.mds_layout['secondary']['used']:
                secondary = True

        self._logger.info('vDisk {0} - Current configuration: {1}'.format(self.vdisk.guid, self.metadata_backend_config_start))
        return reconfigure_reasons

    def create_new_master(self):
        # type: () -> Tuple[List[Service], Service]
        """
        Check and create a new MDS master if necessary
        - Master configured according to StorageDriver must be modelled
        - Master must be local
        - Master cannot be overloaded
        - Master must be in primary domain (if no domains available, this check is irrelevant because all StorageRouters will match)
        :return: The newly created services and the previous master (if a master switch happened)
        :rtype: Tuple[List[Service], Service]
        """
        new_services = []
        previous_master = None
        log_start = 'vDisk {0}'.format(self.vdisk.guid)

        if self.master_service is not None \
                and self.master_service.storagerouter_guid == self.vdisk.storagerouter_guid \
                and self.services_load[self.master_service] <= self.max_load \
                and self.master_service in self.mds_layout['primary']['used']:
            new_services.append(self.master_service)  # Master is OK, so add as 1st element to new configuration. Reconfiguration is now based purely on slave misconfiguration
            self._logger.debug('{0} - Master is still OK, re-calculating slaves'.format(log_start))
        else:
            # Master is not OK --> try to find the best non-overloaded LOCAL MDS slave in the primary domain to make master
            self._logger.debug('{0} - Master is not OK, re-calculating master'.format(log_start))
            current_load = 0
            new_local_master_service = None
            re_used_local_slave_service = None
            for service in self.mds_layout['primary']['available']:
                if service == self.master_service:
                    # Make sure the current master_service is not re-used as master for whatever reason
                    continue
                next_load = self.services_load[service]  # This load indicates the load it would become if a vDisk would be moved to this Service
                if next_load <= self.max_load and service.storagerouter_guid == self.vdisk.storagerouter_guid:
                    if current_load > next_load or (re_used_local_slave_service is None and new_local_master_service is None):
                        current_load = next_load  # Load for least loaded service
                        new_local_master_service = service  # If no local slave is found to re-use, this new_local_master_service is used
                        if service in self.slave_services:
                            self._logger.debug('{0} - Slave service {1}:{2} will be recycled'.format(log_start, service.storagerouter.ip, service.ports[0]))
                            re_used_local_slave_service = service  # A slave service is found to re-use as new master
                            self.slave_services.remove(service)

            if re_used_local_slave_service is None:
                # There's no non-overloaded local slave found. Keep the current master (if available) and add a local MDS (if available) as slave.
                # Next iteration, the newly added slave will be checked if it has caught up already
                # If amount of tlogs to catchup is < configured amount of tlogs --> we wait for catchup, so master can be removed and slave can be promoted
                if self.master_service is not None:
                    self._logger.debug('{0} - Keeping current master service'.format(log_start))
                    new_services.append(self.master_service)
                if new_local_master_service is not None:
                    self._logger.debug('{0} - Adding new slave service {1}:{2} to catch up'.format(log_start, new_local_master_service.storagerouter.ip, new_local_master_service.ports[0]))
                    new_services.append(new_local_master_service)
            else:
                # A non-overloaded local slave was found
                # We verify how many tlogs the slave is behind and do 1 of the following:
                #     1. tlogs_behind_master < tlogs configured --> Invoke the catchup action and wait for it
                #     2. tlogs_behind_master >= tlogs configured --> Add current master service as 1st in list, append non-overloaded local slave as 2nd in list and let StorageDriver do the catchup (next iteration we check again)
                # noinspection PyTypeChecker
                client = MetadataServerClient.load(service=re_used_local_slave_service, timeout=self.mds_client_timeout)
                if client is None:
                    raise RuntimeError('Cannot establish a MDS client connection for service {0}:{1}'.format(re_used_local_slave_service.storagerouter.ip, re_used_local_slave_service.ports[0]))
                self.mds_client_cache[re_used_local_slave_service] = client
                try:
                    tlogs_behind_master = client.catch_up(str(self.vdisk.volume_id), dry_run=True)  # Verify how many tlogs the local slave Service is behind (no catchup action is invoked)
                except RuntimeError as ex:
                    if 'Namespace does not exist' in ex.message:
                        client.create_namespace(str(self.vdisk.volume_id))
                        tlogs_behind_master = client.catch_up(str(self.vdisk.volume_id), dry_run=True)
                    else:
                        raise

                self._logger.debug('{0} - Recycled slave is {1} tlogs behind'.format(log_start, tlogs_behind_master))
                if tlogs_behind_master < self.tlogs:
                    start = time.time()
                    try:
                        client.catch_up(str(self.vdisk.volume_id), dry_run=False)
                        self._logger.debug('{0} - Catchup took {1}s'.format(log_start, round(time.time() - start, 2)))
                    except Exception:
                        self._logger.exception('{0} - Catching up failed'.format(log_start))
                        raise  # Catchup failed, so we don't know whether the new slave can be promoted to master yet

                    # It's up to date, so add it as a new master
                    new_services.append(re_used_local_slave_service)
                    if self.master_service is not None:
                        # The current master (if available) is now candidate to become one of the slaves (Determined below during slave calculation)
                        # The current master can potentially be on a different node, thus might become slave
                        self.slave_services.insert(0, self.master_service)
                        previous_master = self.master_service
                else:
                    # It's not up to date, keep the previous master (if available) and give the local slave some more time to catch up
                    if self.master_service is not None:
                        new_services.append(self.master_service)
                    new_services.append(re_used_local_slave_service)

        service_string = ', '.join(["{{'ip': '{0}', 'port': {1}}}".format(service.storagerouter.ip, service.ports[0]) for service in new_services])
        self._logger.debug('vDisk {0} - Configuration after MASTER calculation: [{1}]'.format(self.vdisk.guid, service_string))
        return new_services, previous_master
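    # get_mds_load() is inherited from MDSShared and is not part of this excerpt.
    # As used in get_reconfiguration_reasons above ("If MDS already in use, take
    # current load, else take next load"), it is assumed to return a 2-tuple:
    #     loads[0] -> load % if the vDisk stays on this MDS (service in use)
    #     loads[1] -> load % if the vDisk were added to this MDS (service available)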
    def create_new_slaves(self, new_services):
        # type: (List[Service]) -> Tuple[List[Service], List[Service]]
        """
        Check and create new MDS slaves if necessary
        :param new_services: Services used for MDS master
        :type new_services: List[Service]
        :return: New slave services for the primary domain, new slave services for the secondary domain
        :rtype: Tuple[List[Service], List[Service]]
        """
        def _add_suitable_nodes(local_importance, local_safety, services_to_recycle=None):
            if services_to_recycle is None:
                services_to_recycle = []
            if local_importance == 'primary':
                local_services = new_primary_services
            else:
                local_services = new_secondary_services
            if len(new_node_ips) < local_safety:
                for local_load in sorted(self.mds_layout[local_importance]['loads']):
                    possible_services = self.mds_layout[local_importance]['loads'][local_load]
                    if len(services_to_recycle) > 0:
                        possible_services = [serv for serv in services_to_recycle if serv in possible_services]  # Maintain order of services_to_recycle
                    for local_service in possible_services:
                        if len(new_node_ips) >= local_safety:
                            return
                        if local_service.storagerouter.ip not in new_node_ips:
                            if local_service.storagerouter not in storagerouter_cache:
                                try:
                                    SSHClient(local_service.storagerouter)
                                    storagerouter_cache[local_service.storagerouter] = True
                                except UnableToConnectException:
                                    storagerouter_cache[local_service.storagerouter] = False
                            if storagerouter_cache[local_service.storagerouter] is True:
                                local_services.append(local_service)
                                new_node_ips.add(local_service.storagerouter.ip)
                            else:
                                self._logger.debug('vDisk {0} - Skipping StorageRouter with IP {1} as it is unreachable'.format(self.vdisk.guid, local_service.storagerouter.ip))

        new_node_ips = {new_services[0].storagerouter.ip} if len(new_services) > 0 else set()  # Currently we can only have the local IP in the list of new_services
        storagerouter_cache = {}
        new_primary_services = []
        new_secondary_services = []

        # Try to re-use slaves from primary domain until recommended_primary safety reached
        _add_suitable_nodes(local_importance='primary',
                            local_safety=self.recommended_primary,
                            services_to_recycle=self.slave_services)
        # Add new slaves until primary safety reached
        _add_suitable_nodes(local_importance='primary',
                            local_safety=self.recommended_primary)
        # Try to re-use slaves from secondary domain until safety reached
        _add_suitable_nodes(local_importance='secondary',
                            local_safety=self.safety,
                            services_to_recycle=self.slave_services)
        # Add new slaves until safety reached
        _add_suitable_nodes(local_importance='secondary',
                            local_safety=self.safety)
        # In case safety has not been reached yet, we try to add nodes from primary domain until safety has been reached
        _add_suitable_nodes(local_importance='primary',
                            local_safety=self.safety)

        # Extend the new services with the newly added primary and secondary services
        return new_primary_services, new_secondary_services

    def apply_reconfigurations(self, new_services, previous_master_service):
        # type: (List[Service], Service) -> None
        """
        Applies all calculated reconfigurations
        - Deploys the services
        - Notifies the StorageRouter
        :param new_services: List of new services to be used in the reconfiguration (master and slaves)
                             Note the order matters here! First the master, then slaves in primary domain, then slaves in secondary domain
        :type new_services: List[Service]
        :param previous_master_service: Previous master service in case the master should be switched around (None if no previous master)
        :type previous_master_service: Service
        :return: None
        :rtype: NoneType
        """
        # Verify an MDSClient can be created for all relevant services
        services_to_check = new_services + self.slave_services
        if self.master_service is not None:
            services_to_check.append(self.master_service)
        for service in services_to_check:
            if service not in self.mds_client_cache:
                client = MetadataServerClient.load(service=service, timeout=self.mds_client_timeout)
                if client is None:
                    raise RuntimeError('Cannot establish a MDS client connection for service {0}:{1}'.format(service.storagerouter.ip, service.ports[0]))
                self.mds_client_cache[service] = client

        configs_all = []
        new_namespace_services = []
        configs_without_replaced_master = []
        log_start = 'vDisk {0}'.format(self.vdisk.guid)
        for service in new_services:
            client = self.mds_client_cache[service]
            try:
                if str(self.vdisk.volume_id) not in client.list_namespaces():
                    client.create_namespace(str(self.vdisk.volume_id))  # StorageDriver does not raise if the namespace already exists, nor does it create a duplicate namespace
                    new_namespace_services.append(service)
            except Exception:
                self._logger.exception('{0} - Creating new namespace {1} failed for Service {2}:{3}'.format(log_start, self.vdisk.volume_id, service.storagerouter.ip, service.ports[0]))
                # Clean up newly created namespaces
                for new_namespace_service in new_namespace_services:
                    client = self.mds_client_cache[new_namespace_service]
                    try:
                        self._logger.warning('{0}: Deleting newly created namespace {1} for service {2}:{3}'.format(log_start, self.vdisk.volume_id, new_namespace_service.storagerouter.ip, new_namespace_service.ports[0]))
                        client.remove_namespace(str(self.vdisk.volume_id))
                    except RuntimeError:
                        pass  # If somehow the namespace would not exist, we don't care.
                raise  # Currently nothing has been changed on StorageDriver level, so we can completely abort

            # noinspection PyArgumentList
            config = MDSNodeConfig(address=str(service.storagerouter.ip), port=service.ports[0])
            if previous_master_service != service:  # This only occurs when a slave has caught up with master and old master gets replaced with new master
                configs_without_replaced_master.append(config)
            configs_all.append(config)

        start = time.time()
        update_failure = False
        try:
            self._logger.debug('{0} - Updating MDS configuration'.format(log_start))
            if len(configs_without_replaced_master) != len(configs_all):
                # First update without previous master to avoid race conditions (required by voldrv)
                self._logger.debug('{0} - Without previous master: {1}:{2}'.format(log_start, previous_master_service.storagerouter.ip, previous_master_service.ports[0]))
                self.vdisk.storagedriver_client.update_metadata_backend_config(volume_id=str(self.vdisk.volume_id),
                                                                               metadata_backend_config=MDSMetaDataBackendConfig(configs_without_replaced_master),
                                                                               req_timeout_secs=self.sr_client_timeout)
                self._logger.debug('{0} - Updating MDS configuration without previous master took {1}s'.format(log_start, time.time() - start))
            self.vdisk.storagedriver_client.update_metadata_backend_config(volume_id=str(self.vdisk.volume_id),
                                                                           metadata_backend_config=MDSMetaDataBackendConfig(configs_all),
                                                                           req_timeout_secs=self.sr_client_timeout)
            # Verify the configuration - chosen by the framework - passed to the StorageDriver is effectively the correct configuration
            self.vdisk.invalidate_dynamics('info')
            self._logger.debug('{0} - Configuration after update: {1}'.format(self.vdisk.guid, self.vdisk.info['metadata_backend_config']))

            duration = time.time() - start
            if duration > 5:
                self._logger.critical('{0} - Updating MDS configuration took {1}s'.format(log_start, duration))
        except RuntimeError:
            # @TODO: Timeout throws RuntimeError for now. Replace this once https://github.com/openvstorage/volumedriver/issues/349 is fixed
            if time.time() - start >= self.sr_client_timeout:
                # Timeout reached, clean up must be done manually once server side finished
                self._logger.critical('{0} - Updating MDS configuration timed out'.format(log_start))
                for service in [svc for svc in services_to_check if svc not in new_services]:
                    self._logger.critical('{0} - Manual remove namespace action required for MDS {1}:{2} and namespace {3}'.format(log_start, service.storagerouter.ip, service.ports[0], self.vdisk.volume_id))
                for service in new_services[1:]:
                    self._logger.critical('{0} - Manual set SLAVE role action required for MDS {1}:{2} and namespace {3}'.format(log_start, service.storagerouter.ip, service.ports[0], self.vdisk.volume_id))
                self._logger.critical('{0} - Sync vDisk to reality action required'.format(log_start))
            else:
                self._logger.exception('{0}: Failed to update the metadata backend configuration'.format(log_start))
                update_failure = True  # No need to clean new namespaces if a timeout would have occurred
            # Always raise
            #     * In case of a timeout, the manual actions are logged and user knows the ensure_safety has failed
            #     * In any other case, the newly created namespaces are deleted
            raise
        except Exception:
            self._logger.exception('{0}: Failed to update the metadata backend configuration'.format(log_start))
            update_failure = True
            raise
        finally:
            if update_failure is True:
                # Remove newly created namespaces when updating would go wrong to avoid storage leaks
                for new_namespace_service in new_namespace_services:
                    client = self.mds_client_cache[new_namespace_service]
                    try:
                        self._logger.warning('{0}: Deleting newly created namespace {1} for service {2}:{3}'.format(log_start, self.vdisk.volume_id, new_namespace_service.storagerouter.ip, new_namespace_service.ports[0]))
                        client.remove_namespace(str(self.vdisk.volume_id))
                    except RuntimeError:
                        pass  # If somehow the namespace would not exist, we don't care.

        self._sync_vdisk_to_reality(self.vdisk)
        for service in services_to_check:
            if service not in new_services:
                self._logger.debug('{0} - Deleting namespace for vDisk on service {1}:{2}'.format(log_start, service.storagerouter.ip, service.ports[0]))
                client = self.mds_client_cache[service]
                try:
                    client.remove_namespace(str(self.vdisk.volume_id))
                except RuntimeError:
                    pass  # If somehow the namespace would not exist, we don't care.

        for service in new_services[1:]:
            client = self.mds_client_cache[service]
            try:
                if client.get_role(nspace=str(self.vdisk.volume_id)) != MetadataServerClient.MDS_ROLE.SLAVE:
                    self._logger.debug('{0} - Demoting service {1}:{2} to SLAVE'.format(log_start, service.storagerouter.ip, service.ports[0]))
                    start = time.time()
                    client.set_role(nspace=str(self.vdisk.volume_id), role=MetadataServerClient.MDS_ROLE.SLAVE)
                    duration = time.time() - start
                    if duration > 5:
                        self._logger.critical('{0} - Demoting service {1}:{2} to SLAVE took {3}s'.format(log_start, service.storagerouter.ip, service.ports[0], duration))
            except Exception:
                self._logger.critical('{0} - Failed to demote service {1}:{2} to SLAVE'.format(log_start, service.storagerouter.ip, service.ports[0]))
                raise

    def catchup_mds_slaves(self):
        # type: () -> None
        """
        Performs a catchup for MDS slaves if the number of tlogs they are behind exceeds a certain threshold
        """

    def ensure_safety(self):
        # type: () -> None
        """
        Ensures (or tries to ensure) the safety of a given vDisk.
        Assumptions:
            * A local overloaded master is better than a non-local non-overloaded master
            * Prefer master/slaves to be on different hosts, a subsequent slave on the same node doesn't add safety
            * Don't actively overload services (e.g. configure an MDS as slave causing it to get overloaded)
            * Too much safety is not wanted (it adds load to nodes while not required)
            * Order of slaves is:
                * All slaves on StorageRouters in primary Domain of vDisk host
                * All slaves on StorageRouters in secondary Domain of vDisk host
                * Eg: Safety of 2 (1 master + 1 slave)
                    mds config = [local master in primary, slave in secondary]
                * Eg: Safety of 3 (1 master + 2 slaves)
                    mds config = [local master in primary, slave in primary, slave in secondary]
                * Eg: Safety of 4 (1 master + 3 slaves)
                    mds config = [local master in primary, slave in primary, slave in secondary, slave in secondary]
        :raises RuntimeError: If host of vDisk is part of the excluded StorageRouters
                              If host of vDisk is not part of the StorageRouters in the primary domain
                              If catchup command fails for a slave
                              If MDS client cannot be created for any of the current or new MDS services
                              If updateMetadataBackendConfig would fail for whatever reason
        :raises SRCObjectNotFoundException: If vDisk does not have a StorageRouter GUID
        :return: None
        :rtype: NoneType
        """
        self._logger.info('vDisk {0} - Start checkup for vDisk {1}'.format(self.vdisk.guid, self.vdisk.name))
        self.validate_vdisk()

        self._logger.debug('vDisk {0} - Safety: {1}, Max load: {2}%, Tlogs: {3}'.format(self.vdisk.guid, self.safety, self.max_load, self.tlogs))

        self.vdisk.reload_client('storagedriver')
        self.vdisk.reload_client('objectregistry')

        reconfigure_reasons = self.get_reconfiguration_reasons()
        if not reconfigure_reasons:
            self._logger.info('vDisk {0} - No reconfiguration required'.format(self.vdisk.guid))
            self._sync_vdisk_to_reality(self.vdisk)
            return
        self._logger.info('vDisk {0} - Reconfiguration required. Reasons:'.format(self.vdisk.guid))
        for reason in reconfigure_reasons:
            self._logger.info('vDisk {0} - * {1}'.format(self.vdisk.guid, reason))

        new_services = []
        new_master_services, previous_master = self.create_new_master()
        new_services.extend(new_master_services)
        # At this point we can have:
        #     Local master which is OK
        #     Local master + catching up new local master (because 1st is overloaded)
        #     Local master + catching up slave (because 1st was overloaded)
        #     Local slave which has caught up and been added as 1st in list of new_services
        #     Nothing at all --> Can only occur when the current master service (according to StorageDriver) has been deleted in the model and no other local MDS is available (Very unlikely scenario to occur, if possible at all)
        # Now the slaves will be added according to the rules described in the docstring
        # When local master + catching up service is present, this counts as safety of 1, because eventually the current master will be removed
        new_primary_services, new_secondary_services = self.create_new_slaves(new_services)
        new_services.extend(new_primary_services)
        new_services.extend(new_secondary_services)

        service_string = ', '.join(["{{'ip': '{0}', 'port': {1}}}".format(service.storagerouter.ip, service.ports[0]) for service in new_services])
        self._logger.debug('vDisk {0} - Configuration after SLAVE calculation: [{1}]'.format(self.vdisk.guid, service_string))

        if new_services == [self.master_service] + self.slave_services and len(new_services) == len(self.metadata_backend_config_start):
            self._logger.info('vDisk {0} - Could not calculate a better MDS layout. Nothing to update'.format(self.vdisk.guid))
            self._sync_vdisk_to_reality(self.vdisk)
            return

        self.apply_reconfigurations(new_services, previous_master)
        self._logger.info('vDisk {0}: Completed'.format(self.vdisk.guid))
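# Typical invocation (a sketch; `vdisk` and `down_storagerouter` are assumed to be
# existing DAL objects, not defined in this excerpt): recalculate the MDS safety
# for one vDisk while leaving an unreachable StorageRouter out of the calculation.
ensurer = SafetyEnsurer(vdisk_guid=vdisk.guid,
                        excluded_storagerouter_guids=[down_storagerouter.guid])
ensurer.ensure_safety()  # Raises RuntimeError / SRCObjectNotFoundException on failure (see docstring)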