    def can_be_deleted(self, storagedriver):
        """
        Checks whether a Storage Driver can be deleted
        """
        result = True
        storagerouter = storagedriver.storagerouter
        storagedrivers_left = len([
            sd for sd in storagerouter.storagedrivers
            if sd.guid != storagedriver.guid
        ])
        pmachine = storagerouter.pmachine
        vmachines = VMachineList.get_customer_vmachines()
        vpools_guids = [
            vmachine.vpool_guid for vmachine in vmachines
            if vmachine.vpool_guid is not None
        ]
        pmachine_guids = [vmachine.pmachine_guid for vmachine in vmachines]
        vpool = storagedriver.vpool

        if storagedrivers_left == 0 and pmachine.guid in pmachine_guids and vpool.guid in vpools_guids:
            result = False
        if any(vdisk for vdisk in vpool.vdisks
               if vdisk.storagedriver_id == storagedriver.storagedriver_id):
            result = False
        return result
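The same two checks recur in the variants of this method below: the driver must not be the last one on its Storage Router while the pmachine still hosts customer vMachines and the vPool is still in use, and no vDisk may still be served from it. Distilled into a dependency-free predicate (illustrative names only; the real objects are OVS DAL models):

def can_delete_storagedriver(drivers_left_on_router, pmachine_hosts_vmachines,
                             vpool_in_use, serves_vdisks):
    # Last driver on the router while pmachine + vPool are still in use: keep it
    if drivers_left_on_router == 0 and pmachine_hosts_vmachines and vpool_in_use:
        return False
    # vDisks still served from this driver: keep it
    if serves_vdisks:
        return False
    return True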
    def snapshot_all_vms():
        """
        Snapshots all VMachines
        """
        logger.info("[SSA] started")
        success = []
        fail = []
        machines = VMachineList.get_customer_vmachines()
        for machine in machines:
            try:
                VMachineController.snapshot(machineguid=machine.guid, label="", is_consistent=False, is_automatic=True)
                success.append(machine.guid)
            except Exception:
                fail.append(machine.guid)
        logger.info("[SSA] Snapshot has been taken for {0} vMachines, {1} failed.".format(len(success), len(fail)))
    def can_be_deleted(self, storagedriver):
        """
        Checks whether a Storage Driver can be deleted
        """
        result = True
        storagerouter = storagedriver.storagerouter
        pmachine = storagerouter.pmachine
        vmachines = VMachineList.get_customer_vmachines()
        vpools_guids = [vmachine.vpool_guid for vmachine in vmachines if vmachine.vpool_guid is not None]
        pmachine_guids = [vmachine.pmachine_guid for vmachine in vmachines]
        vpool = storagedriver.vpool

        if pmachine.guid in pmachine_guids and vpool.guid in vpools_guids:
            result = False
        if any(vdisk for vdisk in vpool.vdisks if vdisk.storagedriver_id == storagedriver.storagedriver_id):
            result = False
        return Response(result, status=status.HTTP_200_OK)
    def can_be_deleted(self, storagedriver):
        """
        Checks whether a Storage Driver can be deleted
        """
        result = True
        storagerouter = storagedriver.storagerouter
        storagedrivers_left = len([sd for sd in storagerouter.storagedrivers if sd.guid != storagedriver.guid])
        pmachine = storagerouter.pmachine
        vmachines = VMachineList.get_customer_vmachines()
        vpools_guids = [vmachine.vpool_guid for vmachine in vmachines if vmachine.vpool_guid is not None]
        pmachine_guids = [vmachine.pmachine_guid for vmachine in vmachines]
        vpool = storagedriver.vpool

        if storagedrivers_left == 0 and pmachine.guid in pmachine_guids and vpool.guid in vpools_guids:
            result = False
        if any(vdisk for vdisk in vpool.vdisks if vdisk.storagedriver_id == storagedriver.storagedriver_id):
            result = False
        return result
    def snapshot_all_vms():
        """
        Snapshots all VMachines
        """
        logger.info('[SSA] started')
        success = []
        fail = []
        machines = VMachineList.get_customer_vmachines()
        for machine in machines:
            try:
                VMachineController.snapshot(machineguid=machine.guid,
                                            label='',
                                            is_consistent=False,
                                            is_automatic=True)
                success.append(machine.guid)
            except Exception:
                fail.append(machine.guid)
        logger.info('[SSA] {0} vMachines were snapshotted, {1} failed.'.format(len(success), len(fail)))
Example #6
    def snapshot_all_vms():
        """
        Snapshots all VMachines
        """
        ScheduledTaskController._logger.info('[SSA] started')
        success = []
        fail = []
        machines = VMachineList.get_customer_vmachines()
        for machine in machines:
            try:
                VMachineController.snapshot(machineguid=machine.guid,
                                            label='',
                                            is_consistent=False,
                                            is_automatic=True,
                                            is_sticky=False)
                success.append(machine.guid)
            except Exception:
                fail.append(machine.guid)
        ScheduledTaskController._logger.info('[SSA] Snapshot has been taken for {0} vMachines, {1} failed.'.format(len(success), len(fail)))
Example #7
    def gather_scrub_work():
        """
        Retrieve and execute scrub work
        :return: None
        """
        logger.info('Gather Scrub - Started')

        scrub_locations = {}
        for storage_driver in StorageDriverList.get_storagedrivers():
            for partition in storage_driver.partitions:
                if DiskPartition.ROLES.SCRUB == partition.role:
                    logger.info('Gather Scrub - Storage Router {0:<15} has SCRUB partition at {1}'.format(storage_driver.storagerouter.ip, partition.path))
                    if storage_driver.storagerouter not in scrub_locations:
                        try:
                            _ = SSHClient(storage_driver.storagerouter)
                            scrub_locations[storage_driver.storagerouter] = str(partition.path)
                        except UnableToConnectException:
                            logger.warning('Gather Scrub - Storage Router {0:<15} is not reachable'.format(storage_driver.storagerouter.ip))

        if len(scrub_locations) == 0:
            raise RuntimeError('No scrub locations found')

        vdisk_guids = set()
        for vmachine in VMachineList.get_customer_vmachines():
            for vdisk in vmachine.vdisks:
                if vdisk.info['object_type'] == 'BASE':
                    vdisk_guids.add(vdisk.guid)
        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info['object_type'] == 'BASE':
                vdisk_guids.add(vdisk.guid)

        logger.info('Gather Scrub - Checking {0} volumes for scrub work'.format(len(vdisk_guids)))
        local_machineid = System.get_my_machine_id()
        local_storage_router = None
        local_scrub_location = None
        local_vdisks_to_scrub = []
        result_set = ResultSet([])
        storage_router_list = []

        for index, scrub_info in enumerate(scrub_locations.items()):
            start_index = index * len(vdisk_guids) / len(scrub_locations)
            end_index = (index + 1) * len(vdisk_guids) / len(scrub_locations)
            storage_router = scrub_info[0]
            vdisk_guids_to_scrub = list(vdisk_guids)[start_index:end_index]
            local = storage_router.machine_id == local_machineid
            logger.info('Gather Scrub - Storage Router {0:<15} ({1}) - Scrubbing {2} virtual disks'.format(storage_router.ip, 'local' if local is True else 'remote', len(vdisk_guids_to_scrub)))

            if local is True:
                local_storage_router = storage_router
                local_scrub_location = scrub_info[1]
                local_vdisks_to_scrub = vdisk_guids_to_scrub
            else:
                result_set.add(ScheduledTaskController._execute_scrub_work.s(scrub_location=scrub_info[1],
                                                                             vdisk_guids=vdisk_guids_to_scrub).apply_async(
                    routing_key='sr.{0}'.format(storage_router.machine_id)
                ))
                storage_router_list.append(storage_router)

        # Remote tasks have been launched, now start the local task and then wait for remote tasks to finish
        processed_guids = []
        if local_scrub_location is not None and len(local_vdisks_to_scrub) > 0:
            try:
                processed_guids = ScheduledTaskController._execute_scrub_work(scrub_location=local_scrub_location,
                                                                              vdisk_guids=local_vdisks_to_scrub)
            except Exception as ex:
                logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(local_storage_router.ip, ex))
        all_results = result_set.join(propagate=False)  # Propagate False makes sure all jobs are waited for even when 1 or more jobs fail
        for index, result in enumerate(all_results):
            if isinstance(result, list):
                processed_guids.extend(result)
            else:
                logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(storage_router_list[index].ip, result))
        if len(processed_guids) != len(vdisk_guids) or set(processed_guids).difference(vdisk_guids):
            raise RuntimeError('Scrubbing failed for 1 or more storagerouters')
        logger.info('Gather Scrub - Finished')
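The start_index/end_index arithmetic above hands each scrub location one contiguous slice of the vdisk set; on Python 2, `/` on two ints already floors, which is what makes the slices line up. A standalone illustration of the same partitioning (`partition` is an illustrative name; explicit `//` used so it also holds on Python 3):

def partition(guids, locations):
    guids = list(guids)
    slices = {}
    for index, location in enumerate(locations):
        start = index * len(guids) // len(locations)
        end = (index + 1) * len(guids) // len(locations)
        slices[location] = guids[start:end]
    return slices

# 10 volumes over 3 locations -> slices of 3, 3 and 4 guids; none dropped, none duplicated
print(partition(range(10), ['sr1', 'sr2', 'sr3']))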
    def remove_storagedriver(storagedriver_guid):
        """
        Removes a Storage Driver (and, if it was the last Storage Driver for a vPool, the vPool is removed as well)
        """
        # Get objects & Make some checks
        storagedriver = StorageDriver(storagedriver_guid)
        storagerouter = storagedriver.storagerouter
        ip = storagerouter.ip
        pmachine = storagerouter.pmachine
        vmachines = VMachineList.get_customer_vmachines()
        pmachine_guids = [vm.pmachine_guid for vm in vmachines]
        vpools_guids = [vm.vpool_guid for vm in vmachines if vm.vpool_guid is not None]

        vpool = storagedriver.vpool
        if pmachine.guid in pmachine_guids and vpool.guid in vpools_guids:
            raise RuntimeError('There are still vMachines served from the given Storage Driver')
        if any(vdisk for vdisk in vpool.vdisks if vdisk.storagedriver_id == storagedriver.storagedriver_id):
            raise RuntimeError('There are still vDisks served from the given Storage Driver')

        services = ['volumedriver_{0}'.format(vpool.name),
                    'failovercache_{0}'.format(vpool.name)]
        storagedrivers_left = False

        # Stop services
        for current_storagedriver in vpool.storagedrivers:
            if current_storagedriver.guid != storagedriver_guid:
                storagedrivers_left = True
            client = SSHClient.load(current_storagedriver.storagerouter.ip)
            for service in services:
                System.exec_remote_python(client, """
from ovs.plugin.provider.service import Service
if Service.has_service('{0}'):
    Service.disable_service('{0}')
""".format(service))
                System.exec_remote_python(client, """
from ovs.plugin.provider.service import Service
if Service.has_service('{0}'):
    Service.stop_service('{0}')
""".format(service))

        # Unconfigure Cinder
        ovsdb = PersistentFactory.get_client()
        key = str('ovs_openstack_cinder_%s' % storagedriver.vpool_guid)
        if ovsdb.exists(key):
            cinder_password, cinder_user, tenant_name, controller_ip, _ = ovsdb.get(key)
            client = SSHClient.load(ip)
            System.exec_remote_python(client, """
from ovs.extensions.openstack.cinder import OpenStackCinder
osc = OpenStackCinder(cinder_password = '{0}', cinder_user = '{1}', tenant_name = '{2}', controller_ip = '{3}')
osc.unconfigure_vpool('{4}', '{5}', {6})
""".format(cinder_password, cinder_user, tenant_name, controller_ip, vpool.name, storagedriver.mountpoint, not storagedrivers_left))
            if not storagedrivers_left:
                ovsdb.delete(key)

        # KVM pool
        client = SSHClient.load(ip)
        if pmachine.hvtype == 'KVM':
            if vpool.name in client.run('virsh pool-list'):
                client.run('virsh pool-destroy {0}'.format(vpool.name))
            try:
                client.run('virsh pool-undefine {0}'.format(vpool.name))
            except Exception:
                pass  # Ignore undefine errors, since that can happen on re-entrance

        # Remove services
        client = SSHClient.load(ip)
        for service in services:
            System.exec_remote_python(client, """
from ovs.plugin.provider.service import Service
if Service.has_service('{0}'):
    Service.remove_service(domain='openvstorage', name='{0}')
""".format(service))
        configuration_dir = System.read_remote_config(client, 'ovs.core.cfgdir')

        voldrv_arakoon_cluster_id = str(System.read_remote_config(client, 'volumedriver.arakoon.clusterid'))
        voldrv_arakoon_cluster = ArakoonManagementEx().getCluster(voldrv_arakoon_cluster_id)
        voldrv_arakoon_client_config = voldrv_arakoon_cluster.getClientConfig()
        arakoon_node_configs = []
        for arakoon_node in voldrv_arakoon_client_config.keys():
            arakoon_node_configs.append(ArakoonNodeConfig(arakoon_node,
                                                          voldrv_arakoon_client_config[arakoon_node][0][0],
                                                          voldrv_arakoon_client_config[arakoon_node][1]))
        vrouter_clusterregistry = ClusterRegistry(str(vpool.guid), voldrv_arakoon_cluster_id, arakoon_node_configs)
        # Reconfigure volumedriver
        if storagedrivers_left:
            node_configs = []
            for current_storagedriver in vpool.storagedrivers:
                if current_storagedriver.guid != storagedriver_guid:
                    node_configs.append(ClusterNodeConfig(str(current_storagedriver.storagedriver_id),
                                                          str(current_storagedriver.cluster_ip),
                                                          current_storagedriver.ports[0],
                                                          current_storagedriver.ports[1],
                                                          current_storagedriver.ports[2]))
            vrouter_clusterregistry.set_node_configs(node_configs)
        else:
            try:
                storagedriver_client = LocalStorageRouterClient('{0}/voldrv_vpools/{1}.json'.format(configuration_dir, vpool.name))
                storagedriver_client.destroy_filesystem()
                vrouter_clusterregistry.erase_node_configs()
            except RuntimeError as ex:
                print('Could not destroy filesystem or erase node configs due to error: {}'.format(ex))

        # Cleanup directories
        client = SSHClient.load(ip)
        client.run('rm -rf {}/read1_{}'.format(storagedriver.mountpoint_readcache1, vpool.name))
        if storagedriver.mountpoint_readcache2:
            client.run('rm -rf {}/read2_{}'.format(storagedriver.mountpoint_readcache2, vpool.name))
        client.run('rm -rf {}/sco_{}'.format(storagedriver.mountpoint_writecache, vpool.name))
        client.run('rm -rf {}/foc_{}'.format(storagedriver.mountpoint_foc, vpool.name))
        client.run('rm -rf {}/fd_{}'.format(storagedriver.mountpoint_writecache, vpool.name))
        client.run('rm -rf {}/metadata_{}'.format(storagedriver.mountpoint_md, vpool.name))
        client.run('rm -rf {}/tlogs_{}'.format(storagedriver.mountpoint_md, vpool.name))
        client.run('rm -rf /var/rsp/{}'.format(vpool.name))

        # Remove files
        client.run('rm -f {0}/voldrv_vpools/{1}.json'.format(configuration_dir, vpool.name))

        # Remove top directories
        client.run('if [ -d {0} ] && [ ! "$(ls -A {0})" ]; then rmdir {0}; fi'.format(storagedriver.mountpoint_readcache1))
        if storagedriver.mountpoint_readcache2:
            client.run('if [ -d {0} ] && [ ! "$(ls -A {0})" ]; then rmdir {0}; fi'.format(storagedriver.mountpoint_readcache2))
        client.run('if [ -d {0} ] && [ ! "$(ls -A {0})" ]; then rmdir {0}; fi'.format(storagedriver.mountpoint_writecache))
        client.run('if [ -d {0} ] && [ ! "$(ls -A {0})" ]; then rmdir {0}; fi'.format(storagedriver.mountpoint_foc))
        client.run('if [ -d {0} ] && [ ! "$(ls -A {0})" ]; then rmdir {0}; fi'.format(storagedriver.mountpoint_md))
        client.run('if [ -d {0} ] && [ ! "$(ls -A {0})" ]; then rmdir {0}; fi'.format(storagedriver.mountpoint))

        # First model cleanup
        storagedriver.delete(abandon=True)  # Detach from the log entries

        if storagedrivers_left:
            # Restart leftover services
            for current_storagedriver in vpool.storagedrivers:
                if current_storagedriver.guid != storagedriver_guid:
                    client = SSHClient.load(current_storagedriver.storagerouter.ip)
                    for service in services:
                        System.exec_remote_python(client, """
from ovs.plugin.provider.service import Service
if Service.has_service('{0}'):
    Service.enable_service('{0}')
""".format(service))
                        System.exec_remote_python(client, """
from ovs.plugin.provider.service import Service
if Service.has_service('{0}'):
    Service.start_service('{0}')
""".format(service))
        else:
            # Final model cleanup
            vpool.delete()
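The stop/disable and enable/start loops in this example repeat a near-identical inline script per service. Purely as a refactoring sketch on top of the same System.exec_remote_python and Service calls already used above (the wrapper itself is hypothetical, not OVS API):

_REMOTE_SERVICE_SCRIPT = """
from ovs.plugin.provider.service import Service
if Service.has_service('{0}'):
    Service.{1}_service('{0}')
"""

def _remote_service_action(client, service, action):
    # action is 'disable', 'stop', 'enable' or 'start', mirroring the inline scripts above
    System.exec_remote_python(client, _REMOTE_SERVICE_SCRIPT.format(service, action))

# Usage, e.g. replacing the two calls in the stop loop:
#     _remote_service_action(client, service, 'disable')
#     _remote_service_action(client, service, 'stop')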
Example #9
    def gather_scrub_work():
        """
        Retrieve and execute scrub work
        :return: None
        """
        ScheduledTaskController._logger.info('Gather Scrub - Started')

        scrub_locations = {}
        for storage_driver in StorageDriverList.get_storagedrivers():
            for partition in storage_driver.partitions:
                if DiskPartition.ROLES.SCRUB == partition.role:
                    ScheduledTaskController._logger.info('Gather Scrub - Storage Router {0:<15} has SCRUB partition at {1}'.format(storage_driver.storagerouter.ip, partition.path))
                    if storage_driver.storagerouter not in scrub_locations:
                        try:
                            sshclient = SSHClient(storage_driver.storagerouter)
                            # Use ServiceManager(sshclient) to make sure ovs-workers are actually running
                            if ServiceManager.get_service_status('workers', sshclient) is False:
                                ScheduledTaskController._logger.warning('Gather Scrub - Storage Router {0:<15} - workers are not running'.format(storage_driver.storagerouter.ip))
                            else:
                                scrub_locations[storage_driver.storagerouter] = str(partition.path)
                        except UnableToConnectException:
                            ScheduledTaskController._logger.warning('Gather Scrub - Storage Router {0:<15} is not reachable'.format(storage_driver.storagerouter.ip))

        if len(scrub_locations) == 0:
            raise RuntimeError('No scrub locations found')

        vdisk_guids = set()
        for vmachine in VMachineList.get_customer_vmachines():
            for vdisk in vmachine.vdisks:
                if vdisk.info['object_type'] == 'BASE':
                    vdisk_guids.add(vdisk.guid)
        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info['object_type'] == 'BASE':
                vdisk_guids.add(vdisk.guid)

        if len(vdisk_guids) == 0:
            ScheduledTaskController._logger.info('Gather Scrub - No scrub work needed')
            return

        ScheduledTaskController._logger.info('Gather Scrub - Checking {0} volumes for scrub work'.format(len(vdisk_guids)))
        local_machineid = System.get_my_machine_id()
        local_storage_router = None
        local_scrub_location = None
        local_vdisks_to_scrub = []
        result_set = {}
        storage_router_list = []
        scrub_map = {}

        for index, scrub_info in enumerate(scrub_locations.items()):
            start_index = index * len(vdisk_guids) / len(scrub_locations)
            end_index = (index + 1) * len(vdisk_guids) / len(scrub_locations)
            storage_router = scrub_info[0]
            vdisk_guids_to_scrub = list(vdisk_guids)[start_index:end_index]
            local = storage_router.machine_id == local_machineid
            ScheduledTaskController._logger.info('Gather Scrub - Storage Router {0:<15} ({1}) - Scrubbing {2} virtual disks'.format(storage_router.ip, 'local' if local is True else 'remote', len(vdisk_guids_to_scrub)))

            if local is True:
                local_storage_router = storage_router
                local_scrub_location = scrub_info[1]
                local_vdisks_to_scrub = vdisk_guids_to_scrub
            else:
                result_set[storage_router.ip] = ScheduledTaskController._execute_scrub_work.s(scrub_location=scrub_info[1],
                                                                                              vdisk_guids=vdisk_guids_to_scrub).apply_async(routing_key='sr.{0}'.format(storage_router.machine_id))
                storage_router_list.append(storage_router)
                scrub_map[storage_router.ip] = vdisk_guids_to_scrub

        # Remote tasks have been launched, now start the local task and then wait for remote tasks to finish
        processed_guids = []
        if local_scrub_location is not None and len(local_vdisks_to_scrub) > 0:
            try:
                processed_guids = ScheduledTaskController._execute_scrub_work(scrub_location=local_scrub_location,
                                                                              vdisk_guids=local_vdisks_to_scrub)
            except Exception as ex:
                ScheduledTaskController._logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(local_storage_router.ip, ex))

        all_results, failed_nodes = CeleryToolbox.manage_running_tasks(result_set,
                                                                       timesleep=60)  # Check every 60 seconds if tasks are still running

        for ip, result in all_results.iteritems():
            if isinstance(result, list):
                processed_guids.extend(result)
            else:
                ScheduledTaskController._logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(ip, result))

        result_set = {}
        for failed_node in failed_nodes:
            ScheduledTaskController._logger.warning('Scrubbing failed on node {0}. Will reschedule on another node.'.format(failed_node))
            vdisk_guids_to_scrub = scrub_map[failed_node]
            rescheduled_work = False
            for storage_router, scrub_location in scrub_locations.items():
                if storage_router.ip not in failed_nodes:
                    if storage_router.machine_id != local_machineid:
                        ScheduledTaskController._logger.info('Rescheduled scrub work from node {0} to node {1}.'.format(failed_node, storage_router.ip))
                        result_set[storage_router.ip] = ScheduledTaskController._execute_scrub_work.s(scrub_location=scrub_location,
                                                                                                      vdisk_guids=vdisk_guids_to_scrub).apply_async(
                                                                                                      routing_key='sr.{0}'.format(storage_router.machine_id))
                        storage_router_list.append(storage_router)
                        rescheduled_work = True
                        break
            if rescheduled_work is False:
                if local_scrub_location is not None:
                    try:
                        processed_guids.extend(ScheduledTaskController._execute_scrub_work(scrub_location=local_scrub_location,
                                                                                           vdisk_guids=vdisk_guids_to_scrub))
                    except Exception as ex:
                        ScheduledTaskController._logger.error(
                            'Gather Scrub - Storage Router Local - Scrubbing failed with error:\n - {0}'.format(ex))
                else:
                    ScheduledTaskController._logger.warning('No nodes left to reschedule work from node {0}'.format(failed_node))

        if len(result_set) > 0:
            all_results2, failed_nodes = CeleryToolbox.manage_running_tasks(result_set,
                                                                            timesleep=60)  # Check every 60 seconds if tasks are still running

            for ip, result in all_results2.iteritems():
                if isinstance(result, list):
                    processed_guids.extend(result)
                else:
                    ScheduledTaskController._logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(ip, result))

        if len(set(processed_guids)) != len(vdisk_guids) or set(processed_guids).difference(vdisk_guids):
            raise RuntimeError('Scrubbing failed for 1 or more storagerouters')
        ScheduledTaskController._logger.info('Gather Scrub - Finished')
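CeleryToolbox.manage_running_tasks is project-internal and its implementation is not shown in these examples. A plausible shape, assuming it polls standard Celery AsyncResult objects (this is a guess at the contract, not the real implementation):

import time

def manage_running_tasks(result_set, timesleep=60):
    # result_set: {ip: AsyncResult}; returns ({ip: result or exception}, [failed ips])
    results = {}
    pending = dict(result_set)
    while pending:
        for ip, async_result in list(pending.items()):
            if async_result.ready():
                # For a failed task, .result holds the raised exception
                results[ip] = async_result.result
                del pending[ip]
        if pending:
            time.sleep(timesleep)
    # Detecting nodes whose workers died (the 'failed_nodes' used above) would
    # need extra bookkeeping, e.g. worker heartbeats; omitted in this sketch
    return results, []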
    def deletescrubsnapshots(timestamp=None):
        """
        Delete snapshots & scrubbing policy

        Implemented delete snapshot policy:
        < 1d | 1d bucket | 1 | best of bucket   | 1d
        < 1w | 1d bucket | 6 | oldest of bucket | 7d = 1w
        < 1m | 1w bucket | 3 | oldest of bucket | 4w = 1m
        > 1m | delete
        """

        logger.info('Delete snapshots started')

        day = 60 * 60 * 24
        week = day * 7

        # Calculate bucket structure
        if timestamp is None:
            timestamp = time.time()
        offset = int(mktime(datetime.fromtimestamp(timestamp).date().timetuple())) - day
        buckets = []
        # Buckets first 7 days: [0-1[, [1-2[, [2-3[, [3-4[, [4-5[, [5-6[, [6-7[
        for i in xrange(0, 7):
            buckets.append({'start': offset - (day * i),
                            'end': offset - (day * (i + 1)),
                            'type': '1d',
                            'snapshots': []})
        # Week buckets next 3 weeks: [7-14[, [14-21[, [21-28[
        for i in xrange(1, 4):
            buckets.append({'start': offset - (week * i),
                            'end': offset - (week * (i + 1)),
                            'type': '1w',
                            'snapshots': []})
        buckets.append({'start': offset - (week * 4),
                        'end': 0,
                        'type': 'rest',
                        'snapshots': []})

        # Place all snapshots in bucket_chains
        bucket_chains = []
        for vmachine in VMachineList.get_customer_vmachines():
            if any(vd.info['object_type'] in ['BASE'] for vd in vmachine.vdisks):
                bucket_chain = copy.deepcopy(buckets)
                for snapshot in vmachine.snapshots:
                    timestamp = int(snapshot['timestamp'])
                    for bucket in bucket_chain:
                        if bucket['start'] >= timestamp > bucket['end']:
                            for diskguid, snapshotguid in snapshot['snapshots'].iteritems():
                                bucket['snapshots'].append({'timestamp': timestamp,
                                                            'snapshotid': snapshotguid,
                                                            'diskguid': diskguid,
                                                            'is_consistent': snapshot['is_consistent']})
                bucket_chains.append(bucket_chain)

        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info['object_type'] in ['BASE']:
                bucket_chain = copy.deepcopy(buckets)
                for snapshot in vdisk.snapshots:
                    timestamp = int(snapshot['timestamp'])
                    for bucket in bucket_chain:
                        if bucket['start'] >= timestamp > bucket['end']:
                            bucket['snapshots'].append({'timestamp': timestamp,
                                                        'snapshotid': snapshot['guid'],
                                                        'diskguid': vdisk.guid,
                                                        'is_consistent': snapshot['is_consistent']})
                bucket_chains.append(bucket_chain)

        # Clean out the snapshot bucket_chains, we delete the snapshots we want to keep
        # And we'll remove all snapshots that remain in the buckets
        for bucket_chain in bucket_chains:
            first = True
            for bucket in bucket_chain:
                if first is True:
                    best = None
                    for snapshot in bucket['snapshots']:
                        if best is None:
                            best = snapshot
                        # Consistent is better than inconsistent
                        elif snapshot['is_consistent'] and not best['is_consistent']:
                            best = snapshot
                        # Newer (larger timestamp) is better than older snapshots
                        elif snapshot['is_consistent'] == best['is_consistent'] and \
                                snapshot['timestamp'] > best['timestamp']:
                            best = snapshot
                    bucket['snapshots'] = [s for s in bucket['snapshots'] if
                                           s['timestamp'] != best['timestamp']]
                    first = False
                elif bucket['end'] > 0:
                    oldest = None
                    for snapshot in bucket['snapshots']:
                        if oldest is None:
                            oldest = snapshot
                        # Older (smaller timestamp) is the one we want to keep
                        elif snapshot['timestamp'] < oldest['timestamp']:
                            oldest = snapshot
                    bucket['snapshots'] = [s for s in bucket['snapshots'] if
                                           s['timestamp'] != oldest['timestamp']]

        # Delete obsolete snapshots
        for bucket_chain in bucket_chains:
            for bucket in bucket_chain:
                for snapshot in bucket['snapshots']:
                    VDiskController.delete_snapshot(diskguid=snapshot['diskguid'],
                                                    snapshotid=snapshot['snapshotid'])

        logger.info('Delete snapshots finished')
        logger.info('Scrubbing started')

        vdisks = []
        for vmachine in VMachineList.get_customer_vmachines():
            for vdisk in vmachine.vdisks:
                if vdisk.info['object_type'] in ['BASE']:
                    vdisks.append(vdisk)
        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info['object_type'] in ['BASE']:
                vdisks.append(vdisk)

        total = 0
        failed = 0
        for vdisk in vdisks:
            work_units = vdisk.storagedriver_client.get_scrubbing_workunits(str(vdisk.volume_id))
            for work_unit in work_units:
                try:
                    total += 1
                    scrubbing_result = _storagedriver_scrubber.scrub(work_unit, vdisk.vpool.mountpoint_temp)
                    vdisk.storagedriver_client.apply_scrubbing_result(scrubbing_result)
                except Exception:
                    failed += 1
                    logger.info('Failed scrubbing work unit for volume {}'.format(
                        vdisk.volume_id
                    ))

        logger.info('Scrubbing finished. {} out of {} items failed.'.format(
            failed, total
        ))
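Every variant of this policy builds the same bucket layout, and a snapshot lands in a bucket when start >= timestamp > end (half-open at the lower end). A self-contained sketch (standard library only; build_buckets is an illustrative name) that prints the concrete boundaries produced for "now":

import time
from time import mktime
from datetime import datetime

def build_buckets(timestamp=None):
    day = 60 * 60 * 24
    week = day * 7
    if timestamp is None:
        timestamp = time.time()
    # Midnight at the start of the previous day, as in the code above
    offset = int(mktime(datetime.fromtimestamp(timestamp).date().timetuple())) - day
    buckets = [{'start': offset - day * i, 'end': offset - day * (i + 1), 'type': '1d'}
               for i in range(7)]
    buckets += [{'start': offset - week * i, 'end': offset - week * (i + 1), 'type': '1w'}
                for i in range(1, 4)]
    buckets.append({'start': offset - week * 4, 'end': 0, 'type': 'rest'})
    return buckets

for bucket in build_buckets():
    upper = datetime.fromtimestamp(bucket['start'])
    lower = datetime.fromtimestamp(bucket['end']) if bucket['end'] else 'epoch'
    print('{0:>4}: ({1}, {2}]'.format(bucket['type'], lower, upper))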
Example #11
    def gather_scrub_work():
        """
        Retrieve and execute scrub work
        :return: None
        """
        logger.info('Gather Scrub - Started')

        scrub_locations = {}
        for storage_driver in StorageDriverList.get_storagedrivers():
            for partition in storage_driver.partitions:
                if DiskPartition.ROLES.SCRUB == partition.role:
                    logger.info(
                        'Gather Scrub - Storage Router {0:<15} has SCRUB partition at {1}'
                        .format(storage_driver.storagerouter.ip,
                                partition.path))
                    if storage_driver.storagerouter not in scrub_locations:
                        try:
                            _ = SSHClient(storage_driver.storagerouter)
                            scrub_locations[storage_driver.storagerouter] = str(partition.path)
                        except UnableToConnectException:
                            logger.warning(
                                'Gather Scrub - Storage Router {0:<15} is not reachable'
                                .format(storage_driver.storagerouter.ip))

        if len(scrub_locations) == 0:
            raise RuntimeError('No scrub locations found')

        vdisk_guids = set()
        for vmachine in VMachineList.get_customer_vmachines():
            for vdisk in vmachine.vdisks:
                if vdisk.info['object_type'] == 'BASE':
                    vdisk_guids.add(vdisk.guid)
        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info['object_type'] == 'BASE':
                vdisk_guids.add(vdisk.guid)

        logger.info(
            'Gather Scrub - Checking {0} volumes for scrub work'.format(
                len(vdisk_guids)))
        local_machineid = System.get_my_machine_id()
        local_storage_router = None
        local_scrub_location = None
        local_vdisks_to_scrub = []
        result_set = ResultSet([])
        storage_router_list = []

        for index, scrub_info in enumerate(scrub_locations.items()):
            start_index = index * len(vdisk_guids) / len(scrub_locations)
            end_index = (index + 1) * len(vdisk_guids) / len(scrub_locations)
            storage_router = scrub_info[0]
            vdisk_guids_to_scrub = list(vdisk_guids)[start_index:end_index]
            local = storage_router.machine_id == local_machineid
            logger.info(
                'Gather Scrub - Storage Router {0:<15} ({1}) - Scrubbing {2} virtual disks'
                .format(storage_router.ip,
                        'local' if local is True else 'remote',
                        len(vdisk_guids_to_scrub)))

            if local is True:
                local_storage_router = storage_router
                local_scrub_location = scrub_info[1]
                local_vdisks_to_scrub = vdisk_guids_to_scrub
            else:
                result_set.add(
                    ScheduledTaskController._execute_scrub_work.s(
                        scrub_location=scrub_info[1],
                        vdisk_guids=vdisk_guids_to_scrub).apply_async(
                            routing_key='sr.{0}'.format(
                                storage_router.machine_id)))
                storage_router_list.append(storage_router)

        # Remote tasks have been launched, now start the local task and then wait for remote tasks to finish
        processed_guids = []
        if local_scrub_location is not None and len(local_vdisks_to_scrub) > 0:
            try:
                processed_guids = ScheduledTaskController._execute_scrub_work(
                    scrub_location=local_scrub_location,
                    vdisk_guids=local_vdisks_to_scrub)
            except Exception as ex:
                logger.error(
                    'Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'
                    .format(local_storage_router.ip, ex))
        all_results = result_set.join(
            propagate=False
        )  # Propagate False makes sure all jobs are waited for even when 1 or more jobs fail
        for index, result in enumerate(all_results):
            if isinstance(result, list):
                processed_guids.extend(result)
            else:
                logger.error(
                    'Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'
                    .format(storage_router_list[index].ip, result))
        if len(processed_guids) != len(vdisk_guids) or set(processed_guids).difference(vdisk_guids):
            raise RuntimeError('Scrubbing failed for 1 or more storagerouters')
        logger.info('Gather Scrub - Finished')
Example #12
    def deletescrubsnapshots(timestamp=None):
        """
        Delete snapshots & scrubbing policy

        Implemented delete snapshot policy:
        < 1d | 1d bucket | 1 | best of bucket   | 1d
        < 1w | 1d bucket | 6 | oldest of bucket | 7d = 1w
        < 1m | 1w bucket | 3 | oldest of bucket | 4w = 1m
        > 1m | delete
        """

        logger.info('Delete snapshots started')

        day = 60 * 60 * 24
        week = day * 7

        # Calculate bucket structure
        if timestamp is None:
            timestamp = time.time()
        offset = int(mktime(datetime.fromtimestamp(timestamp).date().timetuple())) - day
        buckets = []
        # Buckets first 7 days: [0-1[, [1-2[, [2-3[, [3-4[, [4-5[, [5-6[, [6-7[
        for i in xrange(0, 7):
            buckets.append({'start': offset - (day * i),
                            'end': offset - (day * (i + 1)),
                            'type': '1d',
                            'snapshots': []})
        # Week buckets next 3 weeks: [7-14[, [14-21[, [21-28[
        for i in xrange(1, 4):
            buckets.append({'start': offset - (week * i),
                            'end': offset - (week * (i + 1)),
                            'type': '1w',
                            'snapshots': []})
        buckets.append({'start': offset - (week * 4),
                        'end': 0,
                        'type': 'rest',
                        'snapshots': []})

        # Place all snapshots in bucket_chains
        bucket_chains = []
        for vmachine in VMachineList.get_customer_vmachines():
            if any(vd.info['object_type'] in ['BASE'] for vd in vmachine.vdisks):
                bucket_chain = copy.deepcopy(buckets)
                for snapshot in vmachine.snapshots:
                    timestamp = int(snapshot['timestamp'])
                    for bucket in bucket_chain:
                        if bucket['start'] >= timestamp > bucket['end']:
                            for diskguid, snapshotguid in snapshot['snapshots'].iteritems():
                                bucket['snapshots'].append({'timestamp': timestamp,
                                                            'snapshotid': snapshotguid,
                                                            'diskguid': diskguid,
                                                            'is_consistent': snapshot['is_consistent']})
                bucket_chains.append(bucket_chain)

        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info['object_type'] in ['BASE']:
                bucket_chain = copy.deepcopy(buckets)
                for snapshot in vdisk.snapshots:
                    timestamp = int(snapshot['timestamp'])
                    for bucket in bucket_chain:
                        if bucket['start'] >= timestamp > bucket['end']:
                            bucket['snapshots'].append({'timestamp': timestamp,
                                                        'snapshotid': snapshot['guid'],
                                                        'diskguid': vdisk.guid,
                                                        'is_consistent': snapshot['is_consistent']})
                bucket_chains.append(bucket_chain)

        # Clean out the snapshot bucket_chains, we delete the snapshots we want to keep
        # And we'll remove all snapshots that remain in the buckets
        for bucket_chain in bucket_chains:
            first = True
            for bucket in bucket_chain:
                if first is True:
                    best = None
                    for snapshot in bucket['snapshots']:
                        if best is None:
                            best = snapshot
                        # Consistent is better than inconsistent
                        elif snapshot['is_consistent'] and not best['is_consistent']:
                            best = snapshot
                        # Newer (larger timestamp) is better than older snapshots
                        elif snapshot['is_consistent'] == best['is_consistent'] and \
                                snapshot['timestamp'] > best['timestamp']:
                            best = snapshot
                    bucket['snapshots'] = [s for s in bucket['snapshots'] if
                                           s['timestamp'] != best['timestamp']]
                    first = False
                elif bucket['end'] > 0:
                    oldest = None
                    for snapshot in bucket['snapshots']:
                        if oldest is None:
                            oldest = snapshot
                        # Older (smaller timestamp) is the one we want to keep
                        elif snapshot['timestamp'] < oldest['timestamp']:
                            oldest = snapshot
                    bucket['snapshots'] = [s for s in bucket['snapshots'] if
                                           s['timestamp'] != oldest['timestamp']]

        # Delete obsolete snapshots
        for bucket_chain in bucket_chains:
            for bucket in bucket_chain:
                for snapshot in bucket['snapshots']:
                    VDiskController.delete_snapshot(diskguid=snapshot['diskguid'],
                                                    snapshotid=snapshot['snapshotid'])

        logger.info('Delete snapshots finished')
        logger.info('Scrubbing started')

        vdisks = []
        for vmachine in VMachineList.get_customer_vmachines():
            for vdisk in vmachine.vdisks:
                if vdisk.info['object_type'] in ['BASE'] and len(vdisk.child_vdisks) == 0:
                    vdisks.append(vdisk)
        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info['object_type'] in ['BASE'] and len(vdisk.child_vdisks) == 0:
                vdisks.append(vdisk)

        total = 0
        failed = 0
        skipped = 0
        storagedrivers = {}
        for vdisk in vdisks:
            try:
                total += 1
                # Load the vDisk's StorageDriver
                vdisk.invalidate_dynamics(['info', 'storagedriver_id'])
                if vdisk.storagedriver_id not in storagedrivers:
                    storagedrivers[vdisk.storagedriver_id] = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
                storagedriver = storagedrivers[vdisk.storagedriver_id]
                # Load the vDisk's MDS configuration
                vdisk.invalidate_dynamics(['info'])
                configs = vdisk.info['metadata_backend_config']
                if len(configs) == 0:
                    raise RuntimeError('Could not load MDS configuration')
                if configs[0]['ip'] != storagedriver.storagerouter.ip:
                    # The MDS master is not local. Trigger an MDS handover and try again
                    logger.debug('MDS for volume {0} is not local. Trigger handover'.format(vdisk.volume_id))
                    MDSServiceController.ensure_safety(vdisk)
                    vdisk.invalidate_dynamics(['info'])
                    configs = vdisk.info['metadata_backend_config']
                    if len(configs) == 0:
                        raise RuntimeError('Could not load MDS configuration')
                    if configs[0]['ip'] != storagedriver.storagerouter.ip:
                        skipped += 1
                        logger.info('Skipping scrubbing work unit for volume {0}: MDS master is not local'.format(
                            vdisk.volume_id
                        ))
                        continue
                work_units = vdisk.storagedriver_client.get_scrubbing_workunits(str(vdisk.volume_id))
                for work_unit in work_units:
                    scrubbing_result = _storagedriver_scrubber.scrub(work_unit, str(storagedriver.mountpoint_temp))
                    vdisk.storagedriver_client.apply_scrubbing_result(scrubbing_result)
            except Exception as ex:
                failed += 1
                logger.info('Failed scrubbing work unit for volume {0}: {1}'.format(
                    vdisk.volume_id, ex
                ))
    def deletescrubsnapshots(timestamp=None):
        """
        Delete snapshots & scrubbing policy

        Implemented delete snapshot policy:
        < 1d | 1d bucket | 1 | best of bucket   | 1d
        < 1w | 1d bucket | 6 | oldest of bucket | 7d = 1w
        < 1m | 1w bucket | 3 | oldest of bucket | 4w = 1m
        > 1m | delete
        """

        logger.info('Delete snapshots started')

        day = 60 * 60 * 24
        week = day * 7

        # Calculate bucket structure
        if timestamp is None:
            timestamp = time.time()
        offset = int(
            mktime(datetime.fromtimestamp(timestamp).date().timetuple())) - day
        buckets = []
        # Buckets first 7 days: [0-1[, [1-2[, [2-3[, [3-4[, [4-5[, [5-6[, [6-7[
        for i in xrange(0, 7):
            buckets.append({
                'start': offset - (day * i),
                'end': offset - (day * (i + 1)),
                'type': '1d',
                'snapshots': []
            })
        # Week buckets next 3 weeks: [7-14[, [14-21[, [21-28[
        for i in xrange(1, 4):
            buckets.append({
                'start': offset - (week * i),
                'end': offset - (week * (i + 1)),
                'type': '1w',
                'snapshots': []
            })
        buckets.append({
            'start': offset - (week * 4),
            'end': 0,
            'type': 'rest',
            'snapshots': []
        })

        # Place all snapshots in bucket_chains
        bucket_chains = []
        for vmachine in VMachineList.get_customer_vmachines():
            if any(vd.info['object_type'] in ['BASE']
                   for vd in vmachine.vdisks):
                bucket_chain = copy.deepcopy(buckets)
                for snapshot in vmachine.snapshots:
                    timestamp = int(snapshot['timestamp'])
                    for bucket in bucket_chain:
                        if bucket['start'] >= timestamp > bucket['end']:
                            for diskguid, snapshotguid in snapshot['snapshots'].iteritems():
                                bucket['snapshots'].append({'timestamp': timestamp,
                                                            'snapshotid': snapshotguid,
                                                            'diskguid': diskguid,
                                                            'is_consistent': snapshot['is_consistent']})
                bucket_chains.append(bucket_chain)

        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info['object_type'] in ['BASE']:
                bucket_chain = copy.deepcopy(buckets)
                for snapshot in vdisk.snapshots:
                    timestamp = int(snapshot['timestamp'])
                    for bucket in bucket_chain:
                        if bucket['start'] >= timestamp > bucket['end']:
                            bucket['snapshots'].append({'timestamp': timestamp,
                                                        'snapshotid': snapshot['guid'],
                                                        'diskguid': vdisk.guid,
                                                        'is_consistent': snapshot['is_consistent']})
                bucket_chains.append(bucket_chain)

        # Clean out the snapshot bucket_chains, we delete the snapshots we want to keep
        # And we'll remove all snapshots that remain in the buckets
        for bucket_chain in bucket_chains:
            first = True
            for bucket in bucket_chain:
                if first is True:
                    best = None
                    for snapshot in bucket['snapshots']:
                        if best is None:
                            best = snapshot
                        # Consistent is better than inconsistent
                        elif snapshot['is_consistent'] and not best['is_consistent']:
                            best = snapshot
                        # Newer (larger timestamp) is better than older snapshots
                        elif snapshot['is_consistent'] == best['is_consistent'] and \
                                snapshot['timestamp'] > best['timestamp']:
                            best = snapshot
                    bucket['snapshots'] = [
                        s for s in bucket['snapshots']
                        if s['timestamp'] != best['timestamp']
                    ]
                    first = False
                elif bucket['end'] > 0:
                    oldest = None
                    for snapshot in bucket['snapshots']:
                        if oldest is None:
                            oldest = snapshot
                        # Older (smaller timestamp) is the one we want to keep
                        elif snapshot['timestamp'] < oldest['timestamp']:
                            oldest = snapshot
                    bucket['snapshots'] = [
                        s for s in bucket['snapshots']
                        if s['timestamp'] != oldest['timestamp']
                    ]

        # Delete obsolete snapshots
        for bucket_chain in bucket_chains:
            for bucket in bucket_chain:
                for snapshot in bucket['snapshots']:
                    VDiskController.delete_snapshot(
                        diskguid=snapshot['diskguid'],
                        snapshotid=snapshot['snapshotid'])

        logger.info('Delete snapshots finished')
        logger.info('Scrubbing started')

        vdisks = []
        for vmachine in VMachineList.get_customer_vmachines():
            for vdisk in vmachine.vdisks:
                if vdisk.info['object_type'] in ['BASE'] and len(vdisk.child_vdisks) == 0:
                    vdisks.append(vdisk)
        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info['object_type'] in ['BASE'] and len(vdisk.child_vdisks) == 0:
                vdisks.append(vdisk)

        total = 0
        failed = 0
        skipped = 0
        storagedrivers = {}
        for vdisk in vdisks:
            try:
                total += 1
                # Load the vDisk's StorageDriver
                vdisk.invalidate_dynamics(['info', 'storagedriver_id'])
                if vdisk.storagedriver_id not in storagedrivers:
                    storagedrivers[vdisk.storagedriver_id] = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
                storagedriver = storagedrivers[vdisk.storagedriver_id]
                # Load the vDisk's MDS configuration
                vdisk.invalidate_dynamics(['info'])
                configs = vdisk.info['metadata_backend_config']
                if len(configs) == 0:
                    raise RuntimeError('Could not load MDS configuration')
                if configs[0]['ip'] != storagedriver.storagerouter.ip:
                    # The MDS master is not local. Trigger an MDS handover and try again
                    logger.debug('MDS for volume {0} is not local. Trigger handover'.format(vdisk.volume_id))
                    MDSServiceController.ensure_safety(vdisk)
                    vdisk.invalidate_dynamics(['info'])
                    configs = vdisk.info['metadata_backend_config']
                    if len(configs) == 0:
                        raise RuntimeError('Could not load MDS configuration')
                    if configs[0]['ip'] != storagedriver.storagerouter.ip:
                        skipped += 1
                        logger.info('Skipping scrubbing work unit for volume {0}: MDS master is not local'.format(vdisk.volume_id))
                        continue
                work_units = vdisk.storagedriver_client.get_scrubbing_workunits(str(vdisk.volume_id))
                for work_unit in work_units:
                    scrubbing_result = _storagedriver_scrubber.scrub(work_unit, str(storagedriver.mountpoint_temp))
                    vdisk.storagedriver_client.apply_scrubbing_result(scrubbing_result)
            except Exception as ex:
                failed += 1
                logger.info('Failed scrubbing work unit for volume {0}: {1}'.format(vdisk.volume_id, ex))
Example #14
    def delete_snapshots(timestamp=None):
        """
        Delete snapshots & scrubbing policy

        Implemented delete snapshot policy:
        < 1d | 1d bucket | 1 | best of bucket   | 1d
        < 1w | 1d bucket | 6 | oldest of bucket | 7d = 1w
        < 1m | 1w bucket | 3 | oldest of bucket | 4w = 1m
        > 1m | delete

        :param timestamp: Timestamp to determine whether snapshots should be kept or not, if none provided, current time will be used
        """

        logger.info("Delete snapshots started")

        day = timedelta(1)
        week = day * 7

        def make_timestamp(offset):
            return int(mktime((base - offset).timetuple()))

        # Calculate bucket structure
        if timestamp is None:
            timestamp = time.time()
        base = datetime.fromtimestamp(timestamp).date() - day
        buckets = []
        # Buckets first 7 days: [0-1[, [1-2[, [2-3[, [3-4[, [4-5[, [5-6[, [6-7[
        for i in xrange(0, 7):
            buckets.append(
                {"start": make_timestamp(day * i), "end": make_timestamp(day * (i + 1)), "type": "1d", "snapshots": []}
            )
        # Week buckets next 3 weeks: [7-14[, [14-21[, [21-28[
        for i in xrange(1, 4):
            buckets.append(
                {
                    "start": make_timestamp(week * i),
                    "end": make_timestamp(week * (i + 1)),
                    "type": "1w",
                    "snapshots": [],
                }
            )
        buckets.append({"start": make_timestamp(week * 4), "end": 0, "type": "rest", "snapshots": []})

        # Place all snapshots in bucket_chains
        bucket_chains = []
        for vmachine in VMachineList.get_customer_vmachines():
            if any(vd.info["object_type"] in ["BASE"] for vd in vmachine.vdisks):
                bucket_chain = copy.deepcopy(buckets)
                for snapshot in vmachine.snapshots:
                    timestamp = int(snapshot["timestamp"])
                    for bucket in bucket_chain:
                        if bucket["start"] >= timestamp > bucket["end"]:
                            for diskguid, snapshotguid in snapshot["snapshots"].iteritems():
                                bucket["snapshots"].append(
                                    {
                                        "timestamp": timestamp,
                                        "snapshotid": snapshotguid,
                                        "diskguid": diskguid,
                                        "is_consistent": snapshot["is_consistent"],
                                    }
                                )
                bucket_chains.append(bucket_chain)

        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info["object_type"] in ["BASE"]:
                bucket_chain = copy.deepcopy(buckets)
                for snapshot in vdisk.snapshots:
                    timestamp = int(snapshot["timestamp"])
                    for bucket in bucket_chain:
                        if bucket["start"] >= timestamp > bucket["end"]:
                            bucket["snapshots"].append(
                                {
                                    "timestamp": timestamp,
                                    "snapshotid": snapshot["guid"],
                                    "diskguid": vdisk.guid,
                                    "is_consistent": snapshot["is_consistent"],
                                }
                            )
                bucket_chains.append(bucket_chain)

        # Clean out the snapshot bucket_chains, we delete the snapshots we want to keep
        # And we'll remove all snapshots that remain in the buckets
        for bucket_chain in bucket_chains:
            first = True
            for bucket in bucket_chain:
                if first is True:
                    best = None
                    for snapshot in bucket["snapshots"]:
                        if best is None:
                            best = snapshot
                        # Consistent is better than inconsistent
                        elif snapshot["is_consistent"] and not best["is_consistent"]:
                            best = snapshot
                        # Newer (larger timestamp) is better than older snapshots
                        elif (
                            snapshot["is_consistent"] == best["is_consistent"]
                            and snapshot["timestamp"] > best["timestamp"]
                        ):
                            best = snapshot
                    bucket["snapshots"] = [s for s in bucket["snapshots"] if s["timestamp"] != best["timestamp"]]
                    first = False
                elif bucket["end"] > 0:
                    oldest = None
                    for snapshot in bucket["snapshots"]:
                        if oldest is None:
                            oldest = snapshot
                        # Older (smaller timestamp) is the one we want to keep
                        elif snapshot["timestamp"] < oldest["timestamp"]:
                            oldest = snapshot
                    bucket["snapshots"] = [s for s in bucket["snapshots"] if s["timestamp"] != oldest["timestamp"]]

        # Delete obsolete snapshots
        for bucket_chain in bucket_chains:
            for bucket in bucket_chain:
                for snapshot in bucket["snapshots"]:
                    VDiskController.delete_snapshot(diskguid=snapshot["diskguid"], snapshotid=snapshot["snapshotid"])
        logger.info("Delete snapshots finished")
Example #15
    def gather_scrub_work():
        logger.info("Divide scrubbing work among allowed Storage Routers")

        scrub_locations = {}
        for storage_driver in StorageDriverList.get_storagedrivers():
            for partition in storage_driver.partitions:
                if DiskPartition.ROLES.SCRUB == partition.role:
                    logger.info(
                        "Scrub partition found on Storage Router {0}: {1}".format(storage_driver.name, partition.folder)
                    )
                    if storage_driver.storagerouter not in scrub_locations:
                        try:
                            _ = SSHClient(storage_driver.storagerouter.ip)
                            scrub_locations[storage_driver.storagerouter] = str(partition.path)
                        except UnableToConnectException:
                            logger.warning("StorageRouter {0} is not reachable".format(storage_driver.storagerouter.ip))

        if len(scrub_locations) == 0:
            raise RuntimeError("No scrub locations found")

        vdisk_guids = set()
        for vmachine in VMachineList.get_customer_vmachines():
            for vdisk in vmachine.vdisks:
                if vdisk.info["object_type"] in ["BASE"] and len(vdisk.child_vdisks) == 0:
                    vdisk_guids.add(vdisk.guid)
        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info["object_type"] in ["BASE"] and len(vdisk.child_vdisks) == 0:
                vdisk_guids.add(vdisk.guid)

        logger.info("Found {0} virtual disks which need to be check for scrub work".format(len(vdisk_guids)))
        local_machineid = System.get_my_machine_id()
        local_scrub_location = None
        local_vdisks_to_scrub = []
        result_set = ResultSet([])
        storage_router_list = []

        for index, scrub_info in enumerate(scrub_locations.items()):
            start_index = index * len(vdisk_guids) / len(scrub_locations)
            end_index = (index + 1) * len(vdisk_guids) / len(scrub_locations)
            storage_router = scrub_info[0]
            vdisk_guids_to_scrub = list(vdisk_guids)[start_index:end_index]
            local = storage_router.machine_id == local_machineid
            logger.info(
                "Executing scrub work on {0} Storage Router {1} for {2} virtual disks".format(
                    "local" if local is True else "remote", storage_router.name, len(vdisk_guids_to_scrub)
                )
            )

            if local is True:
                local_scrub_location = scrub_info[1]
                local_vdisks_to_scrub = vdisk_guids_to_scrub
            else:
                result_set.add(
                    ScheduledTaskController._execute_scrub_work.s(
                        scrub_location=scrub_info[1], vdisk_guids=vdisk_guids_to_scrub
                    ).apply_async(routing_key="sr.{0}".format(storage_router.machine_id))
                )
                storage_router_list.append(storage_router)
                logger.info("Launched scrub task on Storage Router {0}".format(storage_router.name))

        # Remote tasks have been launched, now start the local task and then wait for remote tasks to finish
        if local_scrub_location is not None and len(local_vdisks_to_scrub) > 0:
            ScheduledTaskController._execute_scrub_work(
                scrub_location=local_scrub_location, vdisk_guids=local_vdisks_to_scrub
            )
        all_results = result_set.join(
            propagate=False
        )  # Propagate False makes sure all jobs are waited for even when 1 or more jobs fail
        for index, result in enumerate(all_results):
            if result is not None:
                logger.error(
                    "Scrubbing failed on Storage Router {0} with error {1}".format(
                        storage_router_list[index].name, result
                    )
                )
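The slicing arithmetic in the loop above splits the vdisk guids into one contiguous slice per scrub location, covering every guid exactly once even when the counts do not divide evenly. A standalone sketch of just that arithmetic (the partition helper is ours; the example relies on Python 2's integer /, written here as the explicit //):

def partition(items, chunk_count):
    # Split items into chunk_count contiguous slices whose lengths differ
    # by at most one; together they cover every item exactly once.
    items = list(items)
    chunks = []
    for index in range(chunk_count):
        start = index * len(items) // chunk_count
        end = (index + 1) * len(items) // chunk_count
        chunks.append(items[start:end])
    return chunks


print(partition(range(10), 3))  # [[0, 1, 2], [3, 4, 5], [6, 7, 8, 9]]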
Example #16
    def delete_snapshots(timestamp=None):
        """
        Delete snapshots & scrubbing policy

        Implemented delete snapshot policy:
        < 1d | 1d bucket | 1 | best of bucket   | 1d
        < 1w | 1d bucket | 6 | oldest of bucket | 7d = 1w
        < 1m | 1w bucket | 3 | oldest of bucket | 4w = 1m
        > 1m | delete

        :param timestamp: Timestamp to determine whether snapshots should be kept; if not provided, the current time is used
        """

        logger.info('Delete snapshots started')

        day = timedelta(1)
        week = day * 7

        def make_timestamp(offset):
            """
            Create an integer based timestamp
            :param offset: Offset in days
            :return: Timestamp
            """
            return int(mktime((base - offset).timetuple()))

        # Calculate bucket structure
        if timestamp is None:
            timestamp = time.time()
        base = datetime.fromtimestamp(timestamp).date() - day
        buckets = []
        # Buckets first 7 days: [0-1[, [1-2[, [2-3[, [3-4[, [4-5[, [5-6[, [6-7[
        for i in xrange(0, 7):
            buckets.append({'start': make_timestamp(day * i),
                            'end': make_timestamp(day * (i + 1)),
                            'type': '1d',
                            'snapshots': []})
        # Week buckets next 3 weeks: [7-14[, [14-21[, [21-28[
        for i in xrange(1, 4):
            buckets.append({'start': make_timestamp(week * i),
                            'end': make_timestamp(week * (i + 1)),
                            'type': '1w',
                            'snapshots': []})
        buckets.append({'start': make_timestamp(week * 4),
                        'end': 0,
                        'type': 'rest',
                        'snapshots': []})

        # Place all snapshots in bucket_chains
        bucket_chains = []
        for vmachine in VMachineList.get_customer_vmachines():
            if any(vd.info['object_type'] in ['BASE'] for vd in vmachine.vdisks):
                bucket_chain = copy.deepcopy(buckets)
                for snapshot in vmachine.snapshots:
                    if snapshot.get('is_sticky') is True:
                        continue
                    timestamp = int(snapshot['timestamp'])
                    for bucket in bucket_chain:
                        if bucket['start'] >= timestamp > bucket['end']:
                            for diskguid, snapshotguid in snapshot['snapshots'].iteritems():
                                bucket['snapshots'].append({'timestamp': timestamp,
                                                            'snapshotid': snapshotguid,
                                                            'diskguid': diskguid,
                                                            'is_consistent': snapshot['is_consistent']})
                bucket_chains.append(bucket_chain)

        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info['object_type'] in ['BASE']:
                bucket_chain = copy.deepcopy(buckets)
                for snapshot in vdisk.snapshots:
                    if snapshot.get('is_sticky') is True:
                        continue
                    timestamp = int(snapshot['timestamp'])
                    for bucket in bucket_chain:
                        if bucket['start'] >= timestamp > bucket['end']:
                            bucket['snapshots'].append({'timestamp': timestamp,
                                                        'snapshotid': snapshot['guid'],
                                                        'diskguid': vdisk.guid,
                                                        'is_consistent': snapshot['is_consistent']})
                bucket_chains.append(bucket_chain)

        # Clean out the snapshot bucket_chains, we delete the snapshots we want to keep
        # And we'll remove all snapshots that remain in the buckets
        for bucket_chain in bucket_chains:
            first = True
            for bucket in bucket_chain:
                if first is True:
                    best = None
                    for snapshot in bucket['snapshots']:
                        if best is None:
                            best = snapshot
                        # Consistent is better than inconsistent
                        elif snapshot['is_consistent'] and not best['is_consistent']:
                            best = snapshot
                        # Newer (larger timestamp) is better than older snapshots
                        elif snapshot['is_consistent'] == best['is_consistent'] and \
                                snapshot['timestamp'] > best['timestamp']:
                            best = snapshot
                    bucket['snapshots'] = [s for s in bucket['snapshots'] if
                                           s['timestamp'] != best['timestamp']]
                    first = False
                elif bucket['end'] > 0:
                    oldest = None
                    for snapshot in bucket['snapshots']:
                        if oldest is None:
                            oldest = snapshot
                        # Older (smaller timestamp) is the one we want to keep
                        elif snapshot['timestamp'] < oldest['timestamp']:
                            oldest = snapshot
                    bucket['snapshots'] = [s for s in bucket['snapshots'] if
                                           s['timestamp'] != oldest['timestamp']]

        # Delete obsolete snapshots
        for bucket_chain in bucket_chains:
            for bucket in bucket_chain:
                for snapshot in bucket['snapshots']:
                    VDiskController.delete_snapshot(diskguid=snapshot['diskguid'],
                                                    snapshotid=snapshot['snapshotid'])
        logger.info('Delete snapshots finished')
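Compared to Example #14, this variant skips sticky snapshots entirely (they never enter a bucket, so they are never deleted) but keeps the same selection rules. The "best of bucket" rule applied to the newest day bucket can be isolated as below; the sample snapshots are made up:

def pick_best(snapshots):
    # Prefer consistent over inconsistent; among equals, prefer the newest.
    best = None
    for snapshot in snapshots:
        if best is None:
            best = snapshot
        elif snapshot['is_consistent'] and not best['is_consistent']:
            best = snapshot
        elif snapshot['is_consistent'] == best['is_consistent'] and snapshot['timestamp'] > best['timestamp']:
            best = snapshot
    return best


snapshots = [{'timestamp': 100, 'is_consistent': False},
             {'timestamp': 200, 'is_consistent': True},
             {'timestamp': 300, 'is_consistent': False}]
print(pick_best(snapshots))  # the consistent one at 200: consistency beats recency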
Example #17
    def delete_snapshots(timestamp=None):
        """
        Delete snapshots & scrubbing policy

        Implemented delete snapshot policy:
        < 1d | 1d bucket | 1 | best of bucket   | 1d
        < 1w | 1d bucket | 6 | oldest of bucket | 7d = 1w
        < 1m | 1w bucket | 3 | oldest of bucket | 4w = 1m
        > 1m | delete

        :param timestamp: Timestamp to determine whether snapshots should be kept; if not provided, the current time is used
        """

        logger.info('Delete snapshots started')

        day = timedelta(1)
        week = day * 7

        def make_timestamp(offset):
            """
            Create an integer based timestamp
            :param offset: Offset in days
            :return: Timestamp
            """
            return int(mktime((base - offset).timetuple()))

        # Calculate bucket structure
        if timestamp is None:
            timestamp = time.time()
        base = datetime.fromtimestamp(timestamp).date() - day
        buckets = []
        # Buckets first 7 days: [0-1[, [1-2[, [2-3[, [3-4[, [4-5[, [5-6[, [6-7[
        for i in xrange(0, 7):
            buckets.append({
                'start': make_timestamp(day * i),
                'end': make_timestamp(day * (i + 1)),
                'type': '1d',
                'snapshots': []
            })
        # Week buckets next 3 weeks: [7-14[, [14-21[, [21-28[
        for i in xrange(1, 4):
            buckets.append({
                'start': make_timestamp(week * i),
                'end': make_timestamp(week * (i + 1)),
                'type': '1w',
                'snapshots': []
            })
        buckets.append({
            'start': make_timestamp(week * 4),
            'end': 0,
            'type': 'rest',
            'snapshots': []
        })

        # Place all snapshots in bucket_chains
        bucket_chains = []
        for vmachine in VMachineList.get_customer_vmachines():
            if any(vd.info['object_type'] in ['BASE']
                   for vd in vmachine.vdisks):
                bucket_chain = copy.deepcopy(buckets)
                for snapshot in vmachine.snapshots:
                    if snapshot.get('is_sticky') is True:
                        continue
                    timestamp = int(snapshot['timestamp'])
                    for bucket in bucket_chain:
                        if bucket['start'] >= timestamp > bucket['end']:
                            for diskguid, snapshotguid in snapshot['snapshots'].iteritems():
                                bucket['snapshots'].append({
                                    'timestamp': timestamp,
                                    'snapshotid': snapshotguid,
                                    'diskguid': diskguid,
                                    'is_consistent': snapshot['is_consistent']
                                })
                bucket_chains.append(bucket_chain)

        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info['object_type'] in ['BASE']:
                bucket_chain = copy.deepcopy(buckets)
                for snapshot in vdisk.snapshots:
                    if snapshot.get('is_sticky') is True:
                        continue
                    timestamp = int(snapshot['timestamp'])
                    for bucket in bucket_chain:
                        if bucket['start'] >= timestamp > bucket['end']:
                            bucket['snapshots'].append({
                                'timestamp': timestamp,
                                'snapshotid': snapshot['guid'],
                                'diskguid': vdisk.guid,
                                'is_consistent': snapshot['is_consistent']
                            })
                bucket_chains.append(bucket_chain)

        # Clean out the snapshot bucket_chains, we delete the snapshots we want to keep
        # And we'll remove all snapshots that remain in the buckets
        for bucket_chain in bucket_chains:
            first = True
            for bucket in bucket_chain:
                if first is True:
                    best = None
                    for snapshot in bucket['snapshots']:
                        if best is None:
                            best = snapshot
                        # Consistent is better than inconsistent
                        elif snapshot['is_consistent'] and not best['is_consistent']:
                            best = snapshot
                        # Newer (larger timestamp) is better than older snapshots
                        elif snapshot['is_consistent'] == best['is_consistent'] and \
                                snapshot['timestamp'] > best['timestamp']:
                            best = snapshot
                    bucket['snapshots'] = [
                        s for s in bucket['snapshots']
                        if s['timestamp'] != best['timestamp']
                    ]
                    first = False
                elif bucket['end'] > 0:
                    oldest = None
                    for snapshot in bucket['snapshots']:
                        if oldest is None:
                            oldest = snapshot
                        # Older (smaller timestamp) is the one we want to keep
                        elif snapshot['timestamp'] < oldest['timestamp']:
                            oldest = snapshot
                    bucket['snapshots'] = [
                        s for s in bucket['snapshots']
                        if s['timestamp'] != oldest['timestamp']
                    ]

        # Delete obsolete snapshots
        for bucket_chain in bucket_chains:
            for bucket in bucket_chain:
                for snapshot in bucket['snapshots']:
                    VDiskController.delete_snapshot(
                        diskguid=snapshot['diskguid'],
                        snapshotid=snapshot['snapshotid'])
        logger.info('Delete snapshots finished')
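Both retention variants take a copy.deepcopy of the bucket template for every vmachine and every standalone vdisk. A quick sketch of why a shallow copy would not do here: the nested 'snapshots' lists would be shared between chains ('guid-a' is just sample data):

import copy

template = [{'type': '1d', 'snapshots': []}]

shallow = list(template)                 # copies the outer list, not the dicts
shallow[0]['snapshots'].append('guid-a')
print(template[0]['snapshots'])          # ['guid-a'] - the template was mutated

template = [{'type': '1d', 'snapshots': []}]
deep = copy.deepcopy(template)           # copies the dicts and inner lists too
deep[0]['snapshots'].append('guid-a')
print(template[0]['snapshots'])          # [] - the template is untouched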
Example #18
    def gather_scrub_work():
        """
        Retrieve and execute scrub work
        :return: None
        """
        ScheduledTaskController._logger.info('Gather Scrub - Started')

        scrub_locations = {}
        for storage_driver in StorageDriverList.get_storagedrivers():
            for partition in storage_driver.partitions:
                if DiskPartition.ROLES.SCRUB == partition.role:
                    ScheduledTaskController._logger.info(
                        'Gather Scrub - Storage Router {0:<15} has SCRUB partition at {1}'
                        .format(storage_driver.storagerouter.ip,
                                partition.path))
                    if storage_driver.storagerouter not in scrub_locations:
                        try:
                            sshclient = SSHClient(storage_driver.storagerouter)
                            # Use ServiceManager(sshclient) to make sure ovs-workers are actually running
                            if ServiceManager.get_service_status('workers', sshclient) is False:
                                ScheduledTaskController._logger.warning(
                                    'Gather Scrub - Storage Router {0:<15} - workers are not running'
                                    .format(storage_driver.storagerouter.ip))
                            else:
                                scrub_locations[storage_driver.storagerouter] = str(partition.path)
                        except UnableToConnectException:
                            ScheduledTaskController._logger.warning(
                                'Gather Scrub - Storage Router {0:<15} is not reachable'
                                .format(storage_driver.storagerouter.ip))

        if len(scrub_locations) == 0:
            raise RuntimeError('No scrub locations found')

        vdisk_guids = set()
        for vmachine in VMachineList.get_customer_vmachines():
            for vdisk in vmachine.vdisks:
                if vdisk.info['object_type'] == 'BASE':
                    vdisk_guids.add(vdisk.guid)
        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info['object_type'] == 'BASE':
                vdisk_guids.add(vdisk.guid)

        if len(vdisk_guids) == 0:
            ScheduledTaskController._logger.info('Gather Scrub - No scrub work needed')
            return

        ScheduledTaskController._logger.info(
            'Gather Scrub - Checking {0} volumes for scrub work'.format(
                len(vdisk_guids)))
        local_machineid = System.get_my_machine_id()
        local_storage_router = None
        local_scrub_location = None
        local_vdisks_to_scrub = []
        result_set = {}
        storage_router_list = []
        scrub_map = {}

        for index, scrub_info in enumerate(scrub_locations.items()):
            start_index = index * len(vdisk_guids) / len(scrub_locations)
            end_index = (index + 1) * len(vdisk_guids) / len(scrub_locations)
            storage_router = scrub_info[0]
            vdisk_guids_to_scrub = list(vdisk_guids)[start_index:end_index]
            local = storage_router.machine_id == local_machineid
            ScheduledTaskController._logger.info(
                'Gather Scrub - Storage Router {0:<15} ({1}) - Scrubbing {2} virtual disks'
                .format(storage_router.ip,
                        'local' if local is True else 'remote',
                        len(vdisk_guids_to_scrub)))

            if local is True:
                local_storage_router = storage_router
                local_scrub_location = scrub_info[1]
                local_vdisks_to_scrub = vdisk_guids_to_scrub
            else:
                result_set[storage_router.ip] = ScheduledTaskController._execute_scrub_work.s(
                    scrub_location=scrub_info[1],
                    vdisk_guids=vdisk_guids_to_scrub
                ).apply_async(routing_key='sr.{0}'.format(storage_router.machine_id))
                storage_router_list.append(storage_router)
                scrub_map[storage_router.ip] = vdisk_guids_to_scrub

        # Remote tasks have been launched, now start the local task and then wait for remote tasks to finish
        processed_guids = []
        if local_scrub_location is not None and len(local_vdisks_to_scrub) > 0:
            try:
                processed_guids = ScheduledTaskController._execute_scrub_work(
                    scrub_location=local_scrub_location,
                    vdisk_guids=local_vdisks_to_scrub)
            except Exception as ex:
                ScheduledTaskController._logger.error(
                    'Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'
                    .format(local_storage_router.ip, ex))

        all_results, failed_nodes = CeleryToolbox.manage_running_tasks(
            result_set,
            timesleep=60)  # Check every 60 seconds if tasks are still running

        for ip, result in all_results.iteritems():
            if isinstance(result, list):
                processed_guids.extend(result)
            else:
                ScheduledTaskController._logger.error(
                    'Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'
                    .format(ip, result))

        result_set = {}
        for failed_node in failed_nodes:
            ScheduledTaskController._logger.warning(
                'Scrubbing failed on node {0}. Will reschedule on another node.'
                .format(failed_node))
            vdisk_guids_to_scrub = scrub_map[failed_node]
            rescheduled_work = False
            for storage_router, scrub_location in scrub_locations.items():
                if storage_router.ip not in failed_nodes:
                    if storage_router.machine_id != local_machineid:
                        ScheduledTaskController._logger.info(
                            'Rescheduled scrub work from node {0} to node {1}.'
                            .format(failed_node, storage_router.ip))
                        result_set[storage_router.ip] = ScheduledTaskController._execute_scrub_work.s(
                            scrub_location=scrub_location,
                            vdisk_guids=vdisk_guids_to_scrub
                        ).apply_async(routing_key='sr.{0}'.format(storage_router.machine_id))
                        storage_router_list.append(storage_router)
                        rescheduled_work = True
                        break
            if rescheduled_work is False:
                if local_scrub_location is not None:
                    try:
                        processed_guids.extend(
                            ScheduledTaskController._execute_scrub_work(
                                scrub_location=local_scrub_location,
                                vdisk_guids=vdisk_guids_to_scrub))
                    except Exception as ex:
                        ScheduledTaskController._logger.error(
                            'Gather Scrub - Storage Router Local - Scrubbing failed with error:\n - {0}'
                            .format(ex))
                else:
                    ScheduledTaskController._logger.warning(
                        'No nodes left to reschedule work from node {0}'.format(failed_node))

        if len(result_set) > 0:
            all_results2, failed_nodes = CeleryToolbox.manage_running_tasks(
                result_set, timesleep=60
            )  # Check every 60 seconds if tasks are still running

            for ip, result in all_results2.iteritems():
                if isinstance(result, list):
                    processed_guids.extend(result)
                else:
                    ScheduledTaskController._logger.error(
                        'Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'
                        .format(ip, result))

        if len(set(processed_guids)) != len(vdisk_guids) or set(processed_guids).difference(vdisk_guids):
            raise RuntimeError('Scrubbing failed for 1 or more storagerouters')
        ScheduledTaskController._logger.info('Gather Scrub - Finished')
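The failure handling in this last variant reduces to a simple pattern: remember which guids went to which node (scrub_map), hand a failed node's slice to a surviving node, and finally verify that the processed set matches the expected set. A library-free sketch of that pattern, with made-up node names and run_on standing in for the Celery dispatch:

def redistribute(assignments, failed_nodes, run_on):
    # Re-run each failed node's slice on the first surviving node.
    processed = []
    for node, items in assignments.items():
        if node in failed_nodes:
            survivors = [n for n in assignments if n not in failed_nodes]
            if not survivors:
                raise RuntimeError('No nodes left to reschedule work from node {0}'.format(node))
            node = survivors[0]
        processed.extend(run_on(node, items))
    return processed


assignments = {'node-a': ['vd-1', 'vd-2'], 'node-b': ['vd-3']}
processed = redistribute(assignments, failed_nodes={'node-b'},
                         run_on=lambda node, items: items)
expected = [guid for items in assignments.values() for guid in items]
assert sorted(processed) == sorted(expected)  # nothing lost, nothing duplicated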