def _aws_devname_to_disk_drive(device):
    """Returns Win32_DiskDrive (wmi object) instance for corresponding
    aws device name (e.g. 'xvda', '/dev/xvdt').

    MUST BE CALLED AND Win32_DiskDrive RESULT USED ONLY IN coinitialized_context!!!

    Formula from here:
    https://blogs.aws.amazon.com/net/post/Tx3IY716LF05KK6/Stripe-Windows-Ephemeral-Disks-at-Launch
    """
    device = device.split('/')[-1]
    device_suffix = device[3:]
    if 1 == len(device_suffix):
        scsi_id = ord(device_suffix) - 97
    elif 2 == len(device_suffix):
        scsi_id = (ord(device_suffix[0]) - 96) * 26 + ord(device_suffix[1]) - 97
    else:
        raise storage2.StorageError(
            'Wrong AWS disk device name format: {}'.format(device))
    LOG.debug('Searching for disk with device={} and scsi_id={}'.format(
        device, scsi_id))
    c = wmi.WMI()
    try:
        return c.Win32_DiskDrive(SCSITargetId=scsi_id)[0]
    except (KeyError, IndexError):
        raise storage2.StorageError(
            'Disk device (SCSITargetId == {}) not found'.format(scsi_id))
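# Worked examples of the device-suffix -> SCSITargetId formula above. The helper
# below is purely illustrative (it is not part of the original module) and only
# replays the arithmetic from _aws_devname_to_disk_drive:
def _suffix_to_scsi_id(device):
    suffix = device.split('/')[-1][3:]
    if len(suffix) == 1:
        return ord(suffix) - 97                                   # 'a' -> 0, 't' -> 19
    return (ord(suffix[0]) - 96) * 26 + ord(suffix[1]) - 97       # 'aa' -> 26

assert _suffix_to_scsi_id('xvda') == 0
assert _suffix_to_scsi_id('/dev/xvdt') == 19
assert _suffix_to_scsi_id('xvdaa') == 26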
def check_growth(self, **growth):
    if int(self.level) in (0, 10):
        raise storage2.StorageError("Raid%s doesn't support growth" % self.level)

    disk_growth = growth.get('disks')

    change_disks = False
    if disk_growth:
        for disk_cfg_or_obj in self.disks:
            disk = storage2.volume(disk_cfg_or_obj)
            try:
                disk.check_growth(**disk_growth)
                change_disks = True
            except storage2.NoOpError:
                pass

    new_len = growth.get('disks_count')
    current_len = len(self.disks)
    change_size = new_len and int(new_len) != current_len

    if not change_size and not change_disks:
        raise storage2.NoOpError('Configurations are equal. Nothing to do')

    if change_size and int(new_len) < current_len:
        raise storage2.StorageError('Disk count can only be increased.')

    if change_size and int(self.level) in (0, 10):
        raise storage2.StorageError("Can't add disks to raid level %s" % self.level)
def check_growth(self, **growth):
    size = growth.get('size')
    target_size = int(size or self.size)

    ebs_type = growth.get('volume_type')
    target_type = ebs_type or self.volume_type

    iops = growth.get('iops')
    target_iops = iops or self.iops

    change_type = ebs_type and ebs_type != self.volume_type
    change_size = size and size != self.size
    change_iops = iops and iops != self.iops

    if not (change_size or change_type or change_iops):
        raise storage2.NoOpError('New ebs volume configuration is equal'
                                 ' to present. Nothing to do.')

    if target_iops and (target_type != 'io1'):
        raise storage2.StorageError('EBS iops can only be used with '
                                    'io1 volume type')

    if 'io1' == target_type and not target_iops:
        raise storage2.StorageError('Iops parameter must be specified '
                                    'for io1 volumes')

    if size and int(size) < self.size:
        raise storage2.StorageError('New size is smaller than old.')
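# Minimal usage sketch for the EBS check_growth() rules above. The 'ebs' type
# string and the volume parameters are illustrative assumptions, not taken from
# the original sources:
#
#   vol = storage2.volume(type='ebs', id='vol-12345678', size=10,
#                         volume_type='standard', iops=None)
#   vol.check_growth(size=20, volume_type='io1', iops=100)  # ok: size, type and iops change
#   vol.check_growth(iops=100)           # StorageError: iops require the io1 volume type
#   vol.check_growth(volume_type='io1')  # StorageError: io1 requires an iops value
#   vol.check_growth(size=5)             # StorageError: new size is smaller than old
#   vol.check_growth(size=10)            # NoOpError: configuration is unchanged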
def check_growth(self, **growth):
    size = growth.get('size')
    if not size:
        raise storage2.StorageError('Size argument is missing '
                                    'from grow config')
    if float(size) < float(self.size):
        raise storage2.StorageError('New loop device size is less than '
                                    'current.')
def _ensure(self):
    if self.snap:
        try:
            filename = '%s.%s' % (self.snap['file'].split('.')[0], self._uniq())
            shutil.copy(self.snap['file'], filename)
        except:
            msg = 'Failed to copy snapshot file %s: %s' % (
                self.snap['file'], sys.exc_info()[1])
            raise storage2.StorageError(msg)
        self.snap = None
        self.file = filename

    if not (self.device and self.file and
            self.device in coreutils.losetup_all()):
        # Construct volume
        if (not self.size and
                (not self.file or not os.path.exists(self.file))):
            msg = 'You must specify size of a new loop device ' \
                  'or existing file'
            raise storage2.StorageError(msg)

        if not self.file:
            self.file = '/mnt/loopdev' + self._uniq()
        if not os.path.exists(self.file):
            if '%ROOT' in str(self.size).upper():
                try:
                    pc = int(self.size.split('%')[0])
                except:
                    msg = 'Incorrect size format: %s' % self.size
                    raise storage2.StorageError(msg)
                stat = os.statvfs('/')
                total = stat.f_bsize * stat.f_blocks / 1048576
                size = total * pc / 100
                free = stat.f_bsize * stat.f_bfree / 1048576
                if size > free:
                    if self.adjust_size:
                        size = free
                    else:
                        msg = 'Expected loop size is greater than ' \
                              'available free space on a root filesystem. ' \
                              'Expected: %sMb / Free: %sMb' % (size, free)
                        raise storage2.StorageError(msg)
            else:
                size = int(float(self.size) * 1024)

            dd_kwds = {'if': '/dev/zero', 'of': self.file, 'bs': '1M'}
            if self.zerofill:
                dd_kwds.update({'count': size})
            else:
                dd_kwds.update({'seek': size - 1, 'count': 1})
            coreutils.dd(**dd_kwds)

        if self.device:
            coreutils.losetup(self.device, self.file)
        else:
            coreutils.losetup(self.file, find=True)
            self.device = coreutils.losetup_all(flip=True)[self.file]
def mount(self):
    self._check(mpoint=True)
    mounted_to = self._get_device_letter(self.device)
    if mounted_to != self.mpoint:
        if not re.match(r'^[a-zA-Z]$', self.mpoint):
            raise storage2.StorageError(
                "Mount point must be a single letter. Given: %s" % self.mpoint)
        LOG.debug('Assigning letter %s to %s', self.mpoint, self.id)
        self._assign_letter(self.device, self.mpoint)
        base.bus.fire("block_device_mounted", volume=self)

    try:
        getattr(self, 'label')
    except AttributeError:
        pass
    else:
        fs = storage2.filesystem(self.fstype)
        if fs.get_label(self.mpoint) != self.label:
            LOG.debug('Setting label "{}" for device id="{}"'.format(
                self.label, self.id))
            fs.set_label(self.mpoint, self.label)
        elif self.label:
            LOG.debug('Label for device id="{}" has already been set, skipping.'
                      .format(self.device))
def _detach(self, force, **kwds):
    connection = __node__['gce'].connect_compute()
    attachment_inf = self._attachment_info(connection)
    if attachment_inf:
        zone = os.path.basename(__node__['gce']['zone'])
        project_id = __node__['gce']['project_id']
        server_name = __node__['server_id']

        def try_detach():
            op = connection.instances().detachDisk(
                instance=server_name,
                project=project_id,
                zone=zone,
                deviceName=attachment_inf['deviceName']).execute()
            gce_util.wait_for_operation(connection, project_id,
                                        op['name'], zone=zone)

        for _time in range(3):
            try:
                try_detach()
                return
            except:
                e = sys.exc_info()[1]
                LOG.debug('Detach disk attempt failed: %s' % e)
                if _time == 2:
                    raise storage2.StorageError('Can not detach disk: %s' % e)
                time.sleep(1)
                LOG.debug('Trying to detach disk again.')
def get_free_name():
    # Workaround: rhel 6 returns "Null body" when attach to /dev/sdf
    s = 7 if linux.os['release'] and linux.os.redhat_family else 5
    available = set(string.ascii_lowercase[s:16])

    conn = __node__['ec2']['connect_ec2']()
    filters = {'attachment.instance-id': __node__['ec2']['instance_id']}
    attached = set(vol.attach_data.device[-1]
                   for vol in conn.get_all_volumes(filters=filters))

    dirty_detached = set()
    if not linux.os.windows:
        dirty_detached = __node__['ec2']['t1micro_detached_ebs'] or set()
        dirty_detached = set(name[-1] for name in dirty_detached)

    try:
        lets = sorted(list(available - attached - dirty_detached))
        let = lets[0]
    except IndexError:
        msg = 'No free letters for block device name remains'
        raise storage2.StorageError(msg)
    else:
        name = '/dev/sd' if not linux.os.windows else 'xvd'
        name = name + let
        return name
def _wait_status_transition(self, volume_id=None):
    """
    Wait until volume enters stable state (not 'detaching' or 'attaching')
    :param volume_id:
    :return: volume status
    """
    if not volume_id:
        volume_id = self.id

    status = self._cinder.volumes.get(volume_id).status
    vol = [None]

    def exit_condition():
        vol[0] = self._cinder.volumes.get(volume_id)
        return vol[0].status not in ('attaching', 'detaching', 'creating')

    if not exit_condition():
        msg = 'Cinder volume %s hangs in transitional state. ' \
              'Timeout reached (%s seconds)' % (volume_id, self._global_timeout)
        util.wait_until(
            exit_condition,
            logger=LOG,
            timeout=self._global_timeout,
            error_text=msg)
    if vol[0].status == 'error':
        msg = 'Cinder volume %s enters error state after %s.' % \
              (volume_id, status)
        raise storage2.StorageError(msg)
    return vol[0].status
def _wait_snapshot(self, snapshot_id):
    LOG.debug('Checking that Cinder snapshot %s is completed', snapshot_id)
    msg = "Cinder snapshot %s wasn't completed. " \
          "Timeout reached (%s seconds)" % (snapshot_id, self._global_timeout)
    snap = [None]

    def exit_condition():
        snap[0] = self._cinder.volume_snapshots.get(snapshot_id)
        return snap[0].status != 'creating'

    util.wait_until(
        exit_condition,
        logger=LOG,
        timeout=self._global_timeout,
        error_text=msg)
    if snap[0].status == 'error':
        msg = 'Cinder snapshot %s creation failed. ' \
              'Status is "error"' % snapshot_id
        raise storage2.StorageError(msg)
    elif snap[0].status == 'available':
        LOG.debug('Snapshot %s completed', snapshot_id)
def resize(self, device, size=None, *short_args, **long_kwds):
    cmd = (E2FSCK_EXEC, '-fy', device)
    rcode = filesystems.system(cmd, raise_exc=False,
                               error_text=self.error_messages['fsck'] % device)[2]
    if rcode not in (0, 1):
        raise storage2.StorageError('Fsck failed to correct file system errors')
    cmd = (RESIZE2FS_EXEC, device)
    filesystems.system(cmd, error_text=self.error_messages['resize'] % device)
def get_free_name():
    def norm_name(device):
        return re.sub(r'/dev/sd(.*)', r'\1', device)

    conn = __node__['ec2'].connect_ec2()
    instance = conn.get_all_instances([__node__['ec2']['instance_id']])[0].instances[0]

    # Add /dev/sd[a-z] pattern
    end = None if linux.os.windows else 16
    start = 5
    if linux.os.ubuntu and linux.os['release'] >= (14, 4):
        # Ubuntu 14.04 returns 'Attachment point /dev/sdf is already in used'
        start = 6
    if linux.os.redhat_family:
        # RHEL 6 returns "Null body" when attaching to /dev/sdf and /dev/sdg
        start = 7
    prefix = 'xvd' if linux.os.windows else '/dev/sd'
    available = list(prefix + a for a in string.ascii_lowercase[start:end])

    # Add /dev/xvd[b-c][a-z] pattern
    if instance.virtualization_type == 'hvm':
        prefix = 'xvd' if linux.os.windows else '/dev/xvd'
        available += list(prefix + b + a
                          for b in 'bc'
                          for a in string.ascii_lowercase)

    # Exclude ephemerals from block device mapping
    if not linux.os.windows:
        # Ubuntu 14.04 fails to attach volumes on device names mentioned in the block
        # device mapping, even if the instance type doesn't support them and the OS
        # has no such devices
        ephemerals = list(device
                          for device in __node__['platform'].get_block_device_mapping().values())
        available = list(a for a in available if a not in ephemerals)

    # Exclude devices available in OS
    if not linux.os.windows:
        available = list(a for a in available if not os.path.exists(name2device(a)))

    # Exclude attached volumes
    filters = {'attachment.instance-id': instance.id}
    attached = list(vol.attach_data.device
                    for vol in conn.get_all_volumes(filters=filters))
    available = list(a for a in available if a not in attached)

    # Exclude t1.micro detached volumes
    if instance.instance_type == 't1.micro':
        dirty_detached = list()
        if not linux.os.windows:
            dirty_detached = __node__['ec2']['t1micro_detached_ebs'] or list()
            dirty_detached = list(name for name in dirty_detached)
        available = list(a for a in available if a not in dirty_detached)

    try:
        return available[0]
    except IndexError:
        msg = 'No free letters for block device name remains'
        raise storage2.StorageError(msg)
def _ensure(self):
    self._check_attr('name')
    if self.name.startswith('google-'):
        self.name = self.name[7:]
    device = '/dev/disk/by-id/google-%s' % self.name
    if not os.path.exists(device):
        msg = "Device '%s' not found" % device
        raise storage2.StorageError(msg)
    self.device = os.path.realpath(device)
def _destroy(self):
    try:
        connection = __node__['gce'].connect_compute()
        project_id = __node__['gce']['project_id']
        op = connection.snapshots().delete(project=project_id,
                                           snapshot=self.name).execute()
        gce_util.wait_for_operation(connection, project_id, op['name'])
    except:
        e = sys.exc_info()[1]
        raise storage2.StorageError('Failed to delete google disk snapshot. '
                                    'Error: %s' % e)
def _remove_letter(self, device, letter=None):
    msg_letter = "letter {}".format(letter) if letter else "all letters"
    LOG.debug('Removing {} from device: {}'.format(msg_letter, device))
    out = self._letter_cmd('remove', device, letter)
    if 'error' in out:
        raise storage2.StorageError(
            'Can\'t remove {} from device {}'.format(msg_letter, device))
def mount(self):
    # Workaround: cinder volume sometimes remounts read-only, fsck it first
    mounted_to = self.mounted_to()
    if self.is_fs_created() and not mounted_to:
        self._check_attr('device')
        self._check_attr('fstype')
        fs = storage2.filesystem(self.fstype)
        if fs.type.startswith('ext'):
            rcode = linux.system(("/sbin/e2fsck", "-fy", self.device),
                                 raise_exc=False)[2]
            if rcode not in (0, 1):
                raise storage2.StorageError('Fsck failed to correct file system errors')
    super(CinderVolume, self).mount()
def _assign_letter(self, device, letter):
    LOG.debug('Trying to move the device that currently holds the given letter')
    out = self._letter_cmd('assign', letter, 'auto')
    LOG.debug('Assigning letter {} to device: {}'.format(letter, device))
    out = self._letter_cmd('assign', device, letter)
    if 'specified drive letter is not free to be assigned' in out:
        raise storage2.StorageError(
            'Letter {} is taken and can\'t be released'.format(letter))
    return out
def __enter__(self):
    letters = list(self._all - self._acquired)
    letters.sort()
    for l in letters:
        pattern = name2device('/dev/vd' + l) + '*'
        if not glob.glob(pattern):
            with self._lock:
                if not l in self._acquired:
                    self._acquired.add(l)
                    self._local.letter = l
                    return self
    msg = 'No free letters for block device name remains'
    raise storage2.StorageError(msg)
def _destroy(self, force, **kwds):
    self._check_attr('link')
    self._check_attr('name')
    connection = __node__['gce'].connect_compute()
    project_id = __node__['gce']['project_id']
    zone = os.path.basename(__node__['gce']['zone'])
    try:
        op = connection.disks().delete(project=project_id, zone=zone,
                                       disk=self.name).execute()
        gce_util.wait_for_operation(connection, project_id, op['name'], zone=zone)
    except:
        e = sys.exc_info()[1]
        raise storage2.StorageError("Disk destruction failed: %s" % e)
def _attach_volume(self, server_id=None):
    '''
    :rtype: tuple(cloud_device_name, os_device_name)
    '''
    if server_id is None:
        server_id = self._server_id()
    volume_id = self.id
    self._check_nova_connection()
    ops_delay = 10
    with self.attach_lock:
        for _ in xrange(5):
            LOG.debug('Attaching Cinder volume %s', volume_id)
            taken_before = base.taken_devices()
            try:
                attachment = self._nova.volumes.create_server_volume(server_id, volume_id, None)
            except TypeError, e:
                if "'NoneType' object has no attribute '__getitem__'" not in str(e):
                    # Very often (2/5 times) we got this error on RaxNG, because of incorrect API response
                    raise

            # waiting for attaching transitional state
            LOG.debug('Checking that Cinder volume %s is attached', volume_id)
            new_status = self._wait_status_transition(volume_id)
            if new_status == 'in-use':
                LOG.debug('Cinder volume %s attached', volume_id)
                break
            elif new_status == 'available':
                LOG.warn('Volume %s status changed to "available" instead of "in-use"',
                         volume_id)
                LOG.debug('Will try attach volume again after %d seconds', ops_delay)
                continue
            else:
                msg = 'Unexpected status transition "available" -> "{0}".' \
                      ' Cinder volume {1}'.format(new_status, volume_id)
                raise storage2.StorageError(msg)

        if not linux.os.windows_family:
            util.wait_until(lambda: base.taken_devices() > taken_before,
                            start_text='Checking that volume %s is available in OS' % volume_id,
                            timeout=30,
                            sleep=1,
                            error_text='Volume %s attached but not available in OS' % volume_id)
            devices = list(base.taken_devices() - taken_before)
            if len(devices) > 1:
                msg = "While polling for attached device, got multiple new devices: {0}. " \
                      "Don't know which one to select".format(devices)
                raise Exception(msg)
            return devices[0]
        else:
            return attachment.device
def _snapshot(self, description, tags, **kwds):
    """
    :param nowait: if True - do not wait for snapshot to complete,
        just create and return
    """
    self._check_attr('name')
    connection = __node__['gce']['compute_connection']
    project_id = __node__['gce']['project_id']
    nowait = kwds.get('nowait', True)

    now_raw = datetime.datetime.utcnow()
    now_str = now_raw.strftime('%d-%b-%Y-%H-%M-%S-%f')
    snap_name = ('%s-snap-%s' % (self.name, now_str)).lower()

    # We could put it to separate method, like _get_self_resource
    operation = connection.disks().createSnapshot(
        disk=self.name,
        project=project_id,
        zone=self.zone,
        body=dict(name=snap_name, description=description)).execute()
    #operation = connection.snapshots().insert(project=project_id,
    #        body=dict(name=snap_name, kind="compute#snapshot",
    #                  description=description, sourceDisk=self.link)).execute()

    try:
        # Wait until operation at least started
        gce_util.wait_for_operation(connection, project_id, operation['name'],
                                    self.zone, status_to_wait=("DONE", "RUNNING"))
        # If nowait=false, wait until operation is totally complete
        snapshot_info = connection.snapshots().get(
            project=project_id,
            snapshot=snap_name,
            fields='id,name,diskSizeGb,selfLink').execute()
        snapshot = GcePersistentSnapshot(id=snapshot_info['id'],
                                         name=snapshot_info['name'],
                                         size=snapshot_info['diskSizeGb'],
                                         link=snapshot_info['selfLink'])
        if not nowait:
            while True:
                status = snapshot.status()
                if status == snapshot.COMPLETED:
                    break
                elif status == snapshot.FAILED:
                    raise Exception('Snapshot status is "Failed"')
        return snapshot
    except:
        e = sys.exc_info()[1]
        raise storage2.StorageError('Google disk snapshot creation '
                                    'failed. Error: %s' % e)
def _wait_snapshot(self, snapshot):
    snapshot = self._ebs_snapshot(snapshot)
    LOG.debug('Checking that EBS snapshot %s is completed', snapshot.id)
    msg = "EBS snapshot %s wasn't completed. " \
          "Timeout reached (%s seconds)" % (snapshot.id, self._global_timeout)
    util.wait_until(
        lambda: snapshot.update() and snapshot.status != 'pending',
        logger=LOG,
        error_text=msg)
    if snapshot.status == 'error':
        msg = 'Snapshot %s creation failed. AWS status is "error"' % snapshot.id
        raise storage2.StorageError(msg)
    elif snapshot.status == 'completed':
        LOG.debug('Snapshot %s completed', snapshot.id)
def __enter__(self):
    letters = list(self._all - self._acquired)
    letters.sort()
    for l in letters:
        #pattern = get_system_devname(l) + '*'
        if not (glob.glob('/dev/sd' + l + '*') or glob.glob('/dev/xvd' + l + '*')):
            with self._lock:
                if not l in self._acquired:
                    self._acquired.add(l)
                    self._local.letter = l
                    return self
    msg = 'No free letters for block device name remains'
    raise storage2.StorageError(msg)
def __enter__(self):
    with self._lock:
        detached = __node__['ec2']['t1micro_detached_ebs'] or list()
        detached = set(name[-1] for name in detached)
    letters = list(set(self._all) - self._acquired - detached)
    for l in letters:
        pattern = name2device('/dev/sd' + l) + '*'
        if not glob.glob(pattern):
            with self._lock:
                if not l in self._acquired:
                    self._acquired.add(l)
                    self._local.letter = l
                    return self
    msg = 'No free letters for block device name remains'
    raise storage2.StorageError(msg)
def _attach_volume(self, server_id=None):
    '''
    :rtype: tuple(cloud_device_name, os_device_name)
    '''
    if server_id is None:
        server_id = self._server_id()
    volume_id = self.id
    self._check_nova_connection()
    ops_delay = 10
    with self.attach_lock:
        for _ in xrange(5):
            LOG.debug('Attaching Cinder volume %s', volume_id)
            taken_before = base.taken_devices()
            try:
                attachment = self._nova.volumes.create_server_volume(
                    server_id, volume_id)
            except TypeError, e:
                if "'NoneType' object has no attribute '__getitem__'" not in str(e):
                    # Very often (2/5 times) we got this error on RaxNG, because of incorrect API response
                    raise

            # waiting for attaching transitional state
            LOG.debug('Checking that Cinder volume %s is attached', volume_id)
            new_status = self._wait_status_transition(volume_id)
            if new_status == 'in-use':
                LOG.debug('Cinder volume %s attached, device: %s',
                          volume_id, attachment.device)
                break
            elif new_status == 'available':
                LOG.warn('Volume %s status changed to "available" instead of "in-use"',
                         volume_id)
                LOG.debug('Will try attach volume again after %d seconds', ops_delay)
                continue
            else:
                msg = 'Unexpected status transition "available" -> "{0}".' \
                      ' Cinder volume {1}'.format(new_status, volume_id)
                raise storage2.StorageError(msg)

        if not linux.os.windows_family:
            return base.wait_device_plugged(volume_id, taken_before)
        else:
            return attachment.device
def get_free_name():
    if linux.os.ubuntu and linux.os['release'] >= (14, 4):
        # Ubuntu 14.04 returns 'Attachment point /dev/sdf is already in used'
        s = 6
    elif linux.os['release'] and linux.os.redhat_family:
        # RHEL 6 returns "Null body" when attaching to /dev/sdf
        s = 7
    else:
        s = 5
    available = set(string.ascii_lowercase[s:16])

    conn = __node__['ec2'].connect_ec2()

    if not linux.os.windows:
        # Ubuntu 14.04 fails to attach volumes on device names mentioned in the block
        # device mapping, even if the instance type doesn't support them and the OS
        # has no such devices
        ephemerals = set(
            device[-1]
            for device in __node__['platform'].get_block_device_mapping().values())
    else:
        # Windows returns ephemeral[0-25] for all possible devices a-z,
        # which makes the ephemeral check senseless
        ephemerals = set()
    available = available - ephemerals

    filters = {'attachment.instance-id': __node__['ec2']['instance_id']}
    attached = set(vol.attach_data.device[-1]
                   for vol in conn.get_all_volumes(filters=filters))

    dirty_detached = set()
    if not linux.os.windows:
        dirty_detached = __node__['ec2']['t1micro_detached_ebs'] or set()
        dirty_detached = set(name[-1] for name in dirty_detached)

    try:
        lets = sorted(list(available - attached - dirty_detached))
        let = lets[0]
    except IndexError:
        msg = 'No free letters for block device name remains'
        raise storage2.StorageError(msg)
    else:
        name = '/dev/sd' if not linux.os.windows else 'xvd'
        name = name + let
        return name
def _status(self):
    snapshots = self._new_conn().listSnapshots(id=self.id)
    if not snapshots:
        raise storage2.StorageError(
            'listSnapshots returned empty list for snapshot %s' % self.id)
    return self._status_map[snapshots[0].state]
        except socket.gaierror, e:
            LOG.debug('Silently ignore error: %s', e)
            return False

    if not exit_condition():
        msg = 'Cinder volume %s hangs in transitional state. ' \
              'Timeout reached (%s seconds)' % (volume_id, self._global_timeout)
        util.wait_until(exit_condition,
                        logger=LOG,
                        timeout=self._global_timeout,
                        error_text=msg)
    if vol[0].status == 'error':
        msg = 'Cinder volume %s enters error state after %s.' % \
              (volume_id, status)
        raise storage2.StorageError(msg)
    return vol[0].status

def _wait_snapshot(self, snapshot_id):
    LOG.debug('Checking that Cinder snapshot %s is completed', snapshot_id)
    msg = "Cinder snapshot %s wasn't completed. " \
          "Timeout reached (%s seconds)" % (snapshot_id, self._global_timeout)
    snap = [None]

    def exit_condition():
        snap[0] = self._cinder.volume_snapshots.get(snapshot_id)
        return snap[0].status != 'creating'

    util.wait_until(exit_condition,
def grow(self, **growth):
    """
    Grow (and/or alter, e.g. change ebs type to io1) volume and fs.

    Creates a clone of the current volume, increases its size and
    attaches it to the same place. In case of error the old volume is
    attached back. The old volume is detached, but not destroyed.

    :param growth: Volume type-dependent rules for volume growth
    :type growth: dict
    :param resize_fs: Resize fs on device after its growth or not
    :type resize_fs: bool
    :return: New, bigger (or altered) volume instance
    :rtype: Volume
    """
    if not self.features.get('grow'):
        raise storage2.StorageError("%s volume type does not"
                                    " support grow." % self.type)

    # No id, no growth
    if not self.id:
        raise storage2.StorageError('Failed to grow volume: '
                                    'volume has no id.')

    # Resize_fs is true by default
    resize_fs = growth.pop('resize_fs', True)

    self.check_growth(**growth)
    was_mounted = self.mounted_to() if self.device else False

    new_vol = None
    try:
        LOG.info('Detaching volume %s', self.id)
        self.detach()

        new_vol = self.clone()
        self._grow(new_vol, **growth)

        if resize_fs:
            fs_created = new_vol.detect_fstype()
            if self.fstype:
                LOG.info('Resizing filesystem')
                fs = storage2.filesystem(fstype=self.fstype)
                umount_on_resize = fs.features.get('umount_on_resize')
                if fs_created:
                    if umount_on_resize:
                        if new_vol.mounted_to():
                            new_vol.umount()
                        fs.resize(new_vol.device)
                        if was_mounted:
                            new_vol.mount()
                    else:
                        new_vol.mount()
                        fs.resize(new_vol.device)
                        if not was_mounted:
                            new_vol.umount()
    except:
        err_type, err_val, trace = sys.exc_info()
        LOG.warn('Failed to grow volume: %s. Trying to attach old volume', err_val)
        try:
            if new_vol:
                try:
                    new_vol.destroy(force=True, remove_disks=True)
                except:
                    destr_err = sys.exc_info()[1]
                    LOG.error('Enlarged volume destruction failed: %s' % destr_err)
            self.ensure(mount=bool(was_mounted))
        except:
            e = sys.exc_info()[1]
            err_val = str(err_val) + '\nFailed to restore old volume: %s' % e

        err_val = 'Volume growth failed: %s' % err_val
        raise storage2.StorageError, err_val, trace

    return new_vol
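# Hedged sketch of how grow() above is typically driven. Keyword names follow
# check_growth(); the volume type, id and sizes are illustrative assumptions:
#
#   old_vol = storage2.volume(type='ebs', id='vol-12345678')
#   old_vol.ensure(mount=True)
#   new_vol = old_vol.grow(size=50, resize_fs=True)  # returns the new, bigger volume
#   # the old volume is detached but not destroyed; clean it up explicitly if desired
#   old_vol.destroy()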
def _grow(self, new_vol, **growth):
    if int(self.level) in (0, 10):
        raise storage2.StorageError("Raid%s doesn't support growth" % self.level)

    disk_growth = growth.get('disks')

    current_len = len(self.disks)
    new_len = int(growth.get('disks_count', 0))
    increase_disk_count = new_len and new_len != current_len

    new_vol.lvm_group_cfg = self.lvm_group_cfg
    new_vol.pv_uuid = self.pv_uuid

    growed_disks = []
    added_disks = []
    try:
        if disk_growth:

            def _grow(index, disk, cfg, queue):
                try:
                    ret = disk.grow(resize_fs=False, **cfg)
                    queue.put(dict(index=index, result=ret))
                except:
                    e = sys.exc_info()[1]
                    queue.put(dict(index=index, error=e))

            # Concurrently grow each descendant disk
            queue = Queue.Queue()
            pool = []
            for index, disk_cfg_or_obj in enumerate(self.disks):
                # We use index to save disk order in raid disks
                disk = storage2.volume(disk_cfg_or_obj)

                t = threading.Thread(
                    name='Raid %s disk %s grower' % (self.id, disk.id),
                    target=_grow,
                    args=(index, disk, disk_growth, queue))
                t.daemon = True
                t.start()
                pool.append(t)

            for thread in pool:
                thread.join()

            # Get disks growth results
            res = []
            while True:
                try:
                    res.append(queue.get_nowait())
                except Queue.Empty:
                    break

            res.sort(key=lambda p: p['index'])
            growed_disks = [r['result'] for r in res if 'result' in r]

            # Validate concurrent growth results
            assert len(res) == len(self.disks), ("Not enough data in "
                                                 "concurrent raid disks grow result")

            if not all(map(lambda x: 'result' in x, res)):
                errors = '\n'.join([str(r['error']) for r in res if 'error' in r])
                raise storage2.StorageError('Failed to grow raid disks.'
                                            ' Errors: \n%s' % errors)

            assert len(growed_disks) == len(self.disks), ("Got malformed disks"
                                                          " growth result (not enough data).")

            new_vol.disks = growed_disks
            new_vol.pv_uuid = self.pv_uuid
            new_vol.lvm_group_cfg = self.lvm_group_cfg

            new_vol.ensure()

        if increase_disk_count:
            if not disk_growth:
                """
                It means we have original disks in self.disks.
                We need to snapshot them and make new disks.
                """
                new_vol.disks = []
                snaps = storage2.concurrent_snapshot(
                    self.disks,
                    'Raid %s temp snapshot No.${index} (for growth)' % self.id,
                    tags=dict(temp='1'))
                try:
                    for disk, snap in zip(self.disks, snaps):
                        new_disk = disk.clone()
                        new_disk.snap = snap
                        new_vol.disks.append(new_disk)
                        new_disk.ensure()
                finally:
                    for s in snaps:
                        try:
                            s.destroy()
                        except:
                            e = sys.exc_info()[1]
                            LOG.debug('Failed to remove temporary snapshot: %s' % e)

                new_vol.ensure()

            existing_raid_disk = new_vol.disks[0]
            add_disks_count = new_len - current_len
            for _ in range(add_disks_count):
                disk_to_add = existing_raid_disk.clone()
                added_disks.append(disk_to_add)
                disk_to_add.ensure()

            added_disks_devices = [d.device for d in added_disks]
            mdadm.mdadm('manage', new_vol.raid_pv, add=True, *added_disks_devices)
            new_vol.disks.extend(added_disks)

            mdadm.mdadm('grow', new_vol.raid_pv, raid_devices=new_len)
            mdadm.mdadm('misc', None, new_vol.raid_pv, wait=True, raise_exc=False)

        mdadm.mdadm('grow', new_vol.raid_pv, size='max')
        mdadm.mdadm('misc', None, new_vol.raid_pv, wait=True, raise_exc=False)

        lvm2.pvresize(new_vol.raid_pv)
        try:
            lvm2.lvresize(new_vol.device, extents='100%VG')
        except:
            e = sys.exc_info()[1]
            if (self.level == 1 and 'matches existing size' in str(e)
                    and not disk_growth):
                LOG.debug('Raid1 actual size has not changed')
            else:
                raise
    except:
        err_type, err_val, trace = sys.exc_info()
        if growed_disks or added_disks:
            LOG.debug("Removing %s successfully growed disks and "
                      "%s additional disks",
                      len(growed_disks), len(added_disks))
            for disk in itertools.chain(growed_disks, added_disks):
                try:
                    disk.destroy(force=True)
                except:
                    e = sys.exc_info()[1]
                    LOG.error('Failed to remove raid disk: %s' % e)

        raise err_type, err_val, trace