def mdadm_stop(devpath, retries=None):
    """
    Stop the md array at devpath, retrying until mdadm succeeds.

    Before each ``mdadm --manage --stop`` attempt the array's sysfs
    attributes are nudged toward a stoppable state: ``sync_action`` is
    written as "idle" and ``sync_max``/``sync_min`` as "0" whenever they
    do not already hold those values.  Failures of those sysfs writes are
    logged and otherwise ignored.

    :param devpath: path of the md device to stop; checked with
        assert_valid_devpath before anything else happens.
    :param retries: iterable of seconds to sleep after each failed stop
        attempt; one attempt is made per entry.  A falsy value (None or
        empty) selects the default of 60 attempts spaced 0.2 seconds apart.
    :raises OSError: if every attempt to stop the array failed.
    """
    assert_valid_devpath(devpath)
    if not retries:
        retries = [0.2] * 60

    # Full paths of the sysfs attribute files; these are what gets written.
    sync_action = md_sysfs_attr_path(devpath, 'sync_action')
    sync_max = md_sysfs_attr_path(devpath, 'sync_max')
    sync_min = md_sysfs_attr_path(devpath, 'sync_min')

    LOG.info("mdadm stopping: %s", devpath)
    for (attempt, wait) in enumerate(retries):
        try:
            LOG.debug('mdadm: stop on %s attempt %s', devpath, attempt)
            # An array in 'resync' state may not be stoppable, attempt to
            # cancel an ongoing resync
            val = md_sysfs_attr(devpath, 'sync_action')
            LOG.debug('%s = %s', sync_action, val)
            if val != "idle":
                LOG.debug("mdadm: setting array sync_action=idle")
                try:
                    util.write_file(sync_action, content="idle")
                except (IOError, OSError) as e:
                    LOG.debug("mdadm: (non-fatal) write to %s failed %s",
                              sync_action, e)

            # Setting the sync_{max,min} can help prevent the array from
            # changing back to 'resync' which may prevent the array from being
            # stopped
            val = md_sysfs_attr(devpath, 'sync_max')
            LOG.debug('%s = %s', sync_max, val)
            if val != "0":
                LOG.debug("mdadm: setting array sync_{min,max}=0")
                try:
                    for sync_file in [sync_max, sync_min]:
                        util.write_file(sync_file, content="0")
                except (IOError, OSError) as e:
                    # sync_file names whichever write raised
                    LOG.debug('mdadm: (non-fatal) write to %s failed %s',
                              sync_file, e)

            # one wonders why this command doesn't do any of the above itself?
            out, err = util.subp(["mdadm", "--manage", "--stop", devpath],
                                 capture=True)
            LOG.debug("mdadm stop command output:\n%s\n%s", out, err)
            LOG.info("mdadm: successfully stopped %s after %s attempt(s)",
                     devpath, attempt + 1)
            return

        except util.ProcessExecutionError:
            LOG.warning("mdadm stop failed, retrying ")
            # Dump the kernel's view of all arrays to aid debugging.
            if os.path.isfile('/proc/mdstat'):
                LOG.critical("/proc/mdstat:\n%s",
                             util.load_file('/proc/mdstat'))
            LOG.debug("mdadm: stop failed, retrying in %s seconds", wait)
            time.sleep(wait)

    # Interpolate the message explicitly: OSError(a, b) would treat the two
    # arguments as (errno, strerror) instead of formatting the string.
    raise OSError('Failed to stop mdadm device %s' % devpath)
def shutdown_mdadm(device):
    """
    Shutdown specified mdadm device.

    The array's superblock is wiped, every member and spare device is
    failed and removed from the array, the array is stopped, and the
    former members are zeroed.  Finally the function waits, sleeping for
    each interval in MDADM_RELEASE_RETRIES, until the md device is no
    longer present.

    :param device: identifier of the raid device; it is translated to a
        block device path with block.sysfs_to_devpath.
    :raises OSError: if the md device is still present after all waits;
        the contents of /proc/mdstat are logged before re-raising.
    """
    blockdev = block.sysfs_to_devpath(device)

    LOG.info('Wiping superblock on raid device: %s', device)
    _wipe_superblock(blockdev, exclusive=False)

    md_devs = (
        mdadm.md_get_devices_list(blockdev) +
        mdadm.md_get_spares_list(blockdev))
    mdadm.set_sync_action(blockdev, action="idle")
    mdadm.set_sync_action(blockdev, action="frozen")
    for mddev in md_devs:
        try:
            mdadm.fail_device(blockdev, mddev)
            mdadm.remove_device(blockdev, mddev)
        except util.ProcessExecutionError as e:
            # best-effort: a member that cannot be failed/removed must not
            # prevent stopping the array
            LOG.debug('Non-fatal error clearing raid array: %s', e.stderr)

    LOG.debug('using mdadm.mdadm_stop on dev: %s', blockdev)
    mdadm.mdadm_stop(blockdev)

    for mddev in md_devs:
        mdadm.zero_device(mddev)

    # mdadm stop operation is asynchronous so we must wait for the kernel to
    # release resources. For more details see LP: #1682456
    try:
        for wait in MDADM_RELEASE_RETRIES:
            if mdadm.md_present(block.path_to_kname(blockdev)):
                time.sleep(wait)
            else:
                LOG.debug('%s has been removed', blockdev)
                break

        if mdadm.md_present(block.path_to_kname(blockdev)):
            # Interpolate explicitly: OSError(a, b) would treat the two
            # arguments as (errno, strerror) instead of formatting.
            raise OSError('Timeout exceeded for removal of %s' % blockdev)

    except OSError:
        LOG.critical('Failed to stop mdadm device %s', device)
        if os.path.exists('/proc/mdstat'):
            LOG.critical("/proc/mdstat:\n%s", util.load_file('/proc/mdstat'))
        raise
def shutdown_mdadm(device):
    """
    Shutdown specified mdadm device.

    The member and spare devices are discovered first, then the array's
    superblock is wiped (tolerating ValueError from a non-functional
    array), every member is failed and removed, the array is stopped, and
    the former members are zeroed.  Finally the function waits, sleeping
    for each interval in MDADM_RELEASE_RETRIES, until the md device is no
    longer present.

    :param device: identifier of the raid device; it is translated to a
        block device path with block.sysfs_to_devpath.
    :raises OSError: if the md device is still present after all waits;
        the contents of /proc/mdstat are logged before re-raising.
    """
    blockdev = block.sysfs_to_devpath(device)

    LOG.info('Discovering raid devices and spares for %s', device)
    md_devs = (
        mdadm.md_get_devices_list(blockdev) +
        mdadm.md_get_spares_list(blockdev))
    mdadm.set_sync_action(blockdev, action="idle")
    mdadm.set_sync_action(blockdev, action="frozen")

    LOG.info('Wiping superblock on raid device: %s', device)
    try:
        _wipe_superblock(blockdev, exclusive=False)
    except ValueError as e:
        # if the array is not functional, writes to the device may fail
        # and _wipe_superblock will raise ValueError for short writes
        # which happens on inactive raid volumes.  In that case we
        # shouldn't give up yet as we still want to disassemble
        # array and wipe members.  Other errors such as IOError or OSError
        # are unwelcome and will stop deployment.
        LOG.debug(
            'Non-fatal error writing to array device %s, '
            'proceeding with shutdown: %s', blockdev, e)

    LOG.info('Removing raid array members: %s', md_devs)
    for mddev in md_devs:
        try:
            mdadm.fail_device(blockdev, mddev)
            mdadm.remove_device(blockdev, mddev)
        except util.ProcessExecutionError as e:
            # best-effort: a member that cannot be failed/removed must not
            # prevent stopping the array
            LOG.debug('Non-fatal error clearing raid array: %s', e.stderr)

    LOG.debug('using mdadm.mdadm_stop on dev: %s', blockdev)
    mdadm.mdadm_stop(blockdev)

    LOG.debug('Wiping mdadm member devices: %s', md_devs)
    for mddev in md_devs:
        mdadm.zero_device(mddev, force=True)

    # mdadm stop operation is asynchronous so we must wait for the kernel to
    # release resources. For more details see LP: #1682456
    try:
        for wait in MDADM_RELEASE_RETRIES:
            if mdadm.md_present(block.path_to_kname(blockdev)):
                time.sleep(wait)
            else:
                LOG.debug('%s has been removed', blockdev)
                break

        if mdadm.md_present(block.path_to_kname(blockdev)):
            # Interpolate explicitly: OSError(a, b) would treat the two
            # arguments as (errno, strerror) instead of formatting.
            raise OSError('Timeout exceeded for removal of %s' % blockdev)

    except OSError:
        LOG.critical('Failed to stop mdadm device %s', device)
        if os.path.exists('/proc/mdstat'):
            LOG.critical("/proc/mdstat:\n%s", util.load_file('/proc/mdstat'))
        raise