def test_mdadm_stop_retry_exhausted(self, mock_sleep):
    # Every stop attempt fails, so mdadm_stop is expected to raise OSError;
    # the recorded calls must show one read/write/stop round per retry.
    device = "/dev/md/37"
    retries = 60
    self._set_sys_path(device)

    self.mock_util_load_file.side_effect = iter(
        ["resync", "max", "proc/mdstat output"] * retries)
    self.mock_util_subp.side_effect = iter(
        [util.ProcessExecutionError()] * retries)
    # sysfs attribute writes alternate between succeeding and raising
    self.mock_util_write_file.side_effect = iter(["", IOError()] * retries)

    with self.assertRaises(OSError):
        mdadm.mdadm_stop(device)

    sync_action = self.sys_path + '/sync_action'
    sync_max = self.sys_path + '/sync_max'

    stop_cmd = call(["mdadm", "--manage", "--stop", device], capture=True)
    self.mock_util_subp.assert_has_calls([stop_cmd] * retries)

    reads_per_round = [
        call(sync_action),
        call(sync_max),
        call('/proc/mdstat'),
    ]
    self.mock_util_load_file.assert_has_calls(reads_per_round * retries)

    writes_per_round = [
        call(sync_action, content='idle'),
        call(sync_max, content='0'),
    ]
    self.mock_util_write_file.assert_has_calls(writes_per_round * retries)
def test_mdadm_stop(self):
    # Plain success path: a single stop command, preceded by reads of the
    # two sync attributes under the device's sysfs path.
    device = "/dev/md0"
    self._set_sys_path(device)

    mdadm.mdadm_stop(device)

    self.mock_util_subp.assert_has_calls(
        [call(["mdadm", "--manage", "--stop", device], capture=True)])
    self.mock_util_load_file.assert_has_calls(
        [call(self.sys_path + attr)
         for attr in ('/sync_action', '/sync_max')])
def shutdown_mdadm(device):
    """
    Shutdown specified mdadm device.

    Steps, in order:
      1. wipe the superblock on the array device,
      2. collect the array's member and spare devices,
      3. set the array sync action to "idle" and then "frozen",
      4. fail and remove every member (errors from mdadm are only logged),
      5. stop the array,
      6. zero every former member device,
      7. wait for the kernel to drop the md device.

    :param device: device identifier accepted by block.sysfs_to_devpath
        (presumably the sysfs path of the md array -- confirm with caller).
    :raises OSError: if the md device is still present after all waits in
        MDADM_RELEASE_RETRIES have elapsed.
    """
    blockdev = block.sysfs_to_devpath(device)

    LOG.info('Wiping superblock on raid device: %s', device)
    _wipe_superblock(blockdev, exclusive=False)

    md_devs = (mdadm.md_get_devices_list(blockdev) +
               mdadm.md_get_spares_list(blockdev))
    mdadm.set_sync_action(blockdev, action="idle")
    mdadm.set_sync_action(blockdev, action="frozen")

    for mddev in md_devs:
        try:
            mdadm.fail_device(blockdev, mddev)
            mdadm.remove_device(blockdev, mddev)
        except util.ProcessExecutionError as e:
            # best effort: a member that cannot be failed/removed does not
            # abort the shutdown
            LOG.debug('Non-fatal error clearing raid array: %s', e.stderr)

    LOG.debug('using mdadm.mdadm_stop on dev: %s', blockdev)
    mdadm.mdadm_stop(blockdev)

    for mddev in md_devs:
        mdadm.zero_device(mddev)

    # mdadm stop operation is asynchronous so we must wait for the kernel to
    # release resources. For more details see LP: #1682456
    try:
        for wait in MDADM_RELEASE_RETRIES:
            if mdadm.md_present(block.path_to_kname(blockdev)):
                time.sleep(wait)
            else:
                LOG.debug('%s has been removed', blockdev)
                break

        if mdadm.md_present(block.path_to_kname(blockdev)):
            # Interpolate before raising: OSError(fmt, arg) would be read
            # as (errno, strerror) and the message would never be formatted.
            raise OSError('Timeout exceeded for removal of %s' % blockdev)

    except OSError:
        LOG.critical('Failed to stop mdadm device %s', device)
        if os.path.exists('/proc/mdstat'):
            LOG.critical("/proc/mdstat:\n%s", util.load_file('/proc/mdstat'))
        raise
def test_mdadm_stop_retry_sysfs_write_fail(self, mock_sleep):
    # The first stop attempt fails and the second sysfs write raises
    # IOError; mdadm_stop is still expected to finish via a second attempt.
    device = "/dev/md126"
    self._set_sys_path(device)

    file_contents = [
        "resync",
        "max",
        "proc/mdstat output",
        "idle",
        "0",
    ]
    self.mock_util_load_file.side_effect = iter(file_contents)

    subp_results = [
        util.ProcessExecutionError(),
        ("mdadm stopped %s" % device, ''),
    ]
    self.mock_util_subp.side_effect = iter(subp_results)

    # sysfs attribute writes are not always successful
    write_results = [
        "",         # sync_action write goes through
        IOError(),  # sync_max write blows up
    ]
    self.mock_util_write_file.side_effect = iter(write_results)

    mdadm.mdadm_stop(device)

    sync_action = self.sys_path + '/sync_action'
    sync_max = self.sys_path + '/sync_max'

    stop_cmd = call(["mdadm", "--manage", "--stop", device], capture=True)
    self.mock_util_subp.assert_has_calls([stop_cmd, stop_cmd])

    self.mock_util_load_file.assert_has_calls([
        call(sync_action),
        call(sync_max),
        call('/proc/mdstat'),
        call(sync_action),
        call(sync_max),
    ])

    self.mock_util_write_file.assert_has_calls([
        call(sync_action, content='idle'),
    ])
def test_mdadm_stop_no_devpath(self):
    # Passing None as the device path must be rejected with ValueError.
    self.assertRaises(ValueError, mdadm.mdadm_stop, None)
def shutdown_mdadm(device):
    """
    Shutdown specified mdadm device.

    Steps, in order:
      1. collect the array's member and spare devices,
      2. set the array sync action to "idle" and then "frozen",
      3. wipe the superblock on the array device (a ValueError here is
         logged and tolerated),
      4. fail and remove every member (errors from mdadm are only logged),
      5. stop the array,
      6. zero every former member device with force=True,
      7. wait for the kernel to drop the md device.

    :param device: device identifier accepted by block.sysfs_to_devpath
        (presumably the sysfs path of the md array -- confirm with caller).
    :raises OSError: if the md device is still present after all waits in
        MDADM_RELEASE_RETRIES have elapsed.
    """
    blockdev = block.sysfs_to_devpath(device)

    LOG.info('Discovering raid devices and spares for %s', device)
    md_devs = (mdadm.md_get_devices_list(blockdev) +
               mdadm.md_get_spares_list(blockdev))
    mdadm.set_sync_action(blockdev, action="idle")
    mdadm.set_sync_action(blockdev, action="frozen")

    LOG.info('Wiping superblock on raid device: %s', device)
    try:
        _wipe_superblock(blockdev, exclusive=False)
    except ValueError as e:
        # if the array is not functional, writes to the device may fail
        # and _wipe_superblock will raise ValueError for short writes
        # which happens on inactive raid volumes.  In that case we
        # shouldn't give up yet as we still want to disassemble
        # array and wipe members.  Other errors such as IOError or OSError
        # are unwelcome and will stop deployment.
        LOG.debug(
            'Non-fatal error writing to array device %s, '
            'proceeding with shutdown: %s', blockdev, e)

    LOG.info('Removing raid array members: %s', md_devs)
    for mddev in md_devs:
        try:
            mdadm.fail_device(blockdev, mddev)
            mdadm.remove_device(blockdev, mddev)
        except util.ProcessExecutionError as e:
            # best effort: a member that cannot be failed/removed does not
            # abort the shutdown
            LOG.debug('Non-fatal error clearing raid array: %s', e.stderr)

    LOG.debug('using mdadm.mdadm_stop on dev: %s', blockdev)
    mdadm.mdadm_stop(blockdev)

    # hand the argument to the logger instead of pre-formatting, like every
    # other log call in this function
    LOG.debug('Wiping mdadm member devices: %s', md_devs)
    for mddev in md_devs:
        mdadm.zero_device(mddev, force=True)

    # mdadm stop operation is asynchronous so we must wait for the kernel to
    # release resources. For more details see LP: #1682456
    try:
        for wait in MDADM_RELEASE_RETRIES:
            if mdadm.md_present(block.path_to_kname(blockdev)):
                time.sleep(wait)
            else:
                LOG.debug('%s has been removed', blockdev)
                break

        if mdadm.md_present(block.path_to_kname(blockdev)):
            # Interpolate before raising: OSError(fmt, arg) would be read
            # as (errno, strerror) and the message would never be formatted.
            raise OSError('Timeout exceeded for removal of %s' % blockdev)

    except OSError:
        LOG.critical('Failed to stop mdadm device %s', device)
        if os.path.exists('/proc/mdstat'):
            LOG.critical("/proc/mdstat:\n%s", util.load_file('/proc/mdstat'))
        raise