def test_nexus_multipath_remove_all_paths(
    create_nexus_no_destroy,
    create_nexus_2_no_destroy,
    connect_nexus,
    connect_nexus_2,
    start_fio,
    unpublish_nexus,
    delay,
    verify_paths,
    destroy_nexus_2,
    delay2,
    publish_nexus,
):
    """Lose every path to a two-nexus namespace, then bring one back.

    Scenario: create 2 nexuses, connect over NVMe, start fio, unpublish one
    nexus, verify failover, destroy the other nexus, re-publish the first
    nexus and verify that IO restarts.

    Only ``connect_nexus`` and ``connect_nexus_2`` are read in the body. The
    other parameters are not referenced here (presumably pytest fixtures
    requested for their side effects -- confirm against their definitions).
    """
    first_device = connect_nexus
    second_device = connect_nexus_2
    assert first_device == second_device, "should have one namespace"

    # Give the host time to attempt reconnection before inspecting the paths.
    time.sleep(10)

    io_paths = nvme_list_subsystems(first_device)["Subsystems"][0]["Paths"]
    assert len(io_paths) == 2, "should have 2 paths"

    # Expected order: the first path is usable, the second is still
    # trying to reconnect.
    first_path = io_paths[0]
    assert first_path["State"] == "live"
    second_path = io_paths[1]
    assert second_path["State"] == "connecting"

    # Leave time for fio to complete.
    time.sleep(5)
def test_nexus_multipath_remove_3rd_path(
    create_nexus_dev,
    create_nexus_2_no_destroy,
    connect_nexus_2,
    create_nexus_3_dev,
    start_fio,
    destroy_nexus_2,
):
    """Destroy the 2nd of three nexuses while fio is running.

    Scenario: create 3 nexuses, connect over NVMe, start fio, destroy the
    2nd nexus. Afterwards all three paths must still be listed, with the
    outer two "live" and the middle one either "resetting" or "connecting".

    ``create_nexus_2_no_destroy``, ``start_fio`` and ``destroy_nexus_2`` are
    not referenced in the body (presumably pytest fixtures requested for
    their side effects -- confirm against their definitions).
    """
    first_device = create_nexus_dev
    second_device = connect_nexus_2
    third_device = create_nexus_3_dev
    assert first_device == second_device, "should have one namespace"
    assert first_device == third_device, "should have one namespace"

    io_paths = nvme_list_subsystems(first_device)["Subsystems"][0]["Paths"]
    assert len(io_paths) == 3, "should have 3 paths"

    assert io_paths[0]["State"] == "live"
    # kernel 5.4 reports resetting, 5.10 reports connecting
    assert io_paths[1]["State"] in ("resetting", "connecting")
    assert io_paths[2]["State"] == "live"

    # Leave time for fio to complete.
    time.sleep(15)
def verify_paths(connect_nexus):
    """Assert the device has two paths: "connecting" first, then "live".

    ``connect_nexus`` is the device handed to ``nvme_list_subsystems``; only
    the first subsystem in the returned listing is inspected.
    """
    first_subsystem = nvme_list_subsystems(connect_nexus)["Subsystems"][0]
    io_paths = first_subsystem["Paths"]

    assert len(io_paths) == 2, "should have 2 paths"
    assert io_paths[0]["State"] == "connecting"
    assert io_paths[1]["State"] == "live"
def remove_first_path(connect_to_first_path):
    """Disconnect the single "connecting" controller and check what is left.

    The first subsystem listed for the device must contain exactly one path
    in the "connecting" state. That controller is disconnected, the
    subsystem is listed again, and exactly one "live" path must remain.
    """
    device = connect_to_first_path

    # Collect the names of the controllers whose path is still "connecting".
    degraded_controllers = []
    for path in nvme_list_subsystems(device)["Subsystems"][0]["Paths"]:
        if path["State"] == "connecting":
            degraded_controllers.append(path["Name"])

    assert len(degraded_controllers) == 1, "No degraded paths reported"
    nvme_disconnect_controller(degraded_controllers[0])

    # Re-read the listing: only the healthy path should be reported now.
    remaining_paths = nvme_list_subsystems(device)["Subsystems"][0]["Paths"]
    assert (
        len(remaining_paths) == 1
    ), "Insufficient number of I/O paths reported"
    assert remaining_paths[0]["State"] == "live", "No healthy path reported"
def check_controllers_online(get_nvme_client):
    """Assert both devices are the same namespace with two "live" paths.

    ``get_nvme_client`` is indexed at positions 0 and 1; the two entries
    must compare equal, and the first subsystem listed for that device must
    report exactly two paths, each in the "live" state.
    """
    devices = get_nvme_client
    assert devices[0] == devices[1], "should have one namespace"

    io_paths = nvme_list_subsystems(devices[0])["Subsystems"][0]["Paths"]
    assert len(io_paths) == 2, "should have 2 paths"

    # Checked one path at a time so a failure points at the offending path.
    for path in io_paths:
        assert path["State"] == "live"
def run_fio_to_first_path(connect_to_first_path):
    """Start a background fio job against a device with one healthy path.

    Before launching, the device must expose exactly one NVMe subsystem with
    exactly one I/O path, and that path must be "live".

    Returns the ``subprocess.Popen`` object for the fio process; the process
    is not waited on here.
    """
    target = connect_to_first_path

    subsystems = nvme_list_subsystems(target)["Subsystems"]
    assert (
        len(subsystems) == 1
    ), "Must be exactly one NVMe subsystem for target nexus"

    io_paths = subsystems[0]["Paths"]
    assert len(io_paths) == 1, "Must be exactly one I/O path to target nexus"
    assert io_paths[0]["State"] == "live", "I/O path is not healthy"

    # fio is started in the background so it keeps running alongside the
    # rest of the test. The built command is handed to a shell as-is
    # (presumably a single command string -- confirm against Fio.build()).
    command = Fio("job", "randread", target, runtime=FIO_RUNTIME).build()
    return subprocess.Popen(command, shell=True)
async def test_io_policy(create_replicas, create_nexuses, mayastor_mod):
    """Check path states, sysfs layout, I/O policy and ANA state.

    After connecting to the nexuses from ``create_nexuses`` the test checks:

    * both connections resolve to the same device with exactly two paths,
      each "live" with ANA state "optimized";
    * the two per-controller block devices and the namespace itself are
      symlinks into the expected virtual sysfs directories;
    * the subsystem's ``iopolicy`` file contains "numa";
    * the "ms2" and "ms3" entries of ``mayastor_mod`` each report a single
      nexus whose ANA state is ``pb.NVME_ANA_OPTIMIZED_STATE``.

    ``create_replicas`` is not referenced in the body (presumably a fixture
    needed for its side effects -- confirm against its definition).
    """
    devices = connect_multipath_nexuses(create_nexuses)
    assert devices[0] == devices[1], "Paths are different for multipath nexus"

    # Exactly two paths are expected and every one must be live/optimized.
    namespace_device = devices[0]
    subsystem = nvme_list_subsystems(namespace_device)["Subsystems"][0]
    io_paths = subsystem["Paths"]
    assert len(io_paths) == 2, "Number of paths to Nexus mismatches"
    for path in io_paths:
        assert path["State"] == "live"
        assert path["ANAState"] == "optimized"

    # Each of the two per-controller block devices must link to a virtual
    # NVMe fabrics controller.
    namespace_name = os.path.basename(namespace_device)
    for index in range(2):
        controller_ns = namespace_name.replace("n1", "c%dn1" % index)
        link_target = os.readlink("/sys/block/%s" % controller_ns)
        assert link_target.startswith(
            "../devices/virtual/nvme-fabrics/ctl/"
        ), "Path device is not a virtual controller"

    # The multipath namespace itself must live under a virtual subsystem.
    link_target = os.readlink("/sys/block/%s" % namespace_name)
    assert link_target.startswith(
        "../devices/virtual/nvme-subsystem/nvme-subsys"
    ), "No virtual NVMe subsystem exists for multipath Nexus"

    # The subsystem's I/O policy must be NUMA.
    policy_file = "/sys/class/nvme-subsystem/%s/iopolicy" % subsystem["Name"]
    assert os.path.isfile(policy_file), "No iopolicy file exists"
    with open(policy_file) as handle:
        io_policy = handle.read().strip()
    assert io_policy == "numa", "I/O policy is not NUMA"

    # Both nexus-hosting instances must report the optimized ANA state.
    for node_name in ("ms2", "ms3"):
        node_nexuses = mayastor_mod.get(node_name).nexus_list_v2()
        assert len(node_nexuses) == 1, "Number of nexuses mismatches"
        assert (
            node_nexuses[0].ana_state == pb.NVME_ANA_OPTIMIZED_STATE
        ), "ANA state of nexus mismatches"
def connect_to_node_2(publish_to_node_2):
    """Connect to the second nexus and check the states of both I/O paths.

    The value of ``publish_to_node_2`` is passed to ``nvme_connect``. The
    first subsystem listed for the resulting device must then hold two
    paths. A path whose controller name occurs inside the device string is
    expected to be "connecting"; any other path is expected to be "live".
    Both kinds must be seen.
    """
    device = nvme_connect(publish_to_node_2)

    io_paths = nvme_list_subsystems(device)["Subsystems"][0]["Paths"]
    assert len(io_paths) == 2, "Second nexus must be added to I/O path"

    saw_healthy = False
    saw_degraded = False
    for path in io_paths:
        if path["Name"] not in device:
            assert (
                path["State"] == "live"
            ), "Healthy I/O path has incorrect state"
            saw_healthy = True
        else:
            assert (
                path["State"] == "connecting"
            ), "Degraded I/O path has incorrect state"
            saw_degraded = True

    assert saw_healthy, "No state reported for healthy I/O path"
    assert saw_degraded, "No state reported for broken I/O path"
def test_nexus_multipath_add_3rd_path(
    create_nexus_dev,
    create_nexus_2_dev,
    start_fio,
    create_nexus_3_dev,
):
    """Add a third path to a namespace while fio is running.

    Scenario: create 2 nexuses, connect over NVMe, start fio, then create
    and connect a 3rd nexus. All three connections must resolve to the same
    device, and that device's first subsystem must list three paths.
    """
    # start_fio is not used as a value. It is listed between the second and
    # third nexus parameters, matching the scenario order in the docstring.
    first_device = create_nexus_dev
    second_device = create_nexus_2_dev
    third_device = create_nexus_3_dev

    assert first_device == second_device, "should have one namespace"
    assert first_device == third_device, "should have one namespace"

    io_paths = nvme_list_subsystems(first_device)["Subsystems"][0]["Paths"]
    assert len(io_paths) == 3, "should have 3 paths"

    # Leave time for fio to complete.
    time.sleep(15)