def share_null_devices(mayastors, device_nodes, create_null_devices):
    """Share every bdev on each device node, then unshare them on teardown.

    Setup calls ``bdev_share(dev.name)`` for each entry of ``bdev_list()`` on
    every node in ``device_nodes``. After the ``yield`` the same walk is
    repeated with ``bdev_unshare``.

    ``create_null_devices`` is not used in the body; presumably it is a
    fixture dependency that only enforces setup ordering -- confirm.
    """

    def apply_to_all_bdevs(method_name):
        # Walk the nodes in order and invoke the named handle method with the
        # name of every bdev that node currently lists.
        for node_name in device_nodes:
            handle = mayastors.get(node_name)
            for bdev in handle.bdev_list():
                getattr(handle, method_name)(bdev.name)

    apply_to_all_bdevs("bdev_share")
    yield
    apply_to_all_bdevs("bdev_unshare")
def create_null_devices(mayastors, device_nodes):
    """Create NEXUS_COUNT null bdevs on every device node; destroy on teardown.

    Each bdev is created from a ``null:///nullNN`` URI with a 512-byte block
    size and ``size_mb=100``. After the ``yield`` every bdev reported by
    ``bdev_list()`` on each node is destroyed by its ``uri``.
    """
    uri_template = "null:///null{:02d}?blk_size=512&size_mb=100"

    for node_name in device_nodes:
        handle = mayastors.get(node_name)
        for index in range(NEXUS_COUNT):
            handle.bdev_create(uri_template.format(index))

    yield

    for node_name in device_nodes:
        handle = mayastors.get(node_name)
        for bdev in handle.bdev_list():
            handle.bdev_destroy(bdev.uri)
def create_nexuses(mayastors, device_nodes, nexus_node, share_null_devices):
    """Create one nexus per group of shared bdevs; destroy all on teardown.

    The share URIs of every device node are collected, and the i-th URI of
    each node is grouped (via ``zip``) as the children of one nexus created
    on ``nexus_node`` with a fresh random UUID. After the ``yield`` every
    nexus listed on ``nexus_node`` is destroyed.

    ``share_null_devices`` is not used in the body; presumably it is a
    fixture dependency that only enforces setup ordering -- confirm.
    """
    nexus_handle = mayastors.get(nexus_node)

    share_uris_per_node = []
    for node_name in device_nodes:
        bdevs = mayastors.get(node_name).bdev_list()
        share_uris_per_node.append([bdev.share_uri for bdev in bdevs])

    # Transpose: one tuple of children (one URI per device node) per nexus.
    for child_uris in zip(*share_uris_per_node):
        nexus_handle.nexus_create(
            guid.uuid4(), 60 * 1024 * 1024, list(child_uris)
        )

    yield

    for nexus in nexus_handle.nexus_list():
        nexus_handle.nexus_destroy(nexus.uuid)
def create_nexus_2_v2(
    mayastors, nexus_name, nexus_uuid, min_cntlid_2, resv_key, resv_key_2
):
    """Create a 2nd nexus on ms0 with the same 2 replicas but with resv_key_2
    and preempt resv_key.

    The replica URIs are taken from the first two children of the nexus named
    ``nexus_name`` on ms3. The new nexus is created and published on ms0 and
    the published URI is yielded; when the generator is resumed the nexus on
    ms0 is destroyed.

    ``nexus_uuid`` is unpacked as a ``(uuid, size_mb)`` pair.

    Raises:
        AssertionError: if no nexus named ``nexus_name`` exists on ms3, or if
            the bdev counts after publishing are not the expected ones.
    """
    nexuses = mayastors.get("ms3").nexus_list_v2()
    # Use a default so a missing nexus fails with a clear message instead of
    # "RuntimeError: generator raised StopIteration" (PEP 479).
    nexus = next((n for n in nexuses if n.name == nexus_name), None)
    assert nexus is not None, f"nexus {nexus_name!r} not found on ms3"

    replicas = [nexus.children[0].uri, nexus.children[1].uri]
    uuid, size_mb = nexus_uuid

    ms0 = mayastors["ms0"]
    ms0.nexus_create_v2(
        nexus_name,
        uuid,
        size_mb,
        min_cntlid_2,
        min_cntlid_2 + 9,
        resv_key_2,
        resv_key,
        replicas,
    )
    uri = ms0.nexus_publish(nexus_name)

    assert len(mayastors["ms0"].bdev_list()) == 1
    assert len(mayastors["ms1"].bdev_list()) == 2
    assert len(mayastors["ms2"].bdev_list()) == 2
    assert len(mayastors["ms3"].bdev_list()) == 1

    yield uri

    ms0.nexus_destroy(nexus_name)
async def test_nexus_2_remote_mirror_kill_all_fio(
    containers, mayastors, create_nexus, nexus_uuid
):
    """Create a nexus on ms3 with replicas on ms1 and ms2. Start fio_spdk for
    15s. Kill ms2 after 4s, ms1 after 4s. Assume the fail with a
    ChildProcessError is due to fio bailing out. Remove the nexus from ms3.

    After fio finishes (or bails out) the nexus and both of its children are
    expected to be faulted.
    """
    uri = create_nexus
    expected_uuid, _ = nexus_uuid
    job = FioSpdk("job1", "randwrite", uri).build()

    try:
        # create an event loop polling the async processes for completion
        await asyncio.gather(
            run_cmd_async(job),
            kill_after(containers.get("ms2"), 4),
            kill_after(containers.get("ms1"), 4),
        )
    except ChildProcessError:
        # Expected: fio bails out once both replicas are gone. Any other
        # exception propagates and fails the test with its real cause; the
        # state checks below are deliberately not in a ``finally`` so they
        # cannot mask it.
        pass

    nexuses = mayastors.get("ms3").nexus_list()
    # Default avoids "coroutine raised StopIteration" when the nexus is gone.
    nexus = next((n for n in nexuses if n.uuid == expected_uuid), None)
    assert nexus is not None, f"nexus {expected_uuid} not found on ms3"

    assert nexus.state == pb.NEXUS_FAULTED
    assert nexus.children[0].state == pb.CHILD_FAULTED
    assert nexus.children[1].state == pb.CHILD_FAULTED
async def test_nexus_2_remote_mirror_kill_1(
    containers, mayastors, create_nexus, nexus_uuid
):
    """Create a nexus on ms3 with replicas on ms1 and ms2. Sleep for 10s. Kill
    ms2 after 4s, verify that the nexus is degraded, the first child is
    online and the second child is faulted.

    ``create_nexus`` is requested only for its setup; its value is not used.
    """
    expected_uuid, _ = nexus_uuid
    job = "sleep 10"

    # create an event loop polling the async processes for completion.
    # Any exception propagates unchanged and fails the test; the state checks
    # below are deliberately not in a ``finally`` so they cannot mask it.
    await asyncio.gather(
        run_cmd_async(job),
        kill_after(containers.get("ms2"), 4),
    )

    nexuses = mayastors.get("ms3").nexus_list()
    # Default avoids "coroutine raised StopIteration" when the nexus is gone.
    nexus = next((n for n in nexuses if n.uuid == expected_uuid), None)
    assert nexus is not None, f"nexus {expected_uuid} not found on ms3"

    assert nexus.state == pb.NEXUS_DEGRADED
    assert nexus.children[0].state == pb.CHILD_ONLINE
    assert nexus.children[1].state == pb.CHILD_FAULTED
def test_nexus_resv_key(
    create_nexus_v2, nexus_name, nexus_uuid, mayastors, resv_key
):
    """Test create_nexus_v2 replica NVMe reservation key.

    Connects to the first child of the nexus named ``nexus_name`` on ms3,
    reads its reservation report and checks the reservation type, the number
    of registered controllers, the PTPLS value, the controller ID, the
    reservation status bit and the registered key. The child is always
    disconnected afterwards.

    ``create_nexus_v2`` is requested only for its setup; its value is not
    used.
    """
    expected_uuid, _ = nexus_uuid

    nexuses = mayastors.get("ms3").nexus_list_v2()
    # Explicit lookup failure instead of an opaque StopIteration.
    nexus = next((n for n in nexuses if n.name == nexus_name), None)
    assert nexus is not None, f"nexus {nexus_name!r} not found on ms3"
    assert nexus.uuid == expected_uuid

    child_uri = nexus.children[0].uri
    dev = nvme_connect(child_uri)
    try:
        report = nvme_resv_report(dev)
        print(report)

        assert (
            report["rtype"] == 5
        ), "should have write exclusive, all registrants reservation"
        assert report["regctl"] == 1, "should have 1 registered controller"
        assert (
            report["ptpls"] == 0
        ), "should have Persist Through Power Loss State of 0"

        registrant = report["regctlext"][0]
        assert (
            registrant["cntlid"] == 0xFFFF
        ), "should have dynamic controller ID"
        # reservation status reserved
        assert (registrant["rcsts"] & 0x1) == 1
        assert registrant["rkey"] == resv_key
    finally:
        nvme_disconnect(child_uri)
def publish_nexuses(mayastors, nexus_node, create_nexuses):
    """Publish every nexus on ``nexus_node``; unpublish them on teardown.

    Yields the list of values returned by ``nexus_publish``, in the order the
    nexuses are listed. After the ``yield`` every nexus currently listed on
    the node is unpublished.

    ``create_nexuses`` is not used in the body; presumably it is a fixture
    dependency that only enforces setup ordering -- confirm.
    """
    handle = mayastors.get(nexus_node)

    published = [
        handle.nexus_publish(entry.uuid) for entry in handle.nexus_list()
    ]
    yield published

    for entry in handle.nexus_list():
        handle.nexus_unpublish(entry.uuid)
def create_nexuses(mayastors, create_replicas_on_all_nodes):
    """Create a nexus for each replica on each child node.

    Replica URIs are read from ms2 and ms3; the i-th replica of each node is
    grouped (via ``zip``) as the children of one nexus, which is created and
    published on ms1 under a fresh random UUID. Yields the list of values
    returned by ``nexus_publish``. After the ``yield`` every nexus listed on
    ms1 is unpublished and destroyed.

    ``create_replicas_on_all_nodes`` is not used in the body; presumably it
    is a fixture dependency that only enforces setup ordering -- confirm.
    """
    ms1 = mayastors.get("ms1")

    uris_per_node = []
    for node_name in ["ms2", "ms3"]:
        replicas = mayastors.get(node_name).replica_list().replicas
        uris_per_node.append([replica.uri for replica in replicas])

    published = []
    for child_uris in zip(*uris_per_node):
        nexus_id = guid.uuid4()
        ms1.nexus_create(nexus_id, 60 * 1024 * 1024, list(child_uris))
        published.append(ms1.nexus_publish(nexus_id))

    yield published

    for nexus in ms1.nexus_list():
        nexus_id = nexus.uuid
        ms1.nexus_unpublish(nexus_id)
        ms1.nexus_destroy(nexus_id)
async def test_nexus_2_remote_mirror_kill_one(
    containers, mayastors, nexus_uuid, create_nexus
):
    """
    This test does the following steps:

        - creates mayastor instances
        - creates pools on mayastor 1 and 2
        - creates replicas on those pools
        - creates a nexus on mayastor 3
        - starts fio on a remote VM (vixos1) for 15 seconds
        - kills mayastor 2 after 4 seconds
        - assume the test to succeed
        - disconnect the VM from mayastor 3 when FIO completes
        - removes the nexus from mayastor 3
        - removes the replicas but as mayastor 2 is down, will swallow errors
        - removes the pool

    The bulk of this is done by reusing fixtures; those fixtures are not as
    generic as one might like at this point, so look/determine if you need
    them to begin with.

    By yielding from fixtures, after the tests the function is resumed where
    yield is called.
    """
    uri = create_nexus
    dev = nvme_connect(uri)
    try:
        job = Fio("job1", "randwrite", dev).build()
        print(job)

        to_kill = containers.get("ms2")

        # create an event loop polling the async processes for completion
        await asyncio.gather(run_cmd_async(job), kill_after(to_kill, 4))

        nexuses = mayastors.get("ms3").nexus_list()
        expected_uuid, _ = nexus_uuid
        # Default avoids "coroutine raised StopIteration" on a missing nexus.
        nexus = next((n for n in nexuses if n.uuid == expected_uuid), None)
        assert nexus is not None, f"nexus {expected_uuid} not found on ms3"

        assert nexus.state == pb.NEXUS_DEGRADED
        assert nexus.children[1].state == pb.CHILD_FAULTED
    finally:
        # disconnect target before we shutdown
        nvme_disconnect(uri)
async def mkfs_on_target(target_vm, mayastors):
    """Connect 15 replicas exported by ms0 to ``target_vm`` and format them.

    For ``replica-0`` .. ``replica-14`` an NVMe-oF connection is made from
    ``target_vm`` to ms0's IP address on port 8420, the block devices are
    listed with ``lsblk`` (output is printed), and ``mkfs.xfs`` is run on
    each connected device.

    Every target that was successfully connected is disconnected again, even
    if listing or formatting fails, so a failure does not leave stale NVMe
    connections on the VM.
    """
    host_ip = mayastors.get("ms0").ip_address()
    target_uris = [
        f"nvmf://{host_ip}:8420/nqn.2019-05.io.openebs:replica-{i}"
        for i in range(15)
    ]

    connected_uris = []
    remote_devices = []
    try:
        for uri in target_uris:
            remote_devices.append(await nvme_remote_connect(target_vm, uri))
            # Track only what actually connected so cleanup matches reality.
            connected_uris.append(uri)

        print(await run_cmd_async_at(target_vm, "lsblk -o name,fstype -J"))

        for device in remote_devices:
            await run_cmd_async_at(target_vm, f"sudo mkfs.xfs {device}")
    finally:
        for uri in connected_uris:
            await nvme_remote_disconnect(target_vm, uri)
async def test_nexus_2_remote_mirror_kill_one_spdk(
    containers, mayastors, nexus_uuid, create_nexus
):
    """
    Identical to test_nexus_2_remote_mirror_kill_one except fio uses the SPDK
    ioengine: run a random-write fio job against the nexus URI, kill ms2
    after 4 seconds, then verify that the nexus on ms3 is degraded and its
    second child is faulted.
    """
    uri = create_nexus

    job = FioSpdk("job1", "randwrite", uri).build()
    print(job)

    to_kill = containers.get("ms2")
    await asyncio.gather(run_cmd_async(job), kill_after(to_kill, 4))

    nexuses = mayastors.get("ms3").nexus_list()
    expected_uuid, _ = nexus_uuid
    # Default avoids "coroutine raised StopIteration" on a missing nexus.
    nexus = next((n for n in nexuses if n.uuid == expected_uuid), None)
    assert nexus is not None, f"nexus {expected_uuid} not found on ms3"

    assert nexus.state == pb.NEXUS_DEGRADED
    assert nexus.children[1].state == pb.CHILD_FAULTED
def test_restart(containers, mayastors, create_replicas_on_all_nodes, times):
    """
    Check that the replica count on ms1 survives restarts.

    ms1 is killed and restarted, its pool is re-created (which imports the
    existing pool if found) and NEXUS_COUNT replicas are expected.
    DESTROY_COUNT replicas are then destroyed, ms1 is restarted again, and
    the remaining count plus DESTROY_COUNT must equal NEXUS_COUNT.

    ``create_replicas_on_all_nodes`` and ``times`` are not used in the body;
    presumably they are fixture/parametrization inputs -- confirm.
    """
    container = containers.get("ms1")
    handle = mayastors.get("ms1")

    def restart_and_list_replicas():
        # kill the node and bring it back
        container.kill()
        container.start()
        # must reconnect grpc
        handle.reconnect()
        # create does import here if found
        handle.pool_create("ms1", "aio:///tmp/ms1.img")
        return handle.replica_list().replicas

    # after the first restart we must still have NEXUS_COUNT replicas
    replicas = restart_and_list_replicas()
    assert len(replicas) == NEXUS_COUNT

    # destroy a few
    for index in range(DESTROY_COUNT):
        handle.replica_destroy(replicas[index].uuid)

    # restart (again) and verify the correct number of replicas remain
    replicas = restart_and_list_replicas()
    assert len(replicas) + DESTROY_COUNT == NEXUS_COUNT
def delete_volumes(mayastors):
    """Destroy replicas ``replica-0`` .. ``replica-14`` on ms0."""
    destroy = mayastors.get("ms0").replica_destroy
    for index in range(15):
        destroy(f"replica-{index}")
def create_volumes(mayastors):
    """Create replicas ``replica-0`` .. ``replica-14`` in pool ``tpool`` on ms0.

    Every replica is created with a size argument of ``4 * 1024 * 1024``.
    """
    replica_size = 4 * 1024 * 1024
    create = mayastors.get("ms0").replica_create
    for index in range(15):
        create("tpool", f"replica-{index}", replica_size)
def create_pool(mayastors):
    """Create pool ``tpool`` on ms0 backed by ``aio:///dev/sda3``.

    Yields nothing; after the ``yield`` the pool is destroyed again.
    """
    pool_name = "tpool"
    handle = mayastors.get("ms0")

    handle.pool_create(pool_name, "aio:///dev/sda3")
    yield
    handle.pool_destroy(pool_name)
async def test_null_nexus(mayastors, nexus_node, connect_devices):
    """Check the nexus state on ``nexus_node``, then run a random-write fio
    job against ``connect_devices`` and wait for it to finish."""
    check_nexus_state(mayastors.get(nexus_node))

    fio_command = Fio("job1", "randwrite", connect_devices).build()
    await run_cmd_async(fio_command)
def test_nexus_preempt_key(
    create_nexus_v2,
    create_nexus_2_v2,
    nexus_name,
    nexus_uuid,
    mayastors,
    resv_key_2,
):
    """Create a nexus on ms3 and ms0, with the latter preempting the NVMe
    reservation key registered by ms3, verify that ms3 is no longer registered.
    Verify that writes succeed via the nexus on ms0 but not ms3.

    Steps, in order:
        1. The nexus on ms3 is online with both children online.
        2. The reservation report of its first child shows a single
           registrant holding ``resv_key_2``.
        3. A one-block ``dd`` write through the ms0 nexus leaves that nexus
           and both children online.
        4. A one-block ``dd`` write through the ms3 nexus leaves that nexus
           and both children faulted.
    """
    expected_uuid, _ = nexus_uuid

    ms3_nexuses = mayastors.get("ms3").nexus_list_v2()
    # Explicit lookup failure instead of an opaque StopIteration.
    nexus = next((n for n in ms3_nexuses if n.name == nexus_name), None)
    assert nexus is not None, f"nexus {nexus_name!r} not found on ms3"
    assert nexus.uuid == expected_uuid
    child_uri = nexus.children[0].uri
    assert nexus.state == pb.NEXUS_ONLINE
    assert nexus.children[0].state == pb.CHILD_ONLINE
    assert nexus.children[1].state == pb.CHILD_ONLINE

    dev = nvme_connect(child_uri)
    try:
        report = nvme_resv_report(dev)
        print(report)

        assert (
            report["rtype"] == 5
        ), "should have write exclusive, all registrants reservation"
        assert report["regctl"] == 1, "should have 1 registered controller"
        assert (
            report["ptpls"] == 0
        ), "should have Persist Through Power Loss State of 0"

        registrant = report["regctlext"][0]
        assert (
            registrant["cntlid"] == 0xFFFF
        ), "should have dynamic controller ID"
        # reservation status reserved
        assert (registrant["rcsts"] & 0x1) == 1
        assert registrant["rkey"] == resv_key_2
    finally:
        nvme_disconnect(child_uri)

    # verify write with nexus on ms0
    uri = create_nexus_2_v2
    dev = nvme_connect(uri)
    job = f"sudo dd if=/dev/urandom of={dev} bs=512 count=1"
    try:
        run_cmd(job)
    finally:
        nvme_disconnect(uri)

    ms0_nexuses = mayastors.get("ms0").nexus_list_v2()
    nexus = next((n for n in ms0_nexuses if n.name == nexus_name), None)
    assert nexus is not None, f"nexus {nexus_name!r} not found on ms0"
    assert nexus.state == pb.NEXUS_ONLINE
    assert nexus.children[0].state == pb.CHILD_ONLINE
    assert nexus.children[1].state == pb.CHILD_ONLINE

    # verify write error with nexus on ms3
    # NOTE(review): the checks below only run if run_cmd does not raise when
    # the write is rejected -- confirm run_cmd's failure behavior.
    uri = create_nexus_v2
    dev = nvme_connect(uri)
    job = f"sudo dd if=/dev/urandom of={dev} bs=512 count=1"
    try:
        run_cmd(job)
    finally:
        nvme_disconnect(uri)

    ms3_nexuses = mayastors.get("ms3").nexus_list_v2()
    nexus = next((n for n in ms3_nexuses if n.name == nexus_name), None)
    assert nexus is not None, f"nexus {nexus_name!r} not found on ms3"
    assert nexus.state == pb.NEXUS_FAULTED
    assert nexus.children[0].state == pb.CHILD_FAULTED
    assert nexus.children[1].state == pb.CHILD_FAULTED