def test_multiple_volumes(pm_client, em_client):  # NOQA
    """Bring up several single-replica volumes, then delete their engines.

    For every index a replica process is started in a fresh temporary
    directory and verified three ways (creation response, ``process_get``
    and ``process_list``).  An engine process is then created on top of
    that single replica, ``check_dev_existence`` is called for the volume
    and the engine is verified through the engine manager's process list.
    Afterwards the engines are deleted one by one and the engine list is
    expected to shrink by one each time.
    """
    assert len(pm_client.process_list()) == 0

    volume_count = 5

    for idx in range(volume_count):
        suffix = str(idx)
        started = idx + 1

        replica_dir = tempfile.mkdtemp()
        replica_name = REPLICA_NAME_BASE + suffix
        replica = create_replica_process(pm_client, name=replica_name,
                                         replica_dir=replica_dir)
        assert replica.spec.name == replica_name
        assert replica.status.state == PROC_STATE_RUNNING

        replica = pm_client.process_get(name=replica_name)
        assert replica.spec.name == replica_name
        assert replica.status.state == PROC_STATE_RUNNING

        listed_replicas = pm_client.process_list()
        assert len(listed_replicas) == started
        assert replica_name in listed_replicas
        replica = listed_replicas[replica_name]
        assert replica.spec.name == replica_name
        assert replica.status.state == PROC_STATE_RUNNING

        # Each volume is backed by exactly one replica.
        replica_urls = ["tcp://localhost:" + str(replica.status.port_start)]

        engine_name = ENGINE_NAME_BASE + suffix
        volume_name = VOLUME_NAME_BASE + suffix
        engine = create_engine_process(em_client, name=engine_name,
                                       volume_name=volume_name,
                                       replicas=replica_urls)
        assert engine.spec.name == engine_name
        check_dev_existence(volume_name)

        listed_engines = em_client.process_list()
        assert len(listed_engines) == started
        assert engine_name in listed_engines
        engine = listed_engines[engine_name]
        assert engine.spec.name == engine_name
        assert engine.status.state == PROC_STATE_RUNNING

        assert len(pm_client.process_list()) == started

    for idx in range(volume_count):
        suffix = str(idx)
        engine_name = ENGINE_NAME_BASE + suffix
        volume_name = VOLUME_NAME_BASE + suffix

        delete_process(em_client, engine_name)
        wait_for_process_deletion(em_client, engine_name)
        wait_for_dev_deletion(volume_name)

        remaining_engines = em_client.process_list()
        assert len(remaining_engines) == (volume_count - 1 - idx)
        assert engine_name not in remaining_engines
def test_expand_multiple_times():
    """Run the start-then-expand cycle on a one-replica volume 30 times.

    Every round creates an engine process and a replica process, starts the
    volume on that replica, expands it to ``EXPANDED_SIZE``, waits for the
    expansion check and finally cleans up both process managers.
    """
    rounds = 30

    for _ in range(rounds):
        em_client = ProcessManagerClient(INSTANCE_MANAGER_ENGINE)
        engine_address = get_process_address(
            create_engine_process(em_client))
        controller = ControllerClient(engine_address)

        rm_client = ProcessManagerClient(INSTANCE_MANAGER_REPLICA)
        replica_address = get_process_address(
            create_replica_process(rm_client, REPLICA_NAME))
        replica = ReplicaClient(replica_address)
        # Fixed pause before the first call to the new replica process.
        time.sleep(3)
        cleanup_replica(replica)
        open_replica(replica)

        volume = controller.volume_start(replicas=[replica.url])
        assert volume.replicaCount == 1

        expand_volume_with_frontend(controller, EXPANDED_SIZE)
        wait_and_check_volume_expansion(controller, EXPANDED_SIZE)

        cleanup_process(em_client)
        cleanup_process(rm_client)
def generate_grpc_controller_client(engine_name, volume_name,
                                    frontend=FRONTEND_TGT_BLOCKDEV):
    """Create an engine process and return a controller client for it.

    The client is also stored as ``grpc_controller_client.process_client``.
    Neither ``engine_manager_client`` nor ``grpc_controller_client`` is a
    parameter here; both are resolved from the surrounding scope.
    """
    engine_process = create_engine_process(engine_manager_client,
                                           name=engine_name,
                                           volume_name=volume_name,
                                           frontend=frontend)
    client = ControllerClient(get_process_address(engine_process))
    grpc_controller_client.process_client = client
    return client
def engine_replica_mismatch(em_client, engine_rev_counter_disabled):  # NOQA
    """Start an engine with one matching and one mismatching replica.

    Replica 1 is created with the same ``disable_revision_counter`` value
    as the engine, replica 2 with the opposite value.  After
    ``volume_start`` the controller must list replica 1 in mode ``RW`` and
    replica 2 in mode ``ERR``.

    Args:
        em_client: process-manager client used to create the engine
            process; it is cleaned up at the end.
        engine_rev_counter_disabled: value passed as
            ``disable_revision_counter`` to the engine and to replica 1.
            Replica 2 receives its negation.
    """
    rm_client = ProcessManagerClient(INSTANCE_MANAGER_REPLICA)
    replica_dir1 = tempfile.mkdtemp()
    replica_dir2 = tempfile.mkdtemp()

    # Replica 1: same revision-counter setting as the engine.
    replica_process1 = create_replica_process(
        rm_client, REPLICA_NAME, replica_dir=replica_dir1,
        disable_revision_counter=engine_rev_counter_disabled)
    grpc_replica_client1 = get_replica_client_with_delay(
        ReplicaClient(get_process_address(replica_process1)))
    grpc_replica_client1.replica_create(size=SIZE_STR)

    # Replica 2: deliberately the opposite setting.
    replica_process2 = create_replica_process(
        rm_client, REPLICA_2_NAME, replica_dir=replica_dir2,
        disable_revision_counter=not engine_rev_counter_disabled)
    grpc_replica_client2 = get_replica_client_with_delay(
        ReplicaClient(get_process_address(replica_process2)))
    grpc_replica_client2.replica_create(size=SIZE_STR)

    engine_process = create_engine_process(
        em_client, disable_revision_counter=engine_rev_counter_disabled)
    grpc_controller_client = ControllerClient(
        get_process_address(engine_process))

    r1_url = grpc_replica_client1.url
    r2_url = grpc_replica_client2.url
    v = grpc_controller_client.volume_start(replicas=[
        r1_url,
        r2_url,
    ])
    assert v.replicaCount == 2

    # Replica 1 (matching) must be `RW`; replica 2 (mismatching) must be
    # `ERR`.
    rs = grpc_controller_client.replica_list()
    assert len(rs) == 2
    r1_verified = False
    r2_verified = False
    for r in rs:
        if r.address == r1_url:
            assert r.mode == 'RW'
            r1_verified = True
        if r.address == r2_url:
            assert r.mode == 'ERR'
            r2_verified = True
    # Both replicas must actually have appeared in the listing.
    assert r1_verified
    assert r2_verified

    cleanup_process(em_client)
    cleanup_process(rm_client)
def grpc_controller_client(request, engine_manager_client):
    """Create an engine process and return a ``ControllerClient`` for it.

    ``get_controller_version_detail`` is called on the new client before it
    is returned; its result is discarded.  ``request`` is accepted but not
    used in this body.
    """
    engine_process = create_engine_process(engine_manager_client)
    client = ControllerClient(get_process_address(engine_process))
    get_controller_version_detail(client)
    return client
def test_engine_restart_after_sigkill(bin):  # NOQA
    """
    Test if engine can be restarted after crashing by SIGKILL.

    1. Create then initialize 1 engine and 2 replicas.
    2. Start the engine.
    3. Create 2 snapshots.
    4. Use SIGKILL to kill the engine process.
    5. Wait for the engine errored.
    6. Mock volume detachment by deleting the engine process and
       replicas processes.
    7. Mock volume reattachment by recreating processes and re-starting
       the engine.
    8. Check if the engine is up with 2 replicas.
    9. Check if the engine still works fine by
       creating/removing/purging snapshots.
    """
    def launch_replica(pm_client, name, data_dir):
        # Start a replica process on data_dir and return a gRPC client for
        # it after the fixed 3-second pause.
        process = create_replica_process(pm_client, name,
                                         replica_dir=data_dir)
        client = ReplicaClient(get_process_address(process))
        time.sleep(3)
        return client

    def launch_engine(pm_client):
        # Start an engine process and return a controller client for it.
        process = create_engine_process(pm_client)
        return ControllerClient(get_process_address(process))

    def snapshot_cmd(controller, *args):
        # Command line for `<bin> --url <controller> snapshot <args...>`.
        return [bin, '--url', controller.address, 'snapshot'] + list(args)

    em_client = ProcessManagerClient(INSTANCE_MANAGER_ENGINE)
    controller = launch_engine(em_client)

    rm_client = ProcessManagerClient(INSTANCE_MANAGER_REPLICA)
    replica_dirs = (tempfile.mkdtemp(), tempfile.mkdtemp())
    replica_names = (REPLICA_NAME, REPLICA_2_NAME)

    replica_clients = []
    for name, data_dir in zip(replica_names, replica_dirs):
        replica = launch_replica(rm_client, name, data_dir)
        cleanup_replica(replica)
        replica_clients.append(replica)
    for replica in replica_clients:
        open_replica(replica)

    replica_urls = [replica.url for replica in replica_clients]
    volume = controller.volume_start(replicas=replica_urls)
    assert volume.replicaCount == 2

    snap0 = subprocess.check_output(
        snapshot_cmd(controller, 'create'), encoding='utf-8').strip()
    snap0_file = replica_clients[0].replica_get().chain[1]
    assert snap0_file == 'volume-snap-{}.img'.format(snap0)

    snap1 = subprocess.check_output(
        snapshot_cmd(controller, 'create',
                     '--label', 'name=snap1', '--label', 'key=value'),
        encoding='utf-8').strip()

    # SIGKILL every process whose `ps aux` line mentions the volume name.
    kill_cmd = [
        "bash", "-c",
        "kill -9 $(ps aux | grep %s | grep -v grep | awk '{print $2}')"
        % VOLUME_NAME]
    subprocess.check_call(kill_cmd)
    wait_for_process_error(em_client, ENGINE_NAME)

    # Mock detach:
    cleanup_process(em_client)
    cleanup_process(rm_client)

    # Mock reattach:
    #   1. Directly create replicas processes.
    #   2. Call replica_create() to init replica servers for replica
    #      processes.
    #   3. Create one engine process and start the engine with replicas.
    replica_clients = []
    for name, data_dir in zip(replica_names, replica_dirs):
        replica = launch_replica(rm_client, name, data_dir)
        replica.replica_create(size=SIZE_STR)
        replica_clients.append(replica)

    controller = launch_engine(em_client)
    replica_urls = [replica.url for replica in replica_clients]
    volume = controller.volume_start(replicas=replica_urls)
    assert volume.replicaCount == 2

    # Verify the engine still works fine
    snap2 = subprocess.check_output(
        snapshot_cmd(controller, 'create'), encoding='utf-8').strip()
    subprocess.check_call(snapshot_cmd(controller, 'rm', snap1))
    subprocess.check_call(snapshot_cmd(controller, 'purge'))
    wait_for_purge_completion(controller.address)

    ls_cmd = [bin, '--debug', '--url', controller.address,
              'snapshot', 'ls']
    ls_output = subprocess.check_output(ls_cmd, encoding='utf-8')
    # NOTE(review): the expected text is reproduced exactly as it appears
    # in the source; the triple-quoted literal suggests it may originally
    # have spanned several lines -- confirm against real `snapshot ls`
    # output.
    assert ls_output == '''ID {} {} '''.format(snap2, snap0)

    cleanup_process(em_client)
    cleanup_process(rm_client)
def test_single_replica_failure_during_engine_start(bin):  # NOQA
    """
    Test if engine still works fine if there is an invalid
    replica/backend in the starting phase

    1. Create then initialize 1 engine and 2 replicas.
    2. Start the engine.
    3. Create 2 snapshots.
    4. Mess up the replica1 by manually modifying the snapshot meta file.
    5. Mock volume detachment by deleting the engine process and
       replicas processes.
    6. Mock volume reattachment by recreating processes and re-starting
       the engine.
    7. Check if the engine is up and if replica1 is mode ERR in the
       engine.
    8. Check if the engine still works fine by creating one more
       snapshot.
    9. Remove the ERR replica from the engine then check snapshot remove
       and snapshot purge work fine.
    10. Check if the snapshot list is correct.
    """
    def launch_replica(pm_client, name, data_dir):
        # Start a replica process on data_dir and return a gRPC client for
        # it after the fixed 3-second pause.
        process = create_replica_process(pm_client, name,
                                         replica_dir=data_dir)
        client = ReplicaClient(get_process_address(process))
        time.sleep(3)
        return client

    def launch_engine(pm_client):
        # Start an engine process and return a controller client for it.
        process = create_engine_process(pm_client)
        return ControllerClient(get_process_address(process))

    def snapshot_cmd(controller, *args):
        # Command line for `<bin> --url <controller> snapshot <args...>`.
        return [bin, '--url', controller.address, 'snapshot'] + list(args)

    em_client = ProcessManagerClient(INSTANCE_MANAGER_ENGINE)
    controller = launch_engine(em_client)

    rm_client = ProcessManagerClient(INSTANCE_MANAGER_REPLICA)
    replica_dirs = (tempfile.mkdtemp(), tempfile.mkdtemp())
    replica_names = (REPLICA_NAME, REPLICA_2_NAME)

    replica_clients = []
    for name, data_dir in zip(replica_names, replica_dirs):
        replica = launch_replica(rm_client, name, data_dir)
        cleanup_replica(replica)
        replica_clients.append(replica)
    for replica in replica_clients:
        open_replica(replica)

    r1_url, r2_url = [replica.url for replica in replica_clients]
    volume = controller.volume_start(replicas=[r1_url, r2_url])
    assert volume.replicaCount == 2

    snap0 = subprocess.check_output(
        snapshot_cmd(controller, 'create'), encoding='utf-8').strip()
    snap0_file = replica_clients[0].replica_get().chain[1]
    assert snap0_file == 'volume-snap-{}.img'.format(snap0)

    snap1 = subprocess.check_output(
        snapshot_cmd(controller, 'create',
                     '--label', 'name=snap1', '--label', 'key=value'),
        encoding='utf-8').strip()

    # Mess up the replica1 by manually modifying the snapshot meta file:
    # point snap1's "Parent" at an image name that does not exist.
    snap1_meta_path = os.path.join(
        replica_dirs[0], 'volume-snap-{}.img.meta'.format(snap1))
    with open(snap1_meta_path, 'r') as meta_file:
        snap1_meta = json.load(meta_file)
    with open(snap1_meta_path, 'w') as meta_file:
        snap1_meta["Parent"] = "invalid-parent.img"
        json.dump(snap1_meta, meta_file)

    # Mock detach:
    cleanup_process(em_client)
    cleanup_process(rm_client)

    # Mock reattach:
    #   1. Directly create replicas processes.
    #   2. Call replica_create() to init replica servers for replica
    #      processes.
    #   3. Create one engine process and start the engine with replicas.
    replica_clients = []
    for name, data_dir in zip(replica_names, replica_dirs):
        replica = launch_replica(rm_client, name, data_dir)
        replica.replica_create(size=SIZE_STR)
        replica_clients.append(replica)

    controller = launch_engine(em_client)
    r1_url, r2_url = [replica.url for replica in replica_clients]
    volume = controller.volume_start(replicas=[r1_url, r2_url])
    assert volume.replicaCount == 2

    # Check if replica1 is mode `ERR` (and replica2 is still `RW`)
    listed = controller.replica_list()
    assert len(listed) == 2
    r1_verified = r2_verified = False
    for entry in listed:
        if entry.address == r1_url:
            assert entry.mode == 'ERR'
            r1_verified = True
        if entry.address == r2_url:
            assert entry.mode == 'RW'
            r2_verified = True
    assert r1_verified
    assert r2_verified

    # The engine still works fine
    snap2 = subprocess.check_output(
        snapshot_cmd(controller, 'create'), encoding='utf-8').strip()

    # Remove the ERR replica before removing snapshots
    controller.replica_delete(r1_url)
    listed = controller.replica_list()
    assert len(listed) == 1
    survivor = listed[0]
    assert survivor.address == r2_url
    assert survivor.mode == "RW"

    subprocess.check_call(snapshot_cmd(controller, 'rm', snap1))
    subprocess.check_call(snapshot_cmd(controller, 'purge'))
    wait_for_purge_completion(controller.address)

    ls_cmd = [bin, '--debug', '--url', controller.address,
              'snapshot', 'ls']
    ls_output = subprocess.check_output(ls_cmd, encoding='utf-8')
    # NOTE(review): the expected text is reproduced exactly as it appears
    # in the source; the triple-quoted literal suggests it may originally
    # have spanned several lines -- confirm against real `snapshot ls`
    # output.
    assert ls_output == '''ID {} {} '''.format(snap2, snap0)

    cleanup_process(em_client)
    cleanup_process(rm_client)
def test_engine_upgrade(pm_client, em_client):  # NOQA
    """Upgrade the engine of volume 0 onto a replica run by another binary.

    Three single-replica volumes are created first, each replica using the
    fixed directory ``/tmp/replica<i>``.  A second replica process for
    volume 0 is then started from ``LONGHORN_UPGRADE_BINARY`` on the same
    directory, ``upgrade_engine`` is called with that replica, and the
    original replica process is deleted.  ``check_dev_existence`` is called
    after the upgrade and again after the old replica is gone.  Finally the
    upgraded engine is deleted.
    """
    assert len(pm_client.process_list()) == 0

    dir_base = "/tmp/replica"
    volume_count = 3

    for idx in range(volume_count):
        suffix = str(idx)
        started = idx + 1

        replica_dir = dir_base + suffix
        replica_name = REPLICA_NAME_BASE + suffix
        replica = create_replica_process(pm_client, name=replica_name,
                                         replica_dir=replica_dir)
        assert replica.spec.name == replica_name
        assert replica.status.state == PROC_STATE_RUNNING

        replica = pm_client.process_get(name=replica_name)
        assert replica.spec.name == replica_name
        assert replica.status.state == PROC_STATE_RUNNING

        listed_replicas = pm_client.process_list()
        assert len(listed_replicas) == started
        assert replica_name in listed_replicas
        replica = listed_replicas[replica_name]
        assert replica.spec.name == replica_name
        assert replica.status.state == PROC_STATE_RUNNING

        replica_urls = ["tcp://localhost:" + str(replica.status.port_start)]

        engine_name = ENGINE_NAME_BASE + suffix
        volume_name = VOLUME_NAME_BASE + suffix
        engine = create_engine_process(em_client, name=engine_name,
                                       volume_name=volume_name,
                                       replicas=replica_urls)
        assert engine.spec.name == engine_name
        check_dev_existence(volume_name)

        listed_engines = em_client.process_list()
        assert len(listed_engines) == started
        assert engine_name in listed_engines
        engine = listed_engines[engine_name]
        assert engine.spec.name == engine_name
        assert engine.status.state == PROC_STATE_RUNNING

    # Only volume 0 is upgraded; the new replica reuses its directory.
    replica_dir = dir_base + "0"
    engine_name = ENGINE_NAME_BASE + "0"
    replica_name = REPLICA_NAME_BASE + "0"
    volume_name = VOLUME_NAME_BASE + "0"
    replica_name_upgrade = REPLICA_NAME_BASE + "0-upgrade"

    upgrade_replica = create_replica_process(pm_client,
                                             name=replica_name_upgrade,
                                             binary=LONGHORN_UPGRADE_BINARY,
                                             replica_dir=replica_dir)
    assert upgrade_replica.spec.name == replica_name_upgrade
    assert upgrade_replica.status.state == PROC_STATE_RUNNING

    upgrade_urls = [
        "tcp://localhost:" + str(upgrade_replica.status.port_start)]
    engine = upgrade_engine(em_client, LONGHORN_UPGRADE_BINARY,
                            engine_name, volume_name, upgrade_urls)
    assert engine.spec.name == engine_name
    check_dev_existence(volume_name)

    # Remove the pre-upgrade replica process.
    old_replica = pm_client.process_delete(name=replica_name)
    assert old_replica.spec.name == replica_name
    assert old_replica.status.state in (PROC_STATE_STOPPING,
                                        PROC_STATE_STOPPED)
    wait_for_process_deletion(pm_client, replica_name)

    check_dev_existence(volume_name)

    wait_for_process_running(em_client, engine_name)
    listed_engines = em_client.process_list()
    assert engine_name in listed_engines
    engine = listed_engines[engine_name]
    assert engine.spec.name == engine_name
    assert engine.status.state == PROC_STATE_RUNNING

    delete_process(em_client, engine_name)
    wait_for_process_deletion(em_client, engine_name)
    wait_for_dev_deletion(volume_name)
def test_one_volume(pm_client, em_client):  # NOQA
    """Create one volume backed by three replicas, then tear it down.

    Three replica processes are started, each in a fresh temporary
    directory, and each is verified through the creation response,
    ``process_get`` and ``process_list``.  One engine process is created on
    top of all three replicas.  The engine is then deleted, with
    ``delete_process`` called again both before and after the deletion has
    completed, and the replica count is expected to stay at three.  Finally
    the replica processes are deleted one by one.

    Args:
        pm_client: process-manager client used for the replica processes.
        em_client: process-manager client used for the engine process.
    """
    rs = pm_client.process_list()
    assert len(rs) == 0

    replica_args = []

    for i in range(3):
        tmp_dir = tempfile.mkdtemp()
        name = REPLICA_NAME_BASE + str(i)
        r = create_replica_process(pm_client, name=name, replica_dir=tmp_dir)
        assert r.spec.name == name
        assert r.status.state == PROC_STATE_RUNNING

        r = pm_client.process_get(name=name)
        assert r.spec.name == name
        assert r.status.state == PROC_STATE_RUNNING

        rs = pm_client.process_list()
        assert len(rs) == (i + 1)
        assert name in rs
        # Inspect the entry returned by process_list() itself; without this
        # rebinding the two assertions below would only re-check the
        # process_get() result a second time.
        r = rs[name]
        assert r.spec.name == name
        assert r.status.state == PROC_STATE_RUNNING

        replica_args.append("tcp://localhost:" + str(r.status.port_start))

    engine_name = ENGINE_NAME_BASE + "0"
    volume_name = VOLUME_NAME_BASE + "0"
    e = create_engine_process(em_client, name=engine_name,
                              volume_name=volume_name,
                              replicas=replica_args)
    assert e.spec.name == engine_name
    check_dev_existence(volume_name)

    es = em_client.process_list()
    assert len(es) == 1
    assert engine_name in es
    e = es[engine_name]
    assert e.spec.name == engine_name
    assert e.status.state == PROC_STATE_RUNNING

    ps = pm_client.process_list()
    assert len(ps) == 3

    delete_process(em_client, engine_name)
    # test duplicate call
    delete_process(em_client, engine_name)
    wait_for_process_deletion(em_client, engine_name)
    # test duplicate call
    delete_process(em_client, engine_name)

    # Deleting the engine must not remove any replica process.
    ps = pm_client.process_list()
    assert len(ps) == 3

    for i in range(3):
        name = REPLICA_NAME_BASE + str(i)
        r = pm_client.process_delete(name=name)
        assert r.spec.name == name
        assert r.status.state in (PROC_STATE_STOPPING, PROC_STATE_STOPPED)
        wait_for_process_deletion(pm_client, name)

    ps = pm_client.process_list()
    assert len(ps) == 0
def grpc_controller_client(request, engine_manager_client):
    """Create an engine process and return a ``ControllerClient`` for it.

    ``request`` is accepted but not used in this body.
    """
    engine_process = create_engine_process(engine_manager_client)
    engine_address = get_process_address(engine_process)
    return ControllerClient(engine_address)