def test_expand_multiple_times():
    """
    Start a single-replica volume and expand it, 30 times in a row.

    Each round:
    1. Create one engine process and one replica process.
    2. Call cleanup_replica() and open_replica() on the replica, then
       start the volume with that replica.
    3. Expand the volume to EXPANDED_SIZE and wait until the expansion
       is confirmed.
    4. Delete the engine and replica processes. This step runs even if
       an earlier step of the round fails, so that a failure does not
       leave processes behind for the following tests.
    """
    for _ in range(30):
        em_client = ProcessManagerClient(INSTANCE_MANAGER_ENGINE)
        rm_client = ProcessManagerClient(INSTANCE_MANAGER_REPLICA)
        try:
            engine_process = create_engine_process(em_client)
            grpc_controller_client = ControllerClient(
                get_process_address(engine_process))

            replica_process = create_replica_process(rm_client, REPLICA_NAME)
            grpc_replica_client = ReplicaClient(
                get_process_address(replica_process))
            # NOTE(review): presumably gives the replica process time to
            # come up before it is used -- confirm.
            time.sleep(3)
            cleanup_replica(grpc_replica_client)

            open_replica(grpc_replica_client)
            r1_url = grpc_replica_client.url
            v = grpc_controller_client.volume_start(replicas=[
                r1_url,
            ])
            assert v.replicaCount == 1

            expand_volume_with_frontend(
                grpc_controller_client, EXPANDED_SIZE)
            wait_and_check_volume_expansion(
                grpc_controller_client, EXPANDED_SIZE)
        finally:
            cleanup_process(em_client)
            cleanup_process(rm_client)
def rm_backups(url, engine_name, backups):
    """
    Remove each backup in `backups` and verify it can no longer be used.

    After `cmd.backup_rm`, both a restore attempt and `cmd.backup_inspect`
    on the same backup are expected to fail with
    subprocess.CalledProcessError. Once all backups are processed the
    frontend is started again with FRONTEND_TGT_BLOCKDEV.
    """
    expected_failure = subprocess.CalledProcessError

    for backup in backups:
        cmd.backup_rm(url, backup)

        with pytest.raises(expected_failure):
            restore_with_frontend(url, engine_name, backup)

        with pytest.raises(expected_failure):
            cmd.backup_inspect(url, backup)

    # Engine frontend is down, Start it up
    controller = ControllerClient(url)
    controller.volume_frontend_start(FRONTEND_TGT_BLOCKDEV)
def engine_replica_mismatch(em_client, engine_rev_counter_disabled):  # NOQA
    """
    Start an engine with one replica whose revision counter setting
    matches the engine and one replica whose setting is the opposite.

    Replica 1 and the engine are created with
    `disable_revision_counter=engine_rev_counter_disabled`; replica 2 is
    created with the negated value. After `volume_start` the engine must
    list both replicas, with replica 1 in mode 'RW' and replica 2 in
    mode 'ERR'.

    The engine and replica processes are deleted at the end, including
    when one of the checks fails.

    :param em_client: process manager client used for the engine process.
    :param engine_rev_counter_disabled: value passed as
        `disable_revision_counter` for the engine and replica 1.
    """
    rm_client = ProcessManagerClient(INSTANCE_MANAGER_REPLICA)
    try:
        replica_dir1 = tempfile.mkdtemp()
        replica_dir2 = tempfile.mkdtemp()

        # Replica 1: same revision counter setting as the engine.
        replica_process1 = create_replica_process(
            rm_client, REPLICA_NAME, replica_dir=replica_dir1,
            disable_revision_counter=engine_rev_counter_disabled)
        grpc_replica_client1 = get_replica_client_with_delay(
            ReplicaClient(get_process_address(replica_process1)))
        grpc_replica_client1.replica_create(size=SIZE_STR)

        # Replica 2: opposite revision counter setting.
        replica_process2 = create_replica_process(
            rm_client, REPLICA_2_NAME, replica_dir=replica_dir2,
            disable_revision_counter=not engine_rev_counter_disabled)
        grpc_replica_client2 = get_replica_client_with_delay(
            ReplicaClient(get_process_address(replica_process2)))
        grpc_replica_client2.replica_create(size=SIZE_STR)

        engine_process = create_engine_process(
            em_client, disable_revision_counter=engine_rev_counter_disabled)
        grpc_controller_client = ControllerClient(
            get_process_address(engine_process))

        r1_url = grpc_replica_client1.url
        r2_url = grpc_replica_client2.url
        v = grpc_controller_client.volume_start(replicas=[
            r1_url, r2_url,
        ])
        assert v.replicaCount == 2

        # Check that the matching replica1 is `RW` and the mismatched
        # replica2 is `ERR`. A replica missing from the list yields None
        # here and fails the corresponding assertion.
        rs = grpc_controller_client.replica_list()
        assert len(rs) == 2
        modes = {r.address: r.mode for r in rs}
        assert modes.get(r1_url) == 'RW'
        assert modes.get(r2_url) == 'ERR'
    finally:
        cleanup_process(em_client)
        cleanup_process(rm_client)
def restore_with_frontend(url, engine_name, backup):
    """
    Restore `backup` on the engine at `url` with the frontend shut down.

    Shuts the frontend down, runs `cmd.backup_restore`, waits for the
    restore to complete, starts the frontend again with
    FRONTEND_TGT_BLOCKDEV and asserts that the volume reports
    frontendState "up". `engine_name` is accepted but not used here.
    """
    controller = ControllerClient(url)

    controller.volume_frontend_shutdown()
    cmd.backup_restore(url, backup)
    wait_for_restore_completion(url, backup)
    controller.volume_frontend_start(FRONTEND_TGT_BLOCKDEV)

    volume = controller.volume_get()
    assert volume.frontendState == "up"
def generate_grpc_controller_client(engine_name, volume_name,
                                    frontend=FRONTEND_TGT_BLOCKDEV):
    """
    Create an engine process and return a ControllerClient for it.

    The client is built from the process's `spec.listen` value and is
    also stored on `grpc_controller_client.process_client`.
    NOTE(review): `engine_manager_client` and `grpc_controller_client`
    are not defined in this block; presumably they come from an
    enclosing scope -- confirm.
    """
    engine_process = create_engine_process(
        engine_manager_client,
        name=engine_name,
        volume_name=volume_name,
        frontend=frontend,
    )
    listen_address = engine_process.spec.listen
    grpc_controller_client.process_client = ControllerClient(listen_address)
    return grpc_controller_client.process_client
def generate_grpc_controller_client(engine_name, volume_name,
                                    frontend=FRONTEND_TGT_BLOCKDEV):
    """
    Create an engine process and return a ControllerClient for it.

    The client address comes from `get_process_address()`; the client is
    also stored on `grpc_controller_client.process_client`. The function
    sleeps for 3 seconds before returning.
    NOTE(review): `engine_manager_client` and `grpc_controller_client`
    are not defined in this block; presumably they come from an
    enclosing scope -- confirm.
    """
    engine_process = create_engine_process(
        engine_manager_client,
        name=engine_name,
        volume_name=volume_name,
        frontend=frontend,
    )
    address = get_process_address(engine_process)
    grpc_controller_client.process_client = ControllerClient(address)
    # NOTE(review): presumably lets the engine process finish starting
    # before the client is handed out -- confirm.
    time.sleep(3)
    return grpc_controller_client.process_client
def grpc_controller_client(request, engine_manager_client):
    """
    Create an engine process and return a ControllerClient for it.

    `get_controller_version_detail()` is called on the new client before
    it is returned; its result is discarded. `request` is not used.
    """
    engine_process = create_engine_process(engine_manager_client)
    address = get_process_address(engine_process)
    client = ControllerClient(address)
    get_controller_version_detail(client)
    return client
def test_engine_restart_after_sigkill(bin):  # NOQA
    """
    Test if engine can be restarted after crashing by SIGKILL.

    1. Create then initialize 1 engine and 2 replicas.
    2. Start the engine.
    3. Create 2 snapshots.
    4. Use SIGKILL to kill the engine process.
    5. Wait for the engine errored.
    6. Mock volume detachment by deleting the engine process and
       replicas processes.
    7. Mock volume reattachment by recreating processes and
       re-starting the engine.
    8. Check if the engine is up with 2 replicas.
    9. Check if the engine still works fine by
       creating/removing/purging snapshots.

    The engine and replica processes are deleted at the end even when a
    step fails, so a failure here does not leave processes behind.
    """
    em_client = ProcessManagerClient(INSTANCE_MANAGER_ENGINE)
    rm_client = ProcessManagerClient(INSTANCE_MANAGER_REPLICA)
    try:
        engine_process = create_engine_process(em_client)
        grpc_controller_client = ControllerClient(
            get_process_address(engine_process))

        replica_dir1 = tempfile.mkdtemp()
        replica_dir2 = tempfile.mkdtemp()

        replica_process1 = create_replica_process(rm_client, REPLICA_NAME,
                                                  replica_dir=replica_dir1)
        grpc_replica_client1 = ReplicaClient(
            get_process_address(replica_process1))
        time.sleep(3)
        cleanup_replica(grpc_replica_client1)

        replica_process2 = create_replica_process(rm_client, REPLICA_2_NAME,
                                                  replica_dir=replica_dir2)
        grpc_replica_client2 = ReplicaClient(
            get_process_address(replica_process2))
        time.sleep(3)
        cleanup_replica(grpc_replica_client2)

        open_replica(grpc_replica_client1)
        open_replica(grpc_replica_client2)
        r1_url = grpc_replica_client1.url
        r2_url = grpc_replica_client2.url
        v = grpc_controller_client.volume_start(replicas=[
            r1_url, r2_url,
        ])
        assert v.replicaCount == 2

        cmd = [bin, '--url', grpc_controller_client.address,
               'snapshot', 'create']
        snap0 = subprocess.check_output(cmd, encoding='utf-8').strip()
        expected = grpc_replica_client1.replica_get().chain[1]
        assert expected == 'volume-snap-{}.img'.format(snap0)

        cmd = [bin, '--url', grpc_controller_client.address,
               'snapshot', 'create',
               '--label', 'name=snap1', '--label', 'key=value']
        snap1 = subprocess.check_output(cmd, encoding='utf-8').strip()

        # SIGKILL every process whose `ps aux` line mentions VOLUME_NAME.
        cmd = ["bash", "-c",
               "kill -9 $(ps aux | grep %s | grep -v grep | awk '{print $2}')"
               % VOLUME_NAME]
        subprocess.check_call(cmd)
        wait_for_process_error(em_client, ENGINE_NAME)

        # Mock detach:
        cleanup_process(em_client)
        cleanup_process(rm_client)

        # Mock reattach:
        #   1. Directly create replicas processes.
        #   2. Call replica_create() to init replica servers for
        #      replica processes.
        #   3. Create one engine process and start the engine with replicas.
        replica_process1 = create_replica_process(rm_client, REPLICA_NAME,
                                                  replica_dir=replica_dir1)
        grpc_replica_client1 = ReplicaClient(
            get_process_address(replica_process1))
        time.sleep(3)
        grpc_replica_client1.replica_create(size=SIZE_STR)

        replica_process2 = create_replica_process(rm_client, REPLICA_2_NAME,
                                                  replica_dir=replica_dir2)
        grpc_replica_client2 = ReplicaClient(
            get_process_address(replica_process2))
        time.sleep(3)
        grpc_replica_client2.replica_create(size=SIZE_STR)

        engine_process = create_engine_process(em_client)
        grpc_controller_client = ControllerClient(
            get_process_address(engine_process))
        r1_url = grpc_replica_client1.url
        r2_url = grpc_replica_client2.url
        v = grpc_controller_client.volume_start(replicas=[
            r1_url, r2_url,
        ])
        assert v.replicaCount == 2

        # Verify the engine still works fine
        cmd = [bin, '--url', grpc_controller_client.address,
               'snapshot', 'create']
        snap2 = subprocess.check_output(cmd, encoding='utf-8').strip()

        cmd = [bin, '--url', grpc_controller_client.address,
               'snapshot', 'rm', snap1]
        subprocess.check_call(cmd)

        cmd = [bin, '--url', grpc_controller_client.address,
               'snapshot', 'purge']
        subprocess.check_call(cmd)
        wait_for_purge_completion(grpc_controller_client.address)

        # After removing snap1 and purging, only snap2 and snap0 are
        # expected in the listing.
        cmd = [bin, '--debug',
               '--url', grpc_controller_client.address,
               'snapshot', 'ls']
        ls_output = subprocess.check_output(cmd, encoding='utf-8')

        assert ls_output == '''ID
{}
{}
'''.format(snap2, snap0)
    finally:
        cleanup_process(em_client)
        cleanup_process(rm_client)
def test_single_replica_failure_during_engine_start(bin):  # NOQA
    """
    Test if engine still works fine if there is an invalid
    replica/backend in the starting phase

    1. Create then initialize 1 engine and 2 replicas.
    2. Start the engine.
    3. Create 2 snapshots.
    4. Mess up the replica1 by manually modifying the snapshot meta file.
    5. Mock volume detachment by deleting the engine process and
       replicas processes.
    6. Mock volume reattachment by recreating processes and
       re-starting the engine.
    7. Check if the engine is up and if replica1 is mode ERR in the engine.
    8. Check if the engine still works fine by creating one more snapshot.
    9. Remove the ERR replica from the engine
       then check snapshot remove and snapshot purge work fine.
    10. Check if the snapshot list is correct.

    The engine and replica processes are deleted at the end even when a
    step fails, so a failure here does not leave processes behind.
    """
    em_client = ProcessManagerClient(INSTANCE_MANAGER_ENGINE)
    rm_client = ProcessManagerClient(INSTANCE_MANAGER_REPLICA)
    try:
        engine_process = create_engine_process(em_client)
        grpc_controller_client = ControllerClient(
            get_process_address(engine_process))

        replica_dir1 = tempfile.mkdtemp()
        replica_dir2 = tempfile.mkdtemp()

        replica_process1 = create_replica_process(rm_client, REPLICA_NAME,
                                                  replica_dir=replica_dir1)
        grpc_replica_client1 = ReplicaClient(
            get_process_address(replica_process1))
        time.sleep(3)
        cleanup_replica(grpc_replica_client1)

        replica_process2 = create_replica_process(rm_client, REPLICA_2_NAME,
                                                  replica_dir=replica_dir2)
        grpc_replica_client2 = ReplicaClient(
            get_process_address(replica_process2))
        time.sleep(3)
        cleanup_replica(grpc_replica_client2)

        open_replica(grpc_replica_client1)
        open_replica(grpc_replica_client2)
        r1_url = grpc_replica_client1.url
        r2_url = grpc_replica_client2.url
        v = grpc_controller_client.volume_start(replicas=[
            r1_url, r2_url,
        ])
        assert v.replicaCount == 2

        cmd = [bin, '--url', grpc_controller_client.address,
               'snapshot', 'create']
        snap0 = subprocess.check_output(cmd, encoding='utf-8').strip()
        expected = grpc_replica_client1.replica_get().chain[1]
        assert expected == 'volume-snap-{}.img'.format(snap0)

        cmd = [bin, '--url', grpc_controller_client.address,
               'snapshot', 'create',
               '--label', 'name=snap1', '--label', 'key=value']
        snap1 = subprocess.check_output(cmd, encoding='utf-8').strip()

        # Mess up the replica1 by manually modifying the snapshot meta file
        r1_snap1_meta_path = os.path.join(
            replica_dir1, 'volume-snap-{}.img.meta'.format(snap1))
        with open(r1_snap1_meta_path, 'r') as f:
            snap1_meta_info = json.load(f)
        with open(r1_snap1_meta_path, 'w') as f:
            snap1_meta_info["Parent"] = "invalid-parent.img"
            json.dump(snap1_meta_info, f)

        # Mock detach:
        cleanup_process(em_client)
        cleanup_process(rm_client)

        # Mock reattach:
        #   1. Directly create replicas processes.
        #   2. Call replica_create() to init replica servers for
        #      replica processes.
        #   3. Create one engine process and start the engine with replicas.
        replica_process1 = create_replica_process(rm_client, REPLICA_NAME,
                                                  replica_dir=replica_dir1)
        grpc_replica_client1 = ReplicaClient(
            get_process_address(replica_process1))
        time.sleep(3)
        grpc_replica_client1.replica_create(size=SIZE_STR)

        replica_process2 = create_replica_process(rm_client, REPLICA_2_NAME,
                                                  replica_dir=replica_dir2)
        grpc_replica_client2 = ReplicaClient(
            get_process_address(replica_process2))
        time.sleep(3)
        grpc_replica_client2.replica_create(size=SIZE_STR)

        engine_process = create_engine_process(em_client)
        grpc_controller_client = ControllerClient(
            get_process_address(engine_process))
        r1_url = grpc_replica_client1.url
        r2_url = grpc_replica_client2.url
        v = grpc_controller_client.volume_start(replicas=[
            r1_url, r2_url,
        ])
        assert v.replicaCount == 2

        # Check if replica1 is mode `ERR` while replica2 stays `RW`.
        # A replica missing from the list yields None here and fails the
        # corresponding assertion.
        rs = grpc_controller_client.replica_list()
        assert len(rs) == 2
        modes = {r.address: r.mode for r in rs}
        assert modes.get(r1_url) == 'ERR'
        assert modes.get(r2_url) == 'RW'

        # The engine still works fine
        cmd = [bin, '--url', grpc_controller_client.address,
               'snapshot', 'create']
        snap2 = subprocess.check_output(cmd, encoding='utf-8').strip()

        # Remove the ERR replica before removing snapshots
        grpc_controller_client.replica_delete(r1_url)
        rs = grpc_controller_client.replica_list()
        assert len(rs) == 1
        assert rs[0].address == r2_url
        assert rs[0].mode == "RW"

        cmd = [bin, '--url', grpc_controller_client.address,
               'snapshot', 'rm', snap1]
        subprocess.check_call(cmd)

        cmd = [bin, '--url', grpc_controller_client.address,
               'snapshot', 'purge']
        subprocess.check_call(cmd)
        wait_for_purge_completion(grpc_controller_client.address)

        # After removing snap1 and purging, only snap2 and snap0 are
        # expected in the listing.
        cmd = [bin, '--debug',
               '--url', grpc_controller_client.address,
               'snapshot', 'ls']
        ls_output = subprocess.check_output(cmd, encoding='utf-8')

        assert ls_output == '''ID
{}
{}
'''.format(snap2, snap0)
    finally:
        cleanup_process(em_client)
        cleanup_process(rm_client)
def grpc_controller_client(request, engine_manager_client):
    """
    Create an engine process and return a ControllerClient connected to
    the address in the process's `spec.listen`. `request` is not used.
    """
    engine_process = create_engine_process(engine_manager_client)
    listen_address = engine_process.spec.listen
    return ControllerClient(listen_address)
def snapshot_revert_with_frontend(url, engine_name, name):
    """
    Revert the volume at `url` to snapshot `name` with the frontend down.

    The frontend is shut down, `cmd.snapshot_revert` is run, and the
    frontend is started again with FRONTEND_TGT_BLOCKDEV. `engine_name`
    is accepted but not used here.
    """
    controller = ControllerClient(url)

    controller.volume_frontend_shutdown()
    cmd.snapshot_revert(url, name)
    controller.volume_frontend_start(FRONTEND_TGT_BLOCKDEV)
def grpc_controller_client(request, engine_manager_client):
    """
    Create an engine process and return a ControllerClient connected to
    the address reported by `get_process_address()`. `request` is not
    used.
    """
    engine_process = create_engine_process(engine_manager_client)
    address = get_process_address(engine_process)
    return ControllerClient(address)