def test_replica_failure_detection(
        grpc_controller_client,  # NOQA
        grpc_replica_client,  # NOQA
        grpc_replica_client2):  # NOQA
    """Check that a cleaned-up replica is eventually listed in ``ERR`` mode.

    A volume is started on two opened replicas, the first replica is cleaned
    up, and the controller's replica list is polled up to 10 times (one
    second apart) until the first replica's entry shows mode ``ERR``.
    """
    for replica_client in (grpc_replica_client, grpc_replica_client2):
        open_replica(replica_client)

    r1_url = grpc_replica_client.url
    r2_url = grpc_replica_client2.url
    volume = grpc_controller_client.volume_start(replicas=[r1_url, r2_url])
    assert volume.replicaCount == 2

    # wait for initial read/write period to pass
    time.sleep(2)

    cleanup_replica(grpc_replica_client)

    detected = False
    for _ in range(10):
        listed = grpc_controller_client.replica_list()
        assert len(listed) == 2
        detected = any(
            entry.address == r1_url and entry.mode == 'ERR'
            for entry in listed)
        if detected:
            break
        time.sleep(1)
    assert detected
def test_backup(replica1, replica2, controller, backup_targets):  # NOQA
    """Run ``backup_test`` against every configured backup target."""
    for target in backup_targets:
        device = common.get_dev(replica1, replica2, controller)
        backup_test(device, target)
        # Tear down replicas and controller before moving to the next target.
        for replica in (replica1, replica2):
            common.cleanup_replica(replica)
        common.cleanup_controller(controller)
def test_backup_volume_deletion(
        grpc_replica1, grpc_replica2,  # NOQA
        grpc_controller, backup_targets):  # NOQA
    """Create a backup, remove its backup volume, and expect a lookup error.

    For each backup target: write random data, snapshot it, back it up,
    check the returned backup info, remove the backup volume, and assert
    that listing the volume reports a "cannot find" error message.
    """
    offset = 0
    length = 128
    address = grpc_controller.address

    for target in backup_targets:
        device = get_dev(grpc_replica1, grpc_replica2, grpc_controller)

        payload = random_string(length)
        verify_data(device, offset, payload)
        snapshot = cmd.snapshot_create(address)

        created = create_backup(address, snapshot, target)
        assert created["VolumeName"] == VOLUME_NAME
        assert created["Size"] == BLOCK_SIZE_STR
        assert snapshot in created["SnapshotName"]

        cmd.backup_volume_rm(address, VOLUME_NAME, target)
        listing = cmd.backup_volume_list(address, VOLUME_NAME, target)
        assert "cannot find" in listing[VOLUME_NAME]["Messages"]["error"]

        cmd.sync_agent_server_reset(address)
        cleanup_controller(grpc_controller)
        for replica in (grpc_replica1, grpc_replica2):
            cleanup_replica(replica)
def test_ha_single_replica_failure(
        grpc_controller,  # NOQA
        grpc_replica1, grpc_replica2):  # NOQA
    """Data written to a two-replica volume stays readable after one fails.

    Starts the volume on both replicas, writes and verifies a payload,
    cleans up the second replica, then checks the replica state via
    ``verify_replica_state(..., 1, "ERR")`` and re-reads the payload.
    """
    for replica in (grpc_replica1, grpc_replica2):
        open_replica(replica)
    assert len(grpc_controller.replica_list()) == 0

    replica_urls = [grpc_replica1.url, grpc_replica2.url]
    volume = grpc_controller.volume_start(replicas=replica_urls)
    assert volume.replicaCount == 2

    listed = grpc_controller.replica_list()
    assert len(listed) == 2
    for index in range(2):
        assert listed[index].mode == "RW"

    device = get_blockdev(VOLUME_NAME)
    payload = random_string(128)
    payload_offset = 1024
    verify_data(device, payload_offset, payload)

    cleanup_replica(grpc_replica2)

    verify_async(device, 10, 128, 1)
    verify_replica_state(grpc_controller, 1, "ERR")
    verify_read(device, payload_offset, payload)
def test_ha_single_replica_failure(controller, replica1, replica2):  # NOQA
    """Data written to a two-replica volume stays readable after one fails.

    Starts the first listed volume on ``common.REPLICA1``/``common.REPLICA2``,
    writes and verifies a payload, cleans up ``replica2``, then checks the
    replica state via ``verify_replica_state(..., 1, "ERR")`` and re-reads
    the payload.
    """
    for replica in (replica1, replica2):
        open_replica(replica)
    assert len(controller.list_replica()) == 0

    volume = controller.list_volume()[0]
    volume = volume.start(replicas=[common.REPLICA1, common.REPLICA2])
    assert volume.replicaCount == 2

    listed = controller.list_replica()
    assert len(listed) == 2
    for index in range(2):
        assert listed[index].mode == "RW"

    device = get_blockdev()
    payload = common.random_string(128)
    payload_offset = 1024
    verify_data(device, payload_offset, payload)

    cleanup_replica(replica2)

    verify_async(device, 10, 128, 1)
    common.verify_replica_state(controller, 1, "ERR")
    verify_read(device, payload_offset, payload)
def test_snapshot_tree_backup(replica1, replica2, controller, backup_targets):  # NOQA
    """Run ``snapshot_tree_backup_test`` against every backup target."""
    for target in backup_targets:
        device = common.get_dev(replica1, replica2, controller)
        snapshot_tree_backup_test(device, target)
        # Tear down replicas and controller before moving to the next target.
        for replica in (replica1, replica2):
            common.cleanup_replica(replica)
        common.cleanup_controller(controller)
def test_ha_single_replica_failure(controller, replica1, replica2):  # NOQA
    """Verify reads still succeed after the second replica is cleaned up.

    The volume is started on two replicas, a 128-character random payload is
    written at offset 1024, ``replica2`` is cleaned up, and the payload is
    read back once ``verify_replica_state(..., 1, "ERR")`` has passed.
    """
    open_replica(replica1)
    open_replica(replica2)

    before_start = controller.list_replica()
    assert len(before_start) == 0

    started = controller.list_volume()[0].start(
        replicas=[common.REPLICA1, common.REPLICA2])
    assert started.replicaCount == 2

    after_start = controller.list_replica()
    assert len(after_start) == 2
    assert after_start[0].mode == "RW"
    assert after_start[1].mode == "RW"

    blockdev = get_blockdev()
    written = common.random_string(128)
    write_offset = 1024
    verify_data(blockdev, write_offset, written)

    cleanup_replica(replica2)

    verify_async(blockdev, 10, 128, 1)
    common.verify_replica_state(controller, 1, "ERR")
    verify_read(blockdev, write_offset, written)
def test_ha_single_replica_rebuild(controller, replica1, replica2):  # NOQA
    """Fail the second replica, re-add it, then purge the extra snapshot.

    After the replica is re-added the test creates a snapshot, checks that
    its parent is a non-user-created, non-removed snapshot with no parent of
    its own, purges snapshots, and expects only the new snapshot and the
    volume head to remain.
    """
    for replica in (replica1, replica2):
        open_replica(replica)
    assert len(controller.list_replica()) == 0

    volume = controller.list_volume()[0]
    volume = volume.start(replicas=[common.REPLICA1, common.REPLICA2])
    assert volume.replicaCount == 2

    listed = controller.list_replica()
    assert len(listed) == 2
    for index in range(2):
        assert listed[index].mode == "RW"

    device = get_blockdev()
    payload = common.random_string(128)
    payload_offset = 1024
    verify_data(device, payload_offset, payload)

    # Cleanup replica2
    cleanup_replica(replica2)

    verify_async(device, 10, 128, 1)
    common.verify_replica_state(controller, 1, "ERR")
    verify_read(device, payload_offset, payload)

    controller.delete(listed[1])

    # Rebuild replica2
    common.open_replica(replica2)
    cmd.add_replica(common.REPLICA2)

    verify_async(device, 10, 128, 1)
    common.verify_replica_state(controller, 1, "RW")
    verify_read(device, payload_offset, payload)

    # WORKAROUND for unable to remove the parent of volume head
    user_snap = cmd.snapshot_create()

    snapshots = cmd.snapshot_info()
    assert len(snapshots) == 3
    system_snap = snapshots[user_snap]["parent"]
    system_entry = snapshots[system_snap]
    assert system_entry["parent"] == ""
    assert user_snap in system_entry["children"]
    assert system_entry["usercreated"] is False
    assert system_entry["removed"] is False

    cmd.snapshot_purge()
    snapshots = cmd.snapshot_info()
    assert len(snapshots) == 2
    assert snapshots[user_snap] is not None
    assert snapshots[VOLUME_HEAD] is not None
def test_restore_to_file_without_backing_file(replica1, replica2,  # NOQA
                                              controller, backup_targets):  # NOQA
    """Run ``restore_to_file_without_backing_file_test`` per backup target."""
    for target in backup_targets:
        device = common.get_dev(replica1, replica2, controller)
        restore_to_file_without_backing_file_test(device, target)
        # Tear down replicas and controller before moving to the next target.
        for replica in (replica1, replica2):
            common.cleanup_replica(replica)
        common.cleanup_controller(controller)
def test_backup_hole_with_backing_file(backing_replica1, backing_replica2,
                                       controller, backup_targets):  # NOQA
    """Run ``backup_hole_with_backing_file_test`` per backup target."""
    backing_replicas = (backing_replica1, backing_replica2)
    for target in backup_targets:
        device = common.get_backing_dev(backing_replica1,
                                        backing_replica2,
                                        controller)
        backup_hole_with_backing_file_test(device, target)
        # Tear down replicas and controller before moving to the next target.
        for replica in backing_replicas:
            common.cleanup_replica(replica)
        common.cleanup_controller(controller)
def test_snapshot_tree_rebuild(
        grpc_controller,  # NOQA
        grpc_replica1, grpc_replica2):  # NOQA
    """Build a snapshot tree, fail and re-add replica 2, re-verify the tree.

    The snapshot tree is built with ``snapshot_tree_build`` before the
    second replica is cleaned up, and checked with ``snapshot_tree_verify``
    after that replica has been deleted from the controller and re-added.
    """
    address = grpc_controller.address
    offset = 0
    length = 128

    for replica in (grpc_replica1, grpc_replica2):
        open_replica(replica)
    assert len(grpc_controller.replica_list()) == 0

    r1_url = grpc_replica1.url
    r2_url = grpc_replica2.url
    volume = grpc_controller.volume_start(replicas=[r1_url, r2_url])
    assert volume.name == VOLUME_NAME
    assert volume.replicaCount == 2

    listed = grpc_controller.replica_list()
    assert len(listed) == 2
    for index in range(2):
        assert listed[index].mode == "RW"

    device = get_blockdev(VOLUME_NAME)

    tree_snap, tree_data = snapshot_tree_build(device, address, ENGINE_NAME,
                                               offset, length)

    payload = random_string(128)
    payload_offset = 1024
    verify_data(device, payload_offset, payload)

    # Cleanup replica2
    cleanup_replica(grpc_replica2)

    verify_async(device, 10, 128, 1)
    verify_replica_state(grpc_controller, 1, "ERR")
    verify_read(device, payload_offset, payload)

    grpc_controller.replica_delete(listed[1].address)

    # Rebuild replica2
    open_replica(grpc_replica2)
    cmd.add_replica(address, r2_url)

    verify_async(device, 10, 128, 1)
    verify_replica_state(grpc_controller, 1, "RW")

    snapshot_tree_verify(device, address, ENGINE_NAME,
                         offset, length, tree_snap, tree_data)
def test_snapshot_tree_backup(
        grpc_replica1, grpc_replica2,  # NOQA
        grpc_controller, backup_targets):  # NOQA
    """Run ``snapshot_tree_backup_test`` against every backup target."""
    for target in backup_targets:
        device = common.get_dev(grpc_replica1, grpc_replica2,
                                grpc_controller)
        snapshot_tree_backup_test(device, target)
        # Tear down replicas and controller before moving to the next target.
        for replica in (grpc_replica1, grpc_replica2):
            common.cleanup_replica(replica)
        common.cleanup_controller(grpc_controller)
def test_snapshot_tree_backup(
        grpc_replica1, grpc_replica2,  # NOQA
        grpc_controller, backup_targets):  # NOQA
    """Run ``snapshot_tree_backup_test`` per target, then reset and clean up."""
    for target in backup_targets:
        snapshot_tree_backup_test(target, ENGINE_NAME,
                                  grpc_controller,
                                  grpc_replica1, grpc_replica2)
        # Reset the sync agent server, then tear down controller and replicas.
        cmd.sync_agent_server_reset(grpc_controller.address)
        cleanup_controller(grpc_controller)
        for replica in (grpc_replica1, grpc_replica2):
            cleanup_replica(replica)
def test_backup_cli(
        bin, engine_manager_client,  # NOQA
        grpc_controller_client,  # NOQA
        grpc_replica_client, grpc_replica_client2,  # NOQA
        backup_targets):
    """Run ``backup_core`` against every backup target, cleaning up after each."""
    replica_clients = (grpc_replica_client, grpc_replica_client2)
    for target in backup_targets:
        backup_core(bin, engine_manager_client,
                    grpc_controller_client,
                    grpc_replica_client, grpc_replica_client2,
                    target)
        for replica_client in replica_clients:
            cleanup_replica(replica_client)
        cleanup_controller(grpc_controller_client)
def test_backup(
        grpc_replica1, grpc_replica2,  # NOQA
        grpc_controller, backup_targets):  # NOQA
    """Run ``backup_test`` per target, then reset and clean up."""
    for target in backup_targets:
        device = get_dev(grpc_replica1, grpc_replica2, grpc_controller)
        backup_test(device, grpc_controller.address,
                    VOLUME_NAME, ENGINE_NAME, target)
        # Reset the sync agent server, then tear down controller and replicas.
        cmd.sync_agent_server_reset(grpc_controller.address)
        cleanup_controller(grpc_controller)
        for replica in (grpc_replica1, grpc_replica2):
            cleanup_replica(replica)
def test_backup_with_backing_file(
        grpc_backing_replica1, grpc_backing_replica2,  # NOQA
        grpc_controller, backup_targets):  # NOQA
    """Run ``backup_with_backing_file_test`` against every backup target."""
    backing_replicas = (grpc_backing_replica1, grpc_backing_replica2)
    for target in backup_targets:
        device = common.get_backing_dev(grpc_backing_replica1,
                                        grpc_backing_replica2,
                                        grpc_controller)
        backup_with_backing_file_test(device, target)
        # Tear down replicas and controller before moving to the next target.
        for replica in backing_replicas:
            common.cleanup_replica(replica)
        common.cleanup_controller(grpc_controller)
def test_backup_with_backing_file(
        grpc_backing_replica1, grpc_backing_replica2,  # NOQA
        grpc_backing_controller, backup_targets):  # NOQA
    """Run ``backup_with_backing_file_test`` per target, then reset and clean up."""
    for target in backup_targets:
        backup_with_backing_file_test(target,
                                      grpc_backing_controller,
                                      grpc_backing_replica1,
                                      grpc_backing_replica2)
        # Reset the sync agent server, then tear down controller and replicas.
        cmd.sync_agent_server_reset(grpc_backing_controller.address)
        cleanup_controller(grpc_backing_controller)
        for replica in (grpc_backing_replica1, grpc_backing_replica2):
            cleanup_replica(replica)
def test_restore_to_file_without_backing_file(
        backup_targets,  # NOQA
        grpc_controller,  # NOQA
        grpc_replica1,  # NOQA
        grpc_replica2):  # NOQA
    """Run ``restore_to_file_without_backing_file_test`` per backup target."""
    for target in backup_targets:
        restore_to_file_without_backing_file_test(target,
                                                  grpc_controller,
                                                  grpc_replica1,
                                                  grpc_replica2)
        # Reset the sync agent server, then tear down controller and replicas.
        cmd.sync_agent_server_reset(grpc_controller.address)
        cleanup_controller(grpc_controller)
        for replica in (grpc_replica1, grpc_replica2):
            cleanup_replica(replica)
def test_snapshot_tree_rebuild(
        grpc_controller,  # NOQA
        grpc_replica1, grpc_replica2):  # NOQA
    """Build a snapshot tree, fail and re-add replica 2, re-verify the tree.

    The snapshot tree is built with ``snapshot_tree_build`` before the
    second replica is cleaned up, and checked with ``snapshot_tree_verify``
    after that replica has been deleted from the controller and re-added.
    """
    offset = 0
    length = 128

    for replica in (grpc_replica1, grpc_replica2):
        open_replica(replica)
    assert len(grpc_controller.replica_list()) == 0

    volume = grpc_controller.volume_start(
        replicas=[common.REPLICA1, common.REPLICA2])
    assert volume.replicaCount == 2

    listed = grpc_controller.replica_list()
    assert len(listed) == 2
    for index in range(2):
        assert listed[index].mode == "RW"

    device = get_blockdev()

    tree_snap, tree_data = snapshot_tree_build(device, offset, length)

    payload = common.random_string(128)
    payload_offset = 1024
    verify_data(device, payload_offset, payload)

    # Cleanup replica2
    cleanup_replica(grpc_replica2)

    verify_async(device, 10, 128, 1)
    common.verify_replica_state(grpc_controller, 1, "ERR")
    verify_read(device, payload_offset, payload)

    grpc_controller.replica_delete(listed[1].address)

    # Rebuild replica2
    open_replica(grpc_replica2)
    cmd.add_replica(common.REPLICA2)

    verify_async(device, 10, 128, 1)
    common.verify_replica_state(grpc_controller, 1, "RW")

    snapshot_tree_verify(device, offset, length, tree_snap, tree_data)
def test_snapshot_tree_rebuild(controller, replica1, replica2):  # NOQA
    """Build a snapshot tree, fail and re-add replica 2, re-verify the tree.

    The snapshot tree is built with ``snapshot_tree_build`` before
    ``replica2`` is cleaned up, and checked with ``snapshot_tree_verify``
    after that replica has been deleted from the controller and re-added.
    """
    offset = 0
    length = 128

    for replica in (replica1, replica2):
        open_replica(replica)
    assert len(controller.list_replica()) == 0

    volume = controller.list_volume()[0]
    volume = volume.start(replicas=[common.REPLICA1, common.REPLICA2])
    assert volume.replicaCount == 2

    listed = controller.list_replica()
    assert len(listed) == 2
    for index in range(2):
        assert listed[index].mode == "RW"

    device = get_blockdev()

    tree_snap, tree_data = snapshot_tree_build(device, offset, length)

    payload = common.random_string(128)
    payload_offset = 1024
    verify_data(device, payload_offset, payload)

    # Cleanup replica2
    cleanup_replica(replica2)

    verify_async(device, 10, 128, 1)
    common.verify_replica_state(controller, 1, "ERR")
    verify_read(device, payload_offset, payload)

    controller.delete(listed[1])

    # Rebuild replica2
    common.open_replica(replica2)
    cmd.add_replica(common.REPLICA2)

    verify_async(device, 10, 128, 1)
    common.verify_replica_state(controller, 1, "RW")

    snapshot_tree_verify(device, offset, length, tree_snap, tree_data)
def cleanup_no_frontend_volume(c, r1, r2):
    """Tear down the no-frontend volume and remove its standby replica paths.

    The frontend is started first and asserted to be "up"; replicas and
    controller are then cleaned up, the frontend is shut down and asserted
    to be "down", and both standby replica paths are cleaned.
    """
    launcher.start_engine_frontend(FRONTEND_TGT_BLOCKDEV,
                                   url=LAUNCHER_NO_FRONTEND)
    assert c.list_volume()[0].frontendState == "up"

    for replica in (r1, r2):
        common.cleanup_replica(replica)
    common.cleanup_controller(c)

    launcher.shutdown_engine_frontend(url=LAUNCHER_NO_FRONTEND)
    assert c.list_volume()[0].frontendState == "down"

    for replica_path in (STANDBY_REPLICA1_PATH, STANDBY_REPLICA2_PATH):
        cleanup_replica_path(replica_path)
def cleanup_no_frontend_volume(grpc_c, grpc_r1, grpc_r2):
    """Tear down the no-frontend volume and remove its standby replica paths.

    The frontend is started first and asserted to be "up"; the sync agent
    server is reset, replicas and controller are cleaned up, the frontend is
    shut down and asserted to be "down", and both standby replica paths are
    cleaned.
    """
    launcher.start_engine_frontend(FRONTEND_TGT_BLOCKDEV,
                                   url=LAUNCHER_NO_FRONTEND)
    assert grpc_c.volume_get().frontendState == "up"

    cmd.sync_agent_server_reset(CONTROLLER_NO_FRONTEND)

    for replica in (grpc_r1, grpc_r2):
        common.cleanup_replica(replica)
    common.cleanup_controller(grpc_c)

    launcher.shutdown_engine_frontend(url=LAUNCHER_NO_FRONTEND)
    assert grpc_c.volume_get().frontendState == "down"

    for replica_path in (STANDBY_REPLICA1_PATH, STANDBY_REPLICA2_PATH):
        cleanup_replica_path(replica_path)
def cleanup_no_frontend_volume(grpc_em, grpc_c, grpc_r1, grpc_r2):
    """Tear down the no-frontend volume and remove its fixed replica dirs.

    The frontend is started and asserted "up", the sync agent server is
    reset, the frontend is shut down and asserted "down" (with an empty
    ``spec.frontend`` on the engine process), and finally the controller,
    replicas and fixed replica directories are cleaned up.
    """
    grpc_em.frontend_start(ENGINE_NO_FRONTEND_NAME,
                           FRONTEND_TGT_BLOCKDEV)
    assert grpc_c.volume_get().frontendState == "up"

    cmd.sync_agent_server_reset(grpc_c.address)

    grpc_em.frontend_shutdown(ENGINE_NO_FRONTEND_NAME)
    assert grpc_c.volume_get().frontendState == "down"

    engine_process = grpc_em.engine_get(ENGINE_NO_FRONTEND_NAME)
    assert engine_process.spec.frontend == ""

    cleanup_controller(grpc_c)
    for replica in (grpc_r1, grpc_r2):
        cleanup_replica(replica)

    for replica_dir in (FIXED_REPLICA_PATH1, FIXED_REPLICA_PATH2):
        cleanup_replica_dir(replica_dir)
def restore_inc_test(
        controller, replica1, replica2,  # NOQA
        sb_controller, sb_replica1, sb_replica2, backup_target):  # NOQA
    """Exercise incremental restore onto a standby (no-frontend) volume.

    Five backups (backup0..backup4) are created on the primary volume, the
    primary is torn down, and the standby volume is restored from backup0
    and then incrementally from backup1, backup2, backup3 and backup4.
    After each step the standby data, the absence of delta files, and the
    reported ``lastRestored`` value are checked.

    NOTE(review): the original indentation was lost; the nesting below
    (in particular what belongs to the ``if "vfs"`` branches and to the
    ``pytest.raises`` blocks) is reconstructed and should be confirmed.
    """
    launcher.start_engine_frontend(FRONTEND_TGT_BLOCKDEV,
                                   url=LAUNCHER)
    dev = common.get_dev(replica1, replica2, controller)

    # One NUL character, repeated below to build zero-filled ranges.
    zero_string = b'\x00'.decode('utf-8')

    # backup0: 256 random data in 1st block
    length0 = 256
    snap0_data = common.random_string(length0)
    verify_data(dev, 0, snap0_data)
    verify_data(dev, BLOCK_SIZE, snap0_data)
    snap0 = cmd.snapshot_create()
    backup0 = create_backup(backup_target, snap0)
    backup0_name = cmd.backup_inspect(backup0)['Name']

    # backup1: 32 random data + 32 zero data + 192 random data in 1st block
    length1 = 32
    offset1 = 32
    snap1_data = zero_string * length1
    verify_data(dev, offset1, snap1_data)
    snap1 = cmd.snapshot_create()
    backup1 = create_backup(backup_target, snap1)
    backup1_name = cmd.backup_inspect(backup1)['Name']

    # backup2: 32 random data + 256 random data in 1st block,
    # 256 random data in 2nd block
    length2 = 256
    offset2 = 32
    snap2_data = common.random_string(length2)
    verify_data(dev, offset2, snap2_data)
    verify_data(dev, BLOCK_SIZE, snap2_data)
    snap2 = cmd.snapshot_create()
    backup2 = create_backup(backup_target, snap2)
    backup2_name = cmd.backup_inspect(backup2)['Name']

    # backup3: 64 zero data + 192 random data in 1st block
    length3 = 64
    offset3 = 0
    verify_data(dev, offset3, zero_string * length3)
    # Zero the 32 characters starting at offset 256 and the first 256
    # characters of the second block.
    verify_data(dev, length2, zero_string * offset2)
    verify_data(dev, BLOCK_SIZE, zero_string * length2)
    snap3 = cmd.snapshot_create()
    backup3 = create_backup(backup_target, snap3)
    backup3_name = cmd.backup_inspect(backup3)['Name']

    # backup4: 256 random data in 1st block
    length4 = 256
    offset4 = 0
    snap4_data = common.random_string(length4)
    verify_data(dev, offset4, snap4_data)
    snap4 = cmd.snapshot_create()
    backup4 = create_backup(backup_target, snap4)
    backup4_name = cmd.backup_inspect(backup4)['Name']

    # The primary volume is no longer needed once all backups exist.
    common.cleanup_replica(replica1)
    common.cleanup_replica(replica2)
    common.cleanup_controller(controller)
    launcher.shutdown_engine_frontend(url=LAUNCHER)

    # start no-frontend volume
    # start standby volume (no frontend)
    start_no_frontend_volume(sb_controller, sb_replica1, sb_replica2)

    restore_for_no_frontend_volume(backup0, sb_controller)
    verify_no_frontend_data(0, snap0_data, sb_controller)

    # mock restore crash/error
    delta_file1 = "volume-delta-" + backup0_name + ".img"
    if "vfs" in backup_target:
        # Temporarily rename every "*blk" file under the backup volume
        # directory so the incremental restore below fails.
        command = ["find", VFS_DIR, "-type", "d", "-name", VOLUME_NAME]
        backup_volume_path = subprocess.check_output(command).strip()
        command = ["find", backup_volume_path, "-name", "*blk"]
        blocks = subprocess.check_output(command).split()
        assert len(blocks) != 0
        for blk in blocks:
            command = ["mv", blk, blk + ".tmp"]
            subprocess.check_output(command).strip()
        with pytest.raises(subprocess.CalledProcessError):
            cmd.restore_inc(backup1, backup0_name, CONTROLLER_NO_FRONTEND)
        # The failed restore is expected to leave its delta file behind.
        assert path.exists(STANDBY_REPLICA1_PATH + delta_file1)
        assert path.exists(STANDBY_REPLICA2_PATH + delta_file1)
        # Put the block files back so the next restore can succeed.
        for blk in blocks:
            command = ["mv", blk + ".tmp", blk]
            subprocess.check_output(command)

    # Expected content after backup1: backup0 data with 32 zeros at offset 32.
    data1 = \
        snap0_data[0:offset1] + snap1_data + \
        snap0_data[offset1+length1:]
    cmd.restore_inc(backup1, backup0_name, CONTROLLER_NO_FRONTEND)
    verify_no_frontend_data(0, data1, sb_controller)

    assert not path.exists(STANDBY_REPLICA1_PATH + delta_file1)
    assert not path.exists(STANDBY_REPLICA2_PATH + delta_file1)
    volume_info = cmd.info(CONTROLLER_NO_FRONTEND)
    assert volume_info['lastRestored'] == backup1_name

    # Expected content after backup2: first block prefix, snap2 data, zero
    # padding up to BLOCK_SIZE, then snap2 data again in the second block.
    data2 = \
        data1[0:offset2] + snap2_data + \
        zero_string * (BLOCK_SIZE - length2 - offset2) + snap2_data
    cmd.restore_inc(backup2, backup1_name, CONTROLLER_NO_FRONTEND)
    verify_no_frontend_data(0, data2, sb_controller)

    delta_file2 = "volume-delta-" + backup1_name + ".img"
    assert not path.exists(STANDBY_REPLICA1_PATH + delta_file2)
    assert not path.exists(STANDBY_REPLICA2_PATH + delta_file2)
    volume_info = cmd.info(CONTROLLER_NO_FRONTEND)
    assert volume_info['lastRestored'] == backup2_name

    # mock race condition
    with pytest.raises(subprocess.CalledProcessError) as e:
        cmd.restore_inc(backup1, backup0_name, CONTROLLER_NO_FRONTEND)
        # NOTE(review): placed inside the `with` block, this assert never
        # runs once the call above raises; `e` is also the pytest exception
        # info wrapper rather than the error text. Confirm the intended
        # check on the error message.
        assert "doesn't match lastRestored" in e

    data3 = zero_string * length3 + data2[length3:length2]
    cmd.restore_inc(backup3, backup2_name, CONTROLLER_NO_FRONTEND)
    verify_no_frontend_data(0, data3, sb_controller)

    # NOTE(review): delta_file1/delta_file2 are named after the
    # last-restored backup, but this one uses backup3_name instead of
    # backup2_name - confirm which name is intended.
    delta_file3 = "volume-delta-" + backup3_name + ".img"
    assert not path.exists(STANDBY_REPLICA1_PATH + delta_file3)
    assert not path.exists(STANDBY_REPLICA2_PATH + delta_file3)
    volume_info = cmd.info(CONTROLLER_NO_FRONTEND)
    assert volume_info['lastRestored'] == backup3_name

    # mock corner case: invalid last-restored backup
    rm_backups([backup3])
    # actually it is full restoration
    cmd.restore_inc(backup4, backup3_name, CONTROLLER_NO_FRONTEND)
    verify_no_frontend_data(0, snap4_data, sb_controller)
    volume_info = cmd.info(CONTROLLER_NO_FRONTEND)
    assert volume_info['lastRestored'] == backup4_name

    if "vfs" in backup_target:
        # No temporary files should be left under the backup volume dir.
        # NOTE(review): the pattern is spelled "*tempoary" - confirm it
        # matches the temp file names that are actually produced.
        command = ["find", VFS_DIR, "-type", "d", "-name", VOLUME_NAME]
        backup_volume_path = subprocess.check_output(command).strip()
        command = ["find", backup_volume_path, "-name", "*tempoary"]
        tmp_files = subprocess.check_output(command).split()
        assert len(tmp_files) == 0

    cleanup_no_frontend_volume(sb_controller, sb_replica1, sb_replica2)

    # backup3 was already removed above.
    rm_backups([backup0, backup1, backup2, backup4])
def test_ha_single_replica_rebuild(controller, replica1, replica2):  # NOQA
    """Exercise failure and re-add of ``replica2``, then snapshot purge.

    Once the replica is back in "RW" state a new snapshot is created; its
    parent must be a non-user-created, non-removed snapshot with no parent.
    After ``cmd.snapshot_purge()`` only two entries are expected: the new
    snapshot and the volume head.
    """
    open_replica(replica1)
    open_replica(replica2)

    before_start = controller.list_replica()
    assert len(before_start) == 0

    started = controller.list_volume()[0].start(
        replicas=[common.REPLICA1, common.REPLICA2])
    assert started.replicaCount == 2

    after_start = controller.list_replica()
    assert len(after_start) == 2
    assert after_start[0].mode == "RW"
    assert after_start[1].mode == "RW"

    blockdev = get_blockdev()
    written = common.random_string(128)
    write_offset = 1024
    verify_data(blockdev, write_offset, written)

    # Cleanup replica2
    cleanup_replica(replica2)

    verify_async(blockdev, 10, 128, 1)
    common.verify_replica_state(controller, 1, "ERR")
    verify_read(blockdev, write_offset, written)

    controller.delete(after_start[1])

    # Rebuild replica2
    common.open_replica(replica2)
    cmd.add_replica(common.REPLICA2)

    verify_async(blockdev, 10, 128, 1)
    common.verify_replica_state(controller, 1, "RW")
    verify_read(blockdev, write_offset, written)

    # WORKAROUND for unable to remove the parent of volume head
    created_snap = cmd.snapshot_create()

    tree = cmd.snapshot_info()
    assert len(tree) == 3
    parent_snap = tree[created_snap]["parent"]
    parent_entry = tree[parent_snap]
    assert parent_entry["parent"] == ""
    assert created_snap in parent_entry["children"]
    assert parent_entry["usercreated"] is False
    assert parent_entry["removed"] is False

    cmd.snapshot_purge()
    tree = cmd.snapshot_info()
    assert len(tree) == 2
    assert tree[created_snap] is not None
    assert tree[VOLUME_HEAD] is not None
def restore_inc_test(
        grpc_engine_manager,  # NOQA
        grpc_controller,  # NOQA
        grpc_replica1, grpc_replica2,  # NOQA
        grpc_dr_controller,  # NOQA
        grpc_dr_replica1, grpc_dr_replica2,  # NOQA
        backup_target):  # NOQA
    """Exercise incremental restore onto a DR (no-frontend) volume.

    Five backups (backup0..backup4) are created on the primary volume; the
    DR volume is restored from backup0 and then incrementally from backup1,
    backup2, backup3 and backup4. After each step the DR data, the absence
    of delta files, and the last-restored backup reported by the restore
    status are checked.

    NOTE(review): the original indentation was lost; the nesting below
    (in particular what belongs to the ``if "vfs"`` branches, the retry
    loops and the ``pytest.raises`` block) is reconstructed and should be
    confirmed.
    """
    address = grpc_controller.address

    dev = get_dev(grpc_replica1, grpc_replica2,
                  grpc_controller)

    # One NUL character, repeated below to build zero-filled ranges.
    zero_string = b'\x00'.decode('utf-8')

    # backup0: 256 random data in 1st block
    length0 = 256
    snap0_data = random_string(length0)
    verify_data(dev, 0, snap0_data)
    verify_data(dev, BLOCK_SIZE, snap0_data)
    snap0 = cmd.snapshot_create(address)
    backup0 = create_backup(address, snap0, backup_target)["URL"]
    backup0_name = cmd.backup_inspect(address, backup0)['Name']

    # backup1: 32 random data + 32 zero data + 192 random data in 1st block
    length1 = 32
    offset1 = 32
    snap1_data = zero_string * length1
    verify_data(dev, offset1, snap1_data)
    snap1 = cmd.snapshot_create(address)
    backup1 = create_backup(address, snap1, backup_target)["URL"]
    backup1_name = cmd.backup_inspect(address, backup1)['Name']

    # backup2: 32 random data + 256 random data in 1st block,
    # 256 random data in 2nd block
    length2 = 256
    offset2 = 32
    snap2_data = random_string(length2)
    verify_data(dev, offset2, snap2_data)
    verify_data(dev, BLOCK_SIZE, snap2_data)
    snap2 = cmd.snapshot_create(address)
    backup2 = create_backup(address, snap2, backup_target)["URL"]
    backup2_name = cmd.backup_inspect(address, backup2)['Name']

    # backup3: 64 zero data + 192 random data in 1st block
    length3 = 64
    offset3 = 0
    verify_data(dev, offset3, zero_string * length3)
    # Zero the 32 characters starting at offset 256 and the first 256
    # characters of the second block.
    verify_data(dev, length2, zero_string * offset2)
    verify_data(dev, BLOCK_SIZE, zero_string * length2)
    snap3 = cmd.snapshot_create(address)
    backup3 = create_backup(address, snap3, backup_target)["URL"]
    backup3_name = cmd.backup_inspect(address, backup3)['Name']

    # backup4: 256 random data in 1st block
    length4 = 256
    offset4 = 0
    snap4_data = random_string(length4)
    verify_data(dev, offset4, snap4_data)
    snap4 = cmd.snapshot_create(address)
    backup4 = create_backup(address, snap4, backup_target)["URL"]
    backup4_name = cmd.backup_inspect(address, backup4)['Name']

    # start no-frontend volume
    # start dr volume (no frontend)
    dr_address = grpc_dr_controller.address
    start_no_frontend_volume(grpc_engine_manager,
                             grpc_dr_controller,
                             grpc_dr_replica1, grpc_dr_replica2)

    cmd.backup_restore(dr_address, backup0)
    wait_for_restore_completion(dr_address, backup0)
    verify_no_frontend_data(grpc_engine_manager,
                            0, snap0_data, grpc_dr_controller)

    # mock restore crash/error
    delta_file1 = "volume-delta-" + backup0_name + ".img"
    if "vfs" in backup_target:
        # Temporarily rename every "*blk" file under the backup volume
        # directory so the incremental restore below fails.
        command = ["find", VFS_DIR, "-type", "d", "-name", VOLUME_NAME]
        backup_volume_path = subprocess.check_output(command).strip()
        command = ["find", backup_volume_path, "-name", "*blk"]
        blocks = subprocess.check_output(command).split()
        assert len(blocks) != 0
        for blk in blocks:
            command = ["mv", blk, blk + ".tmp"]
            subprocess.check_output(command).strip()
        # should fail
        is_failed = False
        cmd.restore_inc(dr_address, backup1, backup0_name)
        # Poll the restore status until an entry for backup1 reports a
        # non-empty error.
        for i in range(RETRY_COUNTS):
            rs = cmd.restore_status(dr_address)
            for status in rs.values():
                # Stop scanning this round as soon as an entry refers to a
                # different backup URL.
                if status['backupURL'] != backup1:
                    break
                if 'error' in status.keys():
                    if status['error'] != "":
                        assert 'no such file or directory' in \
                               status['error']
                        is_failed = True
            if is_failed:
                break
            time.sleep(RETRY_INTERVAL)
        assert is_failed

        # The failed restore is expected to leave its delta file behind.
        assert path.exists(FIXED_REPLICA_PATH1 + delta_file1)
        assert path.exists(FIXED_REPLICA_PATH2 + delta_file1)
        # Put the block files back so the next restore can succeed.
        for blk in blocks:
            command = ["mv", blk + ".tmp", blk]
            subprocess.check_output(command)

    # Expected content after backup1: backup0 data with 32 zeros at offset 32.
    data1 = \
        snap0_data[0:offset1] + snap1_data + \
        snap0_data[offset1+length1:]
    # race condition: last restoration has failed
    # but `isRestoring` hasn't been cleanup
    # NOTE(review): as written, the loop sleeps and retries when the error
    # output does NOT contain "already in progress" and re-raises when it
    # does, which looks inverted relative to the comment above - confirm.
    for i in range(RETRY_COUNTS):
        try:
            restore_incrementally(dr_address, backup1, backup0_name)
            break
        except subprocess.CalledProcessError as e:
            if "already in progress" not in e.output:
                time.sleep(RETRY_INTERVAL)
            else:
                raise e

    verify_no_frontend_data(grpc_engine_manager,
                            0, data1, grpc_dr_controller)

    assert not path.exists(FIXED_REPLICA_PATH1 + delta_file1)
    assert not path.exists(FIXED_REPLICA_PATH2 + delta_file1)

    status = cmd.restore_status(dr_address)
    compare_last_restored_with_backup(status, backup1_name)

    # Expected content after backup2: first block prefix, snap2 data, zero
    # padding up to BLOCK_SIZE, then snap2 data again in the second block.
    data2 = \
        data1[0:offset2] + snap2_data + \
        zero_string * (BLOCK_SIZE - length2 - offset2) + snap2_data
    restore_incrementally(dr_address, backup2, backup1_name)
    verify_no_frontend_data(grpc_engine_manager,
                            0, data2, grpc_dr_controller)

    delta_file2 = "volume-delta-" + backup1_name + ".img"
    assert not path.exists(FIXED_REPLICA_PATH1 + delta_file2)
    assert not path.exists(FIXED_REPLICA_PATH2 + delta_file2)

    status = cmd.restore_status(dr_address)
    compare_last_restored_with_backup(status, backup2_name)

    # mock race condition
    with pytest.raises(subprocess.CalledProcessError) as e:
        restore_incrementally(dr_address, backup1, backup0_name)
        # NOTE(review): placed inside the `with` block, this assert never
        # runs once the call above raises; `e` is also the pytest exception
        # info wrapper rather than the error text. Confirm the intended
        # check on the error message.
        assert "doesn't match lastRestored" in e

    data3 = zero_string * length3 + data2[length3:length2]
    restore_incrementally(dr_address, backup3, backup2_name)
    verify_no_frontend_data(grpc_engine_manager,
                            0, data3, grpc_dr_controller)

    # NOTE(review): delta_file1/delta_file2 are named after the
    # last-restored backup, but this one uses backup3_name instead of
    # backup2_name - confirm which name is intended.
    delta_file3 = "volume-delta-" + backup3_name + ".img"
    assert not path.exists(FIXED_REPLICA_PATH1 + delta_file3)
    assert not path.exists(FIXED_REPLICA_PATH2 + delta_file3)
    status = cmd.restore_status(dr_address)
    compare_last_restored_with_backup(status, backup3_name)

    # mock corner case: invalid last-restored backup
    rm_backups(address, ENGINE_NAME, [backup3])
    # actually it is full restoration
    restore_incrementally(dr_address, backup4, backup3_name)
    verify_no_frontend_data(grpc_engine_manager,
                            0, snap4_data, grpc_dr_controller)
    status = cmd.restore_status(dr_address)
    compare_last_restored_with_backup(status, backup4_name)

    if "vfs" in backup_target:
        # No temporary files should be left under the backup volume dir.
        # NOTE(review): the pattern is spelled "*tempoary" - confirm it
        # matches the temp file names that are actually produced.
        command = ["find", VFS_DIR, "-type", "d", "-name", VOLUME_NAME]
        backup_volume_path = subprocess.check_output(command).strip()
        command = ["find", backup_volume_path, "-name", "*tempoary"]
        tmp_files = subprocess.check_output(command).split()
        assert len(tmp_files) == 0

    cleanup_no_frontend_volume(grpc_engine_manager,
                               grpc_dr_controller,
                               grpc_dr_replica1, grpc_dr_replica2)

    # backup3 was already removed above.
    rm_backups(address, ENGINE_NAME, [backup0, backup1, backup2, backup4])

    cmd.sync_agent_server_reset(address)
    cleanup_controller(grpc_controller)
    cleanup_replica(grpc_replica1)
    cleanup_replica(grpc_replica2)
def test_backup_type(
        grpc_replica1, grpc_replica2,  # NOQA
        grpc_controller, backup_targets):  # NOQA
    """Check the ``IsIncremental`` flag reported for successive backups.

    Per backup target: backup0 is expected to be non-incremental, backup1
    and backup2 incremental; backup2 is then removed, after which backup3
    is expected to be non-incremental and backup4 incremental again.
    """
    for backup_target in backup_targets:
        address = grpc_controller.address
        block_size = 2 * 1024 * 1024
        dev = get_dev(grpc_replica1, grpc_replica2, grpc_controller)

        # One NUL character, repeated below to build zero-filled ranges.
        nul = b'\x00'.decode('utf-8')

        def snapshot_and_backup():
            # Snapshot the current volume content and back it up.
            snapshot = cmd.snapshot_create(address)
            return create_backup(address, snapshot, backup_target)

        # backup0: 256 random data in 1st block
        len0 = 256
        data0 = random_string(len0)
        verify_data(dev, 0, data0)
        verify_data(dev, block_size, data0)
        backup0 = snapshot_and_backup()
        backup0_url = backup0["URL"]
        assert backup0['IsIncremental'] is False

        # backup1: 32 random data + 32 zero data + 192 random data
        # in 1st block
        len1 = 32
        off1 = 32
        verify_data(dev, off1, nul * len1)
        backup1 = snapshot_and_backup()
        backup1_url = backup1["URL"]
        assert backup1['IsIncremental'] is True

        # backup2: 32 random data + 256 random data in 1st block,
        # 256 random data in 2nd block
        len2 = 256
        off2 = 32
        data2 = random_string(len2)
        verify_data(dev, off2, data2)
        verify_data(dev, block_size, data2)
        backup2 = snapshot_and_backup()
        backup2_url = backup2["URL"]
        assert backup2['IsIncremental'] is True

        rm_backups(address, ENGINE_NAME, [backup2_url])

        # backup3: 64 zero data + 192 random data in 1st block
        len3 = 64
        off3 = 0
        verify_data(dev, off3, nul * len3)
        verify_data(dev, len2, nul * off2)
        verify_data(dev, block_size, nul * len2)
        backup3 = snapshot_and_backup()
        backup3_url = backup3["URL"]
        assert backup3['IsIncremental'] is False

        # backup4: 256 random data in 1st block
        len4 = 256
        off4 = 0
        data4 = random_string(len4)
        verify_data(dev, off4, data4)
        backup4 = snapshot_and_backup()
        backup4_url = backup4["URL"]
        assert backup4['IsIncremental'] is True

        # backup2 was already removed above.
        remaining = [backup0_url, backup1_url, backup3_url, backup4_url]
        rm_backups(address, ENGINE_NAME, remaining)

        cmd.sync_agent_server_reset(address)
        for replica in (grpc_replica1, grpc_replica2):
            cleanup_replica(replica)
        cleanup_controller(grpc_controller)
def test_ha_single_replica_rebuild(
        grpc_controller,  # NOQA
        grpc_replica1, grpc_replica2):  # NOQA
    """Fail the second replica, re-add it, then purge the extra snapshot.

    After the replica is re-added the test creates a snapshot, checks that
    its parent is a non-user-created, non-removed snapshot with no parent of
    its own, purges snapshots (waiting for completion), and expects only the
    new snapshot and the volume head to remain.
    """
    address = grpc_controller.address

    for replica in (grpc_replica1, grpc_replica2):
        open_replica(replica)
    assert len(grpc_controller.replica_list()) == 0

    r1_url = grpc_replica1.url
    r2_url = grpc_replica2.url
    volume = grpc_controller.volume_start(replicas=[r1_url, r2_url])
    assert volume.replicaCount == 2

    listed = grpc_controller.replica_list()
    assert len(listed) == 2
    for index in range(2):
        assert listed[index].mode == "RW"

    device = get_blockdev(VOLUME_NAME)
    payload = random_string(128)
    payload_offset = 1024
    verify_data(device, payload_offset, payload)

    # Cleanup replica2
    cleanup_replica(grpc_replica2)

    verify_async(device, 10, 128, 1)
    verify_replica_state(grpc_controller, 1, "ERR")
    verify_read(device, payload_offset, payload)

    grpc_controller.replica_delete(listed[1].address)

    # Rebuild replica2
    open_replica(grpc_replica2)
    cmd.add_replica(address, r2_url)

    verify_async(device, 10, 128, 1)
    verify_replica_state(grpc_controller, 1, "RW")
    verify_read(device, payload_offset, payload)

    # WORKAROUND for unable to remove the parent of volume head
    user_snap = cmd.snapshot_create(address)

    snapshots = cmd.snapshot_info(address)
    assert len(snapshots) == 3
    system_snap = snapshots[user_snap]["parent"]
    system_entry = snapshots[system_snap]
    assert system_entry["parent"] == ""
    assert user_snap in system_entry["children"]
    assert system_entry["usercreated"] is False
    assert system_entry["removed"] is False

    cmd.snapshot_purge(address)
    wait_for_purge_completion(address)

    snapshots = cmd.snapshot_info(address)
    assert len(snapshots) == 2
    assert snapshots[user_snap] is not None
    assert snapshots[VOLUME_HEAD] is not None
def test_upgrade(
        grpc_engine_manager,  # NOQA
        grpc_controller,  # NOQA
        grpc_fixed_dir_replica1, grpc_fixed_dir_replica2,  # NOQA
        grpc_extra_replica1, grpc_extra_replica2):  # NOQA
    """Upgrade the engine to the upgrade binary and back, checking the data.

    The written payload is re-verified after each upgrade attempt: the
    successful upgrade onto the extra replicas, two upgrade calls that are
    expected to raise ``grpc.RpcError``, and the final upgrade back to
    ``LONGHORN_BINARY`` on the re-opened fixed-dir replicas.
    """
    device = get_dev(grpc_fixed_dir_replica1, grpc_fixed_dir_replica2,
                     grpc_controller)

    offset = 0
    length = 128
    payload = random_string(length)
    verify_data(device, offset, payload)

    # both set pointed to the same volume underlying
    original_urls = [grpc_fixed_dir_replica1.url,
                     grpc_fixed_dir_replica2.url]
    upgrade_urls = [grpc_extra_replica1.url, grpc_extra_replica2.url]

    volume = grpc_controller.volume_start(replicas=list(original_urls))
    assert volume.replicaCount == 2

    upgraded = grpc_engine_manager.engine_upgrade(
        ENGINE_NAME, LONGHORN_UPGRADE_BINARY, SIZE, list(upgrade_urls))
    assert upgraded.spec.binary == LONGHORN_UPGRADE_BINARY

    verify_data(device, offset, payload)

    grpc_controller.client_upgrade(upgraded.spec.listen)
    wait_for_process_running(grpc_engine_manager, ENGINE_NAME,
                             INSTANCE_MANAGER_TYPE_ENGINE)

    # cannot start with same binary
    with pytest.raises(grpc.RpcError):
        grpc_engine_manager.engine_upgrade(
            ENGINE_NAME, LONGHORN_UPGRADE_BINARY, SIZE, list(original_urls))
    verify_data(device, offset, payload)

    # cannot start with wrong replica, would trigger rollback
    with pytest.raises(grpc.RpcError):
        grpc_engine_manager.engine_upgrade(
            ENGINE_NAME, LONGHORN_UPGRADE_BINARY, SIZE, ["random"])
    verify_data(device, offset, payload)

    # Clean up the fixed-dir replicas and re-open whatever cleanup returns.
    reopened1 = cleanup_replica(grpc_fixed_dir_replica1)
    reopened2 = cleanup_replica(grpc_fixed_dir_replica2)
    open_replica(reopened1)
    open_replica(reopened2)

    restored = grpc_engine_manager.engine_upgrade(
        ENGINE_NAME, LONGHORN_BINARY, SIZE, list(original_urls))
    assert restored.spec.binary == LONGHORN_BINARY

    verify_data(device, offset, payload)

    grpc_controller.client_upgrade(restored.spec.listen)
    wait_for_process_running(grpc_engine_manager, ENGINE_NAME,
                             INSTANCE_MANAGER_TYPE_ENGINE)