def test_backup_volume_deletion(
        grpc_replica1, grpc_replica2,  # NOQA
        grpc_controller, backup_targets):  # NOQA
    """
    Test that a backup volume can be removed from every backup target.

    For each backup target:
    1. Bring up the volume and write 128 random bytes at offset 0.
    2. Snapshot the volume and back the snapshot up.
    3. Check the volume name, size and snapshot name of the backup.
    4. Remove the backup volume.
    5. Verify that inspecting the removed backup volume fails.
    6. Reset the sync agent server and clean up controller and replicas.
    """
    write_offset = 0
    payload_len = 128
    controller_addr = grpc_controller.address

    for target in backup_targets:
        device = get_dev(grpc_replica1, grpc_replica2, grpc_controller)
        payload = random_string(payload_len)
        verify_data(device, write_offset, payload)
        snapshot = cmd.snapshot_create(controller_addr)

        backup = create_backup(controller_addr, snapshot, target)
        assert backup["VolumeName"] == VOLUME_NAME
        assert backup["Size"] == BLOCK_SIZE_STR
        assert snapshot in backup["SnapshotName"]

        # once the backup volume is gone, inspecting it has to fail
        cmd.backup_volume_rm(controller_addr, VOLUME_NAME, target)
        volume_url = get_backup_volume_url(target, VOLUME_NAME)
        with pytest.raises(subprocess.CalledProcessError):
            cmd.backup_inspect_volume(controller_addr, volume_url)

        # tear everything down so the next target starts from scratch
        cmd.sync_agent_server_reset(controller_addr)
        cleanup_controller(grpc_controller)
        cleanup_replica(grpc_replica1)
        cleanup_replica(grpc_replica2)
def test_backup_corrupt_deletion(
        grpc_replica1, grpc_replica2,  # NOQA
        grpc_controller, backup_targets):  # NOQA
    """
    Test deletion of a backup whose config file has been corrupted.

    For each backup target:
    1. Write two backup blocks, snapshot and create backup1.
    2. Overwrite the second block, snapshot and create backup2.
    3. Verify the backup volume holds 3 blocks.
    4. Overwrite the backup1 config file with invalid content.
    5. Remove backup1 and verify 2 blocks remain.
    6. Remove backup2 and verify no blocks remain.
    7. Delete volume.cfg, remove the backup volume and verify that
       inspecting it fails.
    8. Reset the sync agent server and clean up controller and replicas.
    """
    address = grpc_controller.address
    length = 128

    for backup_target in backup_targets:
        dev = get_dev(grpc_replica1, grpc_replica2, grpc_controller)

        # write two backup blocks
        verify_data(dev, 0, random_string(length))
        verify_data(dev, BLOCK_SIZE, random_string(length))
        snap = cmd.snapshot_create(address)
        backup1 = create_backup(address, snap, backup_target)

        # overwrite second backup block
        verify_data(dev, BLOCK_SIZE, random_string(length))
        snap = cmd.snapshot_create(address)
        backup2 = create_backup(address, snap, backup_target)

        # check that the volume now has 3 blocks
        # backup1 and backup2 share the first block
        # and have different second blocks
        check_backup_volume_block_count(address, VOLUME_NAME,
                                        backup_target, 3)

        # corrupt backup1 config
        # The context manager guarantees the handle is closed even when
        # the write assertion fails.
        cfg = findfile(BACKUP_DIR, "backup_" + backup1["Name"] + ".cfg")
        with open(cfg, "w") as corrupt_backup:
            assert corrupt_backup.write("{corrupt: definitely") > 0
        cmd.backup_rm(address, backup1["URL"])

        # check that the volume now has 2 blocks
        # backup2 still relies on the backup1 first block
        check_backup_volume_block_count(address, VOLUME_NAME,
                                        backup_target, 2)

        # remove backup 2 and check that all blocks are deleted
        cmd.backup_rm(address, backup2["URL"])
        check_backup_volume_block_count(address, VOLUME_NAME,
                                        backup_target, 0)

        # remove volume.cfg then delete the backup volume
        cfg = findfile(finddir(BACKUP_DIR, VOLUME_NAME), "volume.cfg")
        os.remove(cfg)
        cmd.backup_volume_rm(address, VOLUME_NAME, backup_target)

        url = get_backup_volume_url(backup_target, VOLUME_NAME)
        with pytest.raises(subprocess.CalledProcessError):
            cmd.backup_inspect_volume(address, url)

        cmd.sync_agent_server_reset(address)
        cleanup_controller(grpc_controller)
        cleanup_replica(grpc_replica1)
        cleanup_replica(grpc_replica2)
def check_backup_volume_block_count(address, volume, backup_target, expected):
    """
    Assert that a backup volume holds exactly `expected` blocks.

    Two things are checked: the "DataStored" value reported when
    inspecting the backup volume must equal `BLOCK_SIZE * expected`
    (as a string), and the number of "*.blk" files found recursively
    under the volume's "blocks" directory must equal `expected`.
    A missing "blocks" directory counts as zero blocks.
    """
    # size reported by the backup volume metadata
    volume_url = get_backup_volume_url(backup_target, volume)
    volume_info = cmd.backup_inspect_volume(address, volume_url)
    assert volume_info["DataStored"] == str(BLOCK_SIZE * expected)

    # block files actually present on disk
    volume_dir = finddir(BACKUP_DIR, volume)
    assert os.path.exists(volume_dir)

    block_dir = os.path.join(volume_dir, "blocks")
    if os.path.exists(block_dir):
        blocks_on_disk = sum(1 for _ in Path(block_dir).rglob("*.blk"))
    else:
        blocks_on_disk = 0
    assert blocks_on_disk == expected
def test_inc_restore_with_rebuild_and_expansion(grpc_controller,
                                                grpc_replica1, grpc_replica2,
                                                grpc_controller_no_frontend,
                                                grpc_fixed_dir_replica1,
                                                grpc_fixed_dir_replica2,
                                                backup_targets):  # NOQA
    """
    Test incremental restore on a DR volume across expansion and rebuild.

    Steps:
    1. Pick a random backup target and best-effort remove any existing
       backup volume on it.
    2. Start the frontend volume and a no-frontend (DR) volume.
    3. Write data0, back it up and restore the backup to the DR volume.
    4. Expand the frontend volume, write data1 into the expanded range
       and back it up.
    5. Restoring the second backup to the unexpanded DR volume must
       fail without touching the recorded restore status.
    6. Expand the DR volume; the next restore attempt must fail with
       the snapshot purge message. Wait for the purge and check the
       remaining snapshot chain.
    7. Restore the second backup and verify data1.
    8. Adding an unexpanded replica must fail; expand it manually,
       add it again and check one RW and one WO replica exist.
    9. The next restore call fails with "already restored backup";
       wait for the restore, verify the rebuilt replica and that both
       replicas are RW with data1 intact.
    10. Remove the backup volume.
    """
    # Pick up a random backup target.
    # random.choice works for any non-empty number of targets, whereas a
    # fixed 0..1 index assumes exactly two of them.
    backup_target = random.choice(backup_targets)

    address = grpc_controller.address
    dr_address = grpc_controller_no_frontend.address

    # Best-effort removal of an already existing backup volume; a failure
    # here is deliberately ignored.
    try:
        cmd.backup_volume_rm(address, VOLUME_NAME, backup_target)
    except Exception:
        pass

    dev = get_dev(grpc_replica1, grpc_replica2, grpc_controller)

    start_no_frontend_volume(grpc_controller_no_frontend,
                             grpc_fixed_dir_replica1)

    data0_len = random_length(PAGE_SIZE)
    data0 = Data(random.randrange(0, SIZE - 2 * PAGE_SIZE, PAGE_SIZE),
                 data0_len, random_string(data0_len))
    snap0 = Snapshot(dev, data0, address)
    backup0_info = create_backup(address, snap0.name, backup_target)
    assert backup0_info["VolumeName"] == VOLUME_NAME
    assert backup0_info["Size"] == str(BLOCK_SIZE)

    cmd.backup_restore(dr_address, backup0_info["URL"])
    wait_for_restore_completion(dr_address, backup0_info["URL"])
    verify_no_frontend_data(data0.offset, data0.content,
                            grpc_controller_no_frontend)

    expand_volume_with_frontend(grpc_controller, EXPANDED_SIZE)
    wait_and_check_volume_expansion(grpc_controller, EXPANDED_SIZE)

    # data1 is placed inside the newly expanded range
    data1_len = random_length(PAGE_SIZE)
    data1 = Data(random.randrange(SIZE, EXPANDED_SIZE - PAGE_SIZE, PAGE_SIZE),
                 data1_len, random_string(data1_len))
    snap1 = Snapshot(dev, data1, address)
    backup1_info = create_backup(address, snap1.name,
                                 backup_target, EXPANDED_SIZE_STR)
    assert backup1_info["VolumeName"] == VOLUME_NAME
    assert backup1_info["Size"] == str(2 * BLOCK_SIZE)

    backup_volumes = cmd.backup_volume_list(address, VOLUME_NAME,
                                            backup_target)
    assert VOLUME_NAME in backup_volumes
    url = get_backup_volume_url(backup_target, VOLUME_NAME)
    backup_info = cmd.backup_inspect_volume(address, url)
    assert backup_info["Size"] == EXPANDED_SIZE_STR

    # restore command invocation should error out
    with pytest.raises(subprocess.CalledProcessError) as e:
        cmd.backup_restore(dr_address, backup1_info["URL"])
    assert "need to expand the DR volume" in e.value.stdout

    # The above restore error is triggered before calling the replicas.
    # Hence the error won't be recorded in the restore status
    # and we can continue restoring backups for the DR volume.
    rs = cmd.restore_status(dr_address)
    for status in rs.values():
        assert status['backupURL'] == backup0_info["URL"]
        assert status['lastRestored'] == backup0_info["Name"]
        assert 'error' not in status.keys()
        assert not status["isRestoring"]

    grpc_controller_no_frontend.volume_expand(EXPANDED_SIZE)
    wait_for_volume_expansion(grpc_controller_no_frontend,
                              EXPANDED_SIZE)

    # This restore command will trigger snapshot purge.
    # And the error is triggered before calling the replicas.
    with pytest.raises(subprocess.CalledProcessError) as e:
        cmd.backup_restore(dr_address, backup1_info["URL"])
    assert "found more than 1 snapshot in the replicas, " \
           "hence started to purge snapshots before the restore" \
           in e.value.stdout
    wait_for_purge_completion(dr_address)

    # after the purge only the expansion snapshot and the volume head
    # are expected to remain
    snaps_info = cmd.snapshot_info(dr_address)
    assert len(snaps_info) == 2
    volume_head_name = "volume-head"
    snap_name = "expand-" + EXPANDED_SIZE_STR
    head_info = snaps_info[volume_head_name]
    assert head_info["name"] == volume_head_name
    assert head_info["parent"] == snap_name
    assert not head_info["children"]
    assert head_info["usercreated"] is False
    snap_info = snaps_info[snap_name]
    assert snap_info["name"] == snap_name
    assert not snap_info["parent"]
    assert volume_head_name in snap_info["children"]
    assert snap_info["usercreated"] is False

    cmd.backup_restore(dr_address, backup1_info["URL"])
    wait_for_restore_completion(dr_address, backup1_info["URL"])
    verify_no_frontend_data(data1.offset, data1.content,
                            grpc_controller_no_frontend)

    # For DR volume, the rebuilding replica won't be expanded automatically.
    open_replica(grpc_fixed_dir_replica2)
    with pytest.raises(subprocess.CalledProcessError):
        cmd.add_replica(dr_address, grpc_fixed_dir_replica2.url, True)

    # Manually expand the rebuilding replica then retry `add-replica`.
    grpc_fixed_dir_replica2.replica_open()
    grpc_fixed_dir_replica2.replica_expand(EXPANDED_SIZE)
    grpc_fixed_dir_replica2.replica_close()
    cmd.add_replica(dr_address, grpc_fixed_dir_replica2.url, True)

    replicas = grpc_controller_no_frontend.replica_list()
    assert len(replicas) == 2
    rw_replica, wo_replica = 0, 0
    for r in replicas:
        if r.mode == 'RW':
            rw_replica += 1
        else:
            assert r.mode == "WO"
            wo_replica += 1
    assert rw_replica == 1 and wo_replica == 1

    # The old replica will fail the restore but the error won't be recorded.
    # Then rebuilding replica will start full restore.
    with pytest.raises(subprocess.CalledProcessError) as e:
        cmd.backup_restore(dr_address, backup1_info["URL"])
    assert "already restored backup" in e.value.stdout
    wait_for_restore_completion(dr_address, backup1_info["URL"])

    cmd.verify_rebuild_replica(dr_address, grpc_fixed_dir_replica2.url)
    replicas = grpc_controller_no_frontend.replica_list()
    assert len(replicas) == 2
    for r in replicas:
        assert r.mode == 'RW'

    verify_no_frontend_data(data1.offset, data1.content,
                            grpc_controller_no_frontend)

    cmd.backup_volume_rm(grpc_controller.address, VOLUME_NAME, backup_target)
def test_backup_volume_list(
        grpc_replica_client, grpc_controller_client,  # NOQA
        grpc_replica1, grpc_replica2,  # NOQA
        grpc_controller, backup_targets):  # NOQA
    """
    Test backup volume list

    Context:

    An error while listing one backup volume must not prevent the
    other backup volumes from being listed. Otherwise a single faulty
    backup could block retrieval of every known backup volume.

    Steps:

    1.  Bring up volume(1) and volume(2)
    2.  Write some data to both volumes
    3.  Create one backup per volume
    4.  Request a backup volume list with backup details
    5.  Verify the list holds exactly one backup and no error
        message for each volume
    6.  Place an empty file named "*****@*****.**" into the backups
        folder of volume(1)
    7.  Request the list again and repeat the checks of step 5
    8.  Remove both backup volumes and verify that inspecting them fails
    9.  Cleanup both volumes
    """
    # bring up the clients of the second volume
    replica1_vol2 = grpc_replica_client(REPLICA_2_NAME + "-1")
    replica2_vol2 = grpc_replica_client(REPLICA_2_NAME + "-2")
    controller_vol2 = grpc_controller_client(ENGINE2_NAME, VOLUME2_NAME)

    write_offset = 0
    payload_len = 128
    address = grpc_controller.address
    address2 = controller_vol2.address

    def _assert_both_volumes_listed(target):
        # full listing: each volume shows one backup and no error message
        listing = cmd.backup_volume_list(address, "", target,
                                         include_backup_details=True)
        for volume_name in (VOLUME_NAME, VOLUME2_NAME):
            assert listing[volume_name] is not None
            assert len(listing[volume_name]["Backups"]) == 1
            assert MESSAGE_TYPE_ERROR not in listing[volume_name]["Messages"]

    def _remove_backup_volume(volume_name, target):
        # remove the backup volume, list it once more, then make sure
        # it can no longer be inspected
        cmd.backup_volume_rm(address, volume_name, target)
        cmd.backup_volume_list(address, volume_name, target,
                               include_backup_details=True)
        volume_url = get_backup_volume_url(target, volume_name)
        with pytest.raises(subprocess.CalledProcessError):
            cmd.backup_inspect_volume(address, volume_url)

    for target in backup_targets:
        device = get_dev(grpc_replica1, grpc_replica2, grpc_controller)
        device2 = get_dev(replica1_vol2, replica2_vol2, controller_vol2)

        # regular backup of volume 1
        payload = random_string(payload_len)
        verify_data(device, write_offset, payload)
        snapshot = cmd.snapshot_create(address)
        backup = create_backup(address, snapshot, target,
                               backup_name="backup-z")
        assert backup["VolumeName"] == VOLUME_NAME
        assert backup["Size"] == BLOCK_SIZE_STR
        assert snapshot in backup["SnapshotName"]

        # regular backup of volume 2
        verify_data(device2, write_offset, random_string(payload_len))
        snapshot = cmd.snapshot_create(address2)
        backup = create_backup(address2, snapshot, target,
                               backup_name="backup-y")
        assert backup["VolumeName"] == VOLUME2_NAME
        assert backup["Size"] == BLOCK_SIZE_STR
        assert snapshot in backup["SnapshotName"]

        # listing with only well-formed backups present
        _assert_both_volumes_listed(target)

        # drop an empty, badly named file next to the backup configs;
        # the list call is still expected to return all valid entries
        backups_dir = os.path.join(finddir(BACKUP_DIR, VOLUME_NAME),
                                   "backups")
        with open(os.path.join(backups_dir, "*****@*****.**"), "w"):
            pass
        _assert_both_volumes_listed(target)

        # the volume holding the badly named file goes first
        _remove_backup_volume(VOLUME_NAME, target)
        # then the backups of volume 2
        _remove_backup_volume(VOLUME2_NAME, target)

        # cleanup volume 1
        cmd.sync_agent_server_reset(address)
        cleanup_controller(grpc_controller)
        cleanup_replica(grpc_replica1)
        cleanup_replica(grpc_replica2)

        # cleanup volume 2
        cmd.sync_agent_server_reset(address2)
        cleanup_controller(controller_vol2)
        cleanup_replica(replica1_vol2)
        cleanup_replica(replica2_vol2)
def test_backup_block_no_cleanup(
        grpc_replica1, grpc_replica2,  # NOQA
        grpc_controller, backup_targets):  # NOQA
    """
    Test that blocks are kept while an in progress backup file exists.

    For each backup target:
    1. Write two blocks and create backup1 (2 blocks stored).
    2. Overwrite the second block and create backup2 (3 blocks stored).
    3. Create an artificial in progress backup file.
    4. Remove backup1 and verify that all 3 blocks are still present.
    5. Delete the in progress backup file.
    6. Remove backup2 and verify that no blocks remain.
    7. Remove the backup volume, verify that inspecting it fails,
       then clean up.
    """
    controller_addr = grpc_controller.address
    payload_len = 128

    def _assert_two_block_backup(backup, snapshot):
        # metadata every backup in this test has to carry
        assert backup["VolumeName"] == VOLUME_NAME
        assert backup["Size"] == str(BLOCK_SIZE * 2)
        assert snapshot in backup["SnapshotName"]

    for target in backup_targets:
        device = get_dev(grpc_replica1, grpc_replica2, grpc_controller)

        # first backup: two freshly written blocks
        verify_data(device, 0, random_string(payload_len))
        verify_data(device, BLOCK_SIZE, random_string(payload_len))
        snapshot = cmd.snapshot_create(controller_addr)
        first_backup = create_backup(controller_addr, snapshot, target)
        _assert_two_block_backup(first_backup, snapshot)
        check_backup_volume_block_count(controller_addr, VOLUME_NAME,
                                        target, 2)

        # second backup: only the second block is rewritten
        verify_data(device, BLOCK_SIZE, random_string(payload_len))
        snapshot = cmd.snapshot_create(controller_addr)
        second_backup = create_backup(controller_addr, snapshot, target)
        _assert_two_block_backup(second_backup, snapshot)

        # the first block is shared between both backups while the
        # second block differs, which adds up to 3 blocks
        check_backup_volume_block_count(controller_addr, VOLUME_NAME,
                                        target, 3)

        # an artificial in progress backup
        # keeps the gc from removing blocks
        in_progress_marker = create_in_progress_backup_file(VOLUME_NAME)

        # with the marker present, removing the first backup
        # must leave all 3 blocks in place
        cmd.backup_rm(controller_addr, first_backup["URL"])
        check_backup_volume_block_count(controller_addr, VOLUME_NAME,
                                        target, 3)

        # drop the in progress backup again
        os.remove(in_progress_marker)

        # removing the remaining backup now clears every block,
        # the one orphaned by the first backup included
        cmd.backup_rm(controller_addr, second_backup["URL"])
        check_backup_volume_block_count(controller_addr, VOLUME_NAME,
                                        target, 0)

        # remove the backup volume and make sure it is really gone
        cmd.backup_volume_rm(controller_addr, VOLUME_NAME, target)
        volume_url = get_backup_volume_url(target, VOLUME_NAME)
        with pytest.raises(subprocess.CalledProcessError):
            cmd.backup_inspect_volume(controller_addr, volume_url)

        cmd.sync_agent_server_reset(controller_addr)
        cleanup_controller(grpc_controller)
        cleanup_replica(grpc_replica1)
        cleanup_replica(grpc_replica2)
def test_backup_block_deletion(
        grpc_replica1, grpc_replica2,  # NOQA
        grpc_controller, backup_targets):  # NOQA
    """
    Test block deduplication and block removal on backup deletion.

    For each backup target:
    1. Write two blocks and create backup1.
    2. Back up the same snapshot again (backup1 duplicate) and verify
       that still only 2 blocks are stored.
    3. Overwrite the second block and create backup2 (3 blocks stored).
    4. Remove the duplicate and verify 3 blocks remain.
    5. Remove backup1 and verify 2 blocks remain.
    6. Remove backup2 and verify no blocks remain.
    7. Remove the backup volume, verify that inspecting it fails,
       then clean up.
    """
    controller_addr = grpc_controller.address
    payload_len = 128

    def _assert_two_block_backup(backup, snapshot):
        # metadata every backup in this test has to carry
        assert backup["VolumeName"] == VOLUME_NAME
        assert backup["Size"] == str(BLOCK_SIZE * 2)
        assert snapshot in backup["SnapshotName"]

    for target in backup_targets:
        device = get_dev(grpc_replica1, grpc_replica2, grpc_controller)

        # first backup: two freshly written blocks
        verify_data(device, 0, random_string(payload_len))
        verify_data(device, BLOCK_SIZE, random_string(payload_len))
        snapshot = cmd.snapshot_create(controller_addr)
        first_backup = create_backup(controller_addr, snapshot, target)
        _assert_two_block_backup(first_backup, snapshot)

        # backing up the same snapshot twice must not add blocks
        duplicate_backup = create_backup(controller_addr, snapshot, target)
        _assert_two_block_backup(duplicate_backup, snapshot)
        check_backup_volume_block_count(controller_addr, VOLUME_NAME,
                                        target, 2)

        # second backup: only the second block is rewritten
        verify_data(device, BLOCK_SIZE, random_string(payload_len))
        snapshot = cmd.snapshot_create(controller_addr)
        second_backup = create_backup(controller_addr, snapshot, target)
        _assert_two_block_backup(second_backup, snapshot)

        # the first block is shared between the backups while the
        # second block differs, which adds up to 3 blocks
        check_backup_volume_block_count(controller_addr, VOLUME_NAME,
                                        target, 3)

        # dropping the duplicate leaves the blocks untouched
        # because every block is still referenced
        cmd.backup_rm(controller_addr, duplicate_backup["URL"])
        check_backup_volume_block_count(controller_addr, VOLUME_NAME,
                                        target, 3)

        # dropping the first backup leaves 2 blocks; its first block
        # stays because the second backup still needs it
        cmd.backup_rm(controller_addr, first_backup["URL"])
        check_backup_volume_block_count(controller_addr, VOLUME_NAME,
                                        target, 2)

        # dropping the last backup removes every block
        cmd.backup_rm(controller_addr, second_backup["URL"])
        check_backup_volume_block_count(controller_addr, VOLUME_NAME,
                                        target, 0)

        # remove the backup volume and make sure it is really gone
        cmd.backup_volume_rm(controller_addr, VOLUME_NAME, target)
        volume_url = get_backup_volume_url(target, VOLUME_NAME)
        with pytest.raises(subprocess.CalledProcessError):
            cmd.backup_inspect_volume(controller_addr, volume_url)

        cmd.sync_agent_server_reset(controller_addr)
        cleanup_controller(grpc_controller)
        cleanup_replica(grpc_replica1)
        cleanup_replica(grpc_replica2)