Esempio n. 1
0
def test_replica_failure_detection(
        grpc_controller_client,  # NOQA
        grpc_replica_client,  # NOQA
        grpc_replica_client2):  # NOQA
    open_replica(grpc_replica_client)
    open_replica(grpc_replica_client2)

    r1_url = grpc_replica_client.url
    r2_url = grpc_replica_client2.url
    v = grpc_controller_client.volume_start(replicas=[
        r1_url,
        r2_url,
    ])
    assert v.replicaCount == 2

    # wait for initial read/write period to pass
    time.sleep(2)

    cleanup_replica(grpc_replica_client)

    detected = False
    for i in range(10):
        replicas = grpc_controller_client.replica_list()
        assert len(replicas) == 2
        for r in replicas:
            if r.address == r1_url and r.mode == 'ERR':
                detected = True
                break
        if detected:
            break
        time.sleep(1)
    assert detected
Esempio n. 2
0
def test_backup(replica1, replica2, controller, backup_targets):  # NOQA
    for backup_target in backup_targets:
        dev = common.get_dev(replica1, replica2, controller)
        backup_test(dev, backup_target)
        common.cleanup_replica(replica1)
        common.cleanup_replica(replica2)
        common.cleanup_controller(controller)
Esempio n. 3
0
def test_backup_volume_deletion(
        grpc_replica1,
        grpc_replica2,  # NOQA
        grpc_controller,
        backup_targets):  # NOQA
    offset = 0
    length = 128
    address = grpc_controller.address

    for backup_target in backup_targets:
        dev = get_dev(grpc_replica1, grpc_replica2, grpc_controller)
        snap_data = random_string(length)
        verify_data(dev, offset, snap_data)
        snap = cmd.snapshot_create(address)

        backup_info = create_backup(address, snap, backup_target)
        assert backup_info["VolumeName"] == VOLUME_NAME
        assert backup_info["Size"] == BLOCK_SIZE_STR
        assert snap in backup_info["SnapshotName"]

        cmd.backup_volume_rm(address, VOLUME_NAME, backup_target)
        info = cmd.backup_volume_list(address, VOLUME_NAME, backup_target)
        assert "cannot find" in info[VOLUME_NAME]["Messages"]["error"]

        cmd.sync_agent_server_reset(address)
        cleanup_controller(grpc_controller)
        cleanup_replica(grpc_replica1)
        cleanup_replica(grpc_replica2)
Esempio n. 4
0
def test_ha_single_replica_failure(
        grpc_controller,  # NOQA
        grpc_replica1,
        grpc_replica2):  # NOQA
    open_replica(grpc_replica1)
    open_replica(grpc_replica2)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    r1_url = grpc_replica1.url
    r2_url = grpc_replica2.url
    v = grpc_controller.volume_start(replicas=[r1_url, r2_url])
    assert v.replicaCount == 2

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev(VOLUME_NAME)

    data = random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    cleanup_replica(grpc_replica2)

    verify_async(dev, 10, 128, 1)

    verify_replica_state(grpc_controller, 1, "ERR")

    verify_read(dev, data_offset, data)
Esempio n. 5
0
def test_ha_single_replica_failure(controller, replica1, replica2):  # NOQA
    open_replica(replica1)
    open_replica(replica2)

    replicas = controller.list_replica()
    assert len(replicas) == 0

    v = controller.list_volume()[0]
    v = v.start(replicas=[
        common.REPLICA1,
        common.REPLICA2
    ])
    assert v.replicaCount == 2

    replicas = controller.list_replica()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev()

    data = common.random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    cleanup_replica(replica2)

    verify_async(dev, 10, 128, 1)

    common.verify_replica_state(controller, 1, "ERR")

    verify_read(dev, data_offset, data)
Esempio n. 6
0
def test_snapshot_tree_backup(replica1, replica2, controller, backup_targets):  # NOQA
    for backup_target in backup_targets:
        dev = common.get_dev(replica1, replica2, controller)
        snapshot_tree_backup_test(dev, backup_target)
        common.cleanup_replica(replica1)
        common.cleanup_replica(replica2)
        common.cleanup_controller(controller)
Esempio n. 7
0
def test_ha_single_replica_failure(controller, replica1, replica2):  # NOQA
    open_replica(replica1)
    open_replica(replica2)

    replicas = controller.list_replica()
    assert len(replicas) == 0

    v = controller.list_volume()[0]
    v = v.start(replicas=[common.REPLICA1, common.REPLICA2])
    assert v.replicaCount == 2

    replicas = controller.list_replica()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev()

    data = common.random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    cleanup_replica(replica2)

    verify_async(dev, 10, 128, 1)

    common.verify_replica_state(controller, 1, "ERR")

    verify_read(dev, data_offset, data)
Esempio n. 8
0
def test_ha_single_replica_rebuild(controller, replica1, replica2):  # NOQA
    open_replica(replica1)
    open_replica(replica2)

    replicas = controller.list_replica()
    assert len(replicas) == 0

    v = controller.list_volume()[0]
    v = v.start(replicas=[common.REPLICA1, common.REPLICA2])
    assert v.replicaCount == 2

    replicas = controller.list_replica()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev()

    data = common.random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    # Cleanup replica2
    cleanup_replica(replica2)

    verify_async(dev, 10, 128, 1)

    common.verify_replica_state(controller, 1, "ERR")

    verify_read(dev, data_offset, data)

    controller.delete(replicas[1])

    # Rebuild replica2
    common.open_replica(replica2)
    cmd.add_replica(common.REPLICA2)

    verify_async(dev, 10, 128, 1)

    common.verify_replica_state(controller, 1, "RW")

    verify_read(dev, data_offset, data)

    # WORKAROUND for unable to remove the parent of volume head
    newsnap = cmd.snapshot_create()

    info = cmd.snapshot_info()
    assert len(info) == 3
    sysnap = info[newsnap]["parent"]
    assert info[sysnap]["parent"] == ""
    assert newsnap in info[sysnap]["children"]
    assert info[sysnap]["usercreated"] is False
    assert info[sysnap]["removed"] is False

    cmd.snapshot_purge()
    info = cmd.snapshot_info()
    assert len(info) == 2
    assert info[newsnap] is not None
    assert info[VOLUME_HEAD] is not None
Esempio n. 9
0
def test_restore_to_file_without_backing_file(replica1, replica2,  # NOQA
                                              controller, backup_targets):  # NOQA
    for backup_target in backup_targets:
        dev = common.get_dev(replica1, replica2, controller)
        restore_to_file_without_backing_file_test(dev, backup_target)
        common.cleanup_replica(replica1)
        common.cleanup_replica(replica2)
        common.cleanup_controller(controller)
Esempio n. 10
0
def test_backup_hole_with_backing_file(backing_replica1, backing_replica2,
                                       controller, backup_targets):  # NOQA
    for backup_target in backup_targets:
        backing_dev = common.get_backing_dev(backing_replica1,
                                             backing_replica2, controller)
        backup_hole_with_backing_file_test(backing_dev, backup_target)
        common.cleanup_replica(backing_replica1)
        common.cleanup_replica(backing_replica2)
        common.cleanup_controller(controller)
Esempio n. 11
0
def test_backup_hole_with_backing_file(backing_replica1, backing_replica2, controller, backup_targets):  # NOQA
    for backup_target in backup_targets:
        backing_dev = common.get_backing_dev(backing_replica1,
                                             backing_replica2,
                                             controller)
        backup_hole_with_backing_file_test(backing_dev, backup_target)
        common.cleanup_replica(backing_replica1)
        common.cleanup_replica(backing_replica2)
        common.cleanup_controller(controller)
Esempio n. 12
0
def test_snapshot_tree_rebuild(
        grpc_controller,  # NOQA
        grpc_replica1,
        grpc_replica2):  # NOQA
    address = grpc_controller.address

    offset = 0
    length = 128

    open_replica(grpc_replica1)
    open_replica(grpc_replica2)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    r1_url = grpc_replica1.url
    r2_url = grpc_replica2.url
    v = grpc_controller.volume_start(replicas=[r1_url, r2_url])
    assert v.name == VOLUME_NAME
    assert v.replicaCount == 2

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev(VOLUME_NAME)

    snap, snap_data = snapshot_tree_build(dev, address, ENGINE_NAME, offset,
                                          length)

    data = random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    # Cleanup replica2
    cleanup_replica(grpc_replica2)

    verify_async(dev, 10, 128, 1)

    verify_replica_state(grpc_controller, 1, "ERR")

    verify_read(dev, data_offset, data)

    grpc_controller.replica_delete(replicas[1].address)

    # Rebuild replica2
    open_replica(grpc_replica2)
    cmd.add_replica(address, r2_url)

    verify_async(dev, 10, 128, 1)

    verify_replica_state(grpc_controller, 1, "RW")

    snapshot_tree_verify(dev, address, ENGINE_NAME, offset, length, snap,
                         snap_data)
Esempio n. 13
0
def test_snapshot_tree_backup(
        grpc_replica1,
        grpc_replica2,  # NOQA
        grpc_controller,
        backup_targets):  # NOQA
    for backup_target in backup_targets:
        dev = common.get_dev(grpc_replica1, grpc_replica2, grpc_controller)
        snapshot_tree_backup_test(dev, backup_target)
        common.cleanup_replica(grpc_replica1)
        common.cleanup_replica(grpc_replica2)
        common.cleanup_controller(grpc_controller)
Esempio n. 14
0
def test_snapshot_tree_backup(
        grpc_replica1,
        grpc_replica2,  # NOQA
        grpc_controller,
        backup_targets):  # NOQA
    for backup_target in backup_targets:
        snapshot_tree_backup_test(backup_target, ENGINE_NAME, grpc_controller,
                                  grpc_replica1, grpc_replica2)
        cmd.sync_agent_server_reset(grpc_controller.address)
        cleanup_controller(grpc_controller)
        cleanup_replica(grpc_replica1)
        cleanup_replica(grpc_replica2)
Esempio n. 15
0
def test_backup_cli(
        bin,
        engine_manager_client,  # NOQA
        grpc_controller_client,  # NOQA
        grpc_replica_client,
        grpc_replica_client2,  # NOQA
        backup_targets):
    for backup_target in backup_targets:
        backup_core(bin, engine_manager_client, grpc_controller_client,
                    grpc_replica_client, grpc_replica_client2, backup_target)
        cleanup_replica(grpc_replica_client)
        cleanup_replica(grpc_replica_client2)
        cleanup_controller(grpc_controller_client)
Esempio n. 16
0
def test_backup(
        grpc_replica1,
        grpc_replica2,  # NOQA
        grpc_controller,
        backup_targets):  # NOQA
    for backup_target in backup_targets:
        dev = get_dev(grpc_replica1, grpc_replica2, grpc_controller)
        backup_test(dev, grpc_controller.address, VOLUME_NAME, ENGINE_NAME,
                    backup_target)
        cmd.sync_agent_server_reset(grpc_controller.address)
        cleanup_controller(grpc_controller)
        cleanup_replica(grpc_replica1)
        cleanup_replica(grpc_replica2)
Esempio n. 17
0
def test_backup_with_backing_file(
        grpc_backing_replica1,
        grpc_backing_replica2,  # NOQA
        grpc_controller,
        backup_targets):  # NOQA
    for backup_target in backup_targets:
        backing_dev = common.get_backing_dev(grpc_backing_replica1,
                                             grpc_backing_replica2,
                                             grpc_controller)
        backup_with_backing_file_test(backing_dev, backup_target)
        common.cleanup_replica(grpc_backing_replica1)
        common.cleanup_replica(grpc_backing_replica2)
        common.cleanup_controller(grpc_controller)
Esempio n. 18
0
def test_backup_with_backing_file(
        grpc_backing_replica1,
        grpc_backing_replica2,  # NOQA
        grpc_backing_controller,
        backup_targets):  # NOQA
    for backup_target in backup_targets:
        backup_with_backing_file_test(backup_target, grpc_backing_controller,
                                      grpc_backing_replica1,
                                      grpc_backing_replica2)
        cmd.sync_agent_server_reset(grpc_backing_controller.address)
        cleanup_controller(grpc_backing_controller)
        cleanup_replica(grpc_backing_replica1)
        cleanup_replica(grpc_backing_replica2)
Esempio n. 19
0
def test_restore_to_file_without_backing_file(
        backup_targets,  # NOQA
        grpc_controller,  # NOQA
        grpc_replica1,  # NOQA
        grpc_replica2):  # NOQA
    for backup_target in backup_targets:
        restore_to_file_without_backing_file_test(backup_target,
                                                  grpc_controller,
                                                  grpc_replica1, grpc_replica2)
        cmd.sync_agent_server_reset(grpc_controller.address)
        cleanup_controller(grpc_controller)
        cleanup_replica(grpc_replica1)
        cleanup_replica(grpc_replica2)
Esempio n. 20
0
def test_snapshot_tree_rebuild(
        grpc_controller,  # NOQA
        grpc_replica1,
        grpc_replica2):  # NOQA
    offset = 0
    length = 128

    open_replica(grpc_replica1)
    open_replica(grpc_replica2)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    v = grpc_controller.volume_start(
        replicas=[common.REPLICA1, common.REPLICA2])
    assert v.replicaCount == 2

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev()

    snap, snap_data = snapshot_tree_build(dev, offset, length)

    data = common.random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    # Cleanup replica2
    cleanup_replica(grpc_replica2)

    verify_async(dev, 10, 128, 1)

    common.verify_replica_state(grpc_controller, 1, "ERR")

    verify_read(dev, data_offset, data)

    grpc_controller.replica_delete(replicas[1].address)

    # Rebuild replica2
    open_replica(grpc_replica2)
    cmd.add_replica(common.REPLICA2)

    verify_async(dev, 10, 128, 1)

    common.verify_replica_state(grpc_controller, 1, "RW")

    snapshot_tree_verify(dev, offset, length, snap, snap_data)
Esempio n. 21
0
def test_snapshot_tree_rebuild(controller, replica1, replica2):  # NOQA
    offset = 0
    length = 128

    open_replica(replica1)
    open_replica(replica2)

    replicas = controller.list_replica()
    assert len(replicas) == 0

    v = controller.list_volume()[0]
    v = v.start(replicas=[
        common.REPLICA1,
        common.REPLICA2
    ])
    assert v.replicaCount == 2

    replicas = controller.list_replica()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev()

    snap, snap_data = snapshot_tree_build(dev, offset, length)

    data = common.random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    # Cleanup replica2
    cleanup_replica(replica2)

    verify_async(dev, 10, 128, 1)

    common.verify_replica_state(controller, 1, "ERR")

    verify_read(dev, data_offset, data)

    controller.delete(replicas[1])

    # Rebuild replica2
    common.open_replica(replica2)
    cmd.add_replica(common.REPLICA2)

    verify_async(dev, 10, 128, 1)

    common.verify_replica_state(controller, 1, "RW")

    snapshot_tree_verify(dev, offset, length, snap, snap_data)
Esempio n. 22
0
def cleanup_no_frontend_volume(c, r1, r2):
    launcher.start_engine_frontend(FRONTEND_TGT_BLOCKDEV,
                                   url=LAUNCHER_NO_FRONTEND)
    v = c.list_volume()[0]
    assert v.frontendState == "up"

    common.cleanup_replica(r1)
    common.cleanup_replica(r2)
    common.cleanup_controller(c)

    launcher.shutdown_engine_frontend(url=LAUNCHER_NO_FRONTEND)
    v = c.list_volume()[0]
    assert v.frontendState == "down"

    cleanup_replica_path(STANDBY_REPLICA1_PATH)
    cleanup_replica_path(STANDBY_REPLICA2_PATH)
Esempio n. 23
0
def cleanup_no_frontend_volume(grpc_c, grpc_r1, grpc_r2):
    launcher.start_engine_frontend(FRONTEND_TGT_BLOCKDEV,
                                   url=LAUNCHER_NO_FRONTEND)
    v = grpc_c.volume_get()
    assert v.frontendState == "up"

    cmd.sync_agent_server_reset(CONTROLLER_NO_FRONTEND)
    common.cleanup_replica(grpc_r1)
    common.cleanup_replica(grpc_r2)
    common.cleanup_controller(grpc_c)

    launcher.shutdown_engine_frontend(url=LAUNCHER_NO_FRONTEND)
    v = grpc_c.volume_get()
    assert v.frontendState == "down"

    cleanup_replica_path(STANDBY_REPLICA1_PATH)
    cleanup_replica_path(STANDBY_REPLICA2_PATH)
def cleanup_no_frontend_volume(grpc_em, grpc_c, grpc_r1, grpc_r2):
    grpc_em.frontend_start(ENGINE_NO_FRONTEND_NAME, FRONTEND_TGT_BLOCKDEV)
    v = grpc_c.volume_get()
    assert v.frontendState == "up"

    cmd.sync_agent_server_reset(grpc_c.address)

    grpc_em.frontend_shutdown(ENGINE_NO_FRONTEND_NAME)
    v = grpc_c.volume_get()
    assert v.frontendState == "down"
    ep = grpc_em.engine_get(ENGINE_NO_FRONTEND_NAME)
    assert ep.spec.frontend == ""

    cleanup_controller(grpc_c)
    cleanup_replica(grpc_r1)
    cleanup_replica(grpc_r2)

    cleanup_replica_dir(FIXED_REPLICA_PATH1)
    cleanup_replica_dir(FIXED_REPLICA_PATH2)
Esempio n. 25
0
def restore_inc_test(
        controller,
        replica1,
        replica2,  # NOQA
        sb_controller,
        sb_replica1,
        sb_replica2,
        backup_target):  # NOQA
    launcher.start_engine_frontend(FRONTEND_TGT_BLOCKDEV, url=LAUNCHER)
    dev = common.get_dev(replica1, replica2, controller)

    zero_string = b'\x00'.decode('utf-8')

    # backup0: 256 random data in 1st block
    length0 = 256
    snap0_data = common.random_string(length0)
    verify_data(dev, 0, snap0_data)
    verify_data(dev, BLOCK_SIZE, snap0_data)
    snap0 = cmd.snapshot_create()
    backup0 = create_backup(backup_target, snap0)
    backup0_name = cmd.backup_inspect(backup0)['Name']

    # backup1: 32 random data + 32 zero data + 192 random data in 1st block
    length1 = 32
    offset1 = 32
    snap1_data = zero_string * length1
    verify_data(dev, offset1, snap1_data)
    snap1 = cmd.snapshot_create()
    backup1 = create_backup(backup_target, snap1)
    backup1_name = cmd.backup_inspect(backup1)['Name']

    # backup2: 32 random data + 256 random data in 1st block,
    #          256 random data in 2nd block
    length2 = 256
    offset2 = 32
    snap2_data = common.random_string(length2)
    verify_data(dev, offset2, snap2_data)
    verify_data(dev, BLOCK_SIZE, snap2_data)
    snap2 = cmd.snapshot_create()
    backup2 = create_backup(backup_target, snap2)
    backup2_name = cmd.backup_inspect(backup2)['Name']

    # backup3: 64 zero data + 192 random data in 1st block
    length3 = 64
    offset3 = 0
    verify_data(dev, offset3, zero_string * length3)
    verify_data(dev, length2, zero_string * offset2)
    verify_data(dev, BLOCK_SIZE, zero_string * length2)
    snap3 = cmd.snapshot_create()
    backup3 = create_backup(backup_target, snap3)
    backup3_name = cmd.backup_inspect(backup3)['Name']

    # backup4: 256 random data in 1st block
    length4 = 256
    offset4 = 0
    snap4_data = common.random_string(length4)
    verify_data(dev, offset4, snap4_data)
    snap4 = cmd.snapshot_create()
    backup4 = create_backup(backup_target, snap4)
    backup4_name = cmd.backup_inspect(backup4)['Name']

    common.cleanup_replica(replica1)
    common.cleanup_replica(replica2)
    common.cleanup_controller(controller)
    launcher.shutdown_engine_frontend(url=LAUNCHER)

    # start no-frontend volume
    # start standby volume (no frontend)
    start_no_frontend_volume(sb_controller, sb_replica1, sb_replica2)

    restore_for_no_frontend_volume(backup0, sb_controller)
    verify_no_frontend_data(0, snap0_data, sb_controller)

    # mock restore crash/error
    delta_file1 = "volume-delta-" + backup0_name + ".img"
    if "vfs" in backup_target:
        command = ["find", VFS_DIR, "-type", "d", "-name", VOLUME_NAME]
        backup_volume_path = subprocess.check_output(command).strip()
        command = ["find", backup_volume_path, "-name", "*blk"]
        blocks = subprocess.check_output(command).split()
        assert len(blocks) != 0
        for blk in blocks:
            command = ["mv", blk, blk + ".tmp"]
            subprocess.check_output(command).strip()
        with pytest.raises(subprocess.CalledProcessError):
            cmd.restore_inc(backup1, backup0_name, CONTROLLER_NO_FRONTEND)
        assert path.exists(STANDBY_REPLICA1_PATH + delta_file1)
        assert path.exists(STANDBY_REPLICA2_PATH + delta_file1)
        for blk in blocks:
            command = ["mv", blk + ".tmp", blk]
            subprocess.check_output(command)

    data1 = \
        snap0_data[0:offset1] + snap1_data + \
        snap0_data[offset1+length1:]
    cmd.restore_inc(backup1, backup0_name, CONTROLLER_NO_FRONTEND)
    verify_no_frontend_data(0, data1, sb_controller)

    assert not path.exists(STANDBY_REPLICA1_PATH + delta_file1)
    assert not path.exists(STANDBY_REPLICA2_PATH + delta_file1)
    volume_info = cmd.info(CONTROLLER_NO_FRONTEND)
    assert volume_info['lastRestored'] == backup1_name

    data2 = \
        data1[0:offset2] + snap2_data + \
        zero_string * (BLOCK_SIZE - length2 - offset2) + snap2_data
    cmd.restore_inc(backup2, backup1_name, CONTROLLER_NO_FRONTEND)
    verify_no_frontend_data(0, data2, sb_controller)

    delta_file2 = "volume-delta-" + backup1_name + ".img"
    assert not path.exists(STANDBY_REPLICA1_PATH + delta_file2)
    assert not path.exists(STANDBY_REPLICA2_PATH + delta_file2)
    volume_info = cmd.info(CONTROLLER_NO_FRONTEND)
    assert volume_info['lastRestored'] == backup2_name

    # mock race condition
    with pytest.raises(subprocess.CalledProcessError) as e:
        cmd.restore_inc(backup1, backup0_name, CONTROLLER_NO_FRONTEND)
        assert "doesn't match lastRestored" in e

    data3 = zero_string * length3 + data2[length3:length2]
    cmd.restore_inc(backup3, backup2_name, CONTROLLER_NO_FRONTEND)
    verify_no_frontend_data(0, data3, sb_controller)

    delta_file3 = "volume-delta-" + backup3_name + ".img"
    assert not path.exists(STANDBY_REPLICA1_PATH + delta_file3)
    assert not path.exists(STANDBY_REPLICA2_PATH + delta_file3)
    volume_info = cmd.info(CONTROLLER_NO_FRONTEND)
    assert volume_info['lastRestored'] == backup3_name

    # mock corner case: invalid last-restored backup
    rm_backups([backup3])
    # actually it is full restoration
    cmd.restore_inc(backup4, backup3_name, CONTROLLER_NO_FRONTEND)
    verify_no_frontend_data(0, snap4_data, sb_controller)
    volume_info = cmd.info(CONTROLLER_NO_FRONTEND)
    assert volume_info['lastRestored'] == backup4_name
    if "vfs" in backup_target:
        command = ["find", VFS_DIR, "-type", "d", "-name", VOLUME_NAME]
        backup_volume_path = subprocess.check_output(command).strip()
        command = ["find", backup_volume_path, "-name", "*tempoary"]
        tmp_files = subprocess.check_output(command).split()
        assert len(tmp_files) == 0

    cleanup_no_frontend_volume(sb_controller, sb_replica1, sb_replica2)

    rm_backups([backup0, backup1, backup2, backup4])
Esempio n. 26
0
def test_ha_single_replica_rebuild(controller, replica1, replica2):  # NOQA
    open_replica(replica1)
    open_replica(replica2)

    replicas = controller.list_replica()
    assert len(replicas) == 0

    v = controller.list_volume()[0]
    v = v.start(replicas=[
        common.REPLICA1,
        common.REPLICA2
    ])
    assert v.replicaCount == 2

    replicas = controller.list_replica()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev()

    data = common.random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    # Cleanup replica2
    cleanup_replica(replica2)

    verify_async(dev, 10, 128, 1)

    common.verify_replica_state(controller, 1, "ERR")

    verify_read(dev, data_offset, data)

    controller.delete(replicas[1])

    # Rebuild replica2
    common.open_replica(replica2)
    cmd.add_replica(common.REPLICA2)

    verify_async(dev, 10, 128, 1)

    common.verify_replica_state(controller, 1, "RW")

    verify_read(dev, data_offset, data)

    # WORKAROUND for unable to remove the parent of volume head
    newsnap = cmd.snapshot_create()

    info = cmd.snapshot_info()
    assert len(info) == 3
    sysnap = info[newsnap]["parent"]
    assert info[sysnap]["parent"] == ""
    assert newsnap in info[sysnap]["children"]
    assert info[sysnap]["usercreated"] is False
    assert info[sysnap]["removed"] is False

    cmd.snapshot_purge()
    info = cmd.snapshot_info()
    assert len(info) == 2
    assert info[newsnap] is not None
    assert info[VOLUME_HEAD] is not None
def restore_inc_test(
        grpc_engine_manager,  # NOQA
        grpc_controller,  # NOQA
        grpc_replica1,
        grpc_replica2,  # NOQA
        grpc_dr_controller,  # NOQA
        grpc_dr_replica1,
        grpc_dr_replica2,  # NOQA
        backup_target):  # NOQA
    address = grpc_controller.address

    dev = get_dev(grpc_replica1, grpc_replica2, grpc_controller)

    zero_string = b'\x00'.decode('utf-8')

    # backup0: 256 random data in 1st block
    length0 = 256
    snap0_data = random_string(length0)
    verify_data(dev, 0, snap0_data)
    verify_data(dev, BLOCK_SIZE, snap0_data)
    snap0 = cmd.snapshot_create(address)
    backup0 = create_backup(address, snap0, backup_target)["URL"]
    backup0_name = cmd.backup_inspect(address, backup0)['Name']

    # backup1: 32 random data + 32 zero data + 192 random data in 1st block
    length1 = 32
    offset1 = 32
    snap1_data = zero_string * length1
    verify_data(dev, offset1, snap1_data)
    snap1 = cmd.snapshot_create(address)
    backup1 = create_backup(address, snap1, backup_target)["URL"]
    backup1_name = cmd.backup_inspect(address, backup1)['Name']

    # backup2: 32 random data + 256 random data in 1st block,
    #          256 random data in 2nd block
    length2 = 256
    offset2 = 32
    snap2_data = random_string(length2)
    verify_data(dev, offset2, snap2_data)
    verify_data(dev, BLOCK_SIZE, snap2_data)
    snap2 = cmd.snapshot_create(address)
    backup2 = create_backup(address, snap2, backup_target)["URL"]
    backup2_name = cmd.backup_inspect(address, backup2)['Name']

    # backup3: 64 zero data + 192 random data in 1st block
    length3 = 64
    offset3 = 0
    verify_data(dev, offset3, zero_string * length3)
    verify_data(dev, length2, zero_string * offset2)
    verify_data(dev, BLOCK_SIZE, zero_string * length2)
    snap3 = cmd.snapshot_create(address)
    backup3 = create_backup(address, snap3, backup_target)["URL"]
    backup3_name = cmd.backup_inspect(address, backup3)['Name']

    # backup4: 256 random data in 1st block
    length4 = 256
    offset4 = 0
    snap4_data = random_string(length4)
    verify_data(dev, offset4, snap4_data)
    snap4 = cmd.snapshot_create(address)
    backup4 = create_backup(address, snap4, backup_target)["URL"]
    backup4_name = cmd.backup_inspect(address, backup4)['Name']

    # start no-frontend volume
    # start dr volume (no frontend)
    dr_address = grpc_dr_controller.address
    start_no_frontend_volume(grpc_engine_manager, grpc_dr_controller,
                             grpc_dr_replica1, grpc_dr_replica2)

    cmd.backup_restore(dr_address, backup0)
    wait_for_restore_completion(dr_address, backup0)
    verify_no_frontend_data(grpc_engine_manager, 0, snap0_data,
                            grpc_dr_controller)

    # mock restore crash/error
    delta_file1 = "volume-delta-" + backup0_name + ".img"
    if "vfs" in backup_target:
        command = ["find", VFS_DIR, "-type", "d", "-name", VOLUME_NAME]
        backup_volume_path = subprocess.check_output(command).strip()
        command = ["find", backup_volume_path, "-name", "*blk"]
        blocks = subprocess.check_output(command).split()
        assert len(blocks) != 0
        for blk in blocks:
            command = ["mv", blk, blk + ".tmp"]
            subprocess.check_output(command).strip()
        # should fail
        is_failed = False
        cmd.restore_inc(dr_address, backup1, backup0_name)
        for i in range(RETRY_COUNTS):
            rs = cmd.restore_status(dr_address)
            for status in rs.values():
                if status['backupURL'] != backup1:
                    break
                if 'error' in status.keys():
                    if status['error'] != "":
                        assert 'no such file or directory' in \
                               status['error']
                        is_failed = True
            if is_failed:
                break
            time.sleep(RETRY_INTERVAL)
        assert is_failed

        assert path.exists(FIXED_REPLICA_PATH1 + delta_file1)
        assert path.exists(FIXED_REPLICA_PATH2 + delta_file1)
        for blk in blocks:
            command = ["mv", blk + ".tmp", blk]
            subprocess.check_output(command)

    data1 = \
        snap0_data[0:offset1] + snap1_data + \
        snap0_data[offset1+length1:]
    # race condition: last restoration has failed
    # but `isRestoring` hasn't been cleanup
    for i in range(RETRY_COUNTS):
        try:
            restore_incrementally(dr_address, backup1, backup0_name)
            break
        except subprocess.CalledProcessError as e:
            if "already in progress" not in e.output:
                time.sleep(RETRY_INTERVAL)
            else:
                raise e

    verify_no_frontend_data(grpc_engine_manager, 0, data1, grpc_dr_controller)

    assert not path.exists(FIXED_REPLICA_PATH1 + delta_file1)
    assert not path.exists(FIXED_REPLICA_PATH2 + delta_file1)

    status = cmd.restore_status(dr_address)
    compare_last_restored_with_backup(status, backup1_name)

    data2 = \
        data1[0:offset2] + snap2_data + \
        zero_string * (BLOCK_SIZE - length2 - offset2) + snap2_data
    restore_incrementally(dr_address, backup2, backup1_name)
    verify_no_frontend_data(grpc_engine_manager, 0, data2, grpc_dr_controller)

    delta_file2 = "volume-delta-" + backup1_name + ".img"
    assert not path.exists(FIXED_REPLICA_PATH1 + delta_file2)
    assert not path.exists(FIXED_REPLICA_PATH2 + delta_file2)

    status = cmd.restore_status(dr_address)
    compare_last_restored_with_backup(status, backup2_name)

    # mock race condition
    with pytest.raises(subprocess.CalledProcessError) as e:
        restore_incrementally(dr_address, backup1, backup0_name)
        assert "doesn't match lastRestored" in e

    data3 = zero_string * length3 + data2[length3:length2]
    restore_incrementally(dr_address, backup3, backup2_name)
    verify_no_frontend_data(grpc_engine_manager, 0, data3, grpc_dr_controller)

    delta_file3 = "volume-delta-" + backup3_name + ".img"
    assert not path.exists(FIXED_REPLICA_PATH1 + delta_file3)
    assert not path.exists(FIXED_REPLICA_PATH2 + delta_file3)
    status = cmd.restore_status(dr_address)
    compare_last_restored_with_backup(status, backup3_name)

    # mock corner case: invalid last-restored backup
    rm_backups(address, ENGINE_NAME, [backup3])
    # actually it is full restoration
    restore_incrementally(dr_address, backup4, backup3_name)
    verify_no_frontend_data(grpc_engine_manager, 0, snap4_data,
                            grpc_dr_controller)
    status = cmd.restore_status(dr_address)
    compare_last_restored_with_backup(status, backup4_name)

    if "vfs" in backup_target:
        command = ["find", VFS_DIR, "-type", "d", "-name", VOLUME_NAME]
        backup_volume_path = subprocess.check_output(command).strip()
        command = ["find", backup_volume_path, "-name", "*tempoary"]
        tmp_files = subprocess.check_output(command).split()
        assert len(tmp_files) == 0

    cleanup_no_frontend_volume(grpc_engine_manager, grpc_dr_controller,
                               grpc_dr_replica1, grpc_dr_replica2)

    rm_backups(address, ENGINE_NAME, [backup0, backup1, backup2, backup4])

    cmd.sync_agent_server_reset(address)
    cleanup_controller(grpc_controller)
    cleanup_replica(grpc_replica1)
    cleanup_replica(grpc_replica2)
Esempio n. 28
0
def test_backup_type(
        grpc_replica1,
        grpc_replica2,  # NOQA
        grpc_controller,
        backup_targets):  # NOQA
    for backup_target in backup_targets:
        address = grpc_controller.address
        block_size = 2 * 1024 * 1024

        dev = get_dev(grpc_replica1, grpc_replica2, grpc_controller)

        zero_string = b'\x00'.decode('utf-8')

        # backup0: 256 random data in 1st block
        length0 = 256
        snap0_data = random_string(length0)
        verify_data(dev, 0, snap0_data)
        verify_data(dev, block_size, snap0_data)
        snap0 = cmd.snapshot_create(address)
        backup0 = create_backup(address, snap0, backup_target)
        backup0_url = backup0["URL"]
        assert backup0['IsIncremental'] is False

        # backup1: 32 random data + 32 zero data + 192 random data in 1st block
        length1 = 32
        offset1 = 32
        snap1_data = zero_string * length1
        verify_data(dev, offset1, snap1_data)
        snap1 = cmd.snapshot_create(address)
        backup1 = create_backup(address, snap1, backup_target)
        backup1_url = backup1["URL"]
        assert backup1['IsIncremental'] is True

        # backup2: 32 random data + 256 random data in 1st block,
        #          256 random data in 2nd block
        length2 = 256
        offset2 = 32
        snap2_data = random_string(length2)
        verify_data(dev, offset2, snap2_data)
        verify_data(dev, block_size, snap2_data)
        snap2 = cmd.snapshot_create(address)
        backup2 = create_backup(address, snap2, backup_target)
        backup2_url = backup2["URL"]
        assert backup2['IsIncremental'] is True

        rm_backups(address, ENGINE_NAME, [backup2_url])

        # backup3: 64 zero data + 192 random data in 1st block
        length3 = 64
        offset3 = 0
        verify_data(dev, offset3, zero_string * length3)
        verify_data(dev, length2, zero_string * offset2)
        verify_data(dev, block_size, zero_string * length2)
        snap3 = cmd.snapshot_create(address)
        backup3 = create_backup(address, snap3, backup_target)
        backup3_url = backup3["URL"]
        assert backup3['IsIncremental'] is False

        # backup4: 256 random data in 1st block
        length4 = 256
        offset4 = 0
        snap4_data = random_string(length4)
        verify_data(dev, offset4, snap4_data)
        snap4 = cmd.snapshot_create(address)
        backup4 = create_backup(address, snap4, backup_target)
        backup4_url = backup4["URL"]
        assert backup4['IsIncremental'] is True

        rm_backups(address, ENGINE_NAME,
                   [backup0_url, backup1_url, backup3_url, backup4_url])

        cmd.sync_agent_server_reset(address)
        cleanup_replica(grpc_replica1)
        cleanup_replica(grpc_replica2)
        cleanup_controller(grpc_controller)
Esempio n. 29
0
def test_ha_single_replica_rebuild(
        grpc_controller,  # NOQA
        grpc_replica1,
        grpc_replica2):  # NOQA
    address = grpc_controller.address

    open_replica(grpc_replica1)
    open_replica(grpc_replica2)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    r1_url = grpc_replica1.url
    r2_url = grpc_replica2.url
    v = grpc_controller.volume_start(replicas=[r1_url, r2_url])
    assert v.replicaCount == 2

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev(VOLUME_NAME)

    data = random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    # Cleanup replica2
    cleanup_replica(grpc_replica2)

    verify_async(dev, 10, 128, 1)

    verify_replica_state(grpc_controller, 1, "ERR")

    verify_read(dev, data_offset, data)

    grpc_controller.replica_delete(replicas[1].address)

    # Rebuild replica2
    open_replica(grpc_replica2)
    cmd.add_replica(address, r2_url)

    verify_async(dev, 10, 128, 1)

    verify_replica_state(grpc_controller, 1, "RW")

    verify_read(dev, data_offset, data)

    # WORKAROUND for unable to remove the parent of volume head
    newsnap = cmd.snapshot_create(address)

    info = cmd.snapshot_info(address)
    assert len(info) == 3
    sysnap = info[newsnap]["parent"]
    assert info[sysnap]["parent"] == ""
    assert newsnap in info[sysnap]["children"]
    assert info[sysnap]["usercreated"] is False
    assert info[sysnap]["removed"] is False

    cmd.snapshot_purge(address)
    wait_for_purge_completion(address)

    info = cmd.snapshot_info(address)
    assert len(info) == 2
    assert info[newsnap] is not None
    assert info[VOLUME_HEAD] is not None
Esempio n. 30
0
def test_upgrade(
        grpc_engine_manager,  # NOQA
        grpc_controller,  # NOQA
        grpc_fixed_dir_replica1,
        grpc_fixed_dir_replica2,  # NOQA
        grpc_extra_replica1,
        grpc_extra_replica2):  # NOQA

    dev = get_dev(grpc_fixed_dir_replica1, grpc_fixed_dir_replica2,
                  grpc_controller)

    offset = 0
    length = 128

    data = random_string(length)
    verify_data(dev, offset, data)

    # both set pointed to the same volume underlying
    r1_url = grpc_fixed_dir_replica1.url
    r2_url = grpc_fixed_dir_replica2.url
    upgrade_r1_url = grpc_extra_replica1.url
    upgrade_r2_url = grpc_extra_replica2.url

    v = grpc_controller.volume_start(replicas=[r1_url, r2_url])
    assert v.replicaCount == 2

    upgrade_e = grpc_engine_manager.engine_upgrade(
        ENGINE_NAME, LONGHORN_UPGRADE_BINARY, SIZE,
        [upgrade_r1_url, upgrade_r2_url])
    assert upgrade_e.spec.binary == LONGHORN_UPGRADE_BINARY

    verify_data(dev, offset, data)

    grpc_controller.client_upgrade(upgrade_e.spec.listen)
    wait_for_process_running(grpc_engine_manager, ENGINE_NAME,
                             INSTANCE_MANAGER_TYPE_ENGINE)

    # cannot start with same binary
    with pytest.raises(grpc.RpcError):
        grpc_engine_manager.engine_upgrade(ENGINE_NAME,
                                           LONGHORN_UPGRADE_BINARY, SIZE,
                                           [r1_url, r2_url])
    verify_data(dev, offset, data)

    # cannot start with wrong replica, would trigger rollback
    with pytest.raises(grpc.RpcError):
        grpc_engine_manager.engine_upgrade(ENGINE_NAME,
                                           LONGHORN_UPGRADE_BINARY, SIZE,
                                           ["random"])
    verify_data(dev, offset, data)

    grpc_fixed_dir_replica1 = cleanup_replica(grpc_fixed_dir_replica1)
    grpc_fixed_dir_replica2 = cleanup_replica(grpc_fixed_dir_replica2)
    open_replica(grpc_fixed_dir_replica1)
    open_replica(grpc_fixed_dir_replica2)

    e = grpc_engine_manager.engine_upgrade(ENGINE_NAME, LONGHORN_BINARY, SIZE,
                                           [r1_url, r2_url])
    assert e.spec.binary == LONGHORN_BINARY

    verify_data(dev, offset, data)

    grpc_controller.client_upgrade(e.spec.listen)
    wait_for_process_running(grpc_engine_manager, ENGINE_NAME,
                             INSTANCE_MANAGER_TYPE_ENGINE)