Beispiel #1
0
def test_ha_remove_extra_disks(
        grpc_controller,  # NOQA
        grpc_replica1,
        grpc_replica2):  # NOQA
    address = grpc_controller.address

    prepare_backup_dir(BACKUP_DIR)
    open_replica(grpc_replica1)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    r1_url = grpc_replica1.url
    v = grpc_controller.volume_start(replicas=[r1_url])
    assert v.name == VOLUME_NAME
    assert v.replicaCount == 1

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 1
    assert replicas[0].mode == "RW"

    dev = get_blockdev(VOLUME_NAME)

    wasted_data = random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, wasted_data)

    # now replica1 contains extra data in a snapshot
    cmd.snapshot_create(address)

    cleanup_controller(grpc_controller)

    open_replica(grpc_replica2)
    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    r2_url = grpc_replica2.url
    v = grpc_controller.volume_start(replicas=[r2_url])
    assert v.name == VOLUME_NAME
    assert v.replicaCount == 1

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 1
    assert replicas[0].mode == "RW"

    dev = get_blockdev(VOLUME_NAME)

    data = random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    r1 = grpc_replica1.replica_reload()
    print(r1)

    cmd.add_replica(address, r1_url)

    verify_data(dev, data_offset, data)
Beispiel #2
0
def test_ha_remove_extra_disks(
        grpc_controller,  # NOQA
        grpc_replica1,
        grpc_replica2):  # NOQA
    prepare_backup_dir(BACKUP_DIR)
    open_replica(grpc_replica1)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    v = grpc_controller.volume_start(replicas=[
        common.REPLICA1,
    ])
    assert v.replicaCount == 1

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 1
    assert replicas[0].mode == "RW"

    dev = get_blockdev()

    wasted_data = common.random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, wasted_data)

    # now replica1 contains extra data in a snapshot
    cmd.snapshot_create()

    common.cleanup_controller(grpc_controller)

    open_replica(grpc_replica2)
    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    v = grpc_controller.volume_start(replicas=[
        common.REPLICA2,
    ])
    assert v.replicaCount == 1

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 1
    assert replicas[0].mode == "RW"

    dev = get_blockdev()

    data = common.random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    r1 = grpc_replica1.replica_reload()
    print(r1)

    cmd.add_replica(common.REPLICA1)

    verify_data(dev, data_offset, data)
Beispiel #3
0
def test_ha_revision_counter_consistency(
        grpc_controller,  # NOQA
        grpc_replica1,
        grpc_replica2):  # NOQA
    open_replica(grpc_replica1)
    open_replica(grpc_replica2)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    v = grpc_controller.volume_start(
        replicas=[common.REPLICA1, common.REPLICA2])
    assert v.replicaCount == 2

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev()

    common.verify_async(dev, 10, 128, 100)

    r1 = grpc_replica1.replica_get()
    r2 = grpc_replica2.replica_get()
    # kernel can merge requests so backend may not receive 1000 writes
    assert r1.revisionCounter > 0
    assert r1.revisionCounter == r2.revisionCounter
Beispiel #4
0
def test_ha_single_replica_failure(
        grpc_controller,  # NOQA
        grpc_replica1,
        grpc_replica2):  # NOQA
    open_replica(grpc_replica1)
    open_replica(grpc_replica2)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    v = grpc_controller.volume_start(
        replicas=[common.REPLICA1, common.REPLICA2])
    assert v.replicaCount == 2

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev()

    data = common.random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    cleanup_replica(grpc_replica2)

    verify_async(dev, 10, 128, 1)

    common.verify_replica_state(grpc_controller, 1, "ERR")

    verify_read(dev, data_offset, data)
Beispiel #5
0
def test_ha_single_replica_failure(
        grpc_controller,  # NOQA
        grpc_replica1,
        grpc_replica2):  # NOQA
    open_replica(grpc_replica1)
    open_replica(grpc_replica2)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    r1_url = grpc_replica1.url
    r2_url = grpc_replica2.url
    v = grpc_controller.volume_start(replicas=[r1_url, r2_url])
    assert v.replicaCount == 2

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev(VOLUME_NAME)

    data = random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    cleanup_replica(grpc_replica2)

    verify_async(dev, 10, 128, 1)

    verify_replica_state(grpc_controller, 1, "ERR")

    verify_read(dev, data_offset, data)
Beispiel #6
0
def test_ha_revision_counter_consistency(
        grpc_controller,  # NOQA
        grpc_replica1,
        grpc_replica2):  # NOQA
    open_replica(grpc_replica1)
    open_replica(grpc_replica2)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    r1_url = grpc_replica1.url
    r2_url = grpc_replica2.url
    v = grpc_controller.volume_start(replicas=[r1_url, r2_url])
    assert v.name == VOLUME_NAME
    assert v.replicaCount == 2

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev(VOLUME_NAME)

    verify_async(dev, 10, 128, 100)

    r1 = grpc_replica1.replica_get()
    r2 = grpc_replica2.replica_get()
    # kernel can merge requests so backend may not receive 1000 writes
    assert r1.revisionCounter > 0
    assert r1.revisionCounter == r2.revisionCounter
def test_frontend_show(
        grpc_engine_manager,
        grpc_controller,  # NOQA
        grpc_replica1,
        grpc_replica2):  # NOQA
    open_replica(grpc_replica1)
    open_replica(grpc_replica2)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    r1_url = grpc_replica1.url
    r2_url = grpc_replica2.url
    v = grpc_controller.volume_start(replicas=[r1_url, r2_url])

    ft = v.frontend
    if ft == "tgt":
        assert v.endpoint == path.join(LONGHORN_DEV_DIR, VOLUME_NAME)
    elif ft == "socket":
        assert v.endpoint == get_socket_path(VOLUME_NAME)
        engine = grpc_engine_manager.engine_get(ENGINE_NAME)
        assert engine.status.endpoint == path.join(LONGHORN_DEV_DIR,
                                                   VOLUME_NAME)

    info = cmd.info_get(grpc_controller.address)
    assert info["name"] == VOLUME_NAME
    assert info["endpoint"] == v.endpoint
Beispiel #8
0
def test_frontend_show(
        grpc_controller,  # NOQA
        grpc_replica1,
        grpc_replica2):  # NOQA
    common.open_replica(grpc_replica1)
    common.open_replica(grpc_replica2)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    v = grpc_controller.volume_start(
        replicas=[common.REPLICA1, common.REPLICA2])

    ft = v.frontend
    if ft == "tgt" or ft == "tcmu":
        assert v.endpoint == path.join(common.LONGHORN_DEV_DIR,
                                       common.VOLUME_NAME)
    elif ft == "socket":
        assert v.endpoint == common.get_socket_path(common.VOLUME_NAME)
        launcher_info = launcher.info()
        assert launcher_info["endpoint"] == path.join(common.LONGHORN_DEV_DIR,
                                                      common.VOLUME_NAME)

    info = cmd.info()
    assert info["name"] == common.VOLUME_NAME
    assert info["endpoint"] == v.endpoint
Beispiel #9
0
def test_cleanup_leftover_blockdev(
        grpc_controller,  # NOQA
        grpc_replica1,
        grpc_replica2):  # NOQA
    common.open_replica(grpc_replica1)
    common.open_replica(grpc_replica2)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    blockdev = path.join(frontend.LONGHORN_DEV_DIR, common.VOLUME_NAME)
    assert not path.exists(blockdev)
    open(blockdev, 'a').close()

    grpc_controller.volume_start(replicas=[common.REPLICA1, common.REPLICA2])

    info = cmd.info()
    assert info["name"] == common.VOLUME_NAME
Beispiel #10
0
def test_snapshot_tree_rebuild(
        grpc_controller,  # NOQA
        grpc_replica1,
        grpc_replica2):  # NOQA
    address = grpc_controller.address

    offset = 0
    length = 128

    open_replica(grpc_replica1)
    open_replica(grpc_replica2)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    r1_url = grpc_replica1.url
    r2_url = grpc_replica2.url
    v = grpc_controller.volume_start(replicas=[r1_url, r2_url])
    assert v.name == VOLUME_NAME
    assert v.replicaCount == 2

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev(VOLUME_NAME)

    snap, snap_data = snapshot_tree_build(dev, address, ENGINE_NAME, offset,
                                          length)

    data = random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    # Cleanup replica2
    cleanup_replica(grpc_replica2)

    verify_async(dev, 10, 128, 1)

    verify_replica_state(grpc_controller, 1, "ERR")

    verify_read(dev, data_offset, data)

    grpc_controller.replica_delete(replicas[1].address)

    # Rebuild replica2
    open_replica(grpc_replica2)
    cmd.add_replica(address, r2_url)

    verify_async(dev, 10, 128, 1)

    verify_replica_state(grpc_controller, 1, "RW")

    snapshot_tree_verify(dev, address, ENGINE_NAME, offset, length, snap,
                         snap_data)
def test_cleanup_leftover_blockdev(
        grpc_controller,  # NOQA
        grpc_replica1,
        grpc_replica2):  # NOQA
    open_replica(grpc_replica1)
    open_replica(grpc_replica2)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    blockdev = path.join(LONGHORN_DEV_DIR, VOLUME_NAME)
    assert not path.exists(blockdev)
    open(blockdev, 'a').close()

    r1_url = grpc_replica1.url
    r2_url = grpc_replica2.url
    grpc_controller.volume_start(replicas=[r1_url, r2_url])

    info = cmd.info_get(grpc_controller.address)
    assert info["name"] == VOLUME_NAME
Beispiel #12
0
def test_snapshot_tree_rebuild(
        grpc_controller,  # NOQA
        grpc_replica1,
        grpc_replica2):  # NOQA
    offset = 0
    length = 128

    open_replica(grpc_replica1)
    open_replica(grpc_replica2)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    v = grpc_controller.volume_start(
        replicas=[common.REPLICA1, common.REPLICA2])
    assert v.replicaCount == 2

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev()

    snap, snap_data = snapshot_tree_build(dev, offset, length)

    data = common.random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    # Cleanup replica2
    cleanup_replica(grpc_replica2)

    verify_async(dev, 10, 128, 1)

    common.verify_replica_state(grpc_controller, 1, "ERR")

    verify_read(dev, data_offset, data)

    grpc_controller.replica_delete(replicas[1].address)

    # Rebuild replica2
    open_replica(grpc_replica2)
    cmd.add_replica(common.REPLICA2)

    verify_async(dev, 10, 128, 1)

    common.verify_replica_state(grpc_controller, 1, "RW")

    snapshot_tree_verify(dev, offset, length, snap, snap_data)
Beispiel #13
0
def test_ha_single_replica_rebuild(
        grpc_controller,  # NOQA
        grpc_replica1,
        grpc_replica2):  # NOQA
    open_replica(grpc_replica1)
    open_replica(grpc_replica2)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    v = grpc_controller.volume_start(
        replicas=[common.REPLICA1, common.REPLICA2])
    assert v.replicaCount == 2

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev()

    data = common.random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    # Cleanup replica2
    cleanup_replica(grpc_replica2)

    verify_async(dev, 10, 128, 1)

    common.verify_replica_state(grpc_controller, 1, "ERR")

    verify_read(dev, data_offset, data)

    grpc_controller.replica_delete(replicas[1].address)

    # Rebuild replica2
    open_replica(grpc_replica2)
    cmd.add_replica(common.REPLICA2)

    verify_async(dev, 10, 128, 1)

    common.verify_replica_state(grpc_controller, 1, "RW")

    verify_read(dev, data_offset, data)

    # WORKAROUND for unable to remove the parent of volume head
    newsnap = cmd.snapshot_create()

    info = cmd.snapshot_info()
    assert len(info) == 3
    sysnap = info[newsnap]["parent"]
    assert info[sysnap]["parent"] == ""
    assert newsnap in info[sysnap]["children"]
    assert info[sysnap]["usercreated"] is False
    assert info[sysnap]["removed"] is False

    cmd.snapshot_purge()
    info = cmd.snapshot_info()
    assert len(info) == 2
    assert info[newsnap] is not None
    assert info[VOLUME_HEAD] is not None
Beispiel #14
0
def test_ha_double_replica_rebuild(
        grpc_controller,  # NOQA
        grpc_replica1,
        grpc_replica2):  # NOQA
    open_replica(grpc_replica1)
    open_replica(grpc_replica2)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    v = grpc_controller.volume_start(
        replicas=[common.REPLICA1, common.REPLICA2])
    assert v.replicaCount == 2

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev()

    data1 = common.random_string(128)
    data1_offset = 1024
    verify_data(dev, data1_offset, data1)

    # Close replica2
    r2 = grpc_replica2.replica_get()
    assert r2.revisionCounter == 1
    grpc_replica2.replica_close()

    verify_async(dev, 10, 128, 1)

    common.verify_replica_state(grpc_controller, 1, "ERR")

    verify_read(dev, data1_offset, data1)

    data2 = common.random_string(128)
    data2_offset = 512
    verify_data(dev, data2_offset, data2)

    # Close replica1
    r1 = grpc_replica1.replica_get()
    assert r1.revisionCounter == 12  # 1 + 10 + 1
    grpc_replica1.replica_close()

    # Restart volume
    common.cleanup_controller(grpc_controller)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    # NOTE the order is reversed here
    v = grpc_controller.volume_start(
        replicas=[common.REPLICA2, common.REPLICA1])
    assert v.replicaCount == 2

    # replica2 is out because of lower revision counter
    replicas = grpc_controller.replica_list()
    assert len(replicas) == 2
    assert replicas[0].mode == "ERR"
    assert replicas[1].mode == "RW"

    verify_read(dev, data1_offset, data1)
    verify_read(dev, data2_offset, data2)

    # Rebuild replica2
    r2 = grpc_replica2.replica_get()
    assert r2.revisionCounter == 1
    grpc_replica2.replica_close()

    grpc_controller.replica_delete(replicas[0].address)

    cmd.add_replica(common.REPLICA2)

    verify_async(dev, 10, 128, 1)

    common.verify_replica_state(grpc_controller, 1, "RW")

    verify_read(dev, data1_offset, data1)
    verify_read(dev, data2_offset, data2)

    r1 = grpc_replica1.replica_get()
    r2 = grpc_replica2.replica_get()
    assert r1.revisionCounter == 22  # 1 + 10 + 1 + 10
    assert r2.revisionCounter == 22  # must be in sync with r1
Beispiel #15
0
def test_upgrade(
        grpc_engine_manager,  # NOQA
        grpc_controller,  # NOQA
        grpc_fixed_dir_replica1,
        grpc_fixed_dir_replica2,  # NOQA
        grpc_extra_replica1,
        grpc_extra_replica2):  # NOQA

    dev = get_dev(grpc_fixed_dir_replica1, grpc_fixed_dir_replica2,
                  grpc_controller)

    offset = 0
    length = 128

    data = random_string(length)
    verify_data(dev, offset, data)

    # both set pointed to the same volume underlying
    r1_url = grpc_fixed_dir_replica1.url
    r2_url = grpc_fixed_dir_replica2.url
    upgrade_r1_url = grpc_extra_replica1.url
    upgrade_r2_url = grpc_extra_replica2.url

    v = grpc_controller.volume_start(replicas=[r1_url, r2_url])
    assert v.replicaCount == 2

    upgrade_e = grpc_engine_manager.engine_upgrade(
        ENGINE_NAME, LONGHORN_UPGRADE_BINARY, SIZE,
        [upgrade_r1_url, upgrade_r2_url])
    assert upgrade_e.spec.binary == LONGHORN_UPGRADE_BINARY

    verify_data(dev, offset, data)

    grpc_controller.client_upgrade(upgrade_e.spec.listen)
    wait_for_process_running(grpc_engine_manager, ENGINE_NAME,
                             INSTANCE_MANAGER_TYPE_ENGINE)

    # cannot start with same binary
    with pytest.raises(grpc.RpcError):
        grpc_engine_manager.engine_upgrade(ENGINE_NAME,
                                           LONGHORN_UPGRADE_BINARY, SIZE,
                                           [r1_url, r2_url])
    verify_data(dev, offset, data)

    # cannot start with wrong replica, would trigger rollback
    with pytest.raises(grpc.RpcError):
        grpc_engine_manager.engine_upgrade(ENGINE_NAME,
                                           LONGHORN_UPGRADE_BINARY, SIZE,
                                           ["random"])
    verify_data(dev, offset, data)

    grpc_fixed_dir_replica1 = cleanup_replica(grpc_fixed_dir_replica1)
    grpc_fixed_dir_replica2 = cleanup_replica(grpc_fixed_dir_replica2)
    open_replica(grpc_fixed_dir_replica1)
    open_replica(grpc_fixed_dir_replica2)

    e = grpc_engine_manager.engine_upgrade(ENGINE_NAME, LONGHORN_BINARY, SIZE,
                                           [r1_url, r2_url])
    assert e.spec.binary == LONGHORN_BINARY

    verify_data(dev, offset, data)

    grpc_controller.client_upgrade(e.spec.listen)
    wait_for_process_running(grpc_engine_manager, ENGINE_NAME,
                             INSTANCE_MANAGER_TYPE_ENGINE)