Ejemplo n.º 1
0
def test_expand_multiple_times():
    """Start and expand a single-replica volume 30 times in a row.

    Every round creates a new engine process and a new replica process,
    starts the volume with that one replica, expands it to EXPANDED_SIZE
    and waits for the expansion check to pass.  ``cleanup_process`` is
    called for the process manager clients in a ``finally`` clause, so a
    failed assertion in one round no longer skips that round's cleanup.
    """
    for _ in range(30):
        em_client = ProcessManagerClient(INSTANCE_MANAGER_ENGINE)
        # Created later in the round; stays None if we fail before that.
        rm_client = None
        try:
            engine_process = create_engine_process(em_client)
            grpc_controller_client = ControllerClient(
                get_process_address(engine_process))
            rm_client = ProcessManagerClient(INSTANCE_MANAGER_REPLICA)
            replica_process = create_replica_process(rm_client, REPLICA_NAME)
            grpc_replica_client = ReplicaClient(
                get_process_address(replica_process))
            # NOTE(review): fixed delay, presumably to let the replica
            # process come up before it is contacted -- confirm.
            time.sleep(3)
            cleanup_replica(grpc_replica_client)

            open_replica(grpc_replica_client)
            r1_url = grpc_replica_client.url
            v = grpc_controller_client.volume_start(replicas=[
                r1_url,
            ])
            assert v.replicaCount == 1

            expand_volume_with_frontend(
                grpc_controller_client, EXPANDED_SIZE)
            wait_and_check_volume_expansion(
                grpc_controller_client, EXPANDED_SIZE)
        finally:
            # Same order as on the success path before: engine manager
            # first, then the replica manager.
            cleanup_process(em_client)
            if rm_client is not None:
                cleanup_process(rm_client)
Ejemplo n.º 2
0
def engine_replica_mismatch(em_client, engine_rev_counter_disabled):  # NOQA
    """Start an engine with one matching and one mismatching replica.

    Replica 1 is created with ``disable_revision_counter`` equal to
    ``engine_rev_counter_disabled`` (the same value the engine process
    gets); replica 2 is created with the negated value.  After the volume
    is started with both replicas, replica 1 must be listed in mode ``RW``
    and replica 2 in mode ``ERR``.

    ``cleanup_process`` is called for both process manager clients in a
    ``finally`` clause, so it also runs when an assertion fails.

    :param em_client: process manager client used to create the engine
        process.
    :param engine_rev_counter_disabled: value passed as
        ``disable_revision_counter`` to the engine and to replica 1.
    """
    rm_client = ProcessManagerClient(INSTANCE_MANAGER_REPLICA)
    try:
        replica_dir1 = tempfile.mkdtemp()
        replica_dir2 = tempfile.mkdtemp()

        replica_process1 = create_replica_process(
            rm_client,
            REPLICA_NAME,
            replica_dir=replica_dir1,
            disable_revision_counter=engine_rev_counter_disabled)
        grpc_replica_client1 = get_replica_client_with_delay(
            ReplicaClient(get_process_address(replica_process1)))
        grpc_replica_client1.replica_create(size=SIZE_STR)
        # Replica 2 deliberately gets the opposite revision-counter setting.
        replica_process2 = create_replica_process(
            rm_client,
            REPLICA_2_NAME,
            replica_dir=replica_dir2,
            disable_revision_counter=not engine_rev_counter_disabled)
        grpc_replica_client2 = get_replica_client_with_delay(
            ReplicaClient(get_process_address(replica_process2)))
        grpc_replica_client2.replica_create(size=SIZE_STR)

        engine_process = create_engine_process(
            em_client, disable_revision_counter=engine_rev_counter_disabled)
        grpc_controller_client = ControllerClient(
            get_process_address(engine_process))
        r1_url = grpc_replica_client1.url
        r2_url = grpc_replica_client2.url
        v = grpc_controller_client.volume_start(replicas=[
            r1_url,
            r2_url,
        ])
        assert v.replicaCount == 2

        # Replica 1 (same setting as the engine) must be `RW`; replica 2
        # (opposite setting) must be `ERR`.
        rs = grpc_controller_client.replica_list()
        assert len(rs) == 2
        r1_verified = False
        r2_verified = False
        for r in rs:
            if r.address == r1_url:
                assert r.mode == 'RW'
                r1_verified = True
            if r.address == r2_url:
                assert r.mode == 'ERR'
                r2_verified = True
        # Both addresses must actually have appeared in the list.
        assert r1_verified
        assert r2_verified
    finally:
        cleanup_process(em_client)
        cleanup_process(rm_client)
Ejemplo n.º 3
0
def grpc_replica_client2(process_manager_client):
    """Create the REPLICA_2_NAME replica process and return its client.

    Sleeps three seconds, creates the process through
    ``process_manager_client``, wraps the process address in a
    ``ReplicaClient`` and returns whatever ``cleanup_replica`` gives back
    for that client.
    """
    time.sleep(3)
    replica_process = create_replica_process(
        process_manager_client, REPLICA_2_NAME)
    replica_client = ReplicaClient(get_process_address(replica_process))
    return cleanup_replica(replica_client)
Ejemplo n.º 4
0
    def generate_grpc_replica_client(replica_name, args=None):
        """Create a replica process and return a client for it.

        :param replica_name: name given to the new replica process.
        :param args: optional list forwarded as ``args`` to
            ``create_replica_process``; a fresh empty list is used when
            omitted.
        :return: the result of ``cleanup_replica`` for the new
            ``ReplicaClient``; the same object is also stored on
            ``grpc_replica_client.replica_client``.
        """
        # Build the default per call: a ``[]`` default is a single shared
        # object, so any mutation by a callee would leak into later calls.
        if args is None:
            args = []

        r = create_replica_process(process_manager_client,
                                   replica_name,
                                   args=args)

        listen = get_process_address(r)

        c = ReplicaClient(listen)
        grpc_replica_client.replica_client = cleanup_replica(c)
        return grpc_replica_client.replica_client
Ejemplo n.º 5
0
    def generate_grpc_controller_client(engine_name,
                                        volume_name,
                                        frontend=FRONTEND_TGT_BLOCKDEV):
        """Create an engine process and return a controller client for it.

        The engine process is created through ``engine_manager_client``
        with the given name, volume name and frontend.  The resulting
        ``ControllerClient`` is remembered on
        ``grpc_controller_client.process_client`` and that attribute's
        value is returned.
        """
        engine_process = create_engine_process(
            engine_manager_client,
            name=engine_name,
            volume_name=volume_name,
            frontend=frontend)
        engine_address = get_process_address(engine_process)

        grpc_controller_client.process_client = ControllerClient(
            engine_address)
        return grpc_controller_client.process_client
Ejemplo n.º 6
0
def grpc_controller_client(request, engine_manager_client):
    """Create an engine process and return a controller client for it.

    ``get_controller_version_detail`` is called once on the new client
    before it is returned.  ``request`` is accepted but not used here.
    """
    engine_process = create_engine_process(engine_manager_client)
    engine_address = get_process_address(engine_process)
    # Named `client` so the local does not shadow this function's own name.
    client = ControllerClient(engine_address)
    get_controller_version_detail(client)
    return client
Ejemplo n.º 7
0
def test_upgrade(
        grpc_engine_manager,  # NOQA
        grpc_controller,  # NOQA
        grpc_fixed_dir_replica1,
        grpc_fixed_dir_replica2,  # NOQA
        grpc_extra_replica1,
        grpc_extra_replica2):  # NOQA
    """Upgrade the engine to LONGHORN_UPGRADE_BINARY and back again.

    Sequence:

    1. Get a device on the two fixed-dir replicas and run ``verify_data``
       with a 128-character random string at offset 0.
    2. Start the volume with the fixed-dir replicas.
    3. Upgrade the engine to LONGHORN_UPGRADE_BINARY using the extra
       replicas, re-run ``verify_data`` and check the volume endpoint.
    4. Check that an upgrade with a bogus replica address raises
       ``grpc.RpcError`` and that ``verify_data`` still passes.
    5. Clean up and re-open the fixed-dir replicas, upgrade back to
       LONGHORN_BINARY with them, then check data and endpoint again.
    """
    dev = get_dev(grpc_fixed_dir_replica1, grpc_fixed_dir_replica2,
                  grpc_controller)
    expected_endpoint = path.join(LONGHORN_DEV_DIR, VOLUME_NAME)

    start = 0
    payload = random_string(128)
    verify_data(dev, start, payload)

    # Both replica pairs point at the same underlying volume.
    fixed_url1 = grpc_fixed_dir_replica1.url
    fixed_url2 = grpc_fixed_dir_replica2.url
    extra_url1 = grpc_extra_replica1.url
    extra_url2 = grpc_extra_replica2.url

    started = grpc_controller.volume_start(replicas=[fixed_url1, fixed_url2])
    assert started.replicaCount == 2

    upgraded = upgrade_engine(grpc_engine_manager,
                              LONGHORN_UPGRADE_BINARY,
                              ENGINE_NAME,
                              VOLUME_NAME,
                              replicas=[extra_url1, extra_url2])
    assert upgraded.spec.binary == LONGHORN_UPGRADE_BINARY

    verify_data(dev, start, payload)

    grpc_controller.client_upgrade(get_process_address(upgraded))
    wait_for_process_running(grpc_engine_manager, ENGINE_NAME)

    assert grpc_controller.volume_get().endpoint == expected_endpoint

    # Disabled check, kept for reference: starting with the same binary
    # was expected to be rejected.
    # with pytest.raises(grpc.RpcError):
    #     grpc_engine_manager.engine_upgrade(
    #         ENGINE_NAME, LONGHORN_UPGRADE_BINARY,
    #         SIZE, [r1_url, r2_url])
    # verify_data(dev, offset, data)

    # An upgrade with a wrong replica must fail (and, per the original
    # note, would trigger a rollback); the data must still verify.
    with pytest.raises(grpc.RpcError):
        upgrade_engine(grpc_engine_manager, LONGHORN_BINARY, ENGINE_NAME,
                       VOLUME_NAME, ["random"])
    verify_data(dev, start, payload)

    # Clean up both fixed-dir replicas first, then open both.
    reopened1 = cleanup_replica(grpc_fixed_dir_replica1)
    reopened2 = cleanup_replica(grpc_fixed_dir_replica2)
    open_replica(reopened1)
    open_replica(reopened2)

    restored = upgrade_engine(grpc_engine_manager, LONGHORN_BINARY,
                              ENGINE_NAME, VOLUME_NAME,
                              [fixed_url1, fixed_url2])
    assert restored.spec.binary == LONGHORN_BINARY

    verify_data(dev, start, payload)

    grpc_controller.client_upgrade(get_process_address(restored))
    wait_for_process_running(grpc_engine_manager, ENGINE_NAME)

    time.sleep(3)
    assert grpc_controller.volume_get().endpoint == expected_endpoint
Ejemplo n.º 8
0
def test_engine_restart_after_sigkill(bin):  # NOQA
    """
    Test if engine can be restarted after crashing by SIGKILL.

    1. Create then initialize 1 engine and 2 replicas.
    2. Start the engine.
    3. Create 2 snapshots.
    4. Use SIGKILL to kill the engine process.
    5. Wait for the engine to be reported as errored.
    6. Mock volume detachment by deleting
       the engine process and replicas processes.
    7. Mock volume reattachment by recreating processes and
       re-starting the engine.
    8. Check if the engine is up with 2 replicas.
    9. Check if the engine still works fine
       by creating/removing/purging snapshots.

    `bin` is the command-line binary invoked with `--url <controller
    address>` for all snapshot operations below.
    """
    # NOTE(review): the cleanup_process() calls at the end only run when
    # every assertion passes; a failure leaves the processes and the two
    # temporary replica directories behind.
    em_client = ProcessManagerClient(INSTANCE_MANAGER_ENGINE)
    engine_process = create_engine_process(em_client)
    grpc_controller_client = ControllerClient(
        get_process_address(engine_process))

    rm_client = ProcessManagerClient(INSTANCE_MANAGER_REPLICA)
    # The same two directories are reused when the replicas are recreated
    # in the "reattach" phase.
    replica_dir1 = tempfile.mkdtemp()
    replica_dir2 = tempfile.mkdtemp()
    replica_process1 = create_replica_process(rm_client, REPLICA_NAME,
                                              replica_dir=replica_dir1)
    grpc_replica_client1 = ReplicaClient(
        get_process_address(replica_process1))
    # NOTE(review): fixed delay, presumably to let the replica process
    # come up before the first call to it -- confirm.
    time.sleep(3)
    cleanup_replica(grpc_replica_client1)
    replica_process2 = create_replica_process(rm_client, REPLICA_2_NAME,
                                              replica_dir=replica_dir2)
    grpc_replica_client2 = ReplicaClient(
        get_process_address(replica_process2))
    time.sleep(3)
    cleanup_replica(grpc_replica_client2)

    open_replica(grpc_replica_client1)
    open_replica(grpc_replica_client2)
    r1_url = grpc_replica_client1.url
    r2_url = grpc_replica_client2.url
    v = grpc_controller_client.volume_start(replicas=[
        r1_url, r2_url,
    ])
    assert v.replicaCount == 2

    # First snapshot: the command prints the snapshot name, and the
    # matching image file is expected at index 1 of replica 1's chain.
    cmd = [bin, '--url', grpc_controller_client.address,
           'snapshot', 'create']
    snap0 = subprocess.check_output(cmd, encoding='utf-8').strip()
    expected = grpc_replica_client1.replica_get().chain[1]
    assert expected == 'volume-snap-{}.img'.format(snap0)

    # Second snapshot, created with two labels; removed again after the
    # restart.
    cmd = [bin, '--url', grpc_controller_client.address,
           'snapshot', 'create',
           '--label', 'name=snap1', '--label', 'key=value']
    snap1 = subprocess.check_output(cmd, encoding='utf-8').strip()

    # SIGKILL every process whose `ps aux` line contains VOLUME_NAME
    # (the grep process itself is filtered out by `grep -v grep`).
    cmd = ["bash", "-c",
           "kill -9 $(ps aux | grep %s | grep -v grep | awk '{print $2}')" %
           VOLUME_NAME]
    subprocess.check_call(cmd)
    wait_for_process_error(em_client, ENGINE_NAME)

    # Mock detach:
    cleanup_process(em_client)
    cleanup_process(rm_client)

    # Mock reattach:
    #   1. Directly create replicas processes.
    #   2. Call replica_create() to init replica servers for replica processes.
    #   3. Create one engine process and start the engine with replicas.
    replica_process1 = create_replica_process(rm_client, REPLICA_NAME,
                                              replica_dir=replica_dir1)
    grpc_replica_client1 = ReplicaClient(
        get_process_address(replica_process1))
    time.sleep(3)
    grpc_replica_client1.replica_create(size=SIZE_STR)
    replica_process2 = create_replica_process(rm_client, REPLICA_2_NAME,
                                              replica_dir=replica_dir2)
    grpc_replica_client2 = ReplicaClient(
        get_process_address(replica_process2))
    time.sleep(3)
    grpc_replica_client2.replica_create(size=SIZE_STR)

    # The clients and URLs are rebound to the newly created processes.
    engine_process = create_engine_process(em_client)
    grpc_controller_client = ControllerClient(
        get_process_address(engine_process))
    r1_url = grpc_replica_client1.url
    r2_url = grpc_replica_client2.url
    v = grpc_controller_client.volume_start(replicas=[
        r1_url, r2_url,
    ])
    assert v.replicaCount == 2

    # Verify the engine still works fine
    cmd = [bin, '--url', grpc_controller_client.address,
           'snapshot', 'create']
    snap2 = subprocess.check_output(cmd, encoding='utf-8').strip()
    cmd = [bin, '--url', grpc_controller_client.address,
           'snapshot', 'rm', snap1]
    subprocess.check_call(cmd)
    cmd = [bin, '--url', grpc_controller_client.address,
           'snapshot', 'purge']
    subprocess.check_call(cmd)
    wait_for_purge_completion(grpc_controller_client.address)
    cmd = [bin, '--debug',
           '--url', grpc_controller_client.address,
           'snapshot', 'ls']
    ls_output = subprocess.check_output(cmd, encoding='utf-8')

    # After removing snap1 and purging, exactly snap2 and snap0 must be
    # listed, in that order, below the `ID` header.
    assert ls_output == '''ID
{}
{}
'''.format(snap2, snap0)

    cleanup_process(em_client)
    cleanup_process(rm_client)
Ejemplo n.º 9
0
def test_single_replica_failure_during_engine_start(bin):  # NOQA
    """
    Test if engine still works fine if there is an invalid
    replica/backend in the starting phase

    1. Create then initialize 1 engine and 2 replicas.
    2. Start the engine.
    3. Create 2 snapshots.
    4. Mess up the replica1 by manually modifying the snapshot meta file.
    5. Mock volume detachment by deleting
       the engine process and replicas processes.
    6. Mock volume reattachment by recreating processes and
       re-starting the engine.
    7. Check if the engine is up and if replica1 is mode ERR
       in the engine.
    8. Check if the engine still works fine
       by creating one more snapshot.
    9. Remove the ERR replica from the engine
       then check snapshot remove and snapshot purge work fine.
    10. Check if the snapshot list is correct.

    `bin` is the command-line binary invoked with `--url <controller
    address>` for all snapshot operations below.
    """
    # NOTE(review): the cleanup_process() calls at the end only run when
    # every assertion passes; a failure leaves the processes and the two
    # temporary replica directories behind.
    em_client = ProcessManagerClient(INSTANCE_MANAGER_ENGINE)
    engine_process = create_engine_process(em_client)
    grpc_controller_client = ControllerClient(
        get_process_address(engine_process))

    rm_client = ProcessManagerClient(INSTANCE_MANAGER_REPLICA)
    # The same two directories are reused when the replicas are recreated
    # in the "reattach" phase.
    replica_dir1 = tempfile.mkdtemp()
    replica_dir2 = tempfile.mkdtemp()
    replica_process1 = create_replica_process(rm_client, REPLICA_NAME,
                                              replica_dir=replica_dir1)
    grpc_replica_client1 = ReplicaClient(
        get_process_address(replica_process1))
    # NOTE(review): fixed delay, presumably to let the replica process
    # come up before the first call to it -- confirm.
    time.sleep(3)
    cleanup_replica(grpc_replica_client1)
    replica_process2 = create_replica_process(rm_client, REPLICA_2_NAME,
                                              replica_dir=replica_dir2)
    grpc_replica_client2 = ReplicaClient(
        get_process_address(replica_process2))
    time.sleep(3)
    cleanup_replica(grpc_replica_client2)

    open_replica(grpc_replica_client1)
    open_replica(grpc_replica_client2)
    r1_url = grpc_replica_client1.url
    r2_url = grpc_replica_client2.url
    v = grpc_controller_client.volume_start(replicas=[
        r1_url, r2_url,
    ])
    assert v.replicaCount == 2

    # First snapshot: the command prints the snapshot name, and the
    # matching image file is expected at index 1 of replica 1's chain.
    cmd = [bin, '--url', grpc_controller_client.address,
           'snapshot', 'create']
    snap0 = subprocess.check_output(cmd, encoding='utf-8').strip()
    expected = grpc_replica_client1.replica_get().chain[1]
    assert expected == 'volume-snap-{}.img'.format(snap0)

    # Second snapshot, created with two labels; its meta file is the one
    # corrupted below.
    cmd = [bin, '--url', grpc_controller_client.address,
           'snapshot', 'create',
           '--label', 'name=snap1', '--label', 'key=value']
    snap1 = subprocess.check_output(cmd, encoding='utf-8').strip()

    # Mess up the replica1 by manually modifying the snapshot meta file
    # (only the copy in replica 1's directory; replica 2 is left intact).
    r1_snap1_meta_path = os.path.join(replica_dir1,
                                      'volume-snap-{}.img.meta'.format(snap1))
    with open(r1_snap1_meta_path, 'r') as f:
        snap1_meta_info = json.load(f)
    with open(r1_snap1_meta_path, 'w') as f:
        # Overwrite the "Parent" field with a name that is not one of the
        # snapshot images created above, then write the JSON back.
        snap1_meta_info["Parent"] = "invalid-parent.img"
        json.dump(snap1_meta_info, f)

    # Mock detach:
    cleanup_process(em_client)
    cleanup_process(rm_client)

    # Mock reattach:
    #   1. Directly create replicas processes.
    #   2. Call replica_create() to init replica servers for replica processes.
    #   3. Create one engine process and start the engine with replicas.
    replica_process1 = create_replica_process(rm_client, REPLICA_NAME,
                                              replica_dir=replica_dir1)
    grpc_replica_client1 = ReplicaClient(
        get_process_address(replica_process1))
    time.sleep(3)
    grpc_replica_client1.replica_create(size=SIZE_STR)
    replica_process2 = create_replica_process(rm_client, REPLICA_2_NAME,
                                              replica_dir=replica_dir2)
    grpc_replica_client2 = ReplicaClient(
        get_process_address(replica_process2))
    time.sleep(3)
    grpc_replica_client2.replica_create(size=SIZE_STR)

    # The clients and URLs are rebound to the newly created processes.
    engine_process = create_engine_process(em_client)
    grpc_controller_client = ControllerClient(
        get_process_address(engine_process))
    r1_url = grpc_replica_client1.url
    r2_url = grpc_replica_client2.url
    v = grpc_controller_client.volume_start(replicas=[
        r1_url, r2_url,
    ])
    # Both replicas are still counted, even though replica 1 is expected
    # to be in mode `ERR` (checked next).
    assert v.replicaCount == 2

    # Check if replica1 is mode `ERR`
    rs = grpc_controller_client.replica_list()
    assert len(rs) == 2
    r1_verified = False
    r2_verified = False
    for r in rs:
        if r.address == r1_url:
            assert r.mode == 'ERR'
            r1_verified = True
        if r.address == r2_url:
            assert r.mode == 'RW'
            r2_verified = True
    # Both addresses must actually have appeared in the list.
    assert r1_verified
    assert r2_verified

    # The engine still works fine
    cmd = [bin, '--url', grpc_controller_client.address,
           'snapshot', 'create']
    snap2 = subprocess.check_output(cmd, encoding='utf-8').strip()

    # Remove the ERR replica before removing snapshots
    grpc_controller_client.replica_delete(r1_url)
    rs = grpc_controller_client.replica_list()
    assert len(rs) == 1
    assert rs[0].address == r2_url
    assert rs[0].mode == "RW"

    cmd = [bin, '--url', grpc_controller_client.address,
           'snapshot', 'rm', snap1]
    subprocess.check_call(cmd)
    cmd = [bin, '--url', grpc_controller_client.address,
           'snapshot', 'purge']
    subprocess.check_call(cmd)
    wait_for_purge_completion(grpc_controller_client.address)

    cmd = [bin, '--debug',
           '--url', grpc_controller_client.address,
           'snapshot', 'ls']
    ls_output = subprocess.check_output(cmd, encoding='utf-8')

    # After removing snap1 and purging, exactly snap2 and snap0 must be
    # listed, in that order, below the `ID` header.
    assert ls_output == '''ID
{}
{}
'''.format(snap2, snap0)

    cleanup_process(em_client)
    cleanup_process(rm_client)
Ejemplo n.º 10
0
def grpc_controller_client(request, engine_manager_client):
    """Create an engine process and return a controller client for it.

    The client is built from the address of a process created through
    ``engine_manager_client``.  ``request`` is accepted but not used here.
    """
    engine_process = create_engine_process(engine_manager_client)
    engine_address = get_process_address(engine_process)
    return ControllerClient(engine_address)