def test_ha_salvage(client, volume_name):  # NOQA
    # get a random client

    volume = client.create_volume(name=volume_name,
                                  size=SIZE,
                                  numberOfReplicas=2)
    volume = common.wait_for_volume_detached(client, volume_name)
    assert volume["name"] == volume_name
    assert volume["size"] == SIZE
    assert volume["numberOfReplicas"] == 2
    assert volume["state"] == "detached"
    assert volume["created"] != ""

    host_id = get_self_host_id()
    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    assert len(volume["replicas"]) == 2
    replica0_name = volume["replicas"][0]["name"]
    replica1_name = volume["replicas"][1]["name"]

    data = write_random_data(volume["endpoint"])

    common.k8s_delete_replica_pods_for_volume(volume_name)

    volume = common.wait_for_volume_faulted(client, volume_name)
    assert len(volume["replicas"]) == 2
    assert volume["replicas"][0]["failedAt"] != ""
    assert volume["replicas"][1]["failedAt"] != ""

    volume.salvage(names=[replica0_name, replica1_name])

    volume = common.wait_for_volume_detached(client, volume_name)
    assert len(volume["replicas"]) == 2
    assert volume["replicas"][0]["failedAt"] == ""
    assert volume["replicas"][1]["failedAt"] == ""

    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    check_data(volume["endpoint"], data)

    volume = volume.detach()
    volume = common.wait_for_volume_detached(client, volume_name)

    client.delete(volume)
    common.wait_for_volume_delete(client, volume_name)

    volumes = client.list_volume()
    assert len(volumes) == 0
def test_engine_offline_upgrade(client, volume_name):  # NOQA
    default_img = common.get_default_engine_image(client)
    default_img_name = default_img["name"]
    default_img = wait_for_engine_image_ref_count(client, default_img_name, 0)
    cli_v = default_img["cliAPIVersion"]
    cli_minv = default_img["cliAPIMinVersion"]
    ctl_v = default_img["controllerAPIVersion"]
    ctl_minv = default_img["controllerAPIMinVersion"]
    data_v = default_img["dataFormatVersion"]
    data_minv = default_img["dataFormatMinVersion"]
    engine_upgrade_image = common.get_upgrade_test_image(cli_v, cli_minv,
                                                         ctl_v, ctl_minv,
                                                         data_v, data_minv)

    new_img = client.create_engine_image(image=engine_upgrade_image)
    new_img_name = new_img["name"]
    new_img = wait_for_engine_image_state(client, new_img_name, "ready")
    assert new_img["refCount"] == 0
    assert new_img["noRefSince"] != ""

    default_img = common.get_default_engine_image(client)
    default_img_name = default_img["name"]

    volume = client.create_volume(name=volume_name, size=SIZE,
                                  numberOfReplicas=REPLICA_COUNT)
    volume = common.wait_for_volume_detached(client, volume_name)
    default_img = wait_for_engine_image_ref_count(client, default_img_name, 1)

    original_engine_image = default_img["image"]

    assert volume["name"] == volume_name
    assert volume["engineImage"] == original_engine_image
    assert volume["currentImage"] == original_engine_image

    # Before our upgrade, write data to the volume first.
    host_id = get_self_host_id()
    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    data = write_random_data(volume["endpoint"])

    volume = volume.detach()
    volume = common.wait_for_volume_detached(client, volume_name)

    volume.engineUpgrade(image=engine_upgrade_image)
    volume = wait_for_volume_current_image(client, volume_name,
                                           engine_upgrade_image)
    default_img = wait_for_engine_image_ref_count(client, default_img_name, 0)
    new_img = wait_for_engine_image_ref_count(client, new_img_name, 1)

    # cannot delete a image in use
    with pytest.raises(Exception) as e:
        client.delete(new_img)
    assert "while being used" in str(e.value)

    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    assert volume["controller"]["engineImage"] == engine_upgrade_image
    assert volume["controller"]["currentImage"] == engine_upgrade_image
    for replica in volume["replicas"]:
        assert replica["engineImage"] == engine_upgrade_image
        assert replica["currentImage"] == engine_upgrade_image

    check_data(volume["endpoint"], data)

    volume = volume.detach()
    volume = common.wait_for_volume_detached(client, volume_name)

    volume.engineUpgrade(image=original_engine_image)
    volume = wait_for_volume_current_image(client, volume_name,
                                           original_engine_image)
    assert volume["engineImage"] == original_engine_image
    assert volume["controller"]["engineImage"] == original_engine_image
    for replica in volume["replicas"]:
        assert replica["engineImage"] == original_engine_image

    default_img = wait_for_engine_image_ref_count(client, default_img_name, 1)
    new_img = wait_for_engine_image_ref_count(client, new_img_name, 0)

    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    assert volume["controller"]["engineImage"] == original_engine_image
    assert volume["controller"]["currentImage"] == original_engine_image
    for replica in volume["replicas"]:
        assert replica["engineImage"] == original_engine_image
        assert replica["currentImage"] == original_engine_image

    check_data(volume["endpoint"], data)

    client.delete(volume)
    wait_for_volume_delete(client, volume_name)

    client.delete(new_img)
def test_engine_live_upgrade_rollback(client, volume_name):  # NOQA
    default_img = common.get_default_engine_image(client)
    default_img_name = default_img["name"]
    default_img = wait_for_engine_image_ref_count(client, default_img_name, 0)
    cli_v = default_img["cliAPIVersion"]
    cli_minv = default_img["cliAPIMinVersion"]
    ctl_v = default_img["controllerAPIVersion"]
    ctl_minv = default_img["controllerAPIMinVersion"]
    data_v = default_img["dataFormatVersion"]
    data_minv = default_img["dataFormatMinVersion"]
    wrong_engine_upgrade_image = common.get_compatibility_test_image(
            cli_v, cli_minv,
            ctl_v, ctl_minv,
            data_v, data_minv)
    new_img = client.create_engine_image(image=wrong_engine_upgrade_image)
    new_img_name = new_img["name"]
    new_img = wait_for_engine_image_state(client, new_img_name, "ready")
    assert new_img["refCount"] == 0
    assert new_img["noRefSince"] != ""

    default_img = common.get_default_engine_image(client)
    default_img_name = default_img["name"]

    volume = client.create_volume(name=volume_name, size=SIZE,
                                  numberOfReplicas=2)
    volume = common.wait_for_volume_detached(client, volume_name)
    default_img = wait_for_engine_image_ref_count(client, default_img_name, 1)

    original_engine_image = volume["engineImage"]
    assert original_engine_image != wrong_engine_upgrade_image

    host_id = get_self_host_id()
    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    data = write_random_data(volume["endpoint"])

    volume.engineUpgrade(image=wrong_engine_upgrade_image)
    volume = client.by_id_volume(volume["name"])
    assert volume["engineImage"] == wrong_engine_upgrade_image
    assert volume["currentImage"] == original_engine_image

    with pytest.raises(Exception):
        # this will timeout
        wait_for_volume_current_image(client, volume_name,
                                      wrong_engine_upgrade_image)

    # rollback
    volume.engineUpgrade(image=original_engine_image)
    volume = wait_for_volume_current_image(client, volume_name,
                                           original_engine_image)
    assert volume["engineImage"] == original_engine_image
    assert volume["currentImage"] == original_engine_image
    assert volume["controller"]["engineImage"] == original_engine_image
    assert volume["controller"]["currentImage"] == original_engine_image

    volume = common.wait_for_volume_replica_count(client, volume_name,
                                                  REPLICA_COUNT)

    check_data(volume["endpoint"], data)

    assert volume["state"] == common.VOLUME_STATE_ATTACHED
    assert volume["robustness"] == common.VOLUME_ROBUSTNESS_HEALTHY

    # try again, this time let's try detach
    volume.engineUpgrade(image=wrong_engine_upgrade_image)
    volume = client.by_id_volume(volume["name"])
    assert volume["engineImage"] == wrong_engine_upgrade_image
    assert volume["currentImage"] == original_engine_image

    with pytest.raises(Exception):
        # this will timeout
        wait_for_volume_current_image(client, volume_name,
                                      wrong_engine_upgrade_image)

    volume = volume.detach()
    volume = wait_for_volume_current_image(client, volume_name,
                                           wrong_engine_upgrade_image)
    # all the images would be updated
    assert volume["engineImage"] == wrong_engine_upgrade_image
    assert volume["controller"]["engineImage"] == wrong_engine_upgrade_image
    volume = common.wait_for_volume_replica_count(client, volume_name,
                                                  REPLICA_COUNT)
    for replica in volume["replicas"]:
        assert replica["engineImage"] == wrong_engine_upgrade_image

    # upgrade to the correct image when offline
    volume.engineUpgrade(image=original_engine_image)
    volume = client.by_id_volume(volume["name"])
    assert volume["engineImage"] == original_engine_image

    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)
    assert volume["engineImage"] == original_engine_image
    assert volume["currentImage"] == original_engine_image
    assert volume["controller"]["engineImage"] == original_engine_image
    assert volume["controller"]["currentImage"] == original_engine_image
    for replica in volume["replicas"]:
        assert replica["engineImage"] == original_engine_image
        assert replica["currentImage"] == original_engine_image

    check_data(volume["endpoint"], data)

    client.delete(volume)
    wait_for_volume_delete(client, volume_name)

    client.delete(new_img)
def test_engine_live_upgrade(client, volume_name):  # NOQA
    default_img = common.get_default_engine_image(client)
    default_img_name = default_img["name"]
    default_img = wait_for_engine_image_ref_count(client, default_img_name, 0)
    cli_v = default_img["cliAPIVersion"]
    cli_minv = default_img["cliAPIMinVersion"]
    ctl_v = default_img["controllerAPIVersion"]
    ctl_minv = default_img["controllerAPIMinVersion"]
    data_v = default_img["dataFormatVersion"]
    data_minv = default_img["dataFormatMinVersion"]
    engine_upgrade_image = common.get_upgrade_test_image(cli_v, cli_minv,
                                                         ctl_v, ctl_minv,
                                                         data_v, data_minv)

    new_img = client.create_engine_image(image=engine_upgrade_image)
    new_img_name = new_img["name"]
    new_img = wait_for_engine_image_state(client, new_img_name, "ready")
    assert new_img["refCount"] == 0
    assert new_img["noRefSince"] != ""

    default_img = common.get_default_engine_image(client)
    default_img_name = default_img["name"]

    volume = client.create_volume(name=volume_name, size=SIZE,
                                  numberOfReplicas=2)
    volume = common.wait_for_volume_detached(client, volume_name)
    default_img = wait_for_engine_image_ref_count(client, default_img_name, 1)

    assert volume["name"] == volume_name

    original_engine_image = volume["engineImage"]
    assert original_engine_image != engine_upgrade_image

    host_id = get_self_host_id()
    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)
    assert volume["engineImage"] == original_engine_image
    assert volume["currentImage"] == original_engine_image
    assert volume["controller"]["engineImage"] == original_engine_image
    assert volume["controller"]["currentImage"] == original_engine_image
    for replica in volume["replicas"]:
        assert replica["engineImage"] == original_engine_image
        assert replica["currentImage"] == original_engine_image

    data = write_random_data(volume["endpoint"])

    volume.engineUpgrade(image=engine_upgrade_image)
    volume = wait_for_volume_current_image(client, volume_name,
                                           engine_upgrade_image)

    assert volume["controller"]["engineImage"] == engine_upgrade_image

    default_img = wait_for_engine_image_ref_count(client, default_img_name, 0)
    new_img = wait_for_engine_image_ref_count(client, new_img_name, 1)

    count = 0
    # old replica may be in deletion process
    for replica in volume["replicas"]:
        if replica["currentImage"] == engine_upgrade_image:
            count += 1
    assert count == REPLICA_COUNT

    check_data(volume["endpoint"], data)

    volume = volume.detach()
    volume = common.wait_for_volume_detached(client, volume_name)
    assert len(volume["replicas"]) == REPLICA_COUNT
    assert volume["engineImage"] == engine_upgrade_image
    assert volume["controller"]["engineImage"] == engine_upgrade_image
    for replica in volume["replicas"]:
        assert replica["engineImage"] == engine_upgrade_image

    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)
    assert volume["engineImage"] == engine_upgrade_image
    assert volume["currentImage"] == engine_upgrade_image
    assert volume["controller"]["engineImage"] == engine_upgrade_image
    assert volume["controller"]["currentImage"] == engine_upgrade_image
    for replica in volume["replicas"]:
        assert replica["engineImage"] == engine_upgrade_image
        assert replica["currentImage"] == engine_upgrade_image

    # Make sure detaching didn't somehow interfere with the data.
    check_data(volume["endpoint"], data)

    volume.engineUpgrade(image=original_engine_image)
    volume = wait_for_volume_current_image(client, volume_name,
                                           original_engine_image)
    assert volume["controller"]["engineImage"] == original_engine_image

    default_img = wait_for_engine_image_ref_count(client, default_img_name, 1)
    new_img = wait_for_engine_image_ref_count(client, new_img_name, 0)

    assert volume["engineImage"] == original_engine_image

    assert volume["controller"]["engineImage"] == original_engine_image
    count = 0
    # old replica may be in deletion process
    for replica in volume["replicas"]:
        if replica["engineImage"] == original_engine_image:
            count += 1
    assert count == REPLICA_COUNT

    check_data(volume["endpoint"], data)

    volume = volume.detach()
    volume = common.wait_for_volume_detached(client, volume_name)
    assert len(volume["replicas"]) == REPLICA_COUNT

    assert volume["engineImage"] == original_engine_image
    assert volume["controller"]["engineImage"] == original_engine_image
    for replica in volume["replicas"]:
        assert replica["engineImage"] == original_engine_image

    client.delete(volume)
    wait_for_volume_delete(client, volume_name)

    client.delete(new_img)
def test_ha_simple_recovery(client, volume_name):  # NOQA
    # get a random client

    volume = client.create_volume(name=volume_name,
                                  size=SIZE,
                                  numberOfReplicas=2)
    volume = common.wait_for_volume_detached(client, volume_name)
    assert volume["name"] == volume_name
    assert volume["size"] == SIZE
    assert volume["numberOfReplicas"] == 2
    assert volume["state"] == "detached"
    assert volume["created"] != ""

    host_id = get_self_host_id()
    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    volume = client.by_id_volume(volume_name)
    assert volume["endpoint"] == DEV_PATH + volume_name

    assert len(volume["replicas"]) == 2
    replica0 = volume["replicas"][0]
    assert replica0["name"] != ""

    replica1 = volume["replicas"][1]
    assert replica1["name"] != ""

    data = write_random_data(volume["endpoint"])

    volume = volume.replicaRemove(name=replica0["name"])

    # wait until we saw a replica starts rebuilding
    new_replica_found = False
    for i in range(RETRY_COUNTS):
        v = client.by_id_volume(volume_name)
        for r in v["replicas"]:
            if r["name"] != replica0["name"] and \
                    r["name"] != replica1["name"]:
                new_replica_found = True
                break
        if new_replica_found:
            break
        time.sleep(RETRY_ITERVAL)
    assert new_replica_found

    volume = common.wait_for_volume_healthy(client, volume_name)

    volume = client.by_id_volume(volume_name)
    assert volume["state"] == common.VOLUME_STATE_ATTACHED
    assert volume["robustness"] == common.VOLUME_ROBUSTNESS_HEALTHY
    assert len(volume["replicas"]) >= 2

    found = False
    for replica in volume["replicas"]:
        if replica["name"] == replica1["name"]:
            found = True
            break
    assert found

    check_data(volume['endpoint'], data)

    volume = volume.detach()
    volume = common.wait_for_volume_detached(client, volume_name)

    client.delete(volume)
    common.wait_for_volume_delete(client, volume_name)

    volumes = client.list_volume()
    assert len(volumes) == 0