def test_ha_salvage(client, volume_name):  # NOQA
    """Fault both replicas of a volume, salvage them and re-check the data.

    Flow:
      1. Create a two-replica volume, attach it to this host and write
         random data to its endpoint.
      2. Call ``common.k8s_delete_replica_pods_for_volume`` and wait until
         the volume is reported faulted with ``failedAt`` set on both
         replicas.
      3. Salvage both replicas by name, wait for the volume to come back
         detached with ``failedAt`` cleared, re-attach and verify the data.
      4. Detach, delete the volume and confirm no volumes are left.
    """
    client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=2)
    vol = common.wait_for_volume_detached(client, volume_name)

    expected_fields = (
        ("name", volume_name),
        ("size", SIZE),
        ("numberOfReplicas", 2),
        ("state", "detached"),
    )
    for field, expected in expected_fields:
        assert vol[field] == expected
    assert vol["created"] != ""

    host_id = get_self_host_id()
    vol.attach(hostId=host_id)
    vol = common.wait_for_volume_healthy(client, volume_name)

    assert len(vol["replicas"]) == 2
    # Remember the replica names now; they are needed for the salvage call.
    salvage_names = [vol["replicas"][idx]["name"] for idx in (0, 1)]

    data = write_random_data(vol["endpoint"])

    common.k8s_delete_replica_pods_for_volume(volume_name)

    vol = common.wait_for_volume_faulted(client, volume_name)
    assert len(vol["replicas"]) == 2
    for idx in (0, 1):
        assert vol["replicas"][idx]["failedAt"] != ""

    vol.salvage(names=salvage_names)

    vol = common.wait_for_volume_detached(client, volume_name)
    assert len(vol["replicas"]) == 2
    for idx in (0, 1):
        assert vol["replicas"][idx]["failedAt"] == ""

    vol.attach(hostId=host_id)
    vol = common.wait_for_volume_healthy(client, volume_name)

    check_data(vol["endpoint"], data)

    vol.detach()
    vol = common.wait_for_volume_detached(client, volume_name)

    client.delete(vol)
    common.wait_for_volume_delete(client, volume_name)

    assert len(client.list_volume()) == 0
def test_engine_offline_upgrade(client, volume_name):  # NOQA
    """Switch a detached volume to a new engine image and back again.

    Flow:
      1. Wait for the default engine image to have a reference count of 0,
         build an upgrade test image from its version fields and register
         it, waiting for the "ready" state.
      2. Create a volume, write random data while attached, then detach.
      3. Call ``engineUpgrade`` with the new image while detached, check
         the reference counts move to the new image, and check that
         deleting the in-use image raises.
      4. Attach, verify controller/replica images and the data, detach.
      5. Call ``engineUpgrade`` with the original image, repeat the
         checks, then delete the volume and the extra engine image.
    """
    def assert_images_everywhere(vol, image):
        # Controller and each replica must report `image` both as the
        # requested engine image and as the current image.
        ctrl = vol["controller"]
        assert ctrl["engineImage"] == image
        assert ctrl["currentImage"] == image
        for rep in vol["replicas"]:
            assert rep["engineImage"] == image
            assert rep["currentImage"] == image

    stock_img = common.get_default_engine_image(client)
    stock_img_name = stock_img["name"]
    stock_img = wait_for_engine_image_ref_count(client, stock_img_name, 0)

    # Passed positionally, in this order, to the test-image helper.
    version_fields = (
        "cliAPIVersion", "cliAPIMinVersion",
        "controllerAPIVersion", "controllerAPIMinVersion",
        "dataFormatVersion", "dataFormatMinVersion",
    )
    upgrade_image = common.get_upgrade_test_image(
        *[stock_img[field] for field in version_fields])

    upgrade_img = client.create_engine_image(image=upgrade_image)
    upgrade_img_name = upgrade_img["name"]
    upgrade_img = wait_for_engine_image_state(client, upgrade_img_name,
                                              "ready")
    assert upgrade_img["refCount"] == 0
    assert upgrade_img["noRefSince"] != ""

    stock_img = common.get_default_engine_image(client)
    stock_img_name = stock_img["name"]

    client.create_volume(name=volume_name, size=SIZE,
                         numberOfReplicas=REPLICA_COUNT)
    vol = common.wait_for_volume_detached(client, volume_name)
    stock_img = wait_for_engine_image_ref_count(client, stock_img_name, 1)

    original_image = stock_img["image"]

    assert vol["name"] == volume_name
    assert vol["engineImage"] == original_image
    assert vol["currentImage"] == original_image

    # Before our upgrade, write data to the volume first.
    host_id = get_self_host_id()
    vol.attach(hostId=host_id)
    vol = common.wait_for_volume_healthy(client, volume_name)
    data = write_random_data(vol["endpoint"])
    vol.detach()
    vol = common.wait_for_volume_detached(client, volume_name)

    vol.engineUpgrade(image=upgrade_image)
    vol = wait_for_volume_current_image(client, volume_name, upgrade_image)
    stock_img = wait_for_engine_image_ref_count(client, stock_img_name, 0)
    upgrade_img = wait_for_engine_image_ref_count(client, upgrade_img_name,
                                                  1)

    # cannot delete an image in use
    with pytest.raises(Exception) as err:
        client.delete(upgrade_img)
    assert "while being used" in str(err.value)

    vol.attach(hostId=host_id)
    vol = common.wait_for_volume_healthy(client, volume_name)
    assert_images_everywhere(vol, upgrade_image)

    check_data(vol["endpoint"], data)

    vol.detach()
    vol = common.wait_for_volume_detached(client, volume_name)

    vol.engineUpgrade(image=original_image)
    vol = wait_for_volume_current_image(client, volume_name, original_image)
    assert vol["engineImage"] == original_image
    assert vol["controller"]["engineImage"] == original_image
    for rep in vol["replicas"]:
        assert rep["engineImage"] == original_image

    stock_img = wait_for_engine_image_ref_count(client, stock_img_name, 1)
    upgrade_img = wait_for_engine_image_ref_count(client, upgrade_img_name,
                                                  0)

    vol.attach(hostId=host_id)
    vol = common.wait_for_volume_healthy(client, volume_name)
    assert_images_everywhere(vol, original_image)

    check_data(vol["endpoint"], data)

    client.delete(vol)
    wait_for_volume_delete(client, volume_name)

    client.delete(upgrade_img)
def test_engine_live_upgrade_rollback(client, volume_name):  # NOQA
    """Request a live upgrade that cannot complete, then recover from it.

    Flow:
      1. Register the image returned by
         ``common.get_compatibility_test_image`` and wait for it to be
         "ready".
      2. Create and attach a volume, write random data, and request an
         ``engineUpgrade`` to that image. ``engineImage`` changes but
         ``currentImage`` is expected to stay on the original image, and
         waiting for the switch is expected to raise.
      3. Roll back by requesting the original image while still attached
         and verify images, replica count, data and volume health.
      4. Request the same upgrade again, confirm it still does not
         complete while attached, then detach and wait for the current
         image to become the requested one.
      5. Switch back to the original image while detached, attach, verify
         images and data, then delete the volume and the extra image.

    The volume is created with ``REPLICA_COUNT`` replicas so that the
    later ``wait_for_volume_replica_count(..., REPLICA_COUNT)`` calls wait
    for the same number that was requested at creation.
    """
    default_img = common.get_default_engine_image(client)
    default_img_name = default_img["name"]
    default_img = wait_for_engine_image_ref_count(client, default_img_name, 0)
    cli_v = default_img["cliAPIVersion"]
    cli_minv = default_img["cliAPIMinVersion"]
    ctl_v = default_img["controllerAPIVersion"]
    ctl_minv = default_img["controllerAPIMinVersion"]
    data_v = default_img["dataFormatVersion"]
    data_minv = default_img["dataFormatMinVersion"]
    wrong_engine_upgrade_image = common.get_compatibility_test_image(
        cli_v, cli_minv, ctl_v, ctl_minv, data_v, data_minv)
    new_img = client.create_engine_image(image=wrong_engine_upgrade_image)
    new_img_name = new_img["name"]
    new_img = wait_for_engine_image_state(client, new_img_name, "ready")
    assert new_img["refCount"] == 0
    assert new_img["noRefSince"] != ""

    default_img = common.get_default_engine_image(client)
    default_img_name = default_img["name"]

    # Use REPLICA_COUNT (not a literal) to match the replica-count waits
    # further down.
    volume = client.create_volume(name=volume_name, size=SIZE,
                                  numberOfReplicas=REPLICA_COUNT)
    volume = common.wait_for_volume_detached(client, volume_name)
    default_img = wait_for_engine_image_ref_count(client, default_img_name, 1)

    original_engine_image = volume["engineImage"]
    assert original_engine_image != wrong_engine_upgrade_image

    host_id = get_self_host_id()
    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    data = write_random_data(volume["endpoint"])

    volume.engineUpgrade(image=wrong_engine_upgrade_image)
    volume = client.by_id_volume(volume["name"])
    # The request is recorded, but the running image has not changed.
    assert volume["engineImage"] == wrong_engine_upgrade_image
    assert volume["currentImage"] == original_engine_image

    with pytest.raises(Exception):
        # this will timeout
        wait_for_volume_current_image(client, volume_name,
                                      wrong_engine_upgrade_image)

    # rollback
    volume.engineUpgrade(image=original_engine_image)
    volume = wait_for_volume_current_image(client, volume_name,
                                           original_engine_image)
    assert volume["engineImage"] == original_engine_image
    assert volume["currentImage"] == original_engine_image
    assert volume["controller"]["engineImage"] == original_engine_image
    assert volume["controller"]["currentImage"] == original_engine_image

    volume = common.wait_for_volume_replica_count(client, volume_name,
                                                  REPLICA_COUNT)

    check_data(volume["endpoint"], data)

    assert volume["state"] == common.VOLUME_STATE_ATTACHED
    assert volume["robustness"] == common.VOLUME_ROBUSTNESS_HEALTHY

    # try again, this time let's try detach
    volume.engineUpgrade(image=wrong_engine_upgrade_image)
    volume = client.by_id_volume(volume["name"])
    assert volume["engineImage"] == wrong_engine_upgrade_image
    assert volume["currentImage"] == original_engine_image

    with pytest.raises(Exception):
        # this will timeout
        wait_for_volume_current_image(client, volume_name,
                                      wrong_engine_upgrade_image)

    volume = volume.detach()
    volume = wait_for_volume_current_image(client, volume_name,
                                           wrong_engine_upgrade_image)
    # all the images would be updated
    assert volume["engineImage"] == wrong_engine_upgrade_image
    assert volume["controller"]["engineImage"] == wrong_engine_upgrade_image
    volume = common.wait_for_volume_replica_count(client, volume_name,
                                                  REPLICA_COUNT)
    for replica in volume["replicas"]:
        assert replica["engineImage"] == wrong_engine_upgrade_image

    # upgrade to the correct image when offline
    volume.engineUpgrade(image=original_engine_image)
    volume = client.by_id_volume(volume["name"])
    assert volume["engineImage"] == original_engine_image

    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)
    assert volume["engineImage"] == original_engine_image
    assert volume["currentImage"] == original_engine_image
    assert volume["controller"]["engineImage"] == original_engine_image
    assert volume["controller"]["currentImage"] == original_engine_image
    for replica in volume["replicas"]:
        assert replica["engineImage"] == original_engine_image
        assert replica["currentImage"] == original_engine_image

    check_data(volume["endpoint"], data)

    client.delete(volume)
    wait_for_volume_delete(client, volume_name)

    client.delete(new_img)
def test_engine_live_upgrade(client, volume_name):  # NOQA
    """Upgrade an attached volume's engine image and then revert it live.

    Flow:
      1. Register an upgrade test image built from the default image's
         version fields and wait for it to be "ready".
      2. Create and attach a volume, confirm everything runs the original
         image, and write random data.
      3. Call ``engineUpgrade`` while attached, wait for the current image
         to change, check the image reference counts and that
         ``REPLICA_COUNT`` replicas run the new image, and verify the data.
      4. Detach and re-attach, checking images and data again.
      5. Call ``engineUpgrade`` back to the original image while attached,
         repeat the checks, then detach and delete the volume and the
         extra engine image.

    The volume is created with ``REPLICA_COUNT`` replicas so that the
    ``== REPLICA_COUNT`` assertions below compare against the number that
    was actually requested.
    """
    default_img = common.get_default_engine_image(client)
    default_img_name = default_img["name"]
    default_img = wait_for_engine_image_ref_count(client, default_img_name, 0)
    cli_v = default_img["cliAPIVersion"]
    cli_minv = default_img["cliAPIMinVersion"]
    ctl_v = default_img["controllerAPIVersion"]
    ctl_minv = default_img["controllerAPIMinVersion"]
    data_v = default_img["dataFormatVersion"]
    data_minv = default_img["dataFormatMinVersion"]
    engine_upgrade_image = common.get_upgrade_test_image(cli_v, cli_minv,
                                                         ctl_v, ctl_minv,
                                                         data_v, data_minv)

    new_img = client.create_engine_image(image=engine_upgrade_image)
    new_img_name = new_img["name"]
    new_img = wait_for_engine_image_state(client, new_img_name, "ready")
    assert new_img["refCount"] == 0
    assert new_img["noRefSince"] != ""

    default_img = common.get_default_engine_image(client)
    default_img_name = default_img["name"]

    # Use REPLICA_COUNT (not a literal) to match the count assertions
    # further down.
    volume = client.create_volume(name=volume_name, size=SIZE,
                                  numberOfReplicas=REPLICA_COUNT)
    volume = common.wait_for_volume_detached(client, volume_name)
    default_img = wait_for_engine_image_ref_count(client, default_img_name, 1)

    assert volume["name"] == volume_name

    original_engine_image = volume["engineImage"]
    assert original_engine_image != engine_upgrade_image

    host_id = get_self_host_id()
    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)
    assert volume["engineImage"] == original_engine_image
    assert volume["currentImage"] == original_engine_image
    assert volume["controller"]["engineImage"] == original_engine_image
    assert volume["controller"]["currentImage"] == original_engine_image
    for replica in volume["replicas"]:
        assert replica["engineImage"] == original_engine_image
        assert replica["currentImage"] == original_engine_image

    data = write_random_data(volume["endpoint"])

    volume.engineUpgrade(image=engine_upgrade_image)
    volume = wait_for_volume_current_image(client, volume_name,
                                           engine_upgrade_image)
    assert volume["controller"]["engineImage"] == engine_upgrade_image
    default_img = wait_for_engine_image_ref_count(client, default_img_name, 0)
    new_img = wait_for_engine_image_ref_count(client, new_img_name, 1)

    # old replica may be in deletion process, so count only the replicas
    # already running the new image instead of asserting on every entry
    count = sum(1 for replica in volume["replicas"]
                if replica["currentImage"] == engine_upgrade_image)
    assert count == REPLICA_COUNT

    check_data(volume["endpoint"], data)

    volume = volume.detach()
    volume = common.wait_for_volume_detached(client, volume_name)
    assert len(volume["replicas"]) == REPLICA_COUNT
    assert volume["engineImage"] == engine_upgrade_image
    assert volume["controller"]["engineImage"] == engine_upgrade_image
    for replica in volume["replicas"]:
        assert replica["engineImage"] == engine_upgrade_image

    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)
    assert volume["engineImage"] == engine_upgrade_image
    assert volume["currentImage"] == engine_upgrade_image
    assert volume["controller"]["engineImage"] == engine_upgrade_image
    assert volume["controller"]["currentImage"] == engine_upgrade_image
    for replica in volume["replicas"]:
        assert replica["engineImage"] == engine_upgrade_image
        assert replica["currentImage"] == engine_upgrade_image

    # Make sure detaching didn't somehow interfere with the data.
    check_data(volume["endpoint"], data)

    volume.engineUpgrade(image=original_engine_image)
    volume = wait_for_volume_current_image(client, volume_name,
                                           original_engine_image)
    default_img = wait_for_engine_image_ref_count(client, default_img_name, 1)
    new_img = wait_for_engine_image_ref_count(client, new_img_name, 0)

    assert volume["engineImage"] == original_engine_image
    assert volume["controller"]["engineImage"] == original_engine_image

    # old replica may be in deletion process
    count = sum(1 for replica in volume["replicas"]
                if replica["engineImage"] == original_engine_image)
    assert count == REPLICA_COUNT

    check_data(volume["endpoint"], data)

    volume = volume.detach()
    volume = common.wait_for_volume_detached(client, volume_name)
    assert len(volume["replicas"]) == REPLICA_COUNT

    assert volume["engineImage"] == original_engine_image
    assert volume["controller"]["engineImage"] == original_engine_image
    for replica in volume["replicas"]:
        assert replica["engineImage"] == original_engine_image

    client.delete(volume)
    wait_for_volume_delete(client, volume_name)

    client.delete(new_img)
def test_ha_simple_recovery(client, volume_name):  # NOQA
    """Remove one replica from an attached volume and wait for a new one.

    Flow:
      1. Create a two-replica volume, attach it to this host and write
         random data to its endpoint.
      2. Call ``replicaRemove`` on the first replica, then poll the volume
         up to ``RETRY_COUNTS`` times until a replica with a name other
         than the two original ones shows up.
      3. Wait for the volume to be healthy again, check that the second
         original replica is still listed and that the data is intact.
      4. Detach, delete the volume and confirm no volumes are left.
    """
    client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=2)
    vol = common.wait_for_volume_detached(client, volume_name)

    expected_fields = (
        ("name", volume_name),
        ("size", SIZE),
        ("numberOfReplicas", 2),
        ("state", "detached"),
    )
    for field, expected in expected_fields:
        assert vol[field] == expected
    assert vol["created"] != ""

    host_id = get_self_host_id()
    vol.attach(hostId=host_id)
    common.wait_for_volume_healthy(client, volume_name)

    vol = client.by_id_volume(volume_name)
    assert vol["endpoint"] == DEV_PATH + volume_name

    assert len(vol["replicas"]) == 2
    removed_replica = vol["replicas"][0]
    assert removed_replica["name"] != ""
    kept_replica = vol["replicas"][1]
    assert kept_replica["name"] != ""

    data = write_random_data(vol["endpoint"])

    vol.replicaRemove(name=removed_replica["name"])

    # wait until we saw a replica starts rebuilding
    original_names = (removed_replica["name"], kept_replica["name"])
    new_replica_found = False
    for _ in range(RETRY_COUNTS):
        snapshot = client.by_id_volume(volume_name)
        new_replica_found = any(
            rep["name"] not in original_names
            for rep in snapshot["replicas"])
        if new_replica_found:
            break
        time.sleep(RETRY_ITERVAL)
    assert new_replica_found

    common.wait_for_volume_healthy(client, volume_name)

    vol = client.by_id_volume(volume_name)
    assert vol["state"] == common.VOLUME_STATE_ATTACHED
    assert vol["robustness"] == common.VOLUME_ROBUSTNESS_HEALTHY
    assert len(vol["replicas"]) >= 2

    # The replica that was not removed must still be part of the volume.
    assert any(rep["name"] == kept_replica["name"]
               for rep in vol["replicas"])

    check_data(vol['endpoint'], data)

    vol.detach()
    vol = common.wait_for_volume_detached(client, volume_name)

    client.delete(vol)
    common.wait_for_volume_delete(client, volume_name)

    assert len(client.list_volume()) == 0