def test_ha_revision_counter_consistency(
        grpc_controller,  # NOQA
        grpc_replica1, grpc_replica2):  # NOQA
    """Write through the block device and verify both replicas end up
    with identical, non-zero revision counters."""
    open_replica(grpc_replica1)
    open_replica(grpc_replica2)

    # Controller starts with no replicas attached.
    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    r1_url = grpc_replica1.url
    r2_url = grpc_replica2.url
    v = grpc_controller.volume_start(replicas=[r1_url, r2_url])
    assert v.name == VOLUME_NAME
    assert v.replicaCount == 2

    # Both replicas must come up read-write.
    replicas = grpc_controller.replica_list()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev(VOLUME_NAME)

    # 10 threads x 100 writes of 128 bytes each.
    verify_async(dev, 10, 128, 100)

    r1 = grpc_replica1.replica_get()
    r2 = grpc_replica2.replica_get()
    # kernel can merge requests so backend may not receive 1000 writes
    assert r1.revisionCounter > 0
    assert r1.revisionCounter == r2.revisionCounter
def test_ha_revision_counter_consistency(
        grpc_controller,  # NOQA
        grpc_replica1, grpc_replica2):  # NOQA
    """Write through the block device and verify both replicas end up
    with identical, non-zero revision counters (common.REPLICA variant)."""
    open_replica(grpc_replica1)
    open_replica(grpc_replica2)

    # Controller starts with no replicas attached.
    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    v = grpc_controller.volume_start(
        replicas=[common.REPLICA1, common.REPLICA2])
    assert v.replicaCount == 2

    # Both replicas must come up read-write.
    replicas = grpc_controller.replica_list()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev()

    # 10 threads x 100 writes of 128 bytes each.
    common.verify_async(dev, 10, 128, 100)

    r1 = grpc_replica1.replica_get()
    r2 = grpc_replica2.replica_get()
    # kernel can merge requests so backend may not receive 1000 writes
    assert r1.revisionCounter > 0
    assert r1.revisionCounter == r2.revisionCounter
def test_ha_single_replica_failure(
        grpc_controller,  # NOQA
        grpc_replica1, grpc_replica2):  # NOQA
    """Kill one of two replicas mid-flight and verify the volume keeps
    serving reads/writes while the dead replica is marked ERR."""
    open_replica(grpc_replica1)
    open_replica(grpc_replica2)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    r1_url = grpc_replica1.url
    r2_url = grpc_replica2.url
    v = grpc_controller.volume_start(replicas=[r1_url, r2_url])
    assert v.replicaCount == 2

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev(VOLUME_NAME)

    # Seed known data before failing the replica.
    data = random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    # Take replica2 down; subsequent writes should fault it out.
    cleanup_replica(grpc_replica2)

    verify_async(dev, 10, 128, 1)

    verify_replica_state(grpc_controller, 1, "ERR")

    # Earlier data must still be readable from the surviving replica.
    verify_read(dev, data_offset, data)
def test_ha_revision_counter_consistency(controller, replica1, replica2):  # NOQA
    """Legacy-API variant: write through the block device and verify both
    replicas report identical, non-zero revision counters."""
    open_replica(replica1)
    open_replica(replica2)

    replicas = controller.list_replica()
    assert len(replicas) == 0

    v = controller.list_volume()[0]
    v = v.start(replicas=[
        common.REPLICA1,
        common.REPLICA2
    ])
    assert v.replicaCount == 2

    replicas = controller.list_replica()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev()

    common.verify_async(dev, 10, 128, 100)

    r1 = replica1.list_replica()[0]
    r2 = replica2.list_replica()[0]
    # kernel can merge requests so backend may not receive 1000 writes
    # NOTE(review): sibling legacy tests compare revisioncounter against
    # string literals ('1', '12') — confirm this API returns an int here,
    # otherwise `> 0` raises TypeError on Python 3.
    assert r1.revisioncounter > 0
    assert r1.revisioncounter == r2.revisioncounter
def test_frontend_switch(
        grpc_controller_no_frontend,  # NOQA
        grpc_replica1, grpc_replica2):  # NOQA
    """Start a volume without a frontend, then attach/detach/re-attach the
    engine frontend and verify data written earlier survives the cycle."""
    open_replica(grpc_replica1)
    open_replica(grpc_replica2)

    replicas = grpc_controller_no_frontend.replica_list()
    assert len(replicas) == 0

    r1_url = grpc_replica1.url
    r2_url = grpc_replica2.url
    v = grpc_controller_no_frontend.volume_start(replicas=[r1_url, r2_url])
    assert v.name == VOLUME_NO_FRONTEND_NAME
    assert v.replicaCount == 2
    # Volume was started with no frontend attached.
    assert v.frontend == ""

    start_engine_frontend(ENGINE_NO_FRONTEND_NAME)

    dev = get_blockdev(volume=VOLUME_NO_FRONTEND_NAME)

    data = random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    # Bounce the frontend and confirm the data is still there.
    shutdown_engine_frontend(ENGINE_NO_FRONTEND_NAME)
    start_engine_frontend(ENGINE_NO_FRONTEND_NAME)

    dev = get_blockdev(volume=VOLUME_NO_FRONTEND_NAME)
    verify_read(dev, data_offset, data)

    shutdown_engine_frontend(ENGINE_NO_FRONTEND_NAME)
def test_ha_single_replica_failure(controller, replica1, replica2):  # NOQA
    """Legacy-API variant: fail one of two replicas and verify the volume
    keeps serving I/O while the dead replica is marked ERR."""
    open_replica(replica1)
    open_replica(replica2)

    replicas = controller.list_replica()
    assert len(replicas) == 0

    v = controller.list_volume()[0]
    v = v.start(replicas=[
        common.REPLICA1,
        common.REPLICA2
    ])
    assert v.replicaCount == 2

    replicas = controller.list_replica()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev()

    # Seed known data before failing the replica.
    data = common.random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    cleanup_replica(replica2)

    verify_async(dev, 10, 128, 1)

    common.verify_replica_state(controller, 1, "ERR")

    verify_read(dev, data_offset, data)
def test_frontend_show(controller, replica1, replica2):  # NOQA
    """Legacy-API variant: verify the frontend endpoint reported by the
    volume, the launcher, and the CLI all agree."""
    common.open_replica(replica1)
    common.open_replica(replica2)

    replicas = controller.list_replica()
    assert len(replicas) == 0

    v = controller.list_volume()[0]
    v = v.start(replicas=[
        common.REPLICA1,
        common.REPLICA2
    ])

    # Endpoint form depends on the frontend type.
    ft = v["frontend"]
    if ft == "tgt" or ft == "tcmu":
        assert v["endpoint"] == path.join(common.LONGHORN_DEV_DIR,
                                          common.VOLUME_NAME)
    elif ft == "socket":
        assert v["endpoint"] == common.get_socket_path(common.VOLUME_NAME)

    launcher_info = launcher.info()
    assert launcher_info["endpoint"] == path.join(common.LONGHORN_DEV_DIR,
                                                  common.VOLUME_NAME)

    info = cmd.info()
    assert info["name"] == common.VOLUME_NAME
    assert info["endpoint"] == v["endpoint"]
def test_ha_revision_counter_consistency(controller, replica1, replica2):  # NOQA
    """Legacy-API variant: write through the block device and verify both
    replicas report identical, non-zero revision counters.

    Fix: the original asserted ``r1.revisioncounter > '0'`` — a
    lexicographic string comparison (and a TypeError on Python 3 if the
    field is an int). Convert via ``int()`` so the check is numeric and
    works whether the API returns the counter as a str or an int.
    """
    open_replica(replica1)
    open_replica(replica2)

    replicas = controller.list_replica()
    assert len(replicas) == 0

    v = controller.list_volume()[0]
    v = v.start(replicas=[common.REPLICA1, common.REPLICA2])
    assert v.replicaCount == 2

    replicas = controller.list_replica()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev()

    # 10 threads x 100 writes of 128 bytes each.
    common.verify_async(dev, 10, 128, 100)

    r1 = replica1.list_replica()[0]
    r2 = replica2.list_replica()[0]
    # kernel can merge requests so backend may not receive 1000 writes
    assert int(r1.revisioncounter) > 0
    assert r1.revisioncounter == r2.revisioncounter
def test_ha_single_replica_failure(controller, replica1, replica2):  # NOQA
    """Legacy-API variant (single-line replica list): fail one replica and
    verify the volume keeps serving I/O with the other marked ERR."""
    open_replica(replica1)
    open_replica(replica2)

    replicas = controller.list_replica()
    assert len(replicas) == 0

    v = controller.list_volume()[0]
    v = v.start(replicas=[common.REPLICA1, common.REPLICA2])
    assert v.replicaCount == 2

    replicas = controller.list_replica()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev()

    data = common.random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    # Take replica2 down; subsequent writes should fault it out.
    cleanup_replica(replica2)

    verify_async(dev, 10, 128, 1)

    common.verify_replica_state(controller, 1, "ERR")

    verify_read(dev, data_offset, data)
def test_frontend_show(
        grpc_engine_manager, grpc_controller,  # NOQA
        grpc_replica1, grpc_replica2):  # NOQA
    """Verify the frontend endpoint reported by the volume, the engine
    manager, and the CLI all agree."""
    open_replica(grpc_replica1)
    open_replica(grpc_replica2)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    r1_url = grpc_replica1.url
    r2_url = grpc_replica2.url
    v = grpc_controller.volume_start(replicas=[r1_url, r2_url])

    # Endpoint form depends on the frontend type.
    ft = v.frontend
    if ft == "tgt":
        assert v.endpoint == path.join(LONGHORN_DEV_DIR, VOLUME_NAME)
    elif ft == "socket":
        assert v.endpoint == get_socket_path(VOLUME_NAME)

    engine = grpc_engine_manager.engine_get(ENGINE_NAME)
    assert engine.status.endpoint == path.join(LONGHORN_DEV_DIR, VOLUME_NAME)

    info = cmd.info_get(grpc_controller.address)
    assert info["name"] == VOLUME_NAME
    assert info["endpoint"] == v.endpoint
def test_frontend_show(
        grpc_controller,  # NOQA
        grpc_replica1, grpc_replica2):  # NOQA
    """Launcher variant: verify the frontend endpoint reported by the
    volume, the launcher, and the CLI all agree."""
    common.open_replica(grpc_replica1)
    common.open_replica(grpc_replica2)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    v = grpc_controller.volume_start(
        replicas=[common.REPLICA1, common.REPLICA2])

    # Endpoint form depends on the frontend type.
    ft = v.frontend
    if ft == "tgt" or ft == "tcmu":
        assert v.endpoint == path.join(common.LONGHORN_DEV_DIR,
                                       common.VOLUME_NAME)
    elif ft == "socket":
        assert v.endpoint == common.get_socket_path(common.VOLUME_NAME)

    launcher_info = launcher.info()
    assert launcher_info["endpoint"] == path.join(common.LONGHORN_DEV_DIR,
                                                  common.VOLUME_NAME)

    info = cmd.info()
    assert info["name"] == common.VOLUME_NAME
    assert info["endpoint"] == v.endpoint
def test_ha_remove_extra_disks(
        grpc_controller,  # NOQA
        grpc_replica1, grpc_replica2):  # NOQA
    """Verify that rebuilding replica1 (which carries an extra snapshot)
    into a volume now backed only by replica2 discards the extra disk
    and does not corrupt the current data.

    Fix: dropped a leftover debug ``print`` of the reloaded replica; the
    ``replica_reload()`` call is kept for its side effect.
    """
    address = grpc_controller.address

    prepare_backup_dir(BACKUP_DIR)
    open_replica(grpc_replica1)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    # Start the volume with replica1 only and write throwaway data.
    r1_url = grpc_replica1.url
    v = grpc_controller.volume_start(replicas=[r1_url])
    assert v.name == VOLUME_NAME
    assert v.replicaCount == 1

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 1
    assert replicas[0].mode == "RW"

    dev = get_blockdev(VOLUME_NAME)

    wasted_data = random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, wasted_data)

    # now replica1 contains extra data in a snapshot
    cmd.snapshot_create(address)

    # Restart the volume on replica2 only, so replica1's snapshot becomes
    # an extra disk relative to the live volume.
    cleanup_controller(grpc_controller)

    open_replica(grpc_replica2)
    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    r2_url = grpc_replica2.url
    v = grpc_controller.volume_start(replicas=[r2_url])
    assert v.name == VOLUME_NAME
    assert v.replicaCount == 1

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 1
    assert replicas[0].mode == "RW"

    dev = get_blockdev(VOLUME_NAME)

    data = random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    # Refresh replica1's on-disk state, then rebuild it into the volume;
    # the current data must survive the rebuild.
    grpc_replica1.replica_reload()
    cmd.add_replica(address, r1_url)

    verify_data(dev, data_offset, data)
def test_ha_remove_extra_disks(controller, replica1, replica2):  # NOQA
    """Legacy-API variant: rebuild replica1 (which carries an extra
    snapshot) into a volume now backed only by replica2 and verify the
    current data survives."""
    prepare_backup_dir(BACKUP_DIR)
    open_replica(replica1)

    replicas = controller.list_replica()
    assert len(replicas) == 0

    # Start the volume with replica1 only and write throwaway data.
    v = controller.list_volume()[0]
    v = v.start(replicas=[
        common.REPLICA1,
    ])
    assert v.replicaCount == 1

    replicas = controller.list_replica()
    assert len(replicas) == 1
    assert replicas[0].mode == "RW"

    dev = get_blockdev()

    wasted_data = common.random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, wasted_data)

    # now replica1 contains extra data in a snapshot
    cmd.snapshot_create()

    # Restart the volume on replica2 only.
    common.cleanup_controller(controller)

    open_replica(replica2)
    replicas = controller.list_replica()
    assert len(replicas) == 0

    v = controller.list_volume()[0]
    v = v.start(replicas=[
        common.REPLICA2,
    ])
    assert v.replicaCount == 1

    replicas = controller.list_replica()
    assert len(replicas) == 1
    assert replicas[0].mode == "RW"

    dev = get_blockdev()

    data = common.random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    # Reload replica1's state, then rebuild it into the volume.
    r = replica1.list_replica()[0]
    replica1 = replica1.reload(r)
    print(replica1)  # NOTE(review): debug output left in place
    cmd.add_replica(common.REPLICA1)

    verify_data(dev, data_offset, data)
def test_frontend_show(controller, replica1, replica2):  # NOQA
    """Legacy-API variant (blockdev frontend only): verify the volume and
    the CLI report the same endpoint."""
    common.open_replica(replica1)
    common.open_replica(replica2)

    replicas = controller.list_replica()
    assert len(replicas) == 0

    v = controller.list_volume()[0]
    v = v.start(replicas=[common.REPLICA1, common.REPLICA2])

    assert v["endpoint"] == path.join(common.LONGHORN_DEV_DIR,
                                      common.VOLUME_NAME)

    info = cmd.info()
    assert info["name"] == common.VOLUME_NAME
    assert info["endpoint"] == v["endpoint"]
def start_no_frontend_volume(c, r1, r2):
    """Helper: bring up a standby (frontend-less) volume on the two given
    replica clients, leaving its frontend state "down"."""
    launcher.start_engine_frontend(FRONTEND_TGT_BLOCKDEV,
                                   url=LAUNCHER_NO_FRONTEND)

    open_replica(r1)
    open_replica(r2)

    standby_replicas = c.list_replica()
    assert len(standby_replicas) == 0

    v = c.list_volume()[0]
    v = v.start(replicas=[common.STANDBY_REPLICA1,
                          common.STANDBY_REPLICA2])
    assert v.replicaCount == 2

    # Shut the frontend down again so the volume is standby-only.
    launcher.shutdown_engine_frontend(url=LAUNCHER_NO_FRONTEND)
    v = c.list_volume()[0]
    assert v.frontendState == "down"
def start_no_frontend_volume(grpc_c, grpc_r1, grpc_r2):
    """Helper (gRPC variant): bring up a standby (frontend-less) volume on
    the two given replica clients, leaving its frontend state "down"."""
    launcher.start_engine_frontend(FRONTEND_TGT_BLOCKDEV,
                                   url=LAUNCHER_NO_FRONTEND)

    open_replica(grpc_r1)
    open_replica(grpc_r2)

    standby_replicas = grpc_c.replica_list()
    assert len(standby_replicas) == 0

    # NOTE(review): result discarded — presumably a liveness probe of the
    # controller before starting; confirm it is still needed.
    grpc_c.volume_get()
    v = grpc_c.volume_start(
        replicas=[common.STANDBY_REPLICA1, common.STANDBY_REPLICA2])
    assert v.replicaCount == 2

    # Shut the frontend down again so the volume is standby-only.
    launcher.shutdown_engine_frontend(url=LAUNCHER_NO_FRONTEND)
    v = grpc_c.volume_get()
    assert v.frontendState == "down"
def test_cleanup_leftover_blockdev(
        grpc_controller,  # NOQA
        grpc_replica1, grpc_replica2):  # NOQA
    """Plant a stale file at the volume's block-device path and verify the
    volume still starts (the leftover node is cleaned up)."""
    common.open_replica(grpc_replica1)
    common.open_replica(grpc_replica2)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    # Create the leftover file where the block device will be created.
    blockdev = path.join(frontend.LONGHORN_DEV_DIR, common.VOLUME_NAME)
    assert not path.exists(blockdev)
    open(blockdev, 'a').close()

    grpc_controller.volume_start(replicas=[common.REPLICA1, common.REPLICA2])

    info = cmd.info()
    assert info["name"] == common.VOLUME_NAME
def start_no_frontend_volume(grpc_em, grpc_c, grpc_r1, grpc_r2):
    """Helper (engine-manager variant): bring up a DR (frontend-less)
    volume on the two given replica clients, leaving the frontend down."""
    grpc_em.frontend_start(ENGINE_NO_FRONTEND_NAME,
                           FRONTEND_TGT_BLOCKDEV)

    open_replica(grpc_r1)
    open_replica(grpc_r2)

    dr_replicas = grpc_c.replica_list()
    assert len(dr_replicas) == 0

    r1_url = grpc_r1.url
    r2_url = grpc_r2.url
    v = grpc_c.volume_start(replicas=[r1_url, r2_url])
    assert v.replicaCount == 2

    # Shut the frontend down again so the volume is DR-only.
    grpc_em.frontend_shutdown(ENGINE_NO_FRONTEND_NAME)
    v = grpc_c.volume_get()
    assert v.frontendState == "down"

    # The engine spec must also reflect that no frontend is attached.
    ep = grpc_em.engine_get(ENGINE_NO_FRONTEND_NAME)
    assert ep.spec.frontend == ""
def test_frontend_switch(grpc_controller_no_frontend,  # NOQA
                         grpc_replica1, grpc_replica2):  # NOQA
    """Launcher variant: start a frontend-less volume, attach/detach/
    re-attach the frontend via the launcher, and verify frontendState
    transitions and data durability across the cycle."""
    open_replica(grpc_replica1)
    open_replica(grpc_replica2)

    replicas = grpc_controller_no_frontend.replica_list()
    assert len(replicas) == 0

    v = grpc_controller_no_frontend.volume_start(replicas=[
        common.REPLICA1,
        common.REPLICA2
    ])
    assert v.replicaCount == 2
    # Volume was started with no frontend attached.
    assert v.frontend == ""

    launcher.start_engine_frontend(FRONTEND_TGT_BLOCKDEV,
                                   url=LAUNCHER_NO_FRONTEND)
    v = grpc_controller_no_frontend.volume_get()
    assert v.frontend != ""

    dev = get_blockdev(volume=VOLUME2_NAME)

    data = common.random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    # Frontend shutdown leaves the frontend configured but down.
    launcher.shutdown_engine_frontend(url=LAUNCHER_NO_FRONTEND)
    v = grpc_controller_no_frontend.volume_get()
    assert v.frontend != ""
    assert v.frontendState == "down"

    launcher.start_engine_frontend(FRONTEND_TGT_BLOCKDEV,
                                   url=LAUNCHER_NO_FRONTEND)
    v = grpc_controller_no_frontend.volume_get()
    assert v.frontend != ""
    assert v.frontendState == "up"

    # Data written before the bounce must still be readable.
    dev = get_blockdev(volume=VOLUME2_NAME)
    verify_read(dev, data_offset, data)

    launcher.shutdown_engine_frontend(url=LAUNCHER_NO_FRONTEND)
def test_cleanup_leftover_blockdev(
        grpc_controller,  # NOQA
        grpc_replica1, grpc_replica2):  # NOQA
    """URL variant: plant a stale file at the volume's block-device path
    and verify the volume still starts."""
    open_replica(grpc_replica1)
    open_replica(grpc_replica2)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    # Create the leftover file where the block device will be created.
    blockdev = path.join(LONGHORN_DEV_DIR, VOLUME_NAME)
    assert not path.exists(blockdev)
    open(blockdev, 'a').close()

    r1_url = grpc_replica1.url
    r2_url = grpc_replica2.url
    grpc_controller.volume_start(replicas=[r1_url, r2_url])

    info = cmd.info_get(grpc_controller.address)
    assert info["name"] == VOLUME_NAME
def test_ha_single_replica_rebuild(controller, replica1, replica2):  # NOQA
    """Legacy-API variant: fail replica2, rebuild it via add_replica, and
    verify data integrity plus snapshot bookkeeping after a purge."""
    open_replica(replica1)
    open_replica(replica2)

    replicas = controller.list_replica()
    assert len(replicas) == 0

    v = controller.list_volume()[0]
    v = v.start(replicas=[common.REPLICA1, common.REPLICA2])
    assert v.replicaCount == 2

    replicas = controller.list_replica()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev()

    data = common.random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    # Cleanup replica2
    cleanup_replica(replica2)

    verify_async(dev, 10, 128, 1)

    common.verify_replica_state(controller, 1, "ERR")

    verify_read(dev, data_offset, data)

    controller.delete(replicas[1])

    # Rebuild replica2
    common.open_replica(replica2)
    cmd.add_replica(common.REPLICA2)

    verify_async(dev, 10, 128, 1)

    common.verify_replica_state(controller, 1, "RW")

    verify_read(dev, data_offset, data)

    # WORKAROUND for unable to remove the parent of volume head
    newsnap = cmd.snapshot_create()

    # Three entries: system snapshot, new snapshot, volume head.
    info = cmd.snapshot_info()
    assert len(info) == 3
    sysnap = info[newsnap]["parent"]
    assert info[sysnap]["parent"] == ""
    assert newsnap in info[sysnap]["children"]
    assert info[sysnap]["usercreated"] is False
    assert info[sysnap]["removed"] is False

    # Purge collapses the system snapshot away.
    cmd.snapshot_purge()
    info = cmd.snapshot_info()
    assert len(info) == 2
    assert info[newsnap] is not None
    assert info[VOLUME_HEAD] is not None
def test_snapshot_tree_rebuild(
        grpc_controller,  # NOQA
        grpc_replica1, grpc_replica2):  # NOQA
    """Build a snapshot tree, fail and rebuild replica2, and verify the
    whole tree survives the rebuild intact."""
    address = grpc_controller.address

    offset = 0
    length = 128

    open_replica(grpc_replica1)
    open_replica(grpc_replica2)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    r1_url = grpc_replica1.url
    r2_url = grpc_replica2.url
    v = grpc_controller.volume_start(replicas=[r1_url, r2_url])
    assert v.name == VOLUME_NAME
    assert v.replicaCount == 2

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev(VOLUME_NAME)

    snap, snap_data = snapshot_tree_build(dev, address, ENGINE_NAME,
                                          offset, length)

    data = random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    # Cleanup replica2
    cleanup_replica(grpc_replica2)

    verify_async(dev, 10, 128, 1)

    verify_replica_state(grpc_controller, 1, "ERR")

    verify_read(dev, data_offset, data)

    grpc_controller.replica_delete(replicas[1].address)

    # Rebuild replica2
    open_replica(grpc_replica2)
    cmd.add_replica(address, r2_url)

    verify_async(dev, 10, 128, 1)

    verify_replica_state(grpc_controller, 1, "RW")

    snapshot_tree_verify(dev, address, ENGINE_NAME,
                         offset, length, snap, snap_data)
def test_snapshot_tree_rebuild(controller, replica1, replica2):  # NOQA
    """Legacy-API variant: build a snapshot tree, fail and rebuild
    replica2, and verify the tree survives the rebuild intact."""
    offset = 0
    length = 128

    open_replica(replica1)
    open_replica(replica2)

    replicas = controller.list_replica()
    assert len(replicas) == 0

    v = controller.list_volume()[0]
    v = v.start(replicas=[
        common.REPLICA1,
        common.REPLICA2
    ])
    assert v.replicaCount == 2

    replicas = controller.list_replica()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev()

    snap, snap_data = snapshot_tree_build(dev, offset, length)

    data = common.random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    # Cleanup replica2
    cleanup_replica(replica2)

    verify_async(dev, 10, 128, 1)

    common.verify_replica_state(controller, 1, "ERR")

    verify_read(dev, data_offset, data)

    controller.delete(replicas[1])

    # Rebuild replica2
    common.open_replica(replica2)
    cmd.add_replica(common.REPLICA2)

    verify_async(dev, 10, 128, 1)

    common.verify_replica_state(controller, 1, "RW")

    snapshot_tree_verify(dev, offset, length, snap, snap_data)
def test_snapshot_tree_rebuild(
        grpc_controller,  # NOQA
        grpc_replica1, grpc_replica2):  # NOQA
    """common.REPLICA variant: build a snapshot tree, fail and rebuild
    replica2, and verify the tree survives the rebuild intact."""
    offset = 0
    length = 128

    open_replica(grpc_replica1)
    open_replica(grpc_replica2)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    v = grpc_controller.volume_start(
        replicas=[common.REPLICA1, common.REPLICA2])
    assert v.replicaCount == 2

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev()

    snap, snap_data = snapshot_tree_build(dev, offset, length)

    data = common.random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    # Cleanup replica2
    cleanup_replica(grpc_replica2)

    verify_async(dev, 10, 128, 1)

    common.verify_replica_state(grpc_controller, 1, "ERR")

    verify_read(dev, data_offset, data)

    grpc_controller.replica_delete(replicas[1].address)

    # Rebuild replica2
    open_replica(grpc_replica2)
    cmd.add_replica(common.REPLICA2)

    verify_async(dev, 10, 128, 1)

    common.verify_replica_state(grpc_controller, 1, "RW")

    snapshot_tree_verify(dev, offset, length, snap, snap_data)
def test_upgrade(
        grpc_engine_manager,  # NOQA
        grpc_controller,  # NOQA
        grpc_fixed_dir_replica1, grpc_fixed_dir_replica2,  # NOQA
        grpc_extra_replica1, grpc_extra_replica2):  # NOQA
    """Live-upgrade the engine binary (and back), checking data integrity
    after each transition and that invalid upgrades are rejected."""
    dev = get_dev(grpc_fixed_dir_replica1, grpc_fixed_dir_replica2,
                  grpc_controller)

    offset = 0
    length = 128

    data = random_string(length)
    verify_data(dev, offset, data)

    # both set pointed to the same volume underlying
    r1_url = grpc_fixed_dir_replica1.url
    r2_url = grpc_fixed_dir_replica2.url
    upgrade_r1_url = grpc_extra_replica1.url
    upgrade_r2_url = grpc_extra_replica2.url

    v = grpc_controller.volume_start(replicas=[r1_url, r2_url])
    assert v.replicaCount == 2

    # Upgrade to the new binary on the extra replica set.
    upgrade_e = grpc_engine_manager.engine_upgrade(
        ENGINE_NAME, LONGHORN_UPGRADE_BINARY,
        SIZE, [upgrade_r1_url, upgrade_r2_url])
    assert upgrade_e.spec.binary == LONGHORN_UPGRADE_BINARY

    verify_data(dev, offset, data)

    grpc_controller.client_upgrade(upgrade_e.spec.listen)
    wait_for_process_running(grpc_engine_manager, ENGINE_NAME,
                             INSTANCE_MANAGER_TYPE_ENGINE)

    # cannot start with same binary
    with pytest.raises(grpc.RpcError):
        grpc_engine_manager.engine_upgrade(
            ENGINE_NAME, LONGHORN_UPGRADE_BINARY, SIZE, [r1_url, r2_url])
    verify_data(dev, offset, data)

    # cannot start with wrong replica, would trigger rollback
    with pytest.raises(grpc.RpcError):
        grpc_engine_manager.engine_upgrade(
            ENGINE_NAME, LONGHORN_UPGRADE_BINARY, SIZE, ["random"])
    verify_data(dev, offset, data)

    # Recreate the original replicas, then downgrade back to the
    # original binary.
    grpc_fixed_dir_replica1 = cleanup_replica(grpc_fixed_dir_replica1)
    grpc_fixed_dir_replica2 = cleanup_replica(grpc_fixed_dir_replica2)
    open_replica(grpc_fixed_dir_replica1)
    open_replica(grpc_fixed_dir_replica2)

    e = grpc_engine_manager.engine_upgrade(
        ENGINE_NAME, LONGHORN_BINARY, SIZE, [r1_url, r2_url])
    assert e.spec.binary == LONGHORN_BINARY

    verify_data(dev, offset, data)

    grpc_controller.client_upgrade(e.spec.listen)
    wait_for_process_running(grpc_engine_manager, ENGINE_NAME,
                             INSTANCE_MANAGER_TYPE_ENGINE)
def test_ha_double_replica_rebuild(controller, replica1, replica2):  # NOQA
    """Legacy-API variant (string revision counters): fail both replicas
    in turn, restart the volume, and verify the stale replica is rejected
    and then rebuilt back into sync."""
    open_replica(replica1)
    open_replica(replica2)

    replicas = controller.list_replica()
    assert len(replicas) == 0

    v = controller.list_volume()[0]
    v = v.start(replicas=[common.REPLICA1, common.REPLICA2])
    assert v.replicaCount == 2

    replicas = controller.list_replica()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev()

    data1 = common.random_string(128)
    data1_offset = 1024
    verify_data(dev, data1_offset, data1)

    # Close replica2
    # NOTE: this API reports revision counters as strings.
    r2 = replica2.list_replica()[0]
    assert r2.revisioncounter == '1'
    r2.close()

    verify_async(dev, 10, 128, 1)

    common.verify_replica_state(controller, 1, "ERR")

    verify_read(dev, data1_offset, data1)

    data2 = common.random_string(128)
    data2_offset = 512
    verify_data(dev, data2_offset, data2)

    # Close replica1
    r1 = replica1.list_replica()[0]
    assert r1.revisioncounter == '12'  # 1 + 10 + 1
    r1.close()

    # Restart volume
    common.cleanup_controller(controller)

    replicas = controller.list_replica()
    assert len(replicas) == 0

    v = controller.list_volume()[0]
    # NOTE the order is reversed here
    v = v.start(replicas=[common.REPLICA2, common.REPLICA1])
    assert v.replicaCount == 2

    # replica2 is out because of lower revision counter
    replicas = controller.list_replica()
    assert len(replicas) == 2
    assert replicas[0].mode == "ERR"
    assert replicas[1].mode == "RW"

    verify_read(dev, data1_offset, data1)
    verify_read(dev, data2_offset, data2)

    # Rebuild replica2
    r2 = replica2.list_replica()[0]
    assert r2.revisioncounter == '1'
    r2.close()

    controller.delete(replicas[0])

    cmd.add_replica(common.REPLICA2)

    verify_async(dev, 10, 128, 1)

    common.verify_replica_state(controller, 1, "RW")

    verify_read(dev, data1_offset, data1)
    verify_read(dev, data2_offset, data2)

    r1 = replica1.list_replica()[0]
    r2 = replica2.list_replica()[0]
    assert r1.revisioncounter == '22'  # 1 + 10 + 1 + 10
    assert r2.revisioncounter == '22'  # must be in sync with r1
def test_ha_double_replica_rebuild(controller, replica1, replica2):  # NOQA
    """Legacy-API variant (integer revision counters): fail both replicas
    in turn, restart the volume, and verify the stale replica is rejected
    and then rebuilt back into sync."""
    open_replica(replica1)
    open_replica(replica2)

    replicas = controller.list_replica()
    assert len(replicas) == 0

    v = controller.list_volume()[0]
    v = v.start(replicas=[
        common.REPLICA1,
        common.REPLICA2
    ])
    assert v.replicaCount == 2

    replicas = controller.list_replica()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev()

    data1 = common.random_string(128)
    data1_offset = 1024
    verify_data(dev, data1_offset, data1)

    # Close replica2
    r2 = replica2.list_replica()[0]
    assert r2.revisioncounter == 1
    r2.close()

    verify_async(dev, 10, 128, 1)

    common.verify_replica_state(controller, 1, "ERR")

    verify_read(dev, data1_offset, data1)

    data2 = common.random_string(128)
    data2_offset = 512
    verify_data(dev, data2_offset, data2)

    # Close replica1
    r1 = replica1.list_replica()[0]
    assert r1.revisioncounter == 12  # 1 + 10 + 1
    r1.close()

    # Restart volume
    common.cleanup_controller(controller)

    replicas = controller.list_replica()
    assert len(replicas) == 0

    v = controller.list_volume()[0]
    # NOTE the order is reversed here
    v = v.start(replicas=[
        common.REPLICA2,
        common.REPLICA1
    ])
    assert v.replicaCount == 2

    # replica2 is out because of lower revision counter
    replicas = controller.list_replica()
    assert len(replicas) == 2
    assert replicas[0].mode == "ERR"
    assert replicas[1].mode == "RW"

    verify_read(dev, data1_offset, data1)
    verify_read(dev, data2_offset, data2)

    # Rebuild replica2
    r2 = replica2.list_replica()[0]
    assert r2.revisioncounter == 1
    r2.close()

    controller.delete(replicas[0])

    cmd.add_replica(common.REPLICA2)

    verify_async(dev, 10, 128, 1)

    common.verify_replica_state(controller, 1, "RW")

    verify_read(dev, data1_offset, data1)
    verify_read(dev, data2_offset, data2)

    r1 = replica1.list_replica()[0]
    r2 = replica2.list_replica()[0]
    assert r1.revisioncounter == 22  # 1 + 10 + 1 + 10
    assert r2.revisioncounter == 22  # must be in sync with r1
def test_ha_single_replica_rebuild(controller, replica1, replica2):  # NOQA
    """Legacy-API variant (multi-line replica list): fail replica2,
    rebuild it, and verify data plus snapshot bookkeeping after purge."""
    open_replica(replica1)
    open_replica(replica2)

    replicas = controller.list_replica()
    assert len(replicas) == 0

    v = controller.list_volume()[0]
    v = v.start(replicas=[
        common.REPLICA1,
        common.REPLICA2
    ])
    assert v.replicaCount == 2

    replicas = controller.list_replica()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev()

    data = common.random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    # Cleanup replica2
    cleanup_replica(replica2)

    verify_async(dev, 10, 128, 1)

    common.verify_replica_state(controller, 1, "ERR")

    verify_read(dev, data_offset, data)

    controller.delete(replicas[1])

    # Rebuild replica2
    common.open_replica(replica2)
    cmd.add_replica(common.REPLICA2)

    verify_async(dev, 10, 128, 1)

    common.verify_replica_state(controller, 1, "RW")

    verify_read(dev, data_offset, data)

    # WORKAROUND for unable to remove the parent of volume head
    newsnap = cmd.snapshot_create()

    # Three entries: system snapshot, new snapshot, volume head.
    info = cmd.snapshot_info()
    assert len(info) == 3
    sysnap = info[newsnap]["parent"]
    assert info[sysnap]["parent"] == ""
    assert newsnap in info[sysnap]["children"]
    assert info[sysnap]["usercreated"] is False
    assert info[sysnap]["removed"] is False

    # Purge collapses the system snapshot away.
    cmd.snapshot_purge()
    info = cmd.snapshot_info()
    assert len(info) == 2
    assert info[newsnap] is not None
    assert info[VOLUME_HEAD] is not None
def test_ha_double_replica_rebuild(
        grpc_controller,  # NOQA
        grpc_replica1, grpc_replica2):  # NOQA
    """gRPC variant: fail both replicas in turn, restart the volume, and
    verify the stale replica is rejected and then rebuilt into sync.

    (Rejoined: this function was split across two physical lines, the
    trailing ``assert`` continuing on the next line.)
    """
    open_replica(grpc_replica1)
    open_replica(grpc_replica2)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    v = grpc_controller.volume_start(
        replicas=[common.REPLICA1, common.REPLICA2])
    assert v.replicaCount == 2

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev()

    data1 = common.random_string(128)
    data1_offset = 1024
    verify_data(dev, data1_offset, data1)

    # Close replica2
    r2 = grpc_replica2.replica_get()
    assert r2.revisionCounter == 1
    grpc_replica2.replica_close()

    verify_async(dev, 10, 128, 1)

    common.verify_replica_state(grpc_controller, 1, "ERR")

    verify_read(dev, data1_offset, data1)

    data2 = common.random_string(128)
    data2_offset = 512
    verify_data(dev, data2_offset, data2)

    # Close replica1
    r1 = grpc_replica1.replica_get()
    assert r1.revisionCounter == 12  # 1 + 10 + 1
    grpc_replica1.replica_close()

    # Restart volume
    common.cleanup_controller(grpc_controller)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    # NOTE the order is reversed here
    v = grpc_controller.volume_start(
        replicas=[common.REPLICA2, common.REPLICA1])
    assert v.replicaCount == 2

    # replica2 is out because of lower revision counter
    replicas = grpc_controller.replica_list()
    assert len(replicas) == 2
    assert replicas[0].mode == "ERR"
    assert replicas[1].mode == "RW"

    verify_read(dev, data1_offset, data1)
    verify_read(dev, data2_offset, data2)

    # Rebuild replica2
    r2 = grpc_replica2.replica_get()
    assert r2.revisionCounter == 1
    grpc_replica2.replica_close()

    grpc_controller.replica_delete(replicas[0].address)

    cmd.add_replica(common.REPLICA2)

    verify_async(dev, 10, 128, 1)

    common.verify_replica_state(grpc_controller, 1, "RW")

    verify_read(dev, data1_offset, data1)
    verify_read(dev, data2_offset, data2)

    r1 = grpc_replica1.replica_get()
    r2 = grpc_replica2.replica_get()
    assert r1.revisionCounter == 22  # 1 + 10 + 1 + 10
    assert r2.revisionCounter == 22  # must be in sync with r1
def test_ha_single_replica_rebuild(
        grpc_controller,  # NOQA
        grpc_replica1, grpc_replica2):  # NOQA
    """gRPC variant: fail replica2, rebuild it via add_replica, and verify
    data integrity plus snapshot bookkeeping after a purge."""
    address = grpc_controller.address

    open_replica(grpc_replica1)
    open_replica(grpc_replica2)

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 0

    r1_url = grpc_replica1.url
    r2_url = grpc_replica2.url
    v = grpc_controller.volume_start(replicas=[r1_url, r2_url])
    assert v.replicaCount == 2

    replicas = grpc_controller.replica_list()
    assert len(replicas) == 2
    assert replicas[0].mode == "RW"
    assert replicas[1].mode == "RW"

    dev = get_blockdev(VOLUME_NAME)

    data = random_string(128)
    data_offset = 1024
    verify_data(dev, data_offset, data)

    # Cleanup replica2
    cleanup_replica(grpc_replica2)

    verify_async(dev, 10, 128, 1)

    verify_replica_state(grpc_controller, 1, "ERR")

    verify_read(dev, data_offset, data)

    grpc_controller.replica_delete(replicas[1].address)

    # Rebuild replica2
    open_replica(grpc_replica2)
    cmd.add_replica(address, r2_url)

    verify_async(dev, 10, 128, 1)

    verify_replica_state(grpc_controller, 1, "RW")

    verify_read(dev, data_offset, data)

    # WORKAROUND for unable to remove the parent of volume head
    newsnap = cmd.snapshot_create(address)

    # Three entries: system snapshot, new snapshot, volume head.
    info = cmd.snapshot_info(address)
    assert len(info) == 3
    sysnap = info[newsnap]["parent"]
    assert info[sysnap]["parent"] == ""
    assert newsnap in info[sysnap]["children"]
    assert info[sysnap]["usercreated"] is False
    assert info[sysnap]["removed"] is False

    # Purge is async here; wait before re-inspecting the snapshot chain.
    cmd.snapshot_purge(address)
    wait_for_purge_completion(address)
    info = cmd.snapshot_info(address)
    assert len(info) == 2
    assert info[newsnap] is not None
    assert info[VOLUME_HEAD] is not None