Example #1
0
def test_structure_from_json():
    """Build a PartitionStructure from a JSON-style dict and check its fields.

    Verifies the number of signal dimensions, the full shape, the slices
    (compared against a list of tuples) and the dtype of the result.
    """
    parsed = PartitionStructure.from_json({
        "version": 1,
        "slices": [[0, 127], [128, 256]],
        "shape": [16, 16, 128, 128],
        "sig_dims": 2,
        "dtype": "float32",
    })

    expected_shape = (16, 16, 128, 128)
    # the input uses nested lists; the comparison is against tuples
    expected_slices = [(0, 127), (128, 256)]

    assert parsed.shape.sig.dims == 2
    assert tuple(parsed.shape) == expected_shape
    assert parsed.slices == expected_slices
    assert parsed.dtype == np.dtype("float32")
Example #2
0
def test_initialization(draw_directory, lt_ctx):
    """Initialize a ClusterDataSet and check the resulting directory contents.

    After ``initialize`` and ``check_valid`` succeed, the dataset directory
    is expected to contain exactly ``parts`` and ``structure.json``.
    """
    description = {
        "version": 1,
        "slices": [[0, 8], [16, 256]],
        "shape": [4, 4, 128, 128],
        "sig_dims": 2,
        "dtype": "float32",
    }
    dataset = ClusterDataSet(
        path=str(draw_directory),
        structure=PartitionStructure.from_json(description),
    )
    dataset = dataset.initialize(lt_ctx.executor)
    dataset.check_valid()

    entries = set(os.listdir(draw_directory))
    assert entries == {"parts", "structure.json"}
Example #3
0
def test_missing_sidecars_are_created(draw_directory, dist_ctx):
    """Initialize a ClusterDataSet via the distributed executor and check
    that every host ends up with ``parts`` and ``structure.json``.
    """
    # The fixture value is converted to a plain string up front: the path is
    # captured by a function that runs on the worker containers, which
    # don't have pytest installed, so shipping a pytest path object there
    # failed.
    path = str(draw_directory)

    description = {
        "version": 1,
        "slices": [[0, 8], [16, 256]],
        "shape": [4, 4, 128, 128],
        "sig_dims": 2,
        "dtype": "float32",
    }
    dataset = ClusterDataSet(
        path=path,
        structure=PartitionStructure.from_json(description),
    )
    dataset = dataset.initialize(dist_ctx.executor)
    dataset.check_valid()

    def _list_directory():
        return os.listdir(path)

    per_host_listing = dist_ctx.executor.run_each_host(_list_directory)

    for entries in per_host_listing.values():
        assert set(entries) == {"parts", "structure.json"}
Example #4
0
def test_inconsistent_sidecars_raise_error(dist_ctx, draw_directory):
    """Re-initializing a dataset whose sidecars disagree must raise.

    The test first creates a consistent ClusterDataSet, then overwrites
    ``structure.json`` on each host with host-specific content so that the
    hosts no longer agree, and finally expects ``initialize`` to raise a
    ``DataSetException`` mentioning the inconsistent sidecars.
    """
    # The fixture value is converted to a plain string up front: the path is
    # captured by functions that run on the worker containers, which
    # don't have pytest installed, so shipping a pytest path object there
    # failed.
    draw_directory = str(draw_directory)

    # first, create a consistent ClusterDataSet:
    data = {
        "version": 1,
        "slices": [[0, 4], [5, 16]],
        "shape": [4, 4, 128, 128],
        "sig_dims": 2,
        "dtype": "float32",
    }
    structure = PartitionStructure.from_json(data)
    ds = ClusterDataSet(path=draw_directory, structure=structure)
    ds = ds.initialize(dist_ctx.executor)
    ds.check_valid()

    # then, scramble the sidecars, creating an inconsistent state.
    # The replacement content is looked up by hostname: "worker-1" gets
    # different slices and dtype, "worker-2" keeps the original structure.
    scrambled_by_host = {
        "worker-1": {
            "version": 1,
            "slices": [[0, 8], [9, 16]],
            "shape": [4, 4, 128, 128],
            "sig_dims": 2,
            "dtype": "int32",
        },
        "worker-2": {
            "version": 1,
            "slices": [[0, 4], [5, 16]],
            "shape": [4, 4, 128, 128],
            "sig_dims": 2,
            "dtype": "float32",
        },
    }

    sidecar_path = os.path.join(draw_directory, 'structure.json')

    def _scramble_sidecars():
        # runs on each host: overwrite the local sidecar with the content
        # registered for this host's name
        import socket
        sidecar_data = scrambled_by_host[socket.gethostname()]
        print(sidecar_path, os.listdir(os.path.dirname(sidecar_path)))
        with open(sidecar_path, "w") as fh:
            json.dump(sidecar_data, fh)
        print(sidecar_path, os.listdir(os.path.dirname(sidecar_path)))

    def _list_directory():
        return os.listdir(draw_directory)

    def _read_sidecar():
        # use a context manager so the file handle is closed on the host
        # instead of being left for the garbage collector
        with open(sidecar_path) as fh:
            return fh.read()

    # diagnostic output, to help debugging when this test fails:
    print(list(dist_ctx.executor.run_each_host(_scramble_sidecars)))

    print(draw_directory)

    dirlists = dist_ctx.executor.run_each_host(_list_directory)
    for v in dirlists.values():
        print(set(v))

    sidecars = dist_ctx.executor.run_each_host(_read_sidecar)
    print(sidecars)

    # try to re-open the dataset, should fail:
    ds = ClusterDataSet(path=draw_directory, structure=structure)

    with pytest.raises(DataSetException) as e:
        ds.initialize(dist_ctx.executor)

    assert e.match('inconsistent sidecars, please inspect')