def test_default_reader(resources_dir, filename, expected_shape, expected_dims,
                        expected_chunksize, expected_task_count):
    # Get file
    f = resources_dir / filename

    # Read file
    img = DefaultReader(f)

    # Check that there are no open file pointers after init
    proc = Process()
    assert str(f) not in [f.path for f in proc.open_files()]

    # Check basics
    with Profiler() as prof:
        assert img.dims == expected_dims
        assert img.metadata
        assert img.dask_data.shape == expected_shape
        assert img.dask_data.chunksize == expected_chunksize
        # Check that basic details don't require task computation
        assert len(prof.results) == 0

    # Check that there are no open file pointers after basics
    assert str(f) not in [f.path for f in proc.open_files()]

    # Check computed type is numpy array, computed shape is expected shape, and task count is expected
    with Profiler() as prof:
        assert isinstance(img.data, np.ndarray)
        assert img.data.shape == expected_shape
        assert len(prof.results) == expected_task_count

    # Check that there are no open file pointers after retrieval
    assert str(f) not in [f.path for f in proc.open_files()]
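A note on the pattern above: `Process` is not defined in the snippet; judging by the `open_files()` call, these tests presumably import it from psutil. A minimal, self-contained sketch of the same open-handle check, under that assumption:

from psutil import Process  # assumed import; not shown in the snippet above


def assert_file_closed(path):
    # psutil's open_files() returns namedtuples with a .path attribute
    open_paths = [f.path for f in Process().open_files()]
    assert str(path) not in open_paths, f"{path} is still open"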
Example #2
def test_aicsimage_serialize(
    resources_dir,
    tmpdir,
    filename,
    expected_shape,
    expected_metadata_type,
    expected_task_count,
):
    """
    Test that the entire AICSImage object can be serialized - a requirement to distribute on dask clusters.

    https://distributed.dask.org/en/latest/serialization.html
    """
    # Get file
    f = resources_dir / filename

    # Read file
    img = AICSImage(f)

    # Check that there are no open file pointers after init
    proc = Process()
    assert str(f) not in [f.path for f in proc.open_files()]

    # Check basics
    with Profiler() as prof:
        assert img.shape == expected_shape
        assert isinstance(img.metadata, expected_metadata_type)

        # Check that basic details don't require task computation
        assert len(prof.results) == 0

    # Check that there are no open file pointers after basics
    assert str(f) not in [f.path for f in proc.open_files()]

    # Serialize object
    serialized = pickle.dumps(img)

    # Reload
    img = pickle.loads(serialized)

    # Check computed type is numpy array, computed shape is expected shape, and task count is expected
    with Profiler() as prof:
        assert isinstance(img.data, np.ndarray)
        assert img.shape == expected_shape
        assert img.data.shape == expected_shape
        assert isinstance(img.metadata, expected_metadata_type)
        assert len(prof.results) == expected_task_count

    # Check that there are no open file pointers after retrieval
    assert str(f) not in [f.path for f in proc.open_files()]
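The docstring above ties serializability to dask's distributed scheduler, which pickles objects before shipping them to workers. A minimal sketch of the same pickle round-trip on a plain dask array (illustrative shapes, not taken from the tests):

import pickle

import dask.array as da

# Build a lazy array, round-trip it through pickle, then compute.
arr = da.ones((4, 4), chunks=(2, 2))
restored = pickle.loads(pickle.dumps(arr))  # no tasks run yet
assert restored.shape == arr.shape
assert (restored.compute() == 1).all()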
Example #3
def tpot(use_dask=True):
    # TODO: Add some documentation...
    # TODO: Investigate why tpot crashes when using Dask (probably a RAM problem).
    if use_dask:
        client = Client("tcp://192.168.1.94:8786")
        print(client)
    tpot_reg = TPOTRegressor(generations=TPOT_GENERATIONS,
                             population_size=TPOT_POPULATION_SIZE,
                             random_state=SEED,
                             cv=CV,
                             use_dask=use_dask,
                             verbosity=2,
                             memory="auto")
    df = pd.read_csv("elo/data/augmented_train.csv")
    print(df.sample(5))
    # TODO: Find a better way to impute inf and missing values.
    df = df.replace([np.inf, -np.inf], np.nan)
    df = df.fillna(df.median())
    X = df.drop(FEATS_EXCLUDED, axis=1, errors='ignore').values
    y = df.loc[:, "target"].values

    if use_dask:
        with ProgressBar() as pbar, Profiler() as prof:
            tpot_reg.fit(X, y)
    else:
        tpot_reg.fit(X, y)
    export_path = str(
        Path('elo/data/tpot_few_generations_augmented_dataset.py').absolute())
    tpot_reg.export(export_path)
    return tpot_reg
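The `Client("tcp://192.168.1.94:8786")` call above targets a scheduler specific to the author's network. For local experimentation, a LocalCluster can stand in; this is a sketch of an alternative setup, not part of the original script:

from dask.distributed import Client, LocalCluster

# Start an in-process scheduler and workers instead of connecting
# to a remote address; the worker counts here are illustrative.
cluster = LocalCluster(n_workers=2, threads_per_worker=1)
client = Client(cluster)
print(client)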
Example #4
def test_support_for_ndarray(arr):
    # Check basics
    with Profiler() as prof:
        actual_reader = AICSImage.determine_reader(arr)
        assert actual_reader == readers.ArrayLikeReader
        # Check that basic details don't require task computation
        assert len(prof.results) == 0
Example #5
def test_ome_tiff_reader(resources_dir, filename, expected_shape,
                         expected_dims, select_scene, expected_chunksize,
                         expected_task_count):
    # Get file
    f = resources_dir / filename

    # Read file
    img = OmeTiffReader(f, S=select_scene)

    # Check that there are no open file pointers after init
    proc = Process()
    assert str(f) not in [f.path for f in proc.open_files()]

    # Check basics
    with Profiler() as prof:
        # Check that OME Metadata matches the dask data array shape and dims order
        dim_size_getters = {
            Dimensions.Scene: img.size_s,
            Dimensions.Time: img.size_t,
            Dimensions.Channel: img.size_c,
            Dimensions.SpatialZ: img.size_z,
            Dimensions.SpatialY: img.size_y,
            Dimensions.SpatialX: img.size_x
        }
        for d, getter in dim_size_getters.items():
            if d in expected_dims:
                assert getter() == img.dask_data.shape[img.dims.index(d)]

        assert img.dims == expected_dims
        assert img.is_ome()
        assert img.metadata
        assert img.dask_data.shape == expected_shape
        assert img.dask_data.chunksize == expected_chunksize
        # Check that basic details don't require task computation
        assert len(prof.results) == 0

    # Check that there are no open file pointers after basics
    assert str(f) not in [f.path for f in proc.open_files()]

    # Check computed type is numpy array, computed shape is expected shape, and task count is expected
    with Profiler() as prof:
        assert isinstance(img.data, np.ndarray)
        assert img.data.shape == expected_shape
        assert len(prof.results) == expected_task_count

    # Check that there are no open file pointers after retrieval
    assert str(f) not in [f.path for f in proc.open_files()]
Example #6
def test_default_shape_expansion(data, expected):
    # Check basics
    with Profiler() as prof:
        img = AICSImage(data=data)
        assert img.dask_data.shape == expected
        assert img.shape == expected
        # Check that basic details don't require task computation
        assert len(prof.results) == 0
Example #7
def test_arraylike_reader(arr, expected_shape, expected_dims,
                          expected_chunksize, expected_task_count):
    # Init
    reader = ArrayLikeReader(arr)

    # Check basics
    with Profiler() as prof:
        assert reader.dims == expected_dims
        assert reader.metadata is None
        assert reader.dask_data.shape == expected_shape
        assert reader.dask_data.chunksize == expected_chunksize
        # Check that basic details don't require task computation
        assert len(prof.results) == 0

    # Check computed type is numpy array, computed shape is expected shape, and task count is expected
    with Profiler() as prof:
        assert isinstance(reader.data, np.ndarray)
        assert reader.data.shape == expected_shape
        assert len(prof.results) == expected_task_count
Example #8
def main():

    global sky
    global dirty
    global psf
     
    list_schedule = []
    list_compute = []
    list_total = []
    list_load = []
   
    start_time1 = time.time()
    sky_npy, sky = load_data(os.path.split(os.getcwd())[0] + '/sky.npy')
    dirty_npy, dirty = load_data(os.path.split(os.getcwd())[0] + '/dirty.npy')
    psf_npy, psf = load_data(os.path.split(os.getcwd())[0] + '/psf.npy')
    end_time1 = time.time()
        
    start_time2 = time.time()
    scheduling()
    end_time2 = time.time()

    pbar = ProgressBar()

    with Profiler() as prof, ResourceProfiler() as rprof, CacheProfiler() as cprof:
        start_time3 = time.time()
        hub.compute()
        end_time3 = time.time()

    # pbar.register()
    # quad.compute()
    # pbar.unregister()

    with PrintKeys():
        hub.compute()

    print("\n" + "Profiling results:")
    print(prof.results[0])
    print("\n" + "Memory usage values are in MB and CPU information is % CPU usage")
    print(rprof.results)
    print("\n" + "Cache profiling results:")
    print(cprof.results[0])

    visualize([prof, rprof, cprof])

    list_load.append(end_time1 - start_time1)
    list_schedule.append(end_time2 - start_time2)
    list_compute.append(end_time3 - start_time3)
    list_total.append(end_time3 - start_time1)    

    print("\n" + "Temps du code pous analyse")
    print('load time: {}'.format(round(sum(list_load)/len(list_load), 4)))
    print('scheduling time: {}'.format(round(sum(list_schedule)/len(list_schedule), 4)))
    print('compute time: {}'.format(round(sum(list_compute)/len(list_compute), 4)))
    print('total time: {}'.format(round(sum(list_total)/len(list_total), 4)))
Example #9
def finalSort(data):
    with pd.HDFStore(data + 'final/final.h5') as store:
        keys = store.keys()

    with ProgressBar(), Profiler() as prof:
        with pd.HDFStore(data + 'final/final-sorted.h5', complevel=9, complib='blosc') as outstore:
            for key in keys:
                logging.info("Sorting %s" % key)
                df = dd.read_hdf(data + 'final/final.h5', key)
                sortdf = df.compute().sort_values('count', ascending=False)
                outstore.append(key, sortdf)
        logging.info("Done sorting")
Example #10
def test_dims_setting(expected_starting_dims, set_dims, expected_ending_dims):
    # Read file
    img = ArrayLikeReader(da.ones((2, 2, 2)))

    # Check basics
    with Profiler() as prof:
        assert img.dims == expected_starting_dims
        # Check that basic details don't require task computation
        assert len(prof.results) == 0

    # Check no tasks happen during dims setting
    with Profiler() as prof:
        img.dims = set_dims
        # Check that basic details don't require task computation
        assert len(prof.results) == 0

    # Check no tasks happen during dims getting
    with Profiler() as prof:
        assert img.dims == expected_ending_dims
        # Check that basic details don't require task computation
        assert len(prof.results) == 0
Example #11
def uncompress_to_hdf5():
    print('Writing to hdf5 file after loading raw data in RAM.')

    raw_arr = uncompress()

    # create dask array from data in RAM
    arr = da.from_array(raw_arr, chunks=(1400, 1400, 350))

    # write to hdf5 file
    out_filepath = 'data/out.hdf5'
    if os.path.isfile(out_filepath):
        os.remove(out_filepath)

    out_file_path = "outputs/load_raw_write_hdf5_uncompressed.html"
    with Profiler() as prof, ResourceProfiler() as rprof, CacheProfiler(
            metric=nbytes) as cprof:
        t = time.time()

        da.to_hdf5(out_filepath, 'data', arr, chunks=None)

        print(
            f'time to save the array to hdf5 without compression: {time.time() - t}'
        )
        visualize([prof, rprof, cprof], out_file_path)

    # write to hdf5 file again, this time with compression
    out_filepath = 'data/out.hdf5'
    os.remove(out_filepath)

    out_file_path = "outputs/load_raw_write_hdf5_commpressed.html"
    with Profiler() as prof, ResourceProfiler() as rprof, CacheProfiler(
            metric=nbytes) as cprof:
        t = time.time()

        da.to_hdf5(out_filepath, 'data', arr, chunks=None, compression="gzip")

        print(
            f'time to save the array to hdf5 with compression: {time.time() - t}'
        )
        visualize([prof, rprof, cprof], out_file_path)
Example #12
def test_dims_setting(resources_dir, expected_starting_dims, set_dims,
                      expected_ending_dims):
    # Get file
    f = resources_dir / "example.png"

    # Read file
    img = DefaultReader(f)

    # Check that there are no open file pointers after init
    proc = Process()
    assert str(f) not in [f.path for f in proc.open_files()]

    # Check basics
    with Profiler() as prof:
        assert img.dims == expected_starting_dims
        # Check that basic details don't require task computation
        assert len(prof.results) == 0

    # Check that there are no open file pointers after basics
    assert str(f) not in [f.path for f in proc.open_files()]

    # Check no tasks happen during dims setting
    with Profiler() as prof:
        img.dims = set_dims
        # Check that basic details don't require task computation
        assert len(prof.results) == 0

    # Check that there are no open file pointers after dims setting
    assert str(f) not in [f.path for f in proc.open_files()]

    # Check no tasks happen during dims getting
    with Profiler() as prof:
        assert img.dims == expected_ending_dims
        # Check that basic details don't require task computation
        assert len(prof.results) == 0

    # Check that there are no open file pointers after retrieval
    assert str(f) not in [f.path for f in proc.open_files()]
Example #13
def test_num_workers_config(scheduler):
    # Regression test for issue #4082

    f = delayed(pure=False)(time.sleep)
    # Be generous with the initial sleep times, as processes have been observed
    # to take >0.5s to spin up
    num_workers = 3
    a = [f(1.0) for i in range(num_workers)]
    with dask.config.set(num_workers=num_workers, chunksize=1), Profiler() as prof:
        compute(*a, scheduler=scheduler)

    workers = {i.worker_id for i in prof.results}

    assert len(workers) == num_workers
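These `num_workers` tests rely on `Profiler.results` holding one record per executed task; each record is a `TaskData` namedtuple with `key`, `task`, `start_time`, `end_time`, and `worker_id` fields, so the other fields can be inspected the same way. A small sketch:

import dask
from dask.diagnostics import Profiler

# Run a trivial graph and inspect the per-task profiling records.
total = dask.delayed(sum)([1, 2, 3])
with Profiler() as prof:
    total.compute()

for rec in prof.results:
    print(rec.key, rec.worker_id, rec.end_time - rec.start_time)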
Example #14
def test_num_workers_config(scheduler):
    pytest.importorskip("cloudpickle")
    # Regression test for issue #4082

    f = delayed(pure=False)(time.sleep)
    # Be generous with the initial sleep times, as processes have been observed
    # to take >0.5s to spin up
    a = [f(1.0), f(1.0), f(1.0), f(0.1)]
    num_workers = 3
    with dask.config.set(num_workers=num_workers), Profiler() as prof:
        compute(*a, scheduler=scheduler)

    workers = {i.worker_id for i in prof.results}

    assert len(workers) == num_workers
Example #15
def onthefly_to_nps():
    print('Writing to npy stack file without loading raw data in RAM.')

    out_dir = 'data/out_3_numpy'
    out_file_path = "outputs/write_npy_stack.html"

    # write to numpy stack ("arr" here is a module-level dask array defined elsewhere)
    with Profiler() as prof, ResourceProfiler() as rprof, CacheProfiler(
            metric=nbytes) as cprof:
        t = time.time()

        write_to_npy_stack(out_dir, arr)

        print(f'time to save the array to numpy stack: {time.time() - t}')
        visualize([prof, rprof, cprof], out_file_path)
Example #16
def test_known_dims(data, dims, expected_shape):
    # Check basics
    with Profiler() as prof:
        img = AICSImage(data, known_dims=dims)
        assert img.data.shape == expected_shape
        assert img.size_x == expected_shape[5]
        assert img.size_y == expected_shape[4]
        assert img.size_z == expected_shape[3]
        assert img.size_c == expected_shape[2]
        assert img.size_t == expected_shape[1]
        assert img.size_s == expected_shape[0]
        assert img.size(dims) == data.shape

        # Due to reshape and transpose there will be 2 tasks in the graph
        assert len(prof.results) == 2
Example #17
def test_num_workers_config(scheduler):
    # Regression test for issue #4082

    @delayed
    def f(x):
        time.sleep(0.5)
        return x

    a = [f(i) for i in range(5)]
    num_workers = 3
    with dask.config.set(num_workers=num_workers), Profiler() as prof:
        a = compute(*a, scheduler=scheduler)

    workers = {i.worker_id for i in prof.results}

    assert len(workers) == num_workers
Example #18
def test_typing(filename, expected_reader, resources_dir):
    # Get filepath
    f = resources_dir / filename

    # Check that there are no open file pointers after init
    proc = Process()
    assert str(f) not in [f.path for f in proc.open_files()]

    # Check basics
    with Profiler() as prof:
        actual_reader = AICSImage.determine_reader(f)
        assert actual_reader == expected_reader
        # Check that basic details don't require task computation
        assert len(prof.results) == 0

    # Check that there are no open file pointers after basics
    assert str(f) not in [f.path for f in proc.open_files()]
Example #19
def test_file_passed_was_directory(resources_dir):
    # Get filepath
    f = resources_dir

    # Check that there are no open file pointers after init
    proc = Process()
    assert str(f) not in [f.path for f in proc.open_files()]

    # Check basics
    with Profiler() as prof:
        with pytest.raises(IsADirectoryError):
            AICSImage(resources_dir)
        # Check that basic details don't require task computation
        assert len(prof.results) == 0

    # Check that there are no open file pointers after basics
    assert str(f) not in [f.path for f in proc.open_files()]
Example #20
def test_large_imread_dask(resources_dir, filename, expected_shape,
                           expected_task_count):
    # Get filepath
    f = resources_dir / filename

    # Check that there are no open file pointers after init
    proc = Process()
    assert str(f) not in [f.path for f in proc.open_files()]

    # Check basics
    with Profiler() as prof:
        img = imread_dask(f)
        assert img.shape == expected_shape
        assert len(prof.results) == expected_task_count

    # Check that there are no open file pointers after basics
    assert str(f) not in [f.path for f in proc.open_files()]
Example #21
    def execute(self, wf, client):
        if not wf.processes:
            return {}

        dsk = wf.convertGraph()

        with Profiler() as prof, ResourceProfiler(
                dt=0.25) as rprof, CacheProfiler() as cprof:
            result = client.get(dsk[0], dsk[1])

        msg.logMessage('result:', result, level=msg.DEBUG)
        path = user_config_dir('xicam/profile.html')
        visualize([prof, rprof, cprof], show=False, file_path=path)
        msg.logMessage(f'Profile saved: {path}')

        wf.lastresult = result

        return result
Example #22
def test_physical_pixel_size(resources_dir, filename, expected_sizes):
    # Get filepath
    f = resources_dir / filename

    # Check that there are no open file pointers after init
    proc = Process()
    assert str(f) not in [f.path for f in proc.open_files()]

    # Check basics
    with Profiler() as prof:
        img = AICSImage(f)
        assert img.get_physical_pixel_size() == expected_sizes

        # Check that basic details don't require task computation
        assert len(prof.results) == 0

    # Check that there are no open file pointers after basics
    assert str(f) not in [f.path for f in proc.open_files()]
Example #23
def test_imread(resources_dir, filename, expected_shape):
    # Get filepath
    f = resources_dir / filename

    # Check that there are no open file pointers after init
    proc = Process()
    assert str(f) not in [f.path for f in proc.open_files()]

    # Check basics
    with Profiler() as prof:
        img = imread(f)
        assert img.shape == expected_shape

        # Reshape and transpose are required so there should be two tasks in the graph
        assert len(prof.results) == 2

    # Check that there are no open file pointers after basics
    assert str(f) not in [f.path for f in proc.open_files()]
Example #24
def test_metadata(resources_dir, filename, expected_metadata_type):
    # Get filepath
    f = resources_dir / filename

    # Check that there are no open file pointers after init
    proc = Process()
    assert str(f) not in [f.path for f in proc.open_files()]

    # Check basics
    with Profiler() as prof:
        img = AICSImage(f)
        assert isinstance(img.metadata, expected_metadata_type)

        # Check that basic details don't require task computation
        assert len(prof.results) == 0

    # Check that there are no open file pointers after basics
    assert str(f) not in [f.path for f in proc.open_files()]
Example #25
def test_force_dims(data_shape, dims, expected):
    # Check basics
    with Profiler() as prof:
        img = AICSImage(data=da.zeros(data_shape))
        img._reader._dims = dims
        assert img.data.shape == expected
        assert data_shape == img.get_image_data(out_orientation=dims).shape
        assert img.size_x == expected[5]
        assert img.size_y == expected[4]
        assert img.size_z == expected[3]
        assert img.size_c == expected[2]
        assert img.size_t == expected[1]
        assert img.size_s == expected[0]
        assert img.size(dims) == data_shape

        # Two operations happen here:
        # First, img.data is accessed, running two tasks (reshape and transpose)
        # Then get_image_data runs two more (another reshape and transpose)
        assert len(prof.results) == 4
Example #26
def uncompress_to_npy():
    print('Writing to numpy file after loading raw data in RAM.')
    out_filepath = 'data/out_1.npy'
    diagnostics_filepath = "outputs/load_raw_write_npy_file.html"

    raw_arr = uncompress()

    # write to numpy file
    if os.path.isfile(out_filepath):
        os.remove(out_filepath)

    with Profiler() as prof, ResourceProfiler() as rprof, CacheProfiler(
            metric=nbytes) as cprof:
        t = time.time()

        np.save(out_filepath, raw_arr)

        print(f'time to save the array to numpy file: {time.time() - t}')
        visualize([prof, rprof, cprof], diagnostics_filepath)
Example #27
    def _execute_graph(self, *writes):
        # Set up Profilers and Progress Bars
        with ExitStack() as stack:
            profilers = []

            if can_profile:
                from dask.diagnostics import (Profiler, CacheProfiler,
                                              ResourceProfiler, visualize)

                profilers.append(stack.enter_context(Profiler()))
                profilers.append(stack.enter_context(CacheProfiler()))
                profilers.append(stack.enter_context(ResourceProfiler()))

            if sys.stdout.isatty() and not self.args.boring:
                from dask.diagnostics import ProgressBar
                stack.enter_context(ProgressBar())
            dask.compute(*writes, scheduler='single-threaded')
            logger.info("Averaging Complete")

        if can_profile:
            visualize(profilers)
Example #28
def uncompress_to_nps():
    print('Writing to numpy stack after loading raw data in RAM.')

    # load data in RAM
    raw_arr = uncompress()

    # create dask array from data in RAM
    arr = da.from_array(raw_arr, chunks=(1400, 1400, 350))

    # write to numpy stack
    out_dir = 'data/out_numpy'

    out_file_path = "outputs/load_raw_write_npy_stack.html"
    with Profiler() as prof, ResourceProfiler() as rprof, CacheProfiler(
            metric=nbytes) as cprof:
        t = time.time()

        write_to_npy_stack(out_dir, arr)

        print(f'time to save the array to numpy stack: {time.time() - t}')
        visualize([prof, rprof, cprof], out_file_path)
Example #29
def test_daread(data_dir, img, expected_shape, expected_chunksize,
                expected_dims):
    # Read the data into a dask array
    data, dims = daread(data_dir / img)

    # Do basic checking of shape, chunksize, and dims
    assert data.shape == expected_shape
    assert data.chunksize == expected_chunksize
    assert dims == expected_dims

    # Check that when a single plane is selected, only two tasks run
    getitem_ops = []
    for dim in dims:
        if dim not in ["Y", "X"]:
            getitem_ops.append(0)
        else:
            getitem_ops.append(slice(None, None, None))

    # Run through profiler
    with Profiler() as prof:
        assert isinstance(data[tuple(getitem_ops)].compute(), np.ndarray)
        assert len(prof.results) == 2
Example #30
def onthefly_to_hdf5():
    print('Writing to hdf5 file without loading raw data in RAM.')

    # write to hdf5 file ("arr" is a module-level dask array defined elsewhere)
    out_filepath = 'data/out.hdf5'
    if os.path.isfile(out_filepath):
        os.remove(out_filepath)

    out_file_path = "outputs/write_hdf5.html"
    with Profiler() as prof, ResourceProfiler() as rprof, CacheProfiler(
            metric=nbytes) as cprof:
        t = time.time()

        da.to_hdf5(out_filepath,
                   'data',
                   arr,
                   chunks=(1400, 1400, 350),
                   compression="gzip")

        print(
            f'time to save the array to hdf5 with compression: {time.time() - t}'
        )
        visualize([prof, rprof, cprof], out_file_path)
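A closing note: `nbytes`, passed as the CacheProfiler metric in several examples above, is not defined in the snippets; it presumably comes from the cachey package, as in the dask diagnostics documentation. A minimal sketch under that assumption:

import dask.array as da
from cachey import nbytes  # assumed source of the metric used above
from dask.diagnostics import CacheProfiler

# With metric=nbytes, each cache record reports the byte size of the
# cached result instead of the default task count.
arr = da.ones((1000, 1000), chunks=(500, 500))
with CacheProfiler(metric=nbytes) as cprof:
    arr.sum().compute()

print(cprof.results[:3])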