def getOrderOfExecution(dsk, finalTask, numConcurrentTasks=1):
    '''Get an execution order that minimizes backtracking and memory footprint.

    numConcurrentTasks can be more than 1 if several tasks can run at once.
    Pass the final task you want to achieve (one of the keys in dsk) as finalTask.'''
    from dask.callbacks import Callback
    from dask.threaded import get
    # time.clock() was removed in Python 3.8; perf_counter is a drop-in timer here
    from time import perf_counter as clock
    #from multiprocessing.pool import ThreadPool
    #dask.set_options(pool=ThreadPool(numConcurrentTasks))
    class PrintKeys(Callback):
        def __init__(self,numWorkers):
            self.equivTime = None
            self.numWorkers = numWorkers
        def _start(self,dsk):
            print("Working with {} concurrent tasks".format(self.numWorkers))
            self.startTime = clock()
        def _pretask(self, key, dask, state):
            """Print the key of every task as it's started"""
            pass
        def _posttask(self,key,result,dsk,state,id):
            print("Do {} <- {}, approx. {} time units".format(repr(key),dsk[key][1:],repr(result)))
        def _finish(self,dsk,state,errored):
            self.endTime = clock()
            dt = (self.endTime - self.startTime)*100.
            print("Approximate time to complete: {} time units".format(dt))
            print("Equivalent single thread time: {} time units".format(dt*self.numWorkers))
            self.equivTime = dt*self.numWorkers
    with PrintKeys(numConcurrentTasks):
        get(dsk,finalTask,num_workers=numConcurrentTasks)
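A usage sketch for the helper above, with a hypothetical three-task graph (the load/combine names and costs are illustrative only):

from time import sleep

def load(cost):
    sleep(cost / 100.0)   # simulate some work
    return cost           # each task reports its cost in "time units"

def combine(a, b):
    return a + b

# A dask graph is a plain dict: key -> (callable, *arguments); string arguments
# that are themselves keys are replaced by the results of those tasks.
toy_dsk = {
    'raw_a': (load, 3),
    'raw_b': (load, 5),
    'result': (combine, 'raw_a', 'raw_b'),
}

getOrderOfExecution(toy_dsk, 'result', numConcurrentTasks=2)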
Example 2
def test_resource_profiler_plot():
    with ResourceProfiler(dt=0.01) as rprof:
        get(dsk2, 'c')
    p = rprof.visualize(plot_width=500,
                        plot_height=300,
                        tools="hover",
                        title="Not the default",
                        show=False, save=False)
    assert p.plot_width == 500
    assert p.plot_height == 300
    assert len(p.tools) == 1
    assert isinstance(p.tools[0], bokeh.models.HoverTool)
    assert check_title(p, "Not the default")

    # Test with empty and one point, checking for errors
    rprof.clear()
    for results in [[], [(1.0, 0, 0)]]:
        rprof.results = results
        with pytest.warns(None) as record:
            p = rprof.visualize(show=False, save=False)
        assert len(record) == 0
        # Check bounds are valid
        assert p.x_range.start == 0
        assert p.x_range.end == 1
        assert p.y_range.start == 0
        assert p.y_range.end == 100
        assert p.extra_y_ranges['memory'].start == 0
        assert p.extra_y_ranges['memory'].end == 100
def getOptimalNumThreads(dsk,finalTask):
    import numpy as np
    threads = np.arange(9)+1  # try 1..9 workers; the physical core count is a sensible cap
    from dask.callbacks import Callback
    from dask.threaded import get
    # time.clock() was removed in Python 3.8; perf_counter is a drop-in timer here
    from time import perf_counter as clock
    #from multiprocessing.pool import ThreadPool
    #dask.set_options(pool=ThreadPool(numConcurrentTasks))
    class PrintKeys(Callback):
        def __init__(self,numWorkers):
            self.equivTime = None
            self.numWorkers = numWorkers
        def _start(self,dsk):
            self.startTime = clock()
        def _finish(self,dsk,state,errored):
            self.endTime = clock()
            dt = (self.endTime - self.startTime)*100.
            self.equivTime = dt*self.numWorkers
    eqT = []
    eff = []
    optnum = 1
    for i in threads:
        with PrintKeys(i) as pk:
            get(dsk,finalTask,num_workers=i)
            eqT.append(pk.equivTime)
            print("Efficiency [{}]: {}".format(i,1-pk.equivTime/eqT[0]/i))
            ef = 1-pk.equivTime/eqT[0]/i
            eff.append(ef)
            if len(eff) > 1 and eff[-2] > 0:
                # eff[-2] is 0.0 for the single-worker run, so guard the division
                if ef/eff[-2] < 1.1:
                    optnum = i - 1
                    break
    print("Optimal number of concurrent tasks (if possible): {}".format(optnum))
    return optnum
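A usage sketch for the thread-count search above, again with a hypothetical graph of sleep-based tasks:

from time import sleep

def work():
    sleep(0.1)   # stand-in for a task that takes real time
    return 0.1

# four independent tasks feeding one reduction
probe_dsk = {('w', i): (work,) for i in range(4)}
probe_dsk['total'] = (sum, [('w', i) for i in range(4)])

best = getOptimalNumThreads(probe_dsk, 'total')
print("suggested num_workers:", best)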
Example 4
def test_blockiter_bothmasks(signal, flat, dtype, nm, sm):
    real_first = get(signal.data.dask, (signal.data.name, 0, 0, 0, 0)).copy()
    real_second = get(signal.data.dask, (signal.data.name, 0, 1, 0, 0)).copy()
    # Don't want to rechunk, so change dtype manually
    signal.data = signal.data.astype(dtype)
    it = signal._block_iterator(flat_signal=flat,
                                navigation_mask=nm,
                                signal_mask=sm,
                                get=get)
    first_block = next(it)
    second_block = next(it)
    if nm is not None:
        nm = nm[:2, :4]
    real_first = real_first.astype(dtype)
    real_second = real_second.astype(dtype)
    if flat:
        if nm is not None:
            nm = ~nm
            navslice = np.where(nm.flat)[0]
        else:
            navslice = slice(None)
        sigslice = slice(11, None) if sm is not None else slice(None)
        slices1 = (navslice, sigslice)
        real_first = real_first.reshape((2 * 4, -1))[slices1]
        real_second = real_second.reshape((2 * 5, -1))[:, sigslice]
    else:
        value = np.nan if dtype == 'float' else 0  # compare strings with ==, not 'is'
        if nm is not None:
            real_first[nm, ...] = value
        if sm is not None:
            real_first[..., sm] = value
            real_second[..., sm] = value
    np.testing.assert_allclose(first_block, real_first)
    np.testing.assert_allclose(second_block, real_second)
Example 5
def test_interrupt():
    # On Python 2, and on Windows under both Python 2 and 3, `queue.get` is
    # implemented with polling, which means we can raise an exception in the
    # main thread to interrupt the call to `get`. Python 3 on other platforms
    # requires sending SIGINT to the main thread.
    if PY2:
        from thread import interrupt_main
    elif os.name == 'nt':
        from _thread import interrupt_main
    else:
        main_thread = threading.get_ident()

        def interrupt_main():
            signal.pthread_kill(main_thread, signal.SIGINT)

    def long_task():
        sleep(5)

    dsk = {('x', i): (long_task,) for i in range(20)}
    dsk['x'] = (len, list(dsk.keys()))
    try:
        interrupter = threading.Timer(0.5, interrupt_main)
        interrupter.start()
        start = time()
        get(dsk, 'x')
    except KeyboardInterrupt:
        pass
    except Exception:
        assert False, "Failed to interrupt"
    stop = time()
    if stop - start > 4:
        assert False, "Failed to interrupt"
Example 6
def test_no_tasks(capsys):
    with ProgressBar():
        get({'x': 1}, 'x')

    out, err = capsys.readouterr()
    bar, percent, time = [i.strip() for i in out.split('\r')[-1].split('|')]
    assert bar == "[########################################]"
    assert percent == "100% Completed"
Example 7
def test_progressbar(capsys):
    with ProgressBar():
        out = get(dsk, 'e')
    assert out == 6
    check_bar_completed(capsys)
    with ProgressBar(width=20):
        out = get(dsk, 'e')
    check_bar_completed(capsys, 20)
Example 8
def test_clean_exit():
    dsk = {"a": (lambda: 1 / 0,)}
    try:
        with ProgressBar() as pbar:
            get(dsk, "a")
    except:
        pass
    assert not pbar._running
    assert not pbar._timer.is_alive()
Example 9
def test_clean_exit():
    dsk = {'a': (lambda: 1 / 0, )}
    try:
        with ProgressBar() as pbar:
            get(dsk, 'a')
    except ZeroDivisionError:
        pass
    assert not pbar._running
    assert not pbar._timer.is_alive()
Example 10
def test_saves_file():
    with tmpfile("html") as fn:
        with prof:
            get(dsk, "e")
        # Run just to see that it doesn't error
        prof.visualize(show=False, file_path=fn)

        assert os.path.exists(fn)
        with open(fn) as f:
            assert "html" in f.read().lower()
Example 11
def test_with_alias(capsys):
    dsk = {'a': 1,
           'b': 2,
           'c': (add, 'a', 'b'),
           'd': (add, 1, 2),
           'e': 'd',
           'f': (mul, 'e', 'c')}
    with ProgressBar():
        get(dsk, 'f')
    check_bar_completed(capsys)
Example 12
def test_profiler_works_under_error():
    div = lambda x, y: x / y
    dsk = {'x': (div, 1, 1), 'y': (div, 'x', 2), 'z': (div, 'y', 0)}

    with ignoring(ZeroDivisionError):
        with prof:
            get(dsk, 'z')

    assert all(len(v) == 5 for v in prof.results)
    assert len(prof.results) == 2
Example 13
def test_saves_file():
    with tmpfile('html') as fn:
        with prof:
            get(dsk, 'e')
        # Run just to see that it doesn't error
        prof.visualize(show=False, file_path=fn)

        assert os.path.exists(fn)
        with open(fn) as f:
            assert 'HTML' in f.read()
Example 14
def test_dont_spawn_too_many_threads():
    before = threading.active_count()

    dsk = {('x', i): (lambda: i,) for i in range(10)}
    dsk['x'] = (sum, list(dsk))
    for i in range(20):
        get(dsk, 'x', num_workers=4)

    after = threading.active_count()

    assert after <= before + 8
Example 15
def test_register(profiler):
    prof = profiler()
    try:
        prof.register()
        get(dsk2, 'c')
        n = len(prof.results)
        assert n > 0
        get(dsk2, 'c')
        assert len(prof.results) > n
    finally:
        prof.unregister()
Example 16
def test_resource_profiler_plot():
    with ResourceProfiler(dt=0.01) as rprof:
        get(dsk2, "c")
    p = rprof.visualize(plot_width=500, plot_height=300, tools="hover", title="Not the default", show=False, save=False)
    assert p.plot_width == 500
    assert p.plot_height == 300
    assert len(p.tools) == 1
    assert isinstance(p.tools[0], bokeh.models.HoverTool)
    assert p.title == "Not the default"
    # Test empty, checking for errors
    rprof.clear()
    rprof.visualize(show=False, save=False)
Example 17
def test_profiler_plot():
    with prof:
        get(dsk, 'e')
    p = prof.visualize(plot_width=500,
                       plot_height=300,
                       tools="hover",
                       title="Not the default",
                       show=False, save=False)
    assert p.plot_width == 500
    assert p.plot_height == 300
    assert len(p.tools) == 1
    assert isinstance(p.tools[0], bokeh.models.HoverTool)
    assert p.title == "Not the default"
Example 18
def test_cache_profiler_plot():
    with CacheProfiler(metric_name="non-standard") as cprof:
        get(dsk, "e")
    p = cprof.visualize(plot_width=500, plot_height=300, tools="hover", title="Not the default", show=False, save=False)
    assert p.plot_width == 500
    assert p.plot_height == 300
    assert len(p.tools) == 1
    assert isinstance(p.tools[0], bokeh.models.HoverTool)
    assert p.title == "Not the default"
    assert p.axis[1].axis_label == "Cache Size (non-standard)"
    # Test empty, checking for errors
    cprof.clear()
    cprof.visualize(show=False, save=False)
Example 19
def test_progressbar(capsys):
    with ProgressBar():
        out = get(dsk, 'e')
    assert out == 6
    out, err = capsys.readouterr()
    bar, percent, time = [i.strip() for i in out.split('\r')[-1].split('|')]
    assert bar == "[########################################]"
    assert percent == "100% Completed"
    with ProgressBar(width=20):
        out = get(dsk, 'e')
    out, err = capsys.readouterr()
    bar, percent, time = [i.strip() for i in out.split('\r')[-1].split('|')]
    assert bar == "[####################]"
    assert percent == "100% Completed"
Example 20
def test_plot_both():
    from dask.diagnostics.profile_visualize import visualize
    from bokeh.plotting import GridPlot
    with ResourceProfiler(dt=0.01) as rprof:
        with prof:
            get(dsk2, 'c')
    p = visualize([prof, rprof], label_size=50,
                  title="Not the default", show=False, save=False)
    assert isinstance(p, GridPlot)
    assert len(p.children) == 2
    assert p.children[0][0].title == "Not the default"
    assert p.children[0][0].xaxis[0].axis_label is None
    assert p.children[1][0].title is None
    assert p.children[1][0].xaxis[0].axis_label == 'Time (s)'
Example 21
def test_broken_callback():
    from dask.callbacks import Callback

    def _f_ok(*args, **kwargs):
        pass

    def _f_broken(*args, **kwargs):
        raise ValueError('my_exception')

    dsk = {'x': 1}

    with Callback(start=_f_broken, finish=_f_ok):
        with Callback(start=_f_ok, finish=_f_ok):
            with pytest.raises(ValueError, match='my_exception'):
                get(dsk, 'x')
Example 22
def test_resource_profiler():
    with ResourceProfiler(dt=0.01) as rprof:
        out = get(dsk2, 'c')
    results = rprof.results
    assert all(isinstance(i, tuple) and len(i) == 3 for i in results)

    rprof.clear()
    assert rprof.results == []

    rprof.close()
    assert not rprof._tracker.is_alive()

    with pytest.raises(AssertionError):
        with rprof:
            get(dsk, 'e')
Example 23
def test_register(capsys):
    try:
        p = ProgressBar()
        p.register()

        assert _globals['callbacks']

        get(dsk, 'e')
        check_bar_completed(capsys)

        p.unregister()

        assert not _globals['callbacks']
    finally:
        _globals['callbacks'].clear()
Example 24
def test_resource_profiler():
    with ResourceProfiler(dt=0.01) as rprof:
        get(dsk2, 'c')
    results = rprof.results
    assert len(results) > 0
    assert all(isinstance(i, tuple) and len(i) == 3 for i in results)

    rprof.clear()
    assert rprof.results == []

    rprof.close()
    assert not rprof._tracker.is_alive()

    with pytest.raises(AssertionError):
        with rprof:
            get(dsk, 'e')
Example 25
def test_with_cache(capsys):
    cachey = pytest.importorskip('cachey')
    from dask.cache import Cache
    c = cachey.Cache(10000)
    cc = Cache(c)

    with cc:
        with ProgressBar():
            assert get({'x': (mul, 1, 2)}, 'x') == 2
    check_bar_completed(capsys)
    assert c.data['x'] == 2

    with cc:
        with ProgressBar():
            assert get({'x': (mul, 1, 2), 'y': (mul, 'x', 3)}, 'y') == 6
    check_bar_completed(capsys)
def test_profiler_plot():
    with prof:
        get(dsk, 'e')
    p = prof.visualize(plot_width=500,
                       plot_height=300,
                       tools="hover",
                       title="Not the default",
                       show=False, save=False)
    assert p.plot_width == 500
    assert p.plot_height == 300
    assert len(p.tools) == 1
    assert isinstance(p.tools[0], bokeh.models.HoverTool)
    assert check_title(p, "Not the default")
    # Test empty, checking for errors
    prof.clear()
    prof.visualize(show=False, save=False)
Example 27
def test_profiler_plot():
    with prof:
        get(dsk, 'e')
    p = prof.visualize(plot_width=500,
                       plot_height=300,
                       tools="hover",
                       title="Not the default",
                       show=False, save=False)
    assert p.plot_width == 500
    assert p.plot_height == 300
    assert len(p.tools) == 1
    assert isinstance(p.tools[0], bokeh.models.HoverTool)
    assert check_title(p, "Not the default")
    # Test empty, checking for errors
    prof.clear()
    prof.visualize(show=False, save=False)
Example 28
def test_resource_profiler_plot():
    with ResourceProfiler(dt=0.01) as rprof:
        get(dsk2, 'c')
    p = rprof.visualize(plot_width=500,
                        plot_height=300,
                        tools="hover",
                        title="Not the default",
                        show=False, save=False)
    assert p.plot_width == 500
    assert p.plot_height == 300
    assert len(p.tools) == 1
    assert isinstance(p.tools[0], bokeh.models.HoverTool)
    assert p.title == "Not the default"
    # Test empty, checking for errors
    rprof.clear()
    rprof.visualize(show=False, save=False)
Example 29
def test_with_cache(capsys):
    cachey = pytest.importorskip('cachey')
    from dask.cache import Cache
    c = cachey.Cache(10000)
    cc = Cache(c)

    with cc:
        with ProgressBar():
            assert get({'x': (mul, 1, 2)}, 'x') == 2
    check_bar_completed(capsys)
    assert c.data['x'] == 2

    with cc:
        with ProgressBar():
            assert get({'x': (mul, 1, 2), 'y': (mul, 'x', 3)}, 'y') == 6
    check_bar_completed(capsys)
def test_cache_profiler_plot():
    with CacheProfiler(metric_name='non-standard') as cprof:
        get(dsk, 'e')
    p = cprof.visualize(plot_width=500,
                        plot_height=300,
                        tools="hover",
                        title="Not the default",
                        show=False, save=False)
    assert p.plot_width == 500
    assert p.plot_height == 300
    assert len(p.tools) == 1
    assert isinstance(p.tools[0], bokeh.models.HoverTool)
    assert check_title(p, "Not the default")
    assert p.axis[1].axis_label == 'Cache Size (non-standard)'
    # Test empty, checking for errors
    cprof.clear()
    cprof.visualize(show=False, save=False)
Example 31
def test_two_gets():
    with prof:
        get(dsk, "e")
    n = len(prof.results)

    dsk2 = {"x": (add, 1, 2), "y": (add, "x", "x")}

    with prof:
        get(dsk2, "y")
    m = len(prof.results)

    with prof:
        get(dsk, "e")
        get(dsk2, "y")
        get(dsk, "e")

    assert len(prof.results) == n + m + n
Example 32
def test_two_gets():
    with prof:
        get(dsk, 'e')
    n = len(prof.results)

    dsk2 = {'x': (add, 1, 2), 'y': (add, 'x', 'x')}

    with prof:
        get(dsk2, 'y')
    m = len(prof.results)

    with prof:
        get(dsk, 'e')
        get(dsk2, 'y')
        get(dsk, 'e')

    assert len(prof.results) == n + m + n
Example 33
def test_callback():
    f = lambda x: x + 1
    dsk = {'a': (f, 1)}
    from dask.threaded import get

    def start_callback(key, d, state):
        assert key == 'a' or key is None
        assert d == dsk
        assert isinstance(state, dict)

    def end_callback(key, value, d, state, worker_id):
        assert key == 'a' or key is None
        assert value == 2 or value is None
        assert d == dsk
        assert isinstance(state, dict)

    get(dsk, 'a', start_callback=start_callback, end_callback=end_callback)
Example 34
def test_callback():
    f = lambda x: x + 1
    dsk = {"a": (f, 1)}
    from dask.threaded import get

    def start_callback(key, d, state):
        assert key == "a" or key is None
        assert d == dsk
        assert isinstance(state, dict)

    def end_callback(key, value, d, state, worker_id):
        assert key == "a" or key is None
        assert value == 2 or value is None
        assert d == dsk
        assert isinstance(state, dict)

    get(dsk, "a", start_callback=start_callback, end_callback=end_callback)
Example 35
def test_two_gets():
    with prof:
        get(dsk, "e")
    n = len(prof.results)

    dsk2 = {"x": (add, 1, 2), "y": (add, "x", "x")}

    with prof:
        get(dsk2, "y")
    m = len(prof.results)

    with prof:
        get(dsk, "e")
        get(dsk2, "y")
        get(dsk, "e")

    assert len(prof.results) == n + m + n
Example 36
def test_two_gets():
    with prof:
        get(dsk, 'e')
    n = len(prof.results)

    dsk2 = {'x': (add, 1, 2), 'y': (add, 'x', 'x')}

    with prof:
        get(dsk2, 'y')
    m = len(prof.results)

    with prof:
        get(dsk, 'e')
        get(dsk2, 'y')
        get(dsk, 'e')

    assert len(prof.results) == n + m + n
Example 37
def test_with_cache(capsys):
    cachey = pytest.importorskip("cachey")
    from dask.cache import Cache

    c = cachey.Cache(10000)
    cc = Cache(c)

    with cc:
        with ProgressBar():
            assert get({"x": (mul, 1, 2)}, "x") == 2
    check_bar_completed(capsys)
    assert c.data["x"] == 2

    with cc:
        with ProgressBar():
            assert get({"x": (mul, 1, 2), "y": (mul, "x", 3)}, "y") == 6
    check_bar_completed(capsys)
Example 38
def test_plot_multiple():
    from dask.diagnostics.profile_visualize import visualize
    from bokeh.plotting import GridPlot
    with ResourceProfiler(dt=0.01) as rprof:
        with prof:
            get(dsk2, 'c')
    p = visualize([prof, rprof], label_size=50,
                  title="Not the default", show=False, save=False)
    assert isinstance(p, GridPlot)
    assert len(p.children) == 2
    assert p.children[0][0].title == "Not the default"
    assert p.children[0][0].xaxis[0].axis_label is None
    assert p.children[1][0].title is None
    assert p.children[1][0].xaxis[0].axis_label == 'Time (s)'
    # Test empty, checking for errors
    prof.clear()
    rprof.clear()
    visualize([prof, rprof], show=False, save=False)
Example 39
def test_plot_multiple():
    from dask.diagnostics.profile_visualize import visualize
    from bokeh.models import GridPlot
    with ResourceProfiler(dt=0.01) as rprof:
        with prof:
            get(dsk2, 'c')
    p = visualize([prof, rprof], label_size=50,
                  title="Not the default", show=False, save=False)
    assert isinstance(p, GridPlot)
    assert len(p.children) == 2
    assert p.children[0][0].title == "Not the default"
    assert p.children[0][0].xaxis[0].axis_label is None
    assert p.children[1][0].title is None
    assert p.children[1][0].xaxis[0].axis_label == 'Time (s)'
    # Test empty, checking for errors
    prof.clear()
    rprof.clear()
    visualize([prof, rprof], show=False, save=False)
Example 40
def main():

    d = {
        'task_one': (task_one, ),
        'task_two': (task_two, ),
        'task_three': (task_three, 'task_one', 'task_two')
    }

    print(get(d, 'task_three'))  # executes in parallel
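
A self-contained sketch of the same pattern, with hypothetical task bodies, showing that the graph is just a dict mapping keys to (callable, *dependency_keys) tuples:

from dask.threaded import get

def task_one():
    return 1

def task_two():
    return 2

def task_three(a, b):
    return a + b

d = {
    'task_one': (task_one,),
    'task_two': (task_two,),
    # 'task_one' and 'task_two' are graph keys, so their results are passed in
    'task_three': (task_three, 'task_one', 'task_two'),
}

print(get(d, 'task_three'))  # task_one and task_two may run concurrently; prints 3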
Example 41
def test_pool_kwarg():
    def f():
        sleep(0.01)
        return threading.get_ident()

    dsk = {('x', i): (f,) for i in range(30)}
    dsk['x'] = (len, (set, [('x', i) for i in range(len(dsk))]))

    with ThreadPool(3) as pool:
        assert get(dsk, 'x', pool=pool) == 3
Example 42
def test_plot_multiple():
    from dask.diagnostics.profile_visualize import visualize

    with ResourceProfiler(dt=0.01) as rprof:
        with prof:
            get(dsk2, "c")
    p = visualize(
        [prof, rprof], label_size=50, title="Not the default", show=False, save=False
    )
    figures = [r[0] for r in p.children[1].children]
    assert len(figures) == 2
    assert figures[0].title.text == "Not the default"
    assert figures[0].xaxis[0].axis_label is None
    assert figures[1].title is None
    assert figures[1].xaxis[0].axis_label == "Time (s)"
    # Test empty, checking for errors
    prof.clear()
    rprof.clear()
    visualize([prof, rprof], show=False, save=False)
Example 43
def test_profiler_works_under_error():
    div = lambda x, y: x / y
    dsk = {'x': (div, 1, 1), 'y': (div, 'x', 2), 'z': (div, 'y', 0)}

    with ignoring(ZeroDivisionError):
        with prof:
            out = get(dsk, 'z')

    assert all(len(v) == 5 for v in prof.results)
    assert len(prof.results) == 2
Example 44
def test_pool_kwarg(pool_typ):
    def f():
        sleep(0.01)
        return threading.get_ident()

    dsk = {("x", i): (f, ) for i in range(30)}
    dsk["x"] = (len, (set, [("x", i) for i in range(len(dsk))]))

    with pool_typ(3) as pool:
        assert get(dsk, "x", pool=pool) == 3
Example 45
def test_profiler():
    with prof:
        out = get(dsk, "e")
    assert out == 6
    prof_data = sorted(prof.results, key=lambda d: d.key)
    keys = [i.key for i in prof_data]
    assert keys == ["c", "d", "e"]
    tasks = [i.task for i in prof_data]
    assert tasks == [(add, "a", "b"), (mul, "a", "b"), (mul, "c", "d")]
    prof.clear()
    assert prof.results == []
def test_plot_multiple():
    from dask.diagnostics.profile_visualize import visualize
    with ResourceProfiler(dt=0.01) as rprof:
        with prof:
            get(dsk2, 'c')
    p = visualize([prof, rprof], label_size=50,
                  title="Not the default", show=False, save=False)
    if LooseVersion(bokeh.__version__) >= '0.12.0':
        figures = [r.children[0] for r in p.children[1].children]
    else:
        figures = [r[0] for r in p.children]
    assert len(figures) == 2
    assert check_title(figures[0], "Not the default")
    assert figures[0].xaxis[0].axis_label is None
    assert figures[1].title is None
    assert figures[1].xaxis[0].axis_label == 'Time (s)'
    # Test empty, checking for errors
    prof.clear()
    rprof.clear()
    visualize([prof, rprof], show=False, save=False)
Example 47
def test_profiler():
    with prof:
        out = get(dsk, 'e')
    assert out == 6
    prof_data = sorted(prof.results, key=lambda d: d.key)
    keys = [i.key for i in prof_data]
    assert keys == ['c', 'd', 'e']
    tasks = [i.task for i in prof_data]
    assert tasks == [(add, 'a', 'b'), (mul, 'a', 'b'), (mul, 'c', 'd')]
    prof.clear()
    assert prof.results == []
Example 48
def test_resource_profiler():
    with ResourceProfiler(dt=0.01) as rprof:
        get(dsk2, "c")
    results = rprof.results
    assert len(results) > 0
    assert all(isinstance(i, tuple) and len(i) == 3 for i in results)

    # Tracker stopped on exit
    assert not rprof._is_running()

    rprof.clear()
    assert rprof.results == []

    # Close is idempotent
    rprof.close()
    assert not rprof._is_running()

    # Restarts tracker if already closed
    with rprof:
        get(dsk2, "c")
    assert len(rprof.results) > 0
def testing_city_finder():
    path = os.path.join(os.path.dirname(__file__), 'sample_transcripts',
                        'out-example-2021-02-01-hansard-plenary.txt')
    city_finder = processor()

    workflow = city_finder.build_workflow(path)
    report = get(workflow, 'output')

    with open('./tests/test_report.json', 'r') as json_file:
        expected_report = json.load(json_file)

    compiled_report = report.compile()
    # assumed completion of this truncated example: compare against the stored expectation
    assert compiled_report == expected_report
Example 50
def test_cache_profiler():
    with CacheProfiler() as cprof:
        out = get(dsk2, 'c')
    results = cprof.results
    assert all(isinstance(i, tuple) and len(i) == 5 for i in results)

    cprof.clear()
    assert cprof.results == []

    tics = [0]
    def nbytes(res):
        tics[0] += 1
        return tics[0]

    with CacheProfiler(nbytes) as cprof:
        out = get(dsk2, 'c')
    results = cprof.results
    assert tics[-1] == len(results)
    assert tics[-1] == results[-1].metric
    assert cprof._metric_name == 'nbytes'
    assert CacheProfiler(metric=nbytes, metric_name='foo')._metric_name == 'foo'
Example 51
def getOptimalNumThreads(dsk, finalTask):
    import numpy as np
    threads = np.arange(9) + 1  # try 1..9 workers; the physical core count is a sensible cap
    from dask.callbacks import Callback
    from dask.threaded import get
    # time.clock() was removed in Python 3.8; perf_counter is a drop-in timer here
    from time import perf_counter as clock
    #from multiprocessing.pool import ThreadPool
    #dask.set_options(pool=ThreadPool(numConcurrentTasks))

    class PrintKeys(Callback):
        def __init__(self, numWorkers):
            self.equivTime = None
            self.numWorkers = numWorkers

        def _start(self, dsk):
            self.startTime = clock()

        def _finish(self, dsk, state, errored):
            self.endTime = clock()
            dt = (self.endTime - self.startTime) * 100.
            self.equivTime = dt * self.numWorkers

    eqT = []
    eff = []
    optnum = 1
    for i in threads:
        with PrintKeys(i) as pk:
            get(dsk, finalTask, num_workers=i)
            eqT.append(pk.equivTime)
            print("Efficiency [{}]: {}".format(i,
                                               1 - pk.equivTime / eqT[0] / i))
            ef = 1 - pk.equivTime / eqT[0] / i
            eff.append(ef)
            if len(eff) > 1 and eff[-2] > 0:
                # eff[-2] is 0.0 for the single-worker run, so guard the division
                if ef / eff[-2] < 1.1:
                    optnum = i - 1
                    break
    print(
        "Optimal number of concurrent tasks (if possible): {}".format(optnum))
    return optnum
Example 52
def testing_register_pass():
    path = os.path.join(os.path.dirname(__file__), 'data', 'registers-sample.csv')
    registers = processor()
    workflow = registers.get_workflow(path)
    report = get(workflow, 'output')
    results = report.compile()

    print(json.dumps(report.compile(), indent=True))
    assert len(results['tables'][0]['warnings']) == 5

    report = [issue for issue in results['tables'][0]['warnings'] if issue['message'].endswith('\'FY\'')].pop()
    assert report['code'] == 'country-mismatch'
    assert report['error-data']['mismatch'] == 'FY'
    assert len(report['context']) == 1
    assert report['context'][0]['properties'] == {
        'name': 'Venus MacNee',
        'nationality': 'Namibian',
        'sample_date': '2017-03-01',
        'state': 'FY',
        'organizations': 12,
        'country': 'Finland'
    }
    assert len(report['error-data']) == 1

    report = [issue for issue in results['tables'][0]['warnings'] if issue['message'].endswith('\'British\'?')].pop()
    assert report['code'] == 'country-mismatch'
    assert report['error-data'] == {
        'mismatch': 'Britidh',
        'guess': ('British', 86)
    }
    assert len(report['context']) == 1
    assert report['context'][0]['properties'] == {
        'country': 'Trinidad',
        'nationality': 'Britidh',
        'sample_date': '2016-03-23',
        'name': 'Cara Matisz',
        'state': 'TT',
        'organizations': 9
    }
    assert report['item']['entity'] == {
        'type': 'Cell',
        'definition': None,
        'location': {
            'row': 3,
            'column': 5
        }
    }

    assert len(results['tables'][0]['informations']) == 3
    report = results['tables'][0]['informations'][0]
    assert report['code'] == 'country-checked'
    assert report['item']['entity']['type'] == 'Column'
Example 53
def test_cache():
    c = cachey.Cache(10000)
    cc = Cache(c)

    with cc:
        assert get({'x': (inc, 1)}, 'x') == 2

    assert flag == [1]
    assert c.data['x'] == 2

    assert not cc.starttimes
    assert not cc.durations

    while flag:
        flag.pop()
    dsk = {'x': (inc, 1), 'y': (inc, 2), 'z': (add, 'x', 'y')}
    with cc:
        assert get(dsk, 'z') == 5

    assert flag == [2]  # no x present

    assert not _globals['callbacks']
Example 54
def test_cache():
    c = cachey.Cache(10000)
    cc = Cache(c)

    with cc:
        assert get({"x": (inc, 1)}, "x") == 2

    assert flag == [1]
    assert c.data["x"] == 2

    assert not cc.starttimes
    assert not cc.durations

    while flag:
        flag.pop()
    dsk = {"x": (inc, 1), "y": (inc, 2), "z": (add, "x", "y")}
    with cc:
        assert get(dsk, "z") == 5

    assert flag == [2]  # no x present

    assert not Callback.active
Example 55
def test_profiler_plot():
    with prof:
        get(dsk, "e")
    p = prof.visualize(
        plot_width=500,
        plot_height=300,
        tools="hover",
        title="Not the default",
        show=False,
        save=False,
    )
    assert p.plot_width == 500
    assert p.plot_height == 300
    assert len(p.tools) == 1
    assert isinstance(p.tools[0], bokeh.models.HoverTool)
    assert p.title.text == "Not the default"
    # Test empty, checking for errors
    prof.clear()
    with pytest.warns(None) as record:
        prof.visualize(show=False, save=False)

    assert len(record) == 0
Example 56
def test_good_on_bad():
    path = os.path.join(os.path.dirname(__file__), 'data', 'awful.csv')
    good = processor()
    workflow = good.get_workflow(path)
    results = get(workflow, 'output').compile()

    errors = results['tables'][0]['errors']
    print(errors)
    assert len(errors) == 7

    report = errors[0]
    print(report)
    assert report['code'] == 'duplicate-header'
Example 57
def test_exceptions_propagate():
    class MyException(Exception):
        def __init__(self, a, b):
            self.a = a
            self.b = b

        def __str__(self):
            return "My Exception!"

    def f():
        raise MyException(1, 2)

    from dask.threaded import get

    try:
        get({"x": (f, )}, "x")
        assert False
    except MyException as e:
        assert "My Exception!" in str(e)
        assert "a" in dir(e)
        assert e.a == 1
        assert e.b == 2
Example 58
def test_cache_profiler_plot():
    with CacheProfiler(metric_name="non-standard") as cprof:
        get(dsk, "e")
    p = cprof.visualize(
        plot_width=500,
        plot_height=300,
        tools="hover",
        title="Not the default",
        show=False,
        save=False,
    )
    assert p.plot_width == 500
    assert p.plot_height == 300
    assert len(p.tools) == 1
    assert isinstance(p.tools[0], bokeh.models.HoverTool)
    assert p.title.text == "Not the default"
    assert p.axis[1].axis_label == "Cache Size (non-standard)"
    # Test empty, checking for errors
    cprof.clear()
    with pytest.warns(None) as record:
        cprof.visualize(show=False, save=False)

    assert len(record) == 0
Example 59
def getOrderOfExecution(dsk, finalTask, numConcurrentTasks=1):
    '''Get an execution order that minimizes backtracking and memory footprint.

    numConcurrentTasks can be more than 1 if several tasks can run at once.
    Pass the final task you want to achieve (one of the keys in dsk) as finalTask.'''
    from dask.callbacks import Callback
    from dask.threaded import get
    # time.clock() was removed in Python 3.8; perf_counter is a drop-in timer here
    from time import perf_counter as clock
    #from multiprocessing.pool import ThreadPool

    #dask.set_options(pool=ThreadPool(numConcurrentTasks))
    class PrintKeys(Callback):
        def __init__(self, numWorkers):
            self.equivTime = None
            self.numWorkers = numWorkers

        def _start(self, dsk):
            print("Working with {} concurrent tasks".format(self.numWorkers))
            self.startTime = clock()

        def _pretask(self, key, dask, state):
            """Print the key of every task as it's started"""
            pass

        def _posttask(self, key, result, dsk, state, id):
            print("Do {} <- {}, approx. {} time units".format(
                repr(key), dsk[key][1:], repr(result)))

        def _finish(self, dsk, state, errored):
            self.endTime = clock()
            dt = (self.endTime - self.startTime) * 100.
            print("Approximate time to complete: {} time units".format(dt))
            print("Equivalent single thread time: {} time units".format(
                dt * self.numWorkers))
            self.equivTime = dt * self.numWorkers

    with PrintKeys(numConcurrentTasks):
        get(dsk, finalTask, num_workers=numConcurrentTasks)
Example 60
def test_nonstandard_exceptions_propagate():
    class MyException(Exception):
        def __init__(self, a, b):
            self.a = a
            self.b = b

        def __str__(self):
            return "My Exception!"

    def f():
        raise MyException(1, 2)

    from dask.threaded import get

    try:
        get({'x': (f, )}, 'x')
        assert False
    except MyException as e:
        assert "My Exception!" in str(e)
        assert "Traceback" in str(e)
        assert 'a' in dir(e)
        assert 'traceback' in dir(e)
        assert e.exception.a == 1 and e.exception.b == 2
        assert e.a == 1 and e.b == 2