async def test_close_async(c, s, a, b):
    sem = await Semaphore(name="test")

    assert await sem.acquire()
    with pytest.warns(
            RuntimeWarning,
            match="Closing semaphore .* but there remain unreleased leases .*",
    ):
        await sem.close()

    with pytest.raises(RuntimeError,
                       match="Semaphore `test` not known or already closed."):
        await sem.acquire()

    sem2 = await Semaphore(name="t2", max_leases=1)
    assert await sem2.acquire()

    def f(sem_):
        return sem_.acquire()

    semaphore_object = s.extensions["semaphores"]
    fire_and_forget(c.submit(f, sem_=sem2))
    while not semaphore_object.metrics["pending"][
            "t2"]:  # Wait for the pending lease
        await asyncio.sleep(0.01)
    with pytest.warns(
            RuntimeWarning,
            match="Closing semaphore .* but there remain pending leases"):
        await sem2.close()

    assert not semaphore_object.max_leases
    assert not semaphore_object.leases
    assert not semaphore_object.events
    for metric_dict in semaphore_object.metrics.values():
        assert not metric_dict
Example #2
    def submit(self, fn: Callable, *args: Any, **kwargs: Any) -> Future:
        """
        Submit a function to the executor for execution. Returns a Future object.

        Args:
            - fn (Callable): function that is being submitted for execution
            - *args (Any): arguments to be passed to `fn`
            - **kwargs (Any): keyword arguments to be passed to `fn`

        Returns:
            - Future: a Future-like object that represents the computation of `fn(*args, **kwargs)`
        """
        task = kwargs["task"]
        workflow = self.client.new_workflow_stub(get_workflow(task.worker_id))
        dask_kwargs = self._prep_dask_kwargs()
        kwargs.update(dask_kwargs)

        if self.is_started and hasattr(self, "client"):
            if hasattr(task, "hydra_task"):
                # assign the future so the fire_and_forget/return below work in this branch too
                future = self.hydra_client.submit(fn, *args, **kwargs)
            else:
                future = self.client.submit(fn, *args, **kwargs)
        elif self.is_started:
            with worker_client(separate_thread=True) as client:
                future = client.submit(fn, *args, **kwargs)
        else:
            raise ValueError("This executor has not been started.")

        fire_and_forget(future)
        return future
Example #3
    def map(self, fn: Callable, *args: Any, **kwargs: Any) -> List[Future]:
        """
        Submit a function to be mapped over its iterable arguments.

        Args:
            - fn (Callable): function that is being submitted for execution
            - *args (Any): arguments that the function will be mapped over
            - **kwargs (Any): additional keyword arguments that will be passed to the Dask Client

        Returns:
            - List[Future]: a list of Future-like objects that represent each computation of
                fn(*a), where a = zip(*args)[i]

        """
        if not args:
            return []

        dask_kwargs = self._prep_dask_kwargs()
        kwargs.update(dask_kwargs)

        if self.is_started and hasattr(self, "client"):
            futures = self.client.map(fn, *args, **kwargs)
        elif self.is_started:
            with worker_client(separate_thread=True) as client:
                futures = client.map(fn, *args, **kwargs)
                return client.gather(futures)
        else:
            raise ValueError("This executor has not been started.")

        fire_and_forget(futures)
        return futures
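Note that fire_and_forget also accepts a list of futures, which is how the list returned by client.map is handed off above. A small hypothetical sketch of that pattern with a stand-in task:

from distributed import Client, fire_and_forget

def archive(record):
    # stand-in for side-effecting work, e.g. writing to external storage
    print("archiving", record)

client = Client()
futures = client.map(archive, ["a", "b", "c"])
fire_and_forget(futures)    # keeps every future in the list alive without local references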
Example #4
def dask_endpoint(owner, app_name, action):
    """
    Route dask simulation to the appropriate dask scheduler.
    """
    print(f"dask endpoint: {owner}/{app_name}/{action}")
    data = request.get_data()
    inputs = json.loads(data)
    print("inputs", inputs)
    addr = dask_scheduler_address(owner, app_name)
    job_id = str(uuid.uuid4())

    # Worker needs the job_id to push the results back to the
    # webapp.
    # The url and api token are passed as args instead of env
    # variables so that the wrapper has access to them
    # but the model does not.
    inputs.update({
        "job_id": job_id,
        "comp_url": os.environ.get("COMP_URL"),
        "comp_api_token": os.environ.get("COMP_API_TOKEN"),
        "timeout": get_time_out(owner, app_name),
    })

    with Client(addr) as c:
        fut = c.submit(dask_sim, **inputs)
        fire_and_forget(fut)
        return {"job_id": job_id, "qlength": 1}
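The endpoint returns the job_id immediately while the submitted simulation runs fire-and-forget on the cluster. A hypothetical caller; the host, route segments, and payload keys are placeholders:

import requests

resp = requests.post(
    "http://localhost:5000/myorg/myapp/sim",    # <owner>/<app_name>/<action> route (assumed)
    json={"meta_parameters": {}, "adjustment": {}},
)
print(resp.json()["job_id"])                    # results are pushed back by the worker later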
Example #5
    def submit(self, fn: Callable, *args: Any, **kwargs: Any) -> "Future":
        """
        Submit a function to the executor for execution. Returns a Future object.

        Args:
            - fn (Callable): function that is being submitted for execution
            - *args (Any): arguments to be passed to `fn`
            - **kwargs (Any): keyword arguments to be passed to `fn`

        Returns:
            - Future: a Future-like object that represents the computation of `fn(*args, **kwargs)`
        """
        # import dask functions here to decrease our import times
        from distributed import fire_and_forget, worker_client

        dask_kwargs = self._prep_dask_kwargs()
        kwargs.update(dask_kwargs)

        if self.is_started and hasattr(self, "client"):
            future = self.client.submit(fn, *args, **kwargs)
        elif self.is_started:
            with worker_client(separate_thread=True) as client:
                future = client.submit(fn, *args, **kwargs)
        else:
            raise ValueError("This executor has not been started.")

        fire_and_forget(future)
        return future
Example #6
    def run_job(self,
                job_type: str,
                spec: dict,
                scheduler: str,
                mode: str = "async") -> Union[ExecutedJob, InvalidJob]:

        from distributed import fire_and_forget
        import dask

        from mason_dask.jobs.executed import ExecutedJob as ExecutedDaskJob
        from mason_dask.jobs.executed import InvalidJob as InvalidDaskJob

        from mason_dask.utils.cluster_spec import ClusterSpec

        if job_type == "format":
            from mason_dask.jobs.format import FormatJob as DaskFormatJob
            job = DaskFormatJob(spec)
        elif job_type == "query":
            from mason_dask.jobs.query import QueryJob as DaskQueryJob
            job = DaskQueryJob(spec)
        else:
            raise NotImplementedError(f"Job not implemented: {job_type}")

        dask_job = job.validate()

        def to_mason_job(job: Result[ExecutedDaskJob, InvalidDaskJob]):
            j = compute(job)
            if isinstance(j, ExecutedDaskJob):
                return ExecutedJob("format-job", j.message)
            else:
                value = job._inner_value
                assert (isinstance(value, InvalidDaskJob))
                return InvalidJob(value.message)

        with self.client() as client:
            cluster_spec = ClusterSpec(client, scheduler=self.scheduler)

            final: Union[ExecutedJob, InvalidJob]
            if isinstance(dask_job, InvalidDaskJob):
                final = InvalidJob(f"Invalid Dask Job: {dask_job.message}")
            else:
                result: Result[ExecutedDaskJob, InvalidDaskJob]
                if scheduler.startswith("local"):
                    result = dask_job.run(cluster_spec)
                    final = to_mason_job(result)
                else:
                    dask.config.set(
                        {'distributed.scheduler.allowed-failures': 50})
                    future = client.submit(dask_job.run, cluster_spec)
                    if mode == "sync":
                        result = client.gather(future)
                        final = to_mason_job(result)
                    else:
                        fire_and_forget(future)
                        final = ExecutedJob(
                            f"Queued job {dask_job} to run against dask scheduler: {scheduler}"
                        )

        return final
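The sync/async split above is the heart of the method: client.gather blocks for the result, while fire_and_forget tells the scheduler to keep the task alive even after the local future is dropped. A stripped-down, hypothetical sketch of that decision with a stand-in job:

from distributed import Client, fire_and_forget

def long_running_job(x):
    return x + 1

client = Client()                          # e.g. Client("tcp://scheduler:8786")
future = client.submit(long_running_job, 41)

mode = "async"
if mode == "sync":
    result = client.gather(future)         # block until the job finishes
else:
    fire_and_forget(future)                # scheduler keeps the task alive unreferenced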
Example #7
    def run_flow(self, environment, config, context, **kwargs):
        future = self.dask_client.submit(
            run_flow_in_worker,
            environment=environment,
            config=config,
            context=context,
            pure=False,
        )
        distributed.fire_and_forget(future)
Example #8
def test_cancel_fire_and_forget(c, s, a, b):
    x = delayed(slowinc)(1, delay=0.05)
    y = delayed(slowinc)(x, delay=0.05)
    z = delayed(slowinc)(y, delay=0.05)
    w = delayed(slowinc)(z, delay=0.05)
    future = c.compute(w)
    fire_and_forget(future)

    yield gen.sleep(0.05)
    yield future.cancel(force=True)
    assert future.status == 'cancelled'
    assert not s.task_state
Example #9
def test_cancel_fire_and_forget(c, s, a, b):
    x = delayed(slowinc)(1, delay=0.05)
    y = delayed(slowinc)(x, delay=0.05)
    z = delayed(slowinc)(y, delay=0.05)
    w = delayed(slowinc)(z, delay=0.05)
    future = c.compute(w)
    fire_and_forget(future)

    yield gen.sleep(0.05)
    yield future.cancel(force=True)
    assert future.status == 'cancelled'
    assert not s.tasks
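Both versions of this test assert the same behaviour (the scheduler attribute checked simply changed from task_state to tasks across distributed releases): cancelling with force=True removes even a fired-and-forgotten graph. A hypothetical synchronous sketch with a stand-in slow task:

import time

from distributed import Client, fire_and_forget

def slow_task(x):
    time.sleep(0.5)
    return x

client = Client()
future = client.submit(slow_task, 1)
fire_and_forget(future)
future.cancel(force=True)    # force=True cancels despite the scheduler-side reference
print(future.status)         # 'cancelled'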
Example #10
def run(spec: dict, scheduler: str):
    class CompleteDaskJob:
        def __init__(self, message: str = ""):
            self.message = message

    class InvalidDaskJob():
        def __init__(self, message: str = ""):
            self.message = message

    class DaskQueryJob():
        def __init__(self, job_spec: dict):
            self.query_string = job_spec.get("query_string")
            self.database = job_spec.get("database")
            self.output_path = job_spec.get("output_path")

        def run_job(self) -> Union[CompleteDaskJob, InvalidDaskJob]:
            # df: DataFrame = dd.read_sql_table(self.query_string)
            if self.output_path:
                # df.to_parquet(self.output_path)
                return CompleteDaskJob(
                    "Job to query via Dask successfully queued to scheduler")
            else:
                return InvalidDaskJob(
                    "Output path required for Dask implementation of table query"
                )

    dask_job = DaskQueryJob(spec)
    mode = "async"

    if scheduler == "local":
        client = Client()
        dask_job.run_job()
    else:
        dask.config.set({'distributed.scheduler.allowed-failures': 50})
        client = Client(scheduler)
        future = client.submit(dask_job.run_job)
        if mode == "sync":
            client.gather(future)
        else:
            fire_and_forget(future)
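A hypothetical invocation of the run() function above; the query, database, output path, and scheduler address are all placeholders:

run(
    spec={
        "query_string": "SELECT * FROM events",
        "database": "analytics",
        "output_path": "s3://bucket/out.parquet",
    },
    scheduler="tcp://dask-scheduler:8786",
)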
Example #11
    def submit(self, fn: Callable, *args: Any, **kwargs: Any) -> Future:
        """
        Submit a function to the executor for execution. Returns a Future object.

        Args:
            - fn (Callable): function that is being submitted for execution
            - *args (Any): arguments to be passed to `fn`
            - **kwargs (Any): keyword arguments to be passed to `fn`

        Returns:
            - Future: a Future-like object which represents the computation of `fn(*args, **kwargs)`
        """
        if self.is_started and hasattr(self, "client"):
            future = self.client.submit(fn, *args, pure=False, **kwargs)
        elif self.is_started:
            with worker_client(separate_thread=True) as client:
                future = client.submit(fn, *args, pure=False, **kwargs)
        else:
            raise ValueError("This executor has not been started.")

        fire_and_forget(future)
        return future
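This variant passes pure=False, which gives every call a fresh task key. With the default pure=True, identical submissions hash to the same key and are deduplicated, which is rarely what you want for fire-and-forget side effects. A small sketch of the difference:

from distributed import Client

client = Client()

a = client.submit(print, "hello")                 # pure (default): key is a hash of fn + args
b = client.submit(print, "hello")
assert a.key == b.key                             # same key -> treated as the same task

c = client.submit(print, "hello", pure=False)     # impure: a fresh key on every call
d = client.submit(print, "hello", pure=False)
assert c.key != d.key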
Example #12
    def submit(self,
               fn: Callable,
               *args: Any,
               extra_context: dict = None,
               **kwargs: Any) -> "Future":
        """
        Submit a function to the executor for execution. Returns a Future object.

        Args:
            - fn (Callable): function that is being submitted for execution
            - *args (Any): arguments to be passed to `fn`
            - extra_context (dict, optional): an optional dictionary with extra information about the submitted task
            - **kwargs (Any): keyword arguments to be passed to `fn`

        Returns:
            - Future: a Future-like object that represents the computation of `fn(*args, **kwargs)`
        """
        # import dask functions here to decrease our import times
        from distributed import fire_and_forget, worker_client

        extra_context = extra_context or {}
        task_name = extra_context.get("task_full_name", "")
        task_tags = extra_context.get("task_tags", [])
        dask_kwargs = self._prep_dask_kwargs(task_name=task_name,
                                             task_tags=task_tags)
        kwargs.update(dask_kwargs)

        if self.is_started and hasattr(self, "client"):
            future = self.client.submit(fn, *args, **kwargs)
        elif self.is_started:
            with worker_client(separate_thread=True) as client:
                future = client.submit(fn, *args, **kwargs)
        else:
            raise ValueError("This executor has not been started.")

        fire_and_forget(future)
        return future
Example #13
    def cleanup(self, badstatuslist=['cancelled', 'error', 'lost'], keep=None):
        """ Clean up job list.
        Scans futures, removes finished jobs, and pushes results to relevant indices.
        badstatuslist can include 'cancelled', 'error', 'lost'.
        keep defines a scanId (string) key that should not be removed from dicts.
        """

        removed = 0
        cindexed = 0
        sdms = 0

        scanIds = [scanId for scanId in self.futures]
        if len(scanIds):
            logger.info("Checking on scanIds: {0}".format(','.join(scanIds)))

        # clean futures and get finished jobs
        removed = self.removefutures(badstatuslist)
        for scanId in self.futures:

            # check on finished
            finishedlist = [
                (seg, data, cc, acc)
                for (scanId0, futurelist) in iteritems(self.futures)
                for seg, data, cc, acc in futurelist
                if (acc.status == 'finished') and (scanId0 == scanId)
            ]
            self.finished[scanId] += len(finishedlist)
            if self.indexresults:
                elastic.indexscanstatus(scanId,
                                        pending=self.pending[scanId],
                                        finished=self.finished[scanId],
                                        errors=self.errors[scanId],
                                        indexprefix=self.indexprefix)

            # TODO: check on error handling for fire_and_forget
            for futures in finishedlist:
                seg, data, cc, acc = futures
                ncands, mocks = acc.result()

                # index mocks
                if self.indexresults and mocks:
                    distributed.fire_and_forget(
                        self.client.submit(elastic.indexmock,
                                           scanId,
                                           mocks,
                                           indexprefix=self.indexprefix))
                else:
                    logger.debug(
                        "No mocks indexed from scanId {0}".format(scanId))

                # index noises
                noisefile = self.states[scanId].noisefile
                if self.indexresults and os.path.exists(noisefile):
                    distributed.fire_and_forget(
                        self.client.submit(elastic.indexnoises,
                                           noisefile,
                                           scanId,
                                           indexprefix=self.indexprefix))
                else:
                    logger.debug(
                        "No noises indexed from scanId {0}.".format(scanId))

                # index cands
                if self.indexresults and ncands:
                    workdir = self.states[scanId].prefs.workdir
                    distributed.fire_and_forget(
                        self.client.submit(util.indexcands_and_plots,
                                           cc,
                                           scanId,
                                           self.tags,
                                           self.indexprefix,
                                           workdir,
                                           priority=5))
                else:
                    logger.debug(
                        "No cands indexed from scanId {0}".format(scanId))

                # optionally save and archive sdm/bdfs for segment
                if self.saveproducts and ncands:
                    distributed.fire_and_forget(
                        self.client.submit(createproducts,
                                           cc,
                                           data,
                                           self.archiveproducts,
                                           indexprefix=self.indexprefix,
                                           priority=5))
                    logger.info(
                        "Creating an SDM for {0}, segment {1}, with {2} candidates"
                        .format(scanId, seg, ncands))
                    sdms += 1
                else:
                    logger.debug(
                        "No SDMs plots moved for scanId {0}.".format(scanId))

                # remove job from list
                self.futures[scanId].remove(futures)
                removed += 1

        # clean up self.futures
        removeids = [
            scanId for scanId in self.futures
            if (len(self.futures[scanId]) == 0) and (scanId != keep)
        ]
        if removeids:
            logstr = ("No jobs left for scanIds: {0}.".format(
                ', '.join(removeids)))
            if keep is not None:
                logstr += (". Cleaning state and futures dicts (keeping {0})".
                           format(keep))
            else:
                logstr += ". Cleaning state and futures dicts."
            logger.info(logstr)

            for scanId in removeids:
                _ = self.futures.pop(scanId)
                _ = self.states.pop(scanId)
                _ = self.finished.pop(scanId)
                _ = self.errors.pop(scanId)
                try:
                    _ = self.known_segments.pop(scanId)
                except KeyError:
                    pass


#        _ = self.client.run(gc.collect)
        if removed or cindexed or sdms:
            logger.info(
                'Removed {0} jobs, indexed {1} cands, made {2} SDMs.'.format(
                    removed, cindexed, sdms))
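On the TODO above: fire_and_forget by itself surfaces no errors. One hedged alternative, not what this code does, is to attach a done-callback before forgetting the future so failures are at least logged; flaky and the logger setup below are placeholders:

import logging

from distributed import Client, fire_and_forget

logger = logging.getLogger(__name__)

def flaky(x):
    return 1 / x

def log_failure(fut):
    if fut.status == "error":
        logger.warning("background task %s failed: %s", fut.key, fut.exception())

client = Client()
future = client.submit(flaky, 0)
future.add_done_callback(log_failure)    # callback fires once the task finishes
fire_and_forget(future)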
Example #14
    def start_pipeline(self, scanId, cfile=None, segments=None):
        """ Start pipeline conditional on cluster state.
        Sets futures and state after submission keyed by scanId.
        segments arg can be used to select or slow segment submission.
        """

        st = self.states[scanId]
        w_memlim = self.read_overhead * st.vismem * 1e9
        if segments is None:
            segments = list(range(st.nsegment))

        vys_timeout = self.vys_timeout
        if st.metadata.datasource in ['vys', 'vyssim']:
            if self.vys_timeout is not None:
                logger.debug(
                    "vys_timeout factor set to fixed value of {0:.1f}x".format(
                        vys_timeout))
            else:
                assert self.vys_sec_per_spec is not None, "Must define vys_sec_per_spec to estimate vys_timeout"
                nspec = st.readints * st.nbl * st.nspw * st.npol
                vys_timeout = (st.t_segment +
                               self.vys_sec_per_spec * nspec) / st.t_segment
                logger.debug(
                    "vys_timeout factor scaled by nspec to {0:.1f}x".format(
                        vys_timeout))

        mockseg = random.choice(segments) if random.uniform(
            0, 1) < self.mockprob else None
        if mockseg is not None:
            logger.info("Mock set for scanId {0} in segment {1}".format(
                scanId, mockseg))

        # vys data means realtime operations must timeout within a scan time
        if st.metadata.datasource == 'vys':
            timeout = 0.9 * st.metadata.inttime * st.metadata.nints  # bit shorter than scan
        else:
            timeout = 0
        throttletime = self.throttle * st.metadata.inttime * st.metadata.nints / st.nsegment
        logger.info(
            'Submitting {0} segments for scanId {1} with {2:.1f}s per segment'.
            format(len(segments), scanId, throttletime))
        logger.debug('Read_overhead {0}, read_totfrac {1}, and '
                     'spill_limit {2} with timeout {3}s'.format(
                         self.read_overhead, self.read_totfrac,
                         self.spill_limit, timeout))

        tot_memlim = self.read_totfrac * sum([
            v['resources']['MEMORY']
            for v in itervalues(self.client.scheduler_info()['workers'])
            if 'READER' in v['resources']
        ])

        # submit segments
        t0 = time.Time.now().unix
        elapsedtime = 0
        nsubmitted = 0  # count number submitted from list segments
        segments = iter(segments)
        segment = next(segments)
        telcalset = self.set_telcalfile(scanId)
        while True:
            segsubtime = time.Time.now().unix
            if st.metadata.datasource == 'vys':
                endtime = time.Time(st.segmenttimes[segment][1],
                                    format='mjd').unix
                if endtime < segsubtime - 2:  # TODO: define buffer delay better
                    logger.warning(
                        "Segment {0} time window has passed ({1} < {2}). Skipping."
                        .format(segment, endtime, segsubtime - 1))
                    try:
                        segment = next(segments)
                        continue
                    except StopIteration:
                        logger.debug(
                            "No more segments for scanId {0}".format(scanId))
                        break

            # try setting telcal
            if not telcalset:
                telcalset = self.set_telcalfile(scanId)

            # submit if cluster ready and telcal available
            if (heuristics.reader_memory_ok(self.client, w_memlim)
                    and heuristics.readertotal_memory_ok(
                        self.client, tot_memlim)
                    and heuristics.spilled_memory_ok(limit=self.spill_limit,
                                                     daskdir=self.daskdir)
                    and (telcalset if self.requirecalibration else True)):

                # first time initialize scan
                if scanId not in self.futures:
                    self.futures[scanId] = []
                    self.errors[scanId] = 0
                    self.finished[scanId] = 0

                    if self.indexresults:
                        elastic.indexscan(
                            inmeta=self.states[scanId].metadata,
                            preferences=self.states[scanId].prefs,
                            indexprefix=self.indexprefix)
                    else:
                        logger.info("Not indexing scan or prefs.")

                futures = pipeline.pipeline_seg(st,
                                                segment,
                                                cl=self.client,
                                                cfile=cfile,
                                                vys_timeout=vys_timeout,
                                                mem_read=w_memlim,
                                                mem_search=2 * st.vismem * 1e9,
                                                mockseg=mockseg)
                self.futures[scanId].append(futures)
                nsubmitted += 1

                if self.data_logging:
                    segment, data, cc, acc = futures
                    distributed.fire_and_forget(
                        self.client.submit(util.data_logger,
                                           st,
                                           segment,
                                           data,
                                           fifo_timeout='0s',
                                           priority=-1))
                if self.indexresults:
                    elastic.indexscanstatus(scanId,
                                            pending=self.pending[scanId],
                                            finished=self.finished[scanId],
                                            errors=self.errors[scanId],
                                            indexprefix=self.indexprefix,
                                            nsegment=st.nsegment)

                try:
                    segment = next(segments)
                except StopIteration:
                    logger.info(
                        "No more segments for scanId {0}".format(scanId))
                    break

            else:
                if not heuristics.reader_memory_ok(self.client, w_memlim):
                    logger.info(
                        "System not ready. No reader available with required memory {0}"
                        .format(w_memlim))
                elif not heuristics.readertotal_memory_ok(
                        self.client, tot_memlim):
                    logger.info(
                        "System not ready. Total reader memory exceeds limit of {0}"
                        .format(tot_memlim))
                elif not heuristics.spilled_memory_ok(limit=self.spill_limit,
                                                      daskdir=self.daskdir):
                    logger.info(
                        "System not ready. Spilled memory exceeds limit of {0}"
                        .format(self.spill_limit))
                elif not (self.set_telcalfile(scanId)
                          if self.requirecalibration else True):
                    logger.info(
                        "System not ready. No telcalfile available for {0}".
                        format(scanId))

            # periodically check on submissions. always, if memory limited.
            if not (segment % 2) or not (
                    heuristics.reader_memory_ok(self.client, w_memlim) and
                    heuristics.readertotal_memory_ok(self.client, tot_memlim)
                    and heuristics.spilled_memory_ok(limit=self.spill_limit,
                                                     daskdir=self.daskdir)):
                self.cleanup(
                    keep=scanId)  # do not remove keys of ongoing submission

            # check timeout and wait time for next segment
            elapsedtime = time.Time.now().unix - t0
            if elapsedtime > timeout and timeout:
                logger.info("Submission timed out. Submitted {0}/{1} segments "
                            "in ScanId {2}".format(nsubmitted, st.nsegment,
                                                   scanId))
                break
            else:
                dt = time.Time.now().unix - segsubtime
                if dt < throttletime:
                    logger.debug("Waiting {0:.1f}s to submit segment.".format(
                        throttletime - dt))
                    sleep(throttletime - dt)
Example #15
    with joblib.parallel_backend('dask'):
        grid_search.fit(X, y)

    fs = gcsfs.GCSFileSystem()
    path = "gcs://pangeo-scratch/tomaugspurger/model-1.pkl"
    with fs.open(path, "wb") as f:
        joblib.dump(grid_search, f)
    return path


if __name__ == "__main__":
    auth = JupyterHubAuth(os.environ["PANGEO_TOKEN"])
    # Proxy address will be made easier to find.
    print("Connecting to Gateway")
    gateway = Gateway(
        address=
        "https://staging.us-central1-b.gcp.pangeo.io/services/dask-gateway/",
        proxy_address="tls://104.197.142.28:8786",
        auth=auth)
    cluster = gateway.new_cluster(shutdown_on_close=False)
    client = cluster.get_client()
    print("Dashboard:", client.dashboard_link)

    cluster.scale(4)
    client.wait_for_workers(4)
    print("Cluster ready")

    fut = client.submit(main)
    fire_and_forget(fut)
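Because the cluster is created with shutdown_on_close=False and the work is fired-and-forgotten, the computation can outlive this script. A hypothetical follow-up session could reconnect through the gateway later; picking the first listed cluster is just for illustration:

import os

from dask_gateway import Gateway
from dask_gateway.auth import JupyterHubAuth

gateway = Gateway(
    address="https://staging.us-central1-b.gcp.pangeo.io/services/dask-gateway/",
    proxy_address="tls://104.197.142.28:8786",
    auth=JupyterHubAuth(os.environ["PANGEO_TOKEN"]),
)
clusters = gateway.list_clusters()            # the cluster created above is still running
cluster = gateway.connect(clusters[0].name)
client = cluster.get_client()
print("Reconnected, dashboard:", client.dashboard_link)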