Code example #1
    def func():
        with worker_client() as ee:
            x = ee.submit(inc, 1, workers=a_address)
            y = ee.submit(inc, 2, workers=b_address)

            xx, yy = ee.gather([x, y])
        return xx, yy
Code example #2
 def f(i):
     with worker_client(separate_thread=False) as client:
         get_worker().count += 1
         assert get_worker().count <= 3
         sleep(random.random() / 40)
         assert get_worker().count <= 3
         get_worker().count -= 1
     return i
Code example #3
    def mysum():
        result = 0
        sub_tasks = [delayed(double)(i) for i in range(100)]

        with worker_client() as lc:
            futures = lc.compute(sub_tasks)
            for f in as_completed(futures):
                result += f.result()
        return result
Code example #4
File: test_queues.py  Project: tomMoral/distributed
 def f(i):
     with worker_client() as c:
         q = Queue('x', client=c)
         for _ in range(100):
             future = q.get()
             x = future.result()
             y = c.submit(inc, x)
             q.put(y)
             sleep(0.01)
         result = q.get().result()
         return result
Code example #5
    def func():
        with worker_client() as c:
            correct = True
            for data in [[1, 2], (1, 2), {1, 2}]:
                futures = c.scatter(data)
                correct &= type(futures) == type(data)

            o = object()
            futures = c.scatter({'x': o})
            correct &= get_worker().data['x'] is o
            return correct
Code example #6
File: test_variable.py  Project: tomMoral/distributed
 def f(i):
     with worker_client() as c:
         v = Variable('x', client=c)
         for _ in range(NITERS):
             future = v.get()
             x = future.result()
             y = c.submit(inc, x)
             v.set(y)
             sleep(0.01 * random.random())
         result = v.get().result()
         sleep(0.1)  # allow fire-and-forget messages to clear
         return result
Code example #7
    def func():
        with worker_client() as c:
            futures = c.scatter([1, 2, 3, 4, 5])
            assert isinstance(futures, (list, tuple))
            assert len(futures) == 5

            x = dict(get_worker().data)
            y = {f.key: i for f, i in zip(futures, [1, 2, 3, 4, 5])}
            assert x == y

            total = c.submit(sum, futures)
            return total.result()
Code example #8
 def func():
     with worker_client() as c:
         x = np.ones(5)
         future = c.scatter(x)
         assert future.type == np.ndarray
Code example #9
 def f():
     with worker_client() as lc:
         return lc.loop is get_worker().loop
Code example #10
    def full_func(*args, **kwargs):
        print('DECORATOR distributed...', args)

        # Pass the call straight through unless the first argument is an I/O path.
        if not is_io_path(args[0]):
            return func(*args, **kwargs)

        from distributed import worker_client
        with worker_client(timeout=1000) as e:

            # Collect modification times on a file worker (resources={'files': 1}).
            mtimes = e.submit(recursive_func_application_with_list_output,
                              *(list(args), get_mtime_from_path),
                              resources={
                                  'files': 1
                              }).result()
            highest_mtime = np.array(mtimes[1:]).max()

            # Skip recomputation when the output's mtime is set (not -1) and is
            # at least as new as every input; return the cached path instead.
            if not mtimes[0] == -1:
                if mtimes[0] >= highest_mtime:
                    return args[0]

            # Process every input argument (all but the target path) on a file worker.
            nargs = []
            for iarg, arg in enumerate(args):
                if not iarg:
                    continue
                res = e.submit(recursive_func_application,
                               *(arg, process_input_element),
                               resources={
                                   'files': 1
                               }).result()
                nargs.append(res)

            # Run the wrapped function, then write the result out on a file worker.
            result = func(*nargs, **kwargs)
            nresult = e.submit(process_output_element,
                               *(result, args[0]),
                               resources={
                                   'files': 1
                               }).result()
        return nresult
Code example #11
    def f():
        with worker_client():
            pass

        return threading.current_thread() in get_worker().executor._threads
Code example #12
 def func(x):
     with worker_client() as wc:
         y = wc.submit(lambda: 1 + x)
         return wc.gather(y)
Code example #13
 def func():
     with worker_client(timeout=0) as wc:
         print("hello")
Code example #14
 def mysum():
     with worker_client() as c:
         with c.get_executor() as e:
             return sum(e.map(double, range(30)))
Code example #15
 def f():
     with worker_client():
         return dask.delayed(lambda x: x)(1).compute()
Code example #16
 def func(x):
     with worker_client() as wc:
         y = wc.submit(lambda: 1 + x)
         return wc.gather(y)
Code example #17
 def func():
     with worker_client(timeout=0) as wc:
         print('hello')
Code example #18
 def f(x):
     with worker_client() as c:
         return True
Code example #19
def add(x, y):
    with distributed.worker_client():
        time.sleep(30 * 60)
    return x + y
Code example #20
File: test_steal.py  Project: tomMoral/distributed
 def long(delay):
     with worker_client() as c:
         sleep(delay)
Code example #21
    def run(self,
            matrices: Optional[Union[Union[str, Path], List[Path]]] = None,
            filepath_column: str = "filepath",
            **kwargs) -> List[Path]:
        """
        Invert the list of matrices provided.

        If running from the command line, this will look up the prior step's produced
        manifest to retrieve the matrices. If running in the workflow, it uses the
        direct output of the prior step.

        Parameters
        ----------
        matrices: Optional[Union[Union[str, Path], List[Path]]]
            Either a path to a csv manifest to read or a list of paths to the
            serialized arrays to invert.
            Default: self.step_local_staging_dir.parent / "mappedraw" / manifest.csv
        filepath_column: str
            If providing a path to a csv manifest, the column to use for matrices.
            Default: "filepath"

        Returns
        -------
        inverted: List[Path]
            The list of paths to the inverted matrices.
        """
        # Default matrices value
        if matrices is None:
            matrices = self.step_local_staging_dir.parent / "mappedraw" / "manifest.csv"

        # Get the matrices from the csv if provided a path
        if isinstance(matrices, (str, Path)):
            # Resolve the filepath and check for existence
            matrices = Path(matrices).resolve(strict=True)

            # Read csv
            raw_data = pd.read_csv(matrices)

            # Convert the specified column into a list of paths
            matrices = [Path(f) for f in raw_data[filepath_column]]

        # Storage dir
        inverted_dir = self.step_local_staging_dir / "inverted"

        # Connect to an executor
        with worker_client() as client:
            # Submit one inversion task per matrix
            futures = client.map(
                self._invert_array,
                matrices,
                [inverted_dir for i in range(len(matrices))],
            )

            # Block until all inversion tasks are done
            inversion_infos = client.gather(futures)

        # Configure manifest dataframe for storage tracking
        self.manifest = pd.DataFrame(index=range(len(matrices)),
                                     columns=["filepath"])
        for i, path in inversion_infos:
            self.manifest.at[i, "filepath"] = path

        # Save the manifest
        self.manifest.to_csv(self.step_local_staging_dir / "manifest.csv",
                             index=False)

        # Return list of paths
        return list(self.manifest["filepath"])
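The docstring above lets matrices default to a csv manifest with a "filepath" column produced by the prior step. Below is a minimal, self-contained sketch of that manifest convention only; the file name manifest.csv matches the documented default, while the .npy paths are illustrative assumptions, not taken from the project.

from pathlib import Path

import pandas as pd

# Write a tiny manifest shaped like the one run() expects by default.
manifest = Path("manifest.csv")
pd.DataFrame({"filepath": ["arrays/a.npy", "arrays/b.npy"]}).to_csv(manifest, index=False)

# Resolve it the same way run() does: read the csv and turn the chosen
# column into a list of Paths.
raw_data = pd.read_csv(manifest.resolve(strict=True))
matrices = [Path(f) for f in raw_data["filepath"]]
print(matrices)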
Code example #22
 def f():
     with worker_client() as lc:
         return lc.loop is get_worker().loop
Code example #23
    def f():
        with worker_client():
            pass

        return threading.current_thread() in get_worker().executor._threads
Code example #24
 def f(x):
     with worker_client() as c:
         return True
Code example #25
File: parser.py  Project: panoramichq/panoramic-cli
def _process_table_identifiers(
        pdf: DataFrame,
        dimension_combinations: Optional[List[List[str]]] = None,
        max_combination_length: int = 5) -> List[List[str]]:
    """
    Dask wrapper around extracting identifiers from a single sampled table (pdf).

    This method submits multiple sub-tasks to identify possible identifier combinations, waits for them to complete
    and returns one or more dimension combinations.

    Note that the `worker_client` call forces the task to secede from the worker's thread pool, so it does not
    block any other computations and cannot cause a deadlock while waiting for sub-tasks to finish.
    """
    with timed_block('[idparser] Computing number of rows took {:.3f} seconds',
                     logger, logging.DEBUG):
        num_rows = len(pdf)

    with timed_block('[idparser] Pruning columns took {:.3f} seconds', logger,
                     logging.DEBUG):
        # drop columns whose non-null ratio falls below NON_NULL_VALUES_RATIO - nulls cannot be part of the primary key
        columns = [
            col for col, count in pdf.count().compute().items()
            if count / num_rows >= NON_NULL_VALUES_RATIO
        ]

    with worker_client(separate_thread=True) as client:  # type: Client
        with timed_block(
                '[idparser] Generating combinations took {:.3f} seconds',
                logger, logging.DEBUG):
            # explore all possible dimension combinations if none are provided
            if dimension_combinations is None:
                all_possible_combinations = itertools.chain.from_iterable(
                    itertools.combinations(columns, i) for i in range(
                        1,
                        min(max_combination_length, len(columns)) + 1))
                generated_combinations: List[List[str]] = [
                    sorted(combination)
                    for combination in all_possible_combinations
                ]
            else:
                generated_combinations = dimension_combinations

        with timed_block(
                '[idparser] Waiting for all combination tasks took {:.3f} seconds',
                logger, logging.DEBUG):
            with timed_block(
                    '[idparser] Submitting all combination tasks took {:.3f} seconds',
                    logger, logging.DEBUG):
                # submit "per dimension combination" tasks
                futures = client.map(
                    lambda combination:
                    _process_possible_identifier_combination(pdf, combination),
                    generated_combinations,
                    key=[
                        f'comb_{combination}_{str(uuid4())}'
                        for combination in generated_combinations
                    ],
                    # priority=100,
                    # batch_size=32,
                    retries=2,
                )
            results = client.gather(futures)

    return [
        dimensions for dimensions, num_duplicates in results
        if num_duplicates == 0
    ]
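The secede behaviour described in the docstring of example #25 is what the snippets on this page rely on: a task that opens worker_client() leaves the worker's thread pool while it waits on the sub-tasks it submitted. Below is a minimal, self-contained sketch on a local cluster; inc and nested are illustrative names, not taken from the projects above.

from distributed import Client, worker_client


def inc(x):
    return x + 1


def nested(x):
    # worker_client() secedes this task from the worker's thread pool, so
    # blocking on the sub-task below cannot deadlock the worker, even when
    # the worker has a single thread.
    with worker_client() as client:
        future = client.submit(inc, x)
        return future.result()


if __name__ == "__main__":
    client = Client(n_workers=1, threads_per_worker=1)
    print(client.submit(nested, 10).result())  # prints 11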
Code example #26
 def func(x):
     with worker_client() as c:
         x = c.submit(inc, x)
         y = c.submit(double, x)
         result = x.result() + y.result()
         return result
Code example #27
 def func():
     with worker_client() as c:
         x = np.ones(5)
         future = c.scatter(x)
         assert future.type == np.ndarray
Code example #28
 def f():
     with worker_client():
         return dask.delayed(lambda x: x)(1).compute()
Code example #29
 def f():
     with worker_client() as lc:
         return lc.loop is lc.worker.loop
Code example #30
 def func(x):
     with worker_client() as c:
         x = c.submit(inc, x)
         y = c.submit(double, x)
         result = x.result() + y.result()
         return result
Code example #31
File: test_steal.py  Project: fgebhart/distributed
 def long(delay):
     with worker_client() as c:
         sleep(delay)
Code example #32
 def mysum():
     with worker_client() as c:
         with c.get_executor() as e:
             return sum(e.map(double, range(30)))
Code example #33
 def go(self):
     with worker_client() as wc:
         futures = [wc.submit(self.go_, pset_i=i, **pset)
                    for i, pset in self.iterpsets()]
         futures = wc.gather(futures)
     return futures