def test_has_keyword():
    """Check ``has_keyword`` on a plain function and on a partial of it."""

    def foo(a, b, c=None):
        pass

    # Every declared parameter of the plain function must be reported.
    for keyword in ('a', 'b', 'c'):
        assert has_keyword(foo, keyword)

    # ``a`` is pre-bound by keyword; ``b`` and ``c`` must still be reported.
    bar = functools.partial(foo, a=1)
    for keyword in ('b', 'c'):
        assert has_keyword(bar, keyword)
def test_has_keyword():
    """Verify ``has_keyword`` for a function and a ``functools.partial`` wrapper."""

    def foo(a, b, c=None):
        pass

    bar = functools.partial(foo, a=1)

    # (callable, keywords that must be found on it), checked in this order.
    expectations = [
        (foo, ("a", "b", "c")),
        (bar, ("b", "c")),
    ]
    for target, keywords in expectations:
        for keyword in keywords:
            assert has_keyword(target, keyword)
def apply(
    self,
    func,
    raw=None,
    engine="cython",
    engine_kwargs=None,
    args=None,
    kwargs=None,
):
    """Forward an ``apply`` call to ``self._call_method``.

    Parameters
    ----------
    func : callable
        Function handed to the underlying ``apply``.
    raw : bool, optional
        When ``None``, the default declared for ``raw`` in the signature of
        the metadata object's rolling ``apply`` is used instead.
    engine, engine_kwargs : optional
        Only forwarded when the underlying ``apply`` accepts an ``engine``
        keyword.
    args : tuple, optional
        Extra positional arguments for ``func``; ``None`` becomes ``()``.
    kwargs : dict, optional
        Extra keyword arguments for ``func``; ``None`` becomes ``{}``.

    Returns
    -------
    Whatever ``self._call_method("apply", ...)`` returns.
    """
    compat_kwargs = {}
    kwargs = kwargs or {}
    args = args or ()
    # A rolling object built on the metadata is used only to introspect the
    # signature of its ``apply``.
    meta = self.obj._meta.rolling(0)
    if has_keyword(meta.apply, "engine"):
        # PANDAS_GT_100
        compat_kwargs = dict(engine=engine, engine_kwargs=engine_kwargs)
    if raw is None:
        # PANDAS_GT_100: The default changed from None to False
        # ``parameters["raw"]`` is an ``inspect.Parameter``; the value to
        # forward is its ``.default``, not the Parameter object itself.
        raw = inspect.signature(meta.apply).parameters["raw"].default

    return self._call_method(
        "apply", func, raw=raw, args=args, kwargs=kwargs, **compat_kwargs
    )
def compute_meta(func, _dtype, *args, **kwargs):
    """Call ``func`` on meta versions of its arguments and return the result.

    Array-like positional and keyword arguments are replaced by
    ``meta_from_array(...)`` of themselves before ``func`` is called.
    Floating-point error reporting and ``RuntimeWarning`` are silenced for
    the duration of the call.

    Returns
    -------
    The value produced by ``func`` (cast to ``_dtype`` when possible, and
    wrapped with ``np.array`` when it is a scalar), or ``None`` when ``func``
    fails in a way that is not a keyword-argument ``TypeError``.

    Raises
    ------
    TypeError
        Re-raised when the message indicates an unknown/invalid keyword
        argument.
    """
    with np.errstate(all="ignore"), warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)

        def _as_meta(value):
            # Only array-likes are converted; everything else passes through.
            return meta_from_array(value) if is_arraylike(value) else value

        args_meta = [_as_meta(arg) for arg in args]
        kwargs_meta = {key: _as_meta(val) for key, val in kwargs.items()}

        # todo: look for alternative to this, causes issues when using map_blocks()
        # with np.vectorize, such as dask.array.routines._isnonzero_vec().
        if isinstance(func, np.vectorize):
            meta = func(*args_meta)
        else:
            try:
                # some reduction functions need to know they are computing meta
                if has_keyword(func, "computing_meta"):
                    kwargs_meta["computing_meta"] = True
                meta = func(*args_meta, **kwargs_meta)
            except TypeError as err:
                message = str(err)
                keyword_error_fragments = (
                    "unexpected keyword argument",
                    "is an invalid keyword for",
                    "Did not understand the following kwargs",
                )
                # Keyword-argument mistakes are propagated to the caller.
                for fragment in keyword_error_fragments:
                    if fragment in message:
                        raise
                return None
            except ValueError as err:
                # min/max functions have no identity, just use the same input
                # type when there's only one
                if (
                    len(args_meta) != 1
                    or "zero-size array to reduction operation" not in str(err)
                ):
                    return None
                meta = args_meta[0]
            except Exception:
                return None

        if _dtype and getattr(meta, "dtype", None) != _dtype:
            # Objects without ``astype`` are left untouched.
            try:
                meta = meta.astype(_dtype)
            except AttributeError:
                pass

        if np.isscalar(meta):
            meta = np.array(meta)

        return meta
def _create_task(func, smaller_src_arrays, src_block_info, dst_arrays, dst_block_info, position, fill_value, kwargs):
    """Create a task for resample_blocks.

    Returns a ``(task, dependencies)`` pair. ``task`` is a tuple whose first
    item is ``func`` partially applied with the keyword arguments and whose
    remaining items are the keys of the source and destination blocks.
    ``dependencies`` lists the source arrays.
    """
    from dask.utils import has_keyword

    # Every source array is a dependency and is addressed by its all-zero key.
    dependencies = list(smaller_src_arrays)
    args = [(src.name, *([0] * src.ndim)) for src in dependencies]

    # Destination arrays: zero on the leading dimensions, the last two
    # entries of ``position`` on the trailing ones.
    args.extend(
        (dst.name, *([0] * (dst.ndim - 2)), *list(position[-2:]))
        for dst in dst_arrays
    )

    func_kwargs = kwargs.copy()
    func_kwargs['fill_value'] = fill_value
    if has_keyword(func, "block_info"):
        # Key 0 carries the source block info, key None the destination one.
        func_kwargs["block_info"] = {0: src_block_info, None: dst_block_info}

    return (partial(func, **func_kwargs), *args), dependencies
def map_overlap(
    func,
    df,
    before,
    after,
    *args,
    meta=no_default,
    enforce_metadata=True,
    transform_divisions=True,
    align_dataframes=True,
    **kwargs,
):
    """Apply a function to each partition, sharing rows with adjacent partitions.

    Parameters
    ----------
    func : function
        The function applied to each partition. If this function accepts
        the special ``partition_info`` keyword argument, it will receive
        information on the partition's relative location within the
        dataframe.
    df: dd.DataFrame, dd.Series
    args, kwargs :
        Positional and keyword arguments to pass to the function.
        Positional arguments are computed on a per-partition basis, while
        keyword arguments are shared across all partitions. The partition
        itself will be the first positional argument, with all other
        arguments passed *after*. Arguments can be ``Scalar``, ``Delayed``,
        or regular Python objects. DataFrame-like args (both dask and
        pandas) will be repartitioned to align (if necessary) before
        applying the function; see ``align_dataframes`` to control this
        behavior.
    enforce_metadata : bool, default True
        Whether to enforce at runtime that the structure of the DataFrame
        produced by ``func`` actually matches the structure of ``meta``.
        This will rename and reorder columns for each partition,
        and will raise an error if this doesn't work or types don't match.
    before : int or timedelta
        The rows to prepend to partition ``i`` from the end of
        partition ``i - 1``.
    after : int or timedelta
        The rows to append to partition ``i`` from the beginning
        of partition ``i + 1``.
    transform_divisions : bool, default True
        Whether to apply the function onto the divisions and apply those
        transformed divisions to the output.
    align_dataframes : bool, default True
        Whether to repartition DataFrame- or Series-like args
        (both dask and pandas) so their divisions align before applying
        the function. This requires all inputs to have known divisions.
        Single-partition inputs will be split into multiple partitions.

        If False, all inputs must have either the same number of partitions
        or a single partition. Single-partition inputs will be broadcast to
        every partition of multi-partition inputs.
    $META

    Returns
    -------
    A ``Scalar`` when every positional argument (including ``df``) is a
    ``Scalar``; otherwise the object built by
    ``new_dd_object(graph, name, meta, divisions)``.

    Raises
    ------
    TypeError
        If ``before`` or ``after`` is a ``datetime.timedelta`` and the
        datetime check on the first frame's index fails.
    ValueError
        If neither is a ``timedelta`` and ``before``/``after`` are not both
        non-negative integers, or if aligning the partitions fails while
        ``align_dataframes`` is true.

    See Also
    --------
    dd.DataFrame.map_overlap
    """
    # Treat ``df`` as the first positional argument from here on.
    args = (df, ) + args

    dfs = [df for df in args if isinstance(df, _Frame)]

    if isinstance(before, datetime.timedelta) or isinstance(
            after, datetime.timedelta):
        # Only the first frame's index is inspected for the datetime check.
        if not is_datetime64_any_dtype(
                dfs[0].index._meta_nonempty.inferred_type):
            raise TypeError(
                "Must have a `DatetimeIndex` when using string offset "
                "for `before` and `after`")
    else:
        # Zero is accepted here even though the message says "positive".
        if not (isinstance(before, Integral) and before >= 0
                and isinstance(after, Integral) and after >= 0):
            raise ValueError("before and after must be positive integers")

    # ``token`` and ``parent_meta`` are consumed here, so they are removed
    # from ``kwargs`` before ``kwargs`` is tokenized or forwarded.
    name = kwargs.pop("token", None)
    parent_meta = kwargs.pop("parent_meta", None)

    assert callable(func)
    if name is not None:
        # With an explicit ``token`` name, ``func`` is not part of the hash.
        token = tokenize(meta, before, after, *args, **kwargs)
    else:
        name = "overlap-" + funcname(func)
        token = tokenize(func, meta, before, after, *args, **kwargs)
    name = f"{name}-{token}"

    if align_dataframes:
        args = _maybe_from_pandas(args)
        try:
            args = _maybe_align_partitions(args)
        except ValueError as e:
            # Re-raise with a hint about how to opt out of alignment.
            raise ValueError(
                f"{e}. If you don't want the partitions to be aligned, and are "
                "calling `map_overlap` directly, pass `align_dataframes=False`."
            ) from e

    meta = _get_meta_map_partitions(args, dfs, func, kwargs, meta, parent_meta)

    if all(isinstance(arg, Scalar) for arg in args):
        # Shortcut: only scalars, so a single task calling ``func`` suffices.
        layer = {
            (name, 0): (
                apply,
                func,
                (tuple, [(arg._name, 0) for arg in args]),
                kwargs,
            )
        }
        graph = HighLevelGraph.from_collections(name, layer, dependencies=args)
        return Scalar(graph, name, meta)

    args2 = []
    dependencies = []

    divisions = _get_divisions_map_partitions(align_dataframes,
                                              transform_divisions, dfs, func,
                                              args, kwargs)

    def _handle_frame_argument(arg):
        # Build a new collection in which task ``i`` is
        # ``_combined_parts(prev, current, next, before, after)``.
        dsk = {}

        prevs_parts_dsk, prevs = _get_previous_partitions(arg, before)
        dsk.update(prevs_parts_dsk)

        nexts_parts_dsk, nexts = _get_nexts_partitions(arg, after)
        dsk.update(nexts_parts_dsk)

        name_a = "overlap-concat-" + tokenize(arg)
        for i, (prev, current,
                next) in enumerate(zip(prevs, arg.__dask_keys__(), nexts)):
            key = (name_a, i)
            dsk[key] = (_combined_parts, prev, current, next, before, after)

        graph = HighLevelGraph.from_collections(name_a, dsk, dependencies=[arg])
        # Uses the ``meta`` and ``divisions`` of the enclosing call.
        return new_dd_object(graph, name_a, meta, divisions)

    for arg in args:
        if isinstance(arg, _Frame):
            # Frames are replaced by their overlap-extended counterpart.
            arg = _handle_frame_argument(arg)
            args2.append(arg)
            dependencies.append(arg)
            continue
        arg = normalize_arg(arg)
        arg2, collections = unpack_collections(arg)
        if collections:
            args2.append(arg2)
            dependencies.extend(collections)
        else:
            args2.append(arg)

    kwargs3 = {}
    # ``simple`` stays True while no keyword value contains a collection.
    simple = True
    for k, v in kwargs.items():
        v = normalize_arg(v)
        v, collections = unpack_collections(v)
        dependencies.extend(collections)
        kwargs3[k] = v
        if collections:
            simple = False

    if has_keyword(func, "partition_info"):
        # One entry per partition, keyed by ``(i,)``.
        partition_info = {(i, ): {
            "number": i,
            "division": division
        }
                          for i, division in enumerate(divisions[:-1])}

        args2.insert(0, BlockwiseDepDict(partition_info))
        orig_func = func

        # The info is inserted as the first positional argument above; this
        # wrapper hands it to the original function as a keyword instead.
        def func(partition_info, *args, **kwargs):
            return orig_func(*args, **kwargs, partition_info=partition_info)

    if enforce_metadata:
        dsk = partitionwise_graph(
            apply_and_enforce,
            name,
            func,
            before,
            after,
            *args2,
            dependencies=dependencies,
            _func=overlap_chunk,
            _meta=meta,
            **kwargs3,
        )
    else:
        # Pass the original kwargs when none contained a collection,
        # otherwise the unpacked versions.
        kwargs4 = kwargs if simple else kwargs3
        dsk = partitionwise_graph(
            overlap_chunk,
            name,
            func,
            before,
            after,
            *args2,
            **kwargs4,
            dependencies=dependencies,
        )

    graph = HighLevelGraph.from_collections(name, dsk, dependencies=dependencies)
    return new_dd_object(graph, name, meta, divisions)