Example #1
 def __init__(self, conf: Any = None):
     p = ParamDict(FUGUE_DASK_DEFAULT_CONF)
     p.update(ParamDict(conf))
     super().__init__(p)
     self._fs = FileSystem()
     self._log = logging.getLogger()
     self._native = NativeExecutionEngine(conf=conf)
Example #2
def test_configvar():
    t = MockTaskForVar()
    s = ConfigSpec("a", dict, True, True, None)
    c = _ConfigVar(t, s)
    raises(AssertionError, lambda: c.get())  # required not set

    p = ParamDict()
    s = ConfigSpec("a", dict, True, False, p)
    c = _ConfigVar(t, s)
    assert p is c.get()
    c.set(None)
    assert c.get() is None

    p = ParamDict()
    s = ConfigSpec("a", ParamDict, False, False, p)
    c = _ConfigVar(t, s)
    assert p is c.get()
    raises(AssertionError, lambda: c.set(None))
    assert p is c.get()

    p2 = ParamDict()
    s2 = ConfigSpec("x", dict, False, False, p2)
    c2 = _ConfigVar(t, s2)
    assert p2 is c2.get()  # not set, use the default
    c2.set_dependency(c)  # set parent
    assert p is c2.get()  # get parent value
    p3 = ParamDict()
    c.set(p3)  # set on parent will change child get
    assert p3 is c.get()
    assert p3 is c2.get()
Example #3
def test_run_processor():
    df = ArrayDataFrame([[0]], "a:int")
    dfs = DataFrames(df1=df, df2=df)
    dfs2 = DataFrames(df, df)
    assert not dfs2.has_key

    o1 = _to_processor(t3)
    assert 4 == o1(df, df, 2).as_array()[0][0]

    o1._params = ParamDict([("a", 2)], deep=False)
    o1._execution_engine = None
    assert 4 == o1.process(dfs).as_array()[0][0]
    o1._params = ParamDict([("a", 2)], deep=False)
    o1._execution_engine = None
    assert 4 == o1.process(dfs2).as_array()[0][0]

    o1 = _to_processor(t5)
    assert 4 == o1("dummy", dfs, 2)[0][0]
    assert 4 == o1("dummy", dfs2, 2)[0][0]
    o1._params = ParamDict([("a", 2)], deep=False)
    o1._execution_engine = "dummy"
    assert 4 == o1.process(dfs).as_array()[0][0]
    o1._params = ParamDict([("a", 2)], deep=False)
    o1._execution_engine = "dummy"
    assert 4 == o1.process(dfs2).as_array()[0][0]
Example #4
 def __init__(self, conf: Any = None):
     p = ParamDict(FUGUE_DASK_DEFAULT_CONF)
     p.update(ParamDict(conf))
     super().__init__(p)
     self._fs = FileSystem()
     self._log = logging.getLogger()
     self._default_sql_engine = QPDDaskEngine(self)
Example #5
def test_run_creator():
    o1 = _to_creator(t3)
    assert 4 == o1(4).as_array()[0][0]

    o1._params = ParamDict([("a", 2)], deep=False)
    o1._execution_engine = None
    assert 2 == o1.create().as_array()[0][0]

    o1 = _to_creator(t5)
    assert 4 == o1("dummy", 4)[0][0]
    o1._params = ParamDict([("a", 2)], deep=False)
    o1._execution_engine = "dummy"
    assert 2 == o1.create().as_array()[0][0]
Example #6
 def __init__(self, *args: Any, **kwargs: Any):
     p = ParamDict()
     for a in args:
         if a is None:
             continue
         elif isinstance(a, PartitionSpec):
             self._update_dict(p, a.jsondict)
         elif isinstance(a, Dict):
             self._update_dict(p, a)
         elif isinstance(a, str):
             self._update_dict(p, json.loads(a))
         else:
             raise TypeError(f"{a} is not supported")
     self._update_dict(p, kwargs)
     self._num_partitions = p.get("num_partitions", "0")
     self._algo = p.get("algo", "").lower()
     self._partition_by = p.get("partition_by", [])
     aot(
         len(self._partition_by) == len(set(self._partition_by)),
         SyntaxError(f"{self._partition_by} has duplicated keys"),
     )
     self._presort = self._parse_presort_exp(p.get_or_none("presort", object))
     if any(x in self._presort for x in self._partition_by):
         raise SyntaxError(
             "partition by overlap with presort: "
             + f"{self._partition_by}, {self._presort}"
         )
     # TODO: currently, size limit not in use
     self._size_limit = to_size(p.get("size_limit", "0"))
     self._row_limit = p.get("row_limit", 0)
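The constructor above accepts an existing PartitionSpec, a plain dict, a JSON string, or keyword arguments, and merges them in order before reading the recognized keys. A minimal sketch of the equivalent construction forms, assuming PartitionSpec is importable from the fugue package:

from fugue import PartitionSpec  # assumed import location

s1 = PartitionSpec(partition_by=["a", "b"], num_partitions="4")            # keyword arguments
s2 = PartitionSpec({"partition_by": ["a", "b"], "num_partitions": "4"})    # dict
s3 = PartitionSpec('{"partition_by": ["a", "b"], "num_partitions": "4"}')  # JSON string
s4 = PartitionSpec(s1, num_partitions="8")  # copy an existing spec, overriding one key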
Example #7
def _load_avro(p: FileParser,
               columns: Any = None,
               **kwargs: Any) -> Tuple[pd.DataFrame, Any]:

    kw = ParamDict(kwargs)
    process_record = None
    if "process_record" in kw:
        process_record = kw["process_record"]
        del kw["process_record"]

    with open(p.uri, "rb") as fp:  # QN is p.uri the path?
        # Configure Avro reader
        avro_reader = reader(fp)
        # Load records in memory
        if process_record:
            records = [process_record(r) for r in avro_reader]
        else:
            records = list(avro_reader)

        # Populate pandas.DataFrame with records
        pdf = pd.DataFrame.from_records(records)

    if columns is None:
        return pdf, None
    if isinstance(columns, list):  # column names
        return pdf[columns], None
    schema = Schema(columns)

    # Return created DataFrame
    return pdf[schema.names], schema
Example #8
 def jsondict(self) -> ParamDict:
     res = ParamDict()
     for k, v in self.paramdict.items():
         if isinstance(v, type):
             v = get_full_type_path(v)
         res[k] = v
     return res
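The property above makes the spec JSON-friendly by replacing any type value with its import-path string. A tiny illustrative sketch, assuming get_full_type_path is importable from triad.utils.convert:

import json
from triad.collections.dict import ParamDict
from triad.utils.convert import get_full_type_path  # assumed import location

d = ParamDict(dict(name="col", data_type=ParamDict))
d["data_type"] = get_full_type_path(d["data_type"])  # type object -> import-path string
print(json.dumps(dict(d)))  # serializable now that no raw type objects remain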
Example #9
 def _load_csv(self, p: List[str], columns: Any = None, **kwargs: Any) -> DataFrame:
     kw = ParamDict(kwargs)
     infer_schema = kw.get("infer_schema", False)
     if infer_schema:
         kw["inferSchema"] = True
     if "infer_schema" in kw:
         del kw["infer_schema"]
     header = str(kw.get_or_none("header", object)).lower()
     if "header" in kw:
         del kw["header"]
     reader = self._session.read.format("csv")
     reader.options(**kw)
     if header == "true":
         reader.option("header", "true")
         if columns is None:
             return SparkDataFrame(reader.load(p))
         if isinstance(columns, list):  # column names
             return SparkDataFrame(reader.load(p)[columns])
         schema = Schema(columns)
         return SparkDataFrame(reader.load(p)[schema.names], schema)
     if header in ["false", "none"]:
         reader.option("header", "false")
         if columns is None:
             raise InvalidOperationError("columns must be set if without header")
         if isinstance(columns, list):  # column names
             sdf = reader.load(p)
             inferred = to_schema(sdf)
             renames = [f"{k} AS {v}" for k, v in zip(inferred.names, columns)]
             return SparkDataFrame(sdf.selectExpr(*renames))
         schema = Schema(columns)
         sdf = reader.schema(to_spark_schema(schema)).load(p)
         return SparkDataFrame(sdf, schema)
     else:
         raise NotImplementedError(f"{header} is not supported")
Example #10
def _save_avro(df: LocalDataFrame, p: FileParser, **kwargs: Any):
    """Save pandas dataframe as avro.
    If providing your own schema, the usage of schema argument is preferred

    :param schema: Avro Schema determines dtypes saved
    """
    import pandavro as pdx

    kw = ParamDict(kwargs)

    # pandavro defaults
    schema = None
    append = False
    times_as_micros = True

    if "schema" in kw:
        schema = kw["schema"]
        del kw["schema"]

    if "append" in kw:
        append = kw[
            "append"]  # default is overwrite (False) instead of append (True)
        del kw["append"]

    if "times_as_micros" in kw:
        times_as_micros = kw["times_as_micros"]
        del kw["times_as_micros"]

    pdf = df.as_pandas()
    pdx.to_avro(p.uri,
                pdf,
                schema=schema,
                append=append,
                times_as_micros=times_as_micros,
                **kw)
Example #11
def _save_avro(df: LocalDataFrame,
               p: FileParser,
               columns: Any = None,
               **kwargs: Any):
    """Save pandas dataframe as avro.
    If providing your own schema, the usage of schema argument is preferred

    """

    kw = ParamDict(kwargs)
    # pandavro defaults
    schema = None
    append = False
    times_as_micros = True

    if "schema" in kw:
        schema = kw["schema"]
        if schema is None:
            if columns is not None:
                schema = _convert_pyarrow_to_avro_schema(df, columns)
        else:
            if columns:
                # both schema and columns provided
                raise Exception("set columns to None when schema is provided")

        del kw["infer_schema"]

    if "infer_schema" in kw:
        infer_schema = kw["infer_schema"]
        if infer_schema and (schema is not None):
            # infer_schema set to True but schema was provided
            raise Exception(
                "set infer_schema to False when schema is provided")
        del kw["infer_schema"]

    if "append" in kw:
        append = kw[
            "append"]  # default is overwrite (False) instead of append (True)
        del kw["append"]

    if "times_as_micros" in kw:
        times_as_micros = kw["times_as_micros"]
        del kw["times_as_micros"]

    pdf = df.as_pandas()
    pdx.to_avro(p.uri,
                pdf,
                schema=schema,
                append=append,
                times_as_micros=times_as_micros,
                **kw)
Example #12
 def __init__(self,
              name: str,
              data_type: Any,
              nullable: bool,
              metadata: Any = None):
     self.name = assert_triad_var_name(name)
     self.data_type = to_type(data_type)
     self.nullable = nullable
     self.metadata = ParamDict(metadata, deep=True)
     self.metadata.set_readonly()
Example #13
 def jsondict(self) -> ParamDict:
     """Get json serializeable dict of the spec"""
     return ParamDict(
         dict(
             num_partitions=self._num_partitions,
             algo=self._algo,
             partition_by=self._partition_by,
             presort=self.presort_expr,
             size_limit=self._size_limit,
             row_limit=self._row_limit,
         ))
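Because the constructor in Example #6 accepts a dict (and routes a PartitionSpec argument through a.jsondict in the same way), the property above supports a simple round trip. A short sketch, assuming PartitionSpec is importable from fugue:

spec = PartitionSpec(partition_by=["a", "b"], num_partitions="4")
clone = PartitionSpec(spec.jsondict)  # same merge path as passing `spec` itself
assert dict(clone.jsondict) == dict(spec.jsondict)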
Example #14
def test_run_outputter():
    df = ArrayDataFrame([[0]], "a:int")
    dfs = DataFrames(df1=df, df2=df)
    dfs2 = DataFrames(df, df)
    assert not dfs2.has_key

    class Ct(object):
        pass

    c = Ct()
    o1 = _to_outputter(t3)
    o1(df, df, 2, c)
    assert 4 == c.value
    c.value = 0
    o1._params = ParamDict([("a", 2), ("b", c)], deep=False)
    o1._execution_engine = None
    o1.process(dfs)
    assert 4 == c.value
    c.value = 0
    o1._params = ParamDict([("a", 2), ("b", c)], deep=False)
    o1._execution_engine = None
    o1.process(dfs2)
    assert 4 == c.value

    c = Ct()
    o1 = _to_outputter(t5)
    o1("dummy", dfs, 2, c)
    assert 4 == c.value
    c.value = 0
    o1("dummy", dfs2, 2, c)
    assert 4 == c.value
    c.value = 0
    o1._params = ParamDict([("a", 2), ("b", c)], deep=False)
    o1._execution_engine = NativeExecutionEngine()
    o1.process(dfs)
    assert 4 == c.value
    c.value = 0
    o1._params = ParamDict([("a", 2), ("b", c)], deep=False)
    o1._execution_engine = NativeExecutionEngine()
    o1.process(dfs2)
    assert 4 == c.value
Example #15
    def map(
        self,
        df: DataFrame,
        map_func: Callable[[PartitionCursor, LocalDataFrame], LocalDataFrame],
        output_schema: Any,
        partition_spec: PartitionSpec,
        metadata: Any = None,
        on_init: Optional[Callable[[int, DataFrame], Any]] = None,
    ) -> DataFrame:
        if partition_spec.num_partitions != "0":
            self.log.warning(
                "%s doesn't respect num_partitions %s",
                self,
                partition_spec.num_partitions,
            )
        cursor = partition_spec.get_cursor(df.schema, 0)
        if on_init is not None:
            on_init(0, df)
        if len(partition_spec.partition_by) == 0:  # no partition
            df = to_local_df(df)
            cursor.set(df.peek_array(), 0, 0)
            output_df = map_func(cursor, df)
            if (isinstance(output_df, PandasDataFrame)
                    and output_df.schema != output_schema):
                output_df = PandasDataFrame(output_df.native, output_schema)
            assert_or_throw(
                output_df.schema == output_schema,
                lambda: f"map output {output_df.schema} "
                f"mismatches given {output_schema}",
            )
            output_df._metadata = ParamDict(metadata, deep=True)
            output_df._metadata.set_readonly()
            return self.to_df(output_df)
        presort = partition_spec.presort
        presort_keys = list(presort.keys())
        presort_asc = list(presort.values())
        output_schema = Schema(output_schema)

        def _map(pdf: pd.DataFrame) -> pd.DataFrame:
            if len(presort_keys) > 0:
                pdf = pdf.sort_values(presort_keys, ascending=presort_asc)
            input_df = PandasDataFrame(pdf.reset_index(drop=True),
                                       df.schema,
                                       pandas_df_wrapper=True)
            cursor.set(input_df.peek_array(), cursor.partition_no + 1, 0)
            output_df = map_func(cursor, input_df)
            return output_df.as_pandas()

        result = self.pl_utils.safe_groupby_apply(df.as_pandas(),
                                                  partition_spec.partition_by,
                                                  _map)
        return PandasDataFrame(result, output_schema, metadata)
Example #16
 def jsondict(self) -> ParamDict:
     res = ParamDict(
         dict(
             configs=[c.jsondict for c in self.configs.values()],
             inputs=[c.jsondict for c in self.inputs.values()],
             outputs=[c.jsondict for c in self.outputs.values()],
             func=get_full_type_path(self.func),
             metadata=self.metadata,
             deterministic=self.deterministic,
             lazy=self.lazy,
         ))
     if self._node_spec is not None:
         res["node_spec"] = self.node_spec.jsondict
     return res
Example #17
 def __init__(self, schema: Any = None, metadata: Any = None):
     if not callable(schema):
         schema = _input_schema(schema).assert_not_empty()
         schema.set_readonly()
         self._schema: Union[Schema, Callable[[], Schema]] = schema
         self._schema_discovered = True
     else:
         self._schema: Union[Schema,
                             Callable[[], Schema]] = schema  # type: ignore
         self._schema_discovered = False
     self._metadata = (metadata if isinstance(metadata, ParamDict) else
                       ParamDict(metadata, deep=True))
     self._metadata.set_readonly()
     self._lazy_schema_lock = RLock()
Example #18
 def __init__(
     self,
     creator: Any,
     schema: Any = None,
     params: Any = None,
     deterministic: bool = True,
     lazy: bool = True,
 ):
     self._creator = _to_creator(creator, schema)
     self._creator._params = ParamDict(params)
     super().__init__(params=params,
                      input_n=0,
                      output_n=1,
                      deterministic=deterministic,
                      lazy=lazy)
Example #19
 def __init__(self, *args: Any, **kwargs: Any):  # noqa: C901
     p = ParamDict()
     if (
         len(args) == 1
         and len(kwargs) == 0
         and isinstance(args[0], str)
         and args[0].lower() == "per_row"
     ):
         p["algo"] = "even"
         p["num_partitions"] = "ROWCOUNT"
     else:
         for a in args:
             if a is None:
                 continue
             elif isinstance(a, PartitionSpec):
                 self._update_dict(p, a.jsondict)
             elif isinstance(a, Dict):
                 self._update_dict(p, a)
             elif isinstance(a, str):
                 self._update_dict(p, json.loads(a))
             else:
                 raise TypeError(f"{a} is not supported")
         self._update_dict(p, kwargs)
     self._num_partitions = p.get("num_partitions", "0")
     self._algo = p.get("algo", "").lower()
     if "partition_by" not in p:
         self._partition_by: List[str] = []
     elif isinstance(p["partition_by"], str):
         self._partition_by = [p["partition_by"]]
     elif isinstance(p["partition_by"], (list, tuple)):
         self._partition_by = list(p["partition_by"])
     else:
         raise SyntaxError(p["partition_by"])
     aot(
         len(self._partition_by) == len(set(self._partition_by)),
         SyntaxError(f"{self._partition_by} has duplicated keys"),
     )
     self._presort = parse_presort_exp(p.get_or_none("presort", object))
     if any(x in self._presort for x in self._partition_by):
         raise SyntaxError(
             "partition by overlap with presort: "
             + f"{self._partition_by}, {self._presort}"
         )
     # TODO: currently, size limit not in use
     self._size_limit = to_size(p.get("size_limit", "0"))
     self._row_limit = p.get("row_limit", 0)
Example #20
def _load_csv(p: FileParser,
              columns: Any = None,
              **kwargs: Any) -> Tuple[pd.DataFrame, Any]:
    kw = ParamDict(kwargs)
    infer_schema = kw.get("infer_schema", False)
    if not infer_schema:
        kw["dtype"] = object
    if "infer_schema" in kw:
        del kw["infer_schema"]
    header: Any = False
    if "header" in kw:
        header = kw["header"]
        del kw["header"]
    if str(header) in ["True", "0"]:
        pdf = _safe_load_csv(p.uri, **{"index_col": False, "header": 0, **kw})
        if columns is None:
            return pdf, None
        if isinstance(columns, list):  # column names
            return pdf[columns], None
        schema = Schema(columns)
        return pdf[schema.names], schema
    if header is None or str(header) == "False":
        if columns is None:
            raise InvalidOperationError(
                "columns must be set if without header")
        if isinstance(columns, list):  # column names
            pdf = _safe_load_csv(
                p.uri, **{
                    "index_col": False,
                    "header": None,
                    "names": columns,
                    **kw
                })
            return pdf, None
        schema = Schema(columns)
        pdf = _safe_load_csv(
            p.uri, **{
                "index_col": False,
                "header": None,
                "names": schema.names,
                **kw
            })
        return pdf, schema
    else:
        raise NotImplementedError(f"{header} is not supported")
Example #21
 def __init__(
     self,
     configs: Any,
     inputs: Any,
     outputs: Any,
     func: Any,
     metadata: Any = None,
     deterministic: bool = True,
     lazy: bool = False,
 ):
     self.configs = self._parse_spec_collection(configs, ConfigSpec)
     self.inputs = self._parse_spec_collection(inputs, InputSpec)
     self.outputs = self._parse_spec_collection(outputs, OutputSpec)
     self.metadata = ParamDict(metadata, deep=True)
     self.func = to_function(func)
     self.deterministic = deterministic
     self.lazy = lazy
     self._node_spec: Optional["_NodeSpec"] = None
Example #22
 def process(self, dfs: DataFrames) -> None:
     df = dfs[0]
     tf = _to_output_transformer(
         self.params.get_or_none("transformer", object), )
     tf._workflow_conf = self.execution_engine.conf
     tf._params = self.params.get("params", ParamDict())  # type: ignore
     tf._partition_spec = self.partition_spec  # type: ignore
     rpc_handler = to_rpc_handler(
         self.params.get_or_throw("rpc_handler", object))
     if not isinstance(rpc_handler, EmptyRPCHandler):
         tf._rpc_client = self.execution_engine.rpc_server.make_client(
             rpc_handler)
     ie = self.params.get("ignore_errors", [])
     self._ignore_errors = [to_type(x, Exception) for x in ie]
     tf.validate_on_runtime(df)
     if isinstance(tf, Transformer):
         self.transform(df, tf)
     else:
         self.cotransform(df, tf)
Example #23
    def __init__(
        self,
        cache: Any = NoOpCache,
        engine: Any = SequentialExecutionEngine,
        hooks: Any = WorkflowHooks,
        logger: Any = None,
        config: Any = None,
    ):
        self._conf: ParamDict = ParamDict(config)
        self._abort_requested: Event = Event()

        self._cache: WorkflowResultCache = self._parse_config(
            cache, WorkflowResultCache, [self])
        self._engine: WorkflowExecutionEngine = self._parse_config(
            engine, WorkflowExecutionEngine, [self])
        self._hooks: WorkflowHooks = self._parse_config(
            hooks, WorkflowHooks, [self])
        if logger is None:
            logger = logging.getLogger()
        self._logger: logging.Logger = self._parse_config(
            logger, logging.Logger, [])
Example #24
def _load_single_avro(path: str, **kwargs: Any) -> pd.DataFrame:
    from fastavro import reader

    kw = ParamDict(kwargs)
    process_record = None
    if "process_record" in kw:
        process_record = kw["process_record"]
        del kw["process_record"]

    with FileSystem().openbin(path) as fp:
        # Configure Avro reader
        avro_reader = reader(fp)
        # Load records in memory
        if process_record:
            records = [process_record(r) for r in avro_reader]

        else:
            records = list(avro_reader)

        # Populate pandas.DataFrame with records
        return pd.DataFrame.from_records(records)
Example #25
 def __init__(
     self,
     input_n: int,
     processor: Any,
     schema: Any,
     params: Any,
     pre_partition: Any = None,
     deterministic: bool = True,
     lazy: bool = False,
     input_names: Optional[List[str]] = None,
 ):
     self._processor = _to_processor(processor, schema)
     self._processor._params = ParamDict(params)
     self._processor._partition_spec = PartitionSpec(pre_partition)
     super().__init__(
         params=params,
         input_n=input_n,
         output_n=1,
         deterministic=deterministic,
         lazy=lazy,
         input_names=input_names,
     )
Example #26
 def __init__(
     self,
     input_n: int,
     outputter: Any,
     params: Any,
     pre_partition: Any = None,
     deterministic: bool = True,
     lazy: bool = False,
     input_names: Optional[List[str]] = None,
 ):
     assert_or_throw(input_n > 0,
                     FugueWorkflowError("must have at least one input"))
     self._outputter = _to_outputter(outputter)
     self._outputter._params = ParamDict(params)
     self._outputter._partition_spec = PartitionSpec(pre_partition)
     super().__init__(
         params=params,
         input_n=input_n,
         output_n=1,
         deterministic=deterministic,
         lazy=lazy,
         input_names=input_names,
     )
Example #27
 def __init__(self, *args: Any, **kwargs: Any):
     super().__init__(*args, **kwargs)
     self._sql_vars: Dict[str, WorkflowDataFrame] = {}
     self._sql_conf = ParamDict({**FUGUE_SQL_DEFAULT_CONF, **super().conf})
Example #28
def test_input():
    t = MockTaskForVar()
    s = OutputSpec("o", dict, False)
    o = _Output(t, s)
    p = ParamDict()
    ii = InputSpec("x", dict, False, False, default_value=p, default_on_timeout=True)
    i = _Input(t, ii)
    i.set_dependency(o)
    raises(ValueError, lambda: o.set(None))
    raises(ValueError, lambda: i.get())

    t = MockTaskForVar()
    s = OutputSpec("o", ParamDict, False)
    o = _Output(t, s)
    raises(AssertionError, lambda: InputSpec("x", dict, False, False,
                                             timeout="0.1s",
                                             default_value=None,
                                             default_on_timeout=True))

    # Input linked with Output
    t = MockTaskForVar()
    s = OutputSpec("o", ParamDict, False)
    o = _Output(t, s)
    p = ParamDict()
    p2 = ParamDict()
    ii = InputSpec("x", dict, False, False, timeout="0.1s",
                   default_value=p, default_on_timeout=True)
    i = _Input(t, ii).set_dependency(o)
    assert p is i.get()
    o.set(p2)
    assert p is not i.get()
    assert p2 is i.get()
    # Input linked with Input
    i2 = _Input(t, ii).set_dependency(i)
    assert p is not i2.get()
    assert p2 is i2.get()

    t = MockTaskForVar()
    s = OutputSpec("o", ParamDict, False)
    o = _Output(t, s)
    p = ParamDict()
    p2 = ParamDict()
    ii = InputSpec("x", dict, False, False, timeout="0.1s",
                   default_value=p, default_on_timeout=False)
    i = _Input(t, ii).set_dependency(o)
    raises(TimeoutError, lambda: i.get())

    # Output skipped, input without default will raise error
    t = MockTaskForVar()
    s = OutputSpec("o", ParamDict, False)
    o = _Output(t, s)
    p = ParamDict()
    ii = InputSpec("x", dict, False)
    i = _Input(t, ii).set_dependency(o)
    o.skip()
    raises(SkippedError, lambda: i.get())

    # Output skipped, input with default will return default
    t = MockTaskForVar()
    s = OutputSpec("o", ParamDict, False)
    o = _Output(t, s)
    p = ParamDict()
    ii = InputSpec("x", dict, False, False, p)
    i = _Input(t, ii).set_dependency(o)
    o.skip()
    assert p is i.get()

    # Output -> workflow output -> Input
    t = MockTaskForVar()
    s = OutputSpec("o", ParamDict, False)
    oo = _Output(t, s)  # task output
    o = _Output(t, s)  # workflow output
    o.set_dependency(oo)
    p = ParamDict()
    ii = InputSpec("x", dict, False)
    i = _Input(t, ii).set_dependency(o)
    oo.set(p)
    assert p is i.get()
Example #29
 def paramdict(self) -> ParamDict:
     return ParamDict((x, self.__dict__[x]) for x in self.attributes)
Example #30
from triad.collections.dict import ParamDict

DEFAULT_CONFIG = ParamDict({"fugue.dask.dataframe.default.partitions": 16})
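Across these examples, ParamDict behaves like a dict with a few conveniences: construction from dicts, (key, value) pairs, or keyword data, optional deep copies, typed/defaulted getters, and a read-only mode. A consolidated sketch using only the calls that appear above (keys and values are illustrative):

from triad.collections.dict import ParamDict

conf = ParamDict({"fugue.dask.dataframe.default.partitions": 16})
conf.update(ParamDict({"my.conf.key": "x"}))       # merge another dict (Example #1)
pairs = ParamDict([("a", 2)], deep=False)          # build from (key, value) pairs (Example #3)
print(conf.get("missing.key", "fallback"))         # default when the key is absent
print(conf.get_or_none("missing.key", object))     # None instead of raising
try:
    conf.get_or_throw("missing.key", object)       # raises when the key is absent (Example #22)
except Exception:
    pass
meta = ParamDict({"k": [1, 2]}, deep=True)         # deep-copy the input (Example #12)
meta.set_readonly()                                # later set/del calls will fail (Example #12)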