def jsondict(self) -> ParamDict:
    """Return a JSON-serializable :class:`ParamDict` copy of ``paramdict``.

    Any value that is a class object is replaced by its full type path
    string so the result can round-trip through JSON.
    """
    result = ParamDict()
    for key, value in self.paramdict.items():
        result[key] = get_full_type_path(value) if isinstance(value, type) else value
    return result
def __init__(self, func: Callable):
    """Wrap *func* and derive a deterministic uuid from its identity.

    Lambdas have no stable import path, so they all share the uuid of
    the literal string ``"lambda"``; other callables use their full
    type path.

    NOTE(review): if ``LambdaType`` here is ``types.LambdaType`` it is
    the same object as ``types.FunctionType``, so every plain ``def``
    function would also take the "lambda" branch — confirm intended.

    :raises ValueError: if *func* is not callable.
    """
    super().__init__()
    assert_or_throw(callable(func), lambda: ValueError(func))
    self._func = func
    if isinstance(func, LambdaType):
        uid = to_uuid("lambda")
    else:
        uid = to_uuid(get_full_type_path(func))
    self._uuid = uid
def function_to_taskspec(
    func: Callable,
    is_config: Callable[[List[Dict[str, Any]]], List[bool]],
    deterministic: bool = True,
    lazy: bool = False,
) -> TaskSpec:
    """Convert a plain python function into a :class:`TaskSpec`.

    Each positional parameter becomes either a config or an input
    (decided by *is_config* over the parsed parameter dicts); the return
    annotation (a single type or a ``Tuple[...]``) becomes the outputs,
    named ``_0``, ``_1``, ...

    :param func: the function to wrap; must not use varargs or kwargs
    :param is_config: given the parsed parameter dicts, returns one bool
        per parameter — True means the parameter is a config
    :param deterministic: passed through to :class:`TaskSpec`
    :param lazy: passed through to :class:`TaskSpec`
    :return: the constructed :class:`TaskSpec`
    :raises AssertionError: if *func* has varargs/kwargs, or a dependency
        (non-config) parameter has a default value
    """
    specs = inspect.getfullargspec(func)
    sig = inspect.signature(func)
    annotations = get_type_hints(func)
    assert_or_throw(
        specs.varargs is None and specs.varkw is None and len(specs.kwonlyargs) == 0,
        "Function can't have varargs or kwargs",
    )
    inputs: List[InputSpec] = []
    configs: List[ConfigSpec] = []
    outputs: List[OutputSpec] = []
    arr: List[Dict[str, Any]] = []
    for k, w in sig.parameters.items():
        anno = annotations.get(k, w.annotation)
        a = _parse_annotation(anno)
        a["name"] = k
        # `empty` is a sentinel: compare by identity so a default value
        # object with a permissive __eq__ can't masquerade as "no default".
        if w.default is inspect.Parameter.empty:
            a["required"] = True
        else:
            a["required"] = False
            a["default_value"] = w.default
        arr.append(a)
    for cfg, a in zip(is_config(arr), arr):
        if cfg:
            configs.append(ConfigSpec(**a))
        else:
            # dependencies are wired from upstream tasks, so a default
            # value would never be used — reject it early
            assert_or_throw(
                a["required"], f"{a}: dependency must not have default value"
            )
            inputs.append(InputSpec(**a))
    n = 0
    anno = annotations.get("return", sig.return_annotation)
    is_multiple = _is_tuple(anno)
    for x in list(anno.__args__) if is_multiple else [anno]:
        # skip missing annotations and None (e.g. Optional's NoneType arm)
        if x is inspect.Parameter.empty or x is type(None):
            continue
        a = _parse_annotation(x)
        a["name"] = f"_{n}"
        outputs.append(OutputSpec(**a))
        n += 1
    metadata = dict(__interfaceless_func=get_full_type_path(func))
    return TaskSpec(
        configs,
        inputs,
        outputs,
        _interfaceless_wrapper,
        metadata,
        deterministic=deterministic,
        lazy=lazy,
    )
def __uuid__(self) -> str:
    """Deterministic id derived from every field that defines this spec."""
    identity = (
        self.configs,
        self.inputs,
        self.outputs,
        get_full_type_path(self.func),
        self.metadata,
        self.deterministic,
        self.lazy,
        self._node_spec,
    )
    return to_uuid(*identity)
def test_start_stop():
    """Duplicate start()/stop() calls are no-ops; a second round restarts cleanly."""
    engine = _MockExecutionEngine(
        conf={"fugue.rpc.server": get_full_type_path(_MockRPC)}
    )
    # first round: back-to-back duplicate calls must each count only once
    engine.start()
    engine.start()
    engine.stop()
    engine.stop()
    # second round: the engine can be restarted after a full stop
    engine.start()
    engine.stop()
    assert engine._start == 2
    assert engine._stop == 2
    assert _MockRPC._start == 2
    assert _MockRPC._stop == 2
def jsondict(self) -> ParamDict:
    """Serialize this task spec into a JSON-compatible :class:`ParamDict`."""
    data = {
        "configs": [c.jsondict for c in self.configs.values()],
        "inputs": [i.jsondict for i in self.inputs.values()],
        "outputs": [o.jsondict for o in self.outputs.values()],
        "func": get_full_type_path(self.func),
        "metadata": self.metadata,
        "deterministic": self.deterministic,
        "lazy": self.lazy,
    }
    res = ParamDict(data)
    # node_spec is optional; include it only when attached
    if self._node_spec is not None:
        res["node_spec"] = self.node_spec.jsondict
    return res
def test_select(self):
    """End-to-end checks of ``dag.select``.

    Covers: a bare ``* FROM`` pass-through, a computed column with WHERE,
    an inner join (with and without the leading ``SELECT`` keyword),
    overriding the SQL engine by class and by type-path string, a query
    with no input dataframe, and select after ``transform``.
    """
    with self.dag() as dag:
        a = dag.df([[1, 10], [2, 20], [3, 30]], "x:long,y:long")
        b = dag.df([[2, 20, 40], [3, 30, 90]], "x:long,y:long,z:long")
        dag.select("* FROM", a).assert_eq(a)
        dag.select("SELECT *,x*y AS z FROM", a, "WHERE x>=2").assert_eq(b)
        c = dag.df([[2, 20, 40], [3, 30, 90]], "x:long,y:long,zb:long")
        dag.select(
            " SELECT t1.*,z AS zb FROM ",
            a,
            "AS t1 INNER JOIN",
            b,
            "AS t2 ON t1.x=t2.x ",
        ).assert_eq(c)

        # the leading SELECT keyword is optional
        dag.select(
            "t1.*,z AS zb FROM ", a, "AS t1 INNER JOIN", b, "AS t2 ON t1.x=t2.x"
        ).assert_eq(c)

        # specify sql engine by class
        dag.select(
            "SELECT t1.*,z AS zb FROM ",
            a,
            "AS t1 INNER JOIN",
            b,
            "AS t2 ON t1.x=t2.x",
            sql_engine=SqliteEngine,
        ).assert_eq(c)

        # specify sql engine by its full type path string
        dag.select(
            "SELECT t1.*,z AS zb FROM ",
            a,
            "AS t1 INNER JOIN",
            b,
            "AS t2 ON t1.x=t2.x",
            sql_engine=get_full_type_path(SqliteEngine),
        ).assert_eq(c)

        # no input dataframe; value is at the int64 boundary
        dag.select("9223372036854775807 AS a").assert_eq(
            dag.df([[9223372036854775807]], "a:long")
        )

        # make sure transform -> select works
        b = a.transform(mock_tf1)
        a = a.transform(mock_tf1)
        aa = dag.select("* FROM", a)
        dag.select("* FROM", b).assert_eq(aa)
def __uuid__(self) -> str:
    """Deterministic id combining the wrapped function, its params and return type."""
    func_path = get_full_type_path(self._func)
    return to_uuid(func_path, self._params, self._rt)
def test_get_full_type_path():
    """get_full_type_path resolves functions, classes, builtins and instances;
    lambdas and None raise TypeError."""
    raises(TypeError, lambda: get_full_type_path(lambda x: x + 1))
    raises(TypeError, lambda: get_full_type_path(None))
    cases = [
        ("tests.utils.test_convert.dummy_for_test", dummy_for_test),
        ("tests.utils.test_convert.__Dummy__", __Dummy__),
        # an alias of a class resolves to the class's own path
        ("tests.utils.convert_examples.SubClass", SubClassSame),
        ("builtins.int", int),
        ("builtins.dict", dict),
        ("builtins.Exception", Exception),
        # instances resolve to the path of their type
        ("builtins.int", 123),
        ("builtins.str", "ad"),
        ("tests.utils.test_convert.__Dummy__", __Dummy__()),
    ]
    for expected, obj in cases:
        assert expected == get_full_type_path(obj)
def _to_model_str(model: Any) -> Any:
    """Normalize *model* (expression string or model object) to its full type path."""
    resolved = _to_model(model) if isinstance(model, str) else model
    return get_full_type_path(resolved)
def _sk_stack_cv(
    _sk__model: str,
    _sk__estimators: str,
    _sk__train_df: pd.DataFrame,
    _sk__scoring: Any,
    _sk__stack_cv: int = 2,
    _sk__method: str = "auto",
    _sk__passthrough: bool = False,
    _sk__cv: int = 5,
    _sk__feature_prefix: str = "",
    _sk__label_col: str = "label",
    _sk__save_path: str = "",
    **kwargs: Any,
) -> Dict[str, Any]:
    """Cross-validate a scikit-learn stacking ensemble and report a tuning result.

    :param _sk__model: type-path expression of the final (meta) estimator;
        ``**kwargs`` are passed to its constructor
    :param _sk__estimators: JSON list of dicts, each containing a
        ``_sk__model`` key plus constructor params for one base estimator
    :param _sk__train_df: training data including the label column
    :param _sk__scoring: scoring passed to ``cross_val_score``
    :param _sk__stack_cv: inner cv folds of the stacking ensemble
    :param _sk__method: ``stack_method`` for StackingClassifier (classifier only)
    :param _sk__passthrough: whether base features pass through to the meta model
    :param _sk__cv: outer cross-validation folds
    :param _sk__feature_prefix: only columns starting with this prefix are used
    :param _sk__label_col: name of the label column
    :param _sk__save_path: if non-empty, fit on all data and pickle the model there
    :return: dict with ``error`` (negated mean cv score), ``hp`` (the
        reconstructed hyperparameter dict) and ``metadata``
    """
    # build the meta estimator and the named base estimators from expressions
    final_estimator = _to_model(_sk__model)(**kwargs)
    estimators: List[Tuple[str, Any]] = []
    for i, d in enumerate(json.loads(_sk__estimators)):
        key = f"_{i}"
        m = _to_model(d.pop("_sk__model"))
        estimators.append((key, m(**d)))
    # classifier vs regressor chooses the stacking wrapper;
    # only the classifier variant supports stack_method
    if is_classifier(final_estimator):
        model = StackingClassifier(
            estimators,
            final_estimator,
            cv=_sk__stack_cv,
            stack_method=_sk__method,
            passthrough=_sk__passthrough,
            n_jobs=kwargs.get("n_jobs", 1),
        )
    else:
        model = StackingRegressor(
            estimators,
            final_estimator,
            cv=_sk__stack_cv,
            passthrough=_sk__passthrough,
            n_jobs=kwargs.get("n_jobs", 1),
        )
    # fixed-seed shuffle so repeated runs score the same folds
    train_df = _sk__train_df.sample(frac=1, random_state=0).reset_index(drop=True)
    train_x = train_df.drop([_sk__label_col], axis=1)
    cols = [x for x in train_x.columns if x.startswith(_sk__feature_prefix)]
    train_x = train_x[cols]
    train_y = train_df[_sk__label_col]
    s = cross_val_score(model, train_x, train_y, cv=_sk__cv, scoring=_sk__scoring)
    metadata = dict(sk_model=get_full_type_path(model), cv_scores=[float(x) for x in s])
    # optionally fit on the full data and persist the pickled model
    if _sk__save_path != "":
        model.fit(train_x, train_y)
        fp = os.path.join(_sk__save_path, str(uuid4()) + ".pkl")
        with FileSystem().openbin(fp, mode="wb") as f:
            pickle.dump(model, f)
        metadata["model_path"] = fp
    # error is negated so that higher scores mean lower error (for minimizers)
    return dict(
        error=-np.mean(s),
        hp=dict(
            _sk__model=get_full_type_path(model),
            _sk__estimators=dict(
                **{
                    f"_{i}": d
                    for i, d in enumerate(json.loads(_sk__estimators))
                },
                stacking=dict(_sk__model=_sk__model, **kwargs),
            ),
            _sk__stack_cv=_sk__stack_cv,
            _sk__method=_sk__method,
            _sk__passthrough=_sk__passthrough,
        ),
        metadata=metadata,
    )
def __uuid__(self) -> str:
    """Deterministic id based solely on this object's concrete type."""
    type_path = get_full_type_path(self)
    return to_uuid(type_path)
def __uuid__(self) -> str:
    """Unique id for this expression, combining the function path with its
    positional and keyword arguments."""
    func_path = get_full_type_path(self._func)
    return to_uuid(func_path, self._args, self._kwargs)
def to_keras_spec_expr(spec: Any) -> str:
    """Normalize a keras spec (expression string or spec object) to its full type path."""
    resolved = to_keras_spec(spec) if isinstance(spec, str) else spec
    return get_full_type_path(resolved)
def to_sk_model_expr(model: Any) -> Any:
    """Normalize a sklearn model (expression string or class) to its full type path."""
    resolved = to_sk_model(model) if isinstance(model, str) else model
    return get_full_type_path(resolved)