def process(self, dfs: DataFrames) -> DataFrame:
    """Build the configured transformer and run it on the first dataframe.

    The transformer is resolved from the ``transformer`` and ``schema``
    params, then given the engine conf, the ``params`` entry (an empty
    ``ParamDict`` when absent) and this processor's partition spec. The
    ``ignore_errors`` param is converted to exception types and stored on
    ``self._ignore_errors`` before dispatching.

    :param dfs: input dataframes; only ``dfs[0]`` is used here
    :return: the result of ``self.transform`` when the resolved object is a
        ``Transformer``, otherwise the result of ``self.cotransform``
    """
    input_df = dfs[0]
    transformer = _to_transformer(
        self.params.get_or_none("transformer", object),
        self.params.get_or_none("schema", object),
    )
    transformer._workflow_conf = self.execution_engine.conf
    transformer._params = self.params.get("params", ParamDict())  # type: ignore
    transformer._partition_spec = self.partition_spec  # type: ignore
    self._ignore_errors = [
        to_type(err, Exception) for err in self.params.get("ignore_errors", [])
    ]
    if isinstance(transformer, Transformer):
        return self.transform(input_df, transformer)
    return self.cotransform(input_df, transformer)
def test__to_transformer_determinism():
    """Equivalent inputs convert to distinct objects with equal ``to_uuid``."""
    # Same function passed as an object (twice) and by name.
    from_func = _to_transformer(t1, None)
    from_func_again = _to_transformer(t1, None)
    from_name = _to_transformer("t1", None)
    assert from_func is not from_func_again
    assert to_uuid(from_func) == to_uuid(from_func_again)
    assert from_func is not from_name
    assert to_uuid(from_func) == to_uuid(from_name)

    # Schema given as an expression string vs. a Schema instance.
    with_str_schema = _to_transformer(t4, "a:int,b:int")
    with_schema_obj = _to_transformer("t4", Schema("a:int,b:int"))
    assert with_str_schema is not with_schema_obj
    assert to_uuid(with_str_schema) == to_uuid(with_schema_obj)

    # Class passed as an object vs. by name.
    from_class = _to_transformer(MockTransformer)
    from_class_name = _to_transformer("MockTransformer")
    assert from_class is not from_class_name
    assert to_uuid(from_class) == to_uuid(from_class_name)

    # t7 passed as an object vs. by name, same schema string.
    t7_from_obj = _to_transformer(t7, "a:int,b:int")
    t7_from_name = _to_transformer("t7", "a:int,b:int")
    assert t7_from_obj is not t7_from_name
    assert to_uuid(t7_from_obj) == to_uuid(t7_from_name)
def process(self, dfs: DataFrames) -> DataFrame:
    """Build the configured transformer, wire it up, validate and run it.

    Steps, in order:

    1. Resolve the transformer from the ``transformer`` and ``schema`` params.
    2. Attach the engine conf, the ``params`` entry (an empty ``ParamDict``
       when absent) and this processor's partition spec.
    3. Convert the ``rpc_handler`` param (fetched with ``get_or_throw``) and,
       unless it is an ``EmptyRPCHandler``, attach a client created by the
       engine's RPC server.
    4. Convert the ``ignore_errors`` param to exception types and store them
       on ``self._ignore_errors``.
    5. Call ``validate_on_runtime`` with the input dataframe.

    :param dfs: input dataframes; only ``dfs[0]`` is used here
    :return: the result of ``self.transform`` when the resolved object is a
        ``Transformer``, otherwise the result of ``self.cotransform``
    """
    input_df = dfs[0]
    transformer = _to_transformer(
        self.params.get_or_none("transformer", object),
        self.params.get_or_none("schema", object),
    )
    transformer._workflow_conf = self.execution_engine.conf
    transformer._params = self.params.get("params", ParamDict())  # type: ignore
    transformer._partition_spec = self.partition_spec

    handler = to_rpc_handler(self.params.get_or_throw("rpc_handler", object))
    if not isinstance(handler, EmptyRPCHandler):
        rpc_server = self.execution_engine.rpc_server
        transformer._rpc_client = rpc_server.make_client(handler)

    self._ignore_errors = [
        to_type(err, Exception) for err in self.params.get("ignore_errors", [])
    ]

    transformer.validate_on_runtime(input_df)
    if isinstance(transformer, Transformer):
        return self.transform(input_df, transformer)
    return self.cotransform(input_df, transformer)
def test__to_transformer():
    """Conversion yields a fresh ``Transformer`` each time; t4 needs a schema."""
    tagged = _to_transformer(t1, None)
    assert isinstance(tagged, Transformer)
    tagged._x = 1

    # Each conversion must return a new object, so an attribute set on an
    # earlier result must not show up on a later one.
    fresh = _to_transformer(t1, None)
    assert isinstance(fresh, Transformer)
    assert "_x" not in vars(fresh)

    by_name = _to_transformer("t1", None)
    assert isinstance(by_name, Transformer)
    assert "_x" not in vars(by_name)
    by_name._x = 1

    by_name_again = _to_transformer("t1", None)
    assert isinstance(by_name_again, Transformer)
    assert "_x" not in vars(by_name_again)

    # t4 (as an object or by name) is rejected when the second argument is None.
    for target in (t4, "t4"):
        raises(
            FugueInterfacelessError,
            lambda obj=target: _to_transformer(obj, None),
        )

    with_schema = _to_transformer("t4", "*,b:int")
    assert isinstance(with_schema, Transformer)
def test__to_transformer():
    """Conversion yields a fresh ``CoTransformer`` each time; t4 needs a schema."""
    # A class and its name both resolve to an instance of that class.
    from_class = _to_transformer(MockTransformer)
    assert isinstance(from_class, MockTransformer)
    from_class_name = _to_transformer("MockTransformer")
    assert isinstance(from_class_name, MockTransformer)

    tagged = _to_transformer(t1, None)
    assert isinstance(tagged, CoTransformer)
    tagged._x = 1

    # Each conversion must return a new object, so an attribute set on an
    # earlier result must not show up on a later one.
    fresh = _to_transformer(t1, None)
    assert isinstance(fresh, CoTransformer)
    assert "_x" not in vars(fresh)

    by_name = _to_transformer("t1", None)
    assert isinstance(by_name, CoTransformer)
    assert "_x" not in vars(by_name)
    by_name._x = 1

    by_name_again = _to_transformer("t1", None)
    assert isinstance(by_name_again, CoTransformer)
    assert "_x" not in vars(by_name_again)

    # t4 (as an object or by name) is rejected when the second argument is None.
    for target in (t4, "t4"):
        raises(
            FugueInterfacelessError,
            lambda obj=target: _to_transformer(obj, None),
        )

    # With an explicit schema string, t4..t7 all convert by name.
    for func_name in ("t4", "t5", "t6", "t7"):
        converted = _to_transformer(func_name, "a:int,b:int")
        assert isinstance(converted, CoTransformer)
def test__register():
    """A transformer registered under an alias is resolvable by that alias."""
    alias = "ct_x"
    register_transformer(alias, MockTransformer)
    resolved = _to_transformer(alias)
    assert isinstance(resolved, MockTransformer)