def test_fsql():
    """Run one templated query in upper-case and in lower-case keyword form.

    Each run passes ``df2`` and the template value ``p`` as keyword
    arguments and expects the yielded ``result`` dataframe to contain
    ``[[1, 1], [1, 1]]``. The lower-case run additionally passes
    ``fsql_ignore_case=True``.

    NOTE(review): ``df`` and ``t`` are referenced only by name from inside
    the SQL text, so they are presumably resolved from this function's own
    scope -- keep those names and keep the ``fsql`` call in this frame.
    The ``# schema:`` comment above ``t`` is presumably read as the
    transformer's output schema hint; leave it directly above the ``def``.
    """

    # schema: *,x:long
    def t(df: pd.DataFrame) -> pd.DataFrame:
        df["x"] = 1
        return df

    df = pd.DataFrame([[0], [1]], columns=["a"])

    upper_case_sql = """
    SELECT * FROM df WHERE a>{{p}}
    UNION ALL
    SELECT * FROM df2 WHERE a>{{p}}
    TRANSFORM USING t YIELD DATAFRAME AS result
    """
    lower_case_sql = """
    select * from df where a>{{p}}
    union all
    select * from df2 where a>{{p}}
    transform using t yield dataframe as result
    """

    # (statement, extra keyword arguments) in the order the runs happen.
    runs = (
        (upper_case_sql, {}),
        (lower_case_sql, {"fsql_ignore_case": True}),
    )
    for statement, options in runs:
        outcome = fsql(
            statement,
            df2=pd.DataFrame([[0], [1]], columns=["a"]),
            p=0,
            **options,
        ).run()
        assert [[1, 1], [1, 1]] == outcome["result"].as_array()
def test_use_soecial_df(tmpdir):
    """Bind the dotted name ``a.x`` to dataframes supplied from outside the SQL.

    Two sources are exercised:

    * an ``ArrayDataFrame`` passed to ``fsql`` in a dict keyed by ``"a.x"``;
    * a value taken from ``dag.yields["b"]`` of one ``FugueSQLWorkflow`` and
      passed to a second workflow under the same key. The engine used for
      both workflows is configured with a checkpoint path under ``tmpdir``.

    All comparisons are done inside the SQL through ``OUTPUT ... USING
    assert_eq``.
    """
    # external non-workflowdataframe
    arr = ArrayDataFrame([[0], [1]], "a:int")
    external_sql = """
        b=CREATE[[0], [1]] SCHEMA a: int
        a = SELECT * FROM a.x
        OUTPUT a, b USING assert_eq
        a = SELECT x.* FROM a.x AS x
        OUTPUT a, b USING assert_eq
        c=CREATE [[0,0],[1,1]] SCHEMA a:int,b:int
        d = SELECT x.*,y.a AS b FROM a.x x INNER JOIN a.x y ON x.a=y.a
        OUTPUT c, d USING assert_eq
        """
    fsql(external_sql, {"a.x": arr}).run()

    # from yield file
    checkpoint_dir = os.path.join(tmpdir, "ck")
    engine = NativeExecutionEngine(
        conf={"fugue.workflow.checkpoint.path": checkpoint_dir}
    )
    with FugueSQLWorkflow(engine) as dag:
        dag("CREATE[[0], [1]] SCHEMA a: int YIELD FILE AS b")
        res = dag.yields["b"]

    yielded_sql = """
        b=CREATE[[0], [1]] SCHEMA a: int
        a = SELECT * FROM a.x
        OUTPUT a, b USING assert_eq
        """
    with FugueSQLWorkflow(engine) as dag:
        dag(yielded_sql, {"a.x": res})
def fsql(self, line: str, cell: str, local_ns: Any = None) -> None:
    """Compile and run a Fugue SQL cell, then export what it yields.

    Args:
        line: Forwarded to ``self.get_engine`` to obtain the engine used
            for the run.
        cell: Fugue SQL source to compile.
        local_ns: Namespace passed to the SQL compiler and to
            ``self.get_engine``. Yielded values are written back into it
            under their yield names. When omitted, an empty throwaway
            namespace is used.
    """
    # Normalize once. Previously only the ``get_engine`` call guarded
    # against ``None``, so the default argument failed with ``TypeError``
    # as soon as a yielded value was written back.
    namespace: Any = {} if local_ns is None else local_ns

    dag = fugue_sql.fsql(cell, namespace)
    dag.run(self.get_engine(line, namespace))

    for name, yielded in dag.yields.items():
        # Yielded dataframes are exported as their ``result``; any other
        # yielded object is exported unchanged.
        if isinstance(yielded, YieldedDataFrame):
            namespace[name] = yielded.result
        else:
            namespace[name] = yielded
def fsql(self, line: str, cell: str, local_ns: Any = None) -> None:
    """Compile and run a Fugue SQL cell, then export what it yields.

    Args:
        line: Forwarded to ``self.get_engine`` to obtain the engine used
            for the run.
        cell: Fugue SQL source. A newline is prepended before compilation.
        local_ns: Namespace passed to the SQL compiler and to
            ``self.get_engine``. Yielded values are written back into it
            under their yield names. When omitted, an empty throwaway
            namespace is used.

    Raises:
        FugueSQLSyntaxError: If compilation fails with that error. A fresh
            instance carrying only the message is raised, with no
            traceback and no chained context.
    """
    # Normalize once. Previously only the ``get_engine`` call guarded
    # against ``None``, so the default argument failed with ``TypeError``
    # as soon as a yielded value was written back.
    namespace: Any = {} if local_ns is None else local_ns

    try:
        # NOTE(review): the leading "\n" presumably keeps reported line
        # numbers aligned with the notebook cell -- confirm before changing.
        dag = fugue_sql.fsql(
            "\n" + cell, namespace, fsql_ignore_case=self._fsql_ignore_case
        )
    except FugueSQLSyntaxError as ex:
        # Re-raise with just the message, without traceback or context.
        raise FugueSQLSyntaxError(str(ex)).with_traceback(None) from None

    dag.run(self.get_engine(line, namespace))

    for name, yielded in dag.yields.items():
        # Yielded dataframes are exported as their ``result``; any other
        # yielded object is exported unchanged.
        if isinstance(yielded, YieldedDataFrame):
            namespace[name] = yielded.result
        else:
            namespace[name] = yielded
def test_fsql():
    """Run a templated UNION ALL query followed by a transformer.

    ``df2`` and the template value ``p`` are passed to ``fsql`` as keyword
    arguments, and the yielded ``result`` dataframe is expected to contain
    ``[[1, 1], [1, 1]]``.

    NOTE(review): ``df`` and ``t`` are referenced only by name from inside
    the SQL text, so they are presumably resolved from this function's own
    scope -- keep those names and keep the ``fsql`` call in this frame.
    The ``# schema:`` comment above ``t`` is presumably read as the
    transformer's output schema hint; leave it directly above the ``def``.
    """

    # schema: *,x:long
    def t(df: pd.DataFrame) -> pd.DataFrame:
        df["x"] = 1
        return df

    df = pd.DataFrame([[0], [1]], columns=["a"])

    query = """
    SELECT * FROM df WHERE a>{{p}}
    UNION ALL
    SELECT * FROM df2 WHERE a>{{p}}
    TRANSFORM USING t YIELD DATAFRAME AS result
    """
    workflow = fsql(query, df2=pd.DataFrame([[0], [1]], columns=["a"]), p=0)
    outcome = workflow.run()

    rows = outcome["result"].as_array()
    assert [[1, 1], [1, 1]] == rows
def test_fsql_syntax_error():
    """Expect ``FugueSQLSyntaxError`` when ``fsql`` is given a misspelled keyword."""
    # "CREATEE" is the deliberate misspelling that should be rejected.
    malformed_statement = "CREATEE [[0]] SCHEMA a:int"
    with raises(FugueSQLSyntaxError):
        fsql(malformed_statement)