def test_sql_snap_interface_complex_jinja():
    """Interface parsing copes with SQL that is heavily templated with jinja.

    The statement wraps its select list, ``distinct on`` clause and
    ``order by`` clause in jinja ``if``/``for`` blocks. The test expects a
    single generic input with schema ``T`` and a generic output.
    """
    sql = """
    select:T
    {% if inputs.input.realized_schema.unique_on %}
        distinct on (
            {% for col in inputs.input.realized_schema.unique_on %}
                "{{ col }}"
                {%- if not loop.last %},{% endif %}
            {% endfor %}
            )
    {% endif %}
        {% for col in inputs.input.realized_schema.fields %}
            "{{ col.name }}"
            {%- if not loop.last %},{% endif %}
        {% endfor %}
    from input:T
    {% if inputs.input.resolved_schema.updated_at_field %}
    order by
        {% for col in inputs.input.realized_schema.unique_on %}
            "{{ col }}",
        {% endfor %}
        "{{ inputs.input.resolved_schema.updated_at_field.name }}" desc
    {% endif %}"""
    df = sql_snap("s1", sql)
    pi = df.get_interface()
    assert pi is not None

    # Exactly one input, declared as the generic schema ``T``.
    assert len(pi.inputs) == 1
    assert pi.inputs[0].is_generic
    assert pi.inputs[0].schema_like == "T"

    # Check the output exists *before* touching its attributes, so a missing
    # output fails on this assertion rather than with an AttributeError.
    assert pi.output is not None
    assert pi.output.is_generic
def process_snap(self, snap_like: Union[SnapLike, str, ModuleType]) -> _Snap:
    """Turn a snap-like value into a snap object registered under this module.

    Handled forms, checked in this order:

    * an existing ``_Snap`` instance -- returned unchanged;
    * any callable -- passed to ``make_snap``;
    * a string ending in ``.sql`` -- treated as a file path relative to
      ``self.py_module_path``; the file is read and passed to ``sql_snap``,
      named after the file's base name without the ``.sql`` suffix;
    * any other string -- wrapped in ``PythonCodeSnapWrapper`` as code;
    * a module object -- its source is wrapped in ``PythonCodeSnapWrapper``.

    Raises:
        Exception: for a ``.sql`` path when ``self.py_module_path`` is unset.
        TypeError: when ``snap_like`` matches none of the forms above.
    """
    from snapflow.core.snap import _Snap, make_snap, PythonCodeSnapWrapper
    from snapflow.core.sql.sql_snap import sql_snap

    # Must come first: the remaining checks are for values that still need
    # to be converted.
    if isinstance(snap_like, _Snap):
        return snap_like

    if callable(snap_like):
        return make_snap(snap_like, module=self.name)

    if isinstance(snap_like, str):
        if not snap_like.endswith(".sql"):
            # Just a string, not a sql file, assume it is python? TODO
            return make_snap(PythonCodeSnapWrapper(snap_like), module=self.name)

        if not self.py_module_path:
            raise Exception(
                f"Module path not set, cannot read sql definition {snap_like}"
            )
        with open(os.path.join(self.py_module_path, snap_like)) as sql_file:
            sql_text = sql_file.read()
        # Drop the trailing ".sql" to get the snap name.
        snap_name = os.path.basename(snap_like)[: -len(".sql")]
        # TODO: versions, runtimes, etc for sql (someway to specify in a .sql file)
        return sql_snap(name=snap_name, module=self.name, sql=sql_text)

    if isinstance(snap_like, ModuleType):
        # Module snap (the new default)
        module_source = inspect.getsource(snap_like)
        return make_snap(PythonCodeSnapWrapper(module_source), module=self.name)

    raise TypeError(snap_like)
def test_sql_snap_interface_jinja_block():
    """Two table inputs are still found when a jinja block follows ``from``.

    The select list also carries a string literal containing ``--``.
    """
    query = """select 1, 'not a commment -- nope'
    from {% jinja block %}
    t1, t2 on t1.a = t2.b"""
    snap = sql_snap("s1", query)
    interface = snap.get_interface()
    assert interface is not None
    input_count = len(interface.inputs)
    assert input_count == 2
def test_sql_snap_interface_comment_like_string():
    """Two inputs are found despite a ``--`` literal and a real line comment.

    The select list has ``--`` inside a quoted string, and a genuine
    ``-- comment`` sits between ``from`` and the table names.
    """
    # The line comment must end at a newline so the table names that follow
    # it remain part of the statement.
    query = """select 1, 'not a commment -- nope'
    from -- comment inbetween
    t1, t2 on t1.a = t2.b"""
    snap = sql_snap("s1", query)
    interface = snap.get_interface()
    assert interface is not None
    input_count = len(interface.inputs)
    assert input_count == 2
def test_sql_snap_interface_self_ref():
    """A ``this`` table declared via ``Input(..., from_self=True)`` is a self input.

    Expects a single input named ``this`` that is a generic ``DataBlock``,
    flagged ``from_self`` and not required.
    """
    query = """select 1, 'not a commment -- nope'
    from {% jinja block %}
    this"""
    declare_self_input = Input(name="this", schema="T", from_self=True)
    snap = declare_self_input(sql_snap("s1", query))
    interface = snap.get_interface()
    assert interface is not None
    assert len(interface.inputs) == 1

    self_input = interface.inputs[0]
    assert self_input.data_format == "DataBlock"
    assert self_input.name == "this"
    assert self_input.from_self
    assert not self_input.required
    assert self_input.is_generic
def test_sql_snap_interface_output():
    """A ``select:DataBlock[T]`` annotation sets the output format and schema.

    Also expects two inputs from the ``from``/``join`` clauses, with a line
    comment sitting between ``from`` and the first table name.
    """
    # The line comment must end at a newline so ``input`` and the join that
    # follow it remain part of the statement.
    query = """select:DataBlock[T] 1
    from -- comment inbetween
    input
    join t2 on t1.a = t2.b"""
    snap = sql_snap("s1", query)
    interface = snap.get_interface()
    assert interface is not None
    assert len(interface.inputs) == 2

    declared_output = interface.output
    assert declared_output is not None
    assert declared_output.schema_like == "T"
    assert declared_output.data_format == "DataBlock"
def test_sql_snap_interface():
    """Each ``name:Schema`` table reference becomes an input with that schema.

    Expects three inputs (``t1:T1``, ``t2:Any``, ``t3:T2``) and a non-None
    output.
    """
    query = """select 1 from from t1:T1
    join t2:Any on t1.a = t2.b left join t3:T2
    on"""
    snap = sql_snap("s1", query)
    interface = snap.get_interface()
    assert interface is not None
    assert len(interface.inputs) == 3

    first = interface.get_input("t1")
    second = interface.get_input("t2")
    third = interface.get_input("t3")

    assert first.schema_like == "T1"
    assert first.name == "t1"
    assert first.data_format == "DataBlock"

    assert second.schema_like == "Any"
    assert second.name == "t2"

    assert third.schema_like == "T2"
    assert third.name == "t3"
    assert third.data_format == "DataBlock"

    assert interface.output is not None
sql = super().get_compiled_sql(ctx, storage, inputs=inputs) sql = column_map( f"({sql}) as __conformed", [c for c in schema.field_names() if c in fields or c in mapping], mapping, ) return sql def get_interface(self) -> DeclaredSnapInterface: return dataframe_conform_to_schema.get_interface() sql_conform_to_schema = Param("schema", "str")(sql_snap( name="sql_conform_to_schema", sql="select * from input", module="core", wrapper_cls=SqlConformToSchema, display_name="Conform Table to Schema", )) def test_conform(): from snapflow.modules import core TestSchemaA = create_quick_schema("TestSchemaA", [("a", "Integer"), ("b", "Integer")], namespace="core") TestSchemaB = create_quick_schema( "TestSchemaB", [("a", "Integer"), ("c", "Integer"), ("d", "Text")], implementations=[Implementation("TestSchemaA", {"b": "c"})],