コード例 #1
0
def test_sql_snap_interface_complex_jinja():
    """Check interface parsing of a SQL snap with heavy Jinja templating.

    The statement mixes ``select:T`` / ``input:T`` annotations with nested
    Jinja ``if`` / ``for`` blocks. The test expects exactly one generic input
    with schema ``T`` and a generic output.
    """
    sql = """
        select:T
        {% if inputs.input.realized_schema.unique_on %}
            distinct on (
                {% for col in inputs.input.realized_schema.unique_on %}
                    "{{ col }}"
                    {%- if not loop.last %},{% endif %}
                {% endfor %}
                )
        {% endif %}
            {% for col in inputs.input.realized_schema.fields %}
                "{{ col.name }}"
                {%- if not loop.last %},{% endif %}
            {% endfor %}

        from input:T
        {% if inputs.input.resolved_schema.updated_at_field %}
        order by
            {% for col in inputs.input.realized_schema.unique_on %}
                "{{ col }}",
            {% endfor %}
            "{{ inputs.input.resolved_schema.updated_at_field.name }}" desc
        {% endif %}"""
    df = sql_snap("s1", sql)
    pi = df.get_interface()
    assert pi is not None
    assert len(pi.inputs) == 1
    assert pi.inputs[0].is_generic
    assert pi.inputs[0].schema_like == "T"
    # Verify the output exists before reading its attributes, so a missing
    # output fails as a clear assertion rather than an AttributeError.
    assert pi.output is not None
    assert pi.output.is_generic
コード例 #2
0
    def process_snap(self, snap_like: Union[SnapLike, str, ModuleType]) -> _Snap:
        """Turn a snap-like object into a ``_Snap``.

        Handled inputs, checked in this order:

        * an existing ``_Snap`` -- returned unchanged;
        * any callable -- passed to ``make_snap``;
        * a string ending in ``.sql`` -- read as a file relative to
          ``self.py_module_path`` and passed to ``sql_snap``;
        * any other string -- wrapped in ``PythonCodeSnapWrapper``;
        * a module object -- its source is wrapped in
          ``PythonCodeSnapWrapper``.

        Raises:
            Exception: if a ``.sql`` path is given but ``self.py_module_path``
                is not set.
            TypeError: if ``snap_like`` matches none of the cases above.
        """
        from snapflow.core.snap import _Snap, make_snap, PythonCodeSnapWrapper
        from snapflow.core.sql.sql_snap import sql_snap

        if isinstance(snap_like, _Snap):
            return snap_like

        if callable(snap_like):
            return make_snap(snap_like, module=self.name)

        if isinstance(snap_like, str):
            if not snap_like.endswith(".sql"):
                # Just a string, not a sql file, assume it is python? TODO
                return make_snap(PythonCodeSnapWrapper(snap_like), module=self.name)

            if not self.py_module_path:
                raise Exception(
                    f"Module path not set, cannot read sql definition {snap_like}"
                )
            with open(os.path.join(self.py_module_path, snap_like)) as sql_file:
                sql_text = sql_file.read()
            # Snap name is the file's base name without the ".sql" suffix.
            snap_name = os.path.basename(snap_like)[:-4]
            # TODO: versions, runtimes, etc for sql (someway to specify in a .sql file)
            return sql_snap(name=snap_name, module=self.name, sql=sql_text)

        if isinstance(snap_like, ModuleType):
            # Module snap (the new default)
            module_source = inspect.getsource(snap_like)
            return make_snap(PythonCodeSnapWrapper(module_source), module=self.name)

        raise TypeError(snap_like)
コード例 #3
0
def test_sql_snap_interface_jinja_block():
    """A Jinja block after ``from`` must not hide the two table inputs."""
    query = """select 1, 'not a commment -- nope'
        from {% jinja block %}
        t1, t2 on t1.a = t2.b"""
    interface = sql_snap("s1", query).get_interface()
    assert interface is not None
    # Both t1 and t2 are expected to be detected as inputs.
    assert len(interface.inputs) == 2
コード例 #4
0
def test_sql_snap_interface_comment_like_string():
    """A ``--`` inside a string literal and a real comment must both be handled.

    The statement has a comment-looking string in the select list and a real
    ``--`` comment after ``from``; two inputs are still expected.
    """
    query = """select 1, 'not a commment -- nope'
        from -- comment inbetween
        t1, t2 on t1.a = t2.b"""
    interface = sql_snap("s1", query).get_interface()
    assert interface is not None
    assert len(interface.inputs) == 2
コード例 #5
0
def test_sql_snap_interface_self_ref():
    """A ``from_self`` input named ``this`` is the snap's only input.

    The input is expected to be an optional, generic ``DataBlock`` input
    flagged as ``from_self``.
    """
    query = """select 1, 'not a commment -- nope'
        from {% jinja block %}
        this"""
    declare_self_input = Input(name="this", schema="T", from_self=True)
    snap = declare_self_input(sql_snap("s1", query))
    interface = snap.get_interface()
    assert interface is not None
    assert len(interface.inputs) == 1

    self_input = interface.inputs[0]
    assert self_input.data_format == "DataBlock"
    assert self_input.name == "this"
    assert self_input.from_self
    assert not self_input.required
    assert self_input.is_generic
コード例 #6
0
def test_sql_snap_interface_output():
    """A ``select:DataBlock[T]`` annotation sets the output schema and format."""
    query = """select:DataBlock[T]
        1
        from -- comment inbetween
        input
        join t2 on t1.a = t2.b"""
    interface = sql_snap("s1", query).get_interface()
    assert interface is not None
    assert len(interface.inputs) == 2

    output = interface.output
    assert output is not None
    assert output.schema_like == "T"
    assert output.data_format == "DataBlock"
コード例 #7
0
def test_sql_snap_interface():
    """Annotated tables in ``from`` / ``join`` clauses become named inputs.

    Three ``name:Schema`` annotations are expected to yield three inputs with
    the matching names and schemas.
    """
    query = """select 1 from from t1:T1
        join t2:Any on t1.a = t2.b left join t3:T2
        on"""
    interface = sql_snap("s1", query).get_interface()
    assert interface is not None

    assert len(interface.inputs) == 3
    first, second, third = (
        interface.get_input(input_name) for input_name in ("t1", "t2", "t3")
    )
    assert first.schema_like == "T1"
    assert first.name == "t1"
    assert first.data_format == "DataBlock"
    assert second.schema_like == "Any"
    assert second.name == "t2"
    assert third.schema_like == "T2"
    assert third.name == "t3"
    assert third.data_format == "DataBlock"
    assert interface.output is not None
コード例 #8
0
        sql = super().get_compiled_sql(ctx, storage, inputs=inputs)
        sql = column_map(
            f"({sql}) as __conformed",
            [c for c in schema.field_names() if c in fields or c in mapping],
            mapping,
        )
        return sql

    def get_interface(self) -> DeclaredSnapInterface:
        """Return the interface declared by ``dataframe_conform_to_schema``."""
        declared_interface = dataframe_conform_to_schema.get_interface()
        return declared_interface


# Module-level snap definition: a SQL snap named "sql_conform_to_schema" in the
# "core" module, built with SqlConformToSchema as its wrapper class and then
# passed through Param("schema", "str").
# NOTE(review): presumably the "schema" param names the target schema and the
# wrapper class rewrites the placeholder "select * from input" when the SQL is
# compiled -- confirm against SqlConformToSchema.
sql_conform_to_schema = Param("schema", "str")(sql_snap(
    name="sql_conform_to_schema",
    sql="select * from input",
    module="core",
    wrapper_cls=SqlConformToSchema,
    display_name="Conform Table to Schema",
))


def test_conform():
    from snapflow.modules import core

    TestSchemaA = create_quick_schema("TestSchemaA", [("a", "Integer"),
                                                      ("b", "Integer")],
                                      namespace="core")
    TestSchemaB = create_quick_schema(
        "TestSchemaB",
        [("a", "Integer"), ("c", "Integer"), ("d", "Text")],
        implementations=[Implementation("TestSchemaA", {"b": "c"})],