def insert_missing_rows(source: ETLObjectBase,
                        destination: ETLWritableObjectBase) -> int:
    """ Insert rows from source that do not exist in destination and return the number of rows inserted. """

    # Destination columns that are in source or are overridden.
    insertable_columns = _get_shared_columns(
        source.columns + list(destination.insert_overrides),
        destination.columns)

    sql = """
        insert into {destination_object_representation} ({insert_columns})
        select      {select_columns}
        from        {source_object_representation} as s
                    left outer join {destination_object_representation} as d on {join}
        where       {excluder}
    """

    sql = SQL(sql).format(
        destination_object_representation=destination.object_representation,
        insert_columns=primatives.make_column_list(insertable_columns),
        select_columns=primatives.make_column_list(
            insertable_columns, "s", destination.insert_overrides),
        source_object_representation=source.object_representation,
        join=primatives.make_join_conditional(destination.key_columns, "d",
                                              "s"),
        excluder=primatives.make_join_excluder_conditional(
            destination.key_columns, "d"),
    )

    return sql_helpers.execute_dml_sql(sql)
def identify_new_or_updated(source: ETLObjectBase,
                            destination: ETLWritableObjectBase,
                            staging: ETLTemporaryTable) -> int:
    """
    Create a temporary staging table containing keys of rows in source that are new or
    updated from destination and return the number of rows affected.
    """

    # Destination columns that are in source that are not overridden and are not keys.
    changeable_columns = _get_changeable_columns(source, destination)

    sql = """
        create temporary table {staging_object_representation} as
        select {select_columns}
        from   {source_object_representation} as s
               left outer join {destination_object_representation} as d on {join}
        where  ({excluder}) or ({detect_changes})
    """

    sql = SQL(sql).format(
        staging_object_representation=staging.object_representation,
        select_columns=primatives.make_column_list(
            [c.name for c in destination.key_columns], "s"),
        source_object_representation=source.object_representation,
        destination_object_representation=destination.object_representation,
        join=primatives.make_join_conditional(destination.key_columns, "d",
                                              "s"),
        excluder=primatives.make_join_excluder_conditional(
            destination.key_columns, "d"),
        detect_changes=primatives.make_change_detector_conditional(
            changeable_columns, "s", "d"),
    )

    return sql_helpers.execute_dml_sql(sql)
Esempio n. 3
0
 def _get_object_representation(self) -> Composed:
     """ To help us treat a dblink table like any other object, let's wrap it in a subquery. """
     remote_table = SQL("{}.{}").format(Identifier(self.schema_name),
                                        Identifier(self.table_name))
     remote_sql = SQL("select {columns} from {remote_table}").format(
         columns=primatives.make_column_list(self.columns),
         remote_table=remote_table)
     return SQL("({})").format(
         primatives.wrap_dblink_query(self.dblink_name, remote_sql, "r",
                                      self.columns, self.data_types))
Esempio n. 4
0
def upsert_records_with_predicate(source: ETLObjectBase,
                                  destination: ETLWritableObjectBase,
                                  predicate: list, primary_key: str) -> int:
    # Destination columns that are in source or are overridden.
    insertable_columns = _get_shared_columns(
        source.columns + list(destination.insert_overrides),
        destination.columns)

    excluded = SQL(", ").join([
        SQL("{dest} = {source}").format(dest=Identifier(field),
                                        source=SQL("EXCLUDED.") +
                                        Identifier(field))
        for field in insertable_columns
    ])

    upsert_sql_template = """
        INSERT INTO {destination_object_representation} ({insert_columns})
        SELECT      {select_columns}
        FROM        {source_object} AS {alias}
        ON CONFLICT ({primary_key}) DO UPDATE SET
        {excluded}
        RETURNING {primary_key}
    """
    alias = "s"

    sql = SQL(upsert_sql_template).format(
        primary_key=Identifier(primary_key),
        alias=Identifier(alias),
        destination_object_representation=destination.object_representation,
        insert_columns=primatives.make_column_list(insertable_columns),
        select_columns=primatives.make_column_list(
            insertable_columns, alias, destination.insert_overrides),
        source_object=source.object_representation_custom_predicate(predicate),
        excluded=excluded,
    )

    return sql_helpers.execute_dml_sql(sql)
def stage_table(source: ETLObjectBase, destination: ETLWritableObjectBase,
                staging: ETLTemporaryTable) -> int:
    """ Copy source table contents to staging table and return the number of rows copied. """

    shared_columns = _get_shared_columns(source.columns, destination.columns)

    sql = """
        create temporary table {staging_object_representation} as
        select {select_columns}
        from   {source_object_representation} as t
    """

    sql = SQL(sql).format(
        staging_object_representation=staging.object_representation,
        select_columns=primatives.make_column_list(shared_columns),
        source_object_representation=source.object_representation,
    )

    return sql_helpers.execute_dml_sql(sql)
Esempio n. 6
0
def test_primitives():

    data_types = OrderedDict([
        ("test",
         primatives.ColumnDefinition(name="test",
                                     data_type="int",
                                     not_nullable=True)),
        ("tube",
         primatives.ColumnDefinition(name="tube",
                                     data_type="text",
                                     not_nullable=False)),
    ])
    single_key_column = [data_types["test"]]
    double_key_column = [data_types["test"], data_types["tube"]]

    assert cc(primatives.make_cast_column_list([], {})) == ""
    assert cc(primatives.make_cast_column_list(
        ["test"], data_types)) == 'cast("test" as int) as "test"'
    assert cc(primatives.make_cast_column_list(
        ["test"], data_types, "t")) == 'cast("t"."test" as int) as "test"'
    assert (
        cc(primatives.make_cast_column_list(["test", "tube"], data_types,
                                            "t")) ==
        'cast("t"."test" as int) as "test", cast("t"."tube" as text) as "tube"'
    )

    assert cc(primatives.make_change_detector_conditional([], "a", "b")) == ""
    assert (cc(primatives.make_change_detector_conditional(
        ["test"], "a", "b")) == '"a"."test" is distinct from "b"."test"')
    assert (
        cc(
            primatives.make_change_detector_conditional(["test", "tube"], "a",
                                                        "b")) ==
        '"a"."test" is distinct from "b"."test" or "a"."tube" is distinct from "b"."tube"'
    )

    assert cc(primatives.make_column_list([])) == ""
    assert cc(primatives.make_column_list(["test"])) == '"test"'
    assert cc(primatives.make_column_list(["test"], "t")) == '"t"."test"'
    assert cc(primatives.make_column_list(["test", "tube"],
                                          "t")) == '"t"."test", "t"."tube"'
    assert cc(
        primatives.make_column_list(
            ["test", "tube"], "t",
            {"tube": SQL("now()")})) == '"t"."test", now()'

    assert cc(primatives.make_column_setter_list([], "t")) == ""
    assert cc(primatives.make_column_setter_list(["test"],
                                                 "t")) == '"test" = "t"."test"'
    assert cc(primatives.make_column_setter_list(
        ["test", "tube"], "t")) == '"test" = "t"."test", "tube" = "t"."tube"'
    assert (cc(
        primatives.make_column_setter_list(
            ["test", "tube"], "t",
            {"tube": SQL("now()")})) == '"test" = "t"."test", "tube" = now()')

    assert cc(
        primatives.make_composed_qualified_table_name("test")) == '"test"'
    assert cc(primatives.make_composed_qualified_table_name(
        "test", "tube")) == '"tube"."test"'
    assert cc(
        primatives.make_composed_qualified_table_name(
            "test", "tube", "t")) == '"tube"."test" as "t"'
    assert cc(primatives.make_composed_qualified_table_name(
        "test", alias="t")) == '"test" as "t"'

    assert cc(primatives.make_join_conditional([], "a", "b")) == ""
    assert cc(primatives.make_join_conditional(
        single_key_column, "a", "b")) == '"a"."test" = "b"."test"'
    assert (
        cc(primatives.make_join_conditional(double_key_column, "a", "b")) ==
        '"a"."test" = "b"."test" and "a"."tube" is not distinct from "b"."tube"'
    )

    assert cc(primatives.make_join_excluder_conditional([], "t")) == ""
    assert cc(primatives.make_join_excluder_conditional(
        single_key_column, "t")) == '"t"."test" is null'
    assert (cc(
        primatives.make_join_excluder_conditional(
            double_key_column,
            "t")) == '"t"."test" is null and "t"."tube" is null')

    table = SQL("{}").format(Identifier("my_table"))
    assert cc(primatives.make_join_to_table_conditional([], "t", table)) == ""
    assert (cc(
        primatives.make_join_to_table_conditional(
            single_key_column, "t",
            table)) == '"t"."test" = "my_table"."test"')
    assert (
        cc(
            primatives.make_join_to_table_conditional(double_key_column, "t",
                                                      table)) ==
        '"t"."test" = "my_table"."test" and "t"."tube" is not distinct from "my_table"."tube"'
    )

    assert cc(primatives.make_typed_column_list([], {})) == ""
    assert cc(primatives.make_typed_column_list(["test"],
                                                data_types)) == '"test" int'
    assert cc(primatives.make_typed_column_list(
        ["test", "tube"], data_types)) == '"test" int, "tube" text'

    assert (
        standardize_whitespace(
            cc(
                primatives.wrap_dblink_query("testdblink", "select now()", "r",
                                             ["test"], data_types))) ==
        'select "r"."test" from dblink(\'testdblink\', \'select now()\') as "r" ("test" int)'
    )