Example #1
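These snippets come from Great Expectations' tests for ConfiguredAssetSqlDataConnector and omit their module-level setup. A minimal preamble along the following lines is assumed; the exact import paths may differ by version, and the test_cases_for_sql_data_connector_sqlite_execution_engine fixture is supplied by the project's conftest.py and is not reproduced here.

import json
import random

from ruamel.yaml import YAML

from great_expectations.core.batch import BatchRequest
from great_expectations.datasource.data_connector import (
    ConfiguredAssetSqlDataConnector,
)

# yaml.load(...) in the snippets below is ruamel.yaml's API, not PyYAML's.
yaml = YAML(typ="safe")
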
def test_example_F(test_cases_for_sql_data_connector_sqlite_execution_engine):
    random.seed(0)
    db = test_cases_for_sql_data_connector_sqlite_execution_engine

    config = yaml.load(
        """
    name: my_sql_data_connector
    datasource_name: FAKE_Datasource_NAME

    data_assets:
        table_partitioned_by_foreign_key__F:
            splitter_method: _split_on_column_value
            splitter_kwargs:
                column_name: session_id
    """, )
    config["execution_engine"] = db

    my_data_connector = ConfiguredAssetSqlDataConnector(**config)

    report = my_data_connector.self_check()
    print(json.dumps(report, indent=2))

    assert report == {
        "class_name": "ConfiguredAssetSqlDataConnector",
        "data_asset_count": 1,
        "example_data_asset_names": ["table_partitioned_by_foreign_key__F"],
        "data_assets": {
            "table_partitioned_by_foreign_key__F": {
                "batch_definition_count":
                49,
                # TODO Abe 20201029 : These values should be sorted
                "example_data_references": [
                    {
                        "session_id": 3
                    },
                    {
                        "session_id": 2
                    },
                    {
                        "session_id": 4
                    },
                ],
            }
        },
        "unmatched_data_reference_count": 0,
        "example_unmatched_data_references": [],
        "example_data_reference": {
            "n_rows": 2,
            "batch_spec": {
                "table_name": "table_partitioned_by_foreign_key__F",
                "partition_definition": {
                    "session_id": 2
                },
                "splitter_method": "_split_on_column_value",
                "splitter_kwargs": {
                    "column_name": "session_id"
                },
            },
        },
    }
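
As a follow-up sketch that is not part of the original test, the connector built above can also be asked for its batch definitions directly, using the same BatchRequest pattern that appears in test_behavior_with_whole_table_splitter further down this page; with 49 distinct session_id values, 49 batch definitions are expected.

# Sketch only: reuses my_data_connector from the snippet above and yields
# one batch definition per distinct session_id value.
batch_definition_list = my_data_connector.get_batch_definition_list_from_batch_request(
    batch_request=BatchRequest(
        datasource_name="FAKE_Datasource_NAME",
        data_connector_name="my_sql_data_connector",
        data_asset_name="table_partitioned_by_foreign_key__F",
    )
)
assert len(batch_definition_list) == 49  # matches batch_definition_count above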
Example #2
def test_example_A(test_cases_for_sql_data_connector_sqlite_execution_engine):
    random.seed(0)
    db = test_cases_for_sql_data_connector_sqlite_execution_engine

    config = yaml.load(
        """
    name: my_sql_data_connector
    datasource_name: FAKE_Datasource_NAME

    data_assets:
        table_partitioned_by_date_column__A:
            splitter_method: _split_on_column_value
            splitter_kwargs:
                column_name: date

    """, )
    config["execution_engine"] = db

    my_data_connector = ConfiguredAssetSqlDataConnector(**config)

    report = my_data_connector.self_check()
    print(json.dumps(report, indent=2))

    assert report == {
        "class_name": "ConfiguredAssetSqlDataConnector",
        "data_asset_count": 1,
        "example_data_asset_names": ["table_partitioned_by_date_column__A"],
        "data_assets": {
            "table_partitioned_by_date_column__A": {
                "batch_definition_count":
                30,
                "example_data_references": [
                    {
                        "date": "2020-01-01"
                    },
                    {
                        "date": "2020-01-02"
                    },
                    {
                        "date": "2020-01-03"
                    },
                ],
            }
        },
        "unmatched_data_reference_count": 0,
        "example_unmatched_data_references": [],
        "example_data_reference": {
            "n_rows": 8,
            "batch_spec": {
                "table_name": "table_partitioned_by_date_column__A",
                "partition_definition": {
                    "date": "2020-01-02"
                },
                "splitter_method": "_split_on_column_value",
                "splitter_kwargs": {
                    "column_name": "date"
                },
            },
        },
    }
def test_behavior_with_whole_table_splitter(
    test_cases_for_sql_data_connector_sqlite_execution_engine,
):
    db = test_cases_for_sql_data_connector_sqlite_execution_engine

    config = yaml.load(
        """
    name: my_sql_data_connector
    datasource_name: FAKE_Datasource_NAME

    assets:
        table_partitioned_by_date_column__A:
            splitter_method : "_split_on_whole_table"
            splitter_kwargs : {}
    """,
    )
    config["execution_engine"] = db

    my_data_connector = ConfiguredAssetSqlDataConnector(**config)
    report_object = my_data_connector.self_check()
    print(json.dumps(report_object, indent=2))

    batch_definition_list = (
        my_data_connector.get_batch_definition_list_from_batch_request(
            batch_request=BatchRequest(
                datasource_name="FAKE_Datasource_NAME",
                data_connector_name="my_sql_data_connector",
                data_asset_name="table_partitioned_by_date_column__A",
            )
        )
    )
    assert len(batch_definition_list) == 1
    assert batch_definition_list[0]["batch_identifiers"] == {}

    batch_definition_list = (
        my_data_connector.get_batch_definition_list_from_batch_request(
            batch_request=BatchRequest(
                datasource_name="FAKE_Datasource_NAME",
                data_connector_name="my_sql_data_connector",
                data_asset_name="table_partitioned_by_date_column__A",
                data_connector_query={},
            )
        )
    )
    assert len(batch_definition_list) == 1
    assert batch_definition_list[0]["batch_identifiers"] == {}

    batch_definition_list = (
        my_data_connector.get_batch_definition_list_from_batch_request(
            batch_request=BatchRequest(
                datasource_name="FAKE_Datasource_NAME",
                data_connector_name="my_sql_data_connector",
                data_asset_name="table_partitioned_by_date_column__A",
                data_connector_query={"batch_filter_parameters": {}},
            )
        )
    )
    assert len(batch_definition_list) == 1
    assert batch_definition_list[0]["batch_identifiers"] == {}
def test_example_G(test_cases_for_sql_data_connector_sqlite_execution_engine):
    random.seed(0)
    db = test_cases_for_sql_data_connector_sqlite_execution_engine

    config = yaml.load(
        """
    name: my_sql_data_connector
    datasource_name: FAKE_Datasource_NAME

    assets:
        table_partitioned_by_multiple_columns__G:
            splitter_method: _split_on_multi_column_values
            splitter_kwargs:
                column_names:
                    - y
                    - m
                    - d
    """,
    )
    config["execution_engine"] = db

    my_data_connector = ConfiguredAssetSqlDataConnector(**config)

    report = my_data_connector.self_check()
    print(json.dumps(report, indent=2))

    assert report == {
        "class_name": "ConfiguredAssetSqlDataConnector",
        "data_asset_count": 1,
        "example_data_asset_names": ["table_partitioned_by_multiple_columns__G"],
        "data_assets": {
            "table_partitioned_by_multiple_columns__G": {
                "batch_definition_count": 30,
                # TODO Abe 20201029 : These values should be sorted
                "example_data_references": [
                    {"y": 2020, "m": 1, "d": 1},
                    {"y": 2020, "m": 1, "d": 2},
                    {"y": 2020, "m": 1, "d": 3},
                ],
            }
        },
        "unmatched_data_reference_count": 0,
        "example_unmatched_data_references": [],
        # FIXME: (Sam) example_data_reference removed temporarily in PR #2590:
        # "example_data_reference": {
        #     "n_rows": 8,
        #     "batch_spec": {
        #         "table_name": "table_partitioned_by_multiple_columns__G",
        #         "data_asset_name": "table_partitioned_by_multiple_columns__G",
        #         "batch_identifiers": {
        #             "y": 2020,
        #             "m": 1,
        #             "d": 2,
        #         },
        #         "splitter_method": "_split_on_multi_column_values",
        #         "splitter_kwargs": {"column_names": ["y", "m", "d"]},
        #     },
        # },
    }
def test_basic_self_check(
    test_cases_for_sql_data_connector_sqlite_execution_engine,
):
    random.seed(0)
    execution_engine = test_cases_for_sql_data_connector_sqlite_execution_engine

    config = yaml.load(
        """
    name: my_sql_data_connector
    datasource_name: FAKE_Datasource_NAME

    assets:
        table_partitioned_by_date_column__A:
            #table_name: events # If table_name is omitted, then the table_name defaults to the asset name
            splitter_method: _split_on_column_value
            splitter_kwargs:
                column_name: date
    """, )
    config["execution_engine"] = execution_engine

    my_data_connector = ConfiguredAssetSqlDataConnector(**config)

    report = my_data_connector.self_check()
    print(json.dumps(report, indent=2))

    assert report == {
        "class_name": "ConfiguredAssetSqlDataConnector",
        "data_asset_count": 1,
        "example_data_asset_names": ["table_partitioned_by_date_column__A"],
        "data_assets": {
            "table_partitioned_by_date_column__A": {
                "batch_definition_count":
                30,
                "example_data_references": [
                    {
                        "date": "2020-01-01"
                    },
                    {
                        "date": "2020-01-02"
                    },
                    {
                        "date": "2020-01-03"
                    },
                ],
            }
        },
        "unmatched_data_reference_count": 0,
        "example_unmatched_data_references": [],
        # FIXME: (Sam) example_data_reference removed temporarily in PR #2590:
        # "example_data_reference": {
        #     "n_rows": 8,
        #     "batch_spec": {
        #         "table_name": "table_partitioned_by_date_column__A",
        #         "data_asset_name": "table_partitioned_by_date_column__A",
        #         "batch_identifiers": {"date": "2020-01-02"},
        #         "splitter_method": "_split_on_column_value",
        #         "splitter_kwargs": {"column_name": "date"},
        #     },
        # },
    }
def test_example_B(test_cases_for_sql_data_connector_sqlite_execution_engine):
    random.seed(0)
    db = test_cases_for_sql_data_connector_sqlite_execution_engine

    config = yaml.load("""
    name: my_sql_data_connector
    datasource_name: FAKE_Datasource_NAME

    assets:
        table_partitioned_by_timestamp_column__B:
            splitter_method: _split_on_converted_datetime
            splitter_kwargs:
                column_name: timestamp
    """)
    config["execution_engine"] = db

    my_data_connector = ConfiguredAssetSqlDataConnector(**config)

    report = my_data_connector.self_check()
    print(json.dumps(report, indent=2))

    assert report == {
        "class_name": "ConfiguredAssetSqlDataConnector",
        "data_asset_count": 1,
        "example_data_asset_names":
        ["table_partitioned_by_timestamp_column__B"],
        "data_assets": {
            "table_partitioned_by_timestamp_column__B": {
                "batch_definition_count":
                30,
                "example_data_references": [
                    {
                        "timestamp": "2020-01-01"
                    },
                    {
                        "timestamp": "2020-01-02"
                    },
                    {
                        "timestamp": "2020-01-03"
                    },
                ],
            }
        },
        "unmatched_data_reference_count": 0,
        "example_unmatched_data_references": [],
        # FIXME: (Sam) example_data_reference removed temporarily in PR #2590:
        # "example_data_reference": {
        #     "n_rows": 8,
        #     "batch_spec": {
        #         "table_name": "table_partitioned_by_timestamp_column__B",
        #         "data_asset_name": "table_partitioned_by_timestamp_column__B",
        #         "batch_identifiers": {"timestamp": "2020-01-02"},
        #         "splitter_method": "_split_on_converted_datetime",
        #         "splitter_kwargs": {"column_name": "timestamp"},
        #     },
        # },
    }
def test_example_C(
    splitter_method_name_prefix,
    test_cases_for_sql_data_connector_sqlite_execution_engine,
):
    random.seed(0)
    db = test_cases_for_sql_data_connector_sqlite_execution_engine

    config = yaml.load(
        f"""
    name: my_sql_data_connector
    datasource_name: FAKE_Datasource_NAME

    assets:
        table_partitioned_by_regularly_spaced_incrementing_id_column__C:
            splitter_method: {splitter_method_name_prefix}split_on_divided_integer
            splitter_kwargs:
                column_name: id
                divisor: 10
    """,
    )
    config["execution_engine"] = db

    my_data_connector = ConfiguredAssetSqlDataConnector(**config)

    report = my_data_connector.self_check()
    print(json.dumps(report, indent=2))

    assert report == {
        "class_name": "ConfiguredAssetSqlDataConnector",
        "data_asset_count": 1,
        "example_data_asset_names": [
            "table_partitioned_by_regularly_spaced_incrementing_id_column__C"
        ],
        "data_assets": {
            "table_partitioned_by_regularly_spaced_incrementing_id_column__C": {
                "batch_definition_count": 12,
                "example_data_references": [
                    {"id": 0},
                    {"id": 1},
                    {"id": 2},
                ],
            }
        },
        "unmatched_data_reference_count": 0,
        "example_unmatched_data_references": [],
        # FIXME: (Sam) example_data_reference removed temporarily in PR #2590:
        # "example_data_reference": {
        #     "n_rows": 10,
        #     "batch_spec": {
        #         "table_name": "table_partitioned_by_regularly_spaced_incrementing_id_column__C",
        #         "data_asset_name": "table_partitioned_by_regularly_spaced_incrementing_id_column__C",
        #         "batch_identifiers": {"id": 1},
        #         "splitter_method": "_split_on_divided_integer",
        #         "splitter_kwargs": {"column_name": "id", "divisor": 10},
        #     },
        # },
    }
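
test_example_C interpolates splitter_method_name_prefix into the splitter_method name. That value comes from a pytest fixture defined elsewhere in the test suite; the definition below is only a plausible sketch (an assumption, not the project's actual code) that parametrizes the legacy leading-underscore spelling against the public one.

import pytest


# Hypothetical fixture: runs the test once with "_split_on_divided_integer"
# and once with "split_on_divided_integer".
@pytest.fixture(params=["_", ""])
def splitter_method_name_prefix(request):
    return request.param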
Example #8
def test_example_E(test_cases_for_sql_data_connector_sqlite_execution_engine):
    random.seed(0)
    db = test_cases_for_sql_data_connector_sqlite_execution_engine

    config = yaml.load(
        """
    name: my_sql_data_connector
    datasource_name: FAKE_Datasource_NAME

    data_assets:
        table_partitioned_by_incrementing_batch_id__E:
            splitter_method: _split_on_column_value
            splitter_kwargs:
                column_name: batch_id
    """, )
    config["execution_engine"] = db

    my_data_connector = ConfiguredAssetSqlDataConnector(**config)

    report = my_data_connector.self_check()
    print(json.dumps(report, indent=2))

    assert report == {
        "class_name":
        "ConfiguredAssetSqlDataConnector",
        "data_asset_count":
        1,
        "example_data_asset_names":
        ["table_partitioned_by_incrementing_batch_id__E"],
        "data_assets": {
            "table_partitioned_by_incrementing_batch_id__E": {
                "batch_definition_count":
                11,
                "example_data_references": [
                    {
                        "batch_id": 0
                    },
                    {
                        "batch_id": 1
                    },
                    {
                        "batch_id": 2
                    },
                ],
            }
        },
        "unmatched_data_reference_count":
        0,
        "example_unmatched_data_references": [],
        # FIXME: (Sam) example_data_reference removed temporarily in PR #2590:
        # "example_data_reference": {
        #     "n_rows": 9,
        #     "batch_spec": {
        #         "table_name": "table_partitioned_by_incrementing_batch_id__E",
        #         "data_asset_name": "table_partitioned_by_incrementing_batch_id__E",
        #         "partition_definition": {"batch_id": 1},
        #         "splitter_method": "_split_on_column_value",
        #         "splitter_kwargs": {"column_name": "batch_id"},
        #     },
        # },
    }
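
As with the earlier sketches, the connector for table_partitioned_by_incrementing_batch_id__E can be queried through a BatchRequest. The snippet below is not part of the original test; it narrows the request to a single batch via data_connector_query, reusing the batch_filter_parameters pattern shown in test_behavior_with_whole_table_splitter above.

# Sketch only: filter the 11 batch definitions down to the one whose
# batch identifier is batch_id=1.
batch_definition_list = my_data_connector.get_batch_definition_list_from_batch_request(
    batch_request=BatchRequest(
        datasource_name="FAKE_Datasource_NAME",
        data_connector_name="my_sql_data_connector",
        data_asset_name="table_partitioned_by_incrementing_batch_id__E",
        data_connector_query={"batch_filter_parameters": {"batch_id": 1}},
    )
)
assert len(batch_definition_list) == 1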