Example no. 1
0
def test_serde_to_json(pytestconfig: PytestConfig, tmp_path: pathlib.Path,
                       json_filename: str) -> None:
    """Round-trip a golden MCE JSON file through a file source and file sink.

    The pipeline reads ``json_filename`` (resolved against the repo root),
    serializes it back out under ``tmp_path``, and the result must compare
    equal to the original after deserialization.
    """
    reference_path = pytestconfig.rootpath / json_filename
    serialized_path = tmp_path / "output.json"

    serde_pipeline = Pipeline.create({
        "source": {
            "type": "file",
            "config": {"filename": str(reference_path)},
        },
        "sink": {
            "type": "file",
            "config": {"filename": str(serialized_path)},
        },
    })
    serde_pipeline.run()
    serde_pipeline.raise_from_status()

    # The round-tripped JSON must match the golden file exactly.
    round_tripped = mce_helpers.load_json_file(serialized_path)
    reference = mce_helpers.load_json_file(reference_path)
    assert reference == round_tripped
Example no. 2
0
def test_dbt_ingest(pytestconfig, tmp_path, mock_time):
    """Ingest the dbt manifest/catalog fixtures and diff against the golden file."""
    # NOTE(review): another test_dbt_ingest appears later in this file; if
    # both live in one module the later definition shadows this one — these
    # look like separately extracted examples, so confirm before merging.
    resources = pytestconfig.rootpath / "tests/integration/dbt"

    recipe = {
        "run_id": "dbt-test",
        "source": {
            "type": "dbt",
            "config": {
                "manifest_path": f"{resources}/dbt_manifest.json",
                "catalog_path": f"{resources}/dbt_catalog.json",
            },
        },
        "sink": {
            "type": "file",
            "config": {
                "filename": f"{tmp_path}/dbt_mces.json",
            },
        },
    }
    ingest = Pipeline.create(recipe)
    ingest.run()
    ingest.raise_from_status()

    produced = mce_helpers.load_json_file(str(tmp_path / "dbt_mces.json"))
    expected = mce_helpers.load_json_file(
        str(resources / "dbt_mces_golden.json"))
    mce_helpers.assert_mces_equal(produced, expected)
Example no. 3
0
def test_mssql_ingest(docker_compose_runner, pytestconfig, tmp_path,
                      mock_time):
    """Start SQL Server in docker, seed it, ingest via the CLI, and diff the output."""
    resources = pytestconfig.rootpath / "tests/integration/sql_server"

    with docker_compose_runner(resources / "docker-compose.yml",
                               "sql-server") as services:
        # The port being open does not mean the server accepts connections
        # yet, so pad with a few extra seconds after the port check.
        # TODO: find a better way to check for liveness.
        wait_for_port(services, "testsqlserver", 1433)
        time.sleep(5)

        # Seed the database by running setup.sql inside the container.
        seed_cmd = (
            "docker exec testsqlserver /opt/mssql-tools/bin/sqlcmd"
            " -S localhost -U sa -P 'test!Password' -d master -i /setup/setup.sql"
        )
        completed = subprocess.run(seed_cmd,
                                   shell=True,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
        assert completed.returncode == 0

        # Run the metadata ingestion pipeline through the CLI recipe.
        recipe_path = (resources / "mssql_to_file.yml").resolve()
        cli = CliRunner()
        with fs_helpers.isolated_filesystem(tmp_path):
            invocation = cli.invoke(datahub, ["ingest", "-c", f"{recipe_path}"])
            assert invocation.exit_code == 0

            produced = mce_helpers.load_json_file("mssql_mces.json")

        # Verify the output against the golden file.
        expected = mce_helpers.load_json_file(
            str(resources / "mssql_mce_golden.json"))
        mce_helpers.assert_mces_equal(produced, expected)
def test_feast_ingest(docker_compose_runner, pytestconfig, tmp_path):
    """Start a Feast core service in docker, ingest it, and diff against the golden file."""
    resources = pytestconfig.rootpath / "tests/integration/feast"

    with docker_compose_runner(resources / "docker-compose.yml",
                               "feast") as services:
        wait_for_port(services, "testfeast", 6565)

        # The setup container opens this port only once the test fixtures
        # have been loaded into Feast.
        wait_for_port(services, "testfeast_setup", 6789)

        # Run the metadata ingestion pipeline against the live service.
        recipe = {
            "run_id": "feast-test",
            "source": {
                "type": "feast",
                "config": {
                    "core_url": "localhost:6565",
                    "use_local_build": True,
                },
            },
            "sink": {
                "type": "file",
                "config": {
                    "filename": f"{tmp_path}/feast_mces.json",
                },
            },
        }
        ingest = Pipeline.create(recipe)
        ingest.run()
        ingest.raise_from_status()

        # Verify the output against the golden file.
        produced = mce_helpers.load_json_file(str(tmp_path / "feast_mces.json"))
        expected = mce_helpers.load_json_file(
            str(resources / "feast_mce_golden.json"))
        mce_helpers.assert_mces_equal(produced, expected)
Example no. 5
0
def test_lookml_ingest(pytestconfig, tmp_path, mock_time):
    """Parse the checked-in LookML fixtures and diff the MCEs against expected output."""
    resources = pytestconfig.rootpath / "tests/integration/lookml"

    recipe = {
        "run_id": "lookml-test",
        "source": {
            "type": "lookml",
            "config": {
                "base_folder": str(resources),
                "connection_to_platform_map": {"my_connection": "conn"},
                "parse_table_names_from_sql": True,
            },
        },
        "sink": {
            "type": "file",
            "config": {
                "filename": f"{tmp_path}/lookml_mces.json",
            },
        },
    }
    ingest = Pipeline.create(recipe)
    ingest.run()
    ingest.raise_from_status()

    produced = mce_helpers.load_json_file(str(tmp_path / "lookml_mces.json"))
    reference = mce_helpers.load_json_file(
        str(resources / "expected_output.json"))
    mce_helpers.assert_mces_equal(produced, reference)
Example no. 6
0
def test_dbt_ingest(pytestconfig, tmp_path, mock_time):
    """Ingest dbt manifest/catalog/sources fixtures and diff against the golden file."""
    resources = pytestconfig.rootpath / "tests/integration/dbt"

    # The manifest, catalog, and sources fixtures were generated from
    # https://github.com/kevinhu/sample-dbt
    dbt_config = {
        "manifest_path": f"{resources}/dbt_manifest.json",
        "catalog_path": f"{resources}/dbt_catalog.json",
        "sources_path": f"{resources}/dbt_sources.json",
        "target_platform": "dbt",
        "load_schemas": True,
    }
    ingest = Pipeline.create({
        "run_id": "dbt-test",
        "source": {"type": "dbt", "config": dbt_config},
        "sink": {
            "type": "file",
            "config": {"filename": f"{tmp_path}/dbt_mces.json"},
        },
    })
    ingest.run()
    ingest.raise_from_status()

    produced = mce_helpers.load_json_file(str(tmp_path / "dbt_mces.json"))
    expected = mce_helpers.load_json_file(
        str(resources / "dbt_mces_golden.json"))
    mce_helpers.assert_mces_equal(produced, expected)
Example no. 7
0
def test_ldap_ingest(docker_compose_runner, pytestconfig, tmp_path, mock_time):
    """Start an OpenLDAP container, ingest it, and diff against the golden file."""
    resources = pytestconfig.rootpath / "tests/integration/ldap"

    with docker_compose_runner(resources / "docker-compose.yml",
                               "ldap") as services:
        # The openldap container loads its sample data only after exposing
        # the port publicly, so wait a bit beyond port readiness.
        wait_for_port(services, "openldap", 389)
        time.sleep(5)

        # NOTE(review): the user/password values look redacted ("******") —
        # confirm the real fixture credentials before running.
        ingest = Pipeline.create({
            "run_id": "ldap-test",
            "source": {
                "type": "ldap",
                "config": {
                    "ldap_server": "ldap://localhost",
                    "ldap_user": "******",
                    "ldap_password": "******",
                    "base_dn": "dc=example,dc=org",
                },
            },
            "sink": {
                "type": "file",
                "config": {
                    "filename": f"{tmp_path}/ldap_mces.json",
                },
            },
        })
        ingest.run()
        ingest.raise_from_status()

        produced = mce_helpers.load_json_file(str(tmp_path / "ldap_mces.json"))
        expected = mce_helpers.load_json_file(
            str(resources / "ldap_mces_golden.json"))
        mce_helpers.assert_mces_equal(produced, expected)
Example no. 8
0
def test_mongodb_ingest(docker_compose_runner, pytestconfig, tmp_path,
                        mock_time):
    """Start MongoDB in docker, ingest it, and diff against the golden file."""
    resources = pytestconfig.rootpath / "tests/integration/mongodb"

    with docker_compose_runner(resources / "docker-compose.yml",
                               "mongo") as services:
        wait_for_port(services, "testmongodb", 27017)

        # Run the metadata ingestion pipeline.
        # NOTE(review): the connect URI targets host port 57017 while the
        # wait above checks container port 27017 — presumably a compose port
        # mapping; verify against docker-compose.yml.
        ingest = Pipeline.create({
            "run_id": "mongodb-test",
            "source": {
                "type": "mongodb",
                "config": {
                    "connect_uri": "mongodb://localhost:57017",
                    "username": "******",
                    "password": "******",
                },
            },
            "sink": {
                "type": "file",
                "config": {
                    "filename": f"{tmp_path}/mongodb_mces.json",
                },
            },
        })
        ingest.run()
        ingest.raise_from_status()

        # Verify the output against the golden file.
        produced = mce_helpers.load_json_file(
            str(tmp_path / "mongodb_mces.json"))
        expected = mce_helpers.load_json_file(
            str(resources / "mongodb_mce_golden.json"))
        mce_helpers.assert_mces_equal(produced, expected)
Example no. 9
0
def test_looker_ingest(pytestconfig, tmp_path, mock_time):
    """Run the Looker source against a fully mocked API client and diff the MCEs.

    The Looker SDK client is replaced with a MagicMock serving a single
    dashboard with one element, so no network access occurs.
    """
    mocked_client = mock.MagicMock()
    with mock.patch(
        "datahub.ingestion.source.looker.LookerDashboardSource._get_looker_client",
        mocked_client,
    ):
        mocked_client.return_value.all_dashboards.return_value = [Dashboard(id="1")]
        mocked_client.return_value.dashboard.return_value = Dashboard(
            id="1",
            title="foo",
            created_at=datetime.utcfromtimestamp(time.time()),
            description="lorem ipsum",
            dashboard_elements=[
                DashboardElement(
                    id="2",
                    type="",
                    subtitle_text="Some text",
                    query=Query(
                        model="data",
                        view="my_view",
                        # Fixed: the JSON array literal was missing its
                        # closing "]", making the fixture unparseable JSON.
                        dynamic_fields='[{"table_calculation":"calc","label":"foobar","expression":"offset(${my_table.value},1)","value_format":null,"value_format_name":"eur","_kind_hint":"measure","_type_hint":"number"}]',
                    ),
                )
            ],
        )

        test_resources_dir = pytestconfig.rootpath / "tests/integration/looker"

        # Run the metadata ingestion pipeline against the mocked client.
        pipeline = Pipeline.create(
            {
                "run_id": "looker-test",
                "source": {
                    "type": "looker",
                    "config": {
                        "base_url": "https://looker.company.com",
                        "client_id": "foo",
                        "client_secret": "bar",
                    },
                },
                "sink": {
                    "type": "file",
                    "config": {
                        "filename": f"{tmp_path}/looker_mces.json",
                    },
                },
            }
        )
        pipeline.run()
        pipeline.raise_from_status()

        # Verify the output against the expected file.
        output = mce_helpers.load_json_file(str(tmp_path / "looker_mces.json"))
        expected = mce_helpers.load_json_file(
            str(test_resources_dir / "expected_output.json")
        )
        mce_helpers.assert_mces_equal(output, expected)
Example no. 10
0
def test_bq_usage_source(pytestconfig, tmp_path):
    """Replay canned BigQuery audit logs through the usage source and diff the MCEs.

    When WRITE_REFERENCE_FILE is set, the canned log fixture is regenerated
    from a real GCP project first; otherwise the checked-in fixture is used.
    """
    test_resources_dir: pathlib.Path = (
        pytestconfig.rootpath / "tests/integration/bigquery-usage"
    )
    reference_logs_path = test_resources_dir / "bigquery_logs.json"

    if WRITE_REFERENCE_FILE:
        # Pull real log entries to refresh the fixture.
        source = BigQueryUsageSource.create(
            dict(
                project_id="harshal-playground-306419",
                start_time=datetime.now(tz=timezone.utc) - timedelta(days=25),
            ),
            PipelineContext(run_id="bq-usage-test"),
        )
        # Strip the (unpicklable) logger before serializing the entries.
        sanitized = [
            entry._replace(logger=None)
            for entry in source._get_bigquery_log_entries()
        ]
        with reference_logs_path.open("w") as fixture:
            fixture.write(jsonpickle.encode(sanitized, indent=4))

    with unittest.mock.patch(
        "datahub.ingestion.source.bigquery_usage.GCPLoggingClient", autospec=True
    ) as MockClient:
        # Serve the canned log entries through the mocked GCP client.
        with reference_logs_path.open() as fixture:
            MockClient().list_entries.return_value = jsonpickle.decode(
                fixture.read()
            )

        # Run a BigQuery usage ingestion run.
        usage_pipeline = Pipeline.create(
            {
                "run_id": "test-bigquery-usage",
                "source": {
                    "type": "bigquery-usage",
                    "config": {"project_id": "sample-bigquery-project-1234"},
                },
                "sink": {
                    "type": "file",
                    "config": {
                        "filename": f"{tmp_path}/bigquery_usages.json",
                    },
                },
            }
        )
        usage_pipeline.run()
        usage_pipeline.raise_from_status()

    produced = mce_helpers.load_json_file(str(tmp_path / "bigquery_usages.json"))
    expected = mce_helpers.load_json_file(
        str(test_resources_dir / "bigquery_usages_golden.json")
    )
    mce_helpers.assert_mces_equal(produced, expected)
Example no. 11
0
def test_mysql_ingest(docker_compose_runner, pytestconfig, tmp_path,
                      mock_time):
    """Start MySQL in docker, ingest via the CLI recipe, and diff against the golden file."""
    resources = pytestconfig.rootpath / "tests/integration/mysql"

    with docker_compose_runner(resources / "docker-compose.yml",
                               "mysql") as services:
        wait_for_port(services, "testmysql", 3306)

        # Run the metadata ingestion pipeline through the CLI recipe.
        cli = CliRunner()
        with fs_helpers.isolated_filesystem(tmp_path):
            recipe_path = (resources / "mysql_to_file.yml").resolve()
            invocation = cli.invoke(datahub, ["ingest", "-c", f"{recipe_path}"])
            assert invocation.exit_code == 0

            produced = mce_helpers.load_json_file("mysql_mces.json")

        # Verify the output against the golden file.
        expected = mce_helpers.load_json_file(
            str(resources / "mysql_mces_golden.json"))
        mce_helpers.assert_mces_equal(produced, expected)
Example no. 12
0
def test_glue_ingest(tmp_path, pytestconfig):
    """Stub the Glue and S3 APIs, run the Glue source, and diff the MCEs.

    All AWS calls are intercepted by botocore Stubbers, so the test runs
    entirely offline against canned responses.
    """
    source = glue_source()

    # (method, canned response, expected request params) for each stubbed call.
    glue_calls = [
        ("get_databases", get_databases_response, {}),
        ("get_tables", get_tables_response_1,
         {"DatabaseName": "flights-database"}),
        ("get_tables", get_tables_response_2,
         {"DatabaseName": "test-database"}),
        ("get_jobs", get_jobs_response, {}),
        ("get_dataflow_graph", get_dataflow_graph_response_1,
         {"PythonScript": get_object_body_1}),
        ("get_dataflow_graph", get_dataflow_graph_response_2,
         {"PythonScript": get_object_body_2}),
    ]
    s3_calls = [
        ("get_object", get_object_response_1,
         {
             "Bucket": "aws-glue-assets-123412341234-us-west-2",
             "Key": "scripts/job-1.py",
         }),
        ("get_object", get_object_response_2,
         {
             "Bucket": "aws-glue-assets-123412341234-us-west-2",
             "Key": "scripts/job-2.py",
         }),
    ]

    with Stubber(source.glue_client) as glue_stubber:
        for method, response, params in glue_calls:
            glue_stubber.add_response(method, response, params)

        with Stubber(source.s3_client) as s3_stubber:
            for method, response, params in s3_calls:
                s3_stubber.add_response(method, response, params)

            # Drain the source while both stubbers are active.
            mce_objects = [
                wu.mce.to_obj() for wu in source.get_workunits()
            ]

            with open(str(tmp_path / "glue_mces.json"), "w") as f:
                json.dump(mce_objects, f, indent=2)

    produced = mce_helpers.load_json_file(str(tmp_path / "glue_mces.json"))

    test_resources_dir = pytestconfig.rootpath / "tests/unit/glue"
    expected = mce_helpers.load_json_file(
        str(test_resources_dir / "glue_mces_golden.json"))
    mce_helpers.assert_mces_equal(produced, expected)