def test_dbt_ingest(pytestconfig, tmp_path, mock_time):
    """Ingest the checked-in dbt manifest/catalog fixtures and compare the
    emitted MCE file against the golden file.

    NOTE(review): a second, more complete ``test_dbt_ingest`` exists in this
    file; at import time the later definition shadows this one — confirm
    which version is intended to survive.
    """
    test_resources_dir = pytestconfig.rootpath / "tests/integration/dbt"
    mces_path = tmp_path / "dbt_mces.json"

    recipe = {
        "run_id": "dbt-test",
        "source": {
            "type": "dbt",
            "config": {
                "manifest_path": f"{test_resources_dir}/dbt_manifest.json",
                "catalog_path": f"{test_resources_dir}/dbt_catalog.json",
            },
        },
        "sink": {
            "type": "file",
            "config": {"filename": f"{mces_path}"},
        },
    }

    pipeline = Pipeline.create(recipe)
    pipeline.run()
    pipeline.raise_from_status()

    # Compare what the pipeline wrote against the golden reference.
    produced = mce_helpers.load_json_file(str(mces_path))
    expected = mce_helpers.load_json_file(
        str(test_resources_dir / "dbt_mces_golden.json")
    )
    mce_helpers.assert_mces_equal(produced, expected)
def test_mssql_ingest(docker_compose_runner, pytestconfig, tmp_path, mock_time):
    """Stand up SQL Server via docker-compose, seed it with setup.sql, run the
    mssql ingestion recipe through the datahub CLI, and compare the emitted
    MCEs against the golden file.
    """
    test_resources_dir = pytestconfig.rootpath / "tests/integration/sql_server"

    with docker_compose_runner(
        test_resources_dir / "docker-compose.yml", "sql-server"
    ) as docker_services:
        # Wait for SQL Server to be ready. We wait an extra couple seconds, as the port
        # being available does not mean the server is accepting connections.
        # TODO: find a better way to check for liveness.
        wait_for_port(docker_services, "testsqlserver", 1433)
        time.sleep(5)

        # Run the setup.sql file to populate the database.
        # Use an argv list with shell=False: no shell quoting to get wrong
        # (the password contains '!', which is fragile under a shell).
        command = [
            "docker", "exec", "testsqlserver",
            "/opt/mssql-tools/bin/sqlcmd",
            "-S", "localhost",
            "-U", "sa",
            "-P", "test!Password",
            "-d", "master",
            "-i", "/setup/setup.sql",
        ]
        ret = subprocess.run(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
        # Include captured output in the failure message so a broken
        # setup.sql is diagnosable from the pytest report alone.
        assert ret.returncode == 0, (ret.stdout, ret.stderr)

        # Run the metadata ingestion pipeline via the CLI.
        config_file = (test_resources_dir / "mssql_to_file.yml").resolve()
        runner = CliRunner()
        with fs_helpers.isolated_filesystem(tmp_path):
            result = runner.invoke(datahub, ["ingest", "-c", f"{config_file}"])
            assert result.exit_code == 0
            output = mce_helpers.load_json_file("mssql_mces.json")

        # Verify the output.
        golden = mce_helpers.load_json_file(
            str(test_resources_dir / "mssql_mce_golden.json")
        )
        mce_helpers.assert_mces_equal(output, golden)
def test_feast_ingest(docker_compose_runner, pytestconfig, tmp_path):
    """Bring up the Feast test containers, ingest from the core service, and
    compare the emitted MCEs against the golden file."""
    test_resources_dir = pytestconfig.rootpath / "tests/integration/feast"

    with docker_compose_runner(
        test_resources_dir / "docker-compose.yml", "feast"
    ) as docker_services:
        wait_for_port(docker_services, "testfeast", 6565)
        # The setup container only starts listening once the test fixtures
        # have been loaded, so waiting on it doubles as a readiness gate.
        wait_for_port(docker_services, "testfeast_setup", 6789)

        # Run the metadata ingestion pipeline.
        recipe = {
            "run_id": "feast-test",
            "source": {
                "type": "feast",
                "config": {
                    "core_url": "localhost:6565",
                    "use_local_build": True,
                },
            },
            "sink": {
                "type": "file",
                "config": {"filename": f"{tmp_path}/feast_mces.json"},
            },
        }
        pipeline = Pipeline.create(recipe)
        pipeline.run()
        pipeline.raise_from_status()

        # Verify the output.
        produced = mce_helpers.load_json_file(str(tmp_path / "feast_mces.json"))
        expected = mce_helpers.load_json_file(
            str(test_resources_dir / "feast_mce_golden.json")
        )
        mce_helpers.assert_mces_equal(produced, expected)
def test_lookml_ingest(pytestconfig, tmp_path, mock_time):
    """Run the lookml source over the checked-in LookML project and compare
    the emitted MCEs against the expected output file."""
    test_resources_dir = pytestconfig.rootpath / "tests/integration/lookml"
    out_file = tmp_path / "lookml_mces.json"

    recipe = {
        "run_id": "lookml-test",
        "source": {
            "type": "lookml",
            "config": {
                "base_folder": str(test_resources_dir),
                # Map the LookML connection name onto a platform.
                "connection_to_platform_map": {"my_connection": "conn"},
                "parse_table_names_from_sql": True,
            },
        },
        "sink": {
            "type": "file",
            "config": {"filename": f"{out_file}"},
        },
    }

    pipeline = Pipeline.create(recipe)
    pipeline.run()
    pipeline.raise_from_status()

    produced = mce_helpers.load_json_file(str(out_file))
    reference = mce_helpers.load_json_file(
        str(test_resources_dir / "expected_output.json")
    )
    mce_helpers.assert_mces_equal(produced, reference)
def test_dbt_ingest(pytestconfig, tmp_path, mock_time):
    """Ingest dbt manifest/catalog/sources fixtures (with schema loading) and
    compare the emitted MCEs against the golden file."""
    test_resources_dir = pytestconfig.rootpath / "tests/integration/dbt"

    # test manifest, catalog, sources are generated from
    # https://github.com/kevinhu/sample-dbt
    source_config = {
        "manifest_path": f"{test_resources_dir}/dbt_manifest.json",
        "catalog_path": f"{test_resources_dir}/dbt_catalog.json",
        "sources_path": f"{test_resources_dir}/dbt_sources.json",
        "target_platform": "dbt",
        "load_schemas": True,
    }

    pipeline = Pipeline.create(
        {
            "run_id": "dbt-test",
            "source": {"type": "dbt", "config": source_config},
            "sink": {
                "type": "file",
                "config": {"filename": f"{tmp_path}/dbt_mces.json"},
            },
        }
    )
    pipeline.run()
    pipeline.raise_from_status()

    produced = mce_helpers.load_json_file(str(tmp_path / "dbt_mces.json"))
    reference = mce_helpers.load_json_file(
        str(test_resources_dir / "dbt_mces_golden.json")
    )
    mce_helpers.assert_mces_equal(produced, reference)
def test_ldap_ingest(docker_compose_runner, pytestconfig, tmp_path, mock_time):
    """Bring up OpenLDAP with sample data, ingest it, and compare the emitted
    MCEs against the golden file."""
    test_resources_dir = pytestconfig.rootpath / "tests/integration/ldap"

    with docker_compose_runner(
        test_resources_dir / "docker-compose.yml", "ldap"
    ) as docker_services:
        # The openldap container loads the sample data after exposing the
        # port publicly, so sleep a little extra to let the data land.
        wait_for_port(docker_services, "openldap", 389)
        time.sleep(5)

        recipe = {
            "run_id": "ldap-test",
            "source": {
                "type": "ldap",
                "config": {
                    "ldap_server": "ldap://localhost",
                    "ldap_user": "******",
                    "ldap_password": "******",
                    "base_dn": "dc=example,dc=org",
                },
            },
            "sink": {
                "type": "file",
                "config": {"filename": f"{tmp_path}/ldap_mces.json"},
            },
        }
        pipeline = Pipeline.create(recipe)
        pipeline.run()
        pipeline.raise_from_status()

        produced = mce_helpers.load_json_file(str(tmp_path / "ldap_mces.json"))
        reference = mce_helpers.load_json_file(
            str(test_resources_dir / "ldap_mces_golden.json")
        )
        mce_helpers.assert_mces_equal(produced, reference)
def test_mongodb_ingest(docker_compose_runner, pytestconfig, tmp_path, mock_time):
    """Bring up MongoDB, ingest its metadata, and compare the emitted MCEs
    against the golden file."""
    test_resources_dir = pytestconfig.rootpath / "tests/integration/mongodb"

    with docker_compose_runner(
        test_resources_dir / "docker-compose.yml", "mongo"
    ) as docker_services:
        wait_for_port(docker_services, "testmongodb", 27017)

        # Run the metadata ingestion pipeline.
        # NOTE(review): the URI uses host port 57017 while we waited on
        # container port 27017 — presumably the compose file maps
        # 27017 -> 57017 on the host; confirm against docker-compose.yml.
        recipe = {
            "run_id": "mongodb-test",
            "source": {
                "type": "mongodb",
                "config": {
                    "connect_uri": "mongodb://localhost:57017",
                    "username": "******",
                    "password": "******",
                },
            },
            "sink": {
                "type": "file",
                "config": {"filename": f"{tmp_path}/mongodb_mces.json"},
            },
        }
        pipeline = Pipeline.create(recipe)
        pipeline.run()
        pipeline.raise_from_status()

        # Verify the output.
        produced = mce_helpers.load_json_file(str(tmp_path / "mongodb_mces.json"))
        reference = mce_helpers.load_json_file(
            str(test_resources_dir / "mongodb_mce_golden.json")
        )
        mce_helpers.assert_mces_equal(produced, reference)
def test_looker_ingest(pytestconfig, tmp_path, mock_time):
    """Run the looker source against a mocked Looker API client returning a
    single dashboard, and compare the emitted MCEs against the expected file.
    """
    mocked_client = mock.MagicMock()
    with mock.patch(
        "datahub.ingestion.source.looker.LookerDashboardSource._get_looker_client",
        mocked_client,
    ):
        # The source first lists dashboards, then fetches each by id.
        mocked_client.return_value.all_dashboards.return_value = [Dashboard(id="1")]
        mocked_client.return_value.dashboard.return_value = Dashboard(
            id="1",
            title="foo",
            created_at=datetime.utcfromtimestamp(time.time()),
            description="lorem ipsum",
            dashboard_elements=[
                DashboardElement(
                    id="2",
                    type="",
                    subtitle_text="Some text",
                    query=Query(
                        model="data",
                        view="my_view",
                        # BUG FIX: the fixture string was missing its closing
                        # "]", so dynamic_fields was not valid JSON for any
                        # consumer that parses it.
                        dynamic_fields='[{"table_calculation":"calc","label":"foobar","expression":"offset(${my_table.value},1)","value_format":null,"value_format_name":"eur","_kind_hint":"measure","_type_hint":"number"}]',
                    ),
                )
            ],
        )

        test_resources_dir = pytestconfig.rootpath / "tests/integration/looker"

        pipeline = Pipeline.create(
            {
                "run_id": "looker-test",
                "source": {
                    "type": "looker",
                    "config": {
                        "base_url": "https://looker.company.com",
                        "client_id": "foo",
                        "client_secret": "bar",
                    },
                },
                "sink": {
                    "type": "file",
                    "config": {
                        "filename": f"{tmp_path}/looker_mces.json",
                    },
                },
            }
        )
        pipeline.run()
        pipeline.raise_from_status()

        output = mce_helpers.load_json_file(str(tmp_path / "looker_mces.json"))
        expected = mce_helpers.load_json_file(
            str(test_resources_dir / "expected_output.json")
        )
        mce_helpers.assert_mces_equal(output, expected)
def test_bq_usage_source(pytestconfig, tmp_path):
    """Run the BigQuery-usage source against recorded log entries and compare
    the emitted usage aggregates against the golden file.

    When the module-level ``WRITE_REFERENCE_FILE`` flag is set, this test
    first re-records the reference logs from a live GCP project (requires
    credentials) before running the mocked ingestion.
    """
    # from google.cloud.logging_v2 import ProtobufEntry
    test_resources_dir: pathlib.Path = (
        pytestconfig.rootpath / "tests/integration/bigquery-usage"
    )
    bigquery_reference_logs_path = test_resources_dir / "bigquery_logs.json"

    if WRITE_REFERENCE_FILE:
        # Re-record the reference log file from a real project. This branch
        # mutates the checked-in fixture; it is off in normal test runs.
        source = BigQueryUsageSource.create(
            dict(
                project_id="harshal-playground-306419",
                start_time=datetime.now(tz=timezone.utc) - timedelta(days=25),
            ),
            PipelineContext(run_id="bq-usage-test"),
        )
        entries = list(source._get_bigquery_log_entries())
        # Drop the logger reference so the entries are picklable to JSON.
        entries = [entry._replace(logger=None) for entry in entries]
        log_entries = jsonpickle.encode(entries, indent=4)
        with bigquery_reference_logs_path.open("w") as logs:
            logs.write(log_entries)

    with unittest.mock.patch(
        "datahub.ingestion.source.bigquery_usage.GCPLoggingClient", autospec=True
    ) as MockClient:
        # Add mock BigQuery API responses.
        with bigquery_reference_logs_path.open() as logs:
            reference_logs = jsonpickle.decode(logs.read())
        # NOTE: MockClient() is called here so the return value configured
        # below is attached to the same instance the source will construct.
        MockClient().list_entries.return_value = reference_logs

        # Run a BigQuery usage ingestion run.
        pipeline = Pipeline.create(
            {
                "run_id": "test-bigquery-usage",
                "source": {
                    "type": "bigquery-usage",
                    "config": {"project_id": "sample-bigquery-project-1234"},
                },
                "sink": {
                    "type": "file",
                    "config": {
                        "filename": f"{tmp_path}/bigquery_usages.json",
                    },
                },
            }
        )
        pipeline.run()
        pipeline.raise_from_status()

        output = mce_helpers.load_json_file(str(tmp_path / "bigquery_usages.json"))
        golden = mce_helpers.load_json_file(
            str(test_resources_dir / "bigquery_usages_golden.json")
        )
        mce_helpers.assert_mces_equal(output, golden)
def test_mysql_ingest(docker_compose_runner, pytestconfig, tmp_path, mock_time):
    """Bring up MySQL, run the mysql ingestion recipe through the datahub
    CLI, and compare the emitted MCEs against the golden file."""
    test_resources_dir = pytestconfig.rootpath / "tests/integration/mysql"

    with docker_compose_runner(
        test_resources_dir / "docker-compose.yml", "mysql"
    ) as docker_services:
        wait_for_port(docker_services, "testmysql", 3306)

        # Run the metadata ingestion pipeline via the CLI inside an
        # isolated working directory so the sink file lands in tmp_path.
        config_file = (test_resources_dir / "mysql_to_file.yml").resolve()
        runner = CliRunner()
        with fs_helpers.isolated_filesystem(tmp_path):
            result = runner.invoke(datahub, ["ingest", "-c", f"{config_file}"])
            assert result.exit_code == 0
            produced = mce_helpers.load_json_file("mysql_mces.json")

        # Verify the output.
        reference = mce_helpers.load_json_file(
            str(test_resources_dir / "mysql_mces_golden.json")
        )
        mce_helpers.assert_mces_equal(produced, reference)
def test_glue_ingest(tmp_path, pytestconfig):
    """Run the Glue source against stubbed boto3 Glue/S3 clients and compare
    the emitted MCEs against the golden file.

    The stubbed responses are consumed in registration order, so the order
    of ``add_response`` calls below must match the order in which the source
    issues its API calls.
    """
    glue_source_instance = glue_source()

    with Stubber(glue_source_instance.glue_client) as glue_stubber:
        # Database listing, then per-database table listings.
        glue_stubber.add_response("get_databases", get_databases_response, {})
        glue_stubber.add_response(
            "get_tables",
            get_tables_response_1,
            {"DatabaseName": "flights-database"},
        )
        glue_stubber.add_response(
            "get_tables",
            get_tables_response_2,
            {"DatabaseName": "test-database"},
        )
        # Job listing, then one dataflow-graph extraction per job script.
        glue_stubber.add_response("get_jobs", get_jobs_response, {})
        glue_stubber.add_response(
            "get_dataflow_graph",
            get_dataflow_graph_response_1,
            {"PythonScript": get_object_body_1},
        )
        glue_stubber.add_response(
            "get_dataflow_graph",
            get_dataflow_graph_response_2,
            {"PythonScript": get_object_body_2},
        )

        with Stubber(glue_source_instance.s3_client) as s3_stubber:
            # The source downloads each job's script from S3 before asking
            # Glue to parse it into a dataflow graph.
            s3_stubber.add_response(
                "get_object",
                get_object_response_1,
                {
                    "Bucket": "aws-glue-assets-123412341234-us-west-2",
                    "Key": "scripts/job-1.py",
                },
            )
            s3_stubber.add_response(
                "get_object",
                get_object_response_2,
                {
                    "Bucket": "aws-glue-assets-123412341234-us-west-2",
                    "Key": "scripts/job-2.py",
                },
            )

            mce_objects = [
                wu.mce.to_obj() for wu in glue_source_instance.get_workunits()
            ]

            # Persist the emitted MCEs so they can be diffed on failure.
            with open(str(tmp_path / "glue_mces.json"), "w") as f:
                json.dump(mce_objects, f, indent=2)

    output = mce_helpers.load_json_file(str(tmp_path / "glue_mces.json"))

    test_resources_dir = pytestconfig.rootpath / "tests/unit/glue"
    golden = mce_helpers.load_json_file(
        str(test_resources_dir / "glue_mces_golden.json")
    )
    mce_helpers.assert_mces_equal(output, golden)
def test_basic_diff_owner_change():
    """An ownership change between otherwise-identical MCE files must be
    reported as a difference."""
    pytest.raises(AssertionError, mce_helpers.assert_mces_equal, basic_1, basic_3)
def test_basic_diff_same():
    """Two semantically identical MCE files must compare as equal."""
    compare = mce_helpers.assert_mces_equal
    compare(basic_1, basic_2)
def test_basic_diff_owner_change() -> None:
    """An ownership change must still be detected even when volatile
    timestamp paths are excluded from the comparison."""
    with pytest.raises(AssertionError):
        mce_helpers.assert_mces_equal(
            basic_1, basic_3, mce_helpers.IGNORE_PATH_TIMESTAMPS
        )
def test_basic_diff_same() -> None:
    """Identical MCE files compare equal once volatile timestamp paths are
    excluded from the diff."""
    ignored_paths = mce_helpers.IGNORE_PATH_TIMESTAMPS
    mce_helpers.assert_mces_equal(basic_1, basic_2, ignored_paths)