def ingest_cleanup_data():
    print("ingesting test data")
    ingest_file_via_rest("tests/cypress/data.json")
    ingest_file_via_rest("tests/cypress/schema-blame-data.json")
    yield
    print("removing test data")
    delete_urns_from_file("tests/cypress/data.json")
    delete_urns_from_file("tests/cypress/schema-blame-data.json")

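# A minimal sketch of how a setup/teardown generator like ingest_cleanup_data
# above is typically registered with pytest: code before the yield runs as
# setup, code after it as teardown. The decorator arguments (scope, autouse)
# and the body below are illustrative assumptions, not taken from the original
# file.
import pytest


@pytest.fixture(scope="module", autouse=True)  # assumed scope/autouse, for illustration
def example_setup_teardown():
    print("setup: runs once before the tests in this module")
    yield
    print("teardown: runs once after the tests in this module")
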
def test_ingestion_via_rest_rapid(frontend_session, wait_for_healthchecks):
    ingest_file_via_rest(bootstrap_small)
    ingest_file_via_rest(bootstrap_small_2)
    urn = "urn:li:dataset:(urn:li:dataPlatform:testPlatform,testDataset,PROD)"
    json = {
        "query": """query getDataset($urn: String!) {\n
            dataset(urn: $urn) {\n
                urn\n
                name\n
                description\n
                platform {\n
                    urn\n
                }\n
                schemaMetadata {\n
                    name\n
                    version\n
                    createdAt\n
                }\n
                outgoing: relationships(\n
                    input: { types: ["DownstreamOf", "Consumes", "Produces"], direction: OUTGOING, start: 0, count: 10000 }\n
                ) {\n
                    start\n
                    count\n
                    total\n
                    relationships {\n
                        type\n
                        direction\n
                        entity {\n
                            urn\n
                            type\n
                        }\n
                    }\n
                }\n
            }\n
        }""",
        "variables": {"urn": urn},
    }
    # time.sleep(2)
    response = frontend_session.post(f"{FRONTEND_ENDPOINT}/api/v2/graphql", json=json)
    response.raise_for_status()
    res_data = response.json()

    assert res_data
    assert res_data["data"]
    assert res_data["data"]["dataset"]
    assert res_data["data"]["dataset"]["urn"] == urn

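# For reference, a minimal sketch of issuing a query against the same
# /api/v2/graphql endpoint outside the test harness. The /logIn call, the
# quickstart URL and credentials, and the example query are assumptions about
# a typical local DataHub deployment, not taken from the original file.
import requests


def example_graphql_query(frontend_endpoint="http://localhost:9002"):
    session = requests.Session()
    # Assumed quickstart default credentials; adjust for your deployment.
    session.post(
        f"{frontend_endpoint}/logIn",
        json={"username": "datahub", "password": "datahub"},
    )
    response = session.post(
        f"{frontend_endpoint}/api/v2/graphql",
        json={"query": "query { me { corpUser { username } } }"},
    )
    response.raise_for_status()
    return response.json()
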
def test_all():
    platform = "urn:li:dataPlatform:kafka"
    dataset_name = "test-timeline-sample-kafka"
    env = "PROD"
    dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})"

    ingest_file_via_rest("tests/timeline/timeline_test_data.json")
    ingest_file_via_rest("tests/timeline/timeline_test_datav2.json")
    ingest_file_via_rest("tests/timeline/timeline_test_datav3.json")

    res_data = timeline_cli.get_timeline(
        dataset_urn,
        ["TAG", "DOCUMENTATION", "TECHNICAL_SCHEMA", "GLOSSARY_TERM", "OWNERSHIP"],
        None,
        None,
        False,
    )

    delete_cli.delete_one_urn_cmd(dataset_urn, False, False, "dataset", None, None)

    assert res_data
    assert len(res_data) == 3
    assert res_data[0]["semVerChange"] == "MINOR"
    assert len(res_data[0]["changeEvents"]) == 10
    assert res_data[1]["semVerChange"] == "MAJOR"
    assert len(res_data[1]["changeEvents"]) == 9
    assert res_data[2]["semVerChange"] == "MAJOR"
    assert len(res_data[2]["changeEvents"]) == 7
    assert res_data[2]["semVer"] == "2.0.0-computed"

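# Reading the assertions above: three ingested snapshots produce three timeline
# transactions. Assuming the computed version starts from zero, one MINOR bump
# followed by two MAJOR bumps plausibly lands at 2.0.0, which would explain the
# "2.0.0-computed" semVer asserted for the last transaction.
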
def ingest_cleanup_data(request):
    print("ingesting containers test data")
    ingest_file_via_rest("tests/containers/data.json")
    yield
    print("removing containers test data")
    delete_urns_from_file("tests/containers/data.json")

def ingest_cleanup_data(request):
    print("ingesting test data")
    ingest_file_via_rest("tests/tags-and-terms/data.json")
    yield
    print("removing test data")
    delete_urns_from_file("tests/tags-and-terms/data.json")

def ingest_cleanup_data(request):
    print("ingesting deprecation test data")
    ingest_file_via_rest("tests/deprecation/data.json")
    yield
    print("removing deprecation test data")
    delete_urns_from_file("tests/deprecation/data.json")

def test_ingestion_usage_via_rest(wait_for_healthchecks):
    ingest_file_via_rest(usage_sample_data)

def test_ingestion_via_rest(wait_for_healthchecks):
    ingest_file_via_rest(bootstrap_sample_data)

def test_run_ingestion(generate_test_data):
    ingest_file_via_rest(generate_test_data)

def test_rollback_editable():
    platform = "urn:li:dataPlatform:kafka"
    dataset_name = "test-rollback"
    env = "PROD"
    dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})"

    session, gms_host = get_session_and_host()

    # Clean slate.
    delete_by_urn(dataset_urn, session, gms_host)
    assert "browsePaths" not in get_aspects_for_entity(
        entity_urn=dataset_urn, aspects=["browsePaths"], typed=False
    )
    assert "editableDatasetProperties" not in get_aspects_for_entity(
        entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False
    )

    # Ingest dataset
    ingested_dataset_run_id = ingest_file_via_rest(
        "tests/cli/cli_test_data.json"
    ).config.run_id
    print("Ingested dataset id:", ingested_dataset_run_id)

    # Assert that the first data ingestion worked
    assert "browsePaths" in get_aspects_for_entity(
        entity_urn=dataset_urn, aspects=["browsePaths"], typed=False
    )

    # Sleep forces ingestion of files to have distinct run-ids.
    sleep(1)

    # Make editable change
    ingested_editable_run_id = ingest_file_via_rest(
        "tests/cli/cli_editable_test_data.json"
    ).config.run_id
    print("ingested editable id:", ingested_editable_run_id)

    # Assert that the second data ingestion worked
    assert "editableDatasetProperties" in get_aspects_for_entity(
        entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False
    )

    # Rollback ingestion 1
    rollback_url = f"{gms_host}/runs?action=rollback"
    session.post(
        rollback_url,
        data=json.dumps(
            {"runId": ingested_dataset_run_id, "dryRun": False, "hardDelete": False}
        ),
    )

    # Allow async MCP processor to handle ingestions & rollbacks
    sleep(10)

    # EditableDatasetProperties should still be part of the entity that was soft deleted.
    assert "editableDatasetProperties" in get_aspects_for_entity(
        entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False
    )

    # But the first ingestion's aspects should not be present
    assert "browsePaths" not in get_aspects_for_entity(
        entity_urn=dataset_urn, aspects=["browsePaths"], typed=False
    )
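
# The fixed sleep(10) above can be flaky on slow machines. A minimal
# alternative sketch: poll for the expected post-rollback state up to a
# timeout. wait_until is a hypothetical helper, not part of the original test.
import time


def wait_until(condition, timeout=30, interval=2):
    """Poll a zero-argument callable until it returns True or the timeout expires."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        if condition():
            return True
        time.sleep(interval)
    return False


# Example usage (hypothetical), replacing the sleep(10) in the test above:
# assert wait_until(
#     lambda: "browsePaths"
#     not in get_aspects_for_entity(
#         entity_urn=dataset_urn, aspects=["browsePaths"], typed=False
#     )
# )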