Example no. 1
0
def ingest_cleanup_data():
    """Fixture: ingest the cypress test data via REST, then delete its URNs on teardown."""
    data_files = [
        "tests/cypress/data.json",
        "tests/cypress/schema-blame-data.json",
    ]
    print("ingesting test data")
    for path in data_files:
        ingest_file_via_rest(path)
    yield
    print("removing test data")
    for path in data_files:
        delete_urns_from_file(path)
Example no. 2
0
def test_ingestion_via_rest_rapid(frontend_session, wait_for_healthchecks):
    """Ingest two bootstrap files back-to-back, then verify the dataset and its
    relationships are queryable through the frontend GraphQL endpoint."""
    ingest_file_via_rest(bootstrap_small)
    ingest_file_via_rest(bootstrap_small_2)
    # Plain string literal: the previous f-string had no placeholders.
    urn = "urn:li:dataset:(urn:li:dataPlatform:testPlatform,testDataset,PROD)"
    # Renamed from `json` so the stdlib json module is not shadowed.
    graphql_payload = {
        "query": """query getDataset($urn: String!) {\n
                dataset(urn: $urn) {\n
                    urn\n
                    name\n
                    description\n
                    platform {\n
                        urn\n
                    }\n
                    schemaMetadata {\n
                        name\n
                        version\n
                        createdAt\n
                    }\n
                    outgoing: relationships(\n
                                input: { types: ["DownstreamOf", "Consumes", "Produces"], direction: OUTGOING, start: 0, count: 10000 }\n
                            ) {\n
                            start\n
                            count\n
                            total\n
                            relationships {\n
                                type\n
                                direction\n
                                entity {\n
                                    urn\n
                                    type\n
                                }\n
                            }\n
                    }\n
                }\n
            }""",
        "variables": {
            "urn": urn
        }
    }
    # Give the async ingestion pipeline a moment to index before querying.
    time.sleep(2)
    response = frontend_session.post(f"{FRONTEND_ENDPOINT}/api/v2/graphql",
                                     json=graphql_payload)
    response.raise_for_status()
    res_data = response.json()

    assert res_data
    assert res_data["data"]
    assert res_data["data"]["dataset"]
    assert res_data["data"]["dataset"]["urn"] == urn
Example no. 3
0
def test_all():
    """Ingest three versions of a kafka test dataset, read back its timeline,
    and verify the semantic-version change classification of each revision."""
    platform = "urn:li:dataPlatform:kafka"
    # Redundant parentheses around the literal removed.
    dataset_name = "test-timeline-sample-kafka"
    env = "PROD"
    dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})"

    ingest_file_via_rest("tests/timeline/timeline_test_data.json")
    ingest_file_via_rest("tests/timeline/timeline_test_datav2.json")
    ingest_file_via_rest("tests/timeline/timeline_test_datav3.json")

    res_data = timeline_cli.get_timeline(dataset_urn, [
        "TAG", "DOCUMENTATION", "TECHNICAL_SCHEMA", "GLOSSARY_TERM",
        "OWNERSHIP"
    ], None, None, False)

    # Clean up before asserting so the data is removed even on assertion failure paths below.
    delete_cli.delete_one_urn_cmd(dataset_urn, False, False, "dataset", None,
                                  None)
    # One timeline entry per ingested file, each classified by semver impact.
    assert res_data
    assert len(res_data) == 3
    assert res_data[0]["semVerChange"] == "MINOR"
    assert len(res_data[0]["changeEvents"]) == 10
    assert res_data[1]["semVerChange"] == "MAJOR"
    assert len(res_data[1]["changeEvents"]) == 9
    assert res_data[2]["semVerChange"] == "MAJOR"
    assert len(res_data[2]["changeEvents"]) == 7
    assert res_data[2]["semVer"] == "2.0.0-computed"
Example no. 4
0
def ingest_cleanup_data(request):
    """Fixture: ingest containers test data before the test, remove it afterwards."""
    data_file = "tests/containers/data.json"
    print("ingesting containers test data")
    ingest_file_via_rest(data_file)
    yield
    print("removing containers test data")
    delete_urns_from_file(data_file)
Example no. 5
0
def ingest_cleanup_data(request):
    """Fixture: ingest the tags-and-terms test data, then clean it up on teardown."""
    data_file = "tests/tags-and-terms/data.json"
    print("ingesting test data")
    ingest_file_via_rest(data_file)
    yield
    print("removing test data")
    delete_urns_from_file(data_file)
Example no. 6
0
def ingest_cleanup_data(request):
    """Fixture: ingest deprecation test data for the test, delete its URNs afterwards."""
    data_file = "tests/deprecation/data.json"
    print("ingesting deprecation test data")
    ingest_file_via_rest(data_file)
    yield
    print("removing deprecation test data")
    delete_urns_from_file(data_file)
Example no. 7
0
def test_ingestion_usage_via_rest(wait_for_healthchecks):
    """Smoke test: the usage sample data file ingests over REST without error."""
    ingest_file_via_rest(usage_sample_data)
Example no. 8
0
def test_ingestion_via_rest(wait_for_healthchecks):
    """Smoke test: the bootstrap sample data file ingests over REST without error."""
    ingest_file_via_rest(bootstrap_sample_data)
Example no. 9
0
def test_run_ingestion(generate_test_data):
    """Smoke test: data produced by the generate_test_data fixture ingests over REST."""
    ingest_file_via_rest(generate_test_data)
Example no. 10
0
def test_rollback_editable():
    """Verify that rolling back an ingestion run soft-deletes that run's aspects
    while preserving editable aspects written by a different run."""
    platform = "urn:li:dataPlatform:kafka"
    # Redundant parentheses around the literal removed.
    dataset_name = "test-rollback"
    env = "PROD"
    dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})"

    session, gms_host = get_session_and_host()

    # Clean slate: ensure no leftover aspects from previous runs.
    delete_by_urn(dataset_urn, session, gms_host)

    assert "browsePaths" not in get_aspects_for_entity(entity_urn=dataset_urn,
                                                       aspects=["browsePaths"],
                                                       typed=False)
    assert "editableDatasetProperties" not in get_aspects_for_entity(
        entity_urn=dataset_urn,
        aspects=["editableDatasetProperties"],
        typed=False)

    # Ingest dataset (run 1).
    ingested_dataset_run_id = ingest_file_via_rest(
        "tests/cli/cli_test_data.json").config.run_id
    print("Ingested dataset id:", ingested_dataset_run_id)
    # Assert that the first data ingestion worked (comment previously said "second").
    assert "browsePaths" in get_aspects_for_entity(entity_urn=dataset_urn,
                                                   aspects=["browsePaths"],
                                                   typed=False)

    # Sleep forces ingestion of files to have distinct run-ids.
    sleep(1)

    # Make editable change (run 2).
    ingested_editable_run_id = ingest_file_via_rest(
        "tests/cli/cli_editable_test_data.json").config.run_id
    print("ingested editable id:", ingested_editable_run_id)
    # Assert that the second data ingestion worked.
    assert "editableDatasetProperties" in get_aspects_for_entity(
        entity_urn=dataset_urn,
        aspects=["editableDatasetProperties"],
        typed=False)

    # Roll back ingestion run 1 (soft delete).
    rollback_url = f"{gms_host}/runs?action=rollback"

    response = session.post(rollback_url,
                            data=json.dumps({
                                "runId": ingested_dataset_run_id,
                                "dryRun": False,
                                "hardDelete": False
                            }))
    # Fail fast if the rollback request itself was rejected; the response was
    # previously ignored, so a failed rollback surfaced only as a confusing
    # assertion failure below.
    response.raise_for_status()

    # Allow async MCP processor to handle ingestions & rollbacks.
    sleep(10)

    # EditableDatasetProperties should still be part of the entity that was soft deleted.
    assert "editableDatasetProperties" in get_aspects_for_entity(
        entity_urn=dataset_urn,
        aspects=["editableDatasetProperties"],
        typed=False)
    # But first ingestion aspects should not be present (dead trailing `pass` removed).
    assert "browsePaths" not in get_aspects_for_entity(entity_urn=dataset_urn,
                                                       aspects=["browsePaths"],
                                                       typed=False)