Exemple #1
0
def test_batches_entities_with_real_guid():
    """Batch four entities where one relationship points at a literal guid.

    B references A through an entity object, while C references the
    hard-coded guid ``"abc-123"`` through a plain dict that does not
    correspond to any entity in the list. With ``batch_size=2`` the test
    expects ``batch_dependent_entities`` to return exactly two batches.
    """
    tracker = GuidTracker()

    def dataset(label):
        # The single letter doubles as both name and qualified name.
        return AtlasEntity(label, "DataSet", label, guid=tracker.get_guid())

    ent_a = dataset("A")
    ent_b = dataset("B")
    ent_b.addRelationship(table=ent_a)

    ent_c = dataset("C")
    ent_d = dataset("D")
    # This relationship target is a raw guid dict, not one of the entities
    # created above.
    ent_c.addRelationship(tester={"guid": "abc-123"})

    payload = [entity.to_json() for entity in (ent_a, ent_b, ent_c, ent_d)]
    batches = batch_dependent_entities(payload, batch_size=2)

    assert len(batches) == 2
Exemple #2
0
def test_set_relationship_different_ways():
    """Verify that table/column relationships can be declared several ways.

    One ``hive_table`` and four ``hive_column`` entities are uploaded. The
    columns are tied to the table as follows:

    * c1 -- listed in the table's ``columns`` relationship attribute.
    * c2 -- ``table`` written directly into ``relationshipAttributes``.
    * c3 -- ``table`` set through ``addRelationship``.
    * c4 -- uploaded with no link, then attached afterwards through a
      standalone ``hive_table_columns`` relationship.

    Every uploaded entity is deleted in the ``finally`` clause, whether or
    not the assertions pass. Relies on the module-level ``client``.
    """
    ae = AtlasEntity("rel01","hive_table", "tests://rel01", guid=-1)
    c1 = AtlasEntity("rel01#01", "hive_column", "tests://rel01#c", guid=-2, attributes={"type":"str"})
    c2 = AtlasEntity("rel01#02", "hive_column", "tests://rel02#c", guid=-3, attributes={"type":"str"})
    c3 = AtlasEntity("rel01#03", "hive_column", "tests://rel03#c", guid=-4, attributes={"type":"str"})
    c4 = AtlasEntity("rel01#04", "hive_column", "tests://rel04#c", guid=-5, attributes={"type":"str"})

    # Add c1 as the only relationship declared on the table itself
    ae.addRelationship(columns=[c1.to_json(minimum=True)])

    # c2 and c3 point back at the table from the column side
    c2.relationshipAttributes.update({"table": ae.to_json(minimum=True) })
    c3.addRelationship(table = ae)

    assignments = client.upload_entities([ae, c1, c2, c3, c4])["guidAssignments"]
    try:
        live_table = client.get_entity(guid=assignments["-1"])["entities"][0]

        # Should have three columns: one comes from the table having the
        # relationship defined as an array of columns, and the other two from
        # the columns having the table relationshipAttribute defined on them.
        assert(len(live_table["relationshipAttributes"]["columns"]) == 3)

        relationship = {
                    "typeName": "hive_table_columns",
                    "attributes": {},
                    "guid": -100,
                    # Ends are either guid or guid + typeName 
                    # (in case there are ambiguities?)
                    "end1": {
                        "guid": assignments["-1"]
                    },
                    "end2": {
                        "guid": assignments["-5"]
                    }
                }

        client.upload_relationship(relationship)
        # Check that we have one more relationship. The table must be fetched
        # again: the earlier ``live_table`` dict is a snapshot taken before
        # the relationship upload and can never contain the fourth column.
        # NOTE(review): the original author observed caching issues on this
        # read; if the service lags, this assertion may need a retry.
        live_table_post_relationship = client.get_entity(guid=assignments["-1"])["entities"][0]
        assert(len(live_table_post_relationship["relationshipAttributes"]["columns"]) == 4)

    finally:
        # Need to delete all columns BEFORE you delete the table; iterating
        # -5 .. -1 removes the columns (-5 .. -2) first and the table (-1) last.
        for local_id in [str(s) for s in range(-5,0)]:
            guid = assignments[local_id]
            _ = client.delete_entity(guid)
Exemple #3
0
def test_batches_entities_dependent():
    """Batch sixteen entities that form several dependency clusters.

    Relationships declared below:

    * B -> A, C -> B, D -> B, E -> A
    * G -> F, H -> G
    * I -> J and I -> K (J and K are created before I on purpose)
    * L, M, N, O and P have no relationships

    With ``batch_size=7`` the test expects ``batch_dependent_entities`` to
    return exactly three batches.
    """
    tracker = GuidTracker()

    def dataset(label):
        # The single letter doubles as both name and qualified name.
        return AtlasEntity(label, "DataSet", label, guid=tracker.get_guid())

    # Guids are handed out in this creation order; J and K intentionally
    # precede I.
    node = {label: dataset(label) for label in "ABCDEFGHJKILMNOP"}

    # (source entity, relationship attribute, target entity), applied in order.
    links = [
        ("B", "table", "A"),
        ("C", "parent", "B"),
        ("D", "parent", "B"),
        ("E", "table", "A"),
        ("G", "table", "F"),
        ("H", "parent", "G"),
        ("I", "colA", "J"),
        ("I", "colB", "K"),
    ]
    for source, attribute, target in links:
        node[source].addRelationship(**{attribute: node[target]})

    # Serialise in alphabetical order, which differs from creation order
    # for I, J and K.
    payload = [node[label].to_json() for label in "ABCDEFGHIJKLMNOP"]
    batches = batch_dependent_entities(payload, batch_size=7)

    # Sixteen entities with a batch size of seven are expected to yield
    # three batches.
    assert len(batches) == 3
            "description": "This is the first column."
        },
        guid=gt.get_guid())
    column02 = AtlasEntity(
        name="column02",
        typeName="pyapacheatlas_demo_column",
        qualified_name=
        "pyapacheatlas://sample_tablepyapacheatlas_custom_type@column02",
        attributes={
            "data_type": "int",
            "description": "This is the second column."
        },
        guid=gt.get_guid())

    # Add the "table" relationship attribute to your column entities
    column01.addRelationship(table=table_entity)
    column02.addRelationship(table=table_entity)

    # Do the upload and view the entities in the UI
    upload_results = client.upload_entities(
        batch=[table_entity, column01, column02])

    print(json.dumps(upload_results, indent=2))

    # To remove, delete the entity created and then the entity type.
    # client.delete_entity(guid=["..."])
    # delete_relationship = client.delete_type("pyapacheatlas_table_column_relationship")
    # delete_results = client.delete_type("pyapacheatlas_demo_table")
    # delete_results = client.delete_type("pyapacheatlas_demo_column")
    # print(json.dumps(delete_results, indent=2))
Exemple #5
0
                     "tests://rel02#c",
                     guid=-3,
                     attributes={"type": "str"})
    c3 = AtlasEntity("rel01#03",
                     "hive_column",
                     "tests://rel03#c",
                     guid=-4,
                     attributes={"type": "str"})
    c4 = AtlasEntity("rel01#04",
                     "hive_column",
                     "tests://rel04#c",
                     guid=-5,
                     attributes={"type": "str"})

    # Add c1 as the only relationship to the table
    table.addRelationship(columns=[c1.to_json(minimum=True)])

    c2.relationshipAttributes.update({"table": table.to_json(minimum=True)})
    c3.addRelationship(table=table)

    assignments = client.upload_entities([table, c1, c2, c3,
                                          c4])["guidAssignments"]

    try:
        live_table = client.get_entity(guid=assignments["-1"])["entities"][0]

        # Should have three columns: one comes from the table having the
        # relationship defined as an array of columns, and the other two from
        # the columns having the table relationshipAttribute defined on them.
        print("Here's what the upload looks like!")
        print(json.dumps(live_table["relationshipAttributes"], indent=2))
                     "hive_column",
                     "tests://rel03#c",
                     guid=-4,
                     attributes={"type": "str"})
    c4 = AtlasEntity("rel10#04",
                     "hive_column",
                     "tests://rel04#c",
                     guid=-5,
                     attributes={"type": "str"})

    # Add relationships to the columns from the table overwriting existing columns
    # Good if you want to overwrite existing schema or creating a brand new table
    # and Schema.
    columns_to_add = [c1, c2, c3]
    # Use a list comprehension to convert them into dictionaries when adding a list
    table.addRelationship(
        columns=[c.to_json(minimum=True) for c in columns_to_add])

    # OR Add a table relationship to a column. This lets you essentially APPEND
    # a column to a table's schema.
    c4.addRelationship(table=table)

    # Upload all of the tables and columns that are referenced.
    assignments = client.upload_entities([table, c1, c2, c3,
                                          c4])["guidAssignments"]

    # Check that we have one more relationship
    print(
        "Now we can see that there should be one more relationship attribute.")
    live_table_post_relationship = client.get_entity(
        guid=assignments["-1"])["entities"][0]
    print(