Code Example #1
File: test_whatif.py Project: wjohnson/pyapacheatlas
def test_would_it_overwrite():
    entities = [
        AtlasEntity("dummy1",
                    "demo_table",
                    "dummy1",
                    -99,
                    attributes={
                        "req_attrib": "1"
                    }).to_json(),
        AtlasEntity("dummy2",
                    "demo_table",
                    "dummy1",
                    -100,
                    attributes={
                        "foo": "bar"
                    }).to_json()
    ]

    new_entity = AtlasEntity("dummy1",
                             "demo_table",
                             "dummy1",
                             -99,
                             attributes={
                                 "req_attrib": "1"
                             }).to_json()

    demo_table_type = {"entityDefs": []}  # unused by this particular check

    local_what_if = WhatIfValidator(existing_entities=entities)

    results = local_what_if.entity_would_overwrite(new_entity)

    assert (results)
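
By contrast, a minimal sketch of the negative case; it assumes (the test above implies but does not show) that entity_would_overwrite returns a falsy value when no existing entity shares the qualified name:

# Assumption: a qualified name absent from existing_entities is not flagged.
brand_new = AtlasEntity("dummy3", "demo_table", "dummy3", -101).to_json()
assert not local_what_if.entity_would_overwrite(brand_new)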
Code Example #2
File: test_search.py Project: wjohnson/pyapacheatlas
def test_purview_search_iterates_safely_over_multiple():
    ae = AtlasEntity(
        name="there_can_be_only_two",
        qualified_name="pyapacheatlas://there_can_be_only_two_00",
        guid=-100,
        typeName="hive_table"
    )

    ae2 = AtlasEntity(
        name="there_can_be_only_two",
        qualified_name="pyapacheatlas://there_can_be_only_two_01",
        guid=-101,
        typeName="hive_table"
    )

    upload_success = client.upload_entities([ae, ae2])    

    search_results = client.search_entities(r"there_can_be_only_two")

    counter = 0
    for entity in search_results:
        len(entity["id"])  # touch a field to confirm each hit is well-formed
        counter = counter + 1
    
    assert(counter == 2)
Code Example #3
def test_prepare_bulk_entity_from_mixed_atlas_entity_dict():

    class_entity = AtlasEntity(
        name=sample_entity["attributes"]["name"],
        typeName=sample_entity["typeName"],
        qualified_name=sample_entity["attributes"]["qualifiedName"],
        attributes=sample_entity["attributes"],
        guid=sample_entity["guid"],
        relationshipAttributes=sample_entity["relationshipAttributes"])
    class_entity2 = AtlasEntity(
        name=sample_entity["attributes"]["name"] + "abc",
        typeName=sample_entity["typeName"],
        qualified_name=sample_entity["attributes"]["qualifiedName"] + "abc",
        attributes=sample_entity["attributes"],
        guid=sample_entity["guid"],
        relationshipAttributes=sample_entity["relationshipAttributes"])

    results = AtlasClient._prepare_entity_upload(
        [class_entity, class_entity2.to_json()])

    sample2 = sample_entity.copy()
    # Note: .copy() is shallow, so sample2 shares its attributes dict with
    # sample_entity (and with the AtlasEntity objects built from it above).
    sample2["attributes"]["name"] = sample2["attributes"]["name"] + "abc"
    sample2["attributes"]["qualifiedName"] = (
        sample2["attributes"]["qualifiedName"] + "abc")

    expected = {"entities": [sample_entity, sample2]}

    assert (results == expected)
Code Example #4
File: test_whatif.py Project: wjohnson/pyapacheatlas
def test_missing_req_attributes():
    entities = [
        AtlasEntity("dummy1",
                    "demo_table",
                    "dummy1",
                    -99,
                    attributes={
                        "req_attrib": "1"
                    }).to_json(),
        AtlasEntity("dummy2", "demo_table", "dummy1", -100).to_json()
    ]

    demo_table_type = {
        "entityDefs": [{
            'category': 'ENTITY',
            'name': 'demo_table',
            'attributeDefs': [{
                "name": "req_attrib",
                "isOptional": False
            }],
            'relationshipAttributeDefs': [],
            'superTypes': ['DataSet']
        }]
    }

    local_what_if = WhatIfValidator(demo_table_type)

    results = [local_what_if.entity_missing_attributes(e) for e in entities]

    assert (results[0] == False)
    assert (results[1] == {'req_attrib'})
Code Example #5
File: test_whatif.py Project: wjohnson/pyapacheatlas
def test_type_doesnt_exist():
    entities = [
        AtlasEntity("dummy1", "demo_table", "dummy1", -99).to_json(),
        AtlasEntity("dummy2", "foobar", "dummy1", -100).to_json()
    ]

    results = [whatif.entity_type_exists(e) for e in entities]

    assert (results[0] == True)
    assert (results[1] == False)
Code Example #6
def test_batches_entities_with_real_guid():
    gt = GuidTracker()
    a = AtlasEntity("A", "DataSet", "A", guid=gt.get_guid())
    b = AtlasEntity("B", "DataSet", "B", guid=gt.get_guid())
    b.addRelationship(table=a)

    c = AtlasEntity("C", "DataSet", "C", guid=gt.get_guid())
    d = AtlasEntity("D", "DataSet", "D", guid=gt.get_guid())
    c.addRelationship(tester={"guid": "abc-123"})

    entities = [x.to_json() for x in [a, b, c, d]]
    results = batch_dependent_entities(entities, batch_size=2)

    assert (len(results) == 2)
Code Example #7
def convert_Spline_to_Purview(splineJson):
    splineJson = json.loads(splineJson)

    # Get notebook info
    notebookInfo = splineJson["extraInfo"]["notebookInfo"]["obj"]
    notebookURL = notebookInfo["notebookURL"].replace("\\", "")

    guid = GuidTracker()

    # Get inputs
    inputs = []
    for read in splineJson["operations"]["reads"]:
        input_path = read["inputSources"][0].replace(
            notebookInfo["mounts"][0],
            "https://adldata.dfs.core.windows.net/data/")
        # Named input_entity to avoid shadowing Python's builtin input().
        input_entity = AtlasEntity(name=input_path.split("/")[-1],
                                   typeName="azure_datalake_gen2_path",
                                   qualified_name=input_path,
                                   guid=guid.get_guid())
        inputs.append(input_entity)

    # Get outputs
    write = splineJson["operations"]["write"]
    output_path = write["outputSource"].replace(
        notebookInfo["mounts"][0],
        "https://adldata.dfs.core.windows.net/data/")
    output = AtlasEntity(name=output_path.split("/")[-1],
                         typeName="azure_datalake_gen2_path",
                         qualified_name=output_path,
                         guid=guid.get_guid())

    # Get Process
    process_attributes = {
        "name": notebookInfo["name"],
        "owner": notebookInfo["user"],
        "description": f"Link to spark job notebook: http://{notebookURL}",
        "startTime": notebookInfo["timestamp"],
        "endTime": notebookInfo["timestamp"]
    }
    process = AtlasProcess(name=notebookInfo["name"],
                           typeName="Process",
                           qualified_name=f"adb-{notebookURL[4:20]}",
                           inputs=inputs,
                           outputs=[output],
                           guid=guid.get_guid(),
                           attributes=process_attributes)

    purview_lineage = inputs + [output] + [process]
    return purview_lineage
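
A possible driver for the converter above; `client` (an authenticated PurviewClient) and `spline_event` (the raw Spline lineage JSON string) are assumptions, not part of the original snippet:

# Hypothetical usage: convert one Spline event and push the lineage.
lineage_entities = convert_Spline_to_Purview(spline_event)
upload_results = client.upload_entities(
    batch=[e.to_json() for e in lineage_entities])
print(json.dumps(upload_results, indent=2))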
Code Example #8
def test_batches_entities_simple():
    entities = [
        AtlasEntity(str(i), "DataSet", str(i), guid=i).to_json()
        for i in range(0, 10)
    ]
    results = batch_dependent_entities(entities, batch_size=2)

    assert (len(results) == 5)
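
The batching helper pairs naturally with an upload loop; a minimal sketch, assuming an authenticated client as constructed in the other examples:

# Upload each dependency-respecting batch separately so each request
# stays small; upload_entities accepts a list of entity JSON dicts.
entities = [
    AtlasEntity(str(i), "DataSet", str(i), guid=-(i + 1)).to_json()
    for i in range(10)
]
for batch in batch_dependent_entities(entities, batch_size=2):
    client.upload_entities(batch=batch)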
Code Example #9
def add_atlas_db(self, database, database_fqn):
    self.guid_count -= 1
    db = AtlasEntity(name=database[0],
                     typeName='pyapacheatlas_mysql_db',
                     qualified_name=database_fqn,
                     guid=self.guid_count)
    self.dbs.append(db)
    return db
Code Example #10
def add_atlas_table(self, table, table_fqn):
    self.guid_count -= 1
    db_table = AtlasEntity(name=table[0],
                           typeName='pyapacheatlas_mysql_table',
                           qualified_name=table_fqn,
                           guid=self.guid_count)
    self.db_tables.append(db_table)
    return db_table
Code Example #11
def test_first_entity_matching_attribute():
    atlas_entities = [
        AtlasEntity(
            name="demoentity",
            typeName="demo_table",
            qualified_name="demoentity",
            guid=-1000
        ),
        AtlasEntity(
            name="demoentity2",
            typeName="demo2_table",
            qualified_name="demoentity2",
            guid=-1001
        )
    ]
    results = first_entity_matching_attribute("name", "demoentity", atlas_entities)

    assert (results.typeName == "demo_table")
Code Example #12
def setup_batch_entities():
    atlas_entities = [
        AtlasEntity(
            name="demoentity",
            typeName="demo_table",
            qualified_name="demoentity",
            guid=-1000
        ),
        AtlasEntity(
            name="demoentity2",
            typeName="demo2_table",
            qualified_name="demoentity2",
            guid=-1001
        )
    ]
    atlas_proc = AtlasProcess(
        name="demo_process_name",
        typeName="demo_process",
        qualified_name="demo_process_qualifier",
        inputs=[atlas_entities[0].to_json(minimum=True)],
        outputs=[atlas_entities[1].to_json(minimum=True)],
        guid=-1002
    )
    atlas_proc_no_in = AtlasProcess(
        name="demo_process_qualifier_no_in",
        typeName="demo_process1",
        qualified_name="demo_process_qualifier_no_in",
        inputs=[],
        outputs=[atlas_entities[1].to_json(minimum=True)],
        guid=-1003
    )
    atlas_proc_no_out = AtlasProcess(
        name="demo_process_qualifier_no_out",
        typeName="demo_process2",
        qualified_name="demo_process_qualifier_no_out",
        inputs=[atlas_entities[0].to_json(minimum=True)],
        outputs=[],
        guid=-1004
    )
    atlas_entities.extend([atlas_proc, atlas_proc_no_in, atlas_proc_no_out])
    return atlas_entities
Code Example #13
def test_min_entity_json_no_guid_usage():

    ae = AtlasEntity("BeforeModi",
                     "DataSet",
                     "tests://EntityRESTBeforeModification",
                     guid=-1)

    assignments = client.upload_entities([ae])["guidAssignments"]
    assign_with_no_guid = {}
    try:
        # live_table = client.get_entity(guid=assignments["-1"])["entities"][0]
        ae_no_guid = AtlasEntity("BeforeModi",
                                 "DataSet",
                                 "tests://EntityRESTBeforeModification",
                                 guid=None)

        proc1 = AtlasProcess("WillBeUpdatedWithNoGuidEntity",
                             "Process",
                             "tests://EntityRESTBeforeModificationProc",
                             inputs=[ae_no_guid],
                             outputs=[],
                             guid=-2)
        assign_with_no_guid = client.upload_entities(
            [proc1])["guidAssignments"]

        live_proc = client.get_entity(
            guid=assign_with_no_guid["-2"])["entities"][0]

        # Should have one input that matches the guid assignment
        assert (len(live_proc["attributes"]["inputs"]) == 1)
        assert (
            live_proc["attributes"]["inputs"][0]["guid"] == assignments["-1"])

    finally:
        # Delete the entities now that the test is complete
        _ = client.delete_entity(assignments["-1"])
        if "-2" in assign_with_no_guid:
            _ = client.delete_entity(assign_with_no_guid["-2"])
Code Example #14
def test_classify_entities():
    # Create an entity.
    # You must provide a name, typeName, qualified_name, and guid.
    # The guid must be a negative number and unique in your batch
    # being uploaded.
    input01 = AtlasEntity(name="input01",
                          typeName="DataSet",
                          qualified_name="tests://classify_01",
                          guid=-100)
    input02 = AtlasEntity(name="input02",
                          typeName="DataSet",
                          qualified_name="tests://classify_02",
                          guid=-101)

    results = client.upload_entities(batch=[input01, input02])

    # Get the Guids for us to work with
    guids = list(results["guidAssignments"].values())

    try:
        one_entity_multi_class = client.classify_entity(
            guid=guids[0],
            classifications=[
                AtlasClassification("MICROSOFT.PERSONAL.DATE_OF_BIRTH"),
                AtlasClassification("MICROSOFT.PERSONAL.NAME")
            ],
            force_update=True)

        assert (one_entity_multi_class)

        multi_entity_single_class = client.classify_bulk_entities(
            entityGuids=guids,
            classification=AtlasClassification("MICROSOFT.PERSONAL.IPADDRESS"))

        assert (multi_entity_single_class)
    finally:
        for guid in guids:
            _ = client.delete_entity(guid)
Code Example #15
def test_prepare_bulk_entity_from_atlas_entity():

    class_entity = AtlasEntity(
        name=sample_entity["attributes"]["name"],
        typeName=sample_entity["typeName"],
        qualified_name=sample_entity["attributes"]["qualifiedName"],
        attributes=sample_entity["attributes"],
        guid=sample_entity["guid"],
        relationshipAttributes=sample_entity["relationshipAttributes"])

    results = AtlasClient._prepare_entity_upload(class_entity)

    expected = {"entities": [sample_entity]}

    assert (results == expected)
Code Example #16
def add_atlas_instance(self, version, instance_fqn):
    self.guid_count -= 1
    self.instance = AtlasEntity(
        name=f'MySQL v.{version}',
        typeName='pyapacheatlas_mysql_instance',
        qualified_name=instance_fqn,
        guid=self.guid_count,
        attributes={
            'hostname': MYSQL_SERVER_HOSTNAME,
            'port': MYSQL_INSTANCE_PORT,
            'cloudOrOnPrem': MYSQL_SERVERINSTANCE_CLOUDORONPREM,
            'contact_info': MYSQL_SERVERINSTANCE_CONTACTINFO,
            'comment': MYSQL_SERVERINSTANCE_COMMENT
        })
Code Example #17
def test_purview_search_iterates_safely():
    ae = AtlasEntity(name="there_can_be_only_one",
                     qualified_name="pyapacheatlas://there_can_be_only_one",
                     guid=-100,
                     typeName="hive_table")

    upload_success = client.upload_entities(ae)

    # Search for the entity that was just uploaded.
    search_results = client.search_entities(r"there_can_be_only_one")

    counter = 0
    for page in search_results:
        for entity in page:
            len(entity["id"])  # touch a field to confirm each hit is well-formed
            counter = counter + 1

    assert (counter == 1)
Code Example #18
def add_atlas_column(self, column, table_fqn):
    self.guid_count -= 1
    column_fqn = f'{table_fqn}#{column.Field}'
    table_column = AtlasEntity(
        name=column.Field,
        typeName='pyapacheatlas_mysql_column',
        qualified_name=column_fqn,
        guid=self.guid_count,
        attributes={
            'comment': self._decode_field_if_bytes(column.Comment),
            'data_type': self._decode_field_if_bytes(column.Type),
            'default_value': self._decode_field_if_bytes(column.Default),
            'isNullable': self._decode_field_if_bytes(column.Null),
            'isPrimaryKey': self._decode_field_if_bytes(column.Key) is not None,
            'collation': self._decode_field_if_bytes(column.Collation)
        })
    self.table_columns.append(table_column)
    return table_column
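
_decode_field_if_bytes is not shown in this excerpt; a plausible, purely illustrative implementation might be:

def _decode_field_if_bytes(self, field):
    # Hypothetical sketch: some MySQL drivers return bytes for text fields,
    # so decode bytes to str and pass everything else through unchanged.
    return field.decode('utf-8') if isinstance(field, bytes) else field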
Code Example #19
def test_set_relationship_different_ways():

    ae = AtlasEntity("rel01", "hive_table", "tests://rel01", guid=-1)
    c1 = AtlasEntity("rel01#01", "hive_column", "tests://rel01#c", guid=-2,
                     attributes={"type": "str"})
    c2 = AtlasEntity("rel01#02", "hive_column", "tests://rel02#c", guid=-3,
                     attributes={"type": "str"})
    c3 = AtlasEntity("rel01#03", "hive_column", "tests://rel03#c", guid=-4,
                     attributes={"type": "str"})
    c4 = AtlasEntity("rel01#04", "hive_column", "tests://rel04#c", guid=-5,
                     attributes={"type": "str"})

    # Add c1 as the only relationship
    ae.addRelationship(columns=[c1.to_json(minimum=True)])

    c2.relationshipAttributes.update({"table": ae.to_json(minimum=True)})
    c3.addRelationship(table=ae)

    assignments = client.upload_entities([ae, c1, c2, c3, c4])["guidAssignments"]
    try:
        live_table = client.get_entity(guid=assignments["-1"])["entities"][0]
        
        # Should have three columns: one from the table defining the
        # relationship as an array of columns, and two from the columns
        # having the table relationshipAttribute defined on them.
        assert(len(live_table["relationshipAttributes"]["columns"]) == 3)

        relationship = {
                    "typeName": "hive_table_columns",
                    "attributes": {},
                    "guid": -100,
                    # Ends are either guid or guid + typeName 
                    # (in case there are ambiguities?)
                    "end1": {
                        "guid": assignments["-1"]
                    },
                    "end2": {
                        "guid": assignments["-5"]
                    }
                }

        relation_upload = client.upload_relationship(relationship)
        # Check that we have one more relationship by re-fetching the table.
        # (Server-side caching may delay the new column from appearing.)
        live_table_post_relationship = client.get_entity(guid=assignments["-1"])["entities"][0]
        assert(len(live_table_post_relationship["relationshipAttributes"]["columns"]) == 4)

    finally:
        # Need to delete all columns BEFORE you delete the table
        for local_id in [str(s) for s in range(-5, 0)]:
            guid = assignments[local_id]
            _ = client.delete_entity(guid)
Code Example #20
    # Create a relationship between the process and steps
    relationship = RelationshipTypeDef(
        name="process_with_steps_steps",
        relationshipCategory="COMPOSITION",
        # Use the Parent/Child standard end definitions
        # "steps" will be an attribute on the process_with_steps entities
        # it will contain a list of step_in_process and display on the schema.
        # "parent_process" will be an attribute on the step_in_process entity.
        endDef1=ParentEndDef(name="steps", typeName="process_with_steps"),
        endDef2=ChildEndDef(name="parent_process", typeName="step_in_process"))
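
    # (Illustrative, not in the original excerpt) The new relationship type
    # must exist on the server before entities can reference it; assuming an
    # authenticated `client`, it could be registered first with:
    #   client.upload_typedefs(relationshipDefs=[relationship],
    #                          force_update=True)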

    # Create the process, steps in the process, and dummy inputs and outputs
    # for the lineage visualization
    step01 = AtlasEntity(
        name="Step01: Do something",
        qualified_name="process_xyz#step01",
        typeName="step_in_process",
        guid=-1000,
        description="This is the first step in which we do something.")
    step02 = AtlasEntity(
        name="Step02: Modify the data",
        qualified_name="process_xyz#step02",
        typeName="step_in_process",
        guid=-1001,
        description="This is the second step in which we modify things.")
    step03 = AtlasEntity(
        name="Step03: Finalize the data",
        qualified_name="process_xyz#step03",
        typeName="step_in_process",
        guid=-1002,
        description="This is the third step in which we finalize things.")
Code Example #21
    and hive_columns defined on each end. However, this is the slowest path
    as it can only take one upload at a time whereas entity uploads can be
    many entities at a time.
    """

    # Authenticate against your Atlas server
    oauth = ServicePrincipalAuthentication(
        tenant_id=os.environ.get("TENANT_ID", ""),
        client_id=os.environ.get("CLIENT_ID", ""),
        client_secret=os.environ.get("CLIENT_SECRET", ""))
    client = PurviewClient(account_name=os.environ.get("PURVIEW_NAME", ""),
                           authentication=oauth)

    # Creating the entities that will be used in uploads.
    # One table will be added
    table = AtlasEntity("rel10", "hive_table", "tests://rel10", guid=-1)
    # Four columns will be added
    c1 = AtlasEntity("rel10#01",
                     "hive_column",
                     "tests://rel10#c",
                     guid=-2,
                     attributes={"type": "str"})
    c2 = AtlasEntity("rel10#02",
                     "hive_column",
                     "tests://rel02#c",
                     guid=-3,
                     attributes={"type": "str"})
    c3 = AtlasEntity("rel10#03",
                     "hive_column",
                     "tests://rel03#c",
                     guid=-4,
Code Example #22
    Lastly, you can always upload an individual relationship with hive_table
    and hive_columns defined on each end. However, this is the slowest path
    as it can only take one upload at a time whereas entity uploads can be
    many entities at a time.
    """

    # Authenticate against your Atlas server
    oauth = ServicePrincipalAuthentication(
        tenant_id=os.environ.get("TENANT_ID", ""),
        client_id=os.environ.get("CLIENT_ID", ""),
        client_secret=os.environ.get("CLIENT_SECRET", ""))
    client = PurviewClient(account_name=os.environ.get("PURVIEW_NAME", ""),
                           authentication=oauth)

    # Creating the entities that will be used in uploads.
    table = AtlasEntity("rel01", "hive_table", "tests://rel01", guid=-1)
    c1 = AtlasEntity("rel01#01",
                     "hive_column",
                     "tests://rel01#c",
                     guid=-2,
                     attributes={"type": "str"})
    c2 = AtlasEntity("rel01#02",
                     "hive_column",
                     "tests://rel02#c",
                     guid=-3,
                     attributes={"type": "str"})
    c3 = AtlasEntity("rel01#03",
                     "hive_column",
                     "tests://rel03#c",
                     guid=-4,
                     attributes={"type": "str"})
Code Example #23
display(df)

# COMMAND ----------

# Now we begin to do some Atlas uploads using the types created above.
# Get the notebook path as it will be part of our process' name.
notebook_path = (dbutils.notebook.entry_point.getDbutils().notebook()
                 .getContext().notebookPath().get())

# COMMAND ----------

# Create an asset for the input data frame.
atlas_input_df = AtlasEntity(
    name="demo_dbfs_delays_data",
    qualified_name="pyapacheatlas://demo_dbfs_delays_data",
    typeName="custom_spark_dataframe",
    guid=guid.get_guid(),
)

# Create a process that represents our notebook and has our input
# dataframe as one of the inputs.
process = AtlasProcess(
    name="demo_cluster" + notebook_path,
    qualified_name="pyapacheatlas://demo_cluster" + notebook_path,
    typeName="custom_spark_job_process",
    guid=guid.get_guid(),
    attributes={"job_type": "notebook"},
    inputs=[atlas_input_df],
    # No outputs for this demo; otherwise, repeat what you did for the
    # input dataframe.
    outputs=[]
)
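
# COMMAND ----------

# Illustrative follow-up, not part of the original notebook excerpt: upload
# the input asset and the process together so the lineage appears in the
# catalog. Assumes an authenticated `client`, as in the other examples.
results = client.upload_entities(batch=[atlas_input_df, process])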
Code Example #24
import json
import os

from pyapacheatlas.auth import BasicAuthentication, ServicePrincipalAuthentication
from pyapacheatlas.core import AtlasClient, AtlasEntity, AtlasProcess, PurviewClient
from pyapacheatlas.readers import ExcelConfiguration, ExcelReader

ae_in = AtlasEntity("test_in", "hive_table", "test://lineage_hive_in", -101)
ae_out = AtlasEntity("test_out", "hive_table", "test://lineage_hive_out", -102)
proc = AtlasProcess("test_proc", "Process", "test://lineage_hive_out", guid=-103,
                    inputs=[ae_in], outputs=[ae_out]
                    )
LINEAGE_BATCH = [ae_in, ae_out, proc]

auth = BasicAuthentication(username="******", password="******")
client = AtlasClient(endpoint_url="http://localhost:21000/api/atlas/v2",
                     authentication=auth)

oauth = ServicePrincipalAuthentication(
    tenant_id=os.environ.get("TENANT_ID", ""),
    client_id=os.environ.get("CLIENT_ID", ""),
    client_secret=os.environ.get("CLIENT_SECRET", "")
)
purview_client = PurviewClient(
    account_name=os.environ.get("PURVIEW_NAME", ""),
    authentication=oauth
)


def test_lineage_atlas():
Code Example #25
        tenant_id=os.environ.get("TENANT_ID", ""),
        client_id=os.environ.get("CLIENT_ID", ""),
        client_secret=os.environ.get("CLIENT_SECRET", "")
    )
    client = PurviewClient(
        account_name=os.environ.get("PURVIEW_NAME", ""),
        authentication=oauth
    )

    # Create an entity.
    # You must provide a name, typeName, qualified_name, and guid.
    # The guid must be a negative number and unique in your batch
    # being uploaded.
    input01 = AtlasEntity(
        name="input01",
        typeName="DataSet",
        qualified_name="pyapacheatlas://demoinput01",
        guid=-100
    )
    input02 = AtlasEntity(
        name="input02",
        typeName="DataSet",
        qualified_name="pyapacheatlas://demoinput02",
        guid=-101
    )

    results = client.upload_entities(
        batch=[input01.to_json(), input02.to_json()]
    )

    # Get the Guids for us to work with
    guids = list(results["guidAssignments"].values())
Code Example #26
    """

    # Authenticate against your Atlas server
    oauth = ServicePrincipalAuthentication(
        tenant_id=os.environ.get("TENANT_ID", ""),
        client_id=os.environ.get("CLIENT_ID", ""),
        client_secret=os.environ.get("CLIENT_SECRET", ""))
    client = PurviewClient(account_name=os.environ.get("PURVIEW_NAME", ""),
                           authentication=oauth)

    # Create two entities with AtlasEntity.
    # You must provide a name, typeName, qualified_name, and guid.
    # The guid must be a negative number and unique in your batch
    # being uploaded.
    input01 = AtlasEntity(name="input01",
                          typeName="DataSet",
                          qualified_name="pyapacheatlas://demoinput01",
                          guid=-100)
    output01 = AtlasEntity(name="output01",
                           typeName="DataSet",
                           qualified_name="pyapacheatlas://demooutput01",
                           guid=-101)

    # The Atlas Process is the lineage component that links the two
    # entities together. The inputs and outputs need to be the "header"
    # version of the atlas entities, so specify minimum = True to
    # return just guid, qualifiedName, and typeName.
    process = AtlasProcess(name="sample process",
                           typeName="Process",
                           qualified_name="pyapacheatlas://democustomprocess",
                           inputs=[input01],
                           outputs=[output01],
Code Example #27
            "isLegacyAttribute": False
        })

    # Upload the results
    upload_results = client.upload_typedefs(
        entityDefs=[column_entity_def, table_entity_def],
        relationshipDefs=[table_column_relationship],
        force_update=True)

    # With all the types and relationships defined, we can create entities.
    # We can use a GuidTracker to always get a unique negative number
    gt = GuidTracker()

    table_entity = AtlasEntity(
        name="sample_table",
        qualified_name="pyapacheatlas://sample_tablepyapacheatlas_custom_type",
        typeName="pyapacheatlas_demo_table",
        guid=gt.get_guid())

    # Add two columns. They must include the "relationshipAttribute" attribute.
    column01 = AtlasEntity(
        name="column01",
        typeName="pyapacheatlas_demo_column",
        qualified_name="pyapacheatlas://sample_tablepyapacheatlas_custom_type@column01",
        attributes={
            "data_type": "string",
            "description": "This is the first column."
        },
        guid=gt.get_guid())
    column02 = AtlasEntity(
Code Example #28
def test_batches_entities_dependent():
    gt = GuidTracker()
    a = AtlasEntity("A", "DataSet", "A", guid=gt.get_guid())
    b = AtlasEntity("B", "DataSet", "B", guid=gt.get_guid())
    b.addRelationship(table=a)
    c = AtlasEntity("C", "DataSet", "C", guid=gt.get_guid())
    d = AtlasEntity("D", "DataSet", "D", guid=gt.get_guid())
    c.addRelationship(parent=b)
    d.addRelationship(parent=b)
    e = AtlasEntity("E", "DataSet", "E", guid=gt.get_guid())
    e.addRelationship(table=a)
    f = AtlasEntity("F", "DataSet", "F", guid=gt.get_guid())
    g = AtlasEntity("G", "DataSet", "G", guid=gt.get_guid())
    g.addRelationship(table=f)
    h = AtlasEntity("H", "DataSet", "H", guid=gt.get_guid())
    h.addRelationship(parent=g)
    # Intentionally out of order
    j = AtlasEntity("J", "DataSet", "J", guid=gt.get_guid())
    k = AtlasEntity("K", "DataSet", "K", guid=gt.get_guid())
    i = AtlasEntity("I", "DataSet", "I", guid=gt.get_guid())

    i.addRelationship(colA=j)
    i.addRelationship(colB=k)

    l = AtlasEntity("L", "DataSet", "L", guid=gt.get_guid())
    m = AtlasEntity("M", "DataSet", "M", guid=gt.get_guid())
    n = AtlasEntity("N", "DataSet", "N", guid=gt.get_guid())
    o = AtlasEntity("O", "DataSet", "O", guid=gt.get_guid())
    p = AtlasEntity("P", "DataSet", "P", guid=gt.get_guid())

    entities = [
        x.to_json() for x in [a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p]
    ]
    results = batch_dependent_entities(entities, batch_size=7)
    # There are sixteen results, batch size of 7 means at least three groups
    # One group has seven connected
    # One group should have only three
    # All others are independent
    assert (len(results) == 3)
Code Example #29
        tenant_id=os.environ.get("TENANT_ID", ""),
        client_id=os.environ.get("CLIENT_ID", ""),
        client_secret=os.environ.get("CLIENT_SECRET", "")
    )
    client = PurviewClient(
        account_name=os.environ.get("PURVIEW_NAME", ""),
        authentication=oauth
    )

    # Create two entities with AtlasEntity.
    # You must provide a name, typeName, qualified_name, and guid.
    # The guid must be a negative number and unique in your batch
    # being uploaded.
    input01 = AtlasEntity(
        name="input01",
        typeName="DataSet",
        qualified_name="pyapacheatlas://demoinput01",
        guid=-100
    )
    output01 = AtlasEntity(
        name="output01",
        typeName="DataSet",
        qualified_name="pyapacheatlas://demooutput01",
        guid=-101
    )

    # The Atlas Process is the lineage component that links the two
    # entities together. The inputs and outputs need to be the "header"
    # version of the atlas entities, so specify minimum = True to
    # return just guid, qualifiedName, and typeName.
    process = AtlasProcess(
        name="sample process",
Code Example #30
File: test_whatif.py Project: wjohnson/pyapacheatlas
def test_whatif_validation():

    expected = {
        "counts": {
            "TypeDoesNotExist": 1,
            "UsingInvalidAttributes": 1,
            "MissingRequiredAttributes": 1
        },
        "total": 3,
        "values": {
            "TypeDoesNotExist": [-101],
            "UsingInvalidAttributes": {
                -100: {"foo"}
            },
            "MissingRequiredAttributes": {
                -98: {"req_attrib"}
            }
        }
    }

    entities = [
        # Valid attribute
        AtlasEntity("dummy1",
                    "demo_table",
                    "dummy1",
                    -99,
                    attributes={
                        "req_attrib": "1"
                    }).to_json(),
        # Missing attribute
        AtlasEntity("dummy10", "demo_table", "dummy10", -98,
                    attributes={}).to_json(),
        # Non-Required attribute
        AtlasEntity("dummy20",
                    "demo_table",
                    "dummy20",
                    -100,
                    attributes={
                        "foo": "bar",
                        "req_attrib": "abc"
                    }).to_json(),
        # Bad Type
        AtlasEntity("dummy30",
                    "bad_table",
                    "dummy30",
                    -101,
                    attributes={
                        "foo": "bar"
                    }).to_json()
    ]

    demo_table_type = {
        "entityDefs": [{
            'category': 'ENTITY',
            'name': 'demo_table',
            'attributeDefs': [
                {
                    "name": "req_attrib",
                    "isOptional": False
                },
                {
                    "name": "name",
                    "isOptional": False
                },
                {
                    "name": "qualifiedName",
                    "isOptional": False
                },
            ],
            'relationshipAttributeDefs': [],
            'superTypes': ['DataSet']
        }]
    }

    local_what_if = WhatIfValidator(demo_table_type)

    results = local_what_if.validate_entities(entities)

    assert (set(local_what_if.entity_required_fields["demo_table"]) == set(
        ["req_attrib", "name", "qualifiedName"]))
    assert (results == expected)
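
The validation report's shape (see expected above) lends itself to gating uploads; a minimal sketch, assuming an authenticated client as constructed in the other examples:

report = local_what_if.validate_entities(entities)
if report["total"] == 0:
    # Only upload when the what-if validation finds no problems.
    client.upload_entities(batch=entities)
else:
    print(report["values"])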