qualified_name="process_xyz",
                         typeName="process_with_steps",
                         guid=-1003,
                         relationshipAttributes={
                             "steps": [
                                 step01.to_json(minimum=True),
                                 step02.to_json(minimum=True),
                                 step03.to_json(minimum=True),
                             ]
                         },
                         attributes={
                             "inputs": [input01.to_json(minimum=True)],
                             "outputs": [output01.to_json(minimum=True)]
                         })

    # Create a batch of entities to be uploaded to Purview
    batch = [step01, step02, step03, parent, input01, output01]

    # Upload the types
    typeResults = client.upload_typedefs(
        entityDefs=[processWithSteps, processSteps],
        relationshipDefs=[relationship],
        force_update=True)

    # Upload the entities
    results = client.upload_entities(batch)

    # Print the results of the entities upload
    print(json.dumps(results, indent=2))
    print("Successfully created types and entities!")
Example 2
            "type": column_lineage_process_entity.name,
            "name": "query",
            "isContainer": False,
            "cardinality": "SINGLE",
            "isLegacyAttribute": True
        },
        endDef2={
            "type": table_process_entity.name,
            "name": "columnLineages",
            "isContainer": True,
            "cardinality": "SET",
            "isLegacyAttribute": False
        })

    # Output composite entity
    output = {
        "entityDefs": [
            column_lineage_process_entity.to_json(),
            table_process_entity.to_json()
        ],
        "relationshipDefs":
        [table_process_column_lineage_relationship.to_json()]
    }
    print(json.dumps(output, indent=2))

    input(">>>>Ready to upload?")

    upload_results = client.upload_typedefs(output)

    print(json.dumps(upload_results, indent=2))
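Example 2 assembles a composite dict and hands it to upload_typedefs in one call. The same upload can also be expressed with the keyword arguments shown in Example 1 above; a sketch, assuming the same typedef objects are in scope and a pyapacheatlas version that, like Example 1, accepts the objects directly:

# Equivalent upload using keyword arguments instead of the composite dict;
# force_update=True overwrites existing definitions with the same names.
upload_results = client.upload_typedefs(
    entityDefs=[column_lineage_process_entity, table_process_entity],
    relationshipDefs=[table_process_column_lineage_relationship],
    force_update=True)
print(json.dumps(upload_results, indent=2))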
Example 3
        "name": "columns",
        "isContainer": True,
        "cardinality": "SET",
        "isLegacyAttribute": False
    },
    endDef2={
        "type": "custom_spark_dataframe_column",
        "name": "dataframe",
        "isContainer": False,
        "cardinality": "SINGLE",
        "isLegacyAttribute": False
    })

typedef_results = client.upload_typedefs(
    {
        "entityDefs": [type_spark_df, type_spark_columns, type_spark_job],
        "relationshipDefs": [spark_column_to_df_relationship]
    },
    force_update=True)
print(typedef_results)

# COMMAND ----------

# Now we actually do some databricks work
df = spark.read.csv("/databricks-datasets/flights/departuredelays.csv",
                    header=True,
                    inferSchema=True)

# COMMAND ----------

# Do some transformations
        "cardinality": "SET",
        "isLegacyAttribute": False
    },
    endDef2={
        "type": "custom_spark_dataframe_column",
        "name": "dataframe",
        "isContainer": False,
        "cardinality": "SINGLE",
        "isLegacyAttribute": False
    })

typedef_results = client.upload_typedefs(
    {
        "entityDefs": [
            type_spark_df.to_json(),
            type_spark_columns.to_json(),
            type_spark_job.to_json()
        ],
        "relationshipDefs": [spark_column_to_df_relationship.to_json()]
    },
    force_update=True)
print(typedef_results)

# COMMAND ----------

# Now we actually do some databricks work
df = spark.read.csv("/databricks-datasets/flights/departuredelays.csv",
                    header=True,
                    inferSchema=True)

# COMMAND ----------
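To confirm the spark type definitions landed, one of them can be read back the way Example 11 further down does with get_typedef. A sketch; the TypeCategory import path is an assumption based on the pyapacheatlas samples, and the type name is the one referenced in the relationship above:

import json

from pyapacheatlas.core.typedef import TypeCategory  # assumed import path

# Read back one of the uploaded entity definitions to verify the upload,
# mirroring the get_typedef call in Example 11.
check = client.get_typedef(TypeCategory.ENTITY,
                           name="custom_spark_dataframe_column")
print(json.dumps(check, indent=2))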
            "name": "columns",
            "isContainer": True,
            "cardinality": "SET",
            "isLegacyAttribute": False,
        },
        endDef2={
            "type": column_entity_def.name,
            "name": "table",
            "isContainer": False,
            "cardinality": "SINGLE",
            "isLegacyAttribute": False
        })

    # Upload the results
    upload_results = client.upload_typedefs(
        entityDefs=[column_entity_def, table_entity_def],
        relationshipDefs=[table_column_relationship],
        force_update=True)

    # With all the types and relationships defined, we can create entities.
    # We can use a GuidTracker to always get a unique negative number
    gt = GuidTracker()

    table_entity = AtlasEntity(
        name="sample_table",
        qualified_name="pyapacheatlas://sample_tablepyapacheatlas_custom_type",
        typeName="pyapacheatlas_demo_table",
        guid=gt.get_guid())

    # Add two columns. They must include the "table" relationship attribute.
    column01 = AtlasEntity(
        name="column01",
Example 6
    # SETUP: This is just setting up the excel file for you
    file_path = "./demo_custom_type_and_entity_upload.xlsx"
    excel_config = ExcelConfiguration()
    excel_reader = ExcelReader(excel_config)

    # Create an empty excel template to be populated
    excel_reader.make_template(file_path)
    # This is just a helper to fill in some demo data
    fill_in_type_workbook(file_path, excel_config)
    fill_in_entity_workbook(file_path, excel_config)

    # ACTUAL WORK: This parses our excel file and creates a batch to upload
    typedefs = excel_reader.parse_entity_defs(file_path)
    entities = excel_reader.parse_bulk_entities(file_path)

    # This is what is getting sent to your Atlas server
    # print(json.dumps(typedefs,indent=2))
    # print(json.dumps(entities,indent=2))

    type_results = client.upload_typedefs(typedefs, force_update=True)
    entity_results = client.upload_entities(entities)

    print(json.dumps(type_results, indent=2))
    print("\n")
    print(json.dumps(entity_results, indent=2))

    print(
        "Completed type and bulk upload successfully!\nSearch for exampledataset to see your results."
    )
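The bulk entity upload response maps the temporary negative guids used in the workbook to the real guids assigned by the service. A small sketch for pulling those out of the result printed above; guidAssignments is the standard Atlas bulk-upload response field and may be empty if nothing new was created:

# Show which real guid Purview assigned to each temporary (negative) guid.
for temp_guid, real_guid in entity_results.get("guidAssignments", {}).items():
    print(f"{temp_guid} -> {real_guid}")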
Example 7
                                                  1,
                                                  "valuesMaxCount": 1,
                                                  "isUnique": False,
                                                  "isIndexable": False,
                                                  "includeInNotification": False
                                              }])
    # Alternatively, you can get all atlas types via...
    # atlas_type_defs = client.get_all_typedefs()

    input(">>>>Ready to upload type definitions?")
    # Upload scaffolded type defs and view the results of upload
    _upload_typedef = client.upload_typedefs(atlas_type_defs,
                                             force_update=True)
    print(json.dumps(_upload_typedef, indent=2))

    input(">>>>Review the above results to see what was uploaded.")

    # Generate the atlas entities!

    excel_results = excel_reader.parse_lineages(file_path,
                                                atlas_type_defs,
                                                use_column_mapping=True)

    print("Results from excel transformation")
    print(json.dumps(excel_results, indent=2))

    input(">>>>Review the above results to see what your excel file contained")
Example 8
                         attributes={
                             "inputs": [input01.to_json(minimum=True)],
                             "outputs": [output01.to_json(minimum=True)]
                         })

    # Create a batch of entities to be uploaded as json/dicts
    batch = [
        step01.to_json(),
        step02.to_json(),
        step03.to_json(),
        parent.to_json(),
        input01.to_json(),
        output01.to_json()
    ]

    # Upload the types
    typeResults = client.upload_typedefs(
        {
            "entityDefs": [processWithSteps.to_json(),
                           processSteps.to_json()],
            "relationshipDefs": [relationship.to_json()]
        },
        force_update=True)

    # Upload the entities
    results = client.upload_entities({"entities": batch})

    # Print the results of the entities upload
    print(json.dumps(results, indent=2))
    print("Successfully created types and entities!")
Example 9
        account_name=os.environ.get("PURVIEW_NAME", ""),
        authentication=oauth
    )

    # We need a custom process entity type that contains the definition for
    # a columnMapping attribute.
    procType = EntityTypeDef(
        "ProcessWithColumnMapping",
        superTypes=["Process"],
        attributeDefs=[
            AtlasAttributeDef("columnMapping")
        ]
    )

    # Upload the type definition
    type_results = client.upload_typedefs(entityDefs=[procType], force_update=True)
    print(json.dumps(type_results, indent=2))

    # Set up a guid tracker to make it easier to generate negative guids
    gt = GuidTracker()

    # Now we can create the entities, we will have two inputs and one output
    colMapInput01 = AtlasEntity(
        "Input for Column Mapping",
        "hive_table",
        "pyapacheatlas://colMapInput01",
        guid=gt.get_guid()
    )
    colMapInput02 = AtlasEntity(
        "Second Input for Column Mapping",
        "hive_table",
Example 10
# Set up the new entity types to capture delta lake tables and databricks jobs

# Databricks Table
databricks_table_type = EntityTypeDef(
    name="databricks_table",
    attributeDefs=[
        AtlasAttributeDef(name="format",
                          defaultValue="parquet",
                          isOptional=True).to_json(),
        AtlasAttributeDef(name="location", isOptional=True).to_json(),
        AtlasAttributeDef(name="num_files", isOptional=True).to_json(),
        AtlasAttributeDef(name="size", isOptional=True).to_json()
    ],
    superTypes=["DataSet"],
    options={"schemaElementAttribute": "columns"})
typedef_results = client.upload_typedefs(
    {"entityDefs": [databricks_table_type.to_json()]}, force_update=True)
print(typedef_results)

# COMMAND ----------

# DBTITLE 1,databricks-column entity type
# Databricks Column
databricks_column_type = EntityTypeDef(
    name="databricks_column",
    attributeDefs=[AtlasAttributeDef(name="data_type")],
    superTypes=["DataSet"],
)

typedef_results = client.upload_typedefs(
    {"entityDefs": [databricks_column_type.to_json()]}, force_update=True)
print(typedef_results)
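Example 10 defines the table and column types, but the relationship tying databricks_column entities to a databricks_table is not shown in this excerpt. A sketch of what such a definition could look like, following the endDef pattern of Example 5; the RelationshipTypeDef import path and the relationship name are assumptions:

from pyapacheatlas.core.typedef import RelationshipTypeDef  # assumed import path

# Hypothetical relationship: a table holds a SET of columns and each column
# points back to a SINGLE table, mirroring the endDefs used in Example 5.
databricks_table_column_relationship = RelationshipTypeDef(
    name="databricks_table_columns",
    endDef1={
        "type": "databricks_table",
        "name": "columns",
        "isContainer": True,
        "cardinality": "SET",
        "isLegacyAttribute": False
    },
    endDef2={
        "type": "databricks_column",
        "name": "table",
        "isContainer": False,
        "cardinality": "SINGLE",
        "isLegacyAttribute": False
    })

typedef_results = client.upload_typedefs(
    {"relationshipDefs": [databricks_table_column_relationship.to_json()]},
    force_update=True)
print(typedef_results)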
Example 11
    # Create an entity type definition with three columns (column1, 2, 3)
    # with column1 required.
    edef = EntityTypeDef(name="pyapacheatlas_create_type_def_sample",
                         attributeDefs=[
                             AtlasAttributeDef("column1",
                                               typeName="string",
                                               isOptional=False),
                             AtlasAttributeDef("column2", typeName="int"),
                             AtlasAttributeDef("column3",
                                               typeName="array<string>",
                                               cardinality="SET"),
                         ],
                         superTypes=["DataSet"])

    # Do the upload
    results = client.upload_typedefs(entityDefs=[edef], force_update=True)

    # Just for demonstration purposes, get the entity type def.
    get_results = client.get_typedef(
        TypeCategory.ENTITY, name="pyapacheatlas_create_type_def_sample")
    print("# Results from getting created type def:")
    print(json.dumps(get_results, indent=2))

    # Creating an instance of this custom type
    actual_entity = AtlasEntity(
        name="instance_of_pyapacheatlas_create_type_def_sample",
        qualified_name=
        "pyapacheatlas://instance_of_pyapacheatlas_create_type_def_sample",
        typeName="pyapacheatlas_create_type_def_sample",
        attributes={
            "column1": "abc",
Example 12
import os
import json

from pyapacheatlas.auth import ServicePrincipalAuthentication
from pyapacheatlas.core import PurviewClient, AtlasEntity, AtlasProcess

print(os.environ.get('AZURE_TENANT_ID', ''))

oauth = ServicePrincipalAuthentication(
    tenant_id=os.environ.get('AZURE_TENANT_ID', ''),
    client_id=os.environ.get('AZURE_CLIENT_ID', ''),
    client_secret=os.environ.get('AZURE_CLIENT_SECRET', ''))
client = PurviewClient(account_name=os.environ.get('PURVIEW_CATALOG_NAME', ''),
                       authentication=oauth)
with open('./pyapacheatlas_mysql_typedefs_v2.json', 'r') as typedef_file:
    typedefs = json.load(typedef_file)

client.upload_typedefs(typedefs, force_update=True)
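upload_typedefs accepts the same composite shape that Examples 2, 3, and 8 build by hand, so the JSON file loaded here is expected to carry top-level entityDefs / relationshipDefs lists. A minimal hand-written illustration of that shape; the type name is hypothetical:

# Smallest composite payload upload_typedefs understands; mirrors the dicts
# assembled in Examples 2 and 8. "example_mysql_table" is an illustrative name.
minimal_typedefs = {
    "entityDefs": [
        {
            "name": "example_mysql_table",
            "superTypes": ["DataSet"],
            "attributeDefs": []
        }
    ],
    "relationshipDefs": []
}
# client.upload_typedefs(minimal_typedefs, force_update=True)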