def test_add_attributes_later():
    # Attributes added after construction, whether AtlasAttributeDef objects
    # or plain dicts, should be stored as dicts.
    s = AtlasStructDef(name="blah", category=TypeCategory.ENTITY)
    a1 = AtlasAttributeDef(name="test")
    a2 = AtlasAttributeDef(name="test2").to_json()
    s.addAttributeDef(a1, a2)

    assert (len(s.attributeDefs) == 2)
    assert (all([isinstance(e, dict) for e in s.attributeDefs]))
def test_add_attributes_at_start():
    s = AtlasStructDef(
        name="blah",
        category=TypeCategory.ENTITY,
        attributeDefs=[
            AtlasAttributeDef(name="test"),
            AtlasAttributeDef(name="test2").to_json(),
            AtlasAttributeDef(name="test3")
        ])
    c = ClassificationTypeDef(
        name="blah",
        attributeDefs=[
            AtlasAttributeDef(name="test"),
            AtlasAttributeDef(name="test2").to_json(),
            AtlasAttributeDef(name="test3")
        ])
    ent = EntityTypeDef(
        name="blah",
        attributeDefs=[
            AtlasAttributeDef(name="test"),
            AtlasAttributeDef(name="test2").to_json(),
            AtlasAttributeDef(name="test3")
        ])

    # Base Struct handles this
    assert (len(s.attributeDefs) == 3)
    assert (all([isinstance(e, dict) for e in s.attributeDefs]))
    # ClassificationDefs should also handle this behavior
    assert (len(c.attributeDefs) == 3)
    assert (all([isinstance(e, dict) for e in c.attributeDefs]))
    # EntityDefs should also handle this behavior
    assert (len(ent.attributeDefs) == 3)
    assert (all([isinstance(e, dict) for e in ent.attributeDefs]))
def test_parse_entity_defs_extended():
    rc = ReaderConfiguration()
    reader = Reader(rc)
    json_rows = [
        {"Entity TypeName": "generic", "name": "attrib1", "description": "desc1",
         "isOptional": "True", "isUnique": "False", "defaultValue": None},
        {"Entity TypeName": "generic", "name": "attrib2", "description": "desc2",
         "isOptional": "True", "isUnique": "False", "defaultValue": None,
         "cardinality": "SINGLE"},
        {"Entity TypeName": "demo", "name": "attrib3", "description": "desc3",
         "isOptional": "False", "isUnique": "False", "cardinality": "SET"}
    ]

    output = reader.parse_entity_defs(json_rows)

    # It is an AtlasTypesDef composite wrapper
    assert ("entityDefs" in output.keys())
    # There are two entity typenames specified so there should be only two entityDefs
    assert (len(output["entityDefs"]) == 2)

    genericEntityDef = None
    demoEntityDef = None

    for entityDef in output["entityDefs"]:
        if entityDef["name"] == "generic":
            genericEntityDef = entityDef
        elif entityDef["name"] == "demo":
            demoEntityDef = entityDef

    # Generic has two attributes
    assert (len(genericEntityDef["attributeDefs"]) == 2)

    # Demo has one attribute
    assert (len(demoEntityDef["attributeDefs"]) == 1)
    assert (demoEntityDef["attributeDefs"][0] == AtlasAttributeDef(
        name="attrib3",
        **{"description": "desc3", "isOptional": "False", "isUnique": "False",
           "cardinality": "SET"}
    ).to_json())
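# A minimal usage sketch (an assumption for illustration, not part of the
# test suite): the dict returned by Reader.parse_entity_defs can be pushed
# to Atlas/Purview with the same upload_typedefs(entityDefs=..., force_update=True)
# call the samples below use. The helper name and the `client` parameter
# are hypothetical.
def upload_parsed_entity_defs(client, parsed_typedefs):
    # `client` is an authenticated AtlasClient or PurviewClient;
    # `parsed_typedefs` is the output of reader.parse_entity_defs(...).
    return client.upload_typedefs(
        entityDefs=parsed_typedefs["entityDefs"],
        force_update=True
    )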
# Authenticate against Azure AD with a service principal and create the client
oauth = ServicePrincipalAuthentication(
    tenant_id=os.environ.get("TENANT_ID", tenant_id),
    client_id=os.environ.get("CLIENT_ID", client_id),
    client_secret=os.environ.get("CLIENT_SECRET", client_secret)
)
client = PurviewClient(
    account_name=os.environ.get("PURVIEW_NAME", "purview_account_name"),
    authentication=oauth
)
guid = GuidTracker()

# COMMAND ----------

# Set up a few types and relationships
# This is a one time thing but necessary to make the demo work
# It also demonstrates how you can capture different attributes
# for your dataframes, dataframe columns, and jobs.
type_spark_df = EntityTypeDef(
    name="custom_spark_dataframe",
    attributeDefs=[AtlasAttributeDef(name="format")],
    superTypes=["DataSet"],
    options={"schemaElementAttribute": "columns"}
)
type_spark_columns = EntityTypeDef(
    name="custom_spark_dataframe_column",
    attributeDefs=[AtlasAttributeDef(name="data_type")],
    superTypes=["DataSet"],
)
type_spark_job = EntityTypeDef(
    name="custom_spark_job_process",
    attributeDefs=[
        AtlasAttributeDef(name="job_type", isOptional=False),
        AtlasAttributeDef(name="schedule", defaultValue="adHoc")
    ],
    superTypes=["Process"]
)
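# The excerpt above only defines the Spark types; what follows is a minimal
# sketch, an assumption about the next step rather than part of the original
# sample, reusing the upload_typedefs(entityDefs=..., force_update=True)
# pattern the other samples in this section use.
typedef_results = client.upload_typedefs(
    entityDefs=[type_spark_df, type_spark_columns, type_spark_job],
    force_update=True
)
print(typedef_results)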
# Authenticate and create the Purview client
oauth = ServicePrincipalAuthentication(
    tenant_id=os.environ.get("TENANT_ID", ""),
    client_id=os.environ.get("CLIENT_ID", ""),
    client_secret=os.environ.get("CLIENT_SECRET", "")
)
client = PurviewClient(
    account_name=os.environ.get("PURVIEW_NAME", ""),
    authentication=oauth
)

# We need a custom process entity type that contains the definition for
# a columnMapping attribute.
procType = EntityTypeDef(
    "ProcessWithColumnMapping",
    superTypes=["Process"],
    attributeDefs=[
        AtlasAttributeDef("columnMapping")
    ]
)

# Upload the type definition
type_results = client.upload_typedefs(entityDefs=[procType], force_update=True)
print(json.dumps(type_results, indent=2))

# Set up a guid tracker to make it easier to generate negative guids
gt = GuidTracker()

# Now we can create the entities, we will have two inputs and one output
colMapInput01 = AtlasEntity(
    "Input for Column Mapping",
    "hive_table",
    "pyapacheatlas://colMapInput01",
# MAGIC %md
# MAGIC ##### 2. Setup Custom Entity Types
# MAGIC Setup custom entities to capture Databricks Tables, Columns and Jobs

# COMMAND ----------

# DBTITLE 0,databricks-table entity type
# Set up the new entity types to capture delta lake tables and databricks jobs

# Databricks Table
databricks_table_type = EntityTypeDef(
    name="databricks_table",
    attributeDefs=[
        AtlasAttributeDef(name="format", defaultValue="parquet", isOptional=True).to_json(),
        AtlasAttributeDef(name="location", isOptional=True).to_json(),
        AtlasAttributeDef(name="num_files", isOptional=True).to_json(),
        AtlasAttributeDef(name="size", isOptional=True).to_json()
    ],
    superTypes=["DataSet"],
    options={"schemaElementAttribute": "columns"}
)

typedef_results = client.upload_typedefs(
    {"entityDefs": [databricks_table_type.to_json()]},
    force_update=True
)
print(typedef_results)

# COMMAND ----------

# DBTITLE 1,databricks-column entity type
# Databricks Column