def download_gen1_terms(config):
    """Download every glossary term from an ADC Gen 1 catalog.

    Pages through the Gen 1 glossary REST endpoint, following
    ``nextLink`` until it is absent, writes the combined term list to
    the path in ``config["Default"]["ADCTermsPath"]`` as JSON, and
    returns the list.

    :param config:
        ConfigParser-style mapping with an ``ADCGen1Client`` section
        (CATALOG_NAME, GLOSSARY_NAME, TENANT_ID, CLIENT_ID,
        CLIENT_SECRET) and a ``Default`` section (ADCTermsPath).
    :return: list of glossary term dicts as returned by the service.
    :raises requests.HTTPError: if the service returns an error status.
    """
    catalog_name = config["ADCGen1Client"]["CATALOG_NAME"]
    glossary_name = config["ADCGen1Client"]["GLOSSARY_NAME"]
    api_version = "2016-03-30"
    auth = ServicePrincipalAuthentication(
        tenant_id=config["ADCGen1Client"]["TENANT_ID"],
        client_id=config["ADCGen1Client"]["CLIENT_ID"],
        client_secret=config["ADCGen1Client"]["CLIENT_SECRET"]
    )
    # Need to update the resource we're authenticating against
    auth.data.update({"resource": "https://api.azuredatacatalog.com"})

    # This endpoint provides you with all glossary terms in ADC Gen1
    enumerate_uri = f"https://api.azuredatacatalog.com/catalogs/{catalog_name}/glossaries/{glossary_name}/terms?api-version={api_version}"

    output = []
    while True:  # idiomatic: no parentheses around the loop condition
        results = requests.get(
            enumerate_uri,
            headers=auth.get_authentication_headers()
        )
        # Fix: fail fast on HTTP errors instead of attempting to parse
        # an error page as JSON, which produced confusing failures.
        results.raise_for_status()
        content = results.json()
        output.extend(content["value"])
        # The service pages results; keep following nextLink until the
        # final page, which omits it.
        if "nextLink" not in content:
            break
        enumerate_uri = content["nextLink"]

    with open(config["Default"]["ADCTermsPath"], 'w') as fp:
        json.dump(output, fp, indent=1)

    return output
def uploadPurview(purview_lineage):
    """Upload a batch of lineage entities to the Atlas/Purview service.

    Authenticates with the service principal held on the module-level
    ``Azure`` object and pushes ``purview_lineage`` in a single
    ``upload_entities`` call.

    :param purview_lineage:
        Entity batch accepted by ``AtlasClient.upload_entities``
        (a list of entities or their dict representations).
    :return:
        The service response from ``upload_entities``. The original
        assigned this to a local and discarded it; returning it is
        backward-compatible and lets callers inspect guid assignments.
    """
    oauth = ServicePrincipalAuthentication(
        tenant_id=Azure.tenant_id,
        client_id=Azure.client_id,
        client_secret=Azure.client_secret
    )
    client = AtlasClient(
        endpoint_url=Azure.endpoint_url,
        authentication=oauth
    )
    # Fix: return the upload result instead of dropping it on the floor.
    return client.upload_entities(batch=purview_lineage)
def test_purview_client_integration():
    """Smoke test: fetch the glossary from a live Purview account.

    Credentials and the account name are read from the environment;
    empty-string defaults mean an unconfigured run fails at request
    time rather than at import time.
    """
    credentials = ServicePrincipalAuthentication(
        tenant_id=os.environ.get("TENANT_ID", ""),
        client_id=os.environ.get("CLIENT_ID", ""),
        client_secret=os.environ.get("CLIENT_SECRET", "")
    )
    purview = PurviewClient(
        account_name=os.environ.get("PURVIEW_NAME", ""),
        authentication=credentials
    )

    glossary = purview.get_glossary()
    assert glossary is not None
def save_entities(atlas_mysql):
    """Upload a MySQL scaffold's entities to Purview and record guids.

    Collects the instance, databases, tables, and columns from
    ``atlas_mysql``, uploads them in one batch, and appends each
    assigned guid (one per line) to a timestamped
    ``entities.<time>.txt`` file so a later cleanup step can delete
    them.

    :param atlas_mysql:
        Object exposing ``instance`` plus the iterable collections
        ``dbs``, ``db_tables`` and ``table_columns`` of Atlas entities.
    """
    oauth = ServicePrincipalAuthentication(
        tenant_id=os.environ.get('AZURE_TENANT_ID', ''),
        client_id=os.environ.get('AZURE_CLIENT_ID', ''),
        client_secret=os.environ.get('AZURE_CLIENT_SECRET', ''))
    client = PurviewClient(
        account_name=os.environ.get('PURVIEW_CATALOG_NAME', ''),
        authentication=oauth)

    # Flatten instance + dbs + tables + columns into one upload batch
    # (the original appended each item in four separate loops).
    entities = [atlas_mysql.instance]
    entities.extend(atlas_mysql.dbs)
    entities.extend(atlas_mysql.db_tables)
    entities.extend(atlas_mysql.table_columns)

    assignments = client.upload_entities(entities)['guidAssignments']

    # Fix: context manager guarantees the file is closed even if a
    # write raises (the original left the handle open on error).
    with open(f"entities.{time.time()}.txt", "a") as f:
        for guid in assignments:
            f.write(assignments[guid] + "\n")
import json
import os
import time

from pyapacheatlas.auth import ServicePrincipalAuthentication
from pyapacheatlas.core.client import PurviewClient
from pyapacheatlas.core import AtlasEntity
from pyapacheatlas.core.typedef import EntityTypeDef

# Module-level client shared by the tests in this file; credentials are
# read from the environment (empty-string defaults mean an unconfigured
# run fails at request time, not at import time).
oauth = ServicePrincipalAuthentication(
    tenant_id=os.environ.get("TENANT_ID", ""),
    client_id=os.environ.get("CLIENT_ID", ""),
    client_secret=os.environ.get("CLIENT_SECRET", "")
)
client = PurviewClient(
    account_name = os.environ.get("PURVIEW_NAME", ""),
    authentication=oauth
)


def test_set_relationship_different_ways():
    """Exercise two ways of attaching relationships to AtlasEntity objects.

    Builds one table entity and four column entities with negative
    placeholder guids, then links table<->column first via
    ``addRelationship`` on the table and second by writing the column's
    ``relationshipAttributes`` directly.
    """
    ae = AtlasEntity("rel01", "hive_table", "tests://rel01", guid=-1)
    c1 = AtlasEntity("rel01#01", "hive_column", "tests://rel01#c", guid=-2, attributes={"type": "str"})
    c2 = AtlasEntity("rel01#02", "hive_column", "tests://rel02#c", guid=-3, attributes={"type": "str"})
    c3 = AtlasEntity("rel01#03", "hive_column", "tests://rel03#c", guid=-4, attributes={"type": "str"})
    c4 = AtlasEntity("rel01#04", "hive_column", "tests://rel04#c", guid=-5, attributes={"type": "str"})

    # Way 1: table -> column via the entity's relationship helper.
    # Add c1 as the only relationship
    ae.addRelationship(columns=[c1.to_json(minimum=True)])

    # Way 2: column -> table by updating relationshipAttributes directly.
    c2.relationshipAttributes.update({"table": ae.to_json(minimum=True)})
# File paths used by the glossary-migration steps below.
# NOTE(review): folder_path, output_path and config are defined earlier
# in this file, outside this chunk — confirm against the full file.
unchanged_path = os.path.join(folder_path, "glossary.json")
glossary_prep_path = os.path.join(output_path, "glossary_prepared.json")
relationships_guid_path = config["Default"]["GlossaryRelationships"]
old_to_new_glossary_guid_path = config["Default"]["GlossaryOldToNew"]

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--skip-download",
        action="store_true",
        help="Use if you've already written the glossary to disk.")
    args = parser.parse_args()

    # Two Atlas clients: the source ("old") service to export from and
    # the destination ("new") service to import into, each with its own
    # service-principal credentials from config.
    oauth_old = ServicePrincipalAuthentication(
        tenant_id=config["OldClient"]["TENANT_ID"],
        client_id=config["OldClient"]["CLIENT_ID"],
        client_secret=config["OldClient"]["CLIENT_SECRET"])
    old_client = AtlasClient(
        endpoint_url=config["OldClient"]["ENDPOINT_URL"],
        authentication=oauth_old)
    oauth_new = ServicePrincipalAuthentication(
        tenant_id=config["NewClient"]["TENANT_ID"],
        client_id=config["NewClient"]["CLIENT_ID"],
        client_secret=config["NewClient"]["CLIENT_SECRET"])
    new_client = AtlasClient(
        endpoint_url=config["NewClient"]["ENDPOINT_URL"],
        authentication=oauth_new)

    # Hands out placeholder (negative) guids during the migration.
    gt = GuidTracker()

    # Export the glossary terms
    if not args.skip_download:
        print("Exporting the old glossary terms")
# NOTE(review): this loop appears to be the tail of an enclosing
# definition outside this chunk — ``output``, ``term_id_to_name`` and
# ``importwriter`` are not defined here. Confirm its indentation level
# against the full file.
for row in output:
    # Update the related term if it exists
    if row["related_term"] != "":
        row["related_term"] = term_id_to_name[row["related_term"]]
    importwriter.writerow(list(row.values()))

if __name__ == "__main__":
    config = configparser.ConfigParser()
    config.read("./samples/migrateADCGen1/config.ini")

    # Configure your Purview Authentication
    oauth = ServicePrincipalAuthentication(
        tenant_id=config["PurviewClient"]["TENANT_ID"],
        client_id=config["PurviewClient"]["CLIENT_ID"],
        client_secret=config["PurviewClient"]["CLIENT_SECRET"]
    )
    client = PurviewClient(
        account_name=config["PurviewClient"]["PURVIEW_ACCOUNT_NAME"],
        authentication=oauth
    )

    # Download the Gen 1 Terms to a json document
    print("Downloading ADC Gen 1 Terms...")
    download_gen1_terms(config)
    print("Successfully downloaded ADC Gen 1 Terms.")

    # Convert the json to a csv for import
    print("Converting ADC Gen 1 Terms to be CSV for Purview Upload...")
    convert_gen1_to_purview_terms(config)
# Databricks notebook source import argparse import json import os from pyapacheatlas.auth import ServicePrincipalAuthentication from pyapacheatlas.core import PurviewClient, AtlasEntity, AtlasProcess, TypeCategory from pyapacheatlas.core.util import GuidTracker from pyapacheatlas.core.typedef import AtlasAttributeDef, EntityTypeDef, RelationshipTypeDef from pyapacheatlas.readers import ExcelConfiguration, ExcelReader # The above cell gets the v_tenant_id,v_client_id etc. auth = ServicePrincipalAuthentication(tenant_id=v_tenant_id, client_id=v_client_id, client_secret=v_client_secret) # Create a client to connect to your service. client = PurviewClient(account_name=v_data_catalog_name, authentication=auth) guid = GuidTracker() # COMMAND ---------- # Search for the entity you want to delete import json import os search = client.search_entities("loan_risk_data.csv") for page in search: print(json.dumps(page, indent=2))
import time
import os
import sys
import array

from pyapacheatlas.auth import ServicePrincipalAuthentication
from pyapacheatlas.core import PurviewClient, AtlasEntity, AtlasProcess

# Usage: python <script> <guid-file>
# The guid file holds one entity guid per line (as written by the
# matching save_entities step) and is removed after a successful delete.
filename = sys.argv[1]

oauth = ServicePrincipalAuthentication(
    tenant_id=os.environ.get('AZURE_TENANT_ID', ''),
    client_id=os.environ.get('AZURE_CLIENT_ID', ''),
    client_secret=os.environ.get('AZURE_CLIENT_SECRET', ''))
client = PurviewClient(
    account_name=os.environ.get('PURVIEW_CATALOG_NAME', ''),
    authentication=oauth)

# Fix: context manager guarantees the file handle is closed even if
# reading raises (the original closed it manually, after the network
# call, so a failed delete leaked the handle).
with open(filename) as infile:
    guids = [line.strip() for line in infile]

client.delete_entity(guids)

# Only remove the guid file once the delete call has succeeded, so a
# failed run can be retried with the same file.
os.remove(filename)