def create_adls_client(
    storename: str, dl_service_auth_str: str = None, interactive: bool = False
) -> core.AzureDLFileSystem:
    """
    Creates an ADLS file system client.

    Parameters
    ----------
    storename: str
        Name of datalake store.
    dl_service_auth_str: str
        Authentication string to use
    interactive: bool
        If true then use interactive authentication

    Returns
    -------
    core.AzureDLFileSystem
        Instance of AzureDLFileSystem, ready to use
    """
    # Delegate token acquisition; interactive wins over the auth string
    # only insofar as get_datalake_token decides.
    token = get_datalake_token(
        interactive=interactive, dl_service_auth_str=dl_service_auth_str
    )
    return core.AzureDLFileSystem(token, store_name=storename)
def __init__(self, **kwargs):
    # Authenticate with a service principal (all settings arrive via kwargs),
    # then keep an open ADLS filesystem handle on the instance.
    credentials = lib.auth(
        tenant_id=kwargs['TENANT_ID'],
        client_secret=kwargs['AZURE_SECRET_KEY'],
        client_id=kwargs['AZURE_CLIENT_ID'],
        resource=kwargs['AZURE_RESOURCE'],
    )
    self.adl_conn_obj = core.AzureDLFileSystem(
        credentials, store_name=kwargs['STORE_NAME'])
def add_usql_job(scripts_folder, directory_name, usql_file, adl_token,
                 adl_name, simulation_datetime, au_per_usql_job):
    """Download a U-SQL script from ADLS and submit it as an ADLA job.

    Parameters: scripts_folder/directory_name/usql_file locate the script
    inside the data lake store `adl_name`; `adl_token` authenticates both
    the download and the job submission; when `simulation_datetime` is
    truthy, every `DateTime.Now` in the script is replaced by that fixed
    timestamp; `au_per_usql_job` sets the job's degree of parallelism.

    Returns the generated job id (str).
    """
    # may need to recreate adl_token every time in case it expires
    # adl_token = lib.auth(tenant_id=adl_tenant_id, client_id=adl_client_id, client_secret=adl_client_secret)
    adla_job_client = DataLakeAnalyticsJobManagementClient(
        adl_token, 'azuredatalakeanalytics.net')
    # download USQL file from ADLS into the current working directory
    usql_file_full_path = scripts_folder + '/' + directory_name + '/' + usql_file + '.usql'
    adls_file_system_client = core.AzureDLFileSystem(adl_token, store_name=adl_name)
    multithread.ADLDownloader(adls_file_system_client, lpath='.',
                              rpath=usql_file_full_path, overwrite=True)
    # FIX: read the script through a context manager so the file handle is
    # closed deterministically (it was previously leaked).
    with open(usql_file + '.usql', 'r') as script_file:
        usql_script = script_file.read()
    if simulation_datetime:
        datetime_replace = "Convert.ToDateTime(\"" + simulation_datetime + "\")"
        usql_script = usql_script.replace('DateTime.Now', datetime_replace)
    jobId = str(uuid.uuid4())
    jobInfo = JobInformation(name=directory_name + '/' + usql_file,
                             type='USql',
                             degree_of_parallelism=au_per_usql_job,
                             properties=USqlJobProperties(script=usql_script))
    jobResult = adla_job_client.job.create(adl_name, jobId, jobInfo)
    return (jobId)
def client(args):
    """Create a filesystem client object

    Parameters:
        args (class): Arguments.
    """
    # No explicit token here; AzureDLFileSystem falls back to its own
    # credential resolution for the named account.
    return core.AzureDLFileSystem(store_name=args.account_name)
def cf_datalake_store_filesystem(account_name):
    """Build an ADLS filesystem client using the active CLI profile's login."""
    # subscription_id=None lets the profile pick its default subscription.
    login_profile = Profile()
    creds, _, _ = login_profile.get_login_credentials(subscription_id=None)
    return core.AzureDLFileSystem(
        token=creds,
        store_name=account_name,
        url_suffix=CLOUD.suffixes.azure_datalake_store_file_system_endpoint)
def get_adls_file_list(beginning_path):
    """returns a data frame with detailed system information using a adls file system client"""
    # Service-principal auth against the module-level TENANT_ID / CLIENT_* /
    # ADLS_ACCOUNT constants, then a single directory listing.
    creds = lib.auth(
        tenant_id=TENANT_ID,
        client_secret=CLIENT_SECRET,
        client_id=CLIENT_ID,
        resource='https://datalake.azure.net/',
    )
    fs = core.AzureDLFileSystem(creds, store_name=ADLS_ACCOUNT)
    listing = fs.ls(beginning_path)
    return pd.DataFrame(listing)
def initializeAdls():
    'Initializes Azure DataLakeStore'
    log.debug("%sclientId: %s", LEV2, clientId)
    # SECURITY FIX: the client secret was previously written to the log in
    # plaintext. Only record whether one is configured.
    log.debug("%sclientSecret: %s", LEV2, '<redacted>' if clientSecret else '<unset>')
    log.debug("%stenantId: %s", LEV2, tenantId)
    log.debug("%sadlsAccountName: %s", LEV2, adlsAccountName)
    token = lib.auth(tenant_id = tenantId, client_secret = clientSecret, client_id = clientId)
    adl = core.AzureDLFileSystem(token, store_name=adlsAccountName)
    return ( adl )
def cf_dls_filesystem(account_name):
    """Create an ADLS filesystem client from the CLI profile, scoped to the
    cloud's Data Lake resource endpoint."""
    # Heavy import kept local, as in the original.
    from azure.datalake.store import core
    login_profile = Profile()
    creds, _, _ = login_profile.get_login_credentials(
        subscription_id=None,
        resource=CLOUD.endpoints.active_directory_data_lake_resource_id)
    return core.AzureDLFileSystem(
        token=creds,
        store_name=account_name,
        url_suffix=CLOUD.suffixes.azure_datalake_store_file_system_endpoint)
def uploadfiletoadls(store_name, adls_dir, l_path, r_path):
    """Upload the local file at l_path to r_path in the given ADLS store.

    Creates adls_dir first; always returns 0 on completion.
    """
    # `credentials` comes from module scope.
    fs = core.AzureDLFileSystem(credentials, store_name=store_name)
    fs.mkdir(adls_dir)
    multithread.ADLUploader(
        fs,
        lpath=l_path,
        rpath=r_path,
        nthreads=64,
        overwrite=True,
        buffersize=4194304,
        blocksize=4194304,
    )
    return 0
def dataLake(self):
    """Authenticate and build an ADLS filesystem client for 'datalakearm'.

    NOTE(review): the client is bound to a local only and the method
    returns None — confirm callers expect no return value.
    """
    creds = lib.auth(tenant_id = TENANT_ID,
                     client_secret = KEY,
                     client_id = CLIENT,
                     resource = RESOURCE)
    ## Declare variables
    subscriptionId = subscription_id
    adlsAccountName = 'datalakearm'
    ## Create a filesystem client object
    adlsFileSystemClient = core.AzureDLFileSystem(creds, store_name=adlsAccountName)
def verify_datalake_directory(self, datalake_name, dir_name):
    """Return True/False if dir_name exists in the given Data Lake store;
    on any error, log/record it and return None.

    FIX: the error path concatenated `traceback.print_exc(...)` (which
    returns None) to a str, raising TypeError inside the handler. Use
    traceback.format_exc() to get the traceback text instead.
    """
    try:
        datalake_client = core.AzureDLFileSystem(self.dl_filesystem_creds,
                                                 store_name=datalake_name)
        result = datalake_client.exists(dir_name)
        return result
    except Exception as err:
        tb_text = traceback.format_exc()
        logging.info(
            "Unable to verify Data Lake directory: " + str(err) +
            "\n Traceback: " + tb_text)
        append_result(str({"error": "Unable to verify Data Lake directory",
                           "error_message": str(err) + "\n Traceback: " + tb_text}))
        traceback.print_exc(file=sys.stdout)
def renew_adl_token():
    """Loop forever: every 30 minutes re-authenticate and rebuild the
    module-global `adl` filesystem client."""
    print("--- Creating a thread to renew ADL token periodically ---")
    global adl
    interval = 1800
    while True:
        time.sleep(interval)
        try:
            fresh_token = lib.auth(tenant_id=tenant_id,
                                   client_id=client_id,
                                   client_secret=client_secret)
            adl = core.AzureDLFileSystem(token=fresh_token, store_name=adl_name)
            print("--- ADL token has been renewed ---")
        except Exception as e:
            raise Exception('Error while attempting to connect to Azure Data Lake Store:\n{}'.format(e))
    # Unreachable: the loop above never exits normally (kept from original).
    print("--- Exiting the loop of renewing ADL token ---.")
def get_adl_client(adls_account_name, tenant_id):
    """
    :param adls_account_name: Data Lake account
    :param tenant_id: Azure AD Tenant Id
    :return: client object
    """
    # No client id/secret supplied — lib.auth will use its default
    # (presumably interactive/device) flow; confirm if running unattended.
    creds = lib.auth(tenant_id=tenant_id,
                     resource='https://datalake.azure.net/')
    return core.AzureDLFileSystem(creds, store_name=adls_account_name)
def get_conn(self):
    """Return a AzureDLFileSystem object."""
    conn = self.get_connection(self.conn_id)
    extra = conn.extra_dejson
    # Side effect kept from original: account name is cached on the hook.
    self.account_name = extra.get('account_name')
    creds = lib.auth(tenant_id=extra.get('tenant'),
                     client_secret=conn.password,
                     client_id=conn.login)
    fs_client = core.AzureDLFileSystem(creds, store_name=self.account_name)
    fs_client.connect()
    return fs_client
def download(download_dir, data_dir):
    """Download every entry under data_dir from the 'bigdatadevdatalake'
    store into download_dir, printing per-file status.

    FIX: `download_dir` was previously clobbered by a hard-coded
    "december_2018", silently ignoring the caller's argument; the
    parameter is now honored.
    """
    token = lib.auth()
    adl = core.AzureDLFileSystem(token, store_name='bigdatadevdatalake')
    for f in adl.ls(data_dir):
        # f[-38:] takes the trailing 38 chars of the remote path as the
        # local filename — presumably a fixed-width name; verify.
        print(f[-38:])
        outfile = os.path.join(download_dir, f[-38:])
        downloader = multithread.ADLDownloader(adl, f, outfile)
        if downloader.successful():
            print("Finished Downloading!")
        else:
            print("error in downloading!")
def __create_fs_client(self, account):
    """Wrap the token service's token in the adal-style dict that
    AzureDLFileSystem expects and build a client for `account`."""
    raw = self.__token_service.token
    # Copied from azure.datalake.store.lib, since AzureDLFileSystem requires
    # a different form of token than that provided by our token service.
    raw.update({
        'access': raw['accessToken'],
        'resource': lib.DEFAULT_RESOURCE_ENDPOINT,
        'refresh': raw.get('refreshToken', False),
        'time': time(),
        'tenant': self.__token_service.tenant_id,
        'client': lib.default_client,
    })
    credential = lib.DataLakeCredential(raw)
    return core.AzureDLFileSystem(token=credential, store_name=account)
def adls_client(key_vault_url: str, store_name: str) -> core.AzureDLFileSystem:
    """Build an ADLS client whose service-principal credentials are pulled
    from Key Vault secrets (tenantid / spclientid / spclientsecret)."""
    vault = secret_client(key_vault_url)
    creds = lib.auth(
        tenant_id=vault.get_secret("tenantid").value,
        client_id=vault.get_secret("spclientid").value,
        client_secret=vault.get_secret("spclientsecret").value,
        resource="https://datalake.azure.net/",
    )
    # Create a filesystem client object
    return core.AzureDLFileSystem(creds, store_name=store_name)
def get_adl_client(store_name, client_id=None, client_secret=None, tenant_id=None):
    """Return an AzureDLFileSystem for store_name.

    FIX: the client_id/client_secret/tenant_id parameters were accepted but
    silently ignored — auth always used the config module. They are now
    honored when all three are supplied, with the config as fallback.
    Also narrowed the bare `except:` (which swallowed even KeyboardInterrupt)
    to the failures the fallback can actually hit.
    """
    if client_id and client_secret and tenant_id:
        token = lib.auth(client_id=client_id,
                         client_secret=client_secret,
                         tenant_id=tenant_id)
    else:
        try:
            from quorum.config.config import AZURE_DATA_LAKE
            token = lib.auth(client_id=AZURE_DATA_LAKE["ADL_CLIENT_ID"],
                             client_secret=AZURE_DATA_LAKE["ADL_CLIENT_SECRET"],
                             tenant_id=AZURE_DATA_LAKE["TENANT_ID"])
        except (ImportError, KeyError):
            raise Exception(
                'Pass client_id, client_secret, and tenant_id or define in config.py'
            )
    return core.AzureDLFileSystem(token, store_name=store_name)
def connect_to_data_lake_store(config):
    # Imported there in order to not need to retrieve this large library if you don't use this function
    from azure.datalake.store import core, lib
    """
    Connection to Data Lake Store

    Uses username/password auth from the conf object and returns a
    filesystem handle for reading and writing on the Data Lake Store.

    :return: environment file system
    :rtype: AzureDLFileSystem object
    """
    token = lib.auth(tenant_id=config.get('tenantId'),
                     username=config.get('username'),
                     password=config.get('password'))
    account = config.get('accountName')
    return core.AzureDLFileSystem(token, store_name=account)
def upload_azure_datalake():
    """Upload each file in the module-level `dataset_file` list from /tmp
    to the configured Data Lake store; exit(1) on any failure.

    FIXES: the credentials file handle was leaked (open().read() with no
    close); the `json.dumps(x).replace('"', '')` round-trip on values that
    are already plain strings is replaced by direct access.
    """
    try:
        from azure.datalake.store import core, lib, multithread
        with open(os.environ['AZURE_AUTH_LOCATION']) as creds_file:
            sp_creds = json.load(creds_file)
        dl_filesystem_creds = lib.auth(tenant_id=sp_creds['tenantId'],
                                       client_secret=sp_creds['clientSecret'],
                                       client_id=sp_creds['clientId'],
                                       resource='https://datalake.azure.net/')
        datalake_client = core.AzureDLFileSystem(dl_filesystem_creds,
                                                 store_name=args.azure_datalake_account)
        for f in dataset_file:
            multithread.ADLUploader(datalake_client,
                                    lpath='/tmp/{0}'.format(f),
                                    rpath='{0}/{1}_dataset/{2}'.format(
                                        args.storage, args.notebook, f))
    except Exception as err:
        # Script boundary: report and exit with a failure status.
        print('Failed to upload test dataset to datalake store', str(err))
        sys.exit(1)
def authenticate(self):
    """Fetch a token for the Data Lake resource, remap it into the adal-style
    dict AzureDLFileSystem expects, and bind the client to self.adls."""
    token = auth.get_token(DATALAKE)  # Needed for ADLS datalake operations
    extra_fields = {
        'access': token['accessToken'],
        'resource': DATALAKE,
        'refresh': token.get('refreshToken', False),
        'time': time.time(),
        'tenant': TENANT_ID,
        'client': CLIENT_ID,
    }
    token.update(extra_fields)
    credential = lib.DataLakeCredential(token)
    # Create a filesystem client object
    self.adls = core.AzureDLFileSystem(credential, store_name=self.store_name)
def __init__(self, root, config, plugin_config):
    """Store connection settings from `config`, authenticate the service
    principal, and open the ADLS filesystem client."""
    self.root = root
    self.root_lnt = self.get_lnt_path(root)
    # Connection settings pulled from the plugin config dict.
    self.client_id = config["client-id"]
    self.client_secret = config["client-secret"]
    self.tenant_id = config["tenant-id"]
    self.resource = "https://datalake.azure.net/"
    self.adls_account = config["adls-account"]
    self.adls_creds = lib.auth(
        resource=self.resource,
        tenant_id=self.tenant_id,
        client_id=self.client_id,
        client_secret=self.client_secret,
        api_version=None,
    )
    self.adls_client = core.AzureDLFileSystem(
        self.adls_creds, store_name=self.adls_account)
def get_credentials(secret):
    """Authenticate the hard-coded service principal with `secret` and
    return an ADLS client for the 'iris-acm-prod-c15' store.

    NOTE(review): account/tenant/client identifiers are hard-coded here;
    consider moving them to configuration.
    """
    subscriptionId = 'c71f08af-8fcd-4f65-b991-143888d0cbd8'
    adlsAccountName = 'iris-acm-prod-c15'
    tenant = '72f988bf-86f1-41af-91ab-2d7cd011db47'
    RESOURCE = 'https://datalake.azure.net/'
    client_id = 'a6a835cf-c106-4ad8-a77e-0285a6e3e447'
    client_secret = secret
    # get the adl credentials
    creds = lib.auth(tenant_id = tenant,
                     client_secret = client_secret,
                     client_id = client_id,
                     resource = RESOURCE)
    return core.AzureDLFileSystem(creds, store_name=adlsAccountName)
def connect_adls(self):
    """ Creates a connection to Azure Data Lake Store

    Returns the AzureDLFileSystem client, or None if anything fails
    (the error is printed, not raised).
    """
    adls = None
    try:
        token = lib.auth(tenant_id=self.azure_tenant_id,
                         client_id=self.adls_client_id,
                         client_secret=self.adls_client_secret,
                         resource='https://datalake.azure.net/')
        adls = core.AzureDLFileSystem(token, store_name=self.adls_name)
    except Exception as ex:
        print("Unable to connect to Azure Data Lake! Error: %s" % (str(ex)))
    return adls
def get_adlcreds(self):
    """Authenticate with the credentials held on self and return an
    AzureDLFileSystem client for self.store_name.

    NOTE(review): if CloudError is caught and self.fail() returns instead
    of raising, the final return would hit an unbound local — behavior
    kept from the original.
    """
    try:
        token = lib.auth(tenant_id=self.credentials['tenant'],
                         client_secret=self.credentials['secret'],
                         client_id=self.credentials['client_id'],
                         resource=self.resource)
        fs_client = core.AzureDLFileSystem(token, store_name=self.store_name)
    except CloudError as exc:
        self.log('Error attempting to access to the Data lake instance.')
        self.fail("Error login to the Data Lake instance: {0}".format(
            str(exc)))
    return fs_client
def get_conn(self) -> core.AzureDLFileSystem:
    """Return a AzureDLFileSystem object."""
    # Guard clause: reuse the cached connection when present.
    if self._conn:
        return self._conn
    conn = self.get_connection(self.conn_id)
    opts = conn.extra_dejson
    # Each setting may live under its plain key or the prefixed Airflow key.
    self.account_name = (
        opts.get('account_name')
        or opts.get('extra__azure_data_lake__account_name'))
    tenant = (
        opts.get('tenant')
        or opts.get('extra__azure_data_lake__tenant'))
    creds = lib.auth(tenant_id=tenant,
                     client_secret=conn.password,
                     client_id=conn.login)
    self._conn = core.AzureDLFileSystem(creds, store_name=self.account_name)
    self._conn.connect()
    return self._conn
def get_ground_truth_from_adls(adls_account_name, tenant_id, ground_truth_adls_path):
    """
    :param adls_account_name: The data lake store
    :param tenant_id: Azure AD tentant
    :param ground_truth_adls_path: The data lake path to the Ground Truth
    :return: Data frame with the Ground Truth

    FIX: removed the dead `df = pd.DataFrame()` initializer — it could
    never be returned (any failure before the read raises out of the
    function).
    """
    adls_credentials = lib.auth(tenant_id=tenant_id,
                                resource='https://datalake.azure.net/')
    adlsFileSystemClient = core.AzureDLFileSystem(adls_credentials,
                                                  store_name=adls_account_name)
    with adlsFileSystemClient.open(ground_truth_adls_path, 'rb') as f:
        df = pd.read_pickle(f, compression=None)
    return df
def get_adl_client(store_name, client_id=None, client_secret=None, tenant_id=None):
    """Return an AzureDLFileSystem for store_name, authenticating with the
    given service-principal values or, for any missing one, the values from
    azure_utils.config.

    FIX: narrowed the bare `except:` (which swallowed everything, including
    KeyboardInterrupt) to ImportError, and moved the fallback assignments
    out of the try so only the import is guarded.
    """
    if not client_id or not client_secret or not tenant_id:
        try:
            from azure_utils.config import ADL_CLIENT_ID, ADL_CLIENT_SECRET, TENANT_ID
        except ImportError:
            raise Exception(
                'Pass client_id, client_secret, and tenant_id or define in config.py'
            )
        tenant_id = tenant_id or TENANT_ID
        client_id = client_id or ADL_CLIENT_ID
        client_secret = client_secret or ADL_CLIENT_SECRET
    token = lib.auth(tenant_id=tenant_id,
                     client_id=client_id,
                     client_secret=client_secret)
    return core.AzureDLFileSystem(token, store_name=store_name)
def create_from_env(
    cls,
    store_name: str,
    interactive: bool = False,
    adl_secret: Optional[ADLSecret] = None,
) -> "ADLGen1FileSystem":
    """
    Creates ADL Gen1 file system client.

    Parameters
    ----------
    store_name: str
        Name of datalake store.
    interactive: bool
        If true then use interactive authentication
    adl_secret: ADLSecret
        Azure authentication information

    Returns
    -------
    ADLGen1FileSystem
    """
    if interactive:
        logger.info("Attempting to use interactive azure authentication")
        token = lib.auth()
    else:
        # Exact-type check kept from original (subclasses of ADLSecret
        # are rejected too).
        if type(adl_secret) is not ADLSecret:
            raise ConfigException("Unsupported type for adl_secret '%s'"
                                  % type(adl_secret))
        adl_secret = cast(ADLSecret, adl_secret)
        logger.info("Attempting to use datalake service authentication")
        token = lib.auth(
            tenant_id=adl_secret.tenant_id,
            client_id=adl_secret.client_id,
            client_secret=adl_secret.client_secret,
            resource="https://datalake.azure.net/",
        )
    fs = core.AzureDLFileSystem(token, store_name=store_name)
    return cls(fs, store_name)
def __init__(self, adlscred):
    """
    Initialization
    argument adlscred only accept object of type ADLSCredential
    """
    # Validate the credential object before touching Azure.
    if not isinstance(adlscred, ADLSCredential):
        raise Exception("adlscred is not of type ADLSCredential")
    if not hasattr(adlscred, "is_valid"):
        raise Exception("adlscred has no function named is_valid")
    if not adlscred.is_valid():
        raise Exception(
            "adlscred is not valid, one more fields is missing")
    token = lib.auth(tenant_id=adlscred.tenant_id,
                     username=adlscred.username,
                     password=adlscred.password,
                     resource='https://datalake.azure.net/')
    self.client = core.AzureDLFileSystem(
        token, store_name=adlscred.adls_account_name)