コード例 #1
0
def create_adls_client(
    storename: str, dl_service_auth_str: str = None, interactive: bool = False
) -> core.AzureDLFileSystem:
    """
    Build and return an ADLS file system client.

    Parameters
    ----------
    storename: str
        Name of datalake store.
    dl_service_auth_str: str
        Authentication string to use
    interactive: bool
        If true then use interactive authentication

    Returns
    -------
    core.AzureDLFileSystem
        Instance of AzureDLFileSystem, ready to use
    """
    # Acquire credentials first, then bind them to the filesystem client.
    credentials = get_datalake_token(
        interactive=interactive, dl_service_auth_str=dl_service_auth_str
    )
    return core.AzureDLFileSystem(credentials, store_name=storename)
コード例 #2
0
 def __init__(self, **kwargs):
     """Authenticate against AAD using settings from kwargs and open the ADLS client."""
     credentials = lib.auth(
         tenant_id=kwargs['TENANT_ID'],
         client_secret=kwargs['AZURE_SECRET_KEY'],
         client_id=kwargs['AZURE_CLIENT_ID'],
         resource=kwargs['AZURE_RESOURCE'],
     )
     self.adl_conn_obj = core.AzureDLFileSystem(
         credentials, store_name=kwargs['STORE_NAME'])
コード例 #3
0
def add_usql_job(scripts_folder, directory_name, usql_file, adl_token,
                 adl_name, simulation_datetime, au_per_usql_job):
    """Download a U-SQL script from ADLS and submit it as an ADLA job.

    Parameters
    ----------
    scripts_folder, directory_name, usql_file : str
        Components of the remote path: scripts_folder/directory_name/usql_file.usql
    adl_token
        Credential used for both the job client and the filesystem client.
    adl_name : str
        Data Lake Analytics / Store account name.
    simulation_datetime : str or None
        If truthy, every 'DateTime.Now' in the script is replaced with this
        fixed datetime literal (for reproducible simulation runs).
    au_per_usql_job : int
        Degree of parallelism (Analytics Units) for the job.

    Returns
    -------
    str
        The newly generated job id (UUID4).
    """
    #may need to recreate adl_token every time in case it expires
    #adl_token = lib.auth(tenant_id=adl_tenant_id, client_id=adl_client_id, client_secret=adl_client_secret)

    adla_job_client = DataLakeAnalyticsJobManagementClient(
        adl_token, 'azuredatalakeanalytics.net')

    # download USQL file from ADLS into the current working directory
    usql_file_full_path = scripts_folder + '/' + directory_name + '/' + usql_file + '.usql'
    adls_file_system_client = core.AzureDLFileSystem(adl_token,
                                                     store_name=adl_name)
    multithread.ADLDownloader(adls_file_system_client,
                              lpath='.',
                              rpath=usql_file_full_path,
                              overwrite=True)

    # FIX: the original leaked the file handle via a bare open(); use a
    # context manager and read() instead of joining readlines().
    with open(usql_file + '.usql', 'r') as script_file:
        usql_script = script_file.read()

    if simulation_datetime:
        datetime_replace = "Convert.ToDateTime(\"" + simulation_datetime + "\")"
        usql_script = usql_script.replace('DateTime.Now', datetime_replace)

    jobId = str(uuid.uuid4())
    jobInfo = JobInformation(name=directory_name + '/' + usql_file,
                             type='USql',
                             degree_of_parallelism=au_per_usql_job,
                             properties=USqlJobProperties(script=usql_script))
    jobResult = adla_job_client.job.create(adl_name, jobId, jobInfo)

    return jobId
コード例 #4
0
def client(args):
    """Create a filesystem client object
    Parameters:
        args (class): Arguments.
    """
    return core.AzureDLFileSystem(store_name=args.account_name)
コード例 #5
0
ファイル: _client_factory.py プロジェクト: thegalah/azure-cli
def cf_datalake_store_filesystem(account_name):
    """Build an ADLS filesystem client from the CLI profile's login credentials."""
    credentials, _, _ = Profile().get_login_credentials(subscription_id=None)
    return core.AzureDLFileSystem(
        token=credentials,
        store_name=account_name,
        url_suffix=CLOUD.suffixes.azure_datalake_store_file_system_endpoint)
コード例 #6
0
def get_adls_file_list(beginning_path):
    """returns a data frame with detailed system information using a adls file system client"""
    creds = lib.auth(
        tenant_id=TENANT_ID,
        client_id=CLIENT_ID,
        client_secret=CLIENT_SECRET,
        resource='https://datalake.azure.net/',
    )
    filesystem = core.AzureDLFileSystem(creds, store_name=ADLS_ACCOUNT)
    listing = filesystem.ls(beginning_path)
    return pd.DataFrame(listing)
コード例 #7
0
def initializeAdls():
  'Initializes Azure DataLakeStore'
  log.debug("%sclientId: %s", LEV2, clientId)
  # SECURITY FIX: never write the client secret itself to the log;
  # record only whether it is present.
  log.debug("%sclientSecret: %s", LEV2, '<set>' if clientSecret else '<missing>')
  log.debug("%stenantId: %s", LEV2, tenantId)
  log.debug("%sadlsAccountName: %s", LEV2, adlsAccountName)
  # Service-principal authentication using module-level credential globals.
  token = lib.auth(tenant_id = tenantId, client_secret = clientSecret, client_id  = clientId)
  adl = core.AzureDLFileSystem(token, store_name=adlsAccountName)
  return ( adl )
コード例 #8
0
def cf_dls_filesystem(account_name):
    """Return an ADLS filesystem client authenticated via the CLI profile."""
    from azure.datalake.store import core
    credentials, _, _ = Profile().get_login_credentials(
        subscription_id=None,
        resource=CLOUD.endpoints.active_directory_data_lake_resource_id)
    return core.AzureDLFileSystem(
        token=credentials,
        store_name=account_name,
        url_suffix=CLOUD.suffixes.azure_datalake_store_file_system_endpoint)
def uploadfiletoadls(store_name, adls_dir, l_path, r_path):
    """Create adls_dir on the store and upload local l_path to remote r_path.

    Always returns 0.
    """
    # NOTE(review): relies on a module-level `credentials` object defined
    # elsewhere in the file — confirm it is initialised before this is called.
    fs_client = core.AzureDLFileSystem(credentials, store_name=store_name)
    fs_client.mkdir(adls_dir)
    multithread.ADLUploader(
        fs_client,
        lpath=l_path,
        rpath=r_path,
        nthreads=64,
        overwrite=True,
        buffersize=4194304,
        blocksize=4194304,
    )
    return 0
コード例 #10
0
    def dataLake(self):
        """Authenticate with AAD and build an ADLS filesystem client.

        Returns
        -------
        core.AzureDLFileSystem
            Client bound to the 'datalakearm' store.  FIX: the original built
            this client and discarded it (and assigned an unused
            subscriptionId local); it is now returned to callers.
        """
        adlCreds = lib.auth(tenant_id=TENANT_ID,
                            client_secret=KEY,
                            client_id=CLIENT,
                            resource=RESOURCE)
        adlsAccountName = 'datalakearm'

        ## Create a filesystem client object
        adlsFileSystemClient = core.AzureDLFileSystem(adlCreds,
                                                      store_name=adlsAccountName)
        return adlsFileSystemClient
コード例 #11
0
 def verify_datalake_directory(self, datalake_name, dir_name):
     """Return whether dir_name exists in the given Data Lake store.

     Returns True/False on success; returns None (after logging) if the
     check itself fails.
     """
     try:
         datalake_client = core.AzureDLFileSystem(self.dl_filesystem_creds, store_name=datalake_name)
         result = datalake_client.exists(dir_name)
         return result
     except Exception as err:
         # BUG FIX: traceback.print_exc() returns None, so concatenating it
         # into the message raised TypeError inside this handler; use
         # format_exc(), which returns the traceback as a string.
         tb = traceback.format_exc()
         logging.info(
             "Unable to verify Data Lake directory: " + str(err) + "\n Traceback: " + tb)
         append_result(str({"error": "Unable to verify Data Lake directory",
                            "error_message": str(err) + "\n Traceback: " + tb}))
         traceback.print_exc(file=sys.stdout)
コード例 #12
0
def renew_adl_token():
    # Background-thread loop: rebuilds the module-level `adl` client every
    # 30 minutes so its AAD token never expires.
    print("--- Creating a thread to renew ADL token periodically ---")
    global adl
    interval = 1800  # seconds between refreshes (30 minutes)
    while True:
        time.sleep(interval)
        try:
            # Re-authenticate with module-level service-principal globals and
            # swap in a fresh filesystem client atomically via the global name.
            token = lib.auth(tenant_id=tenant_id, client_id=client_id, client_secret=client_secret)
            adl = core.AzureDLFileSystem(token=token, store_name=adl_name)
            print("--- ADL token has been renewed ---")
        except Exception as e:
            # NOTE(review): raising here terminates the renewal thread
            # permanently; consider logging and retrying instead.
            raise Exception('Error while attempting to connect to Azure Data Lake Store:\n{}'.format(e))             
    print("--- Exiting the loop of renewing ADL token ---.")  # unreachable: the loop only exits via the raise above
コード例 #13
0
def get_adl_client(adls_account_name, tenant_id):
    """Authenticate interactively against the tenant and return an ADLS client.

    :param adls_account_name: Data Lake account
    :param tenant_id: Azure AD Tenant Id
    :return: client object
    """
    creds = lib.auth(tenant_id=tenant_id,
                     resource='https://datalake.azure.net/')
    return core.AzureDLFileSystem(creds, store_name=adls_account_name)
コード例 #14
0
    def get_conn(self):
        """Return a AzureDLFileSystem object."""
        conn = self.get_connection(self.conn_id)
        extras = conn.extra_dejson
        # Account name is cached on the hook for later use.
        self.account_name = extras.get('account_name')

        creds = lib.auth(tenant_id=extras.get('tenant'),
                         client_id=conn.login,
                         client_secret=conn.password)
        client = core.AzureDLFileSystem(creds, store_name=self.account_name)
        client.connect()
        return client
コード例 #15
0
def download(download_dir, data_dir):
    """Download every file under data_dir from ADLS into download_dir.

    Parameters
    ----------
    download_dir : str
        Local directory to write the files into.
    data_dir : str
        Remote ADLS directory to list and download.
    """
    token = lib.auth()
    adl = core.AzureDLFileSystem(token, store_name='bigdatadevdatalake')
    # BUG FIX: the original immediately overwrote download_dir with the
    # hard-coded "december_2018", making the parameter dead; it is now honoured.

    for f in adl.ls(data_dir):
        # f[-38:] keeps the trailing 38 characters of the remote path as the
        # local file name — presumably a fixed-width naming scheme; TODO confirm.
        print(f[-38:])
        outfile = os.path.join(download_dir, f[-38:])
        downloader = multithread.ADLDownloader(adl, f, outfile)
        if downloader.successful():
            print("Finished Downloading!")
        else:
            print("error in downloading!")
コード例 #16
0
    def __create_fs_client(self, account):
        """Adapt the token service's token into ADLS form and open a client for `account`."""
        raw_token = self.__token_service.token
        # Copied from azure.datalake.store.lib, since AzureDLFileSystem requires a different form of token than that provided by our token service.
        extra_fields = {
            'access': raw_token['accessToken'],
            'resource': lib.DEFAULT_RESOURCE_ENDPOINT,
            'refresh': raw_token.get('refreshToken', False),
            'time': time(),
            'tenant': self.__token_service.tenant_id,
            'client': lib.default_client,
        }
        raw_token.update(extra_fields)

        return core.AzureDLFileSystem(token=lib.DataLakeCredential(raw_token),
                                      store_name=account)
コード例 #17
0
def adls_client(key_vault_url: str, store_name: str) -> core.AzureDLFileSystem:
    """Build an ADLS client from service-principal secrets stored in Key Vault."""
    vault = secret_client(key_vault_url)

    creds = lib.auth(
        tenant_id=vault.get_secret("tenantid").value,
        client_id=vault.get_secret("spclientid").value,
        client_secret=vault.get_secret("spclientsecret").value,
        resource="https://datalake.azure.net/",
    )

    # Create a filesystem client object
    return core.AzureDLFileSystem(creds, store_name=store_name)
コード例 #18
0
ファイル: azure_utils.py プロジェクト: tdraebing/quorum
def get_adl_client(store_name,
                   client_id=None,
                   client_secret=None,
                   tenant_id=None):
    """Return an ADLS client for store_name.

    Credentials may be passed explicitly; any omitted value falls back to the
    AZURE_DATA_LAKE settings in quorum.config.config.

    Raises
    ------
    Exception
        If a credential is missing and the config module cannot supply it.
    """
    # BUG FIX: the original ignored the explicit client_id/client_secret/
    # tenant_id arguments entirely and used a bare `except:` that also
    # swallowed SystemExit/KeyboardInterrupt.
    if not client_id or not client_secret or not tenant_id:
        try:
            from quorum.config.config import AZURE_DATA_LAKE
            client_id = client_id or AZURE_DATA_LAKE["ADL_CLIENT_ID"]
            client_secret = client_secret or AZURE_DATA_LAKE["ADL_CLIENT_SECRET"]
            tenant_id = tenant_id or AZURE_DATA_LAKE["TENANT_ID"]
        except (ImportError, KeyError):
            raise Exception(
                'Pass client_id, client_secret, and tenant_id or define in config.py'
            )

    token = lib.auth(client_id=client_id,
                     client_secret=client_secret,
                     tenant_id=tenant_id)

    return core.AzureDLFileSystem(token, store_name=store_name)
コード例 #19
0
ファイル: utils.py プロジェクト: yannickRadji/drugs
def connect_to_data_lake_store(config):
    """
        Connection to Data Lake Store
        This connector is based on conf file and provide a AzureDLFileSystem object in order to read
        and write on the Data Lake Store.
        :return: environment file system
        :rtype: AzureDLFileSystem object
    """
    # BUG FIX: the docstring must be the first statement in the function to be
    # recognised as documentation; the original placed it after the import.
    from azure.datalake.store import core, lib
    # Imported there in order to not need to retrieve this large library if you don't use this function
    token = lib.auth(tenant_id=config.get('tenantId'),
                     username=config.get('username'),
                     password=config.get('password'))
    adls_account_name = config.get('accountName')
    adl = core.AzureDLFileSystem(token, store_name=adls_account_name)
    return adl
コード例 #20
0
def upload_azure_datalake():
    """Upload the test dataset files to the configured Azure Data Lake store.

    Reads service-principal credentials from the file named by the
    AZURE_AUTH_LOCATION environment variable; relies on module-level `args`
    and `dataset_file`.  Exits the process with status 1 on any failure.
    """
    try:
        from azure.datalake.store import core, lib, multithread
        # FIX: close the credentials file (original leaked the handle).
        with open(os.environ['AZURE_AUTH_LOCATION']) as auth_file:
            sp_creds = json.loads(auth_file.read())
        # FIX: json.loads already yields plain str values; the original
        # re-encoded each one with json.dumps() and stripped the quotes by hand.
        dl_filesystem_creds = lib.auth(tenant_id=sp_creds['tenantId'],
                                       client_secret=sp_creds['clientSecret'],
                                       client_id=sp_creds['clientId'],
                                       resource='https://datalake.azure.net/')
        datalake_client = core.AzureDLFileSystem(dl_filesystem_creds, store_name=args.azure_datalake_account)
        for f in dataset_file:
            multithread.ADLUploader(datalake_client,
                                    lpath='/tmp/{0}'.format(f),
                                    rpath='{0}/{1}_dataset/{2}'.format(args.storage, args.notebook, f))
    except Exception as err:
        print('Failed to upload test dataset to datalake store', str(err))
        sys.exit(1)
コード例 #21
0
ファイル: adls.py プロジェクト: dsame/cli-demo
    def authenticate(self):
        """Fetch an AAD token, adapt it for ADLS, and open the filesystem client."""
        raw_token = auth.get_token(DATALAKE)
        # Needed for ADLS datalake operations
        raw_token['access'] = raw_token['accessToken']
        raw_token['resource'] = DATALAKE
        raw_token['refresh'] = raw_token.get('refreshToken', False)
        raw_token['time'] = time.time()
        raw_token['tenant'] = TENANT_ID
        raw_token['client'] = CLIENT_ID
        credential = lib.DataLakeCredential(raw_token)

        # Create a filesystem client object
        self.adls = core.AzureDLFileSystem(credential,
                                           store_name=self.store_name)
コード例 #22
0
    def __init__(self, root, config, plugin_config):
        """Read ADLS connection settings from config and open the client."""
        self.root = root
        self.root_lnt = self.get_lnt_path(root)
        self.client_id = config["client-id"]
        self.client_secret = config["client-secret"]
        self.tenant_id = config["tenant-id"]
        self.resource = "https://datalake.azure.net/"
        self.adls_account = config["adls-account"]

        self.adls_creds = lib.auth(
            resource=self.resource,
            tenant_id=self.tenant_id,
            client_id=self.client_id,
            client_secret=self.client_secret,
            api_version=None,
        )
        self.adls_client = core.AzureDLFileSystem(
            self.adls_creds, store_name=self.adls_account)
コード例 #23
0
def get_credentials(secret):
    """Authenticate against the fixed production tenant and return an ADLS client.

    Parameters
    ----------
    secret : str
        Client secret for the hard-coded service principal below.

    Returns
    -------
    core.AzureDLFileSystem
        Client bound to the 'iris-acm-prod-c15' store.
    """
    # FIX: removed the unused subscriptionId local — it was never read.
    adlsAccountName = 'iris-acm-prod-c15'
    tenant = '72f988bf-86f1-41af-91ab-2d7cd011db47'
    RESOURCE = 'https://datalake.azure.net/'
    client_id = 'a6a835cf-c106-4ad8-a77e-0285a6e3e447'
    client_secret = secret

    # get the adl credentials
    adlCreds = lib.auth(tenant_id = tenant,
                    client_secret = client_secret,
                    client_id = client_id,
                    resource = RESOURCE)

    adl = core.AzureDLFileSystem(adlCreds, store_name=adlsAccountName)

    return adl
コード例 #24
0
    def connect_adls(self):
        """
        Creates a connection to Azure Data Lake Store
        """
        try:
            token = lib.auth(tenant_id=self.azure_tenant_id,
                             client_id=self.adls_client_id,
                             client_secret=self.adls_client_secret,
                             resource='https://datalake.azure.net/')
            return core.AzureDLFileSystem(token, store_name=self.adls_name)
        except Exception as ex:
            # Best-effort: report the failure and fall through to None,
            # matching the original's behaviour.
            print("Unable to connect to Azure Data Lake! Error: %s" % (str(ex)))
        return None
コード例 #25
0
    def get_adlcreds(self):
        # Authenticate with the stored service-principal credentials and
        # return an AzureDLFileSystem client for self.store_name.

        try:
            adl_creds = lib.auth(tenant_id=self.credentials['tenant'],
                                 client_secret=self.credentials['secret'],
                                 client_id=self.credentials['client_id'],
                                 resource=self.resource)

            adls_accountname = self.store_name
            adls_filesystemclient = core.AzureDLFileSystem(
                adl_creds, store_name=adls_accountname)

        except CloudError as exc:
            # NOTE(review): presumably self.fail() terminates execution
            # (Ansible-module style); if it ever returns normally, the return
            # below would raise UnboundLocalError because
            # adls_filesystemclient was never assigned — confirm fail()'s
            # semantics.  Also note lib.auth errors that are not CloudError
            # propagate uncaught.
            self.log('Error attempting to access to the Data lake instance.')
            self.fail("Error login to the Data Lake instance: {0}".format(
                str(exc)))

        return adls_filesystemclient
コード例 #26
0
    def get_conn(self) -> core.AzureDLFileSystem:
        """Return a AzureDLFileSystem object."""
        if self._conn:
            return self._conn

        # Lazily build and cache the connection on first use.
        conn = self.get_connection(self.conn_id)
        extras = conn.extra_dejson
        self.account_name = (extras.get('account_name')
                             or extras.get('extra__azure_data_lake__account_name'))
        tenant = (extras.get('tenant')
                  or extras.get('extra__azure_data_lake__tenant'))

        creds = lib.auth(tenant_id=tenant,
                         client_id=conn.login,
                         client_secret=conn.password)
        self._conn = core.AzureDLFileSystem(creds,
                                            store_name=self.account_name)
        self._conn.connect()
        return self._conn
コード例 #27
0
def get_ground_truth_from_adls(adls_account_name, tenant_id,
                               ground_truth_adls_path):
    """Load the pickled Ground Truth data frame from ADLS.

    :param adls_account_name: The data lake store
    :param tenant_id: Azure AD tenant
    :param ground_truth_adls_path: The data lake path to the Ground Truth
    :return: Data frame with the Ground Truth
    """
    # FIX: removed the dead `df = pd.DataFrame()` initialiser — it was always
    # overwritten by the read below.
    adls_credentials = lib.auth(tenant_id=tenant_id,
                                resource='https://datalake.azure.net/')
    adlsFileSystemClient = core.AzureDLFileSystem(adls_credentials,
                                                  store_name=adls_account_name)

    with adlsFileSystemClient.open(ground_truth_adls_path, 'rb') as f:
        df = pd.read_pickle(f, compression=None)

    return df
コード例 #28
0
def get_adl_client(store_name,
                   client_id=None,
                   client_secret=None,
                   tenant_id=None):
    """Return an ADLS client, filling missing credentials from azure_utils.config.

    Raises
    ------
    Exception
        If a credential is missing and the config module cannot supply it.
    """
    if not client_id or not client_secret or not tenant_id:
        try:
            from azure_utils.config import ADL_CLIENT_ID, ADL_CLIENT_SECRET, TENANT_ID
            tenant_id = tenant_id or TENANT_ID
            client_id = client_id or ADL_CLIENT_ID
            client_secret = client_secret or ADL_CLIENT_SECRET
        except ImportError:
            # FIX: narrowed the bare `except:` — it also swallowed
            # SystemExit/KeyboardInterrupt and genuine programming errors.
            raise Exception(
                'Pass client_id, client_secret, and tenant_id or define in config.py'
            )

    token = lib.auth(tenant_id=tenant_id,
                     client_id=client_id,
                     client_secret=client_secret)

    return core.AzureDLFileSystem(token, store_name=store_name)
コード例 #29
0
ファイル: adl1.py プロジェクト: Fe-59/gordo-dataset
    def create_from_env(
        cls,
        store_name: str,
        interactive: bool = False,
        adl_secret: Optional[ADLSecret] = None,
    ) -> "ADLGen1FileSystem":
        """
        Creates ADL Gen1 file system client.

        Parameters
        ----------
        store_name: str
            Name of datalake store.
        interactive: bool
            If true then use interactive authentication
        adl_secret: ADLSecret
            Azure authentication information

        Returns
        -------
        ADLGen1FileSystem

        Raises
        ------
        ConfigException
            If interactive is False and adl_secret is missing or of the
            wrong type.
        """

        if interactive:
            logger.info("Attempting to use interactive azure authentication")
            token = lib.auth()
        else:
            # FIX: isinstance() instead of an exact type() comparison, so
            # ADLSecret subclasses are accepted too (None is still rejected).
            if not isinstance(adl_secret, ADLSecret):
                raise ConfigException("Unsupported type for adl_secret '%s'" %
                                      type(adl_secret))
            adl_secret = cast(ADLSecret, adl_secret)
            logger.info("Attempting to use datalake service authentication")
            token = lib.auth(
                tenant_id=adl_secret.tenant_id,
                client_id=adl_secret.client_id,
                client_secret=adl_secret.client_secret,
                resource="https://datalake.azure.net/",
            )

        adl_client = core.AzureDLFileSystem(token, store_name=store_name)
        return cls(adl_client, store_name)
コード例 #30
0
ファイル: adls.py プロジェクト: tituscheng/adls-backup
    def __init__(self, adlscred):
        """
        Initialization argument adlscred only accept object of type ADLSCredential
        """
        # Validate the credential object before attempting authentication.
        if not isinstance(adlscred, ADLSCredential):
            raise Exception("adlscred is not of type ADLSCredential")
        if not hasattr(adlscred, "is_valid"):
            raise Exception("adlscred has no function named is_valid")
        if not adlscred.is_valid():
            raise Exception(
                "adlscred is not valid, one more fields is missing")

        # Username/password (non-service-principal) authentication flow.
        token = lib.auth(tenant_id=adlscred.tenant_id,
                         username=adlscred.username,
                         password=adlscred.password,
                         resource='https://datalake.azure.net/')
        self.client = core.AzureDLFileSystem(
            token, store_name=adlscred.adls_account_name)