Beispiel #1
0
def azurelistfile():
    """Print every entry returned by listing the store root, one per line.

    Authenticates with the module-level ``tenant``, ``client`` and
    ``clientsecret`` values and connects to the store named by ``adls``.
    """
    token = auth(tenant_id=tenant,
                 client_id=client,
                 client_secret=clientsecret)
    azureDLS = AzureDLFileSystem(store_name=adls, token=token)
    childItems = azureDLS.ls("/")
    # Function-call form of print: the bare ``print '...'`` statement is a
    # SyntaxError on Python 3, while this form prints the same text on both.
    print('\n'.join(str(item) for item in childItems))
Beispiel #2
0
def azurewritefile():
    """Write the bytes ``b'example'`` to the remote file ``vishfile``.

    Authenticates with the module-level ``tenant``, ``client`` and
    ``clientsecret`` values and connects to the store named by ``adls``.
    """
    credentials = auth(
        tenant_id=tenant,
        client_id=client,
        client_secret=clientsecret,
    )
    store = AzureDLFileSystem(store_name=adls, token=credentials)
    with store.open('vishfile', 'wb') as remote_file:
        remote_file.write(b'example')
Beispiel #3
0
def azurereadfile(filename):
    """Print the first line of a file stored in the Data Lake store.

    Authenticates with the module-level ``tenant``, ``client`` and
    ``clientsecret`` values and connects to the store named by ``adls``.

    :param filename: path of the remote file to read
    """
    token = auth(tenant_id=tenant,
                 client_id=client,
                 client_secret=clientsecret)
    azureDLS = AzureDLFileSystem(store_name=adls, token=token)
    # Open the file the caller asked for; previously the argument was ignored
    # and the literal 'vishfile' was always read.
    with azureDLS.open(filename, blocksize=2**20) as f:
        print(f.readline())
Beispiel #4
0
 def __init__(self, **kwargs):
     """Authenticate and create the Data Lake file system client.

     Required keyword arguments: ``TENANT_ID``, ``AZURE_SECRET_KEY``,
     ``AZURE_CLIENT_ID``, ``AZURE_RESOURCE`` and ``STORE_NAME``.  A missing
     key raises ``KeyError``.  The client is stored on ``self.adl_conn_obj``.
     """
     credentials = lib.auth(
         tenant_id=kwargs['TENANT_ID'],
         client_secret=kwargs['AZURE_SECRET_KEY'],
         client_id=kwargs['AZURE_CLIENT_ID'],
         resource=kwargs['AZURE_RESOURCE'],
     )
     store_name = kwargs['STORE_NAME']
     self.adl_conn_obj = core.AzureDLFileSystem(credentials,
                                                store_name=store_name)
Beispiel #5
0
def get_datalake_token(
    interactive: bool = False, dl_service_auth_str: str = None
) -> lib.DataLakeCredential:
    """
    Provides a token for azure datalake, either by parsing a datalake service
    authentication string or using interactive authentication

    Parameters
    ----------
    interactive: bool
        If true then use interactive authentication
    dl_service_auth_str: str
        String on the format tenant:client_id:client_secret. When not given
        (or given as the literal string "None") the environment variable
        DL_SERVICE_AUTH_STR is used instead. Colons after the second one are
        treated as part of the client secret.

    Returns
    -------
    lib.DataLakeCredential
        A lib.DataLakeCredential which can be used to authenticate towards the datalake

    Raises
    ------
    ValueError
        If `interactive` is false and no service authentication string is available
    IndexError
        If the service authentication string has fewer than three
        colon-separated parts
    """
    # The str() comparison deliberately treats both None and the literal
    # string "None" as "not provided".
    dl_service_auth_str = (
        os.environ.get("DL_SERVICE_AUTH_STR")
        if str(dl_service_auth_str) == "None"
        else dl_service_auth_str
    )
    if interactive:
        logger.info("Attempting to use interactive azure authentication")
        return lib.auth()
    elif dl_service_auth_str:
        logger.info("Attempting to use datalake service authentication")
        # maxsplit=2 keeps any ':' inside the client secret intact instead of
        # silently truncating the secret at its first colon.
        dl_service_auth_elems = dl_service_auth_str.split(":", 2)
        tenant = dl_service_auth_elems[0]
        client_id = dl_service_auth_elems[1]
        client_secret = dl_service_auth_elems[2]
        token = lib.auth(
            tenant_id=tenant,
            client_secret=client_secret,
            client_id=client_id,
            resource="https://datalake.azure.net/",
        )
        return token
    else:
        raise ValueError(
            f"Either interactive (value: {interactive}) must be True, "
            f"or dl_service_auth_str (value: {dl_service_auth_str}) "
            "must be set. "
        )
def get_adls_file_list(beginning_path):
    """Return a DataFrame built from the listing of ``beginning_path``.

    Authenticates with the module-level ``TENANT_ID``, ``CLIENT_SECRET`` and
    ``CLIENT_ID`` values and lists the path on the store ``ADLS_ACCOUNT``.
    """
    credentials = lib.auth(
        tenant_id=TENANT_ID,
        client_secret=CLIENT_SECRET,
        client_id=CLIENT_ID,
        resource='https://datalake.azure.net/',
    )
    file_system = core.AzureDLFileSystem(credentials, store_name=ADLS_ACCOUNT)
    listing = file_system.ls(beginning_path)
    return pd.DataFrame(listing)
Beispiel #7
0
 def do_connect(self):
     """Authenticate and store the file system client on ``self.azure_fs``.

     Uses ``self.tenant_id``, ``self.client_id``, ``self.client_secret``
     and ``self.store_name``.
     """
     credentials = lib.auth(tenant_id=self.tenant_id,
                            client_id=self.client_id,
                            client_secret=self.client_secret)
     self.azure_fs = AzureDLFileSystem(token=credentials,
                                       store_name=self.store_name)
Beispiel #8
0
def initializeAdls():
  """Initializes Azure DataLakeStore.

  Logs the connection settings at debug level, authenticates with the
  module-level ``tenantId``, ``clientSecret`` and ``clientId`` and returns
  an ``AzureDLFileSystem`` for ``adlsAccountName``.
  """
  log.debug("%sclientId: %s", LEV2, clientId)
  # Never write the secret itself to the log; only record whether one is set.
  log.debug("%sclientSecret: %s", LEV2, "<redacted>" if clientSecret else "<not set>")
  log.debug("%stenantId: %s", LEV2, tenantId)
  log.debug("%sadlsAccountName: %s", LEV2, adlsAccountName)
  token = lib.auth(tenant_id=tenantId, client_secret=clientSecret, client_id=clientId)
  adl = core.AzureDLFileSystem(token, store_name=adlsAccountName)
  return adl
Beispiel #9
0
def get_datalake_token(
    interactive=True, dl_service_auth_str=None
) -> lib.DataLakeCredential:
    """
    Provides a token for azure datalake, either by parsing a datalake service
    authentication string or using interactive authentication

    Parameters
    ----------
    interactive
        If true then fall back to interactive authentication in case
        `dl_service_auth_str` is empty
    dl_service_auth_str
        String on the format tenant:client_id:client_secret. Colons after
        the second one are treated as part of the client secret.

    Returns
    -------
    lib.DataLakeCredential
        A lib.DataLakeCredential which can be used to authenticate towards the datalake

    Raises
    ------
    ValueError
        If `dl_service_auth_str` is empty and `interactive` is false
    IndexError
        If `dl_service_auth_str` has fewer than three colon-separated parts
    """
    logger.info("Looking for ways to authenticate with data lake")
    if dl_service_auth_str:
        logger.info("Attempting to use datalake service authentication")
        # maxsplit=2 keeps any ':' inside the client secret intact instead of
        # silently truncating the secret at its first colon.
        dl_service_auth_elems = dl_service_auth_str.split(":", 2)
        tenant = dl_service_auth_elems[0]
        client_id = dl_service_auth_elems[1]
        client_secret = dl_service_auth_elems[2]
        token = lib.auth(
            tenant_id=tenant,
            client_secret=client_secret,
            client_id=client_id,
            resource="https://datalake.azure.net/",
        )
        return token
    elif interactive:
        logger.info("Attempting to use interactive azure authentication")
        return lib.auth()
    else:
        raise ValueError(
            f"Either interactive (value: {interactive}) must be True, "
            f"or dl_service_auth_str (value: {dl_service_auth_str}) "
            "must be set. "
        )
 def __init__(self,
              tenant_id,
              client_id,
              client_secret,
              store_name,
              metadata=None):
     """Authenticate, create ``self.adl`` and initialise the parent class.

     The three credentials are passed to ``lib.auth``; the resulting token
     and ``store_name`` are used to build the ``AzureDLFileSystem``.
     ``metadata`` is forwarded unchanged to the parent ``__init__``.
     """
     credentials = lib.auth(
         tenant_id=tenant_id,
         client_id=client_id,
         client_secret=client_secret,
     )
     self.adl = AzureDLFileSystem(store_name=store_name, token=credentials)
     super(WritableTextFilesADLSource, self).__init__(metadata=metadata)
Beispiel #11
0
    def create_from_env(
        cls,
        store_name: str,
        interactive: bool = False,
        adl_secret: Optional[ADLSecret] = None,
    ) -> "ADLGen1FileSystem":
        """
        Creates ADL Gen1 file system client.

        Parameters
        ----------
        store_name: str
            Name of datalake store.
        interactive: bool
            If true then use interactive authentication
        adl_secret: ADLSecret
            Azure authentication information. Required (and must be an
            ADLSecret instance) when `interactive` is false.

        Returns
        -------
        ADLGen1FileSystem

        Raises
        ------
        ConfigException
            If `interactive` is false and `adl_secret` is not an ADLSecret
        """

        if interactive:
            logger.info("Attempting to use interactive azure authentication")
            token = lib.auth()
        else:
            # isinstance (rather than an exact type comparison) also accepts
            # subclasses of ADLSecret, and narrows the Optional for type checkers.
            if not isinstance(adl_secret, ADLSecret):
                raise ConfigException("Unsupported type for adl_secret '%s'" %
                                      type(adl_secret))
            logger.info("Attempting to use datalake service authentication")
            token = lib.auth(
                tenant_id=adl_secret.tenant_id,
                client_id=adl_secret.client_id,
                client_secret=adl_secret.client_secret,
                resource="https://datalake.azure.net/",
            )

        adl_client = core.AzureDLFileSystem(token, store_name=store_name)
        return cls(adl_client, store_name)
Beispiel #12
0
    def dataLake(self):
        """Authenticate and build a file system client for ``datalakearm``.

        Uses the module-level ``TENANT_ID``, ``KEY``, ``CLIENT`` and
        ``RESOURCE`` values.  Nothing is returned or stored in the code
        visible here.
        """
        credentials = lib.auth(
            tenant_id=TENANT_ID,
            client_secret=KEY,
            client_id=CLIENT,
            resource=RESOURCE,
        )

        # Read from the module-level ``subscription_id``; not used further in
        # the visible part of this method.
        subscription = subscription_id
        account_name = 'datalakearm'

        # File system client for the account above.
        file_system_client = core.AzureDLFileSystem(credentials,
                                                    store_name=account_name)
    def get_conn(self):
        """Return a connected AzureDLFileSystem object.

        Connection details come from ``self.get_connection(self.conn_id)``:
        the login is the client id, the password is the client secret, and
        the ``tenant`` and ``account_name`` entries are read from the
        connection's extras.  ``self.account_name`` is set as a side effect.
        """
        connection = self.get_connection(self.conn_id)
        extras = connection.extra_dejson
        self.account_name = extras.get('account_name')

        credentials = lib.auth(
            tenant_id=extras.get('tenant'),
            client_secret=connection.password,
            client_id=connection.login,
        )
        client = core.AzureDLFileSystem(credentials,
                                        store_name=self.account_name)
        client.connect()
        return client
Beispiel #14
0
def download(download_dir, data_dir):
    """Download every entry listed under ``data_dir`` into ``download_dir``.

    Authenticates with ``lib.auth()`` (no arguments) against the store
    ``bigdatadevdatalake``.  Each file is saved under the last 38 characters
    of its remote path and a success or failure message is printed per file.

    :param download_dir: local directory that receives the files
    :param data_dir: remote directory to list and download from
    """
    token = lib.auth()
    adl = core.AzureDLFileSystem(token, store_name='bigdatadevdatalake')

    # NOTE: ``download_dir`` used to be overwritten with the literal
    # "december_2018" here, so the caller's argument was ignored.
    for remote_path in adl.ls(data_dir):
        # presumably the remote file names have a fixed 38-character length —
        # TODO confirm against the data layout
        local_name = remote_path[-38:]
        print(local_name)
        outfile = os.path.join(download_dir, local_name)
        downloader = multithread.ADLDownloader(adl, remote_path, outfile)
        if downloader.successful():
            print("Finished Downloading!")
        else:
            print("error in downloading!")
Beispiel #15
0
def get_adl_client(adls_account_name, tenant_id):
    """
    Build a Data Lake file system client for the given account.

    No client id or secret is passed to ``lib.auth``; only the tenant and
    the Data Lake resource URL are supplied.

    :param adls_account_name: Data Lake account
    :param tenant_id: Azure AD Tenant Id
    :return: client object
    """
    credentials = lib.auth(
        tenant_id=tenant_id,
        resource='https://datalake.azure.net/',
    )
    return core.AzureDLFileSystem(credentials, store_name=adls_account_name)
Beispiel #16
0
 def __init__(self):
     """Create the Azure management clients and Data Lake credentials.

     Sets ``AZURE_AUTH_LOCATION`` to ``/root/azure_auth.json``, builds each
     management client via ``get_client_from_auth_file``, loads the same
     file as JSON into ``self.sp_creds`` and authenticates against the Data
     Lake resource with its ``tenantId``, ``clientSecret`` and ``clientId``.
     """
     os.environ['AZURE_AUTH_LOCATION'] = '/root/azure_auth.json'
     self.compute_client = get_client_from_auth_file(ComputeManagementClient)
     self.resource_client = get_client_from_auth_file(ResourceManagementClient)
     self.network_client = get_client_from_auth_file(NetworkManagementClient)
     self.storage_client = get_client_from_auth_file(StorageManagementClient)
     self.datalake_client = get_client_from_auth_file(DataLakeStoreAccountManagementClient)
     self.authorization_client = get_client_from_auth_file(AuthorizationManagementClient)
     # Context manager so the auth file handle is closed deterministically.
     with open(os.environ['AZURE_AUTH_LOCATION']) as auth_file:
         self.sp_creds = json.load(auth_file)
     # NOTE(review): the dumps()/replace() round trip re-serialises each value
     # and strips every double quote; it looks like a string coercion and would
     # alter values containing quotes or backslashes — confirm before simplifying.
     self.dl_filesystem_creds = lib.auth(tenant_id=json.dumps(self.sp_creds['tenantId']).replace('"', ''),
                                         client_secret=json.dumps(self.sp_creds['clientSecret']).replace('"', ''),
                                         client_id=json.dumps(self.sp_creds['clientId']).replace('"', ''),
                                         resource='https://datalake.azure.net/')
Beispiel #17
0
def renew_adl_token():
    """Re-authenticate every 1800 seconds and replace the global ``adl`` client.

    Runs forever: sleeps, requests a new token with the module-level
    ``tenant_id``, ``client_id`` and ``client_secret``, and rebinds the
    global ``adl`` to a fresh client for ``adl_name``.  The loop only ends
    when a renewal fails, in which case an ``Exception`` describing the
    failure is raised.
    """
    global adl
    print("--- Creating a thread to renew ADL token periodically ---")
    interval = 1800
    while True:
        time.sleep(interval)
        try:
            token = lib.auth(tenant_id=tenant_id, client_id=client_id, client_secret=client_secret)
            adl = core.AzureDLFileSystem(token=token, store_name=adl_name)
            print("--- ADL token has been renewed ---")
        except Exception as e:
            raise Exception('Error while attempting to connect to Azure Data Lake Store:\n{}'.format(e))
    # The loop has no break, so nothing after it can run; the unreachable
    # "exiting the loop" message that used to follow has been removed.
Beispiel #18
0
    def get_conn(self):
        """Return a connected AzureDLFileSystem object.

        Reads the connection identified by ``self.conn_id``; its extras
        supply ``account_name`` (also stored on ``self.account_name``) and
        ``tenant``, while its login and password are used as client id and
        client secret.
        """
        conn = self.get_connection(self.conn_id)
        options = conn.extra_dejson
        self.account_name = options.get('account_name')

        token = lib.auth(
            tenant_id=options.get('tenant'),
            client_secret=conn.password,
            client_id=conn.login,
        )
        file_system = core.AzureDLFileSystem(token, store_name=self.account_name)
        file_system.connect()
        return file_system
Beispiel #19
0
def adls_client(key_vault_url: str, store_name: str) -> core.AzureDLFileSystem:
    """Build a Data Lake client using credentials fetched from a key vault.

    The secrets ``tenantid``, ``spclientid`` and ``spclientsecret`` are read
    through ``secret_client(key_vault_url)`` and passed to ``lib.auth``.
    """
    vault = secret_client(key_vault_url)

    credentials = lib.auth(
        tenant_id=vault.get_secret("tenantid").value,
        client_id=vault.get_secret("spclientid").value,
        client_secret=vault.get_secret("spclientsecret").value,
        resource="https://datalake.azure.net/",
    )

    # File system client bound to the requested store.
    return core.AzureDLFileSystem(credentials, store_name=store_name)
Beispiel #20
0
def get_adl_client(store_name,
                   client_id=None,
                   client_secret=None,
                   tenant_id=None):
    """Return an ``AzureDLFileSystem`` for ``store_name``.

    Credentials passed as arguments are used as given.  Any credential left
    empty is taken from ``quorum.config.config.AZURE_DATA_LAKE`` (keys
    ``ADL_CLIENT_ID``, ``ADL_CLIENT_SECRET`` and ``TENANT_ID``).

    :raises Exception: if a credential is missing and cannot be read from
        the config module
    """
    if not client_id or not client_secret or not tenant_id:
        # Only configuration problems are translated into the "pass or
        # define" message; authentication errors below propagate unchanged
        # instead of being hidden behind it.
        try:
            from quorum.config.config import AZURE_DATA_LAKE
            client_id = client_id or AZURE_DATA_LAKE["ADL_CLIENT_ID"]
            client_secret = client_secret or AZURE_DATA_LAKE["ADL_CLIENT_SECRET"]
            tenant_id = tenant_id or AZURE_DATA_LAKE["TENANT_ID"]
        except (ImportError, KeyError):
            raise Exception(
                'Pass client_id, client_secret, and tenant_id or define in config.py'
            )

    token = lib.auth(client_id=client_id,
                     client_secret=client_secret,
                     tenant_id=tenant_id)

    return core.AzureDLFileSystem(token, store_name=store_name)
def send_events_to_adls(p_message, p_filename):
    """Write ``p_message`` to the remote file ``p_filename`` in binary mode.

    Credentials are created on first use and kept in the module-level
    ``adl_creds``; a new file system client for ``adls_account_name`` is
    built on every call.  Progress is reported through ``debug``.
    """
    global adl_creds

    debug("sendevents (+): ")
    debug("p_filename (+): " + str(p_filename))
    debug(p_message)

    # Authenticate once and reuse the credentials for later calls.
    if adl_creds is None:
        debug("Calling lib auth for adl creds")
        adl_creds = lib.auth(
            tenant_id=azure_ad_properties_directory_id,
            client_secret=client_secret,
            client_id=application_id,
            resource=RESOURCE_FILE_OPERATIONS,
        )

    debug("adl_creds = " + str(adl_creds))

    client = core.AzureDLFileSystem(adl_creds, store_name=adls_account_name)
    debug("adls_file_system_client = " + str(client))

    # Earlier notes kept from the original author: listing '/' was dropped as
    # an unnecessary call, and the ADLUploader API was found to create the
    # target directory on the fly, so no explicit mkdir or temporary-file
    # upload is performed here.
    debug("uploading the content to ADL = file: " + str(p_filename))
    with client.open(p_filename, 'wb') as remote_file:
        remote_file.write(p_message)

    debug("file uploaded!")
Beispiel #22
0
def upload_azure_datalake():
    """Upload every file in ``dataset_file`` from ``/tmp`` to the Data Lake store.

    Credentials are read from the JSON file named by the
    ``AZURE_AUTH_LOCATION`` environment variable.  Files are uploaded to
    ``<args.storage>/<args.notebook>_dataset/<file>`` on the store
    ``args.azure_datalake_account``.  On any error a message is printed and
    the process exits with status 1.
    """
    try:
        from azure.datalake.store import core, lib, multithread
        # Context manager so the auth file handle is closed deterministically.
        with open(os.environ['AZURE_AUTH_LOCATION']) as auth_file:
            sp_creds = json.load(auth_file)
        # NOTE(review): the dumps()/replace() round trip re-serialises each
        # value and strips every double quote; it would alter values containing
        # quotes or backslashes — confirm before simplifying.
        dl_filesystem_creds = lib.auth(tenant_id=json.dumps(sp_creds['tenantId']).replace('"', ''),
                                       client_secret=json.dumps(sp_creds['clientSecret']).replace('"', ''),
                                       client_id=json.dumps(sp_creds['clientId']).replace('"', ''),
                                       resource='https://datalake.azure.net/')
        datalake_client = core.AzureDLFileSystem(dl_filesystem_creds, store_name=args.azure_datalake_account)
        for f in dataset_file:
            multithread.ADLUploader(datalake_client,
                                    lpath='/tmp/{0}'.format(f),
                                    rpath='{0}/{1}_dataset/{2}'.format(args.storage, args.notebook, f))
    except Exception as err:
        print('Failed to upload test dataset to datalake store', str(err))
        sys.exit(1)
Beispiel #23
0
    def __init__(self, root, config, plugin_config):
        """Store connection settings and create the Data Lake client.

        ``config`` must provide ``client-id``, ``client-secret``,
        ``tenant-id`` and ``adls-account``.  ``plugin_config`` is accepted
        but not read here.  The credentials are kept on ``self.adls_creds``
        and the client on ``self.adls_client``.
        """
        self.root = root
        self.root_lnt = self.get_lnt_path(root)

        # Service principal settings.
        self.client_id = config["client-id"]
        self.client_secret = config["client-secret"]
        self.tenant_id = config["tenant-id"]
        self.resource = "https://datalake.azure.net/"
        self.adls_account = config["adls-account"]

        credentials = lib.auth(
            resource=self.resource,
            tenant_id=self.tenant_id,
            client_id=self.client_id,
            client_secret=self.client_secret,
            api_version=None,
        )
        self.adls_creds = credentials
        self.adls_client = core.AzureDLFileSystem(credentials,
                                                  store_name=self.adls_account)
Beispiel #24
0
def connect_to_data_lake_store(config):
    """
        Connection to Data Lake Store
        This connector is based on conf file and provide a AzureDLFileSystem object in order to read
        and write on the Data Lake Store.

        :param config: mapping read with ``.get`` for the keys ``tenantId``,
            ``username``, ``password`` and ``accountName``
        :return: environment file system
        :rtype: AzureDLFileSystem object
    """
    # The docstring must be the first statement of the function to be picked
    # up as ``__doc__``; it used to sit after this import.
    # Imported here in order to not need to retrieve this large library if you don't use this function
    from azure.datalake.store import core, lib
    token = lib.auth(tenant_id=config.get('tenantId'),
                     username=config.get('username'),
                     password=config.get('password'))
    adls_account_name = config.get('accountName')
    adl = core.AzureDLFileSystem(token, store_name=adls_account_name)
    return adl
def __test_retry_auth(error_code,
                      error_string,
                      is_exception_expected,
                      total_tries=4,
                      last_try_status=200,
                      last_try_body=None):
    """Register mocked auth responses and check how ``auth`` reacts to them.

    Registers ``total_tries`` rounds of (discovery 200, token request
    failing with ``error_code`` / ``error_string``), followed by one final
    round whose token response uses ``last_try_status`` and
    ``last_try_body`` (a minimal bearer-token body by default).  ``auth`` is
    then called: it must return a ``DataLakeCredential`` when
    ``is_exception_expected`` is false, and raise ``HTTPError`` or
    ``AdalError`` when it is true.
    """
    import re
    import adal
    # Raw strings: the pattern text is unchanged, but "\/", "\." and "\?" are
    # invalid escape sequences in a normal string literal.
    end_point_discovery = re.compile(
        r"https:\/\/login\.microsoftonline\.com\/common\/discovery\/"
        r"instance\?authorization_endpoint=.+")
    mock_url_auth = "https://login.microsoftonline.com/" + settings.TENANT_ID + "/oauth2/token"

    body_discovery = r'{"tenant_discovery_endpoint":"https://login.microsoftonline.com/' + TENANT_ID + \
                     '/.well-known/openid-configuration"}'
    body_error = r'{"error":"' + error_string + r'","error_description":"0","error_codes":[0],"timestamp":"0",' \
                                                r'"trace_id":"0","correlation_id":"0"}'
    if last_try_body is None:
        last_try_body = r'{"token_type":"Bearer","expires_in":"1","ext_expires_in":"1","expires_on":"1",' \
                        r'"not_before":"1","resource":"https://datalake.azure.net/","access_token":"a"}'

    # Failing attempts: each one needs its own discovery + token response.
    while total_tries > 0:
        responses.add(responses.GET,
                      end_point_discovery,
                      body=body_discovery,
                      status=200)
        responses.add(responses.POST,
                      mock_url_auth,
                      body=body_error,
                      status=error_code)
        total_tries -= 1

    # Final attempt, whose outcome is controlled by the caller.
    responses.add(responses.GET,
                  end_point_discovery,
                  body=body_discovery,
                  status=200)
    responses.add(responses.POST,
                  mock_url_auth,
                  body=last_try_body,
                  status=last_try_status)
    try:
        token = auth(tenant_id=TENANT_ID,
                     client_secret='GARBAGE',
                     client_id=CLIENT_ID)
        assert isinstance(token, DataLakeCredential)
        assert not is_exception_expected
    except (HTTPError, adal.adal_error.AdalError):
        assert is_exception_expected
Beispiel #26
0
    def connect_adls(self):
        """
        Create a connection to Azure Data Lake Store.

        Returns the ``AzureDLFileSystem`` for ``self.adls_name``, or ``None``
        when authentication or client creation raises; in that case the
        error is printed rather than propagated.
        """
        try:
            credentials = lib.auth(
                tenant_id=self.azure_tenant_id,
                client_id=self.adls_client_id,
                client_secret=self.adls_client_secret,
                resource='https://datalake.azure.net/',
            )
            return core.AzureDLFileSystem(credentials, store_name=self.adls_name)
        except Exception as ex:
            print("Unable to connect to Azure Data Lake! Error: %s" % (str(ex)))
            return None
 def __init__(self,
              tenant_id=None,
              client_id=None,
              client_secret=None,
              **kwargs):
     """Authenticate and create ``self.fs`` from the remaining keyword arguments.

     The three credentials are stored on the instance and passed to
     ``lib.auth``.  The resulting token is added to ``kwargs`` under
     ``"token"`` and the whole mapping is forwarded to ``AzureDLFileSystem``.
     """
     self.tenant_id, self.client_id, self.client_secret = (
         tenant_id, client_id, client_secret)
     self.kwargs = kwargs
     credentials = lib.auth(tenant_id=self.tenant_id,
                            client_id=self.client_id,
                            client_secret=self.client_secret)
     self.kwargs["token"] = credentials
     self.fs = AzureDLFileSystem(**self.kwargs)
Beispiel #28
0
def get_credentials(secret):
    """Return an ``AzureDLFileSystem`` for the hard-coded account.

    The tenant, client id, resource URL and account name are fixed in this
    function; only the client secret is supplied by the caller.

    :param secret: client secret passed to ``lib.auth``
    :return: the file system client (not the raw credentials)
    """
    adlsAccountName = 'iris-acm-prod-c15'
    tenant = '72f988bf-86f1-41af-91ab-2d7cd011db47'
    RESOURCE = 'https://datalake.azure.net/'
    client_id = 'a6a835cf-c106-4ad8-a77e-0285a6e3e447'

    # get the adl credentials
    adlCreds = lib.auth(tenant_id=tenant,
                        client_secret=secret,
                        client_id=client_id,
                        resource=RESOURCE)

    return core.AzureDLFileSystem(adlCreds, store_name=adlsAccountName)
    def get_adlcreds(self):
        """Authenticate and return a file system client for ``self.store_name``.

        Credentials come from ``self.credentials`` (keys ``tenant``,
        ``secret`` and ``client_id``) and ``self.resource``.  A
        ``CloudError`` is logged and reported through ``self.fail``.
        """
        try:
            token = lib.auth(
                tenant_id=self.credentials['tenant'],
                client_secret=self.credentials['secret'],
                client_id=self.credentials['client_id'],
                resource=self.resource,
            )
            file_system_client = core.AzureDLFileSystem(
                token, store_name=self.store_name)
        except CloudError as exc:
            self.log('Error attempting to access to the Data lake instance.')
            self.fail("Error login to the Data Lake instance: {0}".format(
                str(exc)))

        # presumably self.fail does not return; verify against its definition
        return file_system_client
Beispiel #30
0
    def get_conn(self) -> core.AzureDLFileSystem:
        """Return the AzureDLFileSystem object, creating it on first use.

        The client is cached on ``self._conn``.  Account name and tenant are
        read from the connection extras, accepting either the short keys
        (``account_name``, ``tenant``) or the
        ``extra__azure_data_lake__``-prefixed ones.
        """
        if self._conn:
            return self._conn

        conn = self.get_connection(self.conn_id)
        extras = conn.extra_dejson
        self.account_name = (
            extras.get('account_name')
            or extras.get('extra__azure_data_lake__account_name')
        )
        tenant = (
            extras.get('tenant')
            or extras.get('extra__azure_data_lake__tenant')
        )

        credentials = lib.auth(
            tenant_id=tenant,
            client_secret=conn.password,
            client_id=conn.login,
        )
        self._conn = core.AzureDLFileSystem(credentials,
                                            store_name=self.account_name)
        self._conn.connect()
        return self._conn
Beispiel #31
0
def get_ground_truth_from_adls(adls_account_name, tenant_id,
                               ground_truth_adls_path):
    """
    Load the pickled Ground Truth from the data lake.

    :param adls_account_name: The data lake store
    :param tenant_id: Azure AD tenant
    :param ground_truth_adls_path: The data lake path to the Ground Truth
    :return: Data frame with the Ground Truth
    """
    credentials = lib.auth(
        tenant_id=tenant_id,
        resource='https://datalake.azure.net/',
    )
    file_system = core.AzureDLFileSystem(credentials,
                                         store_name=adls_account_name)

    # NOTE(review): read_pickle executes whatever the pickle contains; the
    # remote file must come from a trusted source.
    with file_system.open(ground_truth_adls_path, 'rb') as remote_file:
        return pd.read_pickle(remote_file, compression=None)
Beispiel #32
0
def get_adl_client(store_name,
                   client_id=None,
                   client_secret=None,
                   tenant_id=None):
    """Return an ``AzureDLFileSystem`` for ``store_name``.

    Any credential not passed in is taken from ``azure_utils.config``
    (``ADL_CLIENT_ID``, ``ADL_CLIENT_SECRET`` and ``TENANT_ID``).

    :raises Exception: if a credential is missing and the config values
        cannot be imported
    """
    if not client_id or not client_secret or not tenant_id:
        try:
            from azure_utils.config import ADL_CLIENT_ID, ADL_CLIENT_SECRET, TENANT_ID
        # Only a failed import means "not defined in config.py"; a bare
        # except would also swallow KeyboardInterrupt and SystemExit.
        except ImportError:
            raise Exception(
                'Pass client_id, client_secret, and tenant_id or define in config.py'
            )
        tenant_id = tenant_id or TENANT_ID
        client_id = client_id or ADL_CLIENT_ID
        client_secret = client_secret or ADL_CLIENT_SECRET

    token = lib.auth(tenant_id=tenant_id,
                     client_id=client_id,
                     client_secret=client_secret)

    return core.AzureDLFileSystem(token, store_name=store_name)
Beispiel #33
0
 def __init__(self, store):
     """Authenticate and create a file system client for ``store``.

     Uses the module-level ``ADLS_TENANT_ID``, ``ADLS_CLIENT_SECRET`` and
     ``ADLS_CLIENT_ID``.  The token is kept on ``self.token`` and the client
     on ``self.adlsclient``.
     """
     self.token = lib.auth(
         tenant_id=ADLS_TENANT_ID,
         client_secret=ADLS_CLIENT_SECRET,
         client_id=ADLS_CLIENT_ID,
     )
     self.adlsclient = core.AzureDLFileSystem(self.token, store_name=store)