    def file_system_sample(self):

        # [START create_file_system_client_from_service]
        # Instantiate a DataLakeServiceClient using a connection string
        from azure.storage.filedatalake import DataLakeServiceClient
        datalake_service_client = DataLakeServiceClient.from_connection_string(
            self.connection_string)

        # Instantiate a FileSystemClient
        file_system_client = datalake_service_client.get_file_system_client(
            "mynewfilesystem")
        # [END create_file_system_client_from_service]

        try:
            # [START create_file_system]
            file_system_client.create_file_system()
            # [END create_file_system]

            # [START get_file_system_properties]
            properties = file_system_client.get_file_system_properties()
            # [END get_file_system_properties]

        finally:
            # [START delete_file_system]
            file_system_client.delete_file_system()
            # [END delete_file_system]

    def acquire_lease_on_file_system(self):

        # Instantiate a DataLakeServiceClient using a connection string
        # [START create_data_lake_service_client_from_conn_str]
        from azure.storage.filedatalake import DataLakeServiceClient
        datalake_service_client = DataLakeServiceClient.from_connection_string(
            self.connection_string)
        # [END create_data_lake_service_client_from_conn_str]

        # Instantiate a FileSystemClient
        file_system_client = datalake_service_client.get_file_system_client(
            "myleasefilesystem")

        # Create new File System
        try:
            file_system_client.create_file_system()
        except ResourceExistsError:
            pass

        # [START acquire_lease_on_file_system]
        # Acquire a lease on the file system
        lease = file_system_client.acquire_lease()

        # Delete file system by passing in the lease
        file_system_client.delete_file_system(lease=lease)
        # [END acquire_lease_on_file_system]

    def list_paths_in_file_system(self):

        # Instantiate a DataLakeServiceClient using a connection string
        from azure.storage.filedatalake import DataLakeServiceClient
        datalake_service_client = DataLakeServiceClient.from_connection_string(
            self.connection_string)

        # Instantiate a FileSystemClient
        file_system_client = datalake_service_client.get_file_system_client(
            "myfilesystemforlistpaths")

        # Create new File System
        file_system_client.create_file_system()

        # [START upload_file_to_file_system]
        with open(SOURCE_FILE, "rb") as data:
            file_client = file_system_client.get_file_client("myfile")
            file_client.create_file()
            file_client.append_data(data, 0)
            file_client.flush_data(data.tell())
        # [END upload_file_to_file_system]

        # [START get_paths_in_file_system]
        path_list = file_system_client.get_paths()
        for path in path_list:
            print(path.name + '\n')
        # [END get_paths_in_file_system]

        # Delete file system
        file_system_client.delete_file_system()

    def get_file_system_client(self):
        connect_str = os.environ["ADLS_CONNECTION_STRING"]
        service_client = DataLakeServiceClient.from_connection_string(
            connect_str)
        file_system_client = service_client.get_file_system_client(
            file_system=self.file_system_name)
        return file_system_client

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        datalake_service_client = DataLakeServiceClient.from_connection_string(
            self.connection_string)
        self.file_system_client = datalake_service_client.get_file_system_client(
            self.container_name)

    def set_metadata_on_file_system(self):

        # Instantiate a DataLakeServiceClient using a connection string
        from azure.storage.filedatalake import DataLakeServiceClient
        datalake_service_client = DataLakeServiceClient.from_connection_string(
            self.connection_string)

        # Instantiate a FileSystemClient
        file_system_client = datalake_service_client.get_file_system_client(
            "mymetadatafilesystemsync")

        try:
            # Create new File System
            file_system_client.create_file_system()

            # [START set_file_system_metadata]
            # Create key, value pairs for metadata
            metadata = {'type': 'test'}

            # Set metadata on the file system
            file_system_client.set_file_system_metadata(metadata=metadata)
            # [END set_file_system_metadata]

            # Get file system properties
            properties = file_system_client.get_file_system_properties()

        finally:
            # Delete file system
            file_system_client.delete_file_system()

    def get_directory_client_from_file_system(self):

        # Instantiate a DataLakeServiceClient using a connection string
        from azure.storage.filedatalake import DataLakeServiceClient
        datalake_service_client = DataLakeServiceClient.from_connection_string(
            self.connection_string)

        # Instantiate a FileSystemClient
        file_system_client = datalake_service_client.get_file_system_client(
            "myfilesystem")

        # Create new File System
        try:
            file_system_client.create_file_system()
        except ResourceExistsError:
            pass

        # [START get_directory_client_from_file_system]
        # Get the DataLakeDirectoryClient from the FileSystemClient to interact with a specific directory
        directory_client = file_system_client.get_directory_client(
            "mynewdirectory")
        # [END get_directory_client_from_file_system]

        # Delete file system
        file_system_client.delete_file_system()
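
    def download_file_from_file_system(self):
        # Not part of the original samples: a minimal sketch of reading a file back,
        # assuming a file named "myfile" already exists in "myfilesystem".
        from azure.storage.filedatalake import DataLakeServiceClient
        datalake_service_client = DataLakeServiceClient.from_connection_string(
            self.connection_string)
        file_system_client = datalake_service_client.get_file_system_client(
            "myfilesystem")

        # download_file() returns a streaming downloader; readall() buffers the content
        file_client = file_system_client.get_file_client("myfile")
        downloaded_bytes = file_client.download_file().readall()
        print(len(downloaded_bytes))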
Example 8
def upload_dir_datalake(path: str, file_system_name: str = 'p4-data'):
    try:
        ser_cli = DataLakeServiceClient.from_connection_string(
            config.AZURE_STORAGE_CONNECTION_STRING)
        filesys_cli = ser_cli.get_file_system_client(
            file_system=file_system_name)
        dir_cli = filesys_cli.get_directory_client(path)

        csv_files = glob.glob(f'{path}/**/*.csv', recursive=True)
        for csv_f in csv_files:
            # e.g. csv_f = 'results/pi/common apis_1595417687.0704062.csv'
            file_cli = dir_cli.get_file_client(csv_f)
            with open(csv_f, 'r') as f:
                file_cli.upload_data(f.read(), overwrite=True)

    except Exception as e:
        print(e)
def main():
    try:
        CONNECTION_STRING = os.environ['AZURE_STORAGE_CONNECTION_STRING']

    except KeyError:
        print("AZURE_STORAGE_CONNECTION_STRING must be set.")
        sys.exit(1)

    datalake_service_client = DataLakeServiceClient.from_connection_string(
        CONNECTION_STRING)
    filesystem_name = "quickqueryfilesystem"
    filesystem_client = datalake_service_client.get_file_system_client(
        filesystem_name)
    try:
        filesystem_client.create_file_system()
    except Exception:
        # The file system may already exist from a previous run
        pass
    # [START query]
    errors = []

    def on_error(error):
        errors.append(error)

    # upload the csv file
    file_client = datalake_service_client.get_file_client(
        filesystem_name, "csvfile")
    file_client.upload_data(CSV_DATA, overwrite=True)

    # select the second column of the csv file
    query_expression = "SELECT _2 from DataLakeStorage"
    input_format = DelimitedTextDialect(delimiter=',',
                                        quotechar='"',
                                        lineterminator='\n',
                                        escapechar="",
                                        has_header=False)
    output_format = DelimitedJsonDialect(delimiter='\n')
    reader = file_client.query_file(query_expression,
                                    on_error=on_error,
                                    file_format=input_format,
                                    output_format=output_format)
    content = reader.readall()
    # [END query]
    print(content)

    filesystem_client.delete_file_system()
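
# Not part of the original sample: a minimal sketch of parsing the query result,
# assuming `content` holds the newline-delimited JSON bytes returned by readall() above.
import json

def parse_query_output(content):
    # Each non-empty line is one JSON record produced by the DelimitedJsonDialect output
    return [json.loads(line) for line in content.splitlines() if line.strip()]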
Example 10
def upload_to_adls(directory, filename, file_chunk_size=1048576):
    service_client = DataLakeServiceClient.from_connection_string(os.environ['ADLS_CONNECTION_STRING'])
    file_system_client = service_client.get_file_system_client(file_system=os.environ['ADLS_FILE_SYSTEM_NAME'])
    directory_client = file_system_client.get_directory_client(directory)
    file_client = directory_client.create_file(filename)

    with open(filename, 'rb') as local_file:
        offset = 0

        for file_chunk in iter(lambda: local_file.read(file_chunk_size), b""):
            chunk_size = len(file_chunk)
            file_client.append_data(
                file_chunk,
                offset=offset,
                length=chunk_size,
                validate_content=True)
            offset += chunk_size
            file_client.flush_data(offset)
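
# Not part of the original sample: a minimal sketch of the same upload without manual
# chunk bookkeeping, assuming the same ADLS_* environment variables and imports as
# upload_to_adls() above.
def upload_to_adls_simple(directory, filename):
    service_client = DataLakeServiceClient.from_connection_string(
        os.environ['ADLS_CONNECTION_STRING'])
    file_system_client = service_client.get_file_system_client(
        file_system=os.environ['ADLS_FILE_SYSTEM_NAME'])
    directory_client = file_system_client.get_directory_client(directory)
    file_client = directory_client.create_file(filename)

    # upload_data handles chunking and the final flush internally
    with open(filename, 'rb') as local_file:
        file_client.upload_data(local_file, overwrite=True)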
Example 11
def upload_dir_datalake_newfile(from_path: str,
                                to_path: str,
                                file_system_name: str = 'p4-data'):
    try:
        ser_cli = DataLakeServiceClient.from_connection_string(
            config.AZURE_STORAGE_CONNECTION_STRING)
        filesys_cli = ser_cli.get_file_system_client(
            file_system=file_system_name)
        dir_cli = filesys_cli.get_directory_client(to_path)

        # csv_files = glob.glob(f'{from_path}/*.txt', recursive=True)
        all_files = os.listdir(from_path)

        csv_files = [name for name in all_files if name.endswith('.csv')]
        for csv_f in csv_files:
            # print(csv_f)
            file_cli = dir_cli.get_file_client(csv_f)
            with open(os.path.join(from_path, csv_f), 'r') as f:
                file_cli.upload_data(f.read(), overwrite=True)

    except Exception as e:
        print(e)
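
# Not part of the original sample: a minimal sketch of the reverse operation, downloading
# every file under a data lake directory to a local folder; the connection string and
# default file system name mirror the upload helpers above.
def download_dir_datalake(from_path: str,
                          to_path: str,
                          file_system_name: str = 'p4-data'):
    try:
        ser_cli = DataLakeServiceClient.from_connection_string(
            config.AZURE_STORAGE_CONNECTION_STRING)
        filesys_cli = ser_cli.get_file_system_client(
            file_system=file_system_name)

        os.makedirs(to_path, exist_ok=True)
        # get_paths(path=...) enumerates every entry under the given directory
        for entry in filesys_cli.get_paths(path=from_path, recursive=True):
            if entry.is_directory:
                continue
            file_cli = filesys_cli.get_file_client(entry.name)
            local_path = os.path.join(to_path, os.path.basename(entry.name))
            with open(local_path, 'wb') as f:
                file_cli.download_file().readinto(f)

    except Exception as e:
        print(e)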
Example 12
def upload_file_datalake(filename: str,
                         from_path: str,
                         to_path: str,
                         file_system_name: str = 'p4-data'):
    try:
        ser_cli = DataLakeServiceClient.from_connection_string(
            config.AZURE_STORAGE_CONNECTION_STRING)
        filesys_cli = ser_cli.get_file_system_client(
            file_system=file_system_name)
        dir_cli = filesys_cli.get_directory_client(to_path)

        # print(csv_f)
        file_cli = dir_cli.get_file_client(filename)

        filepath = os.path.join(from_path, filename)
        if not os.path.exists(filepath):
            time.sleep(3)

        with open(filepath, 'r') as f:
            file_cli.upload_data(f.read(), overwrite=True)

    except Exception as e:
        print(e)
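
# Not part of the original sample: a minimal sketch of polling for the local file instead
# of the single fixed sleep in upload_file_datalake(), assuming another process writes it.
def wait_for_local_file(filepath: str, timeout: float = 30.0, interval: float = 1.0) -> bool:
    # Poll until the file exists or the timeout expires
    deadline = time.time() + timeout
    while not os.path.exists(filepath) and time.time() < deadline:
        time.sleep(interval)
    return os.path.exists(filepath)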
Example 13
def run_amiss():
    
    req_body = request.json

    ## Get Data Lake Connection Ready
    dl_account = req_body['account_url']
    dl_key = req_body['account_credential']
    dl_container = req_body['container']
    dl_suffix = "core.windows.net"
    dl_cnxn = "DefaultEndpointsProtocol=https;AccountName=" + dl_account + ";AccountKey=" + dl_key + ";EndpointSuffix=" + dl_suffix

    serv = DataLakeServiceClient.from_connection_string(conn_str = dl_cnxn)
    fs_client = serv.get_file_system_client(dl_container)

    ## Get task info
    task = req_body['task']

    vcf_path = task['vcf_path']
    cadd_snv_path = task['cadd_snv_path']
    cadd_indel_path = task['cadd_indel_path']

    ## Make Unique Session ID
    sessionid = datetime.now().strftime('%Y%m%d%H%M%S_') + str(uuid4())

    ## Download Files
    dest_dir = f'/app/amiss/output/{sessionid}/'

    for task_file in [vcf_path, cadd_snv_path, cadd_indel_path]:
        file_client = fs_client.get_file_client(task_file)

        task_file_path = os.path.basename(task_file)

        dest_path = os.path.dirname(os.path.join(dest_dir, task_file_path))
        dest_path_file =  os.path.join(dest_dir, task_file_path)
        os.makedirs(dest_path, exist_ok = True)

        with open(dest_path_file, 'wb') as local_file:
            file_client.download_file().readinto(local_file)

    ## Define environment variables
    rel_dir = 'output/' + sessionid + '/'

    os.environ['AMISS_SESSION_ID'] = sessionid
    os.environ['AMISS_SESSION_DIR'] = rel_dir
    os.environ['AMISS_VCF_FILENAME'] = rel_dir + os.path.basename(vcf_path)
    os.environ['AMISS_CADD_SNV_FILENAME'] = rel_dir + os.path.basename(cadd_snv_path)
    os.environ['AMISS_CADD_INDEL_FILENAME'] = rel_dir + os.path.basename(cadd_indel_path)


    # The session id and input file paths are passed to run.sh through the AMISS_*
    # environment variables set above rather than as command-line arguments.
    amiss_cmd = ["/bin/sh", "run.sh"]

    amiss_pipe = subprocess.Popen(amiss_cmd,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)

    update_status(sessionid=sessionid, pid=amiss_pipe.pid, status='Submitted',
                  pipe=amiss_pipe, message='')

    output = {'task': 'amiss',
              'sessionid': sessionid,
              'pid': amiss_pipe.pid,
              'message': 'Task submitted successfully.'}

    return Response(json.dumps(output), 200, mimetype='application/json')
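
# Not part of the original sample: a minimal sketch of pushing a finished result file back
# into the same container, assuming a FileSystemClient and session id as used in run_amiss().
def upload_result(fs_client, sessionid, local_path):
    # Store results under a per-session prefix in the data lake
    remote_path = 'results/' + sessionid + '/' + os.path.basename(local_path)
    file_client = fs_client.get_file_client(remote_path)
    with open(local_path, 'rb') as data:
        file_client.upload_data(data, overwrite=True)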
Example 14
    def _get_service_client_from_connection_string(self,
                                                   connection_string: str):
        return DataLakeServiceClient.from_connection_string(
            conn_str=connection_string)
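
    def _get_service_client_from_default_credential(self, account_name: str):
        # Not part of the original sample: a minimal sketch of the token-based alternative,
        # assuming the azure-identity package is installed and `account_name` is known.
        from azure.identity import DefaultAzureCredential
        return DataLakeServiceClient(
            account_url="https://{}.dfs.core.windows.net".format(account_name),
            credential=DefaultAzureCredential())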
Example 15
    def data_lake_service_sample(self):

        # Instantiate a DataLakeServiceClient using a connection string
        # [START create_datalake_service_client]
        from azure.storage.filedatalake import DataLakeServiceClient
        datalake_service_client = DataLakeServiceClient.from_connection_string(
            self.connection_string)
        # [END create_datalake_service_client]

        # Instantiate a DataLakeServiceClient using Azure Identity credentials.
        # [START create_datalake_service_client_oauth]
        from azure.identity import ClientSecretCredential
        token_credential = ClientSecretCredential(
            self.active_directory_tenant_id,
            self.active_directory_application_id,
            self.active_directory_application_secret,
        )
        datalake_service_client = DataLakeServiceClient(
            "https://{}.dfs.core.windows.net".format(self.account_name),
            credential=token_credential)
        # [END create_datalake_service_client_oauth]

        # get user delegation key
        # [START get_user_delegation_key]
        from datetime import datetime, timedelta
        user_delegation_key = datalake_service_client.get_user_delegation_key(
            datetime.utcnow(),
            datetime.utcnow() + timedelta(hours=1))
        # [END get_user_delegation_key]
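
        # Not part of the original sample: a minimal sketch of turning the user delegation
        # key into a read-only SAS for a file system; the exact helper signature should be
        # checked against the installed SDK version.
        from azure.storage.filedatalake import generate_file_system_sas, FileSystemSasPermissions
        sas_token = generate_file_system_sas(
            self.account_name,
            "filesystem",
            user_delegation_key,
            permission=FileSystemSasPermissions(read=True),
            expiry=datetime.utcnow() + timedelta(hours=1))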

        # Create file systems
        # [START create_file_system_from_service_client]
        datalake_service_client.create_file_system("filesystem")
        # [END create_file_system_from_service_client]
        file_system_client = datalake_service_client.create_file_system(
            "anotherfilesystem")

        # List file systems
        # [START list_file_systems]
        file_systems = datalake_service_client.list_file_systems()
        for file_system in file_systems:
            print(file_system.name)
        # [END list_file_systems]

        # Get Clients from DataLakeServiceClient
        file_system_client = datalake_service_client.get_file_system_client(
            file_system_client.file_system_name)
        # [START get_directory_client_from_service_client]
        directory_client = datalake_service_client.get_directory_client(
            file_system_client.file_system_name, "mydirectory")
        # [END get_directory_client_from_service_client]
        # [START get_file_client_from_service_client]
        file_client = datalake_service_client.get_file_client(
            file_system_client.file_system_name, "myfile")
        # [END get_file_client_from_service_client]

        # Create file and set properties
        metadata = {'hello': 'world', 'number': '42'}
        from azure.storage.filedatalake import ContentSettings
        content_settings = ContentSettings(content_language='spanish',
                                           content_disposition='inline')
        file_client.create_file(content_settings=content_settings)
        file_client.set_metadata(metadata=metadata)
        file_props = file_client.get_file_properties()
        print(file_props.metadata)

        # Create file/directory and set properties
        directory_client.create_directory(content_settings=content_settings,
                                          metadata=metadata)
        dir_props = directory_client.get_directory_properties()
        print(dir_props.metadata)

        # Delete File Systems
        # [START delete_file_system_from_service_client]
        datalake_service_client.delete_file_system("filesystem")
        # [END delete_file_system_from_service_client]
        file_system_client.delete_file_system()
Example 16
# The sample scripts are provided AS IS without warranty of any kind. Microsoft further disclaims all implied warranties including, without limitation, any implied warranties of merchantability or of fitness for a particular purpose.
# The entire risk arising out of the use or performance of the sample scripts and documentation remains with you.
# In no event shall Microsoft, its authors, owners of this repository or anyone else involved in the creation, production, or delivery of the scripts be liable for any damages whatsoever (including,
# without limitation, damages for loss of business profits, business interruption, loss of business information, or other pecuniary loss) arising out of the use of or inability to use the sample scripts or documentation, even if Microsoft has been advised of the possibility of such damages

#-------------------------------------------------------------------------

#IMPORT THE LIBRARIES INTO YOUR FILE
from azure.storage.filedatalake import DataLakeServiceClient
from azure.storage.filedatalake._shared.base_client import create_configuration

#OPTION 1 - MAKING USE OF CONNECTION STRING AND CREATING THE DATALAKE CLIENT
connection_string = "PUT CONNECTION STRING HERE"

#CREATE THE DATALAKE SERVICE CLIENT
service_client = DataLakeServiceClient.from_connection_string(
    connection_string)

#OPTION 2 - MAKING USE OF ACCESS KEY AND CREATING THE DATALAKE CLIENT
storage_account_key = "ACCESS KEY"
storage_account_name = "ACCOUNT NAME"

#CREATE THE DATALAKE SERVICE CLIENT
service_client = DataLakeServiceClient(
    account_url="{}://{}.dfs.core.windows.net".format("https",
                                                      storage_account_name),
    credential=storage_account_key)

#PERFORM THE LISTING OPERATION
file_systems = service_client.list_file_systems()
for file_system in file_systems:
    print(file_system.name)
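
#OPTIONAL (NOT IN THE ORIGINAL SCRIPT) - LIST ONLY FILE SYSTEMS WHOSE NAME STARTS WITH A PREFIX
#A minimal sketch; the "my" prefix below is just an example value
file_systems = service_client.list_file_systems(name_starts_with="my")
for file_system in file_systems:
    print(file_system.name)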
Example 17
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.datalake_service_client = DataLakeServiceClient.from_connection_string(
            self.connection_string)
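
    def create_file_system_if_missing(self, file_system_name):
        # Not part of the original sample: a minimal sketch of lazily creating a file
        # system from the client stored in __init__, assuming azure-core is available.
        from azure.core.exceptions import ResourceExistsError
        try:
            return self.datalake_service_client.create_file_system(file_system_name)
        except ResourceExistsError:
            return self.datalake_service_client.get_file_system_client(file_system_name)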