Ejemplo n.º 1
0
def clean_request_name(request_name: str,
                       whitelist: str = VALID_REQUEST_NAME_CHARS,
                       char_limit: int = REQUEST_NAME_CHAR_LIMIT) -> str:
    """
    Sanitizes an API request name.

    The name is first passed through path_utils.clean_filename() with the
    given whitelist and character limit; every ':' remaining in that result
    is then replaced with '_'.

    Args:
        request_name: the raw request name
        whitelist: forwarded to clean_filename() as its whitelist argument
        char_limit: forwarded to clean_filename() as its char_limit argument

    Returns:
        The sanitized request name.
    """
    cleaned = path_utils.clean_filename(
        filename=request_name, whitelist=whitelist, char_limit=char_limit)
    return cleaned.replace(':', '_')
Ejemplo n.º 2
0
    if (os.path.basename(basename) != basename
            or unquote(posixpath.basename(urlpath)) != basename):
        raise ValueError  # reject '%2f' or 'dir%5Cbasename.ext' on Windows

    return basename


#%% Enumerate blobs to files

list_files = []

# folder_name = folder_names[0]
for folder_name in folder_names:
    list_file = os.path.join(
        filename_base, job_set_name + '_' +
        path_utils.clean_filename(folder_name) + '_all.json')

    # If this is intended to be a folder, it needs to end in '/', otherwise files that start
    # with the same string will match too
    folder_name_suffix = folder_name
    folder_name_suffix = folder_name_suffix.replace('\\', '/')
    if (not len(folder_name) == 0) and (not folder_name_suffix.endswith('/')):
        folder_name_suffix = folder_name_suffix + '/'
    prefix = container_prefix + folder_name_suffix
    file_list = prepare_api_submission.enumerate_blobs_to_file(
        output_file=list_file,
        account_name=account_name,
        sas_token=read_only_sas_token,
        container_name=container_name,
        account_key=None,
        rmatch=None,
def enumerate_prefix(prefix, sas_url, output_folder, get_sizes=False):
    """
    Enumerates the blobs whose names start with [prefix] and writes them,
    one per line, to a file in [output_folder].

    The account, container and SAS token are all extracted from [sas_url].
    The output file is named after the prefix, as cleaned by
    path_utils.clean_filename().  Blobs are fetched one page at a time.

    Relies on the module-level settings n_blobs_per_page, debug_max_files and
    sleep_time_per_page, and reports the number of blobs written for each
    page through the module-level counter cnt.

    Args:
        prefix: blob name prefix to enumerate
        sas_url: container URL carrying a SAS token (presumably read-only,
            going by how it is used here; only listing is performed)
        output_folder: existing folder that the listing file is written to
        get_sizes: if True, each line is the blob name, a tab, and the blob
            size; otherwise each line is just the blob name

    Returns:
        None; the results are written to the output file.
    """

    account_name = sas_blob_utils.get_account_from_uri(sas_url)
    container_name = sas_blob_utils.get_container_from_uri(sas_url)
    ro_sas_token = sas_blob_utils.get_sas_token_from_uri(sas_url)

    # The credential is passed with a leading '?'; add it only when missing,
    # rather than asserting (asserts are stripped under -O, which would have
    # silently produced a '??...' token)
    if not ro_sas_token.startswith('?'):
        ro_sas_token = '?' + ro_sas_token

    storage_account_url_blob = 'https://' + account_name + '.blob.core.windows.net'

    # prefix = prefixes[0]; print(prefix)

    print('Starting enumeration for prefix {}'.format(prefix))

    # Work out the output file name
    fn = path_utils.clean_filename(prefix)
    output_file = os.path.join(output_folder, fn)

    # Create the service and container clients (nothing is created remotely)
    blob_service_client = BlobServiceClient(
        account_url=storage_account_url_blob, credential=ro_sas_token)

    container_client = blob_service_client.get_container_client(container_name)

    # Enumerate
    with open(output_file, 'w') as output_f:

        continuation_token = ''
        hit_debug_limit = False
        i_blob = 0

        while (continuation_token is not None) and (not hit_debug_limit):

            # Request a single page, resuming from where the last one ended
            blobs_iter = container_client.list_blobs(
                name_starts_with=prefix,
                results_per_page=n_blobs_per_page).by_page(
                    continuation_token=continuation_token)

            # This is a paged list of BlobProperties objects
            blobs = next(blobs_iter)

            n_blobs_this_page = 0

            for blob in blobs:

                # Check the debug limit *before* counting this blob, so that
                # neither the running total nor the per-page count includes a
                # blob that was never written
                if (debug_max_files > 0) and (i_blob >= debug_max_files):
                    print('Hit debug path limit for prefix {}'.format(prefix))
                    hit_debug_limit = True
                    break

                i_blob += 1
                n_blobs_this_page += 1

                size_string = ''
                if get_sizes:
                    size_string = '\t' + str(blob.size)
                output_f.write(blob.name + size_string + '\n')

            # print('Enumerated {} blobs'.format(n_blobs_this_page))
            cnt.increment(n=n_blobs_this_page)

            # None once the last page has been consumed, which ends the loop
            continuation_token = blobs_iter.continuation_token

            if sleep_time_per_page > 0:
                time.sleep(sleep_time_per_page)

        # ...while we're enumerating

    # ...with open(output_file)

    print('Finished enumerating {} blobs for prefix {}'.format(i_blob, prefix))
    if (os.path.basename(basename) != basename
            or unquote(posixpath.basename(urlpath)) != basename):
        raise ValueError  # reject '%2f' or 'dir%5Cbasename.ext' on Windows

    return basename


#%% Enumerate blobs to files

# file_lists_by_folder is meant to hold the local JSON file names; each JSON
# file lists the blob names that correspond to one API taskgroup
file_lists_by_folder = []

# folder_name = folder_names[0]
for folder_name in folder_names:

    # One list file per folder, named after the task and the sanitized folder
    clean_folder_name = path_utils.clean_filename(folder_name)
    json_filename = '{}_{}_all.json'.format(base_task_name, clean_folder_name)
    list_file = os.path.join(filename_base, json_filename)

    # A folder prefix has to end in '/'; without it, blobs whose names merely
    # start with the same string would be matched as well
    folder_name = folder_name.replace('\\', '/')
    if folder_name and not folder_name.endswith('/'):
        folder_name += '/'
    prefix = container_prefix + folder_name

    # Write the names of all blobs under this prefix to the list file
    file_list = ai4e_azure_utils.enumerate_blobs_to_file(
        output_file=list_file,
        account_name=storage_account_name,
        container_name=container_name,
        sas_token=read_only_sas_token,
        blob_prefix=prefix)
Ejemplo n.º 5
0
    
    basename = posixpath.basename(unquote(urlpath))
    if (os.path.basename(basename) != basename or
        unquote(posixpath.basename(urlpath)) != basename):
        raise ValueError  # reject '%2f' or 'dir%5Cbasename.ext' on Windows
        
    return basename


#%% Enumerate blobs to files

# Local JSON list files written below, one per folder
list_files = []

# folder_name = folder_names[0]
for folder_name in folder_names:

    # One list file per folder, named after the job set and the sanitized folder
    list_file = os.path.join(
        filename_base,
        ''.join([job_set_name, '_', path_utils.clean_filename(folder_name), '_all.json']))

    # A folder prefix has to end in '/'; without it, blobs whose names merely
    # start with the same string would be matched as well
    folder_name_suffix = folder_name.replace('\\', '/')
    if len(folder_name) > 0 and not folder_name_suffix.endswith('/'):
        folder_name_suffix += '/'
    prefix = container_prefix + folder_name_suffix

    # Write the names of all blobs under this prefix to the list file
    file_list = prepare_api_submission.enumerate_blobs_to_file(
        output_file=list_file,
        account_name=account_name,
        sas_token=read_only_sas_token,
        container_name=container_name,
        account_key=None,
        rmatch=None,
        prefix=prefix)
    list_files.append(list_file)
def list_blobs_in_container(container_name,account_name,sas_token,output_folder,prefix=None):
    """
    Enumerates the blobs in a container and writes their names, one per line,
    to '<cleaned container name>.log' in [output_folder].

    The container name is cleaned with path_utils.clean_filename() to build
    the output file name.  Blobs are fetched one page at a time.

    Relies on the module-level settings n_blobs_per_page, debug_max_files and
    sleep_time_per_page, and reports the number of blobs written for each
    page through the module-level counter cnt.

    Args:
        container_name: name of the container to enumerate
        account_name: storage account name, used to build the blob endpoint URL
        sas_token: SAS token used as the credential, with or without a
            leading '?'
        output_folder: existing folder that the listing file is written to
        prefix: passed to list_blobs() as name_starts_with; the default of
            None is passed through unchanged

    Returns:
        None; the results are written to the output file.
    """

    # The credential is passed with a leading '?'; add it only when missing
    if not sas_token.startswith('?'):
        sas_token = '?' + sas_token

    storage_account_url_blob = 'https://' + account_name + '.blob.core.windows.net'

    # prefix = prefixes[0]; print(prefix)

    print('Starting enumeration for container {}'.format(container_name))

    # Work out the output file name
    fn = path_utils.clean_filename(container_name) + '.log'
    output_file = os.path.join(output_folder,fn)

    # Create the service and container clients (nothing is created remotely)
    blob_service_client = BlobServiceClient(
        account_url=storage_account_url_blob,
        credential=sas_token)

    container_client = blob_service_client.get_container_client(container_name)

    # Enumerate
    with open(output_file,'w') as output_f:

        continuation_token = ''
        hit_debug_limit = False
        i_blob = 0

        while (continuation_token is not None) and (not hit_debug_limit):

            # Request a single page, resuming from where the last one ended
            blobs_iter = container_client.list_blobs(
                name_starts_with=prefix,
                results_per_page=n_blobs_per_page).by_page(
                continuation_token=continuation_token)
            blobs = next(blobs_iter)

            n_blobs_this_page = 0

            for blob in blobs:

                # Check the debug limit *before* counting this blob, so that
                # neither the running total nor the per-page count includes a
                # blob that was never written
                if (debug_max_files > 0) and (i_blob >= debug_max_files):
                    print('Hit debug path limit for prefix {}'.format(prefix))
                    hit_debug_limit = True
                    break

                i_blob += 1
                n_blobs_this_page += 1
                output_f.write(blob.name + '\n')

            # print('Enumerated {} blobs'.format(n_blobs_this_page))
            cnt.increment(n=n_blobs_this_page)

            # None once the last page has been consumed, which ends the loop
            continuation_token = blobs_iter.continuation_token

            if sleep_time_per_page > 0:
                time.sleep(sleep_time_per_page)

        # ...while we're enumerating

    # ...with open(output_file)

    print('Finished enumerating {} blobs for container {}'.format(
        i_blob,container_name))