def download_outputs(path_prefix, creds_path, bucket_name, qap_type,
                     download_to):
    '''
    Download the QAP output CSVs of the given measure type from an S3 bucket
    into a local directory.
    '''

    # Import packages
    import pickle
    from CPAC.AWS import fetch_creds
    from CPAC.AWS.aws_utils import s3_download

    src_list = []

    bucket = fetch_creds.return_bucket(creds_path, bucket_name)

    # Map the QAP measure type to the substring used in the S3 key names
    if qap_type == "anat_spatial":
        search_for = "anatomical_spatial"
    elif qap_type == "func_spatial":
        search_for = "functional_spatial"
    elif qap_type == "func_temporal":
        search_for = "functional_temporal"

    # Collect the matching CSV keys under the prefix
    for k in bucket.list(prefix=path_prefix):
        k_name = str(k.name)
        if (search_for in k_name) and (".csv" in k_name):
            src_list.append(k_name)

    s3_download(bucket, src_list, download_to)
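# A minimal usage sketch for download_outputs(), assuming the function above
# is importable; the prefix, credentials CSV, bucket name, and download
# directory below are placeholder values, not paths from the original script.
if __name__ == "__main__":
    download_outputs(path_prefix="outputs/qap",            # placeholder S3 prefix
                     creds_path="/path/to/aws_creds.csv",  # placeholder credentials CSV
                     bucket_name="example-bucket",         # placeholder bucket name
                     qap_type="anat_spatial",
                     download_to="/tmp/qap_csvs")          # placeholder local directory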
def dl_subj_from_s3(subj_idx, cfg_file, s3_dict_yaml):
    '''
    Download a single subject's files from S3 to the local prefix, using a
    pipeline config file and a YAML dictionary of S3 filepaths.
    '''

    # Import packages
    from CPAC.AWS import fetch_creds, aws_utils
    import yaml

    # Load config file
    with open(cfg_file, 'r') as f:
        cfg_dict = yaml.load(f)

    # Init variables
    bucket_prefix = cfg_dict["bucket_prefix"]
    local_prefix = cfg_dict["local_prefix"]
    bucket_name = cfg_dict["bucket_name"]
    creds_path = cfg_dict["creds_path"]
    bucket = fetch_creds.return_bucket(creds_path, bucket_name)
    s3_list = []
    s3_dict = {}

    # Pull in S3 dict yaml
    with open(s3_dict_yaml, 'r') as f:
        s3_dict = yaml.load(f)

    if len(s3_dict) == 0:
        err = "\n[!] Filepaths have not been successfully gathered from " \
              "the filepath YAML dictionary!\n"
        raise Exception(err)

    # Get list of subject keys for indexing
    sd_keys = s3_dict.keys()
    sd_keys.sort()

    # Grab subject dictionary of interest
    subj_key = sd_keys[subj_idx - 1]
    sub_dict = s3_dict[subj_key]

    # Download subject data to local prefix
    s3_dl = []
    for s3_key, s3_path in sub_dict.items():
        s3_dl.append(s3_path)
        sub_dict[s3_key] = s3_path.replace(bucket_prefix, local_prefix)

    aws_utils.s3_download(bucket, s3_dl, local_prefix=local_prefix,
                          bucket_prefix=bucket_prefix)

    sub_dict = {subj_key: sub_dict}

    # Return single subject dictionary
    return sub_dict
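# A minimal usage sketch for dl_subj_from_s3(), assuming a pipeline config
# YAML and a filepath-dictionary YAML already exist; both paths below are
# placeholders. subj_idx is treated as 1-based, matching the
# sd_keys[subj_idx - 1] lookup above.
if __name__ == "__main__":
    single_sub_dict = dl_subj_from_s3(subj_idx=1,
                                      cfg_file="/path/to/data_config.yml",  # placeholder
                                      s3_dict_yaml="/path/to/s3_dict.yml")  # placeholder
    print single_sub_dict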
key_list = []
for i, k in enumerate(bucket.list(prefix=s3_prefix)):
    key_list.append(str(k.name).replace(s3_prefix, ''))

# Fetch all unique participant codes.
participants = [k.split('/')[0] for k in key_list if 'sub-' in k]
participants = sorted(list(set(participants)))
participants = participants[0:4]

downloads_list = [
    os.path.join(s3_prefix, k) for k in key_list
    if ('sub-' in k and k.split('/')[0] in participants) or ('sub-' not in k)
]

# Download the files.
aws_utils.s3_download(bucket, downloads_list, tmp, bucket_prefix=s3_prefix)

# Run the BIDS validator - save the output to a file that is based off the
# last 'subdirectory' in the prefix.
validator_output = commands.getoutput('bids-validator %s' % tmp)

shutil.rmtree(tmp)

# E-mail the output to me and Dave.
email_list = ['*****@*****.**', '*****@*****.**']
msg = MIMEText(validator_output)
msg['Subject'] = 'BIDS validation results for %s' % (s3_prefix)
msg['From'] = '*****@*****.**'
msg['To'] = '; '.join(email_list)

s = smtplib.SMTP('localhost')
s.sendmail('*****@*****.**', email_list, msg.as_string())
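# The fragment above assumes that `bucket`, `s3_prefix`, and `tmp` were
# defined earlier in the script. A rough setup sketch under that assumption
# (the credentials path and prefix are placeholders; the 'fcp-indi' bucket
# name is taken from the related snippets below):
import os
import shutil
import smtplib
import tempfile
import commands                          # Python 2 stdlib module
from email.mime.text import MIMEText
from CPAC.AWS import fetch_creds, aws_utils

creds = '/path/to/aws_creds.csv'                      # placeholder credentials CSV
s3_prefix = 'data/Projects/ExampleStudy/RawDataBIDS'  # placeholder BIDS prefix
bucket = fetch_creds.return_bucket(creds, 'fcp-indi')
tmp = tempfile.mkdtemp()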
fixed = os.path.join(tmp, 'fixed', s3_prefix.split('/')[-2])
orig = os.path.join(tmp, 'orig', s3_prefix.split('/')[-2])
if not os.path.exists(fixed):
    os.makedirs(fixed)
if not os.path.exists(orig):
    os.makedirs(orig)

# Fetch the participants.tsv file from the BIDS dataset and download it to a
# temporary directory. Start by listing all keys under the prefix.
bucket = fetch_creds.return_bucket(creds, 'fcp-indi')
key_list = []
for i, k in enumerate(bucket.list(prefix=s3_prefix)):
    if 'participants.tsv' in str(k.name):
        key_list.append(str(k.name))

# Download the files.
aws_utils.s3_download(bucket, key_list, orig, bucket_prefix=s3_prefix)

# Change NaNs to 'n/a'.
df = pd.read_csv(os.path.join(orig, 'participants.tsv'), sep='\t')
df.to_csv(os.path.join(fixed, 'participants.tsv'), sep='\t', na_rep='n/a',
          header=True, index=False)

aws_utils.s3_upload(bucket,
                    [os.path.join(fixed, 'participants.tsv')],
                    ['/'.join([s3_prefix, 'participants.tsv'])],
                    make_public=True, overwrite=True)
# Compute the total size of this subject's files in GB
subsize_gb = sum([
    stratdict[strat][subtar][f]['size']
    for f in stratdict[strat][subtar].keys()
]) / (1024.**3)
print subsize_gb

# Only proceed when the total size falls in the 2.5-3.2 GB range
if (subsize_gb >= 2.5) and (subsize_gb <= 3.2):
    filestopull = [
        stratdict[strat][sub][f]['name']
        for sub in tarlist
        for f in stratdict[strat][sub].keys()
    ]

    # Keep retrying the download until the last file in the list exists locally
    while not os.path.isfile('./' + filestopull[-1].replace(
            'data/Projects/ACPI/Outputs/', './')):
        try:
            aws_utils.s3_download(
                bucket, filestopull, './',
                bucket_prefix='data/Projects/ACPI/Outputs/')
        except:
            print "DL failed, trying again"

    # Tar the downloaded strategy directory, upload the archive, and clean up
    tarname = strat + '_' + tarlist[0] + '_' + tarlist[-1]
    print 'Tarring', tarlist, tarname
    fo.write(tarname + '\n')
    tar = tarfile.open(tarname + '.tar.gz', 'w:gz')
    tar.add(strat + '/')
    tar.close()
    shutil.rmtree(strat)
    aws_utils.s3_upload(
        bucket, [tarname + '.tar.gz'],
        ['data/Projects/ACPI/OutputTars/' + tarname + '.tar.gz'])
    os.remove(tarname + '.tar.gz')
def main(sub_idx):

    # Init variables
    bucket_name = 'fcp-indi'
    bucket_prefix = 'data/Projects/ABIDE_Initiative/Outputs/cpac/raw_outputs_rerun'
    config_file = '/home/ubuntu/abide_run/settings/pipeline_config_abide_rerun.yml'
    creds_path = '/home/ubuntu/secure-creds/aws-keys/fcp-indi-keys2.csv'
    local_prefix = '/mnt/eigen_run'
    sublist_file = '/home/ubuntu/abide_run/eig-subs1.yml'

    # Pull in bucket, config, and subject
    sublist = yaml.load(open(sublist_file, 'r'))
    subject = sublist[sub_idx]
    sub_id = subject.split('_')[-1]
    bucket = fetch_creds.return_bucket(creds_path, bucket_name)
    c = Configuration(yaml.load(open(config_file, 'r')))

    # Test to see if they're already uploaded
    to_do = True
    if to_do:
        ## Collect functional_mni list from S3 bucket
        filt_global = 'pipeline_abide_rerun__freq-filter/%s_session_1/functional_mni/_scan_rest_1_rest/_csf_threshold_0.96/_gm_threshold_0.7/_wm_threshold_0.96/_compcor_ncomponents_5_selector_pc10.linear1.wm0.global1.motion1.quadratic1.gm0.compcor1.csf0/_bandpass_freqs_0.01.0.1/bandpassed_demeaned_filtered_antswarp.nii.gz' % sub_id
        filt_noglobal = filt_global.replace('global1', 'global0')
        nofilt_global = 'pipeline_abide_rerun/%s_session_1/functional_mni/_scan_rest_1_rest/_csf_threshold_0.96/_gm_threshold_0.7/_wm_threshold_0.96/_compcor_ncomponents_5_selector_pc10.linear1.wm0.global1.motion1.quadratic1.gm0.compcor1.csf0/residual_antswarp.nii.gz' % sub_id
        nofilt_noglobal = nofilt_global.replace('global1', 'global0')
        s3_functional_mni_list = [filt_global, filt_noglobal,
                                  nofilt_global, nofilt_noglobal]
        s3_functional_mni_list = [os.path.join(bucket_prefix, s)
                                  for s in s3_functional_mni_list]

        # Download contents to local inputs directory
        try:
            aws_utils.s3_download(bucket, s3_functional_mni_list,
                                  local_prefix=os.path.join(local_prefix,
                                                            'centrality_inputs'),
                                  bucket_prefix=bucket_prefix)
        except Exception as e:
            print 'Unable to find eigenvector centrality inputs for subject %s, skipping...' % sub_id
            print 'Error: %s' % e
            return

        # Build strat dict (dictionary of strategies and local input paths)
        strat_dict = {
            'filt_global': os.path.join(local_prefix, 'centrality_inputs', filt_global),
            'filt_noglobal': os.path.join(local_prefix, 'centrality_inputs', filt_noglobal),
            'nofilt_noglobal': os.path.join(local_prefix, 'centrality_inputs', nofilt_noglobal),
            'nofilt_global': os.path.join(local_prefix, 'centrality_inputs', nofilt_global)
        }

        # Create list of processes
        proc_list = [Process(target=make_workflow,
                             args=(in_name, strat, sub_id, c, local_prefix))
                     for strat, in_name in strat_dict.items()]

        # Iterate through processes and fire off
        for p in proc_list:
            p.start()

        for p in proc_list:
            if p.is_alive():
                p.join()

        # Gather outputs
        wfs = glob.glob(os.path.join(local_prefix, 'eigen_wf_%s_*' % sub_id))
        local_list = []
        for wf in wfs:
            for root, dirs, files in os.walk(wf):
                if files:
                    local_list.extend([os.path.join(root, f) for f in files])
        s3_list = [loc.replace(local_prefix,
                               'data/Projects/ABIDE_Initiative/Outputs/cpac/raw_outputs_eigen')
                   for loc in local_list]
        aws_utils.s3_upload(bucket, local_list, s3_list)

        # And delete working directories
        try:
            for input_file in strat_dict.values():
                print 'removing input file %s...' % input_file
                # input paths already contain the subject ID
                os.remove(input_file)
        except Exception as e:
            print 'Unable to remove input files'
            print 'Error: %s' % e

        work_dirs = glob.glob(os.path.join(local_prefix, 'eigen_wf_%s_*' % sub_id))
        for work_dir in work_dirs:
            print 'removing %s...' % work_dir
            shutil.rmtree(work_dir)
    else:
        print 'subject %s already processed and uploaded, skipping...' % sub_id
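# A minimal driver sketch, assuming this script is run once per subject with a
# 0-based subject index on the command line; the argument handling here is an
# assumption and not part of the original script.
if __name__ == '__main__':
    import sys
    main(int(sys.argv[1]))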