def upl_qap_output(cfg_file):
    '''
    Upload QAP output files from a local output directory to S3, using
    the bucket, prefix, and credentials specified in a YAML config file
    '''

    # Import packages
    from CPAC.AWS import aws_utils, fetch_creds
    import os
    import yaml

    # Load config file
    with open(cfg_file, 'r') as f:
        cfg_dict = yaml.load(f)

    # Init variables
    bucket_name = cfg_dict["bucket_name"]
    bucket_out_prefix = cfg_dict["bucket_out_prefix"]
    creds_path = cfg_dict["creds_path"]
    bucket = fetch_creds.return_bucket(creds_path, bucket_name)
    output_dir = cfg_dict['output_directory']

    # And upload data
    upl_files = []
    for root, dirs, files in os.walk(output_dir):
        if files:
            upl_files.extend([os.path.join(root, fil) for fil in files])

    # Using CPAC AWS utils
    s3_upl_files = [ufile.replace(output_dir, bucket_out_prefix)
                    for ufile in upl_files]
    aws_utils.s3_upload(bucket, upl_files, s3_upl_files)
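# --- Hedged usage sketch (not part of the original scripts) ---
# upl_qap_output() above documents its expected config only implicitly, so this
# sketch writes out the four keys it reads (bucket_name, bucket_out_prefix,
# creds_path, output_directory). File names and values here are hypothetical
# placeholders, not paths from the original project.
import yaml

example_cfg = {
    'bucket_name': 'my-bucket',                 # hypothetical bucket
    'bucket_out_prefix': 'outputs/qap',         # S3 key prefix to upload under
    'creds_path': '/path/to/aws-keys.csv',      # CSV with AWS credentials
    'output_directory': '/path/to/qap/output',  # local QAP output tree
}

with open('upl_qap_config.yml', 'w') as f:
    yaml.dump(example_cfg, f, default_flow_style=False)

# upl_qap_output('upl_qap_config.yml') would then walk output_directory and
# mirror every file to s3://my-bucket/outputs/qap/... via aws_utils.s3_upload.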
def main(index, local_dir):
    '''
    Function to download an anatomical dataset from S3 and process it
    through Freesurfer's recon-all command, then upload the data back to S3

    Parameters
    ----------
    index : integer
        the index of the subject to process
    local_dir : string
        filepath to the local directory to store the input and
        processed outputs
    '''

    # Import packages
    import boto
    import logging
    import os
    import subprocess
    from CPAC.AWS import aws_utils, fetch_creds

    # Init variables
    creds_path = '/home/ubuntu/secure-creds/aws-keys/fcp-indi-keys2.csv'
    bucket = fetch_creds.return_bucket(creds_path, 'fcp-indi')
    prefix = 'data/Projects/CORR/RawData/IBA_TRT/'
    dl_dir = os.path.join(local_dir, 'inputs')
    subjects_dir = os.path.join(local_dir, 'subjects')

    # Setup logger
    fs_log_path = os.path.join(local_dir, 'download_run_fs_%d.log' % index)
    fs_log = setup_logger('fs_log', fs_log_path, logging.INFO,
                          to_screen=True)

    # Make input and subject dirs
    if not os.path.exists(dl_dir):
        os.makedirs(dl_dir)
    if not os.path.exists(subjects_dir):
        os.makedirs(subjects_dir)

    # Get S3 anatomical paths dictionary
    anat_dict = return_anat_dict(bucket, prefix)

    # Get list of unique subject ids to download
    key_list = sorted(anat_dict.keys())

    # Extract subject of interest
    subj_id = key_list[index]
    s3_path = anat_dict[subj_id]

    # Download data
    fs_log.info('Downloading %s...' % s3_path)
    s3_key = bucket.get_key(s3_path)
    s3_filename = os.path.basename(s3_path)
    dl_filename = os.path.join(dl_dir, subj_id, s3_filename)

    # Make folders if need be
    dl_dirs = os.path.dirname(dl_filename)
    if not os.path.exists(dl_dirs):
        os.makedirs(dl_dirs)
    s3_key.get_contents_to_filename(dl_filename)

    # Execute recon-all
    cmd_list = ['recon-all', '-openmp', '4', '-i', dl_filename,
                '-subjid', subj_id, '-qcache', '-all']
    cmd_str = ' '.join(cmd_list)
    fs_log.info('Executing %s...' % cmd_str)

    # Use subprocess to send command and communicate outputs
    proc = subprocess.Popen(cmd_list, stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)

    # Stream output
    while proc.poll() is None:
        stdout_line = proc.stdout.readline()
        fs_log.info(stdout_line)

    proc.wait()

    # Gather processed data
    fs_log.info('Gathering outputs for upload to S3...')
    upl_list = []
    subj_dir = os.path.join(subjects_dir, subj_id)
    for root, dirs, files in os.walk(subj_dir):
        if files:
            upl_list.extend([os.path.join(root, fl) for fl in files])

    # Update log with upload info
    fs_log.info('Gathered %d files for upload to S3' % len(upl_list))

    # Build upload list
    upl_prefix = os.path.join(prefix.replace('RawData', 'Outputs'),
                              'freesurfer', subj_id)
    s3_upl_list = [upl.replace(subj_dir, upl_prefix) for upl in upl_list]

    # Upload to S3
    aws_utils.s3_upload(bucket, upl_list, s3_upl_list, overwrite=True,
                        make_public=True)
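# --- Hedged helper sketch (assumption, not the original implementation) ---
# main() above calls setup_logger(name, path, level, to_screen=True), which is
# defined elsewhere in the original script. This is a minimal sketch matching
# that call signature; the real helper may differ in format and handlers.
import logging

def setup_logger(logger_name, log_path, level, to_screen=False):
    '''Return a logger that writes to log_path and optionally to the screen'''
    logger = logging.getLogger(logger_name)
    logger.setLevel(level)
    formatter = logging.Formatter('%(asctime)s : %(message)s')

    # Always log to file
    file_handler = logging.FileHandler(log_path)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # Optionally mirror messages to the console
    if to_screen:
        stream_handler = logging.StreamHandler()
        stream_handler.setFormatter(formatter)
        logger.addHandler(stream_handler)

    return logger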
fixed = os.path.join(tmp, 'fixed', s3_prefix.split('/')[-2])
orig = os.path.join(tmp, 'orig', s3_prefix.split('/')[-2])
if not os.path.exists(fixed):
    os.makedirs(fixed)
if not os.path.exists(orig):
    os.makedirs(orig)

# Fetch the participants.tsv key from the BIDS dataset and download it to a
# temporary directory. Start by listing all keys under the S3 prefix.
bucket = fetch_creds.return_bucket(creds, 'fcp-indi')
key_list = []
for i, k in enumerate(bucket.list(prefix=s3_prefix)):
    if 'participants.tsv' in str(k.name):
        key_list.append(str(k.name))

# Download the files.
aws_utils.s3_download(bucket, key_list, orig, bucket_prefix=s3_prefix)

# Change NaNs to 'n/a'.
df = pd.read_csv(os.path.join(orig, 'participants.tsv'), sep='\t')
df.to_csv(os.path.join(fixed, 'participants.tsv'), sep='\t', na_rep='n/a',
          header=True, index=False)
aws_utils.s3_upload(bucket,
                    [os.path.join(fixed, 'participants.tsv')],
                    ['/'.join([s3_prefix, 'participants.tsv'])],
                    make_public=True, overwrite=True)
    try:
        aws_utils.s3_download(
            bucket, filestopull, './',
            bucket_prefix='data/Projects/ACPI/Outputs/')
    except:
        print "DL Failed, Trying Again"

    tarname = strat + '_' + tarlist[0] + '_' + tarlist[-1]
    print 'Tarring', tarlist, tarname
    fo.write(tarname + '\n')
    tar = tarfile.open(tarname + '.tar.gz', 'w:gz')
    tar.add(strat + '/')
    tar.close()
    shutil.rmtree(strat)
    aws_utils.s3_upload(
        bucket, [tarname + '.tar.gz'],
        ['data/Projects/ACPI/OutputTars/' + tarname + '.tar.gz'])
    os.remove(tarname + '.tar.gz')
    tarlist = []
elif subsize_gb > 3.2:
    nextlist = []
    print 'TOOBIG', tarlist, subsize_gb
    while subsize_gb > 3.2:
        nextlist.append(tarlist[-1])
        del tarlist[-1]
        subsize_gb = 0
        for subtar in tarlist:
            subsize_gb += sum([
                stratdict[strat][subtar][f]['size']
                for f in stratdict[strat][subtar].keys()
            ])
def upload_dir_contents(ipdir, s3path, bucketname, creds):
    '''
    Upload the top-level contents of a local directory to an S3 path
    '''
    srclist = [os.path.abspath(g) for g in glob.glob(ipdir + '/*')]
    destlist = [s3path + '/' + s.split('/')[-1] for s in srclist]
    bucket = fetch_creds.return_bucket(creds, bucketname)
    aws_utils.s3_upload(bucket, srclist, destlist)
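# --- Hedged usage example (paths are hypothetical placeholders) ---
# upload_dir_contents() mirrors the immediate contents of a local directory to
# an S3 prefix; note glob('<dir>/*') is not recursive, so only top-level
# entries are picked up.
# upload_dir_contents('/path/to/local/results',
#                     'data/Projects/MyStudy/Outputs',
#                     'fcp-indi',
#                     '/path/to/aws-keys.csv')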
for idx, perp in enumerate(perps):
    if 'group_%d' % group_idx not in groups.keys():
        groups['group_%d' % group_idx] = []
    groups['group_%d' % group_idx].append(perp)
    # print groups['group_%d' % group_idx]
    if idx % 3 == 0:
        group_idx += 1

for group in groups:
    groupfiles = []
    for perp in groups[group]:
        groupfiles.extend(
            [fname for fname in release_files[release] if perp in fname])

    # Make relative paths in tar.
    os.chdir(warehouse_dir)
    for name in groupfiles:
        print '%s will be added to %s' % (
            name.replace(warehouse_dir + '/', './'),
            os.path.join(outdir, group + '.tar.gz'))
    if not os.path.isfile(os.path.join(outdir, group + '.tar.gz')):
        with tarfile.open(os.path.join(outdir, group + '.tar.gz'),
                          "w:gz") as tgz:
            for name in groupfiles:
                tgz.add(name.replace(warehouse_dir + '/', './'))
    aws_utils.s3_upload(
        bucket, [os.path.join(outdir, group + '.tar.gz')],
        ['data/Projects/RocklandSample/RawDataTars/' +
         release_folders[release] + '/' + group + '.tar.gz'],
        make_public=True)
    os.remove(os.path.join(outdir, group + '.tar.gz'))
def main(index, local_dir):
    '''
    Function to download an anatomical dataset from S3 and process it
    through the ANTS antsCorticalThickness.sh script, then upload the
    data back to S3

    Parameters
    ----------
    index : integer
        the index of the subject to process
    local_dir : string
        filepath to the local directory to store the input and
        processed outputs
    '''

    # Import packages
    import boto
    import logging
    import os
    import subprocess
    import time
    from CPAC.AWS import aws_utils, fetch_creds

    # Init variables
    creds_path = '/home/ubuntu/secure-creds/aws-keys/fcp-indi-keys2.csv'
    # Oasis template paths
    oasis_path = '/home/ubuntu/OASIS-30_Atropos_template/'
    # Bucket and S3 dataset prefix
    bucket = fetch_creds.return_bucket(creds_path, 'fcp-indi')
    prefix = 'data/Projects/CORR/RawData/IBA_TRT/'
    # Local dirs for working and download
    dl_dir = os.path.join(local_dir, 'inputs')

    # Setup logger
    act_log_path = '/home/ubuntu/run_act_%d.log' % index
    act_log = setup_logger('act_log', act_log_path, logging.INFO,
                           to_screen=True)

    # Make input and workdirs
    if not os.path.exists(dl_dir):
        os.makedirs(dl_dir)

    # Get S3 anatomical paths dictionary
    anat_dict = return_anat_dict(bucket, prefix)

    # Get list of unique subject ids to download
    key_list = sorted(anat_dict.keys())

    # Extract subject of interest
    subj_id = key_list[index]
    s3_path = anat_dict[subj_id]

    # Init working dir
    working_dir = os.path.join(local_dir, '%s_act_workdir' % subj_id)
    if not os.path.exists(working_dir):
        os.makedirs(working_dir)

    # Download data
    act_log.info('Downloading %s...' % s3_path)
    s3_key = bucket.get_key(s3_path)
    s3_filename = os.path.basename(s3_path)
    dl_filename = os.path.join(dl_dir, subj_id, s3_filename)

    # Make folders if need be
    dl_dirs = os.path.dirname(dl_filename)
    if not os.path.exists(dl_dirs):
        os.makedirs(dl_dirs)
    s3_key.get_contents_to_filename(dl_filename)

    # Create the nipype workflow
    act_wf = create_workflow(working_dir, dl_filename, oasis_path)

    # Run the workflow
    act_log.info('Running the workflow...')

    # Start timing
    start = time.time()
    act_wf.run()
    # Finish timing
    fin = time.time()
    act_log.info('Completed workflow!')

    # Log finish and total computation time
    elapsed = (fin - start)/60.0
    act_log.info('Total time running is: %f minutes' % elapsed)

    # Gather processed data
    act_log.info('Gathering outputs for upload to S3...')
    upl_list = []
    for root, dirs, files in os.walk(working_dir):
        if files:
            upl_list.extend([os.path.join(root, fl) for fl in files])

    # Update log with upload info
    act_log.info('Gathered %d files for upload to S3' % len(upl_list))

    # Build upload list
    upl_prefix = os.path.join(prefix.replace('RawData', 'Outputs'),
                              'ants', subj_id)
    s3_upl_list = [upl.replace(working_dir, upl_prefix) for upl in upl_list]

    # Upload to S3
    aws_utils.s3_upload(bucket, upl_list, s3_upl_list)
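# --- Hedged helper sketch (assumption, not the original implementation) ---
# The recon-all and ANTs scripts above both call return_anat_dict(bucket,
# prefix), defined elsewhere, and use it as {subject_id: s3_key_name}. This
# sketch lists anatomical NIfTI keys under the prefix with boto and derives a
# subject id from the key path; the filter and id parsing are assumptions.
def return_anat_dict(bucket, prefix):
    '''Return a dictionary mapping subject id to its anatomical S3 key name'''
    anat_dict = {}
    for key in bucket.list(prefix=prefix):
        key_name = str(key.name)
        # Keep only anatomical NIfTI files (filter is an assumption)
        if key_name.endswith('.nii.gz') and 'anat' in key_name:
            # Assume the subject id is the path component right after prefix
            subj_id = key_name.replace(prefix, '').split('/')[0]
            anat_dict[subj_id] = key_name
    return anat_dict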
def main(sub_idx):

    # Init variables
    bucket_name = 'fcp-indi'
    bucket_prefix = 'data/Projects/ABIDE_Initiative/Outputs/cpac/raw_outputs_rerun'
    config_file = '/home/ubuntu/abide_run/settings/pipeline_config_abide_rerun.yml'
    creds_path = '/home/ubuntu/secure-creds/aws-keys/fcp-indi-keys2.csv'
    local_prefix = '/mnt/eigen_run'
    sublist_file = '/home/ubuntu/abide_run/eig-subs1.yml'

    # Pull in bucket, config, and subject
    sublist = yaml.load(open(sublist_file, 'r'))
    subject = sublist[sub_idx]
    sub_id = subject.split('_')[-1]
    bucket = fetch_creds.return_bucket(creds_path, bucket_name)
    c = Configuration(yaml.load(open(config_file, 'r')))

    # Test to see if they're already uploaded
    to_do = True
    if to_do:
        ## Collect functional_mni list from S3 bucket
        filt_global = 'pipeline_abide_rerun__freq-filter/%s_session_1/functional_mni/_scan_rest_1_rest/_csf_threshold_0.96/_gm_threshold_0.7/_wm_threshold_0.96/_compcor_ncomponents_5_selector_pc10.linear1.wm0.global1.motion1.quadratic1.gm0.compcor1.csf0/_bandpass_freqs_0.01.0.1/bandpassed_demeaned_filtered_antswarp.nii.gz' % sub_id
        filt_noglobal = filt_global.replace('global1', 'global0')
        nofilt_global = 'pipeline_abide_rerun/%s_session_1/functional_mni/_scan_rest_1_rest/_csf_threshold_0.96/_gm_threshold_0.7/_wm_threshold_0.96/_compcor_ncomponents_5_selector_pc10.linear1.wm0.global1.motion1.quadratic1.gm0.compcor1.csf0/residual_antswarp.nii.gz' % sub_id
        nofilt_noglobal = nofilt_global.replace('global1', 'global0')
        s3_functional_mni_list = [filt_global, filt_noglobal,
                                  nofilt_global, nofilt_noglobal]
        s3_functional_mni_list = [os.path.join(bucket_prefix, s)
                                  for s in s3_functional_mni_list]

        # Download contents to local inputs directory
        try:
            aws_utils.s3_download(bucket, s3_functional_mni_list,
                                  local_prefix=os.path.join(local_prefix,
                                                            'centrality_inputs'),
                                  bucket_prefix=bucket_prefix)
        except Exception as e:
            print 'Unable to find eigenvector centrality inputs for subject %s, skipping...' % sub_id
            print 'Error: %s' % e
            return

        # Build strat dict (dictionary of strategies and local input paths)
        strat_dict = {'filt_global': os.path.join(local_prefix, 'centrality_inputs', filt_global),
                      'filt_noglobal': os.path.join(local_prefix, 'centrality_inputs', filt_noglobal),
                      'nofilt_noglobal': os.path.join(local_prefix, 'centrality_inputs', nofilt_noglobal),
                      'nofilt_global': os.path.join(local_prefix, 'centrality_inputs', nofilt_global)}

        # Create list of processes
        proc_list = [Process(target=make_workflow,
                             args=(in_name, strat, sub_id, c, local_prefix))
                     for strat, in_name in strat_dict.items()]

        # Iterate through processes and fire off
        for p in proc_list:
            p.start()

        for p in proc_list:
            if p.is_alive():
                p.join()

        # Gather outputs
        wfs = glob.glob(os.path.join(local_prefix, 'eigen_wf_%s_*' % sub_id))
        local_list = []
        for wf in wfs:
            for root, dirs, files in os.walk(wf):
                if files:
                    local_list.extend([os.path.join(root, f) for f in files])

        s3_list = [loc.replace(local_prefix,
                               'data/Projects/ABIDE_Initiative/Outputs/cpac/raw_outputs_eigen')
                   for loc in local_list]
        aws_utils.s3_upload(bucket, local_list, s3_list)

        # And delete working directories
        try:
            for input_file in strat_dict.values():
                print 'removing input file %s...' % input_file
                os.remove(input_file)
        except Exception as e:
            print 'Unable to remove input files'
            print 'Error: %s' % e

        work_dirs = glob.glob(os.path.join(local_prefix,
                                           'eigen_wf_%s_*' % sub_id))
        for work_dir in work_dirs:
            print 'removing %s...' % work_dir
            shutil.rmtree(work_dir)
    else:
        print 'subject %s already processed and uploaded, skipping...' % sub_id
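# --- Hedged driver sketch (assumption about how sub_idx is supplied) ---
# The eigenvector-centrality main() above takes a subject index; the original
# script presumably passes one in from a cluster array job or the command
# line. This entry point is illustrative only, not the project's actual one.
if __name__ == '__main__':
    import sys
    main(int(sys.argv[1]))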
localname = k.name.replace(bucketpath, localpath)
if os.path.isfile(localname):
    localname = os.path.abspath(localname)
    while os.path.islink(localname):
        localname = os.readlink(localname)
    x = os.popen('md5sum ' + localname).read()
    localmd5 = str(x.split(' ')[0])
    etag = str(k.etag).replace('"', '')
    if '-' in etag:
        numparts = int(etag.split('-')[-1])
        #print (os.stat(localname).st_size/(1024.0*1024.0))/numparts
        y = os.popen('bash s3etag.sh ' + localname + ' 8').read()
        localetag = y.strip().split(' ')[-1]
        if etag == localetag:
            pass
            #print 'all good', buckname
        elif etag != localetag:
            print 'no bueno', buckname, localetag, etag
            aws_utils.s3_upload(bucket, [localname], [buckname],
                                make_public=True, overwrite=True)
    elif '-' not in etag and localmd5 == etag:
        pass
        #print 'all good', buckname
    elif '-' not in etag and localmd5 != etag:
        print 'no bueno', buckname, localmd5, etag
        aws_utils.s3_upload(bucket, [localname], [buckname],
                            make_public=True, overwrite=True)
else:
    print 'not found locally', buckname
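# --- Hedged sketch of the multipart-ETag check delegated to s3etag.sh above ---
# When an object is uploaded to S3 in multiple parts, its ETag is not a plain
# MD5: it is md5(concatenation of each part's binary MD5) + '-' + part count.
# The snippet above shells out to a script with an 8 MB part size; this is a
# pure-Python sketch of the same convention (part size must match the uploader).
import hashlib

def multipart_etag(filename, part_size_mb=8):
    '''Compute an S3-style multipart ETag for a local file'''
    part_size = part_size_mb * 1024 * 1024
    md5s = []
    with open(filename, 'rb') as f:
        while True:
            chunk = f.read(part_size)
            if not chunk:
                break
            md5s.append(hashlib.md5(chunk))
    if len(md5s) == 1:
        # Single-part uploads carry a plain MD5 ETag with no '-' suffix
        return md5s[0].hexdigest()
    combined = hashlib.md5(b''.join(m.digest() for m in md5s))
    return '%s-%d' % (combined.hexdigest(), len(md5s))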