def download_outputs(path_prefix, creds_path, bucket_name, qap_type,
                     download_to):
    '''
    Download the QAP output CSVs of the given measure type from an S3 bucket
    into a local directory.
    '''

    # Import packages
    import pickle
    from CPAC.AWS import fetch_creds
    from CPAC.AWS.aws_utils import s3_download

    src_list = []

    bucket = fetch_creds.return_bucket(creds_path, bucket_name)

    # Map the QAP measure type to the substring used in the S3 key names
    if qap_type == "anat_spatial":
        search_for = "anatomical_spatial"
    elif qap_type == "func_spatial":
        search_for = "functional_spatial"
    elif qap_type == "func_temporal":
        search_for = "functional_temporal"

    # Collect the matching CSV keys under the prefix
    for k in bucket.list(prefix=path_prefix):
        k_name = str(k.name)
        if (search_for in k_name) and (".csv" in k_name):
            src_list.append(k_name)

    s3_download(bucket, src_list, download_to)
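# A minimal usage sketch for download_outputs(), assuming the function above
# is importable; the prefix, credentials CSV, bucket name, and download
# directory below are placeholder values, not paths from the original script.
if __name__ == "__main__":
    download_outputs(path_prefix="outputs/qap",            # placeholder S3 prefix
                     creds_path="/path/to/aws_creds.csv",  # placeholder credentials CSV
                     bucket_name="example-bucket",         # placeholder bucket name
                     qap_type="anat_spatial",
                     download_to="/tmp/qap_csvs")          # placeholder local directory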
def dl_subj_from_s3(subj_idx, cfg_file, s3_dict_yaml):
    '''
    Download a single subject's files from S3 to the local prefix, using a
    pipeline config file and a YAML dictionary of S3 filepaths.
    '''

    # Import packages
    from CPAC.AWS import fetch_creds, aws_utils
    import yaml

    # Load config file
    with open(cfg_file, 'r') as f:
        cfg_dict = yaml.load(f)

    # Init variables
    bucket_prefix = cfg_dict["bucket_prefix"]
    local_prefix = cfg_dict["local_prefix"]
    bucket_name = cfg_dict["bucket_name"]
    creds_path = cfg_dict["creds_path"]
    bucket = fetch_creds.return_bucket(creds_path, bucket_name)
    s3_list = []
    s3_dict = {}

    # Pull in S3 dict yaml
    with open(s3_dict_yaml, 'r') as f:
        s3_dict = yaml.load(f)

    if len(s3_dict) == 0:
        err = "\n[!] Filepaths have not been successfully gathered from " \
              "the filepath YAML dictionary!\n"
        raise Exception(err)

    # Get list of subject keys for indexing
    sd_keys = s3_dict.keys()
    sd_keys.sort()

    # Grab subject dictionary of interest
    subj_key = sd_keys[subj_idx - 1]
    sub_dict = s3_dict[subj_key]

    # Download subject data to local prefix
    s3_dl = []
    for s3_key, s3_path in sub_dict.items():
        s3_dl.append(s3_path)
        sub_dict[s3_key] = s3_path.replace(bucket_prefix, local_prefix)

    aws_utils.s3_download(bucket, s3_dl, local_prefix=local_prefix,
                          bucket_prefix=bucket_prefix)

    sub_dict = {subj_key: sub_dict}

    # Return single subject dictionary
    return sub_dict
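# A minimal usage sketch for dl_subj_from_s3(), assuming a pipeline config
# YAML and a filepath-dictionary YAML already exist; both paths below are
# placeholders. subj_idx is treated as 1-based, matching the
# sd_keys[subj_idx - 1] lookup above.
if __name__ == "__main__":
    single_sub_dict = dl_subj_from_s3(subj_idx=1,
                                      cfg_file="/path/to/data_config.yml",  # placeholder
                                      s3_dict_yaml="/path/to/s3_dict.yml")  # placeholder
    print single_sub_dict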
key_list = []
for i, k in enumerate(bucket.list(prefix=s3_prefix)):
    key_list.append(str(k.name).replace(s3_prefix, ''))

# Fetch all unique participant codes.
participants = [k.split('/')[0] for k in key_list if 'sub-' in k]
participants = sorted(list(set(participants)))
participants = participants[0:4]

downloads_list = [
    os.path.join(s3_prefix, k) for k in key_list
    if ('sub-' in k and k.split('/')[0] in participants) or ('sub-' not in k)
]

# Download the files.
aws_utils.s3_download(bucket, downloads_list, tmp, bucket_prefix=s3_prefix)

# Run the BIDS validator - save the output to a file that is based off the
# last 'subdirectory' in the prefix.
validator_output = commands.getoutput('bids-validator %s' % tmp)

shutil.rmtree(tmp)

# E-mail the output to me and Dave.
email_list = ['*****@*****.**', '*****@*****.**']
msg = MIMEText(validator_output)
msg['Subject'] = 'BIDS validation results for %s' % (s3_prefix)
msg['From'] = '*****@*****.**'
msg['To'] = '; '.join(email_list)

s = smtplib.SMTP('localhost')
s.sendmail('*****@*****.**', email_list, msg.as_string())
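# The fragment above assumes that `bucket`, `s3_prefix`, and `tmp` were
# defined earlier in the script. A rough setup sketch under that assumption
# (the credentials path and prefix are placeholders; the 'fcp-indi' bucket
# name is taken from the related snippets below):
import os
import shutil
import smtplib
import tempfile
import commands                          # Python 2 stdlib module
from email.mime.text import MIMEText
from CPAC.AWS import fetch_creds, aws_utils

creds = '/path/to/aws_creds.csv'                      # placeholder credentials CSV
s3_prefix = 'data/Projects/ExampleStudy/RawDataBIDS'  # placeholder BIDS prefix
bucket = fetch_creds.return_bucket(creds, 'fcp-indi')
tmp = tempfile.mkdtemp()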
fixed = os.path.join(tmp, 'fixed', s3_prefix.split('/')[-2])
orig = os.path.join(tmp, 'orig', s3_prefix.split('/')[-2])
if not os.path.exists(fixed):
    os.makedirs(fixed)
if not os.path.exists(orig):
    os.makedirs(orig)

# Fetch the participants.tsv file from the BIDS dataset and download it to a
# temporary directory. Start by listing all keys under the prefix.
bucket = fetch_creds.return_bucket(creds, 'fcp-indi')
key_list = []
for i, k in enumerate(bucket.list(prefix=s3_prefix)):
    if 'participants.tsv' in str(k.name):
        key_list.append(str(k.name))

# Download the files.
aws_utils.s3_download(bucket, key_list, orig, bucket_prefix=s3_prefix)

# Change NaNs to 'n/a'.
df = pd.read_csv(os.path.join(orig, 'participants.tsv'), sep='\t')
df.to_csv(os.path.join(fixed, 'participants.tsv'), sep='\t', na_rep='n/a',
          header=True, index=False)

aws_utils.s3_upload(bucket,
                    [os.path.join(fixed, 'participants.tsv')],
                    ['/'.join([s3_prefix, 'participants.tsv'])],
                    make_public=True, overwrite=True)
# Compute the total size of this subject's files in GB
subsize_gb = sum([
    stratdict[strat][subtar][f]['size']
    for f in stratdict[strat][subtar].keys()
]) / (1024.**3)
print subsize_gb

# Only proceed when the total size falls in the 2.5-3.2 GB range
if (subsize_gb >= 2.5) and (subsize_gb <= 3.2):
    filestopull = [
        stratdict[strat][sub][f]['name']
        for sub in tarlist
        for f in stratdict[strat][sub].keys()
    ]

    # Keep retrying the download until the last file in the list exists locally
    while not os.path.isfile('./' + filestopull[-1].replace(
            'data/Projects/ACPI/Outputs/', './')):
        try:
            aws_utils.s3_download(
                bucket, filestopull, './',
                bucket_prefix='data/Projects/ACPI/Outputs/')
        except:
            print "DL failed, trying again"

    # Tar the downloaded strategy directory, upload the archive, and clean up
    tarname = strat + '_' + tarlist[0] + '_' + tarlist[-1]
    print 'Tarring', tarlist, tarname
    fo.write(tarname + '\n')
    tar = tarfile.open(tarname + '.tar.gz', 'w:gz')
    tar.add(strat + '/')
    tar.close()
    shutil.rmtree(strat)
    aws_utils.s3_upload(
        bucket, [tarname + '.tar.gz'],
        ['data/Projects/ACPI/OutputTars/' + tarname + '.tar.gz'])
    os.remove(tarname + '.tar.gz')
def main(sub_idx):

    # Init variables
    bucket_name = 'fcp-indi'
    bucket_prefix = 'data/Projects/ABIDE_Initiative/Outputs/cpac/raw_outputs_rerun'
    config_file = '/home/ubuntu/abide_run/settings/pipeline_config_abide_rerun.yml'
    creds_path = '/home/ubuntu/secure-creds/aws-keys/fcp-indi-keys2.csv'
    local_prefix = '/mnt/eigen_run'
    sublist_file = '/home/ubuntu/abide_run/eig-subs1.yml'

    # Pull in bucket, config, and subject
    sublist = yaml.load(open(sublist_file, 'r'))
    subject = sublist[sub_idx]
    sub_id = subject.split('_')[-1]
    bucket = fetch_creds.return_bucket(creds_path, bucket_name)
    c = Configuration(yaml.load(open(config_file, 'r')))

    # Test to see if they're already uploaded
    to_do = True
    if to_do:
        ## Collect functional_mni list from S3 bucket
        filt_global = 'pipeline_abide_rerun__freq-filter/%s_session_1/functional_mni/_scan_rest_1_rest/_csf_threshold_0.96/_gm_threshold_0.7/_wm_threshold_0.96/_compcor_ncomponents_5_selector_pc10.linear1.wm0.global1.motion1.quadratic1.gm0.compcor1.csf0/_bandpass_freqs_0.01.0.1/bandpassed_demeaned_filtered_antswarp.nii.gz' % sub_id
        filt_noglobal = filt_global.replace('global1', 'global0')
        nofilt_global = 'pipeline_abide_rerun/%s_session_1/functional_mni/_scan_rest_1_rest/_csf_threshold_0.96/_gm_threshold_0.7/_wm_threshold_0.96/_compcor_ncomponents_5_selector_pc10.linear1.wm0.global1.motion1.quadratic1.gm0.compcor1.csf0/residual_antswarp.nii.gz' % sub_id
        nofilt_noglobal = nofilt_global.replace('global1', 'global0')
        s3_functional_mni_list = [filt_global, filt_noglobal,
                                  nofilt_global, nofilt_noglobal]
        s3_functional_mni_list = [os.path.join(bucket_prefix, s)
                                  for s in s3_functional_mni_list]

        # Download contents to local inputs directory
        try:
            aws_utils.s3_download(bucket, s3_functional_mni_list,
                                  local_prefix=os.path.join(local_prefix,
                                                            'centrality_inputs'),
                                  bucket_prefix=bucket_prefix)
        except Exception as e:
            print 'Unable to find eigenvector centrality inputs for subject %s, skipping...' % sub_id
            print 'Error: %s' % e
            return

        # Build strat dict (dictionary of strategies and local input paths)
        strat_dict = {
            'filt_global': os.path.join(local_prefix, 'centrality_inputs', filt_global),
            'filt_noglobal': os.path.join(local_prefix, 'centrality_inputs', filt_noglobal),
            'nofilt_noglobal': os.path.join(local_prefix, 'centrality_inputs', nofilt_noglobal),
            'nofilt_global': os.path.join(local_prefix, 'centrality_inputs', nofilt_global)
        }

        # Create list of processes
        proc_list = [Process(target=make_workflow,
                             args=(in_name, strat, sub_id, c, local_prefix))
                     for strat, in_name in strat_dict.items()]

        # Iterate through processes and fire off
        for p in proc_list:
            p.start()

        for p in proc_list:
            if p.is_alive():
                p.join()

        # Gather outputs
        wfs = glob.glob(os.path.join(local_prefix, 'eigen_wf_%s_*' % sub_id))
        local_list = []
        for wf in wfs:
            for root, dirs, files in os.walk(wf):
                if files:
                    local_list.extend([os.path.join(root, f) for f in files])
        s3_list = [loc.replace(local_prefix,
                               'data/Projects/ABIDE_Initiative/Outputs/cpac/raw_outputs_eigen')
                   for loc in local_list]
        aws_utils.s3_upload(bucket, local_list, s3_list)

        # And delete working directories
        try:
            for input_file in strat_dict.values():
                print 'removing input file %s...' % input_file
                # input paths already contain the subject ID
                os.remove(input_file)
        except Exception as e:
            print 'Unable to remove input files'
            print 'Error: %s' % e

        work_dirs = glob.glob(os.path.join(local_prefix, 'eigen_wf_%s_*' % sub_id))
        for work_dir in work_dirs:
            print 'removing %s...' % work_dir
            shutil.rmtree(work_dir)
    else:
        print 'subject %s already processed and uploaded, skipping...' % sub_id
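# A minimal driver sketch, assuming this script is run once per subject with a
# 0-based subject index on the command line; the argument handling here is an
# assumption and not part of the original script.
if __name__ == '__main__':
    import sys
    main(int(sys.argv[1]))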