def insert_abide_subjects(creds_path, xls_pheno_guid_path):
    # Import packages
    import fetch_creds
    import pandas

    # Init variables
    xlsx_path = xls_pheno_guid_path

    # Connect to database with cursor
    cursor = fetch_creds.return_cursor(creds_path)

    # Load data
    guid_df = pandas.read_excel(xlsx_path)

    # For each subject in the xlsx file, upload their data to table
    nrows = guid_df.shape[0]
    for i in range(nrows):
        sub = guid_df.ix[i, :]
        guid = sub["GUID"]
        # Test if it's a NaN (registers as a float)
        if type(guid) != float:
            guid = str(guid)
            site_id = str(sub["SITE_ID"])
            sub_id = str(int(sub["SUB_ID"]))
            dx_group = sub["DX_GROUP"]
            dsm_iv_tr = sub["DSM_IV_TR"]
            age = sub["AGE_AT_SCAN"]
            sex = sub["SEX"]
            if sex == 1:
                sex = "M"
            else:
                sex = "F"
            handedness = str(sub["HANDEDNESS_CATEGORY"])
            # Command to insert record
            cmd = """
                  insert into abide_subjects
                  (id, guid, site_id, sub_id, dx_group, dsm_iv_tr,
                   age_at_scan, sex, handedness)
                  values
                  (:col_1, :col_2, :col_3, :col_4, :col_5, :col_6,
                   :col_7, :col_8, :col_9)
                  """
            cursor.execute(cmd,
                           col_1=i - 1,
                           col_2=guid,
                           col_3=site_id,
                           col_4=sub_id,
                           col_5=dx_group,
                           col_6=dsm_iv_tr,
                           col_7=age,
                           col_8=sex,
                           col_9=handedness)

            # Print to screen
            print i, guid
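
# Example invocation (a minimal sketch; both paths below are hypothetical
# placeholders for the miNDAR credentials csv and the ABIDE phenotype/GUID
# spreadsheet):
#
#     insert_abide_subjects('/path/to/credentials.csv',
#                           '/path/to/abide_pheno_guids.xlsx')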
def main(min_id, max_id, creds_path, output_path):
    '''
    Method to query the IMAGE03 table from a miNDAR database instance
    and create a subject list of the form (img03_id, s3_path), where
    img03_id is an integer corresponding to the image03_id of the DB
    entry and s3_path is a string corresponding to the path of the
    image on S3. It will save the subject list as a yaml file on disk.

    Parameters
    ----------
    min_id : integer
        The minimum of the image03_id range to build the subject list
    max_id : integer
        The maximum of the image03_id range to build the subject list
    creds_path : string (filepath)
        path to the csv file with 'ACCESS_KEY_ID' as the header and the
        corresponding ASCII text for the key underneath; same with the
        'SECRET_ACCESS_KEY' string and ASCII text
    output_path : string (filepath)
        path to save the output subject list yaml file

    Returns
    -------
    sublist : list (tuple)
        A list of tuples, where each tuple consists of (int, str),
        corresponding to the image03_id and s3_path of the database
        entry.
    '''

    # Import packages
    import fetch_creds
    import os
    import yaml

    # Init variables
    cursor = fetch_creds.return_cursor(creds_path)
    cmd = '''
          select image03_id, image_file from NITRC_IMAGE03
          where
          image03_id >= :arg_1 and
          image03_id <= :arg_2
          '''
    out_fp = os.path.abspath(output_path)

    # Execute command
    cursor.execute(cmd, arg_1=min_id, arg_2=max_id)
    res = cursor.fetchall()

    # And save result to yaml file
    with open(out_fp, 'w') as f:
        print 'Saving subject list to %s' % out_fp
        f.write(yaml.dump(res))

    # Return the list
    return res
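
# Example invocation (illustrative only; the id range and both paths are
# hypothetical). The yaml file written to output_path holds the same list
# of (image03_id, image_file) tuples that the function returns:
#
#     sublist = main(1, 1000, '/path/to/credentials.csv',
#                    '/path/to/image03_sublist.yml')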
def delete_dups(creds_path):
    # Import packages
    import fetch_creds

    # Find the old items
    def find_old_items(in_list):
        out_list = []
        for l in in_list:
            if l[1].startswith("/path/"):
                out_list.append(int(l[0]))
        return out_list

    # Init variables
    cursor = fetch_creds.return_cursor(creds_path)
    cmd = """
          select datasetid, count(datasetid)
          from derivatives_unormd
          group by datasetid
          having count(datasetid) > 97
          """

    # Find the duplicated datasetids
    cursor.execute(cmd)
    dups = cursor.fetchall()
    dups = [d[0] for d in dups]

    find_cmd = "select id, cfgfilelocation from derivatives_unormd " \
               "where datasetid = :arg_1"
    del_cmd = "delete from derivatives_unormd where id = :arg_1"

    # Delete the old entries for each duplicated datasetid
    i = 1
    for d in dups:
        cursor.execute(find_cmd, arg_1=d)
        found_list = cursor.fetchall()
        old_items = find_old_items(found_list)
        for oi in old_items:
            print "deleting entry with id = %d" % oi
            cursor.execute(del_cmd, arg_1=oi)
        cursor.execute("commit")
        print "done with %d/%d" % (i, len(dups))
        i += 1
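
# Example invocation (a sketch; the credentials path is a hypothetical
# placeholder). Note this permanently deletes rows and commits, so it is
# worth dry-running the select against the table first:
#
#     delete_dups('/path/to/credentials.csv')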
def main(inputs_dir, study_name, creds_path, sublist_yaml):
    '''
    Function generally used for task-specific scripting of functions
    declared in this module

    Parameters
    ----------
    inputs_dir : string
        filepath to the directory where all of the subjects' folders
        and sub-folders and niftis will be written to
    study_name : string
        the name of the study/site that all of the subjects will be
        placed in
    creds_path : string
        path to the csv file with 'ACCESS_KEY_ID' as the header and the
        corresponding ASCII text for the key underneath; same with the
        'SECRET_ACCESS_KEY' string and ASCII text
    sublist_yaml : string
        filepath to output the subject list yaml file to

    Returns
    -------
    sublist : list
        Returns a list of dictionaries where the format of each
        dictionary is as follows:
        {'anat': '/path/to/anat.nii.gz',
         'rest': {'rest_1_rest': '/path/to/rest_1.nii.gz',
                  'rest_2_rest': '/path/to/rest_2.nii.gz', ...},
         'subject_id': 'subject1234',
         'unique_id': 'session_1'}
    '''

    # Import packages
    import csv
    import fetch_creds
    import os
    import sys
    import yaml

    # Init variables
    cursor = fetch_creds.return_cursor(creds_path)

    # Test the yaml subject list file for errors
    sublist_yaml = os.path.abspath(sublist_yaml)
    if os.path.exists(sublist_yaml):
        print '%s exists, please specify a different path' % sublist_yaml
        sys.exit()
    elif os.access(os.path.dirname(sublist_yaml), os.W_OK):
        print 'Subject list will be written to %s' % sublist_yaml
    else:
        print 'Cannot write to output directory for sublist %s; please '\
              'specify a different path' % sublist_yaml
        sys.exit()

    # Query IMAGE_AGGREGATE for subject image info, get S3 path from IMAGE03
    # Here's how the column names correspond between the two:
    # IMAGE_AGGREGATE            ---> IMAGE03 columns          EXAMPLE
    # ---------------                 ---------------          -------
    # subjectkey                      subjectkey               'NDARABCD1234'
    # image_subtype                   image_description        'MPRAGE', 'EPI'
    # image_category                  image_modality           'MRI', 'FMRI'
    # image_scanner_manufacturer      scanner_manufacturer_pd  'SIEMENS'
    # image_tr                        mri_repetition_time_pd   '2.53'
    # image_te                        mri_echo_time_pd         '0.033'
    # image_flip_angle                flip_angle               '90'

    # Query commands
    # Get all of the data from IMAGE_AGGREGATE
    agg_query = '''
                select subjectkey, interview_age, subject_id,
                image_category, image_dimensions, image_subtype,
                image_scanner_manufacturer, image_tr, image_te,
                image_flip_angle
                from
                IMAGE_AGGREGATE
                '''

    # Get initial list from image_aggregate table
    print 'Querying database...'
    cursor.execute(agg_query)
    img_agg_results = cursor.fetchall()

    # Build subkey dictionary from query results
    subkey_dict = build_subkey_dict(cursor, img_agg_results)

    # Build phenotypic file from subkey_dict
    pheno_list = build_pheno_list(cursor, subkey_dict)

    # Save pheno to disk as csv in the same directory as subject list
    pheno_csv = os.path.join(os.path.dirname(sublist_yaml), 'subs_pheno.csv')
    with open(pheno_csv, 'w') as csv_file:
        csv_out = csv.writer(csv_file)
        for pheno_entry in pheno_list:
            csv_out.writerow(pheno_entry)
    print 'Successfully saved phenotypic file to %s' % pheno_csv

    # Now create S3-file cpac-sublist, unique id is interview age for now
    # Also restricted to 1 anatomical image for now
    s3_sublist = [{'subject_id': str(subkey),
                   'unique_id': entry_dict['anat'][0][1],
                   'anat': entry_dict['anat'][0][-1],
                   'rest': {'rest_%d_rest' % (rest_num+1):
                            entry_dict['rest'][rest_num][-1]
                            for rest_num in range(len(entry_dict['rest']))}}
                  for subkey, entry_dict in subkey_dict.items()]

    # If downloading imaging data
    if inputs_dir and study_name:
        # Create the directory if it does not exist
        if not os.path.exists(inputs_dir):
            try:
                print 'creating inputs directory: %s' % inputs_dir
                os.makedirs(inputs_dir)
            except OSError as err:
                print 'Unable to make inputs directory %s' % inputs_dir
                print 'This might be due to permissions: %s' % err
                sys.exit()
        # Download imaging data and build local subject list
        local_sublist = download_s3_sublist(s3_sublist, inputs_dir,
                                            study_name, creds_path)
        # Use local sublist
        sublist = local_sublist
    # Otherwise, just use S3 sublist
    else:
        sublist = s3_sublist

    # And write it to disk
    with open(sublist_yaml, 'w') as f:
        f.write(yaml.dump(sublist))

    # Return the subject list
    return sublist
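
# Example invocations (sketches; all paths and the study name below are
# hypothetical placeholders). Passing empty inputs_dir/study_name skips the
# download step and writes an S3-path subject list only:
#
#     # Download images locally and build a local-path sublist
#     sublist = main('/data/inputs', 'site_1',
#                    '/path/to/credentials.csv', '/path/to/sublist.yml')
#
#     # Build an S3-path sublist without downloading
#     sublist = main(None, None,
#                    '/path/to/credentials.csv', '/path/to/sublist.yml')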
def main(creds_path, creds_path2, bucket, b_prefix, pipeline, num_res):
    '''
    Function that analyzes data in an S3 bucket and then uploads it
    into a tabular format as an entry in a database table

    Parameters
    ----------
    creds_path : string
        filepath to the S3 bucket credentials as a csv file
    creds_path2 : string
        filepath to the database instance credentials as a csv file
    bucket : string
        name of the S3 bucket to analyze data from
    b_prefix : string
        prefix filepath within the S3 bucket to parse for data
    pipeline : string
        name of the pipeline to gather outputs from for tabulating in DB
    num_res : integer
        the number of results you would expect the pipeline to have per
        derivative when checking if the information was already entered

    Returns
    -------
    src_list : list (boto Keys)
        a list of the keys that were inserted into the database
    '''

    # Import packages
    import fetch_creds

    # ANTs
    if pipeline == 'ants':
        import ants_insert as db_insert
    # CIVET
    elif pipeline == 'civet':
        import civet_insert as db_insert
    # Freesurfer
    elif pipeline == 'freesurfer':
        import freesurfer_insert as db_insert
    # Otherwise, assume it's ccs, cpac, dparsf, or niak
    else:
        import insert_utils as db_insert

    # Init variables
    prefix = 'https://s3.amazonaws.com/' + bucket

    # Get AWS keys
    b = fetch_creds.return_bucket(creds_path, bucket)
    cursor = fetch_creds.return_cursor(creds_path2)

    # Set up lists of keys
    src_list = b.list(prefix=b_prefix)
    file_list = [s for s in src_list if pipeline in str(s.name)]
    no_files = len(file_list)
    print 'done creating file list, it has %d elements' % no_files

    # Iterate through list
    i = 0
    for f in file_list:
        url_path = prefix + str(f.name)
        exists = check_existing(cursor, url_path, 'abide_img_results',
                                num_res)
        if not exists:
            db_insert.upload_results(cursor, url_path)
            print 'uploaded file %s successfully!' % url_path
        else:
            print 'already loaded file %s, skipping...' % url_path
        i += 1
        per = 100*(float(i)/no_files)
        print 'done with file %d/%d\n%f%% complete\n' % (i, no_files, per)

    # Return the src_list
    return src_list
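
# Example invocation (a sketch; the bucket name, prefix, num_res value, and
# paths are hypothetical). num_res should match the number of rows the
# chosen pipeline is expected to produce per derivative:
#
#     src_list = main('/path/to/s3_creds.csv', '/path/to/db_creds.csv',
#                     'my-bucket', 'outputs/', 'ants', 100)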
def main(creds_path, table_name, ids_yml, bucket_name=None, roi_map_yml=None):
    """
    Function to query the table of interest for entries in the
    datasetid list from the ids_yaml file.

    Parameters
    ----------
    creds_path : string
        path to the csv file with 'Access Key Id' as the header and the
        corresponding ASCII text for the key underneath; same with the
        'Secret Access Key' string and ASCII text
    table_name : string
        the name of the table to query in miNDAR database
    ids_yml : string
        filepath to the input yaml file that contains a list of
        datasetids to query
    bucket_name : string (optional)
        the name of the bucket to get data from; only needed for ROI
        entries upload
    roi_map_yml : string (optional)
        filepath to the input yaml file that contains a dictionary of
        roi labels and names; only needed for ROI entries upload

    Returns
    -------
    None
        This function does not return a value.
    """

    # Import packages
    import fetch_creds
    import yaml

    # Init variables
    cursor = fetch_creds.return_cursor(creds_path)
    ids_list = yaml.load(open(ids_yml, "r"))
    no_files = len(ids_list)
    s3_prefix = "s3://ndar_data/outputs/"

    # Init roi mapping dictionary if it was specified
    if roi_map_yml:
        roi_map_dict = yaml.load(open(roi_map_yml, "r"))
        num_entries = len(roi_map_dict)
    else:
        roi_map_dict = None
        num_entries = 1

    # Go through the list
    i = 0
    for id in ids_list:
        cmd = "select * from %s where datasetid = :arg_1" % table_name
        cursor.execute(cmd, arg_1=id)
        res = cursor.fetchall()
        num_res = len(res)
        # If the number of entries isn't what we expect
        if num_res < num_entries:
            # If there is an incomplete number of entries, delete them
            if num_res > 0:
                print "Deleting partially-populated entries with " \
                      "datasetid = %s" % id
                cursor.execute("delete from %s where datasetid = :arg_1" %
                               table_name, arg_1=id)
            # If we're loading in ROIs, get the roi_dict from the S3 bucket
            if roi_map_dict:
                roi_dict = get_roi_dict(creds_path, bucket_name, id)
                s3_path = None
            else:
                roi_dict = None
                s3_path = s3_prefix + id + "/" + id + \
                          "_corticalthickness_normd.nii.gz"
            # And populate the table entries
            insert_unormd(cursor, id, table_name, s3_path=s3_path,
                          roi_map=roi_map_dict, roi_dict=roi_dict)
            print "Successfully inserted entry %s!" % id
        # If we see more than we expect, raise an error
        elif num_res > num_entries:
            raise ValueError, "more entries found than expected, " \
                              "investigate this manually, datasetid: %s" % id
        # Otherwise, the amount of entries is the amount we expect, move on
        else:
            print "Found the right amount of entries, dataset: %s is good" % id

        # Increment counter
        i += 1
        per = 100 * (float(i) / no_files)
        print "done with file %d/%d\n%f%% complete\n" % (i, no_files, per)
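
# Example invocations (sketches; paths, table names, and the bucket name
# are hypothetical). ROI entries need both a bucket name and an roi map
# yaml, image entries need neither:
#
#     # Image (nifti) entries
#     main('/path/to/credentials.csv', 'img_derivatives_unormd',
#          '/path/to/datasetids.yml')
#
#     # ROI entries
#     main('/path/to/credentials.csv', 'derivatives_unormd',
#          '/path/to/datasetids.yml', bucket_name='my-bucket',
#          roi_map_yml='/path/to/oasis_roi_map.yml')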
def transfer_table_entries(creds_path):
    '''
    Function to transfer all of the ABIDE subjects results in the
    DERIVATIVES_UNORMD and IMG_DERIVATIVES_UNORMD tables to the
    ABIDE_IMG_RESULTS table

    Parameters
    ----------
    creds_path : string (filepath)
        path to the csv file with 'Access Key Id' as the header and the
        corresponding ASCII text for the key underneath; same with the
        'Secret Access Key' string and ASCII text

    Returns
    -------
    None
        This function does not return any value. It transfers table
        entries in an Oracle database.
    '''

    # Import packages
    import time
    import fetch_creds
    import insert_utils

    # Init variables
    cursor = fetch_creds.return_cursor(creds_path)
    deriv_id = insert_utils.return_next_pk(cursor, 'ABIDE_IMG_RESULTS')
    template = 'OASIS-30 Atropos Template'

    # Get ACT img derivatives from img_derivatives_unormd
    imgs_get = '''
               select pipelinename, pipelinetype, pipelinetools,
               pipelineversion, pipelinedescription, name, measurename,
               guid, datasetid, roidescription, roi, template, s3_path,
               cfgfilelocation
               from img_derivatives_unormd
               where instr(datasetid, :arg_1) > 0
               '''

    # Get ROI derivatives from DERIVATIVES_UNORMD
    rois_get = '''
               select pipelinename, pipelinetype, pipelinetools,
               pipelineversion, pipelinedescription, derivativename,
               measurename, guid, datasetid, roidescription, roi,
               template, value, units, cfgfilelocation
               from derivatives_unormd
               where instr(datasetid, :arg_1) > 0
               '''

    # Insert entries into ABIDE_IMG_RESULTS
    air_put = '''
              insert into abide_img_results
              (id, pipelinename, pipelinetype, pipelinetools,
               pipelineversion, pipelinedescription, name, measurename,
               timestamp, guid, datasetid, roidescription, roi, atlas,
               value, units, s3_path, template, cfgfilelocation)
              values
              (:col_1, :col_2, :col_3, :col_4, :col_5, :col_6, :col_7,
               :col_8, :col_9, :col_10, :col_11, :col_12, :col_13,
               :col_14, :col_15, :col_16, :col_17, :col_18, :col_19)
              '''

    # Get abide results from derivatives_unormd (ABIDE id's have an 'a' in them)
    cursor.execute(rois_get, arg_1='a')
    roi_entries = cursor.fetchall()
    print 'Found %d roi results, inserting into ABIDE table' % len(roi_entries)

    # For each ROI entry, copy its fields over to ABIDE_IMG_RESULTS
    for entry in roi_entries:
        # Extract field values from entry result
        pname = entry[0]
        ptype = entry[1]
        ptools = entry[2]
        pver = entry[3]
        pdesc = entry[4]
        dname = entry[5]
        mname = entry[6]
        guid = entry[7]
        datasetid = entry[8]
        roidesc = entry[9]
        roi = entry[10]
        # template --> atlas
        atlas = entry[11]
        value = entry[12]
        units = entry[13]
        cfgfile = entry[14]
        # Timestamp
        timestamp = str(time.ctime(time.time()))
        # Find/make s3 path
        s3_path = make_roi_s3(cursor, datasetid)
        # And insert all of this into ABIDE_IMG_RESULTS
        cursor.execute(air_put, col_1=deriv_id, col_2=pname, col_3=ptype,
                       col_4=ptools, col_5=pver, col_6=pdesc, col_7=dname,
                       col_8=mname, col_9=timestamp, col_10=guid,
                       col_11=datasetid, col_12=roidesc, col_13=roi,
                       col_14=atlas, col_15=value, col_16=units,
                       col_17=s3_path, col_18=template, col_19=cfgfile)
        # Commit changes
        cursor.execute('commit')
        # Increment to next unique pk id
        deriv_id += 1
        print deriv_id

    # Get abide results from img_derivatives_unormd (ABIDE id's have an 'a' in them)
    cursor.execute(imgs_get, arg_1='a')
    img_entries = cursor.fetchall()
    print 'Found %d image results, inserting into ABIDE table' % len(img_entries)

    # For each IMG entry, copy its fields over to ABIDE_IMG_RESULTS
    for entry in img_entries:
        # Extract field values from entry result
        pname = entry[0]
        ptype = entry[1]
        ptools = entry[2]
        pver = entry[3]
        pdesc = entry[4]
        dname = entry[5]
        mname = entry[6]
        guid = entry[7]
        datasetid = entry[8]
        roidesc = entry[9]
        roi = entry[10]
        template = entry[11]
        s3_path = entry[12]
        cfgfile = entry[13]
        # Timestamp
        timestamp = str(time.ctime(time.time()))
        # And insert all of this into ABIDE_IMG_RESULTS
        cursor.execute(air_put, col_1=deriv_id, col_2=pname, col_3=ptype,
                       col_4=ptools, col_5=pver, col_6=pdesc, col_7=dname,
                       col_8=mname, col_9=timestamp, col_10=guid,
                       col_11=datasetid, col_12=roidesc, col_13=roi,
                       col_14='', col_15='', col_16='',
                       col_17=s3_path, col_18=template, col_19=cfgfile)
        # Commit changes
        cursor.execute('commit')
        # Increment to next unique pk id
        deriv_id += 1
        print deriv_id
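
# Example invocation (a sketch; the credentials path is a hypothetical
# placeholder):
#
#     transfer_table_entries('/path/to/credentials.csv')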
def insert_img_unormd(id_s3_list, creds_path):
    '''
    Function to insert image results data for ANTs cortical thickness
    to the IMG_DERIVATIVES_UNORMD table in miNDAR.

    Parameters
    ----------
    id_s3_list : list (tuple)
        a list of tuples where each tuple contains 2 strings:
        (datasetid, s3_path)
    creds_path : string (filepath)
        path to the csv file with 'Access Key Id' as the header and the
        corresponding ASCII text for the key underneath; same with the
        'Secret Access Key' string and ASCII text

    Returns
    -------
    None
        This function has no return value. It uploads the data from the
        list to a miNDAR database and exits.
    '''

    # Import packages
    import cx_Oracle
    import datetime
    import fetch_creds
    import os
    import yaml

    # Init variables
    # Create cursor for queries and data inserts
    cursor = fetch_creds.return_cursor(creds_path)

    # Constant arguments for all entries
    deriv_name = 'Normalized cortical thickness image'
    measure_name = 'image'
    # Knowns
    roi_id = 'Grey matter'
    roi_description = 'Grey matter cortex'
    template = 'OASIS-30_Atropos Template'
    atlas_name = 'OASIS-TRT-20_jointfusion_DKT31_CMA_labels_in_OASIS-30.nii.gz'
    atlas_ver = '2mm (2013)'
    pipeline_name = 'act_workflow.py'
    pipeline_type = 'nipype workflow'
    cfg_file_loc = '/path/to/act_workflow.py'
    pipeline_tools = 'ants, nipype, python'
    pipeline_ver = 'v0.1'
    pipeline_desc = 'compute the cortical thickness of an extracted brain ' \
                    'in subject space, and normalize to template'

    # Get the next derivativeid (primary key from table)
    deriv_id = return_next_pk(cursor, 'img_derivatives_unormd')

    # Command string
    cmd = '''
          insert into img_derivatives_unormd
          (id, roi, pipelinename, pipelinetype, cfgfilelocation,
           pipelinetools, pipelineversion, pipelinedescription, name,
           measurename, timestamp, s3_path, template, guid, datasetid,
           roidescription)
          values
          (:col_1, :col_2, :col_3, :col_4, :col_5, :col_6, :col_7,
           :col_8, :col_9, :col_10, :col_11, :col_12, :col_13, :col_14,
           :col_15, :col_16)
          '''

    # Iterate through list and upload data
    for sub in id_s3_list:
        # Timestamp
        timestamp = str(datetime.datetime.now())
        # Get datasetid and s3_path
        dataset_id = sub[0]
        s3_path = sub[1]
        # Get guid
        id_find = '''
                  select guid from abide_subjects
                  where id = :arg_1
                  '''
        cursor.execute(id_find, arg_1=dataset_id)
        res = cursor.fetchall()
        guid = res[0][0]
        # Execute insert command
        cursor.execute(cmd,
                       col_1=int(deriv_id),
                       col_2=roi_id,
                       col_3=pipeline_name,
                       col_4=pipeline_type,
                       col_5=cfg_file_loc,
                       col_6=pipeline_tools,
                       col_7=pipeline_ver,
                       col_8=pipeline_desc,
                       col_9=deriv_name,
                       col_10=measure_name,
                       col_11=timestamp,
                       col_12=s3_path,
                       col_13=template,
                       col_14=guid,
                       col_15=dataset_id,
                       col_16=roi_description)
        # Increment the unique id
        print 'deriv_id ', deriv_id
        deriv_id += 1

    # Commit the changes and close the cursor/connection
    cursor.execute('commit')
    cursor.close()
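
# Example invocation (a sketch; the (datasetid, s3_path) pair and the
# credentials path below are hypothetical placeholders):
#
#     id_s3_list = [('dataset_id_1',
#                    's3://bucket/path/to/corticalthickness_normd.nii.gz')]
#     insert_img_unormd(id_s3_list, '/path/to/credentials.csv')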
def insert_unormd(roi_txt_fpaths, creds_path, oasis_file):
    '''
    Function to insert image results data for ANTs cortical thickness
    to the DERIVATIVES_UNORMD table in miNDAR.

    Parameters
    ----------
    roi_txt_fpaths : list (str)
        a list of filepaths as strings to the ROIstats.txt files to
        upload
    creds_path : string (filepath)
        path to the csv file with 'Access Key Id' as the header and the
        corresponding ASCII text for the key underneath; same with the
        'Secret Access Key' string and ASCII text
    oasis_file : string
        filepath to the Oasis_ROIs.txt file

    Returns
    -------
    None
        This function has no return value. It uploads the data from the
        list to a miNDAR database and exits.
    '''

    # Import packages
    import cx_Oracle
    import datetime
    import fetch_creds
    import os

    # Init variables
    big_dic = {}

    # For each subject
    for sub in roi_txt_fpaths:
        temp_list = []
        # Gather each subject's ROIs
        with open(sub, 'r') as f:
            for i, line in enumerate(f):
                temp_list.append(line.split())
        # Trim off top elements (not ROIs)
        key = temp_list[0][2:]
        val = temp_list[1][2:]
        big_dic[os.path.basename(sub)] = dict(zip(key, val))

    # Build mapping dictionary
    roi_dic = {}
    with open(oasis_file) as f:
        for i, line in enumerate(f):
            # Split the line into list (tab delimiter)
            split_line = line.split('\t')
            # Filter out any blank strings in the list
            split_line = filter(None, split_line)
            # Filter out leading/trailing spaces
            key = split_line[0].strip()
            val = split_line[1].strip()
            # Store in dictionary
            roi_dic[key] = val

    # User and database info
    cursor = fetch_creds.return_cursor(creds_path)

    # Constant arguments for all entries
    atlas_name = 'OASIS-TRT-20_jointfusion_DKT31_CMA_labels_in_OASIS-30.nii.gz'
    atlas_ver = '2mm (2013)'
    pipeline_name = 'act_workflow.py'
    pipeline_type = 'nipype workflow'
    cfg_file_loc = '/path/to/act_workflow.py'
    pipeline_tools = 'ants, nipype, python'
    pipeline_ver = 'v0.1'
    pipeline_desc = 'compute the mean thickness of cortex in ROI for the ' \
                    'ABIDE dataset'
    deriv_name = 'cortical thickness'
    measure_name = 'mean'
    units = 'mm'

    # Get the next derivativeid (primary key from table)
    deriv_id = return_next_pk(cursor, 'derivatives_unormd')

    # Command string
    cmd = '''
          insert into derivatives_unormd
          (id, atlasname, atlasversion, roi, roidescription, pipelinename,
           pipelinetype, cfgfilelocation, pipelinetools, pipelineversion,
           pipelinedescription, derivativename, measurename, datasetid,
           timestamp, value, units, guid)
          values
          (:col_1, :col_2, :col_3, :col_4, :col_5, :col_6, :col_7,
           :col_8, :col_9, :col_10, :col_11, :col_12, :col_13, :col_14,
           :col_15, :col_16, :col_17, :col_18)
          '''

    # Iterate through dictionary and upload data
    not_in_nitrc = []
    for key, val in big_dic.iteritems():
        # Find subject in image03 to get datasetid
        dataset_id = key.split('_')[0]
        print dataset_id
        id_find = '''
                  select guid from abide_subjects
                  where id = :arg_1
                  '''
        cursor.execute(id_find, arg_1=dataset_id)
        res = cursor.fetchall()
        guid = res[0][0]
        print 'dataset_id ', dataset_id
        print 'guid', guid
        # Iterate through ROIs
        for k, v in val.iteritems():
            # Timestamp
            timestamp = str(datetime.datetime.now())
            # Get ROI number
            roi = k.split('Mean_')[1]
            roi_name = roi_dic[k]
            # Value
            value = float(v)
            # Execute insert command
            cursor.execute(cmd,
                           col_1=deriv_id,
                           col_2=atlas_name,
                           col_3=atlas_ver,
                           col_4=roi,
                           col_5=roi_name,
                           col_6=pipeline_name,
                           col_7=pipeline_type,
                           col_8=cfg_file_loc,
                           col_9=pipeline_tools,
                           col_10=pipeline_ver,
                           col_11=pipeline_desc,
                           col_12=deriv_name,
                           col_13=measure_name,
                           col_14=dataset_id,
                           col_15=timestamp,
                           col_16=value,
                           col_17=units,
                           col_18=guid)
            # Increment the unique id
            deriv_id += 1
            print 'deriv_id ', deriv_id

    # And commit changes
    cursor.execute('commit')
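
# Example invocation (a sketch; all paths below are hypothetical
# placeholders for the subjects' ROIstats.txt outputs, the miNDAR
# credentials csv, and the OASIS ROI label mapping file):
#
#     roi_txts = ['/path/to/sub1_ROIstats.txt',
#                 '/path/to/sub2_ROIstats.txt']
#     insert_unormd(roi_txts, '/path/to/credentials.csv',
#                   '/path/to/Oasis_ROIs.txt')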
def main(sub_list, sub_idx):
    '''
    Method to preprocess a subject's image (nifti) data using ANTs and
    upload it to a miNDAR database. The sub_idx argument specifies
    which subject from the subject list to process.

    Parameters
    ----------
    sub_list : list (tuple)
        a python list of tuples, as read from the subject list yaml
        file; each tuple in the list is of the form (img03_id, s3_path),
        where img03_id is an integer corresponding to the image03_id of
        the image and the s3_path is a string corresponding to the path
        of the image on S3,
        e.g. (123, 's3://NDAR_Bucket/subject/image01.nii')
    sub_idx : integer
        index of subject to process from the sub_list yaml file

    Returns
    -------
    None
        The function doesn't return any value, it processes and uploads
        data to S3 and creates a log file of the overall progress.
    '''

    # Import packages
    import boto
    import cx_Oracle
    import fetch_creds
    import logging
    from nipype import logging as np_logging
    from nipype import config
    import os
    import re
    import subprocess
    import sys
    import time
    import yaml

    # Start timing
    start = time.time()

    # Init variables
    base_path = '/data/act_run/'
    creds_path = '/data/creds/Daniels_credentials.csv'
    # Oasis template paths
    oasis_path = '/data/OASIS-30_Atropos_template/'
    oasis_roi_yaml = oasis_path + 'oasis_roi_map.yml'
    # Load in OASIS ROI map
    oasis_roi_map = yaml.load(open(oasis_roi_yaml, 'r'))

    # Setup s3 bucket, RDS cursor connections for uploading
    aws_access_key_id, aws_secret_access_key = \
        fetch_creds.return_aws_keys(creds_path)
    bucket = fetch_creds.return_bucket(creds_path, 'ndar-data')
    cursor = fetch_creds.return_cursor(creds_path)

    # Get subject info
    subject = sub_list[sub_idx-1]
    img03_id_str = str(subject[0])
    s3_path = subject[1]

    # Change bucket name to always be 'NDAR_Central' (caps-sensitive)
    s3_list = s3_path.split('/')
    s3_list[2] = 'NDAR_Central'
    s3_path = '/'.join(s3_list)

    # --- Set up log file ---
    log_file = base_path + 'logs/' + img03_id_str + '.log'
    setup_logger('log1', log_file, logging.INFO)
    ndar_log = logging.getLogger('log1')
    # Log input image stats
    ndar_log.info('-------- RUNNING SUBJECT NO. #%d --------' % (sub_idx))
    ndar_log.info('Start time: %s ' % time.ctime(start))
    ndar_log.info('Input S3 path: %s' % s3_path)
    ndar_log.info('Input IMAGE03 ID: %s' % img03_id_str)

    # --- Search results_stats table for previous entries of that img03_id ---
    cmd = '''
          select rs_id, wf_status
          from results_stats
          where img03_id = :arg_1
          '''
    cursor.execute(cmd, arg_1=int(img03_id_str))
    result = cursor.fetchall()

    # If the record already exists, check to see if it was successful
    wkflow_flag = 0
    for record in result:
        wkflow_status = record[1]
        if wkflow_status == 'PASS':
            wkflow_flag = 1
            rs_id = record[0]
    # Log if already found and exit
    if wkflow_flag:
        ndar_log.info('Image already successfully ran, found at RS_ID: %d'
                      % rs_id)
        sys.exit()

    # --- Download and extract data from NDAR_Central S3 bucket ---
    nifti_file = base_path + 'inputs-ef/' + img03_id_str + '.nii.gz'
    # Execute ndar_unpack for that subject
    cmd = './ndar_unpack'
    if not os.path.exists(nifti_file):
        cmd_list = [cmd, '--aws-access-key-id', aws_access_key_id,
                    '--aws-secret-access-key', aws_secret_access_key,
                    '-v', nifti_file, s3_path]
        cmd_str = ' '.join(cmd_list)
        ndar_log.info('Executing command: %s ' % cmd_str)
        p = subprocess.Popen(cmd_list, stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        p.wait()
        stdout, stderr = p.communicate()
        ndar_log.info(stdout)
    else:
        ndar_log.info('Nifti file already present for IMAGE03 ID %s'
                      % img03_id_str)
        ndar_log.info('ndar_unpack did not need to run')
    extract_status_str = 'PASS'

    # If file was never created, log and exit
    if not os.path.exists(nifti_file):
        ndar_log.info('File extraction FAILED for IMAGE03 ID %s'
                      % img03_id_str)
        extract_status_str = 'FAIL'
        # Upload the log file
        time_str = time.strftime('%Y-%m-%d_%H%M-%S',
                                 time.localtime(time.time()))
        s3_filename = time_str + '_' + img03_id_str
        up_log_list = []
        s3_log_list = []
        s3_log_path = 'logs/' + s3_filename + '.log'
        up_log_list.append(log_file)
        s3_log_list.append(s3_log_path)
        upload_to_s3(bucket, up_log_list, s3_log_list)
        # Finally upload the record to the database
        add_db_record(cursor, img03_id_str, 'N/A', extract_status_str,
                      'https://s3.amazonaws.com/ndar-data/' + s3_log_path,
                      'N/A', 'N/A')
        # And quit
        sys.exit()

    # Create the nipype workflow
    wf, crash_dir = create_workflow(base_path, img03_id_str, nifti_file,
                                    oasis_path)

    # --- Run the workflow ---
    wf_base_dir = base_path + 'work-dirs/' + img03_id_str
    up_nifti_path = wf_base_dir + \
        '/output/OUTPUT_CorticalThicknessNormalizedToTemplate.nii.gz'
    up_roi_path = wf_base_dir + '/output/ROIstats.txt'
    if os.path.exists(up_nifti_path) and os.path.exists(up_roi_path):
        wf_status = 1
    else:
        wf_status = 0
    if wf_status == 0:
        try:
            ndar_log.info('Running the workflow...')
            wf.run()
            # We're successful at this point, add it as a file to the
            # completed path
            ndar_log.info('Workflow completed successfully for IMAGE03 ID %s'
                          % img03_id_str)
            wf_status = 1
            finish_str = 'Finish time: %s'
        # If the workflow run fails
        except:
            ndar_log.info('ACT Workflow failed for IMAGE03 ID %s'
                          % img03_id_str)
            finish_str = 'Crash time: %s'
    else:
        finish_str = 'Workflow did not need to run as files were ' \
                     'already there: %s'

    # Log finish and total computation time
    fin = time.time()
    elapsed = (fin - start)/60
    ndar_log.info(finish_str % time.ctime(fin))
    ndar_log.info('Total time running IMAGE03 ID %s is: %s minutes'
                  % (img03_id_str, str(elapsed)))

    up_list = []
    s3_list = []
    time_str = time.strftime('%Y-%m-%d_%H-%M-%S', time.localtime(fin))
    s3_filename = time_str + '_' + img03_id_str

    # If workflow completed successfully
    if wf_status:
        # Define cloud data and status
        wf_status_str = 'PASS'
        s3_nifti_path = 'outputs/' + img03_id_str + '/' + img03_id_str + \
                        '_corticalthickness_normd.nii.gz'
        s3_roi_path = 'outputs/' + img03_id_str + '/' + img03_id_str + \
                      '_ROIstats.txt'
        full_s3_nifti_path = 's3://ndar_data/' + s3_nifti_path
        full_s3_roi_path = 's3://ndar_data/' + s3_roi_path
        # Append upload/s3 lists with path names
        up_list.append(up_nifti_path)
        up_list.append(up_roi_path)
        s3_list.append(s3_nifti_path)
        s3_list.append(s3_roi_path)
        # Log nifti and roi files upload
        ndar_log.info('Uploading nifti and roi files...')
        # Create dictionary of ROIs for that subject
        sub_roi_dic = create_roi_dic(up_roi_path)
        try:
            # Insert the ROIs into the unorm'd and norm'd databases
            ndar_log.info('uploading rois...')
            insert_unormd(cursor, img03_id_str, roi_dic=sub_roi_dic)
            # Insert the act nifti into the unorm'd and norm'd databases
            ndar_log.info('uploading imgs...')
            insert_unormd(cursor, img03_id_str, s3_path=full_s3_nifti_path)
        except:
            e = sys.exc_info()[0]
            ndar_log.info('Error inserting results to MINDAR, message: %s'
                          % str(e))
            wf_status_str = 'Error inserting results into MINDAR database'
    # Otherwise, there were crash files, upload those
    else:
        # Define cloud data and status
        wf_status_str = 's3://ndar-data/crashes/' + s3_filename + '/'
        full_s3_nifti_path = 'N/A'
        full_s3_roi_path = 'N/A'
        # Find crash file names/paths
        for root, dirs, files in os.walk(crash_dir):
            root_path = os.path.abspath(root)
            crash_files = files
        # Append crash file and s3 path lists
        for f in crash_files:
            crash_path = root_path + '/' + f
            s3_crash_path = 'crashes/' + s3_filename + '/' + f
            up_list.append(crash_path)
            s3_list.append(s3_crash_path)
        # Log crash file upload
        ndar_log.info('Uploading crash files into %s ...' % wf_status_str)

    # Call the upload function
    upload_to_s3(bucket, up_list, s3_list)
    ndar_log.info('Done')

    # Upload the log file
    up_log_list = []
    s3_log_list = []
    s3_log_path = 'logs/' + s3_filename + '.log'
    up_log_list.append(log_file)
    s3_log_list.append(s3_log_path)
    upload_to_s3(bucket, up_log_list, s3_log_list)

    # Finally upload the record to the database
    add_db_record(cursor, img03_id_str, wf_status_str, extract_status_str,
                  's3://ndar-data/' + s3_log_path, full_s3_nifti_path,
                  full_s3_roi_path)
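
# Example invocation (a sketch; the sublist path is a hypothetical
# placeholder). In practice the subject list would be loaded from the yaml
# file built earlier and the 1-based index passed in from the command line
# or a job scheduler:
#
#     import yaml
#     sub_list = yaml.load(open('/path/to/image03_sublist.yml', 'r'))
#     main(sub_list, 1)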