Example 1
def download_s3_sublist(s3_sublist, inputs_dir, study_name, creds_path):
    '''
    Function to download the imaging data from S3 based on an S3-path-
    formatted C-PAC subject list; it then uses the local downloaded files
    as the image paths in the resulting subject list

    Parameters
    ----------
    s3_sublist : list
        a C-PAC-compatible subject list with the S3 filepaths instead
        of local filepaths
    inputs_dir : string
        filepath to the directory where all of the subjects' folders
        and sub-folders and niftis will be written to
    study_name : string
        the name of the study/site that all of the subjects will be
        placed in
    creds_path : string
        path to the csv file with 'ACCESS_KEY_ID' as the header and the
        corresponding ASCII text for the key underneath; same with the
        'SECRET_ACCESS_KEY' string and ASCII text

    Returns
    -------
    s3_sublist: list
        a modified version of the input s3_sublist where the filepaths
        for each image type point to a downloaded file
    '''

    # Import packages
    import fetch_creds
    import os

    # Init variables
    aws_access_key_id, aws_secret_access_key = \
            fetch_creds.return_aws_keys(creds_path)

    # Go through sublist to create filepaths; download data via ndar_unpack
    no_subs = len(s3_sublist)
    for idx, sub in enumerate(s3_sublist):
        # First create subject directories
        unique_sub_dir = os.path.join(inputs_dir, study_name,
                                      str(sub['subject_id']),
                                      str(sub['unique_id']))
        # If the file directory doesn't exist already
        if not os.path.exists(unique_sub_dir):
            print 'creating subject/session directories: %s' % unique_sub_dir
            os.makedirs(unique_sub_dir)

        # ndar_unpack the anatomical
        anat_dir = unique_sub_dir + '/anat_1'
        if not os.path.exists(anat_dir):
            print 'creating anatomical directory: %s' % anat_dir
            os.makedirs(anat_dir)
        # Set nifti file output
        s3_path = sub['anat']
        out_nii = anat_dir + '/' + 'anat.nii.gz'
        if not os.path.exists(out_nii):
            # And try and extract the image
            try:
                print 'attempting to download and extract %s to %s'\
                      % (s3_path, out_nii)
                run_ndar_unpack(s3_path, out_nii, aws_access_key_id, 
                                                  aws_secret_access_key)
                print 'Success!'
                # If it is successful, replace s3_path with out_nii
                s3_sublist[idx]['anat'] = out_nii
            except OSError as e:
                print e
                print 'Failed anatomical image %s extraction for %s.\n'\
                      'Trying functional...' % (s3_path, sub['subject_id'])
        else:
            print 'Anatomical file %s exists! Skipping...' % out_nii

        # ndar_unpack the functional for each functional
        for folder, s3_path in sub['rest'].items():
            rest_dir = unique_sub_dir + '/' + folder.split('_rest')[0]
            if not os.path.exists(rest_dir):
                print 'creating functional directory: %s' % rest_dir
                os.makedirs(rest_dir)
            out_nii = rest_dir + '/' + 'rest.nii.gz'
            # And try and extract the image
            if not os.path.exists(out_nii):
                try:
                    print 'attempting to download and extract %s to %s'\
                          % (s3_path, out_nii)
                    run_ndar_unpack(s3_path, out_nii, aws_access_key_id, 
                                                      aws_secret_access_key)
                    print 'Success!'
                    # If it is successful, replace s3_path with out_nii
                    s3_sublist[idx]['rest'][folder] = out_nii
                except OSError as e:
                    print e
                    print 'Failed functional image %s extraction for %s'\
                          % (s3_path, sub['subject_id'])
            else:
                print 'Functional file %s exists! Skipping...' % out_nii

        # Print % complete
        i = idx+1
        per = 100*(float(i)/no_subs)
        print 'Done extracting %d/%d\n%f%% complete' % (i, no_subs, per)

    # Return the new s3_sublist
    return s3_sublist
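Example 1 relies on a run_ndar_unpack helper that is not shown in the snippet. Below is a minimal sketch of what such a helper might look like, assuming it simply shells out to the ndar_unpack command-line tool with the same flags used in Example 2 (--aws-access-key-id, --aws-secret-access-key, -v) and raises OSError on failure so the except blocks above can catch it; the helper's body is inferred, not taken from the original source.

def run_ndar_unpack(s3_path, out_nii, aws_access_key_id, aws_secret_access_key):
    '''
    Hypothetical helper (not part of the original snippet): download and
    extract a single image from S3 by shelling out to the ndar_unpack
    command-line tool, mirroring the invocation shown in Example 2
    '''

    # Import packages
    import os
    import subprocess

    # Build the ndar_unpack command with AWS credentials and output path
    cmd_list = ['./ndar_unpack',
                '--aws-access-key-id', aws_access_key_id,
                '--aws-secret-access-key', aws_secret_access_key,
                '-v', out_nii, s3_path]

    # Run the command and capture combined stdout/stderr
    p = subprocess.Popen(cmd_list, stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT)
    stdout, _ = p.communicate()

    # Raise OSError (caught by the caller) if the extraction failed
    if p.returncode != 0 or not os.path.exists(out_nii):
        raise OSError('ndar_unpack failed for %s:\n%s' % (s3_path, stdout))

With such a helper in place, a call like download_s3_sublist(sub_list, '/data/inputs', 'site_1', '/path/to/creds.csv') (hypothetical paths) expects each sub_list entry to be a dict with 'subject_id', 'unique_id', 'anat', and 'rest' keys, where 'rest' maps scan folder names to S3 paths, as the indexing in the function body shows.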
Example 2
def main(sub_list, sub_idx):
    '''
    Method to preprocess a subject's image (nifti) data using ANTs
    and upload the results to a miNDAR database. The first argument
    to the calling script specifies the (1-based) index of the
    subject in the subject list to process.

    Parameters
    ----------
    sub_list : list
        a python list of tuples loaded from the subject list yaml
        file; each tuple in the list is of the form (img03_id, s3_path),
        where img03_id is an integer corresponding to the image03_id
        of the image and s3_path is a string corresponding to the
        path of the image on S3,
        e.g. (123, 's3://NDAR_Bucket/subject/image01.nii')
    sub_idx : integer
        1-based index of the subject to process from sub_list

    Returns
    -------
    None
        The function doesn't return a value; it processes the data,
        uploads the results to S3, and writes a log file of the
        overall progress.
    '''

    # Import packages
    import boto
    import cx_Oracle
    import fetch_creds
    import logging
    from nipype import logging as np_logging
    from nipype import config
    import os
    import re
    import subprocess
    import sys
    import time
    import yaml

    # Start timing
    start = time.time()

    # Init variables
    base_path = '/data/act_run/'
    creds_path = '/data/creds/Daniels_credentials.csv'
    # Oasis template paths
    oasis_path = '/data/OASIS-30_Atropos_template/'
    oasis_roi_yaml = oasis_path + 'oasis_roi_map.yml'
    # Load in OASIS ROI map
    with open(oasis_roi_yaml, 'r') as oasis_yaml_file:
        oasis_roi_map = yaml.load(oasis_yaml_file)
    
    # Setup s3 bucket, RDS cursor connections for uploading
    aws_access_key_id, aws_secret_access_key = fetch_creds.return_aws_keys(creds_path)
    bucket = fetch_creds.return_bucket(creds_path, 'ndar-data')
    cursor = fetch_creds.return_cursor(creds_path)

    # Get subject info
    subject = sub_list[sub_idx-1]
    img03_id_str = str(subject[0])
    s3_path = subject[1]
    
    # Change bucket name to always be 'NDAR_Central' (case-sensitive)
    s3_list = s3_path.split('/')
    s3_list[2] = 'NDAR_Central'
    s3_path = '/'.join(s3_list)

    # --- Set up log file ---
    log_file = base_path + 'logs/' + img03_id_str + '.log'
    setup_logger('log1', log_file, logging.INFO)
    ndar_log = logging.getLogger('log1')
    # Log input image stats
    ndar_log.info('-------- RUNNING SUBJECT NO. #%d --------' % (sub_idx))
    ndar_log.info('Start time: %s ' % time.ctime(start))
    ndar_log.info('Input S3 path: %s' % s3_path)
    ndar_log.info('Input IMAGE03 ID: %s' % img03_id_str)

    # --- Search results_stats table for previous entries of that img03_id ---
    cmd = '''
          select rs_id, wf_status
          from results_stats
          where img03_id = :arg_1
          '''
    cursor.execute(cmd, arg_1=int(img03_id_str))
    result = cursor.fetchall()
    # If the record already exists, check to see if it was successful
    wkflow_flag = 0
    for record in result:
        wkflow_status = record[1]
        if wkflow_status == 'PASS':
            wkflow_flag = 1
            rs_id = record[0]
    # Log if already found and exit
    if wkflow_flag:
        ndar_log.info('Image already successfully ran, found at RS_ID: %d' % rs_id)
        sys.exit()

    # --- Download and extract data from NDAR_Central S3 bucket ---
    nifti_file = base_path + 'inputs-ef/' + img03_id_str + '.nii.gz'
    # Execute ndar_unpack for that subject
    cmd = './ndar_unpack'
    if not os.path.exists(nifti_file):
        cmd_list = [cmd, '--aws-access-key-id', aws_access_key_id, 
                    '--aws-secret-access-key', aws_secret_access_key, 
                    '-v', nifti_file, s3_path]
        cmd_str = ' '.join(cmd_list)
        ndar_log.info('Executing command: %s ' % cmd_str)
        p = subprocess.Popen(cmd_list, stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        # communicate() waits for the process; calling wait() first can
        # deadlock when stdout is a pipe (stderr is merged into stdout)
        stdout, _ = p.communicate()
        ndar_log.info(stdout)
    else:
        ndar_log.info('Nifti file already present for IMAGE03 ID %s' % img03_id_str)
        ndar_log.info('ndar_unpack did not need to run')

    extract_status_str = 'PASS'
    # If file was never created, log and exit
    if not os.path.exists(nifti_file):
        ndar_log.info('File extraction FAILED for IMAGE03 ID %s' % img03_id_str)
        extract_status_str = 'FAIL'
        # Upload the log file
        time_str = time.strftime('%Y-%m-%d_%H%M-%S',time.localtime(time.time()))
        s3_filename = time_str + '_' + img03_id_str
        up_log_list = []
        s3_log_list = []
        s3_log_path = 'logs/' + s3_filename + '.log'
        up_log_list.append(log_file)
        s3_log_list.append(s3_log_path)
        upload_to_s3(bucket, up_log_list, s3_log_list)
        # Finally upload the record to the database
        add_db_record(cursor, img03_id_str, 'N/A', extract_status_str, 
                      'https://s3.amazonaws.com/ndar-data/' + s3_log_path, 'N/A', 'N/A')
        # And quit
        sys.exit()

    # Create the nipype workflow
    wf, crash_dir = create_workflow(base_path, img03_id_str, nifti_file, oasis_path)

    # --- Run the workflow ---
    wf_base_dir = base_path + 'work-dirs/' + img03_id_str
    up_nifti_path = wf_base_dir + \
                    '/output/OUTPUT_CorticalThicknessNormalizedToTemplate.nii.gz'
    up_roi_path = wf_base_dir + '/output/ROIstats.txt'
    if os.path.exists(up_nifti_path) and os.path.exists(up_roi_path):
        wf_status = 1
    else:
        wf_status = 0
    if wf_status == 0:
        try:
            ndar_log.info('Running the workflow...')
            wf.run()
            # We're successful at this point, add it as a file to the completed path
            ndar_log.info('Workflow completed successfully for IMAGE03 ID %s' % img03_id_str)
            wf_status = 1
            finish_str = 'Finish time: %s'
        # If the workflow run fails
        except:
            ndar_log.info('ACT Workflow failed for IMAGE03 ID %s' % img03_id_str)
            finish_str = 'Crash time: %s'
    else:
        finish_str = 'Workflow did not need to run as files were already there: %s'

    # Log finish and total computation time
    fin = time.time()
    elapsed = (fin - start)/60
    ndar_log.info(finish_str % time.ctime(fin))
    ndar_log.info('Total time running IMAGE03 ID %s is: %s minutes' \
                  %(img03_id_str,str(elapsed)))

    up_list = []
    s3_list = []
    time_str = time.strftime('%Y-%m-%d_%H-%M-%S',time.localtime(fin))
    s3_filename = time_str + '_' + img03_id_str

    # If workflow completed successfully
    if wf_status:
        # Define cloud data and status
        wf_status_str = 'PASS'
        s3_nifti_path = 'outputs/' + img03_id_str + '/' + img03_id_str + \
                        '_corticalthickness_normd.nii.gz'
        s3_roi_path = 'outputs/' + img03_id_str + '/' + img03_id_str + \
                      '_ROIstats.txt' 
        full_s3_nifti_path = 's3://ndar-data/' + s3_nifti_path
        full_s3_roi_path = 's3://ndar-data/' + s3_roi_path
        # Append upload/s3 lists with path names
        up_list.append(up_nifti_path)
        up_list.append(up_roi_path)
        s3_list.append(s3_nifti_path)
        s3_list.append(s3_roi_path)
        # Log nifti and roi files upload
        ndar_log.info('Uploading nifti and roi files...')
        # Create dictionary of ROIs for that subject
        sub_roi_dic = create_roi_dic(up_roi_path)
        try:
            # Insert the ROIs into the unorm'd and norm'd databases
            ndar_log.info('uploading rois...')
            print '----------------------------------'
            insert_unormd(cursor, img03_id_str, roi_dic=sub_roi_dic)
            ndar_log.info('uploading imgs...')
            # Insert the act nifti into the unorm'd and norm'd databases
            insert_unormd(cursor, img03_id_str, s3_path=full_s3_nifti_path)
        except Exception as e:
            ndar_log.info('Error inserting results to MINDAR, message: %s' % str(e))
            wf_status_str = 'Error inserting results into MINDAR database'
    # Otherwise, there were crash files, upload those
    else:
        # Define cloud data and status
        wf_status_str = 's3://ndar-data/crashes/' + s3_filename + '/'
        full_s3_nifti_path = 'N/A'
        full_s3_roi_path = 'N/A'
        # Find crash file names/paths
        for root, dirs, files in os.walk(crash_dir):
            root_path = os.path.abspath(root)
            crash_files = files
        # Append crash file and s3 path lists
        for f in crash_files:
            crash_path = root_path + '/' + f
            s3_crash_path = 'crashes/' + s3_filename + '/' + f
            up_list.append(crash_path)
            s3_list.append(s3_crash_path)
        # Log crash file upload 
        ndar_log.info('Uploading crash files into %s ...' % wf_status_str)

    # Call the upload function
    upload_to_s3(bucket, up_list, s3_list)
    ndar_log.info('Done')

    # Upload the log file
    up_log_list = []
    s3_log_list = []
    s3_log_path = 'logs/' + s3_filename + '.log'
    up_log_list.append(log_file)
    s3_log_list.append(s3_log_path)
    upload_to_s3(bucket, up_log_list, s3_log_list) 

    # Finally upload the record to the database
    add_db_record(cursor, 
                  img03_id_str, 
                  wf_status_str, 
                  extract_status_str, 
                  's3://ndar-data/'+s3_log_path, 
                  full_s3_nifti_path, 
                  full_s3_roi_path)
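The docstring for Example 2 indicates that main is driven by a small command-line wrapper which loads the subject list yaml and takes the subject index as the script's first argument. Below is a minimal driver sketch under those assumptions; the yaml path and the __main__ block are illustrative, not part of the original code.

# Hypothetical driver: load the subject list yaml and dispatch one subject
# to main() based on the script's first argument (a 1-based index)
if __name__ == '__main__':
    import sys
    import yaml

    # Assumed location of the subject list yaml file
    sub_list_yaml = '/data/act_run/subject_list.yml'
    with open(sub_list_yaml, 'r') as yaml_file:
        sub_list = yaml.load(yaml_file)

    # First script argument is the 1-based subject index
    main(sub_list, int(sys.argv[1]))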