Example #1
def get_roi_dict(creds_path, bucket_name, datasetid):
    """
    Function to read the text contents of an ROI stats file, generated
    by an ANTs cortical thickness run, from an S3 bucket

    Parameters
    ----------
    creds_path : string
        path to the csv file with 'Access Key Id' as the header and the
        corresponding ASCII text for the key underneath; same with the
        'Secret Access Key' string and ASCII text
    bucket_name : string
        the name of the bucket to get the ROI txt file from
    datasetid : string
        the dataset id of interest

    Returns
    -------
    sub_dict : dictionary {str : str}
        the ROI dictionary with the ROI label (key) mapped to its ROI
        value
    """

    # Import packages
    import fetch_creds

    # Init variables
    bucket = fetch_creds.return_bucket(creds_path, bucket_name)
    key_path = "outputs/" + datasetid + "/" + datasetid + "_ROIstats.txt"
    key = bucket.get_key(key_path)

    # Get file contents and split into list
    kstring = key.get_contents_as_string()
    temp_list = kstring.split("\n")

    # Form subject ROI dictionary: the first line holds the ROI labels,
    # the second line holds their values; the first two columns are skipped
    roi_labels = temp_list[0].split()[2:]
    roi_vals = temp_list[1].split()[2:]
    sub_dict = dict(zip(roi_labels, roi_vals))

    # Return the subject ROI dictionary
    return sub_dict
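# Usage sketch (not part of the original example): shows how get_roi_dict
# might be called. The credentials path, bucket name, and dataset id below
# are hypothetical placeholders.
if __name__ == '__main__':
    roi_dict = get_roi_dict('/path/to/aws_creds.csv', 'my-roi-bucket', '12345')
    # Print each ROI label alongside its value
    for roi_label, roi_value in roi_dict.items():
        print roi_label, roi_value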
Example #2
def main(creds_path, creds_path2, bucket, b_prefix, pipeline, num_res):
    '''
    Function that scans data in an S3 bucket and uploads each result
    as a tabular entry in a database table

    Parameters
    ----------
    creds_path : string
        filepath to the S3 bucket credentials as a csv file
    creds_path2 : string
        filepath to the database instance credentials as a csv file
    bucket : string
        name of the S3 bucket to analyze data from
    b_prefix : string
        prefix filepath within the S3 bucket to parse for data
    pipeline : string
        name of the pipeline to gather outputs from for tabulating in DB
    num_res : integer
        the number of results the pipeline is expected to produce per
        derivative; used to check whether the information was already
        entered

    Returns
    -------
    src_list : list (boto Keys)
        the keys found under the given S3 prefix that were candidates
        for insertion into the database
    '''

    # Import packages
    import fetch_creds
    # ANTs
    if pipeline == 'ants':
        import ants_insert as db_insert
    # CIVET
    elif pipeline == 'civet':
        import civet_insert as db_insert
    # Freesurfer
    elif pipeline == 'freesurfer':
        import freesurfer_insert as db_insert
    # Otherwise, assume it's ccs, cpac, dparsf, or niak
    else:
        import insert_utils as db_insert

    # Init variables
    prefix = 'https://s3.amazonaws.com/' + bucket + '/'

    # Get AWS keys
    b = fetch_creds.return_bucket(creds_path, bucket)
    cursor = fetch_creds.return_cursor(creds_path2)

    # Set up lists of keys
    src_list = b.list(prefix=b_prefix)
    file_list = [s for s in src_list if pipeline in str(s.name)]

    # Report how many matching files were found
    no_files = len(file_list)
    print 'done creating file list, it has %d elements' % no_files

    # Iterate through list
    for i, f in enumerate(file_list, 1):
        url_path = prefix + str(f.name)
        exists = check_existing(cursor, url_path, 'abide_img_results', num_res)
        if not exists:
            db_insert.upload_results(cursor, url_path)
            print 'uploaded file %s successfully!' % url_path
        else:
            print 'already loaded file %s, skipping...' % url_path
        per = 100*(float(i)/no_files)
        print 'done with file %d/%d\n%f%% complete\n' % (i, no_files, per)

    # Return the src_list
    return src_list
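# Usage sketch (not part of the original example): shows how main might be
# invoked. The credential paths, bucket name, prefix, and num_res value below
# are hypothetical placeholders.
if __name__ == '__main__':
    main(creds_path='/path/to/s3_creds.csv',
         creds_path2='/path/to/db_creds.csv',
         bucket='my-results-bucket',
         b_prefix='outputs/',
         pipeline='ants',
         num_res=2)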
Example #3
def main(sub_list, sub_idx):
    '''
    Method to preprocess a subject's image (nifti) data using ANTs
    and upload it to a miNDAR database. The first argument to the
    script specifies the (1-based) index of the subject to process
    from the subject list.

    Parameters
    ----------
    sub_list : list of tuples
        the subject list, loaded from a yaml file, where each tuple is
        of the form (img03_id, s3_path); img03_id is an integer
        corresponding to the image03_id of the image and s3_path is a
        string corresponding to the path of the image on S3,
        e.g. (123, 's3://NDAR_Bucket/subject/image01.nii')
    sub_idx : integer
        1-based index of the subject to process from sub_list

    Returns
    -------
    None
        The function doesn't return any value; it processes and uploads
        data to S3 and creates a log file of the overall progress.
    '''

    # Import packages
    import boto
    import cx_Oracle
    import fetch_creds
    import logging
    from nipype import logging as np_logging
    from nipype import config
    import os
    import re
    import subprocess
    import sys
    import time
    import yaml

    # Start timing
    start = time.time()

    # Init variables
    base_path = '/data/act_run/'
    creds_path = '/data/creds/Daniels_credentials.csv'
    # Oasis template paths
    oasis_path = '/data/OASIS-30_Atropos_template/'
    oasis_roi_yaml = oasis_path + 'oasis_roi_map.yml'
    # Load in OASIS ROI map
    oasis_roi_map = yaml.load(open(oasis_roi_yaml,'r'))
    
    # Setup s3 bucket, RDS cursor connections for uploading
    aws_access_key_id, aws_secret_access_key = fetch_creds.return_aws_keys(creds_path)
    bucket = fetch_creds.return_bucket(creds_path, 'ndar-data')
    cursor = fetch_creds.return_cursor(creds_path)

    # Get subject info
    subject = sub_list[sub_idx-1]
    img03_id_str = str(subject[0])
    s3_path = subject[1]
    
    # Change bucket name to always be 'NDAR_Central' (case-sensitive)
    s3_list = s3_path.split('/')
    s3_list[2] = 'NDAR_Central'
    s3_path = '/'.join(s3_list)

    # --- Set up log file ---
    log_file = base_path + 'logs/' + img03_id_str + '.log'
    setup_logger('log1', log_file, logging.INFO)
    ndar_log = logging.getLogger('log1')
    # Log input image stats
    ndar_log.info('-------- RUNNING SUBJECT NO. #%d --------' % (sub_idx))
    ndar_log.info('Start time: %s ' % time.ctime(start))
    ndar_log.info('Input S3 path: %s' % s3_path)
    ndar_log.info('Input IMAGE03 ID: %s' % img03_id_str)

    # --- Search results_stats table for previous entries of that img03_id ---
    cmd = '''
          select rs_id, wf_status
          from results_stats
          where img03_id = :arg_1
          '''
    cursor.execute(cmd, arg_1=int(img03_id_str))
    result = cursor.fetchall()
    # If the record already exists, check to see if it was successful
    wkflow_flag = 0
    for record in result:
        wkflow_status = record[1]
        if wkflow_status == 'PASS':
            wkflow_flag = 1
            rs_id = record[0]
    # Log if already found and exit
    if wkflow_flag:
        ndar_log.info('Image already successfully ran, found at RS_ID: %d' % rs_id)
        sys.exit()

    # --- Download and extract data from NDAR_Central S3 bucket ---
    nifti_file = base_path + 'inputs-ef/' + img03_id_str + '.nii.gz'
    # Execute ndar_unpack for that subject
    cmd = './ndar_unpack'
    if not os.path.exists(nifti_file):
        cmd_list = [cmd, '--aws-access-key-id', aws_access_key_id, 
                    '--aws-secret-access-key', aws_secret_access_key, 
                    '-v', nifti_file, s3_path]
        cmd_str = ' '.join(cmd_list)
        ndar_log.info('Executing command: %s ' % cmd_str)
        p = subprocess.Popen(cmd_list, stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        # communicate() waits for the process and reads all of its output
        # (stderr is merged into stdout above), avoiding the pipe deadlock
        # that p.wait() can cause when the stdout buffer fills
        stdout, _ = p.communicate()
        ndar_log.info(stdout)
    else:
        ndar_log.info('Nifti file already present for IMAGE03 ID %s' % img03_id_str)
        ndar_log.info('ndar_unpack did not need to run')

    extract_status_str = 'PASS'
    # If file was never created, log and exit
    if not os.path.exists(nifti_file):
        ndar_log.info('File extraction FAILED for IMAGE03 ID %s' % img03_id_str)
        extract_status_str = 'FAIL'
        # Upload the log file
        time_str = time.strftime('%Y-%m-%d_%H-%M-%S', time.localtime(time.time()))
        s3_filename = time_str + '_' + img03_id_str
        up_log_list = []
        s3_log_list = []
        s3_log_path = 'logs/' + s3_filename + '.log'
        up_log_list.append(log_file)
        s3_log_list.append(s3_log_path)
        upload_to_s3(bucket, up_log_list, s3_log_list)
        # Finally upload the record to the database
        add_db_record(cursor, img03_id_str, 'N/A', extract_status_str, 
                      'https://s3.amazonaws.com/ndar-data/' + s3_log_path, 'N/A', 'N/A')
        # And quit
        sys.exit()

    # Create the nipype workflow
    wf, crash_dir = create_workflow(base_path, img03_id_str, nifti_file, oasis_path)

    # --- Run the workflow ---
    wf_base_dir = base_path + 'work-dirs/' + img03_id_str
    up_nifti_path = wf_base_dir + \
                    '/output/OUTPUT_CorticalThicknessNormalizedToTemplate.nii.gz'
    up_roi_path = wf_base_dir + '/output/ROIstats.txt'
    if os.path.exists(up_nifti_path) and os.path.exists(up_roi_path):
        wf_status = 1
    else:
        wf_status = 0
    if wf_status == 0:
        try:
            ndar_log.info('Running the workflow...')
            wf.run()
            # Workflow ran to completion; mark it successful
            ndar_log.info('Workflow completed successfully for IMAGE03 ID %s' % img03_id_str)
            wf_status = 1
            finish_str = 'Finish time: %s'
        # If the workflow run fails
        except Exception:
            ndar_log.info('ACT Workflow failed for IMAGE03 ID %s' % img03_id_str)
            finish_str = 'Crash time: %s'
    else:
        finish_str = 'Workflow did not need to run as files were already there: %s'

    # Log finish and total computation time
    fin = time.time()
    elapsed = (fin - start)/60
    ndar_log.info(finish_str % time.ctime(fin))
    ndar_log.info('Total time running IMAGE03 ID %s is: %s minutes' \
                  %(img03_id_str,str(elapsed)))

    up_list = []
    s3_list = []
    time_str = time.strftime('%Y-%m-%d_%H-%M-%S',time.localtime(fin))
    s3_filename = time_str + '_' + img03_id_str

    # If workflow completed successfully
    if wf_status:
        # Define cloud data and status
        wf_status_str = 'PASS'
        s3_nifti_path = 'outputs/' + img03_id_str + '/' + img03_id_str + \
                        '_corticalthickness_normd.nii.gz'
        s3_roi_path = 'outputs/' + img03_id_str + '/' + img03_id_str + \
                      '_ROIstats.txt' 
        full_s3_nifti_path = 's3://ndar-data/' + s3_nifti_path
        full_s3_roi_path = 's3://ndar-data/' + s3_roi_path
        # Upload paths
        # Append upload/s3 lists with path names
        up_list.append(up_nifti_path)
        up_list.append(up_roi_path)
        s3_list.append(s3_nifti_path)
        s3_list.append(s3_roi_path)
        # Log nifti and roi files upload
        ndar_log.info('Uploading nifti and roi files...')
        # Create dictionary of ROIs for that subject
        sub_roi_dic = create_roi_dic(up_roi_path)
        try:
            # Insert the ROIs into the unorm'd and norm'd databases
            ndar_log.info('uploading rois...')
            print '----------------------------------'
            insert_unormd(cursor, img03_id_str, roi_dic=sub_roi_dic)
            ndar_log.info('uploading imgs...')
            # Insert the act nifti into the unorm'd and norm'd databases
            insert_unormd(cursor, img03_id_str, s3_path=full_s3_nifti_path)
        except Exception as e:
            ndar_log.info('Error inserting results to MINDAR, message: %s' % str(e))
            wf_status_str = 'Error inserting results into MINDAR database'
    # Otherwise, there were crash files, upload those
    else:
        # Define cloud data and status
        wf_status_str = 's3://ndar-data/crashes/' + s3_filename + '/'
        full_s3_nifti_path = 'N/A'
        full_s3_roi_path = 'N/A'
        # Find crash file names/paths
        for root, dirs, files in os.walk(crash_dir):
            root_path = os.path.abspath(root)
            crash_files = files
        # Append crash file and s3 path lists
        for f in crash_files:
            crash_path = root_path + '/' + f
            s3_crash_path = 'crashes/' + s3_filename + '/' + f
            up_list.append(crash_path)
            s3_list.append(s3_crash_path)
        # Log crash file upload 
        ndar_log.info('Uploading crash files into %s ...' % wf_status_str)

    # Call the upload function
    upload_to_s3(bucket, up_list, s3_list)
    ndar_log.info('Done')

    # Upload the log file
    up_log_list = []
    s3_log_list = []
    s3_log_path = 'logs/' + s3_filename + '.log'
    up_log_list.append(log_file)
    s3_log_list.append(s3_log_path)
    upload_to_s3(bucket, up_log_list, s3_log_list) 

    # Finally upload the record to the database
    add_db_record(cursor, 
                  img03_id_str, 
                  wf_status_str, 
                  extract_status_str, 
                  's3://ndar-data/'+s3_log_path, 
                  full_s3_nifti_path, 
                  full_s3_roi_path)
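
# Usage sketch (not part of the original example): drives main from the
# command line, taking the 1-based subject index as the first argument.
# The subject-list yaml path below is a hypothetical placeholder; the list
# it contains is expected to hold (img03_id, s3_path) tuples as described
# in the docstring.
if __name__ == '__main__':
    import sys
    import yaml
    sub_list_yaml = '/path/to/subject_list.yml'
    sub_list = yaml.load(open(sub_list_yaml, 'r'))
    sub_idx = int(sys.argv[1])
    main(sub_list, sub_idx)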