Code example #1
File: insert_utils.py Project: dragonlet/ndar-dev
def insert_abide_subjects(creds_path, xls_pheno_guid_path):
    # Import packages
    import fetch_creds
    import pandas

    # Init variables
    xlsx_path = xls_pheno_guid_path

    # Connect to database with cursor
    cursor = fetch_creds.return_cursor(creds_path)

    # Load data
    guid_df = pandas.read_excel(xlsx_path)

    # For each subject in the xlsx file, upload their data to table
    nrows = guid_df.shape[0]
    for i in range(nrows):
        sub = guid_df.iloc[i, :]
        guid = sub["GUID"]
        # Test if it's a NaN (registers as a float)
        if type(guid) != float:
            guid = str(guid)
            site_id = str(sub["SITE_ID"])
            sub_id = str(int(sub["SUB_ID"]))
            dx_group = sub["DX_GROUP"]
            dsm_iv_tr = sub["DSM_IV_TR"]
            age = sub["AGE_AT_SCAN"]
            sex = sub["SEX"]
            if sex == 1:
                sex = "M"
            else:
                sex = "F"
            handedness = str(sub["HANDEDNESS_CATEGORY"])
            # Command to insert record
            cmd = """
                  insert into abide_subjects
                  (id, guid, site_id, sub_id, dx_group, dsm_iv_tr, age_at_scan,
                   sex, handedness)
                  values
                  (:col_1, :col_2, :col_3, :col_4, :col_5, :col_6, :col_7, 
                   :col_8, :col_9)
                  """
            cursor.execute(
                cmd,
                col_1=i - 1,
                col_2=guid,
                col_3=site_id,
                col_4=sub_id,
                col_5=dx_group,
                col_6=dsm_iv_tr,
                col_7=age,
                col_8=sex,
                col_9=handedness,
            )
            # Print to screen
            print i, guid
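
A minimal driver for the function above might look like the sketch below; the file paths are placeholders, and the spreadsheet is assumed to carry the GUID, SITE_ID, SUB_ID, DX_GROUP, DSM_IV_TR, AGE_AT_SCAN, SEX and HANDEDNESS_CATEGORY columns read in the loop.

# Hypothetical usage sketch for insert_abide_subjects (paths are placeholders)
if __name__ == '__main__':
    creds_path = '/path/to/credentials.csv'
    xls_pheno_guid_path = '/path/to/abide_pheno_guids.xlsx'
    insert_abide_subjects(creds_path, xls_pheno_guid_path)
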
Code example #2
def main(min_id, max_id, creds_path, output_path):
    '''
    Method to query the IMAGE03 table from a miNDAR database instance
    and create a subject list of the form (img03_id, s3_path), where
    img03_id is an integer corresponding to the image03_id of the DB
    entry and s3_path is a string corresponding to the path of the
    image on S3. It will save the subject list as a yaml file on disk.

    Parameters
    ----------
    min_id : integer
        The minimum of the image03_id range to build the subject list
    max_id : integer
        The maximum of the image03_id range to build the subject list
    creds_path : string (filepath)
        path to the csv file with 'ACCESS_KEY_ID' as the header and the
        corresponding ASCII text for the key underneath; same with the
        'SECRET_ACCESS_KEY' string and ASCII text
    output_path : string (filepath)
        path to save the output subject list yaml file

    Returns
    -------
    sublist : list (tuple)
        A list of tuples, where each tuple consists of (int, str), 
        corresponding to the image03_id and s3_path of the database
        entry.
    '''
    # Import packages
    import fetch_creds
    import os
    import yaml

    # Init variables
    cursor = fetch_creds.return_cursor(creds_path)
    cmd = '''
          select image03_id, image_file from NITRC_IMAGE03
          where 
          image03_id >= :arg_1 and image03_id <= :arg_2
          '''
    out_fp = os.path.abspath(output_path)

    # Execute command
    cursor.execute(cmd, arg_1=min_id, arg_2=max_id)
    res = cursor.fetchall()

    # And save result to yaml file
    with open(out_fp, 'w') as f:
        print 'Saving subject list to %s' % out_fp
        f.write(yaml.dump(res))

    # Return the list
    return res
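
For orientation, a hedged usage sketch of the query function above; the id range and file paths are placeholders.

# Hypothetical usage sketch; the id range and file paths are placeholders
sublist = main(min_id=1, max_id=500,
               creds_path='/path/to/credentials.csv',
               output_path='/path/to/s3_sublist.yml')
# Both the return value and the saved yaml hold (image03_id, s3_path) tuples
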
Code example #3
File: insert_utils.py Project: dragonlet/ndar-dev
def delete_dups(creds_path):

    # Find the old items
    def find_old_items(in_list):
        out_list = []
        for l in in_list:
            if l[1].startswith("/path/"):
                out_list.append(int(l[0]))
        return out_list

    import fetch_creds

    cursor = fetch_creds.return_cursor(creds_path)

    cmd = """
          select datasetid, count(datasetid)
          from derivatives_unormd
          group by datasetid
          having count (datasetid) > 97
          """

    cursor.execute(cmd)
    dups = cursor.fetchall()
    dups = [d[0] for d in dups]

    find_cmd = "select id,cfgfilelocation from derivatives_unormd where datasetid = :arg_1"
    del_cmd = "delete from derivatives_unormd where id = :arg_1"

    i = 1
    for d in dups:
        cursor.execute(find_cmd, arg_1=d)
        found_list = cursor.fetchall()
        old_items = find_old_items(found_list)
        for oi in old_items:
            print "deleting entry with id = %d" % oi
            cursor.execute(del_cmd, arg_1=oi)
            cursor.execute("commit")
        print "done with %d/%d" % (i, len(dups))
        i += 1
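
To make the filtering rule concrete, the sketch below applies the same logic as find_old_items to two made-up (id, cfgfilelocation) rows shaped like the results of the find_cmd query; only entries whose config-file location starts with '/path/' are flagged for deletion.

# Illustrative only: rows mimic (id, cfgfilelocation) results of the find_cmd query
sample_rows = [(101, '/path/to/act_workflow.py'),             # old-style entry, flagged
               (102, 's3://ndar_data/cfgs/act_workflow.py')]  # kept
old_ids = [int(row[0]) for row in sample_rows if row[1].startswith('/path/')]
print(old_ids)  # -> [101]
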
Code example #4
def main(inputs_dir, study_name, creds_path, sublist_yaml):
    '''
    Function generally used for task-specific scripting of functions
    declared in this module

    Parameters
    ----------
    inputs_dir : string
        filepath to the directory where all of the subjects' folders
        and sub-folders and niftis will be written to
    study_name : string
        the name of the study/site that all of the subjects will be
        placed in
    creds_path : string
        path to the csv file with 'ACCESS_KEY_ID' as the header and the
        corresponding ASCII text for the key underneath; same with the
        'SECRET_ACCESS_KEY' string and ASCII text
    sublist_yaml : string
        filepath to output the subject list yaml file to

    Returns
    -------
    sublist : list
        Returns a list of dictionaries where the format of each dict-
        ionary is as follows:
        {'anat': '/path/to/anat.nii.gz',
         'rest': {'rest_1_rest': '/path/to/rest_1.nii.gz',
                  'rest_2_rest': '/path/to/rest_2.nii.gz',
                  ...},
         'subject_id': 'subject1234',
         'unique_id': 'session_1'}
    '''

    # Import packages
    import csv
    import fetch_creds
    import os
    import sys
    import yaml

    # Init variables
    cursor = fetch_creds.return_cursor(creds_path)

    # Test the yaml subject list file for errors
    sublist_yaml = os.path.abspath(sublist_yaml)
    if os.path.exists(sublist_yaml):
        print '%s exists, please specify a different path' % sublist_yaml
        sys.exit()
    elif os.access(os.path.dirname(sublist_yaml), os.W_OK):
        print 'Subject list will be written to %s' % sublist_yaml
    else:
        print 'Cannot write to output directory for sublist %s; please '\
              'specify a different path' % sublist_yaml
        sys.exit()

    # Query IMAGE_AGGREGATE for subject image info, get S3 path from IMAGE03
    # Here's how the column names correspond between the two:
    # IMAGE_AGGREGATE    --->       IMAGE03 columns          EXAMPLE
    # ---------------               ---------------          -------
    # subjectkey                    subjectkey               'NDARABCD1234'
    # image_subtype                 image_description        'MPRAGE', 'EPI'
    # image_category                image_modality           'MRI', 'FMRI'
    # image_scanner_manufacturer    scanner_manufacturer_pd  'SIEMENS'
    # image_tr                      mri_repetition_time_pd   '2.53'
    # image_te                      mri_echo_time_pd         '0.033'
    # image_flip_angle              flip_angle               '90'

    # Query commands
    # Get all of the data from IMAGE_AGGREGATE
    agg_query = '''
                select subjectkey, interview_age, subject_id,
                image_category, image_dimensions, image_subtype,
                image_scanner_manufacturer, image_tr, image_te,
                image_flip_angle
                from
                IMAGE_AGGREGATE
                '''

    # Get initial list form image_aggregate table
    print 'Querying database...'
    cursor.execute(agg_query)
    img_agg_results = cursor.fetchall()

    # Build subkey dictionary from query results
    subkey_dict = build_subkey_dict(cursor, img_agg_results)

    # Build phenotypic file from subkey_dict
    pheno_list = build_pheno_list(cursor, subkey_dict)

    # Save pheno to disk as csv in the same directory as subject list
    pheno_csv = os.path.join(os.path.dirname(sublist_yaml), 'subs_pheno.csv')
    with open(pheno_csv, 'w') as csv_file:
        csv_out = csv.writer(csv_file)
        for pheno_entry in pheno_list:
            csv_out.writerow(pheno_entry)
    print 'Successfully saved phenotypic file to %s' % pheno_csv

    # Now create S3-file cpac-sublist, unique id is interview age for now
    # Also restricted to 1 anatomical image for now
    s3_sublist = [{'subject_id': str(subkey),
                   'unique_id': entry_dict['anat'][0][1],
                   'anat': entry_dict['anat'][0][-1],
                   'rest': {'rest_%d_rest' % (rest_num+1):
                            entry_dict['rest'][rest_num][-1]
                            for rest_num in range(len(entry_dict['rest']))}}
                  for subkey, entry_dict in subkey_dict.items()]

    # If downloading imaging data
    if inputs_dir and study_name:
        # Create the directory if it does not exist
        if not os.path.exists(inputs_dir):
            try:
                print 'creating inputs directory: %s' % inputs_dir
                os.makedirs(inputs_dir)
            except OSError as err:
                print 'Unable to make inputs directory %s' % inputs_dir
                print 'This might be due to permissions: %s' % err
                sys.exit()

        # Download imaging data and build local subject list
        local_sublist = download_s3_sublist(s3_sublist, inputs_dir,
                                            study_name, creds_path)

        # Use local sublist
        sublist = local_sublist

    # Otherwise, just use S3 sublist
    else:
        sublist = s3_sublist

    # And write it to disk
    with open(sublist_yaml, 'w') as f:
        f.write(yaml.dump(sublist))

    # Return the subject list
    return sublist
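
Based on the docstring and the s3_sublist construction above, a single entry of the returned subject list (and of the saved yaml) would look roughly like the following; all values are placeholders.

# Illustrative only: one subject-list entry (values are placeholders)
example_entry = {'subject_id': 'NDARABCD1234',
                 'unique_id': '240',   # interview age is used as the unique id for now
                 'anat': '/path/to/inputs/study/NDARABCD1234/anat.nii.gz',
                 'rest': {'rest_1_rest': '/path/to/inputs/study/NDARABCD1234/rest_1.nii.gz'}}
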
Code example #5
def main(creds_path, creds_path2, bucket, b_prefix, pipeline, num_res):
    '''
    Function that analyzes data in an S3 bucket and then uploads it
    into a tabular format as an entry in a database table

    Parameters
    ----------
    creds_path : string
        filepath to the S3 bucket credentials as a csv file
    creds_path2 : string
        filepath to the database instance credentials as a csv file
    bucket : string
        name of the S3 bucket to analyze data from
    b_prefix : string
        prefix filepath within the S3 bucket to parse for data
    pipeline : string
        name of the pipeline to gather outputs from for tabulating in DB
    num_res : integer
        the number of results you would expect the pipeline to have per
        derivative when checking if the information was already entered

    Returns
    -------
    src_list : list (boto Keys)
        a list of the keys that were inserted into the database
    '''

    # Import packages
    import fetch_creds
    # ANTs
    if pipeline == 'ants':
        import ants_insert as db_insert
    # CIVET
    elif pipeline == 'civet':
        import civet_insert as db_insert
    # Freesurfer
    elif pipeline == 'freesurfer':
        import freesurfer_insert as db_insert
    # Otherwise, assume it's ccs, cpac, dparsf, or niak
    else:
        import insert_utils as db_insert

    # Init variables
    prefix = 'https://s3.amazonaws.com/' + bucket

    # Get AWS keys
    b = fetch_creds.return_bucket(creds_path, bucket)
    cursor = fetch_creds.return_cursor(creds_path2)

    # Set up lists of keys
    src_list = b.list(prefix=b_prefix)
    file_list = [s for s in src_list if pipeline in str(s.name)]

    # Count the number of files to process
    no_files = len(file_list)
    print 'done creating file list, it has %d elements' % no_files

    # Iterate through list
    i = 0
    for f in file_list:
        url_path = prefix + str(f.name)
        exists = check_existing(cursor, url_path, 'abide_img_results', num_res)
        if not exists:
            db_insert.upload_results(cursor, url_path)
            print 'uploaded file %s successfully!' % url_path
        else:
            print 'already loaded file %s, skipping...' % url_path
        i += 1
        per = 100*(float(i)/no_files)
        print 'done with file %d/%d\n%f%% complete\n' % \
        (i, no_files, per)

    # Return the src_list
    return src_list
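
A hedged usage sketch for the tabulation function above; the bucket name, prefix, paths and expected result count are placeholders.

# Hypothetical usage sketch; bucket, prefix, paths and num_res are placeholders
inserted_keys = main(creds_path='/path/to/s3_credentials.csv',
                     creds_path2='/path/to/db_credentials.csv',
                     bucket='my-output-bucket',
                     b_prefix='outputs/',
                     pipeline='ants',   # selects ants_insert as db_insert
                     num_res=97)
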
Code example #6
File: check_entries.py Project: dragonlet/ndar-dev
def main(creds_path, table_name, ids_yml, bucket_name=None, roi_map_yml=None):
    """
    Function to query the table of interest for entries in the datasetid list
    from the ids_yaml file.

    Parameters
    ----------
    creds_path : string
        path to the csv file with 'Access Key Id' as the header and the
        corresponding ASCII text for the key underneath; same with the
        'Secret Access Key' string and ASCII text
    table_name : string
        the name of the table to query in miNDAR database
    ids_yml : string
        filepath to the input yaml file that contains a list of
        datasetids to query
    bucket_name : string (optional)
        the name of the bucket to get data from; only needed for ROI
        entries upload
    roi_map_yml : string (optional)
        filepath to the input yaml file that contains a dictionary of
        roi labels and names; only needed for ROI entries upload

    Returns
    -------
    None
        This function does not return a value.
    """

    # Import packages
    import fetch_creds
    import sys
    import yaml

    # Init variables
    cursor = fetch_creds.return_cursor(creds_path)
    ids_list = yaml.load(open(ids_yml, "r"))
    no_files = len(ids_list)
    s3_prefix = "s3://ndar_data/outputs/"
    # Init roi mapping dictionary if it was specified
    if roi_map_yml:
        roi_map_dict = yaml.load(open(roi_map_yml, "r"))
        num_entries = len(roi_map_dict)
    else:
        roi_map_dict = None
        num_entries = 1

    i = 0
    # Go through the list
    for id in ids_list:
        cmd = "select * from %s where datasetid = :arg_1" % table_name
        cursor.execute(cmd, arg_1=id)
        res = cursor.fetchall()
        num_res = len(res)
        # If the number of entries isn't what we expect
        if num_res < num_entries:
            # If there is an incomplete number of entries, delete them
            if num_res > 0:
                print "Deleting partially-populated entries with datasetid = %s" % id
                cursor.execute("delete from %s where datasetid = :arg_1", arg_1=id)
            # If we're loading in ROIs, get the roi_dic from the S3 bucket
            if roi_map_dict:
                roi_dict = get_roi_dict(creds_path, bucket_name, id)
                s3_path = None
            else:
                roi_dict = None
                s3_path = s3_prefix + id + "/" + id + "_corticalthickness_normd.nii.gz"
            # And populate the table entries
            insert_unormd(cursor, id, table_name, s3_path=s3_path, roi_map=roi_map_dict, roi_dict=roi_dict)
            print "Successfully inserted entry %s!" % id
        # If we see more than we expect, raise an error
        elif num_res > num_entries:
            raise ValueError("more entries found than expected, investigate "
                             "this manually, datasetid: %s" % id)
        # Otherwise, the amount of entries is the amount we expect, move on
        else:
            print "Found the right amount of entries, dataset: %s is good" % id
        # Increment counter
        i += 1
        per = 100 * (float(i) / no_files)
        print "done with file %d/%d\n%f%% complete\n" % (i, no_files, per)
Code example #7
File: ants_insert.py Project: FCP-INDI/ndar-dev
def transfer_table_entries(creds_path):
    '''
    Function to transfer all of the ABIDE subjects' results in the
    DERIVATIVES_UNORMD and IMG_DERIVATIVES_UNORMD tables to the 
    ABIDE_IMG_RESULTS table

    Parameters
    ----------
    creds_path : string (filepath)
        path to the csv file with 'Access Key Id' as the header and the
        corresponding ASCII text for the key underneath; same with the
        'Secret Access Key' string and ASCII text

    Returns
    -------
    None
        This function does not return any value. It transfers table
        entries in an Oracle database.
    '''

    # Import packages
    import insert_utils
    import fetch_creds
    import time

    # Init variables
    cursor = fetch_creds.return_cursor(creds_path)
    deriv_id = insert_utils.return_next_pk(cursor, 'ABIDE_IMG_RESULTS')
    template = 'OASIS-30 Atropos Template'
    # Get ACT img derivatives from img_derivatives_unormd
    imgs_get = '''
               select
               pipelinename, pipelinetype, pipelinetools, pipelineversion,
               pipelinedescription, name, measurename, guid,
               datasetid, roidescription, roi, template, s3_path, cfgfilelocation
               from
               img_derivatives_unormd
               where instr(datasetid, :arg_1) > 0
               '''
    # Get ROI derivatives from DERIVATIVES_UNORMD
    rois_get = '''
               select
               pipelinename, pipelinetype, pipelinetools, pipelineversion, 
               pipelinedescription, derivativename, measurename, guid, 
               datasetid, roidescription, roi, template, value, units,
               cfgfilelocation
               from
               derivatives_unormd
               where instr(datasetid, :arg_1) > 0
               '''
    # Insert entries into ABIDE_IMG_RESULTS
    air_put = '''
              insert into abide_img_results
              (id, pipelinename, pipelinetype, pipelinetools,
              pipelineversion, pipelinedescription, name, measurename, 
              timestamp, guid, datasetid, roidescription, roi, atlas, value,
              units, s3_path, template, cfgfilelocation)
              values
              (:col_1, :col_2, :col_3, :col_4, :col_5, :col_6, :col_7, :col_8,
              :col_9, :col_10, :col_11, :col_12, :col_13, :col_14, :col_15,
              :col_16, :col_17, :col_18, :col_19)
              '''

    # Get abide results from derivatives_unormd (ABIDE id's have an 'a' in them)
    cursor.execute(rois_get, arg_1='a')
    roi_entries = cursor.fetchall()

    print 'Found %d roi results, inserting into ABIDE table' % len(roi_entries)
    # For each ROI entry, copy its fields over to ABIDE_IMG_RESULTS
    for entry in roi_entries:
        # Extract field values from entry result
        pname = entry[0]
        ptype = entry[1]
        ptools = entry[2]
        pver = entry[3]
        pdesc = entry[4]
        dname = entry[5]
        mname = entry[6]
        guid = entry[7]
        datasetid = entry[8]
        roidesc = entry[9]
        roi = entry[10]
        # template --> atlas
        atlas = entry[11]
        value = entry[12]
        units = entry[13]
        cfgfile = entry[14]
        # Timestamp
        timestamp = str(time.ctime(time.time()))
        # Find/make s3 path
        s3_path = make_roi_s3(cursor, datasetid)
        # And insert all of this into ABIDE_IMG_RESULTS
        cursor.execute(air_put, col_1=deriv_id,
                                col_2=pname,
                                col_3=ptype,
                                col_4=ptools,
                                col_5=pver,
                                col_6=pdesc,
                                col_7=dname,
                                col_8=mname,
                                col_9=timestamp,
                                col_10=guid,
                                col_11=datasetid,
                                col_12=roidesc,
                                col_13=roi,
                                col_14=atlas,
                                col_15=value,
                                col_16=units,
                                col_17=s3_path,
                                col_18=template,
                                col_19=cfgfile)
        # Commit changes
        cursor.execute('commit')
        # Increment to next unique pk id
        deriv_id += 1
        print deriv_id

    # Get abide results from derivatives_unormd (ABIDE id's have an 'a' in them)
    cursor.execute(imgs_get, arg_1='a')
    img_entries = cursor.fetchall()
    print 'Found %d image results, inserting into ABIDE table' % len(img_entries)
    # For each IMG entry, copy its fields over to ABIDE_IMG_RESULTS
    for entry in img_entries:
        # Extract field values from entry result
        pname = entry[0]
        ptype = entry[1]
        ptools = entry[2]
        pver = entry[3]
        pdesc = entry[4]
        dname = entry[5]
        mname = entry[6]
        guid = entry[7]
        datasetid = entry[8]
        roidesc = entry[9]
        roi = entry[10]
        # template --> atlas
        template = entry[11]
        s3_path = entry[12]
        cfgfile = entry[13]
        # Timestamp
        timestamp = str(time.ctime(time.time()))
        # Find/make s3 path
        s3_path = make_roi_s3(cursor, datasetid)
        # And insert all of this into ABIDE_IMG_RESULTS
        cursor.execute(air_put, col_1=deriv_id,
                                col_2=pname,
                                col_3=ptype,
                                col_4=ptools,
                                col_5=pver,
                                col_6=pdesc,
                                col_7=dname,
                                col_8=mname,
                                col_9=timestamp,
                                col_10=guid,
                                col_11=datasetid,
                                col_12=roidesc,
                                col_13=roi,
                                col_14='',
                                col_15='',
                                col_16='',
                                col_17=s3_path,
                                col_18=template,
                                col_19=cfgfile)
        # Commit changes
        cursor.execute('commit')
        # Increment to next unique pk id
        deriv_id += 1
        print deriv_id
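
The return_next_pk helper used above (and in the following examples) is not shown here; a minimal sketch of such a helper, assuming the target table has a numeric primary-key column named id, could be:

def return_next_pk(cursor, table_name):
    # Hypothetical sketch: return the next free numeric id for the given table.
    # Assumes the table has a numeric primary-key column named ID.
    cursor.execute('select max(id) from %s' % table_name)
    max_id = cursor.fetchone()[0]
    return 1 if max_id is None else int(max_id) + 1
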
Code example #8
File: ants_insert.py Project: FCP-INDI/ndar-dev
def insert_img_unormd(id_s3_list, creds_path):
    '''
    Function to insert image results data for ANTs cortical thickness
    to the IMG_DERIVATIVES_UNORMD table in miNDAR.

    Parameters
    ----------
    id_s3_list : list (tuple)
        a list of tuples where each tuple contains 2 strings:
        (datasetid, s3_path)
    creds_path : string (filepath)
        path to the csv file with 'Access Key Id' as the header and the
        corresponding ASCII text for the key underneath; same with the
        'Secret Access Key' string and ASCII text

    Returns
    -------
    None
        This function has no return value. It uploads the data from the
        list to a miNDAR database and exits.
    '''

    # Import packages
    import cx_Oracle
    import datetime
    import fetch_creds
    import os
    import yaml

    # Init variables
    # Create cursor for queries and data inserts
    cursor = fetch_creds.return_cursor(creds_path)

    # Constant arguments for all entries
    pipeline_name = 'act_workflow.py'
    deriv_name = 'Normalized cortical thickness image'
    measure_name = 'image'

    # Knowns
    roi_id = 'Grey matter'
    roi_description = 'Grey matter cortex'
    template = 'OASIS-30_Atropos Template'
    atlas_name = 'OASIS-TRT-20_jointfusion_DKT31_CMA_labels_in_OASIS-30.nii.gz'
    atlas_ver = '2mm (2013)'
    pipeline_name = 'act_workflow.py'
    pipeline_type = 'nipype workflow'
    cfg_file_loc = '/path/to/act_workflow.py'
    pipeline_tools = 'ants, nipype, python'
    pipeline_ver = 'v0.1'
    pipeline_desc = 'compute the cortical thickness of an extracted brain in ' \
                    'subject space, and normalize to template'

    # Get the next derivativeid (primary key from table)
    deriv_id = return_next_pk(cursor, 'img_derivatives_unormd')

    # Command string
    cmd = '''
          insert into img_derivatives_unormd
          (id, roi, pipelinename, pipelinetype, cfgfilelocation, 
          pipelinetools, pipelineversion, pipelinedescription, name, measurename, 
          timestamp, s3_path, template, guid, datasetid, roidescription)
          values
          (:col_1, :col_2, :col_3, :col_4, :col_5, :col_6, :col_7, :col_8,
           :col_9, :col_10, :col_11, :col_12, :col_13, :col_14, :col_15, :col_16)
          '''

    # Iterate through dictionary and upload data
    for sub in id_s3_list:
        # Timestamp
        timestamp = str(datetime.datetime.now())
        # Get datasetid and s3_path
        dataset_id = sub[0]
        s3_path = sub[1]
        # Get id
        id_find = '''
                  select guid from abide_subjects
                  where id = :arg_1
                  '''
        cursor.execute(id_find, arg_1=dataset_id)
        res = cursor.fetchall()
        guid = res[0][0]
        # Execute insert command
        cursor.execute(cmd,
                       col_1 = int(deriv_id),
                       col_2 = roi_id,
                       col_3 = pipeline_name,
                       col_4 = pipeline_type,
                       col_5 = cfg_file_loc,
                       col_6 = pipeline_tools,
                       col_7 = pipeline_ver,
                       col_8 = pipeline_desc,
                       col_9 = deriv_name,
                       col_10 = measure_name,
                       col_11 = timestamp,
                       col_12 = s3_path,
                       col_13 = template,
                       col_14 = guid,
                       col_15 = dataset_id,
                       col_16 = roi_description)
        # Increment the unique id
        print 'deriv_id ', deriv_id
        deriv_id += 1

    # Commit the changes and close the cursor/connection
    cursor.execute('commit')
    cursor.close()
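
A hedged usage sketch for the image-level insert above; the (datasetid, s3_path) pairs follow the s3://ndar_data/outputs/... naming seen elsewhere in these examples, and all concrete ids and paths are placeholders.

# Hypothetical usage sketch; datasetids and paths are placeholders
id_s3_list = [('a0001', 's3://ndar_data/outputs/a0001/a0001_corticalthickness_normd.nii.gz'),
              ('a0002', 's3://ndar_data/outputs/a0002/a0002_corticalthickness_normd.nii.gz')]
insert_img_unormd(id_s3_list, '/path/to/credentials.csv')
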
Code example #9
File: ants_insert.py Project: FCP-INDI/ndar-dev
def insert_unormd(roi_txt_fpaths, creds_path, oasis_file):
    '''
    Function to insert image results data for ANTs cortical thickness
    to the DERIVATIVES_UNORMD table in miNDAR.

    Parameters
    ----------
    roi_txt_fpaths : list (str)
        a list of filepaths as strings to the ROIstats.txt files to
        upload
    creds_path : string (filepath)
        path to the csv file with 'Access Key Id' as the header and the
        corresponding ASCII text for the key underneath; same with the
        'Secret Access Key' string and ASCII text
    oasis_file : string
        filepath to the Oasis_ROIs.txt file

    Returns
    -------
    None
        This function has no return value. It uploads the data from the
        list to a miNDAR database and exits.
    '''

    # Import packages
    import cx_Oracle
    import datetime
    import fetch_creds
    import os

    # Init variables
    big_dic = {}

    # For each subject
    for sub in roi_txt_fpaths:
        temp_list = []
        # Gather each subject's ROIs
        with open(sub,'r') as f:
            for i,line in enumerate(f):
                temp_list.append(line.split())
        # Trim off top elements (not ROIs)
        key = temp_list[0][2:]
        val = temp_list[1][2:]
        big_dic[os.path.basename(sub)] = dict(zip(key,val))

    # Build mapping dictionary
    roi_dic = {}
    with open(oasis_file) as f:
        for i,line in enumerate(f):
            # Split the line into list (tab delimiter)
            split_line = line.split('\t')
            # Filter out any blank strings in the list
            split_line = filter(None, split_line)
            # Filter out leading/trailing spaces
            key = split_line[0].strip()
            val = split_line[1].strip()
            # Store in dictionary
            roi_dic[key] = val

    # User and database info
    cursor = fetch_creds.return_cursor(creds_path)

    # Constant arguments for all entries
    atlas_name = 'OASIS-TRT-20_jointfusion_DKT31_CMA_labels_in_OASIS-30.nii.gz'
    atlas_ver = '2mm (2013)'
    pipeline_name = 'act_workflow.py'
    pipeline_type = 'nipype workflow'
    cfg_file_loc = '/path/to/act_workflow.py'
    pipeline_tools = 'ants, nipype, python'
    pipeline_ver = 'v0.1'
    pipeline_desc = 'compute the mean thickness of cortex in ROI for the ABIDE dataset'
    deriv_name = 'cortical thickness'
    measure_name = 'mean'
    units = 'mm'
    # Get the next derivativeid (primary key from table)
    deriv_id = return_next_pk(cursor, 'derivatives_unormd')

    # Command string
    cmd = '''
          insert into derivatives_unormd
          (id, atlasname, atlasversion, roi, roidescription, pipelinename, pipelinetype, 
          cfgfilelocation, pipelinetools, pipelineversion, pipelinedescription, 
          derivativename, measurename, datasetid, timestamp, value, units, guid)
          values
          (:col_1, :col_2, :col_3, :col_4, :col_5, :col_6, :col_7, :col_8, :col_9, 
           :col_10, :col_11, :col_12, :col_13, :col_14, :col_15, :col_16, :col_17,
           :col_18)
          '''

    # Iterate through dictionary and upload data
    not_in_nitrc = []
    for key, val in big_dic.iteritems():
        # Find subject in image03 to get datasetID
        dataset_id = key.split('_')[0]
        print dataset_id
        id_find = '''
                  select guid
                  from abide_subjects
                  where id = :arg_1
                  '''
        cursor.execute(id_find, arg_1=dataset_id)
        res = cursor.fetchall()
        guid = res[0][0]
        print 'dataset_id ', dataset_id
        print 'guid', guid
        # Iterate through ROIs
        for k, v in val.iteritems():
            # Timestamp
            timestamp = str(datetime.datetime.now())
            # Get ROI number
            roi = k.split('Mean_')[1]
            roi_name = roi_dic[k]
            # Value
            value = float(v)
            # Execute insert command
            cursor.execute(cmd,
                           col_1 = deriv_id,
                           col_2 = atlas_name,
                           col_3 = atlas_ver,
                           col_4 = roi,
                           col_5 = roi_name,
                           col_6 = pipeline_name,
                           col_7 = pipeline_type,
                           col_8 = cfg_file_loc,
                           col_9 = pipeline_tools,
                           col_10 = pipeline_ver,
                           col_11 = pipeline_desc,
                           col_12 = deriv_name,
                           col_13 = measure_name,
                           col_14 = dataset_id,
                           col_15 = timestamp,
                           col_16 = value,
                           col_17 = units,
                           col_18 = guid)
            # Increment the unique id
            deriv_id += 1
            print 'deriv_id ', deriv_id

    # And commit changes
    cursor.execute('commit')
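
The parsing loop above expects ROIstats.txt files whose first line holds column headers, with the per-ROI columns starting at index 2 and named Mean_<label>, and whose second line holds the corresponding values; the small sketch below mimics that layout with made-up numbers.

# Illustrative only: parse a ROIstats.txt-style header/value pair (numbers are made up)
header = 'File Sub-brick Mean_1001 Mean_1002'.split()
values = 'thick.nii 0[#0] 2.731 2.548'.split()
roi_means = dict(zip(header[2:], values[2:]))
print(roi_means)  # e.g. {'Mean_1001': '2.731', 'Mean_1002': '2.548'}
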
Code example #10
File: ndar_act_run.py Project: FCP-INDI/ndar-dev
def main(sub_list, sub_idx):
    '''
    Method to preprocess a subject's image (nifti) data using ANTs
    and upload it to a miNDAR database. The first argument to the script
    specifies the index of the subject in the subject list to process.

    Parameters
    ----------
    sub_list : list (tuple)
        the subject list loaded from a yaml file, given as a python
        list of tuples; each tuple in the list is of the form
        (img03_id, s3_path), where img03_id is an integer corresponding
        to the image03_id of the image and s3_path is a string
        corresponding to the path of the image on S3,
        e.g. (123, 's3://NDAR_Bucket/subject/image01.nii')
    sub_idx : integer
        index of subject to process from the sub_list yaml file

    Returns
    -------
    None
        The function doesn't return any value; it processes and uploads
        data to S3 and creates a log file of the overall progress.
    '''

    # Import packages
    import boto
    import cx_Oracle
    import fetch_creds
    import logging
    from nipype import logging as np_logging
    from nipype import config
    import os
    import re
    import subprocess
    import sys
    import time
    import yaml

    # Start timing
    start = time.time()

    # Init variables
    base_path = '/data/act_run/'
    creds_path = '/data/creds/Daniels_credentials.csv'
    # Oasis template paths
    oasis_path = '/data/OASIS-30_Atropos_template/'
    oasis_roi_yaml = oasis_path + 'oasis_roi_map.yml'
    # Load in OASIS ROI map
    oasis_roi_map = yaml.load(open(oasis_roi_yaml,'r'))
    
    # Setup s3 bucket, RDS cursor connections for uploading
    aws_access_key_id, aws_secret_access_key = fetch_creds.return_aws_keys(creds_path)
    bucket = fetch_creds.return_bucket(creds_path, 'ndar-data')
    cursor = fetch_creds.return_cursor(creds_path)

    # Get subject info
    subject = sub_list[sub_idx-1]
    img03_id_str = str(subject[0])
    s3_path = subject[1]
    
    # Change bucket name to always be 'NDAR_Central' (case-sensitive)
    s3_list = s3_path.split('/')
    s3_list[2] = 'NDAR_Central'
    s3_path = '/'.join(s3_list)

    # --- Set up log file ---
    log_file = base_path + 'logs/' + img03_id_str + '.log'
    setup_logger('log1', log_file, logging.INFO)
    ndar_log = logging.getLogger('log1')
    # Log input image stats
    ndar_log.info('-------- RUNNING SUBJECT NO. #%d --------' % (sub_idx))
    ndar_log.info('Start time: %s ' % time.ctime(start))
    ndar_log.info('Input S3 path: %s' % s3_path)
    ndar_log.info('Input IMAGE03 ID: %s' % img03_id_str)

    # --- Search results_stats table for previous entries of that img03_id ---
    cmd = '''
          select rs_id, wf_status
          from results_stats
          where img03_id = :arg_1
          '''
    cursor.execute(cmd, arg_1=int(img03_id_str))
    result = cursor.fetchall()
    # If the record already exists, check to see if it was successful
    wkflow_flag = 0
    for record in result:
        wkflow_status = record[1]
        if wkflow_status == 'PASS':
            wkflow_flag = 1
            rs_id = record[0]
    # Log if already found and exit
    if wkflow_flag:
        ndar_log.info('Image already successfully ran, found at RS_ID: %d' % rs_id)
        sys.exit()

    # --- Download and extract data from NDAR_Central S3 bucket ---
    nifti_file = base_path + 'inputs-ef/' + img03_id_str + '.nii.gz'
    # Execute ndar_unpack for that subject
    cmd = './ndar_unpack'
    if not os.path.exists(nifti_file):
        cmd_list = [cmd, '--aws-access-key-id', aws_access_key_id, 
                    '--aws-secret-access-key', aws_secret_access_key, 
                    '-v', nifti_file, s3_path]
        cmd_str = ' '.join(cmd_list)
        ndar_log.info('Executing command: %s ' % cmd_str)
        p = subprocess.Popen(cmd_list, stdout=subprocess.PIPE, 
                             stderr=subprocess.STDOUT)
        p.wait()
        stdout, stderr = p.communicate()
        ndar_log.info(stdout)
    else:
        ndar_log.info('Nifti file already present for IMAGE03 ID %s' % img03_id_str)
        ndar_log.info('ndar_unpack did not need to run')

    extract_status_str = 'PASS'
    # If file was never created, log and exit
    if not os.path.exists(nifti_file):
        ndar_log.info('File extraction FAILED for IMAGE03 ID %s' % img03_id_str)
        extract_status_str = 'FAIL'
        # Upload the log file
        time_str = time.strftime('%Y-%m-%d_%H%M-%S',time.localtime(time.time()))
        s3_filename = time_str + '_' + img03_id_str
        up_log_list = []
        s3_log_list = []
        s3_log_path = 'logs/' + s3_filename + '.log'
        up_log_list.append(log_file)
        s3_log_list.append(s3_log_path)
        upload_to_s3(bucket, up_log_list, s3_log_list)
        # Finally upload the record to the database
        add_db_record(cursor, img03_id_str, 'N/A', extract_status_str, 
                      'https://s3.amazonaws.com/ndar-data/' + s3_log_path, 'N/A', 'N/A')
        # And quit
        sys.exit()

    # Create the nipype workflow
    wf, crash_dir = create_workflow(base_path, img03_id_str, nifti_file, oasis_path)

    # --- Run the workflow ---
    wf_base_dir = base_path + 'work-dirs/' + img03_id_str
    up_nifti_path = wf_base_dir + \
                    '/output/OUTPUT_CorticalThicknessNormalizedToTemplate.nii.gz'
    up_roi_path = wf_base_dir + '/output/ROIstats.txt'
    if os.path.exists(up_nifti_path) and os.path.exists(up_roi_path):
        wf_status = 1
    else:
        wf_status = 0
    if wf_status == 0:
        try:
            ndar_log.info('Running the workflow...')
            wf.run()
            # We're successful at this point, add it as a file to the completed path
            ndar_log.info('Workflow completed successfully for IMAGE03 ID %s' % img03_id_str)
            wf_status = 1
            finish_str = 'Finish time: %s'
        # If the workflow run fails
        except:
            ndar_log.info('ACT Workflow failed for IMAGE03 ID %s' % img03_id_str)
            finish_str = 'Crash time: %s'
    else:
        finish_str = 'Workflow did not need to run as files were already there: %s'

    # Log finish and total computation time
    fin = time.time()
    elapsed = (fin - start)/60
    ndar_log.info(finish_str % time.ctime(fin))
    ndar_log.info('Total time running IMAGE03 ID %s is: %s minutes' \
                  %(img03_id_str,str(elapsed)))

    up_list = []
    s3_list = []
    time_str = time.strftime('%Y-%m-%d_%H-%M-%S',time.localtime(fin))
    s3_filename = time_str + '_' + img03_id_str

    # If workflow completed succesfully
    if wf_status:
        # Define cloud data and status
        wf_status_str = 'PASS'
        s3_nifti_path = 'outputs/' + img03_id_str + '/' + img03_id_str + \
                        '_corticalthickness_normd.nii.gz'
        s3_roi_path = 'outputs/' + img03_id_str + '/' + img03_id_str + \
                      '_ROIstats.txt' 
        full_s3_nifti_path = 's3://ndar_data/' + s3_nifti_path
        full_s3_roi_path = 's3://ndar_data/' + s3_roi_path
        # Upload paths
        #wf_base_dir = base_path + 'work-dirs/' + img03_id_str
        #up_nifti_path = wf_base_dir + \
        #                '/output/OUTPUT_CorticalThicknessNormalizedToTemplate.nii.gz'
        #up_roi_path = wf_base_dir + '/output/ROIstats.txt'
        # Append upload/s3 lists with path names
        up_list.append(up_nifti_path)
        up_list.append(up_roi_path)
        s3_list.append(s3_nifti_path)
        s3_list.append(s3_roi_path)
        # Log nifti and roi files upload
        ndar_log.info('Uploading nifti and roi files...')
        # Create dictionary of ROIs for that subject
        sub_roi_dic = create_roi_dic(up_roi_path)
        try:
            # Insert the ROIs into the unorm'd and norm'd databases
            ndar_log.info('uploading rois...')
            print '----------------------------------'
            insert_unormd(cursor, img03_id_str, roi_dic=sub_roi_dic)
            ndar_log.info('uploading imgs...')
            # Insert the act nifti into the unorm'd and norm'd databases
            insert_unormd(cursor, img03_id_str, s3_path=full_s3_nifti_path)
        except:
            e = sys.exc_info()[0]
            ndar_log.info('Error inserting results to MINDAR, message: %s' % str(e))
            wf_status_str = 'Error inserting results into MINDAR database'
    # Otherwise, there were crash files, upload those
    else:
        # Define cloud data and status
        wf_status_str = 's3://ndar-data/crashes/' + s3_filename + '/'
        full_s3_nifti_path = 'N/A'
        full_s3_roi_path = 'N/A'
        # Find crash file names/paths
        for root, dirs, files in os.walk(crash_dir):
            root_path = os.path.abspath(root)
            crash_files = files
        # Append crash file and s3 path lists
        for f in crash_files:
            crash_path = root_path + '/' + f
            s3_crash_path = 'crashes/' + s3_filename + '/' + f
            up_list.append(crash_path)
            s3_list.append(s3_crash_path)
        # Log crash file upload 
        ndar_log.info('Uploading crash files into %s ...' % wf_status_str)

    # Call the upload function
    upload_to_s3(bucket, up_list, s3_list)
    ndar_log.info('Done')

    # Upload the log file
    up_log_list = []
    s3_log_list = []
    s3_log_path = 'logs/' + s3_filename + '.log'
    up_log_list.append(log_file)
    s3_log_list.append(s3_log_path)
    upload_to_s3(bucket, up_log_list, s3_log_list) 

    # Finally upload the record to the database
    add_db_record(cursor, 
                  img03_id_str, 
                  wf_status_str, 
                  extract_status_str, 
                  's3://ndar-data/'+s3_log_path, 
                  full_s3_nifti_path, 
                  full_s3_roi_path)
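
setup_logger, create_workflow, upload_to_s3, add_db_record, create_roi_dic and insert_unormd are defined elsewhere in the module and are not shown; purely for orientation, a minimal sketch of a setup_logger helper compatible with the call above, assuming it only attaches a file handler to a named logger, might be:

import logging

def setup_logger(logger_name, log_file, level=logging.INFO):
    # Hypothetical sketch: configure a named logger that writes to log_file
    logger = logging.getLogger(logger_name)
    logger.setLevel(level)
    handler = logging.FileHandler(log_file)
    handler.setFormatter(logging.Formatter('%(asctime)s : %(message)s'))
    logger.addHandler(handler)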