'\n\n\n'
            raise Exception

        # gp_flow
        # Extracts the model files (.con, .grp, .mat, .fts) from the model
        # directory and sends them to the create_group_analysis workflow gpa_wf

        gp_flow = create_grp_analysis_dataflow("gp_dataflow_%s" %
                                               currentDerivative)
        gp_flow.inputs.inputspec.grp_model = model
        gp_flow.inputs.inputspec.fTest = fTest

        # gpa_wf
        # Creates the actual group analysis workflow

        gpa_wf = create_group_analysis(fTest,
                                       "gp_analysis_%s" % currentDerivative)

        gpa_wf.inputs.inputspec.zmap_files = ordered_paths
        gpa_wf.inputs.inputspec.z_threshold = c.zThreshold
        gpa_wf.inputs.inputspec.p_threshold = c.pThreshold
        gpa_wf.inputs.inputspec.parameters = (c.FSLDIR, 'MNI152')

        print "group model: ", model
        print "f test: ", fTest
        print "z threshold: ", c.zThreshold
        print "p threshold: ", c.pThreshold
        print "parameters: ", (c.FSLDIR, 'MNI152')

        wf.connect(gp_flow, 'outputspec.mat', gpa_wf, 'inputspec.mat_file')
        wf.connect(gp_flow, 'outputspec.con', gpa_wf, 'inputspec.con_file')
        wf.connect(gp_flow, 'outputspec.grp', gpa_wf, 'inputspec.grp_file')
def prep_group_analysis_workflow(c, resource, subject_infos):
    
    p_id, s_ids, scan_ids, s_paths = (list(tup) for tup in zip(*subject_infos))    
    
    if type(c.modelFile) is list:
        model_sub_list = c.modelFile
    elif os.path.exists(c.modelFile):    
        model_sub_list = [line.rstrip('\r\n').split() \
                          for line in open(c.modelFile, 'r') \
                          if not (line == '\n') and not line.startswith('#')]
    else:
        raise ValueError("modelFile %s has invalid entry" %(c.modelFile)) 

    for model_sub in model_sub_list:
        
        model, subject_list = model_sub
        
        print "running for model %s and resource %s..."%(os.path.basename(model), resource)
        
        if not os.path.exists(model):
            raise Exception("path to model %s doesn't exit"%model)
        
        if not os.path.exists(subject_list):
            raise Exception("path to input subject list %s is invalid"%subject_list)
        
        if c.mixedScanAnalysis == True:
            wf = pe.Workflow(name = 'group_analysis/%s/grp_model_%s'%(resource, os.path.basename(model)))
        else:
            wf = pe.Workflow(name = 'group_analysis/%s/grp_model_%s/%s'%(resource, os.path.basename(model), scan_ids[0])) 

        wf.base_dir = c.workingDirectory
    
        input_subject_list = [line.rstrip('\r\n') for line in open(subject_list, 'r') \
                              if not (line == '\n') and not line.startswith('#')]
    
        ordered_paths=[]
        for sub in input_subject_list :
           for path in s_paths:
               if sub in path:
                   ordered_paths.append(path)
        
        print "input_subject_list ->", input_subject_list
        #print "ordered_paths ->", ordered_paths
    
        strgy_path = os.path.dirname(s_paths[0]).split(scan_ids[0])[1]
        for ch in ['.']:
            if ch in strgy_path:
                strgy_path = strgy_path.replace(ch, '_')
        
        gp_flow = create_gpa_dataflow("gp_dataflow%s"%strgy_path)
        gp_flow.inputs.inputspec.input_sublist = input_subject_list 
        gp_flow.inputs.inputspec.output_sublist = s_ids
        gp_flow.inputs.inputspec.grp_model = model
        gp_flow.inputs.inputspec.ftest = c.fTest
        
        from CPAC.group_analysis import create_group_analysis
        
        gpa_wf = create_group_analysis(c.fTest, "gp_analysis%s"%strgy_path)

        gpa_wf.inputs.inputspec.zmap_files = ordered_paths
        gpa_wf.inputs.inputspec.z_threshold = c.zThreshold
        gpa_wf.inputs.inputspec.p_threshold = c.pThreshold
        gpa_wf.inputs.inputspec.parameters = (c.FSLDIR,
                                                   'MNI152')
        
        wf.connect(gp_flow, 'outputspec.mat',
                   gpa_wf, 'inputspec.mat_file')
        wf.connect(gp_flow, 'outputspec.con',
                   gpa_wf, 'inputspec.con_file')
        wf.connect(gp_flow, 'outputspec.grp',
                    gpa_wf, 'inputspec.grp_file')
            
        if c.fTest:
            wf.connect(gp_flow, 'outputspec.fts',
                       gpa_wf, 'inputspec.fts_file') 
        
        ds = pe.Node(nio.DataSink(), name='gpa_sink')
        out_dir = os.path.dirname(s_paths[0]).replace(s_ids[0], 'group_analysis_results/_grp_model_%s'%(os.path.basename(model)))
        
        if 'sca_roi' in resource:
            out_dir = os.path.join(out_dir, \
              re.search('ROI_number_(\d)+',os.path.splitext(os.path.splitext(os.path.basename(s_paths[0]))[0])[0]).group(0))
        if 'centrality' in resource:
             names = ['degree_centrality_binarize', 'degree_centrality_weighted', \
                      'eigenvector_centrality_binarize', 'eigenvector_centrality_weighted']
             for name in names:
                 if name in os.path.basename(s_paths[0]):
                     out_dir = os.path.join(out_dir, name)
                     break
        if 'tempreg_maps_z_files' in resource:
            out_dir = os.path.join(out_dir, \
                re.search('\w*[#]*\d+', os.path.splitext(os.path.splitext(os.path.basename(s_paths[0]))[0])[0]).group(0))
        
        if c.mixedScanAnalysis == True:
            out_dir = re.sub(r'(\w)*scan_(\w)*(\d)*(\w)*[/]', '', out_dir)
            
            
        ds.inputs.base_directory = out_dir
        ds.inputs.container = ''
        
        ds.inputs.regexp_substitutions = [(r'(?<=rendered)(.)*[/]','/'),
                                          (r'(?<=model_files)(.)*[/]','/'),
                                          (r'(?<=merged)(.)*[/]','/'),
                                          (r'(?<=stats/clusterMap)(.)*[/]','/'),
                                          (r'(?<=stats/unthreshold)(.)*[/]','/'),
                                          (r'(?<=stats/threshold)(.)*[/]','/'),
                                          (r'_cluster(.)*[/]',''),
                                          (r'_slicer(.)*[/]',''),
                                          (r'_overlay(.)*[/]','')]
    
        if 1 in c.runSymbolicLinks:
    
    
            link_node = pe.MapNode(interface=util.Function(
                                input_names=['in_file',
                                            'resource'],
                                    output_names=[],
                                    function=prepare_gp_links),
                                    name='link_gp_', iterfield=['in_file'])
            link_node.inputs.resource = resource
            wf.connect(ds, 'out_file', link_node, 'in_file')
    
    
        ########datasink connections#########
        
        wf.connect(gp_flow, 'outputspec.mat',
                   ds, 'model_files')
        wf.connect(gp_flow, 'outputspec.grp',
                   ds, 'model_files.@02')
        wf.connect(gp_flow, 'outputspec.sublist',
                   ds, 'model_files.@03')
        wf.connect(gpa_wf, 'outputspec.merged',
                   ds, 'merged')
        wf.connect(gpa_wf, 'outputspec.zstats',
                   ds, 'stats.unthreshold')
        wf.connect(gpa_wf, 'outputspec.zfstats',
                   ds,'stats.unthreshold.@01')
        wf.connect(gpa_wf, 'outputspec.fstats',
                   ds,'stats.unthreshold.@02')
        wf.connect(gpa_wf, 'outputspec.cluster_threshold_zf',
                   ds, 'stats.threshold')
        wf.connect(gpa_wf, 'outputspec.cluster_index_zf',
                   ds,'stats.clusterMap')
        wf.connect(gpa_wf, 'outputspec.cluster_localmax_txt_zf',
                   ds, 'stats.clusterMap.@01')
        wf.connect(gpa_wf, 'outputspec.overlay_threshold_zf',
                   ds, 'rendered')
        wf.connect(gpa_wf, 'outputspec.rendered_image_zf',
                   ds, 'rendered.@01')   
        wf.connect(gpa_wf, 'outputspec.cluster_threshold',
                   ds,  'stats.threshold.@01')
        wf.connect(gpa_wf, 'outputspec.cluster_index',
                   ds, 'stats.clusterMap.@02')
        wf.connect(gpa_wf, 'outputspec.cluster_localmax_txt',
                   ds, 'stats.clusterMap.@03')
        wf.connect(gpa_wf, 'outputspec.overlay_threshold',
                   ds, 'rendered.@02')
        wf.connect(gpa_wf, 'outputspec.rendered_image',
                   ds, 'rendered.@03')
        
        ######################################
        
        wf.run(plugin='MultiProc',
                             plugin_args={'n_procs': c.numCoresPerSubject})
    
        print "**Workflow finished for model %s and resource %s"%(os.path.basename(model), resource)
Example #3
0
# Collect the per-subject SCA z-map files for the current ROI by mapping
# each functional file's directory onto its smoothed z-score output.
func_files = read_table(func_list, header=None).ix[:, 0].tolist()
sca_dirs = joins(dirnames(func_files), "sca/fwhm_08")
sca_files = joins(
    sca_dirs, "smoothed_zscore_scan2_peaks100_2mm.nii_roi_n%02i.nii.gz" % roi)
nsubjects = len(func_files)

# Fail fast if the expected derivative outputs are missing.
# (Message grammar fixed: was "SCA files doesn't exist".)
if not op.exists(sca_files[0]): raise Exception("SCA files don't exist")

####

# Group-analysis thresholds and FSL installation location.
zThreshold = 1.96
pThreshold = 0.05
FSLDIR = os.environ['FSLDIR']

gpa_wf = create_group_analysis(False, "gp_analysis_compcor")
# BUGFIX: nipype Workflow objects expose 'base_dir', not 'base_directory';
# the original assignment silently set an unused attribute, leaving the
# workflow's working directory at its default.
gpa_wf.base_dir = odir

gpa_wf.inputs.inputspec.zmap_files = sca_files
gpa_wf.inputs.inputspec.z_threshold = zThreshold
gpa_wf.inputs.inputspec.p_threshold = pThreshold
gpa_wf.inputs.inputspec.parameters = (FSLDIR, 'MNI152')
gpa_wf.inputs.inputspec.mat_file = matfile
gpa_wf.inputs.inputspec.con_file = confile
gpa_wf.inputs.inputspec.grp_file = grpfile

####

# Sink group-analysis results into a per-ROI output folder.
# (DataSink's input really is named 'base_directory' — unlike Workflow.)
ds = pe.Node(nio.DataSink(), name='gpa_sink')
ds.inputs.base_directory = op.join(odir, "roi_n%02i" % roi)
ds.inputs.container = ''
Example #4
0
        # (remove special characters)
        strgy_path_name = strgy_path.replace('/', '__')

        # gp_flow
        # Extracts the model files (.con, .grp, .mat, .fts) from the model
        # directory and sends them to the create_group_analysis workflow gpa_wf

        gp_flow = create_grp_analysis_dataflow("gp_dataflow_%s" %
                                               strgy_path_name)
        gp_flow.inputs.inputspec.grp_model = model
        gp_flow.inputs.inputspec.ftest = c.fTest

        # gpa_wf
        # Creates the actual group analysis workflow

        gpa_wf = create_group_analysis(c.fTest,
                                       "gp_analysis_%s" % strgy_path_name)

        gpa_wf.inputs.inputspec.zmap_files = ordered_paths
        gpa_wf.inputs.inputspec.z_threshold = c.zThreshold
        gpa_wf.inputs.inputspec.p_threshold = c.pThreshold
        gpa_wf.inputs.inputspec.parameters = (c.FSLDIR, 'MNI152')

        print "group model: ", model
        print "f test: ", c.fTest
        print "z threshold: ", c.zThreshold
        print "p threshold: ", c.pThreshold
        print "parameters: ", (c.FSLDIR, 'MNI152')

        wf.connect(gp_flow, 'outputspec.mat', gpa_wf, 'inputspec.mat_file')
        wf.connect(gp_flow, 'outputspec.con', gpa_wf, 'inputspec.con_file')
        wf.connect(gp_flow, 'outputspec.grp', gpa_wf, 'inputspec.grp_file')
def prep_group_analysis_workflow(c, resource, subject_infos):
    print "Preparing Group Analysis workflow for resource", resource
    print "subjects", subject_infos

    p_id, s_ids, scan_ids, s_paths = (list(tup) for tup in zip(*subject_infos))

    if c.mixedScanAnalysis == True:
        wf = pe.Workflow(name="group_analysis/%s" % resource)
    else:
        wf = pe.Workflow(name="group_analysis/%s/%s" % (resource, scan_ids[0]))

    wf.base_dir = c.workingDirectory

    # extract model files
    model_list = [
        line.rstrip("\r\n") for line in open(c.modelFile, "r") if not (line == "\n") and not line.startswith("#")
    ]

    if not model_list:
        raise Exception(
            "mode_list is empty. Please provide"
            "a model file with full paths of the"
            "folder containing models for group analysis"
        )

    from collections import defaultdict

    model_map = defaultdict(list)

    # create a map of model as key and its sub files as values
    import os
    import glob

    for model in model_list:
        if os.path.exists(model):
            files = glob.glob(os.path.join(model, "*"))
            model_map[os.path.basename(model)] = files
        else:
            raise Exception("Path to the model %s doesn't exist" % model)

    # print model_map

    input_subject_list = [
        line.rstrip("\r\n")
        for line in open(c.groupAnalysisSubjectList, "r")
        if not (line == "\n") and not line.startswith("#")
    ]

    ordered_paths = []
    for sub in input_subject_list:
        for path in s_paths:
            if sub in path:
                ordered_paths.append(path)
    print "input_subject_list", input_subject_list
    print "ordered_paths", ordered_paths

    strgy_path = os.path.dirname(s_paths[0]).split(scan_ids[0])[1]
    for ch in ["."]:
        if ch in strgy_path:
            strgy_path = strgy_path.replace(ch, "_")

    gp_flow = create_gpa_dataflow(model_map, c.fTest, "gp_dataflow%s" % strgy_path)
    gp_flow.inputs.inputspec.input_sublist = input_subject_list
    gp_flow.inputs.inputspec.output_sublist = s_ids

    from CPAC.group_analysis import create_group_analysis

    gpa_wf = create_group_analysis(c.fTest, "gp_analysis%s" % strgy_path)
    gpa_wf.inputs.inputspec.zmap_files = ordered_paths
    gpa_wf.inputs.inputspec.z_threshold = c.zThreshold
    gpa_wf.inputs.inputspec.p_threshold = c.pThreshold
    gpa_wf.inputs.inputspec.parameters = (c.FSLDIR, "MNI152")

    wf.connect(gp_flow, "outputspec.mat", gpa_wf, "inputspec.mat_file")
    wf.connect(gp_flow, "outputspec.con", gpa_wf, "inputspec.con_file")
    wf.connect(gp_flow, "outputspec.grp", gpa_wf, "inputspec.grp_file")

    if c.fTest:
        wf.connect(gp_flow, "outputspec.fts", gpa_wf, "inputspec.fts_file")

    ds = pe.Node(nio.DataSink(), name="gpa_sink")
    # out_dir = os.path.join('group_analysis_results', resource)
    out_dir = os.path.dirname(s_paths[0]).replace(s_ids[0], "group_analysis_results")
    if "sca_roi" in resource:
        out_dir = os.path.join(
            out_dir,
            re.search("ROI_number_(\d)+", os.path.splitext(os.path.splitext(os.path.basename(s_paths[0]))[0])[0]).group(
                0
            ),
        )
    if "centrality" in resource:
        names = [
            "degree_centrality_binarize",
            "degree_centrality_weighted",
            "eigenvector_centrality_binarize",
            "eigenvector_centrality_weighted",
        ]
        for name in names:
            if name in os.path.basename(s_paths[0]):
                out_dir = os.path.join(out_dir, name)
                break
    if c.mixedScanAnalysis == True:
        out_dir = re.sub(r"(\w)*scan_(\w)*(\d)*(\w)*[/]", "", out_dir)

    ds.inputs.base_directory = out_dir
    ds.inputs.container = ""

    ds.inputs.regexp_substitutions = [
        (r"(?<=rendered)(.)*_grp_model_", "/_grp_model_"),
        (r"(?<=model_files)(.)*_grp_model_", "/_grp_model_"),
        (r"(?<=merged)(.)*[/]", "/"),
        (r"(?<=stats/clusterMap)(.)*_grp_model_", "/_grp_model_"),
        (r"(?<=stats/unthreshold)(.)*_grp_model_", "/_grp_model_"),
        (r"(?<=stats/threshold)(.)*_grp_model_", "/_grp_model_"),
        (r"_cluster(.)*[/]", ""),
        (r"_slicer(.)*[/]", ""),
        (r"_overlay(.)*[/]", ""),
    ]

    if 1 in c.runSymbolicLinks:

        link_node = pe.MapNode(
            interface=util.Function(input_names=["in_file", "resource"], output_names=[], function=prepare_gp_links),
            name="link_gp_",
            iterfield=["in_file"],
        )
        link_node.inputs.resource = resource
        wf.connect(ds, "out_file", link_node, "in_file")

    ########datasink connections#########

    wf.connect(gp_flow, "outputspec.mat", ds, "model_files")
    wf.connect(gp_flow, "outputspec.grp", ds, "model_files.@02")
    wf.connect(gp_flow, "outputspec.sublist", ds, "model_files.@03")
    wf.connect(gpa_wf, "outputspec.merged", ds, "merged")
    wf.connect(gpa_wf, "outputspec.zstats", ds, "stats.unthreshold")
    wf.connect(gpa_wf, "outputspec.zfstats", ds, "stats.unthreshold.@01")
    wf.connect(gpa_wf, "outputspec.fstats", ds, "stats.unthreshold.@02")
    wf.connect(gpa_wf, "outputspec.cluster_threshold_zf", ds, "stats.threshold")
    wf.connect(gpa_wf, "outputspec.cluster_index_zf", ds, "stats.clusterMap")
    wf.connect(gpa_wf, "outputspec.cluster_localmax_txt_zf", ds, "stats.clusterMap.@01")
    wf.connect(gpa_wf, "outputspec.overlay_threshold_zf", ds, "rendered")
    wf.connect(gpa_wf, "outputspec.rendered_image_zf", ds, "rendered.@01")
    wf.connect(gpa_wf, "outputspec.cluster_threshold", ds, "stats.threshold.@01")
    wf.connect(gpa_wf, "outputspec.cluster_index", ds, "stats.clusterMap.@02")
    wf.connect(gpa_wf, "outputspec.cluster_localmax_txt", ds, "stats.clusterMap.@03")
    wf.connect(gpa_wf, "outputspec.overlay_threshold", ds, "rendered.@02")
    wf.connect(gpa_wf, "outputspec.rendered_image", ds, "rendered.@03")

    ######################################

    wf.run(plugin="MultiProc", plugin_args={"n_procs": c.numCoresPerSubject})
Example #6
0
def prep_group_analysis_workflow(c, group_config_file, resource, subject_infos, threshold_val):
    
    #
    # this function runs once per output file during group analysis
    #

    import yaml
    import commands

    # p_id = a list of pipeline IDs, i.e. the name of the output folder for
    #        the strat
    
    # s_ids = a list of all the subject IDs

    # scan_ids = a list of scan IDs

    # s_paths = a list of all of the filepaths of this particular output
    #           file that prep_group_analysis_workflow is being called for

    p_id, s_ids, scan_ids, s_paths = (list(tup) for tup in zip(*subject_infos))

    try:
        group_conf = Configuration(yaml.load(open(os.path.realpath(group_config_file), 'r')))
    except Exception as e:
        err_string = "\n\n[!] CPAC says: Could not read group model " \
                     "configuration YML file. Ensure you have read access " \
                     "for the file and that it is formatted properly.\n\n" \
                     "Configuration file: %s\n\nError details: %s" \
                     % (group_config_file, e)
        raise Exception(err_string)

     
    group_sublist_file = open(group_conf.subject_list, 'r')

    group_sublist_items = group_sublist_file.readlines()

    group_sublist = [line.rstrip('\n') for line in group_sublist_items \
                          if not (line == '\n') and not line.startswith('#')]

    # list of subjects for which paths which DO exist
    exist_paths = []

    # paths to the actual derivatives for those subjects
    derivative_paths = []


    z_threshold = float(group_conf.z_threshold[0])

    p_threshold = float(group_conf.p_threshold[0])


    custom_confile = group_conf.custom_contrasts

    if ((custom_confile == None) or (custom_confile == '') or \
            ("None" in custom_confile)):

        if (len(group_conf.f_tests) == 0) or (group_conf.f_tests == None):
            fTest = False
        else:
            fTest = True

    else:

        if not os.path.exists(custom_confile):
            errmsg = "\n[!] CPAC says: You've specified a custom contrasts " \
                     ".CSV file for your group model, but this file cannot " \
                     "be found. Please double-check the filepath you have " \
                     "entered.\n\nFilepath: %s\n\n" % custom_confile
            raise Exception(errmsg)

        evs = open(custom_confile, 'r').readline()
        evs = evs.rstrip('\r\n').split(',')
        count_ftests = 0

        fTest = False

        for ev in evs:
            if "f_test" in ev:
                count_ftests += 1

        if count_ftests > 0:
            fTest = True



    ''' begin iteration through group subject list for processing '''

    print "Sorting through subject list to check for missing outputs " \
          "for %s..\n" % resource

    for ga_sub in group_sublist:
        # Strip out carriage-return character if it is there
        
        if ga_sub.endswith('\r'):
            ga_sub = ga_sub.rstrip('\r')

        # ga_sub = subject ID taken off the group analysis subject list

        # let's check to make sure the subject list is formatted for
        # repeated measures properly if repeated measures is enabled
        # and vice versa
        if (group_conf.repeated_measures == True) and (',' not in ga_sub):
            print '\n\n'
            print '[!] CPAC says: The group analysis subject list ' \
                  'is not in the appropriate format for repeated ' \
                  'measures.\n'
            print 'Please use the appropriate format as described in ' \
                  'the CPAC User Guide or turn off Repeated Measures ' \
                  'in the CPAC pipeline configuration editor, found ' \
                  'in the \'Group Analysis Settings\' tab of the ' \
                  'pipeline configuration editor.\n'
            print 'NOTE: CPAC generates a properly-formatted group ' \
                  'analysis subject list meant for running repeated ' \
                  'measures when you create your original subject ' \
                  'list. Look for \'subject_list_group_analysis_' \
                  'repeated_measures.txt\' in the directory where ' \
                  'you created your subject list.\n\n'
            raise Exception

        elif (group_conf.repeated_measures == False) and (',' in ga_sub):
            print '\n\n'
            print '[!] CPAC says: It looks like your group analysis ' \
                  'subject list is formatted for running repeated ' \
                  'measures, but \'Run Repeated Measures\' is not ' \
                  'enabled in the pipeline configuration, found in ' \
                  'the \'Group Analysis Settings\' tab of the ' \
                  'pipeline configuration editor.\n'
            print 'Double-check your pipeline configuration?\n\n'
            raise Exception



        ''' process subject ids for repeated measures, if it is on '''
        # if repeated measures is being run and the subject list
        # is a list of subject IDs and scan IDs concatenated
        if (group_conf.repeated_measures == True):

            # sub.count(',') equals 1 when there is either multiple scans
            # or multiple sessions but not both, for repeated measures

            # sub.count(',') equals 2 when there are multiple sessions
            # AND scans, for repeated measures

            if ga_sub.count(',') == 1:
                sub_id = ga_sub.split(',',1)[0]
                other_id = ga_sub.split(',',1)[1]

            elif ga_sub.count(',') == 2:
                sub_id = ga_sub.split(',',2)[0]
                scan_id = ga_sub.split(',',2)[1]
                session_id = ga_sub.split(',',2)[2]



        ''' drop subjects from the group subject list '''
        # check the path files in path_files_here folder in the
        # subject's output folder - and drop any subjects from the
        # group analysis subject list which do not exist in the paths
        # to the output files

        '''
        REVISIT THIS LATER to establish a potentially better way to
        pull output paths (instead of path_files_here)
        '''

        for path in s_paths:

            if (group_conf.repeated_measures == True):

                if ga_sub.count(',') == 1:
                    if (sub_id in path) and (other_id in path):
                        exist_paths.append(ga_sub)
                        derivative_paths.append(path)

                elif ga_sub.count(',') == 2:
                    if (sub_id in path) and (scan_id in path) and \
                            (session_id in path):
                        exist_paths.append(ga_sub)
                        derivative_paths.append(path)

            else:
                if ga_sub in path:
                    exist_paths.append(ga_sub)
                    derivative_paths.append(path)


        # END subject-dropping!

        if len(derivative_paths) == 0:
            print '\n\n\n[!] CPAC says: None of the subjects listed in the ' \
                  'group analysis subject list were found to have outputs ' \
                  'produced by individual-level analysis.\n\nEnsure that ' \
                  'the subjects listed in your group analysis subject list ' \
                  'are the same as the ones included in the individual-' \
                  'level analysis you are running group-level analysis for.' \
                  '\n\n\n'
            raise Exception

    ''' END subject list iteration '''
 

    # check to see if any derivatives of subjects are missing
    if len(list(set(group_sublist) - set(exist_paths))) >0:
        print "List of outputs missing for subjects:"
        print list(set(group_sublist) - set(exist_paths))
        print "..for derivatives:"
        print resource
        print "..at paths:"
        print os.path.dirname(s_paths[0]).replace(s_ids[0], '*')

        

    # create the path string for the group analysis output
    out_dir = os.path.dirname(s_paths[0]).split(p_id[0] + '/')
    out_dir = os.path.join(group_conf.output_dir, out_dir[1])
    out_dir = out_dir.replace(s_ids[0], 'group_analysis_results_%s/_grp_model_%s'%(p_id[0],group_conf.model_name))

    model_out_dir = os.path.join(group_conf.output_dir, 'group_analysis_results_%s/_grp_model_%s'%(p_id[0],group_conf.model_name))

    mod_path = os.path.join(out_dir, 'model_files')


    if not os.path.isdir(mod_path):
        os.makedirs(mod_path)

        
    ''' write the new subject list '''
    new_sub_file = os.path.join(mod_path, os.path.basename(group_conf.subject_list))

    try:

        f = open(new_sub_file, 'w')
         
        for sub in exist_paths:
            print >>f, sub
        
        f.close()

    except:

        print "Error: Could not open subject list file: ", new_sub_file
        raise Exception


    group_conf.update('subject_list',new_sub_file)

    sub_id_label = group_conf.subject_id_label


    # Run 'create_fsl_model' script to extract phenotypic data from
    # the phenotypic file for each of the subjects in the subject list

    ''' get the motion statistics parameter file, if present '''
    # get the parameter file so it can be passed to create_fsl_model.py
    # so MeanFD or other measures can be included in the design matrix

    measure_list = ['MeanFD', 'MeanFD_Jenkinson', 'MeanDVARS']

    for measure in measure_list:
    
        if (measure in group_conf.design_formula):    

            parameter_file = os.path.join(c.outputDirectory, p_id[0], '%s%s_all_params.csv'%(scan_ids[0].strip('_'),threshold_val))

            if 1 in c.runGenerateMotionStatistics:

                if not os.path.exists(parameter_file):
                    print '\n\n[!] CPAC says: Could not find or open the motion ' \
                          'parameter file. This is necessary if you have included ' \
                          'any of the MeanFD measures in your group model.\n\n' \
                          'If Generate Motion Statistics is enabled, this file can ' \
                          'usually be found in the output directory of your ' \
                          'individual-level analysis runs. If it is not there, ' \
                          'double-check to see if individual-level analysis had ' \
                          'completed successfully.\n'
                    print 'Path not found: ', parameter_file, '\n\n'
                    raise Exception

            else:

                def no_measures_error(measure):
                    print '\n\n[!] CPAC says: The measure %s was included in ' \
                          'your group analysis design matrix formula, but ' \
                          'Generate Motion Statistics was not run during ' \
                          'individual-level analysis.\n' % measure
                    print 'Please run Generate Motion Statistics if you wish ' \
                          'to include this measure in your model.\n'
                    print 'If you HAVE completed a run with this option ' \
                          'enabled, then you are seeing this error because ' \
                          'the motion parameter file normally created by this ' \
                          'option is missing.\n\n'
                    raise Exception

                for measure in measure_list:
                    if (measure in group_conf.design_formula):
                        no_measures_error(measure)

                parameter_file = None
                
            break
            
    else:
    
        parameter_file = None



    # path to the pipeline folder to be passed to create_fsl_model.py
    # so that certain files like output_means.csv can be accessed
    pipeline_path = os.path.join(c.outputDirectory, p_id[0])

    # the current output that cpac_group_analysis_pipeline.py and
    # create_fsl_model.py is currently being run for
    current_output = resource #s_paths[0].replace(pipeline_path, '').split('/')[2]

    # generate working directory for this output's group analysis run
    workDir = '%s/group_analysis/%s/%s_%s' % (c.workingDirectory, group_conf.model_name, resource, scan_ids[0])

    # s_paths is a list of paths to each subject's derivative (of the current
    # derivative gpa is being run on) - s_paths_dirList is a list of each directory
    # in this path separated into list elements
             
    # this makes strgy_path basically the directory path of the folders after
    # the scan ID folder level         
    strgy_path = os.path.dirname(s_paths[0]).split(scan_ids[0])[1]

    # get rid of periods in the path
    for ch in ['.']:
        if ch in strgy_path:
            strgy_path = strgy_path.replace(ch, "")
                
    # create nipype-workflow-name-friendly strgy_path
    # (remove special characters)
    strgy_path_name = strgy_path.replace('/', "_")

    workDir = workDir + '/' + strgy_path_name



    ''' merge the remaining subjects for this current output '''
    # then, take the group mask, and iterate over the list of subjects
    # remaining to extract the mean of each subject using the group
    # mask

    merge_input = " "

    merge_output_dir = workDir + "/merged_files"

    if not os.path.exists(merge_output_dir):
        os.makedirs(merge_output_dir)

    merge_output = merge_output_dir + "/" + current_output + "_merged.nii.gz"
    merge_mask_output = merge_output_dir + "/" + current_output + "_merged_mask.nii.gz"

    # create a string per derivative filled with every subject's path to the
    # derivative output file
    for derivative_path in derivative_paths:
        merge_input = merge_input + " " + derivative_path
        
    merge_string = "fslmerge -t %s %s" % (merge_output, merge_input)

    # MERGE the remaining outputs
    try:
        commands.getoutput(merge_string)
    except Exception as e:
        print "[!] CPAC says: FSL Merge failed for output: %s" % current_output
        print "Error details: %s\n\n" % e
        raise

    merge_mask_string = "fslmaths %s -abs -Tmin -bin %s" % (merge_output, merge_mask_output)

    # CREATE A MASK of the merged file
    try:
        commands.getoutput(merge_mask_string)
    except Exception as e:
        print "[!] CPAC says: FSL Mask failed for output: %s" % current_output
        print "Error details: %s\n\n" % e
        raise


    derivative_means_dict = {}
    roi_means_dict = {}

    
    # CALCULATE THE MEANS of each remaining output using the group mask
    for derivative_path in derivative_paths:

        try:
            if "Group Mask" in group_conf.mean_mask:
                maskave_output = commands.getoutput("3dmaskave -mask %s %s" % (merge_mask_output, derivative_path))
            elif "Individual Mask" in group_conf.mean_mask:
                maskave_output = commands.getoutput("3dmaskave -mask %s %s" % (derivative_path, derivative_path))
        except Exception as e:
            print "[!] CPAC says: AFNI 3dmaskave failed for output: %s\n" \
                  "(Measure Mean calculation)" % current_output
            print "Error details: %s\n\n" % e
            raise

        # get the subject ID of the current derivative path reliably
        derivative_path_subID = derivative_path.replace(pipeline_path,"").strip("/").split("/")[0]

        # this crazy-looking command simply extracts the mean from the
        # verbose AFNI 3dmaskave output string
        derivative_means_dict[derivative_path_subID] = maskave_output.split("\n")[-1].split(" ")[0]

        # derivative_means_dict is now something like this:
        # { 'sub001': 0.3124, 'sub002': 0.2981, .. }

 
        # if custom ROI means are included in the model, do the same for those
        if "Custom_ROI_Mean" in group_conf.design_formula:

            try:
            
                if "centrality" in derivative_path:
                
                    # resample custom roi mask to 3mm, then use that
                    resampled_roi_mask = merge_output_dir + "/" + current_output + "_resampled_roi_mask.nii.gz"
                    
                    commands.getoutput("flirt -in %s -ref %s -o %s -applyxfm -init %s -interp nearestneighbour" % (group_conf.custom_roi_mask, derivative_path, resampled_roi_mask, c.identityMatrix))
                    
                    ROIstats_output = commands.getoutput("3dROIstats -mask %s %s" % (resampled_roi_mask, derivative_path))       
                    
                else:    
                        
                    ROIstats_output = commands.getoutput("3dROIstats -mask %s %s" % (group_conf.custom_roi_mask, derivative_path))
                    
            except Exception as e:
                print "[!] CPAC says: AFNI 3dROIstats failed for output: %s" \
                      "\n(Custom ROI Mean calculation)" % current_output
                print "Error details: %s\n\n" % e
                raise

            ROIstats_list = ROIstats_output.split("\t")

            # calculate the number of ROIs - 3dROIstats output can be split
            # into a list, and the actual ROI means begin at a certain point
            num_rois = (len(ROIstats_list)-3)/2

            roi_means = []

            # create a list of the ROI means - each derivative of each subject
            # will have N number of ROIs depending on how many ROIs were
            # specified in the custom ROI mask
            for num in range(num_rois+3,len(ROIstats_list)):

                roi_means.append(ROIstats_list[num])


            roi_means_dict[derivative_path_subID] = roi_means

        else:

            roi_means_dict = None



    if len(derivative_means_dict.keys()) == 0:
        err_string = "[!] CPAC says: Something went wrong with the " \
                     "calculation of the output means via the group mask.\n\n"
        raise Exception(err_string)
                     


    ''' run create_fsl_model.py to generate the group analysis models '''
    
    from CPAC.utils import create_fsl_model
    create_fsl_model.run(group_conf, fTest, parameter_file, derivative_means_dict, pipeline_path, current_output, model_out_dir, roi_means_dict, True)



    ''' begin GA workflow setup '''

    if not os.path.exists(new_sub_file):
        raise Exception("path to input subject list %s is invalid" % new_sub_file)
        
    #if c.mixedScanAnalysis == True:
    #    wf = pe.Workflow(name = 'group_analysis/%s/grp_model_%s'%(resource, os.path.basename(model)))
    #else:

    wf = pe.Workflow(name = resource)

    wf.base_dir = workDir
    wf.config['execution'] = {'hash_method': 'timestamp', 'crashdump_dir': os.path.abspath(c.crashLogDirectory)}
    log_dir = os.path.join(group_conf.output_dir, 'logs', 'group_analysis', resource, 'model_%s' % (group_conf.model_name))
        

    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    else:
        pass


    # gp_flow
    # Extracts the model files (.con, .grp, .mat, .fts) from the model
    # directory and sends them to the create_group_analysis workflow gpa_wf

    gp_flow = create_grp_analysis_dataflow("gp_dataflow_%s" % resource)
    gp_flow.inputs.inputspec.grp_model = os.path.join(model_out_dir, "model_files", current_output)
    gp_flow.inputs.inputspec.model_name = group_conf.model_name
    gp_flow.inputs.inputspec.ftest = fTest
  

    # gpa_wf
    # Creates the actual group analysis workflow

    gpa_wf = create_group_analysis(fTest, "gp_analysis_%s" % resource)

    gpa_wf.inputs.inputspec.merged_file = merge_output
    gpa_wf.inputs.inputspec.merge_mask = merge_mask_output

    gpa_wf.inputs.inputspec.z_threshold = z_threshold
    gpa_wf.inputs.inputspec.p_threshold = p_threshold
    gpa_wf.inputs.inputspec.parameters = (c.FSLDIR, 'MNI152')
    
   
    wf.connect(gp_flow, 'outputspec.mat',
               gpa_wf, 'inputspec.mat_file')
    wf.connect(gp_flow, 'outputspec.con',
               gpa_wf, 'inputspec.con_file')
    wf.connect(gp_flow, 'outputspec.grp',
                gpa_wf, 'inputspec.grp_file')
           
    if fTest:
        wf.connect(gp_flow, 'outputspec.fts',
                   gpa_wf, 'inputspec.fts_file')
        

    # ds
    # Creates the datasink node for group analysis
       
    ds = pe.Node(nio.DataSink(), name='gpa_sink')
     
    if 'sca_roi' in resource:
        out_dir = os.path.join(out_dir, \
            re.search('sca_roi_(\d)+',os.path.splitext(os.path.splitext(os.path.basename(s_paths[0]))[0])[0]).group(0))
            
            
    if 'dr_tempreg_maps_zstat_files_to_standard_smooth' in resource:
        out_dir = os.path.join(out_dir, \
            re.search('temp_reg_map_z_(\d)+',os.path.splitext(os.path.splitext(os.path.basename(s_paths[0]))[0])[0]).group(0))
            
            
    if 'centrality' in resource:
        names = ['degree_centrality_binarize', 'degree_centrality_weighted', \
                 'eigenvector_centrality_binarize', 'eigenvector_centrality_weighted', \
                 'lfcd_binarize', 'lfcd_weighted']

        for name in names:
            if name in os.path.basename(s_paths[0]):
                out_dir = os.path.join(out_dir, name)
                break

    if 'tempreg_maps' in resource:
        out_dir = os.path.join(out_dir, \
            re.search('\w*[#]*\d+', os.path.splitext(os.path.splitext(os.path.basename(s_paths[0]))[0])[0]).group(0))
        
#     if c.mixedScanAnalysis == True:
#         out_dir = re.sub(r'(\w)*scan_(\w)*(\d)*(\w)*[/]', '', out_dir)
              
    ds.inputs.base_directory = out_dir
    ds.inputs.container = ''
        
    ds.inputs.regexp_substitutions = [(r'(?<=rendered)(.)*[/]','/'),
                                      (r'(?<=model_files)(.)*[/]','/'),
                                      (r'(?<=merged)(.)*[/]','/'),
                                      (r'(?<=stats/clusterMap)(.)*[/]','/'),
                                      (r'(?<=stats/unthreshold)(.)*[/]','/'),
                                      (r'(?<=stats/threshold)(.)*[/]','/'),
                                      (r'_cluster(.)*[/]',''),
                                      (r'_slicer(.)*[/]',''),
                                      (r'_overlay(.)*[/]','')]
   
    '''
    if 1 in c.runSymbolicLinks:
  
        link_node = pe.MapNode(interface=util.Function(
                            input_names=['in_file',
                                        'resource'],
                                output_names=[],
                                function=prepare_gp_links),
                                name='link_gp_', iterfield=['in_file'])
        link_node.inputs.resource = resource
        wf.connect(ds, 'out_file', link_node, 'in_file')
    '''
    

    ########datasink connections#########
    if fTest:
        wf.connect(gp_flow, 'outputspec.fts',
                   ds, 'model_files.@0') 
        
    wf.connect(gp_flow, 'outputspec.mat',
               ds, 'model_files.@1' )
    wf.connect(gp_flow, 'outputspec.con',
               ds, 'model_files.@2')
    wf.connect(gp_flow, 'outputspec.grp',
               ds, 'model_files.@3')
    wf.connect(gpa_wf, 'outputspec.merged',
               ds, 'merged')
    wf.connect(gpa_wf, 'outputspec.zstats',
               ds, 'stats.unthreshold')
    wf.connect(gpa_wf, 'outputspec.zfstats',
               ds,'stats.unthreshold.@01')
    wf.connect(gpa_wf, 'outputspec.fstats',
               ds,'stats.unthreshold.@02')
    wf.connect(gpa_wf, 'outputspec.cluster_threshold_zf',
               ds, 'stats.threshold')
    wf.connect(gpa_wf, 'outputspec.cluster_index_zf',
               ds,'stats.clusterMap')
    wf.connect(gpa_wf, 'outputspec.cluster_localmax_txt_zf',
               ds, 'stats.clusterMap.@01')
    wf.connect(gpa_wf, 'outputspec.overlay_threshold_zf',
               ds, 'rendered')
    wf.connect(gpa_wf, 'outputspec.rendered_image_zf',
               ds, 'rendered.@01')
    wf.connect(gpa_wf, 'outputspec.cluster_threshold',
               ds,  'stats.threshold.@01')
    wf.connect(gpa_wf, 'outputspec.cluster_index',
               ds, 'stats.clusterMap.@02')
    wf.connect(gpa_wf, 'outputspec.cluster_localmax_txt',
               ds, 'stats.clusterMap.@03')
    wf.connect(gpa_wf, 'outputspec.overlay_threshold',
               ds, 'rendered.@02')
    wf.connect(gpa_wf, 'outputspec.rendered_image',
               ds, 'rendered.@03')
       
    ######################################

    # Run the actual group analysis workflow
    wf.run()

    '''
    except:

        print "Error: Group analysis workflow run command did not complete successfully."
        print "subcount: ", subcount
        print "pathcount: ", pathcount
        print "sublist: ", sublist_items
        print "input subject list: "
        print "conf: ", conf.subjectListFile
            
        raise Exception
    '''
    
    print "**Workflow finished for model %s and resource %s"%(os.path.basename(group_conf.output_dir), resource)
Example #7
0
# --- SCA group-analysis script fragment (flat script, no enclosing def) ---
# Sets up a FLAME group analysis over per-subject SCA z-maps and a DataSink
# for its outputs.
# NOTE(review): this fragment depends on names defined earlier in the original
# script and not visible here: `read_table` (presumably pandas.read_table),
# `joins`, `dirnames`, `func_list`, `roi`, `odir`, `matfile`, `confile`,
# `grpfile`, `op` (presumably os.path), `pe` (nipype.pipeline.engine),
# `nio` (nipype.interfaces.io), and `create_group_analysis` -- confirm
# against the full source.

# read first column of the functional-file list into a plain Python list
# NOTE(review): `.ix` is a long-deprecated pandas indexer -- works only on
# old pandas versions
func_files  = read_table(func_list, header=None).ix[:,0].tolist()
# derive each subject's SCA output directory/file for the current ROI index
sca_dirs    = joins(dirnames(func_files), "sca/fwhm_08")
sca_files   = joins(sca_dirs, "smoothed_zscore_peaks100_2mm.nii_roi_n%02i.nii.gz" % roi)
nsubjects   = len(func_files)

# sanity check: only the first subject's file is tested, not all of them
if not op.exists(sca_files[0]): raise Exception("SCA files doesn't exist")


####


# thresholds for cluster correction
# (z = 1.96 is presumably the two-tailed z for p = 0.05 -- confirm intent)
zThreshold = 1.96
pThreshold = 0.05
FSLDIR     = os.environ['FSLDIR']

# build the group-analysis workflow; first argument False => no f-tests
gpa_wf = create_group_analysis(False, "gp_analysis_compcor")
gpa_wf.base_directory = odir

# wire the per-subject z-maps, thresholds, and pre-built FSL model files
# (.mat/.con/.grp) directly into the workflow's inputspec
gpa_wf.inputs.inputspec.zmap_files  = sca_files
gpa_wf.inputs.inputspec.z_threshold = zThreshold
gpa_wf.inputs.inputspec.p_threshold = pThreshold
gpa_wf.inputs.inputspec.parameters  = (FSLDIR, 'MNI152')
gpa_wf.inputs.inputspec.mat_file    = matfile
gpa_wf.inputs.inputspec.con_file    = confile
gpa_wf.inputs.inputspec.grp_file    = grpfile


####

# datasink collecting group-analysis outputs under a per-ROI subdirectory
ds = pe.Node(nio.DataSink(), name='gpa_sink')
ds.inputs.base_directory = op.join(odir, "roi_n%02i" % roi)
        strgy_path_name = strgy_path.replace('/', '__')

        # gp_flow
        # Extracts the model files (.con, .grp, .mat, .fts) from the model
        # directory and sends them to the create_group_analysis workflow gpa_wf

        gp_flow = create_grp_analysis_dataflow("gp_dataflow_%s" % strgy_path_name)
        gp_flow.inputs.inputspec.grp_model = model
        gp_flow.inputs.inputspec.ftest = c.fTest
        
        
        
        # gpa_wf
        # Creates the actual group analysis workflow

        gpa_wf = create_group_analysis(c.fTest, "gp_analysis_%s" % strgy_path_name)

        gpa_wf.inputs.inputspec.zmap_files = ordered_paths
        gpa_wf.inputs.inputspec.z_threshold = c.zThreshold
        gpa_wf.inputs.inputspec.p_threshold = c.pThreshold
        gpa_wf.inputs.inputspec.parameters = (c.FSLDIR, 'MNI152')
    
        print "group model: ", model
        print "f test: ", c.fTest
        print "z threshold: ", c.zThreshold
        print "p threshold: ", c.pThreshold
        print "parameters: ", (c.FSLDIR, 'MNI152')

    
        wf.connect(gp_flow, 'outputspec.mat',
                   gpa_wf, 'inputspec.mat_file')
def prep_group_analysis_workflow(model_df, pipeline_config_obj, \
    model_name, group_config_obj, resource_id, preproc_strat, \
    series_or_repeated_label):
    """Build and run the FSL/FLAME group-level analysis for one derivative.

    Runs once per (derivative type, preprocessing strategy) combination
    during group analysis: creates output/working/log directories, writes a
    pruned participant list, merges per-subject derivative files into a 4D
    cope file, optionally computes measure/ROI means, generates the FSL
    model files, and wires a nipype workflow that runs FLAME and sinks its
    outputs.

    NOTE(review): this function appears to be an UNFINISHED MERGE of two
    versions. Everything after the `#----------------` divider (below)
    belongs to an older revision and references many names that are NOT
    defined in this scope (`group_config_file`, `output_path_file`,
    `output_file_list`, `series_ids`, `threshold_val`, `resource`, `c`,
    `group_conf`, `parameter_file`, `Configuration`). As written, the
    function will raise NameError partway through. The per-defect notes
    below mark each problem -- confirm against version control before
    relying on any documented behavior past the divider.

    Parameters (as used by the visible code):
        model_df -- pandas-like DataFrame with at least "Participant",
            "Filepath", and (sometimes) "Raw_Filepath" and motion columns
            ("MeanFD_Power", "MeanFD_Jenkinson", "MeanDVARS")
            -- schema inferred from usage, TODO confirm
        pipeline_config_obj -- pipeline configuration object; only
            `.pipeline_name` and `.crashLogDirectory` are read here
        model_name -- name of the group model (used in paths)
        group_config_obj -- group model configuration object
        resource_id -- derivative/resource identifier (used as workflow name)
        preproc_strat -- preprocessing strategy path component
        series_or_repeated_label -- series or repeated-measures label
            (used only in path construction)
    """

    #
    # this function runs once per derivative type and preproc strat combo
    # during group analysis
    #

    import os

    import nipype.pipeline.engine as pe
    import nipype.interfaces.utility as util
    import nipype.interfaces.io as nio

    pipeline_ID = pipeline_config_obj.pipeline_name

    # get thresholds
    z_threshold = float(group_config_obj.z_threshold[0])

    p_threshold = float(group_config_obj.p_threshold[0])

    # NOTE(review): `sub_id_label` is assigned but never used in this function
    sub_id_label = group_config_obj.subject_id_label

    # determine if f-tests are included or not
    custom_confile = group_config_obj.custom_contrasts

    if ((custom_confile == None) or (custom_confile == '') or \
            ("None" in custom_confile) or ("none" in custom_confile)):

        # no custom contrasts file: f-tests come from the group config list
        if (len(group_config_obj.f_tests) == 0) or \
            (group_config_obj.f_tests == None):
            fTest = False
        else:
            fTest = True

    else:

        if not os.path.exists(custom_confile):
            errmsg = "\n[!] CPAC says: You've specified a custom contrasts " \
                     ".CSV file for your group model, but this file cannot " \
                     "be found. Please double-check the filepath you have " \
                     "entered.\n\nFilepath: %s\n\n" % custom_confile
            raise Exception(errmsg)

        # read only the CSV header row; f-tests are detected by column name
        with open(custom_confile, "r") as f:
            evs = f.readline()

        evs = evs.rstrip('\r\n').split(',')
        count_ftests = 0

        fTest = False

        for ev in evs:
            if "f_test" in ev:
                count_ftests += 1

        if count_ftests > 0:
            fTest = True

    # create path for output directory
    out_dir = os.path.join(group_config_obj.output_dir, \
        "group_analysis_results_%s" % pipeline_ID, \
        "group_model_%s" % model_name, resource_id, \
        series_or_repeated_label, preproc_strat)

    model_path = os.path.join(out_dir, 'model_files')

    # generate working directory for this output's group analysis run
    # NOTE(review): `c` is not defined in this function -- likely a leftover
    # from the older version; presumably should be pipeline_config_obj
    work_dir = os.path.join(c.workingDirectory, "group_analysis", model_name,\
        resource_id, series_or_repeated_label, preproc_strat)

    log_dir = os.path.join(out_dir, 'logs', resource_id, \
        'model_%s' % model_name)

    # create the actual directories
    if not os.path.isdir(model_path):
        try:
            os.makedirs(model_path)
        except Exception as e:
            err = "\n\n[!] Could not create the group analysis output " \
                  "directories.\n\nAttempted directory creation: %s\n\n" \
                  "Error details: %s\n\n" % (model_path, e)
            raise Exception(err)

    if not os.path.isdir(work_dir):
        try:
            os.makedirs(work_dir)
        except Exception as e:
            # NOTE(review): error message reports model_path, not work_dir
            err = "\n\n[!] Could not create the group analysis working " \
                  "directories.\n\nAttempted directory creation: %s\n\n" \
                  "Error details: %s\n\n" % (model_path, e)
            raise Exception(err)

    if not os.path.isdir(log_dir):
        try:
            os.makedirs(log_dir)
        except Exception as e:
            # NOTE(review): error message reports model_path, not log_dir
            err = "\n\n[!] Could not create the group analysis logfile " \
                  "directories.\n\nAttempted directory creation: %s\n\n" \
                  "Error details: %s\n\n" % (model_path, e)
            raise Exception(err)

    # create new subject list based on which subjects are left after checking
    # for missing outputs
    new_participant_list = []
    for part in list(model_df["Participant"]):
        # do this instead of using "set" just in case, to preserve order
        #   only reason there may be duplicates is because of multiple-series
        #   repeated measures runs
        if part not in new_participant_list:
            new_participant_list.append(part)

    new_sub_file = write_new_sub_file(model_path, \
                                      group_config_obj.participant_list, \
                                      new_participant_list)

    # NOTE(review): `group_conf` is not defined in this scope -- presumably
    # should be group_config_obj
    group_conf.update('participant_list', new_sub_file)

    # start processing the dataframe further
    design_formula = group_config_obj.design_formula

    # demean the motion params
    if ("MeanFD" in design_formula) or ("MeanDVARS" in design_formula):
        params = ["MeanFD_Power", "MeanFD_Jenkinson", "MeanDVARS"]
        for param in params:
            model_df[param] = model_df[param].astype(float)
            model_df[param] = model_df[param].sub(model_df[param].mean())

    # create 4D merged copefile, in the correct order, identical to design
    # matrix
    merge_outfile = model_name + "_" + resource_id + "_merged.nii.gz"
    merge_outfile = os.path.join(model_path, merge_outfile)

    merge_file = create_merged_copefile(list(model_df["Filepath"]), \
                                        merge_outfile)

    # create merged group mask
    # NOTE(review): `merge_mask` is only bound in this branch; the
    # "Measure_Mean" and "Custom_ROI_Mean" blocks below will raise NameError
    # when mean_mask[0] != "Group Mask"
    if group_config_obj.mean_mask[0] == "Group Mask":
        merge_mask_outfile = os.path.basename(merge_file) + "_mask.nii.gz"
        merge_mask = create_merged_mask(merge_file, merge_mask_outfile)

    # calculate measure means, and demean
    if "Measure_Mean" in design_formula:
        model_df = calculate_measure_mean_in_df(model_df, merge_mask)

    # calculate custom ROIs, and demean (in workflow?)
    if "Custom_ROI_Mean" in design_formula:

        custom_roi_mask = group_config_obj.custom_roi_mask

        if (custom_roi_mask == None) or (custom_roi_mask == "None") or \
            (custom_roi_mask == "none") or (custom_roi_mask == ""):
            err = "\n\n[!] You included 'Custom_ROI_Mean' in your design " \
                  "formula, but you didn't supply a custom ROI mask file." \
                  "\n\nDesign formula: %s\n\n" % design_formula
            raise Exception(err)

        # make sure the custom ROI mask file is the same resolution as the
        # output files - if not, resample and warn the user
        roi_mask = check_mask_file_resolution(list(model_df["Raw_Filepath"])[0], \
                                              custom_roi_mask, model_path, \
                                              resource_id)

        # if using group merged mask, trim the custom ROI mask to be within
        # its constraints
        if merge_mask:
            # NOTE(review): `input_mask` is not defined anywhere in this
            # function -- presumably should be custom_roi_mask or roi_mask
            output_mask = os.path.join(model_path, "group_masked_%s" \
                                       % os.path.basename(input_mask))
            roi_mask = trim_mask(roi_mask, merge_mask, output_mask)

        # calculate
        model_df = calculate_custom_roi_mean_in_df(model_df, roi_mask)

    # modeling group variances separately

    # add repeated measures 1's matrices

    # patsify model DF, drop columns not in design formula

    # process contrasts

    wf = pe.Workflow(name=resource_id)

    wf.base_dir = work_dir
    crash_dir = os.path.join(pipeline_config_obj.crashLogDirectory, \
                             "group_analysis", model_name)

    wf.config['execution'] = {'hash_method': 'timestamp', \
                              'crashdump_dir': crash_dir}

    if "Measure_Mean" in design_formula:
        measure_mean = pe.Node(util.Function(
            input_names=['model_df', 'merge_mask'],
            output_names=['model_df'],
            function=calculate_measure_mean_in_df),
                               name='measure_mean')
        measure_mean.inputs.model_df = model_df

        # NOTE(review): wf.connect expects a nipype node as its source, but
        # `merge_mask` above is the return value of create_merged_mask --
        # confirm it is actually a node and not a file path
        wf.connect(merge_mask, "out_file", measure_mean, "merge_mask")

    if "Custom_ROI_Mean" in design_formula:
        # NOTE(review): pe.Node(util.Function()) with no arguments and no
        # name is invalid/incomplete -- this branch looks unfinished
        roi_mean = pe.Node(util.Function())

    # NOTE(review): bare attribute access, statement has no effect
    group_config_obj.custom_roi_mask

    #----------------

    # NOTE(review): everything below this divider appears to be pasted in
    # from an OLDER version of this function; it reuses names that are not
    # defined in this scope and will raise NameError. See docstring.

    import yaml
    import pandas as pd

    # load group analysis model configuration file
    # NOTE(review): `group_config_file` and `Configuration` are undefined here
    try:
        with open(os.path.realpath(group_config_file), "r") as f:
            group_conf = Configuration(yaml.load(f))
    except Exception as e:
        err_string = "\n\n[!] CPAC says: Could not read group model " \
                     "configuration YML file. Ensure you have read access " \
                     "for the file and that it is formatted properly.\n\n" \
                     "Configuration file: %s\n\nError details: %s" \
                     % (group_config_file, e)
        raise Exception(err_string)

    # gather all of the information
    # - lists of all the participant unique IDs (participant_site_session) and
    # of all of the series IDs present in output_file_list
    # - also returns the pipeline ID
    # NOTE(review): `output_path_file` / `output_file_list` are undefined here
    new_participant_list, all_series_names, pipeline_ID = \
        gather_new_participant_list(output_path_file, output_file_list)

    # create the path string for the group analysis output
    #    replicate the directory path of one of the participant's output
    #    folder path to the derivative's file, but replace the participant ID
    #    with the group model name
    #        this is to ensure nothing gets overwritten between strategies
    #        or thresholds, etc.
    out_dir = os.path.dirname(output_file_list[0]).split(pipeline_ID + '/')
    out_dir = out_dir[1].split(out_dir[1].split("/")[-1])[0]
    out_dir = os.path.join(group_conf.output_dir, out_dir)
    out_dir = out_dir.replace(new_participant_list[0], \
                  'group_analysis_results_%s/_grp_model_%s' \
                  % (pipeline_ID, group_conf.model_name))

    # !!!!!!!!!!
    # NOTE(review): `series_ids` is undefined in this scope
    if (group_conf.repeated_measures == True) and (series_ids[0] != None):
        out_dir = out_dir.replace(series_ids[0] + "/", "multiple_series")

    # create model file output directories
    model_out_dir = os.path.join(group_conf.output_dir, \
        'group_analysis_results_%s/_grp_model_%s' \
        %(pipeline_ID, group_conf.model_name))

    mod_path = os.path.join(model_out_dir, 'model_files')

    if not os.path.isdir(mod_path):
        os.makedirs(mod_path)

    # current_mod_path = folder under
    #   "/gpa_output/_grp_model_{model name}/model_files/{current derivative}"
    # NOTE(review): `resource` is undefined here (new version uses resource_id)
    current_mod_path = os.path.join(mod_path, resource)

    if not os.path.isdir(current_mod_path):
        os.makedirs(current_mod_path)

    # create new subject list based on which subjects are left after checking
    # for missing outputs
    new_sub_file = write_new_sub_file(current_mod_path, \
                       group_conf.subject_list, new_participant_list)

    group_conf.update('subject_list', new_sub_file)

    # create new design matrix with only the subjects that are left

    # Run 'create_fsl_model' script to extract phenotypic data from
    # the phenotypic file for each of the subjects in the subject list

    # get the motion statistics parameter file, if present
    # get the parameter file so it can be passed to create_fsl_model.py
    # so MeanFD or other measures can be included in the design matrix
    ''' okay, here we go... how are we handling series? because here it needs to take in '''
    ''' the appropriate series to get the appropriate parameter file ! ! ! '''
    ''' MAY HAVE TO GO BACK ON THIS, and just have one series sent in per this function...'''

    power_params_files = {}

    measure_list = ['MeanFD_Power', 'MeanFD_Jenkinson', 'MeanDVARS']

    for measure in measure_list:

        if measure in group_conf.design_formula:

            for series_id in all_series_names:

                # NOTE(review): `c` and `threshold_val` are undefined here
                parameter_file = os.path.join(c.outputDirectory, \
                                              pipeline_ID, \
                                              '%s%s_all_params.csv' % \
                                              (series_id.strip('_'), \
                                              threshold_val))

                if not os.path.exists(parameter_file):
                    err = "\n\n[!] CPAC says: Could not find or open the motion "\
                          "parameter file. This is necessary if you have " \
                          "included any of the MeanFD measures in your group " \
                          "model.\n\nThis file can usually be found in the " \
                          "output directory of your individual-level analysis " \
                          "runs. If it is not there, double-check to see if " \
                          "individual-level analysis had completed successfully."\
                          "\n\nPath not found: %s\n\n" % parameter_file
                    raise Exception(err)

                power_params_files[series_id] = parameter_file

            break

    # this `else` belongs to the `for measure ...` loop (for/else): it runs
    # only when NO measure from measure_list appears in the design formula
    # (i.e., the loop finishes without hitting `break`)
    else:

        power_params_files = None

    # path to the pipeline folder to be passed to create_fsl_model.py
    # so that certain files like output_means.csv can be accessed
    pipeline_path = os.path.join(c.outputDirectory, pipeline_ID)

    # generate working directory for this output's group analysis run
    workDir = '%s/group_analysis/%s/%s' % (c.workingDirectory, \
                                               group_conf.model_name, \
                                               resource)

    # this makes strgy_path basically the directory path of the folders after
    # the resource/derivative folder level
    strgy_path = os.path.dirname(output_file_list[0]).split(resource)[1]

    # get rid of periods in the path
    for ch in ['.']:
        if ch in strgy_path:
            strgy_path = strgy_path.replace(ch, "")

    # create nipype-workflow-name-friendly strgy_path
    # (remove special characters)
    strgy_path_name = strgy_path.replace('/', "_")

    workDir = workDir + '/' + strgy_path_name

    # merge the subjects for this current output
    # then, take the group mask, and iterate over the list of subjects
    # to extract the mean of each subject using the group mask
    merge_output, merge_mask_output, merge_output_dir = \
        create_merged_files(workDir, resource, output_file_list)

    # CALCULATE THE MEANS of each output using the group mask
    derivative_means_dict, roi_means_dict = \
        calculate_output_means(resource, output_file_list, \
                               group_conf.mean_mask, \
                               group_conf.design_formula, \
                               group_conf.custom_roi_mask, pipeline_path, \
                               merge_output_dir, c.identityMatrix)

    measure_dict = {}

    # extract motion measures from CPAC-generated power params file
    if power_params_files != None:
        for param_file in power_params_files.values():
            new_measure_dict = get_measure_dict(param_file)
            measure_dict.update(new_measure_dict)

    # combine the motion measures dictionary with the measure_mean
    # dictionary (if it exists)
    if derivative_means_dict:
        measure_dict["Measure_Mean"] = derivative_means_dict

    # run create_fsl_model.py to generate the group analysis models

    # NOTE(review): importing `kill_me` looks like leftover debugging code;
    # `parameter_file` below may be unbound when no motion measure was used
    from CPAC.utils import create_fsl_model, kill_me
    create_fsl_model.run(group_conf, resource, parameter_file, \
                             derivative_means_dict, roi_means_dict, \
                                 current_mod_path, True)

    # begin GA workflow setup

    if not os.path.exists(new_sub_file):
        raise Exception("path to input subject list %s is invalid" %
                        new_sub_file)

    #if c.mixedScanAnalysis == True:
    #    wf = pe.Workflow(name = 'group_analysis/%s/grp_model_%s'%(resource, os.path.basename(model)))
    #else:

    # NOTE(review): this REBINDS `wf`, discarding the workflow (and the
    # measure_mean node) built before the divider above
    wf = pe.Workflow(name=resource)

    wf.base_dir = workDir
    wf.config['execution'] = {
        'hash_method': 'timestamp',
        'crashdump_dir': os.path.abspath(c.crashLogDirectory)
    }
    log_dir = os.path.join(group_conf.output_dir, 'logs', 'group_analysis',
                           resource, 'model_%s' % (group_conf.model_name))

    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    else:
        pass

    # gp_flow
    # Extracts the model files (.con, .grp, .mat, .fts) from the model
    # directory and sends them to the create_group_analysis workflow gpa_wf

    gp_flow = create_grp_analysis_dataflow("gp_dataflow_%s" % resource)
    gp_flow.inputs.inputspec.grp_model = os.path.join(mod_path, resource)
    gp_flow.inputs.inputspec.model_name = group_conf.model_name
    gp_flow.inputs.inputspec.ftest = fTest

    # gpa_wf
    # Creates the actual group analysis workflow

    gpa_wf = create_group_analysis(fTest, "gp_analysis_%s" % resource)

    gpa_wf.inputs.inputspec.merged_file = merge_output
    gpa_wf.inputs.inputspec.merge_mask = merge_mask_output

    gpa_wf.inputs.inputspec.z_threshold = z_threshold
    gpa_wf.inputs.inputspec.p_threshold = p_threshold
    gpa_wf.inputs.inputspec.parameters = (c.FSLDIR, 'MNI152')

    wf.connect(gp_flow, 'outputspec.mat', gpa_wf, 'inputspec.mat_file')
    wf.connect(gp_flow, 'outputspec.con', gpa_wf, 'inputspec.con_file')
    wf.connect(gp_flow, 'outputspec.grp', gpa_wf, 'inputspec.grp_file')

    if fTest:
        wf.connect(gp_flow, 'outputspec.fts', gpa_wf, 'inputspec.fts_file')

    # ds
    # Creates the datasink node for group analysis

    ds = pe.Node(nio.DataSink(), name='gpa_sink')

    # resource-specific subfolder naming: extract the ROI / map index from
    # the first output file's basename and append it to out_dir
    if 'sca_roi' in resource:
        out_dir = os.path.join(out_dir, \
            re.search('sca_roi_(\d)+',os.path.splitext(os.path.splitext(os.path.basename(output_file_list[0]))[0])[0]).group(0))

    if 'dr_tempreg_maps_zstat_files_to_standard_smooth' in resource:
        out_dir = os.path.join(out_dir, \
            re.search('temp_reg_map_z_(\d)+',os.path.splitext(os.path.splitext(os.path.basename(output_file_list[0]))[0])[0]).group(0))

    if 'centrality' in resource:
        names = ['degree_centrality_binarize', 'degree_centrality_weighted', \
                 'eigenvector_centrality_binarize', 'eigenvector_centrality_weighted', \
                 'lfcd_binarize', 'lfcd_weighted']

        for name in names:
            if name in os.path.basename(output_file_list[0]):
                out_dir = os.path.join(out_dir, name)
                break

    if 'tempreg_maps' in resource:
        out_dir = os.path.join(out_dir, \
            re.search('\w*[#]*\d+', os.path.splitext(os.path.splitext(os.path.basename(output_file_list[0]))[0])[0]).group(0))


#     if c.mixedScanAnalysis == True:
#         out_dir = re.sub(r'(\w)*scan_(\w)*(\d)*(\w)*[/]', '', out_dir)

    ds.inputs.base_directory = out_dir
    ds.inputs.container = ''

    # flatten nipype's nested working-directory structure in the sink paths
    ds.inputs.regexp_substitutions = [(r'(?<=rendered)(.)*[/]', '/'),
                                      (r'(?<=model_files)(.)*[/]', '/'),
                                      (r'(?<=merged)(.)*[/]', '/'),
                                      (r'(?<=stats/clusterMap)(.)*[/]', '/'),
                                      (r'(?<=stats/unthreshold)(.)*[/]', '/'),
                                      (r'(?<=stats/threshold)(.)*[/]', '/'),
                                      (r'_cluster(.)*[/]', ''),
                                      (r'_slicer(.)*[/]', ''),
                                      (r'_overlay(.)*[/]', '')]

    ########datasink connections#########
    if fTest:
        wf.connect(gp_flow, 'outputspec.fts', ds, 'model_files.@0')

    wf.connect(gp_flow, 'outputspec.mat', ds, 'model_files.@1')
    wf.connect(gp_flow, 'outputspec.con', ds, 'model_files.@2')
    wf.connect(gp_flow, 'outputspec.grp', ds, 'model_files.@3')
    wf.connect(gpa_wf, 'outputspec.merged', ds, 'merged')
    wf.connect(gpa_wf, 'outputspec.zstats', ds, 'stats.unthreshold')
    wf.connect(gpa_wf, 'outputspec.zfstats', ds, 'stats.unthreshold.@01')
    wf.connect(gpa_wf, 'outputspec.fstats', ds, 'stats.unthreshold.@02')
    wf.connect(gpa_wf, 'outputspec.cluster_threshold_zf', ds,
               'stats.threshold')
    wf.connect(gpa_wf, 'outputspec.cluster_index_zf', ds, 'stats.clusterMap')
    wf.connect(gpa_wf, 'outputspec.cluster_localmax_txt_zf', ds,
               'stats.clusterMap.@01')
    wf.connect(gpa_wf, 'outputspec.overlay_threshold_zf', ds, 'rendered')
    wf.connect(gpa_wf, 'outputspec.rendered_image_zf', ds, 'rendered.@01')
    wf.connect(gpa_wf, 'outputspec.cluster_threshold', ds,
               'stats.threshold.@01')
    wf.connect(gpa_wf, 'outputspec.cluster_index', ds, 'stats.clusterMap.@02')
    wf.connect(gpa_wf, 'outputspec.cluster_localmax_txt', ds,
               'stats.clusterMap.@03')
    wf.connect(gpa_wf, 'outputspec.overlay_threshold', ds, 'rendered.@02')
    wf.connect(gpa_wf, 'outputspec.rendered_image', ds, 'rendered.@03')

    ######################################

    # Run the actual group analysis workflow
    wf.run()


    # NOTE(review): Python 2 print statement -- this file is py2-only
    print "\n\nWorkflow finished for model %s and resource %s\n\n" \
          % (os.path.basename(group_conf.output_dir), resource)
Example #10
0
def prep_group_analysis_workflow(model_df, pipeline_config_path, \
    model_name, group_config_path, resource_id, preproc_strat, \
    series_or_repeated_label):

    #
    # this function runs once per derivative type and preproc strat combo
    # during group analysis
    #

    import os
    import patsy
    import numpy as np

    import nipype.pipeline.engine as pe
    import nipype.interfaces.utility as util
    import nipype.interfaces.io as nio

    from CPAC.pipeline.cpac_group_runner import load_config_yml
    from CPAC.utils.create_flame_model_files import create_flame_model_files
    from CPAC.utils.create_group_analysis_info_files import write_design_matrix_csv

    pipeline_config_obj = load_config_yml(pipeline_config_path)
    group_config_obj = load_config_yml(group_config_path)

    pipeline_ID = pipeline_config_obj.pipelineName

    # remove file names from preproc_strat
    filename = preproc_strat.split("/")[-1]
    preproc_strat = preproc_strat.replace(filename, "")
    preproc_strat = preproc_strat.lstrip("/").rstrip("/")

    # get thresholds
    z_threshold = float(group_config_obj.z_threshold[0])

    p_threshold = float(group_config_obj.p_threshold[0])

    sub_id_label = group_config_obj.participant_id_label

    ftest_list = []
    readme_flags = []

    # determine if f-tests are included or not
    custom_confile = group_config_obj.custom_contrasts

    if ((custom_confile == None) or (custom_confile == '') or \
            ("None" in custom_confile) or ("none" in custom_confile)):

        custom_confile = None

        if (len(group_config_obj.f_tests) == 0) or \
            (group_config_obj.f_tests == None):
            fTest = False
        else:
            fTest = True
            ftest_list = group_config_obj.f_tests

    else:

        if not os.path.exists(custom_confile):
            errmsg = "\n[!] CPAC says: You've specified a custom contrasts " \
                     ".CSV file for your group model, but this file cannot " \
                     "be found. Please double-check the filepath you have " \
                     "entered.\n\nFilepath: %s\n\n" % custom_confile
            raise Exception(errmsg)

        with open(custom_confile, "r") as f:
            evs = f.readline()

        evs = evs.rstrip('\r\n').split(',')
        count_ftests = 0

        fTest = False

        for ev in evs:
            if "f_test" in ev:
                count_ftests += 1

        if count_ftests > 0:
            fTest = True

    # create path for output directory
    out_dir = os.path.join(group_config_obj.output_dir,
                           "group_analysis_results_%s" % pipeline_ID,
                           "group_model_%s" % model_name, resource_id,
                           series_or_repeated_label, preproc_strat)

    if 'sca_roi' in resource_id:
        out_dir = os.path.join(out_dir,
            re.search('sca_ROI_(\d)+',os.path.splitext(\
                os.path.splitext(os.path.basename(\
                    model_df["Filepath"][0]))[0])[0]).group(0))

    if 'dr_tempreg_maps_zstat_files_to_standard_smooth' in resource_id:
        out_dir = os.path.join(out_dir,
            re.search('temp_reg_map_z_(\d)+',os.path.splitext(\
                os.path.splitext(os.path.basename(\
                    model_df["Filepath"][0]))[0])[0]).group(0))

    if 'centrality' in resource_id:
        names = ['degree_centrality_binarize', 'degree_centrality_weighted', \
                 'eigenvector_centrality_binarize',
                 'eigenvector_centrality_weighted',
                 'lfcd_binarize', 'lfcd_weighted']

        for name in names:
            if name in filename:
                out_dir = os.path.join(out_dir, name)
                break

    if 'tempreg_maps' in resource_id:
        out_dir = os.path.join(out_dir, re.search('\w*[#]*\d+', \
            os.path.splitext(os.path.splitext(os.path.basename(\
                model_df["Filepath"][0]))[0])[0]).group(0))

    model_path = os.path.join(out_dir, 'model_files')

    second_half_out = \
        out_dir.split("group_analysis_results_%s" % pipeline_ID)[1]

    # generate working directory for this output's group analysis run
    work_dir = os.path.join(pipeline_config_obj.workingDirectory, \
        "group_analysis", second_half_out.lstrip("/"))

    log_dir = os.path.join(pipeline_config_obj.logDirectory, \
        "group_analysis", second_half_out.lstrip("/"))

    # create the actual directories
    create_dir(model_path, "group analysis output")
    create_dir(work_dir, "group analysis working")
    create_dir(log_dir, "group analysis logfile")

    # create new subject list based on which subjects are left after checking
    # for missing outputs
    new_participant_list = []
    for part in list(model_df["Participant"]):
        # do this instead of using "set" just in case, to preserve order
        #   only reason there may be duplicates is because of multiple-series
        #   repeated measures runs
        if part not in new_participant_list:
            new_participant_list.append(part)

    new_sub_file = write_new_sub_file(model_path, \
                                      group_config_obj.participant_list, \
                                      new_participant_list)

    group_config_obj.update('participant_list', new_sub_file)

    num_subjects = len(list(model_df["Participant"]))

    # start processing the dataframe further
    design_formula = group_config_obj.design_formula

    # demean EVs set for demeaning
    for demean_EV in group_config_obj.ev_selections.get("demean", []):
        model_df[demean_EV] = model_df[demean_EV].astype(float)
        model_df[demean_EV] = model_df[demean_EV].sub(
            model_df[demean_EV].mean())

    # demean the motion params
    if ("MeanFD" in design_formula) or ("MeanDVARS" in design_formula):
        params = ["MeanFD_Power", "MeanFD_Jenkinson", "MeanDVARS"]
        for param in params:
            model_df[param] = model_df[param].astype(float)
            model_df[param] = model_df[param].sub(model_df[param].mean())

    # create 4D merged copefile, in the correct order, identical to design
    # matrix
    merge_outfile = model_name + "_" + resource_id + "_merged.nii.gz"
    merge_outfile = os.path.join(model_path, merge_outfile)

    merge_file = create_merged_copefile(list(model_df["Filepath"]), \
                                        merge_outfile)

    # create merged group mask
    merge_mask_outfile = model_name + "_" + resource_id + \
                         "_merged_mask.nii.gz"
    merge_mask_outfile = os.path.join(model_path, merge_mask_outfile)
    merge_mask = create_merge_mask(merge_file, merge_mask_outfile)

    if "Group Mask" in group_config_obj.mean_mask:
        mask_for_means = merge_mask
    else:
        individual_masks_dir = os.path.join(model_path, "individual_masks")
        create_dir(individual_masks_dir, "individual masks")
        for unique_id, series_id, raw_filepath in zip(
                model_df["Participant"], model_df["Series"],
                model_df["Raw_Filepath"]):

            mask_for_means_path = os.path.join(
                individual_masks_dir,
                "%s_%s_%s_mask.nii.gz" % (unique_id, series_id, resource_id))
            mask_for_means = create_merge_mask(raw_filepath,
                                               mask_for_means_path)
        readme_flags.append("individual_masks")

    # calculate measure means, and demean
    if "Measure_Mean" in design_formula:
        model_df = calculate_measure_mean_in_df(model_df, mask_for_means)

    # calculate custom ROIs, and demean (in workflow?)
    if "Custom_ROI_Mean" in design_formula:

        custom_roi_mask = group_config_obj.custom_roi_mask

        if (custom_roi_mask == None) or (custom_roi_mask == "None") or \
            (custom_roi_mask == "none") or (custom_roi_mask == ""):
            err = "\n\n[!] You included 'Custom_ROI_Mean' in your design " \
                  "formula, but you didn't supply a custom ROI mask file." \
                  "\n\nDesign formula: %s\n\n" % design_formula
            raise Exception(err)

        # make sure the custom ROI mask file is the same resolution as the
        # output files - if not, resample and warn the user
        roi_mask = check_mask_file_resolution(list(model_df["Raw_Filepath"])[0], \
                                              custom_roi_mask, mask_for_means, \
                                              model_path, resource_id)

        # trim the custom ROI mask to be within mask constraints
        output_mask = os.path.join(model_path, "masked_%s" \
                                   % os.path.basename(roi_mask))
        roi_mask = trim_mask(roi_mask, mask_for_means, output_mask)
        readme_flags.append("custom_roi_mask_trimmed")

        # calculate
        model_df = calculate_custom_roi_mean_in_df(model_df, roi_mask)

        # update the design formula
        new_design_substring = ""
        for col in model_df.columns:
            if "Custom_ROI_Mean_" in str(col):
                if str(col) == "Custom_ROI_Mean_1":
                    new_design_substring = new_design_substring + " %s" % col
                else:
                    new_design_substring = new_design_substring + " + %s" % col
        design_formula = design_formula.replace("Custom_ROI_Mean",
                                                new_design_substring)

    cat_list = []
    if "categorical" in group_config_obj.ev_selections.keys():
        cat_list = group_config_obj.ev_selections["categorical"]

    # prep design for repeated measures, if applicable
    if len(group_config_obj.sessions_list) > 0:
        design_formula = design_formula + " + Session"
        if "Session" not in cat_list:
            cat_list.append("Session")
    if len(group_config_obj.series_list) > 0:
        design_formula = design_formula + " + Series"
        if "Series" not in cat_list:
            cat_list.append("Series")
    for col in list(model_df.columns):
        if "participant_" in col:
            design_formula = design_formula + " + %s" % col
            cat_list.append(col)

    # parse out the EVs in the design formula at this point in time
    #   this is essentially a list of the EVs that are to be included
    ev_list = parse_out_covariates(design_formula)

    # SPLIT GROUPS here.
    #   CURRENT PROBLEMS: was creating a few doubled-up new columns
    grp_vector = [1] * num_subjects

    if group_config_obj.group_sep:

        # model group variances separately
        old_ev_list = ev_list

        model_df, grp_vector, ev_list, cat_list = split_groups(model_df, \
                                group_config_obj.grouping_var, \
                                ev_list, cat_list)

        # make the grouping variable categorical for Patsy (if we try to
        # do this automatically below, it will categorical-ize all of
        # the substrings too)
        design_formula = design_formula.replace(group_config_obj.grouping_var, \
                                  "C(" + group_config_obj.grouping_var + ")")
        if group_config_obj.coding_scheme == "Sum":
            design_formula = design_formula.replace(")", ", Sum)")

        # update design formula
        rename = {}
        for old_ev in old_ev_list:
            for new_ev in ev_list:
                if old_ev + "__FOR" in new_ev:
                    if old_ev not in rename.keys():
                        rename[old_ev] = []
                    rename[old_ev].append(new_ev)

        for old_ev in rename.keys():
            design_formula = design_formula.replace(old_ev, \
                                                   " + ".join(rename[old_ev]))

    # prep design formula for Patsy
    design_formula = patsify_design_formula(design_formula, cat_list, \
                         group_config_obj.coding_scheme[0])
    print design_formula
    # send to Patsy
    try:
        dmatrix = patsy.dmatrix(design_formula, model_df)
    except Exception as e:
        err = "\n\n[!] Something went wrong with processing the group model "\
              "design matrix using the Python Patsy package. Patsy might " \
              "not be properly installed, or there may be an issue with the "\
              "formatting of the design matrix.\n\nPatsy-formatted design " \
              "formula: %s\n\nError details: %s\n\n" \
              % (model_df.columns, design_formula, e)
        raise Exception(err)

    print dmatrix.design_info.column_names
    print dmatrix

    # check the model for multicollinearity - Patsy takes care of this, but
    # just in case
    check_multicollinearity(np.array(dmatrix))

    # prepare for final stages
    column_names = dmatrix.design_info.column_names

    # what is this for?
    design_matrix = np.array(dmatrix, dtype=np.float16)

    # check to make sure there are more time points than EVs!
    if len(column_names) >= num_subjects:
        err = "\n\n[!] CPAC says: There are more EVs than there are " \
              "participants currently included in the model for %s. There " \
              "must be more participants than EVs in the design.\n\nNumber " \
              "of participants: %d\nNumber of EVs: %d\n\nEV/covariate list: "\
              "%s\n\nNote: If you specified to model group " \
              "variances separately, the amount of EVs can nearly double " \
              "once they are split along the grouping variable.\n\n" \
              "If the number of subjects is lower than the number of " \
              "subjects in your group analysis subject list, this may be " \
              "because not every subject in the subject list has an output " \
              "for %s in the individual-level analysis output directory.\n\n"\
              % (resource_id, num_subjects, len(column_names), column_names, \
                 resource_id)
        raise Exception(err)

    # time for contrasts
    contrasts_list = None
    contrasts_vectors = None

    if ((custom_confile == None) or (custom_confile == '') or \
            ("None" in custom_confile) or ("none" in custom_confile)):

        # if no custom contrasts matrix CSV provided (i.e. the user
        # specified contrasts in the GUI)
        contrasts_list = group_config_obj.contrasts
        contrasts_vectors = create_contrasts_dict(dmatrix, contrasts_list,
                                                  resource_id)

    # check the merged file's order
    check_merged_file(model_df["Filepath"], merge_file)

    # we must demean the categorical regressors if the Intercept/Grand Mean
    # is included in the model, otherwise FLAME produces blank outputs
    if "Intercept" in column_names:

        cat_indices = []
        col_name_indices = dmatrix.design_info.column_name_indexes
        for col_name in col_name_indices.keys():
            if "C(" in col_name:
                cat_indices.append(int(col_name_indices[col_name]))

        # note: dmat_T is now no longer a DesignMatrix Patsy object, but only
        # an array
        dmat_T = dmatrix.transpose()

        for index in cat_indices:
            new_row = []
            for val in dmat_T[index]:
                new_row.append(val - dmat_T[index].mean())
            dmat_T[index] = new_row

        # we can go back, but we won't be the same
        dmatrix = dmat_T.transpose()

        readme_flags.append("cat_demeaned")

    # send off the info so the FLAME input model files can be generated!
    mat_file, grp_file, con_file, fts_file = create_flame_model_files(
        dmatrix, column_names, contrasts_vectors, contrasts_list,
        custom_confile, ftest_list, group_config_obj.group_sep, grp_vector,
        group_config_obj.coding_scheme[0], model_name, resource_id, model_path)

    dmat_csv_path = os.path.join(model_path, "design_matrix.csv")
    write_design_matrix_csv(dmatrix, model_df["Participant"], column_names,
                            dmat_csv_path)

    # workflow time
    wf_name = "%s_%s" % (resource_id, series_or_repeated_label)
    wf = pe.Workflow(name=wf_name)

    wf.base_dir = work_dir
    crash_dir = os.path.join(pipeline_config_obj.crashLogDirectory,
                             "group_analysis", model_name)

    wf.config['execution'] = {
        'hash_method': 'timestamp',
        'crashdump_dir': crash_dir
    }

    # gpa_wf
    # Creates the actual group analysis workflow
    gpa_wf = create_group_analysis(fTest, "gp_analysis_%s" % wf_name)

    gpa_wf.inputs.inputspec.merged_file = merge_file
    gpa_wf.inputs.inputspec.merge_mask = merge_mask

    gpa_wf.inputs.inputspec.z_threshold = z_threshold
    gpa_wf.inputs.inputspec.p_threshold = p_threshold
    gpa_wf.inputs.inputspec.parameters = (pipeline_config_obj.FSLDIR, 'MNI152')

    gpa_wf.inputs.inputspec.mat_file = mat_file
    gpa_wf.inputs.inputspec.con_file = con_file
    gpa_wf.inputs.inputspec.grp_file = grp_file

    if fTest:
        gpa_wf.inputs.inputspec.fts_file = fts_file

    # ds
    # Creates the datasink node for group analysis
    ds = pe.Node(nio.DataSink(), name='gpa_sink')

    #     if c.mixedScanAnalysis == True:
    #         out_dir = re.sub(r'(\w)*scan_(\w)*(\d)*(\w)*[/]', '', out_dir)

    ds.inputs.base_directory = str(out_dir)
    ds.inputs.container = ''

    ds.inputs.regexp_substitutions = [(r'(?<=rendered)(.)*[/]', '/'),
                                      (r'(?<=model_files)(.)*[/]', '/'),
                                      (r'(?<=merged)(.)*[/]', '/'),
                                      (r'(?<=stats/clusterMap)(.)*[/]', '/'),
                                      (r'(?<=stats/unthreshold)(.)*[/]', '/'),
                                      (r'(?<=stats/threshold)(.)*[/]', '/'),
                                      (r'_cluster(.)*[/]', ''),
                                      (r'_slicer(.)*[/]', ''),
                                      (r'_overlay(.)*[/]', '')]

    ########datasink connections#########
    #if fTest:
    #    wf.connect(gp_flow, 'outputspec.fts',
    #               ds, 'model_files.@0')

    #wf.connect(gp_flow, 'outputspec.mat',
    #           ds, 'model_files.@1' )
    #wf.connect(gp_flow, 'outputspec.con',
    #           ds, 'model_files.@2')
    #wf.connect(gp_flow, 'outputspec.grp',
    #           ds, 'model_files.@3')
    wf.connect(gpa_wf, 'outputspec.merged', ds, 'merged')
    wf.connect(gpa_wf, 'outputspec.zstats', ds, 'stats.unthreshold')
    wf.connect(gpa_wf, 'outputspec.zfstats', ds, 'stats.unthreshold.@01')
    wf.connect(gpa_wf, 'outputspec.fstats', ds, 'stats.unthreshold.@02')
    wf.connect(gpa_wf, 'outputspec.cluster_threshold_zf', ds,
               'stats.threshold')
    wf.connect(gpa_wf, 'outputspec.cluster_index_zf', ds, 'stats.clusterMap')
    wf.connect(gpa_wf, 'outputspec.cluster_localmax_txt_zf', ds,
               'stats.clusterMap.@01')
    wf.connect(gpa_wf, 'outputspec.overlay_threshold_zf', ds, 'rendered')
    wf.connect(gpa_wf, 'outputspec.rendered_image_zf', ds, 'rendered.@01')
    wf.connect(gpa_wf, 'outputspec.cluster_threshold', ds,
               'stats.threshold.@01')
    wf.connect(gpa_wf, 'outputspec.cluster_index', ds, 'stats.clusterMap.@02')
    wf.connect(gpa_wf, 'outputspec.cluster_localmax_txt', ds,
               'stats.clusterMap.@03')
    wf.connect(gpa_wf, 'outputspec.overlay_threshold', ds, 'rendered.@02')
    wf.connect(gpa_wf, 'outputspec.rendered_image', ds, 'rendered.@03')

    ######################################

    # Run the actual group analysis workflow
    wf.run()

    print "\n\nWorkflow finished for model %s\n\n" % wf_name
        print "strgy_path: ", strgy_path

        # gp_flow
        # Extracts the model files (.con, .grp, .mat, .fts) from the model
        # directory and sends them to the create_group_analysis workflow gpa_wf

        gp_flow = create_grp_analysis_dataflow("gp_dataflow_%s" % currentDerivative)
        gp_flow.inputs.inputspec.grp_model = model
        gp_flow.inputs.inputspec.ftest = c.fTest
        
        
        
        # gpa_wf
        # Creates the actual group analysis workflow

        gpa_wf = create_group_analysis(c.fTest, "gp_analysis_%s" % currentDerivative)

        gpa_wf.inputs.inputspec.zmap_files = ordered_paths
        gpa_wf.inputs.inputspec.z_threshold = c.zThreshold
        gpa_wf.inputs.inputspec.p_threshold = c.pThreshold
        gpa_wf.inputs.inputspec.parameters = (c.FSLDIR, 'MNI152')
    
        print "group model: ", model
        print "f test: ", c.fTest
        print "z threshold: ", c.zThreshold
        print "p threshold: ", c.pThreshold
        print "parameters: ", (c.FSLDIR, 'MNI152')

    
        wf.connect(gp_flow, 'outputspec.mat',
                   gpa_wf, 'inputspec.mat_file')
def prep_group_analysis_workflow(c, resource, subject_infos):
    print 'Preparing Group Analysis workflow'
    print 'subjects', subject_infos
    
    p_id, s_ids, scan_ids, s_paths = (list(tup) for tup in zip(*subject_infos))
    
    if c.mixedScanAnalysis == True:
        wf = pe.Workflow(name = 'group_analysis_%s'%resource)
    else:
        wf = pe.Workflow(name = 'group_analysis_%s_%s'%(resource,scan_ids[0])) 
    
    wf.base_dir = c.workingDirectory
    
    #extract model files
    model_list = [line.rstrip('\r\n') for line in open(c.modelFile, 'r')]
    
    if not model_list:
        raise Exception("mode_list is empty. Please provide" \
                         "a model file with full paths of the" \
                         "folder containing models for group analysis")
    
    from collections import defaultdict
    model_map = defaultdict(list)
    
    #create a map of model as key and its sub files as values
    import os 
    import glob
    for model in model_list:
        if os.path.exists(model):
            files = glob.glob(os.path.join(model, '*'))
            model_map[os.path.basename(model)] = files
        else:
            raise Exception ("Path to the model %s doesn't exist"%model)
    
    #print model_map
    
    input_subject_list = [line.rstrip('\r\n') for line in open(c.groupAnalysisSubjectList, 'r')]
    
    gp_flow = create_gpa_dataflow(model_map, c.fTest)
    gp_flow.inputs.inputspec.input_sublist = input_subject_list 
    gp_flow.inputs.inputspec.output_sublist = s_ids
    
    from CPAC.group_analysis import create_group_analysis
    
    gpa_wf = create_group_analysis(c.fTest)
    gpa_wf.inputs.inputspec.zmap_files = s_paths
    
    wf.connect(gp_flow, 'outputspec.mat',
               gpa_wf, 'inputspec.mat_file')
    wf.connect(gp_flow, 'outputspec.con',
               gpa_wf, 'inputspec.con_file')
    wf.connect(gp_flow, 'outputspec.grp',
                gpa_wf, 'inputspec.grp_file')
        
    if c.fTest:
        wf.connect(gp_flow, 'outputspec.fts',
                         gpa_wf, 'inputspec.fts_file') 
    
    ds = pe.Node(nio.DataSink(), name='gpa_sink')
    out_dir = os.path.dirname(s_paths[0]).replace(s_ids[0], 'group_analysis_results')
    ds.inputs.base_directory = out_dir
    ds.inputs.container = resource
    
    wf.run(plugin='MultiProc',
                         plugin_args={'n_procs': c.numCoresPerSubject})
def prep_group_analysis_workflow(c, resource, subject_infos):
    
    #
    # this function runs once per output file during group analysis
    #

    # p_id = a list of pipeline IDs, i.e. the name of the output folder for
    #        the strat
    
    # s_ids = a list of all the subject IDs

    # scan_ids = a list of scan IDs

    # s_paths = a list of all of the filepaths of this particular output
    #           file that prep_group_analysis_workflow is being called for

    p_id, s_ids, scan_ids, s_paths = (list(tup) for tup in zip(*subject_infos))


    # set this to False for now
    fTest = False

    def get_phenotypic_file(phenotypic_file, m_dict, m_list, mod_path, sub_id):
        """Fill in missing motion measures in the phenotypic CSV.

        Reads *phenotypic_file* column-wise; for each measure in
        *m_list* whose column exists in the header but holds no values,
        the per-subject values are pulled from *m_dict* (keyed by
        subject ID, then measure name). The completed CSV is written
        into *mod_path* under the original basename.

        Returns
        -------
        str
            path to the rewritten phenotypic CSV

        Raises
        ------
        Exception
            if a subject or a measure value is missing from *m_dict*
        """
        import csv
        import os

        columns = {}
        order = {}

        # read the CSV into per-column lists, remembering the original
        # column order; close the handle (original left it open)
        with open(phenotypic_file, 'rU') as pheno_fh:
            reader = csv.reader(pheno_fh)
            headers = next(reader)

            for count, h in enumerate(headers):
                columns[h] = []
                order[h] = count

            for r in reader:
                for h, v in zip(headers, r):
                    # NOTE(review): empty cells are skipped, which can
                    # leave columns of unequal length - confirm upstream
                    # CSVs are always fully populated
                    if v:
                        columns[h].append(str(v))

        if m_dict:
            for measure in m_list:

                print('\n\nMeasure: ', measure, '\n\n')

                if measure in headers:
                    # column present in header but empty - fill it from
                    # the measures dict, one value per subject row
                    if len(columns[measure]) < 1:

                        print('\n\ncolumns[sub_id]: ', columns[sub_id], '\n\n')

                        for sub in columns[sub_id]:
                            if not m_dict.get(sub):
                                raise Exception("Couldn't find subject %s in the parameter file"%sub)
                            if not m_dict.get(sub).get(measure):
                                raise Exception("Couldn't find %s value for subject %s"%(measure,sub))
                            columns[measure].append(m_dict[sub][measure])

        print('\n\ncolumns[measure]: ', columns, '\n\n')

        # rebuild rows: header + values per column, columns restored to
        # their original order, then transposed back into CSV rows
        b = list(zip(*([k] + columns[k] for k in sorted(columns, key=order.get))))

        try:
            os.makedirs(mod_path)
        except OSError:
            # directory already exists (narrowed from a bare except)
            print("%s already exists"%(mod_path))

        new_phenotypic_file = os.path.join(mod_path,
                                           os.path.basename(phenotypic_file))

        # context manager guarantees the CSV is flushed and closed
        # before the path is returned (original never closed it)
        with open(new_phenotypic_file, 'w') as out_fh:
            writer = csv.writer(out_fh)
            for col in b:
                writer.writerow(list(col))

        return new_phenotypic_file

    # END get_phenotypic_file function



    threshold_val = None
    measure_dict = None
    measure_list = ['MeanFD', 'MeanFD_Jenkinson', 'MeanDVARS']
    model_sub_list = []
    

    if 1 in c.runScrubbing:

        #get scrubbing threshold
    
        if re.search('(?<=/_threshold_)\d+.\d+',s_paths[0]):

            threshold_val = re.search('(?<=/_threshold_)\d+.\d+',s_paths[0]).group(0)

        elif len(c.scrubbingThreshold) == 1:

            threshold_val = c.scrubbingThreshold[0]

        else:
            print("Found Multiple threshold value ")


        print("scrubbing threshold_val -->", threshold_val)

    else:

        print("No scrubbing enabled.")

        if len(c.scrubbingThreshold) == 1:
            threshold_val = c.scrubbingThreshold[0]




    import yaml    

    for config in c.modelConfigs:

        print(c.modelConfigs)
        print(config)
        
        try:
            conf = Configuration(yaml.load(open(os.path.realpath(config), 'r')))
        except:
            raise Exception("Error in reading %s configuration file" % config)

        
        group_sublist = open(conf.subject_list, 'r')

        sublist_items = group_sublist.readlines()

        subject_list = [line.rstrip('\n') for line in sublist_items \
                              if not (line == '\n') and not line.startswith('#')]

        # list of subject paths which DO exist
        exist_paths = []




        ''' begin iteration through group subject list for processing '''

        for sub in subject_list:

            # let's check to make sure the subject list is formatted for
            # repeated measures properly if repeated measures is enabled and
            # vice versa
            if (c.repeatedMeasures == True) and (',' not in sub):
                print('\n\n')
                print('[!] CPAC says: The group analysis subject list is ' \
                        'not inthe appropriate format for repeated ' \
                        'measures.\n')
                print('Please use the appropriate format as described in ' \
                        'the CPAC User Guide or turn off Repeated Measures ' \
                        'in the CPAC pipeline configuration editor, found ' \
                        'in the \'Group Analysis Settings\' tab of the ' \
                        'pipeline configuration editor.\n')
                print('NOTE: CPAC generates a properly-formatted group ' \
                        'analysis subject list meant for running repeated ' \
                        'measures when you create your original subject ' \
                        'list. Look for \'subject_list_group_analysis_' \
                        'repeated_measures.txt\' in the directory where ' \
                        'you created your subject list.\n\n')
                raise Exception

            elif (c.repeatedMeasures == False) and (',' in sub):
                print('\n\n')
                print('[!] CPAC says: It looks like your group analysis ' \
                        'subject list is formatted for running repeated ' \
                        'measures, but \'Run Repeated Measures\' is not ' \
                        'enabled in the pipeline configuration, found in ' \
                        'the \'Group Analysis Settings\' tab of the ' \
                        'pipeline configuration editor.\n')
                print('Double-check your pipeline configuration?\n\n')
                raise Exception



            ''' process subject ids for repeated measures, if it is on '''
            # if repeated measures is being run and the subject list
            # is a list of subject IDs and scan IDs concatenated
            if (c.repeatedMeasures == True):

                # sub.count(',') equals 1 when there is either multiple scans
                # or multiple sessions but not both, for repeated measures

                # sub.count(',') equals 2 when there are multiple sessions
                # AND scans, for repeated measures

                if sub.count(',') == 1:
                    sub_id = sub.split(',',1)[0]
                    other_id = sub.split(',',1)[1]

                elif sub.count(',') == 2:
                    sub_id = sub.split(',',2)[0]
                    scan_id = sub.split(',',2)[1]
                    session_id = sub.split(',',2)[2]



            ''' drop subjects from the group subject list '''
            # check the path files in path_files_here folder in the subject's
            # output folder - and drop any subjects from the group analysis
            # subject list which do not exist in the paths to the output files

            for path in s_paths:

                if (c.repeatedMeasures == True):

                    if sub.count(',') == 1:
                        if (sub_id in path) and (other_id in path):
                            exist_paths.append(sub)

                    elif sub.count(',') == 2:
                        if (sub_id in path) and (scan_id in path) and \
                                (session_id in path):
                            exist_paths.append(sub)

                else:

                    if sub in path:
                        exist_paths.append(sub)
 




        # check to see if any derivatives of subjects are missing
        if len(list(set(subject_list) - set(exist_paths))) >0:
            print("List of outputs missing for subjects:")
            print(list(set(subject_list) - set(exist_paths)))
            print("..for derivatives:")
            print(resource)
            print("..at paths:")
            print(os.path.dirname(s_paths[0]).replace(s_ids[0], '*'))

        

        # create the path string for the group analysis output
        out_dir = os.path.dirname(s_paths[0]).split(p_id[0] + '/')
        out_dir = os.path.join(conf.output_dir, out_dir[1])
        out_dir = out_dir.replace(s_ids[0], 'group_analysis_results_%s/_grp_model_%s'%(p_id[0],conf.model_name))

        mod_path = os.path.join(out_dir, 'model_files')


        if not os.path.isdir(mod_path):
            os.makedirs(mod_path)

        


        ''' write the new subject list '''
        new_sub_file = os.path.join(mod_path, os.path.basename(conf.subject_list))

        try:

            f = open(new_sub_file, 'w')
         
            for sub in exist_paths:
                print(sub, file=f)
        
            f.close()

        except:

            print("Error: Could not open subject list file: ", new_sub_file)
            raise Exception


        conf.update('subject_list',new_sub_file)

        sub_id = conf.subject_id_label
        


        if measure_dict != None:
            conf.update('pheno_file',get_phenotypic_file(conf.pheno_file, measure_dict, measure_list, mod_path, sub_id))
        
        print('conf updated pheno: ', conf.pheno_file, '\n\n')

            
        print("Model config dictionary ->")
        print(conf.__dict__)



        # Run 'create_fsl_model' script to extract phenotypic data from
        # the phenotypic file for each of the subjects in the subject list



        ''' get the motion statistics parameter file, if present '''
        # get the parameter file so it can be passed to create_fsl_model.py
        # so MeanFD or other measures can be included in the design matrix
        parameter_file = os.path.join(c.outputDirectory, p_id[0], '%s_threshold_%s_all_params.csv'%(scan_ids[0].strip('_'),threshold_val))

        if 1 in c.runGenerateMotionStatistics:

            if not os.path.exists(parameter_file):
                print('\n\n[!] CPAC says: Could not open the parameter file. ' \
                      'If Generate Motion Statistics is enabled, this can ' \
                      'usually be found in the output directory of your ' \
                      'individual-level analysis runs.\n')
                print('Path not found: ', parameter_file, '\n\n')
                raise Exception

        elif (1 not in c.runGenerateMotionStatistics) and (os.path.exists(parameter_file)):

            if not os.path.exists(parameter_file):
                print('\n\n[!] CPAC says: Could not open the parameter file. ' \
                      'If Generate Motion Statistics is enabled, this can ' \
                      'usually be found in the output directory of your ' \
                      'individual-level analysis runs.\n')
                print('Path not found: ', parameter_file, '\n\n')
                raise Exception

        else:

            def no_measures_error(measure):
                """Abort model setup because *measure* needs motion statistics.

                Called when a motion measure (e.g. MeanFD) appears in the
                group-analysis design formula but Generate Motion Statistics
                was never run (or its parameter file is missing), so the
                measure cannot be placed in the design matrix.

                Prints guidance for the user, then raises an Exception to
                halt group-level model setup.
                """
                print('\n\n[!] CPAC says: The measure %s was included in ' \
                      'your group analysis design matrix formula, but ' \
                      'Generate Motion Statistics was not run during ' \
                      'individual-level analysis.\n' % measure)
                print('Please run Generate Motion Statistics if you wish ' \
                      'to include this measure in your model.\n')
                print('If you HAVE completed a run with this option ' \
                      'enabled, then you are seeing this error because ' \
                      'the motion parameter file normally created by this ' \
                      'option is missing.\n\n')
                # attach the reason to the exception itself so it is not lost
                # when the traceback surfaces far from these print statements
                raise Exception('Measure %s requires Generate Motion '
                                'Statistics output, which is missing'
                                % measure)

            for measure in measure_list:
                if (measure in conf.design_formula):
                    no_measures_error(measure)

            parameter_file = None



        ''' run create_fsl_model.py to generate the group analysis models '''
        # path to the pipeline folder to be passed to create_fsl_model.py
        # so that certain files like output_means.csv can be accessed
        pipeline_path = os.path.join(c.outputDirectory, p_id[0])

        # the current output that cpac_group_analysis_pipeline.py and
        # create_fsl_model.py is currently being run for
        current_output = s_paths[0].replace(pipeline_path, '').split('/')[2]


        try:

            from CPAC.utils import create_fsl_model

            create_fsl_model.run(conf, fTest, parameter_file, pipeline_path, current_output, True)

            #print >>diag, "> Runs create_fsl_model."
            #print >>diag, ""

        except Exception as e:

            print("FSL Group Analysis model not successfully created - error in create_fsl_model script")
            #print "Error ->", e
            raise


            
        model_sub_list.append((conf.output_dir, conf.subject_list))


    
    if len(model_sub_list) == 0:
        raise Exception("no model found")





    ''' start group analysis '''

    print('\n\nPreparing the group analysis workflow..\n\n')

    for model_sub in model_sub_list:

        #print >>diag, "Current model_sub: ", model_sub
        #print >>diag, ""
        
        model, subject_list = model_sub
   

        if not os.path.exists(model):
            raise Exception("path to model %s doesn't exist"%model)
        
        if not os.path.exists(subject_list):
            raise Exception("path to input subject list %s is invalid" % subject_list)
        
        #if c.mixedScanAnalysis == True:
        #    wf = pe.Workflow(name = 'group_analysis/%s/grp_model_%s'%(resource, os.path.basename(model)))
        #else:
        
        
        # s_paths is a list of paths to each subject's derivative (of the current
        # derivative gpa is being run on) - s_paths_dirList is a list of each directory
        # in this path separated into list elements
        s_paths_dirList = s_paths[0].split('/')
        
        currentDerivativeFile = s_paths_dirList[-1]
        
        currentDerivative = currentDerivativeFile.split('.')[0]
        
        currentDerivative = currentDerivative.replace('#', '_')
        
        
        strgy_path = os.path.dirname(s_paths[0]).split(scan_ids[0])[1]

        for ch in ['.']:
            if ch in strgy_path:
                strgy_path = strgy_path.replace(ch, '_')
                
        # create nipype-workflow-name-friendly strgy_path
        # (remove special characters)
        strgy_path_name = strgy_path.replace('/', '__')
        
        

        wf = pe.Workflow(name = currentDerivative) 

        workDir = c.workingDirectory + '/group_analysis__%s__grp_model_%s__%s' % (resource, conf.model_name, scan_ids[0])
        workDir = workDir + '/' + strgy_path_name

        wf.base_dir = workDir
        wf.config['execution'] = {'hash_method': 'timestamp', 'crashdump_dir': os.path.abspath(c.crashLogDirectory)}
        log_dir = os.path.join(conf.output_dir, 'logs', 'group_analysis', resource, 'model_%s' % (conf.model_name))
        

        if not os.path.exists(log_dir):
            os.makedirs(log_dir)
        else:
            print("log_dir already exist")
        



        # enable logging
    
        from nipype import config
        from nipype import logging
        
        config.update_config({'logging': {'log_directory': log_dir,
                              'log_to_file': True}})
        
        # Temporarily disable until solved
        #logging.update_logging(config)

        iflogger = logging.getLogger('interface')




        ''' create the list of paths to all output files to go to model '''
        # create the 'ordered_paths' list, which is a list of all of the
        # output paths of the output files being included in the current
        # group-level analysis model
        #     'ordered_paths' is later connected to the 'zmap_files' input
        #     of the group analysis workflow - the files listed in this list
        #     are merged into the merged 4D file that goes into group analysis
      
        group_sublist = open(subject_list, 'r')
        sublist_items = group_sublist.readlines()

        input_subject_list = [line.rstrip('\n') for line in sublist_items \
                              if not (line == '\n') and not line.startswith('#')]

        ordered_paths = []
        pathcount = 0
        subcount = 0
        for sub in input_subject_list:

            subcount += 1

            if (c.repeatedMeasures == True):

                # sub.count(',') equals 1 when there is either multiple scans
                # or multiple sessions but not both, for repeated measures

                # sub.count(',') equals 2 when there are multiple sessions
                # AND scans, for repeated measures

                if sub.count(',') == 1:
                    sub_id = sub.split(',',1)[0]
                    other_id = sub.split(',',1)[1]

                elif sub.count(',') == 2:
                    sub_id = sub.split(',',2)[0]
                    scan_id = sub.split(',',2)[1]
                    session_id = sub.split(',',2)[2]


            for path in s_paths:

                if (c.repeatedMeasures == True):

                    # if repeated measures is enabled, make sure all of the
                    # relevant indicators are in the path before adding it
                    # to 'ordered_paths', i.e. the session and/or scan IDs

                    if sub.count(',') == 1:
                        if (sub_id in path) and (other_id in path):
                            pathcount += 1
                            ordered_paths.append(path)

                    elif sub.count(',') == 2:
                        if (sub_id in path) and (scan_id in path) and \
                                (session_id in path):
                            pathcount += 1
                            ordered_paths.append(path)

                else:
                    if sub in path:
                        pathcount += 1
                        ordered_paths.append(path)




        print('S_paths length: ', len(s_paths))

        print("Ordered paths length (number of subjects): ", len(ordered_paths))
      
        print("input_subject_list -> %s" % input_subject_list)

        print("strgy_path: ", strgy_path)


        if len(ordered_paths) == 0:
            print('\n\n\n[!] CPAC says: None of the subjects listed in the ' \
                  'group analysis subject list were found to have outputs ' \
                  'produced by individual-level analysis.\n\nEnsure that ' \
                  'the subjects listed in your group analysis subject list ' \
                  'are the same as the ones included in the individual-' \
                  'level analysis you are running group-level analysis for.' \
                  '\n\n\n')
            raise Exception



        # gp_flow
        # Extracts the model files (.con, .grp, .mat, .fts) from the model
        # directory and sends them to the create_group_analysis workflow gpa_wf

        gp_flow = create_grp_analysis_dataflow("gp_dataflow_%s" % currentDerivative)
        gp_flow.inputs.inputspec.grp_model = model
        gp_flow.inputs.inputspec.fTest = fTest
  


        # gpa_wf
        # Creates the actual group analysis workflow

        gpa_wf = create_group_analysis(fTest, "gp_analysis_%s" % currentDerivative)

        gpa_wf.inputs.inputspec.zmap_files = ordered_paths
        gpa_wf.inputs.inputspec.z_threshold = c.zThreshold
        gpa_wf.inputs.inputspec.p_threshold = c.pThreshold
        gpa_wf.inputs.inputspec.parameters = (c.FSLDIR, 'MNI152')
    
        print("group model: ", model)
        print("f test: ", fTest)
        print("z threshold: ", c.zThreshold)
        print("p threshold: ", c.pThreshold)
        print("parameters: ", (c.FSLDIR, 'MNI152'))

    
        wf.connect(gp_flow, 'outputspec.mat',
                   gpa_wf, 'inputspec.mat_file')
        wf.connect(gp_flow, 'outputspec.con',
                   gpa_wf, 'inputspec.con_file')
        wf.connect(gp_flow, 'outputspec.grp',
                    gpa_wf, 'inputspec.grp_file')

            
        if fTest:
            wf.connect(gp_flow, 'outputspec.fts',
                       gpa_wf, 'inputspec.fts_file')
        


        # ds
        # Creates the datasink node for group analysis
        
        ds = pe.Node(nio.DataSink(), name='gpa_sink')
     
        if 'sca_roi' in resource:
            out_dir = os.path.join(out_dir, \
              re.search('ROI_number_(\d)+',os.path.splitext(os.path.splitext(os.path.basename(s_paths[0]))[0])[0]).group(0))
            
        if 'centrality' in resource:
            names = ['degree_centrality_binarize', 'degree_centrality_weighted', \
                     'eigenvector_centrality_binarize', 'eigenvector_centrality_weighted', \
                     'lfcd_binarize', 'lfcd_weighted']
            for name in names:
                if name in os.path.basename(s_paths[0]):
                    out_dir = os.path.join(out_dir, name)
                    break

        if 'tempreg_maps_z_files' in resource:
            out_dir = os.path.join(out_dir, \
                re.search('\w*[#]*\d+', os.path.splitext(os.path.splitext(os.path.basename(s_paths[0]))[0])[0]).group(0))
        
#         if c.mixedScanAnalysis == True:
#             out_dir = re.sub(r'(\w)*scan_(\w)*(\d)*(\w)*[/]', '', out_dir)
              
        ds.inputs.base_directory = out_dir
        ds.inputs.container = ''
        
        ds.inputs.regexp_substitutions = [(r'(?<=rendered)(.)*[/]','/'),
                                          (r'(?<=model_files)(.)*[/]','/'),
                                          (r'(?<=merged)(.)*[/]','/'),
                                          (r'(?<=stats/clusterMap)(.)*[/]','/'),
                                          (r'(?<=stats/unthreshold)(.)*[/]','/'),
                                          (r'(?<=stats/threshold)(.)*[/]','/'),
                                          (r'_cluster(.)*[/]',''),
                                          (r'_slicer(.)*[/]',''),
                                          (r'_overlay(.)*[/]','')]
    
        '''
        if 1 in c.runSymbolicLinks:
    
    
            link_node = pe.MapNode(interface=util.Function(
                                input_names=['in_file',
                                            'resource'],
                                    output_names=[],
                                    function=prepare_gp_links),
                                    name='link_gp_', iterfield=['in_file'])
            link_node.inputs.resource = resource
            wf.connect(ds, 'out_file', link_node, 'in_file')
        '''
    


        ########datasink connections#########
        if fTest:
            wf.connect(gp_flow, 'outputspec.fts',
                       ds, 'model_files.@0') 
        
        wf.connect(gp_flow, 'outputspec.mat',
                   ds, 'model_files.@1' )
        wf.connect(gp_flow, 'outputspec.con',
                   ds, 'model_files.@2')
        wf.connect(gp_flow, 'outputspec.grp',
                   ds, 'model_files.@3')
        wf.connect(gpa_wf, 'outputspec.merged',
                   ds, 'merged')
        wf.connect(gpa_wf, 'outputspec.zstats',
                   ds, 'stats.unthreshold')
        wf.connect(gpa_wf, 'outputspec.zfstats',
                   ds,'stats.unthreshold.@01')
        wf.connect(gpa_wf, 'outputspec.fstats',
                   ds,'stats.unthreshold.@02')
        wf.connect(gpa_wf, 'outputspec.cluster_threshold_zf',
                   ds, 'stats.threshold')
        wf.connect(gpa_wf, 'outputspec.cluster_index_zf',
                   ds,'stats.clusterMap')
        wf.connect(gpa_wf, 'outputspec.cluster_localmax_txt_zf',
                   ds, 'stats.clusterMap.@01')
        wf.connect(gpa_wf, 'outputspec.overlay_threshold_zf',
                   ds, 'rendered')
        wf.connect(gpa_wf, 'outputspec.rendered_image_zf',
                   ds, 'rendered.@01')
        wf.connect(gpa_wf, 'outputspec.cluster_threshold',
                   ds,  'stats.threshold.@01')
        wf.connect(gpa_wf, 'outputspec.cluster_index',
                   ds, 'stats.clusterMap.@02')
        wf.connect(gpa_wf, 'outputspec.cluster_localmax_txt',
                   ds, 'stats.clusterMap.@03')
        wf.connect(gpa_wf, 'outputspec.overlay_threshold',
                   ds, 'rendered.@02')
        wf.connect(gpa_wf, 'outputspec.rendered_image',
                   ds, 'rendered.@03')
        
        ######################################

        # Run the actual group analysis workflow
        wf.run()

        '''
        except:

            print "Error: Group analysis workflow run command did not complete successfully."
            print "subcount: ", subcount
            print "pathcount: ", pathcount
            print "sublist: ", sublist_items
            print "input subject list: "
            print "conf: ", conf.subjectListFile
            
            raise Exception
        '''
    
        print("**Workflow finished for model %s and resource %s"%(os.path.basename(model), resource))
def prep_group_analysis_workflow(model_df, pipeline_config_obj, \
    model_name, group_config_obj, resource_id, preproc_strat, \
    series_or_repeated_label):
    
    #
    # this function runs once per derivative type and preproc strat combo
    # during group analysis
    #

    import os

    import nipype.pipeline.engine as pe
    import nipype.interfaces.utility as util
    import nipype.interfaces.io as nio

    pipeline_ID = pipeline_config_obj.pipeline_name

    # get thresholds
    z_threshold = float(group_config_obj.z_threshold[0])

    p_threshold = float(group_config_obj.p_threshold[0])

    sub_id_label = group_config_obj.subject_id_label

    # determine if f-tests are included or not
    custom_confile = group_config_obj.custom_contrasts

    if ((custom_confile == None) or (custom_confile == '') or \
            ("None" in custom_confile) or ("none" in custom_confile)):

        if (len(group_config_obj.f_tests) == 0) or \
            (group_config_obj.f_tests == None):
            fTest = False
        else:
            fTest = True

    else:

        if not os.path.exists(custom_confile):
            errmsg = "\n[!] CPAC says: You've specified a custom contrasts " \
                     ".CSV file for your group model, but this file cannot " \
                     "be found. Please double-check the filepath you have " \
                     "entered.\n\nFilepath: %s\n\n" % custom_confile
            raise Exception(errmsg)

        with open(custom_confile,"r") as f:
            evs = f.readline()

        evs = evs.rstrip('\r\n').split(',')
        count_ftests = 0

        fTest = False

        for ev in evs:
            if "f_test" in ev:
                count_ftests += 1

        if count_ftests > 0:
            fTest = True


    # create path for output directory
    out_dir = os.path.join(group_config_obj.output_dir, \
        "group_analysis_results_%s" % pipeline_ID, \
        "group_model_%s" % model_name, resource_id, \
        series_or_repeated_label, preproc_strat)

    model_path = os.path.join(out_dir, 'model_files')

    # generate working directory for this output's group analysis run
    work_dir = os.path.join(c.workingDirectory, "group_analysis", model_name,\
        resource_id, series_or_repeated_label, preproc_strat)

    log_dir = os.path.join(out_dir, 'logs', resource_id, \
        'model_%s' % model_name)

    # create the actual directories
    if not os.path.isdir(model_path):
        try:
            os.makedirs(model_path)
        except Exception as e:
            err = "\n\n[!] Could not create the group analysis output " \
                  "directories.\n\nAttempted directory creation: %s\n\n" \
                  "Error details: %s\n\n" % (model_path, e)
            raise Exception(err)

    if not os.path.isdir(work_dir):
        try:
            os.makedirs(work_dir)
        except Exception as e:
            err = "\n\n[!] Could not create the group analysis working " \
                  "directories.\n\nAttempted directory creation: %s\n\n" \
                  "Error details: %s\n\n" % (model_path, e)
            raise Exception(err)

    if not os.path.isdir(log_dir):
        try:
            os.makedirs(log_dir)
        except Exception as e:
            err = "\n\n[!] Could not create the group analysis logfile " \
                  "directories.\n\nAttempted directory creation: %s\n\n" \
                  "Error details: %s\n\n" % (model_path, e)
            raise Exception(err)


    # create new subject list based on which subjects are left after checking
    # for missing outputs
    new_participant_list = []
    for part in list(model_df["Participant"]):
        # do this instead of using "set" just in case, to preserve order
        #   only reason there may be duplicates is because of multiple-series
        #   repeated measures runs
        if part not in new_participant_list:
            new_participant_list.append(part)

    new_sub_file = write_new_sub_file(model_path, \
                                      group_config_obj.participant_list, \
                                      new_participant_list)

    group_conf.update('participant_list',new_sub_file)


    # start processing the dataframe further
    design_formula = group_config_obj.design_formula

    # demean the motion params
    if ("MeanFD" in design_formula) or ("MeanDVARS" in design_formula):
        params = ["MeanFD_Power", "MeanFD_Jenkinson", "MeanDVARS"]
        for param in params:
            model_df[param] = model_df[param].astype(float)
            model_df[param] = model_df[param].sub(model_df[param].mean())


    # create 4D merged copefile, in the correct order, identical to design
    # matrix
    merge_outfile = model_name + "_" + resource_id + "_merged.nii.gz"
    merge_outfile = os.path.join(model_path, merge_outfile)

    merge_file = create_merged_copefile(list(model_df["Filepath"]), \
                                        merge_outfile)

    # create merged group mask
    if group_config_obj.mean_mask[0] == "Group Mask":
        merge_mask_outfile = os.path.basename(merge_file) + "_mask.nii.gz"
        merge_mask = create_merged_mask(merge_file, merge_mask_outfile)

    # calculate measure means, and demean
    if "Measure_Mean" in design_formula:
        model_df = calculate_measure_mean_in_df(model_df, merge_mask)

    # calculate custom ROIs, and demean (in workflow?)
    if "Custom_ROI_Mean" in design_formula:

        custom_roi_mask = group_config_obj.custom_roi_mask

        if (custom_roi_mask == None) or (custom_roi_mask == "None") or \
            (custom_roi_mask == "none") or (custom_roi_mask == ""):
            err = "\n\n[!] You included 'Custom_ROI_Mean' in your design " \
                  "formula, but you didn't supply a custom ROI mask file." \
                  "\n\nDesign formula: %s\n\n" % design_formula
            raise Exception(err)

        # make sure the custom ROI mask file is the same resolution as the
        # output files - if not, resample and warn the user
        roi_mask = check_mask_file_resolution(list(model_df["Raw_Filepath"])[0], \
                                              custom_roi_mask, model_path, \
                                              resource_id)

        # if using group merged mask, trim the custom ROI mask to be within
        # its constraints
        if merge_mask:
            output_mask = os.path.join(model_path, "group_masked_%s" \
                                       % os.path.basename(input_mask))
            roi_mask = trim_mask(roi_mask, merge_mask, output_mask)

        # calculate
        model_df = calculate_custom_roi_mean_in_df(model_df, roi_mask)   

    


    # modeling group variances separately

    # add repeated measures 1's matrices

    # patsify model DF, drop columns not in design formula

    # process contrasts


        
    wf = pe.Workflow(name=resource_id)

    wf.base_dir = work_dir
    crash_dir = os.path.join(pipeline_config_obj.crashLogDirectory, \
                             "group_analysis", model_name)

    wf.config['execution'] = {'hash_method': 'timestamp', \
                              'crashdump_dir': crash_dir}








    if "Measure_Mean" in design_formula:
        measure_mean = pe.Node(util.Function(input_names=['model_df',
                                                          'merge_mask'],
                                       output_names=['model_df'],
                                       function=calculate_measure_mean_in_df),
                                       name='measure_mean')
        measure_mean.inputs.model_df = model_df

        wf.connect(merge_mask, "out_file", measure_mean, "merge_mask")


    if "Custom_ROI_Mean" in design_formula:
        roi_mean = pe.Node(util.Function())


    group_config_obj.custom_roi_mask
    






    #----------------

    import yaml
    import pandas as pd


    # load group analysis model configuration file
    try:
        with open(os.path.realpath(group_config_file),"r") as f:
            group_conf = Configuration(yaml.load(f))
    except Exception as e:
        err_string = "\n\n[!] CPAC says: Could not read group model " \
                     "configuration YML file. Ensure you have read access " \
                     "for the file and that it is formatted properly.\n\n" \
                     "Configuration file: %s\n\nError details: %s" \
                     % (group_config_file, e)
        raise Exception(err_string)


    # gather all of the information
    # - lists of all the participant unique IDs (participant_site_session) and
    # of all of the series IDs present in output_file_list
    # - also returns the pipeline ID
    new_participant_list, all_series_names, pipeline_ID = \
        gather_new_participant_list(output_path_file, output_file_list)

     

      

    # create the path string for the group analysis output
    #    replicate the directory path of one of the participant's output
    #    folder path to the derivative's file, but replace the participant ID
    #    with the group model name
    #        this is to ensure nothing gets overwritten between strategies
    #        or thresholds, etc.
    out_dir = os.path.dirname(output_file_list[0]).split(pipeline_ID + '/')
    out_dir = out_dir[1].split(out_dir[1].split("/")[-1])[0]
    out_dir = os.path.join(group_conf.output_dir, out_dir)
    out_dir = out_dir.replace(new_participant_list[0], \
                  'group_analysis_results_%s/_grp_model_%s' \
                  % (pipeline_ID, group_conf.model_name))

    # !!!!!!!!!!
    if (group_conf.repeated_measures == True) and (series_ids[0] != None):
        out_dir = out_dir.replace(series_ids[0] + "/", "multiple_series")

    # create model file output directories
    model_out_dir = os.path.join(group_conf.output_dir, \
        'group_analysis_results_%s/_grp_model_%s' \
        %(pipeline_ID, group_conf.model_name))

    mod_path = os.path.join(model_out_dir, 'model_files')

    if not os.path.isdir(mod_path):
        os.makedirs(mod_path)

    # current_mod_path = folder under
    #   "/gpa_output/_grp_model_{model name}/model_files/{current derivative}"
    current_mod_path = os.path.join(mod_path, resource)

    if not os.path.isdir(current_mod_path):
        os.makedirs(current_mod_path)

        
    # create new subject list based on which subjects are left after checking
    # for missing outputs
    new_sub_file = write_new_sub_file(current_mod_path, \
                       group_conf.subject_list, new_participant_list)

    group_conf.update('subject_list',new_sub_file)


    # create new design matrix with only the subjects that are left






    # Run 'create_fsl_model' script to extract phenotypic data from
    # the phenotypic file for each of the subjects in the subject list

    # get the motion statistics parameter file, if present
    # get the parameter file so it can be passed to create_fsl_model.py
    # so MeanFD or other measures can be included in the design matrix


    ''' okay, here we go... how are we handling series? because here it needs to take in '''
    ''' the appropriate series to get the appropriate parameter file ! ! ! '''

    ''' MAY HAVE TO GO BACK ON THIS, and just have one series sent in per this function...'''

    power_params_files = {}

    measure_list = ['MeanFD_Power', 'MeanFD_Jenkinson', 'MeanDVARS']

    for measure in measure_list:
    
        if measure in group_conf.design_formula:

            for series_id in all_series_names:

                parameter_file = os.path.join(c.outputDirectory, \
                                              pipeline_ID, \
                                              '%s%s_all_params.csv' % \
                                              (series_id.strip('_'), \
                                              threshold_val))

                if not os.path.exists(parameter_file):
                    err = "\n\n[!] CPAC says: Could not find or open the motion "\
                          "parameter file. This is necessary if you have " \
                          "included any of the MeanFD measures in your group " \
                          "model.\n\nThis file can usually be found in the " \
                          "output directory of your individual-level analysis " \
                          "runs. If it is not there, double-check to see if " \
                          "individual-level analysis had completed successfully."\
                          "\n\nPath not found: %s\n\n" % parameter_file
                    raise Exception(err)


                power_params_files[series_id] = parameter_file
                

            break
            
    else:
    
        power_params_files = None



    # path to the pipeline folder to be passed to create_fsl_model.py
    # so that certain files like output_means.csv can be accessed
    pipeline_path = os.path.join(c.outputDirectory, pipeline_ID)

    # generate working directory for this output's group analysis run
    workDir = '%s/group_analysis/%s/%s' % (c.workingDirectory, \
                                               group_conf.model_name, \
                                               resource)
            
    # this makes strgy_path basically the directory path of the folders after
    # the resource/derivative folder level         
    # Derive the strategy sub-path for this derivative from the first output
    # file's directory: everything after the resource name in the path.
    strgy_path = os.path.dirname(output_file_list[0]).split(resource)[1]

    # get rid of periods in the path
    for ch in ['.']:
        if ch in strgy_path:
            strgy_path = strgy_path.replace(ch, "")
                
    # create nipype-workflow-name-friendly strgy_path
    # (remove special characters)
    strgy_path_name = strgy_path.replace('/', "_")

    # give this run its own strategy-specific working subdirectory
    workDir = workDir + '/' + strgy_path_name



    # merge the subjects for this current output
    # then, take the group mask, and iterate over the list of subjects
    # to extract the mean of each subject using the group mask
    merge_output, merge_mask_output, merge_output_dir = \
        create_merged_files(workDir, resource, output_file_list)

    
    # CALCULATE THE MEANS of each output using the group mask
    derivative_means_dict, roi_means_dict = \
        calculate_output_means(resource, output_file_list, \
                               group_conf.mean_mask, \
                               group_conf.design_formula, \
                               group_conf.custom_roi_mask, pipeline_path, \
                               merge_output_dir, c.identityMatrix)


    measure_dict = {}

    # extract motion measures from CPAC-generated power params file
    # NOTE(review): '!= None' — 'is not None' is the Python idiom; left
    # unchanged here.
    if power_params_files != None:
        for param_file in power_params_files.values():
            new_measure_dict = get_measure_dict(param_file)
            measure_dict.update(new_measure_dict)


    # combine the motion measures dictionary with the measure_mean
    # dictionary (if it exists)
    if derivative_means_dict:
        measure_dict["Measure_Mean"] = derivative_means_dict

    # run create_fsl_model.py to generate the group analysis models
    
    # NOTE(review): 'kill_me' is imported but not used in this span —
    # verify it is needed elsewhere before removing.
    from CPAC.utils import create_fsl_model, kill_me
    create_fsl_model.run(group_conf, resource, parameter_file, \
                             derivative_means_dict, roi_means_dict, \
                                 current_mod_path, True)


    # begin GA workflow setup

    # the pruned subject list written earlier must exist before we build
    # the workflow around it
    if not os.path.exists(new_sub_file):
        raise Exception("path to input subject list %s is invalid" % new_sub_file)
        
    #if c.mixedScanAnalysis == True:
    #    wf = pe.Workflow(name = 'group_analysis/%s/grp_model_%s'%(resource, os.path.basename(model)))
    #else:

    wf = pe.Workflow(name = resource)

    wf.base_dir = workDir
    # timestamp hashing avoids re-running nodes whose inputs are unchanged
    wf.config['execution'] = {'hash_method': 'timestamp', 'crashdump_dir': os.path.abspath(c.crashLogDirectory)}
    log_dir = os.path.join(group_conf.output_dir, 'logs', 'group_analysis', resource, 'model_%s' % (group_conf.model_name))
        

    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    else:
        pass


    # gp_flow
    # Extracts the model files (.con, .grp, .mat, .fts) from the model
    # directory and sends them to the create_group_analysis workflow gpa_wf

    gp_flow = create_grp_analysis_dataflow("gp_dataflow_%s" % resource)
    gp_flow.inputs.inputspec.grp_model = os.path.join(mod_path, resource)
    gp_flow.inputs.inputspec.model_name = group_conf.model_name
    gp_flow.inputs.inputspec.ftest = fTest
  

    # gpa_wf
    # Creates the actual group analysis workflow

    gpa_wf = create_group_analysis(fTest, "gp_analysis_%s" % resource)

    gpa_wf.inputs.inputspec.merged_file = merge_output
    gpa_wf.inputs.inputspec.merge_mask = merge_mask_output

    gpa_wf.inputs.inputspec.z_threshold = z_threshold
    gpa_wf.inputs.inputspec.p_threshold = p_threshold
    gpa_wf.inputs.inputspec.parameters = (c.FSLDIR, 'MNI152')
    
   
    # feed the extracted FSL model files into the group analysis workflow
    wf.connect(gp_flow, 'outputspec.mat',
               gpa_wf, 'inputspec.mat_file')
    wf.connect(gp_flow, 'outputspec.con',
               gpa_wf, 'inputspec.con_file')
    wf.connect(gp_flow, 'outputspec.grp',
                gpa_wf, 'inputspec.grp_file')
           
    # the .fts file only exists when f-tests were requested
    if fTest:
        wf.connect(gp_flow, 'outputspec.fts',
                   gpa_wf, 'inputspec.fts_file')
        

    # ds
    # Creates the datasink node for group analysis
       
    ds = pe.Node(nio.DataSink(), name='gpa_sink')
     
    # for multi-output derivatives, nest the sink directory one level
    # deeper, named after the specific map (ROI number, z-stat index, etc.)
    if 'sca_roi' in resource:
        out_dir = os.path.join(out_dir, \
            re.search('sca_roi_(\d)+',os.path.splitext(os.path.splitext(os.path.basename(output_file_list[0]))[0])[0]).group(0))
            
            
    if 'dr_tempreg_maps_zstat_files_to_standard_smooth' in resource:
        out_dir = os.path.join(out_dir, \
            re.search('temp_reg_map_z_(\d)+',os.path.splitext(os.path.splitext(os.path.basename(output_file_list[0]))[0])[0]).group(0))
            
            
    if 'centrality' in resource:
        names = ['degree_centrality_binarize', 'degree_centrality_weighted', \
                 'eigenvector_centrality_binarize', 'eigenvector_centrality_weighted', \
                 'lfcd_binarize', 'lfcd_weighted']

        for name in names:
            if name in os.path.basename(output_file_list[0]):
                out_dir = os.path.join(out_dir, name)
                break

    if 'tempreg_maps' in resource:
        out_dir = os.path.join(out_dir, \
            re.search('\w*[#]*\d+', os.path.splitext(os.path.splitext(os.path.basename(output_file_list[0]))[0])[0]).group(0))
        
#     if c.mixedScanAnalysis == True:
#         out_dir = re.sub(r'(\w)*scan_(\w)*(\d)*(\w)*[/]', '', out_dir)
              
    ds.inputs.base_directory = out_dir
    ds.inputs.container = ''
        
    # flatten nipype's nested working-directory structure in the sink:
    # strip everything between the named folder and the file itself
    ds.inputs.regexp_substitutions = [(r'(?<=rendered)(.)*[/]','/'),
                                      (r'(?<=model_files)(.)*[/]','/'),
                                      (r'(?<=merged)(.)*[/]','/'),
                                      (r'(?<=stats/clusterMap)(.)*[/]','/'),
                                      (r'(?<=stats/unthreshold)(.)*[/]','/'),
                                      (r'(?<=stats/threshold)(.)*[/]','/'),
                                      (r'_cluster(.)*[/]',''),
                                      (r'_slicer(.)*[/]',''),
                                      (r'_overlay(.)*[/]','')]
   

    ########datasink connections#########
    if fTest:
        wf.connect(gp_flow, 'outputspec.fts',
                   ds, 'model_files.@0') 
        
    wf.connect(gp_flow, 'outputspec.mat',
               ds, 'model_files.@1' )
    wf.connect(gp_flow, 'outputspec.con',
               ds, 'model_files.@2')
    wf.connect(gp_flow, 'outputspec.grp',
               ds, 'model_files.@3')
    wf.connect(gpa_wf, 'outputspec.merged',
               ds, 'merged')
    wf.connect(gpa_wf, 'outputspec.zstats',
               ds, 'stats.unthreshold')
    wf.connect(gpa_wf, 'outputspec.zfstats',
               ds,'stats.unthreshold.@01')
    wf.connect(gpa_wf, 'outputspec.fstats',
               ds,'stats.unthreshold.@02')
    wf.connect(gpa_wf, 'outputspec.cluster_threshold_zf',
               ds, 'stats.threshold')
    wf.connect(gpa_wf, 'outputspec.cluster_index_zf',
               ds,'stats.clusterMap')
    wf.connect(gpa_wf, 'outputspec.cluster_localmax_txt_zf',
               ds, 'stats.clusterMap.@01')
    wf.connect(gpa_wf, 'outputspec.overlay_threshold_zf',
               ds, 'rendered')
    wf.connect(gpa_wf, 'outputspec.rendered_image_zf',
               ds, 'rendered.@01')
    wf.connect(gpa_wf, 'outputspec.cluster_threshold',
               ds,  'stats.threshold.@01')
    wf.connect(gpa_wf, 'outputspec.cluster_index',
               ds, 'stats.clusterMap.@02')
    wf.connect(gpa_wf, 'outputspec.cluster_localmax_txt',
               ds, 'stats.clusterMap.@03')
    wf.connect(gpa_wf, 'outputspec.overlay_threshold',
               ds, 'rendered.@02')
    wf.connect(gpa_wf, 'outputspec.rendered_image',
               ds, 'rendered.@03')
       
    ######################################

    # Run the actual group analysis workflow
    wf.run()

    
    # NOTE(review): this prints the basename of the OUTPUT DIRECTORY as the
    # "model" name — presumably intended to be the model name; verify.
    print "\n\nWorkflow finished for model %s and resource %s\n\n" \
          % (os.path.basename(group_conf.output_dir), resource)
Example #15
0
def prep_group_analysis_workflow(model_df, pipeline_config_path, \
    model_name, group_config_path, resource_id, preproc_strat, \
    series_or_repeated_label):
    
    #
    # this function runs once per derivative type and preproc strat combo
    # during group analysis
    #

    import os
    import patsy
    import numpy as np

    import nipype.pipeline.engine as pe
    import nipype.interfaces.utility as util
    import nipype.interfaces.io as nio

    from CPAC.pipeline.cpac_group_runner import load_config_yml
    from CPAC.utils.create_flame_model_files import create_flame_model_files
    from CPAC.utils.create_group_analysis_info_files import write_design_matrix_csv

    pipeline_config_obj = load_config_yml(pipeline_config_path)
    group_config_obj = load_config_yml(group_config_path)

    pipeline_ID = pipeline_config_obj.pipelineName

    # remove file names from preproc_strat
    filename = preproc_strat.split("/")[-1]
    preproc_strat = preproc_strat.replace(filename,"")
    preproc_strat = preproc_strat.lstrip("/").rstrip("/")

    # get thresholds
    z_threshold = float(group_config_obj.z_threshold[0])

    p_threshold = float(group_config_obj.p_threshold[0])

    sub_id_label = group_config_obj.participant_id_label

    ftest_list = []
    readme_flags = []

    # determine if f-tests are included or not
    custom_confile = group_config_obj.custom_contrasts

    if ((custom_confile == None) or (custom_confile == '') or \
            ("None" in custom_confile) or ("none" in custom_confile)):

        custom_confile = None

        if (len(group_config_obj.f_tests) == 0) or \
            (group_config_obj.f_tests == None):
            fTest = False
        else:
            fTest = True
            ftest_list = group_config_obj.f_tests

    else:

        if not os.path.exists(custom_confile):
            errmsg = "\n[!] CPAC says: You've specified a custom contrasts " \
                     ".CSV file for your group model, but this file cannot " \
                     "be found. Please double-check the filepath you have " \
                     "entered.\n\nFilepath: %s\n\n" % custom_confile
            raise Exception(errmsg)

        with open(custom_confile,"r") as f:
            evs = f.readline()

        evs = evs.rstrip('\r\n').split(',')
        count_ftests = 0

        fTest = False

        for ev in evs:
            if "f_test" in ev:
                count_ftests += 1

        if count_ftests > 0:
            fTest = True


    # create path for output directory
    out_dir = os.path.join(group_config_obj.output_dir, \
        "group_analysis_results_%s" % pipeline_ID, \
        "group_model_%s" % model_name, resource_id, \
        series_or_repeated_label, preproc_strat)

    if 'sca_roi' in resource_id:
        out_dir = os.path.join(out_dir, \
            re.search('sca_roi_(\d)+',os.path.splitext(\
                os.path.splitext(os.path.basename(\
                    model_df["Filepath"][0]))[0])[0]).group(0))
            
    if 'dr_tempreg_maps_zstat_files_to_standard_smooth' in resource_id:
        out_dir = os.path.join(out_dir, \
            re.search('temp_reg_map_z_(\d)+',os.path.splitext(\
                os.path.splitext(os.path.basename(\
                    model_df["Filepath"][0]))[0])[0]).group(0))
            
    if 'centrality' in resource_id:
        names = ['degree_centrality_binarize', 'degree_centrality_weighted', \
                 'eigenvector_centrality_binarize', \
                 'eigenvector_centrality_weighted', \
                 'lfcd_binarize', 'lfcd_weighted']

        for name in names:
            if name in filename:
                out_dir = os.path.join(out_dir, name)
                break

    if 'tempreg_maps' in resource_id:
        out_dir = os.path.join(out_dir, re.search('\w*[#]*\d+', \
            os.path.splitext(os.path.splitext(os.path.basename(\
                model_df["Filepath"][0]))[0])[0]).group(0))

    model_path = os.path.join(out_dir, 'model_files')

    second_half_out = \
        out_dir.split("group_analysis_results_%s" % pipeline_ID)[1]

    # generate working directory for this output's group analysis run
    work_dir = os.path.join(pipeline_config_obj.workingDirectory, \
        "group_analysis", second_half_out.lstrip("/"))

    log_dir = os.path.join(pipeline_config_obj.logDirectory, \
        "group_analysis", second_half_out.lstrip("/"))

    # create the actual directories
    create_dir(model_path, "group analysis output")
    create_dir(work_dir, "group analysis working")
    create_dir(log_dir, "group analysis logfile")


    # create new subject list based on which subjects are left after checking
    # for missing outputs
    new_participant_list = []
    for part in list(model_df["Participant"]):
        # do this instead of using "set" just in case, to preserve order
        #   only reason there may be duplicates is because of multiple-series
        #   repeated measures runs
        if part not in new_participant_list:
            new_participant_list.append(part)

    new_sub_file = write_new_sub_file(model_path, \
                                      group_config_obj.participant_list, \
                                      new_participant_list)

    group_config_obj.update('participant_list',new_sub_file)

    num_subjects = len(list(model_df["Participant"]))


    # start processing the dataframe further
    design_formula = group_config_obj.design_formula

    # demean EVs set for demeaning
    for demean_EV in group_config_obj.ev_selections["demean"]:
        model_df[demean_EV] = model_df[demean_EV].astype(float)
        model_df[demean_EV] = model_df[demean_EV].sub(model_df[demean_EV].mean())

    # demean the motion params
    if ("MeanFD" in design_formula) or ("MeanDVARS" in design_formula):
        params = ["MeanFD_Power", "MeanFD_Jenkinson", "MeanDVARS"]
        for param in params:
            model_df[param] = model_df[param].astype(float)
            model_df[param] = model_df[param].sub(model_df[param].mean())


    # create 4D merged copefile, in the correct order, identical to design
    # matrix
    merge_outfile = model_name + "_" + resource_id + "_merged.nii.gz"
    merge_outfile = os.path.join(model_path, merge_outfile)

    merge_file = create_merged_copefile(list(model_df["Filepath"]), \
                                        merge_outfile)

    # create merged group mask
    merge_mask_outfile = model_name + "_" + resource_id + \
                             "_merged_mask.nii.gz"
    merge_mask_outfile = os.path.join(model_path, merge_mask_outfile)
    merge_mask = create_merge_mask(merge_file, merge_mask_outfile)

    if "Group Mask" in group_config_obj.mean_mask:
        mask_for_means = merge_mask
    else:
        individual_masks_dir = os.path.join(model_path, "individual_masks")
        create_dir(individual_masks_dir, "individual masks")
        for unique_id, series_id, raw_filepath in zip(model_df["Participant"],
            model_df["Series"], model_df["Raw_Filepath"]):
            
            mask_for_means_path = os.path.join(individual_masks_dir,
                "%s_%s_%s_mask.nii.gz" % (unique_id, series_id, resource_id))
            mask_for_means = create_merge_mask(raw_filepath, 
                                               mask_for_means_path)
        readme_flags.append("individual_masks")

    # calculate measure means, and demean
    if "Measure_Mean" in design_formula:
        model_df = calculate_measure_mean_in_df(model_df, mask_for_means)

    # calculate custom ROIs, and demean (in workflow?)
    if "Custom_ROI_Mean" in design_formula:

        custom_roi_mask = group_config_obj.custom_roi_mask

        if (custom_roi_mask == None) or (custom_roi_mask == "None") or \
            (custom_roi_mask == "none") or (custom_roi_mask == ""):
            err = "\n\n[!] You included 'Custom_ROI_Mean' in your design " \
                  "formula, but you didn't supply a custom ROI mask file." \
                  "\n\nDesign formula: %s\n\n" % design_formula
            raise Exception(err)

        # make sure the custom ROI mask file is the same resolution as the
        # output files - if not, resample and warn the user
        roi_mask = check_mask_file_resolution(list(model_df["Raw_Filepath"])[0], \
                                              custom_roi_mask, mask_for_means, \
                                              model_path, resource_id)

        # trim the custom ROI mask to be within mask constraints
        output_mask = os.path.join(model_path, "masked_%s" \
                                   % os.path.basename(roi_mask))
        roi_mask = trim_mask(roi_mask, mask_for_means, output_mask)
        readme_flags.append("custom_roi_mask_trimmed")

        # calculate
        model_df = calculate_custom_roi_mean_in_df(model_df, roi_mask)

        # update the design formula
        new_design_substring = ""
        for col in model_df.columns:
            if "Custom_ROI_Mean_" in str(col):
                if str(col) == "Custom_ROI_Mean_1":
                    new_design_substring = new_design_substring + " %s" % col
                else:
                    new_design_substring = new_design_substring +" + %s" % col
        design_formula = design_formula.replace("Custom_ROI_Mean", \
                                                new_design_substring)


    cat_list = []
    if "categorical" in group_config_obj.ev_selections.keys():
        cat_list = group_config_obj.ev_selections["categorical"]


    # prep design for repeated measures, if applicable
    if len(group_config_obj.sessions_list) > 0:
        design_formula = design_formula + " + Session"
        if "Session" not in cat_list:
            cat_list.append("Session")
    if len(group_config_obj.series_list) > 0:
        design_formula = design_formula + " + Series"
        if "Series" not in cat_list:
            cat_list.append("Series")
    for col in list(model_df.columns):
        if "participant_" in col:
            design_formula = design_formula + " + %s" % col
            cat_list.append(col)


    # parse out the EVs in the design formula at this point in time
    #   this is essentially a list of the EVs that are to be included
    ev_list = parse_out_covariates(design_formula)


    # SPLIT GROUPS here.
    #   CURRENT PROBLEMS: was creating a few doubled-up new columns
    grp_vector = [1] * num_subjects

    if group_config_obj.group_sep:

        # model group variances separately
        old_ev_list = ev_list

        model_df, grp_vector, ev_list, cat_list = split_groups(model_df, \
                                group_config_obj.grouping_var, \
                                ev_list, cat_list)

        # make the grouping variable categorical for Patsy (if we try to
        # do this automatically below, it will categorical-ize all of 
        # the substrings too)
        design_formula = design_formula.replace(group_config_obj.grouping_var, \
                                  "C(" + group_config_obj.grouping_var + ")")
        if group_config_obj.coding_scheme == "Sum":
            design_formula = design_formula.replace(")", ", Sum)")

        # update design formula
        rename = {}
        for old_ev in old_ev_list:
            for new_ev in ev_list:
                if old_ev + "__FOR" in new_ev:
                    if old_ev not in rename.keys():
                        rename[old_ev] = []
                    rename[old_ev].append(new_ev)

        for old_ev in rename.keys():
            design_formula = design_formula.replace(old_ev, \
                                                   " + ".join(rename[old_ev]))


    # prep design formula for Patsy
    design_formula = patsify_design_formula(design_formula, cat_list, \
                         group_config_obj.coding_scheme[0])
    print design_formula
    # send to Patsy
    try:
        dmatrix = patsy.dmatrix(design_formula, model_df)
    except Exception as e:
        err = "\n\n[!] Something went wrong with processing the group model "\
              "design matrix using the Python Patsy package. Patsy might " \
              "not be properly installed, or there may be an issue with the "\
              "formatting of the design matrix.\n\nPatsy-formatted design " \
              "formula: %s\n\nError details: %s\n\n" \
              % (model_df.columns, design_formula, e)
        raise Exception(err)

    print dmatrix.design_info.column_names
    print dmatrix

    # check the model for multicollinearity - Patsy takes care of this, but
    # just in case
    check_multicollinearity(np.array(dmatrix))

    # prepare for final stages
    column_names = dmatrix.design_info.column_names

    # what is this for?
    design_matrix = np.array(dmatrix, dtype=np.float16)
    
        
    # check to make sure there are more time points than EVs!
    if len(column_names) >= num_subjects:
        err = "\n\n[!] CPAC says: There are more EVs than there are " \
              "participants currently included in the model for %s. There " \
              "must be more participants than EVs in the design.\n\nNumber " \
              "of participants: %d\nNumber of EVs: %d\n\nEV/covariate list: "\
              "%s\n\nNote: If you specified to model group " \
              "variances separately, the amount of EVs can nearly double " \
              "once they are split along the grouping variable.\n\n" \
              "If the number of subjects is lower than the number of " \
              "subjects in your group analysis subject list, this may be " \
              "because not every subject in the subject list has an output " \
              "for %s in the individual-level analysis output directory.\n\n"\
              % (resource_id, num_subjects, len(column_names), column_names, \
                 resource_id)
        raise Exception(err)

    # time for contrasts
    contrasts_dict = None

    if ((custom_confile == None) or (custom_confile == '') or \
            ("None" in custom_confile) or ("none" in custom_confile)):

        # if no custom contrasts matrix CSV provided (i.e. the user
        # specified contrasts in the GUI)
        contrasts_list = group_config_obj.contrasts
        contrasts_dict = create_contrasts_dict(dmatrix, contrasts_list,
            resource_id)

    # check the merged file's order
    check_merged_file(model_df["Filepath"], merge_file)

    # we must demean the categorical regressors if the Intercept/Grand Mean
    # is included in the model, otherwise FLAME produces blank outputs
    if "Intercept" in column_names:

        cat_indices = []
        col_name_indices = dmatrix.design_info.column_name_indexes
        for col_name in col_name_indices.keys():
            if "C(" in col_name:
                cat_indices.append(int(col_name_indices[col_name]))

        # note: dmat_T is now no longer a DesignMatrix Patsy object, but only
        # an array
        dmat_T = dmatrix.transpose()

        for index in cat_indices:
            new_row = []
            for val in dmat_T[index]:
                new_row.append(val - dmat_T[index].mean())
            dmat_T[index] = new_row

        # we can go back, but we won't be the same
        dmatrix = dmat_T.transpose()

        readme_flags.append("cat_demeaned")

    # send off the info so the FLAME input model files can be generated!
    mat_file, grp_file, con_file, fts_file = create_flame_model_files(dmatrix, \
        column_names, contrasts_dict, custom_confile, ftest_list, \
        group_config_obj.group_sep, grp_vector, group_config_obj.coding_scheme[0], \
        model_name, resource_id, model_path)

    dmat_csv_path = os.path.join(model_path, "design_matrix.csv")
    write_design_matrix_csv(dmatrix, model_df["Participant"], column_names, \
        dmat_csv_path)

    # workflow time
    wf_name = "%s_%s" % (resource_id, series_or_repeated_label)
    wf = pe.Workflow(name=wf_name)

    wf.base_dir = work_dir
    crash_dir = os.path.join(pipeline_config_obj.crashLogDirectory, \
                             "group_analysis", model_name)

    wf.config['execution'] = {'hash_method': 'timestamp', \
                              'crashdump_dir': crash_dir} 

    # gpa_wf
    # Creates the actual group analysis workflow
    gpa_wf = create_group_analysis(fTest, "gp_analysis_%s" % wf_name)

    gpa_wf.inputs.inputspec.merged_file = merge_file
    gpa_wf.inputs.inputspec.merge_mask = merge_mask

    gpa_wf.inputs.inputspec.z_threshold = z_threshold
    gpa_wf.inputs.inputspec.p_threshold = p_threshold
    gpa_wf.inputs.inputspec.parameters = (pipeline_config_obj.FSLDIR, \
                                          'MNI152')

    gpa_wf.inputs.inputspec.mat_file = mat_file
    gpa_wf.inputs.inputspec.con_file = con_file
    gpa_wf.inputs.inputspec.grp_file = grp_file

    if fTest:
        gpa_wf.inputs.inputspec.fts_file = fts_file      

    # ds
    # Creates the datasink node for group analysis
    ds = pe.Node(nio.DataSink(), name='gpa_sink')
     
    #     if c.mixedScanAnalysis == True:
    #         out_dir = re.sub(r'(\w)*scan_(\w)*(\d)*(\w)*[/]', '', out_dir)
              
    ds.inputs.base_directory = str(out_dir)
    ds.inputs.container = ''
        
    ds.inputs.regexp_substitutions = [(r'(?<=rendered)(.)*[/]','/'),
                                      (r'(?<=model_files)(.)*[/]','/'),
                                      (r'(?<=merged)(.)*[/]','/'),
                                      (r'(?<=stats/clusterMap)(.)*[/]','/'),
                                      (r'(?<=stats/unthreshold)(.)*[/]','/'),
                                      (r'(?<=stats/threshold)(.)*[/]','/'),
                                      (r'_cluster(.)*[/]',''),
                                      (r'_slicer(.)*[/]',''),
                                      (r'_overlay(.)*[/]','')]
   

    ########datasink connections#########
    #if fTest:
    #    wf.connect(gp_flow, 'outputspec.fts',
    #               ds, 'model_files.@0') 
        
    #wf.connect(gp_flow, 'outputspec.mat',
    #           ds, 'model_files.@1' )
    #wf.connect(gp_flow, 'outputspec.con',
    #           ds, 'model_files.@2')
    #wf.connect(gp_flow, 'outputspec.grp',
    #           ds, 'model_files.@3')
    wf.connect(gpa_wf, 'outputspec.merged',
               ds, 'merged')
    wf.connect(gpa_wf, 'outputspec.zstats',
               ds, 'stats.unthreshold')
    wf.connect(gpa_wf, 'outputspec.zfstats',
               ds,'stats.unthreshold.@01')
    wf.connect(gpa_wf, 'outputspec.fstats',
               ds,'stats.unthreshold.@02')
    wf.connect(gpa_wf, 'outputspec.cluster_threshold_zf',
               ds, 'stats.threshold')
    wf.connect(gpa_wf, 'outputspec.cluster_index_zf',
               ds,'stats.clusterMap')
    wf.connect(gpa_wf, 'outputspec.cluster_localmax_txt_zf',
               ds, 'stats.clusterMap.@01')
    wf.connect(gpa_wf, 'outputspec.overlay_threshold_zf',
               ds, 'rendered')
    wf.connect(gpa_wf, 'outputspec.rendered_image_zf',
               ds, 'rendered.@01')
    wf.connect(gpa_wf, 'outputspec.cluster_threshold',
               ds,  'stats.threshold.@01')
    wf.connect(gpa_wf, 'outputspec.cluster_index',
               ds, 'stats.clusterMap.@02')
    wf.connect(gpa_wf, 'outputspec.cluster_localmax_txt',
               ds, 'stats.clusterMap.@03')
    wf.connect(gpa_wf, 'outputspec.overlay_threshold',
               ds, 'rendered.@02')
    wf.connect(gpa_wf, 'outputspec.rendered_image',
               ds, 'rendered.@03')
       
    ######################################

    # Run the actual group analysis workflow
    wf.run()

    print "\n\nWorkflow finished for model %s\n\n" % wf_name