Example No. 1
def pearson_correlation(datadir,
                        outdir,
                        usemask=True,
                        excludef='',
                        exclude_idx=-1):

    slidir = datadir + os.path.sep + au.slices_str()

    subjsfile = datadir + os.path.sep + au.subjects_str()
    labelsfile = datadir + os.path.sep + au.labels_str()

    lst = os.listdir(slidir)
    n = au.count_match(lst, au.data_str() + '_' + au.slice_regex())

    exclude_log = ''
    if exclude_idx > -1:
        exclude_log = ' excluding subject ' + str(exclude_idx)

    au.log.info('Calculating correlation of ' + slidir + os.path.sep +
                au.data_str() + '_' + au.slice_regex() + exclude_log)

    for i in range(n):
        slino = au.zeropad(i)

        dataf = (slidir + os.path.sep + au.data_str() + '_' + au.slice_str()
                 + '_' + slino + au.ext_str())
        maskf = (slidir + os.path.sep + au.mask_str() + '_' + au.slice_str()
                 + '_' + slino + au.ext_str())
        outf = (outdir + os.path.sep + au.pearson_str() + '_' + au.slice_str()
                + '_' + slino)

        if exclude_idx > -1:
            outf += '_' + au.excluded_str() + str(exclude_idx) + au.ext_str()
        else:
            outf += au.ext_str()

        if not os.path.isfile(dataf):
            au.log.error('Could not find ' + dataf)
            continue

        if not usemask:
            maskf = ''

        try:
            measure_pearson(dataf, labelsfile, outf, maskf, excludef,
                            exclude_idx)
        except Exception:
            au.log.error('pearson_correlation: Error measuring correlation on '
                         + dataf)
            au.log.error('Unexpected error: ' + str(sys.exc_info()[0]))
            sys.exit(1)
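The loop above builds one data/mask/output path per zero-padded slice number. The short sketch below reproduces that naming pattern with the standard library only; the 'data_slice_0000.nii.gz' stems and the 4-digit padding are assumptions standing in for au.data_str(), au.slice_str(), au.zeropad() and au.ext_str(), not the real helpers.

import os
import re

def count_slice_files(slidir, pattern=r'^data_slice_\d+\.nii\.gz$'):
    """Count files in slidir matching the assumed per-slice data pattern."""
    rex = re.compile(pattern)
    return sum(1 for name in os.listdir(slidir) if rex.match(name))

def slice_paths(slidir, outdir, slice_idx, exclude_idx=-1):
    """Build data/mask/output paths for one slice, mirroring the example."""
    slino = str(slice_idx).zfill(4)                 # like au.zeropad(i)
    dataf = os.path.join(slidir, 'data_slice_' + slino + '.nii.gz')
    maskf = os.path.join(slidir, 'mask_slice_' + slino + '.nii.gz')
    outf  = os.path.join(outdir, 'pearson_slice_' + slino)
    if exclude_idx > -1:                            # leave-one-out run
        outf += '_excluded' + str(exclude_idx)
    return dataf, maskf, outf + '.nii.gz'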
Example No. 2
def pearson_correlation (datadir, outdir, usemask=True, excludef='', exclude_idx=-1):

   slidir = datadir + os.path.sep + au.slices_str()

   subjsfile  = datadir + os.path.sep + au.subjects_str()
   labelsfile = datadir + os.path.sep + au.labels_str()

   lst = os.listdir(slidir)
   n = au.count_match(lst, au.data_str() + '_' + au.slice_regex())

   exclude_log = ''
   if exclude_idx > -1:
      exclude_log = ' excluding subject ' + str(exclude_idx)
   
   au.log.info ('Calculating correlation of ' + slidir + os.path.sep + au.data_str() + '_' + au.slice_regex() + exclude_log)

   for i in range(n):
      slino = au.zeropad(i)

      dataf = slidir + os.path.sep + au.data_str()    + '_' + au.slice_str() + '_' + slino + au.ext_str()
      maskf = slidir + os.path.sep + au.mask_str()    + '_' + au.slice_str() + '_' + slino + au.ext_str()
      outf  = outdir + os.path.sep + au.pearson_str() + '_' + au.slice_str() + '_' + slino

      if exclude_idx > -1:
         outf += '_' + au.excluded_str() + str(exclude_idx) + au.ext_str()
      else:
         outf += au.ext_str()

      if not os.path.isfile(dataf): 
         au.log.error('Could not find ' + dataf)
         continue

      if not usemask:
         maskf = ''

      try:
         measure_pearson(dataf, labelsfile, outf, maskf, excludef, exclude_idx)
      except Exception:
         au.log.error('pearson_correlation: Error measuring correlation on ' + dataf)
         au.log.error('Unexpected error: ' + str(sys.exc_info()[0]))
         sys.exit(1)
Example No. 3

        #getting output prefix
        prefix = prefixes[m]
        m += 1

        #saving the feature matrix and labels in a binary file

        #setting output file name
        ofname = features_str()
        if prefix:
            ofname = prefix + '_' + ofname
        if scale:
            ofname = ofname + '.' + scaled_str()

        if excluf:
            excl_ofname = au.excluded_str() + au.feats_str()
            if prefix:
                excl_ofname = prefix + '_' + excl_ofname
            if scale:
                excl_ofname = excl_ofname + '.' + au.scaled_str()

        filename = get_filepath(outdir, ofname, otype)
        if os.path.exists(filename):
            print(filename + ' already exists. Jumping to the next.')
        else:
            print('Creating ' + filename)

        #reading mask volume
        if not os.path.exists(maskf):
            err = 'Mask file not found: ' + maskf
            raise IOError(err)
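The fragment above composes the output file name from optional prefix, scaling and exclusion suffixes. Here is a tiny, self-contained sketch of that composition rule; 'features', '.scaled' and 'excluded_feats' are assumed stand-ins for features_str(), scaled_str() and au.excluded_str() + au.feats_str().

def compose_feats_name(prefix='', scale=False, excluding=False):
    """Return (ofname, excl_ofname) following the same suffix/prefix rules."""
    ofname = 'features'
    if prefix:
        ofname = prefix + '_' + ofname
    if scale:
        ofname = ofname + '.scaled'

    excl_ofname = ''
    if excluding:
        excl_ofname = 'excluded_feats'
        if prefix:
            excl_ofname = prefix + '_' + excl_ofname
        if scale:
            excl_ofname = excl_ofname + '.scaled'
    return ofname, excl_ofname

print(compose_feats_name(prefix='oasis', scale=True, excluding=True))
# ('oasis_features.scaled', 'oasis_excluded_feats.scaled')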
Example No. 4
def save_data (outdir, prefix, dataname, otype, excluding, leave, feats, labels, exclfeats, exclulabels, dmin, dmax, scale, scale_min, scale_max, lthr, uthr, thrp, absolute):

    #setting output file name
    ofname = au.feats_str()

    if leave > -1:
        ofname += '.' + au.excluded_str() + str(leave)

    if absolute:  ofname += '.' + au.abs_str()
    if lthr:      ofname += '.lthr_' + str(lthr)
    if uthr:      ofname += '.uthr_' + str(uthr)
    if thrp:      ofname += '.thrP_' + str(thrp)
    if scale:     ofname += '.' + au.scaled_str()

    #the excluded-subjects output name only exists when excluding
    exclfilename = ''
    if excluding:
        excl_ofname = au.excluded_str() + '_' + ofname
        if prefix:
            excl_ofname = prefix + '_' + excl_ofname
        exclfilename = get_filepath (outdir, excl_ofname, otype)

    if prefix:
        ofname = prefix + '_' + ofname

    filename = get_filepath (outdir, ofname, otype)

    #writing in a text file the scaling values of this training set
    if scale:
        write_scalingrange_file (outdir + os.path.sep + ofname + '.scaling_range', dmin, dmax, scale_min, scale_max)

    #saving binary file depending on output type
    if otype == 'numpybin':
        np.save (filename, feats)

        if excluding:
            np.save (exclfilename, exclfeats)

    elif otype == 'octave':
        sio.savemat (filename, {au.feats_str(): feats, au.labels_str(): labels})
        if excluding:
            exclulabels[exclulabels == 0] = -1
            sio.savemat (exclfilename, {au.feats_str(): exclfeats, au.labels_str(): exclulabels})

    elif otype == 'svmperf':
        labels[labels == 0] = -1
        ae.write_svmperf_dat(filename, dataname, feats, labels)

        if excluding:
            exclulabels[exclulabels == 0] = -1
            ae.write_svmperf_dat(exclfilename, dataname, exclfeats, exclulabels)

    elif otype == 'arff':
        featnames = np.arange(feats.shape[1]) + 1   #one numeric name per feature column
        ae.write_arff (filename, dataname, featnames, feats, labels)

        if excluding:
            ae.write_arff (exclfilename, dataname, featnames, exclfeats, exclulabels)

    else:
        err = 'Output method not recognised!'
        au.log.error(err)
        sys.exit(-1)

    return [filename, exclfilename]
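save_data dispatches on the output type string to pick a writer. A minimal sketch of that dispatch, limited to the two formats that need no project-specific writers (NumPy binary and Octave/MATLAB via SciPy); the dictionary keys 'feats' and 'labels' are assumptions mirroring au.feats_str() and au.labels_str().

import numpy as np
import scipy.io as sio

def save_feats(filename, otype, feats, labels):
    if otype == 'numpybin':
        np.save(filename, feats)               # writes filename + '.npy'
    elif otype == 'octave':
        sio.savemat(filename, {'feats': feats, 'labels': labels})
    else:
        raise ValueError('Output method not recognised: ' + otype)

feats  = np.random.rand(10, 4)
labels = np.array([0, 1] * 5)
save_feats('demo_feats', 'numpybin', feats, labels)
save_feats('demo_feats.mat', 'octave', feats, labels)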
Example No. 5
        #read the measure argument and start processing
        if measure == 'pea':
            #measure pearson correlation for each population slice
            step = au.measureperslice_str() + step_params
            if not au.is_done(chkf, step):
                pear.pearson_correlation(outdir, mdir, usemask, excluf, leave)
                au.checklist_add(chkf, step)

            #merge all correlation slice measures
            step = au.postmerging_str() + step_params
            if not au.is_done(chkf, step):
                pearegex = au.pearson_str() + '_' + au.slice_str() + '*'
                peameasf = mdir + os.path.sep + au.pearson_str()

                if leave > -1:
                    pearegex += '_' + au.excluded_str() + str(leave) + '*'
                    peameasf += ('_' + au.excluded_str() + str(leave)
                                 + '_' + au.pearson_str())

                post.merge_slices(mdir, pearegex, peameasf, mdir)

                if absval:
                    post.change_to_absolute_values(peameasf)

                au.checklist_add(chkf, step)

        #BHATTACHARYYA AND T-TEST
        elif measure == 'bat' or measure == 'ttest':

            if not os.path.exists(statsdir):
                os.mkdir(statsdir)
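The pipeline above skips finished steps with au.is_done(chkf, step) and records them with au.checklist_add(chkf, step). The snippet below is only a guess at a minimal equivalent, using a plain-text checklist file with one completed step name per line.

import os

def is_done(chkf, step):
    """True if 'step' was already recorded in the checklist file."""
    if not os.path.exists(chkf):
        return False
    with open(chkf) as f:
        return step in (line.rstrip('\n') for line in f)

def checklist_add(chkf, step):
    """Append 'step' to the checklist file so reruns can skip it."""
    with open(chkf, 'a') as f:
        f.write(step + '\n')

chkf = 'pipeline.chk'
step = 'measureperslice pea'
if not is_done(chkf, step):
    # ... run the per-slice measure here ...
    checklist_add(chkf, step)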
Example No. 6

        # getting output prefix
        prefix = prefixes[m]
        m += 1

        # saving the feature matrix and labels in a binary file

        # setting output file name
        ofname = features_str()
        if prefix:
            ofname = prefix + "_" + ofname
        if scale:
            ofname = ofname + "." + scaled_str()

        if excluf:
            excl_ofname = au.excluded_str() + au.feats_str()
            if prefix:
                excl_ofname = prefix + "_" + excl_ofname
            if scale:
                excl_ofname = excl_ofname + "." + au.scaled_str()

        filename = get_filepath(outdir, ofname, otype)
        if os.path.exists(filename):
            print(filename + " already exists. Jumping to the next.")
        else:
            print("Creating " + filename)

        # reading mask volume
        if not os.path.exists(maskf):
            err = "Mask file not found: " + maskf
            raise IOError(err)
Example No. 7
      #read the measure argument and start processing
      if measure == 'pea':
         #measure pearson correlation for each population slice
         step = au.measureperslice_str() + step_params
         if not au.is_done(chkf, step):
            pear.pearson_correlation (outdir, mdir, usemask, excluf, leave)
            au.checklist_add(chkf, step)

         #merge all correlation slice measures
         step = au.postmerging_str() + step_params
         if not au.is_done(chkf, step):
            pearegex = au.pearson_str() + '_' + au.slice_str() + '*'
            peameasf = mdir + os.path.sep + au.pearson_str()

            if leave > -1:
               pearegex += '_' + au.excluded_str() + str(leave) + '*'
               peameasf += '_' + au.excluded_str() + str(leave) + '_' + au.pearson_str()

            post.merge_slices (mdir, pearegex, peameasf, mdir)

            if absval:
               post.change_to_absolute_values(peameasf)

            au.checklist_add(chkf, step)

      #BHATTACHARYYA AND T-TEST
      elif measure == 'bat' or measure == 'ttest':

         if not os.path.exists (statsdir):
            os.mkdir(statsdir)
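post.merge_slices above gathers every per-slice Pearson volume matching a pattern such as pearson_slice_*. A stdlib-only sketch of that file-gathering step follows; the 'pearson_slice_' and '_excluded' name stems and the directory are illustrative assumptions, and the actual merging of the volumes is left out because it depends on the project's neuroimaging I/O.

import glob
import os

def collect_slice_files(mdir, leave=-1):
    """Return the sorted per-slice measure files to be merged."""
    pattern = 'pearson_slice_*'
    if leave > -1:
        pattern += '_excluded' + str(leave) + '*'
    return sorted(glob.glob(os.path.join(mdir, pattern)))

for f in collect_slice_files('/tmp/measures'):
    print(f)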
Example No. 8

    print('Creating ' + filename)

    if otype == 'numpybin':
        np.save(filename, feats)

    elif otype == 'octave':
        sio.savemat(filename, {au.feats_str(): feats, au.labels_str(): labels})

    elif otype == 'svmperf':
        labels[labels == 0] = -1
        ae.write_svmperf_dat(filename, dataname, feats, labels)
        if excluf:
            exclulabels[exclulabels == 0] = -1
            exclfilename = set_filename(
                outdir, prefix + '_' + au.excluded_str() + au.feats_str(),
                otype)
            ae.write_svmperf_dat(exclfilename, dataname, exclfeats,
                                 exclulabels)

    elif otype == 'arff':
        featnames = np.arange(nfeats) + 1
        ae.write_arff(filename, dataname, featnames, feats, labels)

    else:
        err = 'Output method not recognised!'
        raise IOError(err)

    return filename
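Both the svmperf branch and its excluded-subject counterpart flip 0 labels to -1 before writing, because SVMperf-style .dat files expect labels in {-1, +1}. A tiny standalone illustration of that in-place remapping:

import numpy as np

labels = np.array([0, 1, 0, 1, 1])
labels[labels == 0] = -1      # binary 0/1 labels become -1/+1
print(labels)                 # [-1  1 -1  1  1]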

Example No. 9
def group_distance (measure_function, datadir, groups, groupsizes, chkf, absolute=False, outdir='', foldno='', expname='', exclude_idx=-1, exclude_subj='', exclude_subjclass=''):

    olddir = os.getcwd()

    if not outdir:
        outdir = datadir

    ngroups = len(groups)
    #matrix of strings of 2000 characters maximum, to save filepaths
    gfnames = np.zeros ([ngroups,3], dtype=np.dtype('a2000'))

    subject_excluded = False

    for g1 in range(ngroups):
        g1name = groups[g1]
        #mean1fname
        gfnames[g1,0] = datadir + os.path.sep + g1name + '_' + au.mean_str()
        #var1fname  
        gfnames[g1,1] = datadir + os.path.sep + g1name + '_' + au.var_str()
        #std1fname
        gfnames[g1,2] = datadir + os.path.sep + g1name + '_' + au.std_str()

        for g2 in range(g1+1, ngroups):
            g2name = groups[g2]
            gfnames[g2,0] = datadir + os.path.sep + g2name + '_' + au.mean_str()
            gfnames[g2,1] = datadir + os.path.sep + g2name + '_' + au.var_str()
            gfnames[g2,2] = datadir + os.path.sep + g2name + '_' + au.std_str()

            experiment = g1name + '_vs_' + g2name

            #check if exclude_subjclass is any of both current groups
            eg = -1
            if exclude_idx > -1:
                if   exclude_subjclass == g1name: eg = g2
                elif exclude_subjclass == g2name: eg = g1

            step = au.measure_str() + ' ' + measure_function.__name__ + ' ' + experiment + ' ' + datadir

            #remove subject from stats
            if eg > -1:
                exclude_str = '_' + au.excluded_str() + str(exclude_idx)
                step       += exclude_str
                experiment += exclude_str

                if not au.is_done(chkf, step):
                    if not subject_excluded:
                        newmeanfname = gfnames[eg,0] + exclude_str
                        newvarfname  = gfnames[eg,1] + exclude_str
                        newstdfname  = gfnames[eg,2] + exclude_str

                        rstep = au.remove_str() + ' ' + au.subject_str() + ' ' + str(exclude_subj) + ' ' + au.fromstats_str() + ' ' + datadir
                        if not au.is_done(chkf, rstep):
                           #(meanfname, varfname, samplesize, subjvolfname, newmeanfname, newvarfname, newstdfname='')
                           remove_subject_from_stats (gfnames[eg,0], gfnames[eg,1], groupsizes[eg][1], exclude_subj, newmeanfname, newvarfname, newstdfname)
                           au.checklist_add (chkf, rstep)

                        gfnames[eg,0] += exclude_str
                        gfnames[eg,1] += exclude_str
                        gfnames[eg,2] += exclude_str

                        groupsizes[eg][1] -= 1

                        subject_excluded = True

            #calculating distance
            if not au.is_done(chkf, step):
                mean1fname = au.add_extension_if_needed (gfnames[g1,0], au.ext_str())
                mean2fname = au.add_extension_if_needed (gfnames[g2,0], au.ext_str())
                var1fname  = au.add_extension_if_needed (gfnames[g1,1], au.ext_str())
                var2fname  = au.add_extension_if_needed (gfnames[g2,1], au.ext_str())
                std1fname  = au.add_extension_if_needed (gfnames[g1,2], au.ext_str())
                std2fname  = au.add_extension_if_needed (gfnames[g2,2], au.ext_str())

                outfname = measure_function (mean1fname, mean2fname, var1fname, var2fname, std1fname, std2fname, groupsizes[g1][1], groupsizes[g2][1], experiment, outdir, exclude_idx)

                if absolute:
                    change_to_absolute_values (outfname)

                au.checklist_add (chkf, step)

                return outfname
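group_distance walks every unordered pair of groups with two nested loops (g1, then g2 > g1) and names each comparison 'g1name_vs_g2name'. The same pairing can be expressed with itertools.combinations; the group names below are made up for illustration.

from itertools import combinations

groups = ['controls', 'patients', 'converters']
for g1name, g2name in combinations(groups, 2):
    experiment = g1name + '_vs_' + g2name
    print(experiment)
# controls_vs_patients
# controls_vs_converters
# patients_vs_converters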
Example No. 10

   filename = set_filename (outdir, prefix + '_' + au.features_str(), otype)

   print ('Creating ' + filename)

   if otype == 'numpybin':
      np.save (filename, feats)

   elif otype == 'octave':
      sio.savemat (filename, {au.feats_str(): feats, au.labels_str(): labels})

   elif otype == 'svmperf':
      labels[labels == 0] = -1
      ae.write_svmperf_dat(filename, dataname, feats, labels)
      if excluf:
         exclulabels[exclulabels == 0] = -1
         exclfilename = set_filename(outdir, prefix + '_' + au.excluded_str() + au.feats_str(), otype)
         ae.write_svmperf_dat(exclfilename, dataname, exclfeats, exclulabels)

   elif otype == 'arff':
      featnames = np.arange(nfeats) + 1
      ae.write_arff (filename, dataname, featnames, feats, labels)

   else:
      err = 'Output method not recognised!'
      raise IOError(err)

   return filename

#-------------------------------------------------------------------------------
## END EXTRACT FEATSET
#-------------------------------------------------------------------------------
Example No. 11
def group_distance(measure_function,
                   datadir,
                   groups,
                   groupsizes,
                   chkf,
                   absolute=False,
                   outdir='',
                   foldno='',
                   expname='',
                   exclude_idx=-1,
                   exclude_subj='',
                   exclude_subjclass=''):

    olddir = os.getcwd()

    if not outdir:
        outdir = datadir

    ngroups = len(groups)
    #matrix of strings of 2000 characters maximum, to save filepaths
    gfnames = np.zeros([ngroups, 3], dtype=np.dtype('a2000'))

    subject_excluded = False

    for g1 in range(ngroups):
        g1name = groups[g1]
        #mean1fname
        gfnames[g1, 0] = datadir + os.path.sep + g1name + '_' + au.mean_str()
        #var1fname
        gfnames[g1, 1] = datadir + os.path.sep + g1name + '_' + au.var_str()
        #std1fname
        gfnames[g1, 2] = datadir + os.path.sep + g1name + '_' + au.std_str()

        for g2 in range(g1 + 1, ngroups):
            g2name = groups[g2]
            gfnames[g2, 0] = datadir + os.path.sep + g2name + '_' + au.mean_str()
            gfnames[g2, 1] = datadir + os.path.sep + g2name + '_' + au.var_str()
            gfnames[g2, 2] = datadir + os.path.sep + g2name + '_' + au.std_str()

            experiment = g1name + '_vs_' + g2name

            #check if exclude_subjclass is any of both current groups
            eg = -1
            if exclude_idx > -1:
                if exclude_subjclass == g1name: eg = g2
                elif exclude_subjclass == g2name: eg = g1

            step = (au.measure_str() + ' ' + measure_function.__name__
                    + ' ' + experiment + ' ' + datadir)

            #remove subject from stats
            if eg > -1:
                exclude_str = '_' + au.excluded_str() + str(exclude_idx)
                step += exclude_str
                experiment += exclude_str

                if not au.is_done(chkf, step):
                    if not subject_excluded:
                        newmeanfname = gfnames[eg, 0] + exclude_str
                        newvarfname = gfnames[eg, 1] + exclude_str
                        newstdfname = gfnames[eg, 2] + exclude_str

                        rstep = (au.remove_str() + ' ' + au.subject_str() + ' '
                                 + str(exclude_subj) + ' ' + au.fromstats_str()
                                 + ' ' + datadir)
                        if not au.is_done(chkf, rstep):
                            #(meanfname, varfname, samplesize, subjvolfname, newmeanfname, newvarfname, newstdfname='')
                            remove_subject_from_stats(gfnames[eg, 0],
                                                      gfnames[eg, 1],
                                                      groupsizes[eg][1],
                                                      exclude_subj,
                                                      newmeanfname,
                                                      newvarfname,
                                                      newstdfname)
                            au.checklist_add(chkf, rstep)

                        gfnames[eg, 0] += exclude_str
                        gfnames[eg, 1] += exclude_str
                        gfnames[eg, 2] += exclude_str

                        groupsizes[eg][1] -= 1

                        subject_excluded = True

            #calculating distance
            if not au.is_done(chkf, step):
                mean1fname = au.add_extension_if_needed(gfnames[g1, 0], au.ext_str())
                mean2fname = au.add_extension_if_needed(gfnames[g2, 0], au.ext_str())
                var1fname  = au.add_extension_if_needed(gfnames[g1, 1], au.ext_str())
                var2fname  = au.add_extension_if_needed(gfnames[g2, 1], au.ext_str())
                std1fname  = au.add_extension_if_needed(gfnames[g1, 2], au.ext_str())
                std2fname  = au.add_extension_if_needed(gfnames[g2, 2], au.ext_str())

                outfname = measure_function(mean1fname, mean2fname, var1fname,
                                            var2fname, std1fname, std2fname,
                                            groupsizes[g1][1],
                                            groupsizes[g2][1], experiment,
                                            outdir, exclude_idx)

                if absolute:
                    change_to_absolute_values(outfname)

                au.checklist_add(chkf, step)

                return outfname
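remove_subject_from_stats rewrites the group mean/variance images with one subject taken out. For a single value (one voxel) that downdate has a closed form; the sketch below shows the arithmetic, assuming the biased (ddof=0) variance convention, which is an assumption about the project rather than something stated in the examples.

import numpy as np

def remove_one_sample(mean, var, n, x):
    """Return (new_mean, new_var, n - 1) after removing sample x."""
    new_n = n - 1
    new_mean = (n * mean - x) / new_n
    sum_sq = n * (var + mean ** 2)            # recover the sum of x_i**2
    new_var = (sum_sq - x ** 2) / new_n - new_mean ** 2
    return new_mean, new_var, new_n

data = np.array([2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0])
m, v = data.mean(), data.var()
nm, nv, nn = remove_one_sample(m, v, data.size, data[-1])
assert np.isclose(nm, data[:-1].mean()) and np.isclose(nv, data[:-1].var())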