def pearson_correlation(datadir, outdir, usemask=True, excludef='',
                        exclude_idx=-1):
    """Measure the Pearson correlation of every data slice under ``datadir``.

    The slices sub-directory of ``datadir`` is listed, the entries matching
    the data-slice pattern are counted, and ``measure_pearson`` is called
    once per slice index with the labels file of ``datadir`` and an output
    path inside ``outdir``.

    Parameters
    ----------
    datadir : str
        Directory holding the slices sub-directory and the labels file.
    outdir : str
        Directory that receives one output file per slice.
    usemask : bool, optional
        When false, an empty mask path is handed to ``measure_pearson``
        instead of the per-slice mask file path.
    excludef : str, optional
        Forwarded unchanged to ``measure_pearson``.
    exclude_idx : int, optional
        Index of an excluded subject. When greater than -1 it is appended
        to the log message and to every output file name, and it is always
        forwarded to ``measure_pearson``.

    Notes
    -----
    A slice whose data file does not exist is logged and skipped. Any
    ``Exception`` raised by ``measure_pearson`` is logged and the process is
    terminated with exit status 1.
    """
    sep = os.path.sep
    slidir = datadir + sep + au.slices_str()
    labelsfile = datadir + sep + au.labels_str()

    lst = os.listdir(slidir)
    n = au.count_match(lst, au.data_str() + '_' + au.slice_regex())

    exclude_log = ''
    if exclude_idx > -1:
        exclude_log = ' excluding subject ' + str(exclude_idx)

    au.log.info('Calculating correlation of ' + slidir + sep +
                au.data_str() + '_' + au.slice_regex() + exclude_log)

    for i in range(n):
        slino = au.zeropad(i)
        slice_suffix = '_' + au.slice_str() + '_' + slino

        dataf = slidir + sep + au.data_str() + slice_suffix + au.ext_str()
        maskf = slidir + sep + au.mask_str() + slice_suffix + au.ext_str()
        outf = outdir + sep + au.pearson_str() + slice_suffix

        if exclude_idx > -1:
            outf += '_' + au.excluded_str() + str(exclude_idx) + au.ext_str()
        else:
            outf += au.ext_str()

        if not os.path.isfile(dataf):
            au.log.error('Could not find ' + dataf)
            continue

        if not usemask:
            maskf = ''

        try:
            measure_pearson(dataf, labelsfile, outf, maskf, excludef,
                            exclude_idx)
        except Exception:
            # Only real errors are trapped here: a bare ``except`` would also
            # intercept KeyboardInterrupt and SystemExit.
            au.log.error(
                'pearson_correlation: Error measuring correlation on ' + dataf)
            # The extra argument needs a placeholder to be rendered.
            # Assumes au.log follows the stdlib logging (msg, *args)
            # convention, which the previous two-argument call relied on too.
            au.log.error('Unexpected error: %s', sys.exc_info()[0])
            # sys.exit is always available; the bare ``exit`` builtin is only
            # injected by the ``site`` module.
            sys.exit(1)
def pearson_correlation(datadir, outdir, usemask=True, excludef='',
                        exclude_idx=-1):
    """Measure the Pearson correlation of every data slice under ``datadir``.

    The slices sub-directory of ``datadir`` is listed, the entries matching
    the data-slice pattern are counted, and ``measure_pearson`` is called
    once per slice index with the labels file of ``datadir`` and an output
    path inside ``outdir``.

    Parameters
    ----------
    datadir : str
        Directory holding the slices sub-directory and the labels file.
    outdir : str
        Directory that receives one output file per slice.
    usemask : bool, optional
        When false, an empty mask path is handed to ``measure_pearson``
        instead of the per-slice mask file path.
    excludef : str, optional
        Forwarded unchanged to ``measure_pearson``.
    exclude_idx : int, optional
        Index of an excluded subject. When greater than -1 it is appended
        to the log message and to every output file name, and it is always
        forwarded to ``measure_pearson``.

    Notes
    -----
    A slice whose data file does not exist is logged and skipped. Any
    ``Exception`` raised by ``measure_pearson`` is logged and the process is
    terminated with exit status 1.
    """
    sep = os.path.sep
    slidir = datadir + sep + au.slices_str()
    labelsfile = datadir + sep + au.labels_str()

    lst = os.listdir(slidir)
    n = au.count_match(lst, au.data_str() + '_' + au.slice_regex())

    exclude_log = ''
    if exclude_idx > -1:
        exclude_log = ' excluding subject ' + str(exclude_idx)

    au.log.info('Calculating correlation of ' + slidir + sep +
                au.data_str() + '_' + au.slice_regex() + exclude_log)

    for i in range(n):
        slino = au.zeropad(i)
        slice_suffix = '_' + au.slice_str() + '_' + slino

        dataf = slidir + sep + au.data_str() + slice_suffix + au.ext_str()
        maskf = slidir + sep + au.mask_str() + slice_suffix + au.ext_str()
        outf = outdir + sep + au.pearson_str() + slice_suffix

        if exclude_idx > -1:
            outf += '_' + au.excluded_str() + str(exclude_idx) + au.ext_str()
        else:
            outf += au.ext_str()

        if not os.path.isfile(dataf):
            au.log.error('Could not find ' + dataf)
            continue

        if not usemask:
            maskf = ''

        try:
            measure_pearson(dataf, labelsfile, outf, maskf, excludef,
                            exclude_idx)
        except Exception:
            # Only real errors are trapped here: a bare ``except`` would also
            # intercept KeyboardInterrupt and SystemExit.
            au.log.error(
                'pearson_correlation: Error measuring correlation on ' + dataf)
            # The extra argument needs a placeholder to be rendered.
            # Assumes au.log follows the stdlib logging (msg, *args)
            # convention, which the previous two-argument call relied on too.
            au.log.error('Unexpected error: %s', sys.exc_info()[0])
            # sys.exit is always available; the bare ``exit`` builtin is only
            # injected by the ``site`` module.
            sys.exit(1)
#getting output prefix prefix = prefixes[m] m += 1 #saving the feature matrix and labels in a binary file #setting output file name ofname = features_str() if prefix: ofname = prefix + '_' + ofname if scale: ofname = ofname + '.' + scaled_str() if excluf: excl_ofname = au.excluded_str() + au.feats_str() if prefix: excl_ofname = prefix + '_' + excl_ofname if scale: excl_ofname = excl_ofname + '.' + au.scaled_str() filename = get_filepath(outdir, ofname, otype) if os.path.exists(filename): print(filename + ' already exists. Jumping to the next.') else: print('Creating ' + filename) #reading mask volume if not os.path.exists(maskf): err = 'Mask file not found: ' + maskf raise IOError(err)
def save_data(outdir, prefix, dataname, otype, excluding, leave, feats, labels,
              exclfeats, exclulabels, dmin, dmax, scale, scale_min, scale_max,
              lthr, uthr, thrp, absolute):
    """Write a feature set (and optionally the excluded-subject set) to disk.

    The output base name starts from ``au.feats_str()`` and gets one suffix
    per active option (``leave``, ``absolute``, ``lthr``, ``uthr``, ``thrp``,
    ``scale``). ``prefix``, when given, is prepended to it.

    Parameters
    ----------
    outdir : str
        Output directory.
    prefix : str
        Optional prefix for the output file names; ignored when empty.
    dataname : str
        Dataset name handed to the svmperf and arff writers.
    otype : str
        One of ``'numpybin'``, ``'octave'``, ``'svmperf'`` or ``'arff'``.
    excluding : bool
        When true, ``exclfeats``/``exclulabels`` are written to a second file.
    leave : int
        Excluded-subject index; added to the file name when greater than -1.
    feats, labels
        Feature matrix and label array to save.
    exclfeats, exclulabels
        Feature matrix and labels of the excluded subject(s).
    dmin, dmax, scale_min, scale_max
        Values written to the ``.scaling_range`` file when ``scale`` is true.
    scale, lthr, uthr, thrp, absolute
        Options that only affect the output name (``scale`` also triggers
        the scaling-range file).

    Returns
    -------
    list
        ``[filename, exclfilename]``; ``exclfilename`` is ``''`` when
        ``excluding`` is false.

    Notes
    -----
    ``labels`` is modified in place (0 becomes -1) for ``'svmperf'``, and
    ``exclulabels`` likewise for ``'octave'`` and ``'svmperf'``. An
    unrecognised ``otype`` is logged and terminates the process via
    ``sys.exit(-1)``.
    """
    # setting output file name
    ofname = au.feats_str()
    if leave > -1:
        ofname += '.' + au.excluded_str() + str(leave)
    if absolute:
        ofname += '.' + au.abs_str()
    if lthr:
        ofname += '.lthr_' + str(lthr)
    if uthr:
        ofname += '.uthr_' + str(uthr)
    if thrp:
        ofname += '.thrP_' + str(thrp)
    if scale:
        ofname += '.' + au.scaled_str()

    # The excluded-set name only exists when an excluded set is requested;
    # touching it unconditionally raised UnboundLocalError for callers that
    # passed a prefix without excluding anything.
    excl_ofname = ''
    if excluding:
        excl_ofname = au.excluded_str() + '_' + ofname

    if prefix:
        if excluding:
            excl_ofname = prefix + '_' + excl_ofname
        ofname = prefix + '_' + ofname

    filename = get_filepath(outdir, ofname, otype)

    # Resolve the excluded-set path after the prefix has been applied so that
    # the prefix actually reaches the file name; '' keeps the return value
    # well defined when nothing is excluded.
    exclfilename = ''
    if excluding:
        exclfilename = get_filepath(outdir, excl_ofname, otype)

    # writing in a text file the scaling values of this training set
    if scale:
        write_scalingrange_file(
            outdir + os.path.sep + ofname + '.scaling_range',
            dmin, dmax, scale_min, scale_max)

    # saving binary file depending on output type
    if otype == 'numpybin':
        np.save(filename, feats)
        if excluding:
            np.save(exclfilename, exclfeats)

    elif otype == 'octave':
        sio.savemat(filename,
                    {au.feats_str(): feats, au.labels_str(): labels})
        if excluding:
            exclulabels[exclulabels == 0] = -1
            sio.savemat(exclfilename,
                        {au.feats_str(): exclfeats,
                         au.labels_str(): exclulabels})

    elif otype == 'svmperf':
        labels[labels == 0] = -1
        ae.write_svmperf_dat(filename, dataname, feats, labels)
        if excluding:
            exclulabels[exclulabels == 0] = -1
            ae.write_svmperf_dat(exclfilename, dataname, exclfeats,
                                 exclulabels)

    elif otype == 'arff':
        # ``nfeats`` was never defined in this function. Derive it from the
        # matrix instead; assumes feats is (n_subjects, n_features) -- TODO
        # confirm against the callers.
        featnames = np.arange(feats.shape[1]) + 1
        ae.write_arff(filename, dataname, featnames, feats, labels)
        if excluding:
            ae.write_arff(exclfilename, dataname, featnames, exclfeats,
                          exclulabels)

    else:
        err = 'Output method not recognised!'
        au.log.error(err)
        sys.exit(-1)

    return [filename, exclfilename]
#read the measure argument and start processing if measure == 'pea': #measure pearson correlation for each population slice step = au.measureperslice_str() + step_params if not au.is_done(chkf, step): pear.pearson_correlation(outdir, mdir, usemask, excluf, leave) au.checklist_add(chkf, step) #merge all correlation slice measures step = au.postmerging_str() + step_params if not au.is_done(chkf, step): pearegex = au.pearson_str() + '_' + au.slice_str() + '*' peameasf = mdir + os.path.sep + au.pearson_str() if leave > -1: pearegex += '_' + au.excluded_str() + str(leave) + '*' peameasf += '_' + au.excluded_str() + str( leave) + '_' + au.pearson_str() post.merge_slices(mdir, pearegex, peameasf, mdir) if absval: post.change_to_absolute_values(peameasf) au.checklist_add(chkf, step) #BHATTACHARYYA AND T-TEST elif measure == 'bat' or measure == 'ttest': if not os.path.exists(statsdir): os.mkdir(statsdir)
# getting output prefix prefix = prefixes[m] m += 1 # saving the feature matrix and labels in a binary file # setting output file name ofname = features_str() if prefix: ofname = prefix + "_" + ofname if scale: ofname = ofname + "." + scaled_str() if excluf: excl_ofname = au.excluded_str() + au.feats_str() if prefix: excl_ofname = prefix + "_" + excl_ofname if scale: excl_ofname = excl_ofname + "." + au.scaled_str() filename = get_filepath(outdir, ofname, otype) if os.path.exists(filename): print(filename + " already exists. Jumping to the next.") else: print("Creating " + filename) # reading mask volume if not os.path.exists(maskf): err = "Mask file not found: " + maskf raise IOError(err)
#read the measure argument and start processing if measure == 'pea': #measure pearson correlation for each population slice step = au.measureperslice_str() + step_params if not au.is_done(chkf, step): pear.pearson_correlation (outdir, mdir, usemask, excluf, leave) au.checklist_add(chkf, step) #merge all correlation slice measures step = au.postmerging_str() + step_params if not au.is_done(chkf, step): pearegex = au.pearson_str() + '_' + au.slice_str() + '*' peameasf = mdir + os.path.sep + au.pearson_str() if leave > -1: pearegex += '_' + au.excluded_str() + str(leave) + '*' peameasf += '_' + au.excluded_str() + str(leave) + '_' + au.pearson_str() post.merge_slices (mdir, pearegex, peameasf, mdir) if absval: post.change_to_absolute_values(peameasf) au.checklist_add(chkf, step) #BHATTACHARYYA AND T-TEST elif measure == 'bat' or measure == 'ttest': if not os.path.exists (statsdir): os.mkdir(statsdir)
print('Creating ' + filename) if otype == 'numpybin': np.save(filename, feats) elif otype == 'octave': sio.savemat(filename, {au.feats_str(): feats, au.labels_str(): labels}) elif otype == 'svmperf': labels[labels == 0] = -1 ae.write_svmperf_dat(filename, dataname, feats, labels) if excluf: exclulabels[exclulabels == 0] = -1 exclfilename = set_filename( outdir, prefix + '_' + au.excluded_str() + au.feats_str(), otype) ae.write_svmperf_dat(exclfilename, dataname, exclfeats, exclulabels) elif otype == 'arff': featnames = np.arange(nfeats) + 1 ae.write_arff(filename, dataname, featnames, feats, labels) else: err = 'Output method not recognised!' raise IOError(err) return filename
def group_distance(measure_function, datadir, groups, groupsizes, chkf,
                   absolute=False, outdir='', foldno='', expname='',
                   exclude_idx=-1, exclude_subj='', exclude_subjclass=''):
    """Apply ``measure_function`` to every pair of groups.

    For each pair ``(g1, g2)`` with ``g1 < g2`` the mean, variance and
    standard-deviation file names of both groups are built inside
    ``datadir`` and handed to ``measure_function``. Work already recorded in
    the checklist ``chkf`` (queried through ``au.is_done``) is skipped, and
    every completed step is registered with ``au.checklist_add``.

    Parameters
    ----------
    measure_function : callable
        Called as ``measure_function(mean1, mean2, var1, var2, std1, std2,
        size1, size2, experiment, outdir, exclude_idx)``; its return value
        is treated as the output file name.
    datadir : str
        Directory holding the per-group statistics files.
    groups : sequence of str
        Group names.
    groupsizes : sequence
        Indexed as ``groupsizes[g][1]`` to obtain the size of group ``g``.
        The entry of the group a subject is removed from is decremented in
        place.
    chkf
        Checklist handle passed to ``au.is_done`` / ``au.checklist_add``.
    absolute : bool, optional
        When true, ``change_to_absolute_values`` is applied to each output.
    outdir : str, optional
        Output directory; defaults to ``datadir`` when empty.
    foldno, expname
        Accepted for interface compatibility; not used here.
    exclude_idx : int, optional
        Index of the excluded subject; exclusion is active when > -1.
    exclude_subj : str, optional
        Passed to ``remove_subject_from_stats`` as the subject to remove.
    exclude_subjclass : str, optional
        Name of the group that selects which pairs get exclusion handling.

    Returns
    -------
    str
        Value returned by the last ``measure_function`` call made during
        this invocation, or ``''`` when no measure had to be computed.
    """
    if not outdir:
        outdir = datadir

    ngroups = len(groups)

    # One [mean, var, std] row of file paths per group. Plain Python strings
    # are used so long paths are never truncated and the entries stay ``str``
    # (a fixed-width numpy byte matrix yields bytes on Python 3).
    gfnames = [['', '', ''] for _ in range(ngroups)]

    # ``__name__`` exists on both Python 2 and 3 functions, unlike
    # ``func_name``.
    measure_name = measure_function.__name__

    # Defined up front so the final return cannot hit an unbound local when
    # there are fewer than two groups or every step was already done.
    outfname = ''

    subject_excluded = False

    for g1 in range(ngroups):
        g1name = groups[g1]
        gfnames[g1][0] = datadir + os.path.sep + g1name + '_' + au.mean_str()
        gfnames[g1][1] = datadir + os.path.sep + g1name + '_' + au.var_str()
        gfnames[g1][2] = datadir + os.path.sep + g1name + '_' + au.std_str()

        for g2 in range(g1 + 1, ngroups):
            g2name = groups[g2]
            gfnames[g2][0] = (datadir + os.path.sep + g2name + '_' +
                              au.mean_str())
            gfnames[g2][1] = (datadir + os.path.sep + g2name + '_' +
                              au.var_str())
            gfnames[g2][2] = (datadir + os.path.sep + g2name + '_' +
                              au.std_str())

            experiment = g1name + '_vs_' + g2name

            # check if exclude_subjclass is any of both current groups
            # NOTE(review): the group selected here is the *other* group of
            # the pair, not the one named by exclude_subjclass -- this looks
            # inverted; left unchanged, confirm the intent.
            eg = -1
            if exclude_idx > -1:
                if exclude_subjclass == g1name:
                    eg = g2
                elif exclude_subjclass == g2name:
                    eg = g1

            step = (au.measure_str() + ' ' + measure_name + ' ' +
                    experiment + ' ' + datadir)

            # remove subject from stats
            if eg > -1:
                exclude_str = '_' + au.excluded_str() + str(exclude_idx)
                step += exclude_str
                experiment += exclude_str

                if not au.is_done(chkf, step):
                    # The subject is removed at most once per call.
                    if not subject_excluded:
                        newmeanfname = gfnames[eg][0] + exclude_str
                        newvarfname = gfnames[eg][1] + exclude_str
                        newstdfname = gfnames[eg][2] + exclude_str

                        rstep = (au.remove_str() + ' ' + au.subject_str() +
                                 ' ' + str(exclude_subj) + ' ' +
                                 au.fromstats_str() + ' ' + datadir)
                        if not au.is_done(chkf, rstep):
                            # (meanfname, varfname, samplesize, subjvolfname,
                            #  newmeanfname, newvarfname, newstdfname='')
                            remove_subject_from_stats(
                                gfnames[eg][0], gfnames[eg][1],
                                groupsizes[eg][1], exclude_subj,
                                newmeanfname, newvarfname, newstdfname)
                            au.checklist_add(chkf, rstep)

                        # From here on use the statistics without the subject.
                        gfnames[eg][0] += exclude_str
                        gfnames[eg][1] += exclude_str
                        gfnames[eg][2] += exclude_str

                        groupsizes[eg][1] -= 1

                        subject_excluded = True

            # calculating distance
            if not au.is_done(chkf, step):
                mean1fname = au.add_extension_if_needed(gfnames[g1][0],
                                                        au.ext_str())
                mean2fname = au.add_extension_if_needed(gfnames[g2][0],
                                                        au.ext_str())
                var1fname = au.add_extension_if_needed(gfnames[g1][1],
                                                       au.ext_str())
                var2fname = au.add_extension_if_needed(gfnames[g2][1],
                                                       au.ext_str())
                std1fname = au.add_extension_if_needed(gfnames[g1][2],
                                                       au.ext_str())
                std2fname = au.add_extension_if_needed(gfnames[g2][2],
                                                       au.ext_str())

                outfname = measure_function(
                    mean1fname, mean2fname, var1fname, var2fname,
                    std1fname, std2fname,
                    groupsizes[g1][1], groupsizes[g2][1],
                    experiment, outdir, exclude_idx)

                if absolute:
                    change_to_absolute_values(outfname)

                au.checklist_add(chkf, step)

    return outfname
filename = set_filename (outdir, prefix + '_' + au.features_str(), otype) print ('Creating ' + filename) if otype == 'numpybin': np.save (filename, feats) elif otype == 'octave': sio.savemat (filename, {au.feats_str(): feats, au.labels_str(): labels}) elif otype == 'svmperf': labels[labels == 0] = -1 ae.write_svmperf_dat(filename, dataname, feats, labels) if excluf: exclulabels[exclulabels == 0] = -1 exclfilename = set_filename(outdir, prefix + '_' + au.excluded_str() + au.feats_str(), otype) ae.write_svmperf_dat(exclfilename, dataname, exclfeats, exclulabels) elif otype == 'arff': featnames = np.arange(nfeats) + 1 ae.write_arff (filename, dataname, featnames, feats, labels) else: err = 'Output method not recognised!' raise IOError(err) return filename #------------------------------------------------------------------------------- ## END EXTRACT FEATSET #-------------------------------------------------------------------------------
def group_distance(measure_function, datadir, groups, groupsizes, chkf,
                   absolute=False, outdir='', foldno='', expname='',
                   exclude_idx=-1, exclude_subj='', exclude_subjclass=''):
    """Apply ``measure_function`` to every pair of groups.

    For each pair ``(g1, g2)`` with ``g1 < g2`` the mean, variance and
    standard-deviation file names of both groups are built inside
    ``datadir`` and handed to ``measure_function``. Work already recorded in
    the checklist ``chkf`` (queried through ``au.is_done``) is skipped, and
    every completed step is registered with ``au.checklist_add``.

    Parameters
    ----------
    measure_function : callable
        Called as ``measure_function(mean1, mean2, var1, var2, std1, std2,
        size1, size2, experiment, outdir, exclude_idx)``; its return value
        is treated as the output file name.
    datadir : str
        Directory holding the per-group statistics files.
    groups : sequence of str
        Group names.
    groupsizes : sequence
        Indexed as ``groupsizes[g][1]`` to obtain the size of group ``g``.
        The entry of the group a subject is removed from is decremented in
        place.
    chkf
        Checklist handle passed to ``au.is_done`` / ``au.checklist_add``.
    absolute : bool, optional
        When true, ``change_to_absolute_values`` is applied to each output.
    outdir : str, optional
        Output directory; defaults to ``datadir`` when empty.
    foldno, expname
        Accepted for interface compatibility; not used here.
    exclude_idx : int, optional
        Index of the excluded subject; exclusion is active when > -1.
    exclude_subj : str, optional
        Passed to ``remove_subject_from_stats`` as the subject to remove.
    exclude_subjclass : str, optional
        Name of the group that selects which pairs get exclusion handling.

    Returns
    -------
    str
        Value returned by the last ``measure_function`` call made during
        this invocation, or ``''`` when no measure had to be computed.
    """
    if not outdir:
        outdir = datadir

    ngroups = len(groups)

    # One [mean, var, std] row of file paths per group. Plain Python strings
    # are used so long paths are never truncated and the entries stay ``str``
    # (a fixed-width numpy byte matrix yields bytes on Python 3).
    gfnames = [['', '', ''] for _ in range(ngroups)]

    # ``__name__`` exists on both Python 2 and 3 functions, unlike
    # ``func_name``.
    measure_name = measure_function.__name__

    # Defined up front so the final return cannot hit an unbound local when
    # there are fewer than two groups or every step was already done.
    outfname = ''

    subject_excluded = False

    for g1 in range(ngroups):
        g1name = groups[g1]
        gfnames[g1][0] = datadir + os.path.sep + g1name + '_' + au.mean_str()
        gfnames[g1][1] = datadir + os.path.sep + g1name + '_' + au.var_str()
        gfnames[g1][2] = datadir + os.path.sep + g1name + '_' + au.std_str()

        for g2 in range(g1 + 1, ngroups):
            g2name = groups[g2]
            gfnames[g2][0] = (datadir + os.path.sep + g2name + '_' +
                              au.mean_str())
            gfnames[g2][1] = (datadir + os.path.sep + g2name + '_' +
                              au.var_str())
            gfnames[g2][2] = (datadir + os.path.sep + g2name + '_' +
                              au.std_str())

            experiment = g1name + '_vs_' + g2name

            # check if exclude_subjclass is any of both current groups
            # NOTE(review): the group selected here is the *other* group of
            # the pair, not the one named by exclude_subjclass -- this looks
            # inverted; left unchanged, confirm the intent.
            eg = -1
            if exclude_idx > -1:
                if exclude_subjclass == g1name:
                    eg = g2
                elif exclude_subjclass == g2name:
                    eg = g1

            step = (au.measure_str() + ' ' + measure_name + ' ' +
                    experiment + ' ' + datadir)

            # remove subject from stats
            if eg > -1:
                exclude_str = '_' + au.excluded_str() + str(exclude_idx)
                step += exclude_str
                experiment += exclude_str

                if not au.is_done(chkf, step):
                    # The subject is removed at most once per call.
                    if not subject_excluded:
                        newmeanfname = gfnames[eg][0] + exclude_str
                        newvarfname = gfnames[eg][1] + exclude_str
                        newstdfname = gfnames[eg][2] + exclude_str

                        rstep = (au.remove_str() + ' ' + au.subject_str() +
                                 ' ' + str(exclude_subj) + ' ' +
                                 au.fromstats_str() + ' ' + datadir)
                        if not au.is_done(chkf, rstep):
                            # (meanfname, varfname, samplesize, subjvolfname,
                            #  newmeanfname, newvarfname, newstdfname='')
                            remove_subject_from_stats(
                                gfnames[eg][0], gfnames[eg][1],
                                groupsizes[eg][1], exclude_subj,
                                newmeanfname, newvarfname, newstdfname)
                            au.checklist_add(chkf, rstep)

                        # From here on use the statistics without the subject.
                        gfnames[eg][0] += exclude_str
                        gfnames[eg][1] += exclude_str
                        gfnames[eg][2] += exclude_str

                        groupsizes[eg][1] -= 1

                        subject_excluded = True

            # calculating distance
            if not au.is_done(chkf, step):
                mean1fname = au.add_extension_if_needed(gfnames[g1][0],
                                                        au.ext_str())
                mean2fname = au.add_extension_if_needed(gfnames[g2][0],
                                                        au.ext_str())
                var1fname = au.add_extension_if_needed(gfnames[g1][1],
                                                       au.ext_str())
                var2fname = au.add_extension_if_needed(gfnames[g2][1],
                                                       au.ext_str())
                std1fname = au.add_extension_if_needed(gfnames[g1][2],
                                                       au.ext_str())
                std2fname = au.add_extension_if_needed(gfnames[g2][2],
                                                       au.ext_str())

                outfname = measure_function(
                    mean1fname, mean2fname, var1fname, var2fname,
                    std1fname, std2fname,
                    groupsizes[g1][1], groupsizes[g2][1],
                    experiment, outdir, exclude_idx)

                if absolute:
                    change_to_absolute_values(outfname)

                au.checklist_add(chkf, step)

    return outfname