def make_fig(filename, hemi, surf, atlas_name, colour_func, output_dir,
             missing=(0.5, 0.5, 0.5)):
    """
    Make a brain figure and save it from four fixed camera angles.

    filename : path to the csv that contains the brain region names and the
        associated values.
    hemi : 'lh' or 'rh'.
    surf : 'inflated' or 'pial'
    atlas_name : 'Destrieux' or 'DSK'.
    colour_func : function object that creates a colour dictionary based on
        the data inside the csv. It is called as
        ``colour_func(filename, atlas_name)`` and must return a 3-tuple whose
        last item is the ``{region: colour}`` dictionary. May be None, in
        which case every region is treated as missing.
    output_dir : name of output directory (put inside 'outputs').
    missing : colour given to atlas regions that are absent from the colour
        dictionary. Pass None to leave those regions undrawn (and unreported).
    """
    if colour_func is not None:
        # Only the colour dictionary is used; the first two returned values
        # are discarded instead of being bound to the builtins `min`/`max`.
        _, _, cd = colour_func(filename, atlas_name)
    else:
        cd = {}

    for region in atlas.get_atlas_labels(atlas_name):
        if region in cd:
            load.load_surface(hemi, surf, atlas_name, region, cd[region])
        elif missing is not None:
            print("Warning: '%s' region missing (%s atlas)." %
                  (region, atlas_name))
            load.load_surface(hemi, surf, atlas_name, region, missing)

    # reposition the camera: one (azimuth, elevation, roll) triple per image
    angles = [[180, -90, -70], [180, 90, 70], [180, 0, 0], [0, 180, 0]]
    fpath = paths.pj(paths.OUTPUTS_DIR, output_dir)
    if not os.path.exists(fpath):
        os.makedirs(fpath)
    for a, e, r in angles:
        # each image is named after the view that produced it
        f = paths.pj(fpath, "%i_%i_%i.png" % (a, e, r))
        mlab.view(a, e, roll=r)
        mlab.savefig(f, magnification=3)
def grab_phi2_matrix(ped_dir=paths.pj(paths.data_dir, 'newest_ped_files')):
    """
    Returns a square matrix containing phi2 coefficients for all subjects in
    the sample based on the pedigree files in the given directory, with the
    singletons removed.

    ped_dir : directory containing 'phi2.gz' (the sparse phi2 table) and
        'pedigree.csv' (which must have an 'id' column). Defaults to the
        'newest_ped_files' folder inside ``paths.data_dir``.

    Returns a pandas DataFrame whose rows and columns are both labelled with
    subject ids.
    """
    # load the phi2 sparse matrix; the context manager guarantees the gzip
    # handle is closed once pandas has consumed it
    phi2_path = paths.pj(ped_dir, 'phi2.gz')
    with gzip.open(phi2_path, 'rb') as fh:
        df = pd.read_table(fh, sep=' ', skipinitialspace=True)
    df.columns = ['id0', 'id1', 'phi', 'd7']

    # convert to square matrix (row/column 0 is padding so that the ids can
    # be used directly as positions; it is dropped again below)
    n = df.id0.nunique()
    R = np.zeros((n + 1, n + 1))
    # Cast the ids to int explicitly: iterating whole rows would upcast them
    # to float alongside `phi`, and numpy rejects float indices.
    for i, j, phi in zip(df.id0.astype(int), df.id1.astype(int), df.phi):
        R[i, j] = phi
        R[j, i] = phi
    R = pd.DataFrame(R)
    R.drop(0, axis=0, inplace=True)
    R.drop(0, axis=1, inplace=True)

    # label rows and columns according to subject ids
    # NOTE(review): the concat below aligns on index, so this presumably
    # relies on pedigree.csv's first column matching R's 1..n row labels.
    ped_path = paths.pj(ped_dir, 'pedigree.csv')
    ids = pd.read_csv(ped_path, index_col=0).id
    R.columns = ids
    R = pd.concat([R, pd.DataFrame(ids)], axis=1)
    R.set_index('id', inplace=True)

    # remove singletons: after masking every entry equal to 1, a subject
    # whose largest remaining coefficient is 0 is related to nobody
    _R = R.replace(1, np.nan)
    rmax = _R.max(axis=1)
    singletons = set(rmax[rmax == 0].index.tolist())
    subjects = [s for s in R.index.tolist() if s not in singletons]
    R = R.loc[subjects, subjects]

    # return the complete phi2 matrix
    return R
def grab_traits(dic):
    """
    Returns a pandas DataFrame containing the desired traits.

    dic : dictionary with keys representing the csv file names (relative to
        ``paths.DATA_DIR``) and entries being lists of trait names within
        that csv. The dictionary and its lists are left unmodified.

    The first csv found to contain an 'age' (or, failing that, 'Age') column
    contributes that column as well; a warning is issued if no csv has one.
    The per-csv frames are concatenated column-wise.
    """
    dfs = []
    age_found = False
    for csv, traits in dic.items():
        df = pandas.read_csv(paths.pj(paths.DATA_DIR, csv), index_col=0)
        # Work on a copy so the caller's list is never extended in place.
        wanted = list(traits)
        if not age_found:
            for age_col in ('age', 'Age'):
                if age_col in df.columns:
                    # avoid a duplicated column when age was requested
                    # explicitly by the caller
                    if age_col not in wanted:
                        wanted.append(age_col)
                    age_found = True
                    break
        dfs.append(df[wanted])
    if not age_found:
        warnings.warn("No 'age' trait found in the supplied CSVs.")
    return pandas.concat(dfs, axis=1)
def make_phi2_vectors(pheno_file, out_file, remove_affected=True,
                      ped_dir=None, traits=None):
    """
    Loads a phenotype file, and creates phi2 vectors for each of the
    dichotomous traits within it. Optionally removes affected subjects.

    pheno_file : path to a csv of phenotypes, indexed by its first column.
    out_file : name of the csv written inside ``paths.merv_dir``.
    remove_affected : when true, entries of a vector that equal 1 are set to
        NaN. Accepts a bool or its string form; 'False', 'false', '0', 'None'
        and '' mean "keep".
    ped_dir : pedigree directory handed to ``grab_phi2_matrix``; its default
        is used when this is empty/None.
    traits : comma-separated string (or any iterable) of column names. When
        None, every column with exactly two distinct values is used.

    Each vector holds, per subject, the largest phi2 coefficient shared with
    any subject whose trait value equals 1. Columns are named
    'phi2_<trait>_excl' when affected subjects are removed and
    'phi2_<trait>_incl' otherwise.
    """
    # load the phi2 matrix
    R = grab_phi2_matrix(ped_dir) if ped_dir else grab_phi2_matrix()

    # Normalise the flag once so the masking and the column name agree; the
    # string forms cover values arriving as text rather than as a bool.
    remove = str(remove_affected).strip().lower() not in (
        'false', '0', 'none', '')

    # load the phenotype(s)
    df = pd.read_csv(pheno_file, index_col=0)
    if traits is None:
        traits = [c for c in df.columns if df[c].nunique() == 2]
    elif hasattr(traits, 'split'):
        traits = traits.split(',')
    else:
        traits = list(traits)
    subjects = R.index.tolist()
    # reindex (rather than .loc) so subjects absent from the phenotype file
    # become all-NaN rows instead of raising KeyError
    df = df.reindex(index=subjects)[traits]

    # make the phi2 vector(s)
    vectors, names = [], []
    for trait_name in traits:
        trait = df[trait_name].dropna()
        cases = trait[trait == 1].index.tolist()
        vector = R.loc[subjects, cases].max(axis=1)
        if remove:
            # a coefficient of exactly 1 is blanked out -- presumably the
            # affected subjects themselves; verify against the phi2 table
            vector[vector == 1] = np.nan
        vectors.append(vector)
        names.append('phi2_%s_%s' % (trait_name, 'excl' if remove else 'incl'))
    df2 = pd.concat(vectors, axis=1)
    df2.columns = names

    # save the phi2 vectors
    df2.to_csv(paths.pj(paths.merv_dir, out_file), index=True)
"""
Grab the stats.

For every structure, parse the univariate output of each of its five
measures with ``parse.uni_polyg`` and the 'rhog_' output with
``parse.biv_polyg``, and collect one dictionary per structure in ``data``.
"""
__author__ = 'smathias'

import os

import pandas

import solarpy.py2solar.parse as parse
import solarpy.utils.paths as paths
from make_asym_jobs import structures

path = paths.pj(paths.OUTPUTS_DIR, 'asymmetries_mpi')
# The directory is only read here, so list it once instead of once per lookup.
listing = os.listdir(path)

data = []
for structure in structures:
    dic = {'name': structure}

    # univariate results: keys are prefixed with the measure they belong to
    for x in ['Bilateral_', 'Left_', 'Right_', 'Asym_', 'Abs_Asym_']:
        _f = x + structure + '.sub'
        # first file whose name starts with '<measure><structure>.sub';
        # raises IndexError when no such file exists
        f = paths.pj(path, [name for name in listing if name.startswith(_f)][0])
        _dic = {x + k: v for k, v in parse.uni_polyg(f).items()}
        dic.update(_dic)

    # bivariate result: matched anywhere in the file name, keys unprefixed
    _f = 'rhog_' + structure + '.sub'
    f = paths.pj(path, [name for name in listing if _f in name][0])
    _dic = parse.biv_polyg(f)
    dic.update(_dic)

    data.append(dic)
traits_outrm['age'] = traits['Age']
traits_outrm['icv'] = traits['etiv']
traits1 = traits_outrm

# Make jobs
# every column except the two covariates gets its own univariate jobs
univariates = traits1.columns.tolist()
univariates.remove('age')
univariates.remove('icv')

for trait in univariates:
    make_jobs.make_single_job(
        'asymmetries_mpi', trait, [trait], cov='age sex n_icv',
        old_peds=False
    )
    # one additional job per chromosome 1..22
    for chrom in range(1, 23):
        make_jobs.make_single_job(
            'asymmetries_yale_1', '%s_%i' % (trait, chrom), [trait],
            chrom=chrom
        )

for structure in structures:
    make_jobs.make_single_job(
        'asymmetries_mpi', 'rhog_' + structure,
        ['Left_%s' % structure, 'Right_%s' % structure],
        cov='age sex n_icv', tests='-testrhog', old_peds=False
    )
    for chrom in range(1, 23):
        # The trait pair is built from `structure`; the name `struct` used
        # here previously is not defined anywhere in this script.
        make_jobs.make_single_job(
            'asymmetries_yale_1', 'rhog_%s_%i' % (structure, chrom),
            ['Left_%s' % structure, 'Right_%s' % structure],
            tests='-testrhog', chrom=chrom
        )

traits1.to_csv(paths.pj(paths.JOBS_DIR, 'asymmetries_mpi', '_traits.csv'),
               index_label='id')
# NOTE(review): the jobs above are created under 'asymmetries_yale_1' but
# this file is written to 'asymmetries_yale' -- confirm that is intended.
traits.to_csv(paths.pj(paths.JOBS_DIR, 'asymmetries_yale', '_traits.csv'),
              index_label='id')
"""
Grab the stats.

Parse every file in the 'braincog' outputs directory with
``parse.uni_polyg`` and write the results, one row per file, to 'h2rs.csv'.
"""
__author__ = 'smathias'

import os

import pandas

import solarpy.py2solar.parse as parse
import solarpy.utils.paths as paths

outputs_dir = paths.pj(paths.OUTPUTS_DIR, 'braincog')
output_files = os.listdir(outputs_dir)
# parenthesised so the line is valid under both Python 2 and Python 3
print(len(output_files))

# key each parsed result by the file name up to its first '.'
entries = {}
for f in output_files:
    entries[f.split('.')[0]] = parse.uni_polyg(paths.pj(outputs_dir, f))

# transpose so that each file becomes a row, then discard unwanted columns
df = pandas.DataFrame(entries).T
df = df.drop(['f', 'trait', 'n', 'se'], axis=1)
df.to_csv('h2rs.csv')
def make_single_job(jobs_name, job_name, traits, old_peds=True, inorm=True,
                    resid=False, **kwargs):
    """
    Creates a submission file and tcl file for a given job, and places them
    in the specific jobs directory. By default the job will calculate h2rs of
    the traits. Other tasks, such as running linkage, are done by passing
    kwargs.

    jobs_name : name of the job family; files go to ``paths.JOBS_DIR/jobs_name``.
    job_name : name of this job; used for '<job_name>.sub' and '<job_name>.tcl'.
    traits : list of trait names.
    old_peds : use ``templates.OLD_PED_PATH`` when True, else
        ``templates.NEW_PED_PATH``.
    inorm : when True, a 'define n_<trait> = inorm_<trait>' statement is
        emitted per trait and the 'n_'-prefixed names are analysed instead.
    resid : when True, ``templates.RESID % job_name`` is added to the tcl.

    Recognised kwargs:
    cov : covariate string; None means no covariates. When the key is absent
        ``templates.COV`` is used.
    tests : extra text for the 'tests' field of the tcl template.
    chrom : when present, ``templates.MULTIPOINT % chrom`` is added.
    mibd : only read when `chrom` is given (see the review note in the body).
    """
    # make directory for job family
    job_path = paths.pj(paths.JOBS_DIR, jobs_name)
    if not os.path.exists(job_path):
        os.makedirs(job_path)

    # make a sub file; the template fields are listed explicitly rather than
    # being harvested from locals() by name length
    sub_fields = {
        'ep': templates.EXPORT_PATH + job_name + '/',
        'jp': templates.JOBS_PATH + jobs_name,
        'pp': (templates.OLD_PED_PATH if old_peds is True
               else templates.NEW_PED_PATH),
        'jn': job_name,
        'rp': templates.RESULTS_PATH,
    }
    sub = templates.SUB.format(**sub_fields)
    with open(paths.pj(job_path, job_name + '.sub'), 'w') as fh:
        fh.write(sub)

    # make a tcl file
    if inorm is True:
        # one define statement per line; joining them with no separator
        # would fuse the statements of a multi-trait job into one
        inorm_st = '\n'.join(
            'define n_%s = inorm_%s' % (trait, trait) for trait in traits)
        traits_st = ' '.join('n_%s' % trait for trait in traits)
    else:
        inorm_st = ''
        traits_st = ' '.join(traits)

    if 'cov' in kwargs:
        cov = kwargs['cov']
        # an explicit None switches covariates off entirely
        cov_st = 'cov ' + cov if cov is not None else ''
    else:
        cov_st = templates.COV

    resid_st = templates.RESID % job_name if resid is True else ''
    tests_st = kwargs.get('tests', '')

    if 'chrom' in kwargs:
        chrom = kwargs['chrom']
        # NOTE(review): `mibd` is resolved here but never reaches the
        # template below -- confirm whether MULTIPOINT should receive it.
        mibd = kwargs.get('mibd', paths.MIBD_DIR_TX)
        if mibd == 'local':
            mibd = paths.MIBD_DIR
        multipoint_st = templates.MULTIPOINT % (chrom)
    else:
        multipoint_st = ''

    tcl_fields = {
        'inorm': inorm_st,
        'traits': traits_st,
        'cov': cov_st,
        'resid': resid_st,
        'tests': tests_st,
        'multipoint': multipoint_st,
        'job_name': job_name,
    }
    print(tcl_fields)
    tcl = templates.TCL_POLYG.substitute(**tcl_fields)
    with open(paths.pj(job_path, job_name + '.tcl'), 'w') as fh:
        fh.write(tcl)
__author__ = 'smathias' from itertools import product import numpy as np import pandas import solarpy.utils.paths as paths import solarpy.utils.qc as qc import solarpy.py2solar.make_jobs as make_jobs import matplotlib.pyplot as plt import seaborn as sns p = lambda f: paths.pj(paths.DATA_DIR, f) fs = [ 'all_facs_cog.csv', 'etiv4sam.csv', 'freesurfer.csv', 'gobs.csv', 'stan.csv' ] dfs = [pandas.read_csv(p(f), index_col=0) for f in fs] df = pandas.concat(dfs, axis=1) wanted_cols = [ 'GCOG_VM', 'GCOG_WM', 'GCOG_SM', 'GCOG_EF', 'GCOG_G', 'BIFG_VM', 'BIFG_WM', 'BIFG_SM',
"""
Grab the residualised traits.

Read the 'residual' column of 'resid_<folder>.csv' from every folder inside
the 'braincog' results directory and write the columns side by side, one per
folder, to '_traits_r.csv'.
"""
__author__ = 'smathias'

import os

import pandas

import solarpy.utils.paths as paths

results_dir = paths.pj(paths.RESULTS_DIR, 'braincog')
# results_dir = paths.RESULTS_DIR
folders = os.listdir(results_dir)

data = []
for folder in folders:
    # parenthesised so the line is valid under both Python 2 and Python 3
    print(folder)
    filename = paths.pj(results_dir, folder, 'resid_%s.csv' % folder)
    s = pandas.read_csv(filename, index_col=0)['residual']
    # the folder name becomes the column name in the combined table
    s.name = folder
    data.append(s)

df = pandas.concat(data, axis=1)
df.to_csv('_traits_r.csv', index_label='id')