def main():
    """Compute the sample covariance of a genotype file and save it twice.

    Expects exactly four positional command-line arguments in
    ``sys.argv[1:]``, in this order:

    1. ``geno_in``      -- path handed to ``gr.genotype_reader_tables``.
    2. ``norm_cov``     -- the string ``'1'`` enables normalization; any
       other value disables it.
    3. ``cov_out_hdf5`` -- output HDF5 path; the log file is written to this
       path with ``'.log'`` appended.
    4. ``cov_out_csv``  -- output path for the text/CSV version.

    Raises:
        ValueError: if the number of command-line arguments is not four
            (raised by the tuple unpacking).
    """
    geno_in, norm_cov, cov_out_hdf5, cov_out_csv = sys.argv[1:]

    logger = LoggerFactory.get_logger(cov_out_hdf5 + '.log')
    LoggerFactory.log_command(logger, sys.argv[1:])

    # Import genotype data
    logger.info('Loading genotype from %s', geno_in)
    geno_reader = gr.genotype_reader_tables(geno_in)

    # Only the literal string '1' switches normalization on.
    norm = norm_cov == '1'
    if norm:
        logger.info('Normalizing')
    else:
        logger.info('NOT normalizing')
    sample_relatedness = geno_reader.getCovariance(normalize=norm)

    logger.info('Saving covariance to HDF5 file %s', cov_out_hdf5)
    out_dict = {'Cov': sample_relatedness}
    # Context manager guarantees the HDF5 handle is closed even if the dump
    # raises, so a failed run does not leave a locked/half-open file behind.
    with h5py.File(cov_out_hdf5, 'w') as o:
        util_functions.smartDumpDictHdf5(out_dict, o)

    logger.info('Saving covariance to CSV file %s', cov_out_csv)
    save_cov_in_text_format(cov_out_csv, sample_relatedness,
                            geno_reader.sample_ID)
    logger.info('Done!')
def _draw_kernel_panel(index, title, K):
    """Draw matrix *K* as a heat map in cell *index* of the 2x2 subplot grid."""
    subplt = pl.subplot(2, 2, index)
    pl.title(title)
    pl.imshow(K, vmin=0, vmax=1, interpolation='none', cmap=cm.afmhot)
    pl.colorbar(ticks=[0, 0.5, 1], orientation='horizontal')
    subplt.set_xticks([])
    subplt.set_yticks([])


def draw_and_save_panama(p, graphics_prefix, results_prefix):
    """Plot the PANAMA matrices and variance components, then dump results.

    Writes two files:

    * ``graphics_prefix + '_Kmat.png'`` -- a 2x2 figure with heat maps of
      ``Kpanama``, ``Kpop`` and ``Ktot`` plus a bar chart of the variance
      components ``'Kpanama'``, ``'Kpop'`` and ``'noise'``.
    * ``results_prefix + '_dat.hdf5'`` -- ``Ktot``, ``Kpanama``, ``vComp``
      and ``Xpanama`` written through ``util_functions.smartDumpDictHdf5``.

    Args:
        p: object providing ``get_Kpanama()``, ``get_Ktot()``,
            ``get_varianceComps()``, ``get_Xpanama()`` and the attribute
            ``Kpop``.
        graphics_prefix: path prefix for the PNG figure.
        results_prefix: path prefix for the HDF5 results file.

    Raises:
        KeyError: if the variance-component mapping lacks one of the keys
            ``'Kpanama'``, ``'Kpop'`` or ``'noise'``.
    """
    # retrieve stuff
    Kpanama = p.get_Kpanama()
    Ktot = p.get_Ktot()
    Kpop = p.Kpop
    vComp = p.get_varianceComps()
    Xpanama = p.get_Xpanama()

    component_names = ['Kpanama', 'Kpop', 'noise']

    # compare Kpanama matrix, Kpop, and the total matrix
    fig = plt.figure(figsize=[10, 8])
    try:
        _draw_kernel_panel(1, 'Kpanama', Kpanama)
        _draw_kernel_panel(2, 'Kpop', Kpop)

        subplt = pl.subplot(2, 2, 3)
        # Diagnostic output retained from the original implementation.
        print(list(vComp.keys()))
        vComp_a = sp.array([vComp[k] for k in component_names])
        pl.bar(sp.arange(3) + 0.2, vComp_a, width=0.6)
        subplt.set_xticks([0.5, 1.5, 2.5])
        subplt.set_xticklabels(component_names)
        pl.ylabel('Variance Explained')

        _draw_kernel_panel(4, 'Ktot', Ktot)

        fig.savefig(graphics_prefix + '_Kmat.png')
    finally:
        # Always release the figure, even when drawing or saving fails, so
        # repeated calls do not accumulate open figures.
        plt.close(fig)

    out_dict = {
        'Ktot': Ktot,
        'Kpanama': Kpanama,
        'vComp': vComp,
        'Xpanama': Xpanama,
    }
    # Context manager closes the HDF5 handle even if the dump raises.
    with h5py.File(results_prefix + '_dat.hdf5', 'w') as o:
        util_functions.smartDumpDictHdf5(out_dict, o)
def fitNullTraitByTrait(self, verbose=False, cache=False, out_dir='./cache', fname=None, rewrite=False):
    """
    Fit null model trait by trait

    Args:
        verbose:    accepted for interface compatibility; not used here.
        cache:      if True, results are read from / written to
                    ``os.path.join(out_dir, fname)`` (HDF5).
        out_dir:    cache directory; created when missing and ``cache`` is set.
        fname:      cache file name; required when ``cache`` is True.
        rewrite:    if True, an existing cache file is ignored and overwritten.

    Returns:
        dict mapping each entry of ``self.traitID`` to the result for that
        trait: either the arrays loaded from the cache file or the value
        returned by ``self.stSet.fitNull()``. The same dict is stored in
        ``self.nullST``.
    """
    read_from_file = False
    if cache:
        assert fname is not None, 'MultiTraitSetTest:: specify fname'
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        out_file = os.path.join(out_dir, fname)
        read_from_file = os.path.exists(out_file) and not rewrite

    RV = {}
    if read_from_file:
        # Context manager closes the file even if a trait group is missing
        # or a dataset cannot be read.
        with h5py.File(out_file, 'r') as f:
            for p in range(self.P):
                trait_id = self.traitID[p]
                g = f[trait_id]
                RV[trait_id] = {}
                for key in g.keys():
                    RV[trait_id][key] = g[key][:]
        self.nullST = RV
    else:
        # create stSet and fit null column by column; returns all info
        if self.stSet is None:
            y = sp.zeros((self.N, 1))
            self.stSet = MTSet(Y=y, S_R=self.S_R, U_R=self.U_R, F=self.F)
        for p in range(self.P):
            trait_id = self.traitID[p]
            # p:p + 1 keeps the phenotype as a 2-D single-column slice
            self.stSet.Y = self.Y[:, p:p + 1]
            RV[trait_id] = self.stSet.fitNull()
        self.nullST = RV
        if cache:
            with h5py.File(out_file, 'w') as f:
                smartDumpDictHdf5(RV, f)
    return RV
def fitNullTraitByTrait(self, verbose=False, cache=False, out_dir='./cache', fname=None, rewrite=False):
    """
    Fit null model trait by trait

    Args:
        verbose:    accepted for interface compatibility; not used here.
        cache:      if True, results are read from / written to
                    ``os.path.join(out_dir, fname)`` (HDF5).
        out_dir:    cache directory; created when missing and ``cache`` is set.
        fname:      cache file name; required when ``cache`` is True.
        rewrite:    if True, an existing cache file is ignored and overwritten.

    Returns:
        dict mapping each entry of ``self.traitID`` to the result for that
        trait: either the arrays loaded from the cache file or the value
        returned by ``self.stSet.fitNull()``. The same dict is stored in
        ``self.nullST``.
    """
    read_from_file = False
    if cache:
        assert fname is not None, 'MultiTraitSetTest:: specify fname'
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        out_file = os.path.join(out_dir, fname)
        read_from_file = os.path.exists(out_file) and not rewrite

    RV = {}
    if read_from_file:
        # Context manager closes the file even if a trait group is missing
        # or a dataset cannot be read.
        with h5py.File(out_file, 'r') as f:
            for p in range(self.P):
                trait_id = self.traitID[p]
                g = f[trait_id]
                RV[trait_id] = {}
                for key in g.keys():
                    RV[trait_id][key] = g[key][:]
        self.nullST = RV
    else:
        # create stSet and fit null column by column; returns all info
        if self.stSet is None:
            y = sp.zeros((self.N, 1))
            self.stSet = MTSet(Y=y, S_R=self.S_R, U_R=self.U_R, F=self.F)
        for p in range(self.P):
            trait_id = self.traitID[p]
            # p:p + 1 keeps the phenotype as a 2-D single-column slice
            self.stSet.Y = self.Y[:, p:p + 1]
            RV[trait_id] = self.stSet.fitNull()
        self.nullST = RV
        if cache:
            with h5py.File(out_file, 'w') as f:
                smartDumpDictHdf5(RV, f)
    return RV
def fitNull(self, verbose=False, cache=False, out_dir='./cache', fname=None, rewrite=False, seed=None, n_times=10, factr=1e3, init_method=None):
    """
    Fit null model

    Args:
        verbose:        forwarded to ``self._gpNull.optimize``.
        cache:          if True, results are read from / written to
                        ``os.path.join(out_dir, fname)`` (HDF5).
        out_dir:        cache directory; created when missing and ``cache``
                        is set.
        fname:          cache file name; required when ``cache`` is True.
        rewrite:        if True, an existing cache file is ignored and
                        overwritten.
        seed:           if not None, passed to ``sp.random.seed``.
        n_times:        maximum number of optimization attempts, each started
                        from fresh ``self._initParams`` values; must be >= 1
                        when the model is actually fitted.
        factr:          accepted for interface compatibility; not used here.
        init_method:    forwarded to ``self._initParams``.

    Returns:
        dict of results. When loaded from cache it holds whatever datasets
        the file contains; when fitted it holds ``'B'`` (only with exactly
        one fixed-effect term), ``'params0_g'``, ``'params0_n'``, ``'Cg'``,
        ``'Cn'``, ``'conv'``, ``'time'``, ``'NLL0'``, ``'LMLgrad'``,
        ``'nit'`` and ``'funcalls'``.

    Raises:
        ValueError: if the model has to be fitted and ``n_times`` < 1.
    """
    if seed is not None:
        sp.random.seed(seed)

    read_from_file = False
    if cache:
        assert fname is not None, 'MultiTraitSetTest:: specify fname'
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        out_file = os.path.join(out_dir, fname)
        read_from_file = os.path.exists(out_file) and not rewrite

    RV = {}
    if read_from_file:
        # Context manager closes the file even if a dataset read fails.
        with h5py.File(out_file, 'r') as f:
            for key in f.keys():
                RV[key] = f[key][:]
        self.setNull(RV)
    else:
        if n_times < 1:
            # Without at least one attempt `conv` and `info` would be unbound.
            raise ValueError('n_times must be at least 1')

        start = TIME.time()
        if self.bgRE:
            self._gpNull = GP2KronSum(Y=self.Y, F=None, A=None, Cg=self.Cg,
                                      Cn=self.Cn, R=None, S_R=self.S_R,
                                      U_R=self.U_R)
        else:
            self._gpNull = GP2KronSumLR(self.Y, self.Cn,
                                        G=sp.ones((self.N, 1)),
                                        F=self.F, A=self.A)
            # freezes Cg to 0
            n_params = self._gpNull.covar.Cr.getNumberParams()
            self._gpNull.covar.Cr.setParams(1e-9 * sp.ones(n_params))
            self._gpNull.covar.act_Cr = False

        # Restart from new initial parameters until one attempt converges.
        for _ in range(n_times):
            params0 = self._initParams(init_method=init_method)
            self._gpNull.setParams(params0)
            conv, info = self._gpNull.optimize(verbose=verbose)
            if conv:
                break
        if not conv:
            warnings.warn("not converged")

        LMLgrad = (self._gpNull.LML_grad()['covar']**2).mean()
        LML = self._gpNull.LML()

        if self._gpNull.mean.n_terms == 1:
            RV['B'] = self._gpNull.mean.B[0]
        elif self._gpNull.mean.n_terms > 1:
            # 'B' is intentionally left out in this case.
            warnings.warn('generalize to more than 1 fixed effect term')

        if self.bgRE:
            RV['params0_g'] = self.Cg.getParams()
        else:
            RV['params0_g'] = sp.zeros_like(self.Cn.getParams())
        RV['params0_n'] = self.Cn.getParams()

        if self.bgRE:
            RV['Cg'] = self.Cg.K()
        else:
            RV['Cg'] = sp.zeros_like(self.Cn.K())
        RV['Cn'] = self.Cn.K()

        # Scalars are wrapped in 1-element arrays before being stored.
        RV['conv'] = sp.array([conv])
        RV['time'] = sp.array([TIME.time() - start])
        RV['NLL0'] = sp.array([LML])
        RV['LMLgrad'] = sp.array([LMLgrad])
        RV['nit'] = sp.array([info['nit']])
        RV['funcalls'] = sp.array([info['funcalls']])
        self.null = RV

        if cache:
            with h5py.File(out_file, 'w') as f:
                smartDumpDictHdf5(RV, f)
    return RV
def fitNull(self, verbose=False, cache=False, out_dir='./cache', fname=None, rewrite=False, seed=None, n_times=10, factr=1e3, init_method=None):
    """
    Fit null model

    Args:
        verbose:        forwarded to ``self._gpNull.optimize``.
        cache:          if True, results are read from / written to
                        ``os.path.join(out_dir, fname)`` (HDF5).
        out_dir:        cache directory; created when missing and ``cache``
                        is set.
        fname:          cache file name; required when ``cache`` is True.
        rewrite:        if True, an existing cache file is ignored and
                        overwritten.
        seed:           if not None, passed to ``sp.random.seed``.
        n_times:        maximum number of optimization attempts, each started
                        from fresh ``self._initParams`` values; must be >= 1
                        when the model is actually fitted.
        factr:          accepted for interface compatibility; not used here.
        init_method:    forwarded to ``self._initParams``.

    Returns:
        dict of results. When loaded from cache it holds whatever datasets
        the file contains; when fitted it holds ``'B'`` (only with exactly
        one fixed-effect term), ``'params0_g'``, ``'params0_n'``, ``'Cg'``,
        ``'Cn'``, ``'conv'``, ``'time'``, ``'NLL0'``, ``'LMLgrad'``,
        ``'nit'`` and ``'funcalls'``.

    Raises:
        ValueError: if the model has to be fitted and ``n_times`` < 1.
    """
    if seed is not None:
        sp.random.seed(seed)

    read_from_file = False
    if cache:
        assert fname is not None, 'MultiTraitSetTest:: specify fname'
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        out_file = os.path.join(out_dir, fname)
        read_from_file = os.path.exists(out_file) and not rewrite

    RV = {}
    if read_from_file:
        # Context manager closes the file even if a dataset read fails.
        with h5py.File(out_file, 'r') as f:
            for key in f.keys():
                RV[key] = f[key][:]
        self.setNull(RV)
    else:
        if n_times < 1:
            # Without at least one attempt `conv` and `info` would be unbound.
            raise ValueError('n_times must be at least 1')

        start = TIME.time()
        if self.bgRE:
            self._gpNull = GP2KronSum(Y=self.Y, F=None, A=None, Cg=self.Cg,
                                      Cn=self.Cn, R=None, S_R=self.S_R,
                                      U_R=self.U_R)
        else:
            self._gpNull = GP2KronSumLR(self.Y, self.Cn,
                                        G=sp.ones((self.N, 1)),
                                        F=self.F, A=self.A)
            # freezes Cg to 0
            n_params = self._gpNull.covar.Cr.getNumberParams()
            self._gpNull.covar.Cr.setParams(1e-9 * sp.ones(n_params))
            self._gpNull.covar.act_Cr = False

        # Restart from new initial parameters until one attempt converges.
        for _ in range(n_times):
            params0 = self._initParams(init_method=init_method)
            self._gpNull.setParams(params0)
            conv, info = self._gpNull.optimize(verbose=verbose)
            if conv:
                break
        if not conv:
            warnings.warn("not converged")

        LMLgrad = (self._gpNull.LML_grad()['covar']**2).mean()
        LML = self._gpNull.LML()

        if self._gpNull.mean.n_terms == 1:
            RV['B'] = self._gpNull.mean.B[0]
        elif self._gpNull.mean.n_terms > 1:
            # 'B' is intentionally left out in this case.
            warnings.warn('generalize to more than 1 fixed effect term')

        if self.bgRE:
            RV['params0_g'] = self.Cg.getParams()
        else:
            RV['params0_g'] = sp.zeros_like(self.Cn.getParams())
        RV['params0_n'] = self.Cn.getParams()

        if self.bgRE:
            RV['Cg'] = self.Cg.K()
        else:
            RV['Cg'] = sp.zeros_like(self.Cn.K())
        RV['Cn'] = self.Cn.K()

        # Scalars are wrapped in 1-element arrays before being stored.
        RV['conv'] = sp.array([conv])
        RV['time'] = sp.array([TIME.time() - start])
        RV['NLL0'] = sp.array([LML])
        RV['LMLgrad'] = sp.array([LMLgrad])
        RV['nit'] = sp.array([info['nit']])
        RV['funcalls'] = sp.array([info['funcalls']])
        self.null = RV

        if cache:
            with h5py.File(out_file, 'w') as f:
                smartDumpDictHdf5(RV, f)
    return RV
params['Cr'] = gp.covar.Cr.getParams().copy() params['Cn'] = gp.covar.Cn.getParams().copy() gp0.setParams(params) print ' .. optimization' _t0 = time.time() conv, info = gp.optimize() _t1 = time.time() conv, info = OPT.opt_hyper(gp0, gp0.getParams()) _t2 = time.time() t[ni, ri] = _t1 - _t0 t0[ni, ri] = _t2 - _t1 r[ni, ri] = t[ni, ri] / t0[ni, ri] RV = {'t': t, 't0': t0, 'r': r, 'Ns': Ns} fout = h5py.File(out_file, 'w') smartDumpDictHdf5(RV, fout) fout.close() else: R = {} fin = h5py.File(out_file, 'r') for key in fin.keys(): R[key] = fin[key][:] fin.close() pdb.set_trace() import pylab as PL PL.subplot(211) PL.title('MTSet-PC') PL.plot(R['Ns'], R['t'].mean(1), 'g', label='new') PL.plot(R['Ns'], R['t0'].mean(1), 'r', label='old')
params['Cg'] = gp.covar.Cg.getParams().copy() params['Cn'] = gp.covar.Cn.getParams().copy() gp0.setParams(params) print ' .. optimization' _t0 = time.time() conv, info = gp.optimize() _t1 = time.time() conv,info = OPT.opt_hyper(gp0,gp0.getParams()) _t2 = time.time() t[ni, ri] = _t1-_t0 t0[ni, ri] = _t2-_t1 r[ni, ri] = t[ni, ri] / t0[ni, ri] RV = {'t': t, 't0': t0, 'r': r, 'Ns': Ns} fout = h5py.File(out_file, 'w') smartDumpDictHdf5(RV, fout) fout.close() else: R = {} fin = h5py.File(out_file, 'r') for key in fin.keys(): R[key] = fin[key][:] fin.close() pdb.set_trace() import pylab as PL PL.subplot(211) PL.title('MTSet') PL.plot(R['Ns'], R['t'].mean(1),'g') PL.plot(R['Ns'], R['t0'].mean(1),'r')