Example #1
0
def main():
    """Compute a sample covariance matrix from a genotype file and save it.

    Reads exactly four command-line arguments from ``sys.argv[1:]``:

    1. ``geno_in``      -- path handed to ``gr.genotype_reader_tables``
    2. ``norm_cov``     -- the string ``'1'`` requests a normalized
                           covariance; any other value disables it
    3. ``cov_out_hdf5`` -- HDF5 output path; the covariance is stored under
                           the key ``'Cov'``
    4. ``cov_out_csv``  -- output path handed to ``save_cov_in_text_format``

    Raises ``ValueError`` (from the tuple unpacking) when the number of
    arguments is not exactly four.
    """
    geno_in, norm_cov, cov_out_hdf5, cov_out_csv = sys.argv[1:]

    logger = LoggerFactory.get_logger(cov_out_hdf5 + '.log')
    LoggerFactory.log_command(logger, sys.argv[1:])

    ## Import genotype data
    logger.info('Loading genotype from %s', geno_in)
    geno_reader = gr.genotype_reader_tables(geno_in)

    # Only the literal string '1' turns normalization on.
    norm = norm_cov == '1'
    logger.info('Normalizing' if norm else 'NOT normalizing')

    sample_relatedness = geno_reader.getCovariance(normalize=norm)

    logger.info('Saving covariance to HDF5 file %s', cov_out_hdf5)
    out_dict = {'Cov': sample_relatedness}
    # The context manager closes the file even if the dump raises.
    with h5py.File(cov_out_hdf5, 'w') as o:
        util_functions.smartDumpDictHdf5(out_dict, o)

    logger.info('Saving covariance to CSV file %s', cov_out_csv)
    save_cov_in_text_format(cov_out_csv, sample_relatedness,
                            geno_reader.sample_ID)

    logger.info('Done!')
Example #2
0
def draw_and_save_panama(p, graphics_prefix, results_prefix):
    """Plot the PANAMA covariance matrices and dump the results to HDF5.

    ``p`` must provide ``get_Kpanama()``, ``get_Ktot()``,
    ``get_varianceComps()``, ``get_Xpanama()`` and the attribute ``Kpop``.
    The mapping returned by ``get_varianceComps()`` must contain the keys
    ``'Kpanama'``, ``'Kpop'`` and ``'noise'``.

    Two files are written:

    * ``<graphics_prefix>_Kmat.png`` -- a 2x2 figure with heat maps of
      Kpanama, Kpop and Ktot plus a bar chart of the variance components.
    * ``<results_prefix>_dat.hdf5`` -- ``Ktot``, ``Kpanama``, ``vComp`` and
      ``Xpanama`` (``Kpop`` is plotted but not stored).
    """
    # retrieve stuff
    Kpanama = p.get_Kpanama()
    Ktot = p.get_Ktot()
    Kpop = p.Kpop
    vComp = p.get_varianceComps()
    Xpanama = p.get_Xpanama()

    def _heatmap(position, title, matrix):
        # Draw one covariance matrix in the given cell of the 2x2 grid.
        subplt = pl.subplot(2, 2, position)
        pl.title(title)
        pl.imshow(matrix, vmin=0, vmax=1, interpolation='none',
                  cmap=cm.afmhot)
        pl.colorbar(ticks=[0, 0.5, 1], orientation='horizontal')
        subplt.set_xticks([])
        subplt.set_yticks([])

    out_file = graphics_prefix + '_Kmat.png'
    # compare Kpanama matrix, Kpop, and the total matrix
    fig = plt.figure(figsize=[10, 8])
    try:
        _heatmap(1, 'Kpanama', Kpanama)
        _heatmap(2, 'Kpop', Kpop)

        subplt = pl.subplot(2, 2, 3)
        print(list(vComp.keys()))
        labels = ['Kpanama', 'Kpop', 'noise']
        vComp_a = sp.array([vComp[k] for k in labels])
        # align='edge' makes the 0.6-wide bars span [k + 0.2, k + 0.8], so
        # they are centred on the ticks at k + 0.5 regardless of the
        # matplotlib default alignment.
        pl.bar(sp.arange(3) + 0.2, vComp_a, width=0.6, align='edge')
        subplt.set_xticks([0.5, 1.5, 2.5])
        subplt.set_xticklabels(labels)
        pl.ylabel('Variance Explained')

        _heatmap(4, 'Ktot', Ktot)
        fig.savefig(out_file)
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close(fig)

    out_dict = {
        'Ktot': Ktot,
        'Kpanama': Kpanama,
        'vComp': vComp,
        'Xpanama': Xpanama
    }
    out_file = results_prefix + '_dat.hdf5'
    # The context manager closes the file even if the dump raises.
    with h5py.File(out_file, 'w') as o:
        util_functions.smartDumpDictHdf5(out_dict, o)
Example #3
0
    def fitNullTraitByTrait(self,
                            verbose=False,
                            cache=False,
                            out_dir='./cache',
                            fname=None,
                            rewrite=False):
        """
        Fit null model trait by trait

        Args:
            verbose: accepted for interface compatibility; not used here.
            cache:   if True, results are read from / written to the HDF5
                     file ``os.path.join(out_dir, fname)``.
            out_dir: cache directory; created when missing (cache only).
            fname:   cache file name; required when ``cache`` is True.
            rewrite: if True, an existing cache file is ignored and
                     overwritten with freshly fitted results.

        Returns:
            dict keyed by trait ID. Each value is either what
            ``self.stSet.fitNull()`` returned for that trait or, when read
            from the cache, a dict of the datasets stored in that trait's
            HDF5 group. The same dict is stored in ``self.nullST``.
        """
        read_from_file = False
        if cache:
            assert fname is not None, 'MultiTraitSetTest:: specify fname'
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)
            out_file = os.path.join(out_dir, fname)
            read_from_file = os.path.exists(out_file) and not rewrite

        RV = {}
        if read_from_file:
            # The context manager closes the file even if a trait is missing.
            with h5py.File(out_file, 'r') as f:
                for p in range(self.P):
                    trait_id = self.traitID[p]
                    g = f[trait_id]
                    RV[trait_id] = dict((key, g[key][:]) for key in g.keys())
            self.nullST = RV
        else:
            # create stSet (once) and fit the null model column by column
            if self.stSet is None:
                y = sp.zeros((self.N, 1))
                self.stSet = MTSet(Y=y, S_R=self.S_R, U_R=self.U_R, F=self.F)
            for p in range(self.P):
                trait_id = self.traitID[p]
                # keep Y two-dimensional: a single (N x 1) column per trait
                self.stSet.Y = self.Y[:, p:p + 1]
                RV[trait_id] = self.stSet.fitNull()
            self.nullST = RV
            if cache:
                with h5py.File(out_file, 'w') as f:
                    smartDumpDictHdf5(RV, f)
        return RV
Example #4
0
    def fitNullTraitByTrait(self, verbose=False, cache=False, out_dir='./cache', fname=None, rewrite=False):
        """
        Fit null model trait by trait

        Args:
            verbose: accepted for interface compatibility; not used here.
            cache:   if True, results are read from / written to the HDF5
                     file ``os.path.join(out_dir, fname)``.
            out_dir: cache directory; created when missing (cache only).
            fname:   cache file name; required when ``cache`` is True.
            rewrite: if True, an existing cache file is ignored and
                     overwritten with freshly fitted results.

        Returns:
            dict keyed by trait ID. Each value is either what
            ``self.stSet.fitNull()`` returned for that trait or, when read
            from the cache, a dict of the datasets stored in that trait's
            HDF5 group. The same dict is stored in ``self.nullST``.
        """
        read_from_file = False
        if cache:
            assert fname is not None, 'MultiTraitSetTest:: specify fname'
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)
            out_file = os.path.join(out_dir, fname)
            read_from_file = os.path.exists(out_file) and not rewrite

        RV = {}
        if read_from_file:
            # The context manager closes the file even if a trait is missing.
            with h5py.File(out_file, 'r') as f:
                for p in range(self.P):
                    trait_id = self.traitID[p]
                    g = f[trait_id]
                    RV[trait_id] = dict((key, g[key][:]) for key in g.keys())
            self.nullST = RV
        else:
            # create stSet (once) and fit the null model column by column
            if self.stSet is None:
                y = sp.zeros((self.N, 1))
                self.stSet = MTSet(Y=y, S_R=self.S_R, U_R=self.U_R, F=self.F)
            for p in range(self.P):
                trait_id = self.traitID[p]
                # keep Y two-dimensional: a single (N x 1) column per trait
                self.stSet.Y = self.Y[:, p:p + 1]
                RV[trait_id] = self.stSet.fitNull()
            self.nullST = RV
            if cache:
                with h5py.File(out_file, 'w') as f:
                    smartDumpDictHdf5(RV, f)
        return RV
Example #5
0
    def fitNull(self,
                verbose=False,
                cache=False,
                out_dir='./cache',
                fname=None,
                rewrite=False,
                seed=None,
                n_times=10,
                factr=1e3,
                init_method=None):
        """
        Fit null model

        Args:
            verbose:     forwarded to ``self._gpNull.optimize``.
            cache:       if True, results are read from / written to the
                         HDF5 file ``os.path.join(out_dir, fname)``.
            out_dir:     cache directory; created when missing (cache only).
            fname:       cache file name; required when ``cache`` is True.
            rewrite:     if True, an existing cache file is ignored and
                         overwritten.
            seed:        if not None, seeds ``sp.random`` before anything
                         else happens.
            n_times:     maximum number of optimization restarts; must be
                         at least 1 when the model is actually fitted.
            factr:       accepted for interface compatibility; not used here.
            init_method: forwarded to ``self._initParams``.

        Returns:
            dict with the null-model results. When fitted (not cached) it
            holds ``params0_g``, ``params0_n``, ``Cg``, ``Cn``, ``conv``,
            ``time``, ``NLL0``, ``LMLgrad``, ``nit``, ``funcalls`` and,
            if the mean has exactly one term, ``B``.

        Raises:
            ValueError: if the model has to be fitted and ``n_times < 1``.
        """
        if seed is not None:
            sp.random.seed(seed)

        read_from_file = False
        if cache:
            assert fname is not None, 'MultiTraitSetTest:: specify fname'
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)
            out_file = os.path.join(out_dir, fname)
            read_from_file = os.path.exists(out_file) and not rewrite

        RV = {}
        if read_from_file:
            # The context manager closes the file even if a read fails.
            with h5py.File(out_file, 'r') as f:
                for key in f.keys():
                    RV[key] = f[key][:]
            self.setNull(RV)
        else:
            # Without at least one restart `conv` and `info` would never be
            # bound; fail early with a clear message instead.
            if n_times < 1:
                raise ValueError('MultiTraitSetTest:: n_times must be >= 1')
            start = TIME.time()
            if self.bgRE:
                self._gpNull = GP2KronSum(Y=self.Y,
                                          F=None,
                                          A=None,
                                          Cg=self.Cg,
                                          Cn=self.Cn,
                                          R=None,
                                          S_R=self.S_R,
                                          U_R=self.U_R)
            else:
                self._gpNull = GP2KronSumLR(self.Y,
                                            self.Cn,
                                            G=sp.ones((self.N, 1)),
                                            F=self.F,
                                            A=self.A)
                # freezes Cg to 0
                n_params = self._gpNull.covar.Cr.getNumberParams()
                self._gpNull.covar.Cr.setParams(1e-9 * sp.ones(n_params))
                self._gpNull.covar.act_Cr = False
            # restart from fresh initial parameters until one run converges
            for _ in range(n_times):
                params0 = self._initParams(init_method=init_method)
                self._gpNull.setParams(params0)
                conv, info = self._gpNull.optimize(verbose=verbose)
                if conv:
                    break
            if not conv:
                warnings.warn("not converged")
            LMLgrad = (self._gpNull.LML_grad()['covar']**2).mean()
            LML = self._gpNull.LML()
            if self._gpNull.mean.n_terms == 1:
                RV['B'] = self._gpNull.mean.B[0]
            elif self._gpNull.mean.n_terms > 1:
                warnings.warn('generalize to more than 1 fixed effect term')
            if self.bgRE:
                RV['params0_g'] = self.Cg.getParams()
            else:
                RV['params0_g'] = sp.zeros_like(self.Cn.getParams())
            RV['params0_n'] = self.Cn.getParams()
            if self.bgRE:
                RV['Cg'] = self.Cg.K()
            else:
                RV['Cg'] = sp.zeros_like(self.Cn.K())
            RV['Cn'] = self.Cn.K()
            # scalars are wrapped in 1-element arrays before being stored
            RV['conv'] = sp.array([conv])
            RV['time'] = sp.array([TIME.time() - start])
            RV['NLL0'] = sp.array([LML])
            RV['LMLgrad'] = sp.array([LMLgrad])
            RV['nit'] = sp.array([info['nit']])
            RV['funcalls'] = sp.array([info['funcalls']])
            self.null = RV
            if cache:
                with h5py.File(out_file, 'w') as f:
                    smartDumpDictHdf5(RV, f)
        return RV
Example #6
0
    def fitNull(self, verbose=False, cache=False, out_dir='./cache', fname=None, rewrite=False, seed=None, n_times=10, factr=1e3, init_method=None):
        """
        Fit null model

        Args:
            verbose:     forwarded to ``self._gpNull.optimize``.
            cache:       if True, results are read from / written to the
                         HDF5 file ``os.path.join(out_dir, fname)``.
            out_dir:     cache directory; created when missing (cache only).
            fname:       cache file name; required when ``cache`` is True.
            rewrite:     if True, an existing cache file is ignored and
                         overwritten.
            seed:        if not None, seeds ``sp.random`` before anything
                         else happens.
            n_times:     maximum number of optimization restarts; must be
                         at least 1 when the model is actually fitted.
            factr:       accepted for interface compatibility; not used here.
            init_method: forwarded to ``self._initParams``.

        Returns:
            dict with the null-model results. When fitted (not cached) it
            holds ``params0_g``, ``params0_n``, ``Cg``, ``Cn``, ``conv``,
            ``time``, ``NLL0``, ``LMLgrad``, ``nit``, ``funcalls`` and,
            if the mean has exactly one term, ``B``.

        Raises:
            ValueError: if the model has to be fitted and ``n_times < 1``.
        """
        if seed is not None:
            sp.random.seed(seed)

        read_from_file = False
        if cache:
            assert fname is not None, 'MultiTraitSetTest:: specify fname'
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)
            out_file = os.path.join(out_dir, fname)
            read_from_file = os.path.exists(out_file) and not rewrite

        RV = {}
        if read_from_file:
            # The context manager closes the file even if a read fails.
            with h5py.File(out_file, 'r') as f:
                for key in f.keys():
                    RV[key] = f[key][:]
            self.setNull(RV)
        else:
            # Without at least one restart `conv` and `info` would never be
            # bound; fail early with a clear message instead.
            if n_times < 1:
                raise ValueError('MultiTraitSetTest:: n_times must be >= 1')
            start = TIME.time()
            if self.bgRE:
                self._gpNull = GP2KronSum(Y=self.Y, F=None, A=None, Cg=self.Cg, Cn=self.Cn, R=None, S_R=self.S_R, U_R=self.U_R)
            else:
                self._gpNull = GP2KronSumLR(self.Y, self.Cn, G=sp.ones((self.N, 1)), F=self.F, A=self.A)
                # freezes Cg to 0
                n_params = self._gpNull.covar.Cr.getNumberParams()
                self._gpNull.covar.Cr.setParams(1e-9 * sp.ones(n_params))
                self._gpNull.covar.act_Cr = False
            # restart from fresh initial parameters until one run converges
            for _ in range(n_times):
                params0 = self._initParams(init_method=init_method)
                self._gpNull.setParams(params0)
                conv, info = self._gpNull.optimize(verbose=verbose)
                if conv:
                    break
            if not conv:
                warnings.warn("not converged")
            LMLgrad = (self._gpNull.LML_grad()['covar']**2).mean()
            LML = self._gpNull.LML()
            if self._gpNull.mean.n_terms == 1:
                RV['B'] = self._gpNull.mean.B[0]
            elif self._gpNull.mean.n_terms > 1:
                warnings.warn('generalize to more than 1 fixed effect term')
            if self.bgRE:
                RV['params0_g'] = self.Cg.getParams()
            else:
                RV['params0_g'] = sp.zeros_like(self.Cn.getParams())
            RV['params0_n'] = self.Cn.getParams()
            if self.bgRE:
                RV['Cg'] = self.Cg.K()
            else:
                RV['Cg'] = sp.zeros_like(self.Cn.K())
            RV['Cn'] = self.Cn.K()
            # scalars are wrapped in 1-element arrays before being stored
            RV['conv'] = sp.array([conv])
            RV['time'] = sp.array([TIME.time() - start])
            RV['NLL0'] = sp.array([LML])
            RV['LMLgrad'] = sp.array([LMLgrad])
            RV['nit'] = sp.array([info['nit']])
            RV['funcalls'] = sp.array([info['funcalls']])
            self.null = RV
            if cache:
                with h5py.File(out_file, 'w') as f:
                    smartDumpDictHdf5(RV, f)
        return RV
Example #7
0
                params['Cr'] = gp.covar.Cr.getParams().copy()
                params['Cn'] = gp.covar.Cn.getParams().copy()
                gp0.setParams(params)

                print '   .. optimization'
                _t0 = time.time()
                conv, info = gp.optimize()
                _t1 = time.time()
                conv, info = OPT.opt_hyper(gp0, gp0.getParams())
                _t2 = time.time()
                t[ni, ri] = _t1 - _t0
                t0[ni, ri] = _t2 - _t1
                r[ni, ri] = t[ni, ri] / t0[ni, ri]
        RV = {'t': t, 't0': t0, 'r': r, 'Ns': Ns}
        fout = h5py.File(out_file, 'w')
        smartDumpDictHdf5(RV, fout)
        fout.close()
    else:
        R = {}
        fin = h5py.File(out_file, 'r')
        for key in fin.keys():
            R[key] = fin[key][:]
        fin.close()

    pdb.set_trace()

    import pylab as PL
    PL.subplot(211)
    PL.title('MTSet-PC')
    PL.plot(R['Ns'], R['t'].mean(1), 'g', label='new')
    PL.plot(R['Ns'], R['t0'].mean(1), 'r', label='old')
                params['Cg'] = gp.covar.Cg.getParams().copy()
                params['Cn'] = gp.covar.Cn.getParams().copy()
                gp0.setParams(params)

                print '   .. optimization' 
                _t0 = time.time()
                conv, info = gp.optimize()
                _t1 = time.time()
                conv,info = OPT.opt_hyper(gp0,gp0.getParams())
                _t2 = time.time()
                t[ni, ri] = _t1-_t0
                t0[ni, ri] = _t2-_t1
                r[ni, ri] = t[ni, ri] / t0[ni, ri]
        RV = {'t': t, 't0': t0, 'r': r, 'Ns': Ns}
        fout = h5py.File(out_file, 'w')
        smartDumpDictHdf5(RV, fout)
        fout.close()
    else:
        R = {}
        fin = h5py.File(out_file, 'r')
        for key in fin.keys():
            R[key] = fin[key][:]
        fin.close()

    pdb.set_trace()

    import pylab as PL
    PL.subplot(211)
    PL.title('MTSet')
    PL.plot(R['Ns'], R['t'].mean(1),'g')
    PL.plot(R['Ns'], R['t0'].mean(1),'r')