Ejemplo n.º 1
0
def fitPairwiseModel(Y, XX=None, S_XX=None, U_XX=None, verbose=False):
    """
    Fit a bivariate model for every pair of columns of Y.

    The single-trait fit (fitSingleTraitModel) supplies per-trait variances,
    which seed each pairwise optimization. The fitted pairwise correlations
    are combined with the single-trait variances into full PxP matrices,
    which are then shifted to be positive definite and Cholesky-factorized.

    Returns the dict produced by fitSingleTraitModel, extended with:
        'Cg0', 'Cn0'                -- correlation * outer product of std devs
        'conv2'                     -- PxP boolean matrix of convergence flags
        'Cg0_reg', 'Cn0_reg'        -- diagonally shifted versions of Cg0 / Cn0
        'params0_Cg', 'params0_Cn'  -- lower-triangular Cholesky entries
    """
    _, n_traits = Y.shape
    RV = fitSingleTraitModel(Y, XX=XX, S_XX=S_XX, U_XX=U_XX, verbose=verbose)
    Cg = covariance.freeform(2)
    Cn = covariance.freeform(2)
    gp = gp2kronSum(mean(Y[:, 0:2]), Cg, Cn, XX=XX, S_XX=S_XX, U_XX=U_XX)

    def _correlation(covar):
        # off-diagonal entry scaled by the geometric mean of the variances
        K = covar.K()
        return K[0, 1] / SP.sqrt(K.diagonal().prod())

    var_st = RV['varST']
    conv2 = SP.ones((n_traits, n_traits), dtype=bool)
    rho_g = SP.ones((n_traits, n_traits))
    rho_n = SP.ones((n_traits, n_traits))
    for p1 in range(n_traits):
        for p2 in range(p1):
            if verbose:
                print('.. fitting correlation (%d,%d)' % (p1, p2))
            gp.setY(Y[:, [p1, p2]])
            # start from the single-trait std devs and a tiny random
            # off-diagonal term ('Cg' is drawn before 'Cn')
            start = {}
            for name, col in (('Cg', 0), ('Cn', 1)):
                start[name] = SP.array([
                    SP.sqrt(var_st[p1, col]),
                    1e-6 * SP.randn(),
                    SP.sqrt(var_st[p2, col]),
                ])
            converged, _ = OPT.opt_hyper(gp, start, factr=1e3)
            conv2[p1, p2] = converged
            rho_g[p1, p2] = _correlation(Cg)
            rho_n[p1, p2] = _correlation(Cn)
            # mirror the lower triangle into the upper triangle
            for mat in (conv2, rho_g, rho_n):
                mat[p2, p1] = mat[p1, p2]

    sd_g = SP.sqrt(var_st[:, 0:1])
    sd_n = SP.sqrt(var_st[:, 1:2])
    RV['Cg0'] = rho_g * SP.dot(sd_g, sd_g.T)
    RV['Cn0'] = rho_n * SP.dot(sd_n, sd_n.T)
    RV['conv2'] = conv2

    # regularization: shift the diagonal by |most negative eigenvalue| + 1e-4
    min_eig_g = LA.eigh(RV['Cg0'])[0].min()
    min_eig_n = LA.eigh(RV['Cn0'])[0].min()
    shift_g = abs(SP.minimum(min_eig_g, 0)) + 1e-4
    shift_n = abs(SP.minimum(min_eig_n, 0)) + 1e-4
    identity = SP.eye(n_traits)
    RV['Cg0_reg'] = RV['Cg0'] + shift_g * identity
    RV['Cn0_reg'] = RV['Cn0'] + shift_n * identity

    lower = SP.tril_indices(n_traits)
    RV['params0_Cg'] = LA.cholesky(RV['Cg0_reg'])[lower]
    RV['params0_Cn'] = LA.cholesky(RV['Cn0_reg'])[lower]
    return RV
Ejemplo n.º 2
0
 def optimize(self,
              Xr,
              params0=None,
              n_times=10,
              verbose=True,
              vmax=5,
              perturb=1e-3,
              factr=1e7):
     """
     Optimize the model considering Xr.

     Args:
         Xr: array for the region term. A rescaled copy (sum of squares
             equal to self.N) is handed to self.gp; the caller's array is
             left untouched.
         params0: dict of initial parameters. If None, it is built from
             the null-model fit stored in self.null (self.fitNull() is
             run first when no null fit is available) and 'Cr' is drawn
             at random on every attempt.
         n_times: maximum number of optimization attempts (must be >= 1).
             Restarts only happen when params0 was None.
         verbose: if True, report when the null model is fitted upstream.
         vmax: a fit is rejected when the largest diagonal entry of
             self.gp.Cr.K() is not below this value.
         perturb: scale of the random initial 'Cr' parameters.
         factr: forwarded to OPT.opt_hyper.

     Returns:
         dict with 'time', 'params0', 'nit', 'funcalls', 'var', 'conv',
         'NLLAlt', 'LLR' and 'LMLgrad'; when self.P > 1 also 'Cr', 'Cn'
         and (if self.bgRE) 'Cg'.

     Raises:
         ValueError: if n_times is smaller than 1.
     """
     if n_times < 1:
         raise ValueError("n_times must be at least 1")

     # set params0 from null if params0 is None
     params_was_None = params0 is None
     if params_was_None:
         if self.null is None:
             if verbose:
                 print(".. fitting null model upstream")
             self.fitNull()
         if self.bgRE:
             params0 = {
                 'Cg': self.null['params0_g'],
                 'Cn': self.null['params0_n']
             }
         else:
             params0 = {'Cn': self.null['params0_n']}
         if 'params_mean' in self.null:
             if self.null['params_mean'].shape[0] > 0:
                 params0['mean'] = self.null['params_mean']

     # Rescale into a new array: this keeps the caller's data intact and
     # also works for integer-valued input, where an in-place multiply
     # (and Python 2 integer division) would misbehave.
     Xr = Xr * SP.sqrt(float(self.N) / (Xr**2).sum())
     self.gp.set_Xr(Xr)
     self.gp.restart()

     start = TIME.time()
     for _ in range(n_times):
         if params_was_None:
             # fresh random starting point for the region term
             params0['Cr'] = perturb * SP.randn(self.rank_r * self.P)
         conv, info = OPT.opt_hyper(self.gp, params0, factr=factr)
         # additionally require bounded region variance and a small gradient
         conv *= self.gp.Cr.K().diagonal().max() < vmax
         conv *= self.getLMLgrad() < 0.1
         # user-supplied starting points are tried exactly once
         if conv or not params_was_None:
             break
     self.infoOpt = info
     if not conv:
         warnings.warn("not converged")

     # return value
     RV = {}
     if self.P > 1:
         RV['Cr'] = self.getCr()
         if self.bgRE:
             RV['Cg'] = self.getCg()
         RV['Cn'] = self.getCn()
     RV['time'] = SP.array([TIME.time() - start])
     RV['params0'] = params0
     RV['nit'] = SP.array([info['nit']])
     RV['funcalls'] = SP.array([info['funcalls']])
     RV['var'] = self.getVariances()
     RV['conv'] = SP.array([conv])
     RV['NLLAlt'] = SP.array([self.getNLLAlt()])
     RV['LLR'] = SP.array([self.getLLR()])
     RV['LMLgrad'] = SP.array([self.getLMLgrad()])
     return RV
Ejemplo n.º 3
0
    def fitNull(self,verbose=True,cache=False,out_dir='./cache',fname=None,rewrite=False,seed=None,n_times=10,factr=1e3,init_method=None):
        """
        Fit the null model, optionally caching the result in an HDF5 file.

        Args:
            verbose: accepted for interface compatibility; not used here.
            cache: if True, results are read from / written to
                os.path.join(out_dir, fname).
            out_dir: cache directory (created when missing).
            fname: cache file name; required when cache is True.
            rewrite: if True, refit even when the cache file exists.
            seed: if not None, seeds SP.random before fitting.
            n_times: maximum number of optimization attempts (must be >= 1
                when a fit is actually performed).
            factr: forwarded to OPT.opt_hyper.
            init_method: forwarded to self._initParams.

        Returns:
            dict describing the null fit. When read from cache it contains
            whatever datasets the file holds; otherwise 'params0_g',
            'params0_n', 'Cg', 'Cn', 'conv', 'time', 'NLL0', 'LMLgrad',
            'nit', 'funcalls', optionally 'params_mean', and (if self.bgRE)
            'h2', 'h2_ste', 'Cg_ste', 'Cn_ste'.

        Raises:
            ValueError: if a fit is required and n_times is smaller than 1.
        """
        if seed is not None:
            SP.random.seed(seed)

        read_from_file = False
        if cache:
            assert fname is not None, 'MultiTraitSetTest:: specify fname'
            try:
                os.makedirs(out_dir)
            except OSError:
                # fine if the directory is already there (possibly created
                # concurrently); anything else is a real error
                if not os.path.isdir(out_dir):
                    raise
            out_file = os.path.join(out_dir,fname)
            read_from_file = os.path.exists(out_file) and not rewrite

        RV = {}
        if read_from_file:
            # the context manager closes the file even if a read fails
            with h5py.File(out_file,'r') as f:
                for key in f.keys():
                    RV[key] = f[key][:]
            self.setNull(RV)
            return RV

        if n_times < 1:
            raise ValueError("n_times must be at least 1")

        start = TIME.time()
        if self.bgRE:
            self.gpNull = gp2kronSum(self.mean,self.Cg,self.Cn,XX=self.XX,S_XX=self.S_XX,U_XX=self.U_XX)
        else:
            self.gpNull = gp2kronSumLR(self.Y,self.Cn,Xr=SP.ones((self.N,1)),F=self.F)
        # retry from fresh initial parameters until the optimizer converges
        for _ in range(n_times):
            params0,Ifilter = self._initParams(init_method=init_method)
            conv,info = OPT.opt_hyper(self.gpNull,params0,Ifilter=Ifilter,factr=factr)
            if conv:
                break
        if not conv:
            warnings.warn("not converged")

        # mean squared gradient over all parameter groups (gradient
        # evaluated once rather than once per key)
        grad = self.gpNull.LMLgrad()
        LMLgrad = SP.concatenate([grad[key]**2 for key in grad.keys()]).mean()
        LML = self.gpNull.LML()

        if 'mean' in params0.keys():
            RV['params_mean'] = self.gpNull.mean.getParams()
        RV['params0_g'] = self.Cg.getParams()
        RV['params0_n'] = self.Cn.getParams()
        RV['Cg'] = self.Cg.K()
        RV['Cn'] = self.Cn.K()
        RV['conv'] = SP.array([conv])
        RV['time'] = SP.array([TIME.time()-start])
        RV['NLL0'] = SP.array([LML])
        RV['LMLgrad'] = SP.array([LMLgrad])
        RV['nit'] = SP.array([info['nit']])
        RV['funcalls'] = SP.array([info['funcalls']])
        if self.bgRE:
            RV['h2'] = self.gpNull.h2()
            RV['h2_ste'] = self.gpNull.h2_ste()
            RV['Cg_ste'] = self.gpNull.ste('Cg')
            RV['Cn_ste'] = self.gpNull.ste('Cn')
        self.null = RV
        if cache:
            with h5py.File(out_file,'w') as f:
                dumpDictHdf5(RV,f)
        return RV
Ejemplo n.º 4
0
 def optimize(self,Xr,params0=None,n_times=10,verbose=True,vmax=5,perturb=1e-3,factr=1e7):
     """
     Optimize the model considering Xr.

     Args:
         Xr: array for the region term. A rescaled copy (sum of squares
             equal to self.N) is handed to self.gp; the caller's array is
             left untouched.
         params0: dict of initial parameters. If None, it is built from
             the null-model fit stored in self.null (self.fitNull() is
             run first when no null fit is available) and 'Cr' is drawn
             at random on every attempt.
         n_times: maximum number of optimization attempts (must be >= 1).
             Restarts only happen when params0 was None.
         verbose: if True, report when the null model is fitted upstream.
         vmax: a fit is rejected when the largest diagonal entry of
             self.gp.Cr.K() is not below this value.
         perturb: scale of the random initial 'Cr' parameters.
         factr: forwarded to OPT.opt_hyper.

     Returns:
         dict with 'time', 'params0', 'nit', 'funcalls', 'var', 'conv',
         'NLLAlt', 'LLR' and 'LMLgrad'; when self.P > 1 also 'Cr', 'Cn'
         and (if self.bgRE) 'Cg'.

     Raises:
         ValueError: if n_times is smaller than 1.
     """
     if n_times < 1:
         raise ValueError("n_times must be at least 1")

     # set params0 from null if params0 is None
     params_was_None = params0 is None
     if params_was_None:
         if self.null is None:
             if verbose:
                 print(".. fitting null model upstream")
             self.fitNull()
         if self.bgRE:
             params0 = {'Cg':self.null['params0_g'],'Cn':self.null['params0_n']}
         else:
             params0 = {'Cn':self.null['params0_n']}
         if 'params_mean' in self.null:
             if self.null['params_mean'].shape[0]>0:
                 params0['mean'] = self.null['params_mean']

     # Rescale into a new array: this keeps the caller's data intact and
     # also works for integer-valued input, where an in-place multiply
     # (and Python 2 integer division) would misbehave.
     Xr = Xr*SP.sqrt(float(self.N)/(Xr**2).sum())
     self.gp.set_Xr(Xr)
     self.gp.restart()

     start = TIME.time()
     for _ in range(n_times):
         if params_was_None:
             # fresh random starting point for the region term
             params0['Cr'] = perturb*SP.randn(self.rank_r*self.P)
         conv,info = OPT.opt_hyper(self.gp,params0,factr=factr)
         # additionally require bounded region variance and a small gradient
         conv *= self.gp.Cr.K().diagonal().max()<vmax
         conv *= self.getLMLgrad()<0.1
         # user-supplied starting points are tried exactly once
         if conv or not params_was_None:
             break
     self.infoOpt = info
     if not conv:
         warnings.warn("not converged")

     # return value
     RV = {}
     if self.P>1:
         RV['Cr'] = self.getCr()
         if self.bgRE:
             RV['Cg'] = self.getCg()
         RV['Cn'] = self.getCn()
     RV['time'] = SP.array([TIME.time()-start])
     RV['params0'] = params0
     RV['nit'] = SP.array([info['nit']])
     RV['funcalls'] = SP.array([info['funcalls']])
     RV['var'] = self.getVariances()
     RV['conv'] = SP.array([conv])
     RV['NLLAlt'] = SP.array([self.getNLLAlt()])
     RV['LLR'] = SP.array([self.getLLR()])
     RV['LMLgrad'] = SP.array([self.getLMLgrad()])
     return RV
Ejemplo n.º 5
0
def fitPairwiseModel(Y, XX=None, S_XX=None, U_XX=None, verbose=False):
    """
    Estimate pairwise trait correlations and build initial covariances.

    Single-trait variances come from fitSingleTraitModel. A two-trait model
    is then optimized for each pair (p1, p2) with p2 < p1, and the resulting
    correlations are scaled by the single-trait standard deviations to give
    'Cg0' and 'Cn0'. Both are shifted along the diagonal until positive
    definite ('Cg0_reg', 'Cn0_reg'), and the lower-triangular Cholesky
    entries are stored as 'params0_Cg' and 'params0_Cn'. Pairwise
    convergence flags are stored in 'conv2'.
    """
    _, P = Y.shape
    RV = fitSingleTraitModel(Y, XX=XX, S_XX=S_XX, U_XX=U_XX, verbose=verbose)
    Cg = covariance.freeform(2)
    Cn = covariance.freeform(2)
    gp = gp2kronSum(mean(Y[:, 0:2]), Cg, Cn, XX=XX, S_XX=S_XX, U_XX=U_XX)

    variances = RV['varST']
    converged = SP.ones((P, P), dtype=bool)
    corr_g = SP.ones((P, P))
    corr_n = SP.ones((P, P))

    def _initial(i, j, column):
        # [std_i, tiny random off-diagonal, std_j] for one covariance term
        return SP.array([
            SP.sqrt(variances[i, column]),
            1e-6 * SP.randn(),
            SP.sqrt(variances[j, column]),
        ])

    def _rho(covar):
        # correlation implied by a fitted 2x2 covariance
        K = covar.K()
        return K[0, 1] / SP.sqrt(K.diagonal().prod())

    for p1 in range(P):
        for p2 in range(p1):
            if verbose:
                print('.. fitting correlation (%d,%d)' % (p1, p2))
            gp.setY(Y[:, [p1, p2]])
            # 'Cg' is initialized before 'Cn' so random draws keep their order
            init_g = _initial(p1, p2, 0)
            init_n = _initial(p1, p2, 1)
            flag, _ = OPT.opt_hyper(gp, {'Cg': init_g, 'Cn': init_n},
                                    factr=1e3)
            converged[p1, p2] = converged[p2, p1] = flag
            corr_g[p1, p2] = corr_g[p2, p1] = _rho(Cg)
            corr_n[p1, p2] = corr_n[p2, p1] = _rho(Cn)

    std_g = SP.sqrt(variances[:, 0:1])
    std_n = SP.sqrt(variances[:, 1:2])
    RV['Cg0'] = corr_g * SP.dot(std_g, std_g.T)
    RV['Cn0'] = corr_n * SP.dot(std_n, std_n.T)
    RV['conv2'] = converged

    # regularization: offset = |min(smallest eigenvalue, 0)| + 1e-4
    smallest_g = LA.eigh(RV['Cg0'])[0].min()
    smallest_n = LA.eigh(RV['Cn0'])[0].min()
    offset_g = abs(SP.minimum(smallest_g, 0)) + 1e-4
    offset_n = abs(SP.minimum(smallest_n, 0)) + 1e-4
    RV['Cg0_reg'] = RV['Cg0'] + offset_g * SP.eye(P)
    RV['Cn0_reg'] = RV['Cn0'] + offset_n * SP.eye(P)

    tril = SP.tril_indices(P)
    RV['params0_Cg'] = LA.cholesky(RV['Cg0_reg'])[tril]
    RV['params0_Cn'] = LA.cholesky(RV['Cn0_reg'])[tril]
    return RV
Ejemplo n.º 6
0
def fitSingleTraitModel(Y, XX=None, S_XX=None, U_XX=None, verbose=False):
    """
    Fit a separate one-trait model to every column of Y.

    Returns a dict with
        'conv1' -- boolean vector, one convergence flag per trait
        'varST' -- (P, 2) array; column 0 holds Cg.K()[0, 0] and column 1
                   holds Cn.K()[0, 0] after each fit
    """
    _, n_traits = Y.shape
    Cg = covariance.lowrank(1)
    Cn = covariance.lowrank(1)
    gp = gp2kronSum(mean(Y[:, 0:1]), Cg, Cn, XX=XX, S_XX=S_XX, U_XX=U_XX)

    # the same starting point is used for every trait
    start = {}
    start['Cg'] = SP.sqrt(0.5) * SP.ones(1)
    start['Cn'] = SP.sqrt(0.5) * SP.ones(1)

    variances = SP.zeros((n_traits, 2))
    converged = SP.zeros(n_traits, dtype=bool)
    for trait in range(n_traits):
        if verbose:
            print('.. fitting variance trait %d' % trait)
        gp.setY(Y[:, trait:trait + 1])
        flag, _ = OPT.opt_hyper(gp, start, factr=1e3)
        converged[trait] = flag
        variances[trait, 0] = Cg.K()[0, 0]
        variances[trait, 1] = Cn.K()[0, 0]

    return {'conv1': converged, 'varST': variances}
Ejemplo n.º 7
0
def fitSingleTraitModel(Y, XX=None, S_XX=None, U_XX=None, verbose=False):
    """
    Run the one-trait fit independently on each column of Y.

    The returned dict has two entries: 'conv1', the per-trait convergence
    flags from OPT.opt_hyper, and 'varST', a (P, 2) array holding the
    fitted Cg.K()[0, 0] and Cn.K()[0, 0] for each trait.
    """
    _, P = Y.shape
    Cg = covariance.lowrank(1)
    Cn = covariance.lowrank(1)
    gp = gp2kronSum(mean(Y[:, 0:1]), Cg, Cn, XX=XX, S_XX=S_XX, U_XX=U_XX)

    # shared initial parameters for all traits
    init = {'Cg': SP.sqrt(0.5) * SP.ones(1)}
    init['Cn'] = SP.sqrt(0.5) * SP.ones(1)

    conv_flags = SP.zeros(P, dtype=bool)
    var_st = SP.zeros((P, 2))
    for idx in range(P):
        if verbose:
            print('.. fitting variance trait %d' % idx)
        column = Y[:, idx:idx + 1]
        gp.setY(column)
        conv_flags[idx], _ = OPT.opt_hyper(gp, init, factr=1e3)
        var_g = Cg.K()[0, 0]
        var_n = Cn.K()[0, 0]
        var_st[idx, 0] = var_g
        var_st[idx, 1] = var_n

    RV = {}
    RV['conv1'] = conv_flags
    RV['varST'] = var_st
    return RV
Ejemplo n.º 8
0
                    gp.covar.setRandomParams()
                else:
                    n_params = gp.covar.Cr.getNumberParams()
                    n_params += gp.covar.Cn.getNumberParams()
                    params1 = {'covar': sp.randn(n_params)}
                    gp.setParams(params1)
                params = {}
                params['Cr'] = gp.covar.Cr.getParams().copy()
                params['Cn'] = gp.covar.Cn.getParams().copy()
                gp0.setParams(params)

                print '   .. optimization'
                _t0 = time.time()
                conv, info = gp.optimize()
                _t1 = time.time()
                conv, info = OPT.opt_hyper(gp0, gp0.getParams())
                _t2 = time.time()
                t[ni, ri] = _t1 - _t0
                t0[ni, ri] = _t2 - _t1
                r[ni, ri] = t[ni, ri] / t0[ni, ri]
        RV = {'t': t, 't0': t0, 'r': r, 'Ns': Ns}
        fout = h5py.File(out_file, 'w')
        smartDumpDictHdf5(RV, fout)
        fout.close()
    else:
        R = {}
        fin = h5py.File(out_file, 'r')
        for key in fin.keys():
            R[key] = fin[key][:]
        fin.close()
Ejemplo n.º 9
0
                    n_params = gp.covar.Cr.getNumberParams()
                    n_params+= gp.covar.Cg.getNumberParams()
                    n_params+= gp.covar.Cn.getNumberParams()
                    params1 = {'covar': sp.randn(n_params)}
                    gp.setParams(params1)
                params = {}
                params['Cr'] = gp.covar.Cr.getParams().copy()
                params['Cg'] = gp.covar.Cg.getParams().copy()
                params['Cn'] = gp.covar.Cn.getParams().copy()
                gp0.setParams(params)

                print '   .. optimization' 
                _t0 = time.time()
                conv, info = gp.optimize()
                _t1 = time.time()
                conv,info = OPT.opt_hyper(gp0,gp0.getParams())
                _t2 = time.time()
                t[ni, ri] = _t1-_t0
                t0[ni, ri] = _t2-_t1
                r[ni, ri] = t[ni, ri] / t0[ni, ri]
        RV = {'t': t, 't0': t0, 'r': r, 'Ns': Ns}
        fout = h5py.File(out_file, 'w')
        smartDumpDictHdf5(RV, fout)
        fout.close()
    else:
        R = {}
        fin = h5py.File(out_file, 'r')
        for key in fin.keys():
            R[key] = fin[key][:]
        fin.close()
Ejemplo n.º 10
0
    # define covariance matrices
    Cg = limix.CFreeFormCF(P)
    Cn = limix.CFreeFormCF(P)
    
    if 0:
        # generate parameters
        params = {}
        params['Cg']   = SP.randn(int(0.5*P*(P+1)))
        params['Cn']   = SP.randn(int(0.5*P*(P+1)))
        params['mean'] = 1e-2*SP.randn(mean.getParams().shape[0])
        print "check gradient with gp2kronSum"
        gp = gp2kronSum(mean,Cg,Cn,XX)
        gp.setParams(params)
        gp.checkGradient()
        print "test optimization"
        conv,info = OPT.opt_hyper(gp,params,factr=1e3)
        print conv
        ipdb.set_trace()

    if 1:
        # generate parameters
        params = {}
        params['Cr']   = SP.randn(P)
        params['Cg']   = SP.randn(int(0.5*P*(P+1)))
        params['Cn']   = SP.randn(int(0.5*P*(P+1)))
        params['mean'] = 1e-2*SP.randn(mean.getParams().shape[0])
        print "check gradient with gp3kronSum"
        gp = gp3kronSum(mean,Cg,Cn,XX,Xr=Xr)
        gp.setParams(params)
        gp.LMLgrad()
        gp.checkGradient()
Ejemplo n.º 11
0
def test_lmm_lr_speed(G,y,Z,Kbg,Covs=None,S=None,U=None):
    """
    Low-rank LMM association scan using the mtset implementation.

    input:
    G    :   genotypes; each column is tested in turn as a fixed effect
    y    :   phenotype
    Z    :   features of low-rank matrix (not modified; a copy scaled by
             1/sqrt(Z.shape[1]) is passed to the model)
    Kbg  :   background covariance matrix
    Covs :   fixed effect covariates (optional)
    S, U :   forwarded to gp3kronSum as S_XX and U_XX

    output:
    pv   :   chi2(1) survival-function p-values from LRT = 2*(LML0 - LML)
    beta :   last mean parameter after fitting each column of G

    When Z has more columns than G has rows, the call is delegated to
    test_lmm_lr and its result is returned unchanged.
    """
    m = mean(y)

    if Z.shape[1] > G.shape[0]:
        return test_lmm_lr(G, y, Z, Kbg, Covs=Covs)

    has_covs = Covs is not None
    if has_covs:
        m.addFixedEffect(Covs)
        nCovs = Covs.shape[1]

    Cg = covariance.freeform(1)
    Cn = covariance.freeform(1)

    # Scale into a new array: dividing in place would shrink the caller's Z
    # again on every call and fails outright for integer-valued Z.
    Z = Z / np.sqrt(Z.shape[1])
    gp = gp3kronSum(m,Cg,Cn,XX=Kbg,Xr=Z,S_XX=S,U_XX=U)

    # null model: small random starting point
    params_rnd = {}
    params_rnd['Cg'] = 1e-4*np.random.randn(1)
    params_rnd['Cn'] = 1e-4*np.random.randn(1)
    params_rnd['Cr'] = 1e-4*np.random.randn(1)
    if has_covs:
        params_rnd['mean'] = 1e-6*np.random.randn(nCovs)

    OPT.opt_hyper(gp,params_rnd)

    LML0 = gp.LML()
    params0 = gp.getParams()

    # alternative model: start from the null fit, with one extra mean
    # weight for the tested genotype column
    params_rnd = params0.copy()
    if has_covs:
        mean0 = params0['mean']
        params_rnd['mean'] = 1e-6*np.random.randn(nCovs+1)
        params_rnd['mean'][:nCovs] = mean0
    else:
        params_rnd['mean'] = 1e-6*np.random.randn(1)

    F = G.shape[1]
    LML = np.zeros(F)
    beta = np.zeros(F)

    for f in range(F):
        # rebuild the fixed effects: covariates first, tested column last
        m.clearFixedEffect()
        if has_covs:
            m.addFixedEffect(Covs)
        m.addFixedEffect(G[:,[f]])
        OPT.opt_hyper(gp, params_rnd)
        beta[f] = m.getParams()[-1]
        LML[f] = gp.LML()

    # NOTE(review): the sign convention suggests gp.LML() returns a negative
    # log marginal likelihood -- confirm against the gp3kronSum implementation.
    LRT = 2*(LML0-LML)
    pv = stats.chi2.sf(LRT,1)

    return pv, beta
Ejemplo n.º 12
0
    def fitNull(self,
                verbose=True,
                cache=False,
                out_dir='./cache',
                fname=None,
                rewrite=False,
                seed=None,
                n_times=10,
                factr=1e3,
                init_method=None):
        """
        Fit the null model, optionally caching the result in an HDF5 file.

        Args:
            verbose: accepted for interface compatibility; not used here.
            cache: if True, results are read from / written to
                os.path.join(out_dir, fname).
            out_dir: cache directory (created when missing).
            fname: cache file name; required when cache is True.
            rewrite: if True, refit even when the cache file exists.
            seed: if not None, seeds SP.random before fitting.
            n_times: maximum number of optimization attempts (must be >= 1
                when a fit is actually performed).
            factr: forwarded to OPT.opt_hyper.
            init_method: forwarded to self._initParams.

        Returns:
            dict describing the null fit. When read from cache it contains
            whatever datasets the file holds; otherwise 'params0_g',
            'params0_n', 'Cg', 'Cn', 'conv', 'time', 'NLL0', 'LMLgrad',
            'nit', 'funcalls', optionally 'params_mean', and (if self.bgRE)
            'h2', 'h2_ste', 'Cg_ste', 'Cn_ste'.

        Raises:
            ValueError: if a fit is required and n_times is smaller than 1.
        """
        if seed is not None:
            SP.random.seed(seed)

        read_from_file = False
        if cache:
            assert fname is not None, 'MultiTraitSetTest:: specify fname'
            try:
                os.makedirs(out_dir)
            except OSError:
                # fine if the directory is already there (possibly created
                # concurrently); anything else is a real error
                if not os.path.isdir(out_dir):
                    raise
            out_file = os.path.join(out_dir, fname)
            read_from_file = os.path.exists(out_file) and not rewrite

        RV = {}
        if read_from_file:
            # the context manager closes the file even if a read fails
            with h5py.File(out_file, 'r') as f:
                for key in f.keys():
                    RV[key] = f[key][:]
            self.setNull(RV)
            return RV

        if n_times < 1:
            raise ValueError("n_times must be at least 1")

        start = TIME.time()
        if self.bgRE:
            self.gpNull = gp2kronSum(self.mean,
                                     self.Cg,
                                     self.Cn,
                                     XX=self.XX,
                                     S_XX=self.S_XX,
                                     U_XX=self.U_XX)
        else:
            self.gpNull = gp2kronSumLR(self.Y,
                                       self.Cn,
                                       Xr=SP.ones((self.N, 1)),
                                       F=self.F)
        # retry from fresh initial parameters until the optimizer converges
        for _ in range(n_times):
            params0, Ifilter = self._initParams(init_method=init_method)
            conv, info = OPT.opt_hyper(self.gpNull,
                                       params0,
                                       Ifilter=Ifilter,
                                       factr=factr)
            if conv:
                break
        if not conv:
            warnings.warn("not converged")

        # mean squared gradient over all parameter groups (gradient
        # evaluated once rather than once per key)
        grad = self.gpNull.LMLgrad()
        LMLgrad = SP.concatenate([grad[key]**2 for key in grad.keys()]).mean()
        LML = self.gpNull.LML()

        if 'mean' in params0.keys():
            RV['params_mean'] = self.gpNull.mean.getParams()
        RV['params0_g'] = self.Cg.getParams()
        RV['params0_n'] = self.Cn.getParams()
        RV['Cg'] = self.Cg.K()
        RV['Cn'] = self.Cn.K()
        RV['conv'] = SP.array([conv])
        RV['time'] = SP.array([TIME.time() - start])
        RV['NLL0'] = SP.array([LML])
        RV['LMLgrad'] = SP.array([LMLgrad])
        RV['nit'] = SP.array([info['nit']])
        RV['funcalls'] = SP.array([info['funcalls']])
        if self.bgRE:
            RV['h2'] = self.gpNull.h2()
            RV['h2_ste'] = self.gpNull.h2_ste()
            RV['Cg_ste'] = self.gpNull.ste('Cg')
            RV['Cn_ste'] = self.gpNull.ste('Cn')
        self.null = RV
        if cache:
            with h5py.File(out_file, 'w') as f:
                dumpDictHdf5(RV, f)
        return RV