Example #1
0
    def test_grad_beta(self):
        """Check ``grad_beta`` against a numerically computed gradient.

        For each number of variance covariates ``v`` and mean covariates
        ``c`` (both capped at the sample size ``n``), 100 random data sets
        are drawn.  On each one, the gradient of the likelihood (evaluated
        with ``negative=True``) with respect to beta, computed by
        ``nd.Gradient`` at ``beta = 0`` with ``alpha = 0``, must agree to
        5 decimal places with ``hetlm_mod.grad_beta``.
        """
        n_replicates = 10**2
        for n in [10**2]:
            for v in [1, 2, 10]:
                # Never use more covariates than observations
                v = min(v, n)
                for c in [1, 2, 10]:
                    c = min(c, n)
                    # ``range`` works on Python 2 and 3 (``xrange`` is
                    # Python 2 only); the index itself is not needed.
                    for _ in range(n_replicates):
                        X = np.random.randn(n * c).reshape((n, c))
                        V = np.random.randn(n * v).reshape((n, v))
                        y = np.random.randn(n)
                        hetlm_mod = hetlm.model(y, X, V)
                        alpha = np.zeros(c)
                        beta_0 = np.zeros(v)

                        def likelihood(beta):
                            # Objective as a function of beta only
                            return hetlm_mod.likelihood(beta,
                                                        alpha,
                                                        negative=True)

                        # Compute gradient numerically
                        num_grad = nd.Gradient(likelihood)(beta_0)
                        # Gradient reported by the model, flattened to (v,)
                        model_grad = hetlm_mod.grad_beta(beta_0,
                                                         alpha).reshape((v))
                        testing.assert_almost_equal(num_grad,
                                                    model_grad,
                                                    decimal=5)
Example #2
0
 def test_alpha_mle(self):
     """Check ``alpha_mle`` against an explicit generalised least squares fit.

     For each number of variance covariates ``v`` and mean covariates ``c``
     (both capped at the sample size ``n``), 100 data sets are simulated as
     ``y = noise * exp(V beta / 2) + X alpha``.  The estimate returned by
     ``hetlm_mod.alpha_mle(beta)`` must agree to 5 decimal places with the
     solution of ``(X' S^-1 X) a = X' S^-1 y`` where
     ``S = diag(exp(V beta))``.
     """
     n_replicates = 10**2
     for n in [10**2]:
         for v in [1, 2, 10]:
             # Never use more covariates than observations
             v = min(v, n)
             for c in [1, 2, 10]:
                 c = min(c, n)
                 # ``range`` works on Python 2 and 3 (``xrange`` is
                 # Python 2 only); the index itself is not needed.
                 for _ in range(n_replicates):
                     # Draw order is kept as X, V, alpha, y, beta
                     X = np.random.randn(n * c).reshape((n, c))
                     V = np.random.randn(n * v).reshape((n, v))
                     true_alpha = np.random.randn(c)
                     noise = np.random.randn(n)
                     beta = np.random.randn(v) / 10
                     Vb = np.dot(V, beta)
                     y = noise * np.exp(Vb / 2.0) + X.dot(true_alpha)
                     # Explicit (dense) covariance matrix and its inverse
                     Sigma = np.diag(np.exp(Vb))
                     Sigma_inv = np.linalg.inv(Sigma)
                     hetlm_mod = hetlm.model(y, X, V)
                     alpha = hetlm_mod.alpha_mle(beta)
                     # Reference solution via the normal equations
                     safe_alpha = np.linalg.solve(
                         np.dot(X.T, Sigma_inv.dot(X)),
                         np.dot(X.T, Sigma_inv.dot(y)))
                     testing.assert_almost_equal(alpha,
                                                 safe_alpha,
                                                 decimal=5)
Example #3
0
 def test_likelihood(self):
     """Check ``likelihood`` against an explicit dense-matrix computation.

     For each number of variance covariates ``v`` and mean covariates ``c``
     (both capped at the sample size ``n``), 100 random data sets and
     parameter vectors are drawn.  The value of
     ``hetlm_mod.likelihood(beta, alpha, negative=True)`` must agree (after
     division by ``n``, to 5 decimal places) with
     ``resid' S^-1 resid + log|S|`` where ``S = diag(exp(V beta))`` and
     ``resid = y - X alpha``.
     """
     n_replicates = 10**2
     for n in [10**2]:
         for v in [1, 2, 10]:
             # Never use more covariates than observations
             v = min(v, n)
             for c in [1, 2, 10]:
                 c = min(c, n)
                 # ``range`` works on Python 2 and 3 (``xrange`` is
                 # Python 2 only); the index itself is not needed.
                 for _ in range(n_replicates):
                     # Draw order is kept as X, V, y, alpha, beta
                     X = np.random.randn(n * c).reshape((n, c))
                     V = np.random.randn(n * v).reshape((n, v))
                     y = np.random.randn(n)
                     alpha = np.random.randn(c)
                     beta = np.random.randn(v)
                     Vb = np.dot(V, beta)
                     Sigma = np.diag(np.exp(Vb))
                     # slogdet returns (sign, log|det|)
                     sign, logabsdet = np.linalg.slogdet(Sigma)
                     logdet = sign * logabsdet
                     Sigma_inv = np.linalg.inv(Sigma)
                     hetlm_mod = hetlm.model(y, X, V)
                     lik = hetlm_mod.likelihood(beta, alpha,
                                                negative=True) / float(n)
                     resid = y - X.dot(alpha)
                     # Reference value: quadratic form plus log-determinant
                     safe_lik = np.dot(resid.T,
                                       Sigma_inv.dot(resid)) + logdet
                     testing.assert_almost_equal(lik,
                                                 safe_lik / float(n),
                                                 decimal=5)
Example #4
0
    def optimize_model(self,h2,SEs=True,dx=10**(-6)):
        """Find the maximum likelihood estimate (MLE) of the parameters and their sampling distribution.

        The variance parameters (beta, h2) are optimised with L-BFGS-B, starting from
        the beta estimate of a :class:`hetlm.model` fitted to the same y, X, V and from
        the supplied h2; h2 is bounded below by 0.00001. A message is printed if the
        optimiser reports a non-zero warnflag.

        Parameters
        ----------
        h2 : :class:`float`
            initial value of variance explained by random effects
        SEs : :class:`bool`
            whether to compute sampling distribution of parameter estimates. Default is True.
        dx : :class:`float`
            the step size used to compute the Hessian for computing the parameter sampling distribution

        Returns
        -------
        optim : :class:`dict`
            keys: MLEs ('alpha', fixed mean effects; 'beta', fixed variance effects; 'h2', variance explained by random effects),
            maximum likelihood ('likelihood'), whether optimisation was successful ('success'), warnings from L-BFGS-B optimisation ('warnflag').
            Only when SEs is True, additionally: standard errors ('alpha_se', 'beta_se', 'h2_se') and the
            covariance matrix for sampling distribution of parameter vector ('par_cov', in order: alpha, beta, h2).
        """
        n_var = self.n_fixed_variance
        # Initialise parameters: [beta_1, ..., beta_n_var, h2]
        init_params = np.zeros((n_var + 1))
        init_params[n_var] = h2
        # Get initial guess for beta
        init_params[0:n_var] = hetlm.model(self.y, self.X, self.V).optimize_model()['beta']
        ## Set parameter boundaries
        # beta is unbounded (list repetition avoids the Python-2-only xrange)
        parbounds = [(None, None)] * n_var
        # h2 is bounded below
        parbounds.append((0.00001, None))
        # Optimize; fmin_l_bfgs_b returns (argmin, objective value, info dict)
        optimized = fmin_l_bfgs_b(func=lik_and_grad_var_pars, x0=init_params,
                                  args=(self.y, self.X, self.V, self.G),
                                  bounds=parbounds)
        best_params, best_value, info = optimized[0], optimized[1], optimized[2]
        # Get MLE
        optim = {}
        optim['success'] = True
        optim['warnflag'] = info['warnflag']
        if optim['warnflag'] != 0:
            print('Optimization unsuccessful.')
            optim['success'] = False
        optim['beta'] = best_params[0:n_var]
        optim['h2'] = best_params[n_var]
        optim['alpha'] = self.alpha_mle(optim['beta'], optim['h2'])
        # Get likelihood from the optimised objective value
        optim['likelihood'] = -0.5*np.float64(self.n)*(best_value + np.log(2*np.pi))

        # Compute parameter covariance
        if SEs:
            n_mean = self.n_fixed_mean
            optim['par_cov'] = self.parameter_covariance(optim['alpha'], optim['beta'], optim['h2'], dx)
            par_se = np.sqrt(np.diag(optim['par_cov']))
            # par_se is sliced in the order alpha, beta, h2
            optim['alpha_se'] = par_se[0:n_mean]
            optim['beta_se'] = par_se[n_mean:(n_var + n_mean)]
            optim['h2_se'] = par_se[n_mean + n_var]

        return optim
Example #5
0
 def test_grad_beta(self):
     """Check ``grad_beta`` against a numerically computed gradient.

     For each number of variance covariates ``v`` and mean covariates ``c``
     (both capped at the sample size ``n``), 100 random data sets are
     drawn.  On each one, the gradient of the likelihood (evaluated with
     ``negative=True``) with respect to beta, computed by ``nd.Gradient``
     at ``beta = 0`` with ``alpha = 0``, must agree to 5 decimal places
     with ``hetlm_mod.grad_beta``.
     """
     n_replicates = 10**2
     for n in [10**2]:
         for v in [1, 2, 10]:
             # Never use more covariates than observations
             v = min(v, n)
             for c in [1, 2, 10]:
                 c = min(c, n)
                 # ``range`` works on Python 2 and 3 (``xrange`` is
                 # Python 2 only); the index itself is not needed.
                 for _ in range(n_replicates):
                     X = np.random.randn(n * c).reshape((n, c))
                     V = np.random.randn(n * v).reshape((n, v))
                     y = np.random.randn(n)
                     hetlm_mod = hetlm.model(y, X, V)
                     alpha = np.zeros(c)
                     beta_0 = np.zeros(v)

                     def likelihood(beta):
                         # Objective as a function of beta only
                         return hetlm_mod.likelihood(beta, alpha, negative=True)

                     # Compute gradient numerically
                     num_grad = nd.Gradient(likelihood)(beta_0)
                     # Gradient reported by the model, flattened to (v,)
                     model_grad = hetlm_mod.grad_beta(beta_0, alpha).reshape((v))
                     testing.assert_almost_equal(num_grad, model_grad, decimal=5)
Example #6
0
 def test_alpha_mle(self):
     """Check ``alpha_mle`` against an explicit generalised least squares fit.

     For each number of variance covariates ``v`` and mean covariates ``c``
     (both capped at the sample size ``n``), 100 data sets are simulated as
     ``y = noise * exp(V beta / 2) + X alpha``.  The estimate returned by
     ``hetlm_mod.alpha_mle(beta)`` must agree to 5 decimal places with the
     solution of ``(X' S^-1 X) a = X' S^-1 y`` where
     ``S = diag(exp(V beta))``.
     """
     n_replicates = 10**2
     for n in [10**2]:
         for v in [1, 2, 10]:
             # Never use more covariates than observations
             v = min(v, n)
             for c in [1, 2, 10]:
                 c = min(c, n)
                 # ``range`` works on Python 2 and 3 (``xrange`` is
                 # Python 2 only); the index itself is not needed.
                 for _ in range(n_replicates):
                     # Draw order is kept as X, V, alpha, y, beta
                     X = np.random.randn(n * c).reshape((n, c))
                     V = np.random.randn(n * v).reshape((n, v))
                     true_alpha = np.random.randn(c)
                     noise = np.random.randn(n)
                     beta = np.random.randn(v) / 10
                     Vb = np.dot(V, beta)
                     y = noise * np.exp(Vb / 2.0) + X.dot(true_alpha)
                     # Explicit (dense) covariance matrix and its inverse
                     Sigma = np.diag(np.exp(Vb))
                     Sigma_inv = np.linalg.inv(Sigma)
                     hetlm_mod = hetlm.model(y, X, V)
                     alpha = hetlm_mod.alpha_mle(beta)
                     # Reference solution via the normal equations
                     lhs = np.dot(X.T, Sigma_inv.dot(X))
                     rhs = np.dot(X.T, Sigma_inv.dot(y))
                     safe_alpha = np.linalg.solve(lhs, rhs)
                     testing.assert_almost_equal(alpha, safe_alpha, decimal=5)
Example #7
0
 def test_likelihood(self):
     """Check ``likelihood`` against an explicit dense-matrix computation.

     For each number of variance covariates ``v`` and mean covariates ``c``
     (both capped at the sample size ``n``), 100 random data sets and
     parameter vectors are drawn.  The value of
     ``hetlm_mod.likelihood(beta, alpha, negative=True)`` must agree (after
     division by ``n``, to 5 decimal places) with
     ``resid' S^-1 resid + log|S|`` where ``S = diag(exp(V beta))`` and
     ``resid = y - X alpha``.
     """
     n_replicates = 10**2
     for n in [10**2]:
         for v in [1, 2, 10]:
             # Never use more covariates than observations
             v = min(v, n)
             for c in [1, 2, 10]:
                 c = min(c, n)
                 # ``range`` works on Python 2 and 3 (``xrange`` is
                 # Python 2 only); the index itself is not needed.
                 for _ in range(n_replicates):
                     # Draw order is kept as X, V, y, alpha, beta
                     X = np.random.randn(n * c).reshape((n, c))
                     V = np.random.randn(n * v).reshape((n, v))
                     y = np.random.randn(n)
                     alpha = np.random.randn(c)
                     beta = np.random.randn(v)
                     Vb = np.dot(V, beta)
                     Sigma = np.diag(np.exp(Vb))
                     # slogdet returns (sign, log|det|)
                     sign, logabsdet = np.linalg.slogdet(Sigma)
                     logdet = sign * logabsdet
                     Sigma_inv = np.linalg.inv(Sigma)
                     hetlm_mod = hetlm.model(y, X, V)
                     lik = hetlm_mod.likelihood(beta, alpha, negative=True) / float(n)
                     resid = y - X.dot(alpha)
                     # Reference value: quadratic form plus log-determinant
                     safe_lik = np.dot(resid.T, Sigma_inv.dot(resid)) + logdet
                     testing.assert_almost_equal(lik, safe_lik / float(n), decimal=5)
Example #8
0
    else:
        G = None

    ######### Fit  Model ##########
    # Fit either the hetlmm model (when G is available) or the hetlm model
    # (when it is not), then write the fixed mean effect estimates to disk.
    print('Fitting Model')
    if G is not None:
        # G present: fit hetlmm, starting the optimisation from args.h2_init
        optim= hetlmm.model(y, X, V, G).optimize_model(args.h2_init)
        # Save h2 estimate and its standard error (as strings) to
        # '<outprefix>.h2.txt', unless disabled with args.no_h2_estimate
        if not args.no_h2_estimate:
            np.savetxt(args.outprefix + '.h2.txt',
                       np.array([optim['h2'], optim['h2_se']], dtype='S20'),
                       delimiter='\t', fmt='%s')
    else:
        # No G: fit hetlm to y, X, V
        optim = hetlm.model(y, X, V).optimize_model()

    ## Record fitting of model
    # Fixed mean effects are only written when mean covariates were supplied
    if args.mean_covar is not None:
        # Column 0: estimates; column 1: standard errors
        alpha_out=np.zeros((n_X,2))
        alpha_out[:,0]=optim['alpha']
        alpha_out[:,1]=optim['alpha_se']
        # Rescale every row except the first by X_stds
        # (presumably row 0 is an intercept and the covariates were
        # standardised upstream - TODO confirm against the caller)
        if n_X>1:
            # NOTE(review): xrange exists only on Python 2
            for i in xrange(0,2):
                alpha_out[1:n_X,i] = alpha_out[1:n_X,i]/X_stds
        # Save names alongside the (string-converted) estimates and SEs
        np.savetxt(args.outprefix + '.mean_effects.txt',
                   np.hstack((X_names.reshape((n_X, 1)), np.array(alpha_out, dtype='S20'))),
                   delimiter='\t', fmt='%s')
Example #9
0
        G = None

    ######### Fit  Model ##########
    # Fit either the hetlmm model (when G is available) or the hetlm model
    # (when it is not), then prepare the fixed mean effect estimates.
    print('Fitting Model')
    if G is not None:
        # G present: fit hetlmm, starting the optimisation from args.h2_init
        optim = hetlmm.model(y, X, V, G).optimize_model(args.h2_init)
        # Save h2 estimate and its standard error (as strings) to
        # '<outprefix>.h2.txt', unless disabled with args.no_h2_estimate
        if not args.no_h2_estimate:
            np.savetxt(args.outprefix + '.h2.txt',
                       np.array([optim['h2'], optim['h2_se']], dtype='S20'),
                       delimiter='\t',
                       fmt='%s')
    else:
        # No G: fit hetlm to y, X, V
        optim = hetlm.model(y, X, V).optimize_model()

    ## Record fitting of model
    # Fixed mean effects are only handled when mean covariates were supplied
    if args.mean_covar is not None:
        # Column 0: estimates; column 1: standard errors
        alpha_out = np.zeros((n_X, 2))
        alpha_out[:, 0] = optim['alpha']
        alpha_out[:, 1] = optim['alpha_se']
        # Rescale every row except the first by X_stds
        # (presumably row 0 is an intercept and the covariates were
        # standardised upstream - TODO confirm against the caller)
        if n_X > 1:
            # NOTE(review): xrange exists only on Python 2
            for i in xrange(0, 2):
                alpha_out[1:n_X, i] = alpha_out[1:n_X, i] / X_stds
        # Save
        np.savetxt(args.outprefix + '.mean_effects.txt',
                   np.hstack((X_names.reshape(
                       (n_X, 1)), np.array(alpha_out, dtype='S20'))),
Example #10
0
    def optimize_model(self, h2, SEs=True, dx=10**(-6)):
        """Find the maximum likelihood estimate (MLE) of the parameters and their sampling distribution.

        The variance parameters (beta, h2) are optimised with L-BFGS-B, starting from
        the beta estimate of a :class:`hetlm.model` fitted to the same y, X, V and from
        the supplied h2; h2 is bounded below by 0.00001. A message is printed if the
        optimiser reports a non-zero warnflag.

        Parameters
        ----------
        h2 : :class:`float`
            initial value of variance explained by random effects
        SEs : :class:`bool`
            whether to compute sampling distribution of parameter estimates. Default is True.
        dx : :class:`float`
            the step size used to compute the Hessian for computing the parameter sampling distribution

        Returns
        -------
        optim : :class:`dict`
            keys: MLEs ('alpha', fixed mean effects; 'beta', fixed variance effects; 'h2', variance explained by random effects),
            maximum likelihood ('likelihood'), whether optimisation was successful ('success'), warnings from L-BFGS-B optimisation ('warnflag').
            Only when SEs is True, additionally: standard errors ('alpha_se', 'beta_se', 'h2_se') and the
            covariance matrix for sampling distribution of parameter vector ('par_cov', in order: alpha, beta, h2).
        """
        n_var = self.n_fixed_variance
        # Initialise parameters: [beta_1, ..., beta_n_var, h2]
        init_params = np.zeros((n_var + 1))
        init_params[n_var] = h2
        # Get initial guess for beta
        init_params[0:n_var] = hetlm.model(
            self.y, self.X, self.V).optimize_model()['beta']
        ## Set parameter boundaries
        # beta is unbounded (list repetition avoids the Python-2-only xrange)
        parbounds = [(None, None)] * n_var
        # h2 is bounded below
        parbounds.append((0.00001, None))
        # Optimize; fmin_l_bfgs_b returns (argmin, objective value, info dict)
        optimized = fmin_l_bfgs_b(func=lik_and_grad_var_pars,
                                  x0=init_params,
                                  args=(self.y, self.X, self.V, self.G),
                                  bounds=parbounds)
        best_params, best_value, info = optimized[0], optimized[1], optimized[2]
        # Get MLE
        optim = {}
        optim['success'] = True
        optim['warnflag'] = info['warnflag']
        if optim['warnflag'] != 0:
            print('Optimization unsuccessful.')
            optim['success'] = False
        optim['beta'] = best_params[0:n_var]
        optim['h2'] = best_params[n_var]
        optim['alpha'] = self.alpha_mle(optim['beta'], optim['h2'])
        # Get likelihood from the optimised objective value
        optim['likelihood'] = -0.5 * np.float64(
            self.n) * (best_value + np.log(2 * np.pi))

        # Compute parameter covariance
        if SEs:
            n_mean = self.n_fixed_mean
            optim['par_cov'] = self.parameter_covariance(
                optim['alpha'], optim['beta'], optim['h2'], dx)
            par_se = np.sqrt(np.diag(optim['par_cov']))
            # par_se is sliced in the order alpha, beta, h2
            optim['alpha_se'] = par_se[0:n_mean]
            optim['beta_se'] = par_se[n_mean:(n_var + n_mean)]
            optim['h2_se'] = par_se[n_mean + n_var]

        return optim
Example #11
0
    ######### Initialise output files #######
    ## Output file
    # Append to an existing results file when args.append is set, otherwise
    # start a new one; both modes are binary.
    if args.append:
        write_mode = 'ab'
    else:
        write_mode = 'wb'
    # NOTE(review): the file is named '.models.gz' but is opened with
    # whatever `open` is in scope here; unless that is a gzip-aware opener,
    # the output is not compressed. Also, writing a str header to a
    # binary-mode file fails on Python 3 with the builtin open - confirm.
    outfile = open(args.outprefix + '.models.gz', write_mode)
    # The tab-separated column header is only written for a fresh file
    if not args.append:
        header = 'SNP\tn\tfrequency\tlikelihood\tadd\tadd_se\tadd_t\tadd_pval\tvar\tvar_se\tvar_t\tvar_pval\tav_pval\n'
        outfile.write(header)

    ######### Fit Null Model ##########
    print('Fitting Null Model')
    # Optimize null model: hetlm fitted to y, X, V
    null_optim = hetlm.model(y, X, V).optimize_model()

    ## Record fitting of null model
    # Fixed mean effects - column 0: estimates; column 1: standard errors
    alpha_out = np.zeros((n_X, 2))
    alpha_out[:, 0] = null_optim['alpha']
    alpha_out[:, 1] = null_optim['alpha_se']
    # Rescale every row except the first by X_stds
    # (presumably row 0 is an intercept and the covariates were
    # standardised upstream - TODO confirm against the caller)
    if n_X > 1:
        # NOTE(review): xrange exists only on Python 2
        for i in xrange(0, 2):
            alpha_out[1:n_X, i] = alpha_out[1:n_X, i] / X_stds
    if not args.append and not args.no_covariate_estimates and args.mean_covar is not None:
        np.savetxt(args.outprefix + '.null_mean_effects.txt',
                   np.hstack((X_names.reshape(
                       (n_X, 1)), np.array(alpha_out, dtype='S20'))),
                   delimiter='\t',
Example #12
0
    ######### Initialise output files #######
    ## Output file
    # Append to an existing results file when args.append is set, otherwise
    # start a new one; both modes are binary.
    if args.append:
        write_mode='ab'
    else:
        write_mode='wb'
    # NOTE(review): the file is named '.models.gz' but is opened with
    # whatever `open` is in scope here; unless that is a gzip-aware opener,
    # the output is not compressed. Also, writing a str header to a
    # binary-mode file fails on Python 3 with the builtin open - confirm.
    outfile=open(args.outprefix+'.models.gz',write_mode)
    # The tab-separated column header is only written for a fresh file
    if not args.append:
        header='SNP\tn\tfrequency\tlikelihood\tadd\tadd_se\tadd_t\tadd_pval\tvar\tvar_se\tvar_t\tvar_pval\tav_pval\n'
        outfile.write(header)

    ######### Fit Null Model ##########
    print('Fitting Null Model')
    # Optimize null model: hetlm fitted to y, X, V
    null_optim= hetlm.model(y, X, V).optimize_model()

    ## Record fitting of null model
    # Fixed mean effects - column 0: estimates; column 1: standard errors
    alpha_out=np.zeros((n_X,2))
    alpha_out[:,0]=null_optim['alpha']
    alpha_out[:,1]=null_optim['alpha_se']
    # Rescale every row except the first by X_stds
    # (presumably row 0 is an intercept and the covariates were
    # standardised upstream - TODO confirm against the caller)
    if n_X>1:
        # NOTE(review): xrange exists only on Python 2
        for i in xrange(0,2):
            alpha_out[1:n_X,i] = alpha_out[1:n_X,i]/X_stds
    # Write '<outprefix>.null_mean_effects.txt' (names, estimates, SEs as
    # strings) only for a fresh run with covariate output enabled and mean
    # covariates supplied
    if not args.append and not args.no_covariate_estimates and args.mean_covar is not None:
        np.savetxt(args.outprefix + '.null_mean_effects.txt',
                   np.hstack((X_names.reshape((n_X, 1)), np.array(alpha_out, dtype='S20'))),
                   delimiter='\t', fmt='%s')
    # variance effects