def test_grad_beta(self):
    for n in [10**2]:
        for v in [1, 2, 10]:
            if v > n:
                v = n
            for c in [1, 2, 10]:
                if c > n:
                    c = n
                for i in range(0, 10**2):
                    X = np.random.randn(n * c).reshape((n, c))
                    V = np.random.randn(n * v).reshape((n, v))
                    y = np.random.randn(n)
                    hetlm_mod = hetlm.model(y, X, V)
                    alpha = np.zeros(c)

                    def likelihood(beta):
                        return hetlm_mod.likelihood(beta, alpha, negative=True)

                    # Compare the analytic gradient with a numerical gradient
                    num_grad = nd.Gradient(likelihood)(np.zeros(v))
                    testing.assert_almost_equal(
                        num_grad,
                        hetlm_mod.grad_beta(np.zeros(v), alpha).reshape(v),
                        decimal=5)
def test_alpha_mle(self):
    for n in [10**2]:
        for v in [1, 2, 10]:
            if v > n:
                v = n
            for c in [1, 2, 10]:
                if c > n:
                    c = n
                for i in range(0, 10**2):
                    X = np.random.randn(n * c).reshape((n, c))
                    V = np.random.randn(n * v).reshape((n, v))
                    alpha = np.random.randn(c)
                    y = np.random.randn(n)
                    beta = np.random.randn(v) / 10
                    Vb = np.dot(V, beta)
                    # Simulate y with mean X.dot(alpha) and heteroskedastic noise
                    y = y * np.exp(Vb / 2.0) + X.dot(alpha)
                    Sigma = np.diag(np.exp(Vb))
                    Sigma_inv = np.linalg.inv(Sigma)
                    hetlm_mod = hetlm.model(y, X, V)
                    alpha = hetlm_mod.alpha_mle(beta)
                    # Compare with the direct generalised least squares solution
                    safe_alpha = np.linalg.solve(
                        np.dot(X.T, Sigma_inv.dot(X)),
                        np.dot(X.T, Sigma_inv.dot(y)))
                    testing.assert_almost_equal(alpha, safe_alpha, decimal=5)
def test_likelihood(self):
    for n in [10**2]:
        for v in [1, 2, 10]:
            if v > n:
                v = n
            for c in [1, 2, 10]:
                if c > n:
                    c = n
                for i in range(0, 10**2):
                    X = np.random.randn(n * c).reshape((n, c))
                    V = np.random.randn(n * v).reshape((n, v))
                    y = np.random.randn(n)
                    alpha = np.random.randn(c)
                    beta = np.random.randn(v)
                    Vb = np.dot(V, beta)
                    Sigma = np.diag(np.exp(Vb))
                    sign, logdet = np.linalg.slogdet(Sigma)
                    logdet = sign * logdet
                    Sigma_inv = np.linalg.inv(Sigma)
                    hetlm_mod = hetlm.model(y, X, V)
                    lik = hetlm_mod.likelihood(beta, alpha, negative=True) / float(n)
                    resid = y - X.dot(alpha)
                    # Direct evaluation of the negative log-likelihood
                    safe_lik = np.dot(resid.T, Sigma_inv.dot(resid)) + logdet
                    testing.assert_almost_equal(lik, safe_lik / float(n), decimal=5)
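Taken together, these tests check the heteroskedastic linear model code against closed-form references. Writing $\Sigma(\beta) = \operatorname{diag}(e^{V\beta})$ and $r = y - X\alpha$, test_likelihood verifies that the negative log-likelihood returned with negative=True equals

$$\ell(\alpha, \beta) = r^\top \Sigma(\beta)^{-1} r + \log\det\Sigma(\beta),$$

test_grad_beta verifies its analytic gradient with respect to $\beta$, which differentiating the expression above gives as

$$\nabla_\beta \ell = V^\top\left(\mathbf{1} - r^{2} \circ e^{-V\beta}\right),$$

with $r^{2}$ and $e^{-V\beta}$ taken elementwise, and test_alpha_mle verifies that for fixed $\beta$ the maximising $\alpha$ is the generalised least squares estimate

$$\hat{\alpha}(\beta) = \left(X^\top \Sigma^{-1} X\right)^{-1} X^\top \Sigma^{-1} y.$$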
def optimize_model(self, h2, SEs=True, dx=10**(-6)):
    """Find the maximum likelihood estimate (MLE) of the parameters and their sampling distribution.

    Parameters
    ----------
    h2 : :class:`float`
        initial value of variance explained by random effects
    SEs : :class:`bool`
        whether to compute sampling distribution of parameter estimates. Default is True.
    dx : :class:`float`
        the step size used to compute the Hessian for computing the parameter sampling distribution

    Returns
    -------
    optim : :class:`dict`
        keys: MLEs ('alpha', fixed mean effects; 'beta', fixed variance effects;
        'h2', variance explained by random effects), their standard errors
        ('alpha_se', 'beta_se', 'h2_se'), covariance matrix for sampling distribution
        of parameter vector ('par_cov', in order: alpha, beta, h2), maximum likelihood
        ('likelihood'), whether optimisation was successful ('success'), warnings from
        L-BFGS-B optimisation ('warnflag').
    """
    # Initialise parameters
    init_params = np.zeros(self.n_fixed_variance + 1)
    init_params[self.n_fixed_variance] = h2
    # Get initial guess for beta from the model without random effects
    init_params[0:self.n_fixed_variance] = hetlm.model(
        self.y, self.X, self.V).optimize_model()['beta']
    ## Set parameter boundaries
    # boundaries for beta
    parbounds = [(None, None) for i in range(0, self.n_fixed_variance)]
    # boundaries for h2
    parbounds.append((0.00001, None))
    # Optimize
    optimized = fmin_l_bfgs_b(func=lik_and_grad_var_pars, x0=init_params,
                              args=(self.y, self.X, self.V, self.G),
                              bounds=parbounds)
    # Get MLE
    optim = {}
    optim['success'] = True
    optim['warnflag'] = optimized[2]['warnflag']
    if optim['warnflag'] != 0:
        print('Optimization unsuccessful.')
        optim['success'] = False
    optim['beta'] = optimized[0][0:self.n_fixed_variance]
    optim['h2'] = optimized[0][self.n_fixed_variance]
    optim['alpha'] = self.alpha_mle(optim['beta'], optim['h2'])
    # Get maximum likelihood
    optim['likelihood'] = -0.5 * np.float64(self.n) * (optimized[1] + np.log(2 * np.pi))
    # Compute parameter covariance
    if SEs:
        optim['par_cov'] = self.parameter_covariance(
            optim['alpha'], optim['beta'], optim['h2'], dx)
        par_se = np.sqrt(np.diag(optim['par_cov']))
        optim['alpha_se'] = par_se[0:self.n_fixed_mean]
        optim['beta_se'] = par_se[self.n_fixed_mean:(
            self.n_fixed_variance + self.n_fixed_mean)]
        optim['h2_se'] = par_se[self.n_fixed_mean + self.n_fixed_variance]
    return optim
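A minimal usage sketch on toy data, assuming the module is importable as hetlmm (the import path and the toy dimensions are assumptions, not from this package; the constructor call matches the scripts below):

import numpy as np
import hetlmm  # assumed import; adjust to this package's layout

n = 500
X = np.random.randn(n, 2)   # fixed mean-effect covariates
V = np.random.randn(n, 2)   # fixed variance-effect covariates
G = np.random.randn(n, 10)  # random-effects design matrix (toy stand-in)
y = np.random.randn(n)

optim = hetlmm.model(y, X, V, G).optimize_model(h2=0.5)
if optim['success']:
    print('h2 = %.3f (SE %.3f)' % (optim['h2'], optim['h2_se']))
    print('alpha:', optim['alpha'])
    print('beta:', optim['beta'])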
else:
    G = None

######### Fit Model ##########
## Get initial guesses for null model
print('Fitting Model')
# Optimize null model
if G is not None:
    optim = hetlmm.model(y, X, V, G).optimize_model(args.h2_init)
    # Save h2 estimate
    if not args.no_h2_estimate:
        np.savetxt(args.outprefix + '.h2.txt',
                   np.array([optim['h2'], optim['h2_se']], dtype='S20'),
                   delimiter='\t', fmt='%s')
else:
    optim = hetlm.model(y, X, V).optimize_model()

## Record fitting of model
# Get print out for fixed mean effects
if args.mean_covar is not None:
    alpha_out = np.zeros((n_X, 2))
    alpha_out[:, 0] = optim['alpha']
    alpha_out[:, 1] = optim['alpha_se']
    # Rescale effects of standardised covariates back to original units
    if n_X > 1:
        for i in range(0, 2):
            alpha_out[1:n_X, i] = alpha_out[1:n_X, i] / X_stds
    # Save
    np.savetxt(args.outprefix + '.mean_effects.txt',
               np.hstack((X_names.reshape((n_X, 1)),
                          np.array(alpha_out, dtype='S20'))),
               delimiter='\t', fmt='%s')
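The division by X_stds presumes the non-intercept columns of X were standardised before fitting, so each estimated effect is per standard deviation of the covariate; dividing by that standard deviation converts it to an effect per original unit. A toy illustration of the equivalence (all names here are illustrative, not from the package):

import numpy as np

rng = np.random.RandomState(0)
x = 1.0 + 3.0 * rng.randn(1000)      # covariate in original units
y = 0.5 * x + rng.randn(1000)        # true effect: 0.5 per unit of x

x_std = (x - x.mean()) / x.std()     # standardised covariate
A = np.column_stack((np.ones(1000), x_std))
alpha_std = np.linalg.lstsq(A, y, rcond=None)[0][1]  # effect per SD of x

print(alpha_std / x.std())           # recovers ~0.5 per original unit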
######### Initialise output files #######
## Output file
if args.append:
    write_mode = 'ab'
else:
    write_mode = 'wb'
outfile = open(args.outprefix + '.models.gz', write_mode)
if not args.append:
    header = 'SNP\tn\tfrequency\tlikelihood\tadd\tadd_se\tadd_t\tadd_pval\tvar\tvar_se\tvar_t\tvar_pval\tav_pval\n'
    outfile.write(header.encode())  # binary mode requires bytes

######### Fit Null Model ##########
## Get initial guesses for null model
print('Fitting Null Model')
# Optimize null model
null_optim = hetlm.model(y, X, V).optimize_model()

## Record fitting of null model
# Get print out for fixed mean effects
alpha_out = np.zeros((n_X, 2))
alpha_out[:, 0] = null_optim['alpha']
alpha_out[:, 1] = null_optim['alpha_se']
# Rescale
if n_X > 1:
    for i in range(0, 2):
        alpha_out[1:n_X, i] = alpha_out[1:n_X, i] / X_stds
if not args.append and not args.no_covariate_estimates and args.mean_covar is not None:
    np.savetxt(args.outprefix + '.null_mean_effects.txt',
               np.hstack((X_names.reshape((n_X, 1)),
                          np.array(alpha_out, dtype='S20'))),
               delimiter='\t', fmt='%s')
# variance effects
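Each row later appended to outfile is expected to hold one SNP's results in the header's column order. A minimal sketch of reading the table back, assuming rows are tab-separated text matching the header and that, as in this excerpt, the file was written with plain open rather than the gzip module despite the .gz suffix ('out' is a hypothetical outprefix):

# A minimal sketch under the assumptions stated above.
with open('out.models.gz', 'rb') as f:
    lines = f.read().decode().splitlines()
columns = lines[0].split('\t')  # SNP, n, frequency, likelihood, ...
for row in lines[1:]:
    record = dict(zip(columns, row.split('\t')))
    print(record['SNP'], record['add_pval'], record['var_pval'])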