def bp_mf_free_energy(lmds, pis, args):
    """Mean-field free energy of the chain graph given the BP marginals."""
    theta, alpha, beta, gamma, emit_probs, X = (args.theta, args.alpha, args.beta,
                                                args.gamma, args.emit_probs, args.X)
    I, T, L = X.shape
    K = gamma.shape[0]
    log_theta, log_alpha, log_beta, log_gamma = sp.log(theta), sp.log(alpha), sp.log(beta), sp.log(gamma)
    log_obs_mat = args.log_obs_mat
    # vert_parent was referenced but never bound in the original; assuming it
    # lives on args like the other model parameters.
    vert_parent = args.vert_parent
    Q = bp_marginal_onenode(lmds, pis, args)
    entropy = (Q * sp.log(Q)).sum()
    #print 'mf entropy', -entropy
    total_free = entropy
    for i in xrange(I):
    #for i in prange(I, nogil=True):
        vp = vert_parent[i]
        #for t in prange(T, nogil=True):
        for t in xrange(T):
            for k in xrange(K):
                total_free -= Q[i, t, k] * log_obs_mat[i, t, k]
                if i == 0 and t == 0:
                    total_free -= Q[i, t, k] * log_gamma[k]
                else:
                    for v in xrange(K):
                        if i == 0:
                            total_free -= Q[i, t - 1, v] * Q[i, t, k] * log_alpha[v, k]
                        elif t == 0:
                            total_free -= Q[vp, t, v] * Q[i, t, k] * log_beta[v, k]
                        else:
                            for h in xrange(K):
                                total_free -= Q[vp, t, v] * Q[i, t - 1, h] * Q[i, t, k] * log_theta[v, h, k]
    #print 'mf free energy:', total_free
    return total_free
def _box_cox_transform(self, verbose=False, method='standard'):
    """
    Performs the Box-Cox transformation over a range of lambdas, picking the
    optimal one with respect to normality (Shapiro-Wilk p-value).
    """
    from scipy import stats
    a = sp.array(self.values)
    if method == 'standard':
        vals = (a - min(a)) + 0.1 * sp.var(a)
    else:
        vals = a
    sw_pvals = []
    lambdas = sp.arange(-2.0, 2.1, 0.1)
    for l in lambdas:
        if l == 0:
            vs = sp.log(vals)
        else:
            vs = ((vals ** l) - 1) / l
        r = stats.shapiro(vs)
        if sp.isfinite(r[0]):
            pval = r[1]
        else:
            pval = 0.0
        sw_pvals.append(pval)
    i = sp.argmax(sw_pvals)
    l = lambdas[i]
    if l == 0:
        vs = sp.log(vals)
    else:
        vs = ((vals ** l) - 1) / l
    self._perform_transform(vs, "box_cox")
    log.debug('optimal lambda was %0.1f' % l)
    return True
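# A hedged check of the above (my own sketch, not from the original source):
# scipy.stats.boxcox applies the same power transform but picks lambda by
# maximum likelihood rather than by the Shapiro-Wilk p-value scan used above.
from scipy import stats
import scipy as sp

vals = sp.array([1.2, 3.4, 2.2, 5.1, 4.3])
transformed, mle_lambda = stats.boxcox(vals)
print('MLE lambda: %0.2f' % mle_lambda)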
def coste(self, *args, **kwargs):
    """
    material:
        0  - Carbon steel
        1  - Stainless steel 316
        2  - Stainless steel 304
        3  - Stainless steel 347
        4  - Nickel
        5  - Monel
        6  - Inconel
        7  - Zirconium
        8  - Titanium
        9  - Brick and rubber or brick and polyester-lined steel
        10 - Rubber or lead-lined steel
        11 - Polyester, fiberglass-reinforced
        12 - Aluminum
        13 - Copper
        14 - Concrete
    """
    self._indicesCoste(*args)
    self.material = kwargs["material"]

    V = self.Volumen.galUS
    Fm = [1., 2.7, 2.4, 3.0, 3.5, 3.3, 3.8, 11.0, 11.0, 2.75, 1.9, 0.32,
          2.7, 2.3, 0.55][self.material]

    if V <= 21000:
        C = Fm * exp(2.631 + 1.3673 * log(V) - 0.06309 * log(V)**2)
    else:
        C = Fm * exp(11.662 + 0.6104 * log(V) - 0.04536 * log(V)**2)

    self.C_adq = Currency(C * self.Current_index / self.Base_index)
    self.C_inst = Currency(self.C_adq * self.f_install)
def _calc_bic_(ll, num_snps, num_par, n):
    bic = -2 * ll + num_par * sp.log(n)
    extended_bic = bic + 2 * _log_choose_(num_snps, num_par - 2)
    modified_bic = bic + 2 * num_par * sp.log(num_snps / 2.2 - 1)
    return (bic, extended_bic, modified_bic)
def llfun(act, pred):
    epsilon = 1e-15
    pred = sp.maximum(epsilon, pred)
    pred = sp.minimum(1 - epsilon, pred)
    ll = sum(act * sp.log(pred) + sp.subtract(1, act) * sp.log(sp.subtract(1, pred)))
    ll = ll * -1.0 / len(act)
    return ll
def gap(data, refs=None, nrefs=20, ks=range(1, 11), method=None):
    shape = data.shape
    if refs is None:
        tops = data.max(axis=0)
        bots = data.min(axis=0)
        dists = scipy.matrix(scipy.diag(tops - bots))
        rands = scipy.random.random_sample(size=(shape[0], shape[1], nrefs))
        for i in range(nrefs):
            rands[:, :, i] = rands[:, :, i] * dists + bots
    else:
        rands = refs

    gaps = scipy.zeros((len(ks),))
    for (i, k) in enumerate(ks):
        g1 = method(n_clusters=k).fit(data)
        (kmc, kml) = (g1.cluster_centers_, g1.labels_)
        disp = sum([euclidean(data[m, :], kmc[kml[m], :]) for m in range(shape[0])])

        refdisps = scipy.zeros((rands.shape[2],))
        for j in range(rands.shape[2]):
            g2 = method(n_clusters=k).fit(rands[:, :, j])
            (kmc, kml) = (g2.cluster_centers_, g2.labels_)
            refdisps[j] = sum([euclidean(rands[m, :, j], kmc[kml[m], :])
                               for m in range(shape[0])])

        gaps[i] = scipy.log(scipy.mean(refdisps)) - scipy.log(disp)
    return gaps
def _Psat(Tdb):
    """
    ASHRAE Fundamentals Handbook pag 1.2 eq. 4
        input: Dry bulb temperature, K
        return: Saturation pressure, Pa
    """
    if 173.15 <= Tdb < 273.15:
        C1 = -5674.5359
        C2 = 6.3925247
        C3 = -0.009677843
        C4 = 0.00000062215701
        C5 = 2.0747825E-09
        C6 = -9.484024E-13
        C7 = 4.1635019
        pws = exp(C1/Tdb + C2 + C3*Tdb + C4*Tdb**2 + C5*Tdb**3 + C6*Tdb**4 +
                  C7*log(Tdb))
    elif 273.15 <= Tdb <= 473.15:
        C8 = -5800.2206
        C9 = 1.3914993
        C10 = -0.048640239
        C11 = 0.000041764768
        C12 = -0.000000014452093
        C13 = 6.5459673
        pws = exp(C8/Tdb + C9 + C10*Tdb + C11*Tdb**2 + C12*Tdb**3 +
                  C13*log(Tdb))
    else:
        raise NotImplementedError("Input temperature out of bounds")

    return pws
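# Usage sketch (my own check, not from the original source; assumes exp and
# log are imported from math as the surrounding module does): saturation
# pressure of water at 25 degC, which standard tables put near 3.17 kPa.
print('%.0f Pa' % _Psat(298.15))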
def _LML_covar(self, hyperparams):
    """
    log marginal likelihood contributions from covariance hyperparameters
    """
    try:
        KV = self.get_covariances(hyperparams)
    except linalg.LinAlgError:
        LG.error("exception caught (%s)" % (str(hyperparams)))
        return 1E6

    # all in one go
    # negative log marginal likelihood, see derivations
    lquad = 0.5 * (KV['y_rot'] * KV['Si'] * KV['y_rot']).sum()
    ldet = -0.5 * SP.log(KV['Si'][:, :]).sum()
    LML = 0.5 * self.n * self.d * SP.log(2 * SP.pi) + lquad + ldet

    if VERBOSE:
        # 1. slow and explicit way
        lmls_ = SP.zeros([self.d])
        for i in xrange(self.d):
            _y = self.y[:, i]
            sigma2 = SP.exp(2 * hyperparams['lik'])
            _K = KV['K'] + SP.diag(KV['Knoise'][:, i])
            _Ki = SP.linalg.inv(_K)
            lquad_ = 0.5 * SP.dot(_y, SP.dot(_Ki, _y))
            ldet_ = 0.5 * SP.log(SP.linalg.det(_K))
            lmls_[i] = 0.5 * self.n * SP.log(2 * SP.pi) + lquad_ + ldet_
        assert SP.absolute(lmls_.sum() - LML) < 1E-3, 'outch'

    return LML
def summarize_splits(splits, weighted=True):
    rows = []
    if weighted:
        v = [(x.likelihood * x.weight, x) for x in splits]
    else:
        v = [(x.likelihood, x) for x in splits]
    ptot = sum([x[0] for x in v])
    v.sort()
    v.reverse()
    opt = scipy.log(v[0][0])
    rows.append(["split", "lnL", "Rel.Prob"])
    sumprob = 0.0
    for L, split in v:
        lnL = scipy.log(L)
        relprob = L / ptot
        if sumprob < 0.95:
        #if (opt - lnL) < 2:
            rows.append([str(split), "%.4g" % lnL, "%.4g" % relprob])
            sumprob += relprob
    widths = []
    for i in range(3):
        w = max([len(x[i]) for x in rows])
        for x in rows:
            x[i] = x[i].ljust(w)
    return [" ".join(x) for x in rows]
def survival_function(loss_ratio, **kwargs):
    """
    Static method that prepares the calculation parameters
    to be passed to stats.lognorm.sf

    :param loss_ratio: current loss ratio
    :type loss_ratio: float

    :param kwargs: convenience dictionary
    :type kwargs: :py:class:`dict` with the following
        keys:
            **vf** - vulnerability function as provided by
                     :py:class:`openquake.shapes.VulnerabilityFunction`
            **col** - matrix column number
    """
    vuln_function = kwargs.get('vf')
    position = kwargs.get('col')

    vf_loss_ratio = vuln_function.loss_ratios[position]

    stddev = vuln_function.covs[position] * vf_loss_ratio

    variance = stddev ** 2.0
    sigma = sqrt(log((variance / vf_loss_ratio ** 2.0) + 1.0))
    mu = exp(log(vf_loss_ratio ** 2.0 / sqrt(variance + vf_loss_ratio ** 2.0)))

    return stats.lognorm.sf(loss_ratio, sigma, scale=mu)
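# Worked check (my own sketch, not from the original source): for a lognormal
# with mean m and standard deviation s, scipy's stats.lognorm takes shape
# sigma = sqrt(log(1 + s**2/m**2)) and scale exp(mu) with
# mu = log(m**2 / sqrt(s**2 + m**2)); the `mu = exp(log(...))` above is
# therefore already exp(mu), which is why it is passed directly as `scale`.
from math import log, sqrt
from scipy import stats

m, s = 0.5, 0.1
sigma = sqrt(log(1 + s**2 / m**2))
scale = m**2 / sqrt(s**2 + m**2)
print(stats.lognorm.sf(0.5, sigma, scale=scale))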
def print_model_probability(logprob):
    """
    Gives a nice overview of the model probability, allowing
    the practitioner to compare this model's probability to others
    """
    prob = scipy.exp(logprob)
    limits = {
        'eq':          logprob,
        'barely':      logprob - scipy.log(3),
        'substantial': logprob - scipy.log(10),
        'strong':      logprob - scipy.log(30),
        'very strong': logprob - scipy.log(100),
    }
    for i in limits:
        limits[i] = "%5.1f" % limits[i]
        limits[i] = " " * (7 - len(limits[i])) + limits[i]
    print("Model probability ln(p(D|M, I)): [about 10^%.0f] %.5f" %
          (logprob / scipy.log(10), logprob))
    print(("""
Table to compare support against other models (Jeffrey):

  other model        |
  ln(p(D|M,I))       | supporting evidence for this model
 --------------------+-------------------------------------
 >%%(eq)%s               Negative (supports other model)
  %%(eq)%s ..%%(barely)%s   Barely worth mentioning
  %%(barely)%s ..%%(substantial)%s   Substantial
  %%(substantial)%s ..%%(strong)%s   Strong
  %%(strong)%s ..%%(very strong)%s   Very strong
 <%%(very strong)%s               Decisive

be careful.
""" % tuple(['s'] * 10)) % limits)
def evaluate_ll(y, yhat):
    epsilon = 1e-15
    yhat = sp.maximum(epsilon, yhat)
    yhat = sp.minimum(1 - epsilon, yhat)
    ll = sum(y * sp.log(yhat) + sp.subtract(1, y) * sp.log(sp.subtract(1, yhat)))
    ll = ll * -1.0 / len(y)
    return ll
def binary_logloss(p, y):
    epsilon = 1e-15
    p = sp.maximum(epsilon, p)
    p = sp.minimum(1 - epsilon, p)
    res = sum(y * sp.log(p) + sp.subtract(1, y) * sp.log(sp.subtract(1, p)))
    res *= -1.0 / len(y)
    return res
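# Usage sketch (my own example, not from the original source): a confident
# correct prediction contributes little loss, a confident wrong one a lot.
import scipy as sp

y = sp.array([1, 0, 1])
p = sp.array([0.9, 0.1, 0.8])
print(binary_logloss(p, y))  # roughly 0.14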
def log_d_pois_like_trunc_5(d, s1, s2, a, p):
    """double poisson w max 5 goals"""
    #dp = np.sign(d)*np.power(np.abs(d),p)
    dp = 1.5 * np.arctan(d)
    #print(dp)
    return (log(a) * (s1 + s2) + dp * (s1 - s2)
            - 2 * a * cosh(dp)
            - gammaln(s1 + 1) - gammaln(s2 + 1)
            - log(gammaincc(6, a * exp(-dp)) * gammaincc(6, a * exp(dp))))
def compute_continuous_prob_value(parameters, distribution, rvs):
    mean = float(parameters[0])
    stddev = float(parameters[1]) / 100 * mean
    A = float(parameters[2])
    B = float(parameters[3])
    result = float("-inf")
    if rvs is None:
        # rejection-sample until the draw falls inside (A, B]
        while result <= A or result > B:
            if distribution == "normal":
                rvs = stats.norm.rvs
                result = rvs(mean, stddev)
            elif distribution == "lognormal":
                variance = stddev ** 2.0
                mu = log(mean ** 2.0 / sqrt(variance + mean ** 2.0))
                sigma = sqrt(log((variance / mean ** 2.0) + 1.0))
                rvs = stats.lognorm.rvs
                result = rvs(sigma, scale=scipy.exp(mu))
            elif distribution == "gamma":
                betha = stddev ** 2 / mean
                alpha = mean / betha
                rvs = stats.gamma.rvs
                result = rvs(alpha, scale=betha)
    else:
        result = 1
    return result
def loglike(self, data, paravec, sign=1):
    la, lb, lp = paravec
    loglike = (len(data) * sp.log(np.exp(la))
               + (np.exp(la) * np.exp(lp) - 1) * sum(sp.log(data))
               - (1 / np.exp(lb) ** np.exp(la)) * sum(data ** np.exp(la))
               - len(data) * np.exp(la) * np.exp(lp) * sp.log(np.exp(lb))
               - len(data) * gammaln(np.exp(lp)))
    loglike = sign * loglike
    return loglike
def loglike(self, data, paravec, sign=1):
    lu, lsig = paravec
    loglike = ((-1 / (2 * np.exp(lsig) ** 2)) * sum((sp.log(data) - np.exp(lu)) ** 2)
               - (len(data) / 2) * sp.log(2 * sp.pi)
               - len(data) * sp.log(np.exp(lsig)) - sum(sp.log(data)))
    loglike = sign * loglike
    return loglike
def run_demo():
    LG.basicConfig(level=LG.INFO)
    random.seed(1)

    # 1. create toy data
    [x, y] = create_toy_data()
    n_dimensions = 1

    # 2. location of uniformly spaced predictions
    X = SP.linspace(0, 10, 100)[:, SP.newaxis]

    if 0:
        # old interface where the covariance function and likelihood are one thing:
        # hyperparameters
        covar_parms = SP.log([1, 1, 1])
        hyperparams = {'covar': covar_parms}

        # construct covariance function
        SECF = se.SqexpCFARD(n_dimensions=n_dimensions)
        noiseCF = noise.NoiseCFISO()
        covar = combinators.SumCF((SECF, noiseCF))
        covar_priors = []
        # scale
        covar_priors.append([lnpriors.lnGammaExp, [1, 2]])
        covar_priors.extend([[lnpriors.lnGammaExp, [1, 1]] for i in xrange(n_dimensions)])
        # noise
        covar_priors.append([lnpriors.lnGammaExp, [1, 1]])
        priors = {'covar': covar_priors}
        likelihood = None

    if 1:
        # new interface with likelihood parameters being decoupled from the covariance function
        likelihood = lik.GaussLikISO()
        covar_parms = SP.log([1, 1])
        hyperparams = {'covar': covar_parms, 'lik': SP.log([1])}

        # construct covariance function
        SECF = se.SqexpCFARD(n_dimensions=n_dimensions)
        covar = SECF
        covar_priors = []
        # scale
        covar_priors.append([lnpriors.lnGammaExp, [1, 2]])
        covar_priors.extend([[lnpriors.lnGammaExp, [1, 1]] for i in xrange(n_dimensions)])
        lik_priors = []
        # noise
        lik_priors.append([lnpriors.lnGammaExp, [1, 1]])
        priors = {'covar': covar_priors, 'lik': lik_priors}

    gp = GP(covar, likelihood=likelihood, x=x, y=y)
    opt_model_params = opt.opt_hyper(gp, hyperparams, priors=priors, gradcheck=False)[0]

    # predict
    [M, S] = gp.predict(opt_model_params, X)

    # create plots
    gpr_plot.plot_sausage(X, M, SP.sqrt(S))
    gpr_plot.plot_training_data(x, y)
    PL.show()
def logloss(act, pred):
    epsilon = 1e-4
    pred = sp.maximum(epsilon, pred)
    pred = sp.minimum(1 - epsilon, pred)
    ll = -1.0 / len(act) * sum(act * sp.log(pred) +
                               sp.subtract(1, act) * sp.log(sp.subtract(1, pred)))
    return ll
def fit(self, kk=None):
    """
    Fit Fourier spectrum with the function set at class instantiation
    ==> NB: fitting is done in logarithmic coordinates
    and fills plotting arrays with data

    --------
    Options:
    --------
    kk (k1,k2) <None>
        spectral interval for function fitting
        by default interval [ kk[1], kk[imax__kk] ] will be fitted
        ==> i.e. k=0 is excluded
    """
    # fitting interval
    if kk:
        ik_min = (self.fft_data.kk[1:self.fft_data.imax__kk] <= kk[0]).nonzero()[0][-1]
        ik_max = (self.fft_data.kk[1:self.fft_data.imax__kk] <= kk[1]).nonzero()[0][-1]
    else:
        ik_min = 1
        ik_max = self.fft_data.imax__kk

    # do fitting
    self.__popt, self.__pcov = scipy.optimize.curve_fit(
        self.__func_fit,
        scipy.log(self.fft_data.kk[ik_min:ik_max]),
        scipy.log(self.fft_data.Ik[ik_min:ik_max]))

    # boundaries of fitted interval
    self.kmin = self.fft_data.kk[ik_min]
    self.kmax = self.fft_data.kk[ik_max]

    # fill plot arrays
    self.kk_plot = scipy.logspace(scipy.log10(self.kmin),
                                  scipy.log10(self.kmax),
                                  self.nk_plot)
    self.Ik_plot = self.fitting_function(self.kk_plot)
def gap(data, refs=None, nrefs=20, ks=range(1, 11), iter=10):
    """
    Compute the Gap statistic for an nxm dataset in data.

    Either give a precomputed set of reference distributions in refs as an
    (n,m,k) scipy array, or state the number k of reference distributions in
    nrefs for automatic generation with a uniform distribution within the
    bounding box of data.

    Give the list of k-values for which you want to compute the statistic in ks.
    """
    shape = data.shape
    if refs is None:
        tops = data.max(axis=0)
        bots = data.min(axis=0)
        dists = scipy.matrix(scipy.diag(tops - bots))

        rands = scipy.random.random_sample(size=(shape[0], shape[1], nrefs))
        for i in range(nrefs):
            rands[:, :, i] = rands[:, :, i] * dists + bots
    else:
        rands = refs

    gaps = scipy.zeros((len(ks),))
    for (i, k) in enumerate(ks):
        (kmc, kml) = scipy.cluster.vq.kmeans2(data, k, iter=iter)
        disp = sum([dst(data[m, :], kmc[kml[m], :]) for m in range(shape[0])])

        refdisps = scipy.zeros((rands.shape[2],))
        print 'For k =', k, 'calculating random distribution #',
        for j in range(rands.shape[2]):
            print j,
            (kmc, kml) = scipy.cluster.vq.kmeans2(rands[:, :, j], k, iter=iter)
            refdisps[j] = sum([dst(rands[m, :, j], kmc[kml[m], :])
                               for m in range(shape[0])])
        gaps[i] = scipy.log(scipy.mean(refdisps)) - scipy.log(disp)
        print ""
    return gaps
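# Usage sketch (my own example, not from the original source; assumes the
# module defines dst, e.g. scipy.spatial.distance.euclidean): the preferred
# number of clusters is usually read off where the gap statistic peaks.
import scipy
import scipy.cluster.vq

data = scipy.random.random_sample((200, 2))
gaps = gap(data, nrefs=10, ks=range(1, 6))
best_k = range(1, 6)[int(scipy.argmax(gaps))]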
def TB_Cv_exceso(self, T, P):
    """Excess constant-volume heat capacity calculated with the
    Trebble-Bishnoi equation of state"""
    a, b, c, d, q1, q2 = self.TB_lib(T, P)
    v = self.TB_V(T, P)
    z = P * v / R_atml / T
    t = 1 + 6 * c / b + c**2 / b**2 + 4 * d**2 / b**2
    tita = abs(t)**0.5
    A = a * P / R_atml**2 / T**2
    B = b * P / R_atml / T
    u = 1 + c / b
    delta = v**2 + (b + c) * v - b * c - d**2
    beta = 1. + q2 * (1 - self.tr(T) + log(self.tr(T)))
    da = -q1 * a / self.Tc
    dda = q1**2 * a / self.Tc**2
    if self.tr(T) <= 1.0:
        db = b / beta * (1 / T - 1 / self.Tc)
        ddb = -q2 * b / beta / T**2
    else:
        db = 0
        ddb = 0
    dt = -db / b**2 * (6 * c + 2 * c**2 / b + 8 * d**2 / b)
    dtita = abs(dt) / 20
    if t >= 0:
        lamda = log((2 * z + B * (u - tita)) / (2 * z + B * (u + tita)))
        dlamda = (db - db * tita - b * dtita) / (2 * v + b + c - b * tita) - \
            (db + db * tita + b * dtita) / (2 * v + b + c + b * tita)
    else:
        lamda = 2 * arctan((2 * z + u * B) / B / tita) - pi
        dlamda = 2 / (1 + ((2 * v + b + c) / b / tita)**2) * \
            (db / b / tita - (2 * v + b + c) * (db / b**2 / tita + dtita / b / tita**2))
    Cv = 1 / b / tita * (dlamda * (a - da * T) - lamda * dda * T -
                         lamda * (a - da * T) * (db / b + dtita / tita)) + \
        (ddb * T + db) * (-R_atml * T / (v - b) + a / b**2 / t *
                          ((v * (3 * c + b) - b * c + c**2 - 2 * d**2) / delta +
                           (3 * c + b) * lamda / b / tita)) + \
        db * T * (-R_atml / (v - b) - R_atml * T * db / (v - b)**2 +
                  1 / b**2 / t * (da - 2 * a * db / b - a * dt / t) *
                  ((v * (3 * c + b) - b * c + c**2 - 2 * d**2) / delta +
                   (3 * c + b) * lamda / b / tita) +
                  a / b**2 / t * (db * (v - c) * (v**2 - 2 * c * v - c**2 + d**2) / delta**2 +
                                  db * lamda / b / tita +
                                  (3 * c + b) / b / tita * (dlamda - lamda * (db / b + dtita / tita))))
    return unidades.SpecificHeat(Cv * 101325 / 1000 / self.peso_molecular, "JkgK")
def KramersKronigFFT(ImX_A):
    '''
    Hilbert transform used to calculate the real part of a function from its
    imaginary part; uses a piecewise cubic interpolated integral kernel of
    the Hilbert transform.
    Use only if len(ImX_A) = 2**m - 1; uses fft from scipy.fftpack.
    '''
    X_A = sp.copy(ImX_A)
    N = int(len(X_A))
    ## be careful with the data type, otherwise it fails for large N
    if N > 3e6:
        A = sp.arange(3, N + 1, dtype='float64')
    else:
        A = sp.arange(3, N + 1)
    X1 = 4.0 * sp.log(1.5)
    X2 = 10.0 * sp.log(4.0 / 3.0) - 6.0 * sp.log(1.5)
    ## filling the kernel
    if N > 3e6:
        Kernel_A = sp.zeros(N - 2, dtype='float64')
    else:
        Kernel_A = sp.zeros(N - 2)
    Kernel_A = (1 - A**2) * ((A - 2) * sp.arctanh(1.0 / (1 - 2 * A)) +
                             (A + 2) * sp.arctanh(1.0 / (1 + 2 * A))) \
        + ((A**3 - 6 * A**2 + 11 * A - 6) * sp.arctanh(1.0 / (3 - 2 * A)) +
           (A + 3) * (A**2 + 3 * A + 2) * sp.arctanh(1.0 / (2 * A + 3))) / 3.0
    Kernel_A = sp.concatenate([-sp.flipud(Kernel_A),
                               sp.array([-X2, -X1, 0.0, X1, X2]), Kernel_A]) / sp.pi
    ## zero-padding the functions for fft
    ImXExt_A = sp.concatenate([X_A[int((N - 1) / 2):], sp.zeros(N + 2),
                               X_A[:int((N - 1) / 2)]])
    KernelExt_A = sp.concatenate([Kernel_A[N:], sp.zeros(1), Kernel_A[:N]])
    ## performing the fft
    ftReXExt_A = -fft(ImXExt_A) * fft(KernelExt_A)
    ReXExt_A = sp.real(ifft(ftReXExt_A))
    ReX_A = sp.concatenate([ReXExt_A[int((3 * N + 3) / 2 + 1):],
                            ReXExt_A[:int((N - 1) / 2 + 1)]])
    return ReX_A
def _visco0(self, rho, T, fase=None, coef=False):
    Visco0 = lambda T: -0.135311743/log(T) + 1.00347841 + 1.20654649*log(T) - \
        0.149564551*log(T)**2 + 0.0125208416*log(T)**3

    def ViscoE(T, rho):
        x = log(T)
        B = -47.5295259/x + 87.6799309 - 42.0741589*x + 8.33128289*x**2 - 0.589252385*x**3
        C = 547.309267/x - 904.870586 + 431.404928*x - 81.4504854*x**2 + 5.37005733*x**3
        D = -1684.39324/x + 3331.08630 - 1632.19172*x + 308.804413*x**2 - 20.2936367*x**3
        return rho.gcc*B + rho.gcc**2*C + rho.gcc**3*D

    if T < 100:
        # Section 4.2.1 for 3.5 < T < 100
        no = Visco0(T)
        ne = ViscoE(T, rho)
        n = exp(no + ne)
    else:
        # Section 4.2.1 for T > 100
        no = 196*T**0.71938*exp(12.451/T - 295.67/T**2 - 4.1249)
        ne = exp(Visco0(T) + ViscoE(T, rho)) - exp(Visco0(T) + ViscoE(T, unidades.Density(0)))
        n = no + ne

    if coef:
        return ne
    else:
        return unidades.Viscosity(n*1e-6, "P")
def _LML_covar(self, hyperparams):
    # calculate marginal likelihood of kronecker GP
    # 1. get covariance structures needed:
    try:
        KV = self.get_covariances(hyperparams)
    except linalg.LinAlgError:
        LG.error("exception caught (%s)" % (str(hyperparams)))
        return 1E6
    # 2. build lml
    LML = 0
    # constant part of negative lml
    LMLc = 0.5 * self.nd * SP.log(2.0 * SP.pi)
    # quadratic form
    Si = KV['Si']
    LMLq = 0.5 * SP.dot(KV['y_rot'].ravel(), KV['YSi'].ravel())
    # determinant stuff
    LMLd = -0.5 * SP.log(Si).sum()

    if VERBOSE:
        print "costly verbose debugging on"
        K = SP.kron(KV['Kr'], KV['Kc']) + SP.diag(KV['Knoise'])
        Ki = SP.linalg.inv(K)
        LMLq_ = 0.5 * SP.dot(SP.dot(self.y.ravel(), Ki), self.y.ravel())
        LMLd_ = 0.5 * 2 * SP.log(SP.linalg.cholesky(K).diagonal()).sum()
        check_dist(LMLq, LMLq_)
        check_dist(LMLd, LMLd_)

    return LMLc + LMLq + LMLd
def tfidf(termFrequency):
    """
    The student must code this.
    """
    gf = sp.sum(termFrequency, axis=1).astype(float)
    p = (termFrequency.T / gf).T
    g = sp.sum(p * sp.log(p + 1) / sp.log(len(p[0, :])), axis=1) + 1
    a = (sp.log(termFrequency + 1).T * g).T
    return a
def NTU_fPR(P, R, flujo, **kwargs):
    """Calculation of the correction factor

    The flow arrangement is given by its acronym:
        CF: Counter flow
        PF: Parallel flow
        CrFMix: Crossflow, both fluids mixed
        CrFSMix: Crossflow, one fluid mixed, other unmixed
        CrFunMix: Crossflow, both fluids unmixed
        1-2TEMAE: 1-2 pass shell and tube exchanger

    kwargs: additional options:
        mixed: mixed stream for CrFSMix
        Cmin, Cmax
    """
    if flujo == "1-2TEMAE":
        if R == 1:
            NTU = log((1 - P) / 2 - 3 * P)
        else:
            E = (1 + R**2)**0.5
            NTU = log((2 - P * (1 + R - E)) / (2 - P * (1 + R + E))) / E
    else:
        if R == 1:
            NTU = P / (1 - P)
        else:
            # standard counterflow P-NTU relation: NTU = ln((1-RP)/(1-P))/(1-R);
            # the original read (1-R/P), which looks like a transcription slip
            NTU = log((1 - R * P) / (1 - P)) / (1 - R)
    return NTU
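# Usage sketch (my own example, not from the original source): transfer units
# needed for P = 0.6 at R = 0.5 in counterflow; the standard relation gives
# ln(0.7/0.4)/0.5, roughly 1.12.
from math import log
ntu = NTU_fPR(0.6, 0.5, "CF")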
def _LML_covar(self, hyperparams, debugging=False):
    """
    log marginal likelihood
    """
    try:
        KV = self.get_covariances(hyperparams, debugging=debugging)
    except LA.LinAlgError:
        LG.error('linalg exception in _LML_covar')
        return 1E6
    except ValueError:
        LG.error('value error in _LML_covar')
        return 1E6

    lml_quad = 0.5 * (KV['Ytilde'] * KV['UYU']).sum()
    lml_det = 0.5 * SP.log(KV['S']).sum()
    lml_const = 0.5 * self.n * self.t * (SP.log(2 * SP.pi))

    if debugging:
        # do calculation without kronecker tricks and compare
        _lml_quad = 0.5 * (KV['alpha'] * KV['Yvec']).sum()
        _lml_det = SP.log(SP.diag(KV['L'])).sum()
        assert SP.allclose(_lml_quad, lml_quad), 'ouch, quadratic form is wrong in _LMLcovar'
        assert SP.allclose(_lml_det, lml_det), 'ouch, ldet is wrong in _LML_covar'

    lml = lml_quad + lml_det + lml_const
    return lml
def cdi_info(energy, h, z, pix, del_x_d, verbose=False):
    """
    h - object size
    z - sample-detector distance
    pix - # of pixels
    del_x_d - pixel size
    """
    x = (pix / 2.) * del_x_d
    l = energy_to_wavelength(energy)

    NF = lambda nh, nl, nz: nh**2. / (nl * nz)
    del_x_s = lambda l, z, x: (l * z) / (2. * x)
    nNF = NF(h, l, z)

    OS = lambda l, z, x, h, pix: ((pix * del_x_s(l, z, x))**2.) / (h**2.)
    nOS = OS(l, z, x, h, pix)

    if verbose:
        pyl.figure()
        zrange = sp.linspace(0, 2 * z, 100)
        pyl.plot(zrange, sp.log(NF(h, l, zrange)))
        pyl.title('NF')
        pyl.xlabel('z [m]')
        pyl.ylabel('log NF')

        pyl.figure()
        pyl.plot(zrange, sp.log(OS(l, zrange, x, h, pix)))
        pyl.title('OS')
        pyl.xlabel('z [m]')
        pyl.ylabel('log OS')

    print 'NF: %1.2e\nOS: %1.2e\ndel_x_d: %1.2e\nw_d: %1.2e\ndel_x_s: %1.2e\nw_s: %1.2e' % \
        (nNF, nOS, del_x_d, pix * del_x_d, del_x_s(l, z, x), del_x_s(l, z, x) * pix)
    aperture_stats(energy, z, x)
def LML(self, params=None, *kw_args):
    """
    calculate LML
    """
    if params is not None:
        self.setParams(params)

    self._update_cache()

    start = TIME.time()

    # 1. const term
    lml = self.N * self.P * SP.log(2 * SP.pi)

    # 2. logdet term
    lml += SP.sum(SP.log(self.cache['Sc2'])) * self.N
    lml += 2 * SP.log(SP.diag(self.cache['cholB'])).sum()

    # 3. quadratic term
    lml += SP.sum(self.cache['LY'] * self.cache['LY'])
    lml -= SP.sum(self.cache['WLY'] * self.cache['BiWLY'])

    lml *= 0.5

    smartSum(self.time, 'lml', TIME.time() - start)
    smartSum(self.count, 'lml', 1)

    return lml
def start(self, current, selections):
    current.progress.begin('Creating Spectrogram')
    signals = current.analog_signals(self.which_signals + 1)

    if not signals:
        current.progress.done()
        raise SpykeException('No signals selected!')

    num_signals = len(signals)
    columns = int(round(sp.sqrt(num_signals)))

    current.progress.set_ticks(num_signals)
    samples = self.nfft_index[self.fft_samples]
    win = PlotDialog(toolbar=True,
                     wintitle="Signal Spectrogram (FFT window size %d)" % samples)

    for c in xrange(num_signals):
        pW = BaseImageWidget(win, yreverse=False, lock_aspect_ratio=False)
        plot = pW.plot
        s = signals[c]

        # Calculate spectrogram and create plot
        v = mlab.specgram(s, NFFT=samples, noverlap=samples / 2,
                          Fs=s.sampling_rate)
        interpolation = 'nearest'
        if self.interpolate:
            interpolation = 'linear'
        img = make.image(sp.log(v[0]), ydata=[v[1][0], v[1][-1]],
                         xdata=[v[2][0], v[2][-1]],
                         interpolation=interpolation)
        plot.add_item(img)

        # Labels etc.
        if not self.show_color_bar:
            plot.disable_unused_axes()
        title = ''
        if s.recordingchannel and s.recordingchannel.name:
            title = s.recordingchannel.name
        if s.segment and s.segment.name:
            if title:
                title += ' , '
            title += s.segment.name
        plot.set_title(title)
        plot.set_axis_title(plot.Y_LEFT, 'Frequency')
        plot.set_axis_unit(plot.Y_LEFT, s.sampling_rate.dimensionality.string)
        plot.set_axis_title(plot.X_BOTTOM, 'Time')
        time_unit = (1 / s.sampling_rate).simplified
        plot.set_axis_unit(plot.X_BOTTOM, time_unit.dimensionality.string)
        win.add_plot_widget(pW, c, column=c % columns)
        current.progress.step()

    current.progress.done()
    win.add_custom_image_tools()
    win.add_x_synchronization_option(True, range(num_signals))
    win.add_y_synchronization_option(True, range(num_signals))
    win.show()
def cvlognet(fit, lambdau, x, y, weights, offset, foldid, ptype, grouped,
             keep=False):
    typenames = {'deviance': 'Binomial Deviance', 'mse': 'Mean-Squared Error',
                 'mae': 'Mean Absolute Error', 'auc': 'AUC',
                 'class': 'Misclassification Error'}
    if ptype == 'default':
        ptype = 'deviance'

    ptypeList = ['mse', 'mae', 'deviance', 'auc', 'class']
    if not ptype in ptypeList:
        print('Warning: only', ptypeList, "available for binomial models; 'deviance' used")
        ptype = 'deviance'

    prob_min = 1.0e-5
    prob_max = 1 - prob_min
    nc = y.shape[1]
    if nc == 1:
        classes, sy = scipy.unique(y, return_inverse=True)
        nc = len(classes)
        indexes = scipy.eye(nc, nc)
        y = indexes[sy, :]
    else:
        classes = scipy.arange(nc) + 1  # 1:nc

    N = y.size
    nfolds = scipy.amax(foldid) + 1
    # the original compared the builtin `type` to 'auc'; ptype is intended
    if (N / nfolds < 10) and (ptype == 'auc'):
        print('Warning: Too few (<10) observations per fold for type.measure=auc in cvlognet')
        print('Warning: changed to type.measure = deviance. Alternately, use smaller value')
        print('Warning: for nfolds')
        ptype = 'deviance'
    if (N / nfolds < 3) and grouped:
        print('Warning: option grouped = False enforced in cvglmnet as there are < 3 observations per fold')
        grouped = False

    is_offset = not (len(offset) == 0)
    predmat = scipy.ones([y.shape[0], lambdau.size]) * scipy.NAN
    nfolds = scipy.amax(foldid) + 1
    nlams = []
    for i in range(nfolds):
        which = foldid == i
        fitobj = fit[i].copy()
        if is_offset:
            off_sub = offset[which, ]
        else:
            off_sub = scipy.empty([0])
        preds = glmnetPredict(fitobj, x[which, ], scipy.empty([0]), 'response',
                              False, off_sub)
        nlami = scipy.size(fit[i]['lambdau'])
        predmat[which, 0:nlami] = preds
        nlams.append(nlami)
    # convert nlams to scipy array
    nlams = scipy.array(nlams, dtype=scipy.integer)

    if ptype == 'auc':
        cvraw = scipy.zeros([nfolds, lambdau.size]) * scipy.NaN
        good = scipy.zeros([nfolds, lambdau.size])
        for i in range(nfolds):
            good[i, 0:nlams[i]] = 1
            which = foldid == i
            for j in range(nlams[i]):
                cvraw[i, j] = auc_mat(y[which, ], predmat[which, j], weights[which])
        N = scipy.sum(good, axis=0)
        sweights = scipy.zeros([nfolds, 1])
        for i in range(nfolds):
            sweights[i] = scipy.sum(weights[foldid == i], axis=0)
        weights = sweights
    else:
        ywt = scipy.sum(y, axis=1, keepdims=True)
        y = y / scipy.tile(ywt, [1, y.shape[1]])
        weights = weights * ywt
        N = y.shape[0] - scipy.sum(scipy.isnan(predmat), axis=0, keepdims=True)
        yy1 = scipy.tile(y[:, 0:1], [1, lambdau.size])
        yy2 = scipy.tile(y[:, 1:2], [1, lambdau.size])
        if ptype == 'mse':
            cvraw = (yy1 - (1 - predmat))**2 + (yy2 - (1 - predmat))**2
        elif ptype == 'deviance':
            predmat = scipy.minimum(scipy.maximum(predmat, prob_min), prob_max)
            lp = yy1 * scipy.log(1 - predmat) + yy2 * scipy.log(predmat)
            ly = scipy.log(y)
            ly[y == 0] = 0
            ly = scipy.dot(y * ly, scipy.array([1.0, 1.0]).reshape([2, 1]))
            cvraw = 2 * (scipy.tile(ly, [1, lambdau.size]) - lp)
        elif ptype == 'mae':
            cvraw = scipy.absolute(yy1 - (1 - predmat)) + scipy.absolute(yy2 - (1 - predmat))
        elif ptype == 'class':
            cvraw = yy1 * (predmat > 0.5) + yy2 * (predmat <= 0.5)

    if y.size / nfolds < 3 and grouped == True:
        print('Option grouped=false enforced in cv.glmnet, since < 3 observations per fold')
        grouped = False

    if grouped == True:
        cvob = cvcompute(cvraw, weights, foldid, nlams)
        cvraw = cvob['cvraw']
        weights = cvob['weights']
        N = cvob['N']

    cvm = wtmean(cvraw, weights)
    sqccv = (cvraw - cvm)**2
    cvsd = scipy.sqrt(wtmean(sqccv, weights) / (N - 1))

    result = dict()
    result['cvm'] = cvm
    result['cvsd'] = cvsd
    result['name'] = typenames[ptype]

    if keep:
        result['fit_preval'] = predmat

    return result
def f_expected(u):
    return (sp.log(u) - sp.log(d_observed)) / (-sp.log(d_observed)) * \
        (u > d_observed)
def test_gets_modes_by_scan(self):
    nf = self.nf
    nt = self.nt
    dt = self.dt
    bw = self.bw
    nb = self.nb
    # Give every channel a different thermal noise floor.
    thermal_norm = 1.0 + 1.0 / nf * sp.arange(nf)  # K**2/Hz
    self.data *= sp.sqrt(thermal_norm * bw * 2)
    n_time = self.data.shape[0]
    # Now make a 1/f like noise component in a few frequency modes.
    n_modes = 3
    index = -0.8 * (2.0 - sp.arange(n_modes, dtype=float) / n_modes)
    amp = 1.2 * (3.**(n_modes - 1. - sp.arange(n_modes, dtype=float)))  # K**2/Hz
    f_0 = 1.0  # Hz
    modes = sp.empty((n_modes, nf))
    for ii in range(n_modes):
        correlated_overf = noise_power.generate_overf_noise(
            amp[ii], index[ii], f_0, dt, n_time)
        # Generate the frequency mode.  They should all be orthonormal.
        mode = sp.sin(2. * sp.pi * (ii + 1) * sp.arange(nf, dtype=float) / nf
                      + 6.4 * (ii + 3))
        mode /= sp.sqrt(sp.sum(mode**2))
        modes[ii] = mode
        self.data += correlated_overf[:, None, None, None] * mode
    # Add a subdominant general 1/f noise to all channels.
    general_amp = 0.1
    general_index = -0.9
    general_cross_over = f_0 * general_amp**(-1. / general_index)
    for ii in range(nf):
        tmp_a = general_amp * thermal_norm[ii]
        correlated_overf = noise_power.generate_overf_noise(
            tmp_a, general_index, f_0, dt, n_time)
        self.data[:, 0, :, ii] += correlated_overf[:, None]
    # Now put the data into the form of the real data.
    Blocks = self.make_blocks()
    # Measure all the noise parameters.
    model_name = 'freq_modes_over_f_' + str(n_modes)
    parameters = mn.measure_noise_parameters(Blocks, [model_name],
                                             split_scans=True)
    for pol, pol_params in parameters.iteritems():
        for ii in range(n_modes):
            mode_noise = pol_params[model_name]['over_f_mode_' + str(ii)]
            self.assertTrue(sp.allclose(mode_noise['amplitude'], amp[ii],
                                        rtol=0.5))
            self.assertTrue(sp.allclose(mode_noise['index'], index[ii],
                                        atol=0.2))
            #thermal_proj = sp.sum(thermal_norm * modes[ii,:]**2)
            #self.assertTrue(sp.allclose(mode_noise['thermal'], thermal_proj,
            #                            rtol=0.5))
            self.assertTrue(abs(sp.dot(mode_noise['mode'], modes[ii, :])) > 0.90)
        thermal = pol_params[model_name]['thermal']
        loss = float(nf - n_modes) / nf
        self.assertTrue(sp.allclose(thermal, thermal_norm * loss, rtol=0.4))
        measured_general_ind = pol_params[model_name]['all_channel_index']
        measured_corner = pol_params[model_name]['all_channel_corner_f']
        if pol == 1:
            self.assertTrue(sp.allclose(measured_general_ind, general_index,
                                        atol=0.4))
            # Only need logarithmic accuracy on the corner.
            self.assertTrue(sp.allclose(sp.log(measured_corner),
                                        sp.log(general_cross_over), atol=1.5))
        elif pol == 3:
            self.assertTrue(measured_corner < 4. / dt / nt)
def nLLeval_test(self, y_test, beta, h2=0.0, logdelta=None, delta=None,
                 sigma2=1.0, Kstar_star=None, robust=False):
    """
    compute out-of-sample log-likelihood

    robust: boolean
            indicates if eigenvalues will be truncated at 1E-9 or 1E-4. The
            former (default) one was used in FastLMMC, but may lead to
            numerically unstable solutions.
    """
    assert y_test.ndim == 1, "y_test should have 1 dimension"
    mu = self.predictMean(beta, h2=h2, logdelta=logdelta, delta=delta)
    res = y_test - mu
    sigma = self.predictVariance(h2=h2, logdelta=logdelta, delta=delta,
                                 sigma2=sigma2, Kstar_star=Kstar_star)

    #TODO: benchmark, record speed difference
    """
    # efficient computation of: (y - mu)^T sigma2^{-1} (y - mu)
    # Solve the linear system x = (L L^T)^-1 res

    try:
        L = SP.linalg.cho_factor(sigma)
        res_sig = SP.linalg.cho_solve(L, res)
        logdetK = NP.linalg.slogdet(sigma)[1]
    except Exception, detail:
        print "Cholesky failed, using eigen-value decomposition!"
    """

    [S_, U_] = LA.eigh(sigma)
    if robust:
        S_nonz = (S_ > 1E-4)
    else:
        S_nonz = (S_ > 1E-9)
    assert sum(S_nonz) > 0, "Some eigenvalues should be nonzero"
    S = S_[S_nonz]
    U = U_[:, S_nonz]
    Sdi = 1 / S

    res_sig = res.T.dot(Sdi * U).dot(U.T)
    logdetK = SP.log(S).sum()

    # some sanity checks
    if False:
        res_sig3 = SP.linalg.pinv(sigma).dot(res)
        NP.testing.assert_array_almost_equal(res_sig, res_sig3, decimal=2)

    # see Carl Rasmussen's book on GPs, equation 5.10, or
    term1 = -0.5 * logdetK
    # Change the inputs to the functions so that these are vectors, not 1xn, nx1
    term2 = -0.5 * SP.dot(res_sig.reshape(-1).T, res.reshape(-1))
    term3 = -0.5 * len(res) * SP.log(2 * SP.pi)

    if term2 < -10000:
        logging.warning("looks like nLLeval_test is running into numerical difficulties")
        SC = S.copy()
        SC.sort()
        logging.warning(["delta:", delta, "log det", logdetK, "term 2", term2,
                         "term 3:", term3])
        logging.warning(["largest eigv:", SC[-1], "second largest eigv:", SC[-2],
                         "smallest eigv:", SC[0]])
        logging.warning(["ratio 1large/2large:", SC[-1] / SC[-2],
                         "ratio lrg/small:", SC[-1] / SC[0]])

    neg_log_likelihood = -(term1 + term2 + term3)
    return neg_log_likelihood
def nLLeval(self, h2=0.0, REML=True, logdelta=None, delta=None, dof=None,
            scale=1.0, penalty=0.0):
    '''
    evaluate -ln( N( U^T*y | U^T*X*beta , h2*S + (1-h2)*I ) ),
    where ((1-a2)*K0 + a2*K1) = USU^T
    --------------------------------------------------------------------------
    Input:
    h2      : mixture weight between K and Identity (environmental noise)
    REML    : boolean
              if True   : compute REML
              if False  : compute ML
    dof     : Degrees of freedom of the Multivariate student-t
              (default None uses multivariate Normal likelihood)
    logdelta: log(delta) allows to optionally parameterize in delta space
    delta   : delta     allows to optionally parameterize in delta space
    scale   : Scale parameter the multiplies the Covariance matrix (default 1.0)
    --------------------------------------------------------------------------
    Output dictionary:
    'nLL'       : negative log-likelihood
    'sigma2'    : the model variance sigma^2
    'beta'      : [D*1] array of fixed effects weights beta
    'h2'        : mixture weight between Covariance and noise
    'REML'      : True: REML was computed, False: ML was computed
    'a2'        : mixture weight between K0 and K1
    'dof'       : Degrees of freedom of the Multivariate student-t
                  (default None uses multivariate Normal likelihood)
    'scale'     : Scale parameter that multiplies the Covariance matrix (default 1.0)
    --------------------------------------------------------------------------
    '''
    if (h2 < 0.0) or (h2 > 1.0):
        return {'nLL': 3E20, 'h2': h2, 'REML': REML, 'scale': scale}
    k = self.S.shape[0]
    N = self.y.shape[0]
    D = self.UX.shape[1]

    #if REML == True:
    #    # this needs to be fixed, please see test_gwas.py for details
    #    raise NotImplementedError("this feature is not ready to use at this time, please use lmm_cov.py instead")

    if logdelta is not None:
        delta = SP.exp(logdelta)

    if delta is not None:
        Sd = (self.S + delta) * scale
    else:
        Sd = (h2 * self.S + (1.0 - h2)) * scale

    UXS = self.UX / NP.lib.stride_tricks.as_strided(
        Sd, (Sd.size, self.UX.shape[1]), (Sd.itemsize, 0))
    UyS = self.Uy / Sd

    XKX = UXS.T.dot(self.UX)
    XKy = UXS.T.dot(self.Uy)
    yKy = UyS.T.dot(self.Uy)

    logdetK = SP.log(Sd).sum()

    if (k < N):  # low rank part
        # determine normalization factor
        if delta is not None:
            denom = (delta * scale)
        else:
            denom = ((1.0 - h2) * scale)

        XKX += self.UUX.T.dot(self.UUX) / denom
        XKy += self.UUX.T.dot(self.UUy) / denom
        yKy += self.UUy.T.dot(self.UUy) / denom
        logdetK += (N - k) * SP.log(denom)

    # proximal contamination (see Supplement Note 2: An Efficient Algorithm
    # for Avoiding Proximal Contamination), available at:
    # http://www.nature.com/nmeth/journal/v9/n6/extref/nmeth.2037-S1.pdf
    # exclude SNPs from the RRM in the likelihood evaluation
    if len(self.exclude_idx) > 0:
        num_exclude = len(self.exclude_idx)

        # consider only excluded SNPs
        G_exclude = self.G[:, self.exclude_idx]
        self.UW = self.U.T.dot(G_exclude)  # needed for proximal contamination
        UWS = self.UW / NP.lib.stride_tricks.as_strided(
            Sd, (Sd.size, num_exclude), (Sd.itemsize, 0))
        assert UWS.shape == (k, num_exclude)

        WW = NP.eye(num_exclude) - UWS.T.dot(self.UW)
        WX = UWS.T.dot(self.UX)
        Wy = UWS.T.dot(self.Uy)
        assert WW.shape == (num_exclude, num_exclude)
        assert WX.shape == (num_exclude, D)
        assert Wy.shape == (num_exclude,)

        if (k < N):  # low rank part
            self.UUW = G_exclude - self.U.dot(self.UW)
            WW += self.UUW.T.dot(self.UUW) / denom
            WX += self.UUW.T.dot(self.UUX) / denom
            Wy += self.UUW.T.dot(self.UUy) / denom

        #TODO: do cholesky, if fails do eigh
        # compute inverse efficiently
        [S_WW, U_WW] = LA.eigh(WW)

        UWX = U_WW.T.dot(WX)
        UWy = U_WW.T.dot(Wy)
        assert UWX.shape == (num_exclude, D)
        assert UWy.shape == (num_exclude,)

        # compute S_WW^{-1} * UWX
        WX = UWX / NP.lib.stride_tricks.as_strided(
            S_WW, (S_WW.size, UWX.shape[1]), (S_WW.itemsize, 0))
        # compute S_WW^{-1} * UWy
        Wy = UWy / S_WW
        # determinant update
        logdetK += SP.log(S_WW).sum()
        assert WX.shape == (num_exclude, D)
        assert Wy.shape == (num_exclude,)

        # perform updates (instantiations for a and b in Equation (1.5) of Supplement)
        yKy += UWy.T.dot(Wy)
        XKy += UWX.T.dot(Wy)
        XKX += UWX.T.dot(WX)

    #######

    [SxKx, UxKx] = LA.eigh(XKX)
    # optionally regularize the beta weights by penalty
    if penalty > 0.0:
        SxKx += penalty
    i_pos = SxKx > 1E-10
    beta = SP.dot(UxKx[:, i_pos], (SP.dot(UxKx[:, i_pos].T, XKy) / SxKx[i_pos]))

    r2 = yKy - XKy.dot(beta)

    if dof is None:  # Use the Multivariate Gaussian
        if REML:
            XX = self.X.T.dot(self.X)
            [Sxx, Uxx] = LA.eigh(XX)
            logdetXX = SP.log(Sxx).sum()
            logdetXKX = SP.log(SxKx).sum()
            sigma2 = r2 / (N - D)
            nLL = 0.5 * (logdetK + logdetXKX - logdetXX +
                         (N - D) * (SP.log(2.0 * SP.pi * sigma2) + 1))
        else:
            sigma2 = r2 / (N)
            nLL = 0.5 * (logdetK + N * (SP.log(2.0 * SP.pi * sigma2) + 1))
        result = {
            'nLL': nLL,
            'sigma2': sigma2,
            'beta': beta,
            'h2': h2,
            'REML': REML,
            'a2': self.a2,
            'scale': scale
        }
    else:  # Use multivariate student-t
        if REML:
            XX = self.X.T.dot(self.X)
            [Sxx, Uxx] = LA.eigh(XX)
            logdetXX = SP.log(Sxx).sum()
            logdetXKX = SP.log(SxKx).sum()
            nLL = 0.5 * (logdetK + logdetXKX - logdetXX +
                         (dof + (N - D)) * SP.log(1.0 + r2 / dof))
            nLL += 0.5 * (N - D) * SP.log(dof * SP.pi) + SS.gammaln(0.5 * dof) - \
                SS.gammaln(0.5 * (dof + (N - D)))
        else:
            nLL = 0.5 * (logdetK + (dof + N) * SP.log(1.0 + r2 / dof))
            nLL += 0.5 * N * SP.log(dof * SP.pi) + SS.gammaln(0.5 * dof) - \
                SS.gammaln(0.5 * (dof + N))
        result = {
            'nLL': nLL,
            'dof': dof,
            'beta': beta,
            'h2': h2,
            'REML': REML,
            'a2': self.a2,
            'scale': scale
        }
    assert SP.all(SP.isreal(nLL)), "nLL has an imaginary component, possibly due to constant covariates"
    return result
def check_ExpCM_derivatives(self):
    """Use `sympy` to check values and derivatives of `ExpCM` attributes."""
    (Prxy, Qxy, phiw, beta, omega, eta0, eta1, eta2, kappa) = sympy.symbols(
        'Prxy, Qxy, phiw, beta, omega, eta0, eta1, eta2, kappa')
    values = {'beta': self.params['beta'],
              'omega': self.params['omega'],
              'kappa': self.params['kappa'],
              'eta0': self.params['eta'][0],
              'eta1': self.params['eta'][1],
              'eta2': self.params['eta'][2],
              }
    # check Prxy
    for r in range(self.nsites):
        for x in range(N_CODON):
            pirAx = self.prefs[r][INDEX_TO_AA[CODON_TO_AA[x]]]
            for y in [yy for yy in range(N_CODON) if yy != x]:
                pirAy = self.prefs[r][INDEX_TO_AA[CODON_TO_AA[y]]]
                if not CODON_SINGLEMUT[x][y]:
                    Prxy = 0
                else:
                    w = NT_TO_INDEX[[ynt for (xnt, ynt) in zip(
                        INDEX_TO_CODON[x], INDEX_TO_CODON[y]) if xnt != ynt][0]]
                    if w == 0:
                        phiw = 1 - eta0
                    elif w == 1:
                        phiw = eta0 * (1 - eta1)
                    elif w == 2:
                        phiw = eta0 * eta1 * (1 - eta2)
                    elif w == 3:
                        phiw = eta0 * eta1 * eta2
                    else:
                        raise ValueError("Invalid w")
                    self.assertTrue(scipy.allclose(float(phiw.subs(values)),
                                                   self.expcm.phi[w]))
                    if CODON_TRANSITION[x][y]:
                        Qxy = kappa * phiw
                    else:
                        Qxy = phiw
                    self.assertTrue(scipy.allclose(float(Qxy.subs(values)),
                                                   self.expcm.Qxy[x][y]))
                    if CODON_NONSYN[x][y]:
                        if pirAx == pirAy:
                            Prxy = Qxy * omega
                        else:
                            Prxy = Qxy * omega * (
                                -beta * scipy.log(pirAx / pirAy) /
                                (1 - (pirAx / pirAy)**beta))
                    else:
                        Prxy = Qxy
                for (name, actual, expect) in [
                        ('Prxy', self.expcm.Prxy[r][x][y], Prxy),
                        ('dPrxy_dkappa', self.expcm.dPrxy['kappa'][r][x][y],
                         sympy.diff(Prxy, kappa)),
                        ('dPrxy_domega', self.expcm.dPrxy['omega'][r][x][y],
                         sympy.diff(Prxy, omega)),
                        ('dPrxy_dbeta', self.expcm.dPrxy['beta'][r][x][y],
                         sympy.diff(Prxy, beta)),
                        ('dPrxy_deta0', self.expcm.dPrxy['eta'][0][r][x][y],
                         sympy.diff(Prxy, eta0)),
                        ('dPrxy_deta1', self.expcm.dPrxy['eta'][1][r][x][y],
                         sympy.diff(Prxy, eta1)),
                        ('dPrxy_deta2', self.expcm.dPrxy['eta'][2][r][x][y],
                         sympy.diff(Prxy, eta2)),
                        ]:
                    if Prxy == 0:
                        expectval = 0
                    else:
                        expectval = float(expect.subs(values))
                    self.assertTrue(
                        scipy.allclose(actual, expectval, atol=1e-4),
                        "{0}: {1} vs {2}".format(name, actual, expectval))
    # check prx
    qxs = [sympy.Symbol('qx{0}'.format(x)) for x in range(N_CODON)]
    frxs = [sympy.Symbol('frx{0}'.format(x)) for x in range(N_CODON)]
    prx = sympy.Symbol('prx')
    phixs = [sympy.Symbol('phix{0}'.format(w)) for w in range(3)]
    for r in range(self.nsites):
        for x in range(N_CODON):
            pirAx = self.prefs[r][INDEX_TO_AA[CODON_TO_AA[x]]]
            frxs[x] = pirAx**beta
            xcodon = INDEX_TO_CODON[x]
            assert len(phixs) == len(xcodon)
            for (w, xwnt) in enumerate(xcodon):
                xw = NT_TO_INDEX[xwnt]
                if xw == 0:
                    phixs[w] = 1 - eta0
                elif xw == 1:
                    phixs[w] = eta0 * (1 - eta1)
                elif xw == 2:
                    phixs[w] = eta0 * eta1 * (1 - eta2)
                elif xw == 3:
                    phixs[w] = eta0 * eta1 * eta2
                else:
                    raise ValueError("invalid xw")
            qxs[x] = phixs[0] * phixs[1] * phixs[2]
        for x in range(N_CODON):
            prx = frxs[x] * qxs[x] / sum(frx * qx for (frx, qx) in zip(frxs, qxs))
            for (name, actual, expect) in [
                    ('prx', self.expcm.prx[r][x], prx),
                    ('dprx_dbeta', self.expcm.dprx['beta'][r][x],
                     sympy.diff(prx, beta)),
                    ('dprx_deta0', self.expcm.dprx['eta'][0][r][x],
                     sympy.diff(prx, eta0)),
                    ('dprx_deta1', self.expcm.dprx['eta'][1][r][x],
                     sympy.diff(prx, eta1)),
                    ('dprx_deta2', self.expcm.dprx['eta'][2][r][x],
                     sympy.diff(prx, eta2)),
                    ]:
                expectval = float(expect.subs(values))
                self.assertTrue(
                    scipy.allclose(actual, expectval, atol=1e-5),
                    "{0}: {1} vs {2}".format(name, actual, expectval))
def _fprime(self, sigma):
    logSoverK = log(self.S / self.K)
    n12 = (self.r + sigma**2 / 2) * self.T
    numerd1 = logSoverK + n12
    d1 = numerd1 / (sigma * sqrt(self.T))
    return self.S * sqrt(self.T) * norm.pdf(d1) * exp(-self.r * self.T)
def _BlackScholesCall(S, K, T, sigma, r, q):
    d1 = (log(S / K) + (r - q + (sigma**2) / 2) * T) / (sigma * sqrt(T))
    d2 = d1 - sigma * sqrt(T)
    return S * exp(-q * T) * norm.cdf(d1) - K * exp(-r * T) * norm.cdf(d2)
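# Usage sketch (my own example, not from the original source; assumes log,
# sqrt, exp and scipy.stats.norm are imported as in the surrounding module):
# an at-the-money one-year call under 20% volatility and 1% rates.
call = _BlackScholesCall(S=100.0, K=100.0, T=1.0, sigma=0.2, r=0.01, q=0.0)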
Dd = bowD.D

if Controller == 1:
    # Controller without integral part
    cl_polesd = sp.exp(cl_poles * Ts)  # Desired discrete poles
    k = place(Ad, Bd, cl_polesd)
elif Controller == 2:
    # LQR Controller
    Q = np.diag([5, 5, 10, 1])
    Q = np.diag([20, 10, 100, 10])  # the second weighting overrides the first
    R = [4]
    k, S, E = rp.dlqr(Ad, Bd, Q, R)

# Observer design parameters
preg = sp.log(E[0]) / Ts
w0 = max(abs(preg))  # process spectral radius

# Modify poles for observer
cl_poles = w0 / wn * cl_poles
cl_2poles = w0 / wn * cl_2poles

if Observer == 1:
    # Reduced order observer
    T = [[0, 0, 1, 0], [0, 0, 0, 1]]
    obs_polesc = obs_k * cl_2poles
    obs_polesd = sp.exp(obs_polesc * Ts)
    r_obs = red_obs(bowD, T, obs_polesd)

    # Put Observer and controller together (compact form)
    ctr = comp_form(bowD, r_obs, k)
def stdatmos(**altitude):
    """
    Evaluate the standard atmosphere at any given altitude.

    This function allows input of a single variable to calculate
    the atmospheric properties at different altitudes. The function
    can work with different types of standard models. The default
    model values are set as defined by the International Standard
    Atmosphere.

    Parameters
    ----------
    model : dict, optional
        A standard atmosphere model as obtained from stdmodel.
        Partial models are allowed. The remaining model values
        default to the International Standard Atmosphere.
    h or geom : array_like
        Geometrical altitude [meters].
    geop : array_like
        Geopotential altitude [meters].
    abs : array_like
        Absolute altitude [meters].
    T : array_like
        Temperature altitude [K].
    P : array_like
        Pressure altitude [Pa].
    rho : array_like
        Density altitude [kg/m^3].

    Returns
    -------
    out : (h, T, P, rho, a)
        Tuple of geometrical altitude, temperature, pressure, density
        and speed of sound at given altitudes.

    Notes
    -----
    This function assumes a continuous lapse rate below 0 altitude and
    above the top layer, which allows for extrapolation outside the
    specified region (0 to 86 km in ISA). Temperature altitude is
    obtained as the first altitude from 0 where the specified
    temperature exists.

    See Also
    --------
    stdmodel

    Examples
    --------
    >>> stdatmos(P=[1e5, 1e4, 1e3])[0]
    [110.8864127251899, 16221.007939493587, 31207.084373790043]
    >>> stdatmos(h=sp.linspace(-2000, 81000))
    (array, array, array, array, array)
    """

    # pop atmospherical model from input
    model = altitude.pop("model", {})

    # check if model is a dictionary
    if type(model) is not DictType:
        raise Exception("Custom atmosphere model is incompatible.")

    # check if a single remaining input exists
    # (identity checks with literals replaced by equality comparisons)
    if len(altitude) != 1:
        raise Exception("Function needs exactly one altitude input.")

    # pop the altitude input
    mtype, alt = altitude.popitem()

    # check if the altitude input type is valid
    if mtype not in ["h", "geom", "geop", "abs", "T", "P", "rho"]:
        raise Exception("The altitude input should be a valid input type.")

    # convert the input to numpy arrays
    itype, alt = to_ndarray(alt)

    # model values
    R = model.get("R", 287.053)               # gas constant [J/kg/K] (air)
    gamma = model.get("gamma", 1.4)           # specific heat ratio [-] (air)
    g = model.get("g0", 9.80665)              # gravity [m/s^2] (earth)
    radius = model.get("radius", 6356766.0)   # earth radius [m] (earth)
    Tb = model.get("T0", 288.15)              # base temperature [K]
    Pb = model.get("P0", 101325.0)            # base pressure [Pa]

    # model lapse rate and height layers
    Hb = sp.array([0, 11, 20, 32, 47, 51, 71, sp.inf], sp.float64) * 1000
    Lr = sp.array([-6.5, 0, 1, 2.8, 0, -2.8, -2], sp.float64) * 0.001
    Hb = model.get("layers", Hb)              # layer height [km]
    Lr = model.get("lapserate", Lr)           # lapse rate [K/km]

    # preshape solution arrays
    T = sp.ones(alt.shape, sp.float64) * sp.nan
    P = sp.ones(alt.shape, sp.float64) * sp.nan

    # define the height array
    if mtype in ["h", "geom"]:
        h = alt * radius / (radius + alt)
    elif mtype == "geop":
        h = alt
    elif mtype == "abs":
        h = alt - radius
    else:
        h = sp.ones(alt.shape, sp.float64) * sp.nan

    for lr, hb, ht in zip(Lr, Hb[:-1], Hb[1:]):
        # calculate the temperature at layer top
        Tt = Tb + lr * (ht - hb)

        if mtype == "T":
            # break the loop if there are no nans in the solution array
            if not sp.isnan(h).any():
                break

            # select all temperatures in current layer
            if lr == 0:
                sel = (alt == Tb)
            else:
                s = sp.sign(lr)
                bot = -sp.inf if hb == 0 else Tb * s
                top = sp.inf if ht == Hb[-1] else Tt * s
                sel = sp.logical_and(alt * s >= bot, alt * s < top)

            # only select when not already solved
            sel = sp.logical_and(sel, sp.isnan(h))

            # temperature is given as input
            T[sel] = alt[sel]

            # solve for height and pressure
            if lr == 0:
                h[sel] = hb
                P[sel] = Pb
            else:
                h[sel] = hb + (1.0 / lr) * (T[sel] - Tb)
                P[sel] = Pb * (T[sel] / Tb)**(-g / (lr * R))

        elif mtype in ["P", "rho"]:
            # choose base value as pressure or density
            vb = Pb if mtype == "P" else Pb / (R * Tb)

            # select all input values below given pressure or density
            sel = alt <= (sp.inf if hb == 0 else vb)

            # break if nothing is selected
            if not sel.any():
                break

            # solve for temperature and height
            if lr == 0:
                T[sel] = Tb
                h[sel] = hb - sp.log(alt[sel] / vb) * R * Tb / g
            else:
                x = g if mtype == "P" else (lr * R + g)
                T[sel] = Tb * (alt[sel] / vb)**(-lr * R / x)
                h[sel] = hb + (T[sel] - Tb) / lr

            # pressure is given as input
            P[sel] = alt[sel] if mtype == "P" else alt[sel] * R * T[sel]

        else:
            # select all height values above layer base
            sel = h >= (-sp.inf if hb == 0 else hb)

            # break if nothing is selected
            if not sel.any():
                break

            # solve for temperature and pressure
            if lr == 0:
                T[sel] = Tb
                P[sel] = Pb * sp.exp((-g / (R * Tb)) * (h[sel] - hb))
            else:
                T[sel] = Tb + lr * (h[sel] - hb)
                P[sel] = Pb * (T[sel] / Tb)**(-g / (lr * R))

        # update pressure base value
        if lr == 0:
            Pb *= sp.exp((-g / (R * Tb)) * (ht - hb))
        else:
            Pb *= (Tt / Tb)**(-g / (lr * R))

        # update temperature base value
        Tb = Tt

    # convert geopotential altitude to geometrical altitude
    h *= radius / (radius - h)

    # density
    rho = P / (R * T)

    # speed of sound
    a = sp.sqrt(gamma * R * T)

    return from_ndarray(itype, h, T, P, rho, a)
def _BlackScholesPut(S, K, T, sigma, r, q):
    d1 = (log(S / K) + (r - q + (sigma**2) / 2) * T) / (sigma * sqrt(T))
    d2 = d1 - sigma * sqrt(T)
    return K * exp(-r * T) * norm.cdf(-d2) - S * exp(-q * T) * norm.cdf(-d1)
def parse_sum_stats_custom(filename=None, bimfile=None, only_hm3=False,
                           hdf5_file=None, n=None, ch=None, pos=None,
                           A1=None, A2=None, reffreq=None, case_freq=None,
                           control_freq=None, case_n=None, control_n=None,
                           info=None, rs=None, pval=None, eff=None,
                           ncol=None, input_is_beta=False,
                           match_genomic_pos=False, debug=False,
                           summary_dict=None):
    # Check required fields are here
    assert not A2 is None, 'Require header for non-effective allele'
    assert not A1 is None, 'Require header for effective allele'
    assert not rs is None, 'Require header for RS ID'
    assert not eff is None, 'Require header for Statistics'
    assert not pval is None, 'Require header for pval'
    assert not ncol is None or not n is None or (
        control_n is not None and case_n is not None), 'Require either N or NCOL information'

    if ch is None:
        assert not bimfile is None, 'Require bimfile when chromosome header not provided'
        print("Chromosome Header not provided, will use info from bim file")
    if pos is None:
        assert not bimfile is None, 'Require bimfile when position header not provided'
        print("Position Header not provided, will use info from bim file")

    num_lines = util.count_lines(filename)
    snps_pos_map = {}
    if only_hm3:
        if debug:
            print('Loading HapMap3 SNPs')
        hm3_sids = util.load_hapmap_SNPs()

    if bimfile is not None:
        valid_sids = set()
        if debug:
            print('Parsing bim file: %s' % bimfile)
        with open(bimfile) as f:
            for line in f:
                l = line.split()
                chrom = util.get_chrom_num(l[0])
                if chrom not in util.ok_chromosomes:
                    continue
                sid = l[1]
                if only_hm3:
                    if sid in hm3_sids:
                        valid_sids.add(sid)
                        snps_pos_map[sid] = {'pos': int(l[3]), 'chrom': chrom}
                else:
                    valid_sids.add(sid)
                    snps_pos_map[sid] = {'pos': int(l[3]), 'chrom': chrom}
        if len(valid_sids) == 0:
            raise Exception('Unable to parse BIM file')
    else:
        raise Exception('BIM file missing. Please check genotype paths provided.')

    invalid_chr = 0
    invalid_pos = 0
    invalid_p = 0
    invalid_beta = 0
    chrom_dict = {}
    opener = open
    if is_gz(filename):
        opener = gzip.open
    print('Parsing summary statistics file: %s' % filename)
    with opener(filename) as f:
        header = f.readline()
        if is_gz(filename):
            header = header.decode('utf-8')
        if debug:
            print('File header:')
            print(header)
        header_dict = {}
        columns = (header.strip()).split()
        index = 0
        for col in columns:
            header_dict[col] = index
            index += 1
        assert ch is None or ch in header_dict, 'Chromosome header cannot be found in summary statistic file'
        assert A2 in header_dict, 'Non-effective allele column cannot be found in summary statistic file'
        assert A1 in header_dict, 'Effective allele column cannot be found in summary statistic file'
        assert eff in header_dict, 'Effect size column not found in summary statistic file'
        assert rs in header_dict, 'SNP ID column not found in summary statistic file'
        assert pos is None or pos in header_dict, 'Position column not found in summary statistic file'
        assert pval in header_dict, 'P Value column not found in summary statistic file'
        assert not n is None or ncol in header_dict or (
            control_n in header_dict and case_n in header_dict), \
            'Sample size column not found in summary statistic file and N not provided'
        # header_dict now contains the header column name for each corresponding input
        bad_chromosomes = set()
        line_i = 1
        for line in f:
            line_i += 1
            if line_i % 1000 == 0 and num_lines > 0:
                sys.stdout.write('\b\b\b\b\b\b\b%0.2f%%' %
                                 (100.0 * (float(line_i) / (num_lines))))
                sys.stdout.flush()
            if is_gz(filename):
                line = line.decode('utf-8')
            l = (line.strip()).split()
            # get the SNP ID first
            sid = l[header_dict[rs]]
            # check the SNP ID
            if sid in valid_sids:
                # Get the chromosome information
                chrom = 0
                if not ch is None and ch in header_dict:
                    chrom = util.get_chrom_num(l[header_dict[ch]])
                    # Check if the chromosome of the SNP is correct
                    if not chrom == snps_pos_map[sid]['chrom']:
                        invalid_chr += 1
                        continue
                else:
                    chrom = snps_pos_map[sid]['chrom']

                pos_read = 0
                if not pos is None and pos in header_dict:
                    pos_read = int(l[header_dict[pos]])
                    if not pos_read == snps_pos_map[sid]['pos']:
                        invalid_pos += 1
                        if match_genomic_pos:
                            continue
                else:
                    pos_read = snps_pos_map[sid]['pos']

                pval_read = float(l[header_dict[pval]])
                if not isfinite(stats.norm.ppf(pval_read)):
                    invalid_p += 1
                    continue
                if not isfinite(float(l[header_dict[eff]])):
                    invalid_beta += 1
                    continue

                if not chrom in chrom_dict:
                    chrom_dict[chrom] = {'ps': [], 'log_odds': [], 'infos': [],
                                         'freqs': [], 'betas': [], 'nts': [],
                                         'sids': [], 'positions': []}
                chrom_dict[chrom]['sids'].append(sid)
                chrom_dict[chrom]['positions'].append(pos_read)

                # Check the frequency
                if reffreq is not None and reffreq in header_dict:
                    if l[header_dict[reffreq]] == '.' or l[header_dict[reffreq]] == 'NA':
                        chrom_dict[chrom]['freqs'].append(-1)
                    else:
                        chrom_dict[chrom]['freqs'].append(
                            float(l[header_dict[reffreq]]))
                elif (case_freq is not None and control_freq is not None
                      and case_freq in header_dict and control_freq in header_dict):
                    if (case_n is not None and control_n is not None
                            and case_n in header_dict and control_n in header_dict):
                        if (l[header_dict[control_n]] == '.'
                                or l[header_dict[control_n]] == 'NA'
                                or l[header_dict[case_n]] == '.'
                                or l[header_dict[case_n]] == 'NA'
                                or l[header_dict[control_freq]] == '.'
                                or l[header_dict[control_freq]] == 'NA'
                                or l[header_dict[case_freq]] == '.'
                                or l[header_dict[case_freq]] == 'NA'):
                            chrom_dict[chrom]['freqs'].append(-1)
                        else:
                            case_N = float(l[header_dict[case_n]])
                            control_N = float(l[header_dict[control_n]])
                            tot_N = case_N + control_N
                            a_scalar = case_N / float(tot_N)
                            u_scalar = control_N / float(tot_N)
                            freq = float(l[header_dict[case_freq]]) * a_scalar + \
                                float(l[header_dict[control_freq]]) * u_scalar
                            chrom_dict[chrom]['freqs'].append(freq)
                    else:
                        if (l[header_dict[case_freq]] == '.'
                                or l[header_dict[case_freq]] == 'NA'
                                or l[header_dict[control_freq]] == '.'
                                or l[header_dict[control_freq]] == 'NA'):
                            chrom_dict[chrom]['freqs'].append(-1)
                        else:
                            freq = (float(l[header_dict[case_freq]]) +
                                    float(l[header_dict[control_freq]])) / 2.0
                            chrom_dict[chrom]['freqs'].append(freq)
                else:
                    chrom_dict[chrom]['freqs'].append(-1)

                # Get the INFO score
                info_sc = -1
                if info is not None and info in header_dict:
                    info_sc = float(l[header_dict[info]])
                chrom_dict[chrom]['infos'].append(info_sc)

                chrom_dict[chrom]['ps'].append(pval_read)
                nt = [l[header_dict[A1]].upper(), l[header_dict[A2]].upper()]
                chrom_dict[chrom]['nts'].append(nt)

                raw_beta = float(l[header_dict[eff]])
                if n is None:
                    if ncol not in header_dict:
                        case_N = float(l[header_dict[case_n]])
                        control_N = float(l[header_dict[control_n]])
                        N = case_N + control_N
                    else:
                        # the original read float(header_dict[ncol]), i.e. the
                        # column index rather than the column value
                        N = float(l[header_dict[ncol]])
                else:
                    N = n

                if not input_is_beta:
                    raw_beta = sp.log(raw_beta)
                    chrom_dict[chrom]['log_odds'].append(raw_beta)
                    beta = get_beta_from_pvalue(pval_read, raw_beta)
                    chrom_dict[chrom]['betas'].append(beta / sp.sqrt(N))
                else:
                    beta = get_beta_from_pvalue(pval_read, raw_beta)
                    chrom_dict[chrom]['log_odds'].append(beta / sp.sqrt(N))
                    chrom_dict[chrom]['betas'].append(beta / sp.sqrt(N))

        if len(bad_chromosomes) > 0:
            if debug:
                print('Ignored chromosomes: %s' % (','.join(list(bad_chromosomes))))
                print('Please note that only data on chromosomes 1-23, and X are parsed.')

    if num_lines > 0:
        sys.stdout.write('\b\b\b\b\b\b\b%0.2f%%\n' % (100.0))
        sys.stdout.flush()
    print('SS file loaded, now sorting and storing in HDF5 file.')
    assert not 'sum_stats' in hdf5_file, 'Something is wrong with HDF5 file?'
    ssg = hdf5_file.create_group('sum_stats')
    num_snps = 0
    num_non_finite = 0
    for chrom in chrom_dict:
        if debug:
            print('%d SNPs on chromosome %s' %
                  (len(chrom_dict[chrom]['positions']), chrom))
        assert len(chrom_dict[chrom]['positions']) == len(chrom_dict[chrom]['betas']) == \
            len(chrom_dict[chrom]['ps']) == len(chrom_dict[chrom]['nts']), \
            'Problems with parsing summary stats'
        sl = list(zip(chrom_dict[chrom]['positions'], chrom_dict[chrom]['sids'],
                      chrom_dict[chrom]['nts'], chrom_dict[chrom]['betas'],
                      chrom_dict[chrom]['log_odds'], chrom_dict[chrom]['infos'],
                      chrom_dict[chrom]['freqs'], chrom_dict[chrom]['ps']))
        sl.sort()
        ps = []
        betas = []
        nts = []
        sids = []
        positions = []
        log_odds = []
        infos = []
        freqs = []
        prev_pos = -1
        for pos, sid, nt, beta, lo, info, frq, p in sl:
            if pos == prev_pos:
                if debug:
                    print('duplicated position %d' % pos)
                continue
            else:
                prev_pos = pos
            if not sp.isfinite(beta):
                num_non_finite += 1
                continue
            ps.append(p)
            betas.append(beta)
            nts.append(nt)
            sids.append(sid)
            positions.append(pos)
            log_odds.append(lo)
            infos.append(info)
            freqs.append(frq)
        nts = sp.array(nts, dtype=util.nts_dtype)
        sids = sp.array(sids, dtype=util.sids_dtype)
        if debug:
            if not num_non_finite == 0:
                print('%d SNPs have non-finite statistics on chromosome %s' %
                      (num_non_finite, chrom))
            print('Still %d SNPs on chromosome %s' % (len(ps), chrom))
        g = ssg.create_group('chrom_%s' % chrom)
        g.create_dataset('ps', data=sp.array(ps))
        g.create_dataset('freqs', data=freqs)
        g.create_dataset('betas', data=betas)
        g.create_dataset('log_odds', data=log_odds)
        num_snps += len(log_odds)
        g.create_dataset('infos', data=infos)
        g.create_dataset('nts', data=nts)
        g.create_dataset('sids', data=sids)
        g.create_dataset('positions', data=positions)
        hdf5_file.flush()
    if debug:
        print('%d SNPs excluded due to invalid chromosome' % invalid_chr)
        if match_genomic_pos:
            print('%d SNPs excluded due to invalid genomic positions' % invalid_pos)
        else:
            print('%d SNPs with non-matching genomic positions (not excluded)' % invalid_pos)
        print('%d SNPs excluded due to invalid P-value' % invalid_p)
        # the original printed invalid_p here as well
        print('%d SNPs excluded due to invalid effect sizes' % invalid_beta)
    print('%d SNPs parsed from summary statistics file' % num_snps)
    summary_dict[3.09] = {'name': 'dash', 'value': 'Summary statistics'}
    summary_dict[3.1] = {'name': 'Num SNPs parsed from sum stats file',
                         'value': num_snps}
    if invalid_p > 0:
        summary_dict[3.2] = {'name': 'Num invalid P-values in sum stats',
                             'value': invalid_p}
    if invalid_beta > 0:
        # the original repeated the P-value entry here
        summary_dict[3.21] = {'name': 'Num invalid effect sizes in sum stats',
                              'value': invalid_beta}
    if invalid_chr > 0:
        summary_dict[3.4] = {'name': 'SNPs w non-matching chromosomes excluded',
                             'value': invalid_chr}
    if invalid_pos > 0:
        if match_genomic_pos:
            summary_dict[3.3] = {'name': 'SNPs w non-matching positions excluded',
                                 'value': invalid_pos}
        else:
            summary_dict[3.3] = {'name': 'SNPs w non-matching positions (not excluded)',
                                 'value': invalid_pos}
def func(x):
    '''The function that we are finding the root of.'''
    return sp.log(1.5 * x)
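# Usage sketch (my own example, not from the original source): the root of
# log(1.5*x) lies where 1.5*x == 1, i.e. x = 2/3, which a bracketing solver
# recovers directly.
from scipy.optimize import brentq
root = brentq(func, 0.1, 10.0)  # approximately 0.6667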
def invSigmo(self, a):
    # Inverse of the logistic sigmoid (the logit function).
    return -log(1.0 / a - 1.0)
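# Round-trip sketch (my own check, not from the original source): the logit
# above undoes the logistic sigmoid.
from math import log, exp

sigmoid = lambda x: 1.0 / (1.0 + exp(-x))
x = 0.3
assert abs(-log(1.0 / sigmoid(x) - 1.0) - x) < 1e-12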
def ocv_soc(soc, a, b, c, d, e, f):
    # Open-circuit voltage as a function of state of charge; the 1e-5
    # offsets keep the division and log terms finite at soc = 0 and soc = 1.
    y = a + b*soc + c*pow(soc, 2) + d/(soc + 0.00001) + \
        e*log(soc + 0.00001) + f*log(1 + 0.00001 - soc)
    return y
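# Fitting sketch (my own example, not from the original source; assumes log
# in the module resolves to numpy's log so ocv_soc accepts arrays): recover
# the coefficients from noisy OCV samples with scipy.optimize.curve_fit.
import numpy as np
from scipy.optimize import curve_fit

soc = np.linspace(0.05, 0.95, 50)
true = (3.5, 0.6, -0.2, 0.01, 0.05, -0.05)
v = ocv_soc(soc, *true) + np.random.normal(0, 1e-3, soc.size)
popt, pcov = curve_fit(ocv_soc, soc, v, p0=np.ones(6))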
#!/usr/bin/python3 # -*- coding: utf-8 -*- import scipy from scipy.stats import norm n = 10000 S = 1614.96 # Standard and Poors 500 Index, on 07/01/2013 r = 0.008 # implied risk free interest rate, between 3 year and 5 year T-bill rate sigma = 0.1827 # implied volatility K = 1575 # strike price Tminust = 110. / 365. # 07/01/2013 to 10/19/2013 numerd1 = scipy.log(S / K) + (r + sigma**2 / 2) * Tminust numerd2 = scipy.log(S / K) + (r - sigma**2 / 2) * Tminust d1 = numerd1 / (sigma * scipy.sqrt(Tminust)) d2 = numerd2 / (sigma * scipy.sqrt(Tminust)) part1 = S * (norm.cdf(d1) - 1) part2 = K * scipy.exp(-r * Tminust) * (norm.cdf(d2) - 1) VP = part1 - part2 x = norm.rvs(size=n) y1 = scipy.maximum( 0, K - S * scipy.exp((r - sigma**2 / 2) * Tminust + sigma * x * scipy.sqrt(Tminust))) y2 = scipy.maximum( 0, K - S * scipy.exp((r - sigma**2 / 2) * Tminust + sigma * -x * scipy.sqrt(Tminust))) y = (y1 + y2) / 2
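# The script stops at the antithetic payoff average y; a possible completion
# discounts the Monte Carlo mean back to present value and compares it with
# the closed-form put price VP computed above:
VP_mc = scipy.exp(-r * Tminust) * scipy.mean(y)
se = scipy.exp(-r * Tminust) * scipy.std(y) / scipy.sqrt(n)
print(VP, VP_mc, se)  # the two estimates should agree to within a few se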
def doConvolution(x_in, y_in, x_out, widths, factor=5, oversampling=1): ''' Perform convolution on lists with a Gaussian filter. Reduce the input grid to the target grid by integration. @param x_in: The input x-values @type x_in: array @param y_in: The input y-values @type y_in: array @param x_out: The target x-grid @type x_out: array @param widths: The full width/half maximum spectral resolution as a function of wavelength, i.e. the fwhm of the gaussian @type widths: array @keyword factor: the sigma factor for determining the window pushed through the gaussian filter. This avoids having to convolve the whole input grid, which takes a lot of time. Beyond sigma*factor the contribution of the y values is assumed to be negligible. (default: 5) @type factor: int @keyword oversampling: oversampling factor of the target x-grid with respect to the given spectral resolution. (default: 1) @type oversampling: int @return: The resulting y-values @rtype: list ''' x_in, y_in, x_out, widths = array(x_in), array(y_in), array(x_out), array( widths) y_out = [] print 'Convolving for x_out between %.2f micron and %.2f micron with oversampling %i.' \ %(x_out[0],x_out[-1],int(oversampling)) #- Convert FWHM's to sigma for the gaussians sigma = [fwhm / (2. * sqrt(2. * log(2.))) for fwhm in widths] #- Define the binsizes of the bins that will be integrated, i.e. the #- apparent resolution of x_out binsize = [w / oversampling for w in widths] for delta_bin, sigi, xi_out in zip(binsize, sigma, x_out): yi_in = y_in[abs(x_in - xi_out) <= factor * sigi] #- if not empty: continue, else add 0 if list(yi_in) and set(yi_in) != set([0.0]): #- all relevant xi's for the bin around xi_out, ie in this bin the #- y-values will be integrated xi_in = x_in[abs(x_in - xi_out) <= delta_bin] #- The window for the convolution itself, outside this window the #- data are assumed to be negligible, ie for a gaussian window = x_in[abs(x_in - xi_out) <= factor * sigi] convolution = convolveArray(window, yi_in, sigi) #- if one value in the bin, out of the window selection: add value if len(list(convolution[abs(window - xi_out) <= delta_bin])) == 1: y_out.append(convolution[abs(window - xi_out) <= delta_bin][0]) print 'Convolution has a window of only one element at xi_out %f.' % xi_out #- If more than one value: integrate elif list(convolution[abs(window - xi_out) <= delta_bin]): y_out.append( trapz(y=convolution[abs(window - xi_out) <= delta_bin], x=xi_in) / (xi_in[-1] - xi_in[0])) #- If no values in the bin from the window: add average of the window #- This should not occur ideally! else: print 'Convolution has a window of no elements at x_out ' + \ '%f. Careful! Average is taken of '%(xi_out) + \ 'sigma*factor window! This should not be happening...' y_out.append(sum(convolution) / float(len(convolution))) else: y_out.append(0.0) return y_out
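# A hedged usage sketch for doConvolution: degrade a finely sampled synthetic
# spectrum to a coarser grid with a wavelength-dependent resolution. Note that
# doConvolution depends on an external convolveArray helper (and on array,
# sqrt, log, trapz being in scope), none of which are defined in this file:
from scipy import arange, exp

x_in = arange(5.0, 40.0, 0.001)          # fine input grid (micron)
y_in = exp(-(x_in - 20.0)**2 / 0.5)      # synthetic emission feature
x_out = arange(5.5, 39.5, 0.05)          # coarser target grid
widths = 0.01 * x_out                    # fwhm growing with wavelength
y_out = doConvolution(x_in, y_in, x_out, widths, factor=5, oversampling=1)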
def coxnet(x, is_sparse, irs, pcs, y, weights, offset, parm, nobs, nvars, jd, vp, cl, ne, nx, nlam, flmin, ulam, thresh, isd, maxit, family): # load shared fortran library glmlib = loadGlmLib() # pre-process data ty = y[:, 0] tevent = y[:, 1] if scipy.any(ty <= 0): raise ValueError('negative event time not permitted for cox family') if len(offset) == 0: offset = ty*0 is_offset = False else: is_offset = True # now convert types and allocate memory before calling # glmnet fortran library ###################################### # --------- PROCESS INPUTS ----------- ###################################### # force inputs into fortran order and scipy float64 copyFlag = False x = x.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) irs = irs.astype(dtype = scipy.int32, order = 'F', copy = copyFlag) pcs = pcs.astype(dtype = scipy.int32, order = 'F', copy = copyFlag) ty = ty.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) tevent = tevent.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) offset = offset.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) weights = weights.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) jd = jd.astype(dtype = scipy.int32, order = 'F', copy = copyFlag) vp = vp.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) cl = cl.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) ulam = ulam.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) ###################################### # --------- ALLOCATE OUTPUTS --------- ###################################### # lmu lmu = -1 lmu_r = ctypes.c_int(lmu) # ca ca = scipy.zeros([nx, nlam], dtype = scipy.float64) ca = ca.astype(dtype = scipy.float64, order = 'F', copy = False) ca_r = ca.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) # ia ia = -1*scipy.ones([nx], dtype = scipy.int32) ia = ia.astype(dtype = scipy.int32, order = 'F', copy = False) ia_r = ia.ctypes.data_as(ctypes.POINTER(ctypes.c_int)) # nin nin = -1*scipy.ones([nlam], dtype = scipy.int32) nin = nin.astype(dtype = scipy.int32, order = 'F', copy = False) nin_r = nin.ctypes.data_as(ctypes.POINTER(ctypes.c_int)) # dev dev = -1*scipy.ones([nlam], dtype = scipy.float64) dev = dev.astype(dtype = scipy.float64, order = 'F', copy = False) dev_r = dev.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) # alm alm = -1*scipy.ones([nlam], dtype = scipy.float64) alm = alm.astype(dtype = scipy.float64, order = 'F', copy = False) alm_r = alm.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) # nlp nlp = -1 nlp_r = ctypes.c_int(nlp) # jerr jerr = -1 jerr_r = ctypes.c_int(jerr) # dev0 dev0 = -1 dev0_r = ctypes.c_double(dev0) # ################################### # main glmnet fortran caller # ################################### if is_sparse: # no sparse coxnet implemented raise ValueError('Cox model not implemented for sparse x in glmnet') else: # call fortran coxnet routine glmlib.coxnet_( ctypes.byref(ctypes.c_double(parm)), ctypes.byref(ctypes.c_int(nobs)), ctypes.byref(ctypes.c_int(nvars)), x.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), ty.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), tevent.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), offset.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), weights.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), jd.ctypes.data_as(ctypes.POINTER(ctypes.c_int)), vp.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), cl.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), ctypes.byref(ctypes.c_int(ne)), ctypes.byref(ctypes.c_int(nx)), ctypes.byref(ctypes.c_int(nlam)), 
ctypes.byref(ctypes.c_double(flmin)), ulam.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), ctypes.byref(ctypes.c_double(thresh)), ctypes.byref(ctypes.c_int(maxit)), ctypes.byref(ctypes.c_int(isd)), ctypes.byref(lmu_r), ca_r, ia_r, nin_r, ctypes.byref(dev0_r), dev_r, alm_r, ctypes.byref(nlp_r), ctypes.byref(jerr_r) ) # ################################### # post process results # ################################### # check for error if (jerr_r.value > 0): raise ValueError("Fatal glmnet error in library call : error code = ", jerr_r.value) elif (jerr_r.value < 0): print("Warning: Non-fatal error in glmnet library call: error code = ", jerr_r.value) print("Check results for accuracy. Partial or no results returned.") # clip output to correct sizes lmu = lmu_r.value ca = ca[0:nx, 0:lmu] ia = ia[0:nx] nin = nin[0:lmu] dev = dev[0:lmu] alm = alm[0:lmu] # ninmax ninmax = max(nin) # fix first value of alm (from inf to correct value) if ulam[0] == 0.0: t1 = scipy.log(alm[1]) t2 = scipy.log(alm[2]) alm[0] = scipy.exp(2*t1 - t2) # create return fit dictionary if ninmax > 0: ca = ca[0:ninmax, :] df = scipy.sum(scipy.absolute(ca) > 0, axis=0) ja = ia[0:ninmax] - 1 # ia is 1-indexed in fortran oja = scipy.argsort(ja) ja1 = ja[oja] beta = scipy.zeros([nvars, lmu], dtype = scipy.float64) beta[ja1, :] = ca[oja, :] else: beta = scipy.zeros([nvars, lmu], dtype = scipy.float64) df = scipy.zeros([1, lmu], dtype = scipy.float64) fit = dict() fit['beta'] = beta fit['dev'] = dev fit['nulldev'] = dev0_r.value fit['df']= df fit['lambdau'] = alm fit['npasses'] = nlp_r.value fit['jerr'] = jerr_r.value fit['dim'] = scipy.array([nvars, lmu], dtype = scipy.integer) fit['offset'] = is_offset fit['class'] = 'coxnet' # ################################### # return to caller # ################################### return fit
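# The alm[0] fix above replaces the first lambda (infinite when ulam[0] == 0)
# with a log-linear extrapolation from the next two values:
# log(alm[0]) = 2*log(alm[1]) - log(alm[2]). A standalone check on a made-up
# geometric lambda path:
import scipy

alm_demo = scipy.array([0.0, 0.5, 0.25])  # alm_demo[0] is the placeholder
alm_demo[0] = scipy.exp(2 * scipy.log(alm_demo[1]) - scipy.log(alm_demo[2]))
print(alm_demo[0])  # 1.0, continuing the geometric sequence 1.0, 0.5, 0.25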
def __init__(self, T, P, mezcla): self.T=unidades.Temperature(T) self.P=unidades.Pressure(P, "atm") self.mezcla=mezcla self.componente=mezcla.componente self.fraccion=mezcla.fraccion self.B=self.b*self.P.atm/R_atml/self.T self.Tita=self.tita*self.P.atm/(R_atml*self.T)**2 delta=self.delta*self.P.atm/R_atml/self.T epsilon=self.epsilon*(self.P.atm/R_atml/self.T)**2 eta=self.eta*self.P.atm/R_atml/self.T Z=roots([1, delta-self.B-1, self.Tita+epsilon-delta*(self.B+1), -epsilon*(self.B+1)-self.Tita*eta]) self.Z=r_[Z[0].real, Z[2].real] self.V=self.Z*R_atml*self.T/self.P.atm #mol/l self.x, self.xi, self.yi, self.Ki=self._Flash() self.H_exc=-(self.tita+self.dTitadT)/R_atml/self.T/(self.delta**2-4*self.epsilon)**0.5*log((2*self.V+self.delta-(self.delta**2-4*self.epsilon)**0.5)/(2*self.V+self.delta+(self.delta**2-4*self.epsilon)**0.5))+1-self.Z
def updateW(self, m): M = self.components Muse = self.doUpdate.sum() if self.noise == 'gauss': YmeanX = self.Z.E1 elif self.noise == 'hurdle' or self.noise == 'poisson': YmeanX = self.meanX if (m < self.nKnown) or (m in self.iLatentSparse) or (m in self.iLatent): with SP.errstate(divide='ignore'): logPi = SP.log(self.Pi.E1[:, m] / (1 - self.Pi.E1[:, m])) #logPi = (self.Pi.lnE1 - (special.digamma(self.Pi.b) - special.digamma(self.Pi.a+self.Pi.b)))[:,m] elif self.nScale > 0 and self.nScale < YmeanX.shape[0]: with SP.errstate(divide='ignore'): logPi = SP.log(self.Pi.E1[:, m] / (1 - self.Pi.E1[:, m])) #logPi = self.Pi.lnE1 - (special.digamma(self.Pi.b) - special.digamma(self.Pi.a+self.Pi.b)) isOFF_ = self.Pi.E1[:, m] < .5 logPi[isOFF_] = (YmeanX.shape[0] / self.nScale) * SP.log( self.Pi.E1[isOFF_, m] / (1 - self.Pi.E1[isOFF_, m])) isON_ = self.Pi.E1[:, m] > .5 if self.onF > 1.: logPi[isON_] = self.onF * SP.log(self.Pi.E1[isON_, m] / (1 - self.Pi.E1[isON_, m])) else: onF = 1. logPi = SP.log(self.Pi.E1[:, m] / (1 - self.Pi.E1[:, m])) sigma2Sigmaw = (1.0 / self.Eps.E1) * self.Alpha.E1[m] setMinus = SP.int_( SP.hstack([list(range(M))[0:m], list(range(M))[m + 1::]])) setMinus = setMinus[self.doUpdate[setMinus] == 1] SmTSk = SP.sum( SP.tile(self.S.E1[:, m:m + 1], (1, Muse - 1)) * self.S.E1[:, setMinus], 0) SmTSm = SP.dot(self.S.E1[:, m].transpose(), self.S.E1[:, m]) + self.S.diagSigmaS[:, m].sum() b = SP.dot((self.W.C[:, setMinus, 0] * self.W.E1[:, setMinus]), (SmTSk.transpose())) diff = SP.dot(self.S.E1[:, m].transpose(), YmeanX) - b SmTSmSig = SmTSm + sigma2Sigmaw #update C and W u_qm = logPi + 0.5 * SP.log(sigma2Sigmaw) - 0.5 * SP.log(SmTSmSig) + ( 0.5 * self.Eps.E1) * ((diff**2) / SmTSmSig) with SP.errstate(over='ignore'): self.W.C[:, m, 0] = 1. / (1 + SP.exp(-u_qm)) self.W.C[:, m, 1] = 1 - self.W.C[:, m, 0] self.W.E1[:, m] = (diff / SmTSmSig) #q(w_qm | s_qm=1), q=1,...,Q self.W.sigma2[:, m] = (1. / self.Eps.E1) / SmTSmSig self.W.E2diag[:, m] = self.W.E1[:, m]**2 + self.W.sigma2[:, m]
def blackscholes_put(S, E, T, rf, sigma): #calculate d1 and d2 parameters d1 = (log(S / E) + (rf + sigma * sigma / 2.0) * T) / (sigma * sqrt(T)) d2 = d1 - sigma * sqrt(T) #we use the cdf of a normal distribution return -S * stats.norm.cdf(-d1) + E * exp(-rf * T) * stats.norm.cdf(-d2)
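# Example call with hypothetical market parameters (spot 100, strike 100, one
# year to expiry, 5% risk-free rate, 20% volatility); the imports assumed by
# blackscholes_put are made explicit here:
from scipy import exp, log, sqrt
from scipy import stats

put = blackscholes_put(100.0, 100.0, 1.0, 0.05, 0.2)
print(put)  # ~5.57 for these inputs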
def logdet(self): r = sp.log(self.SpI()).sum() r += sp.sum(sp.log(self.Cn.S())) * self.dim_r r += 2 * sp.log(sp.diag(self.H_chol())).sum() return r
def calcBound(self): #TODO: debug!! DO NOT USE F1 = -self._D*self._N/2*SP.log(2*pi) - self._N/2 * SP.sum(SP.log(1/self.Eps.E1)) - \ 0.5*SP.sum(self.ZZ*self.Eps.E1) SW_tau = (self.W.C[:, :, 0] * self.W.E1) * SP.tile( self.Eps.E1, (self.W.E1.shape[1], 1)).T SW2_tau = (self.W.C[:, :, 0] * (self.W.E2diag)) * SP.tile(self.Eps.E1, (self.W.E1.shape[1], 1)).T SS = SP.sum(self.S.E1 * self.S.E1, 0) SmTSm = SP.zeros(self.W.E1.shape[1]) F2 = SP.sum(SW_tau * SP.dot(self.Z.E1.T, self.S.E1)) F3 = 0. F4 = 0. F7PlusE3 = 0.5 * (self.nHidden * self._N) #don't use knowns in entropy for m in SP.arange(self.W.E1.shape[1]): #F3 SigmaSm = 1. / (1 + SP.sum(self.S.diagSigmaS[:, m])) SmTSm[m] = SS[m] + self._N * SigmaSm F3 += SP.sum(SW2_tau[:, m], 0) * SmTSm[m] #F4 rS = SP.zeros(self._N) for m1 in SP.arange(m + 1, self.W.E1.shape[1]): tmp = (self.W.C[:, m1, 0] * self.W.E1[:, m1]) * SW_tau[:, m] rS = rS + SP.sum(tmp, 0) * self.S.E1[:, m1] F4 = F4 + SP.dot(rS, self.S.E1[:, m:m + 1]) #F7 alphaSm = SP.sum(SW2_tau[:, m]) F7PlusE3 = F7PlusE3 - 0.5*self._N*SP.log(1+alphaSm) - (0.5*self._N)/(1+alphaSm) \ - 0.5*SP.dot(self.S.E1[:,m].T, self.S.E1[:,m]) F5 = -(0.5*self.components*self._D)*SP.log(2.*pi) - (0.5*self.components)*sum(SP.log(1./self.Alpha.E1)) - \ 0.5* SP.sum(1-self.W.C[:,:,0]) + SP.sum(SP.sum(self.W.C[:,:,0]*self.W.E2diag,0)*self.Alpha.E1) F6 = SP.sum(SP.log(self.Pi.E1) * self.W.C[:, :, 0]) + SP.sum( SP.log(1. - self.Pi.E1) * (1 - self.W.C[:, :, 0])) EpslnE = special.digamma(self.Eps.a) - SP.log(self.Eps.b) F8 = (self.Eps.pa - 1) * SP.sum(EpslnE) - self.Eps.pb * SP.sum( self.Eps.E1) AlphalnE = special.digamma(self.Alpha.a) - SP.log(self.Alpha.b) F9 = (self.Alpha.pa - 1) * SP.sum(AlphalnE) - self.Alpha.pb * SP.sum( self.Alpha.E1) E1 = (0.5*self.components*self._D)*SP.log(2*pi) + (0.5*self.components)*SP.sum(SP.log(1./self.Alpha.E1)) + \ 0.5*(self.components*self._D) - 0.5*SP.sum(SP.log(1./self.Alpha.E1)*(SP.sum(self.W.C[:,:,0],0))) \ + 0.5*SP.sum( self.W.C[:,:,0]*SP.log(self.W.sigma2)) E2 = - SP.sum( self.W.C[:,:,0]*SP.log(self.W.C[:,:,0]+(self.W.C[:,:,0]==0)) + \ (1-self.W.C[:,:,0])*SP.log(1-self.W.C[:,:,0]+(self.W.C[:,:,0]==1))) E4 = SP.sum(self.Eps.a*SP.log(self.Eps.b)) + SP.sum((self.Eps.a-1)*EpslnE) -\ SP.sum(self.Eps.b*self.Eps.E1) - SP.sum(special.gammaln(self.Eps.a)) E5 = SP.sum(self.Alpha.a*SP.log(self.Alpha.b)) + SP.sum((self.Alpha.a-1)*AlphalnE) -\ SP.sum(self.Alpha.b*self.Alpha.E1) - SP.sum(special.gammaln(self.Alpha.a)) #pdb.set_trace() #F = F1 + F2 - 0.5*F3 - F4 + F5 + F6 + E1 + E2 + F7PlusE3 + F8 - E4 +F9 - E5 F = F1 + F2 - 0.5 * F3 - F4 + F5 + F6 + E1 + E2 + F7PlusE3 #+ F8 - E4 #+F9 - E5 return F
# Isotopologue image and centroid map cubo, head = datafits(image) dnu = head['CDELT3'] len_nu = head['NAXIS3'] nui = head['CRVAL3']- head['CRPIX3']*dnu nuf = nui + (len_nu-1)*dnu nu = sp.linspace(nui, nuf, len_nu) nu0 = sp.mean(nu) #Gaussian Convolution if False: resol = abs(head['CDELT1'])*3600 stdev = Beam / (2 * sp.sqrt(2 * sp.log(2))) stdev /= resol x_size = int(8*stdev + 1.) print 'convolution with gaussian' print '\tbeam '+str(Beam)+' arcsec' print '\tsigma '+str(stdev)+' pixels' # circular Gaussian beam = Gaussian2DKernel(stddev=stdev, x_size=x_size, y_size=x_size, mode='integrate') smooth = np.zeros(cubo.shape) for k in range(len_nu): smooth[k, :,:] += convolve_fft(cubo[k,:,:], beam) print '\tsmoothed' cubo = smooth
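# Sanity check of the FWHM-to-sigma conversion used above: a Gaussian with
# stdev = fwhm / (2*sqrt(2*log(2))) falls to half its peak value at +/- fwhm/2.
fwhm = 1.0
stdev_check = fwhm / (2 * sp.sqrt(2 * sp.log(2)))
print sp.exp(-(fwhm / 2.0)**2 / (2 * stdev_check**2))  # 0.5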
plt.plot([0, 0], ylim, '--', color='.3') plt.xticks(xticks, ['$'+str(int(xi * 100)) + '\% $' for xi in xticks]) plt.yticks(yticks, ['$'+str(int(yi * 100)) + '\% $' for yi in yticks]) plt.xlim(xlim) plt.ylim(ylim) plt.xlabel(xlabel) plt.ylabel(ylabel) # plt.savefig(fname, format=fformat) rawdata = list(reader(open("data_www.csv"), quoting = QUOTE_NONNUMERIC)) # generating a dictionary with data of the relevant years data={c: {z[0]:{zname:zvalue for zname,zvalue in zip(rawdata[0], z)} for z in rawdata if z[rawdata[0].index('year')] == c} for c in (2004, 1970)} gdpDeltaUS = log(data[2004]['United States']['per_cap_gdp']/ data[1970]['United States']['per_cap_gdp'])/34 GovChangeGDP = {'poor': [], 'rich': []} GDPChange = {'poor': [], 'rich': []} PrivChangeGDP = {'poor': [], 'rich': []} Country = {'poor': [], 'rich': []} NFAChangeGDP = {'poor': [], 'rich': []} for c in data[1970].keys(): try: d1, d0 = data[2004][c], data[1970][c] gdp_change = log(d1['per_cap_gdp'] /d0['per_cap_gdp'])/34 - gdpDeltaUS nfa_change_gdp = ((d1['assets']-d1['liabilities']) /d1['dollar_gdp'] - (d0['assets']-d0['liabilities']) /d0['dollar_gdp'])/34
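# The annualized log growth rate used above is log(v_2004 / v_1970) / 34, with
# 34 years between the two observations, expressed relative to the US rate.
# A small worked example with hypothetical per-capita GDP values:
v0, v1 = 5000.0, 20000.0            # hypothetical 1970 and 2004 values
growth = log(v1 / v0) / 34          # ~0.0408, i.e. about 4.1% per year
rel_growth = growth - gdpDeltaUS    # growth differential relative to the US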
def computePVmixtureChi2(lrt, a2=None, tol=0.0, mixture=0.5, scale=1.0, dof=1.0): ''' OBSOLETE: but was known to work. Can delete once the problems with the python code are solved. computes P-values for a mixture of a scaled Chi^2_dof and Chi^2_0 distributions. The mixture weight is estimated from the fraction of models where the parameter is at the boundary. The scale and degrees of freedom (dof) of the scaled Chi^2_dof are estimated by maximum likelihood, if the parameters provided are set to None. Note that accurate estimation of the mixture coefficient needs a sufficiently large number of tests to be performed. The P-values are computed as mixture*(1.0-CDF_Chi^2_1(lrt)) -------------------------------------------------------------------------- Input: lrt : [S] 1D array of likelihood ratio tests (2*ln(likelihood ratio)) a2 : [S] 1D array, if specified then a2 is used to determine the Chi^2_0 component, else lrt is used (optional). tol : cutoff for members of the Chi^2_0 component is a2/lrt <= 0+tol. mixture : the scaled Chi^2_dof1 mixture component, if this parameter is set to None, it will be estimated by the fraction of the tests that have the weight a2 at the boundary (a2=0.0, lrt=0.0) scale : the scale parameter of the scaled Chi^2_dof, if set to None the parameter will be determined by maximum likelihood. (default 1.0) dof : the degrees of freedom of the scaled Chi^2_dof, if set to None the parameter will be determined by maximum likelihood. (default 1.0) -------------------------------------------------------------------------- Output: pv : [S] 1D-array of P-values computed as mixture*(1.0-CDF_Chi^2_dof(scale,lrt)) mixture : mixture weight of the scaled Chi^2_dof component scale : scale of the scaled Chi^2_dof distribution dof : degrees of freedom of the scaled Chi^2_dof distribution i0 : indicator for Chi^2_0 P-values -------------------------------------------------------------------------- ''' raise Exception( "made changes to use alteqnull and did not modify this code as it looks obsolete" ) loc = None chi2mix = chi2mixture() chi2mix.lrt = lrt if mixture is None: i0, mixture = chi2mix.fit_mixture(a2=a2, tol=tol) else: chi2mix.mixture = mixture if a2 is None: i0 = (lrt <= (0.0 + tol)) else: i0 = (a2 <= (0.0 + tol)) N = (~i0).sum() sumX = (lrt[~i0]).sum() logsumX = (sp.log(lrt[~i0])).sum() if (dof is None) and (scale is None): #f is the Gamma likelihood with the scale parameter maximized analytically as a function of 0.5 * the degrees of freedom f = lambda k: -1.0 * (-N * sp.special.gammaln(k) - k * N * (sp.log(sumX) - sp.log(k) - sp.log(N)) + (k - 1.0) * logsumX - k * N) #f_ = lambda(x): 1-N*N/(2.0*x*sumX) res = minimize1D(f, evalgrid=None, nGrid=10, minval=0.1, maxval=3.0) dof = 2.0 * res[0] elif dof is None: f = lambda k: -1.0 * (-N * sp.special.gammaln(k) - k * N * sp.log( 2.0 * scale) + (k - 1.0) * logsumX - sumX / (2.0 * scale)) res = minimize1D(f, evalgrid=None, nGrid=10, minval=0.1, maxval=3.0) dof = 2.0 * res[0] if scale is None: #compute condition for ML if (1.0 - (N * N * dof) / (4.0 * sumX) > 0): logging.warn( 'Warning: positive second derivative: No maximum likelihood solution can be found for the scale. returning scale=1.0 and dof=1.0' ) scale = 1.0 dof = 1.0 else: scale = sumX / (N * dof) pv = mixture * ( st.chi2.sf(lrt / scale, dof) ) # Can use the Chi^2 CDF/SF to evaluate the scaled Chi^2 by rescaling the input. pv[i0] = 1.0 return (pv, mixture, scale, dof, i0)
def parspar(n): i = n/ndim j = n%ndim data = cubo[:,i,j] datamax = data.max() noise_level = 0 noise = sp.random.normal(scale=noise_level,size=len(data)) data = data + noise if (i-int(ndim/2.))**2 + (j-int(ndim/2.))**2 > r**2 or 0.4*cubomax>datamax: return [None] print i,j m0 = sp.integrate.simps(data,nu) m1 = sp.integrate.simps(velocities*data, nu)/m0 mom2[i,j] = sp.integrate.simps(data*(velocities-m1)**2, nu)*1000/m0 datamin = data.min() data1 = data -data.min() centroid = (nu*(cubo[:,i,j]-datamin)).sum()/(cubo[:,i,j]-datamin).sum() i0 = datamin if i0==0: i0=1e-10 i0_lim=(0.5*i0,1.2*i0) r_ij = sp.sqrt((i-128)**2 +(j-128)**2) rho_ij = r_ij/sp.cos(incli) if rho_ij<30: temp_0 = 70 tlim = (10,200) else: temp_0 = 70 * (r_ij / 30.)**(-0.5) tlim = (10,120) vels = (nu-nu0)*3e5/nu0 velg = vels[data==data.max()][0] noise = data[(velocities<velg-1) | (velocities>velg+1.)] rms = np.sqrt(sp.sum(noise**2)/float(len(noise))) ## Cont=True Fit lines considering the presence of continuum. if cont: line_model = pm.Model() with line_model: var = ['Temp','nu_c','log(N_CO)','v_turb','Continuum'] # Priors for unknown model parameters Temp = pm.TruncatedNormal('Temp', mu=temp_0, sd=5, lower=tlim[0], upper=tlim[1]) nu_c = pm.TruncatedNormal('nu_c', mu=centroid, sd=abs(dnu)/10., lower=centroid-0.5*abs(dnu), upper=centroid+0.5*abs(dnu)) NCO = pm.Uniform('log(N_CO)', lower=10, upper=24) v_turb = pm.Uniform('v_turb', lower=sp.sqrt(k*tlim[0]/m), upper=300000) ## This is really broadening in velocity space, not turbulent vel. i_0 = pm.TruncatedNormal('Continuum', mu=i0, sd=5, lower=i0_lim[0], upper=i0_lim[1]) # Expected value of outcome predict = intensity_continuum(nu, Temp, nu_c, alpha, 10**NCO, v_turb, angle, i_0, head, iso) # Likelihood (sampling distribution) of observations Y_obs = pm.Normal('Y_obs', mu=predict, sd=rms, observed=data) step = pm.NUTS() st = {'Temp':temp_0, 'nu_c':centroid, 'log(N_CO)':20, 'v_turb':20000, 'Continuum':i0} trace = pm.sample(5000,tune=1000,cores=2,step=step,start=st) stats = pm.summary(trace) mean_pars = [stats['mean'][x] for x in var] hpd_2_5 = [stats['hpd_2.5'][x] for x in var] hpd_97_5 = [stats['hpd_97.5'][x] for x in var] var_std = [stats['sd'][x] for x in var] medians_pars = [sp.median(trace[x]) for x in var] Map = [float(pm.find_MAP(model=line_model)[x]) for x in var] fit = mean_pars model = intensity_continuum(nu, fit[0], fit[1],alpha, 10**fit[2], fit[3], angle,fit[4], head, iso) else: line_model = pm.Model() with line_model: var = ['Temp','nu_c','log(N_CO)','v_turb'] # Priors for unknown model parameters Temp = pm.TruncatedNormal('Temp', mu=temp_0, sd=5, lower=tlim[0], upper=tlim[1]) nu_c = pm.TruncatedNormal('nu_c', mu=centroid, sd=abs(dnu)/10., lower=centroid-0.5*abs(dnu), upper=centroid+0.5*abs(dnu)) NCO = pm.Uniform('log(N_CO)', lower=10, upper=24) v_turb = pm.Uniform('v_turb', lower=sp.sqrt(k*tlim[0]/m), upper=300000) ## This is really broadening in velocity space, not turbulent vel.
# Expected value of outcome predict = intensity(nu, Temp, nu_c, 10**NCO, v_turb, angle, head, iso) # Likelihood (sampling distribution) of observations Y_obs = pm.Normal('Y_obs', mu=predict, sd=rms, observed=data) step = pm.NUTS() st = {'Temp':temp_0, 'nu_c':centroid, 'log(N_CO)':20, 'v_turb':20000} trace = pm.sample(5000,tune=1000,cores=2,step=step,start=st) stats = pm.summary(trace) mean_pars = [stats['mean'][x] for x in var] hpd_2_5 = [stats['hpd_2.5'][x] for x in var] hpd_97_5 = [stats['hpd_97.5'][x] for x in var] var_std = [stats['sd'][x] for x in var] medians_pars = [sp.median(trace[x]) for x in var] Map = [float(pm.find_MAP(model=line_model)[x]) for x in var] fit = mean_pars model = intensity(nu, fit[0], fit[1], 10**fit[2], fit[3], angle, head, iso) Temperature[i,j,:] = sp.array([fit[0],var_std[0],medians_pars[0],Map[0],hpd_2_5[0],hpd_97_5[0]]) Denscol[i,j,:] = sp.array([10**fit[2],10**fit[2]*sp.log(10)*var_std[2],10**medians_pars[2],10**Map[2],10**hpd_2_5[2],10**hpd_97_5[2]]) Turbvel[i,j,:] = sp.sqrt(((sp.array([fit[3],var_std[3],medians_pars[3],Map[3],hpd_2_5[3],hpd_97_5[3]]))**2 - k*fit[0]/m))*1e-5 aux_nu = sp.array([fit[1],var_std[1],medians_pars[1],Map[1],hpd_2_5[1],hpd_97_5[1]]) vel_cen[i,j,:] = sp.around(((nu0-aux_nu)*c*1e-5/nu0)) return [i,j,Temperature[i,j,:], Denscol[i,j,:], Turbvel[i,j,:],vel_cen[i,j,:]]
def parse_sum_stats_custom(filename=None, bimfile=None, hdf5_file=None, n=None, ch=None, pos=None, A1=None, A2=None, reffreq=None, case_freq=None, control_freq=None, case_n=None, control_n=None, info=None, rs=None, pval=None, eff=None, ncol=None, input_is_beta=False, debug=False): # Check required fields are here assert not A2 is None, 'Require header for non-effective allele' assert not A1 is None, 'Require header for effective allele' assert not rs is None, 'Require header for RS ID' assert not eff is None, 'Require header for Statistics' assert not pval is None, 'Require header for pval' assert not ncol is None or not n is None, 'Require either N or NCOL information' if ch is None: assert not bimfile is None, 'Require bimfile when chromosome header not provided' print("Chromosome Header not provided, will use info from bim file") if pos is None: assert not bimfile is None, 'Require bimfile when position header not provided' print("Position Header not provided, will use info from bim file") snps_pos_map = {} if bimfile is not None: valid_sids = set() if debug: print('Parsing bim file: %s' % bimfile) with open(bimfile) as f: for line in f: l = line.split() # Bim file format is CHR SNP BP valid_sids.add(l[1]) snps_pos_map[l[1]] = {'pos': int(l[3]), 'chrom': l[0]} chr_filter = 0 pos_filter = 0 invalid_p = 0 chrom_dict = {} opener = open if is_gz(filename): opener = gzip.open print('Parsing summary statistics file: %s' % filename) with opener(filename) as f: header = f.readline() if is_gz(filename): header = header.decode('utf-8') if debug: print(header) header_dict = {} columns = (header.strip()).split() index = 0 for col in columns: header_dict[col] = index index += 1 assert ch is None or ch in header_dict, 'Chromosome header cannot be found in summary statistic file' assert A2 in header_dict, 'Non-effective allele column cannot be found in summary statistic file' assert A1 in header_dict, 'Effective allele column cannot be found in summary statistic file' assert eff in header_dict, 'Effect size column not found in summary statistic file' assert rs in header_dict, 'SNP ID column not found in summary statistic file' assert pos is None or pos in header_dict, 'Position column not found in summary statistic file' assert pval in header_dict, 'P Value column not found in summary statistic file' assert not n is None or ncol in header_dict, 'Sample size column not found in summary statistic ' \ 'file and N not provided' # header_dict now contains the header column name for each corresponding input bad_chromosomes = set() for line in f: if is_gz(filename): line = line.decode('utf-8') l = (line.strip()).split() # get the SNP ID first sid = l[header_dict[rs]] # check the SNP ID if sid in valid_sids: # Get the chromosome information chrom = 0 if not ch is None and ch in header_dict: chrom = l[header_dict[ch]] chrom = re.sub("chr", "", chrom) if not chrom == snps_pos_map[sid]['chrom']: chr_filter += 1 continue else: chrom = snps_pos_map[sid]['chrom'] if not chrom in util.ok_chromosomes: bad_chromosomes.add(chrom) continue # Check if the position of the SNP is correct pos_read = 0 if not pos is None and pos in header_dict: pos_read = int(l[header_dict[pos]]) if not pos_read == snps_pos_map[sid]['pos']: pos_filter += 1 continue else: pos_read = snps_pos_map[sid]['pos'] if not chrom in chrom_dict: chrom_dict[chrom] = { 'ps': [], 'log_odds': [], 'infos': [], 'freqs': [], 'betas': [], 'nts': [], 'sids': [], 'positions': [] } # - Start Wallace, fix the bug.
# validate the p values first, if has problem, complete ignore this snp. pval_read = float(l[header_dict[pval]]) if isinf(stats.norm.ppf(pval_read)): invalid_p += 1 continue chrom_dict[chrom]['ps'].append(pval_read) # - end Wallace. chrom_dict[chrom]['sids'].append(sid) chrom_dict[chrom]['positions'].append(pos_read) # Check the frequency if reffreq is not None and reffreq in header_dict: if l[header_dict[reffreq]] == '.' or l[ header_dict[reffreq]] == 'NA': chrom_dict[chrom]['freqs'].append(-1) else: chrom_dict[chrom]['freqs'].append( float(l[header_dict[reffreq]])) elif (case_n is not None and control_n is not None and case_n in header_dict and control_n in header_dict and case_freq is not None and control_freq is not None and case_freq in header_dict and control_freq in header_dict): if (l[header_dict[control_n]] == '.' or l[header_dict[control_n]] == 'NA' or l[header_dict[case_n]] == '.' or l[header_dict[case_n]] == 'NA' or l[header_dict[control_freq]] == '.' or l[header_dict[control_freq]] == 'NA' or l[header_dict[case_freq]] == '.' or l[header_dict[case_freq]] == 'NA'): chrom_dict[chrom]['freqs'].append(-1) else: case_N = float(l[header_dict[case_n]]) control_N = float(l[header_dict[control_n]]) N = case_N + control_N a_scalar = case_N / N u_scalar = control_N / N freq = float( l[header_dict[case_freq]]) * a_scalar + float( l[header_dict[control_freq]]) * u_scalar chrom_dict[chrom]['freqs'].append(freq) else: chrom_dict[chrom]['freqs'].append(-1) # Get the INFO score info_sc = -1 if info is not None and info in header_dict: info_sc = float(l[header_dict[info]]) chrom_dict[chrom]['infos'].append(info_sc) # - Wallace, move this to the very beginning for checking. # Please refer to line 167 to 174. # pval_read = float(l[header_dict[pval]]) # chrom_dict[chrom]['ps'].append(pval_read) # if isinf(stats.norm.ppf(pval_read)): # invalid_p += 1 # continue # - end fix. nt = [l[header_dict[A1]].upper(), l[header_dict[A2]].upper()] chrom_dict[chrom]['nts'].append(nt) raw_beta = float(l[header_dict[eff]]) if not input_is_beta: raw_beta = sp.log(raw_beta) chrom_dict[chrom]['log_odds'].append(raw_beta) beta = sp.sign(raw_beta) * stats.norm.ppf(pval_read / 2.0) if n is None: #wallae #chrom_dict[chrom]['betas'].append(beta/ sp.sqrt(int(header_dict[ncol]))) chrom_dict[chrom]['betas'].append( beta / sp.sqrt(int(l[header_dict[ncol]]))) else: chrom_dict[chrom]['betas'].append(beta / sp.sqrt(n)) else: beta = sp.sign(raw_beta) * stats.norm.ppf(pval_read / 2.0) if n is None: ##wallae # chrom_dict[chrom]['log_odds'].append(beta/ sp.sqrt(int(header_dict[ncol]))) # chrom_dict[chrom]['betas'].append(beta/ sp.sqrt(int(header_dict[ncol]))) chrom_dict[chrom]['log_odds'].append( beta / sp.sqrt(int(l[header_dict[ncol]]))) chrom_dict[chrom]['betas'].append( beta / sp.sqrt(int(l[header_dict[ncol]]))) else: chrom_dict[chrom]['log_odds'].append(beta / sp.sqrt(n)) chrom_dict[chrom]['betas'].append(beta / sp.sqrt(n)) if len(bad_chromosomes) > 0: print('Ignored chromosomes: %s' % (','.join(list(bad_chromosomes)))) print( 'Please note that only data on chromosomes 1-23, and X are parsed.' ) print('SS file loaded, now sorting and storing in HDF5 file.') assert not 'sum_stats' in hdf5_file, 'Something is wrong with HDF5 file?' ssg = hdf5_file.create_group('sum_stats') num_snps = 0 num_non_finite = 0 for chrom in chrom_dict: # Wallace for checking allele coding. 
if debug: for x, y, z in zip(chrom_dict[chrom]['sids'], chrom_dict[chrom]['nts'], chrom_dict[chrom]['ps']): sys.stderr.write('AFTER LOAD GWAS SUM: %s %s %s %s\n' % (x, y[0], y[1], z)) # end - Wallace if debug: print('%d SNPs on chromosome %s' % (len(chrom_dict[chrom]['positions']), chrom)) sl = list( zip(chrom_dict[chrom]['positions'], chrom_dict[chrom]['sids'], chrom_dict[chrom]['nts'], chrom_dict[chrom]['betas'], chrom_dict[chrom]['log_odds'], chrom_dict[chrom]['infos'], chrom_dict[chrom]['freqs'], chrom_dict[chrom]['ps'])) sl.sort() ps = [] betas = [] nts = [] sids = [] positions = [] log_odds = [] infos = [] freqs = [] prev_pos = -1 for pos, sid, nt, beta, lo, info, frq, p in sl: if pos == prev_pos: if debug: print('duplicated position %d' % pos) continue else: prev_pos = pos if not sp.isfinite(beta): num_non_finite += 1 continue ps.append(p) betas.append(beta) nts.append(nt) sids.append(sid) positions.append(pos) log_odds.append(lo) infos.append(info) freqs.append(frq) nts = sp.array(nts, dtype=nts_dtype) sids = sp.array(sids, dtype=sids_dtype) if debug: if not num_non_finite == 0: print('%d SNPs have non-finite statistics on chromosome %s' % (num_non_finite, chrom)) print('Still %d SNPs on chromosome %s' % (len(ps), chrom)) g = ssg.create_group('chrom_%s' % chrom) g.create_dataset('ps', data=sp.array(ps)) g.create_dataset('freqs', data=freqs) g.create_dataset('betas', data=betas) g.create_dataset('log_odds', data=log_odds) num_snps += len(log_odds) g.create_dataset('infos', data=infos) g.create_dataset('nts', data=nts) g.create_dataset('sids', data=sids) g.create_dataset('positions', data=positions) hdf5_file.flush() print('%d SNPs excluded due to invalid chromosome ID.' % chr_filter) print('%d SNPs excluded due to invalid chromosome position' % pos_filter) print('%d SNPs excluded due to invalid P value' % invalid_p) print('%d SNPs parsed from summary statistics file.' % num_snps)
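# A small numeric illustration of the P-value-to-effect transform used in
# parse_sum_stats_custom, with hypothetical inputs. Note stats.norm.ppf(p/2)
# is negative for p < 1, so sign(raw_beta) * ppf(p/2) carries the opposite
# sign to raw_beta under the convention used above:
import scipy as sp
from scipy import stats

pval_read, raw_beta, N = 1e-8, 0.12, 50000.0
beta = sp.sign(raw_beta) * stats.norm.ppf(pval_read / 2.0)
print(beta)               # ~ -5.73; |beta| is the two-sided z-score
print(beta / sp.sqrt(N))  # sample-size-standardized effect, as stored above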
def infer_lEI_post(self, X_, D_i, fixI=False, I=0.): E = self.infer_lEI(X_, D_i, fixI=fixI, I=I) ns = X_.shape[0] return sp.log(sp.nanmean(sp.exp(E), axis=0)).reshape([1, ns])
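# infer_lEI_post computes log(mean(exp(E), axis=0)), which can overflow when E
# is large; a numerically stable equivalent (modulo the NaN handling of
# nanmean), assuming scipy.special.logsumexp is available (scipy >= 0.19;
# older releases expose it as scipy.misc.logsumexp):
import scipy as sp
from scipy.special import logsumexp

def log_mean_exp(E, axis=0):
    # log(mean(exp(E))) == logsumexp(E) - log(n) along the reduced axis
    return logsumexp(E, axis=axis) - sp.log(E.shape[axis])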