Example #1
def bp_mf_free_energy(lmds, pis, args):
    theta, alpha, beta, gamma, emit_probs, X = (args.theta, args.alpha, args.beta, args.gamma, args.emit_probs,
                        args.X)
    I, T, L = X.shape
    K = gamma.shape[0]
    log_theta, log_alpha, log_beta, log_gamma = sp.log(theta), sp.log(alpha), sp.log(beta), sp.log(gamma)
    log_obs_mat = args.log_obs_mat
    Q = bp_marginal_onenode(lmds, pis, args)
    entropy = (Q * sp.log(Q)).sum()
    #print 'mf entropy', -entropy
    total_free = entropy
    for i in xrange(I):
    #for i in prange(I, nogil=True):
        vp = args.vert_parent[i]  # vertical parent index; assumed to be carried on args like the other model arrays
        #for t in prange(T, nogil=True):
        for t in xrange(T):
            for k in xrange(K):
                total_free -= Q[i,t,k] * log_obs_mat[i,t,k]
                if i == 0 and t == 0:
                    total_free -= Q[i,t,k] * log_gamma[k]
                else:
                    for v in xrange(K):
                        if i == 0:
                            total_free -= Q[i,t-1,v] * Q[i,t,k] * log_alpha[v,k]
                        elif t == 0:
                            total_free -= Q[vp,t,v] * Q[i,t,k] * log_beta[v,k]
                        else:
                            for h in xrange(K):
                                total_free -= Q[vp,t,v] * Q[i,t-1,h] * Q[i,t,k] * log_theta[v,h,k]
    #print 'mf free energy:', total_free
    return total_free
Example #2
 def _box_cox_transform(self, verbose=False, method='standard'):
     """
     Performs the Box-Cox transformation over a range of lambda values, picking the one that is optimal with respect to normality.
     """
     from scipy import stats
     a = sp.array(self.values)
     if method == 'standard':
         vals = (a - min(a)) + 0.1 * sp.var(a)
     else:
         vals = a
     sw_pvals = []
     lambdas = sp.arange(-2.0, 2.1, 0.1)
     for l in lambdas:
         if l == 0:
             vs = sp.log(vals)
         else:
             vs = ((vals ** l) - 1) / l
         r = stats.shapiro(vs)
         if sp.isfinite(r[0]):
             pval = r[1]
         else:
             pval = 0.0
         sw_pvals.append(pval)
     i = sp.argmax(sw_pvals)
     l = lambdas[i]
     if l == 0:
         vs = sp.log(vals)
     else:
         vs = ((vals ** l) - 1) / l
     self._perform_transform(vs,"box_cox")
     log.debug('optimal lambda was %0.1f' % l)
     return True
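A standalone sketch of the same idea (choose the Box-Cox lambda that maximizes the Shapiro-Wilk p-value); the data and names below are illustrative and not from the project above, and a small tolerance is used for the lambda == 0 case because a float arange grid rarely hits 0.0 exactly.
import numpy as np
from scipy import stats

vals = np.random.gamma(shape=2.0, scale=1.0, size=200) + 0.1   # strictly positive toy data
lambdas = np.arange(-2.0, 2.1, 0.1)
pvals = []
for l in lambdas:
    vs = np.log(vals) if abs(l) < 1e-12 else (vals ** l - 1) / l
    pvals.append(stats.shapiro(vs)[1])
best = lambdas[int(np.argmax(pvals))]
print('optimal lambda: %0.1f' % best)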
Example #3
    def coste(self, *args, **kwargs):
        """
        material:
            0   -   Carbon steel
            1   -   Stainless steel 316
            2   -   Stainless steel 304
            3   -   Stainless steel 347
            4   -   Nickel
            5   -   Monel
            6   -   Inconel
            7   -   Zirconium
            8    -  Titanium
            9    -   Brick and rubber or brick and polyester-lined steel
            10  -   Rubber or lead-lined steel
            11  -   Polyester, fiberglass-reinforced
            12  -   Aluminum
            13  -   Copper
            14  -   Concrete
        """
        self._indicesCoste(*args)
        
        self.material=kwargs["material"]
        
        V=self.Volumen.galUS

        Fm=[1., 2.7, 2.4, 3.0, 3.5, 3.3, 3.8, 11.0, 11.0, 2.75, 1.9, 0.32, 2.7, 2.3, 0.55][self.material]

        if V<=21000:
            C=Fm*exp(2.631+1.3673*log(V)-0.06309*log(V)**2)
        else:
            C=Fm*exp(11.662+0.6104*log(V)-0.04536*log(V)**2)
        
        self.C_adq=Currency(C*self.Current_index/self.Base_index)
        self.C_inst=Currency(self.C_adq*self.f_install)
Example #4
File: gwas.py Project: timeu/PyGWAS
def _calc_bic_(ll, num_snps, num_par, n):
    bic = -2 * (ll) + num_par * sp.log(n)
    extended_bic = bic + \
        2 * _log_choose_(num_snps, num_par - 2)
    modified_bic = bic + \
        2 * (num_par) * sp.log(num_snps / 2.2 - 1)
    return (bic, extended_bic, modified_bic)
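A tiny numeric check of the plain BIC term above (the extended/modified variants additionally need the _log_choose_ helper, which is not shown here); NumPy's log is used directly since scipy.log in these snippets is just NumPy's log re-exported by older SciPy releases.
import numpy as np

ll, num_par, n = -1234.5, 3, 500
bic = -2 * ll + num_par * np.log(n)
print(bic)   # about 2487.6: -2*(-1234.5) = 2469 plus 3*ln(500) ~ 18.6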
Example #5
def llfun(act, pred):
    epsilon = 1e-15
    pred = sp.maximum(epsilon, pred)
    pred = sp.minimum(1 - epsilon, pred)
    ll = sum(act * sp.log(pred) + sp.subtract(1, act) * sp.log(sp.subtract(1, pred)))
    ll = ll * -1.0 / len(act)
    return ll
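A minimal usage sketch for llfun; plain lists are enough as input because the function converts through scipy/NumPy ufuncs internally, assuming the same older SciPy the snippet relies on (sp.maximum, sp.minimum and sp.log are NumPy re-exports there).
act = [1, 0, 0, 1]
pred = [0.9, 0.2, 0.2, 0.6]
print(llfun(act, pred))   # mean negative log-likelihood, roughly 0.27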
Example #6
def gap(data, refs=None, nrefs=20, ks=range(1,11), method=None):
    shape = data.shape
    if refs is None:
        tops = data.max(axis=0)
        bots = data.min(axis=0)
        dists = scipy.matrix(scipy.diag(tops-bots))

        rands = scipy.random.random_sample(size=(shape[0], shape[1], nrefs))
        for i in range(nrefs):
            rands[:, :, i] = rands[:, :, i]*dists+bots
    else:
        rands = refs
    gaps = scipy.zeros((len(ks),))
    for (i, k) in enumerate(ks):
        g1 = method(n_clusters=k).fit(data)
        (kmc, kml) = (g1.cluster_centers_, g1.labels_)
        disp = sum([euclidean(data[m, :], kmc[kml[m], :]) for m in range(shape[0])])

        refdisps = scipy.zeros((rands.shape[2],))
        for j in range(rands.shape[2]):
            g2 = method(n_clusters=k).fit(rands[:, :, j])
            (kmc, kml) = (g2.cluster_centers_, g2.labels_)
            refdisps[j] = sum([euclidean(rands[m, :, j], kmc[kml[m],:]) for m in range(shape[0])])
        gaps[i] = scipy.log(scipy.mean(refdisps))-scipy.log(disp)
    return gaps
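A possible way to call the gap() above; scikit-learn's KMeans and scipy.spatial.distance.euclidean are assumptions about what the snippet expects for `method` and `euclidean`, and the function body itself needs an older SciPy where scipy.matrix and scipy.random are still present.
import scipy
from scipy.spatial.distance import euclidean
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

data, _ = make_blobs(n_samples=300, centers=3, random_state=0)
print(gap(data, ks=range(1, 6), method=KMeans))   # larger gap suggests a better k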
Example #7
def _Psat(Tdb):
    """
    ASHRAE Fundamentals Handbook, p. 1.2, Eq. 4
    input:
        Dry bulb temperature, K
    return:
        Saturation pressure, Pa
    """
    if 173.15 <= Tdb < 273.15:
        C1 = -5674.5359
        C2 = 6.3925247
        C3 = -0.009677843
        C4 = 0.00000062215701
        C5 = 2.0747825E-09
        C6 = -9.484024E-13
        C7 = 4.1635019
        pws = exp(C1/Tdb + C2 + C3*Tdb + C4*Tdb**2 + C5*Tdb**3 + C6*Tdb**4 +
                  C7*log(Tdb))
    elif 273.15 <= Tdb <= 473.15:
        C8 = -5800.2206
        C9 = 1.3914993
        C10 = -0.048640239
        C11 = 0.000041764768
        C12 = -0.000000014452093
        C13 = 6.5459673
        pws = exp(C8/Tdb + C9 + C10*Tdb + C11*Tdb**2 + C12*Tdb**3 + C13*log(Tdb))
    else:
        raise NotImplementedError("Incoming out of bound")

    return pws
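A quick sanity check; the snippet needs exp and log in scope (e.g. from math), and at 20 degC the correlation should land near the familiar ~2.34 kPa saturation pressure of water.
from math import exp, log

print(_Psat(293.15))   # roughly 2.3e3 Pa at 20 degC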
Example #8
    def _LML_covar(self, hyperparams):
        """

	log marginal likelihood contributions from covariance hyperparameters

	"""
        try:   
            KV = self.get_covariances(hyperparams)
        except linalg.LinAlgError:
            LG.error("exception caught (%s)" % (str(hyperparams)))
            return 1E6

        #all in one go
        #negative log marginal likelihood, see derivations
        lquad = 0.5* (KV['y_rot']*KV['Si']*KV['y_rot']).sum()
        ldet  = 0.5*-SP.log(KV['Si'][:,:]).sum()
        LML   = 0.5*self.n*self.d * SP.log(2*SP.pi) + lquad + ldet
        if VERBOSE:
            #1. slow and explicit way
            lmls_ = SP.zeros([self.d])
            for i in xrange(self.d):
                _y = self.y[:,i]
                sigma2 = SP.exp(2*hyperparams['lik'])
                _K = KV['K'] + SP.diag(KV['Knoise'][:,i])
                _Ki = SP.linalg.inv(_K)
                lquad_ = 0.5 * SP.dot(_y,SP.dot(_Ki,_y))
                ldet_ = 0.5 * SP.log(SP.linalg.det(_K))
                lmls_[i] = 0.5 * self.n* SP.log(2*SP.pi) + lquad_ + ldet_
            assert SP.absolute(lmls_.sum()-LML)<1E-3, 'outch'
        return LML
Example #9
def summarize_splits(splits, weighted=True):
    rows = []
    if weighted:
        v = [ (x.likelihood * x.weight, x) for x in splits ]
    else:
        v = [ (x.likelihood, x) for x in splits ]
    ptot = sum([ x[0] for x in v ])
    v.sort(); v.reverse()
    opt = scipy.log(v[0][0])

    rows.append(["split", "lnL", "Rel.Prob"])
    sumprob = 0.0
    for L, split in v:
        lnL = scipy.log(L)
        relprob = (L/ptot)
        if sumprob < 0.95:
        #if (opt - lnL) < 2:
            rows.append([str(split), "%.4g" % lnL, "%.4g" % relprob])
        sumprob += relprob
    widths = []
    for i in range(3):
        w = max([ len(x[i]) for x in rows ])
        for x in rows:
            x[i] = x[i].ljust(w)
    return [ "  ".join(x) for x in rows ]
Example #10
    def survival_function(loss_ratio, **kwargs):
        """
            Static method that prepares the calculation parameters
            to be passed to stats.lognorm.sf

            :param loss_ratio: current loss ratio
            :type loss_ratio: float

            :param kwargs: convenience dictionary
            :type kwargs: :py:class:`dict` with the following
                keys:
                    **vf** - vulnerability function as provided by
                            :py:class:`openquake.shapes.VulnerabilityFunction`
                    **col** - matrix column number
        """
        vuln_function = kwargs.get('vf')
        position = kwargs.get('col')

        vf_loss_ratio = vuln_function.loss_ratios[position]

        stddev = vuln_function.covs[position] * vf_loss_ratio

        variance = stddev ** 2.0

        sigma = sqrt(log((variance / vf_loss_ratio ** 2.0) + 1.0))
        mu = exp(log(vf_loss_ratio ** 2.0 /
            sqrt(variance + vf_loss_ratio ** 2.0)))

        return stats.lognorm.sf(loss_ratio, sigma, scale=mu)
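The sigma/mu lines above are standard moment matching for scipy's lognorm (sigma is the shape parameter, mu the scale, i.e. exp of the underlying normal mean). A self-contained check with made-up numbers rather than the VulnerabilityFunction object:
import numpy as np
from scipy import stats

mean, cov = 0.3, 0.5                                     # loss-ratio mean and coefficient of variation
stddev = cov * mean
variance = stddev ** 2.0
sigma = np.sqrt(np.log(variance / mean ** 2.0 + 1.0))
scale = mean ** 2.0 / np.sqrt(variance + mean ** 2.0)    # exp(mu) of the underlying normal
print(stats.lognorm(sigma, scale=scale).mean())          # ~0.3, recovering the input mean
print(stats.lognorm.sf(0.5, sigma, scale=scale))         # survival probability at loss ratio 0.5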
Example #11
def print_model_probability(logprob):
	"""
	Gives a nice overview of the model probability, allowing
	the practitioner to compare this model's probability to others
	"""
	prob = scipy.exp(logprob)

	limits = {
		'eq'         :logprob, 
		'barely'     :logprob - scipy.log(3),
		'substantial':logprob - scipy.log(10),
		'strong'     :logprob - scipy.log(30),
		'very strong':logprob - scipy.log(100)
	}
	for i in limits:
		limits[i] = "%5.1f" % limits[i]
		limits[i] = " " * (7 - len(limits[i])) + limits[i]
	
	print("Model probability ln(p(D|M, I)): [about 10^%.0f] %.5f" % (logprob / scipy.log(10), logprob))
	print(("""
	Table to compare support against other models (Jeffrey):

	   other model     |
	   ln(p(D|M,I))    | supporting evidence for this model
	 ------------------+-------------------------------------
		  >%%(eq)%s   Negative (supports other model)
	 %%(eq)%s ..%%(barely)%s   Barely worth mentioning
	 %%(barely)%s ..%%(substantial)%s   Substantial
	 %%(substantial)%s ..%%(strong)%s   Strong
	 %%(strong)%s ..%%(very strong)%s   Very strong
		  <%%(very strong)%s   Decisive

	be careful.
	""" % tuple(['s']*10)) % limits)
Example #12
def evaluate_ll(y, yhat):
    epsilon = 1e-15
    yhat = sp.maximum(epsilon, yhat)
    yhat = sp.minimum(1-epsilon, yhat)
    ll = sum(y*sp.log(yhat) + sp.subtract(1,y)*sp.log(sp.subtract(1,yhat)))
    ll = ll * -1.0/len(y)
    return ll
Example #13
def binary_logloss(p, y):
    epsilon = 1e-15
    p = sp.maximum(epsilon, p)
    p = sp.minimum(1-epsilon, p)
    res = sum(y * sp.log(p) + sp.subtract(1, y) * sp.log(sp.subtract(1, p)))
    res *= -1.0/len(y)
    return res
Example #14
def log_d_pois_like_trunc_5(d,s1,s2,a,p):
    """double poisson w max 5 goals"""
    #dp = np.sign(d)*np.power(np.abs(d),p)
    dp = 1.5*np.arctan(d)    #print(dp)
    return ( log(a)*(s1+s2)+dp*(s1-s2) - 2*a*cosh(dp)
         -gammaln(s1+1) - gammaln(s2+1) 
        -log(gammaincc(6,a*exp(-dp))*gammaincc(6,a*exp(dp)) ) ) 
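The snippet uses several names that are not shown; a plausible set of imports (an assumption about the original module, not taken from it) plus a smoke-test call:
import numpy as np
from numpy import log, exp, cosh
from scipy.special import gammaln, gammaincc

print(log_d_pois_like_trunc_5(d=0.2, s1=2, s2=1, a=1.3, p=1.0))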
Example #15
def compute_continuous_prob_value(parameters, distribution, rvs):
    mean = float(parameters[0])
    stddev = float(parameters[1]) / 100 * mean
    A = float(parameters[2])
    B = float(parameters[3])
    result = float("-inf")

    if rvs is None:
        while result <= A or result > B:

            if distribution == "normal":
                rvs = stats.norm.rvs
                result = rvs(mean, stddev)

            elif distribution == "lognormal":
                variance = stddev ** 2.0
                mu = log(mean ** 2.0 / sqrt(variance + mean ** 2.0))
                sigma = sqrt(log((variance / mean ** 2.0) + 1.0))
                rvs = stats.lognorm.rvs
                result = rvs(sigma, scale=scipy.exp(mu))

            elif distribution == "gamma":
                betha = (stddev) ** 2 / mean
                alpha = mean / betha
                rvs = stats.gamma.rvs
                result = rvs(alpha, scale=betha)
    else:
        result = 1

    return result
Example #16
    def loglike(self, data, paravec, sign = 1):

        la, lb, lp = paravec
        loglike = len(data) * sp.log(np.exp(la)) + (np.exp(la)*np.exp(lp)-1) * sum(sp.log(data)) - (1/np.exp(lb)**np.exp(la)) * sum(data**np.exp(la))\
        -len(data)*np.exp(la)*np.exp(lp)*sp.log(np.exp(lb)) - len(data) * gammaln(np.exp(lp))
        loglike = sign*loglike
        return loglike
Example #17
    def loglike(self, data, paravec, sign = 1): 

        lu, lsig = paravec
        loglike = (-1/(2*np.exp(lsig)**2)) * sum((sp.log(data)-np.exp(lu))**2) - (len(data)/2) * sp.log(2*sp.pi) \
        - len(data) * sp.log(np.exp(lsig)) - sum(sp.log(data))
        loglike = sign*loglike
        return loglike
Example #18
def run_demo():
    LG.basicConfig(level=LG.INFO)
    random.seed(1)

    #1. create toy data
    [x,y] = create_toy_data()
    n_dimensions = 1
    
    #2. location of evenly spaced predictions
    X = SP.linspace(0,10,100)[:,SP.newaxis]
        

    if 0:
        #old interface where the covariance function and likelihood are one thing:
        #hyperparameters
        covar_parms = SP.log([1,1,1])
        hyperparams = {'covar':covar_parms}       
        #construct covariance function
        SECF = se.SqexpCFARD(n_dimensions=n_dimensions)
        noiseCF = noise.NoiseCFISO()
        covar = combinators.SumCF((SECF,noiseCF))
        covar_priors = []
        #scale
        covar_priors.append([lnpriors.lnGammaExp,[1,2]])
        covar_priors.extend([[lnpriors.lnGammaExp,[1,1]] for i in xrange(n_dimensions)])
        #noise
        covar_priors.append([lnpriors.lnGammaExp,[1,1]])
        priors = {'covar':covar_priors}
        likelihood = None

    if 1:
        #new interface with likelihood parameters being decoupled from the covariance function
        likelihood = lik.GaussLikISO()
        covar_parms = SP.log([1,1])
        hyperparams = {'covar':covar_parms,'lik':SP.log([1])}       
        #construct covariance function
        SECF = se.SqexpCFARD(n_dimensions=n_dimensions)
        covar = SECF
        covar_priors = []
        #scale
        covar_priors.append([lnpriors.lnGammaExp,[1,2]])
        covar_priors.extend([[lnpriors.lnGammaExp,[1,1]] for i in xrange(n_dimensions)])
        lik_priors = []
        #noise
        lik_priors.append([lnpriors.lnGammaExp,[1,1]])
        priors = {'covar':covar_priors,'lik':lik_priors}

        

    
    gp = GP(covar,likelihood=likelihood,x=x,y=y)
    opt_model_params = opt.opt_hyper(gp,hyperparams,priors=priors,gradcheck=False)[0]
    
    #predict
    [M,S] = gp.predict(opt_model_params,X)

    #create plots
    gpr_plot.plot_sausage(X,M,SP.sqrt(S))
    gpr_plot.plot_training_data(x,y)
    PL.show()
Example #19
def logloss(act, pred):
    epsilon = 1e-4
    pred = sp.maximum(epsilon, pred)
    pred = sp.minimum(1-epsilon, pred)
    ll = -1.0/len(act) * sum(act*sp.log(pred) +
            sp.subtract(1,act)*sp.log(sp.subtract(1,pred)))
    return ll
Example #20
 def fit(self, kk=None):
     """
     Fit Fourier spectrum with the function set at class instantiation
     ==> NB: fitting is done in logarithmic coordinates
     and fills plotting arrays with data
     --------
     Options:
     --------
     kk
        (k1,k2) <None> spectral interval for function fitting
        by default interval [ kk[1], kk[imax__kk] ] will be fitted
        ==> i.e. k=0 is excluded
     """
     # fitting interval
     if kk:
         ik_min=(self.fft_data.kk[1:self.fft_data.imax__kk]<=kk[0]).nonzero()[0][-1]
         ik_max=(self.fft_data.kk[1:self.fft_data.imax__kk]<=kk[1]).nonzero()[0][-1]
     else:
         ik_min=1;
         ik_max=self.fft_data.imax__kk
     # do fitting
     self.__popt,self.__pcov = scipy.optimize.curve_fit(self.__func_fit,
                                                        scipy.log(self.fft_data.kk[ik_min:ik_max]),
                                                        scipy.log(self.fft_data.Ik[ik_min:ik_max]) )
     # boundaries of fitted interval
     self.kmin = self.fft_data.kk[ik_min]
     self.kmax = self.fft_data.kk[ik_max]
     # fill plot arrays <===============
     self.kk_plot=scipy.logspace( scipy.log10(self.kmin),
                                  scipy.log10(self.kmax),
                                  self.nk_plot )
     self.Ik_plot=self.fitting_function(self.kk_plot)
Example #21
def gap(data, refs=None, nrefs=20, ks=range(1,11), iter=10):
	"""
	Compute the Gap statistic for an nxm dataset in data.

	Either give a precomputed set of reference distributions in refs as an (n,m,k) scipy array,
	or state the number k of reference distributions in nrefs for automatic generation with a
	uniform distribution within the bounding box of data.

	Give the list of k-values for which you want to compute the statistic in ks.
	"""
	shape = data.shape
	if refs==None:
		tops = data.max(axis=0)
		bots = data.min(axis=0)
		dists = scipy.matrix(scipy.diag(tops-bots))
		rands = scipy.random.random_sample(size=(shape[0],shape[1],nrefs))
		for i in range(nrefs):
			rands[:,:,i] = rands[:,:,i]*dists+bots
	else:
		rands = refs
	
	gaps = scipy.zeros((len(ks),))
	for (i,k) in enumerate(ks):
		(kmc,kml) = scipy.cluster.vq.kmeans2(data, k, iter=iter)
		disp = sum([dst(data[m,:],kmc[kml[m],:]) for m in range(shape[0])])
	
		refdisps = scipy.zeros((rands.shape[2],))
		print 'For k =',k,'calculating random distribution #',
		for j in range(rands.shape[2]):
			print j,
			(kmc,kml) = scipy.cluster.vq.kmeans2(rands[:,:,j], k, iter=iter)
			refdisps[j] = sum([dst(rands[m,:,j],kmc[kml[m],:]) for m in range(shape[0])])
		gaps[i] = scipy.log(scipy.mean(refdisps))-scipy.log(disp)
		print ""
	return gaps
Example #22
    def TB_Cv_exceso(self, T, P):
        """Método de cálculo de la capacidad calorífica a volumen constante de exceso mediante la ecuación de estado de Trebble-Bishnoi"""
        a, b, c, d, q1, q2=self.TB_lib(T, P)
        v=self.TB_V(T, P)
        z=P*v/R_atml/T
        t=1+6*c/b+c**2/b**2+4*d**2/b**2
        tita=abs(t)**0.5
        A=a*P/R_atml**2/T**2
        B=b*P/R_atml/T
        u=1+c/b
        delta=v**2+(b+c)*v-b*c-d**2
        beta=1.+q2*(1-self.tr(T)+log(self.tr(T)))
        da=-q1*a/self.Tc
        dda=q1**2*a/self.Tc**2
        if self.tr(T)<=1.0:
            db=b/beta*(1/T-1/self.Tc)
            ddb=-q2*b/beta/T**2
        else:
            db=0
            ddb=0

        dt=-db/b**2*(6*c+2*c**2/b+8*d**2/b)
        dtita=abs(dt)/20
        if t>=0:
            lamda=log((2*z+B*(u-tita))/(2*z+B*(u+tita)))
            dlamda=(db-db*tita-b*dtita)/(2*v+b+c-b*tita)-(db+db*tita+b*dtita)/((2*v+b+c+b*tita))
        else:
            lamda=2*arctan((2*z+u*B)/B/tita)-pi
            dlamda=2/(1+((2*v+b+c)/b/tita)**2)*(db/b/tita-(2*v+b+c)*(db/b**2/tita+dtita/b/tita**2))

        Cv=1/b/tita*(dlamda*(a-da*T)-lamda*dda*T-lamda*(a-da*T)*(db/b+dtita/tita))+(ddb*T+db)*(-R_atml*T/(v-b)+a/b**2/t*((v*(3*c+b)-b*c+c**2-2*d**2)/delta+(3*c+b)*lamda/b/tita))+db*T*(-R_atml/(v-b)-R_atml*T*db/(v-b)**2+1/b**2/t*(da-2*a*db/b-a*dt/t)*((v*(3*c+b)-b*c+c**2-2*d**2)/delta+(3*c+b)*lamda/b/tita)+a/b**2/t*(db*(v-c)*(v**2-2*c*v-c**2+d**2)/delta**2+db*lamda/b/tita+(3*c+b)/b/tita*(dlamda-lamda*(db/b+dtita/tita))))
        return unidades.SpecificHeat(Cv*101325/1000/self.peso_molecular, "JkgK")
Example #23
def KramersKronigFFT(ImX_A):
	'''	Hilbert transform used to calculate real part of a function from its imaginary part
	uses piecewise cubic interpolated integral kernel of the Hilbert transform
	use only if len(ImX_A)=2**m-1, uses fft from scipy.fftpack  '''
	X_A = sp.copy(ImX_A)
	N = int(len(X_A))
	## be careful with the data type, otherwise it fails for large N
	if N > 3e6: A = sp.arange(3,N+1,dtype='float64')
	else:       A = sp.arange(3,N+1)  
	X1 = 4.0*sp.log(1.5)
	X2 = 10.0*sp.log(4.0/3.0)-6.0*sp.log(1.5)
	## filling the kernel
	if N > 3e6: Kernel_A = sp.zeros(N-2,dtype='float64')
	else:       Kernel_A = sp.zeros(N-2)
	Kernel_A = (1-A**2)*((A-2)*sp.arctanh(1.0/(1-2*A))+(A+2)*sp.arctanh(1.0/(1+2*A)))\
	+((A**3-6*A**2+11*A-6)*sp.arctanh(1.0/(3-2*A))+(A+3)*(A**2+3*A+2)*sp.arctanh(1.0/(2*A+3)))/3.0
	Kernel_A = sp.concatenate([-sp.flipud(Kernel_A),sp.array([-X2,-X1,0.0,X1,X2]),Kernel_A])/sp.pi
	## zero-padding the functions for fft
	ImXExt_A = sp.concatenate([X_A[int((N-1)/2):],sp.zeros(N+2),X_A[:int((N-1)/2)]])
	KernelExt_A = sp.concatenate([Kernel_A[N:],sp.zeros(1),Kernel_A[:N]])
	## performing the fft
	ftReXExt_A = -fft(ImXExt_A)*fft(KernelExt_A)
	ReXExt_A = sp.real(ifft(ftReXExt_A))
	ReX_A = sp.concatenate([ReXExt_A[int((3*N+3)/2+1):],ReXExt_A[:int((N-1)/2+1)]])
	return ReX_A
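A small smoke test; it assumes `import scipy as sp` and `from scipy.fftpack import fft, ifft` as the docstring implies (so an older SciPy where sp.log, sp.arctanh, sp.flipud etc. are available), applied to a simple retarded function whose real part is known analytically.
import numpy as np
import scipy as sp
from scipy.fftpack import fft, ifft

N = 2**10 - 1                          # length must be 2**m - 1
w = np.linspace(-20.0, 20.0, N)
ImX = -1.0 / (w**2 + 1.0)              # Im of 1/(w + 1j), a retarded test function
ReX = KramersKronigFFT(ImX)
# away from the interval edges ReX should resemble w / (w**2 + 1.0)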
Example #24
File: He.py Project: bkt92/pychemqt
    def _visco0(self, rho, T, fase=None, coef=False):

        Visco0 = lambda T: -0.135311743/log(T) + 1.00347841 + \
            1.20654649*log(T) - 0.149564551*log(T)**2 + 0.0125208416*log(T)**3

        def ViscoE(T, rho):
            x = log(T)
            B = -47.5295259/x+87.6799309-42.0741589*x+8.33128289*x**2-0.589252385*x**3
            C = 547.309267/x-904.870586+431.404928*x-81.4504854*x**2+5.37005733*x**3
            D = -1684.39324/x+3331.08630-1632.19172*x+308.804413*x**2-20.2936367*x**3
            return rho.gcc*B+rho.gcc**2*C+rho.gcc**3*D


        if T < 100:
            # Section 4.2.1 for 3.5 < T < 100
            no = Visco0(T)
            ne = ViscoE(T, rho)
            n = exp(no+ne)
        else:
            # Section 4.2.1 for T > 100
            no = 196*T**0.71938*exp(12.451/T-295.67/T**2-4.1249)
            ne = exp(Visco0(T)+ViscoE(T, rho))-exp(Visco0(T)+ViscoE(T, unidades.Density(0)))
            n = no+ne

        if coef:
            return ne
        else:
            return unidades.Viscosity(n*1e-6, "P")
Example #25
    def _LML_covar(self, hyperparams):
        #calculate marginal likelihood of kronecker GP

        #1. get covariance structures needed:
        try:
            KV = self.get_covariances(hyperparams)
        except linalg.LinAlgError:
            LG.error("exception caught (%s)" % (str(hyperparams)))
            return 1E6
        #2. build lml 
        LML = 0
        LMLc = 0.5* self.nd * SP.log(2.0 * SP.pi)
        #constant part of negative lml
        #quadratic form
        Si = KV['Si']
       
        LMLq = 0.5 * SP.dot(KV['y_rot'].ravel(),KV['YSi'].ravel() )
        #determinant stuff
        LMLd = -0.5 * SP.log(Si).sum()

        if VERBOSE:
            print "costly verbose debugging on"
            K = SP.kron(KV['Kr'],KV['Kc']) + SP.diag(KV['Knoise'])
            Ki = SP.linalg.inv(K)
            LMLq_ = 0.5* SP.dot(SP.dot(self.y.ravel(),Ki),self.y.ravel())
            LMLd_ = 0.5* 2 * SP.log(SP.linalg.cholesky(K).diagonal()).sum()
            check_dist(LMLq,LMLq_)
            check_dist(LMLd,LMLd_)
            

        return LMLc+LMLq+LMLd
Example #26
def tfidf(termFrequency):
	""" The student must code this. """
	gf = sp.sum(termFrequency,axis=1).astype(float)
	p = (termFrequency.T/gf).T
	g = sp.sum(p*sp.log(p+1)/sp.log(len(p[0,:])),axis=1) + 1
	a = (sp.log(termFrequency + 1).T*g).T
	return a
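A small usage sketch with a 3x3 term-frequency matrix; a plain NumPy array is fine as input, while the function itself relies on the old scipy-as-numpy aliases (sp.sum, sp.log).
import numpy as np

tf = np.array([[3., 0., 1.],
               [0., 2., 0.],
               [1., 1., 4.]])
print(tfidf(tf))   # weighted term matrix, same shape as tf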
Example #27
def NTU_fPR(P, R, flujo, **kwargs):
    """Calculo de la factor de correccion
    Flujo vendra definido por su acronimo
        CF: Counter flow
        PF: Parallel flow
        CrFMix: Crossflow, both fluids mixed
        CrFSMix: Crossflow, one fluid mixed, other unmixed
        CrFunMix: Crossflow, both fluids unmixed
        1-2TEMAE: 1-2 pass shell and tube exchanger

    kwargs: Opciones adicionales:
        mixed: corriente mezclada para CrFSMix
            Cmin, Cmax
    """

    if flujo == "1-2TEMAE":
        if R == 1:
            NTU = log((1-P)/2-3*P)
        else:
            E = (1+R**2)**0.5
            NTU = log((2-P*(1+R-E))/(2-P*(1+R+E)))/E

    else:
        if R == 1:
            NTU = P/(1-P)
        else:
            NTU = log((1-R*P)/(1-P))/(1-R)

    return NTU
Example #28
    def _LML_covar(self,hyperparams,debugging=False):
        """
        log marginal likelihood
        """
        try:
            KV = self.get_covariances(hyperparams,debugging=debugging)
        except LA.LinAlgError:
            LG.error('linalg exception in _LML_covar')
            return 1E6
        except ValueError:
            LG.error('value error in _LML_covar')
            return 1E6
 
        lml_quad = 0.5*(KV['Ytilde']*KV['UYU']).sum()
        lml_det =  0.5 * SP.log(KV['S']).sum()
        lml_const = 0.5*self.n*self.t*(SP.log(2*SP.pi))
        
        if debugging:
            # do calculation without kronecker tricks and compare
            _lml_quad = 0.5 * (KV['alpha']*KV['Yvec']).sum()
            _lml_det =  SP.log(SP.diag(KV['L'])).sum()
            assert SP.allclose(_lml_quad,lml_quad),  'ouch, quadratic form is wrong in _LMLcovar'
            assert SP.allclose(_lml_det, lml_det), 'ouch, ldet is wrong in _LML_covar'
        
        lml = lml_quad + lml_det + lml_const

        return lml
Example #29
def cdi_info(energy, h, z, pix, del_x_d, verbose = False):
    """
    h - object size\nz - sample-detector distance\npix - number of pixels\ndel_x_d - pixel size
    """
    x = (pix/2.)*del_x_d
    l = energy_to_wavelength(energy)
    NF = lambda nh, nl, nz : nh**2./(nl*nz)
    del_x_s = lambda l, z, x : (l*z)/(2.*x)
    nNF = NF(h,l,z)
    OS = lambda l,z,x,h,pix : ((pix*del_x_s(l,z,x))**2.)/(h**2.)
    nOS = OS(l,z,x,h,pix)
    if verbose:
        pyl.figure()
        zrange = sp.linspace(0, 2*z, 100)
        pyl.plot(zrange, sp.log(NF(h,l,zrange)))
        pyl.title('NF')
        pyl.xlabel('z [m]')
        pyl.ylabel('log NF')
        pyl.figure()
        pyl.plot(zrange, sp.log(OS(l,zrange, x, h, pix)))
        pyl.title('OS')
        pyl.xlabel('z [m]')
        pyl.ylabel('log OS')
    
    print 'NF: %1.2e\nOS: %1.2e\ndel_x_d: %1.2e\nw_d: %1.2e\ndel_x_s: %1.2e\nw_s: %1.2e' % (nNF, nOS, del_x_d, pix*del_x_d, del_x_s(l,z,x), del_x_s(l,z,x)*pix)
    aperture_stats(energy, z, x)
Example #30
    def LML(self,params=None,*kw_args):
        """
        calculate LML
        """
        if params is not None:
            self.setParams(params)

        self._update_cache()
        
        start = TIME.time()

        #1. const term
        lml  = self.N*self.P*SP.log(2*SP.pi)

        #2. logdet term
        lml += SP.sum(SP.log(self.cache['Sc2']))*self.N
        lml += 2*SP.log(SP.diag(self.cache['cholB'])).sum()

        #3. quadratic term
        lml += SP.sum(self.cache['LY']*self.cache['LY'])
        lml -= SP.sum(self.cache['WLY']*self.cache['BiWLY'])

        lml *= 0.5

        smartSum(self.time,'lml',TIME.time()-start)
        smartSum(self.count,'lml',1)

        return lml
Example #31
    def start(self, current, selections):
        current.progress.begin('Creating Spectrogram')
        signals = current.analog_signals(self.which_signals + 1)
        if not signals:
            current.progress.done()
            raise SpykeException('No signals selected!')

        num_signals = len(signals)

        columns = int(round(sp.sqrt(num_signals)))

        current.progress.set_ticks(num_signals)
        samples = self.nfft_index[self.fft_samples]
        win = PlotDialog(toolbar=True,
                         wintitle="Signal Spectogram (FFT window size %d)" %
                         samples)

        for c in xrange(num_signals):
            pW = BaseImageWidget(win, yreverse=False, lock_aspect_ratio=False)
            plot = pW.plot

            s = signals[c]

            # Calculate spectrogram and create plot
            v = mlab.specgram(s,
                              NFFT=samples,
                              noverlap=samples / 2,
                              Fs=s.sampling_rate)
            interpolation = 'nearest'
            if self.interpolate:
                interpolation = 'linear'
            img = make.image(sp.log(v[0]),
                             ydata=[v[1][0], v[1][-1]],
                             xdata=[v[2][0], v[2][-1]],
                             interpolation=interpolation)
            plot.add_item(img)

            # Labels etc.
            if not self.show_color_bar:
                plot.disable_unused_axes()
            title = ''
            if s.recordingchannel and s.recordingchannel.name:
                title = s.recordingchannel.name
            if s.segment and s.segment.name:
                if title:
                    title += ' , '
                title += s.segment.name
            plot.set_title(title)
            plot.set_axis_title(plot.Y_LEFT, 'Frequency')
            plot.set_axis_unit(plot.Y_LEFT,
                               s.sampling_rate.dimensionality.string)
            plot.set_axis_title(plot.X_BOTTOM, 'Time')
            time_unit = (1 / s.sampling_rate).simplified
            plot.set_axis_unit(plot.X_BOTTOM, time_unit.dimensionality.string)
            win.add_plot_widget(pW, c, column=c % columns)
            current.progress.step()

        current.progress.done()
        win.add_custom_image_tools()
        win.add_x_synchronization_option(True, range(num_signals))
        win.add_y_synchronization_option(True, range(num_signals))
        win.show()
Example #32
def cvlognet(fit, \
            lambdau, \
            x, \
            y, \
            weights, \
            offset, \
            foldid, \
            ptype, \
            grouped, \
            keep = False):

    typenames = {
        'deviance': 'Binomial Deviance',
        'mse': 'Mean-Squared Error',
        'mae': 'Mean Absolute Error',
        'auc': 'AUC',
        'class': 'Misclassification Error'
    }
    if ptype == 'default':
        ptype = 'deviance'

    ptypeList = ['mse', 'mae', 'deviance', 'auc', 'class']
    if not ptype in ptypeList:
        print('Warning: only ', ptypeList, 'available for binomial models; '
              'deviance'
              ' used')
        ptype = 'deviance'

    prob_min = 1.0e-5
    prob_max = 1 - prob_min
    nc = y.shape[1]
    if nc == 1:
        classes, sy = scipy.unique(y, return_inverse=True)
        nc = len(classes)
        indexes = scipy.eye(nc, nc)
        y = indexes[sy, :]
    else:
        classes = scipy.arange(nc) + 1  # 1:nc

    N = y.size
    nfolds = scipy.amax(foldid) + 1
    if (N / nfolds < 10) and (ptype == 'auc'):
        print(
            'Warning: Too few (<10) observations per fold for type.measure=auc in cvlognet'
        )
        print(
            'Warning:     changed to type.measure = deviance. Alternately, use smaller value '
        )
        print('Warning:     for nfolds')
        ptype = 'deviance'

    if (N / nfolds < 3) and grouped:
        print(
            'Warning: option grouped = False enforced in cvglmnet as there are < 3 observations per fold'
        )
        grouped = False

    is_offset = not (len(offset) == 0)
    predmat = scipy.ones([y.shape[0], lambdau.size]) * scipy.NAN
    nfolds = scipy.amax(foldid) + 1
    nlams = []
    for i in range(nfolds):
        which = foldid == i
        fitobj = fit[i].copy()
        if is_offset:
            off_sub = offset[which, ]
        else:
            off_sub = scipy.empty([0])
        preds = glmnetPredict(fitobj, x[which, ], scipy.empty([0]), 'response',
                              False, off_sub)
        nlami = scipy.size(fit[i]['lambdau'])
        predmat[which, 0:nlami] = preds
        nlams.append(nlami)
    # convert nlams to scipy array
    nlams = scipy.array(nlams, dtype=scipy.integer)

    if ptype == 'auc':
        cvraw = scipy.zeros([nfolds, lambdau.size]) * scipy.NaN
        good = scipy.zeros([nfolds, lambdau.size])
        for i in range(nfolds):
            good[i, 0:nlams[i]] = 1
            which = foldid == i
            for j in range(nlams[i]):
                cvraw[i, j] = auc_mat(y[which, ], predmat[which, j],
                                      weights[which])
        N = scipy.sum(good, axis=0)
        sweights = scipy.zeros([nfolds, 1])
        for i in range(nfolds):
            sweights[i] = scipy.sum(weights[foldid == i], axis=0)
        weights = sweights
    else:
        ywt = scipy.sum(y, axis=1, keepdims=True)
        y = y / scipy.tile(ywt, [1, y.shape[1]])
        weights = weights * ywt
        N = y.shape[0] - scipy.sum(scipy.isnan(predmat), axis=0, keepdims=True)
        yy1 = scipy.tile(y[:, 0:1], [1, lambdau.size])
        yy2 = scipy.tile(y[:, 1:2], [1, lambdau.size])

    if ptype == 'mse':
        cvraw = (yy1 - (1 - predmat))**2 + (yy2 - (1 - predmat))**2
    elif ptype == 'deviance':
        predmat = scipy.minimum(scipy.maximum(predmat, prob_min), prob_max)
        lp = yy1 * scipy.log(1 - predmat) + yy2 * scipy.log(predmat)
        ly = scipy.log(y)
        ly[y == 0] = 0
        ly = scipy.dot(y * ly, scipy.array([1.0, 1.0]).reshape([2, 1]))
        cvraw = 2 * (scipy.tile(ly, [1, lambdau.size]) - lp)
    elif ptype == 'mae':
        cvraw = scipy.absolute(yy1 -
                               (1 - predmat)) + scipy.absolute(yy2 -
                                                               (1 - predmat))
    elif ptype == 'class':
        cvraw = yy1 * (predmat > 0.5) + yy2 * (predmat <= 0.5)

    if y.size / nfolds < 3 and grouped == True:
        print(
            'Option grouped=false enforced in cv.glmnet, since < 3 observations per fold'
        )
        grouped = False

    if grouped == True:
        cvob = cvcompute(cvraw, weights, foldid, nlams)
        cvraw = cvob['cvraw']
        weights = cvob['weights']
        N = cvob['N']

    cvm = wtmean(cvraw, weights)
    sqccv = (cvraw - cvm)**2
    cvsd = scipy.sqrt(wtmean(sqccv, weights) / (N - 1))

    result = dict()
    result['cvm'] = cvm
    result['cvsd'] = cvsd
    result['name'] = typenames[ptype]

    if keep:
        result['fit_preval'] = predmat

    return (result)
Example #33
 def f_expected(u):
     return (sp.log(u)-sp.log(d_observed)) / (- sp.log(d_observed)) * \
            (u > d_observed)
Example #34
    def test_gets_modes_by_scan(self):
        nf = self.nf
        nt = self.nt
        dt = self.dt
        bw = self.bw
        nb = self.nb
        # Give every channel a different thermal noise floor.
        thermal_norm = 1.0 + 1.0 / nf * sp.arange(nf)  # K**2/Hz
        self.data *= sp.sqrt(thermal_norm * bw * 2)
        n_time = self.data.shape[0]
        # Now make a 1/f like noise component in a few frequency modes.
        n_modes = 3
        index = -0.8 * (2.0 - sp.arange(n_modes, dtype=float) / n_modes)
        amp = 1.2 * (3.**(n_modes - 1. - sp.arange(n_modes, dtype=float))
                     )  # K**2/Hz
        f_0 = 1.0  # Hz
        modes = sp.empty((n_modes, nf))
        for ii in range(n_modes):
            correlated_overf = noise_power.generate_overf_noise(
                amp[ii], index[ii], f_0, dt, n_time)
            # Generate the frequency mode.  They should all be orthonormal.
            mode = sp.sin(2. * sp.pi *
                          (ii + 1) * sp.arange(nf, dtype=float) / nf + 6.4 *
                          (ii + 3))
            mode /= sp.sqrt(sp.sum(mode**2))
            modes[ii] = mode
            self.data += correlated_overf[:, None, None, None] * mode
        # Add a subdominant general 1/f noise to all channels.
        general_amp = 0.1
        general_index = -0.9
        general_cross_over = f_0 * general_amp**(-1. / general_index)
        for ii in range(nf):
            tmp_a = general_amp * thermal_norm[ii]
            correlated_overf = noise_power.generate_overf_noise(
                tmp_a, general_index, f_0, dt, n_time)
            self.data[:, 0, :, ii] += correlated_overf[:, None]
        # Now put the data into the form of the real data.
        Blocks = self.make_blocks()
        # Measure all the noise parameters.
        model_name = 'freq_modes_over_f_' + str(n_modes)
        parameters = mn.measure_noise_parameters(Blocks, [model_name],
                                                 split_scans=True)
        for pol, pol_params in parameters.iteritems():

            for ii in range(n_modes):
                mode_noise = pol_params[model_name]['over_f_mode_' + str(ii)]
                self.assertTrue(
                    sp.allclose(mode_noise['amplitude'], amp[ii], rtol=0.5))
                self.assertTrue(
                    sp.allclose(mode_noise['index'], index[ii], atol=0.2))
                #thermal_proj = sp.sum(thermal_norm * modes[ii,:]**2)
                #self.assertTrue(sp.allclose(mode_noise['thermal'], thermal_proj,
                #                            rtol=0.5))
                self.assertTrue(
                    abs(sp.dot(mode_noise['mode'], modes[ii, :])) > 0.90)
            thermal = pol_params[model_name]['thermal']
            loss = float(nf - n_modes) / nf
            self.assertTrue(sp.allclose(thermal, thermal_norm * loss,
                                        rtol=0.4))
            measured_general_ind = pol_params[model_name]['all_channel_index']
            measured_corner = pol_params[model_name]['all_channel_corner_f']
            if pol == 1:
                self.assertTrue(
                    sp.allclose(measured_general_ind, general_index, atol=0.4))
                #Only need logarithmic accuracy on the corner.
                self.assertTrue(
                    sp.allclose(sp.log(measured_corner),
                                sp.log(general_cross_over),
                                atol=1.5))
            elif pol == 3:
                self.assertTrue(measured_corner < 4. / dt / nt)
Example #35
    def nLLeval_test(self, y_test, beta, h2=0.0, logdelta=None, delta=None, sigma2=1.0, Kstar_star=None, robust=False):
        """
        compute out-of-sample log-likelihood

        robust: boolean
                indicates if eigenvalues will be truncated at 1E-9 or 1E-4. The former (default) one was used in FastLMMC,
                but may lead to numerically unstable solutions.
        """
        assert y_test.ndim == 1, "y_test should have 1 dimension"
        mu = self.predictMean(beta, h2=h2, logdelta=logdelta, delta=delta)
        res = y_test - mu

        sigma = self.predictVariance(h2=h2, logdelta=logdelta, delta=delta, sigma2=sigma2, Kstar_star=Kstar_star)

        #TODO: benchmark, record speed difference
        """
        # efficient computation of: (y - mu)^T sigma2^{-1} (y - mu)
        # Solve the linear system x = (L L^T)^-1 res

        try:
            L = SP.linalg.cho_factor(sigma)
            res_sig = SP.linalg.cho_solve(L, res)
            logdetK = NP.linalg.slogdet(sigma)[1]

        except Exception, detail:
            print "Cholesky failed, using eigen-value decomposition!"
        """

        [S_,U_] = LA.eigh(sigma)

        if robust:
            S_nonz=(S_>1E-4)
        else:
            S_nonz=(S_>1E-9)
        assert sum(S_nonz) > 0, "Some eigenvalues should be nonzero"
        S = S_[S_nonz]
        U = U_[:, S_nonz]
        Sdi = 1 / S

        res_sig = res.T.dot(Sdi * U).dot(U.T)
        logdetK = SP.log(S).sum()

        # some sanity checks
        if False:
            res_sig3 = SP.linalg.pinv(sigma).dot(res)
            NP.testing.assert_array_almost_equal(res_sig, res_sig3, decimal=2)

        # see Carl Rasmussen's book on GPs, equation 5.10, or 
        term1 = -0.5 * logdetK
        term2 = -0.5 * SP.dot(res_sig.reshape(-1).T, res.reshape(-1)) #Change the inputs to the functions so that these are vectors, not 1xn,nx1
        term3 = -0.5 * len(res) * SP.log(2 * SP.pi)

        if term2 < -10000:
            logging.warning("looks like nLLeval_test is running into numerical difficulties")

            SC = S.copy()
            SC.sort()

            logging.warning(["delta:", delta, "log det", logdetK, "term 2", term2, "term 3:", term3 ])
            logging.warning(["largest eigv:", SC[-1], "second largest eigv:", SC[-2], "smallest eigv:", SC[0] ])
            logging.warning(["ratio 1large/2large:", SC[-1]/SC[-2], "ratio lrg/small:", SC[-1]/SC[0] ])
        
        neg_log_likelihood = -(term1 + term2 + term3)

        return neg_log_likelihood
Example #36
    def nLLeval(self,h2=0.0,REML=True, logdelta = None, delta = None, dof = None, scale = 1.0,penalty=0.0):
        '''
        evaluate -ln( N( U^T*y | U^T*X*beta , h2*S + (1-h2)*I ) ),
        where ((1-a2)*K0 + a2*K1) = USU^T
        --------------------------------------------------------------------------
        Input:
        h2      : mixture weight between K and Identity (environmental noise)
        REML    : boolean
                  if True   : compute REML
                  if False  : compute ML
        dof     : Degrees of freedom of the Multivariate student-t
                        (default None uses multivariate Normal likelihood)
        logdelta: log(delta) allows to optionally parameterize in delta space
        delta   : delta     allows to optionally parameterize in delta space
        scale   : Scale parameter that multiplies the Covariance matrix (default 1.0)
        --------------------------------------------------------------------------
        Output dictionary:
        'nLL'       : negative log-likelihood
        'sigma2'    : the model variance sigma^2
        'beta'      : [D*1] array of fixed effects weights beta
        'h2'        : mixture weight between Covariance and noise
        'REML'      : True: REML was computed, False: ML was computed
        'a2'        : mixture weight between K0 and K1
        'dof'       : Degrees of freedom of the Multivariate student-t
                        (default None uses multivariate Normal likelihood)
        'scale'     : Scale parameter that multiplies the Covariance matrix (default 1.0)
        --------------------------------------------------------------------------
        '''
        if (h2<0.0) or (h2>1.0):
            return {'nLL':3E20,
                    'h2':h2,
                    'REML':REML,
                    'scale':scale}
        k=self.S.shape[0]
        N=self.y.shape[0]
        D=self.UX.shape[1]
        
        #if REML == True:
        #    # this needs to be fixed, please see test_gwas.py for details
        #    raise NotImplementedError("this feature is not ready to use at this time, please use lmm_cov.py instead")

        if logdelta is not None:
            delta = SP.exp(logdelta)

        if delta is not None:
            Sd = (self.S+delta)*scale
        else:
            Sd = (h2*self.S + (1.0-h2))*scale

        UXS = self.UX / NP.lib.stride_tricks.as_strided(Sd, (Sd.size,self.UX.shape[1]), (Sd.itemsize,0))
        UyS = self.Uy / Sd

        XKX = UXS.T.dot(self.UX)
        XKy = UXS.T.dot(self.Uy)
        yKy = UyS.T.dot(self.Uy)

        logdetK = SP.log(Sd).sum()
                
        if (k<N):#low rank part
        
            # determine normalization factor
            if delta is not None:
                denom = (delta*scale)
            else:
                denom = ((1.0-h2)*scale)
            
            XKX += self.UUX.T.dot(self.UUX)/(denom)
            XKy += self.UUX.T.dot(self.UUy)/(denom)
            yKy += self.UUy.T.dot(self.UUy)/(denom)      
            logdetK+=(N-k) * SP.log(denom)
 
        # proximal contamination (see Supplement Note 2: An Efficient Algorithm for Avoiding Proximal Contamination)
        # available at: http://www.nature.com/nmeth/journal/v9/n6/extref/nmeth.2037-S1.pdf
        # exclude SNPs from the RRM in the likelihood evaluation
        

        if len(self.exclude_idx) > 0:          
            num_exclude = len(self.exclude_idx)
            
            # consider only excluded SNPs
            G_exclude = self.G[:,self.exclude_idx]
            
            self.UW = self.U.T.dot(G_exclude) # needed for proximal contamination
            UWS = self.UW / NP.lib.stride_tricks.as_strided(Sd, (Sd.size,num_exclude), (Sd.itemsize,0))
            assert UWS.shape == (k, num_exclude)
            
            WW = NP.eye(num_exclude) - UWS.T.dot(self.UW)
            WX = UWS.T.dot(self.UX)
            Wy = UWS.T.dot(self.Uy)
            assert WW.shape == (num_exclude, num_exclude)
            assert WX.shape == (num_exclude, D)
            assert Wy.shape == (num_exclude,)
            
            if (k<N):#low rank part
            
                self.UUW = G_exclude - self.U.dot(self.UW)
                
                WW += self.UUW.T.dot(self.UUW)/denom
                WX += self.UUW.T.dot(self.UUX)/denom
                Wy += self.UUW.T.dot(self.UUy)/denom
            
            
            #TODO: do cholesky, if fails do eigh
            # compute inverse efficiently
            [S_WW,U_WW] = LA.eigh(WW)
            
            UWX = U_WW.T.dot(WX)
            UWy = U_WW.T.dot(Wy)
            assert UWX.shape == (num_exclude, D)
            assert UWy.shape == (num_exclude,)
            
            # compute S_WW^{-1} * UWX
            WX = UWX / NP.lib.stride_tricks.as_strided(S_WW, (S_WW.size,UWX.shape[1]), (S_WW.itemsize,0))
            # compute S_WW^{-1} * UWy
            Wy = UWy / S_WW
            # determinant update
            logdetK += SP.log(S_WW).sum()
            assert WX.shape == (num_exclude, D)
            assert Wy.shape == (num_exclude,)
            
            # perform updates (instantiations for a and b in Equation (1.5) of Supplement)
            yKy += UWy.T.dot(Wy)
            XKy += UWX.T.dot(Wy)
            XKX += UWX.T.dot(WX)
            

        #######
        
        [SxKx,UxKx]= LA.eigh(XKX)
        #optionally regularize the beta weights by penalty
        if penalty>0.0:
            SxKx+=penalty
        i_pos = SxKx>1E-10
        beta = SP.dot(UxKx[:,i_pos],(SP.dot(UxKx[:,i_pos].T,XKy)/SxKx[i_pos]))

        r2 = yKy-XKy.dot(beta)

        if dof is None:#Use the Multivariate Gaussian
            if REML:
                XX = self.X.T.dot(self.X)
                [Sxx,Uxx]= LA.eigh(XX)
                logdetXX  = SP.log(Sxx).sum()
                logdetXKX = SP.log(SxKx).sum()
                sigma2 = r2 / (N - D)
                nLL =  0.5 * ( logdetK + logdetXKX - logdetXX + (N-D) * ( SP.log(2.0*SP.pi*sigma2) + 1 ) )
            else:
                sigma2 = r2 / (N)
                nLL =  0.5 * ( logdetK + N * ( SP.log(2.0*SP.pi*sigma2) + 1 ) )
            result = {
                  'nLL':nLL,
                  'sigma2':sigma2,
                  'beta':beta,
                  'h2':h2,
                  'REML':REML,
                  'a2':self.a2,
                  'scale':scale
                  }
        else:#Use multivariate student-t
            if REML:
                XX = self.X.T.dot(self.X)
                [Sxx,Uxx]= LA.eigh(XX)
                logdetXX  = SP.log(Sxx).sum()
                logdetXKX = SP.log(SxKx).sum()

                nLL =  0.5 * ( logdetK + logdetXKX - logdetXX + (dof + (N-D)) * SP.log(1.0+r2/dof) )
                nLL += 0.5 * (N-D)*SP.log( dof*SP.pi ) + SS.gammaln( 0.5*dof ) - SS.gammaln( 0.5* (dof + (N-D) ))
            else:
                nLL =   0.5 * ( logdetK + (dof + N) * SP.log(1.0+r2/dof) )
                nLL +=  0.5 * N*SP.log( dof*SP.pi ) + SS.gammaln( 0.5*dof ) - SS.gammaln( 0.5* (dof + N ))
            result = {
                  'nLL':nLL,
                  'dof':dof,
                  'beta':beta,
                  'h2':h2,
                  'REML':REML,
                  'a2':self.a2,
                  'scale':scale
                  }        
        assert SP.all(SP.isreal(nLL)), "nLL has an imaginary component, possibly due to constant covariates"
        return result
Example #37
    def check_ExpCM_derivatives(self):
        """Use `sympy` to check values and derivatives of `ExpCM` attributes."""
        (Prxy, Qxy, phiw, beta, omega, eta0, eta1, eta2,
         kappa) = sympy.symbols(
             'Prxy, Qxy, phiw, beta, omega, eta0, eta1, eta2, kappa')

        values = {
            'beta': self.params['beta'],
            'omega': self.params['omega'],
            'kappa': self.params['kappa'],
            'eta0': self.params['eta'][0],
            'eta1': self.params['eta'][1],
            'eta2': self.params['eta'][2],
        }

        # check Prxy
        for r in range(self.nsites):
            for x in range(N_CODON):
                pirAx = self.prefs[r][INDEX_TO_AA[CODON_TO_AA[x]]]
                for y in [yy for yy in range(N_CODON) if yy != x]:
                    pirAy = self.prefs[r][INDEX_TO_AA[CODON_TO_AA[y]]]
                    if not CODON_SINGLEMUT[x][y]:
                        Prxy = 0
                    else:
                        w = NT_TO_INDEX[[
                            ynt
                            for (xnt, ynt
                                 ) in zip(INDEX_TO_CODON[x], INDEX_TO_CODON[y])
                            if xnt != ynt
                        ][0]]
                        if w == 0:
                            phiw = 1 - eta0
                        elif w == 1:
                            phiw = eta0 * (1 - eta1)
                        elif w == 2:
                            phiw = eta0 * eta1 * (1 - eta2)
                        elif w == 3:
                            phiw = eta0 * eta1 * eta2
                        else:
                            raise ValueError("Invalid w")
                        self.assertTrue(
                            scipy.allclose(float(phiw.subs(values)),
                                           self.expcm.phi[w]))
                        if CODON_TRANSITION[x][y]:
                            Qxy = kappa * phiw
                        else:
                            Qxy = phiw
                        self.assertTrue(
                            scipy.allclose(float(Qxy.subs(values)),
                                           self.expcm.Qxy[x][y]))
                        if CODON_NONSYN[x][y]:
                            if pirAx == pirAy:
                                Prxy = Qxy * omega
                            else:
                                Prxy = Qxy * omega * (
                                    -beta * scipy.log(pirAx / pirAy) /
                                    (1 - (pirAx / pirAy)**beta))
                        else:
                            Prxy = Qxy
                    for (name, actual, expect) in [
                        ('Prxy', self.expcm.Prxy[r][x][y], Prxy),
                        ('dPrxy_dkappa', self.expcm.dPrxy['kappa'][r][x][y],
                         sympy.diff(Prxy, kappa)),
                        ('dPrxy_domega', self.expcm.dPrxy['omega'][r][x][y],
                         sympy.diff(Prxy, omega)),
                        ('dPrxy_dbeta', self.expcm.dPrxy['beta'][r][x][y],
                         sympy.diff(Prxy, beta)),
                        ('dPrxy_deta0', self.expcm.dPrxy['eta'][0][r][x][y],
                         sympy.diff(Prxy, eta0)),
                        ('dPrxy_deta1', self.expcm.dPrxy['eta'][1][r][x][y],
                         sympy.diff(Prxy, eta1)),
                        ('dPrxy_deta2', self.expcm.dPrxy['eta'][2][r][x][y],
                         sympy.diff(Prxy, eta2)),
                    ]:
                        if Prxy == 0:
                            expectval = 0
                        else:
                            expectval = float(expect.subs(values))
                        self.assertTrue(
                            scipy.allclose(actual, expectval, atol=1e-4),
                            "{0}: {1} vs {2}".format(name, actual, expectval))

        # check prx
        qxs = [sympy.Symbol('qx{0}'.format(x)) for x in range(N_CODON)]
        frxs = [sympy.Symbol('frx{0}'.format(x)) for x in range(N_CODON)]
        prx = sympy.Symbol('prx')
        phixs = [sympy.Symbol('phix{0}'.format(w)) for w in range(3)]
        for r in range(self.nsites):
            for x in range(N_CODON):
                pirAx = self.prefs[r][INDEX_TO_AA[CODON_TO_AA[x]]]
                frxs[x] = pirAx**beta
                xcodon = INDEX_TO_CODON[x]
                assert len(phixs) == len(xcodon)
                for (w, xwnt) in enumerate(xcodon):
                    xw = NT_TO_INDEX[xwnt]
                    if xw == 0:
                        phixs[w] = 1 - eta0
                    elif xw == 1:
                        phixs[w] = eta0 * (1 - eta1)
                    elif xw == 2:
                        phixs[w] = eta0 * eta1 * (1 - eta2)
                    elif xw == 3:
                        phixs[w] = eta0 * eta1 * eta2
                    else:
                        raise ValueError("invalid xw")
                qxs[x] = phixs[0] * phixs[1] * phixs[2]
            for x in range(N_CODON):
                prx = frxs[x] * qxs[x] / sum(frx * qx
                                             for (frx, qx) in zip(frxs, qxs))
                for (name, actual, expect) in [
                    ('prx', self.expcm.prx[r][x], prx),
                    ('dprx_dbeta', self.expcm.dprx['beta'][r][x],
                     sympy.diff(prx, beta)),
                    ('dprx_deta0', self.expcm.dprx['eta'][0][r][x],
                     sympy.diff(prx, eta0)),
                    ('dprx_deta1', self.expcm.dprx['eta'][1][r][x],
                     sympy.diff(prx, eta1)),
                    ('dprx_deta2', self.expcm.dprx['eta'][2][r][x],
                     sympy.diff(prx, eta2)),
                ]:
                    expectval = float(expect.subs(values))
                    self.assertTrue(
                        scipy.allclose(actual, expectval, atol=1e-5),
                        "{0}: {1} vs {2}".format(name, actual, expectval))
Example #38
 def _fprime(self, sigma):
     logSoverK = log(self.S / self.K)
     n12 = ((self.r + sigma**2 / 2) * self.T)
     numerd1 = logSoverK + n12
     d1 = numerd1 / (sigma * sqrt(self.T))
     return self.S * sqrt(self.T) * norm.pdf(d1) * exp(-self.r * self.T)
Example #39
 def _BlackScholesCall(S, K, T, sigma, r, q):
     d1 = (log(S / K) + (r - q + (sigma**2) / 2) * T) / (sigma * sqrt(T))
     d2 = d1 - sigma * sqrt(T)
     return S * exp(-q * T) * norm.cdf(d1) - K * exp(-r * T) * norm.cdf(d2)
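A quick numeric check, treating the snippet as a free function; it needs log, sqrt, exp (e.g. from math) and scipy.stats.norm in scope, and the textbook case S=K=100, T=1, sigma=0.2, r=5%, q=0 should come out near 10.45.
from math import log, sqrt, exp
from scipy.stats import norm

print(_BlackScholesCall(S=100.0, K=100.0, T=1.0, sigma=0.2, r=0.05, q=0.0))   # ~10.45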
Example #40
Dd = bowD.D

if Controller == 1:
    # Controller without integral part
    cl_polesd = sp.exp(cl_poles*Ts)    # Desired discrete poles
    k = place(Ad, Bd, cl_polesd)

elif Controller == 2:
    # LQR Controller
    Q = np.diag([5, 5, 10, 1]);
    Q = np.diag([20, 10, 100 , 10]);
    R = [4];                    
    k, S, E = rp.dlqr(Ad, Bd, Q, R)

    # Observer design parameters
    preg = sp.log(E[0])/Ts
    w0 = max(abs(preg));       # process spectral radius

    # Modify poles for observer
    cl_poles = w0/wn*cl_poles
    cl_2poles = w0/wn*cl_2poles

if Observer == 1:
    # Reduced order observer
    T=[[0,0,1,0],[0,0,0,1]]
    obs_polesc = obs_k*cl_2poles
    obs_polesd = sp.exp(obs_polesc*Ts)
    r_obs = red_obs(bowD,T, obs_polesd)
    # Put Observer and controller together (compact form)
    ctr = comp_form(bowD, r_obs, k)
Example #41
def stdatmos(**altitude):
    """
    Evaluate the standard atmosphere at any given altitude.
    
    This function allows input of a single variable to calculate
    the atmospheric properties at different altitudes. The function
    can work with different types of standard models. The default model
    values are set as defined by the International Standard Atmosphere.
    
    Parameters
    ----------
    model : dict, optional
        A standard atmosphere model as obtained from stdmodel. Partial
        models are allowed. The remaining model values default to
        the International Standard Atmosphere.
    h or geom : array_like
        Geometrical altitude [meters].
    geop : array_like
        Geopotential altitude [meters].
    abs : array_like
        Absolute altitude [meters].
    T : array_like
        Temperature altitude [K].
    P : array_like
        Pressure altitude [Pa].
    rho : array_like
        Density altitude [kg/m^3].
        
    Returns
    -------
    out : (h, T, P, rho, a)
        Tuple of geometrical altitude, temperature, pressure, density
        and speed of sound at given altitudes.
    
    Notes
    -----
    This function assumes a continuous lapse rate below 0 altitude and above
    the top layer, which allows for extrapolation outside the specified
    region (0 to 86km in ISA). Temperature altitude is obtained as the
    first altitude from 0 where the specified temperature exists.
        
    See Also
    --------
    stdmodel
    
    Examples
    --------
    >>> stdatmos(P=[1e5, 1e4, 1e3])[0]
    [110.8864127251899, 16221.007939493587, 31207.084373790043]
    >>> stdatmos(h=sp.linspace(-2000, 81000))
    (array, array, array, array, array)
    """

    #pop atmospherical model from input
    model = altitude.pop("model", {})

    #check if model is a dictionary
    if not isinstance(model, dict):
        raise Exception("Custom atmosphere model is incompatible.")

    #check if a single remaining input exists
    if len(altitude) != 1:
        raise Exception("Function needs exactly one altitude input.")

    #pop the altitude input
    mtype, alt = altitude.popitem()

    #check if the altitude input type is valid
    if mtype not in ["h", "geom", "geop", "abs", "T", "P", "rho"]:
        raise Exception("The altitude input should be a valid input type.")

    #convert the input to numpy arrays
    itype, alt = to_ndarray(alt)

    #model values
    R = model.get("R", 287.053)  #gas constant [J/kg/K] (air)
    gamma = model.get("gamma", 1.4)  #specific heat ratio [-] (air)

    g = model.get("g0", 9.80665)  #gravity [m/s^2] (earth)
    radius = model.get("radius", 6356766.0)  #earth radius [m] (earth)

    Tb = model.get("T0", 288.15)  #base temperature [K]
    Pb = model.get("P0", 101325.0)  #base pressure [Pa]

    #model lapse rate and height layers
    Hb = sp.array([0, 11, 20, 32, 47, 51, 71, sp.inf], sp.float64) * 1000
    Lr = sp.array([-6.5, 0, 1, 2.8, 0, -2.8, -2], sp.float64) * 0.001

    Hb = model.get("layers", Hb)  #layer base heights [m]
    Lr = model.get("lapserate", Lr)  #lapse rate [K/m]

    #preshape solution arrays
    T = sp.ones(alt.shape, sp.float64) * sp.nan
    P = sp.ones(alt.shape, sp.float64) * sp.nan

    #define the height array
    if mtype in ["h", "geom"]:
        h = alt * radius / (radius + alt)
    elif mtype is "geop":
        h = alt
    elif mtype is "abs":
        h = alt - radius
    else:
        h = sp.ones(alt.shape, sp.float64) * sp.nan

    for lr, hb, ht in zip(Lr, Hb[:-1], Hb[1:]):
        #calculate the temperature at layer top
        Tt = Tb + lr * (ht - hb)

        if mtype is "T":
            #break the loop if there are no nans in the solution array
            if not sp.isnan(h).any():
                break

            #select all temperatures in current layer
            if lr == 0:
                sel = (alt == Tb)
            else:
                s = sp.sign(lr)
                bot = -sp.inf if hb == 0 else Tb * s
                top = sp.inf if ht == Hb[-1] else Tt * s
                sel = sp.logical_and(alt * s >= bot, alt * s < top)

            #only select when not already solved
            sel = sp.logical_and(sel, sp.isnan(h))

            #temperature is given as input
            T[sel] = alt[sel]

            #solve for height and pressure
            if lr == 0:
                h[sel] = hb
                P[sel] = Pb
            else:
                h[sel] = hb + (1.0 / lr) * (T[sel] - Tb)
                P[sel] = Pb * (T[sel] / Tb)**(-g / (lr * R))

        elif mtype in ["P", "rho"]:
            #choose base value as pressure or density
            vb = Pb if mtype == "P" else Pb / (R * Tb)

            #select all input values below given pressure or density
            sel = alt <= (sp.inf if hb == 0 else vb)

            #break if nothing is selected
            if not sel.any():
                break

            #solve for temperature and height
            if lr == 0:
                T[sel] = Tb
                h[sel] = hb - sp.log(alt[sel] / vb) * R * Tb / g
            else:
                x = g if mtype == "P" else (lr * R + g)
                T[sel] = Tb * (alt[sel] / vb)**(-lr * R / x)
                h[sel] = hb + (T[sel] - Tb) / lr

            #pressure is given as input
            P[sel] = alt[sel] if mtype == "P" else alt[sel] * R * T[sel]

        else:
            #select all height values above layer base
            sel = h >= (-sp.inf if hb == 0 else hb)

            #break if nothing is selected
            if not sel.any():
                break

            #solve for temperature and pressure
            if lr == 0:
                T[sel] = Tb
                P[sel] = Pb * sp.exp((-g / (R * Tb)) * (h[sel] - hb))
            else:
                T[sel] = Tb + lr * (h[sel] - hb)
                P[sel] = Pb * (T[sel] / Tb)**(-g / (lr * R))

        #update pressure base value
        if lr == 0:
            Pb *= sp.exp((-g / (R * Tb)) * (ht - hb))
        else:
            Pb *= (Tt / Tb)**(-g / (lr * R))

        #update temperature base value
        Tb = Tt

    #convert geopotential altitude to geometrical altitude
    h *= radius / (radius - h)

    #density
    rho = P / (R * T)

    #speed of sound
    a = sp.sqrt(gamma * R * T)

    return from_ndarray(itype, h, T, P, rho, a)
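As a quick check of the gradient-layer relation used in the loop above, the sketch below evaluates P = Pb*(T/Tb)**(-g/(lr*R)) in the ISA troposphere; the base values are the ISA defaults, the 5 km altitude is an assumption for illustration.

# Hedged sketch: ISA troposphere pressure at 5 km with the same layer formula as stdatmos
R, g = 287.053, 9.80665
Tb, Pb, lr = 288.15, 101325.0, -6.5e-3      # sea-level base values, lapse rate [K/m]
h = 5000.0                                   # geopotential altitude [m] (assumed)
T = Tb + lr * h
P = Pb * (T / Tb) ** (-g / (lr * R))
print(T, P)                                  # roughly 255.7 K and 5.40e4 Pa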
Example #42
0
 def _BlackScholesPut(S, K, T, sigma, r, q):
     d1 = (log(S / K) + (r - q + (sigma**2) / 2) * T) / (sigma * sqrt(T))
     d2 = d1 - sigma * sqrt(T)
     return K * exp(-r * T) * norm.cdf(-d2) - S * exp(
         -q * T) * norm.cdf(-d1)
Example #43
0
def parse_sum_stats_custom(filename=None,
                           bimfile=None,
                           only_hm3=False,
                           hdf5_file=None,
                           n=None,
                           ch=None,
                           pos=None,
                           A1=None,
                           A2=None,
                           reffreq=None,
                           case_freq=None,
                           control_freq=None,
                           case_n=None,
                           control_n=None,
                           info=None,
                           rs=None,
                           pval=None,
                           eff=None,
                           ncol=None,
                           input_is_beta=False,
                           match_genomic_pos=False,
                           debug=False,
                           summary_dict=None):
    # Check required fields are here
    assert not A2 is None, 'Require header for non-effective allele'
    assert not A1 is None, 'Require header for effective allele'
    assert not rs is None, 'Require header for RS ID'
    assert not eff is None, 'Require header for Statistics'
    assert not pval is None, 'Require header for pval'
    assert not ncol is None or not n is None or (
        control_n is not None
        and case_n is not None), 'Require either N or NCOL information'

    if ch is None:
        assert not bimfile is None, 'Require bimfile when chromosome header not provided'
        print("Chromosome Header not provided, will use info from bim file")
    if pos is None:
        assert not bimfile is None, 'Require bimfile when position header not provided'
        print("Position Header not provided, will use info from bim file")

    num_lines = util.count_lines(filename)
    snps_pos_map = {}
    if only_hm3:
        if debug:
            print('Loading HapMap3 SNPs')
        hm3_sids = util.load_hapmap_SNPs()

    if bimfile is not None:
        valid_sids = set()
        if debug:
            print('Parsing bim file: %s' % bimfile)

        with open(bimfile) as f:
            for line in f:
                l = line.split()
                chrom = util.get_chrom_num(l[0])
                if chrom not in util.ok_chromosomes:
                    continue
                sid = l[1]
                if only_hm3:
                    if sid in hm3_sids:
                        valid_sids.add(sid)
                        snps_pos_map[sid] = {'pos': int(l[3]), 'chrom': chrom}
                else:
                    valid_sids.add(sid)
                    snps_pos_map[sid] = {'pos': int(l[3]), 'chrom': chrom}

        if len(valid_sids) == 0:
            raise Exception('Unable to parse BIM file')
    else:
        raise Exception(
            'BIM file missing. Please check genotype paths provided.')

    invalid_chr = 0
    invalid_pos = 0
    invalid_p = 0
    invalid_beta = 0
    chrom_dict = {}
    opener = open
    if is_gz(filename):
        opener = gzip.open
    print('Parsing summary statistics file: %s' % filename)
    with opener(filename) as f:
        header = f.readline()
        if is_gz(filename):
            header = header.decode('utf-8')
        if debug:
            print('File header:')
            print(header)
        header_dict = {}
        columns = (header.strip()).split()
        index = 0
        for col in columns:
            header_dict[col] = index
            index += 1
        assert ch is None or ch in header_dict, 'Chromosome header cannot be found in summary statistic file'
        assert A2 in header_dict, 'Non-effective allele column cannot be found in summary statistic file'
        assert A1 in header_dict, 'Effective allele column cannot be found in summary statistic file'
        assert eff in header_dict, 'Effect size column not found in summary statistic file'
        assert rs in header_dict, 'SNP ID column not found in summary statistic file'
        assert pos is None or pos in header_dict, 'Position column not found in summary statistic file'
        assert pval in header_dict, 'P Value column not found in summary statistic file'
        assert not n is None or ncol in header_dict or (control_n in header_dict and case_n in header_dict), 'Sample size column not found in summary statistic ' \
                                                     'file and N not provided'
        # header_dict now contains the header column name for each corresponding input
        bad_chromosomes = set()
        line_i = 1
        for line in f:
            line_i += 1
            if line_i % 1000 == 0 and num_lines > 0:
                sys.stdout.write('\b\b\b\b\b\b\b%0.2f%%' %
                                 (100.0 * (float(line_i) / (num_lines))))
                sys.stdout.flush()
            if is_gz(filename):
                line = line.decode('utf-8')
            l = (line.strip()).split()
            # get the SNP ID first
            sid = l[header_dict[rs]]
            # check the SNP ID
            if sid in valid_sids:
                # Get the chromosome information
                chrom = 0
                if not ch is None and ch in header_dict:
                    chrom = util.get_chrom_num(l[header_dict[ch]])
                    # Check if the chromosome of the SNP is correct
                    if not chrom == snps_pos_map[sid]['chrom']:
                        invalid_chr += 1
                        continue
                else:
                    chrom = snps_pos_map[sid]['chrom']

                pos_read = 0
                if not pos is None and pos in header_dict:
                    pos_read = int(l[header_dict[pos]])
                    if not pos_read == snps_pos_map[sid]['pos']:
                        invalid_pos += 1
                        if match_genomic_pos:
                            continue
                else:
                    pos_read = snps_pos_map[sid]['pos']

                pval_read = float(l[header_dict[pval]])
                if not isfinite(stats.norm.ppf(pval_read)):
                    invalid_p += 1
                    continue

                if not isfinite(float(l[header_dict[eff]])):
                    invalid_beta += 1
                    continue

                if not chrom in chrom_dict:
                    chrom_dict[chrom] = {
                        'ps': [],
                        'log_odds': [],
                        'infos': [],
                        'freqs': [],
                        'betas': [],
                        'nts': [],
                        'sids': [],
                        'positions': []
                    }
                chrom_dict[chrom]['sids'].append(sid)
                chrom_dict[chrom]['positions'].append(pos_read)
                # Check the frequency
                if reffreq is not None and reffreq in header_dict:
                    if l[header_dict[reffreq]] == '.' or l[
                            header_dict[reffreq]] == 'NA':
                        chrom_dict[chrom]['freqs'].append(-1)
                    else:
                        chrom_dict[chrom]['freqs'].append(
                            float(l[header_dict[reffreq]]))
                elif (case_freq is not None and control_freq is not None
                      and case_freq in header_dict
                      and control_freq in header_dict):
                    if (case_n is not None and control_n is not None
                            and case_n in header_dict
                            and control_n in header_dict):
                        if (l[header_dict[control_n]] == '.'
                                or l[header_dict[control_n]] == 'NA'
                                or l[header_dict[case_n]] == '.'
                                or l[header_dict[case_n]] == 'NA'
                                or l[header_dict[control_freq]] == '.'
                                or l[header_dict[control_freq]] == 'NA'
                                or l[header_dict[case_freq]] == '.'
                                or l[header_dict[case_freq]] == 'NA'):
                            chrom_dict[chrom]['freqs'].append(-1)
                        else:
                            case_N = float(l[header_dict[case_n]])
                            control_N = float(l[header_dict[control_n]])
                            tot_N = case_N + control_N
                            a_scalar = case_N / float(tot_N)
                            u_scalar = control_N / float(tot_N)
                            freq = float(
                                l[header_dict[case_freq]]) * a_scalar + float(
                                    l[header_dict[control_freq]]) * u_scalar
                            chrom_dict[chrom]['freqs'].append(freq)
                    else:
                        if (l[header_dict[case_freq]] == '.'
                                or l[header_dict[case_freq]] == 'NA'
                                or l[header_dict[control_freq]] == '.'
                                or l[header_dict[control_freq]] == 'NA'):
                            chrom_dict[chrom]['freqs'].append(-1)
                        else:
                            freq = (float(l[header_dict[case_freq]]) +
                                    float(l[header_dict[control_freq]])) / 2.0
                            chrom_dict[chrom]['freqs'].append(freq)
                else:
                    chrom_dict[chrom]['freqs'].append(-1)
                # Get the INFO score
                info_sc = -1
                if info is not None and info in header_dict:
                    info_sc = float(l[header_dict[info]])
                chrom_dict[chrom]['infos'].append(info_sc)
                chrom_dict[chrom]['ps'].append(pval_read)
                nt = [l[header_dict[A1]].upper(), l[header_dict[A2]].upper()]
                chrom_dict[chrom]['nts'].append(nt)
                raw_beta = float(l[header_dict[eff]])

                if n is None:
                    if ncol not in header_dict:
                        case_N = float(l[header_dict[case_n]])
                        control_N = float(l[header_dict[control_n]])
                        N = case_N + control_N
                    else:
                        N = float(l[header_dict[ncol]])
                else:
                    N = n
                if not input_is_beta:
                    raw_beta = sp.log(raw_beta)
                    chrom_dict[chrom]['log_odds'].append(raw_beta)
                    beta = get_beta_from_pvalue(pval_read, raw_beta)
                    chrom_dict[chrom]['betas'].append(beta / sp.sqrt(N))
                else:
                    beta = get_beta_from_pvalue(pval_read, raw_beta)
                    chrom_dict[chrom]['log_odds'].append(beta / sp.sqrt(N))
                    chrom_dict[chrom]['betas'].append(beta / sp.sqrt(N))

        if len(bad_chromosomes) > 0:
            if debug:
                print('Ignored chromosomes: %s' %
                      (','.join(list(bad_chromosomes))))
                print(
                    'Please note that only data on chromosomes 1-23, and X are parsed.'
                )

    if num_lines > 0:
        sys.stdout.write('\b\b\b\b\b\b\b%0.2f%%\n' % (100.0))
        sys.stdout.flush()
    print('SS file loaded, now sorting and storing in HDF5 file.')
    assert not 'sum_stats' in hdf5_file, 'Something is wrong with HDF5 file?'
    ssg = hdf5_file.create_group('sum_stats')
    num_snps = 0
    num_non_finite = 0
    for chrom in chrom_dict:
        if debug:
            print('%d SNPs on chromosome %s' %
                  (len(chrom_dict[chrom]['positions']), chrom))
        assert len(chrom_dict[chrom]['positions']) == len(
            chrom_dict[chrom]['betas']) == len(chrom_dict[chrom]['ps']) == len(
                chrom_dict[chrom]
                ['nts']), 'Problems with parsing summary stats'
        sl = list(
            zip(chrom_dict[chrom]['positions'], chrom_dict[chrom]['sids'],
                chrom_dict[chrom]['nts'], chrom_dict[chrom]['betas'],
                chrom_dict[chrom]['log_odds'], chrom_dict[chrom]['infos'],
                chrom_dict[chrom]['freqs'], chrom_dict[chrom]['ps']))
        sl.sort()
        ps = []
        betas = []
        nts = []
        sids = []
        positions = []
        log_odds = []
        infos = []
        freqs = []
        prev_pos = -1
        for pos, sid, nt, beta, lo, info, frq, p in sl:
            if pos == prev_pos:
                if debug:
                    print('duplicated position %d' % pos)
                continue
            else:
                prev_pos = pos
            if not sp.isfinite(beta):
                num_non_finite += 1
                continue
            ps.append(p)
            betas.append(beta)
            nts.append(nt)
            sids.append(sid)
            positions.append(pos)
            log_odds.append(lo)
            infos.append(info)
            freqs.append(frq)
        nts = sp.array(nts, dtype=util.nts_dtype)
        sids = sp.array(sids, dtype=util.sids_dtype)
        if debug:
            if not num_non_finite == 0:
                print('%d SNPs have non-finite statistics on chromosome %s' %
                      (num_non_finite, chrom))
            print('Still %d SNPs on chromosome %s' % (len(ps), chrom))
        g = ssg.create_group('chrom_%s' % chrom)
        g.create_dataset('ps', data=sp.array(ps))
        g.create_dataset('freqs', data=freqs)
        g.create_dataset('betas', data=betas)
        g.create_dataset('log_odds', data=log_odds)
        num_snps += len(log_odds)
        g.create_dataset('infos', data=infos)
        g.create_dataset('nts', data=nts)
        g.create_dataset('sids', data=sids)
        g.create_dataset('positions', data=positions)
        hdf5_file.flush()
    if debug:
        print('%d SNPs excluded due to invalid chromosome' % invalid_chr)
        if match_genomic_pos:
            print('%d SNPs excluded due to invalid genomic positions' %
                  invalid_pos)
        else:
            print(
                '%d SNPs with non-matching genomic positions (not excluded)' %
                invalid_pos)
        print('%d SNPs excluded due to invalid P-value' % invalid_p)
        print('%d SNPs excluded due to invalid effect sizes' % invalid_beta)
        print('%d SNPs parsed from summary statistics file' % num_snps)
    summary_dict[3.09] = {'name': 'dash', 'value': 'Summary statistics'}
    summary_dict[3.1] = {
        'name': 'Num SNPs parsed from sum stats file',
        'value': num_snps
    }
    if invalid_p > 0:
        summary_dict[3.2] = {
            'name': 'Num invalid P-values in sum stats',
            'value': invalid_p
        }
    if invalid_beta > 0:
        summary_dict[3.21] = {
            'name': 'Num invalid effect sizes in sum stats',
            'value': invalid_beta
        }
    if invalid_chr > 0:
        summary_dict[3.4] = {
            'name': 'SNPs w non-matching chromosomes excluded',
            'value': invalid_chr
        }
    if invalid_pos > 0:
        if match_genomic_pos:
            summary_dict[3.3] = {
                'name': 'SNPs w non-matching positions excluded',
                'value': invalid_pos
            }
        else:
            summary_dict[3.3] = {
                'name': 'SNPs w non-matching positions (not excluded)',
                'value': invalid_pos
            }
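The helper get_beta_from_pvalue is not shown in this example. As a hedged sketch of the conversion it appears to perform (and which Example #59 later does inline), the usual approach is to turn the two-sided P-value into a z-score, re-attach the sign of the log-odds, and scale by sqrt(N); the function name and the exact sign convention of the real helper are assumptions.

# Hedged sketch: standardized effect from a P-value and log-odds, mirroring Example #59
import scipy as sp
from scipy import stats

def beta_from_pvalue(pval, log_odds, N):
    z = stats.norm.isf(pval / 2.0)            # two-sided P-value -> |z|
    return sp.sign(log_odds) * z / sp.sqrt(N) # signed, per-sample standardized effect

print(beta_from_pvalue(1e-8, sp.log(1.2), 50000.0))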
Example #44
0
def func(x):
    '''The function that we are finding the root of.'''
    return sp.log(1.5 * x)
Example #45
0
 def invSigmo(self, a):
     return - log(1.0 / a - 1.0)
Example #46
0
def ocv_soc(soc, a, b, c, d, e, f):
    y = a + b*soc + c*soc**2 + d/(soc + 0.00001) + e*log(soc + 0.00001) + f*log(1 + 0.00001 - soc)
    return y
Example #47
0
#!/usr/bin/python3
# -*- coding: utf-8 -*-

import scipy
from scipy.stats import norm

n = 10000

S = 1614.96  # Standard and Poors 500 Index, on 07/01/2013
r = 0.008  # implied risk free interest rate, between 3 year and 5 year T-bill rate
sigma = 0.1827  # implied volatility
K = 1575  # strike price
Tminust = 110. / 365.  # 07/01/2013 to 10/19/2013

numerd1 = scipy.log(S / K) + (r + sigma**2 / 2) * Tminust
numerd2 = scipy.log(S / K) + (r - sigma**2 / 2) * Tminust
d1 = numerd1 / (sigma * scipy.sqrt(Tminust))
d2 = numerd2 / (sigma * scipy.sqrt(Tminust))
part1 = S * (norm.cdf(d1) - 1)
part2 = K * scipy.exp(-r * Tminust) * (norm.cdf(d2) - 1)
VP = part1 - part2

x = norm.rvs(size=n)
y1 = scipy.maximum(
    0, K - S *
    scipy.exp((r - sigma**2 / 2) * Tminust + sigma * x * scipy.sqrt(Tminust)))
y2 = scipy.maximum(
    0, K - S *
    scipy.exp((r - sigma**2 / 2) * Tminust + sigma * -x * scipy.sqrt(Tminust)))
y = (y1 + y2) / 2
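The snippet stops at the averaged antithetic payoffs. A hedged completion (not part of the original example) would discount the sample mean back to today and compare it with the analytic put value VP computed above:

# Hedged completion: Monte Carlo estimate of the put price with a rough standard error
VP_mc = scipy.exp(-r * Tminust) * scipy.mean(y)
stderr = scipy.exp(-r * Tminust) * scipy.std(y) / scipy.sqrt(n)
print(VP, VP_mc, stderr)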
Example #48
0
def doConvolution(x_in, y_in, x_out, widths, factor=5, oversampling=1):
    '''
    Perform convolution on lists with a Gaussian filter.
    
    Reduce the input grid to the target grid by integration.
   
    @param x_in: The input x-values
    @type x_in: array
    @param y_in: The input y-values
    @type y_in: array
    @param x_out: The target x-grid
    @type x_out: array
    @param widths: The full width/half maximum spectral resolution as a 
                   function of wavelength, i.e. the fwhm of the gaussian
    @type widths: array
    
    @keyword factor: the sigma factor for determining the window pushed through
                     the gaussian filter. This avoids having to convolve the 
                     whole input grid, which takes a lot of time. Beyond 
                     sigma*factor the contribution of the y values is assumed 
                     to be negligible.
                     
                     (default: 5)
    @type factor: int
    @keyword oversampling: oversampling factor of the target x-grid with
                           respect to the given spectral resolution.
                           
                           (default: 1)
    @type oversampling: int
   
    @return: The resulting y-values
    @rtype: list
    
    '''

    x_in, y_in, x_out, widths = array(x_in), array(y_in), array(x_out), array(
        widths)
    y_out = []
    print 'Convolving for x_out between %.2f micron and %.2f micron with oversampling %i.' \
          %(x_out[0],x_out[-1],int(oversampling))
    #- Convert FWHM's to sigma for the gaussians
    sigma = [fwhm / (2. * sqrt(2. * log(2.))) for fwhm in widths]
    #- Define the binsizes of the bins that will be integrated, i.e. the
    #- apparent resolution of x_out
    binsize = [w / oversampling for w in widths]
    for delta_bin, sigi, xi_out in zip(binsize, sigma, x_out):
        yi_in = y_in[abs(x_in - xi_out) <= factor * sigi]
        #- if not empty: continue, else add 0
        if list(yi_in) and set(yi_in) != set([0.0]):
            #- all relevant xi's for the bin around xi_out, ie in this bin the
            #- y-values will be integrated
            xi_in = x_in[abs(x_in - xi_out) <= delta_bin]
            #- The window for the convolution itself, outside this window the
            #- data are assumed to be negligible, ie for a gaussian
            window = x_in[abs(x_in - xi_out) <= factor * sigi]
            convolution = convolveArray(window, yi_in, sigi)
            #- if one value in the bin, out of the window selection: add value
            if len(list(convolution[abs(window - xi_out) <= delta_bin])) == 1:
                y_out.append(convolution[abs(window - xi_out) <= delta_bin][0])
                print 'Convolution has a window of only one element at xi_out %f.' % xi_out
            #- If more than one value: integrate
            elif list(convolution[abs(window - xi_out) <= delta_bin]):
                y_out.append(
                    trapz(y=convolution[abs(window - xi_out) <= delta_bin],
                          x=xi_in) / (xi_in[-1] - xi_in[0]))
            #- If no values in the bin from the window: add average of the window
            #- This should not occur ideally!
            else:
                print 'Convolution has a window of no elements at x_out ' + \
                      '%f. Careful! Average is taken of '%(xi_out) + \
                      'sigma*factor window! This should not be happening...'
                y_out.append(sum(convolution) / float(len(convolution)))
        else:
            y_out.append(0.0)
    return y_out
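The FWHM-to-sigma conversion used above, sigma = FWHM / (2*sqrt(2*ln 2)), is easy to exercise in isolation. The sketch below smooths a toy spectrum on a uniform grid with scipy.ndimage; the grid spacing and FWHM are assumptions, and the uniform-grid shortcut is not equivalent to the bin-integration done by doConvolution.

# Hedged sketch: Gaussian smoothing with a FWHM-specified kernel
from scipy import arange, exp, sqrt, log
from scipy.ndimage import gaussian_filter1d

x = arange(0.0, 10.0, 0.01)                      # uniform wavelength grid (assumed)
y = exp(-0.5 * ((x - 5.0) / 0.05) ** 2)          # narrow toy line
fwhm = 0.3                                       # spectral resolution element (assumed)
sigma = fwhm / (2. * sqrt(2. * log(2.)))         # FWHM -> Gaussian sigma
y_smooth = gaussian_filter1d(y, sigma / 0.01)    # sigma expressed in pixels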
Example #49
0
def coxnet(x, is_sparse, irs, pcs, y, weights, offset, parm,
          nobs, nvars, jd, vp, cl, ne, nx, nlam, flmin, ulam, 
          thresh, isd, maxit, family):
    
    # load shared fortran library
    glmlib = loadGlmLib() 
    
    # pre-process data     
    ty = y[:, 0]
    tevent = y[:, 1]
    if scipy.any(ty <= 0):
        raise ValueError('negative event time not permitted for cox family')
    if len(offset) == 0:
        offset = ty*0
        is_offset = False
    else:
        is_offset = True
        
    # now convert types and allocate memory before calling 
    # glmnet fortran library
    ######################################
    # --------- PROCESS INPUTS -----------
    ######################################
    # force inputs into fortran order and scipy float64
    copyFlag = False
    x = x.astype(dtype = scipy.float64, order = 'F', copy = copyFlag) 
    irs = irs.astype(dtype = scipy.int32, order = 'F', copy = copyFlag)
    pcs = pcs.astype(dtype = scipy.int32, order = 'F', copy = copyFlag)    
    ty = ty.astype(dtype = scipy.float64, order = 'F', copy = copyFlag)    
    tevent = tevent.astype(dtype = scipy.float64, order = 'F', copy = copyFlag)    
    offset = offset.astype(dtype = scipy.float64, order = 'F', copy = copyFlag)    
    weights = weights.astype(dtype = scipy.float64, order = 'F', copy = copyFlag)    
    jd = jd.astype(dtype = scipy.int32, order = 'F', copy = copyFlag)        
    vp = vp.astype(dtype = scipy.float64, order = 'F', copy = copyFlag)    
    cl = cl.astype(dtype = scipy.float64, order = 'F', copy = copyFlag)    
    ulam   = ulam.astype(dtype = scipy.float64, order = 'F', copy = copyFlag)    

    ######################################
    # --------- ALLOCATE OUTPUTS ---------
    ######################################
    # lmu
    lmu = -1
    lmu_r = ctypes.c_int(lmu)
    # ca
    ca   = scipy.zeros([nx, nlam], dtype = scipy.float64)
    ca   = ca.astype(dtype = scipy.float64, order = 'F', copy = False)    
    ca_r = ca.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
    # ia
    ia   = -1*scipy.ones([nx], dtype = scipy.int32)
    ia   = ia.astype(dtype = scipy.int32, order = 'F', copy = False)    
    ia_r = ia.ctypes.data_as(ctypes.POINTER(ctypes.c_int))
    # nin
    nin   = -1*scipy.ones([nlam], dtype = scipy.int32)
    nin   = nin.astype(dtype = scipy.int32, order = 'F', copy = False)    
    nin_r = nin.ctypes.data_as(ctypes.POINTER(ctypes.c_int))
    # dev
    dev   = -1*scipy.ones([nlam], dtype = scipy.float64)
    dev   = dev.astype(dtype = scipy.float64, order = 'F', copy = False)    
    dev_r = dev.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
    # alm
    alm   = -1*scipy.ones([nlam], dtype = scipy.float64)
    alm   = alm.astype(dtype = scipy.float64, order = 'F', copy = False)    
    alm_r = alm.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
    # nlp
    nlp = -1
    nlp_r = ctypes.c_int(nlp)
    # jerr
    jerr = -1
    jerr_r = ctypes.c_int(jerr)
    # dev0
    dev0 = -1
    dev0_r = ctypes.c_double(dev0)

    #  ###################################
    #   main glmnet fortran caller
    #  ###################################  
    if is_sparse:
        # no sparse coxnet implemented
        raise ValueError('Cox model not implemented for sparse x in glmnet')

    else:
        # call fortran coxnet routine
        glmlib.coxnet_( 
              ctypes.byref(ctypes.c_double(parm)), 
              ctypes.byref(ctypes.c_int(nobs)),
              ctypes.byref(ctypes.c_int(nvars)),
              x.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), 
              ty.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), 
              tevent.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), 
              offset.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), 
              weights.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), 
              jd.ctypes.data_as(ctypes.POINTER(ctypes.c_int)), 
              vp.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), 
              cl.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), 
              ctypes.byref(ctypes.c_int(ne)), 
              ctypes.byref(ctypes.c_int(nx)), 
              ctypes.byref(ctypes.c_int(nlam)), 
              ctypes.byref(ctypes.c_double(flmin)), 
              ulam.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), 
              ctypes.byref(ctypes.c_double(thresh)), 
              ctypes.byref(ctypes.c_int(maxit)), 
              ctypes.byref(ctypes.c_int(isd)), 
              ctypes.byref(lmu_r),
              ca_r, 
              ia_r, 
              nin_r, 
              ctypes.byref(dev0_r),
              dev_r,
              alm_r, 
              ctypes.byref(nlp_r), 
              ctypes.byref(jerr_r)
              )
   
    #  ###################################
    #   post process results
    #  ###################################  
     
    # check for error
    if (jerr_r.value > 0):
        raise ValueError("Fatal glmnet error in library call : error code = ", jerr_r.value)
    elif (jerr_r.value < 0):
        print("Warning: Non-fatal error in glmnet library call: error code = ", jerr_r.value)
        print("Check results for accuracy. Partial or no results returned.")
    
    # clip output to correct sizes
    lmu = lmu_r.value
    ca = ca[0:nx, 0:lmu]    
    ia = ia[0:nx]
    nin = nin[0:lmu]
    dev = dev[0:lmu]
    alm = alm[0:lmu]    
    
    # ninmax
    ninmax = max(nin)
    # fix first value of alm (from inf to correct value)
    if ulam[0] == 0.0:
        t1 = scipy.log(alm[1])
        t2 = scipy.log(alm[2])
        alm[0] = scipy.exp(2*t1 - t2)        
    # create return fit dictionary
    if ninmax > 0:
        ca = ca[0:ninmax, :]
        df = scipy.sum(scipy.absolute(ca) > 0, axis=0)
        ja = ia[0:ninmax] - 1    # ia is 1-indexed in fortran
        oja = scipy.argsort(ja)
        ja1 = ja[oja]
        beta = scipy.zeros([nvars, lmu], dtype = scipy.float64)
        beta[ja1, :] = ca[oja, :]
    else:
        beta = scipy.zeros([nvars, lmu], dtype = scipy.float64)
        df = scipy.zeros([1, lmu], dtype = scipy.float64)
    
    fit = dict()
    fit['beta'] = beta
    fit['dev'] = dev
    fit['nulldev'] = dev0_r.value
    fit['df']= df
    fit['lambdau'] = alm
    fit['npasses'] = nlp_r.value
    fit['jerr'] = jerr_r.value
    fit['dim'] = scipy.array([nvars, lmu], dtype = scipy.integer)
    fit['offset'] = is_offset
    fit['class'] = 'coxnet'    
 
    #  ###################################
    #   return to caller
    #  ###################################  

    return fit
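The fix-up of alm[0] above is a log-linear extrapolation of the first two computed lambdas, i.e. the path is extended one step backwards on a log scale. A one-line check of the same rule on an assumed geometric path:

# Hedged sketch: lambda[0] = exp(2*log(lambda[1]) - log(lambda[2]))
import scipy
alm_example = scipy.array([scipy.inf, 0.8, 0.64])          # illustrative lambda path (assumed)
alm_example[0] = scipy.exp(2 * scipy.log(alm_example[1]) - scipy.log(alm_example[2]))
print(alm_example[0])                                      # 1.0 for this geometric path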
Example #50
0
    def __init__(self, T, P, mezcla):
        self.T=unidades.Temperature(T)
        self.P=unidades.Pressure(P, "atm")
        self.mezcla=mezcla
        self.componente=mezcla.componente
        self.fraccion=mezcla.fraccion

        self.B=self.b*self.P.atm/R_atml/self.T
        self.Tita=self.tita*self.P.atm/(R_atml*self.T)**2

        delta=self.delta*self.P.atm/R_atml/self.T
        epsilon=self.epsilon*(self.P.atm/R_atml/self.T)**2
        eta=self.eta*self.P.atm/R_atml/self.T
        Z=roots([1, delta-self.B-1, self.Tita+epsilon-delta*(self.B+1), -epsilon*(self.B+1)-self.Tita*eta])
        self.Z=r_[Z[0].real, Z[2].real]

        self.V=self.Z*R_atml*self.T/self.P.atm  #mol/l
        self.x, self.xi, self.yi, self.Ki=self._Flash()
        self.H_exc=-(self.tita+self.dTitadT)/R_atml/self.T/(self.delta**2-4*self.epsilon)**0.5*log((2*self.V+self.delta-(self.delta**2-4*self.epsilon)**0.5)/(2*self.V+self.delta+(self.delta**2-4*self.epsilon)**0.5))+1-self.Z
Example #51
0
    def updateW(self, m):
        M = self.components
        Muse = self.doUpdate.sum()
        if self.noise == 'gauss':
            YmeanX = self.Z.E1
        elif self.noise == 'hurdle' or self.noise == 'poisson':
            YmeanX = self.meanX

        if (m < self.nKnown) or (m in self.iLatentSparse) or (m
                                                              in self.iLatent):
            with SP.errstate(divide='ignore'):
                logPi = SP.log(self.Pi.E1[:, m] / (1 - self.Pi.E1[:, m]))
            #logPi = (self.Pi.lnE1 - (special.digamma(self.Pi.b) - special.digamma(self.Pi.a+self.Pi.b)))[:,m]

        elif self.nScale > 0 and self.nScale < YmeanX.shape[0]:
            with SP.errstate(divide='ignore'):
                logPi = SP.log(self.Pi.E1[:, m] / (1 - self.Pi.E1[:, m]))
            #logPi = self.Pi.lnE1 - (special.digamma(self.Pi.b) - special.digamma(self.Pi.a+self.Pi.b))
            isOFF_ = self.Pi.E1[:, m] < .5
            logPi[isOFF_] = (YmeanX.shape[0] / self.nScale) * SP.log(
                self.Pi.E1[isOFF_, m] / (1 - self.Pi.E1[isOFF_, m]))

            isON_ = self.Pi.E1[:, m] > .5

            if self.onF > 1.:
                logPi[isON_] = self.onF * SP.log(self.Pi.E1[isON_, m] /
                                                 (1 - self.Pi.E1[isON_, m]))

        else:
            onF = 1.
            logPi = SP.log(self.Pi.E1[:, m] / (1 - self.Pi.E1[:, m]))

        sigma2Sigmaw = (1.0 / self.Eps.E1) * self.Alpha.E1[m]

        setMinus = SP.int_(
            SP.hstack([list(range(M))[0:m],
                       list(range(M))[m + 1::]]))
        setMinus = setMinus[self.doUpdate[setMinus] == 1]

        SmTSk = SP.sum(
            SP.tile(self.S.E1[:, m:m + 1],
                    (1, Muse - 1)) * self.S.E1[:, setMinus], 0)
        SmTSm = SP.dot(self.S.E1[:, m].transpose(),
                       self.S.E1[:, m]) + self.S.diagSigmaS[:, m].sum()

        b = SP.dot((self.W.C[:, setMinus, 0] * self.W.E1[:, setMinus]),
                   (SmTSk.transpose()))
        diff = SP.dot(self.S.E1[:, m].transpose(), YmeanX) - b

        SmTSmSig = SmTSm + sigma2Sigmaw

        #update C and W

        u_qm = logPi + 0.5 * SP.log(sigma2Sigmaw) - 0.5 * SP.log(SmTSmSig) + (
            0.5 * self.Eps.E1) * ((diff**2) / SmTSmSig)
        with SP.errstate(over='ignore'):
            self.W.C[:, m, 0] = 1. / (1 + SP.exp(-u_qm))

        self.W.C[:, m, 1] = 1 - self.W.C[:, m, 0]
        self.W.E1[:, m] = (diff / SmTSmSig)  #q(w_qm | s_qm=1), q=1,...,Q
        self.W.sigma2[:, m] = (1. / self.Eps.E1) / SmTSmSig
        self.W.E2diag[:, m] = self.W.E1[:, m]**2 + self.W.sigma2[:, m]
Example #52
0
def blackscholes_put(S, E, T, rf, sigma):
    #calculate d1 and d2 parameters
    d1 = (log(S / E) + (rf + sigma * sigma / 2.0) * T) / (sigma * sqrt(T))
    d2 = d1 - sigma * sqrt(T)
    #we use the cdf of a normal distribution
    return -S * stats.norm.cdf(-d1) + E * exp(-rf * T) * stats.norm.cdf(-d2)
Example #53
0
 def logdet(self):
     r = sp.log(self.SpI()).sum()
     r += sp.sum(sp.log(self.Cn.S())) * self.dim_r
     r += 2 * sp.log(sp.diag(self.H_chol())).sum()
     return r
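The last term of Example #53 uses the standard identity log|A| = 2*sum(log(diag(chol(A)))) for a positive-definite matrix. A small numerical check (the matrix is an assumption, chosen only for illustration):

# Hedged sketch: log-determinant via a Cholesky factor vs. the direct determinant
import scipy as sp
import scipy.linalg

A = sp.array([[4.0, 1.0], [1.0, 3.0]])
L = sp.linalg.cholesky(A, lower=True)
logdet_chol = 2 * sp.log(sp.diag(L)).sum()
logdet_direct = sp.log(sp.linalg.det(A))
print(logdet_chol, logdet_direct)          # both ~log(11)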
Example #54
0
    def calcBound(self):
        #TODO: debug!! DO NOT USE
        F1 = -self._D*self._N/2*SP.log(2*pi) - self._N/2 * SP.sum(SP.log(1/self.Eps.E1)) - \
            0.5*SP.sum(self.ZZ*self.Eps.E1)

        SW_tau = (self.W.C[:, :, 0] * self.W.E1) * SP.tile(
            self.Eps.E1, (self.W.E1.shape[1], 1)).T
        SW2_tau = (self.W.C[:, :, 0] *
                   (self.W.E2diag)) * SP.tile(self.Eps.E1,
                                              (self.W.E1.shape[1], 1)).T
        SS = SP.sum(self.S.E1 * self.S.E1, 0)
        SmTSm = SP.zeros(self.W.E1.shape[1])

        F2 = SP.sum(SW_tau * SP.dot(self.Z.E1.T, self.S.E1))

        F3 = 0.
        F4 = 0.
        F7PlusE3 = 0.5 * (self.nHidden * self._N)  #don't use knowns in entropy

        for m in SP.arange(self.W.E1.shape[1]):
            #F3
            SigmaSm = 1. / (1 + SP.sum(self.S.diagSigmaS[:, m]))
            SmTSm[m] = SS[m] + self._N * SigmaSm
            F3 += SP.sum(SW2_tau[:, m], 0) * SmTSm[m]

            #F4
            rS = SP.zeros(self._N)
            for m1 in SP.arange(m + 1, self.W.E1.shape[1]):
                tmp = (self.W.C[:, m1, 0] * self.W.E1[:, m1]) * SW_tau[:, m]
                rS = rS + SP.sum(tmp, 0) * self.S.E1[:, m1]
            F4 = F4 + SP.dot(rS, self.S.E1[:, m:m + 1])

            #F7
            alphaSm = SP.sum(SW2_tau[:, m])
            F7PlusE3 = F7PlusE3 - 0.5*self._N*SP.log(1+alphaSm)  - (0.5*self._N)/(1+alphaSm) \
                            - 0.5*SP.dot(self.S.E1[:,m].T, self.S.E1[:,m])

        F5 = -(0.5*self.components*self._D)*SP.log(2.*pi) - (0.5*self.components)*sum(SP.log(1./self.Alpha.E1)) - \
            0.5* SP.sum(1-self.W.C[:,:,0]) + SP.sum(SP.sum(self.W.C[:,:,0]*self.W.E2diag,0)*self.Alpha.E1)

        F6 = SP.sum(SP.log(self.Pi.E1) * self.W.C[:, :, 0]) + SP.sum(
            SP.log(1. - self.Pi.E1) * (1 - self.W.C[:, :, 0]))

        EpslnE = special.digamma(self.Eps.a) - SP.log(self.Eps.b)
        F8 = (self.Eps.pa - 1) * SP.sum(EpslnE) - self.Eps.pb * SP.sum(
            self.Eps.E1)

        AlphalnE = special.digamma(self.Alpha.a) - SP.log(self.Alpha.b)
        F9 = (self.Alpha.pa - 1) * SP.sum(AlphalnE) - self.Alpha.pb * SP.sum(
            self.Alpha.E1)


        E1 = (0.5*self.components*self._D)*SP.log(2*pi) + (0.5*self.components)*SP.sum(SP.log(1./self.Alpha.E1)) + \
            0.5*(self.components*self._D) - 0.5*SP.sum(SP.log(1./self.Alpha.E1)*(SP.sum(self.W.C[:,:,0],0))) \
             + 0.5*SP.sum( self.W.C[:,:,0]*SP.log(self.W.sigma2))

        E2 = - SP.sum( self.W.C[:,:,0]*SP.log(self.W.C[:,:,0]+(self.W.C[:,:,0]==0)) + \
            (1-self.W.C[:,:,0])*SP.log(1-self.W.C[:,:,0]+(self.W.C[:,:,0]==1)))

        E4 = SP.sum(self.Eps.a*SP.log(self.Eps.b)) + SP.sum((self.Eps.a-1)*EpslnE) -\
             SP.sum(self.Eps.b*self.Eps.E1) - SP.sum(special.gammaln(self.Eps.a))


        E5 = SP.sum(self.Alpha.a*SP.log(self.Alpha.b)) + SP.sum((self.Alpha.a-1)*AlphalnE) -\
             SP.sum(self.Alpha.b*self.Alpha.E1) - SP.sum(special.gammaln(self.Alpha.a))

        #pdb.set_trace()
        #F = F1 + F2 - 0.5*F3 - F4 + F5 + F6 + E1 + E2 + F7PlusE3 + F8 - E4 +F9 - E5
        F = F1 + F2 - 0.5 * F3 - F4 + F5 + F6 + E1 + E2 + F7PlusE3  #+ F8 - E4 #+F9 - E5

        return F
Example #55
0
#  Isotopologue image and centroid map
cubo, head = datafits(image)

dnu = head['CDELT3']
len_nu = head['NAXIS3']
nui = head['CRVAL3']- head['CRPIX3']*dnu
nuf = nui + (len_nu-1)*dnu

nu = sp.linspace(nui, nuf, len_nu)
nu0 = sp.mean(nu)

#Gaussian Convolution
if False:
    resol = abs(head['CDELT1'])*3600
    stdev = Beam / (2 * sp.sqrt (2 * sp.log(2)))
    stdev /= resol
    x_size = int(8*stdev + 1.)

    print 'convolution with gaussian'
    print '\tbeam '+str(Beam)+' arcsec'
    print '\tbeam '+str(stdev)+' pixels'

    # circular Gaussian
    beam = Gaussian2DKernel(stddev=stdev, x_size=x_size, y_size=x_size,
                            mode='integrate')
    smooth =  np.zeros((80, 256, 256))
    for k in range(80):
        smooth[k, :,:] += convolve_fft(cubo[k,:,:], beam)
    print '\tsmoothed'
    cubo = smooth
Example #56
0
    plt.plot([0, 0], ylim, '--', color='.3')
    plt.xticks(xticks, ['$'+str(int(xi * 100)) + '\% $' for xi in xticks]) 
    plt.yticks(yticks, ['$'+str(int(yi * 100)) + '\% $' for yi in yticks])
    plt.xlim(xlim)
    plt.ylim(ylim)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
#    plt.savefig(fname, format=fformat)


rawdata = list(reader(open("data_www.csv"), quoting = QUOTE_NONNUMERIC))
# generating a dictionary with data of the relevant years
data={c: {z[0]:{zname:zvalue for zname,zvalue in zip(rawdata[0], z)} 
          for z in rawdata if z[rawdata[0].index('year')] == c} 
      for c in (2004, 1970)} 
gdpDeltaUS = log(data[2004]['United States']['per_cap_gdp']/
                 data[1970]['United States']['per_cap_gdp'])/34

GovChangeGDP = {'poor': [], 'rich': []}
GDPChange = {'poor': [], 'rich': []}
PrivChangeGDP  =  {'poor': [], 'rich': []}
Country = {'poor': [], 'rich': []}
NFAChangeGDP = {'poor': [], 'rich': []}
for c in data[1970].keys():
    try:
        d1, d0 = data[2004][c], data[1970][c]
        gdp_change = log(d1['per_cap_gdp']
                         /d0['per_cap_gdp'])/34 - gdpDeltaUS
        nfa_change_gdp = ((d1['assets']-d1['liabilities'])
                          /d1['dollar_gdp']
                          - (d0['assets']-d0['liabilities'])
                          /d0['dollar_gdp'])/34
Example #57
0
    def computePVmixtureChi2(lrt,
                             a2=None,
                             tol=0.0,
                             mixture=0.5,
                             scale=1.0,
                             dof=1.0):
        '''
        OBSOLETE: but was known to work. Can delete once we get past problems with python code solved.

        computes P-values for a mixture of a scaled Chi^2_dof and Chi^2_0 distributions.
        The mixture weight is estimated from the fraction of models, where the parameter is at the boundary.
        The scale and degrees of freedom (dof) of the scaled Chi^2_dof are estimated by maximum likelihood, if the parameters provided are set to None.
        Note that accurate estimation of the mixture coefficient needs a sufficiently large number of tests to be performed.
        The P-values are computed as mixture*(1.0-CDF_Chi^2_1(lrt))
        --------------------------------------------------------------------------
        Input:
        lrt     : [S] 1D array of likelihood ratio tests (2*ln(likelihood ratio))
        a2      : [S] 1D array, if specified then a2 is used to determine the Chi^2_0
                  component, else lrt is used (optional).
        tol     : cutoff for members of the Chi^2_0 component is a2/lrt 0+tol.
        mixture : the scaled Chi^2_dof1 mixture component, if this parameter is set
                  to None, it will be estimated by the fraction of the tests that have
                  the weight a2 at the boundary (a2=0.0, lrt=0.0)
        scale   : the scale parameter of the scaled Chi^2_dof, if set to None the
                  parameter will be determined by maximum likelihood. (default 1.0)
        dof     : the degrees of freedom of the scaled Chi^2_dof, if set to None the
                  parameter will be determined by maximum likelihood. (default 1.0)
        --------------------------------------------------------------------------
        Output:
        pv        : [S] 1D-array of P-values computed as mixture*(1.0-CDF_Chi^2_dof(scale,lrt))
        mixture   : mixture weight of the scaled Chi^2_dof component
        scale     : scale of the scaled Chi^2_dof distribution
        dof       : degrees of freedom of the scaled Chi^2_dof distribution
        i0        : indicator for Chi^2_0 P-values
        --------------------------------------------------------------------------
        '''
        raise Exception(
            "made changes to use alteqnull and did not modify this code as it looks obsolete"
        )
        loc = None
        chi2mix = chi2mixture()
        chi2mix.lrt = lrt
        if mixture is None:
            i0, mixture = chi2mix.fit_mixture(a2=a2, tol=tol)
        else:
            chi2mix.mixture = mixture
            if a2 is None:
                i0 = (lrt <= (0.0 + tol))
            else:
                i0 = (a2 <= (0.0 + tol))

        N = (~i0).sum()
        sumX = (lrt[~i0]).sum()
        logsumX = (sp.log(lrt[~i0])).sum()
        if (dof is None) and (scale is None):
            #f is the Gamma likelihood with the scale parameter maximized analytically as a function of 0.5 * the degrees of freedom
            f = lambda k: -1.0 * (-N * sp.special.gammaln(k) - k * N *
                                  (sp.log(sumX) - sp.log(k) - sp.log(N)) +
                                  (k - 1.0) * logsumX - k * N)
            #f_ = lambda(x): 1-N*N/(2.0*x*sumX)
            res = minimize1D(f,
                             evalgrid=None,
                             nGrid=10,
                             minval=0.1,
                             maxval=3.0)
            dof = 2.0 * res[0]
        elif dof is None:
            f = lambda k: -1.0 * (-N * sp.special.gammaln(k) - k * N * sp.log(
                2.0 * scale) + (k - 1.0) * logsumX - sumX / (2.0 * scale))
            res = minimize1D(f,
                             evalgrid=None,
                             nGrid=10,
                             minval=0.1,
                             maxval=3.0)
            dof = 2.0 * res[0]
        if scale is None:
            #compute condition for ML
            if (1.0 - (N * N * dof) / (4.0 * sumX) > 0):
                logging.warn(
                    'Warning: positive second derivative: No maximum likelihood solution can be found for the scale. returning scale=1.0 and dof=1.0'
                )
                scale = 1.0
                dof = 1.0

            else:
                scale = sumX / (N * dof)
        pv = mixture * (
            st.chi2.sf(lrt / scale, dof)
        )  # Can use the Chi^2 CDF/SF to evaluate the scaled Chi^2 by rescaling the input.
        pv[i0] = 1.0
        return (pv, mixture, scale, dof, i0)
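The final formula described in the docstring, pv = mixture * (1 - CDF_Chi^2_dof(lrt/scale)), can be applied directly once the mixture weight is known. A hedged stand-alone sketch with assumed statistics and the default mixture, scale, and dof:

# Hedged sketch: P-values for a mixture of Chi^2_0 and a scaled Chi^2_dof
import scipy as sp
import scipy.stats as st

lrt = sp.array([0.0, 0.5, 3.84, 10.0])       # likelihood-ratio statistics (assumed)
mixture, scale, dof = 0.5, 1.0, 1.0
pv = mixture * st.chi2.sf(lrt / scale, dof)
pv[lrt <= 0.0] = 1.0                          # boundary cases belong to the Chi^2_0 mass
print(pv)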
Example #58
0
def parspar(n):
    i = n/ndim
    j = n%ndim
    data = cubo[:,i,j]
    datamax = data.max()
    noise_level = 0
    noise = sp.random.normal(scale=noise_level,size=len(data))
    data = data + noise
    if (i-int(ndim/2.))**2 + (j-int(ndim/2.))**2 > r**2 or 0.4*cubomax>datamax:
        return [None]

    print i,j

    m0 = sp.integrate.simps(data,nu)
    m1 = sp.integrate.simps(velocities*data, nu)/m0
    mom2[i,j] = sp.integrate.simps(data*(velocities-m1)**2, nu)*1000/m0


    datamin = data.min()
    data1 = data -data.min()
    centroid = (nu*(cubo[:,i,j]-datamin)).sum()/(cubo[:,i,j]-datamin).sum()

    i0 = datamin
    if i0==0:
        i0=1e-10
    i0_lim=(0.5*i0,1.2*i0)


    r_ij = sp.sqrt((i-128)**2 +(j-128)**2)
    rho_ij = r_ij/sp.cos(incli)

    if rho_ij<30:
        temp_0 = 70
        tlim = (10,200)
    else:
        temp_0 = 70 * (r_ij / 30.)**(-0.5)
        tlim = (10,120)
    vels = (nu-nu0)*3e5/nu0
    velg = vels[data==data.max()][0]

    noise = data[(velocities<velg-1) | (velocities>velg+1.)]
    rms = np.sqrt(sp.sum(noise**2)/float(len(noise)))

## Cont=True Fit lines considering the presence of continuum.
    if cont:
        line_model = pm.Model()
    
        with line_model:
        
            var = ['Temp','nu_c','log(N_CO)','v_turb','Continuum']

# Priors for unknown model parameters
            Temp = pm.TruncatedNormal('Temp', mu=temp_0, sd=5, lower=tlim[0], upper=tlim[1])
            nu_c = pm.TruncatedNormal('nu_c', mu=centroid, sd=abs(dnu)/10., lower=centroid-0.5*abs(dnu), upper=centroid+0.5*abs(dnu))
            NCO = pm.Uniform('log(N_CO)', lower=10, upper=24)
            v_turb = pm.Uniform('v_turb', lower=sp.sqrt(k*tlim[0]/m), upper=300000)  ## This is really broadening in velocity space, not turbulent vel.
            i_0 = pm.TruncatedNormal('Continuum', mu=i0, sd=5, lower=i0_lim[0], upper=i0_lim[1])
    
    # Expected value of outcome
            predict = intensity_continuum(nu, Temp, nu_c, alpha, 10**NCO, v_turb, angle, i_0, head, iso)
    
    # Likelihood (sampling distribution) of observations
            Y_obs = pm.Normal('Y_obs', mu=predict, sd=rms, observed=data)
            step = pm.NUTS()
    
            st = {'Temp':temp_0,
                'nu_c':centroid,
                'log(N_CO)':20,
                'v_turb':20000,
                'Continuum':i0}

            trace = pm.sample(5000,tune=1000,cores=2,step=step,start=st)

            stats = pm.summary(trace)

            mean_pars = [stats['mean'][x] for x in var]
            hpd_2_5 = [stats['hpd_2.5'][x] for x in var]
            hpd_97_5 = [stats['hpd_97.5'][x] for x in var]
            var_std = [stats['sd'][x] for x in var]
            medians_pars = [sp.median(trace[x]) for x in var]
            Map = [float(pm.find_MAP(model=line_model)[x]) for x in var]
            fit = mean_pars
            model = intensity_continuum(nu, fit[0], fit[1],alpha, 10**fit[2], fit[3], angle,fit[4], head, iso)

    else:
        line_model = pm.Model()
    
        with line_model:
        
            var = ['Temp','nu_c','log(N_CO)','v_turb']
            
            # Priors for unknown model parameters
            Temp = pm.TruncatedNormal('Temp', mu=temp_0, sd=5, lower=tlim[0], upper=tlim[1])
            nu_c = pm.TruncatedNormal('nu_c', mu=centroid, sd=abs(dnu)/10., lower=centroid-0.5*abs(dnu), upper=centroid+0.5*abs(dnu))
            NCO = pm.Uniform('log(N_CO)', lower=10, upper=24)
            v_turb = pm.Uniform('v_turb', lower=sp.sqrt(k*tlim[0]/m), upper=300000) ## This is really broadening in velocity space, not turbulent vel.
            
            # Expected value of outcome
            predict = intensity(nu, Temp, nu_c, 10**NCO, v_turb, angle, head, iso)
            
            # Likelihood (sampling distribution) of observations
            Y_obs = pm.Normal('Y_obs', mu=predict, sd=rms, observed=data)
            step = pm.NUTS()
            
            st = {'Temp':temp_0,
                'nu_c':centroid,
                'log(N_CO)':20,
                'v_turb':20000}
        
            trace = pm.sample(5000,tune=1000,cores=2,step=step,start=st)
            
            stats = pm.summary(trace)
            
            mean_pars = [stats['mean'][x] for x in var]
            hpd_2_5 = [stats['hpd_2.5'][x] for x in var]
            hpd_97_5 = [stats['hpd_97.5'][x] for x in var]
            var_std = [stats['sd'][x] for x in var]
            medians_pars = [sp.median(trace[x]) for x in var]
            Map = [float(pm.find_MAP(model=line_model)[x]) for x in var]
            fit = mean_pars
            model = intensity(nu, fit[0], fit[1], 10**fit[2], fit[3], angle, head, iso)

    Temperature[i,j,:] = sp.array([fit[0],var_std[0],medians_pars[0],Map[0],hpd_2_5[0],hpd_97_5[0]])
    Denscol[i,j,:] = sp.array([10**fit[2],10**fit[2]*sp.log(10)*var_std[2],10**medians_pars[2],10**Map[2],10**hpd_2_5[2],10**hpd_97_5[2]])
    Turbvel[i,j,:] = sp.sqrt(((sp.array([fit[3],var_std[3],medians_pars[3],Map[3],hpd_2_5[3],hpd_97_5[3]]))**2 - k*fit[0]/m))*1e-5
    aux_nu = sp.array([fit[1],var_std[1],medians_pars[1],Map[1],hpd_2_5[1],hpd_97_5[1]])
    vel_cen[i,j,:] = sp.around(((nu0-aux_nu)*c*1e-5/nu0))
    return [i,j,Temperature[i,j,:], Denscol[i,j,:], Turbvel[i,j,:],vel_cen[i,j,:]]
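The Turbvel assignment above removes the thermal broadening sqrt(k*T/m) in quadrature from the fitted line width and converts cm/s to km/s with the 1e-5 factor. A worked one-liner with illustrative numbers (the CO mass, temperature, and fitted width are assumptions, not values from the example):

# Hedged sketch: subtracting thermal broadening from a fitted line width (CGS units)
import scipy as sp
k = 1.380649e-16                 # Boltzmann constant [erg/K]
m = 28 * 1.6605e-24              # approximate CO molecular mass [g] (assumed)
T_fit, v_broad = 40.0, 3.0e4     # fitted temperature [K] and broadening [cm/s] (assumed)
v_turb = sp.sqrt(v_broad**2 - k * T_fit / m) * 1e-5   # turbulent component [km/s]
print(v_turb)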
Example #59
0
def parse_sum_stats_custom(filename=None,
                           bimfile=None,
                           hdf5_file=None,
                           n=None,
                           ch=None,
                           pos=None,
                           A1=None,
                           A2=None,
                           reffreq=None,
                           case_freq=None,
                           control_freq=None,
                           case_n=None,
                           control_n=None,
                           info=None,
                           rs=None,
                           pval=None,
                           eff=None,
                           ncol=None,
                           input_is_beta=False,
                           debug=False):
    # Check required fields are here
    assert not A2 is None, 'Require header for non-effective allele'
    assert not A1 is None, 'Require header for effective allele'
    assert not rs is None, 'Require header for RS ID'
    assert not eff is None, 'Require header for Statistics'
    assert not pval is None, 'Require header for pval'
    assert not ncol is None or not n is None, 'Require either N or NCOL information'

    if ch is None:
        assert not bimfile is None, 'Require bimfile when chromosome header not provided'
        print("Chromosome Header not provided, will use info from bim file")
    if pos is None:
        assert not bimfile is None, 'Require bimfile when position header not provided'
        print("Position Header not provided, will use info from bim file")

    snps_pos_map = {}
    if bimfile is not None:
        valid_sids = set()
        if debug:
            print('Parsing bim file: %s' % bimfile)
        with open(bimfile) as f:
            for line in f:
                l = line.split()
                # Bim file format is CHR SNP BP
                valid_sids.add(l[1])
                snps_pos_map[l[1]] = {'pos': int(l[3]), 'chrom': l[0]}
    chr_filter = 0
    pos_filter = 0
    invalid_p = 0
    chrom_dict = {}
    opener = open
    if is_gz(filename):
        opener = gzip.open
    print('Parsing summary statistics file: %s' % filename)
    with opener(filename) as f:
        header = f.readline()
        if is_gz(filename):
            header = header.decode('utf-8')
        if debug:
            print(header)
        header_dict = {}
        columns = (header.strip()).split()
        index = 0
        for col in columns:
            header_dict[col] = index
            index += 1
        assert ch is None or ch in header_dict, 'Chromosome header cannot be found in summary statistic file'
        assert A2 in header_dict, 'Non-effective allele column cannot be found in summary statistic file'
        assert A1 in header_dict, 'Effective allele column cannot be found in summary statistic file'
        assert eff in header_dict, 'Effect size column not found in summary statistic file'
        assert rs in header_dict, 'SNP ID column not found in summary statistic file'
        assert pos is None or pos in header_dict, 'Position column not found in summary statistic file'
        assert pval in header_dict, 'P Value column not found in summary statistic file'
        assert not n is None or ncol in header_dict, 'Sample size column not found in summary statistic ' \
                                                     'file and N not provided'
        # header_dict now contains the header column name for each corresponding input
        bad_chromosomes = set()
        for line in f:
            if is_gz(filename):
                line = line.decode('utf-8')
            l = (line.strip()).split()
            # get the SNP ID first
            sid = l[header_dict[rs]]
            # check the SNP ID
            if sid in valid_sids:
                # Get the chromosome information
                chrom = 0
                if not ch is None and ch in header_dict:
                    chrom = l[header_dict[ch]]
                    chrom = re.sub("chr", "", chrom)
                    if not chrom == snps_pos_map[sid]['chrom']:
                        chr_filter += 1
                else:
                    chrom = snps_pos_map[sid]['chrom']
                if not chrom in util.ok_chromosomes:
                    bad_chromosomes.add(chrom)
                    continue
                # Check if the chromosome of the SNP is correct

                pos_read = 0
                if not pos is None and pos in header_dict:
                    pos_read = int(l[header_dict[pos]])
                    if not pos_read == snps_pos_map[sid]['pos']:
                        pos_filter += 1
                        continue
                else:
                    pos_read = snps_pos_map[sid]['pos']

                if not chrom in chrom_dict:
                    chrom_dict[chrom] = {
                        'ps': [],
                        'log_odds': [],
                        'infos': [],
                        'freqs': [],
                        'betas': [],
                        'nts': [],
                        'sids': [],
                        'positions': []
                    }

                # - Start Wallace bug fix.
                # Validate the p-value first: stats.norm.ppf(p) is infinite for
                # p == 0 or p == 1, so such SNPs cannot yield a finite Z-score
                # and are skipped entirely.
                pval_read = float(l[header_dict[pval]])
                if isinf(stats.norm.ppf(pval_read)):
                    invalid_p += 1
                    continue
                chrom_dict[chrom]['ps'].append(pval_read)
                # - End Wallace bug fix.

                chrom_dict[chrom]['sids'].append(sid)
                chrom_dict[chrom]['positions'].append(pos_read)
                # Check the frequency
                if reffreq is not None and reffreq in header_dict:
                    if l[header_dict[reffreq]] == '.' or l[
                            header_dict[reffreq]] == 'NA':
                        chrom_dict[chrom]['freqs'].append(-1)
                    else:
                        chrom_dict[chrom]['freqs'].append(
                            float(l[header_dict[reffreq]]))
                elif (case_n is not None and control_n is not None
                      and case_n in header_dict and control_n in header_dict
                      and case_freq is not None and control_freq is not None
                      and case_freq in header_dict
                      and control_freq in header_dict):
                    if (l[header_dict[control_n]] == '.'
                            or l[header_dict[control_n]] == 'NA'
                            or l[header_dict[case_n]] == '.'
                            or l[header_dict[case_n]] == 'NA'
                            or l[header_dict[control_freq]] == '.'
                            or l[header_dict[control_freq]] == 'NA'
                            or l[header_dict[case_freq]] == '.'
                            or l[header_dict[case_freq]] == 'NA'):
                        chrom_dict[chrom]['freqs'].append(-1)
                    else:
                        case_N = float(l[header_dict[case_n]])
                        control_N = float(l[header_dict[control_n]])
                        N = case_N + control_N
                        a_scalar = case_N / N
                        u_scalar = control_N / N
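                        # pooled allele frequency: sample-size-weighted mean of the
                        # case and control frequencies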
                        freq = float(
                            l[header_dict[case_freq]]) * a_scalar + float(
                                l[header_dict[control_freq]]) * u_scalar
                        chrom_dict[chrom]['freqs'].append(freq)
                else:
                    chrom_dict[chrom]['freqs'].append(-1)
                # Get the INFO score
                info_sc = -1
                if info is not None and info in header_dict:
                    info_sc = float(l[header_dict[info]])
                chrom_dict[chrom]['infos'].append(info_sc)
                # - Wallace: the p-value parsing and validation that used to live here
                #   was moved to the start of this block (see the "Start Wallace" fix above).

                nt = [l[header_dict[A1]].upper(), l[header_dict[A2]].upper()]
                chrom_dict[chrom]['nts'].append(nt)

                raw_beta = float(l[header_dict[eff]])
                if not input_is_beta:
                    raw_beta = sp.log(raw_beta)
                    chrom_dict[chrom]['log_odds'].append(raw_beta)
                    beta = sp.sign(raw_beta) * stats.norm.ppf(pval_read / 2.0)
                    if n is None:
                        # Wallace: use the per-row sample size l[header_dict[ncol]],
                        # not the header index, when N is not given explicitly.
                        chrom_dict[chrom]['betas'].append(
                            beta / sp.sqrt(int(l[header_dict[ncol]])))
                    else:
                        chrom_dict[chrom]['betas'].append(beta / sp.sqrt(n))
                else:
                    beta = sp.sign(raw_beta) * stats.norm.ppf(pval_read / 2.0)
                    if n is None:
                        # Wallace: same fix as above, use the per-row sample size.
                        chrom_dict[chrom]['log_odds'].append(
                            beta / sp.sqrt(int(l[header_dict[ncol]])))
                        chrom_dict[chrom]['betas'].append(
                            beta / sp.sqrt(int(l[header_dict[ncol]])))
                    else:
                        chrom_dict[chrom]['log_odds'].append(beta / sp.sqrt(n))
                        chrom_dict[chrom]['betas'].append(beta / sp.sqrt(n))

        if len(bad_chromosomes) > 0:
            print('Ignored chromosomes: %s' %
                  (','.join(list(bad_chromosomes))))
            print(
                'Please note that only data on chromosomes 1-23 and X are parsed.'
            )

    print('SS file loaded, now sorting and storing in HDF5 file.')
    assert 'sum_stats' not in hdf5_file, 'The HDF5 file already contains a sum_stats group'
    ssg = hdf5_file.create_group('sum_stats')
    num_snps = 0
    num_non_finite = 0
    for chrom in chrom_dict:
        # Wallace for checking allele coding.
        if debug:
            for x, y, z in zip(chrom_dict[chrom]['sids'],
                               chrom_dict[chrom]['nts'],
                               chrom_dict[chrom]['ps']):
                sys.stderr.write('AFTER LOAD GWAS SUM: %s %s %s %s\n' %
                                 (x, y[0], y[1], z))
        # end - Wallace

        if debug:
            print('%d SNPs on chromosome %s' %
                  (len(chrom_dict[chrom]['positions']), chrom))
        sl = list(
            zip(chrom_dict[chrom]['positions'], chrom_dict[chrom]['sids'],
                chrom_dict[chrom]['nts'], chrom_dict[chrom]['betas'],
                chrom_dict[chrom]['log_odds'], chrom_dict[chrom]['infos'],
                chrom_dict[chrom]['freqs'], chrom_dict[chrom]['ps']))
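        # sort by genomic position so duplicated positions become adjacent
        # and can be skipped in the loop below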
        sl.sort()
        ps = []
        betas = []
        nts = []
        sids = []
        positions = []
        log_odds = []
        infos = []
        freqs = []
        prev_pos = -1
        for pos, sid, nt, beta, lo, info, frq, p in sl:
            if pos == prev_pos:
                if debug:
                    print('duplicated position %d' % pos)
                continue
            else:
                prev_pos = pos
            if not sp.isfinite(beta):
                num_non_finite += 1
                continue
            ps.append(p)
            betas.append(beta)
            nts.append(nt)
            sids.append(sid)
            positions.append(pos)
            log_odds.append(lo)
            infos.append(info)
            freqs.append(frq)
        nts = sp.array(nts, dtype=nts_dtype)
        sids = sp.array(sids, dtype=sids_dtype)
        if debug:
            if num_non_finite != 0:
                print('%d SNPs have non-finite statistics on chromosome %s' %
                      (num_non_finite, chrom))
            print('Still %d SNPs on chromosome %s' % (len(ps), chrom))
        g = ssg.create_group('chrom_%s' % chrom)
        g.create_dataset('ps', data=sp.array(ps))
        g.create_dataset('freqs', data=freqs)
        g.create_dataset('betas', data=betas)
        g.create_dataset('log_odds', data=log_odds)
        num_snps += len(log_odds)
        g.create_dataset('infos', data=infos)
        g.create_dataset('nts', data=nts)
        g.create_dataset('sids', data=sids)
        g.create_dataset('positions', data=positions)
        hdf5_file.flush()
    print('%d SNPs excluded due to invalid chromosome ID.' % chr_filter)
    print('%d SNPs excluded due to invalid chromosome position.' % pos_filter)
    print('%d SNPs excluded due to invalid P-value.' % invalid_p)
    print('%d SNPs parsed from summary statistics file.' % num_snps)
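
# The effect-size standardization in the loop above reduces to converting the
# two-sided p-value into a signed Z-score and scaling it by 1/sqrt(N). A minimal,
# self-contained sketch of that conversion is shown below; the function name and
# arguments are illustrative only, and `sp` / `stats` are assumed to be the same
# scipy imports used by the parser above.
def standardized_effect(p_value, raw_effect, n, input_is_beta=True):
    # Odds ratios are first mapped to log-odds so their sign carries the
    # direction of effect; betas are used as-is.
    if not input_is_beta:
        raw_effect = sp.log(raw_effect)
    # stats.norm.ppf(p / 2.0) is negative for p < 1, matching the sign
    # convention used in the parser above.
    z = sp.sign(raw_effect) * stats.norm.ppf(p_value / 2.0)
    return z / sp.sqrt(n)

# e.g. standardized_effect(1e-8, 1.15, 50000, input_is_beta=False)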
Example #60
0
 def infer_lEI_post(self, X_, D_i, fixI=False, I=0.):
     E = self.infer_lEI(X_, D_i, fixI=fixI, I=I)
     ns = X_.shape[0]
     return sp.log(sp.nanmean(sp.exp(E), axis=0)).reshape([1, ns])
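
# The return value above is a log-mean-exp over the sampled lEI values. If
# overflow in sp.exp is a concern, the same quantity can be computed more
# stably with scipy.special.logsumexp; the sketch below is not part of the
# original class and emulates nanmean by masking NaN entries.
import numpy as np
from scipy.special import logsumexp

def log_mean_exp(E, axis=0):
    # log(nanmean(exp(E), axis)) computed without forming exp(E) explicitly:
    # NaNs are replaced by -inf so they contribute exp(-inf) = 0 to the sum,
    # and the log of the count of valid entries is subtracted at the end.
    E = np.asarray(E, dtype=float)
    mask = np.isnan(E)
    counts = (~mask).sum(axis=axis)
    filled = np.where(mask, -np.inf, E)
    return logsumexp(filled, axis=axis) - np.log(counts)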