def InitJackArrays(self, freq, samples):
    """Initialize Jack Arrays"""
    self.iIa = sp.zeros(samples).astype(sp.float32)
    self.iQa = sp.zeros(samples).astype(sp.float32)
    self.oIa = sp.zeros(samples, dtype=sp.float32)
    self.oQa = sp.zeros(samples, dtype=sp.float32)

    ## 100 frames warmup
    sf = 0
    ef = self.rtframes2sync
    samples = sp.pi + (2*sp.pi*freq*(self.dt * sp.r_[sf:ef]))
    self.oIa[sf:ef] = self.amp * sp.cos(samples)
    self.oQa[sf:ef] = self.amp * sp.sin(samples)

    # For IQ balancing
    #self.oIa[sf:ef] = sp.cos(samples) - (sp.sin(samples)*(1+self.oalpha)*sp.sin(self.ophi))
    #self.oQa[sf:ef] = sp.sin(samples)*(1+self.oalpha)*sp.cos(self.ophi)

    ## 180 phase change
    sf = ef
    ef = ef + self.sync2fft + self.fftn + self.fft2end
    samples = (2*sp.pi*freq*(self.dt * sp.r_[sf:ef]))
    self.oIa[sf:ef] = self.amp * sp.cos(samples)
    self.oQa[sf:ef] = self.amp * sp.sin(samples)
def __init__(self, renderer=True, realtime=True, ip="127.0.0.1", port="21560"):
    # initialize base class
    GraphicalEnvironment.__init__(self)
    self.actLen = 12
    self.mySensors = sensors.Sensors(["EdgesReal"])
    self.dists = array([20.0, sqrt(2.0)*20, sqrt(3.0)*20])
    self.gravVect = array([0.0, -100.0, 0.0])
    self.centerOfGrav = zeros((1, 3), float)
    self.pos = ones((8, 3), float)
    self.vel = zeros((8, 3), float)
    self.SpringM = ones((8, 8), float)
    self.d = 60.0
    self.dt = 0.02
    self.startHight = 10.0
    self.dumping = 0.4
    self.fraktMin = 0.7
    self.fraktMax = 1.3
    self.minAkt = self.dists[0]*self.fraktMin
    self.maxAkt = self.dists[0]*self.fraktMax
    self.reset()
    self.count = 0
    self.setEdges()
    self.act(array([20.0]*12))
    self.euler()
    self.realtime = realtime
    self.step = 0
    if renderer:
        self.setRenderInterface(FlexCubeRenderInterface(ip, port))
        self.getRenderInterface().updateData(self.pos, self.centerOfGrav)
def test_mean_mode_equivalent(self):
    """Test 2 equivalent ways to deweight mean, see if they agree."""
    self.DM.nf = 1
    time_stream, ra, dec, az, el, time, mask_inds = \
        self.DM.get_all_trimmed()
    nt = len(time)
    # First way.
    Noise1 = dirty_map.Noise(time_stream, time)
    thermal_noise_levels = sp.zeros((1,)) + 0.04  # Kelvin**2
    Noise1.add_thermal(thermal_noise_levels)
    Noise1.add_mask(mask_inds)
    Noise1.deweight_time_mean()
    Noise1.deweight_time_slope()
    Noise1.add_correlated_over_f(0.01, -1.2, 0.1)
    Noise1.finalize()
    N1 = Noise1.get_inverse()
    # Second way.
    Noise2 = dirty_map.Noise(time_stream, time)
    thermal_noise_levels = sp.zeros((1,)) + 0.04  # Kelvin**2
    Noise2.add_thermal(thermal_noise_levels)
    Noise2.add_mask(mask_inds)
    Noise2.deweight_time_slope()
    Noise2.add_correlated_over_f(0.01, -1.2, 0.1)
    Noise2.freq_mode_noise += dirty_map.T_huge**2
    Noise2.finalize()
    N2 = Noise2.get_inverse()
    N2_m = N2.view()
    N2_m.shape = (nt, nt)
    self.assertTrue(sp.allclose(N2, N1))
def YICgen(X, Z, alpha, delta):
    """
    Yt = (Kt**alpha)*((exp(Zt)*Lt)**(1-alpha))
    This function generates the output levels given the previously defined
    X = sp.array([[K],[L]]) (X is a 2xT matrix with capital on the top row
    and labor on the bottom row) and Z (previously generated technology shocks).

    It = Ktp1 - (1 - delta)*Kt
    This function generates the investment levels per period.
    delta = depreciation rate of capital.

    Ct = Yt - It
    This function defines the consumption levels as the difference between
    output and investment.
    """
    K = X[0, :]
    L = X[1, :]
    t = sp.shape(X)[1]
    Y = sp.zeros(t)
    I = sp.zeros(t)
    C = sp.zeros(t)
    # solve for Y in each period t
    for i in range(t):
        Y[i] = (K[i]**alpha)*((sp.exp(Z[i])*L[i])**(1.-alpha))
    # solve for I in each period t
    for i in range(t-1):
        I[i] = K[i+1] - (1. - delta)*K[i]
    # solve for C in each period t
    for i in range(t-1):
        C[i] = Y[i] - I[i]
    return Y, I, C
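# Hypothetical usage sketch for YICgen (not from the original source): builds
# a toy capital/labor path and shock series just to show the calling
# convention; all parameter values below are illustrative assumptions.
import scipy as sp

T = 5
K_path = sp.linspace(1.0, 1.2, T)        # assumed capital path
L_path = sp.ones(T)                      # assumed constant labor
Z_path = sp.zeros(T)                     # no technology shocks
X_toy = sp.vstack((K_path, L_path))      # 2 x T: capital on top, labor below

Y_toy, I_toy, C_toy = YICgen(X_toy, Z_path, alpha=0.33, delta=0.05)
# Y_toy, I_toy, C_toy are length-T arrays; the last entries of I and C stay
# at zero because investment in period t needs K[t+1].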
def deactivated_test_extreme_index(self):
    """Set of parameters known to have caused issues in the past with
    numerical stability."""
    nf = 40
    nt = 150
    n = nf * nt
    dt = 0.26214
    BW = 1. / dt / 2.
    time_stream = sp.zeros((nf, nt))
    time_stream = al.make_vect(time_stream, axis_names=("freq", "time"))
    time = dt * (sp.arange(nt) + 50)
    N = dirty_map.Noise(time_stream, time)
    # Thermal.
    thermal = sp.zeros(nf, dtype=float) + 0.0002 * BW * 2.
    thermal[22] = dirty_map.T_infinity**2
    N.add_thermal(thermal)
    # Time mean and slope.
    N.deweight_time_mean()
    N.deweight_time_slope()
    # Extreme index over_f bit.
    mode = -sp.ones(nf, dtype=float) / sp.sqrt(nf - 1)
    mode[22] = 0
    # Parameters measured from one of the data sets.  Known to screw things
    # up.
    #N.add_over_f_freq_mode(8.128e-7, -4.586, 1.0, 1.422e-7, mode, True)
    N.add_over_f_freq_mode(0.001729, -0.777, 1.0, 1e-8, mode, True)
    #N.orthogonalize_modes()
    N.finalize()
    # Check if the fast inverse works.
    N_mat = N.get_mat()
    N_mat.shape = (n, n)
    N_inv = N.get_inverse()
    N_inv.shape = (n, n)
def testMatrixSymmetries(self):
    """
    X and P should have the same pattern as the distance matrix defined
    by the lattice.
    """
    precision = 20
    polygon = self.calc.square_lattice(5)
    X, P = self.calc.correlations(polygon, self.maple_link, precision)

    # Round X and P down so we can see if elements are distinct or not.
    X = sympy.matrix2numpy(X)
    P = sympy.matrix2numpy(P)
    X = X.astype('float')
    P = P.astype('float')

    # Get the pattern of the distance matrix.
    D = spatial.distance.cdist(polygon, polygon)
    D_pat = sp.zeros(D.shape)
    getSignatureMatrix(D_pat, sp.nditer(D), D.shape)

    # Get the pattern of X and P.
    X_pat = sp.zeros(X.shape)
    P_pat = sp.zeros(P.shape)
    getSignatureMatrix(X_pat, sp.nditer(X), X.shape)
    getSignatureMatrix(P_pat, sp.nditer(P), P.shape)

    # Check if patterns match.
    eq_(False, (D_pat - X_pat).all())
    eq_(False, (D_pat - P_pat).all())
def cov_dvrpmllbb_to_vxyz_single(d, e_d, e_vr, pmll, pmbb, cov_pmllbb, l, b):
    """
    NAME:
       cov_dvrpmllbb_to_vxyz
    PURPOSE:
       propagate distance, radial velocity, and proper motion uncertainties to
       Galactic coordinates for scalar inputs
    INPUT:
       d - distance [kpc, as/mas for plx]
       e_d - distance uncertainty [kpc, [as/mas] for plx]
       e_vr - radial velocity uncertainty [km/s]
       pmll - proper motion in l (*cos(b)) [ [as/mas]/yr ]
       pmbb - proper motion in b [ [as/mas]/yr ]
       cov_pmllbb - uncertainty covariance for proper motion
       l - Galactic longitude [rad]
       b - Galactic latitude [rad]
    OUTPUT:
       cov(vx,vy,vz) [3,3]
    HISTORY:
       2010-04-12 - Written - Bovy (NYU)
    """
    M = _K*sc.array([[pmll, d, 0.], [pmbb, 0., d]])
    cov_dpmllbb = sc.zeros((3, 3))
    cov_dpmllbb[0, 0] = e_d**2.
    cov_dpmllbb[1:3, 1:3] = cov_pmllbb
    cov_vlvb = sc.dot(M, sc.dot(cov_dpmllbb, M.T))
    cov_vrvlvb = sc.zeros((3, 3))
    cov_vrvlvb[0, 0] = e_vr**2.
    cov_vrvlvb[1:3, 1:3] = cov_vlvb
    R = sc.array([[m.cos(l)*m.cos(b), m.sin(l)*m.cos(b), m.sin(b)],
                  [-m.sin(l), m.cos(l), 0.],
                  [-m.cos(l)*m.sin(b), -m.sin(l)*m.sin(b), m.cos(b)]])
    return sc.dot(R.T, sc.dot(cov_vrvlvb, R))
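# Hypothetical usage sketch (not from the original source): shows how the
# routine above might be called.  The imports (math as m, scipy as sc) and
# the module-level constant _K are assumptions about the surrounding module;
# _K ~ 4.74047 is the usual factor converting [mas/yr]*[kpc] into km/s.
import math as m
import scipy as sc

_K = 4.74047  # assumed conversion constant, km/s per (mas/yr * kpc)

cov_pm = sc.array([[0.5**2., 0.], [0., 0.5**2.]])   # illustrative pm covariance
cov_xyz = cov_dvrpmllbb_to_vxyz_single(
    d=1.0, e_d=0.1, e_vr=2.0,                       # kpc, kpc, km/s
    pmll=5.0, pmbb=-3.0, cov_pmllbb=cov_pm,         # mas/yr
    l=m.radians(30.), b=m.radians(10.))
# cov_xyz is the 3x3 covariance of (vx, vy, vz) in km^2/s^2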
def prop_ring(self): """ Test properties for a ring, modelled as a thin walled something """ radius = 1. # make sure the simple test cases go well x = np.linspace(0,radius,100000) y = np.sqrt(radius*radius - x*x) x = np.append(-x[::-1], x) y_up = np.append(y[::-1], y) tw1 = np.ndarray((len(x),3), order='F') tw1[:,0] = x tw1[:,1] = y_up tw1[:,2] = 0.01 tw2 = np.ndarray((len(x),3), order='F') y_low = np.append(-y[::-1], -y) tw2[:,0] = x tw2[:,1] = y_low tw2[:,2] = 0.01 # tw1 and tw2 need to be of the same size, give all zeros upper_bound = sp.zeros((4,2), order='F') lower_bound = sp.zeros((4,2), order='F') st_arr, EA, EIxx, EIyy = properties(upper_bound, lower_bound, tw1=tw1, tw2=tw2, rho=1., rho_tw=1., E=1., E_tw=1.) headers = HawcPy.ModelData().st_column_header_list print '\nRING PROPERTIES' for index, item in enumerate(headers): tmp = item + ' :' print tmp.rjust(8), st_arr[index]
def fgmres(self,rhs,tol=1e-6,restrt=None,maxiter=None,callback=None): if maxiter == None: maxiter = len(rhs) if restrt == None: restrt = 2*maxiter # implemented as in [Saad, 1993] # start x = zeros(len(rhs)) H = zeros((restrt+1, restrt)) V = zeros((len(rhs),restrt)) Z = zeros((len(rhs),restrt)) # Arnoldi process (with modified Gramm-Schmidt) res = 1. j = 0 r = rhs - self.point.matvec(x) beta = norm(r) V[:,0]=r/beta while j < maxiter and res > tol: Z[:,j] = self.point.psolve(V[:,j]) w = self.point.matvec(Z[:,j]) for i in range(j+1): H[i,j]=dot(w,V[:,i]) w = w - H[i,j]*V[:,i] H[j+1,j] = norm(w) V[:,j+1]=w/H[j+1,j] e = zeros(j+2) e[0]=1. y, res, rank, sing_val = lstsq(H[:j+2,:j+1],beta*e) j += 1 print "# GMRES| iteration :", j, "res: ", res/beta self.resid = r_[self.resid,res/beta] Zy = dot(Z[:,:j],y) x = x + Zy info = 1 return (x,info)
def overlaps(sts, window): """Calculates a "boolean" dictonary, indicating for every spike in every spiketrain in sts whether it belongs to an overlap or not""" n = len(sts) O = {} for k in sts.keys(): O[k] = sp.zeros(sts[k].shape, dtype=sp.bool_) Onums = sp.zeros(len(sts)) # run over all pairs of spike trains in G for i in xrange(n): for j in xrange(i + 1, n): # for every pair run over all spikes in i and check whether a # spike in j overlaps trainI = sts[sts.keys()[i]] trainJ = sts[sts.keys()[j]] idxI = 0 idxJ = 0 while idxI < len(trainI) and idxJ < len(trainJ): # Overlapping? if abs(trainI[idxI] - trainJ[idxJ]) < window: # Every spike can only be in one or no overlap. # prevents triple counting if O[sts.keys()[i]][idxI] == 0: O[sts.keys()[i]][idxI] = 1 Onums[i] += 1 if O[sts.keys()[j]][idxJ] == 0: O[sts.keys()[j]][idxJ] = 1 Onums[j] += 1 if trainI[idxI] < trainJ[idxJ]: idxI += 1 else: idxJ += 1 ret = {'O': O, 'Onums': Onums} return ret
def __init__(self, imageData=None):
    MarkerWindowInteractor.__init__(self)
    print "PlaneWidgetsXYZ.__init__()"
    self.vtksurface = None
    self.interactButtons = (1, 2, 3)
    self.sharedPicker = vtk.vtkCellPicker()
    #self.sharedPicker.SetTolerance(0.005)
    self.SetPicker(self.sharedPicker)

    self.pwX = vtk.vtkImagePlaneWidget()
    self.pwY = vtk.vtkImagePlaneWidget()
    self.pwZ = vtk.vtkImagePlaneWidget()

    self.textActors = {}
    self.boxes = {}

    self.set_image_data(imageData)
    self.Render()

    self.vtk_translation = zeros(3, 'd')
    self.vtk_rotation = zeros(3, 'd')
def __init__(self, npts, k=None):
    self.npts = npts
    from ..gaussian_process import GaussianProcess
    if k is None:
        from .squared_exponential import SquaredExponentialKernel
        k = SquaredExponentialKernel(fixed_params=[True, False])
    self.gp = GaussianProcess(k, X=scipy.zeros(npts), y=scipy.zeros(npts))
def extract_spikes(data, epochs):
    """extract spike waveforms according to :epochs: from :data:

    :type data: ndarray
    :param data: the signal to extract from [samples, channels]
    :type epochs: ndarray
    :param epochs: epochs to cut [[start,end]], should have common length!
    :returns: ndarray, extracted spike waveforms from :data:
    """

    # inits and checks
    if not all(map(isinstance, [data, epochs], [sp.ndarray] * 2)):
        raise TypeError('pass sp.ndarrays!')
    ns, nc = epochs.shape[0], data.shape[1]
    if epochs.shape[0] == 0:
        return sp.zeros((0, 0))
    tf = epochs[0, 1] - epochs[0, 0]

    # extract
    rval = sp.zeros((ns, tf * nc), dtype=data.dtype)
    for s in xrange(ns):
        for c in xrange(nc):
            correct_beg = min(0, epochs[s, 0])
            correct_end = max(0, epochs[s, 1] - data.shape[0])
            rval[s, c * tf - correct_beg:(c + 1) * tf - correct_end] = \
                data[epochs[s, 0] - correct_beg:epochs[s, 1] - correct_end, c]
    return rval
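# Hypothetical usage sketch for extract_spikes (not from the original source):
# cuts two 10-sample windows out of a 2-channel signal; the data are made up.
import scipy as sp

data_demo = sp.random.randn(1000, 2)            # [samples, channels]
epochs_demo = sp.array([[100, 110],
                        [400, 410]])            # [[start, end]], equal lengths
spikes = extract_spikes(data_demo, epochs_demo)
# spikes.shape == (2, 20): one row per event, channels concatenated
# (10 samples of channel 0 followed by 10 samples of channel 1).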
def GetSqAmpl(filename,Nsamp=1,channel=None,V=None,O=None,r=sc.zeros((1,2)),rp=sc.zeros((1,2)),q=None): """ For the transverse channel: Calculates and returns Sq(sample,n,r)=<q,r|q,n><q,n|Sqp|GS>. Coordinates are in the order Sq(sample,n,r). For the longitudinal channel: input r has no effect. Calculates and return Sq(sample,n)=|<q,n|Sqz|GS>|^2. """ if type(filename)==str: filename=[filename] attrs=GetAttr(filename[0]) if channel==None: channel=attrs['channel'] L=int(attrs['L']) if q==None: q=[float(attrs['qx']/L),float(attrs['qy'])/L] else: q=[float(q[0])/L,float(q[1])/L] shift=None if 'phasex' in attrs.keys(): shift=[attrs['phasex']/2.0,attrs['phasey']/2.0] else: shift=[attrs['phase_shift_x']/2.0,attrs['phase_shift_y']/2.0] phi=attrs['phi'] neel=attrs['neel'] if O==None or V== None: H,O,E,V=GetEigSys(filename,Nsamp=Nsamp,channel=channel,q=q) if channel=='long': return sf.sqwlongamp(V,O,L,L,q,shift,phi,neel) elif channel=='trans': return sf.sqwtransamp(V,O,L,L,q,shift,phi,neel,r,rp) pass
def _execute(self, x, *args, **kwargs):
    """run the clustering on a set of observations"""

    # init
    self._labels = sp.zeros((len(self.crange) * self.repeats,
                             x.shape[0]), dtype=int) - 1
    self._gof = sp.zeros(len(self.crange) * self.repeats,
                         dtype=self.dtype)
    self._ll = sp.zeros(len(self.crange) * self.repeats,
                        dtype=self.dtype)
    self._parameters = [None] * len(self.crange) * self.repeats

    # clustering
    fit_func = {
        'kmeans': self._fit_kmeans,
        'gmm': self._fit_gmm,
        #'vbgmm': self._fit_vbgmm,
        'dpgmm': self._fit_dpgmm,
        'spectral': self._fit_spectral,
        'meanshift': self._fit_mean_shift,
        'dbscan': self._fit_dbscan
    }[self.clus_type](x)

    self._winner = sp.nanargmin(self._gof)
    self.parameters = self._parameters[self._winner]
    self.labels = self._labels[self._winner]
def rlsloo_ll(V, D, Y, lambdas=None):
    """
    Input:
        V, D    = eigenvectors and eigenvalues from the eigenvalue decomposition
        lambdas = regularization values used in the computation

    Output:
        cs   = matrix of function weights, one set per lambda
        loos = total LOO error vector for nonlinear RLS, one entry per lambda
    """
    n = V.shape[0]
    cl = Y.shape[1]
    l = len(lambdas)

    cs = sp.zeros((l, cl, n))
    loos = sp.zeros((l, cl))
    loos[:] = sp.inf

    for i in range(l):
        # print D, Y, lambdas[i]
        csll, looerrsll = rlsloo_ll1(V, D, Y, lambdas[i])
        cs[i][:][:] = csll
        loos[i][:] = sp.sqrt(sp.sum(looerrsll**2, axis=0))
    return cs, loos
def make_line(m=1.0, b=25.0, points=100, xstep=1.0, ysigma=5.0, data=1):
    # generate arrays
    x = sc.zeros(points)
    y = sc.zeros(points)
    y_center = sc.zeros(points)
    # initialize random seed
    nu.random.seed(10)
    # fill arrays
    for i in range(points):
        x[i] = i*xstep
    y_center = m*x + b
    y = (ysigma*nu.random.randn(points)) + y_center
    print x, y
    # save array to file
    if data == 1:
        data_out = raw_input('data file name, ending in .txt: ')
        if data_out == '':
            data_out = 'new_file.txt'
        # open once and write one "x,<tab>y" line per point; reopening the
        # file in 'w' mode inside the loop would truncate it every iteration
        f = open(data_out, "w")
        for j in range(points):
            f.write(str(x[j]) + ',' + '\t' + str(y[j]) + '\n')
            #f.write(y[j])
        f.close()
        print 'file', data_out, 'successfully written and closed'
    # plot line
    plt.scatter(x, y)
    plt.show()
    print 'ending program...'
def determine_sign_of_emat(emat, wt_seq):
    """determine what the correct sign is for an energy matrix. We will use
    the assumption that the wild type sequence must be better binding than
    a random sequence.

    INPUTS:
        emat: energy matrix
        wt_seq: wild type sequence of energy matrix

    OUTPUT:
        emat: energy matrix with correct sign
    """
    n_rand = 1000  # number of random sequences to check
    e_rand = sp.zeros(n_rand)
    # convert sequence to matrix
    seq_mat = seq2mat(wt_seq)
    e_wt = sp.sum(emat*seq_mat)

    for i in range(n_rand):
        seq_rand = sp.zeros((4, len(wt_seq)))
        for j in range(len(wt_seq)):
            seq_rand[sp.random.randint(4), j] = 1
        e_rand[i] = sp.sum(emat*seq_rand)
    if e_wt < sp.mean(e_rand):
        return emat
    else:
        return -emat
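# Hypothetical usage sketch (not from the original source).  The real seq2mat
# lives elsewhere in this codebase; the minimal one-hot encoder below is an
# assumption, used only to make the sketch self-contained.
import scipy as sp

def seq2mat_demo(seq, alphabet='ACGT'):
    """One-hot encode a DNA sequence into a 4 x len(seq) matrix."""
    mat = sp.zeros((4, len(seq)))
    for j, base in enumerate(seq):
        mat[alphabet.index(base), j] = 1
    return mat

seq2mat = seq2mat_demo                           # stand-in for the real helper
wt_demo = 'ACGTACGT'
emat_demo = sp.random.randn(4, len(wt_demo))     # made-up energy matrix
emat_fixed = determine_sign_of_emat(emat_demo, wt_demo)
# emat_fixed is either emat_demo or -emat_demo, whichever puts the wild type
# energy below the mean energy of random sequences (i.e. better binding).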
def gap(data, refs=None, nrefs=20, ks=range(1, 11), method=None):
    shape = data.shape
    if refs is None:
        tops = data.max(axis=0)
        bots = data.min(axis=0)
        dists = scipy.matrix(scipy.diag(tops - bots))
        rands = scipy.random.random_sample(size=(shape[0], shape[1], nrefs))
        for i in range(nrefs):
            rands[:, :, i] = rands[:, :, i]*dists + bots
    else:
        rands = refs

    gaps = scipy.zeros((len(ks),))
    for (i, k) in enumerate(ks):
        g1 = method(n_clusters=k).fit(data)
        (kmc, kml) = (g1.cluster_centers_, g1.labels_)
        disp = sum([euclidean(data[m, :], kmc[kml[m], :])
                    for m in range(shape[0])])

        refdisps = scipy.zeros((rands.shape[2],))
        for j in range(rands.shape[2]):
            g2 = method(n_clusters=k).fit(rands[:, :, j])
            (kmc, kml) = (g2.cluster_centers_, g2.labels_)
            refdisps[j] = sum([euclidean(rands[m, :, j], kmc[kml[m], :])
                               for m in range(shape[0])])

        gaps[i] = scipy.log(scipy.mean(refdisps)) - scipy.log(disp)
    return gaps
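# Hypothetical usage sketch for the gap statistic above (not from the original
# source): `method` is expected to be a scikit-learn-style estimator factory,
# and KMeans is one reasonable choice.  Assumes the module-level names the
# function relies on (scipy, euclidean) are imported as below.
import scipy
from scipy.spatial.distance import euclidean
from sklearn.cluster import KMeans

data_demo = scipy.random.random_sample((200, 2))        # made-up 2-D points
gaps_demo = gap(data_demo, nrefs=10, ks=range(1, 6), method=KMeans)
best_k = list(range(1, 6))[int(scipy.argmax(gaps_demo))]
# A larger gap value suggests a better k; taking the argmax is a simple
# (if rough) selection rule.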
def Xgen(X0, Z, PP, QQ, Xbar):
    """
    This function generates a history of X given a history of technology
    shocks (Z), a P matrix, a Q matrix, and an initial X (X0).

    Note Xt(tilde) = P*Xt-1(tilde) + Q*Zt(tilde)
         Xt = Xbar*e^Xt(tilde)
    """
    num_endog = sp.shape(PP)[1]
    T = len(Z)  # sp.shape(Z)[0]
    #display(T)
    X = sp.zeros((num_endog, T))
    X[:, 0] = X0
    for i in range(1, T):
        Zt = Z[i]
        Xt_1 = sp.zeros((num_endog, 1))
        for j in range(num_endog):
            Xt_1[j, 0] = X[j, i-1]
        Xt = sp.dot(PP, Xt_1) + sp.dot(QQ, Zt)
        for k in range(num_endog):
            X[k, i] = Xt[k, 0]
    exponents = sp.exp(X)
    for p in range(T):
        for q in range(num_endog):
            X[q, p] = Xbar[0, q]*exponents[q, p]
    return X
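# Hypothetical usage sketch for Xgen (not from the original source): a
# one-variable AR(1)-style illustration of the calling convention; PP, QQ,
# Xbar and the shock series are made-up values.
import scipy as sp

PP_demo = sp.array([[0.9]])               # persistence of the endogenous state
QQ_demo = sp.array([[0.5]])               # loading on the technology shock
Xbar_demo = sp.array([[2.0]])             # steady-state level (1 x num_endog)
Z_demo = 0.01 * sp.random.randn(50)       # made-up shock history, T = 50
X0_demo = sp.zeros(1)                     # start at the steady state (tilde = 0)

X_path = Xgen(X0_demo, Z_demo, PP_demo, QQ_demo, Xbar_demo)
# X_path has shape (1, 50): log-deviations are simulated recursively and then
# mapped back to levels via Xbar * exp(X_tilde).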
def X_binding(par, qw): """Computation of the exciton binding energy given the electron and hole wave function and the variational parameter par Defined in Eq. 15(b) of Mares and Chuang, J. Appl. Phys. 74, 1388 (1993) Keyword arguments: par -- variational parameter in [nm] qw -- Object containing the electronic structure related magnitudes. See QW. """ lam = par * 1.0E-9 Ry = spc.physical_constants['Rydberg constant times hc in eV'][0] R0 = Ry*qw.mu/qw.eps_r**2 aB = spc.physical_constants['Bohr radius'][0] ax = aB*qw.eps_r/qw.mu beta = ax/lam C0 = R0*beta**2 C1 = -R0*4.0*beta Ze = sp.zeros([1,qw.grid.shape[0]]) Ze[0,:] = qw.grid[:] Zh = sp.zeros([qw.grid.shape[0],1]) Zh[:,0] = qw.grid[:] X = 2.0*sp.absolute(Ze-Zh)/par Fe = sp.zeros([1,qw.grid.shape[0]]) Fe[0,:] = qw.Elec.wf[:]**2 Fh = sp.zeros([qw.grid.shape[0],1]) Fh[:,0] = qw.Hole.wf[:]**2 Int = Fe * Fh * G_int(X) Val = sp.trapz( sp.trapz(Int, Ze.flatten() ), Zh.flatten()) return C0 + C1 * Val
def _LMLgrad_lik(self,hyperparams): """derivative of the likelihood parameters""" logtheta = hyperparams['covar'] try: KV = self.get_covariances(hyperparams) except linalg.LinAlgError: LG.error("exception caught (%s)" % (str(hyperparams))) return 1E6 #loop through all dimensions #logdet term: Kd = 2*KV['Knoise'] dldet = 0.5*(Kd*KV['Si']).sum(axis=0) #quadratic term y_roti = KV['y_roti'] dlquad = -0.5 * (y_roti * Kd * y_roti).sum(axis=0) if VERBOSE: dldet_ = SP.zeros([self.d]) dlquad_ = SP.zeros([self.d]) for d in xrange(self.d): _K = KV['K'] + SP.diag(KV['Knoise'][:,d]) _Ki = SP.linalg.inv(_K) dldet_[d] = 0.5* SP.dot(_Ki,SP.diag(Kd[:,d])).trace() dlquad_[d] = -0.5*SP.dot(self.y[:,d],SP.dot(_Ki,SP.dot(SP.diag(Kd[:,d]),SP.dot(_Ki,self.y[:,d])))) assert (SP.absolute(dldet-dldet_)<1E-3).all(), 'outch' assert (SP.absolute(dlquad-dlquad_)<1E-3).all(), 'outch' LMLgrad = dldet + dlquad RV = {'lik': LMLgrad} return RV
def makesumrule(ptype, plen, ts, lagtype='centered'):
    """ This function will return the sum rule.
        Inputs
            ptype - The type of pulse.
            plen - Length of the pulse in seconds.
            ts - Sample time in seconds.
            lagtype - Can be centered, forward or backward.
        Output
            sumrule - A 2 x nlags numpy array that holds the summation rule.
    """
    nlags = sp.round_(plen/ts)
    if ptype.lower() == 'long':
        if lagtype == 'forward':
            arback = -sp.arange(nlags, dtype=int)
            arforward = sp.zeros(nlags, dtype=int)
        elif lagtype == 'backward':
            arback = sp.zeros(nlags, dtype=int)
            arforward = sp.arange(nlags, dtype=int)
        else:
            arback = -sp.ceil(sp.arange(0, nlags/2.0, 0.5)).astype(int)
            arforward = sp.floor(sp.arange(0, nlags/2.0, 0.5)).astype(int)
        sumrule = sp.array([arback, arforward])
    elif ptype.lower() == 'barker':
        sumrule = sp.array([[0], [0]])
    return sumrule
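# Hypothetical usage sketch for makesumrule (not from the original source):
# a long pulse of 280 microseconds sampled every 20 microseconds gives 14
# lags; the numbers are illustrative only.
sumrule_demo = makesumrule('long', plen=280e-6, ts=20e-6, lagtype='centered')
# sumrule_demo has shape (2, 14); row 0 holds the (negative) backward offsets
# and row 1 the forward offsets that are summed together for each lag.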
def split(self, sagi, meri): """ utilizes geometry.grid to change the rectangle into a generalized surface, it is specified with a single set of basis vectors to describe the meridonial, normal, and sagittal planes.""" ins = float((sagi - 1))/sagi inm = float((meri - 1))/meri stemp = self.norm.s/sagi mtemp = self.meri.s/meri z,theta = scipy.meshgrid(scipy.linspace(-self.norm.s*ins, self.norm.s*ins, sagi), scipy.linspace(-self.meri.s*inm, self.meri.s*inm, meri)) vecin =geometry.Vecr((self.sagi.s*scipy.ones(theta.shape), theta+scipy.pi/2, scipy.zeros(theta.shape))) #this produces an artificial # meri vector, which is in the 'y_hat' direction in the space of the cylinder # This is a definite patch over the larger problem, where norm is not normal # to the cylinder surface, but is instead the axis of rotation. This was # done to match the Vecr input, which works better with norm in the z direction pt1 = geometry.Point(geometry.Vecr((scipy.zeros(theta.shape), theta, z)), self) pt1.redefine(self._origin) vecin = vecin.split() x_hat = self + pt1 #creates a vector which includes all the centers of the subsurface out = [] #this for loop makes me cringe super hard for i in xrange(meri): try: temp = [] for j in xrange(sagi): inp = self.rot(vecin[i][j]) temp += [Cyl(geometry.Vecx(x_hat.x()[:,i,j]), self._origin, [2*stemp,2*mtemp], self.sagi.s, vec=[inp, self.norm.copy()], flag=self.flag)] out += [temp] except IndexError: inp = self.rot(vecin[i]) out += [Cyl(geometry.Vecx(x_hat.x()[:,i]), self._origin, [2*stemp,2*mtemp], self.norm.s, vec=[inp, self.norm.copy()], flag=self.flag)] return out
def crossOver(self, parents, nbChildren): """ generate a number of children by doing 1-point cross-over """ """ change as the <choice> return quite often the same p1 and even several time p2 was return the same than p1 """ xdim = self.numParameters shuffle(parents) children = [] for i in range(len(parents)/2): p1 = parents[i] p2 = parents[i+(len(parents)/2)] if xdim < 2: children.append(p1) children.append(p2) else: point = choice(range(xdim-1)) point += 1 res = zeros(xdim) res[:point] = p1[:point] res[point:] = p2[point:] children.append(res) res = zeros(xdim) res[:point] = p2[:point] res[point:] = p1[point:] children.append(res) shuffle(children) if len(children) > nbChildren: children = children[:nbChildren] elif len(children) < nbChildren: k = True while k: children +=sample(children,len(children)) if len(children) >= nbChildren: children = children[:nbChildren] k = False return children
def __init__(self,linear_solver,parameters=None): """ input: ===== linear_solver (LinearSolver) contains the linear solver that will be used in each Newton iteration parameters (dict) look at the docstring of getDefaultParameters() to find out which fields there are behaviour: ========= This class implements a Newton solver that stops when the maximum number of iterations has been reached, OR the relative OR absolute tolerance have been reached. """ Solver.Solver.__init__(self,parameters) if isinstance(linear_solver,LinearSolver.LinearSolver): self.linsolv=linear_solver else: raise TypeError, "input argument " + linear_solver \ + " should be a linear solver" self.nb_newt = 0 self.newton_residual = zeros((0,)) self.newton_res_norm = zeros(0,) # self.newton_states = zeros(( param['max_iter'],len( self.point.getCurrentGuess()) )) self.newton_states = zeros(( 0 ))
def init(self, values):
    self.values = values.copy()
    self.prev_values = values.copy()
    self.more_prev_values = values.copy()
    self.previous_gradient = zeros(values.shape)
    self.step = zeros(values.shape)
    self.previous_error = float("-inf")
def makeinputh5(Iono, basedir):
    """This will make a h5 file for the IonoContainer that can be used as
    starting points for the fitter. The ionocontainer taken will be averaged
    over the x and y dimensions of space to make an average value of the
    parameters for each altitude.

    Inputs
        Iono - An instance of the Ionocontainer class that will be averaged
            over so it can be used for fitter starting points.
        basedir - A string that holds the directory that the file will be
            saved to.
    """
    # Get the parameters from the original data
    Param_List = Iono.Param_List
    dataloc = Iono.Cart_Coords
    times = Iono.Time_Vector
    velocity = Iono.Velocity
    zlist, idx = sp.unique(dataloc[:, 2], return_inverse=True)
    siz = list(Param_List.shape[1:])
    vsiz = list(velocity.shape[1:])

    datalocsave = sp.column_stack((sp.zeros_like(zlist),
                                   sp.zeros_like(zlist), zlist))
    outdata = sp.zeros([len(zlist)] + siz)
    outvel = sp.zeros([len(zlist)] + vsiz)
    # Do the averaging across space
    for izn, iz in enumerate(zlist):
        arr = sp.argwhere(idx == izn)
        outdata[izn] = sp.mean(Param_List[arr], axis=0)
        outvel[izn] = sp.mean(velocity[arr], axis=0)

    Ionoout = IonoContainer(datalocsave, outdata, times, Iono.Sensor_loc,
                            ver=0, paramnames=Iono.Param_Names,
                            species=Iono.Species, velocity=outvel)
    Ionoout.saveh5(basedir/'startdata.h5')
def GetMat(self, s, sym=False): """Return the element transfer matrix for the RigidMass element. If sym=True, 's' must be a symbolic string and a matrix of strings will be returned. Otherwise, 's' is a numeric value (probably complex) and the matrix returned will be complex.""" if sym: myparams=self.symparams else: myparams=self.params if self.maxsize==4 and self.usez: rigidmat1=rigidmatz(s,myparams) else: rigidmat1=rigidmaty(s,myparams) if self.maxsize==4: return rigidmat1 elif self.maxsize>4: rigidmat2=rigidmatz(s,myparams) zmat=scipy.zeros(scipy.shape(rigidmat2)) if self.maxsize==8: bigmat1=c_[rigidmat1,zmat] bigmat2=c_[zmat,rigidmat2] temp=r_[bigmat1,bigmat2] return Transform8by8(temp) elif self.maxsize==12: rigidmat0=rigidmatx(s,myparams) row1=c_[rigidmat0,zmat,zmat] t1=c_[rigidmat1,zmat] t2=c_[zmat,rigidmat2] temp=r_[t1,t2] temp=Transform8by8(temp) part2=c_[scipy.zeros((8,4)),temp] return r_[row1, part2]
def EBTransitPhase(tset, kid_x): ebp = atpy.Table('%s/eb_pars.txt' %dir, type = 'ascii') lc = tset.tables[1] time = lc.TIME flux = lc.PDCSAP_FLUX nobs = len(time) lg = scipy.isfinite(time) pylab.figure(52) pylab.clf() pylab.plot(time[lg], flux[lg]) npl = 2 phase = scipy.zeros((npl, nobs)) inTr = scipy.zeros((npl, nobs), 'int') period = ebp.P[ebp.KID == kid_x] for ipl in scipy.arange(npl): if ipl == 0: t0 = ebp.Ep1[ebp.KID == kid_x] if ipl == 1: t0 = ebp.Ep2[ebp.KID == kid_x] if ipl == 0: dur = ebp.Dur1[ebp.KID == kid_x] if ipl == 1: dur = ebp.Dur2[ebp.KID == kid_x] dur /= period counter = 0 while (time[lg] - t0).min() < 0: t0 -= period counter += 1 if counter > 1000: break ph = ((time - t0) % period) / period ph[ph < -0.5] += 1 ph[ph > 0.5] -= 1 phase[ipl,:] = ph inTr[ipl,:] = (abs(ph) <= dur/1.5) return phase, inTr
eigenvalue due to noise."""

import sys
import scipy
import Gnuplot

import arnoldiDTM
import Matrix

if __name__ == "__main__":
    M = 2
    A = Matrix.Diagonal(scipy.arange(M, 0, -1))
    I = M + 1
    q = scipy.zeros(M)
    q[0] = 1

    Chart = Gnuplot.Gnuplot()
    Chart.xlabel('Standard Deviation of Noise')
    Chart.ylabel('Mean Eigenvalues')

    Means = []
    Noises = scipy.arange(1, 500, 10)
    for N in Noises:
        eValues = []
        for i in xrange(100):
            adtm = arnoldiDTM.arnoldiDTM(A, Noise=N, NoiseShape='normal')
            Values, Vectors = adtm.arnoldi(q, I)
            eValues.append(Values[-1])
def main(): # parse cmd arguments parser = getParser() parser.parse_args() args = getArguments(parser) # prepare logger logger = Logger.getInstance() if args.debug: logger.setLevel(logging.DEBUG) elif args.verbose: logger.setLevel(logging.INFO) # build output image name image_superimposition_name = args.folder + '/' + args.image1.split( '/')[-1][:-4] + '_superimp' image_superimposition_name += args.image1.split('/')[-1][-4:] # check if output image exists if not args.force: if os.path.exists(image_superimposition_name): raise ArgumentError( 'The output image {} already exists. Please provide the -f/force flag, if you wish to override it.' .format(image_superimposition_name)) # load image1 using nibabel logger.info('Loading image {}...'.format(args.image1)) image1_data, image1_header = load(args.image1) # load image2 using nibabel logger.info('Loading image {}...'.format(args.image2)) image2_data, _ = load(args.image2) # check input images to be valid logger.info('Checking input images for correctness...') if image1_data.shape != image2_data.shape: raise ArgumentError( 'The two input images shape do not match with 1:{} and 2:{}'. format(image1_data.shape, image2_data.shape)) int_types = (scipy.uint, scipy.uint0, scipy.uint8, scipy.uint16, scipy.uint32, scipy.uint64, scipy.uintc, scipy.uintp, scipy.int_, scipy.int0, scipy.int8, scipy.int16, scipy.int32, scipy.int64, scipy.intc, scipy.intp) if image1_data.dtype not in int_types: raise ArgumentError( 'Input image 1 is of type {}, an int type is required.'.format( image1_data.dtype)) if image2_data.dtype not in int_types: raise ArgumentError( 'Input image 2 is of type {}, an int type is required.'.format( image2_data.dtype)) if 4294967295 < abs(image1_data.min()) + image1_data.max() + abs( image2_data.min()) + image2_data.max(): raise ArgumentError( 'The input images contain so many (or not consecutive) labels, that they will not fit in a uint32 range.' ) # create superimposition of the two label images logger.info('Creating superimposition image...') image_superimposition_data = scipy.zeros(image1_data.shape, dtype=scipy.uint32) translation = {} label_id_counter = 0 for x in range(image1_data.shape[0]): for y in range(image1_data.shape[1]): for z in range(image1_data.shape[2]): label1 = image1_data[x, y, z] label2 = image2_data[x, y, z] if not (label1, label2) in translation: translation[(label1, label2)] = label_id_counter label_id_counter += 1 image_superimposition_data[x, y, z] = translation[(label1, label2)] # save resulting superimposition image logger.info( 'Saving superimposition image as {} in the same format as input image...' .format(image_superimposition_name)) save(image_superimposition_data, args.output, image1_header, args.force) logger.info('Successfully terminated.')
pdf[[ 'sales', 'resale', 'type', 'price', 'engine_s', 'horsepow', 'wheelbas', 'width', 'length', 'curb_wgt', 'fuel_cap', 'mpg', 'lnsales']] = pdf[['sales', 'resale', 'type', 'price', 'engine_s', 'horsepow', 'wheelbas', 'width', 'length', 'curb_wgt', 'fuel_cap', 'mpg', 'lnsales']].apply(pd.to_numeric, errors='coerce') pdf = pdf.dropna() pdf = pdf.reset_index(drop=True) print ("Shape of dataset after cleaning: ", pdf.size) pdf.head(5) featureset = pdf[['engine_s', 'horsepow', 'wheelbas', 'width', 'length', 'curb_wgt', 'fuel_cap', 'mpg']] from sklearn.preprocessing import MinMaxScaler x=featureset.values min_max_scaler=MinMaxScaler() feature_mtx=min_max_scaler.fit_transform(x) print(feature_mtx [0:5]) #In this part we use Scipy package to cluster the dataset: First, we calculate the distance matrix. import scipy leng = feature_mtx.shape[0] D = scipy.zeros([leng,leng]) for i in range(leng): for j in range(leng): D[i,j] = scipy.spatial.distance.euclidean(feature_mtx[i], feature_mtx[j])
def calc_Ls(self, x_surface, geom):
    '''Emission of surface, as a radiance'''
    return s.zeros((self.nwl,))
def LV4(): """a discrete-time version of the LV model""" import scipy as sc import scipy.stats as stats import scipy.integrate as integrate import sys def CR_t1(pops, t=0): """a discrete-time version of the LV model with iteration""" Rt = pops[0] Ct = pops[1] eps = stats.norm.rvs(0.2, size=1)[0] Rt1 = Rt * (1 + (r + eps) * (1 - Rt / k) - a * Ct) Ct1 = Ct * (1 - z + e * a * Rt) return sc.array([Rt1, Ct1]) ### main inputs: if len(sys.argv) != 5: r = 1.0 a = 0.1 z = 1.5 e = 0.75 print("using default parameters") else: r = float(sys.argv[1]) a = float(sys.argv[2]) z = float(sys.argv[3]) e = float(sys.argv[4]) k = 30 #define constant parameter, without exceeding the carrying capacity t = sc.linspace(0, 15, 20) #define time series(1000 number between 0-15) ## generate data R0 = 10 C0 = 5 RC0 = sc.array([R0, C0]) pops = sc.zeros((len(t), 2)) for i in range(0, len(t)): pops[i] = RC0 RC0 = CR_t1(RC0, t=t) print("the final population density is\n", str(pops[-1, 0]), "for Resourses\n", pops[-1, 1], "for Consumers") import matplotlib.pylab as p import matplotlib.pyplot as plt from matplotlib.backends.backend_pdf import PdfPages with PdfPages("../results/LV4_models.pdf") as pdf: f1 = p.figure() p.plot(t, pops[:, 0], 'g-', label='Resource density') # Plot p.plot(t, pops[:, 1], 'b-', label='Consumer density') p.grid() p.legend(loc='best') p.xlabel('Time') p.ylabel('Population density') p.title('Consumer-Resource population dynamics\n(Discrete-time)') pdf.savefig(f1) #Save figure f2 = p.figure() p.plot(pops[:, 0], pops[:, 1], 'r-') p.grid() p.legend(loc='best') p.xlabel('Resource density') p.ylabel('Consumer density') p.title( 'Consumer-Resource population dynamics\n(Discrete time)\nr=%s,a=%s,z=%s,e=%s,k=%s' % (r, a, z, e, k)) pdf.savefig(f2) #Save figure
def lognet(x, is_sparse, irs, pcs, y, weights, offset, parm, nobs, nvars, jd, vp, cl, ne, nx, nlam, flmin, ulam, thresh, isd, intr, maxit, kopt, family): # load shared fortran library glmlib = loadGlmLib() print("WEIGHTS") print(weights.shape) print("Y") print(y.shape) # noo = y.shape[0] if len(y.shape) > 1: nc = y.shape[1] else: nc = 1 if (noo != nobs): raise ValueError( 'x and y have different number of rows in call to glmnet') if nc == 1: classes, sy = scipy.unique(y, return_inverse=True) nc = len(classes) indexes = scipy.eye(nc, nc) y = indexes[sy, :] else: classes = scipy.arange(nc) + 1 # 1:nc # if family == 'binomial': if nc > 2: raise ValueError( 'More than two classes in y. use multinomial family instead') else: nc = 1 y = y[:, [1, 0]] # if (len(weights) != 0): t = weights > 0 if ~scipy.all(t): t = scipy.reshape(t, (len(y), )) y = y[t, :] x = x[t, :] weights = weights[t] nobs = scipy.sum(t) else: t = scipy.empty([0], dtype=scipy.integer) # if len(y.shape) == 1: mv = len(y) ny = 1 else: mv, ny = y.shape print("BEFORE tile") print("WEIGHTS") print(weights.shape) print("Y") print(y.shape) y = y * scipy.tile(weights, (ny, 1)).T # if len(offset) == 0: offset = y * 0 is_offset = False else: if len(t) != 0: offset = offset[t, :] do = offset.shape if do[0] != nobs: raise ValueError( 'offset should have the same number of values as observations in binominal/multinomial call to glmnet' ) if nc == 1: if do[1] == 1: offset = scipy.column_stack((offset, -offset)) if do[1] > 2: raise ValueError( 'offset should have 1 or 2 columns in binomial call to glmnet' ) if (family == 'multinomial') and (do[1] != nc): raise ValueError( 'offset should have same shape as y in multinomial call to glmnet' ) is_offset = True # now convert types and allocate memory before calling # glmnet fortran library ###################################### # --------- PROCESS INPUTS ----------- ###################################### # force inputs into fortran order and scipy float64 copyFlag = False x = x.astype(dtype=scipy.float64, order='F', copy=copyFlag) irs = irs.astype(dtype=scipy.int32, order='F', copy=copyFlag) pcs = pcs.astype(dtype=scipy.int32, order='F', copy=copyFlag) y = y.astype(dtype=scipy.float64, order='F', copy=copyFlag) weights = weights.astype(dtype=scipy.float64, order='F', copy=copyFlag) offset = offset.astype(dtype=scipy.float64, order='F', copy=copyFlag) jd = jd.astype(dtype=scipy.int32, order='F', copy=copyFlag) vp = vp.astype(dtype=scipy.float64, order='F', copy=copyFlag) cl = cl.astype(dtype=scipy.float64, order='F', copy=copyFlag) ulam = ulam.astype(dtype=scipy.float64, order='F', copy=copyFlag) ###################################### # --------- ALLOCATE OUTPUTS --------- ###################################### # lmu lmu = -1 lmu_r = ctypes.c_int(lmu) # a0, ca if nc == 1: a0 = scipy.zeros([nlam], dtype=scipy.float64) ca = scipy.zeros([nx, nlam], dtype=scipy.float64) else: a0 = scipy.zeros([nc, nlam], dtype=scipy.float64) ca = scipy.zeros([nx, nc, nlam], dtype=scipy.float64) # a0 a0 = a0.astype(dtype=scipy.float64, order='F', copy=False) a0_r = a0.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) # ca ca = ca.astype(dtype=scipy.float64, order='F', copy=False) ca_r = ca.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) # ia ia = -1 * scipy.ones([nx], dtype=scipy.int32) ia = ia.astype(dtype=scipy.int32, order='F', copy=False) ia_r = ia.ctypes.data_as(ctypes.POINTER(ctypes.c_int)) # nin nin = -1 * scipy.ones([nlam], dtype=scipy.int32) nin = nin.astype(dtype=scipy.int32, order='F', copy=False) nin_r = 
nin.ctypes.data_as(ctypes.POINTER(ctypes.c_int)) # dev dev = -1 * scipy.ones([nlam], dtype=scipy.float64) dev = dev.astype(dtype=scipy.float64, order='F', copy=False) dev_r = dev.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) # alm alm = -1 * scipy.ones([nlam], dtype=scipy.float64) alm = alm.astype(dtype=scipy.float64, order='F', copy=False) alm_r = alm.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) # nlp nlp = -1 nlp_r = ctypes.c_int(nlp) # jerr jerr = -1 jerr_r = ctypes.c_int(jerr) # dev0 dev0 = -1 dev0_r = ctypes.c_double(dev0) # ################################### # main glmnet fortran caller # ################################### if is_sparse: # sparse lognet glmlib.splognet_( ctypes.byref(ctypes.c_double(parm)), ctypes.byref(ctypes.c_int(nobs)), ctypes.byref(ctypes.c_int(nvars)), ctypes.byref(ctypes.c_int(nc)), x.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), pcs.ctypes.data_as(ctypes.POINTER(ctypes.c_int)), irs.ctypes.data_as(ctypes.POINTER(ctypes.c_int)), y.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), offset.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), jd.ctypes.data_as(ctypes.POINTER(ctypes.c_int)), vp.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), cl.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), ctypes.byref(ctypes.c_int(ne)), ctypes.byref(ctypes.c_int(nx)), ctypes.byref(ctypes.c_int(nlam)), ctypes.byref(ctypes.c_double(flmin)), ulam.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), ctypes.byref(ctypes.c_double(thresh)), ctypes.byref(ctypes.c_int(isd)), ctypes.byref(ctypes.c_int(intr)), ctypes.byref(ctypes.c_int(maxit)), ctypes.byref(ctypes.c_int(kopt)), ctypes.byref(lmu_r), a0_r, ca_r, ia_r, nin_r, ctypes.byref(dev0_r), dev_r, alm_r, ctypes.byref(nlp_r), ctypes.byref(jerr_r)) else: # call fortran lognet routine glmlib.lognet_(ctypes.byref(ctypes.c_double(parm)), ctypes.byref(ctypes.c_int(nobs)), ctypes.byref(ctypes.c_int(nvars)), ctypes.byref(ctypes.c_int(nc)), x.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), y.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), offset.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), jd.ctypes.data_as(ctypes.POINTER(ctypes.c_int)), vp.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), cl.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), ctypes.byref(ctypes.c_int(ne)), ctypes.byref(ctypes.c_int(nx)), ctypes.byref(ctypes.c_int(nlam)), ctypes.byref(ctypes.c_double(flmin)), ulam.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), ctypes.byref(ctypes.c_double(thresh)), ctypes.byref(ctypes.c_int(isd)), ctypes.byref(ctypes.c_int(intr)), ctypes.byref(ctypes.c_int(maxit)), ctypes.byref(ctypes.c_int(kopt)), ctypes.byref(lmu_r), a0_r, ca_r, ia_r, nin_r, ctypes.byref(dev0_r), dev_r, alm_r, ctypes.byref(nlp_r), ctypes.byref(jerr_r)) # ################################### # post process results # ################################### # check for error if (jerr_r.value > 0): raise ValueError("Fatal glmnet error in library call : error code = ", jerr_r.value) elif (jerr_r.value < 0): print("Warning: Non-fatal error in glmnet library call: error code = ", jerr_r.value) print("Check results for accuracy. 
Partial or no results returned.") # clip output to correct sizes lmu = lmu_r.value if nc == 1: a0 = a0[0:lmu] ca = ca[0:nx, 0:lmu] else: a0 = a0[0:nc, 0:lmu] ca = ca[0:nx, 0:nc, 0:lmu] ia = ia[0:nx] nin = nin[0:lmu] dev = dev[0:lmu] alm = alm[0:lmu] # ninmax ninmax = max(nin) # fix first value of alm (from inf to correct value) if ulam[0] == 0.0: t1 = scipy.log(alm[1]) t2 = scipy.log(alm[2]) alm[0] = scipy.exp(2 * t1 - t2) # create return fit dictionary if family == 'multinomial': a0 = a0 - scipy.tile(scipy.mean(a0), (nc, 1)) dfmat = a0.copy() dd = scipy.array([nvars, lmu], dtype=scipy.integer) beta_list = list() if ninmax > 0: # TODO: is the reshape here done right? ca = scipy.reshape(ca, (nx, nc, lmu)) ca = ca[0:ninmax, :, :] ja = ia[0:ninmax] - 1 # ia is 1-indexed in fortran oja = scipy.argsort(ja) ja1 = ja[oja] df = scipy.any(scipy.absolute(ca) > 0, axis=1) df = scipy.sum(df) df = scipy.reshape(df, (1, df.size)) for k in range(0, nc): ca1 = scipy.reshape(ca[:, k, :], (ninmax, lmu)) cak = ca1[oja, :] dfmat[k, :] = scipy.sum(scipy.absolute(cak) > 0, axis=0) beta = scipy.zeros([nvars, lmu], dtype=scipy.float64) beta[ja1, :] = cak beta_list.append(beta) else: for k in range(0, nc): dfmat[k, :] = scipy.zeros([1, lmu], dtype=scipy.float64) beta_list.append(scipy.zeros([nvars, lmu], dtype=scipy.float64)) # df = scipy.zeros([1, lmu], dtype=scipy.float64) # if kopt == 2: grouped = True else: grouped = False # fit = dict() fit['a0'] = a0 fit['label'] = classes fit['beta'] = beta_list fit['dev'] = dev fit['nulldev'] = dev0_r.value fit['dfmat'] = dfmat fit['df'] = df fit['lambdau'] = alm fit['npasses'] = nlp_r.value fit['jerr'] = jerr_r.value fit['dim'] = dd fit['grouped'] = grouped fit['offset'] = is_offset fit['class'] = 'multnet' else: dd = scipy.array([nvars, lmu], dtype=scipy.integer) if ninmax > 0: ca = ca[0:ninmax, :] df = scipy.sum(scipy.absolute(ca) > 0, axis=0) ja = ia[0:ninmax] - 1 # ia is 1-indexes in fortran oja = scipy.argsort(ja) ja1 = ja[oja] beta = scipy.zeros([nvars, lmu], dtype=scipy.float64) beta[ja1, :] = ca[oja, :] else: beta = scipy.zeros([nvars, lmu], dtype=scipy.float64) df = scipy.zeros([1, lmu], dtype=scipy.float64) # fit = dict() fit['a0'] = a0 fit['label'] = classes fit['beta'] = beta fit['dev'] = dev fit['nulldev'] = dev0_r.value fit['df'] = df fit['lambdau'] = alm fit['npasses'] = nlp_r.value fit['jerr'] = jerr_r.value fit['dim'] = dd fit['offset'] = is_offset fit['class'] = 'lognet' # ################################### # return to caller # ################################### return fit
from Gnuplot import Gnuplot, GridData
from math import sin, cos, tan, asin, acos, atan, exp, pi
from os import popen2
from sys import stdin
import random
from scipy.integrate import quad
from Gnuplot.func_ext import *
from functions import *
from libacepy.frange import frange
import random

Float = float
matrixmultiply = dot

gp = Gnuplot()

c = zeros([101], Float)
t_data = zeros([512, 3], Float)
e_data = zeros([128, 3], Float)

#ions=["he2","o8","n7","c12","mg11","c13","o7","n6","mg10","c5","ne8","mg9","o6","n5","c4"]
ions = ["He2", "O8", "N7", "C6", "Mg11", "13C6", "O7", "N6", "Mg10", "C5",
        "Ne8", "Mg9", "O6", "N5", "C4"]

x = zeros([len(ions)], Float)
G = zeros([len(ions)], Float)
sg = zeros([len(ions)], Float)
sk = zeros([len(ions)], Float)
sl = zeros([len(ions)], Float)
sr = zeros([len(ions)], Float)
kappa = zeros([len(ions)], Float)
breite = zeros([len(ions)], Float)
    f[5] = 0.092
    eta[5] = 98.4
    f[6] = 0.109
    eta[6] = 2880

    for i in range(0, np.size(eta)):
        f_times_eta[i] = f[i] * np.exp(-eta[i] * z)
        f_times_eta_div[i] = -eta[i] * f_times_eta[i]
        #print f_times_eta_div[i]

    Q = sum(f_times_eta_div)
    return Q


Rs = 800
Q_swr_0 = 800

z = np.linspace(0.0, 0.25, num=1001)
Q_swr = scipy.zeros(np.size(z))
Q_swr_1 = scipy.zeros(np.size(z))
Q_swr_2 = scipy.zeros(np.size(z))

eta_measuerd = 1.0
for i in range(0, np.size(z)):
    Q_swr_1[i] = -1 * Q_swr_0 * Q_swr_z(z[i], eta_measuerd) / (cp_w * rho_w)
    print z[i], Q_swr_1[i]
plt.plot(Q_swr_1[0:100], z[0:100])

eta_measuerd = 10.0
for i in range(0, np.size(z)):
    Q_swr_2[i] = -1 * Q_swr_0 * Q_swr_z(z[i], eta_measuerd) / (cp_w * rho_w)
    print z[i], Q_swr_2[i]
plt.plot(Q_swr_2[0:100], z[0:100])
def coordinate_genot_ss(genotype_file=None, hdf5_file=None, genetic_map_dir=None, check_mafs=False, min_maf=0.01, skip_coordination=False, debug=False): """ Assumes plink BED files. Imputes missing genotypes. """ from plinkio import plinkfile plinkf = plinkfile.PlinkFile(genotype_file) plinkf_dict = plinkfiles.get_phenotypes(plinkf) num_individs = plinkf_dict['num_individs'] risk_scores = sp.zeros(num_individs) rb_risk_scores = sp.zeros(num_individs) num_common_snps = 0 corr_list = [] rb_corr_list = [] if plinkf_dict['has_phenotype']: hdf5_file.create_dataset('y', data=plinkf_dict['phenotypes']) hdf5_file.create_dataset('fids', data=sp.array(plinkf_dict['fids'], dtype=util.fids_dtype)) hdf5_file.create_dataset('iids', data=sp.array(plinkf_dict['iids'], dtype=util.iids_dtype)) ssf = hdf5_file['sum_stats'] cord_data_g = hdf5_file.create_group('cord_data') # Figure out chromosomes and positions by looking at SNPs. loci = plinkf.get_loci() plinkf.close() gf_chromosomes = [l.chromosome for l in loci] chromosomes = sp.unique(gf_chromosomes) chromosomes.sort() chr_dict = plinkfiles.get_chrom_dict(loci, chromosomes) tot_num_non_matching_nts = 0 for chrom in chromosomes: chr_str = 'chrom_%d' % chrom print('Coordinating data for chromosome %s' % chr_str) chrom_d = chr_dict[chr_str] #print(chrom_d) try: ssg = ssf['chrom_%d' % chrom] except Exception as err_str: print(err_str) print('Did not find chromosome in SS dataset.') print('Continuing.') continue # for x,y in zip(chrom_d['sids'], chrom_d['nts']): # sys.stderr.write(f'{x} {y[0]} {y[1]}\n') # # for x,y in zip(ssg['sids'], ssg['nts']): # sys.stderr.write(f'{x} {y[0]} {y[1]}\n') g_sids = chrom_d['sids'] g_sid_set = set(g_sids) assert len(g_sid_set) == len( g_sids), 'Some SNPs appear to be duplicated?' ss_sids = (ssg['sids'][...]).astype(util.sids_u_dtype) ss_sid_set = set(ss_sids) assert len(ss_sid_set) == len( ss_sids), 'Some SNPs appear to be duplicated?' # Figure out filters: g_filter = sp.in1d(g_sids, ss_sids) ss_filter = sp.in1d(ss_sids, g_sids) # Order by SNP IDs g_order = sp.argsort(g_sids) ss_order = sp.argsort(ss_sids) g_indices = [] for g_i in g_order: if g_filter[g_i]: g_indices.append(g_i) ss_indices = [] for ss_i in ss_order: if ss_filter[ss_i]: ss_indices.append(ss_i) g_nts = chrom_d['nts'] snp_indices = chrom_d['snp_indices'] ss_nts = (ssg['nts'][...]).astype(util.nts_u_dtype) betas = ssg['betas'][...] log_odds = ssg['log_odds'][...] assert not sp.any(sp.isnan( betas)), 'Some SNP effect estimates are NANs (not a number)' assert not sp.any(sp.isinf( betas)), 'Some SNP effect estimates are INFs (infinite numbers)' # Wallace -start, f**k LDpred w_pos = chrom_d['positions'] # -end num_non_matching_nts = 0 num_ambig_nts = 0 ok_nts = [] if debug: print('Found %d SNPs present in both datasets' % (len(g_indices))) if 'freqs' in ssg: ss_freqs = ssg['freqs'][...] ok_indices = {'g': [], 'ss': []} for g_i, ss_i in zip(g_indices, ss_indices): # for g_i, ss_i, pos_i in zip(g_indices, ss_indices, w_pos): # Is the nucleotide ambiguous? g_nt = [g_nts[g_i][0], g_nts[g_i][1]] if not skip_coordination: if tuple(g_nt) in util.ambig_nts: num_ambig_nts += 1 tot_num_non_matching_nts += 1 continue if (not g_nt[0] in util.valid_nts) or (not g_nt[1] in util.valid_nts): num_non_matching_nts += 1 tot_num_non_matching_nts += 1 continue ss_nt = ss_nts[ss_i] # Are the nucleotides the same? 
flip_nts = False os_g_nt = sp.array([ util.opp_strand_dict[g_nt[0]], util.opp_strand_dict[g_nt[1]] ]) if not (sp.all(g_nt == ss_nt) or sp.all(os_g_nt == ss_nt)): # Opposite strand nucleotides flip_nts = (g_nt[1] == ss_nt[0] and g_nt[0] == ss_nt[1]) or (os_g_nt[1] == ss_nt[0] and os_g_nt[0] == ss_nt[1]) if flip_nts: betas[ss_i] = -betas[ss_i] log_odds[ss_i] = -log_odds[ss_i] if 'freqs' in ssg: if ss_freqs[ss_i] > 0: ss_freqs[ss_i] = 1 - ss_freqs[ss_i] else: # Wallace debug if debug: sys.stderr.write( f'non match at: {g_sids[g_i]} - ssid:{ss_sids[ss_i]}, g_nt: {g_nt[0]} - {g_nt[1]}, ss_nt: {ss_nt[0]} - {ss_nt[1]}\n' ) # End Wallace debug. num_non_matching_nts += 1 tot_num_non_matching_nts += 1 continue # everything seems ok. ok_indices['g'].append(g_i) ok_indices['ss'].append(ss_i) ok_nts.append(g_nt) if debug: print('%d SNPs were excluded due to ambiguous nucleotides.' % num_ambig_nts) print('%d SNPs were excluded due to non-matching nucleotides.' % num_non_matching_nts) # Resorting by position positions = sp.array(chrom_d['positions'])[ok_indices['g']] order = sp.argsort(positions) ok_indices['g'] = list(sp.array(ok_indices['g'])[order]) ok_indices['ss'] = list(sp.array(ok_indices['ss'])[order]) positions = positions[order] # Parse SNPs snp_indices = sp.array(chrom_d['snp_indices']) # Pinpoint where the SNPs are in the file. snp_indices = snp_indices[ok_indices['g']] raw_snps, freqs = plinkfiles.parse_plink_snps(genotype_file, snp_indices) if debug: print('Parsed a %dX%d (SNP) genotype matrix' % (raw_snps.shape[0], raw_snps.shape[1])) snp_stds = sp.sqrt(2 * freqs * (1 - freqs)) snp_means = freqs * 2 betas = betas[ok_indices['ss']] log_odds = log_odds[ok_indices['ss']] ps = ssg['ps'][...][ok_indices['ss']] nts = sp.array(ok_nts)[order] sids = (ssg['sids'][...]).astype(util.sids_u_dtype) sids = sids[ok_indices['ss']] # Check SNP frequencies.. if check_mafs and 'freqs' in ssg: ss_freqs = ss_freqs[ok_indices['ss']] # Assuming freq less than 0 is missing data freq_discrepancy_snp = sp.absolute(ss_freqs - (1 - freqs)) > 0.15 # Filter SNPs that doesn't have MAF info from sumstat freq_discrepancy_snp = sp.logical_and(freq_discrepancy_snp, ss_freqs > 0) freq_discrepancy_snp = sp.logical_and(freq_discrepancy_snp, ss_freqs < 1) if sp.any(freq_discrepancy_snp): print( 'Warning: %d SNPs appear to have high frequency ' 'discrepancy between summary statistics and validation sample' % sp.sum(freq_discrepancy_snp)) # Filter freq_discrepancy_snps ok_freq_snps = sp.logical_not(freq_discrepancy_snp) raw_snps = raw_snps[ok_freq_snps] snp_stds = snp_stds[ok_freq_snps] snp_means = snp_means[ok_freq_snps] freqs = freqs[ok_freq_snps] ps = ps[ok_freq_snps] positions = positions[ok_freq_snps] nts = nts[ok_freq_snps] sids = sids[ok_freq_snps] betas = betas[ok_freq_snps] log_odds = log_odds[ok_freq_snps] # Filter minor allele frequency SNPs. maf_filter = (freqs > min_maf) * (freqs < (1 - min_maf)) maf_filter_sum = sp.sum(maf_filter) n_snps = len(maf_filter) assert maf_filter_sum <= n_snps, "Problems when filtering SNPs with low minor allele frequencies" if sp.sum(maf_filter) < n_snps: raw_snps = raw_snps[maf_filter] snp_stds = snp_stds[maf_filter] snp_means = snp_means[maf_filter] freqs = freqs[maf_filter] ps = ps[maf_filter] positions = positions[maf_filter] nts = nts[maf_filter] sids = sids[maf_filter] betas = betas[maf_filter] log_odds = log_odds[maf_filter] print('%d SNPs with MAF < %0.3f were filtered' % (n_snps - maf_filter_sum, min_maf)) print('%d SNPs were retained on chromosome %d.' 
% (maf_filter_sum, chrom)) rb_prs = sp.dot(sp.transpose(raw_snps), log_odds) if debug and plinkf_dict['has_phenotype']: print('Normalizing SNPs') snp_means.shape = (len(raw_snps), 1) snp_stds.shape = (len(raw_snps), 1) snps = (raw_snps - snp_means) / snp_stds assert snps.shape == raw_snps.shape, 'Problems when normalizing SNPs (set to have variance 1 and 0 mean)' snp_stds = snp_stds.flatten() snp_means = snp_means.flatten() prs = sp.dot(sp.transpose(snps), betas) corr = sp.corrcoef(plinkf_dict['phenotypes'], prs)[0, 1] corr_list.append(corr) print( 'PRS correlation for chromosome %d was %0.4f when predicting into LD ref data' % (chrom, corr)) rb_corr = sp.corrcoef(plinkf_dict['phenotypes'], rb_prs)[0, 1] rb_corr_list.append(rb_corr) print( 'Raw effect sizes PRS correlation for chromosome %d was %0.4f when predicting into LD ref data' % (chrom, rb_corr)) sid_set = set(sids) if genetic_map_dir is not None: genetic_map = [] with gzip.open(genetic_map_dir + 'chr%d.interpolated_genetic_map.gz' % chrom) as f: for line in f: l = line.split() if l[0] in sid_set: genetic_map.append(l[0]) else: genetic_map = None coord_data_dict = { 'chrom': 'chrom_%d' % chrom, 'raw_snps_ref': raw_snps, 'snp_stds_ref': snp_stds, 'snp_means_ref': snp_means, 'freqs_ref': freqs, 'ps': ps, 'positions': positions, 'nts': nts, 'sids': sids, 'genetic_map': genetic_map, 'betas': betas, 'log_odds': log_odds, 'log_odds_prs': rb_prs } write_coord_data(cord_data_g, coord_data_dict) if debug and plinkf_dict['has_phenotype']: rb_risk_scores += rb_prs risk_scores += prs num_common_snps += len(betas) if debug and plinkf_dict['has_phenotype']: # Now calculate the prediction R^2 corr = sp.corrcoef(plinkf_dict['phenotypes'], risk_scores)[0, 1] rb_corr = sp.corrcoef(plinkf_dict['phenotypes'], rb_risk_scores)[0, 1] print( 'PRS R2 prediction accuracy for the whole genome was %0.4f (corr=%0.4f) when predicting into LD ref data' % (corr**2, corr)) print( 'Log-odds (effects) PRS R2 prediction accuracy for the whole genome was %0.4f (corr=%0.4f) when predicting into LD ref data' % (rb_corr**2, rb_corr)) print('There were %d SNPs in common' % num_common_snps) print('In all, %d SNPs were excluded due to nucleotide issues.' % tot_num_non_matching_nts) print('Done coordinating genotypes and summary statistics datasets.')
train = []
test1 = []
test2 = []
for i in range(n_s):
    if parents[i, 1] in idxf:
        test2 = test2 + [i]
    else:
        if parents[i, 0] in idxm:
            test1 = test1 + [i]
        else:
            train = train + [i]

train2 = train + test1
test = test1 + test2

yhat = SP.zeros((n_s, 1))
yhat[train] = y[train]


def train_and_eval(Xtrain, Xtest, ytrain):
    ns = Xtrain.shape[0]
    idx = list(range(ns))
    SP.random.shuffle(idx)
    train_idx = idx[:int(ns * 0.7)]
    valid_idx = idx[int(ns * 0.7):]
    xg_train = xgb.DMatrix(Xtrain[train_idx, :], label=ytrain[train_idx, :])
    xg_valid = xgb.DMatrix(Xtrain[valid_idx, :], label=ytrain[valid_idx, :])
    param = {'eta': 0.05, 'silent': 1, 'subsample': 0.5, 'lambda': 0.8}
    model = xgb.train(param, xg_train, 500, [(xg_valid, 'valid')],
def getGaus(self, alpha, mue, sigma, x):
    dens = zeros(self.numOGaus)
    for g in range(self.numOGaus):
        dens[g] = self.getStND(x, mue[g], sigma[g])
    return sum(self.sigmo(alpha) * dens)
EE_sol_test = []
tau_sol_test = []

for Mon in xrange(max_chan_realizaion):
    max_d2d_to_d2d_gains_diff = sp.copy(max_d2d_to_d2d_gains[:, :, Mon])
    sp.fill_diagonal(max_d2d_to_d2d_gains_diff, 0)

    uav_to_d2d_gains = max_uav_to_d2d_gains[:num_d2d_pairs, Mon]
    d2d_to_d2d_gains = max_d2d_to_d2d_gains[:num_d2d_pairs, :num_d2d_pairs, Mon]
    d2d_to_d2d_gains_diff = max_d2d_to_d2d_gains_diff[:num_d2d_pairs, :num_d2d_pairs]
    d2d_to_d2d_gains_diag = sp.subtract(d2d_to_d2d_gains, d2d_to_d2d_gains_diff)

    # vectorize channel training
    test_chan = sp.zeros(dimen_input)
    test_chan[0:num_d2d_pairs] = uav_to_d2d_gains
    test_chan[num_d2d_pairs:dimen_input] = d2d_to_d2d_gains.ravel()
    vec_chan_test = sp.array([test_chan])

    X_test = vec_chan_test
    test_tau_result = nn_model.predict(X_test, verbose=0)
    test_theta_dnn = 1 / (1 - test_tau_result)

    phi_n_sol = sp.multiply((test_theta_dnn - 1) * eta * power_UAV,
                            uav_to_d2d_gains)
    x_rate = sp.matmul(d2d_to_d2d_gains_diag, sp.transpose(phi_n_sol))
    term_rate = sp.matmul(sp.transpose(d2d_to_d2d_gains_diff),
                          sp.transpose(phi_n_sol)) + 1
ref = '100307'
print(ref + '.reduce' + str(r_factor) + '.LR_mask.mat')
fn1 = ref + '.reduce' + str(r_factor) + '.LR_mask.mat'
fname1 = os.path.join(ref_dir, fn1)
msk = scipy.io.loadmat(fname1)  # h5py.File(fname1);

dfs_left = readdfs(os.path.join(p_dir_ref, 'reference', ref + '.aparc.\
a2009s.32k_fs.reduce3.left.dfs'))
dfs_left_sm = readdfs(os.path.join(p_dir_ref, 'reference', ref + '.aparc.\
a2009s.32k_fs.reduce3.very_smooth.left.dfs'))
# view_patch_vtk(dfs_left_sm)

rho_rho = []
rho_all = []
#lst=lst[:1]
labs_all = sp.zeros((len(dfs_left.labels), len(lst)))

sub = lst[0]
data = scipy.io.loadmat(os.path.join(p_dir, sub, sub + '.rfMRI_REST1_LR.\
reduce3.ftdata.NLM_11N_hvar_25.mat'))
LR_flag = msk['LR_flag']
LR_flag = np.squeeze(LR_flag) != 0
data = data['ftdata_NLM']
temp = data[LR_flag, :]
#temp = temp[:,0:30]
#temp[5000:6000, 500:700] = sp.randn(1000, 200)
# temp[1000, :]
m = np.mean(temp, 1)
temp = temp - m[:, None]
s = np.std(temp, 1) + 1e-16
temp = temp/s[:, None]
d1 = temp
def coordinate_genotypes_ss_w_ld_ref(genotype_file=None, reference_genotype_file=None, hdf5_file=None, genetic_map_dir=None, check_mafs=False, min_maf=0.01, skip_coordination=False, debug=False): print('Coordinating things w genotype file: %s \nref. genot. file: %s' % (genotype_file, reference_genotype_file)) from plinkio import plinkfile plinkf = plinkfile.PlinkFile(genotype_file) # Loads only the individuals... plinkf_dict = plinkfiles.get_phenotypes(plinkf) # Figure out chromosomes and positions. if debug: print('Parsing validation bim file') loci = plinkf.get_loci() plinkf.close() gf_chromosomes = [l.chromosome for l in loci] chromosomes = sp.unique(gf_chromosomes) chromosomes.sort() chr_dict = plinkfiles.get_chrom_dict(loci, chromosomes) if debug: print('Parsing LD reference bim file') plinkf_ref = plinkfile.PlinkFile(reference_genotype_file) loci_ref = plinkf_ref.get_loci() plinkf_ref.close() chr_dict_ref = plinkfiles.get_chrom_dict(loci_ref, chromosomes) # Open HDF5 file and prepare out data assert not 'iids' in hdf5_file, 'Something is wrong with the HDF5 file, no individuals IDs were found.' if plinkf_dict['has_phenotype']: hdf5_file.create_dataset('y', data=plinkf_dict['phenotypes']) hdf5_file.create_dataset('fids', data=sp.array(plinkf_dict['fids'], dtype=util.fids_dtype)) hdf5_file.create_dataset('iids', data=sp.array(plinkf_dict['iids'], dtype=util.iids_dtype)) ssf = hdf5_file['sum_stats'] cord_data_g = hdf5_file.create_group('cord_data') maf_adj_risk_scores = sp.zeros(plinkf_dict['num_individs']) num_common_snps = 0 # corr_list = [] tot_g_ss_nt_concord_count = 0 tot_rg_ss_nt_concord_count = 0 tot_g_rg_nt_concord_count = 0 tot_num_non_matching_nts = 0 # Now iterate over chromosomes for chrom in chromosomes: ok_indices = {'g': [], 'rg': [], 'ss': []} chr_str = 'chrom_%d' % chrom print('Coordinating data for chromosome %s' % chr_str) chrom_d = chr_dict[chr_str] chrom_d_ref = chr_dict_ref[chr_str] try: ssg = ssf['chrom_%d' % chrom] except Exception as err_str: print(err_str) print('Did not find chromosome in SS dataset.') print('Continuing.') continue ssg = ssf['chrom_%d' % chrom] g_sids = chrom_d['sids'] rg_sids = chrom_d_ref['sids'] ss_sids = (ssg['sids'][...]).astype(util.sids_u_dtype) if debug: print( 'Found %d SNPs in validation data, %d SNPs in LD reference data, and %d SNPs in summary statistics.' % (len(g_sids), len(rg_sids), len(ss_sids))) common_sids = sp.intersect1d(ss_sids, g_sids) common_sids = sp.intersect1d(common_sids, rg_sids) if debug: print( 'Found %d SNPs on chrom %d that were common across all datasets' % (len(common_sids), chrom)) ss_snp_map = [] g_snp_map = [] rg_snp_map = [] ss_sid_dict = {} for i, sid in enumerate(ss_sids): ss_sid_dict[sid] = i g_sid_dict = {} for i, sid in enumerate(g_sids): g_sid_dict[sid] = i rg_sid_dict = {} for i, sid in enumerate(rg_sids): rg_sid_dict[sid] = i for sid in common_sids: g_snp_map.append(g_sid_dict[sid]) # order by positions g_positions = sp.array(chrom_d['positions'])[g_snp_map] order = sp.argsort(g_positions) # order = order.tolist() g_snp_map = sp.array(g_snp_map)[order] g_snp_map = g_snp_map.tolist() common_sids = sp.array(common_sids)[order] # Get the other two maps for sid in common_sids: rg_snp_map.append(rg_sid_dict[sid]) for sid in common_sids: ss_snp_map.append(ss_sid_dict[sid]) g_nts = sp.array(chrom_d['nts']) rg_nts = sp.array(chrom_d_ref['nts']) rg_nts_ok = sp.array(rg_nts)[rg_snp_map] ss_nts = (ssg['nts'][...]).astype(util.nts_u_dtype) betas = ssg['betas'][...] log_odds = ssg['log_odds'][...] 
if 'freqs' in ssg: ss_freqs = ssg['freqs'][...] g_ss_nt_concord_count = sp.sum( g_nts[g_snp_map] == ss_nts[ss_snp_map]) / 2.0 rg_ss_nt_concord_count = sp.sum(rg_nts_ok == ss_nts[ss_snp_map]) / 2.0 g_rg_nt_concord_count = sp.sum(g_nts[g_snp_map] == rg_nts_ok) / 2.0 if debug: print( 'Nucleotide concordance counts out of %d genotypes: vg-g: %d, vg-ss: %d, g-ss: %d' % (len(g_snp_map), g_rg_nt_concord_count, g_ss_nt_concord_count, rg_ss_nt_concord_count)) tot_g_ss_nt_concord_count += g_ss_nt_concord_count tot_rg_ss_nt_concord_count += rg_ss_nt_concord_count tot_g_rg_nt_concord_count += g_rg_nt_concord_count num_non_matching_nts = 0 num_ambig_nts = 0 # Identifying which SNPs have nucleotides that are ok.. ok_nts = [] for g_i, rg_i, ss_i in zip(g_snp_map, rg_snp_map, ss_snp_map): # To make sure, is the SNP id the same? assert g_sids[g_i] == rg_sids[rg_i] == ss_sids[ ss_i], 'Some issues with coordinating the genotypes.' g_nt = g_nts[g_i] if not skip_coordination: rg_nt = rg_nts[rg_i] ss_nt = ss_nts[ss_i] # Is the nucleotide ambiguous. g_nt = [g_nts[g_i][0], g_nts[g_i][1]] if tuple(g_nt) in util.ambig_nts: num_ambig_nts += 1 tot_num_non_matching_nts += 1 continue # First check if nucleotide is sane? if (not g_nt[0] in util.valid_nts) or (not g_nt[1] in util.valid_nts): num_non_matching_nts += 1 tot_num_non_matching_nts += 1 continue os_g_nt = sp.array([ util.opp_strand_dict[g_nt[0]], util.opp_strand_dict[g_nt[1]] ]) flip_nts = False if not ((sp.all(g_nt == ss_nt) or sp.all(os_g_nt == ss_nt)) and (sp.all(g_nt == rg_nt) or sp.all(os_g_nt == rg_nt))): if sp.all(g_nt == rg_nt) or sp.all(os_g_nt == rg_nt): flip_nts = (g_nt[1] == ss_nt[0] and g_nt[0] == ss_nt[1]) or (os_g_nt[1] == ss_nt[0] and os_g_nt[0] == ss_nt[1]) # Try flipping the SS nt if flip_nts: betas[ss_i] = -betas[ss_i] log_odds[ss_i] = -log_odds[ss_i] if 'freqs' in ssg: ss_freqs[ss_i] = 1 - ss_freqs[ss_i] else: if debug: print("Nucleotides don't match after all?: g_sid=%s, ss_sid=%s, g_i=%d, ss_i=%d, g_nt=%s, ss_nt=%s" % \ (g_sids[g_i], ss_sids[ss_i], g_i, ss_i, str(g_nt), str(ss_nt))) num_non_matching_nts += 1 tot_num_non_matching_nts += 1 continue else: num_non_matching_nts += 1 tot_num_non_matching_nts += 1 continue # Opposite strand nucleotides # everything seems ok. ok_indices['g'].append(g_i) ok_indices['rg'].append(rg_i) ok_indices['ss'].append(ss_i) ok_nts.append(g_nt) if debug: print('%d SNPs had ambiguous nucleotides.' % num_ambig_nts) print('%d SNPs were excluded due to nucleotide issues.' % num_non_matching_nts) print('%d SNPs were retained on chromosome %d.' % (len(ok_indices['g']), chrom)) # Resorting by position positions = sp.array(chrom_d['positions'])[ok_indices['g']] # Now parse SNPs .. snp_indices = sp.array(chrom_d['snp_indices']) # Pinpoint where the SNPs are in the file. snp_indices = snp_indices[ok_indices['g']] raw_snps, freqs = plinkfiles.parse_plink_snps(genotype_file, snp_indices) snp_indices_ref = sp.array(chrom_d_ref['snp_indices']) # Pinpoint where the SNPs are in the file. snp_indices_ref = snp_indices_ref[ok_indices['rg']] raw_ref_snps, freqs_ref = plinkfiles.parse_plink_snps( reference_genotype_file, snp_indices_ref) snp_stds_ref = sp.sqrt(2 * freqs_ref * (1 - freqs_ref)) snp_means_ref = freqs_ref * 2 snp_stds = sp.sqrt(2 * freqs * (1 - freqs)) snp_means = freqs * 2 betas = betas[ok_indices['ss']] log_odds = log_odds[ok_indices['ss']] ps = ssg['ps'][...][ok_indices['ss']] nts = sp.array(ok_nts) sids = (ssg['sids'][...]).astype(util.sids_u_dtype) sids = sids[ok_indices['ss']] # Check SNP frequencies.. 
if check_mafs and 'freqs' in ssg: ss_freqs = ss_freqs[ok_indices['ss']] freq_discrepancy_snp = sp.absolute( ss_freqs - (1 - freqs)) > 0.15 #Array of np.bool values if sp.any(freq_discrepancy_snp): print( 'Warning: %d SNPs were filtered due to high allele frequency discrepancy between summary statistics and validation sample' % sp.sum(freq_discrepancy_snp)) # Filter freq_discrepancy_snps ok_freq_snps = sp.logical_not(freq_discrepancy_snp) raw_snps = raw_snps[ok_freq_snps] snp_stds = snp_stds[ok_freq_snps] snp_means = snp_means[ok_freq_snps] raw_ref_snps = raw_ref_snps[ok_freq_snps] snp_stds_ref = snp_stds_ref[ok_freq_snps] snp_means_ref = snp_means_ref[ok_freq_snps] freqs = freqs[ok_freq_snps] freqs_ref = freqs_ref[ok_freq_snps] ps = ps[ok_freq_snps] positions = positions[ok_freq_snps] nts = nts[ok_freq_snps] sids = sids[ok_freq_snps] betas = betas[ok_freq_snps] log_odds = log_odds[ok_freq_snps] # Filter minor allele frequency SNPs. maf_filter = (freqs > min_maf) * (freqs < (1 - min_maf)) maf_filter_sum = sp.sum(maf_filter) n_snps = len(maf_filter) assert maf_filter_sum <= n_snps, "Problems when filtering SNPs with low minor allele frequencies" if sp.sum(maf_filter) < n_snps: raw_snps = raw_snps[maf_filter] snp_stds = snp_stds[maf_filter] snp_means = snp_means[maf_filter] raw_ref_snps = raw_ref_snps[maf_filter] snp_stds_ref = snp_stds_ref[maf_filter] snp_means_ref = snp_means_ref[maf_filter] freqs = freqs[maf_filter] freqs_ref = freqs_ref[maf_filter] ps = ps[maf_filter] positions = positions[maf_filter] nts = nts[maf_filter] sids = sids[maf_filter] betas = betas[maf_filter] log_odds = log_odds[maf_filter] maf_adj_prs = sp.dot(log_odds, raw_snps) if debug and plinkf_dict['has_phenotype']: maf_adj_corr = sp.corrcoef(plinkf_dict['phenotypes'], maf_adj_prs)[0, 1] print( 'Log odds, per genotype PRS correlation w phenotypes for chromosome %d was %0.4f' % (chrom, maf_adj_corr)) genetic_map = [] if genetic_map_dir is not None: with gzip.open(genetic_map_dir + 'chr%d.interpolated_genetic_map.gz' % chrom) as f: for line in f: l = line.split() # if l[0] in sid_set: # genetic_map.append(l[0]) else: genetic_map = None coord_data_dict = { 'chrom': 'chrom_%d' % chrom, 'raw_snps_ref': raw_ref_snps, 'snp_stds_ref': snp_stds_ref, 'snp_means_ref': snp_means_ref, 'freqs_ref': freqs_ref, 'ps': ps, 'positions': positions, 'nts': nts, 'sids': sids, 'genetic_map': genetic_map, 'betas': betas, 'log_odds': log_odds, 'log_odds_prs': maf_adj_prs, 'raw_snps_val': raw_snps, 'snp_stds_val': snp_stds, 'snp_means_val': snp_means, 'freqs_val': freqs } write_coord_data(cord_data_g, coord_data_dict) maf_adj_risk_scores += maf_adj_prs num_common_snps += len(betas) # Now calculate the prediction r^2 if debug and plinkf_dict['has_phenotype']: maf_adj_corr = sp.corrcoef(plinkf_dict['phenotypes'], maf_adj_risk_scores)[0, 1] print( 'Log odds, per PRS correlation for the whole genome was %0.4f (r^2=%0.4f)' % (maf_adj_corr, maf_adj_corr**2)) print( 'Overall nucleotide concordance counts: g_rg: %d, g_ss: %d, rg_ss: %d' % (tot_g_rg_nt_concord_count, tot_g_ss_nt_concord_count, tot_rg_ss_nt_concord_count)) print('There were %d SNPs in common' % num_common_snps) print('In all, %d SNPs were excluded due to nucleotide issues.' % tot_num_non_matching_nts) print('Done!')
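# Hedged sketch of the allele-coordination step above in isolation: given the
# genotype alleles and the summary-statistic alleles, drop strand-ambiguous
# SNPs and decide whether the effect sign must be flipped. The dictionaries
# below stand in for util.opp_strand_dict / util.ambig_nts and are assumptions
# for illustration, not the package API.
OPP_STRAND = {'A': 'T', 'T': 'A', 'G': 'C', 'C': 'G'}
AMBIG_NTS = {('A', 'T'), ('T', 'A'), ('G', 'C'), ('C', 'G')}

def coordinate_alleles(g_nt, ss_nt):
    """Return (keep, flip): keep=False for ambiguous or mismatching SNPs,
    flip=True when the summary-statistic beta should change sign."""
    g_nt = tuple(g_nt)
    ss_nt = tuple(ss_nt)
    if g_nt in AMBIG_NTS:
        return False, False
    os_g_nt = (OPP_STRAND[g_nt[0]], OPP_STRAND[g_nt[1]])
    if ss_nt in (g_nt, os_g_nt):
        return True, False              # same allele order, keep beta as is
    if ss_nt in (g_nt[::-1], os_g_nt[::-1]):
        return True, True               # alleles swapped, flip the sign of beta
    return False, False                 # alleles do not match at all

print(coordinate_alleles(('A', 'G'), ('G', 'A')))   # (True, True)
print(coordinate_alleles(('A', 'T'), ('A', 'T')))   # (False, False), ambiguous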
import scipy as sp
from scipy import constants
import matplotlib.pyplot as plt
import math

pi = constants.pi
points = 5000
x = sp.zeros(points)
y = sp.zeros(points)

plt.grid(True, which="both")


def parametric_xy(theta):
    x = 2 * math.cos(theta) + math.cos(2 * theta)
    y = 2 * math.sin(theta) - math.sin(2 * theta)
    return x, y


theta = sp.linspace(0, 2 * pi, points)
for i in range(points):
    x[i], y[i] = parametric_xy(theta[i])

plt.plot(x, y)
plt.title('(a)')
plt.savefig('a.png')
plt.show()


def polar_xy(r, theta):
    x = r * math.cos(theta)
    y = r * math.sin(theta)
    return x, y
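# The per-point loop above works, but the same (deltoid-like) curve can also be
# computed in a vectorized way; a minimal sketch using numpy directly. The
# output file name is arbitrary.
import numpy as np
import matplotlib.pyplot as plt

theta_v = np.linspace(0, 2 * np.pi, 5000)
x_v = 2 * np.cos(theta_v) + np.cos(2 * theta_v)
y_v = 2 * np.sin(theta_v) - np.sin(2 * theta_v)

plt.figure()
plt.grid(True, which="both")
plt.plot(x_v, y_v)
plt.title('(a) vectorized')
plt.savefig('a_vectorized.png')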
__author__ = 'Frank Sehnke, [email protected]'

from scipy import random, zeros, ones, exp, sqrt, cos, log

# Tabulated (approximate) standard normal density on [-4, 4)
stND = zeros(1000)
for i in range(1000):
    x = -4.0 + float(i) * 8.0 / 1000.0
    stND[i] = 1.0 / 2.51 * exp(-0.5 * (x)**2)


# FIXME: different class name?
class MixtureOfGaussians:
    def __init__(self, numOGaus=10, alphaA=0.02, alphaM=0.02, alphaS=0.02):
        self.alphaA = alphaA
        self.alphaM = alphaM
        self.alphaS = alphaS
        self.minSig = 0.000001
        self.numOGaus = numOGaus  # Number of Gaussians
        self.rangeMin = -20.0
        self.rangeMax = 20.0
        self.epsilon = (self.rangeMax - self.rangeMin) / (
            sqrt(2.0) * float(self.numOGaus - 1))  # Initial value of sigmas
        self.propFakt = 1.0 / float(self.numOGaus)
        self.distFakt = 1.0 / float(self.numOGaus - 1)
        self.distRange = self.rangeMax - self.rangeMin
        self.sigma = ones(self.numOGaus)
        self.mue = zeros(self.numOGaus)
        self.alpha = ones(self.numOGaus)
        self.sigma *= self.epsilon
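# Hedged sketch: evaluating a mixture-of-Gaussians density from mixing weights,
# means and sigmas shaped like the attributes above (alpha, mue, sigma). It
# uses numpy rather than the scipy aliases for portability, and the weight
# normalization is an assumption for illustration, not necessarily what the
# class above does internally.
import numpy as np

def mog_density(x, alpha, mue, sigma):
    w = np.asarray(alpha, dtype=float)
    w = w / w.sum()                               # normalize mixing weights
    x = np.asarray(x, dtype=float)[..., None]     # broadcast x over components
    comps = np.exp(-0.5 * ((x - mue) / sigma) ** 2) / (sigma * np.sqrt(2.0 * np.pi))
    return (comps * w).sum(axis=-1)

xs = np.linspace(-20.0, 20.0, 5)
print(mog_density(xs, alpha=np.ones(3), mue=np.array([-5.0, 0.0, 5.0]),
                  sigma=np.array([1.0, 2.0, 1.0])))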
def ldpred_genomewide(data_file=None, ld_radius=None, ld_dict=None, out_file_prefix=None, ps=None, n=None, h2=None, num_iter=None, verbose=False, zero_jump_prob=0.05, burn_in=5): """ Calculate LDpred for a genome """ df = h5py.File(data_file, 'r') has_phenotypes = False if 'y' in df.keys(): 'Validation phenotypes found.' y = df['y'][...] # Phenotype num_individs = len(y) risk_scores_pval_derived = sp.zeros(num_individs) has_phenotypes = True ld_scores_dict = ld_dict['ld_scores_dict'] chrom_ld_dict = ld_dict['chrom_ld_dict'] chrom_ref_ld_mats = ld_dict['chrom_ref_ld_mats'] print 'Applying LDpred with LD radius: %d' % ld_radius results_dict = {} num_snps = 0 sum_beta2s = 0 cord_data_g = df['cord_data'] for chrom_str in chromosomes_list: if chrom_str in cord_data_g.keys(): g = cord_data_g[chrom_str] betas = g['betas'][...] n_snps = len(betas) num_snps += n_snps sum_beta2s += sp.sum(betas**2) L = ld_scores_dict['avg_gw_ld_score'] chi_square_lambda = sp.mean(n * sum_beta2s / float(num_snps)) print 'Genome-wide lambda inflation:', chi_square_lambda, print 'Genome-wide mean LD score:', L gw_h2_ld_score_est = max(0.0001, (max(1, chi_square_lambda) - 1) / (n * (L / num_snps))) print 'Estimated genome-wide heritability:', gw_h2_ld_score_est assert chi_square_lambda > 1, 'Something is wrong with the GWAS summary statistics. Perhaps there were issues parsing of them, or the given GWAS sample size (N) was too small. Either way, lambda (the mean Chi-square statistic) is too small. ' LDpred_inf_chrom_dict = {} print 'Calculating LDpred-inf weights' for chrom_str in chromosomes_list: if chrom_str in cord_data_g.keys(): print 'Calculating scores for Chromosome %s' % ( (chrom_str.split('_'))[1]) g = cord_data_g[chrom_str] # Filter monomorphic SNPs snp_stds = g['snp_stds_ref'][...] snp_stds = snp_stds.flatten() ok_snps_filter = snp_stds > 0 pval_derived_betas = g['betas'][...] n_snps = len(pval_derived_betas) pval_derived_betas = pval_derived_betas[ok_snps_filter] if h2 is not None: h2_chrom = h2 * (n_snps / float(num_snps)) else: h2_chrom = gw_h2_ld_score_est * (n_snps / float(num_snps)) start_betas = LDpred_inf.ldpred_inf( pval_derived_betas, genotypes=None, reference_ld_mats=chrom_ref_ld_mats[chrom_str], h2=h2_chrom, n=n, ld_window_size=2 * ld_radius, verbose=False) LDpred_inf_chrom_dict[chrom_str] = start_betas for p in ps: print 'Starting LDpred with p=%0.4f' % p p_str = '%0.4f' % p results_dict[p_str] = {} if out_file_prefix: # Preparing output files raw_effect_sizes = [] ldpred_effect_sizes = [] ldpred_inf_effect_sizes = [] out_sids = [] chromosomes = [] out_positions = [] out_nts = [] for chrom_str in chromosomes_list: if chrom_str in cord_data_g.keys(): g = cord_data_g[chrom_str] if has_phenotypes: if 'raw_snps_val' in g.keys(): raw_snps = g['raw_snps_val'][...] else: raw_snps = g['raw_snps_ref'][...] # Filter monomorphic SNPs snp_stds = g['snp_stds_ref'][...] snp_stds = snp_stds.flatten() ok_snps_filter = snp_stds > 0 snp_stds = snp_stds[ok_snps_filter] pval_derived_betas = g['betas'][...] pval_derived_betas = pval_derived_betas[ok_snps_filter] positions = g['positions'][...] positions = positions[ok_snps_filter] sids = g['sids'][...] sids = sids[ok_snps_filter] log_odds = g['log_odds'][...] log_odds = log_odds[ok_snps_filter] nts = g['nts'][...] 
nts = nts[ok_snps_filter] if out_file_prefix: chromosomes.extend([chrom_str] * len(pval_derived_betas)) out_positions.extend(positions) out_sids.extend(sids) raw_effect_sizes.extend(log_odds) out_nts.extend(nts) n_snps = len(pval_derived_betas) if h2 is not None: h2_chrom = h2 * (n_snps / float(num_snps)) else: h2_chrom = gw_h2_ld_score_est * (n_snps / float(num_snps)) if 'chrom_ld_boundaries' in ld_dict.keys(): ld_boundaries = ld_dict['chrom_ld_boundaries'][chrom_str] res_dict = ldpred_gibbs( pval_derived_betas, h2=h2_chrom, n=n, p=p, ld_radius=ld_radius, verbose=verbose, num_iter=num_iter, burn_in=burn_in, ld_dict=chrom_ld_dict[chrom_str], start_betas=LDpred_inf_chrom_dict[chrom_str], ld_boundaries=ld_boundaries, zero_jump_prob=zero_jump_prob) else: res_dict = ldpred_gibbs( pval_derived_betas, h2=h2_chrom, n=n, p=p, ld_radius=ld_radius, verbose=verbose, num_iter=num_iter, burn_in=burn_in, ld_dict=chrom_ld_dict[chrom_str], start_betas=LDpred_inf_chrom_dict[chrom_str], zero_jump_prob=zero_jump_prob) updated_betas = res_dict['betas'] updated_inf_betas = res_dict['inf_betas'] sum_sqr_effects = sp.sum(updated_betas**2) if sum_sqr_effects > gw_h2_ld_score_est: print 'Sum of squared updated effects estimates seems too large:', sum_sqr_effects print 'This suggests that the Gibbs sampler did not convergence.' print 'Calculating scores for Chromosome %s' % ( (chrom_str.split('_'))[1]) updated_betas = updated_betas / (snp_stds.flatten()) updated_inf_betas = updated_inf_betas / (snp_stds.flatten()) ldpred_effect_sizes.extend(updated_betas) ldpred_inf_effect_sizes.extend(updated_inf_betas) if has_phenotypes: prs = sp.dot(updated_betas, raw_snps) risk_scores_pval_derived += prs corr = sp.corrcoef(y, prs)[0, 1] r2 = corr**2 print 'The R2 prediction accuracy of PRS using %s was: %0.4f' % ( chrom_str, r2) print 'There were %d (SNP) effects' % num_snps if has_phenotypes: num_indivs = len(y) results_dict[p_str]['y'] = y results_dict[p_str]['risk_scores_pd'] = risk_scores_pval_derived print 'Prediction accuracy was assessed using %d individuals.' 
% ( num_indivs) corr = sp.corrcoef(y, risk_scores_pval_derived)[0, 1] r2 = corr**2 results_dict[p_str]['r2_pd'] = r2 print 'The R2 prediction accuracy (observed scale) for the whole genome was: %0.4f (%0.6f)' % ( r2, ((1 - r2)**2) / num_indivs) if corr < 0: risk_scores_pval_derived = -1 * risk_scores_pval_derived auc = util.calc_auc(y, risk_scores_pval_derived) print 'AUC for the whole genome was: %0.4f' % auc # Now calibration denominator = sp.dot(risk_scores_pval_derived.T, risk_scores_pval_derived) y_norm = (y - sp.mean(y)) / sp.std(y) numerator = sp.dot(risk_scores_pval_derived.T, y_norm) regression_slope = (numerator / denominator) # [0][0] print 'The slope for predictions with P-value derived effects is:', regression_slope results_dict[p_str]['slope_pd'] = regression_slope weights_out_file = '%s_LDpred_p%0.4e.txt' % (out_file_prefix, p) with open(weights_out_file, 'w') as f: f.write( 'chrom pos sid nt1 nt2 raw_beta ldpred_beta\n' ) for chrom, pos, sid, nt, raw_beta, ldpred_beta in it.izip( chromosomes, out_positions, out_sids, out_nts, raw_effect_sizes, ldpred_effect_sizes): nt1, nt2 = nt[0], nt[1] f.write('%s %d %s %s %s %0.4e %0.4e\n' % (chrom, pos, sid, nt1, nt2, raw_beta, ldpred_beta)) weights_out_file = '%s_LDpred-inf.txt' % (out_file_prefix) with open(weights_out_file, 'w') as f: f.write( 'chrom pos sid nt1 nt2 raw_beta ldpred_inf_beta \n' ) for chrom, pos, sid, nt, raw_beta, ldpred_inf_beta in it.izip( chromosomes, out_positions, out_sids, out_nts, raw_effect_sizes, ldpred_inf_effect_sizes): nt1, nt2 = nt[0], nt[1] f.write('%s %d %s %s %s %0.4e %0.4e\n' % (chrom, pos, sid, nt1, nt2, raw_beta, ldpred_inf_beta))
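# Sketch of the genome-wide heritability estimate used above: under the
# LD-score relation E[chi^2] ~ 1 + N * h2 * (mean LD score / M), h2 is
# recovered by inverting that formula from the mean chi-square (lambda),
# exactly as in gw_h2_ld_score_est above. The numbers below are made up
# for illustration.
def h2_from_lambda(chi_square_lambda, n, mean_ld_score, num_snps):
    return max(0.0001, (max(1.0, chi_square_lambda) - 1.0) /
               (n * (mean_ld_score / num_snps)))

print(h2_from_lambda(chi_square_lambda=1.15, n=100000,
                     mean_ld_score=120.0, num_snps=1000000))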
def define_init(self, initTheta=1.): """ Define Initialisations of the model PARAMETERS ---------- initTheta flaot initialisation for theta. Default is 1. (no sparsity) """ N = self.dimensionalities["N"] K = self.dimensionalities["K"] M = self.dimensionalities["M"] D = self.dimensionalities["D"] # Latent variables self.model_opts["initZ"] = { 'mean': "random", 'var': s.ones((K, )), 'E': None, 'E2': None } # Tau self.model_opts["initTau"] = { 'a': [s.nan] * M, 'b': [s.nan] * M, 'E': [s.ones(D[m]) * 100 for m in range(M)] } # ARD of weights self.model_opts["initAlpha"] = { 'a': [s.nan] * M, 'b': [s.nan] * M, 'E': [s.ones(K) * 1. for m in range(M)] } # Theta self.model_opts["initTheta"] = { 'a': [s.ones(K, ) for m in range(M)], 'b': [s.ones(K, ) for m in range(M)], 'E': [s.nan * s.zeros(K, ) for m in range(M)] } if type(initTheta) is float: self.model_opts['initTheta']['E'] = [ s.ones(K, ) * initTheta for m in range(M) ] else: print("Error: 'initTheta' must be a float") exit() for m in range(M): for k in range(K): if self.model_opts['sparsity'][m][k] == 0.: self.model_opts["initTheta"]["a"][m][k] = s.nan self.model_opts["initTheta"]["b"][m][k] = s.nan # Weights self.model_opts["initSW"] = { 'Theta': [ s.repeat(self.model_opts['initTheta']['E'][m][None, :], self.dimensionalities["D"][m], 0) for m in range(M) ], 'mean_S0': [s.zeros((D[m], K)) for m in range(M)], 'var_S0': [s.nan * s.ones((D[m], K)) for m in range(M)], 'mean_S1': [s.zeros((D[m], K)) for m in range(M)], # 'mean_S1':[stats.norm.rvs(loc=0, scale=1, size=(D[m],K)) for m in range(M)], 'var_S1': [s.ones((D[m], K)) for m in range(M)], 'ES': [None] * M, 'EW_S0': [None] * M, 'EW_S1': [None] * M # It will be calculated from the parameters }
def randpair_groupdiff_ftest(sub_grp1_files, sub_grp2_files, num_pairs, len_time=255): print('Grp diff using f-test and brainsync') num_vert = spio.loadmat(sub_grp1_files[0])['dtseries'].shape[0] print('Generating random pairs from group 1') pairs_grp1, num_pairs1 = gen_rand_pairs(num_sub=len(sub_grp1_files), num_pairs=num_pairs) fmri_diff1 = sp.zeros((num_vert, num_pairs1)) # Preload data This only slighly faster, better is to load on the fly and multiprocess print('Reading data for group 1') sub_data1 = np.zeros((len_time, num_vert, len(sub_grp1_files))) for i, fname in enumerate(tqdm(sub_grp1_files)): sub1_data = spio.loadmat(fname)['dtseries'][:, :len_time].T sub_data1[:, :, i], _, _ = normalizeData(sub1_data) print('Compute differences in fMRI of random pairs from group 1') for i, rand_pair in enumerate(tqdm(pairs_grp1)): fmri_diff1[:, i] = pair_dist(rand_pair=rand_pair, sub_files=sub_grp1_files, sub_data=sub_data1, len_time=len_time) S1 = 0.5 * np.mean(fmri_diff1, axis=1) print('Generating random pairs from group 2') pairs_grp2, num_pairs2 = gen_rand_pairs(num_sub=len(sub_grp2_files), num_pairs=num_pairs) fmri_diff2 = sp.zeros((num_vert, num_pairs2)) # Preload data for group 2 print('Reading data for group 2') sub_data2 = np.zeros((len_time, num_vert, len(sub_grp2_files))) for i, fname in enumerate(tqdm(sub_grp2_files)): sub2_data = spio.loadmat(fname)['dtseries'][:, :len_time].T sub_data2[:, :, i], _, _ = normalizeData(sub2_data) print('Compute differences in fMRI of random pairs from group 2') for i, rand_pair in enumerate(tqdm(pairs_grp2)): fmri_diff2[:, i] = pair_dist(rand_pair=rand_pair, sub_files=sub_grp2_files, sub_data=sub_data2, len_time=len_time) S2 = 0.5 * np.mean(fmri_diff2, axis=1) # We will perform f-test test (modified in a pairwise stats) # n1 = sub_data1.shape[2] * len_time n2 = sub_data2.shape[2] * len_time F = S1 / (S2 + 1e-16) pval = 1 - ss.f.cdf(F, n1 - 1, n2 - 1) return F, pval
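# Minimal reference sketch of the variance-ratio F-test applied above:
# F = S1 / S2 with (n1 - 1, n2 - 1) degrees of freedom and a right-tail
# p-value, assuming `ss` above refers to scipy.stats. Inputs below are
# made-up illustration values.
from scipy import stats

def f_test_right_tail(S1, S2, n1, n2, eps=1e-16):
    F = S1 / (S2 + eps)
    pval = 1.0 - stats.f.cdf(F, n1 - 1, n2 - 1)
    return F, pval

print(f_test_right_tail(S1=2.0, S2=1.0, n1=50, n2=60))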
def ldpred_gibbs(beta_hats, genotypes=None, start_betas=None, h2=None, n=1000, ld_radius=100, num_iter=60, burn_in=10, p=None, zero_jump_prob=0.05, ld_dict=None, reference_ld_mats=None, ld_boundaries=None, verbose=False): """ LDpred (Gibbs Sampler) """ t0 = time.time() m = len(beta_hats) # If no starting values for effects were given, then use the infinitesimal model starting values. if start_betas is None: print 'Initializing LDpred effects with posterior mean LDpred-inf effects.' print 'Calculating LDpred-inf effects.' start_betas = LDpred_inf.ldpred_inf( beta_hats, genotypes=genotypes, reference_ld_mats=reference_ld_mats, h2=h2, n=n, ld_window_size=2 * ld_radius, verbose=False) curr_betas = sp.copy(start_betas) curr_post_means = sp.zeros(m) avg_betas = sp.zeros(m) # Iterating over effect estimates in sequential order iter_order = sp.arange(m) # Setting up the marginal Bayes shrink Mp = m * p hdmp = (h2 / Mp) hdmpn = hdmp + 1.0 / n hdmp_hdmpn = (hdmp / hdmpn) c_const = (p / sp.sqrt(hdmpn)) d_const = (1 - p) / (sp.sqrt(1.0 / n)) for k in range(num_iter): # Big iteration # Force an alpha shrink if estimates are way off compared to heritability estimates. (Improves MCMC convergence.) h2_est = max(0.00001, sp.sum(curr_betas**2)) alpha = min(1 - zero_jump_prob, 1.0 / h2_est, (h2 + 1 / sp.sqrt(n)) / h2_est) rand_ps = sp.random.random(m) rand_norms = stats.norm.rvs(0, (hdmp_hdmpn) * (1 / n), size=m) if ld_boundaries is None: for i, snp_i in enumerate(iter_order): start_i = max(0, snp_i - ld_radius) focal_i = min(ld_radius, snp_i) stop_i = min(m, snp_i + ld_radius + 1) # Local LD matrix D_i = ld_dict[snp_i] # Local (most recently updated) effect estimates local_betas = curr_betas[start_i:stop_i] # Calculate the local posterior mean, used when sampling. local_betas[focal_i] = 0 res_beta_hat_i = beta_hats[snp_i] - sp.dot(D_i, local_betas) b2 = res_beta_hat_i**2 d_const_b2_exp = d_const * sp.exp(-b2 * n / 2.0) if sp.isreal(d_const_b2_exp): numerator = c_const * sp.exp(-b2 / (2.0 * hdmpn)) if sp.isreal(numerator): if numerator == 0: postp = 0 else: postp = numerator / (numerator + d_const_b2_exp) assert sp.isreal( postp ), 'The posterior mean is not a real number? Possibly due to problems with summary stats, LD estimates, or parameter settings.' else: postp = 0 else: postp = 1 curr_post_means[snp_i] = hdmp_hdmpn * postp * res_beta_hat_i if rand_ps[i] < postp * alpha: # Sample from the posterior Gaussian dist. proposed_beta = rand_norms[i] + hdmp_hdmpn * res_beta_hat_i else: # Sample 0 proposed_beta = 0 curr_betas[snp_i] = proposed_beta # UPDATE BETA else: for i, snp_i in enumerate(iter_order): start_i = ld_boundaries[snp_i][0] stop_i = ld_boundaries[snp_i][1] focal_i = snp_i - start_i # Local LD matrix D_i = ld_dict[snp_i] # Local (most recently updated) effect estimates local_betas = curr_betas[start_i:stop_i] # Calculate the local posterior mean, used when sampling. local_betas[focal_i] = 0 res_beta_hat_i = beta_hats[snp_i] - sp.dot(D_i, local_betas) b2 = res_beta_hat_i**2 d_const_b2_exp = d_const * sp.exp(-b2 * n / 2.0) if sp.isreal(d_const_b2_exp): numerator = c_const * sp.exp(-b2 / (2.0 * hdmpn)) if sp.isreal(numerator): if numerator == 0: postp = 0 else: postp = numerator / (numerator + d_const_b2_exp) assert sp.isreal( postp ), 'Posterior mean is not a real number? Possibly due to problems with summary stats, LD estimates, or parameter settings.' 
else: postp = 0 else: postp = 1 curr_post_means[snp_i] = hdmp_hdmpn * postp * res_beta_hat_i if rand_ps[i] < postp * alpha: # Sample from the posterior Gaussian dist. proposed_beta = rand_norms[i] + hdmp_hdmpn * res_beta_hat_i else: # Sample 0 proposed_beta = 0 curr_betas[snp_i] = proposed_beta # UPDATE BETA if verbose: sys.stdout.write('\b\b\b\b\b\b\b%0.2f%%' % (100.0 * (min(1, float(k + 1) / num_iter)))) sys.stdout.flush() if k >= burn_in: avg_betas += curr_post_means # Averaging over the posterior means instead of samples. avg_betas = avg_betas / float(num_iter - burn_in) t1 = time.time() t = (t1 - t0) if verbose: print '\nTook %d minutes and %0.2f seconds' % (t / 60, t % 60) return {'betas': avg_betas, 'inf_betas': start_betas}
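# Hedged sketch of the single-SNP spike-and-slab update at the core of the
# Gibbs sampler above: given a residualized marginal effect (beta_hat with the
# LD contribution of neighbouring SNPs removed), compute the posterior
# probability of being causal and the shrunken posterior mean. The constants
# mirror hdmp / hdmpn / c_const / d_const above; the inputs are illustrative.
import numpy as np

def single_snp_posterior(res_beta_hat, h2, n, m, p):
    hdmp = h2 / (m * p)                 # prior effect variance of a causal SNP
    hdmpn = hdmp + 1.0 / n
    c_const = p / np.sqrt(hdmpn)
    d_const = (1.0 - p) / np.sqrt(1.0 / n)
    b2 = res_beta_hat ** 2
    numerator = c_const * np.exp(-b2 / (2.0 * hdmpn))
    denominator = numerator + d_const * np.exp(-b2 * n / 2.0)
    postp = 0.0 if denominator == 0 else numerator / denominator
    post_mean = (hdmp / hdmpn) * postp * res_beta_hat
    return postp, post_mean

print(single_snp_posterior(res_beta_hat=0.01, h2=0.5, n=100000, m=1000000, p=0.01))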
import scipy as sp

from planeta import Planeta

'''
PART 2
This script plots the energy and the trajectory using Verlet, version 2.
For the advanced Verlet case with alpha = 0 and a very small dt the result
is very precise: the energy stays essentially constant.
'''

'''Initial conditions: define the Planeta object, the time step and the
zero-filled arrays for the different variables.'''
vy0 = 0.3
condicion_inicial = sp.array([10, 0, 0, vy0])
p = Planeta(condicion_inicial)

t_final = 6000
pasos = 50000
dt = t_final / float(pasos)
x = sp.zeros(pasos)
y = sp.zeros(pasos)
vx = sp.zeros(pasos)
vy = sp.zeros(pasos)
energia = sp.zeros(pasos)

[x[0], y[0], vx[0], vy[0]] = condicion_inicial
energia[0] = p.energia_total()

'''The second value of the current state vector is obtained with RK4 (in this
case), since verlet2 needs it to start the iteration.'''
p.avanza_rk4(dt)
resultados = p.y_actual
x[1] = resultados[0]
y[1] = resultados[1]
vx[1] = resultados[2]
vy[1] = resultados[3]
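# Hedged sketch of one velocity-Verlet step for an inverse-square central
# force, as a standalone illustration of the kind of integrator the Planeta
# class provides; this is not the Planeta API, just a generic sketch with
# arbitrary units (GM = 1).
import numpy as np

def accel(pos, GM=1.0):
    r = np.linalg.norm(pos)
    return -GM * pos / r ** 3

def verlet_step(pos, vel, dt, GM=1.0):
    a0 = accel(pos, GM)
    new_pos = pos + vel * dt + 0.5 * a0 * dt ** 2
    a1 = accel(new_pos, GM)
    new_vel = vel + 0.5 * (a0 + a1) * dt
    return new_pos, new_vel

pos, vel = np.array([10.0, 0.0]), np.array([0.0, 0.3])
for _ in range(10):
    pos, vel = verlet_step(pos, vel, dt=0.12)
print(pos, vel)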
def randpairs_regression_simulation(bfp_path, sub_files, reg_var, num_pairs=2000, nperm=1000, len_time=235, num_proc=4, pearson_fdr_test=False): """ Perform regression stats based on square distance between random pairs """ # Get the number of vertices from a file num_vert = spio.loadmat(sub_files[0])['dtseries'].shape[0] labs = spio.loadmat( '/ImagePTE1/ajoshi/code_farm/bfp/supp_data/USCLobes_grayordinate_labels.mat' )['labels'] roi = (labs == 200) # R. Parietal Lobe pairs, num_pairs = gen_rand_pairs(num_sub=len(sub_files), num_pairs=num_pairs) fmri_diff = sp.zeros((num_vert, num_pairs)) regvar_diff = sp.zeros(num_pairs) if num_proc > 1: pool = Pool(num_proc) results = pool.imap( partial(pair_dist_simulation, sub_files=sub_files, reg_var=reg_var, len_time=len_time, roi=roi), pairs) ind = 0 for res in results: fmri_diff[:, ind] = res[0] regvar_diff[ind] = res[1] ind += 1 else: for ind in tqdm(range(len(pairs))): fmri_diff[:, ind], regvar_diff[ind] = pair_dist_simulation( sub_files=sub_files, reg_var=reg_var, len_time=len_time, rand_pair=pairs[ind], roi=roi) corr_pval2 = 0 if not pearson_fdr_test: print('Performing Permutation test with MAX statistic') corr_pval, corr_pval2, _ = corr_perm_test(X_pairs=fmri_diff.T, Y_pairs=regvar_diff, reg_var=reg_var, num_sub=len(sub_files), nperm=nperm) else: print('Performing Pearson correlation with FDR testing') corr_pval, corr_pval2 = corr_pearson_fdr(X_pairs=fmri_diff.T, Y_pairs=regvar_diff, reg_var=reg_var, num_sub=len(sub_files), nperm=nperm) corr_pval[sp.isnan(corr_pval)] = .5 labs = spio.loadmat( bfp_path + '/supp_data/USCBrain_grayordinate_labels.mat')['labels'].squeeze() labs[sp.isnan(labs)] = 0 if len(corr_pval) == len(labs): corr_pval[labs == 0] = 0.5 return corr_pval, corr_pval2
def fft_to_rfft(self, input):
    # Pack a half-spectrum of length N // 2 + 1 into a length-N array by
    # interleaving: even slots take input[:-1], odd slots take input[1:].
    assert input.shape[0] == self.N // 2 + 1
    output = scipy.zeros((self.N,))
    output[0::2] = input[:-1]
    output[1::2] = input[1:]
    return output
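# Quick check of the length relation the conversion above relies on: an
# N-point real signal has N // 2 + 1 rfft bins (numpy.fft.rfft), and the
# original signal is recovered by the inverse transform.
import numpy as np

N = 16
signal = np.random.randn(N)
half_spectrum = np.fft.rfft(signal)
print(half_spectrum.shape)                               # (N // 2 + 1,) == (9,)
print(np.allclose(np.fft.irfft(half_spectrum, n=N), signal))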
def randpair_groupdiff(sub_grp1_files, sub_grp2_files, num_pairs, len_time=255): print('Grp diff') num_vert = spio.loadmat(sub_grp1_files[0])['dtseries'].shape[0] print('Generating random pairs from group 1') pairs_grp1, num_pairs1 = gen_rand_pairs(num_sub=len(sub_grp1_files), num_pairs=num_pairs) fmri_diff1 = sp.zeros((num_vert, num_pairs1)) # Preload data This only slighly faster, better is to load on the fly and multiprocess print('Reading data for group 1') sub_data1 = np.zeros((len_time, num_vert, len(sub_grp1_files))) for i, fname in enumerate(tqdm(sub_grp1_files)): sub1_data = spio.loadmat(fname)['dtseries'][:, :len_time].T sub_data1[:, :, i], _, _ = normalizeData(sub1_data) print('Compute differences in fMRI of random pairs from group 1') for i, rand_pair in enumerate(tqdm(pairs_grp1)): fmri_diff1[:, i] = pair_dist(rand_pair=rand_pair, sub_files=sub_grp1_files, sub_data=sub_data1, len_time=len_time) S1 = 0.5 * np.mean(fmri_diff1, axis=1) print('Generating random pairs from group 2') pairs_grp2, num_pairs2 = gen_rand_pairs(num_sub=len(sub_grp2_files), num_pairs=num_pairs) fmri_diff2 = sp.zeros((num_vert, num_pairs2)) # Preload data for group 2 print('Reading data for group 2') sub_data2 = np.zeros((len_time, num_vert, len(sub_grp2_files))) for i, fname in enumerate(tqdm(sub_grp2_files)): sub2_data = spio.loadmat(fname)['dtseries'][:, :len_time].T sub_data2[:, :, i], _, _ = normalizeData(sub2_data) print('Compute differences in fMRI of random pairs from group 2') for i, rand_pair in enumerate(tqdm(pairs_grp2)): fmri_diff2[:, i] = pair_dist(rand_pair=rand_pair, sub_files=sub_grp2_files, sub_data=sub_data2, len_time=len_time) S2 = 0.5 * np.mean(fmri_diff2, axis=1) print('Generating random pairs from all subjects (grp1 + grp2)') # Generating random pairs. For large group this may allocate huge amount of memory, # use the following solution in that case # https://stackoverflow.com/questions/36779729/shuffling-combinations-without-converting-iterable-itertools-combinations-to-l all_pairs = np.array( list(product(range(len(sub_grp1_files)), range(len(sub_grp2_files))))) sp.random.shuffle(all_pairs) all_pairs = all_pairs[:num_pairs, :] fmri_diff = sp.zeros((num_vert, all_pairs.shape[0])) print('Compute differences in fMRI of random pairs from group1 to group 2') for i, rand_pair in enumerate(tqdm(all_pairs)): fmri_diff[:, i] = pair_dist_two_groups(rand_pair=rand_pair, sub_grp1_files=sub_grp1_files, sub_grp2_files=sub_grp2_files, sub_data1=sub_data1, sub_data2=sub_data2, len_time=len_time) # We will perform Welch's t test (modified in a pairwise stats) # https://en.wikipedia.org/wiki/Welch%27s_t-test n1 = sub_data1.shape[2] n2 = sub_data2.shape[2] tscore = np.sqrt((0.5 * np.mean(fmri_diff, axis=1) - (S1 * n1 + S2 * n2) / (n1 + n2))) / np.sqrt(S1 / n1 + S1 / n2 + 1e-6) tscore[np.isnan(tscore)] = 0 dof = (S1 / n1 + S2 / n2 + 1e-6)**2 / (S1**2 / ((n1**2) * (n1 - 1)) + S2**2 / ((n2**2) * (n2 - 1)) + 1e-6) pval = sp.stats.t.sf(tscore, dof) * 2 # two-sided pvalue = Prob(abs(t)>tt) return tscore, pval
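# Reference sketch of the standard Welch's t-test that the pairwise statistic
# above modifies: the denominator combines S1/n1 and S2/n2, and
# scipy.stats.ttest_ind with equal_var=False reproduces the textbook t and the
# Welch-Satterthwaite degrees of freedom. The data below are random
# illustration values, not from the fMRI pipeline.
import numpy as np
from scipy import stats

rng = np.random.RandomState(0)
a = rng.randn(40) * 1.0
b = rng.randn(60) * 2.0 + 0.5

t_manual = (a.mean() - b.mean()) / np.sqrt(a.var(ddof=1) / len(a) +
                                           b.var(ddof=1) / len(b))
t_scipy, p_scipy = stats.ttest_ind(a, b, equal_var=False)
print(t_manual, t_scipy, p_scipy)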
count1 = 0

# Get number of subjects
nsub = len(lst)
lst2 = list([])
for fname in lst:
    if not os.path.isfile(fname):
        continue
    df = spio.loadmat(fname)  # , 'r')
    d = df['dtseries']
    # dataR = df['dataR']
    # d = sp.concatenate((dataL, dataR), axis=1)
    d, _, _ = normalizeData(d)
    if count1 == 0:
        sub_data = sp.zeros((d.shape[0], d.shape[1], nsub))
    sub_data[:, :, count1] = d
    lst2.append(fname)
    count1 += 1
    print(count1, )

#%% Compute pairwise distance
nSub = count1
sub_data = sub_data[:, :, :nSub]
print(nSub)

dist_all_orig = sp.zeros([nSub, nSub])
dist_all_rot = dist_all_orig.copy()
#sub_data_orig = sub_data.copy()
def gash79(Pg=scipy.array([]), ER=float, S=float, St=float, p=float, pt=float):
    '''
    Function to calculate precipitation interception loss from daily
    precipitation values and vegetation parameters.

    Parameters:
        - Pg: daily rainfall data [mm].
        - ER: evaporation percentage of total rainfall [mm h-1].
        - S: storage capacity canopy [mm].
        - St: stem storage capacity [mm].
        - p: direct throughfall [mm].
        - pt: stem precipitation [mm].

    Returns:
        - Pg: Daily rainfall [mm].
        - TF: through fall [mm].
        - SF: stemflow [mm].
        - Ei: Interception [mm].

    References
    ----------
    J.H.C. Gash, An analytical model of rainfall interception by forests,
    Quarterly Journal of the Royal Meteorological Society, 1979, 105,
    pp. 43-55.

    Examples
    --------
        >>> gash79(12.4,0.15,1.3,0.2,0.2,0.02)
        (12.4, 8.4778854123725971, 0, 3.9221145876274024)
        >>> gash79(60.0,0.15,1.3,0.2,0.2,0.02)
        (60.0, 47.033885412372598, 0, 12.966114587627404)

    '''
    # Test input array/value
    Pg = meteolib._arraytest(Pg)

    # Determine length of array Pg
    l = scipy.size(Pg)
    # Check if we have a single precipitation value or an array
    if l < 2:
        # Dealing with single value...
        # PGsat calculation (for the saturation of the canopy)
        PGsat = -(1 / ER * S) * scipy.log((1 - (ER / (1 - p - pt))))

        # Set initial values to zero
        Ecan = 0.
        Etrunk = 0.

        # Calculate interception for different storm sizes
        if (Pg < PGsat and Pg > 0):
            Ecan = (1 - p - pt) * Pg
            if (Pg > St / pt):
                Etrunk = St + pt * Pg
            Ei = Ecan + Etrunk
        if (Pg > PGsat and Pg < St / pt):
            Ecan = ((((1 - p - pt) * PGsat) - S) + (ER * (Pg - PGsat)) + S)
            Etrunk = 0.
            Ei = Ecan + Etrunk
        if (Pg > PGsat and Pg > (St / pt)):
            Ecan = ((((1 - p - pt) * PGsat) - S) + (ER * (Pg - PGsat)) + S +
                    (St + pt * Pg))
            Etrunk = St + pt * Pg
            Ei = Ecan + Etrunk
        TF = Pg - Ei
        SF = 0
    else:
        # Define variables and constants
        n = scipy.size(Pg)
        TF = scipy.zeros(n)
        SF = scipy.zeros(n)
        Ei = scipy.zeros(n)
        Etrunk = scipy.zeros(n)

        # Set results to zero if rainfall Pg is zero
        TF[Pg == 0] = 0.
        SF[Pg == 0] = 0.
        Ei[Pg == 0] = 0.
        Etrunk[Pg == 0] = 0.

        # PGsat calc (for the saturation of the canopy)
        PGsat = -(1 / ER * S) * scipy.log((1 - (ER / (1 - p - pt))))

        # Process rainfall series
        for i in range(0, n):
            Ecan = 0.
            Etrunk = 0.
            if (Pg[i] < PGsat and Pg[i] > 0):
                Ecan = (1 - p - pt) * Pg[i]
                if (Pg[i] > St / pt):
                    Etrunk = St + pt * Pg[i]
                Ei[i] = Ecan + Etrunk
            if (Pg[i] > PGsat and Pg[i] < St / pt):
                Ecan = ((((1 - p - pt) * PGsat) - S) +
                        (ER * (Pg[i] - PGsat)) + S)
                Etrunk = 0.
                Ei[i] = Ecan + Etrunk
            if (Pg[i] > PGsat and Pg[i] > (St / pt)):
                Ecan = ((((1 - p - pt) * PGsat) - S) +
                        (ER * (Pg[i] - PGsat)) + S + (St + pt * Pg[i]))
                Etrunk = St + pt * Pg[i]
                Ei[i] = Ecan + Etrunk
            TF[i] = Pg[i] - Ei[i]
    return Pg, TF, SF, Ei
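# Worked check of the canopy-saturation rainfall PGsat used in gash79 above:
# PGsat = -(S / ER) * ln(1 - ER / (1 - p - pt)), evaluated with the parameter
# values from the docstring examples.
import math

ER, S, p, pt = 0.15, 1.3, 0.2, 0.02
PGsat = -(1.0 / ER * S) * math.log(1.0 - (ER / (1.0 - p - pt)))
print(PGsat)   # rainfall (mm) needed to saturate the canopy, roughly 1.85 mm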
def scan(bfile, Y, cov, null, sets, i0, i1, perm_i, resfile, F, colCovarType_r='lowrank', rank_r=1, factr=1e7, unique_variants=False, standardize=False): if perm_i is not None: print(('Generating permutation (permutation %d)' % perm_i)) np.random.seed(perm_i) perm = np.random.permutation(Y.shape[0]) mtSet = limix.MTSet(Y=Y, S_R=cov['eval'], U_R=cov['evec'], F=F, rank=rank_r) mtSet.setNull(null) reader = BedReader(bfile) wnd_ids = sp.arange(i0, i1) LLR = sp.zeros(sets.shape[0]) for wnd_i in wnd_ids: _set = sets.ix[wnd_i] print('.. set %d: %s' % (wnd_i, _set['setid'])) Xr = reader.getGenotypes(pos_start=_set['start'], pos_end=_set['end'], chrom=_set['chrom'], impute=True) if unique_variants: Xr = f_uni_variants(Xr) if standardize: Xr -= Xr.mean(0) Xr /= Xr.std(0) else: # encoding minor as 0 p = 0.5 * Xr.mean(0) Xr[:, p > 0.5] = 2 - Xr[:, p > 0.5] if perm_i is not None: Xr = Xr[perm, :] # multi trait set test fit RV = mtSet.optimize(Xr, factr=factr) LLR[wnd_i] = RV['LLR'][0] # export results sets['LLR'] = LLR sets.to_csv(resfile, sep='\t', index=False)
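# Minimal sketch of the minor-allele recoding used above when standardize is
# False: for columns where the allele frequency p = mean(X) / 2 exceeds 0.5,
# genotypes are flipped to 2 - X so the counted allele is always the minor
# one. The genotype matrix below is illustrative.
import numpy as np

X = np.array([[2, 0, 1],
              [2, 1, 0],
              [1, 0, 0]], dtype=float)
p = 0.5 * X.mean(axis=0)
X[:, p > 0.5] = 2 - X[:, p > 0.5]
print(p)
print(X)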
with open("./D_mel_wt__atlas_r2.vpc") as infile: results = read_vpc(infile) # In[6]: print results[0]["column"] # In[ ]: import scipy as S data = results[1] x = data[:, 0] y = data[:, 1] z = data[:, 2] colnum = results[0]["column"].index("eve__3") - 1 colors = S.vstack( [data[:, colnum], S.zeros(data.shape[0]), S.zeros(data.shape[0])]).T colors -= colors.min() colors *= S.power(colors.max(), -1.0) import matplotlib.pyplot as plt fig = plt.figure() ax = fig.add_subplot(1, 1, 1) ax.scatter(x, z, s=15.0, c=colors) plt.show()
def __init__(self, module):
    Wrapper.__init__(self, module)
    self.stateArray = zeros(module.numRows, dtype=int)
def main(): # parse cmd arguments parser = getParser() parser.parse_args() args = getArguments(parser) # prepare logger logger = Logger.getInstance() if args.debug: logger.setLevel(logging.DEBUG) elif args.verbose: logger.setLevel(logging.INFO) # check if output image exists if not args.force: if os.path.exists(args.output): logger.warning( 'The output image {} already exists. Exiting.'.format( args.output)) exit(-1) # load input images region_image_data, reference_header = load(args.region) markers_image_data, _ = load(args.markers) gradient_image_data, _ = load(args.gradient) # split marker image into fg and bg images logger.info('Extracting foreground and background markers...') bgmarkers_image_data = scipy.zeros(markers_image_data.shape, scipy.bool_) bgmarkers_image_data[markers_image_data == 2] = True markers_image_data[markers_image_data != 1] = 0 fgmarkers_image_data = markers_image_data.astype(scipy.bool_) # check if all images dimensions are the same shape if not (gradient_image_data.shape == region_image_data.shape == fgmarkers_image_data.shape == bgmarkers_image_data.shape): logger.critical( 'Not all of the supplied images are of the same shape.') raise ArgumentError( 'Not all of the supplied images are of the same shape.') # collect cut objects cut_xy = __get_bg_bounding_pipe(bgmarkers_image_data) cut_z = __parse_contour_list(args.contourlist, args.zoom, args.ctype) # cut volumes old_size = region_image_data.shape gradient_image_data = gradient_image_data[cut_xy][cut_z] region_image_data = region_image_data[cut_xy][cut_z] fgmarkers_image_data = fgmarkers_image_data[cut_xy][cut_z] bgmarkers_image_data = bgmarkers_image_data[cut_xy][cut_z] # split the image if required, else relabel if args.split: logger.info('Split and relabel input image...') if 't' == args.split: region_image_data = __split_along_time(region_image_data) elif 'z' == args.split: region_image_data = __split_along_space(region_image_data) else: region_image_data = __split_along_space(region_image_data) region_image_data = __split_along_time(region_image_data) else: # recompute the label ids to start from id = 1 logger.info('Relabel input image...') region_image_data = filter.relabel(region_image_data) # generate graph logger.info('Preparing graph...') gcgraph = graphcut.graph_from_labels( region_image_data, fgmarkers_image_data, bgmarkers_image_data, boundary_term=graphcut.energy_label.boundary_stawiaski, boundary_term_args=( gradient_image_data)) # second is directedness of graph , 0) logger.info('Removing images that are not longer required from memory...') del fgmarkers_image_data del bgmarkers_image_data del gradient_image_data # execute min-cut logger.info('Executing min-cut...') maxflow = gcgraph.maxflow() logger.debug('Maxflow is {}'.format(maxflow)) # apply results to the region image logger.info('Applying results...') mapping = [ 0 ] # no regions with id 1 exists in mapping, entry used as padding mapping.extend( map( lambda x: 0 if gcgraph.termtype.SINK == gcgraph.what_segment( int(x) - 1) else 1, scipy.unique(region_image_data))) region_image_data = filter.relabel_map(region_image_data, mapping) # generating final image by increasing the size again output_image_data = scipy.zeros(old_size, dtype=scipy.bool_) output_image_data[cut_xy][cut_z] = region_image_data # save resulting mask save(output_image_data, args.output, reference_header, args.force) logger.info('Successfully terminated.')