def __init__(self,n_components,comp=None,centers=None,covars=None):
    """Store the component count and optional initial parameters.

    Parameters
    ----------
    n_components : number of components.
    comp, centers, covars : optional initial values.  Each is stored as a
        copy made with ``sp.copy`` (so ``None`` is stored as whatever
        ``sp.copy(None)`` returns, exactly as before).

    ``self.n_dim`` is only set when ``centers`` is given; it is read from
    ``centers.shape[1]``.
    """
    self.n_components = n_components
    self.comp = sp.copy(comp)
    self.centers = sp.copy(centers)
    self.covars = sp.copy(covars)
    # Identity test: ``centers != None`` on an array is an elementwise
    # comparison, which makes the ``if`` ambiguous.
    if centers is not None:
        self.n_dim = centers.shape[1]
def _read_sky_logfile(self):
    """Parse the ``dtau_dust`` / ``dtau_line`` records of the sky log file.

    Reads ``self.skylogfile``, keeps the lines starting with ``dtau_dust``
    or ``dtau_line`` and converts the whitespace-separated fields after the
    tag to floats.  Field 0 is used as the transition id, field 1 as the
    shell id and field 2 as the value that gets averaged.

    The result is stored on ``self.Tau``, a small namespace class with
    ``dtau_dust``, ``dtau_line`` and ``dtau_tot`` (dicts keyed by transition,
    one entry per shell) plus the ``transitions`` and ``shells`` arrays.
    """
    #TODO : expand to read errors, msgs etc
    # read in the whole sky log file, shouldn't be big; the context manager
    # closes the handle even if reading fails
    with open(self.skylogfile) as f:
        lines = f.readlines()

    dust = [ln.split()[1:] for ln in lines if ln.startswith('dtau_dust')]
    line = [ln.split()[1:] for ln in lines if ln.startswith('dtau_line')]
    dust = _sp.array(dust, dtype='float')
    line = _sp.array(line, dtype='float')

    transitions = _sp.unique(dust[:,0])
    shells = _sp.unique(dust[:,1])

    def _mean_dtau(records, t, s):
        # mean of field 2 over the records of transition t in shell s
        return _sp.mean([rec[2] for rec in records
                         if ((rec[0]==t) * (rec[1]==s))])

    dtau_dust = dict()
    dtau_line = dict()
    dtau_tot = dict()
    for t in transitions:
        d = [_mean_dtau(dust, t, s) for s in shells]
        l = [_mean_dtau(line, t, s) for s in shells]
        dtau_dust[t] = _sp.copy(d)
        dtau_line[t] = _sp.copy(l)
        dtau_tot[t] = _sp.array(d) + _sp.array(l)

    # create object to store in main class
    class Tau(object):pass
    Tau.dtau_dust = dtau_dust
    Tau.dtau_line = dtau_line
    Tau.dtau_tot = dtau_tot
    Tau.transitions = transitions
    Tau.shells = shells
    self.Tau = Tau
def __init__(self,fitness_func,npop = 20,w = 0.5,c1 = 2.01,c2 = 2.02,debug = False):
    """Set up the swarm, either from a dump file or from scratch.

    Parameters
    ----------
    fitness_func : callable stored as ``self.fitness_func``.
    npop : swarm size (stored as the integer ``self.ns``).
    w, c1, c2 : coefficients stored unchanged on the instance.
    debug : flag stored as ``self.debug``.

    If ``dump_pso.pkl`` exists in the current directory the swarm state
    (``pop``, ``fit``, ``v``, ``bfg``, ``bfg_fitness``, ``bfp``,
    ``bfp_fitness``) is restored from it, in that order.  Otherwise a new
    population is generated with ``self.gera_individuo`` and evaluated with
    ``self.avalia_aptidao``.
    """
    seed()
    self.debug = debug
    self.c1 = c1
    self.c2 = c2
    self.w = w
    self.ns = int(npop)
    self.fitness_func = fitness_func

    # build the initial population
    if os.path.isfile("dump_pso.pkl"):
        # NOTE(review): unpickling executes arbitrary code; only safe while
        # the dump file is produced by this program itself.
        # NOTE(review): the mode is kept as 'r' to stay consistent with the
        # code that writes the dump (not visible here); binary pickle
        # protocols would need 'rb' on both sides.
        # The context manager closes the file; it used to stay open.
        with open("dump_pso.pkl",'r') as dump_fd:
            self.pop = cPickle.load(dump_fd)
            self.fit = cPickle.load(dump_fd)
            self.v = cPickle.load(dump_fd)
            self.bfg = cPickle.load(dump_fd)
            self.bfg_fitness = cPickle.load(dump_fd)
            self.bfp = cPickle.load(dump_fd)
            self.bfp_fitness = cPickle.load(dump_fd)
    else:
        self.pop = scipy.array([self.gera_individuo() for i in scipy.arange(self.ns)])
        self.fit = scipy.zeros(self.ns)
        # evaluate the fitness of the whole population
        for i in scipy.arange(self.ns):
            self.fit[i],self.pop[i] = self.avalia_aptidao(self.pop[i])
        # initial velocities
        self.v = scipy.zeros((self.ns,Dim))
        # best position seen by each particle
        self.bfp = scipy.copy(self.pop)
        self.bfp_fitness = scipy.copy(self.fit)
        # best position seen by the whole swarm (lowest fitness value)
        self.bfg = self.pop[self.bfp_fitness.argmin()].copy()
        self.bfg_fitness = self.bfp_fitness.min().copy()
def __init__(self, field, system_dir, nprocs=4, **kwargs):
    """Capture the field data and settings used by the later steps.

    Parameters
    ----------
    field : object providing ``create_point_data()``, ``clone()`` and the
        attributes ``nx``, ``nz``, ``data_vector``, ``data_map`` and
        ``point_data``.
    system_dir : stored as ``self.system_dir``.
    nprocs : stored as ``self.nprocs``.
    **kwargs : ``offset_field`` (optional field whose map and point data
        replace the zero offsets), ``avg_fact`` (default 1.0) and
        ``mesh_params`` (default ``{}``).
    """
    super().__init__()

    # the point data has to exist before it can be referenced below
    field.create_point_data()
    self.nx = field.nx
    self.nz = field.nz
    self.data_vector = field.data_vector
    self.data_map = field.data_map
    self.point_data = field.point_data
    self._field = field.clone()
    self._mask = sp.ones(self.data_map.shape, dtype=bool)

    # offsets default to zero and are overridden by an optional offset field
    self.offset_map = sp.zeros(self.data_map.shape)
    self.offset_points = sp.zeros(self.point_data.shape)
    offset_field = kwargs.get('offset_field', None)
    if offset_field:
        offset_field.create_point_data()
        self.offset_map = sp.copy(offset_field.data_map)
        self.offset_points = sp.copy(offset_field.point_data)

    self.system_dir = system_dir
    self.nprocs = nprocs
    self.avg_fact = kwargs.get('avg_fact', 1.0)
    self.mesh_params = kwargs.get('mesh_params', {})
    self.merge_groups = []
def initialize(self,data,random=False):
    """Choose starting values for ``comp``, ``centers`` and ``covars``.

    Parameters
    ----------
    data : 2-D array; ``data.shape[1]`` becomes ``self.n_dim``.
    random : if true, draw the centers uniformly inside the per-column range
        of the data, use the per-column variances as a shared diagonal
        covariance and perturb equal weights at random.  Otherwise run
        ``cluster.KMeans`` and derive the parameters from its clusters.

    Sets ``self.data``, ``self.n_dim``, ``self.comp``, ``self.centers`` and
    ``self.covars``.
    """
    self.data = data
    self.n_dim = data.shape[1]
    if random:
        mins = self.data.min(0)
        maxes = self.data.max(0)
        sds = sp.std(self.data, 0)
        centers = sp.zeros((self.n_components,self.n_dim))
        # drawn column by column, in the same order as before, so seeded
        # runs keep producing the same centers
        for i in range(self.n_dim):
            centers[:,i] = sp.random.uniform(mins[i],maxes[i],self.n_components)
        self.comp = sp.ones(self.n_components)/float(self.n_components) + sp.random.uniform(-1./self.n_components,1./self.n_components,self.n_components)
        self.comp /= sp.sum(self.comp)
        covars = sp.array([sp.diag(sds**2) for i in range(self.n_components)])
        self.centers = centers
        self.covars = covars
    else:
        clust = cluster.KMeans(self.n_components)
        clust.fit(self.data)
        self.centers = sp.copy(clust.cluster_centers_)
        labels = sp.copy(clust.labels_)
        self.covars = sp.zeros((self.n_components,self.n_dim,self.n_dim))
        self.comp = sp.zeros(self.n_components)
        n_samples = float(self.data.shape[0])
        for i in range(self.n_components):
            members = self.data[labels == i,:]
            count = members.shape[0]
            if count:
                # Covariance about the cluster center.  The previous
                # ``dot(temp.T, temp)`` was neither centred nor divided by
                # the number of members.
                centred = members - self.centers[i]
                self.covars[i,:,:] = sp.dot(centred.T,centred)/float(count)
            self.comp[i] = count/n_samples
def NumpyTensorInitializerForVacancy(gridShape, filename, vacancyfile=None):
    """
    Initialize a 10 component plasticity state by reading from a numpy "tofile" type file or two files.

    Parameters
    ----------
    gridShape : grid dimensions, appended to the leading component axes when
        reshaping the flat file contents.
    filename : file read with ``fromfile``.  Without ``vacancyfile`` it must
        hold all 10 components; with it, a 3 x 3 tensor per grid point.
    vacancyfile : optional second file holding one value per grid point.

    Returns
    -------
    The ``VacancyState`` whose order parameter field has been filled in.
    """
    # (row, column) position of each tensor component in the 3 x 3 layout
    # (previously bound to the name ``dict``, shadowing the builtin)
    tensor_index = {('x','x') : (0,0), ('x','y') : (0,1), ('x','z') : (0,2),
                    ('y','x') : (1,0), ('y','y') : (1,1), ('y','z') : (1,2),
                    ('z','x') : (2,0), ('z','y') : (2,1), ('z','z') : (2,2)}
    data = fromfile(filename)
    if vacancyfile is None:
        data = data.reshape([10] + list(gridShape))
    else:
        data = data.reshape([3,3] + list(gridShape))
        dataV = fromfile(vacancyfile)
        dataV = dataV.reshape(list(gridShape))
    state = VacancyState.VacancyState(gridShape)
    field = state.GetOrderParameterField()
    if vacancyfile is None:
        # single file: the components are stored in field.components order
        for i, component in enumerate(field.components):
            field[component] = copy(data[i])
    else:
        for component in field.components:
            # NOTE(review): x, y and z are names from module scope,
            # presumably the axis labels 'x', 'y', 'z' - confirm.
            if component[0] not in [x,y,z]:
                field[component] = copy(dataV)
            else:
                field[component] = copy(data[tensor_index[component]])
    return state
def copy_data(self, obj):
    r"""
    Transfers this field's data attributes to ``obj``.

    ``nx`` and ``nz`` are assigned directly; ``data_map``, ``data_vector``
    and ``point_data`` are duplicated with ``sp.copy`` so the two objects do
    not share arrays.
    """
    obj.nx, obj.nz = self.nx, self.nz
    for attr in ('data_map', 'data_vector', 'point_data'):
        setattr(obj, attr, sp.copy(getattr(self, attr)))
def clone(self):
    r"""
    Builds a new DataField that carries copies of this one's data.

    The public data attributes are transferred through ``copy_data``; the
    private ``_raw_data`` and ``_cell_interfaces`` arrays are duplicated
    with ``sp.copy``.
    """
    duplicate = DataField(None)
    self.copy_data(duplicate)
    for attr in ('_raw_data', '_cell_interfaces'):
        setattr(duplicate, attr, sp.copy(getattr(self, attr)))
    return duplicate
def wigner(psi,xvec,yvec,g=sqrt(2)):
    """Wigner function for a state vector or density matrix at points xvec+i*yvec.

    Parameters
    ----------
    psi : qobj
        A state vector or density matrix. Its ``type`` attribute must be
        'ket', 'bra' or 'oper'.
    xvec : array_like
        x-coordinates at which to calculate the Wigner function.
    yvec : array_like
        y-coordinates at which to calculate the Wigner function.
    g : float
        Scaling factor for a = 0.5*g*(x+iy), default g=sqrt(2).

    Returns
    --------
    W : array
        Values representing the Wigner function calculated over the specified range [xvec,yvec].

    Raises
    ------
    TypeError
        If ``psi.type`` is not 'ket', 'bra' or 'oper'.
    """
    # M is the number of basis states, read from the state's shape
    if psi.type=='ket' or psi.type=='oper':
        M=prod(psi.shape[0])
    elif psi.type=='bra':
        M=prod(psi.shape[1])
    else:
        raise TypeError('Input state is not a valid operator.')
    X,Y = meshgrid(xvec, yvec)
    amat = 0.5*g*(X + 1.0j*Y)
    wmat=zeros(shape(amat))
    # one complex grid per basis index; reused and overwritten in place below
    Wlist=array([zeros(shape(amat),dtype=complex) for k in range(M)])
    Wlist[0]=exp(-2.0*abs(amat)**2)/pi
    # pure states are converted to a density matrix so psi[m,n] is defined
    if psi.type=='ket' or psi.type=='bra':
        psi=ket2dm(psi)
    wmat=real(psi[0,0])*real(Wlist[0])
    # first row (m = 0): each off-diagonal term is counted twice, real part only
    for n in range(1,M):
        Wlist[n]=(2.0*amat*Wlist[n-1])/sqrt(n)
        wmat+= 2.0*real(psi[0,n]*Wlist[n])
    # remaining rows: Wlist is advanced from row m to row m+1 in place; temp
    # holds the row-m value that is about to be overwritten
    for m in range(M-1):
        temp=copy(Wlist[m+1])
        Wlist[m+1]=(2.0*conj(amat)*temp-sqrt(m+1)*Wlist[m])/sqrt(m+1)
        for n in range(m+1,M-1):
            temp2=(2.0*amat*Wlist[n]-sqrt(m+1)*temp)/sqrt(n+1)
            temp=copy(Wlist[n+1])
            Wlist[n+1]=temp2
        # diagonal term once, off-diagonal terms twice
        wmat+=real(psi[m+1,m+1]*Wlist[m+1])
        for k in range(m+2,M):
            wmat+=2.0*real(psi[m+1,k]*Wlist[k])
    return 0.5*wmat*g**2
def run():
    """Fit the housing data by gradient descent, then by the normal equation.

    For each learning rate the cost history is plotted and the price of a
    1650 sq-ft, 3 bedroom house is printed.  Afterwards the data is reloaded
    (without normalisation) and theta from ``graddesc.normalEqn`` is printed.
    """
    samples = sp.copy(housing_data)
    x = samples[:, [0, 1]]
    y = samples[:, [2]]
    m = sp.shape(y)[0]

    # Scale the features, then prepend the intercept column
    x, mu, sigma = graddesc.featureNormalize(x)
    x = sp.concatenate((sp.ones((m, 1)), x), axis=1)

    num_iters = 400
    for alpha in [0.01, 0.03, 0.1, 0.3, 1.0]:
        theta, J_history = graddesc.gradientDescent(
            x, y, sp.zeros((3, 1)), alpha, num_iters)

        # Cost against iteration number
        plt.plot(range(1, J_history.size+1), J_history, '-b')
        plt.title('Alpha = %f' % (alpha))
        plt.xlabel('Number of iterations')
        plt.ylabel('J')
        plt.xlim([0, 50])
        plt.show(block=True)

        # Price of a 1650 sq-ft, 3 br house, using the same normalisation
        house = sp.array([[1.0, 1650.0, 3.0]])
        house[0, 1:] = (house[0, 1:] - mu) / sigma
        price = house.dot(theta)
        print('The estimated price with alpha', alpha, 'is', price[0, 0])

    # Start again from the raw data for the closed-form solution
    samples = sp.copy(housing_data)
    y = samples[:, [2]]
    x = sp.concatenate((sp.ones((m, 1)), samples[:, [0, 1]]), axis=1)
    theta = graddesc.normalEqn(x, y)
    print('Theta computed from the normal equations:')
    print(theta)
def __init__(self, data, point_data):
    """Build the field from a 2-D data map and matching point data.

    Parameters
    ----------
    data : 2-D array; its shape is stored as ``(nz, nx)``.
    point_data : array whose first two dimensions must equal ``data.shape``.

    Raises
    ------
    ValueError
        If the leading two dimensions of ``point_data`` differ from the
        shape of ``data``.
    """
    super().__init__(None)

    map_shape = data.shape
    if map_shape != point_data.shape[0:2]:
        msg = 'data and point_data have different dimensions: {} != {}'
        raise ValueError(msg.format(data.shape, point_data.shape[:2]))

    self.nz, self.nx = map_shape
    self.data_map = sp.copy(data)
    self.data_vector = sp.ravel(data)
    self.point_data = sp.copy(point_data)

    self._raw_data = sp.copy(data)
    self._define_cell_interfaces()
def _site_percolation(self, pmask):
    r"""
    Private helper used by 'find_clusters2'.

    Labels clusters of pores connected through throats whose two end pores
    are both set in ``pmask``.  Returns ``(p_clusters, t_clusters)``; pores
    outside ``pmask`` and throats whose ends carry different labels get -1.
    """
    # A throat is open only when both of its end pores are set in pmask
    end_states = sp.copy(self['throat.conns'])
    for column in (0, 1):
        end_states[:, column] = pmask[end_states[:, column]]
    tmask = sp.all(end_states, axis=1)

    # Connected components of the graph made of the open throats
    adjacency = self.create_adjacency_matrix(data=tmask, sprsfmt='csr',
                                             dropzeros=True)
    labels = sprs.csgraph.connected_components(csgraph=adjacency,
                                               directed=False)[1]

    # Pores outside the mask become -1.  Isolated pores inside the mask keep
    # the label they were given above.
    p_clusters = (labels + 1)*(pmask) - 1

    # A throat takes its pores' label when both ends agree, otherwise -1
    end_labels = labels[self['throat.conns']]
    agree = (end_labels[:, 0] == end_labels[:, 1])
    t_clusters = end_labels[:, 0]
    t_clusters[~agree] = -1
    return (p_clusters, t_clusters)
def update_rule(Asp,states0,parameters,scale=0.0):
    """Return the states after one threshold update.

    ``parameters`` unpacks to ``(thresh, personal, a, b, c, scale0)``.  For
    every individual n the utility is

        a[n]*personal[n] + b[n]*(mean state of its neighbours) + c[n]*(mean state of everyone)

    with both means taken from the states before the update.  An individual
    whose state is below 1.0 gets 1.0 when its utility exceeds ``thresh[n]``
    and 0.0 otherwise; other individuals are left unchanged.  ``scale`` and
    ``scale0`` are not used.  ``states0`` itself is not modified.
    """
    thresh, personal, a, b, c, scale0 = parameters
    states = sp.copy(states0)
    population = len(states)

    # Asp times a vector: sum over neighbours / number of neighbours
    neighbour_sum = Asp*states
    degrees = Asp*sp.ones(population)
    neighbour_mean = [
        neighbour_sum[i]/degrees[i] if degrees[i] > 0 else 0.0
        for i in range(0, len(neighbour_sum))
    ]

    overall_mean = sum(states)/len(states)

    for n in range(0, population):
        utility = a[n]*personal[n] + b[n]*neighbour_mean[n] + c[n]*overall_mean
        if not states[n] < 1.0:
            continue
        states[n] = 0.0 if utility <= thresh[n] else 1.0
    return states
def sqrtm3(X):
    """Return a block-wise square root of ``X``, leaving ``X`` untouched.

    ``block_structure`` supplies the number of diagonal blocks ``m`` and the
    begin/end indices ``fb``/``fe`` of each block.  The diagonal blocks are
    replaced by ``twobytworoot`` of themselves.  The blocks above the
    diagonal are then filled one super-diagonal at a time by solving

        Uii * Uij + Uij * Ujj = Tij - sum_k Uik * Ukj    (i < k < j)

    NOTE(review): presumably X is expected in real (quasi-)upper-triangular
    Schur form - confirm against the caller.
    """
    M = sp.copy(X)
    m, fb, fe = block_structure(M)

    # Square roots of the diagonal blocks
    for i in range(0,m):
        blk = slice(fb[i], fe[i])
        M[blk, blk] = twobytworoot(M[blk, blk])

    for j in range(1,m):
        for i in range(0,m-j):
            JJ = i+j
            rows = slice(fb[i], fe[i])
            cols = slice(fb[JJ], fe[JJ])

            # Tnoto is a view into M, so the in-place subtraction below
            # updates M as well (as the original code did)
            Tnoto = M[rows, cols]
            for k in range(i+1,JJ):
                mid = slice(fb[k], fe[k])
                Tnoto -= (M[rows, mid]).dot(M[mid, cols])

            Uii = M[rows, rows]
            Ujj = M[cols, cols]
            if Tnoto.shape == (1,1):
                # scalar case: Uij = Tij / (Uii + Ujj)
                M[rows, cols] = Tnoto/(Uii + Ujj)
            else:
                # Sylvester equation written as a linear system on the
                # column-major vectorisation of Uij
                shapeUii = Uii.shape[0]
                shapeUjj = Ujj.shape[0]
                Tnoto = Tnoto.reshape((shapeUii*shapeUjj),1,order="F")
                M[rows, cols] = \
                    linalg.solve(sp.kron(sp.eye(shapeUjj),Uii) +
                                 sp.kron(Ujj.T,sp.eye(shapeUii)),
                                 Tnoto).reshape(shapeUii,shapeUjj,order="F")
    return M
def normalizeLength(self, noteOns, factor):
    """Convert note timings to MIDI ticks and snap them to a grid.

    Columns 0, 1 and 3 of ``noteOns`` are rescaled to a resolution of 480
    ticks; column 2 is carried over unscaled.  Rows are reordered with
    ``self.rowsort`` and a copy of that state is kept in
    ``self.oldFixToResolution``.  Each start (column 0) is then snapped to a
    120-tick grid, each length (column 3, plus ``factor``) is normalised by
    ``self.normalizeNoteLength``, and column 1 is recomputed as
    start + length - 1.

    The per-note strings (``self.convertNoteNo`` of column 2 followed by the
    length string) are stored in ``self.book``.  Returns the adjusted array.
    """
    # MIDI resolution and the grid size derived from it
    MIDI_Res = 480.
    minnotel = 1./4.*MIDI_Res
    # tolerance used when snapping a start time to the grid
    rate = 0.5

    shibu = (self.fs/10.) / (self.wavetempo/60.)
    fixToResolution = noteOns/shibu*480.
    fixToResolution[:, 2] = noteOns[:, 2]

    # NOTE(review): the original comment says the rows are sorted by
    # descending note number - confirm against rowsort.
    fixToResolution = self.rowsort(fixToResolution)
    self.oldFixToResolution = sp.copy(fixToResolution)

    # one lilypond token per note
    note_count = fixToResolution.shape[0]
    book = [[] for _ in range(note_count)]
    for n in range(note_count):
        start = fixToResolution[n, 0] + minnotel*rate - 1
        start = (sp.floor(start/minnotel))*minnotel
        if start == 0:
            start = 1
        fixToResolution[n, 0] = start

        length, length_token = self.normalizeNoteLength(
            fixToResolution[n, 3] + factor)
        fixToResolution[n, 3] = length
        book[n] = self.convertNoteNo(fixToResolution[n, 2]) + length_token
        fixToResolution[n, 1] = fixToResolution[n, 3] + fixToResolution[n, 0] - 1

    self.book = book
    return fixToResolution
def errorApproximation(self, ratio, dim=20):
    """Estimate the prediction error by hiding known entries.

    A fraction ``ratio`` of the non-zero entries of ``self.vxm`` is drawn at
    random (with replacement), set to zero in a copy and predicted back with
    ``self.predict(dim, ...)``.  The root mean squared difference between the
    predictions and the hidden values is stored in ``self.rmse``.

    When no entry is held out (``ratio`` too small, or no non-zero entries)
    ``self.rmse`` is set to NaN; this case used to raise
    ``ZeroDivisionError``.

    Also sets ``self.modvxm``, ``self.modvmx`` and ``self.newmodvxm``.
    """
    self.buildMatrix()
    rows, cols = sp.nonzero(self.vxm)
    sumNonzeros = (self.vxm !=0).sum()
    numTest = int(ratio*sumNonzeros)

    # one draw per held-out entry; duplicates are possible
    elementList = []
    for _ in range(numTest):
        rInt = sp.random.randint(0, rows.size)
        elementList.append([rows[rInt], cols[rInt]])

    self.modvxm = sp.copy(self.vxm)
    for r, c in elementList:
        self.modvxm[r, c] = 0

    # NOTE(review): the filled matrix is stored as ``modvmx`` (not
    # ``modvxm``) and is not passed to predict().  This looks like a typo
    # but is kept because other code may read the attribute.
    self.modvmx = self.fillAverages(vxm = self.modvxm)
    self.newmodvxm = self.predict(dim,vxm=self.modvxm)

    if not elementList:
        self.rmse = float('nan')
        return

    sqDiff = 0
    for r, c in elementList:
        sqDiff += sp.square(self.newmodvxm[r, c] - self.vxm[r, c])
    # float() keeps this a true division on Python 2 as well
    self.rmse = sp.sqrt(sqDiff/float(len(elementList)))
def getX(self,standardized=True,maf=None):
    """
    return SNPs, if necessary standardize them

    Works on a copy of ``self.X``.  Missing values (NaN) are replaced by the
    mean of the non-missing values in the same column.  The rows selected by
    ``self.idx_samples`` are returned.

    Parameters
    ----------
    standardized : if true, the selected block is centred per column, divided
        by the per-column standard deviation and by sqrt(number of columns).
    maf : if not None, only columns whose mean over the selected samples lies
        strictly between 0.1 and 0.9 are kept.
        NOTE(review): the value of ``maf`` itself is not used, only whether
        it is None - confirm that the fixed 0.1/0.9 bounds are intended.
    """
    X = SP.copy(self.X)
    # test for missing values
    isnan = SP.isnan(X)
    for i in isnan.sum(0).nonzero()[0]:
        # set to mean
        X[isnan[:,i],i] = X[~isnan[:,i],i].mean()

    # identity test: ``maf != None`` misbehaves for array-valued arguments
    if maf is not None:
        LG.debug('filter SNPs')
        LG.debug('... number of SNPs(before filtering): %d'%X.shape[1])
        sample_mean = X[self.idx_samples].mean(0)
        idx_snps = SP.logical_and(sample_mean>0.1,sample_mean<0.9)
        LG.debug('... number of SNPs(after filtering) : %d'%idx_snps.sum())
    else:
        idx_snps = SP.ones(self.n_f,dtype=bool)

    X = X[self.idx_samples][:,idx_snps]
    if standardized:
        LG.debug('standardize SNPs')
        X-= X.mean(0)
        X /= X.std(0,dtype=NP.float32)
        X /= SP.sqrt(X.shape[1])
    return X
def execute(self):
    """Run the full calculation and subtract the strongest channel modes.

    Stores on the instance:

    * ``power_mat`` and ``thermal_expectation`` from ``full_calculation()``;
    * ``low_f_mode_values`` / ``low_f_modes``: eigenvalues and eigenvectors
      of the real part of the mean of ``power_mat[1:4*n_chan + 1]``;
    * ``subtracted_auto_power``: row ``ii`` holds, per frequency, the mean of
      the diagonal of the real power matrix after the ``ii + 1`` modes with
      the largest eigenvalues have been removed.

    Up to 10 modes are removed, limited to the number of channels.

    Raises
    ------
    RuntimeError
        If the eigenvalues are not returned in ascending order.
    """
    self.power_mat, self.thermal_expectation = self.full_calculation()
    n_chan = self.power_mat.shape[1]
    n_freq = self.power_mat.shape[0]
    # Calculate the mean channel correlations at low frequencies.
    low_f_mat = sp.mean(self.power_mat[1:4 * n_chan + 1,:,:], 0).real
    # Factorize it into principal components.
    e, v = linalg.eigh(low_f_mat)
    self.low_f_mode_values = e
    # Make sure the eigenvalues are sorted.
    if sp.any(sp.diff(e) < 0):
        raise RuntimeError("Eigenvalues not sorted.")
    self.low_f_modes = v
    # Now subtract out the noisiest channel modes and see what is left.
    # There are only n_chan modes, so never ask for more than that.
    n_modes_subtract = min(10, n_chan)
    mode_subtracted_power_mat = sp.copy(self.power_mat.real)
    mode_subtracted_auto_power = sp.empty((n_modes_subtract, n_freq))
    for ii in range(n_modes_subtract):
        # Eigenvalues ascend, so the strongest mode is the LAST column.
        # ``v[:, -ii]`` picked column 0 (the weakest mode) for ii == 0.
        mode = v[:,-1 - ii]
        # amp[f] = mode^T . P[f] . mode
        amp = sp.sum(mode[:,None] * mode_subtracted_power_mat, 1)
        amp = sp.sum(amp * mode, 1)
        to_subtract = amp[:,None,None] * mode[:,None] * mode
        mode_subtracted_power_mat -= to_subtract
        # Every (n_chan + 1)-th element of a flattened square matrix lies on
        # its diagonal, i.e. is an auto power.
        auto_power = mode_subtracted_power_mat.reshape(n_freq, n_chan**2)
        auto_power = auto_power[:,::n_chan + 1]
        mode_subtracted_auto_power[ii,:] = sp.mean(auto_power, -1)
    self.subtracted_auto_power = mode_subtracted_auto_power
def GP_train(x, y, cov_par, cov_func = None, cov_typ ='SE', \
             cov_fixed = None, prior = None, \
             MF = None, MF_par = None, MF_args = None, \
             MF_fixed = None):
    '''
    Max likelihood optimization of GP hyper-parameters. Calls
    GP_negloglik. Takes care of merging / splitting the fixed /
    variable and cov / MF parameters

    Parameters
    ----------
    x, y : passed through to GP_negloglik.
    cov_par : covariance parameters (initial guess).
    cov_func, cov_typ, prior : passed through to GP_negloglik.
    cov_fixed : optional boolean flags, True for covariance parameters that
        are held fixed.
    MF : optional mean function; when given, MF_par is appended to cov_par.
    MF_par, MF_fixed : mean function parameters and their fixed flags.
    MF_args : mean function arguments; defaults to x when MF is given.

    Returns
    -------
    The optimised covariance parameters, or the tuple
    (covariance parameters, mean function parameters) when MF is given.
    '''
    # ``is None`` / ``is not None`` throughout: comparing an array argument
    # with ``!= None`` is elementwise and cannot be used as a condition.
    n_cov_par = len(cov_par)
    if MF is not None:
        merged_par = scipy.append(cov_par, MF_par)
        n_MF_par = len(MF_par)
        fixed = scipy.append(scipy.zeros(n_cov_par, 'bool'), \
                             scipy.zeros(n_MF_par, 'bool'))
        # split at n_cov_par rather than with negative slices, which select
        # the wrong part when MF_par is empty
        if cov_fixed is not None: fixed[:n_cov_par] = cov_fixed
        if MF_fixed is not None: fixed[n_cov_par:] = MF_fixed
        if MF_args is None: MF_args = x[:]
    else:
        merged_par = cov_par[:]
        n_MF_par = 0
        fixed = scipy.zeros(n_cov_par, 'bool')
        if cov_fixed is not None: fixed[:] = cov_fixed
    free = ~fixed
    var_par_in = merged_par[free]
    fixed_par = merged_par[fixed]
    args = (x, y, cov_func, cov_typ, MF, n_MF_par, MF_args, fixed, \
            fixed_par, prior)
    # only the free parameters are handed to the optimiser
    var_par_out = \
        sop.fmin(GP_negloglik, var_par_in, args, disp = 0)
    par_out = scipy.copy(merged_par)
    par_out[free] = var_par_out
    par_out[fixed] = fixed_par
    if MF is not None:
        return par_out[:n_cov_par], par_out[n_cov_par:]
    else:
        return par_out
def run():
    """Walk through the logistic regression exercise on the admission data.

    Part 1 plots the data, part 2 prints the cost and gradient at theta = 0,
    part 3 minimises the cost and plots the decision boundary, and part 4
    prints the admission probability for exam scores 45 and 85.  The script
    waits for the enter key after parts 1 and 2.
    """
    theta = sp.zeros((3, 1))
    data = sp.copy(admission_data)
    X = data[:, [0, 1]]
    y = data[:, [2]]
    m = sp.shape(y)[0]
    # Add intercept term to x
    X = sp.concatenate((sp.ones((m, 1)), X), axis=1)

    # Part 1: Plotting
    print('Plotting data with + indicating (y = 1) examples and o indicating (y = 0) examples.')
    logres.plotData(data)
    plt.xlabel('Exam 1 score')
    plt.ylabel('Exam 2 score')
    # The labels must be one list.  Two positional strings are read by
    # matplotlib as (handles, labels).
    # NOTE(review): assumes plotData draws the admitted points first.
    plt.legend(['Admitted', 'Not admitted'])
    plt.show()
    print('Program paused. Press enter to continue.')
    raw_input()

    # Part 2: Compute Cost and Gradient
    (m, n) = X.shape
    initial_theta = sp.zeros((n, 1))
    (cost, grad) = logres.costFunction(initial_theta, X, y)
    print('Cost at initial theta (zeros): ', cost)
    print('Gradient at initial theta (zeros): ', grad)
    print('Program paused. Press enter to continue.')
    raw_input()

    # Part 3: Optimizing using fminunc
    (theta, cost) = logres.find_minimum_theta(theta, X, y)
    print('Cost at theta found by fmin: ', cost)
    print('Theta: ', theta)
    logres.plotDecisionBoundary(data, X, theta)
    plt.show()

    # Part 4: Predict and Accuracies
    prob = logres.sigmoid(sp.asmatrix([1, 45, 85]).dot(theta))
    print('For a student with scores 45 and 85, we predict an admission probability of ', prob[0, 0])
    print('Program paused. Press enter to continue.')
def interp2d(qx, qy, qz):
    # Builds an interpolant through four points using the basis
    # 1, x, y, x*y and returns it as a vectorised function of (x, y).
    # qx, qy are the coordinates of the four points and qz the values there.
    # The function also prints several intermediate matrices as a
    # self-check of the hand-written ``inverse`` routine and of an
    # index-swapping transpose.
    Vandermonde = sp.zeros((4,4))
    Vandermonde[:,0] = 1
    Vandermonde[:,1] = qx
    Vandermonde[:,2] = qy
    Vandermonde[:,3] = qx*qy
    # reference inverse, printed only; it is replaced a few lines below
    Vinv = sp.linalg.inv(Vandermonde)
    print 'Vandermonde\n', Vandermonde
    print
    print 'Vandermonde inverse official \n', Vinv
    # the inverse that is actually used comes from the module's own routine
    Vinv = inverse(Vandermonde, 4)
    print 'Vandermonde inverse Gauss \n', Vinv
    # transposed copy, kept to check the manual transpose below
    V22 = sp.copy(Vinv.T)
    print 'Identity check'
    print sp.dot(Vinv,Vandermonde)
    print 'Transpose official'
    print V22
    # transpose Vinv in place by swapping the elements above and below the
    # diagonal
    for i in range(3):
        for j in range(i+1,4):
            d = Vinv[i,j]
            Vinv[i,j]= Vinv[j,i]
            Vinv[j,i]= d
    print 'Index ranspose\n', Vinv
    print 'Check transpose\n', Vinv-V22
    def SU2(x,y):
        # weights of the four values at (x, y), then their weighted sum
        RHS = sp.array([1,x,y,x*y])
        b = sp.dot(Vinv,RHS)
        return sp.dot(b,qz.T)
    SU2 = sp.vectorize(SU2)
    return SU2
def KramersKronigFFT(ImX_A):
    '''
    Hilbert transform used to calculate real part of a function from its imaginary part
    uses piecewise cubic interpolated integral kernel of the Hilbert transform
    use only if len(ImX_A)=2**m-1, uses fft from scipy.fftpack

    Parameters
    ----------
    ImX_A : 1-D array with the imaginary part on an equidistant grid.  The
        input is copied, not modified.

    Returns
    -------
    1-D array of the same length holding the real part.
    '''
    X_A = sp.copy(ImX_A)
    N = int(len(X_A))
    ## The kernel index is always built as float64.  As an integer array the
    ## cubic terms below overflow int64 once A exceeds about 2.1e6 (and far
    ## earlier where the default integer is 32 bit), which the former
    ## N > 3e6 switch did not cover.
    A = sp.arange(3,N+1,dtype='float64')
    X1 = 4.0*sp.log(1.5)
    X2 = 10.0*sp.log(4.0/3.0)-6.0*sp.log(1.5)
    ## filling the kernel: X1 and X2 are the values at distance 1 and 2,
    ## the closed formula covers distances 3..N
    Kernel_A = (1-A**2)*((A-2)*sp.arctanh(1.0/(1-2*A))+(A+2)*sp.arctanh(1.0/(1+2*A)))\
        +((A**3-6*A**2+11*A-6)*sp.arctanh(1.0/(3-2*A))+(A+3)*(A**2+3*A+2)*sp.arctanh(1.0/(2*A+3)))/3.0
    ## antisymmetric kernel, centred on the 0.0 entry
    Kernel_A = sp.concatenate([-sp.flipud(Kernel_A),sp.array([-X2,-X1,0.0,X1,X2]),Kernel_A])/sp.pi
    ## zero-padding the functions for fft; both arrays are rotated so that
    ## their centre sits at index 0
    ImXExt_A = sp.concatenate([X_A[int((N-1)/2):],sp.zeros(N+2),X_A[:int((N-1)/2)]])
    KernelExt_A = sp.concatenate([Kernel_A[N:],sp.zeros(1),Kernel_A[:N]])
    ## performing the fft (convolution as a product in Fourier space)
    ftReXExt_A = -fft(ImXExt_A)*fft(KernelExt_A)
    ReXExt_A = sp.real(ifft(ftReXExt_A))
    ## undo the rotation and drop the padding
    ReX_A = sp.concatenate([ReXExt_A[int((3*N+3)/2+1):],ReXExt_A[:int((N-1)/2+1)]])
    return ReX_A
def sample(self,filename, burnin=100, sample_rate=10, n_samples=10, stopwords=None):
    """Run the sampler and accumulate counts from the thinned samples.

    The corpus is built from ``filename`` (with optional ``stopwords``) and
    the state is initialised.  ``burnin + sample_rate*n_samples`` sweeps are
    then performed.  After every sweep the log-likelihood is stored in
    ``self.logprobs`` and printed.  After the burn-in, every
    ``sample_rate``-th sweep (starting with the first) adds ``self.nzw`` and
    ``self.nmz`` to ``self.total_nzw`` and ``self.total_nmz``.
    """
    self.buildCorpus(filename,stopwords)
    self.initialize()
    self.total_nzw = sp.zeros((self.n_topics,self.n_words))
    self.total_nmz = sp.zeros((self.n_docs,self.n_topics))

    n_sweeps = burnin + sample_rate*n_samples
    self.logprobs = sp.zeros(n_sweeps)
    for sweep in xrange(n_sweeps):
        self._sweep()
        self.logprobs[sweep] = self._loglikelihood()
        print("Iteration: {}".format(sweep) + "\tLog-prob: {}".format(self.logprobs[sweep]))

        # counts are only collected after the burn-in, every sample_rate sweeps
        since_burnin = sweep - burnin
        if since_burnin >= 0 and not since_burnin%sample_rate:
            self.total_nzw += sp.copy(self.nzw)
            self.total_nmz += sp.copy(self.nmz)
def hardThreshold(coeffs,thresh):
    """Zero every detail coefficient whose magnitude is not above ``thresh``.

    ``coeffs`` is a sequence whose first entry is left untouched; all later
    entries are thresholded.  Each entry is copied with ``sp.copy`` first, so
    the input is not modified and a tuple of equally shaped arrays comes back
    as one stacked array.

    The threshold is applied to each entry as a whole, so 1-D detail arrays
    are handled as well.  The previous element-wise loop silently left them
    unchanged.

    Returns the list of copied, thresholded arrays.
    """
    new_coeffs = [sp.copy(level) for level in coeffs]
    for detail in new_coeffs[1:]:
        # in-place multiply by the boolean keep-mask
        detail *= sp.absolute(detail) > thresh
    return new_coeffs
def fillAverages(self,vxm):
    """Return a copy of ``vxm`` with zeros replaced by their row average.

    For every row, the entries equal to zero are set to the mean of the
    non-zero entries of that row.  Rows without any non-zero entry are left
    as they are; they used to become all-NaN (mean of an empty selection).
    The input array is not modified.
    """
    vxmc = sp.copy(vxm)
    for i in range(vxmc.shape[0]):
        row = vxmc[i,:]            # view: assignments land in vxmc
        known = row != 0
        if not known.any():
            continue
        row[~known] = sp.mean(row[known])
    return vxmc
def get_quantiles(scores, num_dots=1000):
    """
    Uses scipy

    Returns ``num_dots`` values picked from the sorted scores at the
    positions ``int(len(scores) * i / (num_dots + 2))`` for
    ``i = 1 .. num_dots``.  The input is not modified.
    """
    ordered = sp.sort(sp.array(scores))
    total = len(ordered)
    picks = []
    for step in range(1, num_dots + 1):
        picks.append(int(total * step / (num_dots + 2)))
    return ordered[picks]
def get_log_quantiles(scores, num_dots=1000, max_val=5):
    """
    Uses scipy

    Returns ``-log10`` of ``num_dots`` values picked from the sorted scores.
    The positions are ``10 ** (-i / (num_dots + 1) * max_val) * len(scores)``
    for ``i = 1 .. num_dots``, truncated to integers.  The input is not
    modified.
    """
    ordered = sp.sort(sp.array(scores))
    # single precision, as in the original expression
    steps = sp.arange(1, num_dots + 1, dtype='single')
    exponents = (-steps / (num_dots + 1)) * max_val
    positions = sp.array(10 ** exponents * len(ordered), dtype='int')
    return -sp.log10(ordered[positions])
def write_symmetry_plane(self, path='.', create_dirs=True, overwrite=False):
    r"""
    Exports the +Y half of the mesh flattening out everything below 0 on
    the Y axis

    Every vertex whose Y coordinate is <= 0 has it set to 0 for the
    duration of the export.  ``path``, ``create_dirs`` and ``overwrite`` are
    forwarded to ``write_foam_file``.

    The original vertices are restored afterwards, also when the export
    raises.  Previously a failed write left the mesh flattened.
    """
    # TODO: consider replacing the bottom face type with symmetryPlane
    #
    # storing original vertices
    old_verts = sp.copy(self._vertices)
    self._vertices[self._vertices[:, 1] <= 0.0, 1] = 0.0
    try:
        # outputting mesh
        self.write_foam_file(path=path,
                             create_dirs=create_dirs,
                             overwrite=overwrite)
    finally:
        # restoring original verts (old_verts is already a private copy)
        self._vertices = old_verts
def softThreshold(coeffs,thresh):
    """Shrink every detail coefficient towards zero by ``thresh``.

    ``coeffs`` is a sequence whose first entry is left untouched.  In all
    later entries, values with magnitude below ``thresh`` become 0 and the
    remaining values are moved towards zero by ``thresh``.  Each entry is
    copied with ``sp.copy`` first, so the input is not modified and a tuple
    of equally shaped arrays comes back as one stacked array.

    The shrinkage is applied to each entry as a whole, so 1-D detail arrays
    are handled as well.  The previous element-wise loop failed on them.

    Returns the list of copied, thresholded arrays.
    """
    new_coeffs = [sp.copy(level) for level in coeffs]
    for detail in new_coeffs[1:]:
        magnitude = sp.absolute(detail)
        small = magnitude < thresh
        large = magnitude >= thresh
        detail[large] -= sp.sign(detail[large])*thresh
        detail[small] = 0
    return new_coeffs
def sf(self,lrt):
    """
    Survival function of a mixture of a chi-squared random variable of
    degree 0 and a scaled chi-squared random variable of degree d.

    Statistics below ``self.tol`` are treated as 0.  The result is
    ``self.mixture`` times the chi-squared survival function with
    ``self.dof`` degrees of freedom, evaluated at ``lrt / self.scale``.
    The input array is not modified.
    """
    clipped = SP.copy(lrt)
    below_tol = lrt<self.tol
    clipped[below_tol] = 0
    return self.mixture*STATS.chi2.sf(clipped/self.scale,self.dof)
def _build_coefficient_matrix(self):
    r'''
    This builds the sparse coefficient matrix for the linear solver.

    Off-diagonal entries hold the throat conductances for the rows of
    non-Dirichlet pores.  Rows of Dirichlet pores hold a single 1 on the
    diagonal.  Every other diagonal entry is minus the sum of the
    off-diagonal entries of its row.  When the phase has Neumann group
    conditions, one extra row/column (a "super pore") is appended per
    distinct group value.

    Sets ``self._Coeff_dimension`` and, for Neumann groups,
    ``self._group_Neumann_vals`` and ``self._g_super``.

    Returns the matrix in CSR format.

    Raises an ``Exception`` when the phase has no Dirichlet condition.
    '''
    # Filling coefficient matrix
    tpore1 = self._net['throat.conns'][:, 0]
    tpore2 = self._net['throat.conns'][:, 1]

    # Identify the pores that are NOT Dirichlet pores
    try:
        temp = self.pores(self._phase.name + '_Dirichlet',
                          mode='difference')
    except Exception:
        raise Exception(
            'The linear transport solver needs at least one Dirichlet boundary condition for the phase which is attached to '
            + self.name)
    loc1 = sp.in1d(tpore1, temp)
    loc2 = sp.in1d(tpore2, temp)
    modified_tpore1 = tpore1[loc1]
    modified_tpore2 = tpore2[loc1]
    row = modified_tpore1
    col = modified_tpore2

    # Expand the conductance to a vector if necessary
    g = self['throat.conductance']
    if sp.size(g) == 1:
        g = g * sp.ones(self.num_throats())
    data_main = g
    data = data_main[loc1]

    # same throats seen from their other end
    modified_tpore2 = tpore2[loc2]
    modified_tpore1 = tpore1[loc2]
    row = sp.append(row, modified_tpore2)
    col = sp.append(col, modified_tpore1)
    data = sp.append(data, data_main[loc2])
    A_dim = self.num_pores()

    # Check for Neumann_group BCs and add superpores if necessary.
    # NOTE(review): any failure in here is deliberately ignored (presumably
    # "no such label"), as before; narrowing it needs the exception type
    # raised by self.pores.
    try:
        self.pores(self._phase.name + '_Neumann_group')
        group_values = self.get_data(
            prop=self._phase.name + '_bcval_Neumann_group',
            pores=self.pores(self._phase.name + '_Neumann_group'))
        self._group_Neumann_vals = sp.unique(group_values)
        A_dim = A_dim + len(self._group_Neumann_vals)
        extera_neu = self._group_Neumann_vals
        self._g_super = 1e-60
        for item in sp.r_[0:len(extera_neu)]:
            neu_tpore2 = self.pores(self._phase.name + '_Neumann_group')
            neu_tpore2 = neu_tpore2[group_values == extera_neu[item]]
            # symmetric coupling between the group's pores and its super pore
            row = sp.append(row, neu_tpore2)
            col = sp.append(col, len(neu_tpore2) * [A_dim - item - 1])
            data = sp.append(data, len(neu_tpore2) * [self._g_super])
            row = sp.append(row, len(neu_tpore2) * [A_dim - item - 1])
            col = sp.append(col, neu_tpore2)
            data = sp.append(data, len(neu_tpore2) * [self._g_super])
    except Exception:
        pass

    # Adding positions for diagonal
    diag = sp.r_[0:A_dim]
    # Only the label lookup is guarded.  The whole section used to sit in a
    # bare ``except``, so an error raised after row/col/data had already
    # been extended was swallowed and produced a wrong matrix.
    try:
        pores = self.pores(self._phase.name + '_Dirichlet')
    except Exception:
        temp_data = sp.copy(data)
        non_Dir_diag = diag
    else:
        row = sp.append(row, diag[pores])
        col = sp.append(col, diag[pores])
        data = sp.append(data, sp.ones_like(diag[pores]))
        temp_data = sp.copy(data)
        temp_data[sp.in1d(row, diag[pores])] = 0
        # ``~`` inverts the boolean mask; unary ``-`` on a boolean array is
        # an error in current NumPy
        non_Dir_diag = diag[~sp.in1d(diag, diag[pores])]

    # diagonal = minus the sum of the row's off-diagonal entries
    S_temp = sp.zeros(A_dim)
    for i in sp.r_[0:len(row)]:
        S_temp[row[i]] = S_temp[row[i]] - temp_data[i]
    data = sp.append(data, S_temp[non_Dir_diag])
    row = sp.append(row, non_Dir_diag)
    col = sp.append(col, non_Dir_diag)

    # Convert the lists to the sparse matrix
    self._Coeff_dimension = A_dim
    a = sprs.coo.coo_matrix((data, (row, col)), (A_dim, A_dim))
    A = a.tocsr()
    return (A)
def ncp_bcd(X, rank, random_state=None, init='rand', **options):
    """
    Fits nonnegative CP Decomposition using the Block Coordinate Descent (BCD)
    Method.

    Parameters
    ----------
    X : (I_1, ..., I_N) array_like
        A real array with nonnegative entries and ``X.ndim >= 3``.

    rank : integer
        The `rank` sets the number of components to be computed.

    random_state : integer, RandomState instance or None, optional (default ``None``)
        If integer, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used by np.random.

    init : str, or KTensor, optional (default ``'rand'``).
        Specifies initial guess for KTensor factor matrices.
        If ``'randn'``, Gaussian random numbers are used to initialize.
        If ``'rand'``, uniform random numbers are used to initialize.
        If KTensor instance, a copy is made to initialize the optimization.

    options : dict, specifying fitting options.

        tol : float, optional (default ``tol=1E-5``)
            Stopping tolerance for reconstruction error.

        max_iter : integer, optional (default ``max_iter = 500``)
            Maximum number of iterations to perform before exiting.

        min_iter : integer, optional (default ``min_iter = 1``)
            Minimum number of iterations to perform before exiting.

        max_time : integer, optional (default ``max_time = np.inf``)
            Maximum computational time before exiting.

        verbose : bool ``{'True', 'False'}``, optional (default ``verbose=True``)
            Display progress.

    Returns
    -------
    result : FitResult instance
        Object which holds the fitted results. It provides the factor matrices
        in form of a KTensor, ``result.factors``.

    Notes
    -----
    This implementation is using the Block Coordinate Descent Method.

    References
    ----------
    Xu, Yangyang, and Wotao Yin. "A block coordinate descent method for
    regularized multiconvex optimization with applications to
    nonnegative tensor factorization and completion."
    SIAM Journal on imaging sciences 6.3 (2013): 1758-1789.

    Examples
    --------

    """

    # Check inputs.
    optim_utils._check_cpd_inputs(X, rank)

    # Number of tensor modes.
    N = X.ndim

    # Initialize problem.
    U, normX = optim_utils._get_initial_ktensor(init, X, rank, random_state)
    result = FitResult(U, 'NCP_BCD', **options)

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Block coordinate descent
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    Um = U.copy()  # Extrapolations of components
    extraw = 1  # Used for extrapolation weight update
    weights_U = np.ones(N)  # Extrapolation weights
    L = np.ones(N)  # Lipschitz constants
    obj_bcd = 0.5 * normX**2  # Initial objective value

    # Main optimization loop.
    while result.still_optimizing:
        obj_bcd_old = obj_bcd  # Old objective value
        U_old = U.copy()
        extraw_old = extraw

        # Lipschitz constants of the previous sweep.  This must be a
        # snapshot: ``L0 = L`` only aliased the array, so the ratio
        # L0[n] / L[n] used for the extrapolation weights was always 1.
        L0 = L.copy()

        for n in range(N):

            # Select all components, but U_n
            components = [U[j] for j in range(N) if j != n]

            # i) compute the N-1 gram matrices
            grams = sci.multiply.reduce([arr.T.dot(arr) for arr in components])

            # Update gradient Lipschitz constant
            L[n] = linalg.norm(grams, 2)

            # ii) Compute Khatri-Rao product
            kr = khatri_rao(components)
            p = unfold(X, n).dot(kr)

            # Compute Gradient.
            grad = Um[n].dot(grams) - p

            # Enforce nonnegativity (project onto nonnegative orthant).
            U[n] = sci.maximum(0.0, Um[n] - grad / L[n])

        # Compute objective function and update optimization result.
        obj = linalg.norm(X - U.full()) / normX
        result.update(obj)

        # Correction and extrapolation.  ``grams`` and ``p`` still belong to
        # the last mode (n = N - 1) of the sweep above.
        grams *= U[N - 1].T.dot(U[N - 1])
        obj_bcd = 0.5 * (sci.sum(grams) - 2 * sci.sum(U[N - 1] * p) + normX**2)

        extraw = (1 + sci.sqrt(1 + 4 * extraw_old**2)) / 2.0

        if obj_bcd >= obj_bcd_old:
            # restore previous A to make the objective nonincreasing.
            # Copied with the KTensor's own copy(), like the other copies in
            # this function, so Um stays indexable by mode.
            Um = U_old.copy()

        else:
            # apply extrapolation
            w = (extraw_old - 1.0) / extraw  # Extrapolation weight
            for n in range(N):
                # choose smaller weights for convergence
                weights_U[n] = min(w, 1.0 * sci.sqrt(L0[n] / L[n]))
                # extrapolation
                Um[n] = U[n] + weights_U[n] * (U[n] - U_old[n])

    # Finalize and return the optimization result.
    return result.finalize()
def wrap(x):
    """Evaluate ``G.infer_m_post`` at the single point ``x``.

    ``x`` is copied and resized to one row of ``d`` columns, then passed on
    together with a one-element NaN placeholder.  Entry ``[0, 0]`` of the
    result is returned.  ``G`` and ``d`` come from the enclosing scope.
    """
    xq = sp.copy(x)
    xq.resize([1, d])
    # ``nan`` instead of ``NaN``: the capitalised alias was removed in
    # NumPy 2.0, the lowercase name exists in every version
    a = G.infer_m_post(xq, [[sp.nan]])
    return a[0, 0]
def __init__(self,
             challenge_func,
             ns=10,
             npop1=20,
             pr=0.3,
             beta=0.85,
             npop2=20,
             w=0.7,
             c1=1.5,
             c2=1.5):
    """Build both populations, score them and set up the PSO bookkeeping.

    challenge_func -- callable stored as ``self.fc`` (the challenge problem)
    ns             -- stored as ``self.ns``
    npop1, npop2   -- sizes of the first and second population
    pr, beta       -- DE parameters, stored unchanged
    w, c1, c2      -- PSO parameters, stored unchanged
    """
    seed()

    # Population sizes.
    self.ns, self.npop1, self.npop2 = ns, npop1, npop2
    # DE parameters.
    self.beta, self.pr = beta, pr
    # PSO parameters.
    self.c1, self.c2, self.w = c1, c2, w
    # Function representing the challenge problem.
    self.fc = challenge_func

    # Answers to the challenge problem, one slot per individual.
    self.ans1 = scipy.zeros(self.npop1)
    self.ans2 = scipy.zeros(self.npop2)

    # Generate pop1 and then pop2; every freshly generated individual is
    # passed through resolve_desafio, which returns (answer, individual).
    self.pop1 = []
    self.pop2 = []
    for k in scipy.arange(self.npop1):
        answer, individual = self.resolve_desafio(self.gera_individuo())
        self.ans1[k] = answer
        self.pop1.append(individual.copy())
    for k in scipy.arange(self.npop2):
        answer, individual = self.resolve_desafio(self.gera_individuo())
        self.ans2[k] = answer
        self.pop2.append(individual.copy())
    self.pop1 = scipy.array(self.pop1)
    self.pop2 = scipy.array(self.pop2)

    # Each hall of fame starts as 15 separate copies of the row
    # [smallest answer, individual that produced it].
    self.hall_of_fame1 = [
        scipy.hstack((self.ans1.min(), self.pop1[self.ans1.argmin()]))
        for _ in scipy.arange(15)
    ]
    self.hall_of_fame2 = [
        scipy.hstack((self.ans2.min(), self.pop2[self.ans2.argmin()]))
        for _ in scipy.arange(15)
    ]

    # Fitness of both populations (population 2 is evaluated first).
    self.fit1 = scipy.zeros(self.npop1)
    self.fit2 = scipy.zeros(self.npop2)
    for k in scipy.arange(self.npop2):
        self.fit2[k] = self.avalia_aptidao2(self.ans2[k])
    for k in scipy.arange(self.npop1):
        self.fit1[k] = self.avalia_aptidao1(self.ans1[k])

    # Initial PSO velocities.
    self.v = scipy.zeros(self.pop2.shape)

    # Per-particle best: position, fitness and answer.
    self.bfp = scipy.copy(self.pop2)
    self.bfp_fitness = scipy.copy(self.fit2)
    self.bfp_ans = scipy.copy(self.ans2)

    # Global best: the particle with the largest fitness.
    self.bfg = self.pop2[self.bfp_fitness.argmax()].copy()
    self.bfg_fitness = self.bfp_fitness.max().copy()
    self.bfg_ans = self.bfp_ans[self.bfp_fitness.argmax()].copy()
# Readable text for the positive ``info`` codes reported by Slycot's sb02md.
_DARE_SB02MD_MESSAGES = {
    1: "The matrix A is (numerically) singular in discrete-time case.",
    2: "The Hamiltonian or symplectic matrix H cannot be reduced to real "
       "Schur form.",
    3: "The real Schur form of the Hamiltonian or symplectic matrix H "
       "cannot be appropriately ordered.",
    4: "The Hamiltonian or symplectic matrix H has less than n stable "
       "eigenvalues.",
    5: "The N-th order system of linear algebraic equations is singular to "
       "working precision.",
}

# Readable text for the positive ``info`` codes reported by Slycot's sg02ad.
_DARE_SG02AD_MESSAGES = {
    1: "The computed extended matrix pencil is singular, possibly due to "
       "rounding errors.",
    2: "The QZ algorithm failed.",
    3: "Reordering of the generalized eigenvalues failed.",
    4: "After reordering, roundoff changed values of some complex "
       "eigenvalues so that leading eigenvalues in the generalized Schur "
       "form no longer satisfy the stability condition; this could also be "
       "caused due to scaling.",
    5: "The computed dimension of the solution does not equal N.",
    6: "The spectrum is too close to the boundary of the stability domain.",
    7: "A singular matrix was encountered during the computation of the "
       "solution matrix X.",
}


def _dare_slycot_error(ve, messages, fallback=None):
    """Translate a Slycot ValueError into one with a readable message."""
    info = getattr(ve, 'info', None)
    if info is None:
        # Nothing to translate; hand the original exception back unchanged.
        return ve
    if info in messages:
        text = messages[info]
    elif info >= 0 and fallback is not None:
        text = fallback % info
    else:
        # str(ve) works on Python 2 and 3; ``ve.message`` is Python 2 only.
        text = str(ve)
    err = ValueError(text)
    err.info = info
    return err


def _dare_bad_square(M, order):
    """Return True when M is neither an order-by-order matrix nor a scalar
    that is acceptable for that order."""
    if size(M) > 1:
        return shape(M)[0] != shape(M)[1] or shape(M)[0] != order
    return size(M) == 1 and order > 1


def _dare_gain(X, A, B, R, scalar_r, S=None):
    """Compute (B'XB + R)^-1 (B'XA [+ S']) from the given matrices."""
    lhs = dot(asarray(B).T, dot(X, B)) + R
    rhs = dot(asarray(B).T, dot(X, A))
    if S is not None:
        rhs = rhs + asarray(S).T
    factor = 1 / lhs if scalar_r else inv(lhs)
    return dot(factor, rhs)


def dare_old(A, B, Q, R, S=None, E=None):
    """Solve a discrete-time algebraic Riccati equation through Slycot.

    With ``S`` and ``E`` both omitted the standard equation is solved with
    the Slycot routines ``sb02mt`` and ``sb02md``; with both supplied the
    generalized equation is solved with ``sg02ad``.

    Parameters
    ----------
    A, B, Q, R : array-like
        Problem matrices.  One-dimensional arrays are reshaped to a single
        row.  ``Q`` and ``R`` must be symmetric.
    S, E : array-like, optional
        Extra matrices of the generalized problem; give both or neither.

    Returns
    -------
    (X, L, G) : tuple
        The solution ``X``, the closed-loop eigenvalues ``L`` and the gain
        matrix ``G``.

    Raises
    ------
    ControlSlycot
        If one of the required Slycot routines cannot be imported.
    ControlArgument
        If the dimensions are inconsistent, ``Q`` or ``R`` is not symmetric,
        or only one of ``S`` and ``E`` is given.
    ValueError
        If a Slycot routine fails; the ``info`` attribute carries the Slycot
        error code.
    """
    # Make sure we can import the required slycot routines
    try:
        from slycot import sb02md
    except ImportError:
        raise ControlSlycot("can't find slycot module 'sb02md'")

    try:
        from slycot import sb02mt
    except ImportError:
        raise ControlSlycot("can't find slycot module 'sb02mt'")

    try:
        from slycot import sg02ad
    except ImportError:
        raise ControlSlycot("can't find slycot module 'sg02ad'")

    # Reshape 1-d arrays
    if len(shape(A)) == 1:
        A = A.reshape(1, A.size)
    if len(shape(B)) == 1:
        B = B.reshape(1, B.size)
    if len(shape(Q)) == 1:
        Q = Q.reshape(1, Q.size)
    if R is not None and len(shape(R)) == 1:
        R = R.reshape(1, R.size)
    if S is not None and len(shape(S)) == 1:
        S = S.reshape(1, S.size)
    if E is not None and len(shape(E)) == 1:
        E = E.reshape(1, E.size)

    # Determine main dimensions
    n = 1 if size(A) == 1 else size(A, 0)
    m = 1 if size(B) == 1 else size(B, 1)

    generalized = S is not None and E is not None
    if not generalized and not (S is None and E is None):
        raise ControlArgument("Invalid set of input parameters.")

    # Check input data for consistency
    if size(A) > 1 and shape(A)[0] != shape(A)[1]:
        raise ControlArgument("A must be a quadratic matrix.")

    if _dare_bad_square(Q, n):
        raise ControlArgument(
            "Q must be a quadratic matrix of the same dimension as A.")

    if (size(B) > 1 and shape(B)[0] != n) or (size(B) == 1 and n > 1):
        raise ControlArgument("Incompatible dimensions of B matrix.")

    if generalized:
        if _dare_bad_square(E, n):
            raise ControlArgument(
                "E must be a quadratic matrix of the same dimension as A.")

        if _dare_bad_square(R, m):
            raise ControlArgument(
                "R must be a quadratic matrix of the same dimension as the "
                "number of columns in the B matrix.")

        if (size(S) > 1 and shape(S)[0] != n) or \
           (size(S) > 1 and shape(S)[1] != m) or \
           (size(S) == 1 and n > 1) or \
           (size(S) == 1 and m > 1):
            raise ControlArgument("Incompatible dimensions of S matrix.")

    if not (asarray(Q) == asarray(Q).T).all():
        raise ControlArgument("Q must be a symmetric matrix.")

    if not (asarray(R) == asarray(R).T).all():
        raise ControlArgument("R must be a symmetric matrix.")

    # Create back-up of arrays needed for the gain computation; presumably
    # the Slycot wrappers may overwrite the arrays they are given.
    A_bak = copy(A)
    B_bak = copy(B)
    R_bak = copy(R)

    if not generalized:
        # Solve the standard algebraic Riccati equation by calling Slycot
        # functions sb02mt and sb02md
        try:
            A_b, B_b, Q_b, R_b, L_b, ipiv, oufact, G = sb02mt(n, m, B, R)
        except ValueError as ve:
            raise _dare_slycot_error(
                ve,
                {m + 1: "The matrix R is numerically singular."},
                fallback="The %i-th element of d in the UdU (LdL) "
                         "factorization is zero.")

        try:
            # The fourth output gets its own name so that it no longer
            # overwrites the parameter S.
            X, rcond, w, S_out, U, A_inv = sb02md(n, A, G, Q, 'D')
        except ValueError as ve:
            raise _dare_slycot_error(ve, _DARE_SB02MD_MESSAGES)

        # Calculate the gain matrix G
        G = _dare_gain(X, A_bak, B_bak, R_bak, size(R_b) == 1)

        # Return the solution X, the closed-loop eigenvalues L and
        # the gain matrix G
        return (X, w[:n], G)

    # Solve the generalized algebraic Riccati equation by calling the
    # Slycot function sg02ad
    S_bak = copy(S)
    try:
        rcondu, X, alfar, alfai, beta, S_o, T, U, iwarn = \
            sg02ad('D', 'B', 'N', 'U', 'N', 'N', 'S', 'R',
                   n, m, 0, A, E, B, Q, R, S)
    except ValueError as ve:
        raise _dare_slycot_error(ve, _DARE_SG02AD_MESSAGES)

    # Allocate the eigenvalue column directly as complex64 (the dtype the
    # previous in-place reinterpretation of a float64 buffer produced).
    L = zeros((n, 1), dtype='complex64')
    for i in range(n):
        L[i] = (alfar[i] + alfai[i] * 1j) / beta[i]

    # Calculate the gain matrix G
    G = _dare_gain(X, A_bak, B_bak, R_bak, size(R_bak) == 1, S=S_bak)

    # Return the solution X, the closed-loop eigenvalues L and
    # the gain matrix G
    return (X, L, G)
def regions_to_network(im, dt=None, voxel_size=1):
    r"""
    Analyzes an image that has been partitioned into pore regions and extracts
    the pore and throat geometry as well as network connectivity.

    Parameters
    ----------
    im : ND-array
        An image of the pore space partitioned into individual pore regions.
        Note that this image must have zeros indicating the solid phase.

    dt : ND-array
        The distance transform of the pore space.  If not given it will be
        calculated (Euclidean distance transform of ``im > 0`` followed by a
        Gaussian filter with ``sigma=0.5``), but it can save time to provide
        one if available.  A supplied ``dt`` is used as given.

    voxel_size : scalar
        The resolution of the image, expressed as the length of one side of a
        voxel, so the volume of a voxel would be **voxel_size**-cubed.  The
        default is 1, which is useful when overlaying the PNM on the original
        image since the scale of the image is always 1 unit length per voxel.

    Returns
    -------
    A dictionary containing all the pore and throat size data, as well as the
    network topological information.  The dictionary names use the OpenPNM
    convention (i.e. 'pore.coords', 'throat.conns') so it may be converted
    directly to an OpenPNM network object using the ``update`` command.

    Notes
    -----
    A temporary OpenPNM ``GenericNetwork`` is created to compute throat
    endpoints, conduit lengths and pore areas; its project is cleared and
    closed again before returning.
    """
    print('_' * 60)
    print('Extracting pore and throat information from image')
    from skimage.morphology import disk, ball
    # Structuring element factory matching the image dimensionality
    struc_elem = disk if im.ndim == 2 else ball

    # if ~sp.any(im == 0):
    #     raise Exception('The received image has no solid phase (0\'s)')

    if dt is None:
        dt = spim.distance_transform_edt(im > 0)
        dt = spim.gaussian_filter(input=dt, sigma=0.5)
    # Get 'slices' into im for each pore region
    slices = spim.find_objects(im)

    # Initialize arrays
    Ps = sp.arange(1, sp.amax(im) + 1)  # region labels 1..max(im)
    Np = sp.size(Ps)
    p_coords = sp.zeros((Np, im.ndim), dtype=float)
    p_volume = sp.zeros((Np, ), dtype=float)
    p_dia_local = sp.zeros((Np, ), dtype=float)
    p_dia_global = sp.zeros((Np, ), dtype=float)
    p_label = sp.zeros((Np, ), dtype=int)
    p_area_surf = sp.zeros((Np, ), dtype=int)
    t_conns = []
    t_dia_inscribed = []
    t_area = []
    t_perimeter = []
    t_coords = []
    # dt_shape = sp.array(dt.shape)

    # Start extracting size information for pores and throats
    for i in tqdm(Ps):
        pore = i - 1  # zero-based pore index for label i
        # find_objects returns None for labels that do not occur in im;
        # the corresponding rows of the pore arrays stay at zero.
        if slices[pore] is None:
            continue
        s = extend_slice(slices[pore], im.shape)
        sub_im = im[s]
        sub_dt = dt[s]
        pore_im = sub_im == i  # boolean mask of this region in the sub-image
        # Pad with background so the region is bounded by zeros on all sides
        # before taking its own distance transform.
        padded_mask = sp.pad(pore_im, pad_width=1, mode='constant')
        pore_dt = spim.distance_transform_edt(padded_mask)
        s_offset = sp.array([i.start for i in s])
        p_label[pore] = i
        # p_coords[pore, :] = spim.center_of_mass(pore_im) + s_offset
        # Pore coordinate = first voxel at which the region's own distance
        # transform attains its maximum.
        # NOTE(review): pore_dt is computed on padded_mask, which has one
        # extra voxel on every side compared with pore_im, so these indices
        # look shifted by +1 relative to sub_im -- confirm whether the
        # padding should be subtracted here.
        peaks = sp.vstack(sp.where(pore_dt == pore_dt.max())).T
        p_coords[pore, :] = peaks[0] + s_offset
        p_volume[pore] = sp.sum(pore_im)
        p_dia_local[pore] = 2 * sp.amax(pore_dt)
        p_dia_global[pore] = 2 * sp.amax(sub_dt)
        p_area_surf[pore] = sp.sum(pore_dt == 1)
        # Dilate the region by one voxel and keep the labels it now overlaps:
        # these are the region itself plus its touching neighbours.
        im_w_throats = spim.binary_dilation(input=pore_im,
                                            structure=struc_elem(1))
        im_w_throats = im_w_throats * sub_im
        # Drop the smallest unique value (presumably the 0 background --
        # verify for images without solid voxels) and convert labels to
        # zero-based pore indices.
        Pn = sp.unique(im_w_throats)[1:] - 1
        for j in Pn:
            # Only record each connection once, from the lower-index pore
            if j > pore:
                t_conns.append([pore, j])
                # Voxels of neighbour j that fall inside the dilated region
                vx = sp.where(im_w_throats == (j + 1))
                t_dia_inscribed.append(2 * sp.amax(sub_dt[vx]))
                t_perimeter.append(sp.sum(sub_dt[vx] < 2))
                t_area.append(sp.size(vx[0]))
                # Same voxels expressed in full-image indices
                t_inds = tuple([i + j for i, j in zip(vx, s_offset)])
                # Throat coordinate = first of those voxels with the largest
                # distance-transform value
                temp = sp.where(dt[t_inds] == sp.amax(dt[t_inds]))[0][0]
                if im.ndim == 2:
                    t_coords.append(tuple((t_inds[0][temp],
                                           t_inds[1][temp])))
                else:
                    t_coords.append(
                        tuple((t_inds[0][temp], t_inds[1][temp],
                               t_inds[2][temp])))
    # Clean up values
    Nt = len(t_dia_inscribed)  # Get number of throats
    if im.ndim == 2:  # If 2D, add 0's in 3rd dimension
        p_coords = sp.vstack((p_coords.T, sp.zeros((Np, )))).T
        t_coords = sp.vstack((sp.array(t_coords).T, sp.zeros((Nt, )))).T

    # Assemble the network dictionary, scaling voxel counts to physical units
    net = {}
    net['pore.all'] = sp.ones((Np, ), dtype=bool)
    net['throat.all'] = sp.ones((Nt, ), dtype=bool)
    net['pore.coords'] = sp.copy(p_coords) * voxel_size
    net['pore.centroid'] = sp.copy(p_coords) * voxel_size
    net['throat.centroid'] = sp.array(t_coords) * voxel_size
    net['throat.conns'] = sp.array(t_conns)
    net['pore.label'] = sp.array(p_label)
    net['pore.volume'] = sp.copy(p_volume) * (voxel_size**3)
    net['throat.volume'] = sp.zeros((Nt, ), dtype=float)
    net['pore.diameter'] = sp.copy(p_dia_local) * voxel_size
    net['pore.inscribed_diameter'] = sp.copy(p_dia_local) * voxel_size
    # Diameter of the sphere with the same volume as the pore
    net['pore.equivalent_diameter'] = 2 * (
        (3 / 4 * net['pore.volume'] / sp.pi)**(1 / 3))
    net['pore.extended_diameter'] = sp.copy(p_dia_global) * voxel_size
    net['pore.surface_area'] = sp.copy(p_area_surf) * (voxel_size)**2
    net['throat.diameter'] = sp.array(t_dia_inscribed) * voxel_size
    net['throat.inscribed_diameter'] = sp.array(t_dia_inscribed) * voxel_size
    net['throat.area'] = sp.array(t_area) * (voxel_size**2)
    net['throat.perimeter'] = sp.array(t_perimeter) * voxel_size
    net['throat.equivalent_diameter'] = (sp.array(t_area) *
                                         (voxel_size**2))**0.5
    P12 = net['throat.conns']
    # Distances from each throat coordinate to its two pore coordinates
    # NOTE(review): P12[:, 0] requires a 2-D conns array, so an image with no
    # throats at all would presumably fail here -- confirm.
    PT1 = sp.sqrt(
        sp.sum(((p_coords[P12[:, 0]] - t_coords) * voxel_size)**2, axis=1))
    PT2 = sp.sqrt(
        sp.sum(((p_coords[P12[:, 1]] - t_coords) * voxel_size)**2, axis=1))
    net['throat.total_length'] = PT1 + PT2
    # Remove the pore radii to obtain the throat-only length
    PT1 = PT1 - p_dia_local[P12[:, 0]] / 2 * voxel_size
    PT2 = PT2 - p_dia_local[P12[:, 1]] / 2 * voxel_size
    net['throat.length'] = PT1 + PT2
    dist = (p_coords[P12[:, 0]] - p_coords[P12[:, 1]]) * voxel_size
    net['throat.direct_length'] = sp.sqrt(sp.sum(dist**2, axis=1))
    # Make a dummy openpnm network to get the conduit lengths
    pn = op.network.GenericNetwork()
    pn.update(net)
    pn.add_model(propname='throat.endpoints',
                 model=op_gm.throat_endpoints.spherical_pores,
                 pore_diameter='pore.inscribed_diameter',
                 throat_diameter='throat.inscribed_diameter')
    pn.add_model(propname='throat.conduit_lengths',
                 model=op_gm.throat_length.conduit_lengths)
    pn.add_model(propname='pore.area', model=op_gm.pore_area.sphere)
    net['throat.endpoints.head'] = pn['throat.endpoints.head']
    net['throat.endpoints.tail'] = pn['throat.endpoints.tail']
    net['throat.conduit_lengths.pore1'] = pn['throat.conduit_lengths.pore1']
    net['throat.conduit_lengths.pore2'] = pn['throat.conduit_lengths.pore2']
    net['throat.conduit_lengths.throat'] = pn['throat.conduit_lengths.throat']
    net['pore.area'] = pn['pore.area']
    # Discard the temporary network's project from the OpenPNM workspace
    prj = pn.project
    prj.clear()
    wrk = op.Workspace()
    wrk.close_project(prj)

    return net
def annopred_genomewide(data_file=None, ld_radius=None, ld_dict=None, out_file_prefix=None, ps=None, n=None, h2=None, num_iter=None, zero_jump_prob=0.05, burn_in=5, PRF=None): """ Calculate LDpred for a genome """ prf_chr = PRF['chrom'] prf_sids = PRF['sids'] prf_pi = PRF['pi'] prf_sigi2 = PRF['sigi2'] df = h5py.File(data_file, 'r') has_phenotypes = False if 'y' in df.keys(): 'Validation phenotypes found.' y = df['y'][...] # Phenotype num_individs = len(y) risk_scores_pval_derived = sp.zeros(num_individs) risk_scores_pval_derived_inf = sp.zeros(num_individs) has_phenotypes = True ld_scores_dict = ld_dict['ld_scores_dict'] chrom_ld_dict = ld_dict['chrom_ld_dict'] chrom_ref_ld_mats = ld_dict['chrom_ref_ld_mats'] print 'LD radius used: %d' % ld_radius results_dict = {} num_snps = 0 sum_beta2s = 0 cord_data_g = df['cord_data'] for chrom_str in chromosomes_list: if chrom_str in cord_data_g.keys(): g = cord_data_g[chrom_str] betas = g['betas'][...] n_snps = len(betas) num_snps += n_snps sum_beta2s += sp.sum(betas**2) L = ld_scores_dict['avg_gw_ld_score'] chi_square_lambda = sp.mean(n * sum_beta2s / float(num_snps)) # print 'Genome-wide lambda inflation:', chi_square_lambda, print 'Genome-wide mean LD score:', L gw_h2_ld_score_est = max(0.0001, (max(1, chi_square_lambda) - 1) / (n * (L / num_snps))) print 'Estimated genome-wide heritability:', gw_h2_ld_score_est assert chi_square_lambda > 1, 'Check the summary statistic file' if h2 is None: h2 = gw_h2_ld_score_est print h2 h2_new = sp.sum(prf_sigi2) sig_12 = (1.0) / n ####################### pr_sig = {} pr_p = {} annopred_inf_chrom_dict = {} print 'Calculating initial values for MCMC using infinitesimal model' for chrom_str in chromosomes_list: if chrom_str in cord_data_g.keys(): print 'Calculating posterior betas for Chromosome %s' % ( (chrom_str.split('_'))[1]) g = cord_data_g[chrom_str] #Filter monomorphic SNPs snp_stds = g['snp_stds_ref'][...] 
snp_stds = snp_stds.flatten() ok_snps_filter = snp_stds > 0 pval_derived_betas = g['betas'][...] pval_derived_betas = pval_derived_betas[ok_snps_filter] sids = g['sids'][...] sids = sids[ok_snps_filter] chri = int(chrom_str.split('_')[1]) prf_sids_chri = prf_sids[prf_chr == chri] prf_pi_chri = prf_pi[prf_chr == chri] prf_sigi2_chri = prf_sigi2[prf_chr == chri] if len(prf_sids_chri) == len(sids): if sum(prf_sids_chri == sids) == len(prf_sids_chri): pr_p[chrom_str] = sp.copy(prf_pi_chri) pr_sig[chrom_str] = sp.copy(prf_sigi2_chri) else: print 'Order of SNPs does not match, sorting prior files' pr_p[chrom_str] = sp.zeros(len(sids)) pr_sig[chrom_str] = sp.zeros(len(sids)) for i, sid in enumerate(sids): pr_p[chrom_str][i] = prf_pi_chri[prf_sids_chri == sid] pr_sig[chrom_str][i] = prf_sigi2_chri[prf_sids_chri == sid] else: print 'More SNPs found in prior file, extracting SNPs from prior files' pr_p[chrom_str] = sp.zeros(len(sids)) pr_sig[chrom_str] = sp.zeros(len(sids)) for i, sid in enumerate(sids): pr_p[chrom_str][i] = prf_pi_chri[prf_sids_chri == sid] pr_sig[chrom_str][i] = prf_sigi2_chri[prf_sids_chri == sid] pr_sig[chrom_str] = h2 * pr_sig[chrom_str] / h2_new if h2 is not None: h2_chrom = sp.sum(pr_sig[chrom_str]) else: h2_chrom = gw_h2_ld_score_est * (n_snps / float(num_snps)) start_betas = annopred_inf( pval_derived_betas, pr_sigi=pr_sig[chrom_str], reference_ld_mats=chrom_ref_ld_mats[chrom_str], n=n, ld_window_size=2 * ld_radius) annopred_inf_chrom_dict[chrom_str] = start_betas for p in ps: print 'Starting AnnoPred with ', p p_str = p results_dict[p_str] = {} if out_file_prefix: #Preparing output files raw_effect_sizes = [] annopred_effect_sizes = [] annopred_inf_effect_sizes = [] out_sids = [] chromosomes = [] out_positions = [] out_nts = [] out = [] out_inf = [] out.append('The input prior p is ' + str(prf_pi[0]) + '\n') out.append('Estimated Genome-wide heritability: ' + str(gw_h2_ld_score_est) + '\n') out.append('Posterior variance for each snp: ' + 
str(sig_12) + '\n') print 'Estimated Genome-wide heritability from Priors:', h2 print 'Posterior variance for each snp:', sig_12 for chrom_str in chromosomes_list: if chrom_str in cord_data_g.keys(): g = cord_data_g[chrom_str] if has_phenotypes: if 'raw_snps_val' in g.keys(): raw_snps = g['raw_snps_val'][...] else: raw_snps = g['raw_snps_ref'][...] #Filter monomorphic SNPs snp_stds = g['snp_stds_ref'][...] snp_stds = snp_stds.flatten() ok_snps_filter = snp_stds > 0 snp_stds = snp_stds[ok_snps_filter] pval_derived_betas = g['betas'][...] pval_derived_betas = pval_derived_betas[ok_snps_filter] positions = g['positions'][...] positions = positions[ok_snps_filter] sids = g['sids'][...] sids = sids[ok_snps_filter] log_odds = g['log_odds'][...] log_odds = log_odds[ok_snps_filter] nts = g['nts'][...] nts = nts[ok_snps_filter] prf_pi_chri_sorted = pr_p[chrom_str] prf_sigi2_chri_sorted = pr_sig[chrom_str] if out_file_prefix: chromosomes.extend([chrom_str] * len(pval_derived_betas)) out_positions.extend(positions) out_sids.extend(sids) raw_effect_sizes.extend(log_odds) out_nts.extend(nts) n_snps = len(pval_derived_betas) if h2 is not None: h2_chrom = sp.sum(prf_sigi2_chri_sorted) #h2_chrom = h2 * (n_snps / float(num_snps)) else: h2_chrom = gw_h2_ld_score_est * (n_snps / float(num_snps)) #print 'Prior parameters: p=%0.3f, n=%d, m=%d, h2_chrom=%0.4f' % (p, n, n_snps, h2_chrom) res_dict = non_infinitesimal_mcmc( pval_derived_betas, Pi=prf_pi_chri_sorted, Sigi2=prf_sigi2_chri_sorted, sig_12=sig_12, h2=h2_chrom, n=n, ld_radius=ld_radius, num_iter=num_iter, burn_in=burn_in, ld_dict=chrom_ld_dict[chrom_str], start_betas=annopred_inf_chrom_dict[chrom_str], zero_jump_prob=zero_jump_prob) updated_betas = res_dict['betas'] updated_inf_betas = res_dict['inf_betas'] sum_sqr_effects = sp.sum(updated_betas**2) if sum_sqr_effects > gw_h2_ld_score_est: print 'Sum of squared updated effects estimates seems too large:', sum_sqr_effects print 'This suggests that the Gibbs sampler did not 
convergence.' print 'Calculating scores for Chromosome %s' % ( (chrom_str.split('_'))[1]) updated_betas = updated_betas / (snp_stds.flatten()) updated_inf_betas = updated_inf_betas / (snp_stds.flatten()) annopred_effect_sizes.extend(updated_betas) annopred_inf_effect_sizes.extend(updated_inf_betas) if has_phenotypes: prs = sp.dot(updated_betas, raw_snps) prs_inf = sp.dot(updated_inf_betas, raw_snps) risk_scores_pval_derived += prs risk_scores_pval_derived_inf += prs_inf corr = sp.corrcoef(y, prs)[0, 1] r2 = corr**2 corr_inf = sp.corrcoef(y, prs_inf)[0, 1] r2_inf = corr_inf**2 # print 'The R2 prediction accuracy of PRS using %s was: %0.4f' %(chrom_str, r2) # print 'The R2 prediction accuracy of PRS using %s was: %0.4f' %(chrom_str, r2_inf) out.append('The R2 prediction accuracy of PRS using ' + chrom_str + ' was ' + str(r2) + '\n') out_inf.append('The R2 prediction accuracy of PRS using ' + chrom_str + ' was ' + str(r2_inf) + '\n') # print 'There were %d (SNP) effects' % num_snps if has_phenotypes: num_indivs = len(y) results_dict[p_str]['y'] = y results_dict[p_str]['risk_scores_pd'] = risk_scores_pval_derived # print 'Prediction accuracy was assessed using %d individuals.'%(num_indivs) out.append('Prediction accuracy was assessed using ' + str(num_indivs) + ' individuals\n') corr = sp.corrcoef(y, risk_scores_pval_derived)[0, 1] r2 = corr**2 results_dict[p_str]['r2_pd'] = r2 # print 'The R2 prediction accuracy (observed scale) for the whole genome was: %0.4f (%0.6f)' % (r2, ((1-r2)**2)/num_indivs) out.append( 'The R2 prediction accuracy (observed scale) for the whole genome was: ' + str(r2) + ' (' + str(((1 - r2)**2) / num_indivs) + ')\n') corr_inf = sp.corrcoef(y, risk_scores_pval_derived_inf)[0, 1] r2_inf = corr_inf**2 results_dict[p_str]['r2_pd'] = r2_inf # print 'The R2 prediction accuracy (observed scale) for the whole genome was: %0.4f (%0.6f)' % (r2_inf, ((1-r2_inf)**2)/num_indivs) out_inf.append( 'The R2 prediction accuracy (observed scale) for the whole 
genome was: ' + str(r2_inf) + ' (' + str(((1 - r2_inf)**2) / num_indivs) + ')\n') if corr < 0: risk_scores_pval_derived = -1 * risk_scores_pval_derived auc = pred_accuracy(y, risk_scores_pval_derived) print 'AnnoPred AUC/COR for the whole genome was: %0.4f' % auc out.append('AUC/COR for the whole genome was: ' + str(auc) + '\n') if corr_inf < 0: risk_scores_pval_derived_inf = -1 * risk_scores_pval_derived_inf auc_inf = pred_accuracy(y, risk_scores_pval_derived_inf) print 'AnnoPred-inf AUC/COR for the whole genome was: %0.4f' % auc_inf out_inf.append('AUC/COR for the whole genome was: ' + str(auc_inf) + '\n') sp.savetxt('%s_y_' % (out_file_prefix) + str(p) + '.txt', y) sp.savetxt('%s_prs_' % (out_file_prefix) + str(p) + '.txt', risk_scores_pval_derived) sp.savetxt('%s_prs-inf' % (out_file_prefix) + str(p) + '.txt', risk_scores_pval_derived_inf) #Now calibration denominator = sp.dot(risk_scores_pval_derived.T, risk_scores_pval_derived) y_norm = (y - sp.mean(y)) / sp.std(y) numerator = sp.dot(risk_scores_pval_derived.T, y_norm) regression_slope = (numerator / denominator) #[0][0] # print 'The slope for predictions with P-value derived effects is:',regression_slope out.append( 'The slope for predictions with P-value derived effects is: ' + str(regression_slope) + '\n') results_dict[p_str]['slope_pd'] = regression_slope ff = open('%s_non_inf_auc_' % (out_file_prefix) + str(p) + '.txt', "w") ff.writelines(out) ff.close() ff_inf = open('%s_inf_auc_' % (out_file_prefix) + str(p) + '.txt', "w") ff_inf.writelines(out_inf) ff_inf.close() weights_out_file = '%s_non_inf_betas_' % (out_file_prefix) + str( p) + '.txt' ################################### with open(weights_out_file, 'w') as f: f.write( 'chrom pos sid nt1 nt2 raw_beta AnnoPred_beta\n' ) for chrom, pos, sid, nt, raw_beta, annopred_beta in it.izip( chromosomes, out_positions, out_sids, out_nts, raw_effect_sizes, annopred_effect_sizes): nt1, nt2 = nt[0], nt[1] f.write('%s %d %s %s %s %0.4e %0.4e\n' % (chrom, pos, sid, 
nt1, nt2, raw_beta, annopred_beta)) weights_out_file = '%s_inf_betas_' % (out_file_prefix) + str( p) + '.txt' with open(weights_out_file, 'w') as f: f.write( 'chrom pos sid nt1 nt2 raw_beta AnnoPred_inf_beta \n' ) for chrom, pos, sid, nt, raw_beta, annopred_inf_beta in it.izip( chromosomes, out_positions, out_sids, out_nts, raw_effect_sizes, annopred_inf_effect_sizes): nt1, nt2 = nt[0], nt[1] f.write( '%s %d %s %s %s %0.4e %0.4e\n' % (chrom, pos, sid, nt1, nt2, raw_beta, annopred_inf_beta))
def Move(prevspace, direction, timestep):
    """Advance every movable cell of a 2-D grid by at most one step.

    Cells holding 1 or 100 are movable and 0 marks an empty cell.  A movable
    cell swaps places with the empty neighbour selected by its direction
    code.  While that neighbour is occupied the code is rotated by a random
    but fixed step of +1 or -1 (wrapping between 0 and 7); after 8
    unsuccessful attempts the cell stays where it is.  A cell that has
    already been moved during this call is not moved again.

    prevspace -- 2-D array holding the grid; it is not modified
    direction -- array of the same shape with direction codes:
                 0 = row - 1, 1 = row - 1 / col + 1, 2 = col + 1,
                 3 = row + 1 / col + 1, 4 = row + 1, 5 = row + 1 / col - 1,
                 6 = col - 1, 7 = row - 1 / col - 1
    timestep  -- ``timestep % 4`` selects the sweep order: 0 = rows and
                 columns ascending, 1 = columns descending, 2 = rows
                 descending, 3 = both descending

    Only interior cells (not on the outermost rows/columns) are visited as
    sources; border cells can still receive a moved cell.

    Returns the updated copy of the grid.
    """
    space = sci.copy(prevspace)
    rows, cols = space.shape
    moved = sci.zeros((rows, cols))  # zero indicates not moved

    # (row, col) offset for each direction code 0..7
    offsets = ((-1, 0), (-1, 1), (0, 1), (1, 1),
               (1, 0), (1, -1), (0, -1), (-1, -1))

    # Sweep order.  Sliced ranges can be iterated repeatedly, unlike
    # reversed(...), which is exhausted after the first row.
    rem = timestep % 4
    row_order = range(1, rows - 1)
    col_order = range(1, cols - 1)
    if rem in (2, 3):
        row_order = row_order[::-1]
    if rem in (1, 3):
        col_order = col_order[::-1]

    for i in row_order:
        for j in col_order:
            movable = space[i, j] == 1 or space[i, j] == 100
            if not movable or moved[i, j] != 0:
                continue
            direct = direction[i, j]
            shift = sci.random.choice([1, -1])
            count = 0
            while space[i, j] != 0:
                count += 1
                # Offset for the current code, or None for an unknown code
                step = next((off for code, off in enumerate(offsets)
                             if direct == code), None)
                if step is not None and space[i + step[0], j + step[1]] == 0:
                    ti, tj = i + step[0], j + step[1]
                    space[ti, tj], space[i, j] = space[i, j], space[ti, tj]
                    moved[ti, tj] = 1
                else:
                    # Blocked: try the next direction, wrapping around
                    direct += shift
                    if direct > 7:
                        direct = 0
                    elif direct < 0:
                        direct = 7
                if count > 7:
                    break
    return space
def plot_rc(self,
            save=False,
            xs=True,
            xsapprox=True,
            kind='power',
            dist=5000,
            raw=False,
            alpha=0.05,
            div=5,
            box=False):
    """Plot USGS, HAND and cross-section (xs) rating curves.

    'save' - falsy to show the figure, otherwise the target passed to
        ``fig.savefig``
    'xs' - plot xs rating curves, one figure per reach interval [T/F]
    'xsapprox' - plot xs rating curve approximation from n-value
        averages [T/F]
    'kind' - interpolation kind handed to ``self.interp``
    'dist' - length of each reach interval, in the units of ``self.handlen``
    'raw' - plot each xs curve at its own discharge points (True) or at
        'div' evenly spaced discharges (False)
    'div' - number of intervals used to resample each xs curve [R]
    'alpha', 'box' - accepted but not used in this method

    NOTE(review): ``usgs`` and ``hand`` are read below as free names; they
    are not parameters of this method -- confirm they are defined at module
    level.
    NOTE(review): the nesting of the sections below was reconstructed from
    whitespace-collapsed source -- confirm against the original file.
    """

    fig, ax = plt.subplots()

    if usgs:  # Plot interpolated USGS rating curve
        # Plot curves
        for q, h in zip(self.usgsq, self.usgsh):
            # 'cubic' is not used for USGS data; a power-law fit is drawn
            if kind == 'cubic':
                print 'USGS interpolation plotted as power-law fit'
                f = self.interp(x=q, y=h, kind='power')
            else:
                f = self.interp(x=q, y=h, kind=kind)
            ax.plot(q, f(q), label='usgs', c='g', linewidth=5)

    if hand:  # Plot interpolated HAND rating curve
        # Plot curves
        f = self.interp(x=self.handq, y=self.handh, kind=kind)
        ax.plot(self.handq, f(self.handq), label='hand', c='b', linewidth=5)

    if xs:  # Plot all linearly-interpolated XS rating curves
        # Upper bounds of the reach intervals, every 'dist' along the reach
        intervals = scipy.arange(dist, self.handlen + dist, dist)
        # print 'Intervals:',intervals
        # Interval bounds expressed as a percentage of self.handlen
        cutoffub = [i / self.handlen * 100 for i in intervals]
        # Lower bounds: the upper bounds shifted by one, starting at 0
        cutofflb = scipy.copy(cutoffub)
        cutofflb = scipy.insert(cutofflb, 0, 0)[:-1]
        cutoffs = zip(cutofflb, cutoffub)
        for l, u in cutoffs:
            # Cross-sections whose profile value lies in [l, u)
            idx = scipy.where(
                scipy.logical_and(scipy.greater_equal(self.xs_profs, l),
                                  scipy.less(self.xs_profs, u)))[0]
            # Cap the upper bound used in the title and file name
            if u > 100:
                u = 100.00

            fig, ax = plt.subplots()  # get figure and axes for plotting
            # NOTE(review): fname is built but not used afterwards in this
            # method.
            fname = 'results/by5000/{0}/rc__comid_{0}_from_{1}_to_{2}.png'.format(
                self.comid, ('%.2f' % l), ('%.2f' % u))

            for prof, disch, stage in zip(self.xs_profs[idx],
                                          self.xs_disch[idx],
                                          self.xs_stage[idx]):
                # Get interpolation function
                # print (('%.2f' % prof) + str(disch))
                # print (('%.2f' % prof) + str(stage))
                f = self.interp(x=disch, y=stage, kind=kind)

                if raw == True:  # Plot raw data (ie. only HEC-RAS points)
                    # interp over discharge
                    ax.plot(disch, f(disch), c='grey', linewidth=2)

                    # interp over stage (switched axes) for testing
                    # f = self.interp(x=stage,y=disch,kind=kind)
                    # ax.plot(f(stage),stage,c='purple',linewidth=1)

                if raw == False:  # Plot interpolated data (ie. 'div' many interpolated points)
                    interval = disch[-1] / div
                    qvals = scipy.arange(0, (disch[-1] + interval),
                                         interval)  # [1:]
                    ax.plot(qvals, f(qvals), c='grey', linewidth=2)

            # Add one label for all cross-section curves
            ax.plot([], [], label='HEC-RAS', c='grey', linewidth=2)

            # Plot graph
            fig.set_size_inches(20, 16, forward=True)
            plt.gca().set_xlim(left=0, right=self.max_disch)
            plt.gca().set_ylim(bottom=0, top=self.max_stage)
            # Keep every second tick
            ax.set_xticks(ax.get_xticks()[::2])
            ax.set_yticks(ax.get_yticks()[::2])
            title = 'COMID {0}, ({1},{2})'.format(self.comid, ('%.2f' % l),
                                                  ('%.2f' % u))
            ax.set_title(title, y=1.04, fontsize=56)
            plt.xlabel('Q (cfs)', fontsize=56)
            plt.ylabel('H (ft)', fontsize=56)
            ax.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
            plt.rc('font', size=56)
            plt.legend(loc='upper left', fontsize=40)
            plt.tick_params(axis='both', labelsize=56)
            plt.grid()

            # print '\n------------------------\n'
            if xsapprox:
                # Add approximate rating curve from average n-values
                qvals, hvals = self.get_xs_q(low=0, upto=83)
                f = self.interp(x=qvals, y=hvals, kind=kind)
                ax.plot(qvals,
                        f(qvals),
                        label='Resistance Function',
                        c='red',
                        linewidth=5)

                # Add approximate rating curve for these indices
                # NOTE(review): idx[0] presumably fails when no
                # cross-section falls inside the interval -- confirm.
                idxqvals, idxhvals = self.get_xs_q(low=idx[0], upto=idx[-1])
                if len(idxqvals) == 0:
                    print 'No data found for profiles {0} to {1}'.format(
                        ('%.2f' % l), ('%.2f' % u))
                    break
                # f = self.interp(x=idxqvals,y=idxhvals,kind=kind)
                # ax.plot(idxqvals,f(idxqvals),label='Resistance Function Local Average',c='orange',linewidth=5)

    # else: fig,ax = plt.subplots()
    # Plot graph
    # Applies to whichever figure is bound to fig/ax at this point; the axis
    # limits come from the last point of the first USGS curve.
    fig.set_size_inches(20, 16, forward=True)
    plt.gca().set_xlim(left=0, right=self.usgsq[0][-1])
    plt.gca().set_ylim(bottom=0, top=self.usgsh[0][-1])
    ax.set_xticks(ax.get_xticks()[::2])
    ax.set_yticks(ax.get_yticks()[::2])
    title = 'COMID {0}'.format(self.comid)
    ax.set_title(title, y=1.04, fontsize=56)
    plt.xlabel('Q (cfs)', fontsize=56)
    plt.ylabel('H (ft)', fontsize=56)
    ax.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
    plt.rc('font', size=56)
    plt.legend(loc='upper left', fontsize=40)
    plt.tick_params(axis='both', labelsize=56)
    plt.grid()

    # 'save' doubles as flag and destination: truthy saves, falsy shows
    if save:
        fig.savefig(save)
        plt.clf()

    if not save:
        # mng = plt.get_current_fig_manager()
        # mng.resize(*mng.window.maxsize())
        plt.show()
        plt.clf()
def _intersections(x1, y1, x2, y2):
    r"""Return the points where two piecewise-linear curves intersect.

    Python port of the MATLAB routine ``intersections``
    (``[X0,Y0] = intersections(X1,Y1,X2,Y2)``).  The curves can be broken
    with NaNs or have vertical segments.

    Version: 1.10, 25 February 2008
    Converted to Python October 2010 by Jeffrey Bush [email protected]

    Author: Douglas M. Schwarz
    Email: dmschwarz=ieee*org, dmschwarz=urgrad*rochester*edu
    Real_email = regexprep(Email,{'=','*'},{'@','.'})

    Parameters
    ----------
    x1, y1 : sequence of float
        Equal-length vectors of at least two points describing curve 1.
    x2, y2 : sequence of float
        Equal-length vectors of at least two points describing curve 2.

    Returns
    -------
    x0, y0 : ndarray
        1-D arrays with the coordinates of the intersection points, one
        entry per crossing segment pair (empty when the curves do not
        cross).  The MATLAB original can additionally return the fractional
        segment indices I and J; this port does not.

    Raises
    ------
    ValueError
        If a curve has fewer than two points, is not one-dimensional, or
        its x and y vectors differ in length.

    Notes
    -----
    Theory of operation:

    Given two line segments, L1 and L2,

        L1 endpoints: (x1(1),y1(1)) and (x1(2),y1(2))
        L2 endpoints: (x2(1),y2(1)) and (x2(2),y2(2))

    we can write four equations with four unknowns and then solve them.
    The four unknowns are t1, t2, x0 and y0, where (x0,y0) is the
    intersection of L1 and L2, t1 is the distance from the starting point
    of L1 to the intersection relative to the length of L1 and t2 is the
    distance from the starting point of L2 to the intersection relative to
    the length of L2.

    So, the four equations are

        (x1(2) - x1(1))*t1 = x0 - x1(1)
        (x2(2) - x2(1))*t2 = x0 - x2(1)
        (y1(2) - y1(1))*t1 = y0 - y1(1)
        (y2(2) - y2(1))*t2 = y0 - y2(1)

    Rearranging and writing in matrix form,

        [x1(2)-x1(1)       0       -1   0;      [t1;      [-x1(1);
              0       x2(2)-x2(1)  -1   0;   *   t2;   =   -x2(1);
         y1(2)-y1(1)       0        0  -1;       x0;       -y1(1);
              0       y2(2)-y2(1)   0  -1]       y0]       -y2(1)]

    Let's call that A*T = B.  We can solve for T with T = A\B.

    Once we have our solution we just have to look at t1 and t2 to
    determine whether L1 and L2 intersect.  If 0 <= t1 < 1 and
    0 <= t2 < 1 then the two line segments cross and we can include
    (x0,y0) in the output.

    In principle, we have to perform this computation on every pair of
    line segments in the input data.  This can be quite a large number of
    pairs so we will reduce it by doing a simple preliminary check to
    eliminate line segment pairs that could not possibly cross.  The check
    is to look at the smallest enclosing rectangles (with sides parallel to
    the axes) for each line segment pair and see if they overlap.  If they
    do then we have to compute t1 and t2 (via the A\B computation) to see
    if the line segments cross, but if they don't then the line segments
    cannot cross.  In a typical application, this technique will eliminate
    most of the potential line segment pairs.

    Segment pairs whose 4x4 system is exactly singular (parallel or
    collinear segments) have no single crossing point and are skipped.
    """
    # Work on float arrays so that plain lists/tuples are accepted and the
    # fancy indexing below (x1[i], ...) is always valid.
    x1, y1, x2, y2 = [sp.asarray(v, dtype=float) for v in (x1, y1, x2, y2)]

    # x1 and y1 must be vectors with same number of points (at least 2).
    if x1.ndim != 1 or x1.size < 2 or x1.shape != y1.shape:
        raise ValueError(
            'X1 and Y1 must be equal-length vectors of at least 2 points.')
    # x2 and y2 must be vectors with same number of points (at least 2).
    if x2.ndim != 1 or x2.size < 2 or x2.shape != y2.shape:
        raise ValueError(
            'X2 and Y2 must be equal-length vectors of at least 2 points.')

    # Per-segment coordinate differences, needed for the linear systems.
    dxy1 = sp.column_stack((sp.diff(x1), sp.diff(y1)))
    dxy2 = sp.column_stack((sp.diff(x2), sp.diff(y2)))

    # Axis-aligned bounding box of every segment.  Curve 1 boxes are laid
    # out along rows and curve 2 boxes along columns so that broadcasting
    # compares every (i, j) segment pair without building tiled copies.
    lo_x1 = sp.minimum(x1[:-1], x1[1:])[:, None]
    hi_x1 = sp.maximum(x1[:-1], x1[1:])[:, None]
    lo_y1 = sp.minimum(y1[:-1], y1[1:])[:, None]
    hi_y1 = sp.maximum(y1[:-1], y1[1:])[:, None]
    lo_x2 = sp.minimum(x2[:-1], x2[1:])[None, :]
    hi_x2 = sp.maximum(x2[:-1], x2[1:])[None, :]
    lo_y2 = sp.minimum(y2[:-1], y2[1:])[None, :]
    hi_y2 = sp.maximum(y2[:-1], y2[1:])[None, :]

    # Combinations of i and j where the rectangle enclosing the i'th
    # segment of curve 1 overlaps the rectangle enclosing the j'th segment
    # of curve 2.
    overlap = ((lo_x1 <= hi_x2) & (hi_x1 >= lo_x2) &
               (lo_y1 <= hi_y2) & (hi_y1 >= lo_y2))
    i, j = sp.nonzero(overlap)

    # Discard segment pairs which have at least one NaN vertex.  NaNs
    # propagate through the differences and the sum, so one isnan() call
    # covers all four vertices of a pair.
    keep = ~sp.isnan(sp.sum(dxy1[i, :] + dxy2[j, :], axis=1))
    i = i[keep]
    j = j[keep]

    # Build all systems at once: AA[:, :, k] is the matrix A of pair k and
    # B[:, k] its right-hand side; T[:, k] receives (t1, t2, x0, y0).
    n = len(i)
    T = sp.zeros((4, n))
    AA = sp.zeros((4, 4, n))
    AA[[0, 1], 2, :] = -1
    AA[[2, 3], 3, :] = -1
    AA[[0, 2], 0, :] = dxy1[i, :].T
    AA[[1, 3], 1, :] = dxy2[j, :].T
    B = -sp.array([x1[i], x2[j], y1[i], y2[j]])

    # Solve pair by pair.  Parallel/collinear segments give a singular
    # matrix; record them as unsolved instead of aborting the whole call.
    solved = sp.ones(n, dtype=bool)
    for k in range(n):
        try:
            PL, U = lin.lu(AA[:, :, k], permute_l=True)
            T[:, k] = lin.solve(U, lin.solve(PL, B[:, k]))
        except lin.LinAlgError:
            solved[k] = False

    # Keep the solutions where both t1 and t2 lie in [0, 1) and return the
    # corresponding x0 and y0 values.
    in_range = (solved &
                (T[0, :] >= 0) & (T[0, :] < 1) &
                (T[1, :] >= 0) & (T[1, :] < 1))
    x0 = T[2, in_range].T
    y0 = T[3, in_range].T
    return x0, y0
def test_calculates_time(self):
    """``calc_time`` must create a ``time`` attribute that is already sorted."""
    self.Data.calc_time()
    self.assertTrue(hasattr(self.Data, 'time'))

    # Sorting a copy in place must leave it equal to the stored time axis,
    # which is only the case when that axis is in ascending order.
    ordered = sp.copy(self.Data.time)
    ordered.sort()
    unchanged_by_sort = sp.allclose(ordered, self.Data.time)
    self.assertTrue(unchanged_by_sort)
# Author: Pablo Gullith

# Libraries
from scipy import floor, linspace, array, zeros, copy, loadtxt
from scipy.fftpack import rfft, irfft, dct, idct
from pylab import plot, show, xlabel, ylabel

# Read the series from disk and display it unmodified.
dow2 = loadtxt('dow2.txt', float)
plot(dow2)
show()

# Fourier smoothing: keep only the first 1/50 (2%) of the rfft output,
# leave every other coefficient at zero and transform back.
dow2_fourier = rfft(dow2)
N = len(dow2_fourier)
n_keep_fourier = int(N / 50)
Primeiros_2_porcento = zeros(N, float)
Primeiros_2_porcento[:n_keep_fourier] = copy(dow2_fourier[:n_keep_fourier])
Suavizada_dow2 = irfft(Primeiros_2_porcento)

# Cosine-transform smoothing: same truncation applied to the DCT output.
dow2_cos = dct(dow2)
n = len(dow2_cos)
n_keep_cos = int(n / 50)
Primeiros_cos_2_porcento = zeros(n, float)
Primeiros_cos_2_porcento[:n_keep_cos] = copy(dow2_cos[:n_keep_cos])
# NOTE(review): the division by 2*n presumably undoes the scaling of the
# default (unnormalised) dct/idct round trip - confirm against scipy docs.
Suavizada_cos_dow2 = idct(Primeiros_cos_2_porcento) / (2*n)

# One figure per smoothing method: original series in black, smoothed
# series on top (green for Fourier, red for cosine).
for smoothed, colour in ((Suavizada_dow2, 'g'), (Suavizada_cos_dow2, 'r')):
    plot(dow2, 'k')
    plot(smoothed, colour)
    show()
def trim_saddle_points(peaks, dt, max_iters=10):
    r"""
    Removes peaks that were mistakenly identified because they lied on a
    saddle or ridge in the distance transform that was not actually a true
    local peak.

    Parameters
    ----------
    peaks : ND-array
        A boolean image containing True values to mark peaks in the
        distance transform (``dt``)

    dt : ND-array
        The distance transform of the pore space for which the true peaks
        are sought.

    max_iters : int
        The maximum number of iterations to run while eroding the saddle
        points.  The default is 10, which is usually not reached; however,
        a warning is issued if the loop ends prior to removing all saddle
        points.

    Returns
    -------
    image : ND-array
        An image with fewer peaks than the input image.  The input array is
        not modified.

    Notes
    -----
    Each labelled peak is examined inside a window around it (its bounding
    box extended by ``extend_slice`` with ``pad=10``).  Only the voxels
    belonging to a peak classified as a saddle point are cleared; any other
    peaks that happen to fall inside the same window are left untouched.

    References
    ----------
    [1] Gostick, J. "A versatile and efficient network extraction algorithm
    using marker-based watershed segmenation".  Physical Review E. (2017)

    """
    peaks = sp.copy(peaks)
    # Fully connected 3**ndim neighbourhood (all ones), i.e. the same
    # footprint as skimage's ``square(3)`` in 2D and ``cube(3)`` in 3D.
    # It is loop-invariant, so build it once.
    strel = spim.generate_binary_structure(dt.ndim, dt.ndim)
    labels, N = spim.label(peaks)
    slices = spim.find_objects(labels)
    for i in range(N):
        s = extend_slice(s=slices[i], shape=peaks.shape, pad=10)
        peaks_i = labels[s] == i+1  # voxels of this peak only
        dt_i = dt[s]
        im_i = dt_i > 0
        peaks_dil = sp.copy(peaks_i)
        for _ in range(max_iters):
            # Grow the peak by one voxel and find where the distance
            # transform reaches its maximum over the grown region.
            peaks_dil = spim.binary_dilation(input=peaks_dil,
                                             structure=strel)
            peaks_max = peaks_dil*sp.amax(dt_i*peaks_dil)
            peaks_extended = (peaks_max == dt_i)*im_i
            if sp.all(peaks_extended == peaks_i):
                break  # Found a true peak
            elif sp.sum(peaks_extended*peaks_i) == 0:
                # Found a saddle point: clear just this peak's voxels.
                # ``peaks[s]`` is a view, so the masked assignment writes
                # through to ``peaks`` without touching neighbouring peaks.
                peaks[s][peaks_i] = False
                break
        else:
            # Only reached when no ``break`` happened, i.e. the peak is
            # still undecided after max_iters dilations; it is kept.
            print('Maximum number of iterations reached, consider ' +
                  'running again with a larger value of max_iters')
    return peaks
def ldpred_gibbs(beta_hats, genotypes=None, start_betas=None, h2=None,
                 n=1000, ld_radius=100, num_iter=60, burn_in=10, p=None,
                 zero_jump_prob=0.05, ld_dict=None, reference_ld_mats=None,
                 ld_boundaries=None, verbose=False):
    """
    LDpred (Gibbs Sampler)

    Runs ``num_iter`` sweeps over all effects, updating them one at a time
    in sequential order, and averages the per-sweep posterior means over
    the sweeps after ``burn_in``.

    Parameters
    ----------
    beta_hats : sequence of float
        Marginal effect estimates, one per SNP (length ``m``).
    genotypes, reference_ld_mats :
        Only forwarded to ``LDpred_inf.ldpred_inf`` when ``start_betas`` is
        not given.
    start_betas : sequence of float, optional
        Starting values for the effects.  When None they are computed with
        ``LDpred_inf.ldpred_inf``.  The given object is returned unchanged
        under the key ``'inf_betas'``.
    h2 : float
        Heritability estimate used to set the prior effect variance
        ``h2 / (m * p)``.  Required despite the None default.
    n : number
        Sample size; ``1 / n`` is used as the sampling variance.
    ld_radius : int
        Half-width of the LD window around each SNP, used only when
        ``ld_boundaries`` is None.
    num_iter : int
        Total number of sweeps.  Must be larger than ``burn_in``.
    burn_in : int
        Number of initial sweeps excluded from the average.
    p : float
        Prior weight of the non-zero mixture component (presumably the
        fraction of causal variants).  Required despite the None default.
    zero_jump_prob : float
        Upper bound ``1 - zero_jump_prob`` on the shrink factor ``alpha``.
    ld_dict : mapping
        ``ld_dict[snp_i]`` is the row of local LD coefficients for SNP
        ``snp_i``, aligned with that SNP's window.
    ld_boundaries : sequence, optional
        ``ld_boundaries[snp_i]`` gives ``(start, stop)`` of the window of
        SNP ``snp_i``.  When None the window is derived from ``ld_radius``.
    verbose : bool
        Write progress and timing information to stdout.

    Returns
    -------
    dict
        ``{'betas': averaged posterior means, 'inf_betas': start_betas}``.

    Raises
    ------
    ValueError
        If ``num_iter`` is not larger than ``burn_in`` (there would be no
        sweep to average over).
    """
    if num_iter <= burn_in:
        raise ValueError(
            'num_iter (%d) must be larger than burn_in (%d).'
            % (num_iter, burn_in))

    t0 = time.time()
    m = len(beta_hats)
    n = float(n)

    # If no starting values for effects were given, then use the
    # infinitesimal model starting values.
    if start_betas is None:
        print('Initializing LDpred effects with posterior mean LDpred-inf effects.')
        print('Calculating LDpred-inf effects.')
        start_betas = LDpred_inf.ldpred_inf(
            beta_hats, genotypes=genotypes,
            reference_ld_mats=reference_ld_mats, h2=h2, n=n,
            ld_window_size=2 * ld_radius, verbose=False)
    curr_betas = sp.copy(start_betas)
    assert len(curr_betas) == m, \
        'Betas returned by LDpred_inf do not have the same length as expected.'
    curr_post_means = sp.zeros(m)
    avg_betas = sp.zeros(m)

    # Iterating over effect estimates in sequential order
    iter_order = sp.arange(m)

    # Setting up the marginal Bayes shrink
    Mp = m * p
    hdmp = (h2 / Mp)
    hdmpn = hdmp + 1.0 / n
    hdmp_hdmpn = (hdmp / hdmpn)
    c_const = (p / sp.sqrt(hdmpn))
    d_const = (1.0 - p) / (sp.sqrt(1.0 / n))

    # The two windowing modes differ only in how the local LD window of a
    # SNP is located; everything else in the update is shared.
    use_fixed_radius = ld_boundaries is None

    for k in range(num_iter):  # Big iteration

        # Force an alpha shrink if estimates are way off compared to
        # heritability estimates.  (Improves MCMC convergence.)
        h2_est = max(0.00001, sp.sum(curr_betas ** 2))
        alpha = min(1 - zero_jump_prob, 1.0 / h2_est,
                    (h2 + 1.0 / sp.sqrt(n)) / h2_est)

        # Draw all random numbers of this sweep up front (uniforms first,
        # then normals, so the random stream is consumed in a fixed order).
        rand_ps = sp.random.random(m)
        # NOTE(review): norm.rvs takes a standard deviation as its scale;
        # confirm that hdmp_hdmpn / n is meant as such.
        rand_norms = stats.norm.rvs(0.0, (hdmp_hdmpn) * (1.0 / n), size=m)

        for i, snp_i in enumerate(iter_order):
            if use_fixed_radius:
                start_i = max(0, snp_i - ld_radius)
                focal_i = min(ld_radius, snp_i)
                stop_i = min(m, snp_i + ld_radius + 1)
            else:
                start_i = ld_boundaries[snp_i][0]
                stop_i = ld_boundaries[snp_i][1]
                focal_i = snp_i - start_i

            # Local LD matrix
            D_i = ld_dict[snp_i]

            # Local (most recently updated) effect estimates.  This is a
            # view into curr_betas: zeroing the focal entry temporarily
            # clears curr_betas[snp_i], which is overwritten again at the
            # end of this update.
            local_betas = curr_betas[start_i:stop_i]

            # Calculate the local posterior mean, used when sampling.
            local_betas[focal_i] = 0.0
            res_beta_hat_i = beta_hats[snp_i] - sp.dot(D_i, local_betas)
            b2 = res_beta_hat_i ** 2

            # Posterior probability of a non-zero effect.
            # NOTE(review): sp.isreal only checks for a zero imaginary
            # part, so for real-valued input these guards always pass.
            d_const_b2_exp = d_const * sp.exp(-b2 * n / 2.0)
            if sp.isreal(d_const_b2_exp):
                numerator = c_const * sp.exp(-b2 / (2.0 * hdmpn))
                if sp.isreal(numerator):
                    if numerator == 0.0:
                        postp = 0.0
                    else:
                        postp = numerator / (numerator + d_const_b2_exp)
                        assert sp.isreal(postp), \
                            'The posterior mean is not a real number? Possibly due to problems with summary stats, LD estimates, or parameter settings.'
                else:
                    postp = 0.0
            else:
                postp = 1.0
            curr_post_means[snp_i] = hdmp_hdmpn * postp * res_beta_hat_i

            if rand_ps[i] < postp * alpha:
                # Sample from the posterior Gaussian dist.
                proposed_beta = rand_norms[i] + hdmp_hdmpn * res_beta_hat_i
            else:
                # Sample 0
                proposed_beta = 0.0

            curr_betas[snp_i] = proposed_beta  # UPDATE BETA

        if verbose:
            sys.stdout.write('\b\b\b\b\b\b\b%0.2f%%' %
                             (100.0 * (min(1, float(k + 1) / num_iter))))
            sys.stdout.flush()

        if k >= burn_in:
            # Averaging over the posterior means instead of samples.
            avg_betas += curr_post_means

    avg_betas = avg_betas / float(num_iter - burn_in)
    t1 = time.time()
    t = (t1 - t0)
    if verbose:
        print('\nTook %d minutes and %0.2f seconds' % (t / 60, t % 60))
    return {'betas': avg_betas, 'inf_betas': start_betas}
def trim_nearby_peaks(peaks, dt):
    r"""
    Finds pairs of peaks that are nearer to each other than to the solid
    phase, and removes the peak that is closer to the solid.

    Parameters
    ----------
    peaks : ND-array
        A boolean image containing True values to mark peaks in the
        distance transform (``dt``)

    dt : ND-array
        The distance transform of the pore space for which the true peaks
        are sought.

    Returns
    -------
    image : ND-array
        A boolean array the same size as ``peaks`` containing a subset of
        the peaks in the original image.  The input array is not modified.

    Notes
    -----
    Each pair of peaks is considered simultaneously, so for a triplet of
    peaks each pair is considered.  This ensures that only the single peak
    that is furthest from the solid is kept.  No iteration is required.

    When a peak and its nearest neighbor are exactly equally far from the
    solid, the one with the higher label is dropped, so a tied pair loses
    exactly one peak.

    Images with fewer than two peaks are returned as they are.

    References
    ----------
    [1] Gostick, J. "A versatile and efficient network extraction algorithm
    using marker-based watershed segmenation".  Physical Review E. (2017)

    """
    # Fully connected 3**ndim neighbourhood (all ones), i.e. the same
    # footprint as skimage's ``square(3)`` in 2D and ``cube(3)`` in 3D.
    strel = spim.generate_binary_structure(dt.ndim, dt.ndim)
    # ``label`` returns a new array, so the caller's ``peaks`` is untouched.
    labels, N = spim.label(peaks, structure=strel)
    if N < 2:
        # Nothing to compare: zero or one peak has no neighbor.
        return (labels > 0)

    crds = spim.center_of_mass(labels, labels=labels,
                               index=sp.arange(1, N+1))
    crds = sp.vstack(crds).astype(int)  # Convert to numpy array of ints

    # Nearest other peak of every peak (column 0 of the query result is
    # the peak itself, column 1 its closest neighbor).
    tree = sptl.cKDTree(data=crds)
    temp = tree.query(x=crds, k=2)
    nearest_neighbor = temp[1][:, 1]
    dist_to_neighbor = temp[0][:, 1]
    del temp, tree  # Free-up memory

    dist_to_solid = dt[tuple(crds.T)]  # Get distance to solid for each peak
    hits = sp.where(dist_to_neighbor < dist_to_solid)[0]

    # For every hit, drop whichever of (peak, neighbor) is closer to the
    # solid.  Ties are broken by label index so that both members of a
    # tied pair nominate the same peak for removal.
    neighbors = nearest_neighbor[hits]
    d_peak = dist_to_solid[hits]
    d_nbr = dist_to_solid[neighbors]
    drop_self = (d_peak < d_nbr) | ((d_peak == d_nbr) & (hits > neighbors))
    drop_peaks = sp.unique(sp.where(drop_self, hits, neighbors))

    # Remove peaks from image.  Only voxels carrying the dropped label are
    # cleared, not the whole bounding box, so another peak that happens to
    # lie inside that box survives.
    slices = spim.find_objects(input=labels)
    for idx in drop_peaks:
        window = labels[slices[idx]]  # view into ``labels``
        window[window == idx + 1] = 0
    return (labels > 0)
def main(argv=None): print("") print("pyOZ - iterative solver of the Ornstein-Zernike equation") print("version %s, Lubos Vrbka, 2008-2009" % pyoz_version) print("") if (argv == None): argv = sys.argv # parse the input file cmdline = inputdata.parse_cmdline(argv) # if -o is specified on command line, then it is used as stdout # if -o is not specified, then console is used if (cmdline['output'] != None): try: sys.stdout = open(cmdline['output'], "wt") except IOError as msg: sys.stdout = sys.__stdout__ print("error opening output file %s" % cmdline['output']) print(msg) sys.exit(2) sys.stderr.write("output redirected to " + cmdline['output'] + "\n") # parse the control file with settings and parameters # return 4 collections and class with constants ctrl, syst, parm, outp, const = inputdata.parse_input(cmdline) # allocate distance arrays # array of distance in real space r = np.array(list(map(lambda x: (x + 1) * ctrl['deltar'], range(ctrl['npoints'])))) # array of distances in reciprocal space k = np.array(list(map(lambda x: (x + 1) * ctrl['deltak'], range(ctrl['npoints'])))) # initialize the DFT class print("initializing DFT routines") dft = ft.dft(ctrl['npoints'], ctrl['deltar'], ctrl['deltak'], r, k) dft.print_status() print("") # initialize the plotting subsystem if requested if (ctrl['do_graphics']): import pyoz_plot pyoz_plot.plot_initialize(ctrl, syst, const, r) # end if(do_graphics): # calculate the total U_ij potential, contributions of individual potentials (hs, lj, coulomb, ...) 
# also numerical derivatives of the contributions (where no analytical form is available) # and get the information on discontinuities # calculate also the erf-corrected direct correlation functions in real and fourier space # according to Ng U_ij, U_ij_individual, dU_ij_individual, U_discontinuity, U_erf_ij = potential.def_potential( ctrl, syst, parm, const, dft, r, k) # write the pair potential to the file, if requested if (outp['U_ij_write']): print("writing pair potential\t(%s)" % outp['U_ij_name']) try: fw = open(outp['U_ij_name'], "wt") for dr in range(ctrl['npoints']): fw.write("%8.3f" % r[dr]) for i in range(syst['ncomponents']): for j in range(i, syst['ncomponents']): fw.write("%20.5e" % U_ij[i, j, dr]) # end for i,j in range ncomponents... fw.write("\n") fw.close() except IOError as msg: print("error while saving interaction potential") print(msg) sys.exit(1) print("") # calculate the exp(-beta U_ij) for total potential (Mayer function + 1) # calculate the exp(-beta U_ij) for all individual potentials (hs, lj, coulomb) and evaluate # discontinuities where necessary # calculate the erf-correction contribution exp(U_erf_ij) # store all in a dictionary modMayerFunc modMayerFunc = potential.def_modMayerFunc( syst, U_ij, U_ij_individual, U_discontinuity, U_erf_ij['real']) # store the mayer function itself for the purpose of CG procedure with PY closure M = modMayerFunc['u_ij'] - 1.0 # allocate arrays with direct, total and pair corr. 
functions, Gamma function # some arrays will emerge from arithmetic operations, # showing them here makes the code clearer # real space: _r_, Fourier space _f_ # direct correlation function with Ng-correction (cs) applied # direct correlation function without (c) and with (C) density factor applied # w/o density correction in real space; w/ density correction in Fourier space # c_r_ij C_f_ij = np.zeros((syst['ncomponents'], syst['ncomponents'], ctrl['npoints'])) Cs_f_ij = np.zeros( (syst['ncomponents'], syst['ncomponents'], ctrl['npoints'])) # cs_r_ij # pair correlation function # g_r_ij # h in Fourier space without (h) and with (H) density factor applied # H_f_ij # matrix of partial structure factors # S # actual, old (o) and new (n) values for Gamma # these are short-ranged Gamma (see the code for more details) G_r_ij = np.zeros((syst['ncomponents'], syst['ncomponents'], ctrl['npoints'])) # G_o_ij G_n_ij = np.zeros((syst['ncomponents'], syst['ncomponents'], ctrl['npoints'])) G_f_ij = np.zeros((syst['ncomponents'], syst['ncomponents'], ctrl['npoints'])) # identity matrix for the solver (dr copies!) 
e_ij = np.eye(syst['ncomponents']) #o_ij = ones((syst['ncomponents'],syst['ncomponents'])) E_ij = np.zeros((syst['ncomponents'], syst['ncomponents'], ctrl['npoints'])) #O_ij = np.zeros((syst['ncomponents'], syst['ncomponents'], ctrl['npoints'])) for dr in range(ctrl['npoints']): E_ij[:, :, dr] = e_ij #O_ij[:,:,dr] = o_ij # zero array for the newton-raphson Z = np.zeros((syst['ncomponents'], syst['ncomponents'], ctrl['npoints'])) # arrays for the Newton-Raphson procedure (allocated even if not used) CFXq = np.empty((syst['ncomponents'], syst['ncomponents'], ctrl['npoints'])) AX = np.empty((syst['ncomponents'], syst['ncomponents'], ctrl['npoints'])) AXq = np.empty((syst['ncomponents'], syst['ncomponents'], ctrl['npoints'])) Rq = np.empty((syst['ncomponents'], syst['ncomponents'], ctrl['npoints'])) SRS = np.empty((syst['ncomponents'], syst['ncomponents'], ctrl['npoints'])) SRSq = np.empty((syst['ncomponents'], syst['ncomponents'], ctrl['npoints'])) # process initial Gamma for the first iteration # IMPORTANT: do not forget, that we are dealing with the short-range Gamma all the time # in case of noncharged systems, it is equal to normal Gamma if (cmdline['gamma'] != None): print("attempting load of G_ij from %s" % cmdline['gamma']) # attempt load of data from external file # short-ranged Gamma is loaded try: if (cmdline['binarygamma']): # binary file G_r_ij = fromfile(cmdline['gamma']) else: # text file G_r_ij = fromfile(cmdline['gamma'], float, -1, " ") # end if (cmdline['binarygamma']): G_r_ij.shape = (syst['ncomponents'], syst['ncomponents'], ctrl['npoints']) # end of the try block except: print("\tload failed, using zero Gamma function") # set G_r_ij to a zero Gamma and apply the Ng-correction # in reality the short-ranged Gamma function is then not zero, but the original Gamma is # when no long-ranged potential (coulomb) is present, the correction is zero G_r_ij = -U_erf_ij['real'] else: print("\tsuccesfully loaded") # symmetrize the 'matrix' for all values of r # 
G(1,2) = G(2,1) - the pair potentials are symmetric for dr in range(ctrl['npoints']): G_r_ij[:, :, dr] = (G_r_ij[:, :, dr] + G_r_ij[:, :, dr].transpose()) / 2 # end else of the try/except/else block else: # the Gamma is not loaded - we have zero Gamma, but need to apply the Ng-correction # in order to have short-range Gamma print("using zero Gamma function") G_r_ij = -U_erf_ij['real'] # end if (cmdline['gamma'] == None) print("") # update the plot if requested if (ctrl['do_graphics']): # Gamma will be updated after the closure is called - just to save some unnecessary calls pyoz_plot.plot_update( syst, const, U_r=U_ij, U_erf=U_erf_ij['real'], G_r=None, c_r=None, g_r=None, c_f=None) # end if(do_graphics): # the last thing to decide - which solver will be used? # linalg.solve is not very efficient on 1x1 and 2x2 matrices that are most frequently used # let's try with own functions for such cases if (syst['ncomponents'] == 1): from pyoz_solver import solver_1 as solver_function print("using optimized solver for 1 component") elif (syst['ncomponents'] == 2): # solver_2 works but is slower! 
from pyoz_solver import solver_2 as solver_function print("using optimized solver for 2 components") elif (syst['ncomponents'] == 3): from pyoz_solver import solver_n as solver_function print("using numpy linalg solver for 3 components") elif (syst['ncomponents'] > 3): from pyoz_solver import solver_n as solver_function print("using numpy linalg solver") # the correct solver has been selected print("\nstarting iteration\n==================") total_iter = 0 converged = 0 niter = 0 while (not converged and niter < ctrl['max_iter']): # timing purposes time_beg = time() niter += 1 total_iter += 1 print("main\t%4u " % niter), # show "progress bar" when output is redirected if (cmdline['output'] != None): sys.stderr.write(".") sys.stderr.flush() if ((total_iter % 25) == 0): sys.stderr.write("\n") # create copy of original Gamma function G_o_ij = np.copy(G_r_ij) # call closure relation and get c_r_ij # the Ng-formalism is already applied, the erf-correction is taken care of # in the definition of the modified Mayer function cs_r_ij, g_r_ij = syst['closure']( syst, r, modMayerFunc, U_discontinuity, G_r_ij) # update the plot if requested if (ctrl['do_graphics']): pyoz_plot.plot_update( syst, const, U_r=None, U_erf=None, G_r=G_r_ij, c_r=cs_r_ij, g_r=g_r_ij, c_f=None) # end if(do_graphics): # FT c_r_ij to c_f_ij # we are using Fourier-sine transform; there are some steps involved in between FBT (Bessel) and FST # this will not be discussed here, check the documentation and pyoz_dft.py for further information # the whole program is using FTs normalized with the density prefactors # sqrt(rho_i * rho_j) in order to have dimensionless functions in k-space # i.e., the FTs are multiplied by this factor, iFTs are divided by this factor # it follows then, that infinite dilution is taken care of there as well for i in range(syst['ncomponents']): for j in range(syst['ncomponents']): # perform the Fourier-Bessel transform of the short-ranged direct correlation function # compensate for 
the ng correction, return short-ranged and full c in fourier space (Cs_f_ij[i, j], C_f_ij[i, j]) = dft.dfbt( cs_r_ij[i, j], norm=syst['dens']['ij'][i, j], corr=-U_erf_ij['fourier'][i, j]) # end for j in range(ncomponents) # end for i in range(ncomponents) # update the plot if requested if (ctrl['do_graphics']): pyoz_plot.plot_update( syst, const, U_r=None, U_erf=None, G_r=None, c_r=None, g_r=None, c_f=C_f_ij) # end if(do_graphics): # now we have to solve the matrix problem in the Fourier space # note that the convolution theorem involves a constant factor ('a') # depending on the used forward fourier transform normalization constant # H = C + aCH # H - aCH = C # (E - aC)H = C # H = {E - aC}^-1 * C # however, thanks to the normalization chosen so that for FT it is 1, we can write # H = {E - C}^-1 * C # E + H = {E - C}^-1 * (C + E - C) # S = {E - C}^-1 * E H_f_ij = solver_function( (E_ij - dft.ft_convolution_factor * C_f_ij), C_f_ij, ctrl['npoints']) from math import pi S = E_ij + H_f_ij #S = solver_function((E_ij - C_f_ij), E_ij, ctrl['npoints']) # convert H to short ranged Gamma G(k) = H(k) - Cs(k) #G_f_ij = H_f_ij - Cs_f_ij # convert S to short ranged Gamma G(k) = S(k) - E - Cs(k) G_f_ij = S - E_ij - Cs_f_ij # FT G_f_ij to G_r_ij for i in range(syst['ncomponents']): for j in range(syst['ncomponents']): # perform the inverse Fourier transform of the Gamma function G_n_ij[i, j] = dft.idfbt( G_f_ij[i, j], norm=syst['dens']['ij'][i, j], corr=-U_erf_ij['real'][i, j]) # end for j in range(ncomponents) # end for i in range(ncomponents) # ********************************************************************************************* # test for convergence and write the gamma if everything is OK norm_dsqn = convergence_dsqn(ctrl, syst, G_o_ij, G_n_ij) time_end = time() print("%f sec - DSQN %.3e -" % ((time_end - time_beg), norm_dsqn)), if (norm_dsqn > ctrl['max_dsqn'] or (not np.isfinite(norm_dsqn))): print("\nDSQN too large, calculation is probably diverging") print("check 
inputs and outputs and/or increase the value of max_dsqn (%e at the moment)" % ctrl['max_dsqn']) sys.exit(2) if (norm_dsqn <= ctrl['convergence_crit']): print("converged") converged = 1 else: print("not converged") # test if we do picard or newton-raphson if (not ctrl['do_nr']): # perform the picard mixing # calculate the new Gamma G_r_ij = (1.0 - ctrl['mix_param']) * \ G_o_ij + ctrl['mix_param'] * G_n_ij else: # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Newton-Raphson/Conjugate gradients method # we are trying to solve the problem AX = B where # A is a linear operator, X is dgamma and B is difference between input and output gamma # we are using iterative method to get the solution (non-symmetric conjugate gradients) # more details are given in my notes or in # Zerah: J Comp Phys 61 1985, 280 # Belloni: J Chem Phys 88 (8) 1988, 5143 # the names of the respective valuables in the following section will be kept consistent # with the papers nr_converged = 0 nr_niter = 0 # calculate the convergence criterion # it's done relatively to the DSQN of the main cycle in order to avoid # unnecessary iterations in the beginning, where the linear # approximation is not exact nr_convergence_crit = norm_dsqn * ctrl['nr_convergence_factor'] # calculate B, as defined in the paper B = G_n_ij - G_o_ij # now initialize the system for step n=0 - iteration 1 will provide X(1) # the values of X(n) and X(n-1) needed for the iterative algorithm # let's choose X(0)=X(-1)=const * B # X(n)=Xcur; X(n-1)=Xold; X(n+1)=Xnew Xold = B * ctrl['nr_mix_param'] Xcur = copy(Xold) Xnew = copy(Xold) # R(0) definition Rcur = 0.0 # alpha(0) definition - will use L instead! Lcur = 0.0 # for the first iteration, set W(2) to 1.0 Wnew = 1.0 # we don't need Wcur here, but let's define it... 
Wcur = 1.0 # we will also need matrices S (both HNC and PY) and H (HNC) or M (PY - Mayer function) for the operator A (defined in the nr-cycle) # we use functions from the main cycle # we do it with arrays here and convert to matrices where needed and appropriate # we have to take the H using the old Gamma! i.e., taking g(r) provided by the closure shortly after the main iteration # cycle is started and subtracting 1 H = g_r_ij - 1.0 # modMayerFunc is MayerFunc + 1.0 # this has been done before, so commenting out #M = modMayerFunc['u_ij'] - 1.0 # the arrays for the operations involving operators A and At were created during the initialization # the code will operate with the matrix CF (closure factor), which is set according to closure # to either H (total correlation function, for HNC) or to M (mayer function, for PY) # check below for the algorithm details if (syst['closure_name'] == 'hnc'): CF = H.copy() elif (syst['closure_name'] == 'py'): CF = M else: sys.stderr.write("unsupported closure! \n") sys.exit(1) # timing purposes nr_time_beg = time() # increase the counter for the next half-iteration total_iter += 1 # we make one half-iteration (number 0) and then carry on with full cycles until convergence while (not nr_converged and nr_niter <= ctrl['nr_max_iter']): print(" nr/cg\t :%-4u" % (nr_niter)), # show "progress bar" when output is redirected - this time with plus sign # normal iterations are done with "." 
as an indicator if (cmdline['output'] != None): sys.stderr.write("+") sys.stderr.flush() if ((total_iter % 25) == 0): sys.stderr.write("\n") # end if (cmdline['output'] != None) # 'shift' the respective functions/values) (except R, which will be done later) Xold = copy(Xcur) Xcur = copy(Xnew) # store R from previous iteration R(n) to R(n-1) Rold = copy(Rcur) # store alpha (number) from previus iteraation alpha(n) to alpha(n-1) Lold = Lcur # store W (number) from previous iteration W(n+1) to W(n) Wcur = Wnew # for matricial relations (part of operators A and At) we need to do everything separately for each discretization step # perform the calculation of AX # this will be done in several steps since # !!!!!!!!!!!!!!!!!!! in HNC !!!!!!!!!!!!!!!!!!! # AX = 1X - iFT ( S FT(HX) S - FT(HX)) # !!!!!!!!!!!!!!!!!!! in PY !!!!!!!!!!!!!!!!!!! # AX = 1X - iFT ( S FT(MX) S - FT(MX)) # where M is the Mayer function exp(-betaU) - 1 # the code will operate with the matrix CF (closure factor), which is set according to closure # to either H (total correlation function, for HNC) or to M (mayer function, for PY) # this was done outside of this cycle # H.X is not matricial product! CFX = CF*Xcur for i in range(syst['ncomponents']): for j in range(syst['ncomponents']): CFXq[i, j] = dft.dfbt(CFX[i, j])[0] # matricial products here for dr in range(ctrl['npoints']): AXq[:, :, dr] = mat( S[:, :, dr])*mat(CFXq[:, :, dr])*mat(S[:, :, dr]) - mat(CFXq[:, :, dr]) for i in range(syst['ncomponents']): for j in range(syst['ncomponents']): AX[i, j] = Xcur[i, j] - dft.idfbt(AXq[i, j]) # calculate Rcur = R(n) = B - AX(n) # do the calculation Rcur = B - AX # check for convergence here - if converged, abandon the cycle! 
# we check how far is Rcur from zero (Z is zero array with the same dimensions as Rcur) nr_norm_dsqn = convergence_dsqn(ctrl, syst, Rcur, Z) #nr_norm_dsqn = convergence_dsqn(ctrl, syst, B, AX) nr_time_end = time() # convergence is tested relatively to the DSQN of the 'outer' cycle print("%f sec - rel. DSQN %.3e -" % ((nr_time_end - nr_time_beg), nr_norm_dsqn/norm_dsqn)), if (nr_norm_dsqn > ctrl['max_dsqn'] or (not isfinite(nr_norm_dsqn))): print("\n\tDSQN too large, calculation is probably diverging") break if (nr_norm_dsqn <= nr_convergence_crit): print("converged") nr_converged = 1 else: # the conjugate gradients algorithm needed - NR has not converged print("not converged") nr_time_beg = time() nr_niter += 1 total_iter += 1 # perform the calculation of AtR # this will be done in several steps since and At is an adjoint of the operator A # !!!!! in HNC !!!!! # AT R = 1R - FT (S iFT(R) S - iFT(R))H # !!!!! in PY !!!!! # AT R = 1R - FT (S iFT(R) S - iFT(R))M # the operator works with the matrix CF, set according to the used closure for i in range(syst['ncomponents']): for j in range(syst['ncomponents']): # even though R is r-space function, we are using the inverse FT here # the definition of the adjoint requires its usage here! # the problem is also the normalization sqrt(rho_i rho_j) that would be applied # incorrectly in case normal FT would be used here # note that the sinus transform is the same in r- and k-spaces => the difference really # lies in the normalization #Rq[i,j] = dft.dfbt(Rcur[i,j])[0] Rq[i, j] = dft.idfbt(Rcur[i, j]) # matricial product for dr in range(ctrl['npoints']): SRS[:, :, dr] = mat( S[:, :, dr])*mat(Rq[:, :, dr])*mat(S[:, :, dr])-mat(Rq[:, :, dr]) for i in range(syst['ncomponents']): for j in range(syst['ncomponents']): # even though SRS is k-space function, we are using the forward FT here # the definition of the adjoint requires its usage here! 
# the problem is also the normalization sqrt(rho_i rho_j) that would be applied # incorrectly in case iFT would be used here # note that the sinus transform is the same in r- and k-spaces => the difference really # lies in the normalization # remember that FT returns 2 functions in this case #SRSq[i,j] = dft.idfbt(SRS[i,j]) SRSq[i, j] = dft.dfbt(SRS[i, j])[0] # not a matricial product! AtR = Rcur - SRSq*CF # calculate Lcur = alpha(n) = (R(n),R(n))/(AtR(n),AtR(n)) # where (Y,Z) is inner product \sum_ij rho_i rho_j \int Y_ij Z_ij 4 \pi r^2 dr Lcur = abs(dotproduct(ctrl, syst, r, Rcur, Rcur)) / \ abs(dotproduct(ctrl, syst, r, AtR, AtR)) # calculate Wnew = W(n+1) (except for first iteration) if (nr_niter != 1): # do the full calculation, in the first iteration the value is pre-set to 1.0 Wpartial = 1.0 - Lcur * abs(dotproduct(ctrl, syst, r, Rcur, Rcur))/( Lold * Wcur * abs(dotproduct(ctrl, syst, r, Rold, Rold))) Wnew = 1.0 / Wpartial # end calculation of Wnew # calculate X(n+1) = X(n-1) + W(n+1)(alpha(n)ATR(n) + X(n) + X(n-1) Xnew = Xold + Wnew * (Lcur * AtR + Xcur - Xold) # end if (nr_norm_dsqn <= nr_convergence_crit) - handling of the else-branch (not converged) # end while (not nr-converged and nr_niter < ctrl['nr_max_iter']) # in case convergence was not reached, do Picard if (not nr_converged): print("\tcouldn't converge NR/CG cycle,"), if (not ctrl['nr_noconv_incr']): print("using Picard iteration instead") G_r_ij = (1.0 - ctrl['mix_param']) * \ G_o_ij + ctrl['mix_param'] * G_n_ij else: print("using non-converged increment") G_r_ij = G_o_ij + Xnew else: #G_r_ij = G_o_ij + Xcur G_r_ij = G_o_ij + Xnew # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # end else (Newton-Raphson method) # end else (calculation not converged) # output data # test whether some function of interest should be saved # if savefreq is 0, then skip # first element of the result of modf() call is a remainder after division # if it is zero, then save 
the file... # Gamma function if ((outp['G_ij_write']) and (outp['G_ij_savefreq'] != 0) and (not divmod(niter, outp['G_ij_savefreq'])[1])): # store the Gamma function print("\tGamma function stored") try: if (outp['G_ij_binary']): G_r_ij.tofile(outp['G_ij_name']) else: G_r_ij.tofile(outp['G_ij_name'], " ", "%e") except IOError as msg: print("error while saving Gamma function") print(msg) sys.exit(1) # end if # ********************************************************************************************* # end while (not converged) print("\niteration process completed in iteration %u" % niter) if (converged): print("\tcalculation converged") else: print("\tcalculation not converged; maximum number of iterations reached\n") # do closure cs_r_ij, g_r_ij = syst['closure']( syst, r, modMayerFunc, U_discontinuity, G_r_ij) # and evaluate uncorrected c(r) as well c_r_ij = cs_r_ij - U_erf_ij['real'] # update the plot if requested if (ctrl['do_graphics']): pyoz_plot.plot_update(syst, const, U_r=None, U_erf=None, G_r=G_r_ij, c_r=cs_r_ij, g_r=g_r_ij, c_f=None) # end if(do_graphics): print("\nsaving outputs") # some error checking should be added here! for both g, G try: # save g_r_ij to file if (outp['g_ij_write']): print("\tpair correlation function\t(%s)" % outp['g_ij_name']) fw = open(outp['g_ij_name'], "wt") fw.write("%8.3f" % 0.0) for i in range(syst['ncomponents']): for j in range(i, syst['ncomponents']): fw.write("%10.5f" % 0.0) fw.write("\n") for dr in range(ctrl['npoints']): fw.write("%8.3f" % r[dr]) for i in range(syst['ncomponents']): for j in range(i, syst['ncomponents']): fw.write("%10.5f" % g_r_ij[i, j, dr]) # end for i,j in range ncomponents... 
fw.write("\n") fw.close() # save c_r_ij to file if (outp['c_ij_write']): print("\tdirect correlation function\t(%s)" % outp['c_ij_name']) fw = open(outp['c_ij_name'], "wt") fw.write("%8.3f" % 0.0) for i in range(syst['ncomponents']): for j in range(i, syst['ncomponents']): fw.write("%10.5f" % 0.0) fw.write("\n") for dr in range(ctrl['npoints']): fw.write("%8.3f" % r[dr]) for i in range(syst['ncomponents']): for j in range(i, syst['ncomponents']): # careful, we have to write the complete c(r), i.e., we need to compensate for the # Ng-correction! # cs(r) = c(r) + Ucorr(r) => c(r) = cs(r) - Ucorr(r) fw.write("%10.5f" % c_r_ij[i, j, dr]) # end for i,j in range ncomponents... fw.write("\n") fw.close() # save c_r_sr_ij to file if (outp['c_ij_sr_write']): print("\tdirect short range correlation function\t(%s)" % outp['c_ij_sr_name']) fw = open(outp['c_ij_sr_name'], "wt") fw.write("%8.3f" % 0.0) for i in range(syst['ncomponents']): for j in range(i, syst['ncomponents']): fw.write("%10.5f" % 0.0) fw.write("\n") for dr in range(ctrl['npoints']): fw.write("%8.3f" % r[dr]) for i in range(syst['ncomponents']): for j in range(i, syst['ncomponents']): # careful, we have to write the complete c(r), i.e., we need to compensate for the # Ng-correction! # cs(r) = c(r) + Ucorr(r) => c(r) = cs(r) - Ucorr(r) fw.write("%10.5f" % c_r_sr_ij[i, j, dr]) + U_ij[i, j, dr] # end for i,j in range ncomponents... 
fw.write("\n") fw.close() #TODO: See below for the short range direct correlation fn c_r_ij_sr - copy above and write out too # c_r_ij_sr = c_r_ij + U_ij_individual[index] # save S to file if (outp['S_ij_write']): print("\tpartial structure factors\t(%s)" % outp['S_ij_name']) fw = open(outp['S_ij_name'], "wt") fw.write("%8.3f" % 0.0) for i in range(syst['ncomponents']): for j in range(i, syst['ncomponents']): fw.write("%10.5f" % 0.0) fw.write("\n") for dr in range(ctrl['npoints']): fw.write("%8.3f" % k[dr]) for i in range(syst['ncomponents']): for j in range(i, syst['ncomponents']): # careful, we have to write the complete c(r), i.e., we need to compensate for the # Ng-correction! # cs(r) = c(r) + Ucorr(r) => c(r) = cs(r) - Ucorr(r) fw.write("%10.5f" % S[i, j, dr]) # end for i,j in range ncomponents... fw.write("\n") fw.close() if (outp['G_ij_write']): print("\tGamma function\t\t\t(%s)" % outp['G_ij_name']) # store the Gamma function if required # short-ranged Gamma is saved! if (outp['G_ij_binary']): G_r_ij.tofile(outp['G_ij_name']) else: G_r_ij.tofile(outp['G_ij_name'], " ", "%e") # save total interaction (U+Gamma(long-ranged!)) to file if (outp['Utot_ij_write']): print("\ttotal potential (U+Gamma)\t(%s)" % outp['Utot_ij_name']) fw = open(outp['Utot_ij_name'], "wt") for dr in range(ctrl['npoints']): fw.write("%8.3f" % r[dr]) for i in range(syst['ncomponents']): for j in range(i, syst['ncomponents']): fw.write("%20.5e" % ( U_ij[i, j, dr] + G_r_ij[i, j, dr] + U_erf_ij['real'][i, j, dr])) # end for i,j in range ncomponents... fw.write("\n") fw.close() # end try block except IOError as msg: print("error while saving output") print(msg) sys.exit(1) # end try/except block # now 'remove' the Ng-renormalization G_r_ij += U_erf_ij['real'] # we also have the short range G in the Fourier space # if needed, it can be of course used here! 
# from now on, the G_r_ij is the real Gamma function without the # stuff making the convergence easier # calculate also the term involving Gamma in HNC (exp(Gamma)) and PY (1+Gamma) # and save it G_term_ij = calcGammaTerm(syst, G_r_ij) # calculation of thermodynamic properties print("\ncalculation of (thermodynamic) properties") if (converged): # only evaluate the properties for converged calculation! # all information is printed inside these functions # for excess chem potential and compressibility, we need short range version of c_ij # it's different short-range than the one coming from Ng (which is finite for r=0) # our is given just by c_{ij}^s = c_{ij} + \beta U_{ij}^{coulomb} since for coulomb, # c(r) = -\beta U_ij^{coulomb} for r->\infty # we check here, if coulomb potential is used and if yes, we just subtract the coulomb interaction from it # step 1 - get the index in the parm array, where coulomb info is stored. the same index is used in the # U_ij_individual array print("\ttesting for long-ranged potentials") index = -1 for i in range(len(parm)): if ('coulomb' in parm[i].values()): index = i if (index >= 0): print("\t\tfound, using short-ranged c(r)\n") # ----- was done already! firstly, we get rid of ng, (subtract U_erf_ij) converting cs_r_ij to c_r_ij # then we add the coulomb (lr correction) as shown above, to get c_r_ij_sr #c_r_ij_sr = cs_r_ij -U_erf_ij['real'] + U_ij_individual[index] c_r_ij_sr = c_r_ij + U_ij_individual[index] else: print("\t\tnot found, using original c(r)\n") # in this case, ng-correction is zero and cs_r_ij is already the function we need c_r_ij_sr = c_r_ij # kirkwood-buff integrals properties.kirkwood_buff(ctrl, syst, r, g_r_ij) # osmotic coefficients properties.osmotic_coeff(ctrl, syst, parm, const, r, g_r_ij, G_r_ij, G_term_ij, U_ij_individual, dU_ij_individual, U_discontinuity, modMayerFunc['contrib']) # excess chemical potential/activity # only supported for hnc! 
if (syst['closure_name'] == 'hnc'): properties.excess_chempot( ctrl, syst, const, r, g_r_ij - 1.0, G_r_ij, c_r_ij_sr, index, parm) else: print("\texcess chemical potentials available only with HNC\n") # isothermal compressibility properties.compressibility(ctrl, syst, const, r, c_r_ij_sr) # end if (converged) else: print("\tnon-converged calculation, properties won't be evaluated\n") # end of calculation of thermodynamic properties print("calculation finished\n") if (cmdline['output'] != None): sys.stderr.write("\ncalculation finished\n") if(ctrl['do_graphics']): # stop in order not to destroy the window with the plotted functions sys.stderr.write("press enter to close the graphics window and exit\n") sys.stdin.readline()
def get_kernel_basis(self):
    """
    Hand back the kernel basis as a ``kernel_dim x G`` numpy array.

    A fresh copy is produced on every call, so the caller can modify the
    result without touching the array stored on the instance.
    """
    basis_snapshot = sp.copy(self._kernel_basis)
    return basis_snapshot
# Fixed numeric settings used by this script.
# NOTE(review): units are not stated in this part of the file - confirm
# against the system model before changing them.
P_UAV = 5000
Pc_UAV = 4000
# Rate threshold; it enters the constraint below as
# exp(rmin / (1 - tau)) - 1.
rmin = 0.4*log(2)
# Initial tau; only its ``.value`` is read in this section.
tau_ini = Parameter(value=0.5)

# Channel data set: exactly one of the .mat files below is active at a time.
# mat = spio.loadmat('XModel_Ex6_4D2D.mat', squeeze_me=True)
# mat = spio.loadmat('XModel_Ex6_6D2D.mat', squeeze_me=True)
# mat = spio.loadmat('XModel_Ex6_8D2D.mat', squeeze_me=True)
mat = spio.loadmat('XModel_Ex6_10D2D.mat', squeeze_me=True)

h_d2d = mat['h_d2d']
h_uav_d2d = mat['h_uav_d2d']
# ## the size of h_d2d is (num_d2d, num_d2d)
# ## the size of h_uav_d2d is (1,num_d2d)

# Work on a copy so that zeroing the diagonal does not modify h_d2d itself.
max_d2d_gains_diff = sp.copy(h_d2d[:, :])
sp.fill_diagonal(max_d2d_gains_diff, 0)
# Off-diagonal gains, cut down to the first num_d2d rows and columns.
d2d_to_d2d_gains_diff = max_d2d_gains_diff[:num_d2d, :num_d2d]
# h_d2d minus its off-diagonal part, i.e. only the diagonal gains remain.
# NOTE(review): relies on h_d2d already being num_d2d x num_d2d (see the size
# note above); otherwise the two operands differ in shape - confirm.
d2d_to_d2d_gains_diag = sp.subtract(h_d2d, d2d_to_d2d_gains_diff)

# # ############################################################
# # This code is used to solve the opt problem
# # ############################################################
# Wall-clock reference taken before the problem is built.
t0 = time.time()

# One optimisation variable entry per D2D pair.
pow_co = Variable(num_d2d)

# Minimise the summed variable, scaled by (1 - tau).
objective = Minimize( sum(pow_co)*sp.subtract(1, tau_ini.value) )

constraints = []
# Per-pair requirement: the diagonal-gain term must be at least
# (exp(rmin / (1 - tau)) - 1) times the off-diagonal-gain term plus one.
constraints.append(d2d_to_d2d_gains_diag * pow_co >= (exp(rmin / (1-tau_ini.value)) - 1) * (d2d_to_d2d_gains_diff*pow_co + 1))
def get_freq_modes_over_f(power_mat, window_function, frequency, n_modes,
                          plots=False):
    """Finds the most correlated frequency modes and fits their noise.

    The `n_modes` eigenvectors with the largest eigenvalues of the averaged
    low-frequency power matrix are fitted one by one with `fit_overf_const`
    and projected out of a copy of the power matrix.  What is left is used
    to fit a channel-averaged spectrum and to measure a per-channel thermal
    level.

    Parameters
    ----------
    power_mat : array
        Power spectrum matrix.  It is reshaped to ``(n_f, n_chan**2)``, so
        its shape has to be ``(n_f, n_chan, n_chan)``.  Only the real part
        is used for the fits.  The input is not modified.
    window_function : array
        Window matrix.  It is reshaped to ``(n_time, n_chan**2)``, so its
        shape has to be ``(n_time, n_chan, n_chan)``.
    frequency : 1D array
        Frequencies of the spectrum, length ``n_f``.  The time step is taken
        to be ``1 / (2 * frequency[-1])``.
    n_modes : int
        Number of eigenmodes to fit and remove.
    plots : bool
        If True, the mean auto spectrum and its fit are drawn on the current
        matplotlib figure, which must carry a `current_subplot` attribute.

    Returns
    -------
    output_params : dict
        ``'over_f_mode_<i>'`` (dict with keys ``'amplitude'``, ``'index'``,
        ``'f_0'``, ``'thermal'`` and ``'mode'``) for every removed mode,
        plus ``'all_channel_index'``, ``'all_channel_corner_f'`` and the
        per-channel array ``'thermal'``.

    Raises
    ------
    RuntimeError
        If the eigenvalues are not returned in ascending order.
    """

    n_f = len(frequency)
    d_f = sp.mean(sp.diff(frequency))
    dt = 1. / 2. / frequency[-1]
    n_chan = power_mat.shape[-1]
    n_time = window_function.shape[0]
    # The threshold for assuming there isn't enough data to measure anything.
    no_data_thres = 10. / n_time
    # Initialize the dictionary that will hold all the parameters.
    output_params = {}
    # First take the low frequency part of the spectrum matrix and average
    # over enough bins to get a well conditioned matrix.
    low_f_mat = sp.mean(power_mat[:4 * n_chan, :, :].real, 0)
    # Factor the matrix to get the most correlated modes.
    e, v = linalg.eigh(low_f_mat)
    # Make sure they are sorted.
    if not sp.alltrue(sp.diff(e) >= 0):
        raise RuntimeError("Eigenvalues not sorted")
    # Power matrix stripped of the biggest modes.
    reduced_power = sp.copy(power_mat)
    mode_list = []
    # Solve for the spectra of these modes.
    for ii in range(n_modes):
        this_mode_params = {}
        # Get power spectrum and window function for this mode.  The largest
        # eigenvalue is last, hence the reversed column index.
        mode = v[:, -1 - ii]
        mode_power = sp.sum(mode * power_mat.real, -1)
        mode_power = sp.sum(mode * mode_power, -1)
        mode_window = sp.sum(mode[:, None]**2 * window_function, 1)
        mode_window = sp.sum(mode_window * mode[None, :]**2, 1)
        # Protect against no data.
        if sp.mean(mode_window).real < no_data_thres:
            this_mode_params['amplitude'] = 0.
            this_mode_params['index'] = 0.
            this_mode_params['f_0'] = 1.
            this_mode_params['thermal'] = T_infinity**2 * dt
        else:
            # Fit the spectrum.
            p = fit_overf_const(mode_power, mode_window, frequency)
            # Put all the parameters we measured into the output.
            this_mode_params['amplitude'] = p[0]
            this_mode_params['index'] = p[1]
            this_mode_params['f_0'] = p[2]
            this_mode_params['thermal'] = p[3]
        this_mode_params['mode'] = mode
        output_params['over_f_mode_' + str(ii)] = this_mode_params
        # Remove the mode from the power matrix: subtract its projection
        # along each channel axis, then add back the doubly subtracted part.
        tmp_amp = sp.sum(reduced_power * mode, -1)
        tmp_amp2 = sp.sum(reduced_power * mode[:, None], -2)
        tmp_amp3 = sp.sum(tmp_amp2 * mode, -1)
        reduced_power -= tmp_amp[:, :, None] * mode
        reduced_power -= tmp_amp2[:, None, :] * mode[:, None]
        reduced_power += tmp_amp3[:, None, None] * mode[:, None] * mode
        mode_list.append(mode)
    # Initialize the compensation matrix, that will be used to restore thermal
    # noise that gets subtracted out.  See Jan 29, Feb 17th, 2012 of Kiyo's
    # notes.
    compensation = sp.eye(n_chan, dtype=float)
    for mode1 in mode_list:
        # A stride of n_chan + 1 through the flattened matrix walks the
        # diagonal.
        compensation.flat[::n_chan + 1] -= 2 * mode1**2
        for mode2 in mode_list:
            mode_prod = mode1 * mode2
            compensation += mode_prod[:, None] * mode_prod[None, :]
    # Now that we've stripped the noisiest modes, measure the auto power
    # spectrum, averaged over channels.
    auto_spec_mean = reduced_power.view()
    auto_spec_mean.shape = (n_f, n_chan**2)
    auto_spec_mean = auto_spec_mean[:, ::n_chan + 1].real
    auto_spec_mean = sp.mean(auto_spec_mean, -1)
    diag_window = window_function.view()
    diag_window.shape = (n_time, n_chan**2)
    diag_window = diag_window[:, ::n_chan + 1]
    auto_spec_window = sp.mean(diag_window, -1)
    if sp.mean(auto_spec_window).real < no_data_thres:
        auto_cross_over = 0.
        auto_index = 0.
        auto_thermal = 0
    else:
        auto_spec_params = fit_overf_const(auto_spec_mean, auto_spec_window,
                                           frequency)
        auto_thermal = auto_spec_params[3]
        if (auto_spec_params[0] <= 0 or auto_spec_params[3] <= 0 or
                auto_spec_params[1] > -0.599):
            auto_cross_over = 0.
            auto_index = 0.
        else:
            auto_index = auto_spec_params[1]
            auto_cross_over = auto_spec_params[2] * (
                auto_spec_params[0] / auto_spec_params[3])**(-1. / auto_index)
            #if auto_cross_over < d_f:
            #    auto_index = 0.
            #    auto_cross_over = 0.
    # Plot the mean auto spectrum if desired.
    if plots:
        h = plt.gcf()
        a = h.add_subplot(*h.current_subplot)
        norm = sp.mean(auto_spec_window).real
        auto_plot = auto_spec_mean / norm
        plotable = auto_plot > 0
        lines = a.loglog(frequency[plotable], auto_plot[plotable])
        c = lines[-1].get_color()
        # And plot the fit in a light color.
        if auto_cross_over > d_f / 4.:
            spec = npow.overf_power_spectrum(auto_thermal, auto_index,
                                             auto_cross_over, dt, n_time)
        else:
            spec = sp.zeros(n_time, dtype=float)
        spec += auto_thermal
        spec[0] = 0
        spec = npow.convolve_power(spec, auto_spec_window)
        spec = npow.prune_power(spec)
        spec = spec[1:].real
        if norm > no_data_thres:
            spec /= norm
        plotable = spec > 0
        a.loglog(frequency[plotable], spec[plotable], c=c, alpha=0.4,
                 linestyle=':')
    output_params['all_channel_index'] = auto_index
    output_params['all_channel_corner_f'] = auto_cross_over
    # Finally measure the thermal part of the noise in each channel.  Only
    # frequencies above 4 times the corner are used, with the starting bin
    # clamped to lie between n_f / 2 and 9 / 10 of n_f.
    cross_over_ind = sp.digitize([auto_cross_over * 4], frequency)[0]
    cross_over_ind = max(cross_over_ind, n_f // 2)
    cross_over_ind = min(cross_over_ind, int(9. * n_f / 10.))
    thermal = reduced_power[cross_over_ind:, :, :].real
    n_high_f = thermal.shape[0]
    thermal.shape = (n_high_f, n_chan**2)
    thermal = sp.mean(thermal[:, ::n_chan + 1], 0)
    thermal_norms = sp.mean(diag_window, 0).real
    bad_inds = thermal_norms < no_data_thres
    thermal_norms[bad_inds] = 1.
    # Compensate for power lost in mode subtraction.  Channels without data
    # are decoupled from the system: their rows and columns are blanked and a
    # unit diagonal is restored so that the matrix stays solvable.
    compensation[:, bad_inds] = 0
    compensation[bad_inds, :] = 0
    bad_chans = bad_inds.nonzero()[0]
    compensation[bad_chans, bad_chans] = 1.
    thermal = linalg.solve(compensation, thermal)
    # Normalize
    thermal /= thermal_norms
    thermal[bad_inds] = T_infinity**2 * dt
    # Occasionally the compensation fails horribly on a few channels.
    # When this happens, zero out the offending indices.
    thermal[thermal < 0] = 0
    output_params['thermal'] = thermal
    # Now that we know what thermal is, we can subtract it out of the modes we
    # already measured.
    for ii in range(n_modes):
        mode_params = output_params['over_f_mode_' + str(ii)]
        thermal_contribution = sp.sum(mode_params['mode']**2 * thermal)
        # Subtract a maximum of 90% of the white noise to keep things positive
        # definite.
        new_white = max(mode_params['thermal'] - thermal_contribution,
                        0.1 * mode_params['thermal'])
        # Modes that were given the no-data placeholder level are left alone.
        if mode_params['thermal'] < 0.5 * T_infinity**2 * dt:
            mode_params['thermal'] = new_white
    return output_params
## calculating the charge-symmetric case (n=0.5) to get Lambda0 if chat: print('#\n# calculating the charge-symmetric solution:') if chat: print('# norm[G0]: {0: .6f}, n[G0]: {1: .6f}'\ .format(float(IntDOS(GFzero_A)),float(Filling(GFzero_A)))) if chat: print('# calculating the charge-symmetric two-particle bubble...') Bubble_A = TwoParticleBubble(GFzero_A,GFzero_A,'eh') # Bubble[0] is negative BubZero = Bubble_A[int(N/2)] Uc = -1.0/sp.real(BubZero) if chat: print('# - Bubble[0] = {0: .6f}, critical U = {1: .6f}'.format(BubZero,Uc)) if chat: print('# calculating the charge-symmetric Lambda vertex...') Lambda = CalculateLambda(Bubble_A,GFzero_A,GFzero_A) if chat: print('# - Lambda = {0: .6f}'.format(Lambda)) SigmaT = SigmaT_old = -ed GFtherm_A = sp.copy(GFzero_A) nT = 0.5 k = 1 ## calculating the non-charge-symmetric case ############## if ed!=0.0: nTold = 1e5 Lambdaold = 1e5 if chat: print('#\n# calculating the non-symmetric solution:') if chat: print('# iterating the thermodynamic self-energy SigmaT, mixing: alpha = {0: .3f}:'\ .format(float(alpha))) while any([sp.fabs(nT-nTold)>epst,sp.fabs(SigmaT-SigmaT_old)>epst]): nTold = nT Lambdaold = Lambda SigmaT_old = SigmaT if GFtype in ['cubic','square']: GFtherm_A = ShiftGreensFunction(GFzero_A,-ed-SigmaT) else: GFtherm_A = GFlambda(En_A-ed-SigmaT)
def subdivide(network, pores, shape, labels=None):
    r'''
    It trim the pores and replace them by cubic networks with the sent shape.

    Parameters
    ----------
    network : OpenPNM Network Object
        The network to modify in place.  Its class hierarchy must contain a
        class named ``Cubic``.

    pores : array_like
        The first group of pores to be replaced

    shape : array_like
        The shape of cubic networks in the target locations.  It must have
        2 or 3 entries.

    labels : string or list of strings, optional
        Labels passed on for the pores and throats that are created.  If
        nothing (or an empty list) is given, a single label
        ``'pore.subdivided_<name of the temporary network>'`` is used.

    Raises
    ------
    Exception
        If the network is not cubic, if one of ``pores`` is a boundary pore,
        if the network has been subdivided before, or if ``shape`` has
        neither 2 nor 3 entries.

    Notes
    -----
    - It works only for cubic networks.
    - A network can be subdivided only once; the method marks it with a
      ``_subdivide_flag`` attribute.

    Examples
    --------
    >>> import OpenPNM
    >>> pn = OpenPNM.Network.Cubic(shape=[5,6,5], spacing=0.001)
    >>> pn.Np
    150
    >>> nano_pores = [2,13,14,15]
    >>> pn.subdivide(pores=nano_pores, shape=[4,7,3], labels='nano')
    >>> pn.Np
    482
    >>> assert pn.Np == (150+4*(4*7*3)-4)

    '''
    mro = [item.__name__ for item in network.__class__.__mro__]
    if 'Cubic' not in mro:
        raise Exception('Subdivide is only supported for Cubic Networks')
    from OpenPNM.Network import Cubic
    pores = _sp.array(pores, ndmin=1)

    # Checks to find boundary pores in the selected pores
    if 'pore.boundary' in network.labels():
        if (_sp.in1d(pores, network.pores('boundary'))).any():
            raise Exception('boundary pores cannot be subdivided!')
    if not hasattr(network, '_subdivide_flag'):
        network._subdivide_flag = True
    else:
        raise Exception('The network has subdivided pores, so the method ' +
                        'does not support another subdivision.')
    # Assigning right shape and division
    if _sp.size(shape) != 2 and _sp.size(shape) != 3:
        raise Exception('Subdivide not implemented for Networks other than '
                        '2D and 3D')
    elif _sp.size(shape) == 3 and 1 not in shape:
        div = _sp.array(shape, ndmin=1)
        single_dim = None
    else:
        # Axis along which the main network is only one pore thick, if any.
        single_dim = _sp.where(_sp.array(network._shape) == 1)[0]
        if _sp.size(single_dim) == 0:
            single_dim = None
        if _sp.size(shape) == 3:
            div = _sp.array(shape, ndmin=1)
        else:
            # Two entries were given: put a 1 on the flat axis (the last axis
            # when the main network has none) and spread the two entries over
            # the remaining axes.
            div = _sp.zeros(3, dtype=_sp.int32)
            if single_dim is None:
                dim = 2
            else:
                dim = single_dim
            div[dim] = 1
            # `~` (not unary minus) is the inversion operator for boolean
            # masks; NumPy rejects `-` on boolean arrays.
            div[~_sp.array(div, ndmin=1, dtype=bool)] = _sp.array(shape,
                                                                  ndmin=1)

    # Creating small network and handling labels
    network_spacing = network._spacing
    new_net_spacing = network_spacing/div
    new_net = Cubic(shape=div, spacing=new_net_spacing)
    main_labels = ['left', 'right', 'front', 'back', 'top', 'bottom']
    if single_dim is not None:
        label_groups = _sp.array([['front', 'back'],
                                  ['left', 'right'],
                                  ['top', 'bottom']])
        non_single_labels = label_groups[_sp.array([0, 1, 2]) != single_dim]
    # Temporary 'pore.surface_*' labels mark the faces of the small network;
    # they are removed again before the function returns.
    for l in main_labels:
        new_net['pore.surface_' + l] = False
        network['pore.surface_' + l] = False
        if single_dim is None:
            new_net['pore.surface_' + l][new_net.pores(labels=l)] = True
        else:
            for ind in [0, 1]:
                loc = (non_single_labels[ind] == l)
                temp_pores = new_net.pores(non_single_labels[ind][loc])
                new_net['pore.surface_' + l][temp_pores] = True

    old_coords = _sp.copy(new_net['pore.coords'])
    if labels is None or labels == []:
        labels = ['pore.subdivided_' + new_net.name]
    for P in pores:
        # Shifting the new network to the right location and attaching it to
        # the main network
        shift = network['pore.coords'][P] - network_spacing/2
        new_net['pore.coords'] += shift
        Pn = network.find_neighbor_pores(pores=P)
        # Neighbours that already belong to a previously inserted small
        # network.  The lookup fails while the labels do not exist yet, which
        # means that there are none.
        try:
            Pn_new_net = network.pores(labels)
        except Exception:
            Pn_new_net = []
        Pn_old_net = Pn[~_sp.in1d(Pn, Pn_new_net)]
        Np1 = network.Np
        extend(pore_coords=new_net['pore.coords'],
               throat_conns=new_net['throat.conns'] + Np1,
               labels=labels, network=network)

        # Moving the temporary labels to the big network
        for l in main_labels:
            network['pore.surface_'+l][Np1:] = new_net['pore.surface_'+l]

        # Stitching the old pores of the main network to the new extended pores
        surf_pores = network.pores('surface_*')
        surf_coord = network['pore.coords'][surf_pores]
        for neighbor in Pn:
            neighbor_coord = network['pore.coords'][neighbor]
            # Squared distance from this neighbour to every surface pore.
            dist = [round(_sp.inner(neighbor_coord-x, neighbor_coord-x),
                          20) for x in surf_coord]
            nearest_neighbor = surf_pores[dist == _sp.amin(dist)]
            if neighbor in Pn_old_net:
                coplanar_labels = network.labels(pores=nearest_neighbor)
                new_neighbors = network.pores(coplanar_labels,
                                              mode='intersection')
                # This might happen to the edge of the small network
                if _sp.size(new_neighbors) == 0:
                    # Kept in its own name: reusing `labels` here would
                    # replace the labels argument for every later call in
                    # this function.
                    nearest_labels = network.labels(pores=nearest_neighbor,
                                                    mode='intersection')
                    common_label = [l for l in nearest_labels
                                    if 'surface_' in l]
                    new_neighbors = network.pores(common_label)
            elif neighbor in Pn_new_net:
                new_neighbors = nearest_neighbor
            connect_pores(network=network, pores1=neighbor,
                          pores2=new_neighbors, labels=labels)

        # Removing temporary labels
        for l in main_labels:
            network['pore.surface_' + l] = False
        # Undo the shift so the small network can be reused for the next pore.
        new_net['pore.coords'] = _sp.copy(old_coords)

    network._label_surfaces()
    for l in main_labels:
        del network['pore.surface_'+l]
    trim(network=network, pores=pores)
    _mgr.purge_object(obj=new_net, mode='complete')
def ld_pruning(data_file=None, ld_radius=None, out_file_prefix=None,
               p_thres=None, verbose=False, max_r2=0.2):
    """
    LD pruning + P-value thresholding

    Reads per-chromosome SNP data from the HDF5 file `data_file`, keeps the
    SNPs whose p-value is below `p_thres`, optionally LD-prunes them and,
    when validation phenotypes are stored in the file, prints the prediction
    accuracy of the resulting risk scores.  The per-SNP weights are written
    to a text file when `out_file_prefix` is given.

    Parameters:
        data_file: Path of the HDF5 file.  It must contain a 'cord_data'
            group whose members are named 'chrom_<number>'.  Each member is
            read for 'ps', 'raw_snps_val' (or 'raw_snps_ref'),
            'snp_means_ref', 'snp_stds_ref', 'log_odds' and 'betas', and
            additionally for 'positions', 'sids' and 'nts' when weights are
            written.  A top-level 'y' dataset with phenotypes is optional.
        ld_radius: Passed to ld.calc_ld_table as max_ld_dist.  Only used
            when max_r2 < 1.
        out_file_prefix: Prefix of the weights file.  If empty or None, no
            weights file is written.
        p_thres: P-value threshold; SNPs with p-value >= p_thres are
            dropped.  Required in practice, since it is formatted as a
            number.
        verbose: Passed to the LD table calculation and to the pruning.
        max_r2: Maximum r2 between retained SNPs.  LD pruning is applied
            only when it is below 1.

    The messages use single-argument print(...) calls, which produce the
    same output as the Python 2 print statement.
    """
    df = h5py.File(data_file, 'r')
    try:
        has_phenotypes = False
        if 'y' in df.keys():
            print('Validation phenotypes found.')
            y = df['y'][...]  # Phenotype
            risk_scores = sp.zeros(len(y))
            has_phenotypes = True

        print('')
        if max_r2 < 1:
            print('Applying LD-pruning + P-value thresholding with p-value threshold of %0.2e, a LD radius of %d SNPs, and a max r2 of %0.2f' % (
                p_thres, ld_radius, max_r2))
        else:
            if p_thres < 1:
                print('Applying P-value thresholding with p-value threshold of %0.2e' % (
                    p_thres))
            else:
                print('Calculating polygenic risk score using all SNPs')

        # NOTE(review): results_dict is filled in below but is neither
        # returned nor stored anywhere - confirm whether callers need it.
        results_dict = {}
        cord_data_g = df['cord_data']

        # Chromosome numbers, taken from the 'chrom_<number>' group names.
        chromsomes = sorted(int((chrom_str.split('_'))[1])
                            for chrom_str in cord_data_g.keys())
        p_str = '%0.4f' % p_thres
        results_dict[p_str] = {}

        if out_file_prefix:
            #Preparing output files
            raw_effect_sizes = []
            raw_pval_effect_sizes = []
            updated_effect_sizes = []
            updated_pval_effect_sizes = []
            sids = []
            chromosomes = []
            positions = []
            nts = []

        tot_num_snps = 0
        num_snps_used = 0
        for chrom in chromsomes:
            chrom_str = 'chrom_%d' % chrom
            g = cord_data_g[chrom_str]
            pvalues = g['ps'][...]
            snp_filter = pvalues < p_thres
            num_snps = sp.sum(snp_filter)
            if num_snps == 0:
                # No SNPs pass the threshold, skip the chromosome.
                continue
            tot_num_snps += num_snps

            pvalues = pvalues[snp_filter]
            # Validation genotypes are preferred over reference genotypes.
            if 'raw_snps_val' in g.keys():
                raw_snps = g['raw_snps_val'][...][snp_filter]
            else:
                raw_snps = g['raw_snps_ref'][...][snp_filter]

            snp_means = g['snp_means_ref'][...][snp_filter]
            snp_stds = g['snp_stds_ref'][...][snp_filter]
            raw_betas = g['log_odds'][...][snp_filter]
            pval_derived_betas = g['betas'][...][snp_filter]
            if out_file_prefix:
                chromosomes.extend([chrom_str] * len(pval_derived_betas))
                positions.extend(g['positions'][...][snp_filter])
                sids.extend(g['sids'][...][snp_filter])
                raw_effect_sizes.extend(raw_betas)
                raw_pval_effect_sizes.extend(pval_derived_betas)
                nts.extend(g['nts'][...][snp_filter])

            if max_r2 < 1:
                # Generate the LD table from the normalized genotypes.
                snp_means.shape = (len(snp_means), 1)
                snp_stds.shape = (len(snp_means), 1)
                #Normalize SNPs..
                norm_ref_snps = sp.array((raw_snps - snp_means) / snp_stds,
                                         dtype='float32')
                ld_table = ld.calc_ld_table(norm_ref_snps,
                                            max_ld_dist=ld_radius,
                                            min_r2=max_r2, verbose=verbose)

                updated_raw_betas, pruning_vector = smart_ld_pruning(
                    raw_betas, ld_table, pvalues=pvalues, max_ld=max_r2,
                    verbose=verbose)
                updated_pval_derived_betas = pval_derived_betas * pruning_vector
            else:
                # No pruning: every SNP of this chromosome that passed the
                # threshold is kept.  The copy has to be taken from this
                # chromosome's effects (not from the list accumulated over
                # all chromosomes) so that it lines up with raw_snps.
                updated_raw_betas = sp.copy(raw_betas)
                updated_pval_derived_betas = pval_derived_betas / (
                    snp_stds.flatten())
                pruning_vector = sp.ones(len(pval_derived_betas))
            num_snps_used += sp.sum(pruning_vector)

            if out_file_prefix:
                updated_effect_sizes.extend(updated_raw_betas)
                updated_pval_effect_sizes.extend(updated_pval_derived_betas)

            if has_phenotypes:
                print('Calculating scores for Chromosome %s' % chrom_str)
                prs = sp.dot(updated_raw_betas, raw_snps)
                risk_scores += prs
                corr = sp.corrcoef(y, prs)[0, 1]
                r2 = corr**2
                print('The R2 prediction accuracy of PRS using %s was: %0.4f' % (
                    chrom_str, r2))

        print('There were %d (SNP) effects after p-value thresholding' % tot_num_snps)
        print('After LD-pruning %d SNPs had non-zero effects' % num_snps_used)

        if has_phenotypes:
            num_indivs = len(y)
            results_dict[p_str]['y'] = y
            results_dict[p_str]['risk_scores'] = risk_scores
            print('Prediction accuracy was assessed using %d individuals.' % (
                num_indivs))

            corr = sp.corrcoef(y, risk_scores)[0, 1]
            r2 = corr**2
            results_dict[p_str]['r2_pd'] = r2
            print('The R2 prediction accuracy (observed scale) for the whole genome was: %0.4f (%0.6f)' % (
                r2, ((1 - r2)**2) / num_indivs))

            # Flip the sign of the scores when they anti-correlate with the
            # phenotype, before the calibration slope is computed.
            if corr < 0:
                risk_scores = -1 * risk_scores

            #Now calibration
            denominator = sp.dot(risk_scores.T, risk_scores)
            y_norm = (y - sp.mean(y)) / sp.std(y)
            numerator = sp.dot(risk_scores.T, y_norm)
            regression_slope = (numerator / denominator)
            print('The slope for predictions with P-value derived effects is: %s' % (
                regression_slope,))
            results_dict[p_str]['slope_pd'] = regression_slope

        # The per-SNP lists exist only when a prefix was given, so the
        # weights file can only be written in that case.
        if out_file_prefix:
            if max_r2 == 1:
                weights_out_file = '%s_all_snps.txt' % (out_file_prefix)
            else:
                weights_out_file = '%s_P+T_p%0.4e.txt' % (out_file_prefix,
                                                          p_thres)
            with open(weights_out_file, 'w') as f:
                f.write(
                    'chrom pos sid nt1 nt2 raw_beta raw_pval_beta updated_beta updated_pval_beta \n'
                )
                for chrom, pos, sid, nt, raw_beta, raw_pval_beta, upd_beta, upd_pval_beta in it.izip(
                        chromosomes, positions, sids, nts, raw_effect_sizes,
                        raw_pval_effect_sizes, updated_effect_sizes,
                        updated_pval_effect_sizes):
                    nt1, nt2 = nt[0], nt[1]
                    f.write(
                        '%s %d %s %s %s %0.4e %0.4e %0.4e %0.4e\n' %
                        (chrom, pos, sid, nt1, nt2, raw_beta, raw_pval_beta,
                         upd_beta, upd_pval_beta))
    finally:
        # Release the HDF5 handle even if something above failed.
        df.close()
# Start from the previously computed vertex Lambda0.
Lambda = Lambda0
# Large "previous" occupations make sure the loop body runs at least once.
[nTupOld, nTdnOld] = [1e8, 1e8]
# Initial static self-energy pair, built from the current occupations.
[Sigma0, Sigma1] = [U * (nTup + nTdn - 1.0) / 2.0, Lambda * (nTdn - nTup) / 2.0]
k = 1
sumsq = 1e8 if FSC else 0.0  ## convergence criterion for FSC scheme
# Iterate until both occupations moved by at most epsn in the last pass and
# sumsq is at most 0.01 (sumsq starts at 0.0, i.e. already passes, when FSC is
# off).
while any([
        sp.fabs(nTupOld - nTup) > epsn,
        sp.fabs(nTdnOld - nTdn) > epsn, sumsq > 0.01
]):
    if chat: print('#\n# Iteration {0: 3d}'.format(k))
    [nTupOld, nTdnOld] = [nTup, nTdn]
    # With FSC on, keep a copy of the spin-up Green function of this pass.
    if FSC:
        GFTupOld_A = sp.copy(GFTup_A)
    ## Lambda vertex
    if chat: print('# - calculating Lambda vertex:')
    # The previous Lambda is handed over as the last argument.
    Lambda = CalculateLambdaD(GFTup_A, GFTdn_A, Lambda)
    if chat: print('# - - Lambda vertex: Lambda: {0: .8f}'.format(Lambda))
    if True:  ## print auxiliary functions, development only
        # if False:
        K = KvertexD(Lambda, GFTup_A, GFTdn_A)
        if chat: print('# - - K vertex: K: {0: .8f}'.format(K))
        ## check the integrals:
        XD = ReBDDFDD(GFTup_A, GFTdn_A, 0)
        if chat: print('# - - aux. integral: X: {0: .8f}'.format(XD))
    ## HF self-energy
    if chat: print('# - calculating static self-energy:')
    # The current (Sigma0, Sigma1) pair goes in and is replaced by the result.
    [Sigma0, Sigma1] = CalculateSigmaT(Lambda, Sigma0, Sigma1, GFlambda, DensityLambda)
def non_infinitesimal_mcmc(beta_hats, Pi, Sigi2, sig_12, start_betas=None,
                           h2=None, n=1000, ld_radius=100, num_iter=60,
                           burn_in=10, zero_jump_prob=0.05, ld_dict=None):
    """
    MCMC of non-infinitesimal model

    Every iteration visits the SNPs in sequential order.  For each SNP the
    posterior probability of a non-zero effect and the posterior mean are
    computed from the marginal estimate, corrected for the current effects of
    the SNPs inside the LD window, and a new effect is sampled.  The posterior
    means of the iterations after the burn-in are averaged.

    Parameters:
        beta_hats: Marginal effect estimates, one per SNP.
        Pi: Per-SNP prior probability of a non-zero effect.
        Sigi2: Per-SNP prior variance term.  SNPs with a value of 0 get a
            zero effect.
        sig_12: Scalar noise variance of the marginal estimates.
        start_betas: Array of starting effects, one per SNP.  It is copied,
            not modified.  Required in practice (None is not usable).
        h2: Heritability estimate used to shrink the jump probability.
            Required in practice (None is not usable).
        n: Sample size, used through 1 / sqrt(n) in the shrink factor.
        ld_radius: Number of SNPs on either side that form the LD window.
        num_iter: Total number of iterations.
        burn_in: Number of initial iterations left out of the average.
        zero_jump_prob: One minus the upper bound of the shrink factor.
        ld_dict: Maps the SNP index to its local LD vector, which must have
            the same length as the LD window of that SNP.

    Returns:
        dict with 'betas' (averaged posterior means) and 'inf_betas' (the
        `start_betas` object that was passed in).
    """
    m = len(beta_hats)

    curr_betas = sp.copy(start_betas)
    curr_post_means = sp.zeros(m)
    avg_betas = sp.zeros(m)

    for k in range(num_iter):  #Big iteration

        #Force an alpha shrink if estimates are way off compared to
        #heritability estimates.  (Improves MCMC convergence.)
        h2_est = max(0.00001, sp.sum(curr_betas**2))
        alpha = min(1 - zero_jump_prob, 1.0 / h2_est,
                    (h2 + 1 / sp.sqrt(n)) / h2_est)
        rand_ps = sp.random.random(m)

        # Iterating over effect estimates in sequential order
        for snp_i in range(m):
            if Sigi2[snp_i] == 0:
                curr_post_means[snp_i] = 0
                curr_betas[snp_i] = 0
            else:
                hdmp = (Sigi2[snp_i] / Pi[snp_i])  #(h2 / Mp)
                hdmpn = hdmp + sig_12  #1.0 / n
                hdmp_hdmpn = (hdmp / hdmpn)
                c_const = (Pi[snp_i] / sp.sqrt(hdmpn))
                d_const = (1 - Pi[snp_i]) / (sp.sqrt(sig_12))

                start_i = max(0, snp_i - ld_radius)
                focal_i = min(ld_radius, snp_i)
                stop_i = min(m, snp_i + ld_radius + 1)

                #Local LD matrix
                D_i = ld_dict[snp_i]

                #Local (most recently updated) effect estimates.  This is a
                #view, so zeroing the focal entry also zeroes
                #curr_betas[snp_i] until it is reassigned below.
                local_betas = curr_betas[start_i:stop_i]

                #Calculate the local posterior mean, used when sampling.
                local_betas[focal_i] = 0
                res_beta_hat_i = beta_hats[snp_i] - sp.dot(D_i, local_betas)
                b2 = res_beta_hat_i**2

                d_const_b2_exp = d_const * sp.exp(-b2 / (2.0 * sig_12))
                if sp.isreal(d_const_b2_exp):
                    numerator = c_const * sp.exp(-b2 / (2.0 * hdmpn))
                    if sp.isreal(numerator):
                        if numerator == 0:
                            postp = 0
                        else:
                            postp = numerator / (numerator + d_const_b2_exp)
                            assert sp.isreal(
                                postp), 'Posterior mean is not a real number?'
                    else:
                        postp = 0
                else:
                    postp = 1
                curr_post_means[snp_i] = hdmp_hdmpn * postp * res_beta_hat_i

                if rand_ps[snp_i] < postp * alpha:
                    #Sample from the posterior Gaussian dist.  The single
                    #draw is unpacked so that a scalar, not a length-1
                    #array, is stored in curr_betas.
                    # NOTE(review): the second rvs argument is a standard
                    # deviation in scipy, while the value passed here looks
                    # like a variance - confirm the intended scale.
                    noise = stats.norm.rvs(0, (hdmp_hdmpn) * sig_12,
                                           size=1)[0]
                    proposed_beta = noise + hdmp_hdmpn * res_beta_hat_i
                else:
                    #Sample 0
                    proposed_beta = 0

                curr_betas[snp_i] = proposed_beta  #UPDATE BETA
        if k >= burn_in:
            #Averaging over the posterior means instead of samples.
            avg_betas += curr_post_means

    avg_betas = avg_betas / float(num_iter - burn_in)

    return {'betas': avg_betas, 'inf_betas': start_betas}
def flood(im, regions=None, mode='max'):
    r"""
    Floods/fills each region in an image with a single value based on the
    specific values in that region.  The ``mode`` argument is used to
    determine how the value is calculated.

    Parameters
    ----------
    im : array_like
        An ND image with isolated regions containing 0's elsewhere.
    regions : array_like
        An array the same shape as ``im`` with each region labeled.  If None is
        supplied (default) then ``scipy.ndimage.label`` is used with its
        default arguments.
    mode : string
        Specifies how to determine which value should be used to flood each
        region.  Only the start of the string is checked.  Options are:

        *'max'* : Floods each region with the local maximum in that region

        *'min'* : Floods each region with the local minimum in that region

        *'size'* : Floods each region with the size of that region

    Returns
    -------
    An ND-array the same size as ``im`` with new values placed in each
    foreground voxel based on the ``mode``.

    Raises
    ------
    ValueError
        If ``mode`` does not start with 'max', 'min' or 'size'.

    See Also
    --------
    props_to_image
    """
    mask = im > 0
    if regions is None:
        labels, N = spim.label(mask)
    else:
        labels = sp.copy(regions)
        N = labels.max()
    I = im.flatten()
    L = labels.flatten()
    # ufunc.at applies the reduction once per voxel, even when a label occurs
    # many times, which replaces the per-voxel Python loop. fmax/fmin skip NaN
    # the same way the original "<" / ">" comparisons did.
    if mode.startswith('max'):
        V = sp.zeros(shape=N + 1, dtype=float)
        sp.fmax.at(V, L, I)
    elif mode.startswith('min'):
        V = sp.ones(shape=N + 1, dtype=float) * sp.inf
        sp.fmin.at(V, L, I)
    elif mode.startswith('size'):
        V = sp.zeros(shape=N + 1, dtype=int)
        sp.add.at(V, L, 1)
    else:
        raise ValueError("Unrecognized mode %r: expected 'max', 'min' or 'size'" % (mode,))
    im_flooded = sp.reshape(V[labels], im.shape)
    # Background voxels (label 0) pick up a value as well; mask them out again.
    im_flooded = im_flooded * mask
    return im_flooded
range_num_d2d_pairs = [2, 3, 4, 5, 6, 7, 8, 9, 10] # range_num_d2d_pairs = [10] time_sol_vec_Mon = [] EE_sol_vec_Mon = [] avg = {} num_infeasible = sp.zeros(len(range_num_d2d_pairs)) for prin in range_num_d2d_pairs: num_d2d_pairs = prin # rmin = sp.multiply(0.4, sp.log(2)) time_sol_vec = [] EE_sol_vec = [] for Mon in xrange(max_chan_realizaion): try: max_d2d_to_d2d_gains_diff = sp.copy(max_d2d_to_d2d_gains[:, :, Mon]) sp.fill_diagonal(max_d2d_to_d2d_gains_diff, 0) max_d2d_to_d2d_gains_diag = sp.subtract( max_d2d_to_d2d_gains[:, :, Mon], max_d2d_to_d2d_gains_diff) uav_to_d2d_gains = max_uav_to_d2d_gains[:num_d2d_pairs, Mon] d2d_to_d2d_gains = max_d2d_to_d2d_gains[:num_d2d_pairs, : num_d2d_pairs, Mon] d2d_to_d2d_gains_diff = max_d2d_to_d2d_gains_diff[:num_d2d_pairs, : num_d2d_pairs] d2d_to_d2d_gains_diag = sp.subtract(d2d_to_d2d_gains, d2d_to_d2d_gains_diff) # ############################################################ # This code is used to find the initial point for EEmax algorithm # ############################################################
def _bi_compute_ld_dict(p_dict):
    """Build the LD tables and per-chromosome betas for the SNPs shared by both coordinated files."""
    chrom_ld_scores_dict1 = {}
    chrom_ld_dict1 = {}
    chrom_ref_ld_mats1 = {}
    ld_score_sum1 = 0
    num_snps1 = 0
    chrom_snps1 = {}
    chrom_betas1 = {}
    chrom_snpids = {}
    chrom_betas2 = {}

    # Opened read-only (as the rest of this module does) and always closed.
    df1 = h5py.File(p_dict['coord_D1'], 'r')
    df2 = h5py.File(p_dict['coord_D2'], 'r')
    try:
        print('Calculating LD information w. radius %d' % p_dict['ld_radius'])

        cord_data_g1 = df1['cord_data']
        cord_data_g2 = df2['cord_data']

        # find overlap of chrom list
        chr_list = list(set(cord_data_g1.keys()) & set(cord_data_g2.keys()))
        for chrom_str in chr_list:
            print('Working on %s' % chrom_str)
            print('Sorting disease 1')
            g1 = cord_data_g1[chrom_str]
            # NOTE(review): when 'raw_snps_ref' is absent the arrays below are
            # left undefined (or stale from the previous chromosome), exactly
            # as in the earlier version - confirm whether that can happen.
            if 'raw_snps_ref' in g1.keys():
                raw_snps1 = g1['raw_snps_ref'][...]
                snp_stds1 = g1['snp_stds_ref'][...]
                snp_means1 = g1['snp_means_ref'][...]
            betas1 = g1['betas'][...]
            # Filter monomorphic SNPs
            ok_snps_filter1 = (snp_stds1 > 0).flatten()
            sids1 = g1['sids'][...]
            sids1 = sids1[ok_snps_filter1]

            print('Sorting disease 2')
            g2 = cord_data_g2[chrom_str]
            # Only the standard deviations of disease 2 are needed (for the
            # monomorphic filter); its genotypes and means were never used.
            if 'raw_snps_ref' in g2.keys():
                snp_stds2 = g2['snp_stds_ref'][...]
            betas2 = g2['betas'][...]
            # Filter monomorphic SNPs
            ok_snps_filter2 = (snp_stds2 > 0).flatten()
            sids2 = g2['sids'][...]
            sids2 = sids2[ok_snps_filter2]

            print('Extracting SNPs shared by both disease 1 and 2')
            ind1 = np.in1d(sids1, sids2)
            ind2 = np.in1d(sids2, sids1)
            sids_shared1 = sids1[ind1]
            sids_shared2 = sids2[ind2]
            raw_snps1 = raw_snps1[ok_snps_filter1][ind1]
            snp_means1 = snp_means1[ok_snps_filter1][ind1]
            snp_stds1 = snp_stds1[ok_snps_filter1][ind1]
            betas1 = betas1[ok_snps_filter1][ind1]
            betas2 = betas2[ok_snps_filter2][ind2]
            n_snps1 = len(raw_snps1)
            snp_means1.shape = (n_snps1, 1)
            snp_stds1.shape = (n_snps1, 1)

            ### check order ###
            if sum(sids_shared1 == sids_shared2) == len(sids_shared2):
                print('Good!')
            else:
                print('Shit happens, sorting sids1 and sids2')
                # O1 sorts disease 1; argsort(argsort(sids2)) gives each
                # disease-2 SNP its rank, so [O1][O3] puts disease 1 into
                # disease 2's order.
                O1 = np.argsort(sids_shared1)
                O2 = np.argsort(sids_shared2)
                O3 = np.argsort(O2)
                sids_shared1 = sids_shared1[O1][O3]
                if sum(sids_shared1 == sids_shared2) == len(sids_shared2):
                    raw_snps1 = raw_snps1[O1][O3]
                    snp_means1 = snp_means1[O1][O3]
                    snp_stds1 = snp_stds1[O1][O3]
                    betas1 = betas1[O1][O3]
                else:
                    print('Stop! Problems with sorting!')

            # Normalize SNPs..
            chrom_snpids[chrom_str] = sids_shared1
            snps1 = sp.array((raw_snps1 - snp_means1) / snp_stds1, dtype='float32')
            assert snps1.shape == raw_snps1.shape, 'Array Shape mismatch'
            chrom_snps1[chrom_str] = snps1
            ret_dict1 = get_LDpred_ld_tables(snps1, ld_radius=p_dict['ld_radius'],
                                             ld_window_size=2 * p_dict['ld_radius'])
            chrom_ld_dict1[chrom_str] = ret_dict1['ld_dict']
            chrom_ref_ld_mats1[chrom_str] = ret_dict1['ref_ld_matrices']
            ld_scores1 = ret_dict1['ld_scores']
            chrom_ld_scores_dict1[chrom_str] = {'ld_scores': ld_scores1,
                                                'avg_ld_score': sp.mean(ld_scores1)}
            ld_score_sum1 += sp.sum(ld_scores1)
            num_snps1 += n_snps1

            chrom_betas1[chrom_str] = betas1
            chrom_betas2[chrom_str] = betas2
    finally:
        df1.close()
        df2.close()

    avg_gw_ld_score1 = ld_score_sum1 / float(num_snps1)
    ld_scores_dict1 = {'avg_gw_ld_score': avg_gw_ld_score1,
                       'chrom_dict': chrom_ld_scores_dict1}

    # The earlier version also stored an undefined ``chrom_betas3`` here,
    # which raised NameError before anything could be written.
    return {'ld_scores_dict': ld_scores_dict1,
            'chrom_ld_dict': chrom_ld_dict1,
            'chrom_ref_ld_mats': chrom_ref_ld_mats1,
            'chrom_snps': chrom_snps1,
            'chrom_betas1': chrom_betas1,
            'chrom_betas2': chrom_betas2,
            'chrom_snpids': chrom_snpids}


def _bi_load_prior_file(hfile):
    """Parse the prior file: columns are chromosome, SNP id, h2 of disease 1, h2 of disease 2."""
    with open(hfile) as f:
        data = f.readlines()
    prf_chr = sp.empty(len(data), dtype='int8')
    prf_sids = []
    prf_h2_D1 = sp.zeros(len(data))
    prf_h2_D2 = sp.zeros(len(data))
    for i, line in enumerate(data):
        li = line.split()
        prf_chr[i] = int(li[0])
        prf_sids.append(li[1])
        prf_h2_D1[i] = float(li[2])
        prf_h2_D2[i] = float(li[3])
    prf_sids = sp.array(prf_sids, dtype='str')
    return prf_chr, prf_sids, prf_h2_D1, prf_h2_D2


def bi_get_initial(p_dict):
    """
    Compute AnnoPred-baseline posterior effects for two diseases and pickle them.

    Steps, as implemented below:

    1. Load the cached LD information from
       ``'<local_ld_prefix>_ldradius<ld_radius>.pickled.gz'`` or, if that file
       does not exist, compute it from the two coordinated HDF5 files for the
       SNPs shared by both diseases and write the cache.
    2. Read the per-SNP priors from ``p_dict['hfile']``.
    3. Estimate the genome-wide heritability of each disease from the mean
       squared betas and the average LD score.
    4. Per chromosome, align the priors with the SNP ids, rescale them to the
       estimated heritability and call ``annopred_inf`` for each disease.
    5. Pickle ``{'anno_post1': ..., 'anno_post2': ...}`` (gzip, protocol 2)
       to ``p_dict['out']``.

    Both diseases use the same reference LD matrices and the same average LD
    score, matching ``pleiopred_genomewide``. The earlier version referred to
    undefined ``ld_dict1`` / ``ld_dict2`` / ``ld_scores_dict`` objects and to
    a ``'chrom_betas'`` key that this function never writes, so it could not
    run.

    Parameters
    ----------
    p_dict : dict
        Reads the keys ``'local_ld_prefix'``, ``'ld_radius'``, ``'coord_D1'``,
        ``'coord_D2'``, ``'hfile'``, ``'out'``, ``'N1'`` and ``'N2'``.

    Returns
    -------
    None
        The result is written to ``p_dict['out']``.
    """
    local_ld_dict_file = '%s_ldradius%d.pickled.gz' % (p_dict['local_ld_prefix'],
                                                       p_dict['ld_radius'])

    if not os.path.isfile(local_ld_dict_file):
        ld_dict = _bi_compute_ld_dict(p_dict)
        print('Done calculating the LD table and LD score, writing to file: %s'
              % local_ld_dict_file)
        print('Genome-wide average LD score was: %s'
              % ld_dict['ld_scores_dict']['avg_gw_ld_score'])
        f = gzip.open(local_ld_dict_file, 'wb')
        try:
            cPickle.dump(ld_dict, f, protocol=2)
        finally:
            f.close()
        print('LD information is now pickled.')
    else:
        print('Loading LD information from file: %s' % local_ld_dict_file)
        # NOTE: unpickling executes arbitrary code; only load cache files that
        # this pipeline produced itself.
        f = gzip.open(local_ld_dict_file, 'r')
        try:
            ld_dict = cPickle.load(f)
        finally:
            f.close()

    print('Starting calculation using h2 files as priors')
    print('Loading prior information from file: %s' % p_dict['hfile'])
    prf_chr, prf_sids, h2_D1, h2_D2 = _bi_load_prior_file(p_dict['hfile'])

    out_file_prefix = p_dict['out']
    ld_radius = p_dict['ld_radius']
    n1 = p_dict['N1']
    n2 = p_dict['N2']

    # Only the chromosome names present in both files are needed from HDF5.
    df1 = h5py.File(p_dict['coord_D1'], 'r')
    df2 = h5py.File(p_dict['coord_D2'], 'r')
    try:
        chr_list = list(set(df1['cord_data'].keys()) & set(df2['cord_data'].keys()))
    finally:
        df1.close()
        df2.close()

    ld_scores_dict = ld_dict['ld_scores_dict']
    chrom_ref_ld_mats = ld_dict['chrom_ref_ld_mats']
    chrom_snpids = ld_dict['chrom_snpids']
    chrom_betas1 = ld_dict['chrom_betas1']
    chrom_betas2 = ld_dict['chrom_betas2']

    num_snps1 = 0
    sum_beta2s1 = 0
    num_snps2 = 0
    sum_beta2s2 = 0

    for chrom_str in chromosomes_list:
        if chrom_str in chr_list:
            betas1 = chrom_betas1[chrom_str]
            num_snps1 += len(betas1)
            sum_beta2s1 += sp.sum(betas1 ** 2)
            betas2 = chrom_betas2[chrom_str]
            num_snps2 += len(betas2)
            sum_beta2s2 += sp.sum(betas2 ** 2)

    L1 = ld_scores_dict['avg_gw_ld_score']
    chi_square_lambda1 = sp.mean(n1 * sum_beta2s1 / float(num_snps1))
    print('Genome-wide lambda inflation of D1: %s' % chi_square_lambda1)
    print('Genome-wide mean LD score of D1: %s' % L1)
    gw_h2_ld_score_est1 = max(0.0001, (max(1, chi_square_lambda1) - 1) / (n1 * (L1 / num_snps1)))
    print('Estimated genome-wide heritability of D1: %s' % gw_h2_ld_score_est1)

    L2 = ld_scores_dict['avg_gw_ld_score']
    chi_square_lambda2 = sp.mean(n2 * sum_beta2s2 / float(num_snps2))
    print('Genome-wide lambda inflation of D2: %s' % chi_square_lambda2)
    print('Genome-wide mean LD score of D2: %s' % L2)
    gw_h2_ld_score_est2 = max(0.0001, (max(1, chi_square_lambda2) - 1) / (n2 * (L2 / num_snps2)))
    print('Estimated genome-wide heritability of D2: %s' % gw_h2_ld_score_est2)

    h2_new1 = sp.sum(h2_D1)
    h2_new2 = sp.sum(h2_D2)
    pr_sig1 = {}
    pr_sig2 = {}
    anno_post1 = {}
    anno_post2 = {}

    ## main calculation, chr by chr, posterior betas and prs ##
    for chrom_str in chromosomes_list:
        if chrom_str in chr_list:
            print('Calculating scores for Chromosome %s' % ((chrom_str.split('_'))[1]))
            pval_derived_betas1 = chrom_betas1[chrom_str]
            pval_derived_betas2 = chrom_betas2[chrom_str]
            sids = chrom_snpids[chrom_str]

            chri = int(chrom_str.split('_')[1])
            prf_sids_chri = prf_sids[prf_chr == chri]
            h2_D1_chri = h2_D1[prf_chr == chri]
            h2_D2_chri = h2_D2[prf_chr == chri]

            # The priors can be used as they are only when they list exactly
            # the same SNPs in exactly the same order.
            if len(prf_sids_chri) == len(sids):
                in_order = sum(prf_sids_chri == sids) == len(prf_sids_chri)
                if not in_order:
                    print('sorting prior files')
            else:
                in_order = False
                print('extracting prior files')

            if in_order:
                pr_sig1[chrom_str] = sp.copy(h2_D1_chri)
                pr_sig2[chrom_str] = sp.copy(h2_D2_chri)
            else:
                pr_sig1[chrom_str] = sp.zeros(len(sids))
                pr_sig2[chrom_str] = sp.zeros(len(sids))
                for i, sid in enumerate(sids):
                    pr_sig1[chrom_str][i] = h2_D1_chri[prf_sids_chri == sid]
                    pr_sig2[chrom_str][i] = h2_D2_chri[prf_sids_chri == sid]

            # Rescale so the priors sum to the estimated heritability.
            pr_sig1[chrom_str] = gw_h2_ld_score_est1 * pr_sig1[chrom_str] / h2_new1
            pr_sig2[chrom_str] = gw_h2_ld_score_est2 * pr_sig2[chrom_str] / h2_new2

            ########################### using AnnoPred-baseline as initial values ###############################
            anno_post1[chrom_str] = annopred_inf(
                pval_derived_betas1,
                pr_sigi=pr_sig1[chrom_str],
                reference_ld_mats=chrom_ref_ld_mats[chrom_str],
                n=n1,
                ld_window_size=2 * ld_radius
            )
            anno_post2[chrom_str] = annopred_inf(
                pval_derived_betas2,
                pr_sigi=pr_sig2[chrom_str],
                reference_ld_mats=chrom_ref_ld_mats[chrom_str],
                n=n2,
                ld_window_size=2 * ld_radius
            )

    anno_post = {'anno_post1': anno_post1, 'anno_post2': anno_post2}
    f = gzip.open(out_file_prefix, 'wb')
    try:
        cPickle.dump(anno_post, f, protocol=2)
    finally:
        f.close()
def pleiopred_genomewide(data_file_D1, data_file_D2, rho, ld_radius=None, ld_dict=None,
                         out_file_prefix=None, n1=None, n2=None, PRF=None,
                         user_h1=None, user_h2=None):
    """
    Calculate LDpred for a genome

    For every chromosome present in both coordinated files this computes
    posterior effects with ``pleiopred_inf`` (per-SNP priors from ``PRF``) and
    with ``ld_pred_inf_pleio`` (heritability spread evenly over SNPs). When a
    file holds validation phenotypes (``'y'``), the polygenic scores of that
    disease are accumulated and the R2, the AUC from ``pred_accuracy``, the
    phenotypes and the scores are printed and written to
    ``<out_file_prefix>...txt`` files.

    Changes from the earlier version: the accuracy reporting of a disease is
    skipped when its file has no phenotypes (it used to fail with NameError),
    the "Validation phenotypes ... found." messages are actually printed, and
    the HDF5 and output files are closed deterministically.

    Parameters
    ----------
    data_file_D1, data_file_D2 : str
        Coordinated HDF5 files of disease 1 and 2.
    rho : number
        Passed through to ``pleiopred_inf`` and ``ld_pred_inf_pleio``.
    ld_radius : int
        The LD window size handed to the solvers is ``2 * ld_radius``.
    ld_dict : dict
        Reads ``'ld_scores_dict'``, ``'chrom_ref_ld_mats'``, ``'chrom_snps'``,
        ``'chrom_snpids'``, ``'chrom_betas1'`` and ``'chrom_betas2'``.
    out_file_prefix : str
        Prefix of every output file.
    n1, n2 : number
        GWAS sample sizes of disease 1 and 2.
    PRF : dict
        Priors with keys ``'chrom'``, ``'sids'``, ``'h2_D1'`` and ``'h2_D2'``.
    user_h1, user_h2 : float, optional
        Heritabilities to use instead of the estimates; the estimates are
        computed unless both are given.

    Raises
    ------
    AssertionError
        If an estimated lambda (mean Chi-square statistic) is not above 1.
    """
    prf_chr = PRF['chrom']
    prf_sids = PRF['sids']
    h2_D1 = PRF['h2_D1']
    h2_D2 = PRF['h2_D2']

    df1 = h5py.File(data_file_D1, 'r')
    df2 = h5py.File(data_file_D2, 'r')
    try:
        has_phenotypes1 = False
        if 'y' in df1.keys():
            print('Validation phenotypes of disease 1 found.')
            y1 = df1['y'][...]  # Phenotype
            num_individs1 = len(y1)
            prs_D1 = sp.zeros(num_individs1)
            prs_ld_pleio_D1 = sp.zeros(num_individs1)
            has_phenotypes1 = True

        has_phenotypes2 = False
        if 'y' in df2.keys():
            print('Validation phenotypes of disease 2 found.')
            y2 = df2['y'][...]  # Phenotype
            num_individs2 = len(y2)
            prs_D2 = sp.zeros(num_individs2)
            prs_ld_pleio_D2 = sp.zeros(num_individs2)
            has_phenotypes2 = True

        chr_list = list(set(df1['cord_data'].keys()) & set(df2['cord_data'].keys()))
    finally:
        # Everything needed from the files has been copied into memory.
        df1.close()
        df2.close()

    ld_scores_dict = ld_dict['ld_scores_dict']
    chrom_ref_ld_mats = ld_dict['chrom_ref_ld_mats']
    chrom_snps = ld_dict['chrom_snps']
    chrom_snpids = ld_dict['chrom_snpids']
    chrom_betas1 = ld_dict['chrom_betas1']
    chrom_betas2 = ld_dict['chrom_betas2']

    num_snps1 = 0
    sum_beta2s1 = 0
    num_snps2 = 0
    sum_beta2s2 = 0

    for chrom_str in chromosomes_list:
        if chrom_str in chr_list:
            betas1 = chrom_betas1[chrom_str]
            num_snps1 += len(betas1)
            sum_beta2s1 += sp.sum(betas1 ** 2)
            betas2 = chrom_betas2[chrom_str]
            num_snps2 += len(betas2)
            sum_beta2s2 += sp.sum(betas2 ** 2)

    if user_h1 is None or user_h2 is None:
        L1 = ld_scores_dict['avg_gw_ld_score']
        chi_square_lambda1 = sp.mean(n1 * sum_beta2s1 / float(num_snps1))
        print('Genome-wide lambda inflation of D1: %s' % chi_square_lambda1)
        print('Genome-wide mean LD score of D1: %s' % L1)
        gw_h2_ld_score_est1 = max(0.0001, (max(1, chi_square_lambda1) - 1) / (n1 * (L1 / num_snps1)))
        print('Estimated genome-wide heritability of D1: %s' % gw_h2_ld_score_est1)

        assert chi_square_lambda1 > 1, 'Something is wrong with the GWAS summary statistics of D1. Perhaps there were issues parsing of them, or the given GWAS sample size (N) was too small. Either way, lambda (the mean Chi-square statistic) is too small. '

        L2 = ld_scores_dict['avg_gw_ld_score']
        chi_square_lambda2 = sp.mean(n2 * sum_beta2s2 / float(num_snps2))
        print('Genome-wide lambda inflation of D2: %s' % chi_square_lambda2)
        print('Genome-wide mean LD score of D2: %s' % L2)
        gw_h2_ld_score_est2 = max(0.0001, (max(1, chi_square_lambda2) - 1) / (n2 * (L2 / num_snps2)))
        print('Estimated genome-wide heritability of D2: %s' % gw_h2_ld_score_est2)

        assert chi_square_lambda2 > 1, 'Something is wrong with the GWAS summary statistics of D2. Perhaps there were issues parsing of them, or the given GWAS sample size (N) was too small. Either way, lambda (the mean Chi-square statistic) is too small. '
    else:
        gw_h2_ld_score_est1 = user_h1
        gw_h2_ld_score_est2 = user_h2

    h2_new1 = sp.sum(h2_D1)
    sig_12_D1 = (1.0) / n1
    h2_new2 = sp.sum(h2_D2)
    sig_12_D2 = (1.0) / n2

    # Header lines of the four result files.
    out1 = []
    out1.append('Estimated Genome-wide heritability: ' + str(gw_h2_ld_score_est1) + '\n')
    out1.append('Posterior variance for each snp: ' + str(sig_12_D1) + '\n')

    out2 = []
    out2.append('Estimated Genome-wide heritability: ' + str(gw_h2_ld_score_est2) + '\n')
    out2.append('Posterior variance for each snp: ' + str(sig_12_D2) + '\n')

    out_ld_pleio1 = []
    out_ld_pleio1.append('Estimated Genome-wide heritability: ' + str(gw_h2_ld_score_est1) + '\n')
    out_ld_pleio1.append('Posterior variance for each snp: ' + str(sig_12_D1) + '\n')

    out_ld_pleio2 = []
    out_ld_pleio2.append('Estimated Genome-wide heritability: ' + str(gw_h2_ld_score_est2) + '\n')
    out_ld_pleio2.append('Posterior variance for each snp: ' + str(sig_12_D2) + '\n')

    ## main calculation, chr by chr, posterior betas and prs ##
    print('Starting with rho =  %s' % (rho,))
    for chrom_str in chromosomes_list:
        if chrom_str in chr_list:
            print('Calculating scores for Chromosome %s' % ((chrom_str.split('_'))[1]))
            pval_derived_betas1 = chrom_betas1[chrom_str]
            pval_derived_betas2 = chrom_betas2[chrom_str]
            sids = chrom_snpids[chrom_str]
            n_snps_chrom = len(sids)

            chri = int(chrom_str.split('_')[1])
            prf_sids_chri = prf_sids[prf_chr == chri]
            h2_D1_chri = h2_D1[prf_chr == chri]
            h2_D2_chri = h2_D2[prf_chr == chri]

            # The priors can be used as they are only when they list exactly
            # the same SNPs in exactly the same order.
            if len(prf_sids_chri) == len(sids):
                in_order = sum(prf_sids_chri == sids) == len(prf_sids_chri)
                if not in_order:
                    print('sorting prior files')
            else:
                in_order = False
                print('extracting prior files')

            if in_order:
                pr_sig1 = sp.copy(h2_D1_chri)
                pr_sig2 = sp.copy(h2_D2_chri)
            else:
                pr_sig1 = sp.zeros(len(sids))
                pr_sig2 = sp.zeros(len(sids))
                for i, sid in enumerate(sids):
                    pr_sig1[i] = h2_D1_chri[prf_sids_chri == sid]
                    pr_sig2[i] = h2_D2_chri[prf_sids_chri == sid]

            # Rescale so the priors sum to the genome-wide heritability.
            pr_sig1 = gw_h2_ld_score_est1 * pr_sig1 / h2_new1
            pr_sig2 = gw_h2_ld_score_est2 * pr_sig2 / h2_new2

            # Both diseases share the same reference LD matrices.
            posterior_betas = pleiopred_inf(
                beta_hats1=pval_derived_betas1,
                beta_hats2=pval_derived_betas2,
                pr_sig1=pr_sig1,
                pr_sig2=pr_sig2,
                rho=rho,
                ref_ld_mats1=chrom_ref_ld_mats[chrom_str],
                ref_ld_mats2=chrom_ref_ld_mats[chrom_str],
                n1=n1,
                n2=n2,
                ld_window_size=2 * ld_radius)

            ldpred_pleio_betas = ld_pred_inf_pleio(
                beta_hats1=pval_derived_betas1,
                beta_hats2=pval_derived_betas2,
                h2_D1=gw_h2_ld_score_est1 * (n_snps_chrom / float(num_snps1)),
                h2_D2=gw_h2_ld_score_est2 * (n_snps_chrom / float(num_snps2)),
                rho=rho,
                ref_ld_mats1=chrom_ref_ld_mats[chrom_str],
                ref_ld_mats2=chrom_ref_ld_mats[chrom_str],
                n1=n1,
                n2=n2,
                ld_window_size=2 * ld_radius)

            ## prs and auc ##
            if has_phenotypes1:
                prs_D1 += sp.dot(posterior_betas['D1'], chrom_snps[chrom_str])
                prs_ld_pleio_D1 += sp.dot(ldpred_pleio_betas['D1'], chrom_snps[chrom_str])

            if has_phenotypes2:
                prs_D2 += sp.dot(posterior_betas['D2'], chrom_snps[chrom_str])
                prs_ld_pleio_D2 += sp.dot(ldpred_pleio_betas['D2'], chrom_snps[chrom_str])

    # Every report below needs validation phenotypes, so each one is guarded.
    # The order of the four reports is the same as before.

    ############ PleioPred results #############
    if has_phenotypes1:
        corr_inf1 = sp.corrcoef(y1, prs_D1)[0, 1]
        r2_inf1 = corr_inf1 ** 2
        print('D1: the R2 prediction accuracy (observed scale) of PleioPred was: %0.4f (%0.6f)' % (
            r2_inf1, ((1 - r2_inf1) ** 2) / num_individs1))
        out1.append('D1: the R2 prediction accuracy (observed scale) of PleioPred was: ' +
                    str(r2_inf1) + ' (' + str(((1 - r2_inf1) ** 2) / num_individs1) + ')\n')

        # Flip the score when it is negatively correlated with the phenotype.
        if corr_inf1 < 0:
            prs_D1 = -1 * prs_D1
        auc1 = pred_accuracy(y1, prs_D1)
        print('D1: PleioPred AUC for the whole genome was: %0.4f' % auc1)
        out1.append('D1: PleioPred AUC for the whole genome was: ' + str(auc1) + '\n')

        sp.savetxt('%s_y_' % (out_file_prefix) + '_D1.txt', y1)
        sp.savetxt('%s_prs-inf' % (out_file_prefix) + '_PleioPred_D1.txt', prs_D1)

        # Now calibration
        with open('%s_inf_auc_' % (out_file_prefix) + '_PleioPred_D1.txt', "w") as ff_inf:
            ff_inf.writelines(out1)

    if has_phenotypes2:
        corr_inf2 = sp.corrcoef(y2, prs_D2)[0, 1]
        r2_inf2 = corr_inf2 ** 2
        print('D2: the R2 prediction accuracy (observed scale) of PleioPred was: %0.4f (%0.6f)' % (
            r2_inf2, ((1 - r2_inf2) ** 2) / num_individs2))
        out2.append('D2: the R2 prediction accuracy (observed scale) of PleioPred was: ' +
                    str(r2_inf2) + ' (' + str(((1 - r2_inf2) ** 2) / num_individs2) + ')\n')

        if corr_inf2 < 0:
            prs_D2 = -1 * prs_D2
        auc2 = pred_accuracy(y2, prs_D2)
        print('D2: PleioPred AUC for the whole genome was: %0.4f' % auc2)
        out2.append('D2: PleioPred AUC for the whole genome was: ' + str(auc2) + '\n')

        sp.savetxt('%s_y_' % (out_file_prefix) + '_D2.txt', y2)
        sp.savetxt('%s_prs-inf' % (out_file_prefix) + '_PleioPred_D2.txt', prs_D2)

        # Now calibration
        with open('%s_inf_auc_' % (out_file_prefix) + '_PleioPred_D2.txt', "w") as ff_inf:
            ff_inf.writelines(out2)

    ############ LDpred_pleio results #############
    if has_phenotypes1:
        corr_inf1 = sp.corrcoef(y1, prs_ld_pleio_D1)[0, 1]
        r2_inf1 = corr_inf1 ** 2
        print('D1: the R2 prediction accuracy (observed scale) of LDpred_inf_pleio was: %0.4f (%0.6f)' % (
            r2_inf1, ((1 - r2_inf1) ** 2) / num_individs1))
        out_ld_pleio1.append('The R2 prediction accuracy (observed scale) LDpred_inf_pleio was: ' +
                             str(r2_inf1) + ' (' + str(((1 - r2_inf1) ** 2) / num_individs1) + ')\n')

        if corr_inf1 < 0:
            prs_ld_pleio_D1 = -1 * prs_ld_pleio_D1
        auc1 = pred_accuracy(y1, prs_ld_pleio_D1)
        print('D1: LDpred_pleio AUC for the whole genome was: %0.4f' % auc1)
        out_ld_pleio1.append('D1: LDpred_inf_pleio AUC for the whole genome was: ' + str(auc1) + '\n')

        sp.savetxt('%s_prs-inf' % (out_file_prefix) + '_LDpred_pleio_D1.txt', prs_ld_pleio_D1)

        # Now calibration
        with open('%s_inf_auc_' % (out_file_prefix) + '_LDpred_pleio_D1.txt', "w") as ff_inf:
            ff_inf.writelines(out_ld_pleio1)

    if has_phenotypes2:
        corr_inf2 = sp.corrcoef(y2, prs_ld_pleio_D2)[0, 1]
        r2_inf2 = corr_inf2 ** 2
        print('D2: the R2 prediction accuracy (observed scale) of LDpred_inf_pleio was: %0.4f (%0.6f)' % (
            r2_inf2, ((1 - r2_inf2) ** 2) / num_individs2))
        out_ld_pleio2.append('D2: the R2 prediction accuracy (observed scale) of LDpred_inf_pleio was: ' +
                             str(r2_inf2) + ' (' + str(((1 - r2_inf2) ** 2) / num_individs2) + ')\n')

        if corr_inf2 < 0:
            prs_ld_pleio_D2 = -1 * prs_ld_pleio_D2
        auc2 = pred_accuracy(y2, prs_ld_pleio_D2)
        print('D2: LDpred_inf_pleio AUC for the whole genome was: %0.4f' % auc2)
        out_ld_pleio2.append('D2: LDpred_inf_pleio AUC for the whole genome was: ' + str(auc2) + '\n')

        sp.savetxt('%s_prs-inf' % (out_file_prefix) + '_LDpred_pleio_D2.txt', prs_ld_pleio_D2)

        # Now calibration
        with open('%s_inf_auc_' % (out_file_prefix) + '_LDpred_pleio_D2.txt', "w") as ff_inf:
            ff_inf.writelines(out_ld_pleio2)
from scipy import floor, linspace, array, zeros, copy, loadtxt
from scipy.fftpack import rfft, irfft, dct, idct
from pylab import plot, show, xlabel, ylabel


def _leading_coefficients(coefficients, count):
    """Return a zero-filled float array whose first len/50 entries are copied from ``coefficients``."""
    kept = zeros(count, float)
    cutoff = int(count / 50)
    kept[0:cutoff] = copy(coefficients[0:cutoff])
    return kept


# Input series to smooth.
dow2 = loadtxt('../../cpresources/dow2.txt', float)

# plot(dow2)
# show()

# Smoothing with the discrete Fourier transform: transform, zero everything
# past the leading coefficients, transform back.
dow2_fourier = rfft(dow2)
N = len(dow2_fourier)
first_2_percent = _leading_coefficients(dow2_fourier, N)
smoothed_dow2 = irfft(first_2_percent)

# Same procedure with the discrete cosine transform.
dow2_cos = dct(dow2)
n = len(dow2_cos)
first_cos_2_percent = _leading_coefficients(dow2_cos, n)
# need factor of 1 / 2n for normalization
smoothed_cos_dow2 = idct(first_cos_2_percent) / (2 * n)

# Original in black, Fourier-smoothed in green, cosine-smoothed in red.
for series, colour in ((dow2, 'k'), (smoothed_dow2, 'g'), (smoothed_cos_dow2, 'r')):
    plot(series, colour)
show()
def _read_cmblock_nodes(lines, component):
    """Return the node numbers listed in the first ``CMBLOCK,<component>`` section of ``lines``."""
    for line_idx, line in enumerate(lines):
        header = line.split(',')
        if header[0] == 'CMBLOCK' and header[1] == component:
            nodes = []
            # The line directly after the header is skipped; the section ends
            # at the next CMBLOCK header or at the end of the file.
            for node_line in lines[line_idx + 2:]:
                if node_line.split(',')[0] == 'CMBLOCK':
                    break
                nodes.extend(int(node) for node in node_line.split())
            return nodes
    return []


def _as_index_array(values):
    """Return ``values`` as a 1-D int array, or an empty (0, 1) int array when nothing was read."""
    if values:
        return numpy.array(values, dtype=int)
    return numpy.empty((0, 1), int)


def read_ansys_mesh(mesh_dir, filename, nodes_subset=None, elem_subset=None, debug=False):
    """Read an ansys .in file

    Only Linear lagrange elements supported.
    CODE NEEDS TO BE UPDATED FOR TET MESHES

    Only the first NBLOCK, the first EBLOCK, the first ``CMBLOCK,MOUTH`` and
    the first ``CMBLOCK,OUTLET`` section are read. A section that runs to the
    end of the file is accepted (this used to raise IndexError).

    Keyword arguments:
    mesh_dir -- directory containing the file
    filename -- name of the .in file
    nodes_subset -- nodes to load (all if empty or None)
    elem_subset -- elements to load (all if empty or None)
    debug -- unused

    Returns a tuple of six arrays: node numbers, node coordinates (n x 3),
    element numbers, element nodes (n x 4), inlet (MOUTH) node numbers and
    outlet node numbers. A section that yields nothing is returned as an
    empty array with 0 rows.

    Raises ValueError if an element does not resolve to exactly 4 distinct
    nodes.
    """
    # Load ansys .in file
    with open(os.path.join(mesh_dir, filename), 'r') as f:
        lines = f.readlines()

    nodes_per_elem = 4  # for a tet mesh

    load_all_nodes = nodes_subset is None or len(nodes_subset) == 0
    load_all_elems = elem_subset is None or len(elem_subset) == 0
    wanted_nodes = set() if load_all_nodes else set(nodes_subset)
    wanted_elems = set() if load_all_elems else set(elem_subset)

    # Add nodes
    node_numbers = []
    node_xyz = []
    for line_idx, line in enumerate(lines):
        if line.split(' ,')[0] == 'NBLOCK':
            for node_line in lines[line_idx + 2:]:
                fields = node_line.split()
                # A line starting with the token 'N' closes the block; a blank
                # line is treated the same way instead of raising IndexError.
                if not fields or fields[0] == 'N':
                    break
                # The last space-separated chunk holds the three coordinates
                # in 16-character columns after one leading character.
                coordinates = node_line.split(' ')[-1]
                x = float(coordinates[1:17])
                y = float(coordinates[17:33])
                z = float(coordinates[33:-1])
                node_num = int(fields[0])
                if load_all_nodes or node_num in wanted_nodes:
                    node_numbers.append(node_num)
                    node_xyz.append([x, y, z])
            break

    # Add elements
    element_numbers = []
    element_rows = []
    for line_idx, line in enumerate(lines):
        if line.split(' ,')[0] == 'EBLOCK':
            for node_line in lines[line_idx + 2:]:
                fields = node_line.split()
                if fields == []:
                    break
                # Distinct node numbers (sorted) plus the position at which
                # each one first appears in the element line.
                element_nodes, idx_array = numpy.unique(
                    numpy.array([int(node) for node in fields[11:-1]]), return_index=True)
                idx_array = [3 if idx == 4 else idx for idx in idx_array]
                # Reordering the node arrangement
                renumbered_nodes = numpy.copy(element_nodes)
                for position, idx in enumerate(idx_array):
                    renumbered_nodes[idx] = element_nodes[position]
                element_num = int(fields[10])
                if load_all_elems or element_num in wanted_elems:
                    if len(renumbered_nodes) != nodes_per_elem:
                        raise ValueError('Element %d does not have %d distinct nodes'
                                         % (element_num, nodes_per_elem))
                    element_numbers.append(element_num)
                    element_rows.append(renumbered_nodes)
            break

    node_array = _as_index_array(node_numbers)
    if node_xyz:
        node_coordinates = numpy.array(node_xyz, dtype=float)
    else:
        node_coordinates = numpy.empty((0, 3), int)

    element_array = _as_index_array(element_numbers)
    if element_rows:
        element_nodes_array = numpy.array(element_rows, dtype=int)
    else:
        element_nodes_array = numpy.empty((0, nodes_per_elem), int)

    # Find which nodes are part of the inlet and of the outlet
    inlet_node_array = _as_index_array(_read_cmblock_nodes(lines, 'MOUTH'))
    outlet_node_array = _as_index_array(_read_cmblock_nodes(lines, 'OUTLET'))

    # Return node number and coordinate arrays, and element number and element node arrays
    return node_array, node_coordinates, element_array, element_nodes_array, inlet_node_array, outlet_node_array