def __init__(self, type='random', pars=parameters()):
    if type == 'random':
        ee = (rand(pars['Ne'], pars['Ne']) < pars['p_ee'])
        ei = (rand(pars['Ne'], pars['Ni']) < pars['p_ei'])
        ii = (rand(pars['Ni'], pars['Ni']) < pars['p_ii'])
        ie = (rand(pars['Ni'], pars['Ne']) < pars['p_ie'])
        self.A = vstack((hstack((ee, ei)), hstack((ie, ii))))
        # remove self-loops
        self.A[range(pars['Ne'] + pars['Ni']), range(pars['Ne'] + pars['Ni'])] = 0
    elif type == 'none':
        # no connectivity
        self.A = zeros((pars['N'], pars['N']))
    elif type == 'uni_torus':
        # torus with uniform connectivity profile
        self.A = zeros((pars['N'], pars['N']))
        # construct matrix of pairwise distances
        distMat = zeros((pars['N'], pars['N']))
        for n1 in range(pars['N']):
            coord1 = linear2grid(n1, pars['N_col'])
            for n2 in arange(n1 + 1, pars['N']):
                # this sets neuron n1 to the origin
                coord2 = linear2grid(n2, pars['N_col']) - coord1
                distMat[n1, n2] = toric_length(coord2, pars['N_row'], pars['N_col'])
        distMat = distMat + distMat.transpose()
        # construct adjacency matrix
        for n1 in range(pars['N']):
            neighbor_ids = nonzero(distMat[:, n1] < pars['sigma_con'])[0]
            random.shuffle(neighbor_ids)
            idx = neighbor_ids[0:min([pars['ncon'], len(neighbor_ids)])]
            self.A[idx, n1] = 1
    else:
        print("type " + type + " not yet implemented")
def solver(M, _k, _sigma=0., _tol=1e-7):
    #t_start = time()
    try:
        if scipy.__version__.split('.', 2)[1] == '10':
            #
            # eigsh sparse eigensolver, with sigma setting (in scipy>=0.10)
            #
            eigval, eigvec = SparseLinalg.eigsh(M, k=_k, sigma=_sigma, tol=_tol)
        elif scipy.__version__.split('.', 2)[1] in ('8', '9'):
            #
            # eigsh sparse eigensolver, no sigma setting (in scipy<0.10)
            # ask for more than _k eigvecs, otherwise the solver is unstable
            #
            eigval, eigvec = SparseLinalg.eigsh(M, k=_k*10, which='SM')
            #_, eigval, eigvec = SparseLinalg.svds(W, k=_k*10)
    except SparseLinalg.arpack.ArpackNoConvergence as excobj:
        print("ARPACK iteration did not converge")
        eigval, eigvec = excobj.eigenvalues, excobj.eigenvectors
        # pad the missing eigenpairs with zeros up to _k
        eigval = scipy.hstack((eigval, numpy.zeros(_k - eigval.shape[0])))
        eigvec = scipy.hstack((eigvec, numpy.zeros((M.shape[0], _k - eigvec.shape[1]))))
    #
    # If eigval/eigvec pairs are not sorted on eigvals value
    #
    #ixEig = numpy.argsort(eigval)
    #eigval = eigval[ixEig]
    #eigvec = eigvec[:,ixEig]
    #print 'Eigen-values/vectors found in %.6fs' % (time()-t_start)
    return eigval, eigvec
def make_data_twoclass(N=50):
    # generates some toy data
    mu = sp.array([[0, 2], [0, -2]]).T
    C = sp.array([[5., 4.], [4., 5.]])
    X = sp.hstack((mvn(mu[:, 0], C, N // 2).T, mvn(mu[:, 1], C, N // 2).T))
    Y = sp.hstack((sp.ones((1, N // 2)), -sp.ones((1, N // 2))))
    return X, Y
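# Hedged usage sketch (not from the original source): it assumes the snippet's
# module-level aliases `sp` = scipy and `mvn` = scipy.random.multivariate_normal,
# and simply checks the shapes of the returned toy data.
if __name__ == '__main__':
    import scipy as sp
    mvn = sp.random.multivariate_normal
    X, Y = make_data_twoclass(N=50)
    # X is 2 x N (one column per sample), Y is 1 x N with labels +1 / -1
    print(X.shape, Y.shape)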
def bounds(Xs, Ys, ns=100):
    # use a GP to infer the mean and bounds on sets of x/y data that have different x
    #f,a = plt.subplots(2)
    #for i in xrange(len(Ys)):
    #    a[0].plot(Xs[i],Ys[i])
    X = sp.hstack(Xs)
    np = X.size  # note: this local name shadows any numpy alias called np
    Y = sp.hstack(Ys)
    X.resize([np, 1])
    Y.resize([np, 1])
    #a[1].plot(X,Y,'r.')
    np = X.size
    S = sp.zeros(np)
    D = [[sp.NaN]] * np
    ki = GPdc.MAT52CS
    mprior = sp.array([1., 2., 1.])
    sprior = sp.array([2., 2., 2.])
    #MAPH = GPdc.searchMAPhyp(X,Y,S,D,mprior,sprior,ki,mx=500)
    MAPH = sp.array([0.5, 5., 0.3])
    g = GPdc.GPcore(X, Y, S, D, GPdc.kernel(ki, 1, MAPH))
    sup = sp.linspace(min(X), max(X), ns)
    [m, V] = g.infer_diag_post(sup, [[sp.NaN]] * ns)
    std = sp.sqrt(V + MAPH[2])
    #plt.fill_between(sup.flatten(),(m-std).flatten(),(m+std).flatten(),
    #                 facecolor='lightblue',edgecolor='lightblue',alpha=0.5)
    #a[1].plot(sup,m.flatten(),'b')
    return [sup, m, std]
def MNEfit(stim, resp, order):
    # in order for dlogloss to work, we need to know -<g(yt(n),xt)>_data
    # == calculate the constrained averages over the data set
    Nsamples = sp.size(stim, 0)
    Ndim = sp.size(stim, 1)
    psp = sp.mean(sp.mean(resp))  # spike probability (first constraint)
    avg = (1.0*stim.T*resp)/(Nsamples*1.0)
    avgs = sp.vstack((psp, avg))
    if order > 1:
        avgsqrd = (stim.T*1.0)*(sp.array(sp.tile(resp, (1, Ndim)))*sp.array(stim))/(Nsamples*1.0)
        avgsqrd = sp.reshape(avgsqrd, (Ndim**2, 1))
        avgs = sp.vstack((avgs, avgsqrd))
    # initialize params:
    pstart = sp.log(1/avgs[0, 0] - 1)
    pstart = sp.hstack((pstart, .001*(2*sp.random.rand(Ndim) - 1)))
    if order > 1:
        temp = .0005*(2*sp.random.rand(Ndim, Ndim) - 1)
        pstart = sp.hstack((pstart, sp.reshape(temp + temp.T, (1, Ndim**2))[0]))
    # redefine functions with fixed vals:
    def logLoss(p):
        return LLF.log_loss(p, stim, resp, order)
    def dlogLoss(p):
        return LLF.d_log_loss(p, stim, avgs, order)
    # run the optimization:
    #pfinal = opt.fmin_tnc(logLoss,pstart,fprime=dlogLoss)
    # conjugate-gradient:
    pfinal = opt.fmin_cg(logLoss, pstart, fprime=dlogLoss)
    #pfinal = opt.fmin(logLoss,pstart,fprime=dlogLoss)
    return pfinal
def getResultMatrix(self, stst=False, lbls=False):
    """
    Returns an array of result data. I'm keeping this for backwards compatibility,
    but it will be replaced by a getOutput() method when this scanner is updated
    to use the new data_scan object.

    - *stst* add steady-state data to output array
    - *lbls* return a tuple of (array, column_header_list)

    If *stst* is True output has dimensions
    [scan_parameters]+[state_species+state_flux]+[Useroutput],
    otherwise [scan_parameters]+[Useroutput].
    """
    output_array = None
    labels = []
    if stst:
        if self.HAS_USER_OUTPUT:
            output_array = scipy.hstack([self.ScanSpace, self.SteadyStateResults, self.UserOutputResults])
            labels = self.GenOrder + list(self.mod.species) + list(self.mod.reactions) + self.UserOutputList
        else:
            output_array = scipy.hstack([self.ScanSpace, self.SteadyStateResults])
            labels = self.GenOrder + list(self.mod.species) + list(self.mod.reactions)
    else:
        output_array = scipy.hstack([self.ScanSpace, self.UserOutputResults])
        labels = self.GenOrder + self.UserOutputList
    if lbls:
        return output_array, labels
    else:
        return output_array
def coulomb_mat_eigvals(atoms, at_idx, r_cut, do_calc_connect=True, n_eigs=20):
    if do_calc_connect:
        atoms.set_cutoff(8.0)
        atoms.calc_connect()
    pos = sp.vstack((sp.asarray([sp.asarray(a.diff) for a in atoms.neighbours[at_idx]]),
                     sp.zeros(3)))
    Z = sp.hstack((sp.asarray([atoms.z[a.j] for a in atoms.neighbours[at_idx]]),
                   atoms.z[at_idx]))
    M = sp.outer(Z, Z) / (sp.spatial.distance_matrix(pos, pos) + np.eye(pos.shape[0]))
    sp.fill_diagonal(M, 0.5 * Z ** 2.4)
    # data = [[atoms.z[a.j], sp.asarray(a.diff)] for a in atoms.neighbours[at_idx]]
    # data.append([atoms.z[at_idx], sp.array([0,0,0])])  # central atom
    # M = sp.zeros((len(data), len(data)))
    # for i, atom1 in enumerate(data):
    #     M[i,i] = 0.5 * atom1[0] ** 2.4
    #     for j, atom2 in enumerate(data[i+1:]):
    #         j += i+1
    #         M[i,j] = atom1[0] * atom2[0] / LA.norm(atom1[1] - atom2[1])
    # M = 0.5 * (M + M.T)
    eigs = (LA.eigh(M, eigvals_only=True))[::-1]
    if n_eigs is None:
        return eigs  # all eigenvalues
    elif eigs.size >= n_eigs:
        return eigs[:n_eigs]  # only the first few eigenvalues
    else:
        return sp.hstack((eigs, sp.zeros(n_eigs - eigs.size)))  # zero-padded extra fields
def cv(nn_name, d_num=10000, k_fold=7, score_metrics='accuracy', verbose=0):
    suff = str(nn_name[:2])
    if nn_name.find('calib') > 0:
        X_data_name = 'train_data_icalib_' + suff + '.npy'
        y_data_name = 'labels_icalib_' + suff + '.npy'
    else:
        X_data_name = 'train_data_' + suff + '.npy'
        y_data_name = 'labels_' + suff + '.npy'
    X, y = sp.load(X_data_name), sp.load(y_data_name)
    d_num = min(len(X), d_num)
    X = X[:d_num]
    y = y[:d_num]
    rates12 = sp.hstack((0.05 * sp.ones(25, dtype=sp.float32),
                         0.005 * sp.ones(15, dtype=sp.float32),
                         0.0005 * sp.ones(10, dtype=sp.float32)))
    rates24 = sp.hstack((0.01 * sp.ones(25, dtype=sp.float32),
                         0.0001 * sp.ones(15, dtype=sp.float32)))
    rates48 = sp.hstack([0.05 * sp.ones(15, dtype=sp.float32),
                         0.005 * sp.ones(10, dtype=sp.float32)])
    if nn_name == '48-net':
        X12 = sp.load('train_data_12.npy')[:d_num]
        X24 = sp.load('train_data_24.npy')[:d_num]
    elif nn_name == '24-net':
        X12 = sp.load('train_data_12.npy')[:d_num]
    if score_metrics == 'accuracy':
        score_fn = accuracy_score
    else:
        score_fn = f1_score
    scores = []
    iteration = 0
    for t_indx, v_indx in util.kfold(X, y, k_fold=k_fold):
        nn = None
        X_train, X_test, y_train, y_test = X[t_indx], X[v_indx], y[t_indx], y[v_indx]
        #print('\t \t', str(iteration+1), 'fold out of ', str(k_fold), '\t \t')
        if nn_name == '24-net':
            nn = Cnnl(nn_name=nn_name, l_rates=rates24,
                      subnet=Cnnl(nn_name='12-net', l_rates=rates12).load_model('12-net_lasagne_.pickle'))
            nn.fit(X=X_train, y=y_train, X12=X12[t_indx])
        elif nn_name == '48-net':
            nn = Cnnl(nn_name=nn_name, l_rates=rates48,
                      subnet=Cnnl(nn_name='24-net', l_rates=rates24,
                                  subnet=Cnnl(nn_name='12-net', l_rates=rates12).load_model('12-net_lasagne_.pickle')
                                  ).load_model('24-net_lasagne_.pickle'))
            nn.fit(X=X_train, y=y_train, X12=X12[t_indx], X24=X24[t_indx])
        else:
            nn = Cnnl(nn_name=nn_name, l_rates=rates12, verbose=verbose)
            nn.fit(X=X_train, y=y_train)
        if nn_name == '24-net':
            y_pred = nn.predict(X_test, X12=X12[v_indx])
        elif nn_name == '48-net':
            y_pred = nn.predict(X_test, X12=X12[v_indx], X24=X24[v_indx])
        else:
            y_pred = nn.predict(X_test)
        score = score_fn(y_test, y_pred)
        #print(iteration, 'fold score', score)
        scores.append(score)
        iteration += 1
    score_mean = sp.array(scores).mean()
    print(d_num, 'mean score', score_mean)
    return score_mean
def funky():
    x0 = sp.array([0.25, 0.3, 0.5, 0.6, 0.6])
    y0 = sp.array([0.2, 0.35, 0.0, 0.25, 0.65])
    tx = 0.46
    ty = 0.23
    t0 = Triangulation(x0, y0)
    t1 = Triangulation(sp.hstack((x0, [tx])), sp.hstack((y0, [ty])))
    return t0, t1
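# Hedged usage sketch (assumes `Triangulation` is matplotlib.tri.Triangulation
# and `sp` = scipy, as the snippet above implies): plot both triangulations to
# see how appending the extra point (tx, ty) via sp.hstack changes the mesh.
if __name__ == '__main__':
    import matplotlib.pyplot as plt
    t0, t1 = funky()
    fig, (ax0, ax1) = plt.subplots(1, 2)
    ax0.triplot(t0)  # original five points
    ax1.triplot(t1)  # with the extra point appended
    plt.show()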
def backprop(self, A_in, Z_out, prev_delta, prev_params):
    f = GRADFNS[self.modelfn]
    num_pts = np.shape(Z_out)[0]
    bias_ones = np.ones((num_pts, 1))
    sgrd = f(np.hstack([bias_ones, Z_out]))
    delta = np.dot(prev_params.T, prev_delta) * sgrd.T
    grad = np.dot(delta[1:, :], np.hstack([bias_ones, A_in])) / num_pts
    return grad, delta
def pdist(X, idx, q):
    N = len(X)
    p = scipy.zeros((N, N))
    for i in idx:
        for j in scipy.arange(i, N):
            if i != j:
                p[i, j] = dist(X[i], X[j])
        q.put(scipy.hstack((i, p[i])))
    # sentinel row signalling that this worker is done
    q.put(scipy.hstack((-1, scipy.zeros(N))))
def make_data_xor(N=80, noise=.25):
    # generates some toy data
    mu = sp.array([[-1, 1], [1, 1]]).T
    C = sp.eye(2) * noise
    X = sp.hstack((mvn(mu[:, 0], C, N // 4).T, mvn(-mu[:, 0], C, N // 4).T,
                   mvn(mu[:, 1], C, N // 4).T, mvn(-mu[:, 1], C, N // 4).T))
    Y = sp.hstack((sp.ones((1, N // 2)), -sp.ones((1, N // 2))))
    randidx = sp.random.permutation(N)
    Y = Y[0, randidx]
    X = X[:, randidx]
    return X, Y
def stripe2():
    Y1 = sp.vstack((sp.ones((50, 1)), sp.zeros((50, 1))))
    Y2 = sp.vstack((sp.zeros((50, 1)), sp.ones((50, 1))))
    Y = sp.hstack([Y1, Y2])
    X1 = sp.random.multivariate_normal([-2, 2], [[1, .8], [.8, 1]], size=50)
    X2 = sp.random.multivariate_normal([2, -1], [[1, .8], [.8, 1]], size=50)
    X = sp.hstack((sp.ones((100, 1)), sp.vstack([X1, X2])))
    return Y, X
def plot(i, zz):
    plt.figure(i, figsize=(10, 10))
    plt.plot(sp.hstack((quad_x, quad_x[0])), sp.hstack((quad_y, quad_y[0])), '-g')
    plt.plot(quad_x[0], quad_y[0], 'ro')
    plt.axis('equal')
    plt.grid('on')
    plt.xlim((9, 12))
    plt.ylim((9, 12))
    #plt.contourf(x_samples, y_samples, z_samples, 100, interpolation=None)
    plt.contourf(x_samples, y_samples, abs(zz), 100, interpolation=None)
    plt.colorbar()
def draw_support_inplane(g, lb, ub, n, method, axis, value, para=1.0):
    print("dsinplane axis:{} value:{}".format(axis, value))
    if type(g) is int:
        gf = g - 1
    else:
        gf = gpfake(g, axis, value)
    lb_red = sp.hstack([lb[:axis], lb[axis + 1:]])
    ub_red = sp.hstack([ub[:axis], ub[axis + 1:]])
    X = draw_support(gf, lb_red, ub_red, n, method, para=para)
    return sp.hstack([X[:, :axis], sp.ones([n, 1]) * value, X[:, axis:]])
def combineRedLaw(ofn, chiar_curve="ism", power=-1.8):
    """
    A method to combine the Fitzpatrick 2004 and Chiar & Tielens 2006 reddening
    laws, as well as to extrapolate Chiar & Tielens 2006 to longer wavelengths.

    The result is saved in a file and used by the IvS repository as a valid
    reddening law.

    @param ofn: The output filename with path
    @type ofn: str

    @keyword chiar_curve: The curve type for Chiar & Tielens 2006. Either 'gc'
                          or 'ism'.
                          (default: 'ism')
    @type chiar_curve: str
    @keyword power: The power for the power law extrapolation. Default is taken
                    from Chiar and Tielens 2006, as a typical value for local
                    ISM between 2 and 5 micron. gc may require a different
                    value, but it is not very important.
                    (default: -1.8)
    @type power: float
    """
    chiar_curve = chiar_curve.lower()
    # -- Extract the two relevant extinction laws.
    xchiar, a_ak_chiar = red.get_law("chiar2006", norm="Ak",
                                     wave_units="micron", curve=chiar_curve)
    xfitz, a_ak_fitz = red.get_law("fitzpatrick2004", norm="Ak",
                                   wave_units="micron")

    # -- Define a power law for the extrapolation
    def power_law(x, scale, power):
        return scale * (x) ** power

    # -- Determine the scaling factor from the specific Chiar & Tielens law
    scale = a_ak_chiar[-1] / (xchiar[-1] ** power)
    # -- Create an x grid for longer wavelengths.
    xlong = np.linspace(xchiar[-1] + 0.1, 1000, 1000)
    a_ak_long = power_law(xlong, scale, power)
    # -- Combine the three sections
    xcom = hstack([xfitz[xfitz < xchiar[0]], xchiar, xlong])
    a_ak_com = hstack([a_ak_fitz[xfitz < xchiar[0]], a_ak_chiar, a_ak_long])
    # -- Write the result to a file
    comments = "#-- wavelength (micron) A_lambda/A_k\n"
    DataIO.writeCols(filename=ofn, cols=[[comments]])
    DataIO.writeCols(filename=ofn, cols=[xcom, a_ak_com], mode="a")
def search_acq(self, cfn, logsl, logsu, volper=1e-6, dv=[[sp.NaN]]):
    def directwrap(Q, extra):
        x = sp.array([Q[:-1]])
        s = 10**Q[-1]
        acq = PESgain(self.G, self.Ga, self.Z, x, dv, [s])
        try:
            R = -acq / cfn(x, **{'s': s})
        except TypeError:
            R = -acq / cfn(x, s)
        return (R, 0)
    [xmin, ymin, ierror] = DIRECT.solve(directwrap,
                                        sp.hstack([self.lb, logsl]),
                                        sp.hstack([self.ub, logsu]),
                                        user_data=[], algmethod=1,
                                        volper=volper, logfilename='/dev/null')
    return [xmin, ymin, ierror]
def init_search(self, para):
    self.para = para
    self.sdefault = -1
    self.lb = sp.hstack([sp.array([[para['sl']]]), self.lb])
    self.ub = sp.hstack([sp.array([[para['su']]]), self.ub])
    #print self.lb
    #print self.ub
    if self.initstate:
        self.setstate()
    else:
        for i in xrange(para['ninit']):
            self.step(random=True)
    return
def store(old, new):
    old = old.reshape((1, len(old)))
    lold = old.shape[1]
    lnew = new.shape[1]
    if lold == lnew:
        X = sc.vstack((old, new))
    elif lold > lnew:
        # left-pad the shorter new row with zeros (2-D so hstack dimensions match)
        new = sc.hstack((sc.zeros((new.shape[0], lold - lnew)), new))
        X = sc.vstack((old, new))
    elif lnew > lold:
        # right-pad the shorter old row with zeros
        old = sc.hstack((old, sc.zeros((1, lnew - lold))))
        X = sc.vstack((old, new))
    return X
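# Hedged usage sketch (assumes `sc` = scipy, as the snippet implies): the
# shorter row is zero-padded before the two rows are stacked.
if __name__ == '__main__':
    import scipy as sc
    a = sc.array([1., 2., 3.])
    b = sc.array([[4., 5.]])   # `new` is expected to be 2-D with shape (1, lnew)
    print(store(a, b))
    # -> [[1. 2. 3.]
    #     [0. 4. 5.]]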
def simplex_array_boundary(s, parity):
    """
    Compute the boundary faces and boundary operator of an array of simplices
    with given simplex parities.

    E.g. for a mesh with two triangles [0,1,2] and [1,3,2], the second triangle
    has opposite parity relative to sorted order:

        simplex_array_boundary(array([[0,1,2],[1,2,3]]), array([0,1]))
    """
    #TODO handle edge case as special case
    num_simplices = s.shape[0]
    faces_per_simplex = s.shape[1]
    num_faces = num_simplices * faces_per_simplex
    orientations = 1 - 2*parity

    #faces[:,:-2] are the indices of the faces
    #faces[:,-2] is the index of the simplex whose boundary produced the face
    #faces[:,-1] is the orientation of the face in the boundary of the simplex
    faces = empty((num_faces, s.shape[1] + 1), dtype=s.dtype)
    for i in range(faces_per_simplex):
        rows = faces[num_simplices*i:num_simplices*(i+1)]
        rows[:,  : i] = s[:, :i]
        rows[:, i:-2] = s[:, i+1:]
        rows[:, -2]   = arange(num_simplices)
        rows[:, -1]   = ((-1)**i)*orientations

    #sort rows
    faces = faces[lexsort(faces[:, :-2].T[::-1])]

    #find unique faces (boolean negation via ~, not unary minus)
    face_mask = ~hstack((array([False]), alltrue(faces[1:, :-2] == faces[:-1, :-2], axis=1)))
    unique_faces = faces[face_mask, :-2]

    #compute CSR representation for boundary operator
    csr_indptr = hstack((arange(num_faces)[face_mask], array([num_faces])))
    csr_indices = ascontiguousarray(faces[:, -2])
    csr_data = faces[:, -1].astype('int8')

    shape = (len(unique_faces), num_simplices)
    boundary_operator = csr_matrix((csr_data, csr_indices, csr_indptr), shape)

    return unique_faces, boundary_operator
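# Hedged usage sketch (assumes the names used above -- array, empty, arange,
# lexsort, hstack, alltrue, ascontiguousarray -- are in scope, e.g. via
# `from numpy import *`, together with `from scipy.sparse import csr_matrix`):
# the two-triangle mesh from the docstring.
if __name__ == '__main__':
    triangles = array([[0, 1, 2], [1, 2, 3]])
    parity = array([0, 1])
    edges, boundary = simplex_array_boundary(triangles, parity)
    print(edges)               # the five unique edges of the mesh
    print(boundary.toarray())  # signed edge-by-triangle incidence matrix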
def stripe3():
    zero = sp.zeros((33, 1))
    ones = sp.ones((33, 1))
    Y1 = sp.vstack([ones, zero, zero])
    Y2 = sp.vstack([zero, ones, zero])
    Y3 = sp.vstack([zero, zero, ones])
    Y = sp.hstack((Y1, Y2, Y3))
    X1 = sp.random.multivariate_normal([-2, 2], [[1, .8], [.8, 1]], size=33)
    X2 = sp.random.multivariate_normal([2, -2], [[1, .8], [.8, 1]], size=33)
    X3 = sp.random.multivariate_normal([0, 0], [[1, .8], [.8, 1]], size=33)
    X = sp.hstack((sp.vstack((ones, ones, ones)), sp.vstack((X1, X2, X3))))
    return Y, X
def gauss_contour(vertices, order):
    """ Generates a contour using Gauss-Legendre quadrature. """
    (x, w) = p_roots(order)
    num_segments = len(vertices) - 1
    points = weights = sp.empty(0, complex)
    for i in range(num_segments):
        a = vertices[i]
        b = vertices[i + 1]
        scaled_x = (x * (b - a) + (a + b)) / 2
        scaled_w = w * (b - a) / 2
        points = sp.hstack((points, scaled_x))
        weights = sp.hstack((weights, scaled_w))
    return (points, weights)
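# Hedged usage sketch (assumes `sp` = scipy and p_roots = scipy.special.p_roots,
# as in the snippet): integrate f(z) = z**2 along the straight segments
# 0 -> 1 -> 1+1j by summing weights * f(points).
if __name__ == '__main__':
    import scipy as sp
    from scipy.special import p_roots
    points, weights = gauss_contour([0, 1, 1 + 1j], order=10)
    integral = sp.sum(weights * points**2)
    print(integral)  # exact value is ((1+1j)**3)/3 since z**2 is analytic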
def test_covariate_shift(self):
    n_sample = 100
    # Biased training set
    var_bias = .5**2
    mean_bias = .7
    x_train = SP.random.randn(n_sample)*SP.sqrt(var_bias) + mean_bias
    y_train = self.complete_sample(x_train)
    # Unbiased test set
    var = .3**2
    mean = 0
    x_test = SP.random.randn(n_sample)*SP.sqrt(var) + mean
    x_complete = SP.hstack((x_train, x_test))
    kernel = utils.getQuadraticKernel(x_complete, d=1) +\
        10 * SP.dot(x_complete.reshape(-1, 1), x_complete.reshape(1, -1))
    kernel = utils.scale_K(kernel)
    kernel_train = kernel[SP.ix_(SP.arange(x_train.size),
                                 SP.arange(x_train.size))]
    kernel_test = kernel[SP.ix_(SP.arange(x_train.size, x_complete.size),
                                SP.arange(x_train.size))]
    mf = MF(n_estimators=100, kernel=kernel_train, min_depth=0,
            subsampling=False)
    mf.fit(x_train.reshape(-1, 1), y_train.reshape(-1, 1))
    response_gp = mf.predict(x_test.reshape(-1, 1), kernel_test, depth=0)
    self.assertTrue(((response_gp - self.polynom(x_test))**2).sum() < 2.4)
def run(self):
    i = 1
    self.nS = 0
    while True:
        si = self.Perturba(list(self.s), self.fit)
        aux = self.f(si)
        delta = aux - self.fit
        if (delta < 0) or (math.exp(-delta/self.T) > scipy.rand()):
            self.s = list(si)
            self.fit = aux
            self.nS = self.nS + 1
        i = i + 1
        if (i > self.P) or (self.nS > self.L):
            k = 0
            if self.nS > 0:
                while self.fit > self.hall_of_fame[k][0]:
                    k = k + 1
                    if k == 15:
                        break
                if k < 15:
                    self.hall_of_fame.insert(k, scipy.hstack((self.fit, self.s)))
                    self.hall_of_fame.pop()
            break
    self.T = self.alpha*self.T
    dump_fd = open("dump_sim_ann.pkl", "wb")
    cPickle.dump(self.s, dump_fd)
    cPickle.dump(self.T, dump_fd)
    cPickle.dump(self.fit, dump_fd)
    cPickle.dump(self.hall_of_fame, dump_fd)
    dump_fd.close()
def test_symmetry(self):
    # Test that a basic V-cycle yields a symmetric linear operator. Common
    # reasons for failure are problems with using the same rho for the
    # pre/post-smoothers and using the same block_D_inv for
    # pre/post-smoothers.
    n = 500
    A = poisson((n,), format='csr')
    smoothers = [('gauss_seidel', {'sweep': 'symmetric'}),
                 ('schwarz', {'sweep': 'symmetric'}),
                 ('block_gauss_seidel', {'sweep': 'symmetric'}),
                 'jacobi', 'block_jacobi']
    Bs = [ones((n, 1)),
          hstack((ones((n, 1)),
                  arange(1, n + 1, dtype='float').reshape(-1, 1)))]
    for smoother in smoothers:
        for B in Bs:
            ml = rootnode_solver(A, B, max_coarse=10,
                                 presmoother=smoother,
                                 postsmoother=smoother)
            P = ml.aspreconditioner()
            x = rand(n,)
            y = rand(n,)
            assert_approx_equal(dot(P * x, y), dot(x, P * y))
def getImageDescriptor(model, im, conf):
    im = standardizeImage(im)
    height, width = im.shape[:2]
    numWords = model.vocab.shape[1]
    frames, descrs = getPhowFeatures(im, conf.phowOpts)
    # quantize appearance
    if model.quantizer == 'vq':
        binsa, _ = vq(descrs.T, model.vocab.T)
    elif model.quantizer == 'kdtree':
        raise ValueError('quantizer kdtree not implemented')
    else:
        raise ValueError('quantizer {0} not known or understood'.format(model.quantizer))
    hist = []
    for n_spatial_bins_x, n_spatial_bins_y in zip(model.numSpatialX, model.numSpatialX):
        binsx, distsx = vq(frames[0, :], linspace(0, width, n_spatial_bins_x))
        binsy, distsy = vq(frames[1, :], linspace(0, height, n_spatial_bins_y))
        # binsx and binsy list to what spatial bin each feature point belongs
        if (numpy.any(distsx < 0)) | (numpy.any(distsx > (width/n_spatial_bins_x + 0.5))):
            print("something went wrong")
            import pdb; pdb.set_trace()
        if (numpy.any(distsy < 0)) | (numpy.any(distsy > (height/n_spatial_bins_y + 0.5))):
            print("something went wrong")
            import pdb; pdb.set_trace()
        # combined quantization
        number_of_bins = n_spatial_bins_x * n_spatial_bins_y * numWords
        temp = arange(number_of_bins)
        # update using this: http://stackoverflow.com/questions/15230179/how-to-get-the-linear-index-for-a-numpy-array-sub2ind
        temp = temp.reshape([n_spatial_bins_x, n_spatial_bins_y, numWords])
        bin_comb = temp[binsx, binsy, binsa]
        hist_temp, _ = histogram(bin_comb, bins=range(number_of_bins + 1), density=True)
        hist.append(hist_temp)

    hist = hstack(hist)
    hist = array(hist, 'float32') / sum(hist)
    return hist
def trainVocab(selTrain, all_images, conf):
    selTrainFeats = sample(selTrain, conf.images_for_histogram)
    descrs = []
    if MULTIPROCESSING:
        raise ValueError('MULTIPROCESSING not implemented')
        #pool = Pool(processes=30)
        #list_of_train_images = [all_images[i] for i in selTrainFeats]
        #descrs.append(pool.map_async(getPhowFeatures, list_of_train_images).get())
    else:
        for i in selTrainFeats:
            im = imread(all_images[i])
            # the '[1]' is there because we only want the descriptors and not the frames
            descrs.append(getPhowFeatures(im, conf.phowOpts)[1])

    descrs = hstack(descrs)
    n_features = descrs.shape[1]
    sample_indices = sample(arange(n_features), conf.numbers_of_features_for_histogram)
    descrs = descrs[:, sample_indices]
    descrs = array(descrs, 'uint8')

    # Quantize the descriptors to get the visual words
    vocab, _ = vl_ikmeans(descrs, K=conf.numWords,
                          verbose=conf.verbose, method='elkan')
    return vocab
def get_loss_grad(self, w_vector, *args):
    X = args[0]
    Y = args[1]
    Gobs = args[2]
    reg_type = args[3]
    reg_lambda = args[4]
    wfull = scipy.reshape(w_vector, ((shape(X)[1] + 1), shape(Y)[1]))
    B = self.get_energy(X, wfull[:-1, :], wfull[-1, :])
    G_pred = scipy.hstack(((exp(B).transpose()*X), scipy.sum(exp(B).transpose(), axis=1)))
    # Cross entropy:
    vv = scipy.sum(np.multiply(B, Y), axis=1)
    # Calculate and subtract the entropy of Y to get the KL-divergence
    Ypl = log(Y)
    Ypl[Y == 0] = 0
    Ypl = np.multiply(Ypl, Y)
    vv = vv - scipy.sum(Ypl, axis=1)
    # Get the mean of the KL-divergence
    V = sum(vv)/float(shape(X)[0])
    G = np.array(Gobs - G_pred).transpose()
    G = np.reshape(G, size(G))/float(shape(X)[0])
    V_reg, G_reg = self.get_regularization_loss_grad(w_vector, X, Y, reg_type, reg_lambda)
    V += V_reg
    G += G_reg
    if self.verbose:
        print -V,
    return -V, -np.array(G)
def estimateBeta(X, Y, K, C=None, addBiasTerm=False, numintervals0=100,
                 ldeltamin0=-5.0, ldeltamax0=5.0):
    """ compute all pvalues
    If numintervalsAlt==0 use the EMMA-X trick (keep delta fixed over alternative models)
    """
    n, s = X.shape
    n_pheno = Y.shape[1]
    S, U = LA.eigh(K)
    UY = SP.dot(U.T, Y)
    UX = SP.dot(U.T, X)
    if C is None:
        Ucovariate = SP.dot(U.T, SP.ones([n, 1]))
    else:
        if addBiasTerm:
            C_ = SP.concatenate((C, SP.ones([n, 1])), axis=1)
            Ucovariate = SP.dot(U.T, C_)
        else:
            Ucovariate = SP.dot(U.T, C)
    n_covar = Ucovariate.shape[1]
    beta = SP.empty((n_pheno, s, n_covar + 1))
    LL = SP.ones((n_pheno, s)) * (-SP.inf)
    ldelta = SP.empty((n_pheno, s))
    sigg2 = SP.empty((n_pheno, s))
    pval = SP.ones((n_pheno, s)) * (-SP.inf)
    for phen in SP.arange(n_pheno):
        UY_ = UY[:, phen]
        ldelta[phen] = optdelta(UY_, Ucovariate, S, ldeltanull=None,
                                numintervals=numintervals0,
                                ldeltamin=ldeltamin0, ldeltamax=ldeltamax0)
        for snp in SP.arange(s):
            UX_ = SP.hstack((UX[:, snp:snp + 1], Ucovariate))
            nLL_, beta_, sigg2_ = nLLeval(ldelta[phen, snp], UY_, UX_, S, MLparams=True)
            beta[phen, snp, :] = beta_
            sigg2[phen, snp] = sigg2_
            LL[phen, snp] = -nLL_
    return beta, ldelta
def gpmapasrecc(optstate, **para):
    if para["onlyafter"] > len(optstate.y) or not len(optstate.y) % para["everyn"] == 0:
        return [sp.NaN for i in para["lb"]], {"didnotrun": True}
    logger.info("gpmapas recommender")
    d = len(para["lb"])
    x = sp.hstack([sp.vstack(optstate.x), sp.vstack([e["xa"] for e in optstate.ev])])
    y = sp.vstack(optstate.y)
    s = sp.vstack([e["s"] for e in optstate.ev])
    dx = [e["d"] for e in optstate.ev]
    MAP = GPdc.searchMAPhyp(x, y, s, dx, para["mprior"], para["sprior"], para["kindex"])
    logger.info("MAPHYP {}".format(MAP))
    G = GPdc.GPcore(x, y, s, dx, GPdc.kernel(para["kindex"], d + 1, MAP))

    def directwrap(xq, y):
        xq.resize([1, d])
        xe = sp.hstack([xq, sp.array([[0.0]])])
        # print xe
        a = G.infer_m(xe, [[sp.NaN]])
        return (a[0, 0], 0)

    [xmin, ymin, ierror] = DIRECT.solve(
        directwrap, para["lb"], para["ub"], user_data=[], algmethod=1,
        volper=para["volper"], logfilename="/dev/null"
    )
    logger.info("reccsearchresult: {}".format([xmin, ymin, ierror]))
    return [i for i in xmin], {"MAPHYP": MAP, "ymin": ymin}
def load(cls, path, prefix, network=None):
    r"""
    Load data from the 'dat' files located in the specified folder.

    Parameters
    ----------
    path : string
        The full path to the folder containing the set of 'dat' files.
    prefix : string
        The file name prefix on each file. The data files are stored as
        <prefix>_node1.dat.
    network : OpenPNM Network Object
        If given then the data will be loaded on it and returned.  If not
        given, a Network will be created and returned.

    Returns
    -------
    An OpenPNM Project containing a GenericNetwork holding all the data
    """
    net = {}
    # -------------------------------------------------------------------
    # Parse the link1 file
    path = Path(path)
    filename = Path(path.resolve(), prefix + '_link1.dat')
    with open(filename, mode='r') as f:
        link1 = pd.read_table(filepath_or_buffer=f, header=None, skiprows=1,
                              sep=' ', skipinitialspace=True, index_col=0)
    link1.columns = ['throat.pore1', 'throat.pore2', 'throat.radius',
                     'throat.shape_factor', 'throat.total_length']
    # Add link1 props to net
    net['throat.conns'] = sp.vstack((link1['throat.pore1'] - 1,
                                     link1['throat.pore2'] - 1)).T
    net['throat.conns'] = sp.sort(net['throat.conns'], axis=1)
    net['throat.radius'] = sp.array(link1['throat.radius'])
    net['throat.shape_factor'] = sp.array(link1['throat.shape_factor'])
    net['throat.total_length'] = sp.array(link1['throat.total_length'])
    # -------------------------------------------------------------------
    # Parse the link2 file
    filename = Path(path.resolve(), prefix + '_link2.dat')
    with open(filename, mode='r') as f:
        link2 = pd.read_table(filepath_or_buffer=f, header=None, sep=' ',
                              skipinitialspace=True, index_col=0)
    link2.columns = ['throat.pore1', 'throat.pore2',
                     'throat.pore1_length', 'throat.pore2_length',
                     'throat.length', 'throat.volume', 'throat.clay_volume']
    # Add link2 props to net
    cl_t = sp.array(link2['throat.length'])
    net['throat.length'] = cl_t
    net['throat.conduit_lengths.throat'] = cl_t
    net['throat.volume'] = sp.array(link2['throat.volume'])
    cl_p1 = sp.array(link2['throat.pore1_length'])
    net['throat.conduit_lengths.pore1'] = cl_p1
    cl_p2 = sp.array(link2['throat.pore2_length'])
    net['throat.conduit_lengths.pore2'] = cl_p2
    net['throat.clay_volume'] = sp.array(link2['throat.clay_volume'])
    # -------------------------------------------------------------------
    # Parse the node1 file
    filename = Path(path.resolve(), prefix + '_node1.dat')
    with open(filename, mode='r') as f:
        row_0 = f.readline().split()
        num_lines = int(row_0[0])
        array = sp.ndarray([num_lines, 6])
        for i in range(num_lines):
            row = f.readline()\
                   .replace('\t', ' ').replace('\n', ' ').split()
            array[i, :] = row[0:6]
    node1 = pd.DataFrame(array[:, [1, 2, 3, 4]])
    node1.columns = ['pore.x_coord', 'pore.y_coord', 'pore.z_coord',
                     'pore.coordination_number']
    # Add node1 props to net
    net['pore.coords'] = sp.vstack((node1['pore.x_coord'],
                                    node1['pore.y_coord'],
                                    node1['pore.z_coord'])).T
    # -------------------------------------------------------------------
    # Parse the node2 file
    filename = Path(path.resolve(), prefix + '_node2.dat')
    with open(filename, mode='r') as f:
        node2 = pd.read_table(filepath_or_buffer=f, header=None, sep=' ',
                              skipinitialspace=True, index_col=0)
    node2.columns = ['pore.volume', 'pore.radius', 'pore.shape_factor',
                     'pore.clay_volume']
    # Add node2 props to net
    net['pore.volume'] = sp.array(node2['pore.volume'])
    net['pore.radius'] = sp.array(node2['pore.radius'])
    net['pore.shape_factor'] = sp.array(node2['pore.shape_factor'])
    net['pore.clay_volume'] = sp.array(node2['pore.clay_volume'])
    net['throat.area'] = ((net['throat.radius']**2) /
                          (4.0 * net['throat.shape_factor']))
    net['pore.area'] = ((net['pore.radius']**2) /
                        (4.0 * net['pore.shape_factor']))

    if network is None:
        network = GenericNetwork()
    network = cls._update_network(network=network, net=net)

    # Use OpenPNM Tools to clean up network
    # Trim throats connected to 'inlet' or 'outlet' reservoirs
    trim1 = sp.where(sp.any(net['throat.conns'] == -1, axis=1))[0]
    # Apply 'outlet' label to these pores
    outlets = network['throat.conns'][trim1, 1]
    network['pore.outlets'] = False
    network['pore.outlets'][outlets] = True
    trim2 = sp.where(sp.any(net['throat.conns'] == -2, axis=1))[0]
    # Apply 'inlet' label to these pores
    inlets = network['throat.conns'][trim2, 1]
    network['pore.inlets'] = False
    network['pore.inlets'][inlets] = True
    # Now trim the throats
    to_trim = sp.hstack([trim1, trim2])
    trim(network=network, throats=to_trim)

    return network.project
def plot_phen_relatedness(self, k, k_accessions, plot_file_prefix, pids=None):
    import kinship
    import pylab
    import scipy as sp
    from scipy import linalg
    if not pids:
        pids = self.get_pids()
    self.convert_to_averages(pids)
    self.filter_ecotypes_2(k_accessions, pids)
    for pid in pids:
        ets = self.get_ecotypes(pid)
        vals = self.get_values(pid)
        k_m = kinship.prepare_k(k, k_accessions, ets)
        c = sp.sum((sp.eye(len(k_m)) - (1.0 / len(k_m)) * sp.ones(k_m.shape)) * sp.array(k_m))
        k_scaled = (len(k) - 1) * k / c
        p_her = self.get_pseudo_heritability(pid, k_m)
        x_list = []
        y_list = []
        for i in range(len(ets)):
            for j in range(i):
                x_list.append(k_m[i, j])
                y_list.append(vals[i] - vals[j])
        ys = sp.array(y_list)
        ys = ys * ys
        xs = sp.array(x_list)
        phen_name = self.get_name(pid)
        phen_name = phen_name.replace('<i>', '')
        phen_name = phen_name.replace('</i>', '')
        phen_name = phen_name.replace('+', '_plus_')
        phen_name = phen_name.replace('/', '_div_')
        file_name = plot_file_prefix + '_%d_%s.png' % (pid, phen_name)
        pylab.figure()
        pylab.plot(xs, ys, 'k.', alpha=0.2)
        pylab.xlabel('Relatedness')
        pylab.ylabel('Squared phenotypic difference')
        # Plot regression line
        Y_mat = sp.mat(ys).T
        X_mat = sp.hstack((sp.mat(sp.ones(len(xs))).T, sp.mat(xs).T))
        (betas, residues, rank, s) = linalg.lstsq(X_mat, Y_mat)
        x_min, x_max = pylab.xlim()
        pylab.plot([x_min, x_max],
                   [betas[0] + x_min * betas[1], betas[0] + x_max * betas[1]])
        corr = sp.corrcoef(xs, ys)[0, 1]
        y_min, y_max = pylab.ylim()
        x_range = x_max - x_min
        y_range = y_max - y_min
        pylab.axis([x_min - 0.025 * x_range, x_max + 0.025 * x_range,
                    y_min - 0.025 * y_range, y_max + 0.15 * y_range])
        pylab.text(x_min + 0.1 * x_range, y_max + 0.03 * y_range,
                   'Correlation: %0.4f' % (corr))
        pylab.text(x_min + 0.5 * x_range, y_max + 0.03 * y_range,
                   'Pseudo-heritability: %0.4f' % (p_her))
        pylab.savefig(file_name)
        del k_m
        del k_scaled
# at around 170 epochs though.
# Training on 5000 samples of (0,0.1) and validating on 5000 (0,100000) gives
# validation MAPE of 149 (but it's several orders of magnitude off for position).
# Also bad when both windows are (0,1000). (0,1) is really the only good choice
# for convergence at the current learning rate and with 4 linear dense layers.
plotHistory = True
batchSize = 32
numEpochs = 1000
# row: time; col: x, v, dt
numSamples = 500
# meters and seconds
randomPositions = sp.multiply(sp.rand(numSamples, 1), 1)
randomVelocities = sp.multiply(sp.rand(numSamples, 1), 1)
randomAccels = sp.multiply(sp.rand(numSamples, 1), 1)
randomDeltaTs = sp.multiply(sp.rand(numSamples, 1), 1)
state_input = sp.hstack(
    (randomPositions, randomVelocities, randomAccels, randomDeltaTs))
state_output = sp.hstack(
    (randomPositions + sp.multiply(randomVelocities, randomDeltaTs)
     + sp.multiply(sp.multiply(randomAccels, 0.5), sp.power(randomDeltaTs, 2)),
     randomVelocities + sp.multiply(randomDeltaTs, randomAccels),
     randomAccels))
# state_output = sp.hstack((randomPositions+sp.multiply(randomVelocities,randomDeltaTs)+sp.multiply(sp.multiply(randomAccels,0.5),sp.power(randomDeltaTs,2))
#                           ,randomVelocities+sp.multiply(randomDeltaTs,randomAccels),randomAccels, randomDeltaTs))
randomPositions_val = sp.multiply(sp.rand(numSamples, 1), 1)
randomVelocities_val = sp.multiply(sp.rand(numSamples, 1), 1)
randomAccels_val = sp.multiply(sp.rand(numSamples, 1), 1)
randomDeltaTs_val = sp.multiply(sp.rand(numSamples, 1), 1)
val_input = sp.hstack((randomPositions_val, randomVelocities_val,
                       randomAccels_val, randomDeltaTs_val))
# validation targets mirror the training kinematics above (x + v*dt + 0.5*a*dt^2, v + a*dt, a)
val_output = sp.hstack(
    (randomPositions_val + sp.multiply(randomVelocities_val, randomDeltaTs_val)
     + sp.multiply(sp.multiply(randomAccels_val, 0.5), sp.power(randomDeltaTs_val, 2)),
     randomVelocities_val + sp.multiply(randomDeltaTs_val, randomAccels_val),
     randomAccels_val))
import scipy
import matplotlib.pyplot as plt
import calibrate

# load the odometry measurements
odom_motions = scipy.loadtxt('odom_motions.dat')
# the motions as they are estimated by scan-matching
scanmatched_motions = scipy.loadtxt('scanmatched_motions.dat')

# create our measurements vector z
z = scipy.hstack((scanmatched_motions, odom_motions))

# perform the calibration
X = calibrate.ls_calibrate_odometry(z)
print('calibration result')
print(X)

# apply the estimated calibration parameters
calibrated_motions = calibrate.apply_odometry_correction(X, odom_motions)

# compute the current odometry trajectory, the scan-match result, and the calibrated odometry
odom_trajectory = calibrate.compute_trajectory(odom_motions)
scanmatch_trajectory = calibrate.compute_trajectory(scanmatched_motions)
calibrated_trajectory = calibrate.compute_trajectory(calibrated_motions)

# plot the trajectories
plt.plot(odom_trajectory[:, 0], odom_trajectory[:, 1], label="Uncalibrated Odometry")
plt.plot(scanmatch_trajectory[:, 0], scanmatch_trajectory[:, 1], label="Scan-Matching")
plt.plot(calibrated_trajectory[:, 0], calibrated_trajectory[:, 1], label="Calibrated Odometry")
plt.legend()
# Creating the Cosine_sim for the test dataset:
for i in matrixs_test:
    temp = cosine_similarity(i)
    Metrics_test.append(temp)

'''
Not real sure if I need to do this or not.
This will merge all the sparse dataframes together to get one big dataset
to run through the SVM.
'''
from scipy import hstack

A = Metrics_train[0]
B = Metrics_train[1]
C = Metrics_train[2]
part = hstack([A, B])
full = hstack([part, C])

A = Metrics_test[0]
B = Metrics_test[1]
C = Metrics_test[2]
part_test = hstack([A, B])
full_test = hstack([part_test, C])

# Construct Models
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LogisticRegression
def pltcdf(Y, C, ax, col):
    return ax.plot(sp.hstack([[i, i] for i in Y])[1:-1],
                   sp.hstack([[i - C[0], i] for i in C])[1:-1],
                   color=col, label='Sampled CDF')
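# Hedged usage sketch (assumes `sp` = scipy): Y holds sorted sample values and
# C the corresponding cumulative probabilities, so the doubled points trace a
# step-function empirical CDF.
if __name__ == '__main__':
    import scipy as sp
    import matplotlib.pyplot as plt
    samples = sp.sort(sp.random.randn(20))
    cum = sp.arange(1, 21) / 20.
    fig, ax = plt.subplots()
    pltcdf(samples, cum, ax, 'k')
    plt.show()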
def makeAzo(azo_off_path, da_p, r_e, p_h, dr, angle, wsamp, rwin, awin,
            search_x, search_y, width, length):
    index = azo_off_path.rfind("/")
    azo_dir = azo_off_path[:index]
    if index < 0:
        azo_dir = "."
    cmd = "sed -e '/\*/d' " + azo_off_path + " > temp\nmv " + azo_off_path + \
          " " + azo_off_path + ".old\nmv temp " + azo_off_path + "\n"

    infile = open(azo_off_path, "r")
    # Read entire file into matrix "indat" which is reshaped to have 8 columns
    #indat = pylab.fromfile(infile,pylab.float32,-1).reshape(-1,8);
    indat = numpy.genfromtxt(infile)
    infile.close()

    #da_p = ;  # azimuth pixel size at orbit radius
    #r_e = ;   # earth radius
    #p_h = ;   # platform height
    r_p = r_e + p_h                    # platform radius
    da_e = da_p * r_e / r_p * 100      # az pixel size at earth surface, cm
    #dr = ;   # range pixel size
    la = angle * pylab.pi / 180        # look angle
    dr_g = dr / pylab.sin(la) * 100    # ground pixel size in range direction, cm

    x1ind = scipy.matrix([indat[:, 0]], pylab.int32).conj().transpose()
    dx = scipy.matrix([indat[:, 1]]).conj().transpose()
    y1ind = scipy.matrix([indat[:, 2]], pylab.int32).conj().transpose()
    dy = scipy.matrix([indat[:, 3]]).conj().transpose()
    snr = scipy.matrix([indat[:, 4]]).conj().transpose()
    c11 = scipy.matrix([scipy.sqrt(indat[:, 5])]).conj().transpose()  # 1 sigma drng
    c22 = scipy.matrix([scipy.sqrt(indat[:, 6])]).conj().transpose()  # 1 sigma dazo
    c12 = scipy.matrix([indat[:, 7]]).conj().transpose()

    # these may need to be hardwired for eventual geocoding
    #width = max(x1ind)+rwin
    #length = max(y1ind)+awin
    # must read in from azo.rsc file

    x1 = x1ind * dr_g
    dx = dx * dr_g
    y1 = y1ind * da_e
    dy = dy * da_e
    c11 = c11 * dr_g  # 1 sigma drng
    c22 = c22 * da_e  # 1 sigma dazo
    x2 = x1 + dx
    y2 = y1 + dy

    rlooks = rwin / wsamp
    alooks = awin / wsamp
    width1 = scipy.floor(width / rlooks)
    length1 = scipy.floor(length / alooks)
    [xg, yg] = scipy.meshgrid(scipy.arange(1, width1 + 1, 1),
                              scipy.arange(1, length1 + 1, 1))
    xg = xg * dr_g * rlooks / 1e5  # convert from pix to km
    yg = yg * da_e * alooks / 1e5  # convert from pix to km

    # load_azo
    sigy_thresh = 1e1000   # cm
    sigx_thresh = 1e1000   # cm
    snr_thresh = 0         # (not log10)
    mag_threshx = 1e1000   # cm
    mag_threshy = 1e1000   # cm

    # initial mask
    c22good = scipy.matrix(pylab.find(c22 < sigy_thresh)).conj().transpose()
    c11good = scipy.matrix(pylab.find(c11 < sigx_thresh)).conj().transpose()
    snrgood = scipy.matrix(pylab.find(snr > snr_thresh)).conj().transpose()
    good = (scipy.matrix(
        scipy.unique(
            scipy.asarray(
                scipy.concatenate((snrgood, c11good, c22good),
                                  axis=0))))).conj().transpose()

    x1good = x1[good].reshape(-1, 1)
    x1goodind = x1ind[good].reshape(-1, 1)
    y1good = y1[good].reshape(-1, 1)
    y1goodind = y1ind[good].reshape(-1, 1)
    x2good = x2[good].reshape(-1, 1)
    y2good = y2[good].reshape(-1, 1)

    # get and remove affine fit
    good2 = scipy.matrix(pylab.find(good < 300000)).conj().transpose()
    x1good = x1[good2].reshape(-1, 1)
    y1good = y1[good2].reshape(-1, 1)
    x2good = x2[good2].reshape(-1, 1)
    y2good = y2[good2].reshape(-1, 1)
    c0 = scipy.matrix(scipy.zeros((scipy.size(good2)))).reshape(-1, 1)
    c1 = scipy.matrix(scipy.ones((scipy.size(good2)))).reshape(-1, 1)
    n = c1.shape[0]

    A = scipy.vstack((scipy.hstack((x1good, y1good, c0, c0, c1, c0)),
                      scipy.hstack((c0, c0, x1good, y1good, c0, c1))))
    b = scipy.vstack((x2good, y2good))
    M = numpy.linalg.lstsq(A, b)[0]
    pred = A * M
    res = pred - b
    # std() in python defaults to 0 degrees of freedom
    resdev = res.std(axis=0, ddof=1)
    q = pylab.find(abs(res) < 1.5 * resdev)
    A1 = A[q, ]
    b1 = b[q]
    M = numpy.linalg.lstsq(A1, b1)[0]
    pred = A * M

    x1good = x1[good].reshape(-1, 1)
    x1goodind = x1ind[good].reshape(-1, 1)
    y1good = y1[good].reshape(-1, 1)
    y1goodind = y1ind[good].reshape(-1, 1)
    x2good = x2[good].reshape(-1, 1)
    y2good = y2[good].reshape(-1, 1)
    c0 = scipy.matrix(scipy.zeros((scipy.size(good)))).reshape(-1, 1)
    c1 = scipy.matrix(scipy.ones((scipy.size(good)))).reshape(-1, 1)
    n = c1.shape[0]
    A = scipy.vstack((scipy.hstack((x1good, y1good, c0, c0, c1, c0)),
                      scipy.hstack((c0, c0, x1good, y1good, c0, c1))))
    b = scipy.vstack((x2good, y2good))
    pred = A * M
    n = c1.shape[0]
    res = pred - b
    resdx = res[0:n]
    resdy = res[(n):(2 * n)]

    # remap into matrix
    newx = scipy.matrix(scipy.ceil(x1goodind / rlooks), pylab.int32)
    newy = scipy.matrix(scipy.floor(y1goodind / alooks), pylab.int32)
    vind = scipy.asarray((newy - 1) * width1 + newx, pylab.int32).reshape(-1)
    temp = scipy.matrix(0 * (scipy.arange(1, length1 * width1 + 1, 1))).conj().transpose()
    temp[vind] = resdy
    dyg = temp.reshape(length1, width1)
    temp = scipy.matrix(0 * (scipy.arange(1, length1 * width1 + 1, 1))).conj().transpose()
    temp[vind] = resdx
    dxg = temp.reshape(length1, width1)

    # setup mask indices
    newx = scipy.matrix(scipy.ceil(x1ind / rlooks), pylab.int32)
    newy = scipy.matrix(scipy.floor(y1ind / alooks), pylab.int32)
    vind = scipy.asarray((newy - 1) * width1 + newx, pylab.int32).reshape(-1)

    # sigma_y mask
    temp = scipy.NaN * scipy.matrix(scipy.arange(0, length1 * width1, 1)).conj().transpose()
    temp[vind] = c22
    sigyg = temp.reshape(length1, width1)
    mask_sigy = scipy.zeros(dyg.shape)
    mask_sigy[(sigyg > sigy_thresh)] = scipy.NaN

    # sigma_x mask
    temp = scipy.NaN * scipy.matrix(scipy.arange(0, length1 * width1, 1)).conj().transpose()
    temp[vind] = c11
    sigxg = temp.reshape(length1, width1)
    mask_sigx = scipy.zeros(dxg.shape)
    mask_sigx[(sigxg > sigx_thresh)] = scipy.NaN

    # SNR mask
    temp = scipy.NaN * scipy.matrix(scipy.arange(0, length1 * width1, 1)).conj().transpose()
    temp[vind] = snr
    snrg = temp.reshape(length1, width1)
    mask_snr = scipy.zeros(dyg.shape)
    mask_snr[(snrg < snr_thresh)] = scipy.NaN

    # mag mask y
    mask_magy = scipy.zeros(dyg.shape)
    mask_magy[abs(dyg) > mag_threshy] = scipy.NaN

    # mag mask x
    mask_magx = scipy.zeros(dxg.shape)
    mask_magx[abs(dxg) > mag_threshx] = scipy.NaN

    # final mask
    mask_total = mask_snr + mask_sigy + mask_magy
    bad = scipy.isnan(mask_total)
    dyg[bad] = scipy.NaN
    mask_total = mask_snr + mask_sigx + mask_magx
    bad = scipy.isnan(mask_total)
    dxg[bad] = scipy.NaN

    # dump output to binary file
    outg = scipy.hstack((abs(dyg), dyg))
    outr = scipy.hstack((dxg, dxg))
    outsnr = scipy.hstack((snrg, snrg))
    ind = scipy.isnan(outg)
    outg[ind == 1] = 0
    outr[ind == 1] = 0

    outfile = open(
        azo_dir + "/azimuth_r" + str(rwin) + "x" + str(awin) + "_s" + search_x +
        "x" + search_y + "_" + str(int(rwin) / int(wsamp)) + "rlks.unw", 'wb')
    outg = scipy.matrix(outg, scipy.float32)
    outg.tofile(outfile)
    outfile.close()

    outfile = open(
        azo_dir + "/range_r" + str(rwin) + "x" + str(awin) + "_s" + search_x +
        "x" + search_y + "_" + str(int(rwin) / int(wsamp)) + "rlks.unw", 'wb')
    outr = scipy.matrix(outr, scipy.float32)
    outr.tofile(outfile)
    outfile.close()

    outfile = open(
        azo_dir + "/snr_r" + str(rwin) + "x" + str(awin) + "_s" + search_x +
        "x" + search_y + "_" + str(int(rwin) / int(wsamp)) + "rlks.unw", 'wb')
    outsnr = scipy.matrix(outsnr, scipy.float32)
    outsnr.tofile(outfile)
    outfile.close()
    return
def calc_risk_scores(bed_file, rs_id_map, phen_map, out_file=None,
                     split_by_chrom=False, adjust_for_sex=False,
                     adjust_for_covariates=False, adjust_for_pcs=False):
    print('Parsing PLINK bed file: %s' % bed_file)
    num_individs = len(phen_map)
    assert num_individs > 0, 'No individuals found. Problems parsing the phenotype file?'

    if split_by_chrom:
        raw_effects_prs = sp.zeros(num_individs)
        pval_derived_effects_prs = sp.zeros(num_individs)
        for i in range(1, 23):
            if i in non_zero_chromosomes:
                genotype_file = bed_file + '_%i_keep' % i
                if os.path.isfile(genotype_file + '.bed'):
                    print('Working on chromosome %d' % i)
                    prs_dict = get_prs(genotype_file, rs_id_map, phen_map)
                    raw_effects_prs += prs_dict['raw_effects_prs']
                    pval_derived_effects_prs += prs_dict['pval_derived_effects_prs']
                    # raw_eff_r2 = (sp.corrcoef(raw_effects_prs, prs_dict['true_phens'])[0,1])**2
                    # pval_eff_r2 = (sp.corrcoef(pval_derived_effects_prs, prs_dict['true_phens'])[0,1])**2
                    # print 'Overall raw effects PRS r2: %0.4f'%raw_eff_r2
                    # print 'Overall weigted effects PRS r2: %0.4f'%pval_eff_r2
            else:
                print('Skipping chromosome')
    else:
        prs_dict = get_prs(bed_file, rs_id_map, phen_map)
        raw_effects_prs = prs_dict['raw_effects_prs']
        pval_derived_effects_prs = prs_dict['pval_derived_effects_prs']

    true_phens = prs_dict['true_phens']

    # Report prediction accuracy
    raw_eff_corr = sp.corrcoef(raw_effects_prs, prs_dict['true_phens'])[0, 1]
    raw_eff_r2 = raw_eff_corr**2
    pval_eff_corr = sp.corrcoef(pval_derived_effects_prs, prs_dict['true_phens'])[0, 1]
    pval_eff_r2 = pval_eff_corr**2

    print('Final raw effects PRS correlation: %0.4f' % raw_eff_corr)
    print('Final raw effects PRS r2: %0.4f' % raw_eff_r2)
    print('Final weighted effects PRS correlation: %0.4f' % pval_eff_corr)
    print('Final weighted effects PRS r2: %0.4f' % pval_eff_r2)

    res_dict = {'pred_r2': pval_eff_r2}

    raw_effects_prs.shape = (len(raw_effects_prs), 1)
    pval_derived_effects_prs.shape = (len(pval_derived_effects_prs), 1)
    true_phens = sp.array(true_phens)
    true_phens.shape = (len(true_phens), 1)

    # Store covariate weights, slope, etc.
    weights_dict = {}
    # Store adjusted predictions
    adj_pred_dict = {}

    # Direct effect
    Xs = sp.hstack([pval_derived_effects_prs, sp.ones((len(true_phens), 1))])
    (betas, rss00, r, s) = linalg.lstsq(sp.ones((len(true_phens), 1)), true_phens)
    (betas, rss, r, s) = linalg.lstsq(Xs, true_phens)
    pred_r2 = 1 - rss / rss00
    # print 'Fitted effects (betas) for PRS, and intercept on true phenotype:', betas
    weights_dict['unadjusted'] = {
        'Intercept': betas[1][0],
        'ldpred_prs_effect': betas[0][0]
    }
    # print pred_r2

    # Adjust for sex
    if adjust_for_sex and 'sex' in prs_dict and len(prs_dict['sex']) > 0:
        sex = sp.array(prs_dict['sex'])
        sex.shape = (len(sex), 1)
        (betas, rss0, r, s) = linalg.lstsq(
            sp.hstack([sex, sp.ones((len(true_phens), 1))]), true_phens)
        (betas, rss, r, s) = linalg.lstsq(
            sp.hstack([raw_effects_prs, sex, sp.ones((len(true_phens), 1))]), true_phens)
        Xs = sp.hstack([pval_derived_effects_prs, sex, sp.ones((len(true_phens), 1))])
        (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
        weights_dict['sex_adj'] = {
            'Intercept': betas[2][0],
            'ldpred_prs_effect': betas[0][0],
            'sex': betas[1][0]
        }
        print('Fitted effects (betas) for PRS, sex, and intercept on true phenotype:', betas)
        adj_pred_dict['sex_adj'] = sp.dot(Xs, betas)
        pred_r2 = 1 - rss / rss0
        print('Sex adjusted prediction accuracy (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)'
              % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
        pred_r2 = 1 - rss / rss00
        print('Sex adjusted prediction + Sex (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)'
              % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
        pred_r2 = 1 - rss_pd / rss0
        print('Sex adjusted prediction accuracy (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)'
              % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
        res_dict['PC_adj_pred_r2'] = pred_r2
        pred_r2 = 1 - rss_pd / rss00
        print('Sex adjusted prediction + Sex (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)'
              % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
        res_dict['PC_adj_pred_r2+PC'] = pred_r2

    # Adjust for PCs
    if adjust_for_pcs and 'pcs' in prs_dict and len(prs_dict['pcs']) > 0:
        pcs = prs_dict['pcs']
        (betas, rss0, r, s) = linalg.lstsq(
            sp.hstack([pcs, sp.ones((len(true_phens), 1))]), true_phens)
        (betas, rss, r, s) = linalg.lstsq(
            sp.hstack([raw_effects_prs, pcs, sp.ones((len(true_phens), 1))]), true_phens)
        Xs = sp.hstack([pval_derived_effects_prs, sp.ones((len(true_phens), 1)), pcs])
        (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
        weights_dict['pc_adj'] = {
            'Intercept': betas[1][0],
            'ldpred_prs_effect': betas[0][0],
            'pcs': betas[2][0]
        }
        adj_pred_dict['pc_adj'] = sp.dot(Xs, betas)
        pred_r2 = 1 - rss / rss0
        print('PC adjusted prediction accuracy (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)'
              % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
        pred_r2 = 1 - rss / rss00
        print('PC adjusted prediction + PCs (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)'
              % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
        pred_r2 = 1 - rss_pd / rss0
        print('PC adjusted prediction accuracy (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)'
              % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
        res_dict['PC_adj_pred_r2'] = pred_r2
        pred_r2 = 1 - rss_pd / rss00
        print('PC adjusted prediction + PCs (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)'
              % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
        res_dict['PC_adj_pred_r2+PC'] = pred_r2

        # Adjust for both PCs and Sex
        if adjust_for_sex and 'sex' in prs_dict and len(prs_dict['sex']) > 0:
            sex = sp.array(prs_dict['sex'])
            sex.shape = (len(sex), 1)
            (betas, rss0, r, s) = linalg.lstsq(
                sp.hstack([sex, pcs, sp.ones((len(true_phens), 1))]), true_phens)
            (betas, rss, r, s) = linalg.lstsq(
                sp.hstack([raw_effects_prs, sex, pcs, sp.ones((len(true_phens), 1))]), true_phens)
            Xs = sp.hstack([pval_derived_effects_prs, sex, sp.ones((len(true_phens), 1)), pcs])
            (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
            weights_dict['sex_pc_adj'] = {
                'Intercept': betas[2][0],
                'ldpred_prs_effect': betas[0][0],
                'sex': betas[1][0],
                'pcs': betas[3][0]
            }
            adj_pred_dict['sex_pc_adj'] = sp.dot(Xs, betas)
            pred_r2 = 1 - rss / rss0
            print('PCs+Sex adjusted prediction accuracy (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)'
                  % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            pred_r2 = 1 - rss / rss00
            print('PCs+Sex adjusted prediction and PCs+Sex (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)'
                  % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            pred_r2 = 1 - rss_pd / rss0
            print('PCs+Sex adjusted prediction accuracy (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)'
                  % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            res_dict['PC_Sex_adj_pred_r2'] = pred_r2
            pred_r2 = 1 - rss_pd / rss00
            print('PCs+Sex adjusted prediction and PCs+Sex (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)'
                  % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            res_dict['PC_Sex_adj_pred_r2+PC_Sex'] = pred_r2

    # Adjust for covariates
    if adjust_for_covariates and 'covariates' in prs_dict and len(prs_dict['covariates']) > 0:
        covariates = prs_dict['covariates']
        (betas, rss0, r, s) = linalg.lstsq(
            sp.hstack([covariates, sp.ones((len(true_phens), 1))]), true_phens)
        (betas, rss, r, s) = linalg.lstsq(
            sp.hstack([raw_effects_prs, covariates, sp.ones((len(true_phens), 1))]), true_phens)
        Xs = sp.hstack([pval_derived_effects_prs, covariates, sp.ones((len(true_phens), 1))])
        (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
        adj_pred_dict['cov_adj'] = sp.dot(Xs, betas)
        pred_r2 = 1 - rss / rss0
        print('Cov adjusted prediction accuracy (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)'
              % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
        pred_r2 = 1 - rss / rss00
        print('Cov adjusted prediction + Cov (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)'
              % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
        pred_r2 = 1 - rss_pd / rss0
        print('Cov adjusted prediction accuracy (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)'
              % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
        res_dict['Cov_adj_pred_r2'] = pred_r2
        pred_r2 = 1 - rss_pd / rss00
        print('Cov adjusted prediction + Cov (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)'
              % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
        res_dict['Cov_adj_pred_r2+Cov'] = pred_r2

        if (adjust_for_pcs and 'pcs' in prs_dict and len(prs_dict['pcs'])
                and 'sex' in prs_dict and len(prs_dict['sex']) > 0):
            pcs = prs_dict['pcs']
            sex = sp.array(prs_dict['sex'])
            sex.shape = (len(sex), 1)
            (betas, rss0, r, s) = linalg.lstsq(
                sp.hstack([covariates, sex, pcs, sp.ones((len(true_phens), 1))]), true_phens)
            (betas, rss, r, s) = linalg.lstsq(
                sp.hstack([raw_effects_prs, covariates, sex, pcs,
                           sp.ones((len(true_phens), 1))]), true_phens)
            Xs = sp.hstack([pval_derived_effects_prs, covariates, sex, pcs,
                            sp.ones((len(true_phens), 1))])
            (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
            adj_pred_dict['cov_sex_pc_adj'] = sp.dot(Xs, betas)
            pred_r2 = 1 - rss / rss0
            print('Cov+PCs+Sex adjusted prediction accuracy (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)'
                  % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            pred_r2 = 1 - rss / rss00
            print('Cov+PCs+Sex adjusted prediction and PCs+Sex (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)'
                  % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            pred_r2 = 1 - rss_pd / rss0
            print('Cov+PCs+Sex adjusted prediction accuracy (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)'
                  % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            res_dict['Cov_PC_Sex_adj_pred_r2'] = pred_r2
            pred_r2 = 1 - rss_pd / rss00
            print('Cov+PCs+Sex adjusted prediction and PCs+Sex (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)'
                  % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            res_dict['Cov_PC_Sex_adj_pred_r2+Cov_PC_Sex'] = pred_r2

    # print sp.corrcoef(true_phens.T, adj_pred_dict['cov_sex_pc_adj'].T)**2

    # Now calibration
    y_norm = (true_phens - sp.mean(true_phens)) / sp.std(true_phens)
    denominator = sp.dot(raw_effects_prs.T, raw_effects_prs)
    numerator = sp.dot(raw_effects_prs.T, y_norm)
    regression_slope = (numerator / denominator)[0][0]
    print('The slope for predictions with raw effects is:', regression_slope)

    denominator = sp.dot(pval_derived_effects_prs.T, pval_derived_effects_prs)
    numerator = sp.dot(pval_derived_effects_prs.T, y_norm)
    regression_slope = (numerator / denominator)[0][0]
    print('The slope for predictions with weighted effects is:', regression_slope)

    # print sp.corrcoef(prs_dict['raw_effects_prs'], prs_dict['true_phens'])[0,1]
    # print sp.corrcoef(prs_dict['pval_derived_effects_prs'], prs_dict['true_phens'])[0,1]

    num_individs = len(prs_dict['pval_derived_effects_prs'])

    # Write PRS out to file.
    if out_file != None:
        with open(out_file, 'w') as f:
            out_str = 'IID, true_phens, raw_effects_prs, pval_derived_effects_prs'
            if 'sex' in prs_dict:
                out_str = out_str + ', sex'
            if 'pcs' in prs_dict:
                pcs_str = ', '.join(['PC%d' % (1 + pc_i)
                                     for pc_i in range(len(prs_dict['pcs'][0]))])
                out_str = out_str + ', ' + pcs_str
            out_str += '\n'
            f.write(out_str)
            for i in range(num_individs):
                out_str = '%s, %0.6e, %0.6e, %0.6e, ' % (
                    prs_dict['iids'][i], prs_dict['true_phens'][i],
                    raw_effects_prs[i], pval_derived_effects_prs[i])
                if 'sex' in prs_dict:
                    out_str = out_str + '%d, ' % prs_dict['sex'][i]
                if 'pcs' in prs_dict:
                    pcs_str = ', '.join(map(str, prs_dict['pcs'][i]))
                    out_str = out_str + pcs_str
                out_str += '\n'
                f.write(out_str)

        if len(list(adj_pred_dict.keys())) > 0:
            with open(out_file + '.adj', 'w') as f:
                adj_prs_labels = list(adj_pred_dict.keys())
                out_str = 'IID, true_phens, raw_effects_prs, pval_derived_effects_prs, ' + \
                          ', '.join(adj_prs_labels)
                out_str += '\n'
                f.write(out_str)
                for i in range(num_individs):
                    out_str = '%s, %0.6e, %0.6e, %0.6e' % (
                        prs_dict['iids'][i], prs_dict['true_phens'][i],
                        raw_effects_prs[i], pval_derived_effects_prs[i])
                    for adj_prs in adj_prs_labels:
                        out_str += ', %0.4f' % adj_pred_dict[adj_prs][i]
                    out_str += '\n'
                    f.write(out_str)
        if weights_dict != None:
            oh5f = h5py.File(out_file + '.weights.hdf5', 'w')
            for k1 in list(weights_dict.keys()):
                kg = oh5f.create_group(k1)
                for k2 in weights_dict[k1]:
                    kg.create_dataset(k2, data=sp.array(weights_dict[k1][k2]))
            oh5f.close()
    return res_dict
def best_split_full_model(X, Uy, C, S, U, noderange, delta):
    mBest = -1
    sBest = -float('inf')
    score_best = -float('inf')
    left_mean = None
    right_mean = None
    ldelta = SP.log(delta)
    levels = list(map(SP.unique, X[noderange].T))
    feature_map = []
    s = []
    UXt = []
    cnt = 0
    for i in range(X.shape[1]):
        lev = levels[i]
        for j in range(lev.size - 1):
            split_point = SP.median(lev[j:j + 2])
            x = SP.int_(X[noderange, i] > split_point)
            UXt.append(SP.dot(U.T[:, noderange], x))
            feature_map.append(i)
            s.append(split_point)
            cnt += 1
    UXt = SP.array(UXt).T

    if UXt.size == 0:
        # predictors are homogeneous
        return mBest, sBest, left_mean, right_mean, score_best
    else:
        #print UXt
        # print X[noderange]
        # print ''
        # print ''
        # test all transformed predictors
        scores = -NP.ones(cnt) * float('inf')
        UC = SP.dot(U.T, C)
        ########################
        # finding the best split
        ########################
        score_0 = lmm_fast.nLLeval(ldelta, Uy[:, 0], UC, S)
        for snp_cnt in SP.arange(cnt):
            UX = SP.hstack((UXt[:, snp_cnt:snp_cnt + 1], UC))
            scores[snp_cnt] = -lmm_fast.nLLeval(ldelta, Uy[:, 0], UX, S)
            scores[snp_cnt] += score_0
        ############################
        # evaluate the new means
        ############################
        kBest = SP.argmax(scores)
        score_best = scores[kBest]
        sBest = s[kBest]
        if score_best > 0:
            sBest = s[kBest]
            score_best = scores[kBest]
            UX = SP.hstack((UXt[:, kBest:kBest + 1], UC))
            _, beta, _ = lmm_fast.nLLeval(ldelta, Uy[:, 0], UX, S, MLparams=True)
            mBest = feature_map[kBest]
            CX = SP.zeros_like(Uy)
            CX[noderange] = SP.int_(X[noderange, mBest:mBest + 1] > sBest)
            C_new = SP.hstack((CX, C))
            mean = SP.dot(C_new, beta.reshape(beta.size, -1))
            # TODO: is this the correct way?
            left_mean = ((mean[noderange])[CX[noderange] == 0])[0]
            right_mean = ((mean[noderange])[CX[noderange] == 1])[0]
        return mBest, sBest, left_mean, right_mean, score_best
def score_2_dof(self, X, snp_dim='col', debug=False):
    """
    Parameters
    ----------
    X : (`N`, `1`) ndarray
        genotype vector (TODO: X should be small)

    Returns
    -------
    pvalue : float
        P value
    """
    # 1. calculate Qs and pvs
    Q_rho = sp.zeros(len(self.rho_list))
    Py = P(self.gp, self.y)
    xoPy = X * Py
    for i in xrange(len(self.rho_list)):
        rho = self.rho_list[i]
        LT = sp.vstack((rho**0.5 * self.vec_ones, (1 - rho)**0.5 * self.Env.T))
        LTxoPy = sp.dot(LT, X * Py)
        Q_rho[i] = 0.5 * sp.dot(LTxoPy.T, LTxoPy)

    # Calculating pvs is split into 2 steps.
    # If we only consider one value of rho, i.e. equivalent to SKAT and used for the interaction test
    if len(self.rho_list) == 1:
        rho = self.rho_list[0]
        L = sp.hstack((rho**0.5 * self.vec_ones.T, (1 - rho)**0.5 * self.Env))
        xoL = X * L
        PxoL = P(self.gp, xoL)
        LToxPxoL = 0.5 * sp.dot(xoL.T, PxoL)
        pval = self.qwedaviesskat.getPv(Q_rho[0], LToxPxoL)
        # Script ends here for the interaction test
        return pval
    # or if we consider multiple values of rho, i.e. equivalent to SKAT-O and used for the association test
    else:
        pliumod = sp.zeros((len(self.rho_list), 4))
        for i in xrange(len(self.rho_list)):
            rho = self.rho_list[i]
            L = sp.hstack((rho**0.5 * self.vec_ones.T, (1 - rho)**0.5 * self.Env))
            xoL = X * L
            PxoL = P(self.gp, xoL)
            LToxPxoL = 0.5 * sp.dot(xoL.T, PxoL)
            eighQ, UQ = la.eigh(LToxPxoL)
            pliumod[i, ] = self.qweliumod.getPv(Q_rho[i], eighQ)
        T = pliumod[:, 0].min()
        rho_opt = pliumod[:, 0].argmin()
        optimal_rho = self.rho_list[rho_opt]
        # if optimal_rho == 0.999:
        #     optimal_rho = 1

        # 2. Calculate qmin
        qmin = sp.zeros(len(self.rho_list))
        percentile = 1 - T
        for i in xrange(len(self.rho_list)):
            q = st.chi2.ppf(percentile, pliumod[i, 3])
            # Recalculate the p-value for each Q rho of seeing values at least as
            # extreme as q again, using the modified matching moments method
            qmin[i] = (q - pliumod[i, 3]) / (2 * pliumod[i, 3])**0.5 * pliumod[i, 2] + pliumod[i, 1]

        # 3. Calculate quantities that occur in the null distribution
        Px1 = P(self.gp, X)
        m = 0.5 * sp.dot(X.T, Px1)
        xoE = X * self.Env
        PxoE = P(self.gp, xoE)
        ETxPxE = 0.5 * sp.dot(xoE.T, PxoE)
        ETxPx1 = sp.dot(xoE.T, Px1)
        ETxPx11xPxE = 0.25 / m * sp.dot(ETxPx1, ETxPx1.T)
        ZTIminusMZ = ETxPxE - ETxPx11xPxE
        eigh, vecs = la.eigh(ZTIminusMZ)

        eta = sp.dot(ETxPx11xPxE, ZTIminusMZ)
        vareta = 4 * sp.trace(eta)

        OneZTZE = 0.5 * sp.dot(X.T, PxoE)
        tau_top = sp.dot(OneZTZE, OneZTZE.T)
        tau_rho = sp.zeros(len(self.rho_list))
        for i in xrange(len(self.rho_list)):
            tau_rho[i] = self.rho_list[i] * m + (1 - self.rho_list[i]) / m * tau_top

        MuQ = sp.sum(eigh)
        VarQ = sp.sum(eigh**2) * 2 + vareta
        KerQ = sp.sum(eigh**4) / (sp.sum(eigh**2)**2) * 12
        Df = 12 / KerQ

        # 4. Integration
        pvalue = self.qwedavies.getPv(qmin, MuQ, VarQ, KerQ, eigh, vareta, Df,
                                      tau_rho, self.rho_list, T)

        # Final correction to make sure that the p-value returned is sensible
        multi = 3
        if len(self.rho_list) < 3:
            multi = 2
        idx = sp.where(pliumod[:, 0] > 0)[0]
        pval = pliumod[:, 0].min() * multi
        if pvalue <= 0 or len(idx) < len(self.rho_list):
            pvalue = pval
        if pvalue == 0:
            if len(idx) > 0:
                pvalue = pliumod[:, 0][idx].min()

        if debug:
            info = {'Qs': Q_rho, 'pvs_liu': pliumod, 'qmin': qmin, 'MuQ': MuQ,
                    'VarQ': VarQ, 'KerQ': KerQ, 'lambd': eigh, 'VarXi': vareta,
                    'Df': Df, 'tau': tau_rho}
            return pvalue, info
        else:
            return pvalue
def dirwrap(x, y):
    z = G.infer_m(sp.hstack(sp.array(x) + [0.]), [[sp.NaN]])[0, 0]
    return (z, 0)
def pad(dat, pad_size):
    zer = sp.zeros((pad_size))
    return sp.hstack((zer, dat, zer))
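# Example usage of pad(): symmetric zero-padding of a 1-D signal, e.g. before filtering.
# Assumes scipy is imported as `sp`, as elsewhere in this file; values are illustrative.
sig = sp.array([1., 2., 3.])
padded = pad(sig, 2)   # -> array([ 0.,  0.,  1.,  2.,  3.,  0.,  0.])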
from scipy.interpolate import griddata mesh_x = S.loadtxt("mesh_x.txt") mesh_y = S.loadtxt("mesh_y.txt") the_splines = list() for i in range(mesh_x.shape[0]): the_splines.append(ParametricSpline(mesh_x[i], mesh_y[i])) SAMPLE_NUMBER = 100 ts = S.linspace(0.0, 1.0, SAMPLE_NUMBER) old_xy = S.vstack([aspline(ts) for aspline in the_splines]) new_xy = S.vstack([ S.hstack([i * S.ones((SAMPLE_NUMBER, 1)), ts.reshape(-1, 1)]) for i in range(len(the_splines)) ]) new_xs = griddata(old_xy, new_xy[:, 0], (x, z), method='linear') new_ys = griddata(old_xy, new_xy[:, 1], (x, z), method='linear') disp_genes = [ "kni__3", "D__3", "hbP__3", "bcdP__3", "KrP__3", "gt__3", "eve__3", "odd__3", "rho__3", "sna__3" ] #disp_genes = ["eve__3"] for one_gene_name in disp_genes: colnum = results[0]["column"].index(one_gene_name) - 1
def run_struct_lmm(reader, pheno, env, covs=None, rhos=None, no_mean_to_one=False, batch_size=1000, no_association_test=False, no_interaction_test=False, unique_variants=False): """ Utility function to run StructLMM Parameters ---------- reader : :class:`limix.data.BedReader` limix bed reader instance. pheno : (`N`, 1) ndarray phenotype vector env : (`N`, `K`) Environmental matrix (indviduals by number of environments) covs : (`N`, L) ndarray fixed effect design for covariates `N` samples and `L` covariates. rhos : list list of ``rho`` values. Note that ``rho = 1-rho`` in the equation described above. ``rho=0`` correspond to no persistent effect (only GxE); ``rho=1`` corresponds to only persistent effect (no GxE); By default, ``rho=[0, 0.1**2, 0.2**2, 0.3**2, 0.4**2, 0.5**2, 0.5, 1.]`` batch_size : int to minimize memory usage the analysis is run in batches. The number of variants loaded in a batch (loaded into memory at the same time). no_association_test : bool if True the association test is not consdered. The default value is False. no_interaction_test : bool if True the interaction test is not consdered. Teh default value is False. unique_variants : bool if True, only non-repeated genotypes are considered The default value is False. Returns ------- res : *:class:`pandas.DataFrame`* contains pv of joint test, pv of interaction test (if no_interaction_test is False) and snp info. """ if covs is None: covs = sp.ones((env.shape[0], 1)) if rhos is None: rhos = [0., 0.1**2, 0.2**2, 0.3**2, 0.4**2, 0.5**2, 0.5, 1.] if not no_association_test: # slmm fit null slmm = StructLMM(pheno, env, W=env, rho_list=rhos) null = slmm.fit_null(F=covs, verbose=False) if not no_interaction_test: # slmm int slmm_int = StructLMM(pheno, env, W=env, rho_list=[0]) n_batches = reader.getSnpInfo().shape[0] / batch_size t0 = time.time() res = [] for i, gr in enumerate(GIter(reader, batch_size=batch_size)): print '.. batch %d/%d' % (i, n_batches) X, _res = gr.getGenotypes(standardize=True, return_snpinfo=True) if unique_variants: X, idxs = f_univar(X, return_idxs=True) Isnp = sp.in1d(sp.arange(_res.shape[0]), idxs) _res = _res[Isnp] _pv = sp.zeros(X.shape[1]) _pv_int = sp.zeros(X.shape[1]) for snp in xrange(X.shape[1]): x = X[:, [snp]] if not no_association_test: # association test _p = slmm.score_2_dof(x) _pv[snp] = _p if not no_interaction_test: # interaction test covs1 = sp.hstack((covs, x)) null = slmm_int.fit_null(F=covs1, verbose=False) _p = slmm_int.score_2_dof(x) _pv_int[snp] = _p # add pvalues to _res and append to res if not no_association_test: _res = _res.assign(pv=pd.Series(_pv, index=_res.index)) if not no_interaction_test: _res = _res.assign(pv_int=pd.Series(_pv_int, index=_res.index)) res.append(_res) res = pd.concat(res) res.reset_index(inplace=True, drop=True) t = time.time() - t0 print '%.2f s elapsed' % t return res
def __call__(self, Xi, Xj, ni, nj, **kwargs): """Evaluate the covariance between points `Xi` and `Xj` with derivative order `ni`, `nj`. Parameters ---------- Xi : :py:class:`Matrix` or other Array-like, (`M`, `D`) `M` inputs with dimension `D`. Xj : :py:class:`Matrix` or other Array-like, (`M`, `D`) `M` inputs with dimension `D`. ni : :py:class:`Matrix` or other Array-like, (`M`, `D`) `M` derivative orders for set `i`. nj : :py:class:`Matrix` or other Array-like, (`M`, `D`) `M` derivative orders for set `j`. symmetric : bool, optional Whether or not the input `Xi`, `Xj` are from a symmetric matrix. Default is False. Returns ------- Kij : :py:class:`Array`, (`M`,) Covariances for each of the `M` `Xi`, `Xj` pairs. Raises ------ NotImplementedError If the `hyper_deriv` keyword is given and is not None. """ # Need to process ni, nj to handle the product rule properly. nij = scipy.hstack((ni, nj)) nij_unique = unique_rows(nij) result = scipy.zeros(Xi.shape[0]) for row in nij_unique: # deriv_pattern is the pattern of partial derivatives, where the # indicies for derivatives with respect to the elements of Xj have # been offset by self.num_dim. For instance, if ni = [1, 2] and # nj = [3, 4], deriv_pattern will be [0, 1, 1, 2, 2, 2, 3, 3, 3, 3]. deriv_pattern = [] for idx in xrange(0, len(row)): deriv_pattern.extend(row[idx] * [idx]) idxs = (nij == row).all(axis=1) S = powerset(deriv_pattern) # little "s" is a member of the power set of S: for s in S: # nij_1 is the combined array of derivative orders for function 1: nij_1 = scipy.zeros((idxs.sum(), 2 * self.num_dim)) # sC is the complement of s with respect to S: sC = list(deriv_pattern) for i in s: nij_1[:, i] += 1 sC.remove(i) # nij_2 is the combined array of derivative orders for function 2: nij_2 = scipy.zeros((idxs.sum(), 2 * self.num_dim)) for i in sC: nij_2[:, i] += 1 result[idxs] += ( self.k1(Xi[idxs, :], Xj[idxs, :], nij_1[:, :self.num_dim], nij_1[:, self.num_dim:], **kwargs) * self.k2(Xi[idxs, :], Xj[idxs, :], nij_2[:, :self.num_dim], nij_2[:, self.num_dim:], **kwargs) ) return result
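# The product rule above iterates over the power set of the combined derivative pattern.
# `powerset` is assumed to behave like the standard itertools recipe; a minimal sketch:
import itertools

def powerset(iterable):
    """powerset([1, 2]) --> (), (1,), (2,), (1, 2)"""
    s = list(iterable)
    return itertools.chain.from_iterable(
        itertools.combinations(s, r) for r in range(len(s) + 1)
    )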
import seaborn as sns; sns.set()# set decides the aesthetic parameters. Don't know if i need this. from sklearn.model_selection import train_test_split from sklearn.neural_network import MLPClassifier plt.close('all') data,rate = sf.read('outputaudio.wav') #print(data[:,0]) #First channel #print(rate) #print(sp.shape(data)) #(540672,6) I think this is the number of samples (48000*11secs = 528000, so must be slightly longer than 11s)and 6 channels #print(help(mfcc)) features = mfcc(data[:,0],samplerate=48000,nfft=1200) #A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector. #NFFT size should be equal or greater to frame lengthhttps://github.com/jameslyons/python_speech_features/issues/33 #print(sp.shape(features)) #(1125,13) ie ~11s/0.01s windows, 13 columns for different cepstrums: numcep – the number of cepstrum to return, default 13, from http://python-speech-features.readthedocs.io/en/latest/ #print(features[1124]) index = sp.arange(len(features)) index = index.reshape(len(features),1) # we need to reshape the index row vector into a column vector before we can append with hstack https://scipython.com/book/chapter-6-numpy/examples/vstack-and-hstack/ featuresI = sp.hstack((index,features)) #FIRST COLUMN CONTAINS INDEX WHICH WE WILL BE TRYING TO PREDICT #features = sp.append(index,features,axis=1) #print(sp.shape(index)) #print(sp.shape(featuresI)) #print(featuresI) X = featuresI[:,1:] y = featuresI[:,0] X_train, X_test, y_train, y_test = train_test_split(X,y) clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1) clf.fit(X_train, y_train) print(clf.predict(X_test)) print(y_test) #print(clf.score(X_test,y_test))
def generate_base_points(num_points, domain_size, prob=None): r""" Generates a set of base points for passing into the DelaunayVoronoiDual class. The points can be distributed in spherical, cylindrical, or rectilinear patterns. Parameters ---------- num_points : scalar The number of base points that lie within the domain. Note that the actual number of points returned will be larger, with the extra points lying outside the domain. domain_size : list or array Controls the size and shape of the domain, as follows: **sphere** : If a single value is received, its treated as the radius [r] of a sphere centered on [0, 0, 0]. **cylinder** : If a two-element list is received it's treated as the radius and height of a cylinder [r, z] positioned at [0, 0, 0] and extending in the positive z-direction. **rectangle** : If a three element list is received, it's treated as the outer corner of rectangle [x, y, z] whose opposite corner lies at [0, 0, 0]. prob : 3D array, optional A 3D array that contains fractional (0-1) values indicating the liklihood that a point in that region should be kept. If not specified an array containing 1's in the shape of a sphere, cylinder, or cube is generated, depnending on the give ``domain_size`` with zeros outside. When specifying a custom probabiliy map is it recommended to also set values outside the given domain to zero. If not, then the correct shape will still be returned, but with too few points in it. Notes ----- This method places the given number of points within the specified domain, then reflects these points across each domain boundary. This results in smooth flat faces at the boundaries once these excess pores are trimmed. The reflection approach tends to create larger pores near the surfaces, so it might be necessary to use the ``prob`` argument to specify a slightly higher density of points near the surfaces. For rough faces, it is necessary to define a larger than desired domain then trim to the desired size. This will discard the reflected points plus some of the original points. Examples -------- The following generates a spherical array with higher values near the core. It uses a distance transform to create a sphere of radius 10, then a second distance transform to create larger values in the center away from the sphere surface. These distance values could be further skewed by applying a power, with values higher than 1 resulting in higher values in the core, and fractional values smoothinging them out a bit. >>> import OpenPNM as op >>> import scipy as sp >>> import scipy.ndimage as spim >>> im = sp.ones([21, 21, 21], dtype=int) >>> im[10, 10, 10] = 0 >>> im = spim.distance_transform_edt(im) <= 20 # Create sphere of 1's >>> prob = spim.distance_transform_edt(im) >>> prob = prob / sp.amax(prob) # Normalize between 0 and 1 >>> pts = op.Network.tools.generate_base_points(num_points=50, ... domain_size=[2], ... 
prob=prob) >>> net = op.Network.DelaunayVoronoiDual(points=pts, domain_size=[2]) """ def _try_points(num_points, prob): prob = _sp.array(prob)/_sp.amax(prob) # Ensure prob is normalized base_pts = [] N = 0 while N < num_points: pt = _sp.random.rand(3) # Generate a point # Test whether to keep it or not [indx, indy, indz] = _sp.floor(pt*_sp.shape(prob)).astype(int) if _sp.random.rand(1) <= prob[indx][indy][indz]: base_pts.append(pt) N += 1 base_pts = _sp.array(base_pts) return base_pts if len(domain_size) == 1: # Spherical domain_size = _sp.array(domain_size) if prob is None: prob = _sp.ones([41, 41, 41]) prob[20, 20, 20] = 0 prob = _spim.distance_transform_bf(prob) <= 20 base_pts = _try_points(num_points, prob) # Convert to spherical coordinates [X, Y, Z] = _sp.array(base_pts - [0.5, 0.5, 0.5]).T # Center at origin r = 2*_sp.sqrt(X**2 + Y**2 + Z**2)*domain_size[0] theta = 2*_sp.arctan(Y/X) phi = 2*_sp.arctan(_sp.sqrt(X**2 + Y**2)/Z) # Trim points outside the domain (from improper prob images) inds = r <= domain_size[0] [r, theta, phi] = [r[inds], theta[inds], phi[inds]] # Reflect base points across perimeter new_r = 2*domain_size - r r = _sp.hstack([r, new_r]) theta = _sp.hstack([theta, theta]) phi = _sp.hstack([phi, phi]) # Convert to Cartesean coordinates X = r*_sp.cos(theta)*_sp.sin(phi) Y = r*_sp.sin(theta)*_sp.sin(phi) Z = r*_sp.cos(phi) base_pts = _sp.vstack([X, Y, Z]).T elif len(domain_size) == 2: # Cylindrical domain_size = _sp.array(domain_size) if prob is None: prob = _sp.ones([41, 41, 41]) prob[20, 20, :] = 0 prob = _spim.distance_transform_bf(prob) <= 20 base_pts = _try_points(num_points, prob) # Convert to cylindrical coordinates [X, Y, Z] = _sp.array(base_pts - [0.5, 0.5, 0]).T # Center on z-axis r = 2*_sp.sqrt(X**2 + Y**2)*domain_size[0] theta = 2*_sp.arctan(Y/X) z = Z*domain_size[1] # Trim points outside the domain (from improper prob images) inds = r <= domain_size[0] [r, theta, z] = [r[inds], theta[inds], z[inds]] inds = ~((z > domain_size[1]) + (z < 0)) [r, theta, z] = [r[inds], theta[inds], z[inds]] # Reflect base points about faces and perimeter new_r = 2*domain_size[0] - r r = _sp.hstack([r, new_r]) theta = _sp.hstack([theta, theta]) z = _sp.hstack([z, z]) r = _sp.hstack([r, r, r]) theta = _sp.hstack([theta, theta, theta]) z = _sp.hstack([z, -z, 2-z]) # Convert to Cartesean coordinates X = r*_sp.cos(theta) Y = r*_sp.sin(theta) Z = z base_pts = _sp.vstack([X, Y, Z]).T elif len(domain_size) == 3: # Rectilinear domain_size = _sp.array(domain_size) Nx, Ny, Nz = domain_size if prob is None: prob = _sp.ones([10, 10, 10], dtype=float) base_pts = _try_points(num_points, prob) base_pts = base_pts*domain_size # Reflect base points about all 6 faces orig_pts = base_pts base_pts = _sp.vstack((base_pts, [-1, 1, 1]*orig_pts + [2.0*Nx, 0, 0])) base_pts = _sp.vstack((base_pts, [1, -1, 1]*orig_pts + [0, 2.0*Ny, 0])) base_pts = _sp.vstack((base_pts, [1, 1, -1]*orig_pts + [0, 0, 2.0*Nz])) base_pts = _sp.vstack((base_pts, [-1, 1, 1]*orig_pts)) base_pts = _sp.vstack((base_pts, [1, -1, 1]*orig_pts)) base_pts = _sp.vstack((base_pts, [1, 1, -1]*orig_pts)) return base_pts
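# Minimal sketch of the face-reflection idea used above in the rectilinear case: each
# point is mirrored across the two domain faces along one axis, so the tessellation later
# yields flat boundaries once the reflected points are trimmed. Illustrative only; the
# original applies the same pattern to all three axes.
import scipy as _sp

def reflect_across_x(points, Lx):
    """Mirror (N, 3) points across the x = 0 and x = Lx faces."""
    points = _sp.asarray(points)
    left = points * [-1, 1, 1]                       # mirror across x = 0
    right = points * [-1, 1, 1] + [2.0 * Lx, 0, 0]   # mirror across x = Lx
    return _sp.vstack((points, left, right))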
def vl_phow(im, verbose=False, fast=True, sizes=[4, 6, 8, 10], step=2, color='rgb', floatdescriptors=False, magnif=6, windowsize=1.5, contrastthreshold=0.005): opts = Options(verbose, fast, sizes, step, color, floatdescriptors, magnif, windowsize, contrastthreshold) dsiftOpts = DSiftOptions(opts) # make sure image is float, otherwise segfault im = array(im, 'float32') # Extract the features imageSize = shape(im) if im.ndim == 3: if imageSize[2] != 3: # "IndexError: tuple index out of range" if both if's are checked at the same time raise ValueError("Image data in unknown format/shape") if opts.color == 'gray': numChannels = 1 if (im.ndim == 2): im = vl_rgb2gray(im) else: numChannels = 3 if (im.ndim == 2): im = dstack([im, im, im]) if opts.color == 'rgb': pass elif opts.color == 'opponent': # from https://github.com/vlfeat/vlfeat/blob/master/toolbox/sift/vl_phow.m # Note that the mean differs from the standard definition of opponent # space and is the regular intesity (for compatibility with # the contrast thresholding). # Note also that the mean is added pack to the other two # components with a small multipliers for monochromatic # regions. mu = 0.3 * im[:, :, 0] + 0.59 * im[:, :, 1] + 0.11 * im[:, :, 2] alpha = 0.01 im = dstack([mu, (im[:, :, 0] - im[:, :, 1]) / sqrt(2) + alpha * mu, (im[:, :, 0] + im[:, :, 1] - 2 * im[:, :, 2]) / sqrt(6) + alpha * mu]) else: raise ValueError('Color option ' + str(opts.color) + ' not recognized') if opts.verbose: print('{0}: color space: {1}'.format('vl_phow', opts.color)) print('{0}: image size: {1} x {2}'.format('vl_phow', imageSize[0], imageSize[1])) print('{0}: sizes: [{1}]'.format('vl_phow', opts.sizes)) frames_all = [] descrs_all = [] for size_of_spatial_bins in opts.sizes: # from https://github.com/vlfeat/vlfeat/blob/master/toolbox/sift/vl_phow.m # Recall from VL_DSIFT() that the first descriptor for scale SIZE has # center located at XC = XMIN + 3/2 SIZE (the Y coordinate is # similar). It is convenient to align the descriptors at different # scales so that they have the same geometric centers. For the # maximum size we pick XMIN = 1 and we get centers starting from # XC = 1 + 3/2 MAX(OPTS.SIZES). For any other scale we pick XMIN so # that XMIN + 3/2 SIZE = 1 + 3/2 MAX(OPTS.SIZES). # In pracrice, the offset must be integer ('bounds'), so the # alignment works properly only if all OPTS.SZES are even or odd. 
off = floor(3.0 / 2 * (max(opts.sizes) - size_of_spatial_bins)) + 1 # smooth the image to the appropriate scale based on the size # of the SIFT bins sigma = size_of_spatial_bins / float(opts.magnif) ims = vl_imsmooth(im, sigma) # extract dense SIFT features from all channels frames = [] descrs = [] for k in range(numChannels): size_of_spatial_bins = int(size_of_spatial_bins) # vl_dsift does not accept numpy.int64 or similar f_temp, d_temp = vl_dsift(image=ims[:, :, k], step=dsiftOpts.step, size=size_of_spatial_bins, fast=dsiftOpts.fast, verbose=dsiftOpts.verbose, norm=dsiftOpts.norm,) frames.append(f_temp.T) descrs.append(d_temp.T) frames = array(frames) descrs = array(descrs) d_new_shape = [descrs.shape[0] * descrs.shape[1], descrs.shape[2]] descrs = descrs.reshape(d_new_shape) # remove low contrast descriptors # note that for color descriptors the V component is # thresholded if (opts.color == 'gray') | (opts.color == 'opponent'): contrast = frames[0][2, :] elif opts.color == 'rgb': contrast = mean([frames[0][2, :], frames[1][2, :], frames[2][2, :]], 0) else: raise ValueError('Color option ' + str(opts.color) + ' not recognized') descrs = descrs[:, contrast > opts.contrastthreshold] frames = frames[0][:, contrast > opts.contrastthreshold] # save only x,y, and the scale frames_temp = array(frames[0:3, :]) padding = array(size_of_spatial_bins * ones(frames[0].shape)) frames_to_add = vstack([frames_temp, padding]) # print("Shape of frame for each window", frames_to_add.shape) # print("Shape of descriptors for each window", descrs.shape) # print("Sample Frame", frames_to_add[:,:1]) frames_all.append(vstack([frames_temp, padding])) descrs_all.append(array(descrs)) frames_all = hstack(frames_all) # print("length of descriptors ", len(descrs_all)) descrs_all = hstack(descrs_all) # print("Frames Shape", frames_all.shape) # print("Descriptors shape", descrs_all.shape) # print(np.unique(descrs_all, return_counts=True)) return frames_all.T[:,:2], descrs_all.T
def ampoffToUNW(ampoff_path, slc_rsc_path, mean_x_off, mean_y_off, ref_x, ref_y, search_x, search_y, step_x, step_y): import math import scipy width = "" length = "" da_p = "" r_e = "" p_h = "" dr = "" angle = "" infile = open(slc_rsc_path, "r") for line in infile: if line.find("WIDTH") > -1: width = line.split()[1] elif line.find("RANGE_PIXEL_SIZE") > -1: dr = line.split()[1] elif line.find("FILE_LENGTH") > -1: length = line.split()[1] elif line.find("HEIGHT") > -1 and line.find("_") < 0: p_h = line.split()[1] elif line.find("EARTH_RADIUS") > -1: r_e = line.split()[1] elif line.find("AZIMUTH_PIXEL_SIZE") > -1: da_p = line.split()[1] elif line.find("BEAM") > -1: angle = line.split()[1] infile.close() print(width, length, da_p, r_e, p_h, dr, angle) r_p = float(r_e) + float(p_h) #platform radius da_e = float(da_p) * float(r_e) / r_p * 100 #az pixel size at earth surface, cm #dr = ; #range pixel size la = float(angle) * math.pi / 180 #look angle dr_g = float(dr) / math.sin(la) * 100 #g unw_width = str(int(width) / int(step_x)) unw_length = str(int(length) / int(step_y)) azimuth_path = "azimuth_noaffine_r" + ref_x + "x" + ref_y + "_s" + search_x + "x" + search_y + "_" + step_x + "rlks.unw" range_path = "range_noaffine_r" + ref_x + "x" + ref_y + "_s" + search_x + "x" + search_y + "_" + step_x + "rlks.unw" snr_path = "snr_noaffine_r" + ref_x + "x" + ref_y + "_s" + search_x + "x" + search_y + "_" + step_x + "rlks.unw" if not os.path.exists(azimuth_path): # dxg = scipy.zeros((int(unw_width), int(unw_length))); # dyg = scipy.zeros((int(unw_width), int(unw_length))); # snrg = scipy.zeros((int(unw_width), int(unw_length))); dxg = scipy.zeros((int(unw_length), int(unw_width))) dyg = scipy.zeros((int(unw_length), int(unw_width))) snrg = scipy.zeros((int(unw_length), int(unw_width))) infile = open(ampoff_path, "r") for line in infile: elements = line.split() # dxg[int(elements[0])/int(step_x), int(elements[2])/int(step_y)] = float(elements[1]) - float(mean_x_off); # dyg[int(elements[0])/int(step_x), int(elements[2])/int(step_y)] = float(elements[3]) - float(mean_y_off); # snrg[int(elements[0])/int(step_x), int(elements[2])/int(step_y)] = float(elements[4]); dxg[int(elements[2]) / int(step_y), int(elements[0]) / int(step_x)] = (float(elements[1]) - float(mean_x_off)) * dr_g dyg[int(elements[2]) / int(step_y), int(elements[0]) / int(step_x)] = (float(elements[3]) - float(mean_y_off)) * da_e snrg[int(elements[2]) / int(step_y), int(elements[0]) / int(step_x)] = float(elements[4]) # for i in range(0, scipy.size(dxg, 0) - 1): # for j in range(0, scipy.size(dxg, 1) - 1): # low_x = i - 50; # high_x = i + 50; # low_y = j - 50; # high_y = j + 50; # if low_x < 0: # low_x = 0; # high_x = 100; # if high_x > scipy.size(dxg, 0) - 1: # high_x = scipy.size(dxg, 0) - 1; # low_x = (scipy.size(dxg, 0) - 100) - 1; # if low_y < 0: # low_y = 0; # high_y = 100; # if high_y > scipy.size(dxg, 1) - 1: # high_y = scipy.size(dxg, 1) - 1; # low_y = (scipy.size(dxg, 1) - 100) - 1; # median_range = scipy.median(dxg[low_x : high_x][low_y : high_y]); # dxg[i][j] = dxg[i][j] - median_range; # median_azimuth = scipy.median(dyg[low_x : high_x][low_y : high_y]); # dyg[i][j] = dyg[i][j] - median_azimuth; infile.close() outg = scipy.hstack((abs(dyg), dyg)) outfile = open(azimuth_path, "wb") outg = scipy.matrix(outg, scipy.float32) outg.tofile(outfile) outfile.close() outg = "" outr = scipy.hstack((dxg, dxg)) outfile = open(range_path, "wb") outr = scipy.matrix(outr, scipy.float32) outr.tofile(outfile) outfile.close() outr = "" outsnr = 
scipy.hstack((snrg, snrg))
        outfile = open(snr_path, "wb")
        outsnr = scipy.matrix(outsnr, scipy.float32)
        outsnr.tofile(outfile)
        outfile.close()
        outsnr = ""
    return
def plot_connections(network, throats=None, fig=None, **kwargs): r""" Produces a 3D plot of the network topology showing how throats connect for quick visualization without having to export data to veiw in Paraview. Parameters ---------- network : OpenPNM Network Object The network whose topological connections to plot throats : array_like (optional) The list of throats to plot if only a sub-sample is desired. This is useful for inspecting a small region of the network. If no throats are specified then all throats are shown. fig and **kwargs: Matplotlib figure handle and line property arguments If a ``fig`` is supplied, then the topology will be overlaid. By also passing in different line properties such as ``color`` and limiting which ``throats`` are plots, this makes it possible to plot different types of throats on the same plot. For information on available line style options, visit the Matplotlib documentation at: http://matplotlib.org/api/lines_api.html#matplotlib.lines.Line2D Notes ----- The figure handle returned by this method can be passed into ``plot_coordinates`` to create a plot that combines pore coordinates and throat connections, and vice versa. Examples -------- >>> import OpenPNM as op >>> pn = op.Network.Cubic(shape=[10, 10, 3]) >>> pn.add_boundaries() >>> Ts = pn.throats('*boundary', mode='not') >>> # Create figure showing boundary throats >>> fig = op.Network.tools.plot_connections(network=pn, throats=Ts) >>> Ts = pn.throats('*boundary') >>> # Pass existing fig back into function to plot additional throats >>> fig = op.Network.tools.plot_connections(network=pn, throats=Ts, ... fig=fig, color='r') """ import matplotlib.pyplot as plt from mpl_toolkits.mplot3d import Axes3D if throats is None: Ts = network.Ts else: Ts = network._parse_locations(locations=throats) if fig is None: fig = plt.figure() ax = fig.add_subplot(111, projection='3d') else: ax = fig.get_axes()[0] # Create dummy indexing to sp.inf i = -1*_sp.ones((_sp.size(Ts)*3, ), dtype=int) i[0::3] = network['throat.conns'][Ts, 0] i[1::3] = network['throat.conns'][Ts, 1] # Collect coordinates and scale axes to fit Ps = _sp.unique(network['throat.conns'][Ts]) X = network['pore.coords'][Ps, 0] Y = network['pore.coords'][Ps, 1] Z = network['pore.coords'][Ps, 2] _scale_3d_axes(ax=ax, X=X, Y=Y, Z=Z) # Add sp.inf to the last element of pore.coords (i.e. -1) inf = _sp.array((_sp.inf,)) X = _sp.hstack([network['pore.coords'][:, 0], inf]) Y = _sp.hstack([network['pore.coords'][:, 1], inf]) Z = _sp.hstack([network['pore.coords'][:, 2], inf]) ax.plot(xs=X[i], ys=Y[i], zs=Z[i], **kwargs) return fig
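# Minimal standalone sketch of the indexing trick used above: every third entry of the
# index array stays -1 and points at a sentinel row appended to the coordinates, so the
# line is broken between throats and all connections are drawn with one plot call.
# The original appends sp.inf as the sentinel; NaN is used here for the same effect.
# Names and coordinates are illustrative only.
import scipy as _sp
import matplotlib.pyplot as plt

coords = _sp.array([[0., 0.], [1., 1.], [1., 0.], [2., 1.]])
conns = _sp.array([[0, 1], [2, 3]])                  # two unconnected segments
i = -1 * _sp.ones(conns.shape[0] * 3, dtype=int)     # every third index stays -1
i[0::3] = conns[:, 0]
i[1::3] = conns[:, 1]
X = _sp.hstack([coords[:, 0], [_sp.nan]])            # coords[-1] is now the sentinel
Y = _sp.hstack([coords[:, 1], [_sp.nan]])
plt.plot(X[i], Y[i])                                 # one call, two separate segments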
def ev(self, xi, yi): return self._ct_interp( scipy.hstack((scipy.atleast_2d(xi).T, scipy.atleast_2d(yi).T)))
x2good = x2[good].reshape(-1, 1)
y2good = y2[good].reshape(-1, 1)
# get and remove affine fit
good2 = scipy.matrix(pylab.find(good < 300000)).conj().transpose()
x1good = x1[good2].reshape(-1, 1)
y1good = y1[good2].reshape(-1, 1)
x2good = x2[good2].reshape(-1, 1)
y2good = y2[good2].reshape(-1, 1)
c0 = scipy.matrix(scipy.zeros((scipy.size(good2)))).reshape(-1, 1)
c1 = scipy.matrix(scipy.ones((scipy.size(good2)))).reshape(-1, 1)
n = c1.shape[0]
A = scipy.vstack((scipy.hstack((x1good, y1good, c0, c0, c1, c0)),
                  scipy.hstack((c0, c0, x1good, y1good, c0, c1))))
b = scipy.vstack((x2good, y2good))
M = scipy.linalg.lstsq(A, b)[0]
pred = A * M
res = pred - b
# std() in python defaults to 0 degrees of freedom
resdev = res.std(axis=0, ddof=1)
q = pylab.find(abs(res) < 1.5 * resdev)
A1 = A[q, ]
b1 = b[q]
M = scipy.linalg.lstsq(A1, b1)[0]
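# The stacked least-squares system above fits a 6-parameter affine transform
#   x2 = a*x1 + b*y1 + e,   y2 = c*x1 + d*y1 + f.
# A compact restatement as a helper (illustrative only; expects (n, 1) column vectors):
import scipy
import scipy.linalg

def fit_affine(x1, y1, x2, y2):
    n = x1.shape[0]
    c0 = scipy.zeros((n, 1))
    c1 = scipy.ones((n, 1))
    A = scipy.vstack((scipy.hstack((x1, y1, c0, c0, c1, c0)),
                      scipy.hstack((c0, c0, x1, y1, c0, c1))))
    b = scipy.vstack((x2, y2))
    return scipy.linalg.lstsq(A, b)[0]               # [a, b, c, d, e, f]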
def count_graph_coverage_wrapper(fname_in, fname_out, options, sample_idx=None, qmode='all'): (genes, inserted) = pickle.load(open(fname_in, 'rb')) for g in genes: g.from_sparse() if genes[0].segmentgraph is None or genes[0].segmentgraph.is_empty(): for g in genes: g.segmentgraph = Segmentgraph(g) g.to_sparse() pickle.dump((genes, inserted), open(fname_in, 'wb'), -1) for g in genes: g.from_sparse() counts = dict() counts['segments'] = [] counts['seg_pos'] = [] counts['gene_ids_segs'] = [] counts['edges'] = [] counts['gene_ids_edges'] = [] counts['seg_len'] = sp.hstack([x.segmentgraph.segments[1, :] - x.segmentgraph.segments[0, :] for x in genes]).T counts['gene_names'] = sp.array([x.name for x in genes], dtype='str') if not options.pyproc: if options.merge == 'single': print('\nprocessing %s' % (options.samples[sample_idx])) counts_tmp = count_graph_coverage(genes, options.bam_fnames[sample_idx], options) elif options.merge == 'merge_graphs' and qmode == 'single': print('\nquantifying merged graph in single mode (first file only) on %s' % options.samples[0]) counts_tmp = count_graph_coverage(genes, options.bam_fnames[0], options) else: for s_idx in range(options.strains.shape[0]): print('\n%i/%i' % (s_idx + 1, options.strains.shape[0])) if s_idx == 0: counts_tmp = count_graph_coverage(genes, options.bam_fnames[s_idx], options) else: counts_tmp = sp.r_[sp.atleast_2d(counts_tmp), count_graph_coverage(genes, options.bam_fnames[s_idx], options)] for c in range(counts_tmp.shape[1]): counts['segments'].append(sp.hstack([sp.atleast_2d(x.segments).T for x in counts_tmp[:, c]])) counts['seg_pos'].append(sp.hstack([sp.atleast_2d(x.seg_pos).T for x in counts_tmp[:, c]])) counts['gene_ids_segs'].append(sp.ones((sp.atleast_2d(counts_tmp[0, c].seg_pos).shape[1], 1), dtype='int') * c) tmp = [sp.atleast_2d(x.edges) for x in counts_tmp[:, c] if x.edges.shape[0] > 0] if len(tmp) == 0: continue tmp = sp.hstack(tmp) if tmp.shape[0] > 0: counts['edges'].append(sp.c_[tmp[:, 0], tmp[:, sp.arange(1, tmp.shape[1], 2)]]) counts['gene_ids_edges'].append(sp.ones((tmp.shape[0], 1), dtype='int') * c) ### write result data to hdf5 for key in counts: counts[key] = sp.vstack(counts[key]) if len(counts[key]) > 0 else counts[key] counts['edge_idx'] = counts['edges'][:, 0] if len(counts['edges']) > 0 else sp.array([]) counts['edges'] = counts['edges'][:, 1:] if len(counts['edges']) > 0 else sp.array([]) h5fid = h5py.File(fname_out, 'w') h5fid.create_dataset(name='strains', data=codeUTF8(options.strains)) for key in counts: if sp.issubdtype(counts[key].dtype, sp.str_): h5fid.create_dataset(name=key, data=codeUTF8(counts[key])) else: h5fid.create_dataset(name=key, data=counts[key]) h5fid.close() else: ### have an adaptive chunk size, that takes into account the number of strains (take as many genes as it takes to have ~10K strains) if options.sparse_bam: chunksize = int(max(1, math.floor(1000000 / len(options.strains)))) else: chunksize = int(max(1, math.floor(100000 / len(options.strains)))) jobinfo = [] PAR = dict() PAR['options'] = options if options.merge == 'single': PAR['options'].bam_fnames = PAR['options'].bam_fnames[sample_idx] PAR['options'].samples = PAR['options'].samples[sample_idx] PAR['options'].strains = PAR['options'].strains[sample_idx] #s_idx = sp.argsort([x.chr for x in genes]) # TODO s_idx = sp.arange(genes.shape[0]) for c_idx in range(0, s_idx.shape[0], chunksize): cc_idx = min(s_idx.shape[0], c_idx + chunksize) fn = re.sub(r'.hdf5$', '', fname_out) + '.chunk_%i_%i.pickle' % (c_idx, cc_idx) if 
os.path.exists(fn): continue else: print('submitting chunk %i to %i (%i)' % (c_idx, cc_idx, s_idx.shape[0])) PAR['genes'] = genes[s_idx][c_idx:cc_idx] for gg in PAR['genes']: gg.to_sparse() PAR['fn_bam'] = options.bam_fnames PAR['fn_out'] = fn PAR['options'] = options jobinfo.append(rp.rproc('count_graph_coverage', PAR, 15000, options.options_rproc, 60*48)) rp.rproc_wait(jobinfo, 30, 1.0, -1) del genes ### merge results from count chunks if options.verbose: print('\nCollecting count data from chunks ...\n') print('writing data to %s' % fname_out) ### write data to hdf5 continuously h5fid = h5py.File(fname_out, 'w') h5fid.create_dataset(name='gene_names', data=codeUTF8(counts['gene_names'])) h5fid.create_dataset(name='seg_len', data=counts['seg_len']) h5fid.create_dataset(name='strains', data=codeUTF8(options.strains)) for c_idx in range(0, s_idx.shape[0], chunksize): cc_idx = min(s_idx.shape[0], c_idx + chunksize) if options.verbose: print('collecting chunk %i-%i (%i)' % (c_idx, cc_idx, s_idx.shape[0])) fn = re.sub(r'.hdf5$', '', fname_out) + '.chunk_%i_%i.pickle' % (c_idx, cc_idx) if not os.path.exists(fn): print('ERROR: Not all chunks in counting graph coverage completed!', file=sys.stderr) sys.exit(1) else: counts_tmp = pickle.load(open(fn, 'rb')) for c in range(counts_tmp.shape[1]): if 'segments' in h5fid: appendToHDF5(h5fid, sp.hstack([sp.atleast_2d(x.segments).T for x in counts_tmp[:, c]]), 'segments') appendToHDF5(h5fid, sp.hstack([sp.atleast_2d(x.seg_pos).T for x in counts_tmp[:, c]]), 'seg_pos') appendToHDF5(h5fid, sp.ones((sp.atleast_2d(counts_tmp[0, c].seg_pos).shape[1], 1), dtype='int') * (s_idx[c_idx + c]), 'gene_ids_segs') else: h5fid.create_dataset(name='segments', data=sp.hstack([sp.atleast_2d(x.segments).T for x in counts_tmp[:, c]]), chunks=True, compression='gzip', maxshape=(None, len(options.strains))) h5fid.create_dataset(name='seg_pos', data=sp.hstack([sp.atleast_2d(x.seg_pos).T for x in counts_tmp[:, c]]), chunks=True, compression='gzip', maxshape=(None, len(options.strains))) h5fid.create_dataset(name='gene_ids_segs', data=sp.ones((sp.atleast_2d(counts_tmp[0, c].seg_pos).shape[1], 1), dtype='int') * (s_idx[c_idx + c]), chunks=True, compression='gzip', maxshape=(None, 1)) tmp = [sp.atleast_2d(x.edges) for x in counts_tmp[:, c] if x.edges.shape[0] > 0] if len(tmp) == 0: continue tmp = sp.hstack(tmp) if tmp.shape[0] > 0: if 'edges' in h5fid: appendToHDF5(h5fid, tmp[:, sp.arange(1, tmp.shape[1], 2)], 'edges') appendToHDF5(h5fid, tmp[:, 0], 'edge_idx') appendToHDF5(h5fid, sp.ones((tmp.shape[0], 1), dtype='int') * (s_idx[c_idx + c]), 'gene_ids_edges') else: h5fid.create_dataset(name='edges', data=tmp[:, sp.arange(1, tmp.shape[1], 2)], chunks=True, compression='gzip', maxshape=(None, tmp.shape[1] / 2)) h5fid.create_dataset(name='edge_idx', data=tmp[:, 0], chunks=True, compression='gzip', maxshape=(None,)) h5fid.create_dataset(name='gene_ids_edges', data=sp.ones((tmp.shape[0], 1), dtype='int') * (s_idx[c_idx + c]), chunks=True, compression='gzip', maxshape=(None, 1)) del tmp, counts_tmp h5fid.close()
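# `appendToHDF5` above is a project helper; a minimal sketch of its assumed behaviour,
# growing a chunked, resizable dataset along its first axis with h5py. This is consistent
# with the datasets above being created with maxshape=(None, ...).
import h5py

def appendToHDF5(fid, data, name):
    dset = fid[name]
    old = dset.shape[0]
    dset.resize(old + data.shape[0], axis=0)   # requires a resizable (maxshape) dataset
    dset[old:] = data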
def __init__(self, x, y, z):
    self._ct_interp = scipy.interpolate.CloughTocher2DInterpolator(
        scipy.hstack((scipy.atleast_2d(x).T, scipy.atleast_2d(y).T)), z)
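# Example of the same pattern outside the class: scattered 2-D data interpolated with
# scipy.interpolate.CloughTocher2DInterpolator. Data values are illustrative only.
import scipy
import scipy.interpolate

x = scipy.random.rand(50)
y = scipy.random.rand(50)
z = scipy.sin(x) * scipy.cos(y)
interp = scipy.interpolate.CloughTocher2DInterpolator(
    scipy.hstack((scipy.atleast_2d(x).T, scipy.atleast_2d(y).T)), z)
zi = interp(scipy.hstack((scipy.atleast_2d([0.5]).T, scipy.atleast_2d([0.5]).T)))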
def __init__(self,Ionodict,inifile, outdir,outfilelist=None): """This function will create an instance of the RadarData class. It will take in the values and create the class and make raw IQ data. Inputs: sensdict - A dictionary of sensor parameters angles - A list of tuples which the first position is the az angle and the second position is the el angle. IPP - The interpulse period in seconds represented as a float. Tint - The integration time in seconds as a float. This will be the integration time of all of the beams. time_lim - The length of time of the simulation the number of time points will be calculated. pulse - A numpy array that represents the pulse shape. rng_lims - A numpy array of length 2 that holds the min and max range that the radar will cover.""" (sensdict,simparams) = readconfigfile(inifile) self.simparams = simparams N_angles = len(self.simparams['angles']) NNs = int(self.simparams['NNs']) self.sensdict = sensdict Npall = sp.floor(self.simparams['TimeLim']/self.simparams['IPP']) Npall = sp.floor(Npall/N_angles)*N_angles Np = Npall/N_angles print "All spectrums created already" filetimes = Ionodict.keys() filetimes.sort() ftimes = sp.array(filetimes) simdtype = self.simparams['dtype'] pulsetimes = sp.arange(Npall)*self.simparams['IPP'] +ftimes.min() pulsefile = sp.array([sp.where(itimes-ftimes>=0)[0][-1] for itimes in pulsetimes]) # differentiate between phased arrays and dish antennas if sensdict['Name'].lower() in ['risr','pfisr','risr-n']: beams = sp.tile(sp.arange(N_angles),Npall/N_angles) else: # for dish arrays brate = simparams['beamrate'] beams2 = sp.repeat(sp.arange(N_angles),brate) beam3 = sp.concatenate((beams2,beams2[::-1])) ntile = sp.ceil(Npall/len(beam3)) leftover = Npall-ntile*len(beam3) if ntile>0: beams = sp.tile(beam3,ntile) beams=sp.concatenate((beams,beam3[:leftover])) else: beams=beam3[:leftover] pulsen = sp.repeat(sp.arange(Np),N_angles) pt_list = [] pb_list = [] pn_list = [] fname_list = [] self.datadir = outdir self.maindir = os.path.dirname(os.path.abspath(outdir)) self.procdir =os.path.join(self.maindir,'ACF') if outfilelist is None: print('\nData Now being created.') Noisepwr = v_Boltz*sensdict['Tsys']*sensdict['BandWidth'] self.outfilelist = [] for ifn, ifilet in enumerate(filetimes): outdict = {} ifile = Ionodict[ifilet] print('\tData from {0:d} of {1:d} being processed Name: {2:s}.'.format(ifn,len(filetimes), os.path.split(ifile)[1])) curcontainer = IonoContainer.readh5(ifile) if ifn==0: self.timeoffset=curcontainer.Time_Vector[0,0] pnts = pulsefile==ifn pt =pulsetimes[pnts] pb = beams[pnts] pn = pulsen[pnts].astype(int) rawdata= self.__makeTime__(pt,curcontainer.Time_Vector, curcontainer.Sphere_Coords, curcontainer.Param_List,pb) Noise = sp.sqrt(Noisepwr/2)*(sp.random.randn(*rawdata.shape).astype(simdtype)+ 1j*sp.random.randn(*rawdata.shape).astype(simdtype)) outdict['AddedNoise'] =Noise outdict['RawData'] = rawdata+Noise outdict['RawDatanonoise'] = rawdata outdict['NoiseData'] = sp.sqrt(Noisepwr/2)*(sp.random.randn(len(pn),NNs).astype(simdtype)+ 1j*sp.random.randn(len(pn),NNs).astype(simdtype)) outdict['Pulses']=pn outdict['Beams']=pb outdict['Time'] = pt fname = '{0:d} RawData.h5'.format(ifn) newfn = os.path.join(self.datadir,fname) self.outfilelist.append(newfn) dict2h5(newfn,outdict) #Listing info pt_list.append(pt) pb_list.append(pb) pn_list.append(pn) fname_list.append(fname) infodict = {'Files':fname_list,'Time':pt_list,'Beams':pb_list,'Pulses':pn_list} dict2h5(os.path.join(outdir,'INFO.h5'),infodict) else: infodict= 
h52dict(os.path.join(outdir, 'INFO.h5'))
            alltime = sp.hstack(infodict['Time'])
            self.timeoffset = alltime.min()
            self.outfilelist = outfilelist
def processdata(self): """ This will perform the the data processing and create the ACF estimates for both the data and noise. Inputs: timevec - A numpy array of times in seconds where the integration will begin. inttime - The integration time in seconds. lagfunc - A function that will make the desired lag products. Outputs: DataLags: A dictionary with keys 'Power' 'ACF','RG','Pulses' that holds the numpy arrays of the data. NoiseLags: A dictionary with keys 'Power' 'ACF','RG','Pulses' that holds the numpy arrays of the data. """ timevec = self.simparams['Timevec'] +self.timeoffset inttime = self.simparams['Tint'] # Get array sizes NNs = int(self.simparams['NNs']) range_gates = self.simparams['Rangegates'] N_rg = len(range_gates)# take the size pulse = self.simparams['Pulse'] Pulselen = len(pulse) N_samps = N_rg +Pulselen-1 simdtype = self.simparams['dtype'] Ntime=len(timevec) if 'outangles' in self.simparams.keys(): Nbeams = len(self.simparams['outangles']) inttime = inttime else: Nbeams = len(self.simparams['angles']) # Choose type of processing if self.simparams['Pulsetype'].lower() == 'barker': lagfunc=BarkerLag Nlag=1 else: lagfunc=CenteredLagProduct Nlag=Pulselen # initialize output arrays outdata = sp.zeros((Ntime,Nbeams,N_rg,Nlag),dtype=simdtype) outaddednoise = sp.zeros((Ntime,Nbeams,N_rg,Nlag),dtype=simdtype) outnoise = sp.zeros((Ntime,Nbeams,NNs-Pulselen+1,Nlag),dtype=simdtype) pulses = sp.zeros((Ntime,Nbeams)) pulsesN = sp.zeros((Ntime,Nbeams)) timemat = sp.zeros((Ntime,2)) Ksysvec = self.sensdict['Ksys'] # set up arrays that hold the location of pulses that are to be processed together infoname = os.path.join(self.datadir,'INFO.h5') # Just going to assume that the info file is in the directory infodict =h52dict(infoname) flist = infodict['Files'] file_list = [os.path.join(self.datadir,i) for i in flist] pulsen_list = infodict['Pulses'] beamn_list = infodict['Beams'] time_list = infodict['Time'] file_loclist = [ifn*sp.ones(len(ifl)) for ifn,ifl in enumerate(beamn_list)] if 'NoiseTime'in infodict.keys(): sridata = True tnoiselist=infodict['NoiseTime'] nfile_loclist=[ifn*sp.ones(len(ifl)) for ifn,ifl in enumerate(tnoiselist)] else: sridata=False pulsen = sp.hstack(pulsen_list).astype(int)# pulse number beamn = sp.hstack(beamn_list).astype(int)# beam numbers ptimevec = sp.hstack(time_list).astype(float)# time of each pulse file_loc = sp.hstack(file_loclist).astype(int)# location in the file if sridata: ntimevec = sp.vstack(tnoiselist).astype(float) nfile_loc = sp.hstack(nfile_loclist).astype(int) outnoise = sp.zeros((Ntime,Nbeams,NNs-Pulselen+1,Nlag),dtype=simdtype) # run the time loop print("Forming ACF estimates") # For each time go through and read only the necisary files for itn,it in enumerate(timevec): print("\tTime {0:d} of {1:d}".format(itn,Ntime)) # do the book keeping to determine locations of data within the files cur_tlim = (it,it+inttime) curcases = sp.logical_and(ptimevec>=cur_tlim[0],ptimevec<cur_tlim[1]) # SRI data Hack if sridata: curcases_n=sp.logical_and(ntimevec[:,0]>=cur_tlim[0],ntimevec[:,0]<cur_tlim[1]) curfileloc_n = nfile_loc[curcases_n] curfiles_n = set(curfileloc_n) if not sp.any(curcases): print("\tNo pulses for time {0:d} of {1:d}, lagdata adjusted accordinly".format(itn,Ntime)) outdata = outdata[:itn] outnoise = outnoise[:itn] pulses=pulses[:itn] pulsesN=pulsesN[:itn] timemat=timemat[:itn] continue pulseset = set(pulsen[curcases]) poslist = [sp.where(pulsen==item)[0] for item in pulseset ] pos_all = sp.hstack(poslist) try: pos_all = sp.hstack(poslist) 
curfileloc = file_loc[pos_all] except: pdb.set_trace() # Find the needed files and beam numbers curfiles = set(curfileloc) beamlocs = beamn[pos_all] timemat[itn,0] = ptimevec[pos_all].min() timemat[itn,1]=ptimevec[pos_all].max() # cur data pulls out all data from all of the beams and posisions curdata = sp.zeros((len(pos_all),N_samps),dtype = simdtype) curaddednoise = sp.zeros((len(pos_all),N_samps),dtype = simdtype) curnoise = sp.zeros((len(pos_all),NNs),dtype = simdtype) # Open files and get required data # XXX come up with way to get open up new files not have to reread in data that is already in memory for ifn in curfiles: curfileit = [sp.where(pulsen_list[ifn]==item)[0] for item in pulseset ] curfileitvec = sp.hstack(curfileit) ifile = file_list[ifn] curh5data = h52dict(ifile) file_arlocs = sp.where(curfileloc==ifn)[0] curdata[file_arlocs] = curh5data['RawData'][curfileitvec] curaddednoise[file_arlocs] = curh5data['AddedNoise'].astype(simdtype)[curfileitvec] # Read in noise data when you have don't have ACFs if not sridata: curnoise[file_arlocs] = curh5data['NoiseData'].astype(simdtype)[curfileitvec] #SRI data if sridata: curnoise = sp.zeros((len(curfileloc_n),Nbeams,NNs-Pulselen+1,Pulselen),dtype = simdtype) for ifn in curfiles_n: curfileit_n = sp.where(sp.logical_and(tnoiselist[ifn][:,0]>=cur_tlim[0],tnoiselist[ifn][:,0]<cur_tlim[1]))[0] ifile=file_list[ifn] curh5data_n = h52dict(ifile) file_arlocs = sp.where(curfileloc_n==ifn)[0] curnoise[file_arlocs] = curh5data_n['NoiseDataACF'][curfileit_n] # differentiate between phased arrays and dish antennas if self.sensdict['Name'].lower() in ['risr','pfisr','risr-n']: # After data is read in form lags for each beam for ibeam in range(Nbeams): print("\t\tBeam {0:d} of {0:d}".format(ibeam,Nbeams)) beamlocstmp = sp.where(beamlocs==ibeam)[0] pulses[itn,ibeam] = len(beamlocstmp) outdata[itn,ibeam] = lagfunc(curdata[beamlocstmp].copy(), numtype=self.simparams['dtype'], pulse=pulse,lagtype=self.simparams['lagtype']) if sridata: pulsesN[itn,ibeam] = len(curnoise) outnoise[itn,ibeam] = sp.nansum(curnoise[:,ibeam],axis=0) else: pulsesN[itn,ibeam] = len(beamlocstmp) outnoise[itn,ibeam] = lagfunc(curnoise[beamlocstmp].copy(), numtype=self.simparams['dtype'], pulse=pulse,lagtype=self.simparams['lagtype']) outaddednoise[itn,ibeam] = lagfunc(curaddednoise[beamlocstmp].copy(), numtype=self.simparams['dtype'], pulse=pulse,lagtype=self.simparams['lagtype']) else: for ibeam,ibeamlist in enumerate(self.simparams['outangles']): print("\t\tBeam {0:d} of {1:d}".format(ibeam,Nbeams)) beamlocstmp = sp.where(sp.in1d(beamlocs,ibeamlist))[0] curbeams = beamlocs[beamlocstmp] ksysmat = Ksysvec[curbeams] ksysmean = Ksysvec[ibeamlist[0]] inputdata = curdata[beamlocstmp].copy() noisedata = curnoise[beamlocstmp].copy() noisedataadd=curaddednoise[beamlocstmp].copy() ksysmult = ksysmean/sp.tile(ksysmat[:,sp.newaxis],(1,inputdata.shape[1])) ksysmultn = ksysmean/sp.tile(ksysmat[:,sp.newaxis],(1,noisedata.shape[1])) ksysmultna = ksysmean/sp.tile(ksysmat[:,sp.newaxis],(1,noisedataadd.shape[1])) pulses[itn,ibeam] = len(beamlocstmp) pulsesN[itn,ibeam] = len(beamlocstmp) outdata[itn,ibeam] = lagfunc(inputdata *ksysmult, numtype=self.simparams['dtype'], pulse=pulse,lagtype=self.simparams['lagtype']) outnoise[itn,ibeam] = lagfunc(noisedata*ksysmultn, numtype=self.simparams['dtype'], pulse=pulse,lagtype=self.simparams['lagtype']) outaddednoise[itn,ibeam] = lagfunc(noisedataadd*ksysmultna, numtype=self.simparams['dtype'], pulse=pulse,lagtype=self.simparams['lagtype']) # Create output 
dictionaries and output data
        DataLags = {'ACF': outdata, 'Pow': outdata[:, :, :, 0].real, 'Pulses': pulses,
                    'Time': timemat, 'AddedNoiseACF': outaddednoise}
        NoiseLags = {'ACF': outnoise, 'Pow': outnoise[:, :, :, 0].real, 'Pulses': pulsesN,
                     'Time': timemat}
        return (DataLags, NoiseLags)
def plot_signal_decoding_weber_law(data_flags, axes_to_plot=[0, 1], projected_variable_components=dict()): # Define the plot indices diversity_idxs = len(data_flags) / 2 assert len(data_flags) % 2 == 0, \ "Need command line arguments to be diversity_idxs*2, alternating " \ "Weber law and non-Weber law." # Ready the plotting window; colormaps; colors; signals to plot cmaps = [cm.Reds, cm.Blues] shades = sp.linspace(0.7, 0.3, diversity_idxs) success_plot_lws = sp.linspace(4.0, 3.0, diversity_idxs) # Decoding accuracy subfigures fig = decoding_accuracy_subfigures() # Plot success error figures for diversity_idx in range(diversity_idxs): shade = shades[diversity_idx] lw = success_plot_lws[diversity_idx] for Weber_idx in range(2): data_flag_idx = Weber_idx + diversity_idx * 2 data_flag = data_flags[data_flag_idx] # Blue for non-adapted; red for adapted cmap = cmaps[Weber_idx] list_dict = read_specs_file(data_flag) iter_vars = list_dict['iter_vars'] Nn = list_dict['params']['Nn'] iter_plot_var = iter_vars.keys()[axes_to_plot[0]] x_axis_var = iter_vars.keys()[axes_to_plot[1]] data = load_signal_decoding_weber_law(data_flag) successes = data['successes'] nAxes = len(successes.shape) if nAxes > 2: successes = project_tensor(successes, iter_vars, projected_variable_components, axes_to_plot) # Switch axes if necessary if axes_to_plot[0] > axes_to_plot[1]: successes = successes.T # Plot successes, averaged over second axis of successes array avg_successes = sp.average(successes, axis=1) * 100.0 plt.plot(iter_vars[iter_plot_var], avg_successes, color=cmap(shade), zorder=diversity_idx, lw=lw) # Save same plot in both Weber Law and non-Weber Law folders for Weber_idx in range(2): data_flag = data_flags[Weber_idx + diversity_idx * 2] save_decoding_accuracy_fig(fig, data_flag) plt.close() # Plot Kk2 of index [0, 0], sorted for data_flag in data_flags: list_dict = read_specs_file(data_flag) iter_vars = list_dict['iter_vars'] data = load_signal_decoding_weber_law(data_flag) Kk2s = data['Kk2s'] reshape_idxs = sp.hstack((-1, Kk2s.shape[-2:])) Kk2 = Kk2s.reshape(reshape_idxs)[0] means = sp.average(Kk2, axis=1) stdevs = sp.std(Kk2, axis=1) sorted_idxs = sp.argsort(means) sorted_Kk2 = Kk2[sorted_idxs, :] fig = Kk2_subfigures() plt.imshow(sp.log(sorted_Kk2.T) / sp.log(10), interpolation='nearest', cmap=plt.cm.inferno, vmin=-1.51, vmax=0.01) cbar = plt.colorbar() cbar.ax.tick_params(labelsize=14) save_Kk2_fig(fig, data_flag)
### sort everything by geneID
        intron_samples[t].sort_by_gene_id()
        ### binarize intron counts into observed / not observed
        intron_samples[t].binarize_intron_matrix(options.thresholds[t])
        ### clean up data from samples with no intron expression
        ### remove samples that express 5 or fewer introns
        intron_samples[t].filter_strains_on_mincount(options.min_introns_per_sample)
    ### subset to introns that are only scarcely seen in normals
    if options.max_norm_expression_frac is not None:
        ### look at all samples together
        if not type(options.max_norm_expression_frac) is dict:
            tmp = sp.hstack([intron_samples[x].introns_bin for x in options.normal_set])
            exp_frac = sp.sum(tmp, axis=1).astype('float') / tmp.shape[1]
            del tmp
            k_idx = sp.where(exp_frac <= options.max_norm_expression_frac)[0]
            print >> sys.stdout, 'keep %i of %i introns that are expressed below the given threshold in normals' % (k_idx.shape[0], exp_frac.shape[0])
        ### compute fraction per normal type and take max
        else:
            exp_frac = sp.vstack([sp.mean(intron_samples[x].introns_bin.astype('float'), axis=1) <= options.max_norm_expression_frac[x] for x in options.normal_set])
            k_idx = sp.where(exp_frac.min(axis=0))[0]
            print >> sys.stdout, 'keep %i of %i introns that are expressed below the given threshold in normals' % (k_idx.shape[0], exp_frac.shape[1])
def calc_risk_scores(bed_file, rs_id_map, phen_map, out_file=None, split_by_chrom=False, adjust_for_sex=False, adjust_for_covariates=False, adjust_for_pcs=False, non_zero_chromosomes=None, only_score=False, verbose=False, summary_dict=None): print('Parsing PLINK bed file: %s' % bed_file) if split_by_chrom: num_individs = len(phen_map) assert num_individs > 0, 'No individuals found. Problems parsing the phenotype file?' pval_derived_effects_prs = sp.zeros(num_individs) for i in range(1, 23): if non_zero_chromosomes is None or i in non_zero_chromosomes: genotype_file = bed_file + '_%i_keep' % i if os.path.isfile(genotype_file + '.bed'): if verbose: print('Working on chromosome %d' % i) prs_dict = get_prs(genotype_file, rs_id_map, phen_map, only_score=only_score, verbose=verbose) pval_derived_effects_prs += prs_dict[ 'pval_derived_effects_prs'] elif verbose: print('Skipping chromosome') else: prs_dict = get_prs(bed_file, rs_id_map, phen_map, only_score=only_score, verbose=verbose) num_individs = len(prs_dict['iids']) pval_derived_effects_prs = prs_dict['pval_derived_effects_prs'] if only_score: write_only_scores_file(out_file, prs_dict, pval_derived_effects_prs) res_dict = {} elif sp.std(prs_dict['true_phens']) == 0: print('No variance left to explain in phenotype.') res_dict = {'pred_r2': 0} else: # Report prediction accuracy assert len( phen_map ) > 0, 'No individuals found. Problems parsing the phenotype file?' pval_eff_corr = sp.corrcoef(pval_derived_effects_prs, prs_dict['true_phens'])[0, 1] pval_eff_r2 = pval_eff_corr**2 res_dict = {'pred_r2': pval_eff_r2} pval_derived_effects_prs.shape = (len(pval_derived_effects_prs), 1) true_phens = sp.array(prs_dict['true_phens']) true_phens.shape = (len(true_phens), 1) # Store covariate weights, slope, etc. 
weights_dict = {} # Store Adjusted predictions adj_pred_dict = {} # Direct effect Xs = sp.hstack( [pval_derived_effects_prs, sp.ones((len(true_phens), 1))]) (betas, rss00, r, s) = linalg.lstsq(sp.ones((len(true_phens), 1)), true_phens) (betas, rss, r, s) = linalg.lstsq(Xs, true_phens) pred_r2 = 1 - rss / rss00 weights_dict['unadjusted'] = { 'Intercept': betas[1][0], 'ldpred_prs_effect': betas[0][0] } if verbose: print('PRS correlation: %0.4f' % pval_eff_corr) print('Variance explained (Pearson R2) by PRS: %0.4f' % pred_r2) # Adjust for sex if adjust_for_sex and 'sex' in prs_dict and len(prs_dict['sex']) > 0: sex = sp.array(prs_dict['sex']) sex.shape = (len(sex), 1) (betas, rss0, r, s) = linalg.lstsq(sp.hstack([sex, sp.ones((len(true_phens), 1))]), true_phens) Xs = sp.hstack( [pval_derived_effects_prs, sex, sp.ones((len(true_phens), 1))]) (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens) weights_dict['sex_adj'] = { 'Intercept': betas[2][0], 'ldpred_prs_effect': betas[0][0], 'sex': betas[1][0] } if verbose: print( 'Fitted effects (betas) for PRS, sex, and intercept on true phenotype:', betas) adj_pred_dict['sex_adj'] = sp.dot(Xs, betas) pred_r2 = 1 - rss_pd / rss0 print( 'Variance explained (Pearson R2) by PRS adjusted for Sex: %0.4f (%0.6f)' % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))) res_dict['Sex_adj_pred_r2'] = pred_r2 pred_r2 = 1 - rss_pd / rss00 print( 'Variance explained (Pearson R2) by PRS + Sex : %0.4f (%0.6f)' % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))) res_dict['Sex_adj_pred_r2+Sex'] = pred_r2 # Adjust for PCs if adjust_for_pcs and 'pcs' in prs_dict and len(prs_dict['pcs']) > 0: pcs = prs_dict['pcs'] (betas, rss0, r, s) = linalg.lstsq(sp.hstack([pcs, sp.ones((len(true_phens), 1))]), true_phens) Xs = sp.hstack( [pval_derived_effects_prs, sp.ones((len(true_phens), 1)), pcs]) (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens) weights_dict['pc_adj'] = { 'Intercept': betas[1][0], 'ldpred_prs_effect': betas[0][0], 'pcs': betas[2][0] } adj_pred_dict['pc_adj'] = sp.dot(Xs, betas) pred_r2 = 1 - rss_pd / rss0 print( 'Variance explained (Pearson R2) by PRS adjusted for PCs: %0.4f (%0.6f)' % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))) res_dict['PC_adj_pred_r2'] = pred_r2 pred_r2 = 1 - rss_pd / rss00 print( 'Variance explained (Pearson R2) by PRS + PCs: %0.4f (%0.6f)' % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))) res_dict['PC_adj_pred_r2+PC'] = pred_r2 # Adjust for both PCs and Sex if adjust_for_sex and 'sex' in prs_dict and len( prs_dict['sex']) > 0: sex = sp.array(prs_dict['sex']) sex.shape = (len(sex), 1) (betas, rss0, r, s) = linalg.lstsq( sp.hstack([sex, pcs, sp.ones((len(true_phens), 1))]), true_phens) Xs = sp.hstack([ pval_derived_effects_prs, sex, sp.ones((len(true_phens), 1)), pcs ]) (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens) weights_dict['sex_pc_adj'] = { 'Intercept': betas[2][0], 'ldpred_prs_effect': betas[0][0], 'sex': betas[1][0], 'pcs': betas[3][0] } adj_pred_dict['sex_pc_adj'] = sp.dot(Xs, betas) pred_r2 = 1 - rss_pd / rss0 print( 'Variance explained (Pearson R2) by PRS adjusted for PCs and Sex: %0.4f (%0.6f)' % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))) res_dict['PC_Sex_adj_pred_r2'] = pred_r2 pred_r2 = 1 - rss_pd / rss00 print( 'Variance explained (Pearson R2) by PRS+PCs+Sex: %0.4f (%0.6f)' % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))) res_dict['PC_Sex_adj_pred_r2+PC_Sex'] = pred_r2 # Adjust for covariates if adjust_for_covariates and 'covariates' in prs_dict and len( prs_dict['covariates']) > 0: covariates = 
prs_dict['covariates'] (betas, rss0, r, s) = linalg.lstsq( sp.hstack([covariates, sp.ones((len(true_phens), 1))]), true_phens) Xs = sp.hstack([ pval_derived_effects_prs, covariates, sp.ones((len(true_phens), 1)) ]) (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens) adj_pred_dict['cov_adj'] = sp.dot(Xs, betas) pred_r2 = 1 - rss_pd / rss0 print( 'Variance explained (Pearson R2) by PRS adjusted for Covariates: %0.4f (%0.6f)' % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))) res_dict['Cov_adj_pred_r2'] = pred_r2 pred_r2 = 1 - rss_pd / rss00 print( 'Variance explained (Pearson R2) by PRS + Cov: %0.4f (%0.6f)' % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))) res_dict['Cov_adj_pred_r2+Cov'] = pred_r2 if adjust_for_pcs and 'pcs' in prs_dict and len( prs_dict['pcs']) and 'sex' in prs_dict and len( prs_dict['sex']) > 0: pcs = prs_dict['pcs'] sex = sp.array(prs_dict['sex']) sex.shape = (len(sex), 1) (betas, rss0, r, s) = linalg.lstsq( sp.hstack( [covariates, sex, pcs, sp.ones((len(true_phens), 1))]), true_phens) Xs = sp.hstack([ pval_derived_effects_prs, covariates, sex, pcs, sp.ones((len(true_phens), 1)) ]) (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens) adj_pred_dict['cov_sex_pc_adj'] = sp.dot(Xs, betas) pred_r2 = 1 - rss_pd / rss0 print( 'Variance explained (Pearson R2) by PRS adjusted for Cov+PCs+Sex: %0.4f (%0.6f)' % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))) res_dict['Cov_PC_Sex_adj_pred_r2'] = pred_r2 pred_r2 = 1 - rss_pd / rss00 print( 'Variance explained (Pearson R2) by PRS+Cov+PCs+Sex: %0.4f (%0.6f)' % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))) res_dict['Cov_PC_Sex_adj_pred_r2+Cov_PC_Sex'] = pred_r2 # Now calibration y_norm = (true_phens - sp.mean(true_phens)) / sp.std(true_phens) denominator = sp.dot(pval_derived_effects_prs.T, pval_derived_effects_prs) numerator = sp.dot(pval_derived_effects_prs.T, y_norm) regression_slope = (numerator / denominator)[0][0] if verbose: print('The slope for predictions with weighted effects is: %0.4f' % regression_slope) num_individs = len(prs_dict['pval_derived_effects_prs']) # Write PRS out to file. if out_file != None: write_scores_file(out_file, prs_dict, pval_derived_effects_prs, adj_pred_dict, weights_dict=weights_dict) return res_dict
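# The adjusted-R2 computations above all follow one pattern: compare the residual sums of
# squares returned by scipy.linalg.lstsq with and without the PRS column. A minimal
# standalone sketch (illustrative names only; assumes a full-rank design so lstsq returns
# the residual sum of squares):
import scipy as sp
from scipy import linalg

def incremental_r2(prs, covariates, phenotype):
    """Variance explained by the PRS on top of the given covariates."""
    ones = sp.ones((len(phenotype), 1))
    X0 = sp.hstack([covariates, ones])             # null model: covariates only
    X1 = sp.hstack([prs, covariates, ones])        # full model: PRS + covariates
    rss0 = linalg.lstsq(X0, phenotype)[1]
    rss1 = linalg.lstsq(X1, phenotype)[1]
    return 1 - rss1 / rss0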
def load_txt(df,annoFiles, niceTerms=True,annoDBs='MSigDB',dataFile_delimiter=',', verbose=True): """Load input file for slalom from txt files. Loads an txt files and extracts all the inputs required by slalom Args: dataFile (str): Strong containing the file name of the text file with the expression levels dataFile_delimiter (str): delimiter for reading the data_file. Defaults to ','. annoFiles (str, list): Either string containing the file name of the txt file with the gene set annotations or a list containing several anotation files. Each line in in an annotattion file corresponds one gene set; a line starts with the name of the gene set and is followed by the annotated genes. annoDBs (str, list) : database file (MsigDB/REACTOME). If several annotation files are provided this hast to be a list of the same length. niceTerms (bool): Indicates whether to nice terms (omit prefix, capitalize, shorten). Defaults to true. dataFile_delimiter (str): Delimiter used in dataFile; defaults to ','. verbose (bool): Show progress on loading terms (defaults to True). Returns: An dictionary containing all the inputs required by slalom. """ annoFiles = [annoFiles] annoDBs = [annoDBs] niceTerms = [niceTerms] if len(annoFiles)>1: if len(niceTerms)==1: niceTerms = rep(niceTerms,len(annoFiles)) if not len(annoDBs)==len(annoFiles): raise Exception('annoFiles and annoDBs should have the same length') if verbose==True: print('Data file loaded') Ilist = list() termsList = list() i_file = 0 for annoFile in annoFiles: if not os.path.exists(annoFile): raise Exception('annotation file (%s) not found' % annoFile) annoDB = annoDBs[i_file].lower() if not annoDB in ['msigdb','reactome', 'custom']: raise Exception('database (db) needs to be either msigdb, reactome or custom') with open(annoFile) as f: content = [x.strip('\n') for x in f.readlines()] content = [anno.split() for anno in content] terms = [] annotated_genes = [] for anno in content: terms.append(anno[0]) if annoDB=='msigdb': anno_lower = [gene.title() for gene in anno[2:]] else: anno_lower = [gene.title() for gene in anno[1:]] annotated_genes.append(anno_lower) I = pd.DataFrame(SP.zeros((df.shape[0], len(terms))), index=[ind.title() for ind in df.index], columns=terms) for i_anno in range(len(terms)): anno_expressed = list() for g in annotated_genes[i_anno]: if g in I.index: anno_expressed.append(g) I.loc[anno_expressed,terms[i_anno]]=1. if verbose==True and SP.mod(i_anno,50)==0: print('%i terms out of %i terms loaded for current annotation file' % (i_anno, len(terms))) if niceTerms[i_file]==True: if annoDB=='msigdb': substring='HALLMARK_' elif annoDB=='reactome': substring='REACTOME_' else: substring=' ' terms = [term[term.find(substring)+len(substring):30] for term in terms] terms = [term.capitalize().replace('_',' ') for term in terms] Ilist.append(I.values) termsList.append(terms) i_file+=1 if verbose==True: print('Processed annotation file',annoFile) data_out = {} data_out['terms'] = SP.hstack(termsList) data_out['Y'] = df.values.T data_out['I'] = SP.hstack(Ilist) data_out['genes'] = list(df.index) data_out['lab'] = df.columns return data_out
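# Minimal sketch of the indicator-matrix construction performed above: one column per
# gene set, 1 where the gene is annotated in that set. Gene and set names are
# illustrative only and not taken from any real annotation database.
import scipy as SP
import pandas as pd

genes = ['Gene1', 'Gene2', 'Gene3']
gene_sets = {'SetA': ['Gene1', 'Gene3'], 'SetB': ['Gene2']}
I = pd.DataFrame(SP.zeros((len(genes), len(gene_sets))), index=genes,
                 columns=list(gene_sets))
for term, members in gene_sets.items():
    I.loc[[g for g in members if g in I.index], term] = 1.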