def estimateKernel(X, maf):
    """
    Build a sample-by-sample kernel from the columns of X that pass the
    MAF filter.

    The selected columns are converted to float, centred and scaled to unit
    standard deviation column-wise, and the kernel is the product of the
    standardised matrix with its transpose, passed through ``scale_K``.

    Parameters
    ----------
    X : ndarray
        2-D array; columns are selected with the result of
        ``checkMaf(X, maf)``.
    maf : float
        Threshold forwarded to ``checkMaf``
        (presumably a minor-allele-frequency cut-off -- TODO confirm).

    Returns
    -------
    Whatever ``scale_K`` returns for the (rows x rows) kernel matrix.

    Notes
    -----
    A selected column with zero standard deviation produces non-finite
    values; presumably ``checkMaf`` excludes such columns -- TODO confirm.
    """
    # 1. maf filter
    # ``astype(float)`` always returns a new float64 array, so X itself is
    # never modified and no namespace-level ``float_`` alias (removed in
    # NumPy 2.0) is required.
    Xpop = X[:, checkMaf(X, maf)].astype(float)

    # 2. column-wise standardisation (in place on the private copy)
    Xpop -= Xpop.mean(axis=0)
    Xpop /= Xpop.std(axis=0)

    # 3. linear kernel between rows
    Kpop = Xpop.dot(Xpop.T)
    return scale_K(Kpop)
def estimateKernel(X, maf):
    """
    Estimate a linear kernel from the MAF-filtered, standardised columns
    of X.

    Steps: keep the columns selected by ``checkMaf(X, maf)``, cast them to
    float, subtract each column's mean, divide by each column's standard
    deviation, form ``Xpop . Xpop^T`` and hand it to ``scale_K``.

    Parameters
    ----------
    X : ndarray
        2-D input array (rows index the kernel, columns are filtered).
    maf : float
        Filter threshold handed to ``checkMaf``
        (presumably minor allele frequency -- TODO confirm).

    Returns
    -------
    The value returned by ``scale_K`` for the row-by-row kernel.

    Notes
    -----
    Zero-variance columns that survive the filter lead to division by zero
    and therefore non-finite kernel entries.
    """
    # 1. maf filter
    selected = checkMaf(X, maf)
    # Use ndarray methods instead of the ``float_`` / ``dot`` namespace
    # aliases; ``float_`` no longer exists in NumPy 2.0.  ``astype`` makes
    # a fresh copy, so the caller's X stays untouched.
    Xpop = X[:, selected].astype(float)

    # 2. zero mean / unit standard deviation per column
    Xpop -= Xpop.mean(axis=0)
    Xpop /= Xpop.std(axis=0)

    # 3. kernel between rows, rescaled by the project helper
    return scale_K(Xpop.dot(Xpop.T))
def readIn(self, filename):
    """
    Read a fixed-column coefficient file into ``self.GCOEFC1`` and
    ``self.GCOEFS1``.

    The file is opened at ``self.dir + filename``.  The first two lines are
    discarded.  Every following non-blank line is decoded by column:

    * character 5 selects the target table: ``"C"`` -> ``self.GCOEFC1``,
      anything else -> ``self.GCOEFS1`` (presumably the records start with
      a ``GCOEFC1`` / ``GCOEFS1`` tag -- TODO confirm);
    * columns 14-16 and 17-21 are the two integer indices;
    * columns 24-39 hold the mantissa and everything from column 41 on is
      the integer power of ten (column 40, the exponent marker, is skipped).

    The stored value is ``mantissa * 10 ** exponent``.

    Parameters
    ----------
    filename : str
        File name appended to ``self.dir``.

    Side effects
    ------------
    Sets NumPy's global print precision to 22 digits and overwrites entries
    of ``self.GCOEFC1`` / ``self.GCOEFS1``.
    """
    np.set_printoptions(precision=22)
    with open(self.dir + filename, "r") as in_file:
        # The first two lines carry no coefficients.
        in_file.readline()
        in_file.readline()

        for in_line in in_file:
            # A blank line (typically at the end of the file) used to raise
            # IndexError on ``in_line[5]``; skip it instead.
            if not in_line.strip():
                continue

            target = self.GCOEFC1 if in_line[5] == "C" else self.GCOEFS1
            first_idx = int(in_line[14:17])
            second_idx = int(in_line[17:22])
            dat = float_(in_line[24:40])
            target[first_idx][second_idx] = dat * 10 ** int(in_line[41:])
def get_ci(im_data, center='median', mod=3000.0, percentile=0.01):
    """
    Compute an upper bound from the non-zero values of ``im_data``.

    The data are concatenated along the first axis, zeros are dropped, a
    centre ``mu`` (median or mean) and a spread ``sigma`` are estimated, and
    the result is the ``percentile`` quantile of ``Normal(mu, sigma)``
    mirrored about ``mu``: ``2 * mu - ppf(percentile)``.

    Parameters
    ----------
    im_data : sequence of arrays
        Passed to ``scipy.concatenate``.
    center : {'median', 'mean'}
        Which location estimate to use for ``mu``.
    mod : float
        Half-width of the window ``[mu - mod, mu + mod]`` handed to
        ``var_truncNormal``.
    percentile : float
        Lower-tail probability used for the normal quantile.

    Returns
    -------
    float
        ``2 * mu - stats.norm.ppf(percentile, mu, sigma)``.

    Raises
    ------
    ValueError
        If ``center`` is neither ``'median'`` nor ``'mean'`` (previously
        this surfaced later as an unbound-variable error on ``mu``).
    """
    if center not in ('median', 'mean'):
        raise ValueError(
            "center must be 'median' or 'mean', got {0!r}".format(center))

    flattened = scipy.concatenate(im_data)
    flattened = flattened[scipy.nonzero(flattened)]

    if center == 'median':
        mu = scipy.median(flattened)
    else:
        mu = scipy.average(flattened)

    sigma = stats.tstd(flattened)

    # The original line read ``mod == scipy.float_(mod)``: a comparison whose
    # result was thrown away.  The intent is a float conversion.
    mod = float(mod)

    # Refine sigma with the project helper, restricted to mu +/- mod.
    sigma = var_truncNormal(mu - mod, mu + mod, mu, sigma, flattened, mod=mod)
    ci = 2 * mu - stats.norm.ppf(percentile, mu, sigma)
    return ci
def generate_linear_data(n_max, n_step, ssv_g, var):
    """
    Simulate a response driven by one main effect and one pairwise
    interaction of binary predictors, plus correlated noise.

    Parameters
    ----------
    n_max, n_step :
        ``x`` is ``arange(0, n_max, n_step)`` as a column vector.
    ssv_g : float
        Weight of the kernel ``K`` in the noise covariance; the identity
        gets ``1 - ssv_g``.
    var : float
        Scale applied to the two standard-normal effect weights.

    Returns
    -------
    tuple
        ``(Xbg, x, y, yTr, K, ldelta)`` where ``Xbg`` is a random 0/1
        matrix whose columns 1-3 are copied from the binary predictors,
        ``yTr`` is the noise-free response, ``y`` the noisy one, ``K`` the
        kernel used for the noise and ``ldelta = log((1 - ssv_g) / ssv_g)``
        (``None`` when ``ssv_g`` is not positive).
    """
    x = SP.arange(0, n_max, n_step).reshape(-1, 1)
    # Multiplying by 0.0 promotes the (possibly integer) zeros to float.
    y = 0.0 * SP.zeros_like(x).reshape(-1, 1)

    X = convertToBinaryPredictor(x)

    # Random draws happen in this order: background, weights, noise.
    Xbg = 1.0 * (SP.random.rand(*X.shape[:2]) < .5)
    weights = var * SP.random.randn(2, 1)

    # The causal columns (1, 2 and 3) replace the random background.
    Xbg[:, 1:4] = X[:, 1:4]

    main_effect = X[:, 3:4]
    interaction = X[:, 1:2] * X[:, 2:3]
    y += main_effect * weights[0, :]
    y += interaction * weights[1, :]
    yTr = y.copy()

    ssv_v = 1.0 - ssv_g
    if not (ssv_g > 0.0):
        ldelta = None
        K = SP.eye(y.shape[0])
    else:
        ldelta = SP.log(ssv_v / SP.float_(ssv_g))
        K = scale_K(getQuadraticKernel(x, d=20))

    n_noise = K.shape[0]
    noise_cov = ssv_g * K + ssv_v * SP.eye(n_noise)
    noise = SP.random.multivariate_normal(SP.zeros(n_noise), noise_cov)
    y += noise.reshape(-1, 1)

    return Xbg, x, y, yTr, K, ldelta
def generate_linear_data(n_max, n_step, ssv_g, var):
    """
    Generate synthetic data: binary predictors, a response built from one
    single-column effect and one two-column interaction, and Gaussian noise
    with covariance ``ssv_g * K + (1 - ssv_g) * I``.

    Parameters
    ----------
    n_max, n_step :
        Stop and step of the ``arange`` that defines ``x`` (start is 0).
    ssv_g : float
        Share of the noise covariance assigned to the kernel ``K``.
    var : float
        Multiplier for the two randomly drawn effect weights.

    Returns
    -------
    Xbg : random 0/1 matrix with columns 1, 2 and 3 taken from the binary
        predictors.
    x : column vector the predictors were derived from.
    y : response including noise.
    yTr : response before noise was added.
    K : kernel used in the noise covariance (identity if ``ssv_g`` is not
        positive).
    ldelta : ``log((1 - ssv_g) / ssv_g)``, or ``None`` if ``ssv_g`` is not
        positive.
    """
    x = SP.arange(0, n_max, n_step).reshape(-1, 1)
    zeros_col = SP.zeros_like(x).reshape(-1, 1)
    y = zeros_col * 0.0  # float column of zeros

    X = convertToBinaryPredictor(x)
    n_rows, n_cols = X.shape[0], X.shape[1]
    coin_flips = SP.random.rand(n_rows, n_cols) < .5
    Xbg = coin_flips * 1.0

    weights = var * SP.random.randn(2, 1)
    w_main, w_inter = weights[0, :], weights[1, :]

    # main effect of column 3
    y += X[:, 3:4] * w_main
    Xbg[:, 3:4] = X[:, 3:4]

    # interaction between columns 1 and 2
    product = X[:, 1:2] * X[:, 2:3]
    for col in (1, 2):
        Xbg[:, col:col + 1] = X[:, col:col + 1]
    y += product * w_inter

    yTr = y.copy()

    ssv_v = 1.0 - ssv_g
    if ssv_g > 0.0:
        ldelta = SP.log(ssv_v / SP.float_(ssv_g))
        K = scale_K(getQuadraticKernel(x, d=20))
    else:
        ldelta = None
        K = SP.eye(y.shape[0])

    dim = K.shape[0]
    y += SP.random.multivariate_normal(
        SP.zeros(dim), ssv_g * K + ssv_v * SP.eye(dim)).reshape(-1, 1)

    return Xbg, x, y, yTr, K, ldelta
def predictionError(yTest, yPredict):
    """
    Return the sum of squared differences between ``yTest`` and
    ``yPredict`` divided by the length of ``yTest``'s first axis.

    Parameters
    ----------
    yTest : ndarray
        Observed values.
    yPredict : ndarray
        Predicted values, broadcastable against ``yTest``.

    Returns
    -------
    float
        ``((yTest - yPredict) ** 2).sum() / yTest.shape[0]``.
    """
    # Builtin ``float`` forces true division just like the former
    # ``SP.float_`` call, without depending on an alias that was removed in
    # NumPy 2.0.
    return ((yTest - yPredict) ** 2).sum() / float(yTest.shape[0])
def predictionError(yTest, yPredict):
    """
    Squared prediction error averaged over the first axis of ``yTest``.

    Parameters
    ----------
    yTest : ndarray
        Reference values.
    yPredict : ndarray
        Predictions with a shape compatible with ``yTest``.

    Returns
    -------
    float
        Sum of squared residuals divided by ``yTest.shape[0]``.
    """
    residual = yTest - yPredict
    n_samples = float(yTest.shape[0])  # replaces the removed ``float_`` alias
    return (residual ** 2).sum() / n_samples
def load_data(CFG, is_Ens=True, gene_set='GOCB', het_only = True, het_onlyCB=True, pairs=False, filter_median = True, combine=False, filter_expressed = 0):
    """
    Load training, utility and test expression data from HDF5 files and
    assemble them, together with a selected gene set, into one dictionary.

    Parameters
    ----------
    CFG : mapping
        Must provide the keys 'train_file', 'util_file' and 'test_file',
        each naming an HDF5 file that is opened read-only.
    is_Ens : bool
        If true, genes are identified by the 'EnsIds' / 'gene_names_all' /
        'ensIds' datasets; otherwise by lower-cased symbol names
        ('sym_names' / 'sym_namesQ').
    gene_set : str or array
        'GOCB' (union of both cell-cycle lists), 'GO', 'CB' or 'all'; any
        other value is used directly as the gene list.
    het_only : bool
        Intersect the gene set with the heterogeneous genes of the
        training file that have a positive mean in Y.
    het_onlyCB : bool
        Intersect the gene set with the corresponding genes of the utility
        file.
    pairs : bool
        Restrict Y and genes to the columns where the training file's
        'genes_heterogen' equals 1.
    filter_median : bool
        Not referenced anywhere in the body.
    combine : bool
        Stack training and utility data (and labels) on their shared genes.
    filter_expressed : float
        If > 0, keep only genes whose value exceeds ``lod`` (0) in more
        than this fraction of the rows of Y / Y_util
        (assumes rows are samples -- TODO confirm).

    Returns
    -------
    dict
        Keys: 'cc_ens', 'labels_tst', 'labels', 'genes_tst', 'genes', 'Y',
        'Y_test', 'class_labels'.

    Notes
    -----
    None of the opened h5py files is closed by this function.
    NOTE(review): this block was reconstructed from whitespace-collapsed
    text; nesting that could not be derived from syntax is flagged inline.
    """
    f = h5py.File(CFG['train_file'],'r')
    Y = f['LogNcountsMmus'][:]
    labels = f['labels'][:].ravel()

    futil = h5py.File(CFG['util_file'],'r')
    Y_util = futil['LogNcountsQuartz'][:]
    ftst = h5py.File(CFG['test_file'],'r')

    # Gene identifiers for the training and utility data.
    if is_Ens ==True:
        genes = f['EnsIds'][:]
        genes_util = futil['gene_names_all'][:]
    else:
        genes = SP.char.lower(f['sym_names'][:])
        genes_util = SP.char.lower(futil['sym_namesQ'][:])
    #test file
    # Combine the three phase vectors into one label vector (G1 weighted 1,
    # S weighted 2, G2M weighted 3).
    labels_util = futil['phase_vecS'][:]*2+futil['phase_vecG2M'][:]*3+futil['phase_vecG1'][:]
    if CFG['util_file']==CFG['test_file']:
        # Test data share the utility file and its layout.
        genes_tst = genes_util
        YT = ftst['LogNcountsQuartz'][:]
        labels_tst = ftst['phase_vecS'][:]*2+ftst['phase_vecG2M'][:]*3+ftst['phase_vecG1'][:]
    elif is_Ens == False:
        # The test file is opened a second time here; the earlier handle is
        # simply rebound.
        ftst = h5py.File(CFG['test_file'],'r')
        YT = ftst['counts'][:]
        genes_tst = SP.char.lower(ftst['sym_names'][:])
        #genes_tst = ftst['ensIds'][:]
        #labels_tst = SP.array([1,1,1,1,1])#ftst['labels'][:].ravel()
        labels_tst = ftst['labels'][:].ravel()
    elif is_Ens == True:
        ftst = h5py.File(CFG['test_file'],'r')
        YT = ftst['counts'][:]
        #genes_tst = ftst['sym_names'][:]
        genes_tst = ftst['ensIds'][:]
        #labels_tst = SP.array([1,1,1,1,1])#ftst['labels'][:].ravel()
        labels_tst = ftst['labels'][:].ravel()

    if 'class_labels' in ftst.keys():
        class_labels = ftst['class_labels'][:]
    else:
        # Fall back to the distinct test labels rendered as strings.
        class_labels = [i.astype('str') for i in labels_tst]
        # NOTE(review): placement inside this else-branch is inferred --
        # confirm that stored class labels are not meant to be sorted too.
        class_labels = SP.sort(SP.unique(class_labels))

    # Genes flagged heterogeneous that also have a positive column mean.
    heterogen_util = genes_util[SP.intersect1d(SP.where(Y_util.mean(0)>0)[0],SP.where(futil['genes_heterogen'][:]==1)[0])]
    heterogen_train = genes[SP.intersect1d(SP.where(Y.mean(0)>0)[0],SP.where(f['genes_heterogen'][:]==1)[0])]

    # Stored indices are shifted by -1 before use (presumably 1-based in the
    # file -- TODO confirm).
    cellcyclegenes_GO = genes[SP.unique(f['cellcyclegenes_filter'][:].ravel() -1)] # idx of cell cycle genes
    cellcyclegenes_CB = genes[f['ccCBall_gene_indices'][:].ravel() -1] # idxof cell cycle genes ...

    # Select the candidate gene set.
    if SP.any(gene_set=='GOCB'):
        cc_ens = SP.union1d(cellcyclegenes_GO,cellcyclegenes_CB)
    elif SP.any(gene_set=='GO'):
        cc_ens = cellcyclegenes_GO
    elif SP.any(gene_set=='CB'):
        cc_ens = cellcyclegenes_CB
    elif SP.any(gene_set=='all'):
        cc_ens = genes
    else:
        #assert(gene_set in CFG.keys()), str(gene_set+' does not exist. Chose different gene set.')
        cc_ens = gene_set

    if het_only==True:
        cc_ens = SP.intersect1d(cc_ens, heterogen_train)
    # NOTE(review): `pairs` is treated as independent of `het_only`; the
    # original nesting could not be recovered -- confirm.
    if pairs==True:
        Y = Y[:,SP.where(f['genes_heterogen'][:]==1)[0]]
        genes = genes[SP.where(f['genes_heterogen'][:]==1)[0]]
    if het_onlyCB==True:
        cc_ens = SP.intersect1d(cc_ens, heterogen_util)

    #filter_expressed = .2
    lod = 0
    if filter_expressed>0:
        # Count, per gene, the rows with a value above `lod` and keep genes
        # whose fraction exceeds the threshold.
        medY = SP.sum(Y>lod,0)*1.0
        idx_filter = (medY/SP.float_(Y.shape[0]))>filter_expressed
        Y = Y[:,idx_filter]
        genes = genes[idx_filter]

        #medY_tst = SP.sum(Y_tst>lod,0)
        #Y_tst = Y_tst[:,medY_tst>filter_expressed]
        #genes_tst = genes_tst[medY_tst>filter_expressed]

        medY_util = SP.sum(Y_util>lod,0)
        idx_filter = (medY_util/SP.float_(Y_util.shape[0]))>filter_expressed
        Y_util = Y_util[:,idx_filter]
        genes_util = genes_util[idx_filter]

    # Keep only genes present in all three data sets.
    cc_ens = SP.intersect1d(cc_ens, genes)
    cc_ens = SP.intersect1d(cc_ens, genes_tst)
    cc_ens = SP.intersect1d(cc_ens, genes_util)

    if combine==True:
        # Align both matrices on the shared genes, then stack the rows.
        genes = list(genes)
        genes_util = list(genes_util)
        genes_intersect = SP.intersect1d(genes,genes_util)
        cidx_tr = [ genes.index(x) for x in genes_intersect ]
        cidx_util = [genes_util.index(x) for x in genes_intersect]
        genes = SP.array(genes)[cidx_tr]
        genes_util = SP.array(genes_util)[cidx_util]
        Y = SP.vstack([Y[:,cidx_tr],Y_util[:,cidx_util]])
        genes = genes_intersect
        labels = SP.hstack([labels, labels_util])

    Y_tst = YT

    cc_data = {}
    cc_data['cc_ens'] = cc_ens
    cc_data['labels_tst'] = labels_tst
    cc_data['labels'] = labels
    cc_data['genes_tst'] = genes_tst
    cc_data['genes'] = genes
    cc_data['Y'] = Y
    cc_data['Y_test'] = Y_tst
    cc_data['class_labels'] = class_labels
    return cc_data
def drag(radius, temp, freq=0.0, height=inf, density=None, viscosity=None,
         lateral=True, verbose=False):
    """
    Calculate the frequency-dependent viscous drag of a sphere close to an
    infinite plane surface.

    The function uses an approximation to Faxen's solution for the drag
    near a plane surface at frequency f for a sphere of radius R. The drag
    depends on the height (surface to bead-center distance) of the sphere
    above the surface. Further, it depends on the kinematic viscosity of
    the medium, thus you can provide density and viscosity of the medium.
    If not provided, the kinematic viscosity of water at the given
    temperature is calculated.

    Arguments
    ---------
    radius : float
        Radius of the sphere in meter.
    temp : float
        Absolute temperature in kelvin.
    freq : float
        Frequency of the sinusoidal movement. It is divided by the
        characteristic frequency ``nu / (pi * R**2)``.
    height : float
        Surface to bead-center distance in meter. If height = inf, the
        function returns stokes drag. If height < radius, the radius is
        used instead.
    density : float or None
        Density of the medium in kg/m³. If None, the density of water at
        the given temperature is used.
    viscosity : float or None
        Viscosity of the medium in Pa*s. If None, the viscosity of water
        at the given temperature is used.
    lateral : bool
        !Not implemented yet! Intended to switch between lateral (True)
        and axial (False) drag; at present the same lateral formula is
        evaluated for both values.
    verbose : bool
        If true, print which kinematic viscosity is used and warn when
        the height had to be replaced by the radius.

    Returns
    -------
    complex
        The drag coefficient; the Stokes expression [Ref] Equ. D4 for
        infinite height, otherwise the surface-corrected value [Ref]
        Equ. D6.

    [Ref]: Tolić-Nørrelykke et al. Rev. Sci. Instrum. 77, 103101 (2006)

    Nota bene: In contrast to [Ref.], here f_nu is directly calculated
    with the given viscosity.
    """
    R = float_(radius)
    T = float_(temp)
    f = float_(freq)
    # A bead centre cannot be closer to the surface than one radius.
    l = float_(height) if height >= radius else R
    if verbose and height < radius:
        warnings.warn('Height is less than specified radius: '
                      '{0:1.3e} < {1:1.3e}\n'
                      'Height is set to radius as fallback.'
                      ''.format(height, radius))

    # viscous drag far away from a surface at constant speed
    eta = viscosity if viscosity else viscosity_H2O(T)
    d_0 = 6 * pi * eta * R

    if density is None or viscosity is None:
        nu = kinematic_viscosity_H2O(T)
        if verbose:
            print('Kinematic viscosity of water is used: '
                  '{0:1.4e} m²/s'.format(nu))
    else:
        nu = viscosity / density
        if verbose:
            print('Given viscosity and density is used '
                  '{0:1.4e} Pa*s / {1:1.4e} kg/m³ = {2:1.4e} m²/s'
                  ''.format(viscosity, density, nu))

    # f_nu - characteristic frequency
    f_nu = nu / (pi * R * R)

    # relative frequency
    f_ = f / f_nu

    # viscous drag far away from a surface at sinusodial movement
    # at frequency f
    # [Ref.] Equ. D4
    d_stokes = d_0 * (1
                      + complex_(1 - 1j) * sqrt(f_)
                      - complex_(1j) * (2 / 9) * f_)

    if isinf(l):
        return d_stokes

    # The lateral and axial branches of the original were identical, so a
    # single evaluation serves both values of `lateral`.
    # viscous drag at distance l from the surface at sinusodial movement f
    # [Ref.] Equ. D6
    delta = R / sqrt(f_)  # becomes inf (with a NumPy warning) for f_ == 0
    expon = 1 - exp((-(2 * l - R) / delta) * complex_(1 - 1j))

    # workaround 1 / sqrt(complex_(0)) = inf + nan j
    try:
        expon[f_ == 0.0] = complex_(0)
    except (TypeError, IndexError):
        # NumPy scalars do not support item assignment; only that failure
        # is handled here instead of swallowing every exception.
        if f_ == 0.0:  # assume f_ is a scalar
            expon = complex_(0)

    denom = (1
             - (9 / 16) * (R / l)
             * (1
                - sqrt(f_) / 3 * complex_(1 - 1j)
                + f_ * complex_(2j / 9)
                - (4 / 3) * expon))

    return d_stokes / denom