def graph_connected_components(graph_mat):
    """Split a graph, given as an adjacency matrix, into connected components.

    arguments:
        graph_mat - square matrix of the graph; a non-zero entry
                    graph_mat[i, j] links node i to node j
    output:
        list_of_comp - list of components; list_of_comp[i] is the ascending
                       list of node indexes of the i-th component. Components
                       are ordered by their smallest node index. An empty
                       graph gives [].
    """
    node_count = graph_mat.shape[0]
    # component_of_graph[i] is the 1-based component number of node i
    # (0 means "not visited yet"). A platform int is used instead of int8 so
    # graphs with more than 127 components do not overflow the labels.
    component_of_graph = scipy.zeros(node_count, dtype=int)
    cur_comp = 0
    for seed in range(node_count):
        if component_of_graph[seed] != 0:
            continue
        cur_comp += 1
        # Label the seed itself. Relying on a self-loop or a back-link to do
        # that left isolated nodes unlabelled forever (endless loop).
        component_of_graph[seed] = cur_comp
        nodes_to_process = [seed]
        # Depth-first flood fill over the nodes reachable from the seed.
        while nodes_to_process:
            cur_node = nodes_to_process.pop()
            linked = scipy.nonzero(graph_mat[cur_node, :])[0]
            # keep only linked nodes that were not visited yet
            linked = linked[component_of_graph[linked] == 0]
            component_of_graph[linked] = cur_comp
            nodes_to_process.extend(linked.tolist())
    return [scipy.nonzero(component_of_graph == comp)[0].tolist()
            for comp in range(1, cur_comp + 1)]
def setup_Q(self):
    """Build the per-period rate array self.Q (nperiods x ndists x ndists).

    Q[p, i, j] is filled only for pairs of entries of self.dists that differ
    in exactly one position (one dispersal or one extinction step) and whose
    source entry i has a non-zero sum:
      * if dist j has the larger sum (dispersal), the rate is the sum over
        the non-zero positions `src` of dist i of
        D[p, src, dest] * Dmask[p, src, dest];
      * otherwise (extinction) the rate is E[p, dest],
    where `dest` is the single position at which the two dists differ.
    After each period has been filled, self.set_Qdiag(p) is called.
    """
    self.Q = scipy.zeros((self.nperiods, self.ndists, self.ndists))

    # Which (i, j) pairs are one step apart does not depend on the period,
    # so work it out once instead of once per period.
    transitions = []
    for i, d1 in enumerate(self.dists):
        s1 = sum(d1)
        if s1 <= 0:
            continue
        sources = scipy.nonzero(d1)[0]
        for j, d2 in enumerate(self.dists):
            xor = scipy.logical_xor(d1, d2)
            if sum(xor) != 1:
                continue
            # Take the scalar index: a 1-element index array would make
            # `rate` an array and rely on implicit array->scalar conversion
            # when it is stored into Q[p, i, j].
            dest = scipy.nonzero(xor)[0][0]
            is_dispersal = s1 < sum(d2)
            transitions.append((i, j, dest, sources if is_dispersal else None))

    for p in range(self.nperiods):
        for i, j, dest, sources in transitions:
            if sources is not None:
                # dispersal: add the rate from every occupied position
                rate = 0.0
                for src in sources:
                    rate += self.D[p, src, dest] * self.Dmask[p, src, dest]
            else:
                # extinction
                rate = self.E[p, dest]
            self.Q[p, i, j] = rate
        self.set_Qdiag(p)
def getGenoIndex(self,pos_start=None,pos_end=None,chrom=None,windowsize=0):
    """computes the 0-based genotype index range for a position selection.

    The selection can be given in one of two ways:
        - a position range (pos_start, pos_end), both on the same chromosome
        - a whole chromosome (chrom)
    If all of these are None (default), (None, None) is returned.

    Args:
        pos_start:  (chrom, position) pair, inclusive start of the range
        pos_end:    (chrom, position) pair, exclusive end of the range
        chrom:      chromosome to select as a whole (only used when no
                    position range is given)
        windowsize: flanking window added on both sides of the position range
    Returns:
        (idx_start, idx_end): smallest and largest matching genotype index,
        (None, None) when no selection is given, or None when the selection
        matches no genotype.
    """
    # Plain `and` on the None checks (bitwise `&` was used on bools before).
    if (pos_start is not None) and (pos_end is not None):
        assert pos_start[0] == pos_end[0], "chromosomes don't match between start and end position"
        I = self.position["chrom"]==pos_start[0]
        I = I & (self.position["pos"]>=(pos_start[1]-windowsize)) & (self.position["pos"]<(pos_end[1]+windowsize))
    elif chrom is not None:
        I = self.position["chrom"]==chrom
    else:
        return None,None
    I = sp.nonzero(I)[0]
    # An empty selection is reported as None in both branches; previously the
    # position branch crashed inside I.min() on an empty array.
    if I.size==0:
        return None
    return I.min(),I.max()
def cut(self):
    """Trim self.data and self.duration_list to the above-average section.

    Samples whose absolute value exceeds the mean absolute value are located;
    the kept slice runs from the sixth such sample up to (not including) the
    last one. self.duration is then set to the span of the trimmed
    duration_list.
    """
    magnitude = sp.absolute(self.data)
    mean_magnitude = sp.sum(magnitude) / sp.size(self.data)
    above_mean = sp.nonzero(magnitude > mean_magnitude)[0]
    first, last = above_mean[5], above_mean[-1]
    window = slice(first, last)
    self.data = self.data[window]
    self.duration_list = self.duration_list[window]
    self.duration = self.duration_list[-1] - self.duration_list[0]
def getGenoIndex(self,pos0=None,pos1=None,chrom=None,pos_cum0=None,pos_cum1=None):
    """computes the 0-based genotype index range from a position or a cumulative position.

    Positions can be given in one out of two ways:
        - position (pos0-pos1 on chrom)
        - cumulative position (pos_cum0-pos_cum1)
    If all these are None (default), (None, None) is returned.

    Args:
        pos0:       position based selection (inclusive start position)
        pos1:       position based selection (exclusive stop position)
        chrom:      position based selection (chromosome)
        pos_cum0:   cumulative position based selection (inclusive start)
        pos_cum1:   cumulative position based selection (exclusive stop)
    Returns:
        (i0, i1): smallest and largest matching genotype index,
        (None, None) when no selection is given, or None when the selection
        matches no genotype.
    """
    if (pos0 is not None) and (pos1 is not None) and (chrom is not None):
        # Fixed: this branch referenced `self.gneoChrom`, `p0` and `p1`,
        # none of which exist, so it always raised.
        I = self.genoChrom==chrom
        I = I & (self.genoPos>=pos0) & (self.genoPos<pos1)
    elif (pos_cum0 is not None) and (pos_cum1 is not None):
        I = (self.genoPos_cum>=pos_cum0) & (self.genoPos_cum<pos_cum1)
    else:
        return None,None
    I = SP.nonzero(I)[0]
    # Same empty-selection result for both kinds of selection.
    if I.size==0:
        return None
    return I.min(),I.max()
def __interpolateBetweenBinaryObjects(obj1, obj2, slices):
    """
    Builds `slices` intermediate binary volumes that shrink obj1 step by step.

    Every True voxel of obj1 is tagged with its distance to the nearest True
    voxel of obj2; voxels are then dropped in `slices` evenly spaced
    distance thresholds. The returned boolean array stacks obj1, the
    intermediate volumes and a final volume along a new axis placed at
    position 3.

    @note private inner function
    """
    if obj1.shape != obj2.shape:
        raise AttributeError(
            "The two supplied objects have to be of the same shape, not {} and {}.".format(obj1.shape, obj2.shape)
        )

    # constants
    offset = 0.5  # must be smaller than the smallest possible non-zero distance
    temporal_dimension = 3

    # coordinates of the True voxels of both objects
    voxels_1 = scipy.nonzero(obj1)
    voxels_2 = scipy.nonzero(obj2)

    # for every obj1 voxel: distance to the closest obj2 voxel
    nearest = cdist(scipy.transpose(voxels_1), scipy.transpose(voxels_2)).min(1)
    if len(nearest) != len(voxels_1[0]):
        raise Exception("Invalid number of minimal distances received.")

    # distance map over obj1; the offset keeps zero distances distinguishable
    # from background
    distance_map = obj1.astype(float)
    distance_map[voxels_1] = nearest + offset

    farthest = nearest.max()
    step = farthest / float(slices + 1)
    threshold = farthest

    # sanity check: the distance map must cover exactly the voxels of obj1
    if not scipy.all(distance_map.astype(scipy.bool_) == obj1.astype(scipy.bool_)):
        raise Exception("First created object does not correspond to obj1.")

    volumes = [distance_map.astype(scipy.bool_)]  # first entry equals obj1
    remaining = slices
    while remaining > 0:
        threshold -= step
        # drop everything farther away than the current threshold
        distance_map[distance_map > threshold] = 0
        volumes.append(distance_map.astype(scipy.bool_))  # astype copies
        remaining -= 1

    # last volume: only voxels at distance zero survive
    distance_map[distance_map > offset] = 0
    volumes.append(distance_map.astype(scipy.bool_))

    stacked = scipy.asarray(volumes, dtype=scipy.bool_)
    return scipy.rollaxis(stacked, 0, temporal_dimension + 1)
def move_nodes(d, lfun, nodes):#, dely):
    """ Move nodes one timestep (projecting lost points to the boundary)

    Arguments:
        d     - callable evaluated as d(x, y); positive values are treated
                as "outside" by the projection step below
        lfun  - callable evaluated on the bar midpoints; scaled by
                restlength_factor it gives the rest length of each bar
        nodes - node coordinates, indexed as nodes[0, ...] / nodes[1, ...]
                (presumably a 2 x N array -- TODO confirm)
    Returns:
        the moved node array (a new array, see `array(nodes + dt*dp)`).

    NOTE(review): `h0` and `geps` are not defined in this function and must
    come from the enclosing module.
    """
    # get delauney
    dely = get_dely(d, nodes)
    # force constant
    dt = .1520015
    # finite-difference step used for the gradient of d
    deps = h0 * sqrt(finfo(float64).eps)
    restlength_factor = 1.400025
    # bars and midpoints (the last two entries returned by get_dely)
    bars, barmids = dely[-2:]
    barvecs = nodes[:,bars[:,1]] - nodes[:,bars[:,0]]
    barls = sqrt((barvecs**2).sum(0))
    u = barvecs / barls # unit vectors
    # force from each bar
    restlen = restlength_factor * lfun(*barmids)
    # print('restlen: {0} \nbarls : {1}'.format(restlen.shape, barls.shape))
    # only bars shorter than their rest length push (no pulling force)
    logic = restlen > barls
    f = where(logic, restlen-barls, 0.0)
    # f = where(f<h0/2.0, f, h0/2.0)
    #
    ns = nodes.shape
    spmat = sparse.csc_matrix
    # print(ns)
    # print(u[0].shape)
    # print(f.shape)
    # print(bars[:,0].shape)
    # Accumulate the bar forces per node: row 0 collects the first
    # component, row 1 the second; each bar acts with opposite sign on its
    # two end nodes. The sparse constructor sums duplicate (row, col) entries.
    dp = (-spmat((u[0]*f, (0*bars[:,0], bars[:,0])), shape=ns).todense() +
          spmat((u[0]*f, (0*bars[:,1], bars[:,1])), shape=ns).todense())
    dp += (-spmat((u[1]*f, (0*bars[:,0]+1, bars[:,0])), shape=ns).todense() +
           spmat((u[1]*f, (0*bars[:,1]+1, bars[:,1])), shape=ns).todense())
    nodes = array(nodes + dt*dp)
    # project boundary points back into the domain
    d_ = d(*nodes)
    ix = nonzero(d_>0)
    some_out = True
    count = 0
    # The first pass always runs (some_out starts True); later passes only
    # run while some node still has d > geps.
    while some_out:
        # forward-difference gradient of d at the outside nodes
        gradx = 1.0/deps * (d(nodes[0,ix]+deps, nodes[1,ix]) - d_[ix])
        grady = 1.0/deps * (d(nodes[0,ix], nodes[1,ix] + deps) - d_[ix])
        norm = sqrt(gradx**2 + grady**2)
        nodes[0,ix] -= d_[ix]*gradx / norm
        nodes[1,ix] -= d_[ix]*grady / norm
        d_ = d(*nodes)
        ix = nonzero(d_>geps)
        some_out = ix[0].size
        count+=1
        # give up after a fixed number of projection passes and only report
        if count>5:
            #raise ValueError("counted "+str(ix[0].size)+" nodes oob")
            print("counted ",str(ix[0].size)," nodes oob")
            break
    return nodes
def __interpolateBetweenBinaryObjects(obj1, obj2, slices):
    """
    Creates a stack of binary volumes leading from obj1 towards obj2.

    Each True voxel of obj1 receives its distance to the closest True voxel
    of obj2. Starting from obj1, `slices` further volumes are produced by
    removing the voxels beyond an evenly decreasing distance threshold, and a
    closing volume keeps only the voxels at distance zero. All volumes are
    stacked along a new axis at position 3 of the returned boolean array.

    @note private inner function
    """
    if not obj1.shape == obj2.shape:
        message = 'The two supplied objects have to be of the same shape, not {} and {}.'
        raise AttributeError(message.format(obj1.shape, obj2.shape))

    # constants
    offset = 0.5  # has to stay below the smallest possible non-zero distance
    temporal_dimension = 3

    # positions of all True voxels
    src_voxels = scipy.nonzero(obj1)
    dst_voxels = scipy.nonzero(obj2)

    # smallest distance from each obj1 voxel to any obj2 voxel
    pairwise = cdist(scipy.transpose(src_voxels), scipy.transpose(dst_voxels))
    closest = pairwise.min(1)
    if len(closest) != len(src_voxels[0]):
        raise Exception('Invalid number of minimal distances received.')

    # replace the True voxels of obj1 by (distance + offset); the offset is
    # needed because the distances include zeros
    weighted = obj1.astype(float)
    weighted[src_voxels] = closest
    weighted[src_voxels] += offset

    # step size between two consecutive volumes
    limit = closest.max()
    delta = limit / float(slices + 1)

    # control step: the weighted volume must mark exactly the voxels of obj1
    same_support = scipy.all(weighted.astype(scipy.bool_) == obj1.astype(scipy.bool_))
    if not same_support:
        raise Exception('First created object does not correspond to obj1.')

    stack = [weighted.astype(scipy.bool_)]  # corresponds to obj1
    for _ in range(slices):
        limit -= delta
        weighted[weighted > limit] = 0  # remove everything above the threshold
        stack.append(weighted.astype(scipy.bool_))  # astype makes a copy

    # closing volume
    weighted[weighted > offset] = 0
    stack.append(weighted.astype(scipy.bool_))

    return scipy.rollaxis(scipy.asarray(stack, dtype=scipy.bool_), 0, temporal_dimension + 1)
def neutral_step(community, death_map, v, T_opt_map, band_temperatures, dispersal_map, variance_survival):
    """Perform one death/replacement step on `community` (modified in place).

    A cell is drawn with probabilities `death_map`; for a 3D `community` an
    individual (non-zero entry) of that cell is drawn uniformly. With
    probability governed by `v` the vacancy is filled by a new species
    (largest species id + 1) whose `T_opt_map` entry is the temperature of
    the vacancy's band. Otherwise a parent is drawn repeatedly from
    `dispersal_map[death_index]` until `check_offspring_survival` accepts it,
    and the parent's species and optimum are copied into the vacancy.

    Returns the (mutated) `community` and `T_opt_map`.
    """
    # `shape` has 3 items for a 3D community, 2 for a 2D one (one individual per cell).
    shape = community.shape
    landscape_size = shape[0] * shape[1]
    flat_indices = sc.arange(landscape_size)

    def _draw_site(z_index):
        # Turn a flat cell index into an index tuple usable on `community`
        # and `T_opt_map`; in 3D also draw one occupied slot of the cell.
        site = [0, 0, 0]
        site[:2] = get_xy(z_index, shape[1])
        if len(shape) == 2:
            return tuple(site[:2])
        # `sc.nonzero` returns a tuple of index arrays; the cell is 1D, so [0] holds all occupied slots.
        site[2] = sc.random.choice(sc.nonzero(community[site[0], site[1]])[0])
        return tuple(site)

    # Pick the cell where the death occurs, then the individual that dies.
    death_z_index = sc.random.choice(flat_indices, p=death_map)
    died = _draw_site(death_z_index)

    x = sc.random.uniform(0, 1 + sys.float_info.epsilon)
    if x <= v:
        # speciation: new species id, optimum = temperature of the vacant band
        # (the first dimension of `community` indexes the bands).
        community[died] = sc.amax(community) + 1
        T_opt_map[died] = band_temperatures[died[0]]
        return community, T_opt_map

    # dispersal: rejection sampling -- redraw cell and individual until the offspring survives.
    while True:
        birth_z_index = sc.random.choice(flat_indices, p=dispersal_map[death_z_index])
        reproduced = _draw_site(birth_z_index)
        offspring_survived = check_offspring_survival(
            T_opt_map[reproduced], band_temperatures[died[0]], variance_survival)
        # keep the original `== False` test (not `not ...`) so return values
        # such as None end the loop exactly as before
        if not (offspring_survived == False):
            break
    community[died] = community[reproduced]
    T_opt_map[died] = T_opt_map[reproduced]
    return community, T_opt_map
def removeFactors(self, *idx):
    """Remove the factors with the given indices.

    self.idx holds one flag per factor: factors flagged 1 are forwarded to
    self.learnTheta, all others to self.constTheta. Each node receives the
    position of the factor within its own subset. self.idx and self.K are
    shrunk accordingly.

    Args:
        *idx: factor indices, expressed relative to the state on entry.
    """
    # Process the largest index first so that removing one factor does not
    # shift the positions of the indices that are still to be removed.
    for i in sorted(idx, reverse=True):
        # Fixed: the flag of the current factor `i` is tested; the original
        # indexed with the whole `idx` tuple, which fails for several indices.
        if self.idx[i] == 1:
            self.learnTheta.removeFactors(
                s.where(i == s.nonzero(self.idx)[0])[0])
        else:
            self.constTheta.removeFactors(
                s.where(i == s.nonzero(1 - self.idx)[0])[0])
        self.idx = self.idx[s.arange(self.K) != i]
        self.K -= 1
def __call__(self, dx):
    """ hat function evaluation

    With x = convarg(dx) - self.c the result is
        1 - x/self.a   where self.a < x <= 0,
        1 - x/self.b   where 0 < x < self.b,
        0              everywhere else.
    """
    x = convarg(dx) - self.c
    y = sc.zeros_like(x)
    lower = sc.nonzero((x<=.0)&(x>self.a))
    upper = sc.nonzero((x<self.b)&(x>.0))
    y[lower] = 1. - x[lower]/self.a
    y[upper] = 1. - x[upper]/self.b
    return y
def __call__(self, dx):
    """ hat function evaluation

    The argument is shifted by self.c; shifted values in (self.a, 0] map to
    1 - x / self.a, values in (0, self.b) map to 1 - x / self.b and all
    remaining values map to 0.
    """
    x = convarg(dx) - self.c
    y = sc.zeros_like(x)
    # one (selection mask, divisor) pair per flank of the hat
    flanks = (
        ((x > self.a) & (x <= .0), self.a),
        ((x > .0) & (x < self.b), self.b),
    )
    for mask, width in flanks:
        i = sc.nonzero(mask)
        y[i] = 1. - x[i] / width
    return y
def gen_single_trial(interval_lengths, rates):
    """
    Generates a single spike train made of consecutive intervals.

    Interval i has the length `interval_lengths[i]` and the firing rate
    `rates[i]`; intervals of length zero (and their rates) are skipped.
    The per-interval trains come from `stg.gen_homogeneous_poisson` and are
    joined with `stools.st_concatenate`.
    """
    # Slot 0 is a placeholder (set to 0 s below), slots 1.. hold the lengths in seconds.
    edges = sp.ones(len(interval_lengths) + 1) * pq.s
    edges[1:] = [length.rescale(edges.units) for length in interval_lengths]

    # Drop zero-length intervals together with their rates.
    rates = rates[sp.nonzero(edges[1:])]
    edges = edges[sp.nonzero(edges)]

    # Turn the lengths into cumulative interval boundaries starting at 0 s.
    edges[0] = 0.0 * pq.s
    edges = sp.cumsum(edges)

    trains = []
    for k, rate in enumerate(rates):
        segment = stg.gen_homogeneous_poisson(
            rate, t_start=edges[k], t_stop=edges[k + 1])
        trains.append(segment)
    return stools.st_concatenate(trains)
def subgraph_with_center(graph_mat, node_names, center_name):
    """takes the name of a center node and returns the subgraph made of the
    nodes that are directly linked with it.

    arguments:
        graph_mat,node_names - graph description as matrix
                               (see matrix_from_tuple_list doc for details)
        center_name - name of the center node (first match in node_names)
    output:
        subgraph,sub_node_names - subgraph description as matrix
                               (see matrix_from_tuple_list doc for details)"""
    matches = scipy.nonzero(node_names == center_name)[0]
    center_row = graph_mat[matches[0], :]
    # indexes of the nodes linked with the center; the center itself is part
    # of them only if its diagonal entry is non-zero
    friends = scipy.nonzero(center_row)[0]
    # FIXME only the nodes directly linked with the center are considered
    rows_kept = graph_mat[friends, :]
    subgraph = rows_kept[:, friends]
    return (subgraph, node_names[friends])
def interpolateBetweenBinaryObjects(obj1, obj2, slices):
    """
    Takes two binary objects and puts slices slices in-between them, each of
    which contains a smooth binary transition between the objects.

    The transition is the voxel-wise OR of the obj1 -> obj2 interpolation and
    the obj2 -> obj1 interpolation reversed along the temporal axis (axis 3).

    @raise Exception if the last volume of the result does not equal obj2
    """
    # constants
    temporal_dimension = 3
    # flip second returned binary objects along temporal axis
    slicer = [slice(None) for _ in range(obj1.ndim + 1)]
    slicer[temporal_dimension] = slice(None, None, -1)
    # logical-or combination; the index must be a tuple, NumPy no longer
    # accepts a list of slices as a multi-dimensional index
    ret = (__interpolateBetweenBinaryObjects(obj1, obj2, slices)
           | __interpolateBetweenBinaryObjects(obj2, obj1, slices)[tuple(slicer)])
    # control step: see if last volume corresponds to obj2
    slicer[temporal_dimension] = slice(-1, None)
    last_volume = scipy.squeeze(ret[tuple(slicer)])
    expected = obj2.astype(scipy.bool_)
    if not scipy.all(last_volume == expected):
        # report the voxels that differ (XOR); the overlap (AND) was counted before
        raise Exception(
            'Last created object does not correspond to obj2. Difference of {} voxels.'
            .format(len(scipy.nonzero(last_volume ^ expected)[0])))
    return ret
def getPhenotypes(self, phenotype_IDs=None, phenotype_query=None, sample_idx=None, center=True, intersection=False):
    """load Phenotypes

    Args:
        phenotype_IDs:      names of phenotypes to load
        phenotype_query:    string holding a pandas query (e.g. "(environment==1) & (phenotype_ID=='growth')"
                            selects all phenotypes that have a phenotype_ID equal to growth under environment 1.
        sample_idx:         Boolean sample index for subsetting individuals
        center:             Boolean: mean center (and mean-fill in missing values if intersection==False)? (default True)
        intersection:       restrict observation to those observed in all phenotypes? (default: False)
    Returns:
        phenotypes:     [N x P] scipy.array of phenotype values for P phenotypes
        sample_idx_intersect:        index of individuals in phenotypes after filtering missing values

    NOTE(review): the body visible here only selects the phenotype indices
    `I`; the code that would use `sample_idx`, `center` and `intersection`
    and produce the documented return values is not part of it.
    """
    if phenotype_IDs is not None:
        # index of the first phenotype matching each requested name
        I = SP.array([
            SP.nonzero(self.phenotype_ID == n)[0][0] for n in phenotype_IDs
        ])
    elif phenotype_query is not None:
        try:
            I = self.index_frame.query(phenotype_query).values[:, 0]
        except Exception as arg:
            # Fixed: both format arguments are passed as one tuple (the
            # message used to raise TypeError), and the print/except syntax
            # is valid on Python 2.6+ and Python 3.
            print("query '%s' yielded no results: %s" % (phenotype_query, str(arg)))
            I = SP.zeros([0], dtype="int")
def hzd_do_value(sa, r_nu, rtrn_rte):
    """
    Interpolate hazard values at the requested return rates.

    params:
      sa [vector (nx1)] response spectral accelerations
      r_nu [vector (nx1)] event activity for the corresponding element in sa
      rtrn_rte [vector (mx1)] return rates of interest.

    returns:
      hzd_val: one value per entry of rtrn_rte; all zeros when no event
      with non-zero sa is left.
    """
    assert sa.shape == r_nu.shape, str(sa.shape) + 'should = ' + str(
        r_nu.shape)

    # Events with sa = 0 are dropped, since they would affect the end of the
    # curve.
    keep = nonzero(sa)
    hzd, cumnu = _rte2cumrte(sa[keep], r_nu[keep])

    if len(hzd) == 0:
        return zeros(rtrn_rte.shape)

    # annual exceedance rate = cumulative event activity
    # rates beyond the right end of cumnu give 0, rates below the left end
    # give hzd[0].
    return interp(rtrn_rte, cumnu, hzd, left=hzd[0], right=0.0)
def automatch(self):
    """Match the annotation points (self.ax, self.ay) to rows of self.peaks.

    Only the first two columns of self.peaks are used as coordinates. The
    result is stored in self.mpeaks as one `[x, y, label]` list per
    annotation that ends up with exactly one candidate peak, with the label
    taken from self.alabels[i].get_text(). A summary line is printed.
    """
    axy = array([self.ax, self.ay]).T
    pxy = self.peaks[:, :2]
    # per-axis scale combining the spread of annotations and peaks
    sf = sqrt(axy.var(0) + pxy.var(0))
    # candidate peaks: within 0.1 scaled units of the annotation on every axis
    nindex = [
        nonzero((abs((pxy - axy[i, :])) / sf < 0.1).all(1))[0]
        for i in range(axy.shape[0])
    ]
    # squared distance from each annotation to its nearest other annotation
    # (index 0 of the sorted distances is the annotation itself)
    d2 = [sorted(((axy - x)**2).sum(1))[1] for x in axy]
    # keep candidates whose squared distance is below a quarter of that value
    dindex = [
        x[(((pxy[x] - axy[i])**2).sum(1) < 0.25 * d2[i])]
        for i, x in enumerate(nindex)
    ]
    # annotations that still have several candidates, with their squared distances
    qindex = [(i, x, ((pxy[x] - axy[i])**2).sum(1))
              for i, x in enumerate(dindex) if len(x) > 1]
    d2cutoff = 10
    # accept the closest candidate when the runner-up is more than d2cutoff
    # times farther away (in squared distance)
    for i, x in [(i, [x[argmin(ds)]]) for i, x, ds in qindex
                 if sorted(ds / min(ds))[1] > d2cutoff]:
        dindex[i] = x
    self.mpeaks = [
        pxy[x[0]].tolist() + [self.alabels[i].get_text()]
        for i, x in enumerate(dindex) if len(x) == 1
    ]
    # print as a function call: valid on Python 2 and Python 3 (the
    # statement form was a SyntaxError on Python 3)
    print("Matched %d peaks out of %d/%d" % (len(
        self.mpeaks), len(axy), len(pxy)))
def __init__(self, grid, fArray, zDrawsSorted):
    """Store a piecewise-linear function on `grid` and pre-sum the draws per cell.

    Args:
        grid:         grid points; the spacing grid[1] - grid[0] is used for
                      every cell, so the grid is presumably uniform
                      (TODO confirm)
        fArray:       function values at the grid points (same length as grid)
        zDrawsSorted: draws; the slicing below assumes ascending order

    Sets:
        slopes[i]          slope of the function between grid[i] and grid[i+1]
        boundaryIndices[i] index of the first draw >= grid[i]
                           (len(zDraws) if there is none)
        cellSums           sum of the draws below grid[0], between consecutive
                           grid points, and from the last grid point upwards
    """
    assert(len(grid) == len(fArray))
    (self.grid, self.fArray) = (grid, fArray)
    # Fixed: the parameter is `zDrawsSorted`; the body referred to an
    # undefined name `zDraws`.
    self.zDraws = zDrawsSorted
    self.slopes = scipy.zeros(len(grid) - 1)
    self.dx = grid[1] - grid[0]
    for i in range(len(grid) - 1):
        self.slopes[i] = (fArray[i+1] - fArray[i]) / self.dx
    # set up sums
    self.cellSums = scipy.zeros(len(grid) + 1)
    self.boundaryIndices = [len(self.zDraws)] * len(grid)
    for (i, x) in enumerate(grid):
        indices = scipy.nonzero(self.zDraws >= x)[0]
        if (len(indices) > 0):
            self.boundaryIndices[i] = indices[0]
    self.cellSums[0] = scipy.sum(self.zDraws[0:self.boundaryIndices[0]])
    for i in range(1, len(self.cellSums)-1):
        self.cellSums[i] = scipy.sum(self.zDraws[self.boundaryIndices[i-1] : self.boundaryIndices[i]])
    self.cellSums[-1] = scipy.sum(self.zDraws[self.boundaryIndices[-1] : ])
    # consistency report: the cell sums should add up to the total of all draws
    diff = scipy.sum(self.zDraws) - scipy.sum(self.cellSums)
    print("diff: %f" % diff)
    for i in range(len(grid)):
        if (self.boundaryIndices[i] < len(self.zDraws)):
            print("grid point %f, boundary %f" % (self.grid[i], self.zDraws[self.boundaryIndices[i]]))
        else:
            print("grid point %f, no draws to right" % self.grid[i])
def errorApproximation(self, ratio, dim=20):
    """Estimate the prediction error by hiding known entries of self.vxm.

    A share `ratio` of the non-zero entries is drawn at random (with
    replacement), set to zero in a copy of the matrix, and predicted back
    with self.predict(dim, ...). The root mean squared difference between
    the predictions and the hidden values is stored in self.rmse.
    """
    self.buildMatrix()
    known = sp.nonzero(self.vxm)
    known_count = (self.vxm != 0).sum()
    numTest = int(ratio * known_count)

    # draw the held-out entries; duplicates are possible
    draws = [sp.random.randint(0, known[0].size) for _ in range(numTest)]
    held_out = [[known[0][k], known[1][k]] for k in draws]

    self.modvxm = sp.copy(self.vxm)
    for row, col in held_out:
        self.modvxm[row, col] = 0

    # NOTE(review): the result is stored under `modvmx` (not `modvxm`) and is
    # not used below -- confirm whether fillAverages works in place.
    self.modvmx = self.fillAverages(vxm=self.modvxm)
    self.newmodvxm = self.predict(dim, vxm=self.modvxm)

    sqDiff = sum(
        sp.square(self.newmodvxm[row, col] - self.vxm[row, col])
        for row, col in held_out
    )
    self.rmse = sp.sqrt(sqDiff / len(held_out))
def smoP(dataMatIn, classLabels, C, toler, maxIter, kTup=('normal', 0)):
    """Outer loop of the SMO optimisation.

    Alternates between a pass over all samples and passes over the
    non-bound samples (0 < alpha < C). Every index is handed to `innerL`,
    whose return value is added to the count of changed alpha pairs. The
    loop ends after `maxIter` passes, or when a pass over the entire set
    changes nothing.

    Returns:
        (oS.b, oS.alphas) of the `optStruct` built from the inputs.
    """
    oS = optStruct(sp.mat(dataMatIn), sp.mat(classLabels).transpose(), C, toler, kTup)
    iterm = 0
    entireSet = True
    alphaPairsChanged = 0
    while iterm < maxIter and (alphaPairsChanged > 0 or entireSet):
        alphaPairsChanged = 0
        if entireSet:
            # visit every sample as the first alpha
            candidates = range(oS.m)
            progress = "fullSet, iter: %d i:%d, pairs changed %d"
        else:
            # visit only the non-bound samples, i.e. those with 0 < alpha < C
            candidates = sp.nonzero((oS.alphas.A > 0) * (oS.alphas.A < C))[0]
            progress = "non-bound, iter: %d i:%d, pairs changed %d"
        for i in candidates:
            alphaPairsChanged += innerL(i, oS)  # innerL handles the second alpha
            print(progress % (iterm, i, alphaPairsChanged))
        iterm += 1
        # switch between full passes and non-bound passes
        if entireSet:
            entireSet = False
        elif alphaPairsChanged == 0:
            entireSet = True
        print("iteration number: %d" % iterm)
    return oS.b, oS.alphas
def getPhenotypes(self,phenotype_IDs=None,phenotype_query=None,sample_idx=None,center=True,intersection=False):
    """load Phenotypes

    Args:
        phenotype_IDs:      names of phenotypes to load
        phenotype_query:    string holding a pandas query (e.g. "(environment==1) & (phenotype_ID=='growth')"
                            selects all phenotypes that have a phenotype_ID equal to growth under environment 1.
        sample_idx:         Boolean sample index for subsetting individuals
        center:             Boolean: mean center (and mean-fill in missing values if intersection==False)? (default True)
        intersection:       restrict observation to those observed in all phenotypes? (default: False)
    Returns:
        phenotypes:     [N x P] scipy.array of phenotype values for P phenotypes
        sample_idx_intersect:        index of individuals in phenotypes after filtering missing values

    NOTE(review): the body visible here only selects the phenotype indices
    `I`; the code that would use `sample_idx`, `center` and `intersection`
    and produce the documented return values is not part of it.
    """
    if phenotype_IDs is not None:
        # index of the first phenotype matching each requested name
        I = SP.array([SP.nonzero(self.phenotype_ID==n)[0][0] for n in phenotype_IDs])
    elif phenotype_query is not None:
        # print(...) with one argument and `except ... as` are valid on
        # Python 2.6+ and Python 3; the statement forms used before are a
        # SyntaxError on Python 3.
        try:
            I = self.index_frame.query(phenotype_query).values[:,0]
            # if there are no results we won't actually get an exception,
            # we just get an empty response
            if len(I) == 0:
                print("query '%s' yielded no results!" % (phenotype_query))
                I = SP.zeros([0],dtype="int")
        except Exception as arg:
            print("query '%s' yielded no results: %s" % (phenotype_query, str(arg)))
            I = SP.zeros([0],dtype="int")
def dup_fig(x, y):
    """Scatter-plot the rows of `x`, one colour per label in `y`, and show the figure.

    Labels are taken to be 1..k, where k is the number of distinct values in
    `y`; columns 0 and 1 of `x` are the plotted coordinates.
    """
    # index 0 is unused because labels start at 1
    colors = [None, 'black','blue','brown','purple','orange','cyan','gray','yellow','black','red','green']
    label_count = len(sp.unique(y))
    label = 1
    while label <= label_count:
        members = sp.nonzero(y == label)
        xs = x[members, 0]
        ys = x[members, 1]
        plt.plot(xs, ys, 'o', color=colors[label])
        label += 1
    plt.show()
def testGauss(k1=1.3):
    """Train with the ('Gauss', k1) kernel on testSetRBF.txt and print error rates.

    The model is trained with `smoP`; the support vectors are the samples
    with alpha > 0. The error rate is printed for the training file and for
    testSetRBF2.txt.
    """
    kTup = ('Gauss', k1)
    dataArr, labelArr = loadData('testSetRBF.txt')
    # train to obtain the parameters
    b, alphas = smoP(dataArr, labelArr, 200, 0.0001, 10000, kTup)
    datMat = sp.mat(dataArr)
    labelMat = sp.mat(labelArr).transpose()
    svInd = sp.nonzero(alphas.A > 0)[0]
    # matrix of the support vectors
    sVs = datMat[svInd]
    labelSV = labelMat[svInd]
    print("there are %d Support Vectors" % np.shape(sVs)[0])
    # label * alpha of every support vector, the same for each prediction
    weights = sp.multiply(labelSV, alphas[svInd])

    def error_rate(samples, labels):
        # share of rows whose predicted sign differs from the label's sign
        m, n = np.shape(samples)
        errorCount = 0
        for row in range(m):
            kernelEval = kernelTrans(sVs, samples[row, :], kTup)
            predict = kernelEval.T * weights + b
            if np.sign(predict) != np.sign(labels[row]):
                errorCount += 1
        return float(errorCount) / m

    print("the training error rate is: %f" % error_rate(datMat, labelArr))
    # see how the parameters do on new data
    dataArr, labelArr = loadData('testSetRBF2.txt')
    print("the test error rate is: %f" % error_rate(sp.mat(dataArr), labelArr))
def testDigits(kTup=('normal', 10)):
    """Train on the 'trainingDigits' images with kernel `kTup` and print error rates.

    Same procedure as testGauss: train with `smoP`, keep the samples with
    alpha > 0 as support vectors, then print the error rate on the training
    images and on the 'testDigits' images.
    """
    dataArr, labelArr = loadImage('trainingDigits')
    b, alphas = smoP(dataArr, labelArr, 200, 0.0001, 10000, kTup)
    datMat = sp.mat(dataArr)
    labelMat = sp.mat(labelArr).transpose()
    svInd = sp.nonzero(alphas.A > 0)[0]
    # matrix of the support vectors
    sVs = datMat[svInd]
    labelSV = labelMat[svInd]
    print("there are %d Support Vectors" % np.shape(sVs)[0])
    svWeights = sp.multiply(labelSV, alphas[svInd])

    def misclassified(samples, labels):
        # number of rows whose predicted sign differs from the label's sign,
        # together with the number of rows
        rows, cols = np.shape(samples)
        wrong = sum(
            1 for i in range(rows)
            if np.sign(kernelTrans(sVs, samples[i, :], kTup).T * svWeights + b)
            != np.sign(labels[i])
        )
        return wrong, rows

    errorCount, m = misclassified(datMat, labelArr)
    print("the training error rate is: %f" % (float(errorCount) / m))
    # see how the parameters do on new data
    dataArr, labelArr = loadImage('testDigits')
    errorCount, m = misclassified(sp.mat(dataArr), labelArr)
    print("the test error rate is: %f" % (float(errorCount) / m))
def getRegion(self, size=3e4, min_nSNPs=1, chrom_i=None, pos_min=None, pos_max=None):
    """
    Sample a region from the genotype file and read its SNPs.

    Args:
        size:       width of the region in position units; size == 1 selects
                    a single random SNP
        min_nSNPs:  a sampled region is accepted only if it contains more
                    than this many SNPs
        chrom_i:    chromosome to sample from (drawn at random if None)
        pos_min:    only consider SNPs with position > pos_min
        pos_max:    only consider SNPs with position < pos_max
    Returns:
        Xr:     the 'snps' entry returned by plink_reader.readBED
        region: array [chrom_i, start position, end position]
    """
    bim = plink_reader.readBIM(self.bfile, usecols=(0, 1, 2, 3))
    chrom = SP.array(bim[:, 0], dtype=int)
    pos = SP.array(bim[:, 3], dtype=int)

    if chrom_i is None:
        n_chroms = chrom.max()
        chrom_i = int(SP.ceil(SP.rand() * n_chroms))

    # restrict to the chosen chromosome
    pos = pos[chrom == chrom_i]
    chrom = chrom[chrom == chrom_i]

    # optional position window
    ipos = SP.ones(len(pos), dtype=bool)
    if pos_min is not None:
        ipos = SP.logical_and(ipos, pos_min < pos)
    if pos_max is not None:
        ipos = SP.logical_and(ipos, pos < pos_max)
    pos = pos[ipos]
    chrom = chrom[ipos]

    if size == 1:
        # select single SNP
        # Fixed: floor gives a valid index 0..n-1; ceil gave 1..n, which can
        # index past the end and (almost) never selects index 0.
        idx = int(SP.floor(pos.shape[0] * SP.rand()))
        cis = SP.arange(pos.shape[0]) == idx
        region = SP.array([chrom_i, pos[idx], pos[idx]])
    else:
        # NOTE(review): this loop does not terminate if no window of the
        # requested size holds more than min_nSNPs SNPs.
        while 1:
            idx = int(SP.floor(pos.shape[0] * SP.rand()))
            posT1 = pos[idx]
            posT2 = pos[idx] + size
            if posT2 <= pos.max():
                cis = chrom == chrom_i
                cis *= (pos > posT1) * (pos < posT2)
                if cis.sum() > min_nSNPs:
                    break
        region = SP.array([chrom_i, posT1, posT2])

    # NOTE(review): `start` indexes the arrays filtered above; confirm that
    # readBED expects an index relative to this subset and not to the whole
    # BIM table.
    start = SP.nonzero(cis)[0].min()
    nSNPs = cis.sum()
    rv = plink_reader.readBED(self.bfile, useMAFencoding=True, start=start, nSNPs=nSNPs, bim=bim)
    Xr = rv['snps']
    return Xr, region
def _do_one_inner_iteration(self,inv_val):
    #Written by: Jeff Gostick ([email protected])
    r"""
    Determines which throats are invaded at a given applied capillary pressure

    This function uses the scipy.csgraph module for the graph theory cluster
    labeling algorithm (connected_components)

    Parameters:
        inv_val - applied pressure; throats whose 'Pc_entry' is below it are
                  marked as invaded

    Returns:
        int32 array with one entry per cluster label; an entry holds its
        (1-based) cluster number if that cluster touches an injection site
        and 0 otherwise

    Side effects:
        - overwrites self._net.throat_properties['invaded']
        - calls self._net.create_adjacency_matrix(...)
    """
    #Generate a tlist containing boolean values for throat state
    self._net.throat_properties['invaded'] = self._net.throat_properties['Pc_entry']<inv_val
    #Fill adjacency matrix with invasion state info
    self._net.create_adjacency_matrix('invaded',sprsfmt='csr',dropzeros=True)
    # connected_components returns (n_components, labels); keep the labels
    clusters = sprs.csgraph.connected_components(self._net._adjmatrix_csr)[1]
    #Clean up (not invaded = -2, invaded >=0)
    # shift labels >= 0 up by one and map everything else to 0, so 0 means "no cluster"
    clusters = (clusters[0:]>=0)*(clusters[0:]+1)
    #Identify clusters connected to invasion sites
    # NOTE(review): if neither self._ALOP nor self._OP equals 1,
    # `inj_clusters` is never assigned and the "Trim" step raises NameError.
    if self._ALOP == 1:
        inj_clusters = self._inv_src*clusters
    elif self._OP == 1:
        temp1 = self._net.throat_properties['invaded']*((self._net.throat_properties['connections'][:,0]+1)-1)
        temp2 = self._net.throat_properties['invaded']*((self._net.throat_properties['connections'][:,1]+1)-1)
        inj_clusters = np.append(self._net.pore_properties['numbering'][temp1[temp1>=0]],self._net.pore_properties['numbering'][temp2[temp2>=0]])
    #Trim non-connected clusters
    # distinct non-zero cluster numbers found at the injection sites
    temp = sp.unique(inj_clusters[sp.nonzero(inj_clusters)])
    inv_clusters = sp.zeros([np.size(clusters,0)],np.int32)
    for i in range(0,np.shape(temp)[0]):
        pores=sp.where(clusters==temp[i])[0]
        inv_clusters[pores] = temp[i]
    return(inv_clusters)
def _generate_pores(self):
    r"""
    Generate the pores (coordinates, numbering and types)

    One pore is created for every voxel of self._template that is > 0.
    Stored data: 'numbering' (0..Np-1), 'voxel_index' (Fortran-order flat
    index of each pore's voxel) and 'coords' (voxel centre scaled by
    self._Lc). self._voxel_to_pore_map maps a flat voxel index to its pore
    number; voxels without a pore hold the total voxel count.
    """
    self._logger.info("generate_pores: Create specified number of pores")

    image = self._template
    image_shape = sp.shape(image)

    # one pore per voxel that is > 0
    pore_count = np.sum(image > 0)
    pore_ids = np.arange(0, pore_count)
    self.set_pore_info(label='all', locations=pore_ids)
    self.set_pore_data(prop='numbering', data=pore_ids)  # Remove eventually

    # flat (Fortran-order) index of every non-zero voxel
    voxel_ids = np.ravel_multi_index(sp.nonzero(image), dims=image_shape, order='F')
    self.set_pore_data(prop='voxel_index', data=voxel_ids)

    # voxel -> pore lookup table, pre-filled with the voxel count
    # (This voxel_to_pore map is messy but works)
    lookup = sp.prod(image_shape)*sp.ones(np.prod(image_shape,),dtype=sp.int32)
    lookup[voxel_ids] = pore_ids
    self._voxel_to_pore_map = lookup

    # pore centres: voxel index + 0.5, scaled by the lattice constant
    voxel_positions = np.transpose(np.nonzero(image))
    self.set_pore_data(prop='coords', data=self._Lc*(0.5 + voxel_positions))
    self._logger.debug("generate_pores: End of method")
def hzd_do_value(sa, r_nu, rtrn_rte):
    """
    Hazard value for each requested return rate.

    params:
      sa [vector (nx1)] response spectral accelerations
      r_nu [vector (nx1)] event activity for the corresponding element in sa
      rtrn_rte [vector (mx1)] return rates of interest.

    returns:
      hzd_val: interpolated hazard value for each entry of rtrn_rte (zeros
      if every sa is zero)
    """
    assert sa.shape == r_nu.shape, str(
        sa.shape) + 'should = ' + str(r_nu.shape)

    # Get rid of events with sa = 0, since they would affect the end of the
    # curve.
    active = nonzero(sa)
    sa, r_nu = sa[active], r_nu[active]
    hzd, cumnu = _rte2cumrte(sa, r_nu)

    # annual exceedance rate = cumulative event activity
    if len(hzd) != 0:
        # exceedance rates to the right of the data give 0, rates to the
        # left of the data give hzd[0]
        hzd_val = interp(rtrn_rte, cumnu, hzd, left=hzd[0], right=0.0)
    else:
        hzd_val = zeros(rtrn_rte.shape)
    return hzd_val
def getGenoIndex(self,pos_start=None,pos_end=None,windowsize=0,chrom=None):
    """computes the 0-based genotype index range for a position selection.

    The selection is either a position range (pos_start - pos_end on one
    chromosome) or a whole chromosome (chrom).
    If all of these are None (default), (None, None) is returned.

    Args:
        pos_start:  position based selection (inclusive start) tuple of chrom, position
        pos_end:    position based selection (exclusive end) tuple of chrom, position
        windowsize: additionally include a flanking window around the selected positions (default 0)
        chrom:      chromosome based selection (return whole chromosome);
                    only used when no position range is given
    Returns:
        (idx_start, idx_end): smallest and largest matching genotype index,
        (None, None) when no selection is given, or None when the selection
        matches no genotype.
    """
    # `chrom` was read by the body but was not a parameter (NameError); it is
    # now a trailing keyword argument so existing positional calls still work.
    if (pos_start is not None) and (pos_end is not None):
        assert pos_start[0]==pos_end[0], "getGenoIndex only supports selection on a single chromosome"
        I = self.position["chrom"]==pos_start[0]
        # Fixed typo: `self.postion` -> `self.position`.
        I = I & (self.position["pos"]>=(pos_start[1]-windowsize)) & (self.position["pos"]<(pos_end[1]+windowsize))
    elif (chrom is not None):
        I = self.position["chrom"]==chrom
    else:
        return None,None
    # Convert the mask into indices for both kinds of selection; the chrom
    # branch used to take min/max of the boolean mask itself.
    I = sp.nonzero(I)[0]
    if I.size==0:
        return None
    return I.min(),I.max()
def check_if_click_is_on_an_existing_point(mouse_x_coord, mouse_y_coord,
                                           distance_threshold=0.5):
    """Return the index of the existing point that a mouse click landed on.

    Reads the module-level ``coords_array``, in which each row is one point
    (x in column 0, y in column 1).

    Args:
        mouse_x_coord: x coordinate of the click
        mouse_y_coord: y coordinate of the click
        distance_threshold: a point counts as "clicked" when its Euclidean
            distance to the click is strictly below this value (default 0.5)

    Returns:
        Row index of the closest point if it lies within the threshold,
        otherwise -1 (also when there are no points at all).
    """
    # NumPy is used directly: the scipy.* aliases of NumPy functions are
    # deprecated.
    import numpy as np

    points = np.asarray(coords_array, dtype=float)
    # Each point is one row, so dimension 0 is the number of points.
    if points.shape[0] == 0:
        return -1

    # Broadcasting subtracts the click location from every row at once.
    offsets = points - np.array([mouse_x_coord, mouse_y_coord], dtype=float)
    euclidean_dists = np.sqrt(np.sum(offsets ** 2, axis=1))

    # More than one point may be within the threshold; only the closest one
    # is reported. If even the closest is too far away, nothing matches.
    nearest = np.argmin(euclidean_dists)
    if euclidean_dists[nearest] < distance_threshold:
        return nearest
    return -1
def interpolateBetweenBinaryObjects(obj1, obj2, slices):
    """
    Takes two binary objects and puts slices slices in-between them, each of
    which contains a smooth binary transition between the objects.

    The result is the element-wise OR of the obj1->obj2 interpolation and the
    obj2->obj1 interpolation reversed along the temporal axis.

    Args:
        obj1: first binary object (array with an ``ndim`` attribute)
        obj2: second binary object
        slices: number of in-between slices, passed through to
            __interpolateBetweenBinaryObjects

    Returns:
        The combined interpolation; it has one more axis than obj1, and
        axis 3 is treated as the temporal axis.

    Raises:
        Exception: if the last temporal slice of the result differs from obj2.
    """
    # NumPy is used directly: the scipy.* aliases of NumPy functions are
    # deprecated.
    import numpy as np

    # constants
    temporal_dimension = 3

    # flip second returned binary objects along temporal axis
    slicer = [slice(None)] * (obj1.ndim + 1)
    slicer[temporal_dimension] = slice(None, None, -1)

    forward = __interpolateBetweenBinaryObjects(obj1, obj2, slices)
    # NumPy requires a tuple (not a list) for multi-axis slicing.
    backward = __interpolateBetweenBinaryObjects(obj2, obj1, slices)[tuple(slicer)]
    # logical-or combination
    ret = forward | backward

    # control step: see if last volume corresponds to obj2
    slicer[temporal_dimension] = slice(-1, None)
    last_volume = np.squeeze(ret[tuple(slicer)])
    expected = obj2.astype(np.bool_)
    if not np.all(last_volume == expected):
        # Count the voxels that actually differ (not the ones that agree).
        raise Exception(
            "Last created object does not correspond to obj2. Difference of {} voxels.".format(
                np.count_nonzero(last_volume != expected)
            )
        )
    return ret
def sparse_vector(nDims, params, sample_type='normal', seed=0):
    """
    Set sparse stimulus with given statistics.

    Args:
        nDims: pair (Nn, Kk): length of the vector and number of entries
            that are drawn (the remaining Nn - Kk entries stay 0)
        params: (mu, sigma) for sample_type "normal" (sigma == 0 assigns mu
            directly), or (lo, hi) for sample_type "uniform"
        sample_type: "normal" or "uniform"
        seed: seed for NumPy's global random generator

    Returns:
        Ss: array of length Nn with the drawn entries at shuffled positions
        idxs: tuple returned by nonzero(Ss), i.e. positions of non-zero entries

    Raises:
        ValueError: if sample_type is not "normal" or "uniform".
    """
    # NumPy is used directly: scipy.random and the scipy.* aliases of NumPy
    # functions are deprecated.
    import numpy as np

    Nn, Kk = nDims

    # Resolve the sampling rule once instead of re-checking it per entry.
    if sample_type == "normal":
        mu, sigma = params

        def draw():
            return np.random.normal(mu, sigma) if sigma != 0 else mu
    elif sample_type == "uniform":
        lo, hi = params

        def draw():
            return np.random.uniform(lo, hi)
    else:
        raise ValueError("unknown sample_type: %r" % (sample_type,))

    Ss = np.zeros(Nn)
    np.random.seed(seed)
    # Values are drawn one at a time, in index order.
    for iK in range(Kk):
        Ss[iK] = draw()

    np.random.shuffle(Ss)
    idxs = np.nonzero(Ss)
    return Ss, idxs
def getGenoIndex(self, pos_start=None, pos_end=None, windowsize=0, chrom=None):
    """Compute 0-based genotype index bounds for a positional selection.

    Positions can be given as a range (pos_start - pos_end on one
    chromosome) or as a whole chromosome (chrom). If none of them is given
    (default), (None, None) is returned, i.e. no restriction.

    Args:
        pos_start:  position based selection (start position),
                    tuple of (chrom, position)
        pos_end:    position based selection (end position, exclusive),
                    tuple of (chrom, position)
        windowsize: additionally include a flanking window around the
                    selected positions (default 0)
        chrom:      chromosome based selection (return whole chromosome);
                    ignored when pos_start and pos_end are both given

    Returns:
        idx_start:  genotype index based selection (start index)
        idx_end:    genotype index based selection (end index)

    Raises:
        ValueError: if no genotype matches the selection.
    """
    # NumPy is used directly: the scipy.* aliases of NumPy functions are
    # deprecated.
    import numpy as np

    have_range = (pos_start is not None) and (pos_end is not None)
    if not have_range and chrom is None:
        return None, None

    if have_range:
        assert pos_start[0] == pos_end[0], "getGenoIndex only supports selection on a single chromosome"
        lower = pos_start[1] - windowsize
        upper = pos_end[1] + windowsize
        mask = self.position["chrom"] == pos_start[0]
        mask = mask & (self.position["pos"] >= lower) & (self.position["pos"] < upper)
    else:
        mask = self.position["chrom"] == chrom

    # min/max must be taken over the matching indices, not over the
    # boolean mask itself.
    matching = np.flatnonzero(np.asarray(mask))
    if matching.size == 0:
        raise ValueError("getGenoIndex: no genotype matches the requested selection")
    idx_start = matching.min()
    idx_end = matching.max()
    return idx_start, idx_end
def prob_contour(H, xedges, yedges, p=0.95):
    """Compute PDF value enclosing desired probability mass.

    The contour corresponding to the returned PDF value will contain
    (approximately) p integrated probability mass.

    Parameters
    ----------
    H : 2d array, (n_x, n_y)
        Normalized (as PDF) histogram.
    xedges : 1d array, (n_x + 1,)
        X edges of histogram bins.
    yedges: 1d array, (n_y + 1,)
        Y edges of histogram bins.
    p : float or 1d array of float, optional
        Probability to find contour of. Default is 0.95

    Returns
    -------
    out : 1d array
        One PDF value per entry of `p` (length 1 for a scalar `p`). If the
        accumulated mass never reaches a requested `p` (for example because
        of rounding when p == 1.0), the value of the lowest-mass bin is
        returned for it, i.e. the contour enclosing all bins.
    """
    # NumPy is used directly: the scipy.* aliases of NumPy functions are
    # deprecated.
    import numpy as np

    # Plan: Find highest value, add. Repeat until target probability reached,
    # return value of H at last point added. This should be the contour which
    # encloses the desired fraction of probability mass.
    dx = np.atleast_2d(np.diff(xedges)).T
    dy = np.atleast_2d(np.diff(yedges))
    PM = (H * dx * dy).ravel()
    H_flat = H.ravel()

    # Sort into order of decreasing probability mass:
    srtidx = PM.argsort()[::-1]
    # Find cumulative sum:
    PM_sum = PM[srtidx].cumsum()

    # One row per requested probability; True where the target is reached.
    mask = PM_sum >= np.atleast_2d(p).T
    # argmax yields the first True of each row ...
    first = mask.argmax(axis=1)
    # ... but also 0 for rows without any True; send those to the last bin.
    first[~mask.any(axis=1)] = PM_sum.size - 1

    return H_flat[srtidx[first]].astype(float)
def getRegion(self, size=3e4, min_nSNPs=1, chrom_i=None, pos_min=None, pos_max=None):
    """
    Sample a region from the piece of genotype X, chrom, pos.

    Args:
        size: width of the sampled window in position units; the special
            value 1 selects a single random SNP instead of a window
        min_nSNPs: a window is accepted only if it contains strictly more
            than this many SNPs
        chrom_i: chromosome to sample from; drawn at random from
            1..max(chrom) when None
        pos_min: if given, only positions strictly above it are considered
        pos_max: if given, only positions strictly below it are considered

    Returns:
        Xr: genotype columns of the sampled region
        region: array [chrom_i, start position, end position]

    Notes:
        Windows are drawn by rejection sampling, so the call does not
        return if no admissible window exists. The returned columns are
        ``start:start + nSNPs``, which assumes the SNPs of the window are
        contiguous in genotype order.
    """
    # NumPy is used directly: the scipy.* aliases of NumPy functions are
    # deprecated.
    import numpy as np

    bim = None
    if (self.chrom is None) or (self.pos is None):
        bim = plink_reader.readBIM(self.bfile, usecols=(0, 1, 2, 3))
        chrom = np.array(bim[:, 0], dtype=int)
        pos = np.array(bim[:, 3], dtype=int)
    else:
        chrom = self.chrom
        pos = self.pos

    if chrom_i is None:
        n_chroms = chrom.max()
        # floor + 1 gives 1..n_chroms and can never produce chromosome 0.
        chrom_i = int(np.floor(np.random.rand() * n_chroms)) + 1

    # Restrict to the chromosome and the optional position bounds, but
    # remember where the surviving SNPs sit in the full genotype matrix.
    keep = chrom == chrom_i
    if pos_min is not None:
        keep &= pos_min < pos
    if pos_max is not None:
        keep &= pos < pos_max
    global_idx = np.flatnonzero(keep)
    pos = pos[keep]

    if size == 1:
        # select single SNP; floor keeps the index inside the array
        idx = int(np.floor(pos.shape[0] * np.random.rand()))
        cis = np.arange(pos.shape[0]) == idx
        region = np.array([chrom_i, pos[idx], pos[idx]])
    else:
        pos_last = pos.max()
        while True:
            idx = int(np.floor(pos.shape[0] * np.random.rand()))
            posT1 = pos[idx]
            posT2 = pos[idx] + size
            if posT2 <= pos_last:
                cis = (pos > posT1) & (pos < posT2)
                if cis.sum() > min_nSNPs:
                    break
        region = np.array([chrom_i, posT1, posT2])

    # Index into the *unfiltered* genotype, since X / the BED file are not
    # restricted to the chromosome.
    start = int(global_idx[cis].min())
    nSNPs = int(cis.sum())

    if self.X is None:
        if bim is None:
            # chrom/pos were cached, so the BIM table has not been read yet.
            bim = plink_reader.readBIM(self.bfile, usecols=(0, 1, 2, 3))
        rv = plink_reader.readBED(self.bfile, useMAFencoding=True, start=start, nSNPs=nSNPs, bim=bim)
        Xr = rv['snps']
    else:
        Xr = self.X[:, start:start + nSNPs]

    return Xr, region
def _update_alpha_X(self): """ Updating target matrix dual. """ iX, iY = sp.nonzero(self.V) values = np.sum(self.W[iX] * self.H[:, iY].T, axis=-1) scores = sp.sparse.coo_matrix((values, (iX, iY)), shape=self.V.shape) self.alpha_X = self.alpha_X + self.rho * (self.X - scores)
def last_index(X):
    """Return the index of the first row of X that has no finite entry.

    Rows consisting only of NaN/inf are treated as padding after the time
    series. If no such row exists, the number of rows of X is returned, so
    ``X[:last_index(X)]`` drops the padding in the first case and keeps
    every row in the second.
    """
    # NumPy is used directly: `alltrue` was removed in NumPy 2.0 and the
    # scipy.* aliases of NumPy functions are deprecated.
    import numpy as np

    # A row is padding when none of its entries is finite.
    padded_rows = ~np.isfinite(X).any(axis=1)
    hits = np.flatnonzero(padded_rows)
    if hits.size == 0:
        return X.shape[0]
    return hits[0]
def indof_constfeatures(X, axis=0):
    '''
    Return the indices of constant features.

    Assumes features are columns (by default, but can do rows): the variance
    is taken along `axis`, and a feature whose variance is exactly 0 is
    constant, i.e. equivalent to a bias and nothing else.

    Returns an array with the indices of those features (empty if none).
    '''
    # NumPy is used directly: the scipy.* aliases of NumPy functions are
    # deprecated.
    import numpy as np

    featvar = np.var(X, axis=axis)
    # atleast_1d keeps 1-D input working, where the variance is a scalar.
    badind = np.nonzero(np.atleast_1d(featvar == 0))[0]
    return badind
def indof_constfeatures(X, axis=0):
    '''
    Find features that are simply constants.

    Assumes features are columns (by default, but can do rows). A feature
    is reported when its variance along `axis` equals 0, such that it is
    equivalent to a bias and nothing else.

    Returns the indices of the constant features as an array.
    '''
    # NumPy is used directly: the scipy.* aliases of NumPy functions are
    # deprecated.
    import numpy as np

    is_constant = np.var(X, axis=axis) == 0
    # For 1-D X the comparison is a scalar; promote it so nonzero works.
    return np.nonzero(np.atleast_1d(is_constant))[0]
def watershed8(i_d, logger):
    """Label a 3-D volume with a watershed driven by minimal-altitude edges.

    Args:
        i_d: 3-D array; its shape defines the shape of the result
        logger: object with an ``info`` method, used for progress messages

    Returns:
        Integer array shaped like i_d holding one label per voxel.

    The edge maps come from compute_neighbours_max_border_gradient and the
    per-voxel flooding from stream_neighbours2_set. A histogram of the
    number of relevant neighbours per voxel is printed to stdout.
    """
    # NumPy is used directly: the scipy.* aliases of NumPy functions are
    # deprecated.
    import numpy as np

    # compute neighbours
    logger.info('Computing differences / edges...')
    nbs = compute_neighbours_max_border_gradient(i_d)

    # compute min altitude map
    logger.info('Computing minimal altitude map...')
    minaltitude = nbs[0]
    for nb in nbs[1:]:
        minaltitude = np.minimum(minaltitude, nb)

    logger.info('Prepare neighbours list...')
    neighbours = [[[set() for _ in range(i_d.shape[2])]
                   for _ in range(i_d.shape[1])]
                  for _ in range(i_d.shape[0])]

    # compute relevant neighbours
    logger.info('Computing relevant neighbours through masks...')
    # Offsets are paired with the edge maps in nbs, in this order:
    # down (x-1), up (x+1), left (y-1), right (y+1), into (z-1), out (z+1)
    offsets = ((-1, 0, 0), (1, 0, 0), (0, -1, 0), (0, 1, 0), (0, 0, -1), (0, 0, 1))
    for nb, (xo, yo, zo) in zip(nbs, offsets):
        # A neighbour is relevant where its edge equals the minimal altitude.
        for x, y, z in np.transpose(np.nonzero(nb == minaltitude)):
            neighbours[x][y][z].add((x + xo, y + yo, z + zo))

    c = [0, 0, 0, 0, 0, 0]
    for x, y, z in np.ndindex(minaltitude.shape):
        c[len(neighbours[x][y][z]) - 1] += 1
    # Single-argument print call: same output under Python 2 and Python 3.
    print("Distribution of relevant neighbours (1,2,3,4,5,6): {}".format(c))

    # watershed
    logger.info('Watershed \\w minaltitude and relevant neighbours as list pre-computation...')
    result = np.zeros(i_d.shape, dtype=np.int_)
    nb_labs = 0
    for voxel in np.ndindex(result.shape):
        if result[voxel] != 0:
            continue  # already labelled through an earlier stream
        L, lab = stream_neighbours2_set(i_d, result, minaltitude, neighbours, voxel)
        if -1 == lab:
            # The stream reached no existing label: open a new one.
            nb_labs += 1
            lab = nb_labs
        for p in L:
            result[p] = lab
    return result
def Kgrad_x(self, logtheta, x1, x2, d):
    """Derivative of the covariance matrix w.r.t. input dimension d.

    Args:
        logtheta: log hyperparameters; entry i (the position of d in
            self.dimension_indices) is the log amplitude for dimension d
        x1: inputs, only the number of rows is used
        x2: inputs whose column d provides the values
        d: input dimension

    Returns:
        Array of shape (x1.shape[0], x2.shape[0]); every row equals
        ``exp(2 * logtheta[i]) * x2[:, d]``. All zeros when d is not one of
        self.dimension_indices.
    """
    # NumPy is used directly: the scipy.* aliases of NumPy functions are
    # deprecated.
    import numpy as np

    RV = np.zeros([x1.shape[0], x2.shape[0]])
    # asarray lets dimension_indices be a plain list as well as an array.
    matches = np.nonzero(np.asarray(self.dimension_indices) == d)[0]
    if matches.size == 0:
        return RV
    # get corresponding amplitude:
    A = np.exp(2 * logtheta[matches[0]])
    RV[:, :] = A * x2[:, d]
    return RV
def nb_vals(matrix, indices):
    """Return the values of all cells adjacent to the given cells.

    Args:
        matrix: array-like of values
        indices: one index (e.g. ``(row, col)``) or a sequence of indices
            into matrix

    Returns:
        List with the value of every cell whose chessboard distance to the
        nearest indexed cell is exactly 1 (so diagonal neighbours count),
        in C (row-major) order. The indexed cells themselves are excluded.
    """
    # NumPy is used directly (the scipy.* aliases of NumPy functions are
    # deprecated) and ndimage is imported explicitly.
    import numpy as np
    from scipy import ndimage

    matrix = np.array(matrix)
    # One index array per axis, as needed for fancy indexing.
    indices = tuple(np.transpose(np.atleast_2d(indices)))
    dist = np.ones(matrix.shape)
    dist[indices] = 0
    dist = ndimage.distance_transform_cdt(dist, metric='chessboard')
    # Boolean-mask indexing walks the array in row-major order.
    return list(matrix[dist == 1])
def Kgrad_x(self, logtheta, x1, x2, d):
    """Gradient of the covariance w.r.t. input dimension d.

    Returns an array of shape (x1.shape[0], x2.shape[0]) in which every row
    is ``A * x2[:, d]``, with ``A = exp(2 * logtheta[i])`` and i the position
    of d within self.dimension_indices. If d is not one of
    self.dimension_indices the result is all zeros. x1 only contributes its
    number of rows.
    """
    # NumPy is used directly: the scipy.* aliases of NumPy functions are
    # deprecated.
    import numpy as np

    n_rows, n_cols = x1.shape[0], x2.shape[0]
    # asarray also accepts a plain list of dimension indices.
    position = np.nonzero(np.asarray(self.dimension_indices) == d)[0]
    if position.size == 0:
        return np.zeros([n_rows, n_cols])

    # get corresponding amplitude:
    A = np.exp(2 * logtheta[position[0]])
    RV = np.zeros([n_rows, n_cols])
    # The 1-D column of x2 is broadcast across all rows.
    RV[:, :] = A * x2[:, d]
    return RV
def getPhenotypes(self, phenotype_IDs=None, phenotype_query=None, sample_idx=None, center=True, intersection=False):
    """load Phenotypes

    Args:
        phenotype_IDs:      names of phenotypes to load
        phenotype_query:    string holding a pandas query (e.g. "(environment==1) & (phenotype_ID=='growth')"
                            selects all phenotypes that have a phenotype_ID equal to growth under environment 1.
                            Only used when phenotype_IDs is None.
        sample_idx:         Boolean sample index for subsetting individuals (None keeps all individuals)
        center:             Boolean: mean center and scale to unit standard deviation; remaining
                            missing values are set to the column mean, which is 0 after centering
                            (default True)
        intersection:       restrict observation to those observed in all phenotypes? (default: False);
                            when False, individuals observed in at least one phenotype are kept

    Returns:
        phenotypes:     [N x P] pandas.DataFrame of phenotype values for P phenotypes, indexed by
                        sample ID with phenotype IDs as columns
        sample_idx_intersect:        Boolean index of the individuals kept after filtering missing
                        values, relative to the individuals selected by sample_idx
    """
    # NumPy is used directly: the scipy.* aliases of NumPy functions are
    # deprecated.
    import numpy as np

    if phenotype_IDs is not None:
        I = np.array([np.nonzero(self.phenotype_ID == n)[0][0] for n in phenotype_IDs], dtype=int)
    elif phenotype_query is not None:
        try:
            I = self.index_frame.query(phenotype_query).values[:, 0]
            # if there are no results we won't actually get an exception,
            # we just get an empty response
            if len(I) == 0:
                print(("query '%s' yielded no results!" % (phenotype_query)))
                I = np.zeros([0], dtype="int")
        except Exception as arg:
            # Best effort: a failing query is reported and treated as empty.
            print(("query '%s' yielded no results: %s" % (phenotype_query, str(arg))))
            I = np.zeros([0], dtype="int")
    else:
        I = np.arange(self.phenotype_ID.shape[0])

    phenotypes = np.array(self.pheno_matrix[:, I], dtype='float')
    sample_ID = self.sample_ID
    if sample_idx is not None:
        # Indexing with None would add an axis instead of keeping all rows,
        # and the sample IDs must be subset the same way as the values.
        phenotypes = phenotypes[sample_idx]
        sample_ID = sample_ID[sample_idx]

    Iok = ~np.isnan(phenotypes)
    if intersection:
        sample_idx_intersect = Iok.all(axis=1)
    else:
        sample_idx_intersect = Iok.any(axis=1)

    phenotypes = phenotypes[sample_idx_intersect]
    Iok = Iok[sample_idx_intersect]

    if center:
        for i in range(phenotypes.shape[1]):
            observed = Iok[:, i]
            ym = phenotypes[observed, i].mean()
            phenotypes[:, i] -= ym
            # Mean-fill: after centering, the mean of the observed values is 0.
            phenotypes[~observed, i] = 0.0
            sd = phenotypes[:, i].std()
            if sd > 0:
                # A constant column stays at 0 instead of becoming NaN.
                phenotypes[:, i] /= sd

    phenotypes = pd.DataFrame(data=phenotypes, index=sample_ID[sample_idx_intersect], columns=self.phenotype_ID[I])
    return phenotypes, sample_idx_intersect
def Experimento(db):
    """Run a retrieval experiment over a labelled feature database.

    Args:
        db: mapping from figure name to a sequence whose first element is
            the class label (classes are numbered from 1) and whose
            remaining elements are the feature values. The database is
            assumed to be balanced (same number of samples per class).

    Returns:
        l:   integer array of shape (number of classes, samples per class);
             l[c - 1, r] counts how often, for a query of class c, the
             result at rank r (the query itself excluded) was also of
             class c. Only the first ``Nac - 1`` ranks are evaluated, so
             the last column stays 0.
        Nac: number of samples per class
    """
    # NumPy is used directly: the scipy.* aliases of NumPy functions are
    # deprecated.
    import numpy as np

    # names of the figures (list() is needed for Python 3 dict views)
    name_arr = np.array(list(db.keys()))

    # another dictionary: figure name -> class label
    cl = dict(zip(name_arr, [int(db[i][0]) for i in name_arr]))

    # Build an N_samples x N_features matrix from the input database,
    # discarding the first column (the class labels)
    data = np.array([db[nome][1:] for nome in name_arr])

    # distancia: dissimilarity measure to use (any metric accepted by pdist)
    distancia = 'euclidean'

    # number of samples
    Nobj = data.shape[0]
    # total number of classes
    Nclasses = max(cl.values())
    # samples per class, assuming the database is balanced; integer division
    # because the value is used as an array dimension and a slice bound
    Nac = Nobj // Nclasses
    # number of retrievals
    Nretr = Nac

    # compute the distance matrix
    md = squareform(pdist(data, distancia))

    # accumulates the confusion counts
    l = np.zeros((Nclasses, Nac), dtype=int)

    for i, nome in enumerate(name_arr):
        # For each row of md establish the retrieval ranking by increasing
        # distance; the first element of each row is the query shape itself.
        idx = np.argsort(md[i])
        # classes of the query pattern and of the retrieved images
        classe_padrao = cl[nome]
        name_retr = name_arr[idx]
        aux = np.array([cl[j] for j in name_retr])
        # only the results following the query itself are of interest
        classe_retrs = aux[1:Nretr]
        n = np.nonzero(classe_retrs == classe_padrao)
        # tally the results
        for rank in n[0]:
            l[classe_padrao - 1, rank] += 1

    return l, Nac
def dup_fig(x, y):
    """Scatter-plot the rows of x, coloured by their class label in y.

    Args:
        x: 2-D array; column 0 is plotted against column 1
        y: class label per row of x. Labels are expected to be the integers
           1..k, where k is the number of distinct labels; rows with any
           other label are not drawn.

    Shows the figure with plt.show(); returns nothing.
    """
    # NumPy is used directly: the scipy.* aliases of NumPy functions are
    # deprecated.
    import numpy as np

    colors = [
        None, 'black', 'blue', 'brown', 'purple', 'orange', 'cyan', 'gray',
        'yellow', 'black', 'red', 'green'
    ]
    # Index 0 is a placeholder because labels start at 1.
    palette = colors[1:]

    labels = np.asarray(y).ravel()
    k = len(np.unique(labels))
    for i in range(1, k + 1):
        members = labels == i
        # Wrap around the palette instead of failing beyond 11 classes.
        colour = palette[(i - 1) % len(palette)]
        plt.plot(x[members, 0], x[members, 1], 'o', color=colour)
    plt.show()
def getGroundPlane(self):
    """Fit a plane through the feet that are currently on the ground.

    The foot position of every leg reporting isFootOnGround() is
    transformed with transformLeg2Body and the resulting points are handed
    to planeFromPoints.

    Returns:
        Whatever planeFromPoints returns for those points, or None if
        planeFromPoints raises.
    """
    legs = self.getLegs()
    on_ground = [legs[i].isFootOnGround() for i in range(NUM_LEGS)]
    on_ground_indices = nonzero(on_ground)[0]
    points = array([self.transformLeg2Body(i, legs[i].getFootPos())
                    for i in on_ground_indices])
    try:
        return planeFromPoints(points)
    except Exception:
        # Deliberate best effort; unlike a bare except this lets
        # KeyboardInterrupt and SystemExit propagate.
        return None
def _update_X(self): """ Updating user_1-user_2 matrix. """ iX, iY = sp.nonzero(self._V) values = np.sum(self._W[iX]*self._H[:, iY].T, axis=-1) scores = sp.sparse.coo_matrix((values-1, (iX, iY)), shape=self._V.shape) left = self.rho*scores-self._alpha_X right = (left.power(2)+4.0*self.rho*self._V).power(0.5) self._X = (left+right)/(2*self.rho)
def tuple_list_from_matrix(graph_mat, uids):
    """
    Takes graph as graph_mat matrix and uids (list or array) as arguments.

    Returns graph_tuple, which is the representation of the graph as a list
    of tuples: one ``(uids[i], uids[j])`` pair for every non-zero entry
    graph_mat[i, j], ordered by row and then by column.

    This is the inverse function for matrix_from_tuple_list, in the sense
    that matrix_from_tuple_list(tuple_list_from_matrix(graph)) ~ graph, and
    tuple_list_from_matrix(matrix_from_tuple_list(graph)) ~ graph
    (not strictly equal because elements may be rearranged).
    """
    # NumPy is used directly: the scipy.* aliases of NumPy functions are
    # deprecated.
    import numpy as np

    tuple_list = []
    for i in range(graph_mat.shape[0]):
        # indexes of the nodes connected with the i-th one
        indexes = np.nonzero(graph_mat[i, :])[0]
        # Element-wise lookup works for plain lists as well as arrays.
        tuple_list.extend((uids[i], uids[j]) for j in indexes)
    return tuple_list
def Kgrad_xdiag(self, logtheta, x1, d):
    """derivative w.r.t diagonal of self covariance matrix

    Args:
        logtheta: log hyperparameters; entry i (the position of d in
            self.dimension_indices) is the log amplitude for dimension d
        x1: inputs whose column d provides the values
        d: input dimension

    Returns:
        1-D array of length x1.shape[0] equal to
        ``2 * exp(2 * logtheta[i]) * x1[:, d]``, or all zeros when d is not
        one of self.dimension_indices.
    """
    # NumPy is used directly: the scipy.* aliases of NumPy functions are
    # deprecated.
    import numpy as np

    RV = np.zeros([x1.shape[0]])
    # asarray lets dimension_indices be a plain list as well as an array.
    matches = np.nonzero(np.asarray(self.dimension_indices) == d)[0]
    if matches.size == 0:
        return RV
    # get corresponding amplitude:
    A = np.exp(2 * logtheta[matches[0]])
    RV[:] = 2 * A * x1[:, d]
    return RV
def update_X(self):
    """
    Updating user-item matrix.

    Reads self.V, self.W, self.H, self.alpha_X and self.args.rho and stores
    in self.X the value ``(left + sqrt(left**2 + 4*rho*V)) / (2*rho)`` with
    ``left = rho*scores - alpha_X``, where ``scores`` holds
    ``W[i, :] . H[:, j] - 1`` at the non-zero positions (i, j) of V.
    """
    rho = self.args.rho
    # V.nonzero() is what the deprecated scipy.nonzero alias delegated to.
    iX, iY = self.V.nonzero()
    values = np.sum(self.W[iX] * self.H[:, iY].T, axis=-1)
    scores = sp.sparse.coo_matrix((values - 1, (iX, iY)), shape=self.V.shape)
    left = rho * scores - self.alpha_X
    # .power works element-wise on the stored entries.
    right = (left.power(2) + 4.0 * rho * self.V).power(0.5)
    self.X = (left + right) / (2 * rho)
def Kgrad_xdiag(self, logtheta, x1, d):
    """derivative w.r.t diagonal of self covariance matrix

    Returns a 1-D array of length x1.shape[0] holding ``2 * A * x1[:, d]``
    with ``A = exp(2 * logtheta[i])``, where i is the position of d within
    self.dimension_indices. If d is not one of self.dimension_indices the
    result is all zeros.
    """
    # NumPy is used directly: the scipy.* aliases of NumPy functions are
    # deprecated.
    import numpy as np

    n = x1.shape[0]
    # asarray also accepts a plain list of dimension indices.
    position = np.nonzero(np.asarray(self.dimension_indices) == d)[0]
    if position.size == 0:
        return np.zeros([n])

    # get corresponding amplitude:
    A = np.exp(2 * logtheta[position[0]])
    RV = np.zeros([n])
    RV[:] = 2 * A * x1[:, d]
    return RV
def forecast_fatality(MMI, population, beta=0.17, theta=14.05):
    """
    Forecast fatalities from MMI values and population.

    Formula taken from USGS Open-File-Report 2009-1136; the default values
    for beta and theta are for Indonesia.

    Args:
        MMI: intensity value(s), scalar or array-like
        population: population exposed to each MMI value (scalar, or
            broadcastable against MMI)
        beta, theta: parameters of the fatality-rate curve

    Returns:
        fatality_rate * population, where the rate is
          0                                       for MMI < 5
          norm.cdf(1/beta * log(MMI / theta))     for 5 <= MMI <= 10
          norm.cdf(1/beta * log(10 / theta))      for MMI > 10
        NaN intensities get a rate of 0.
    """
    MMI = array(MMI)
    fatality_rate = zeros(MMI.shape)

    # Boolean masks (instead of nonzero) also work for scalar input.
    damaging = MMI >= 5
    # Boolean indexing copies, so capping does not modify the caller's data.
    capped = MMI[damaging]
    capped[capped > 10] = 10  # the rate saturates above MMI 10
    fatality_rate[damaging] = norm.cdf(1.0 / beta * log(capped / theta))

    fatality = fatality_rate * population
    return fatality
def log_likelihood(C, T): """ implementation of likelihood of C given T """ C = C.tocsr() T = T.tocsr() ind = scipy.nonzero(C) relT = np.array(T[ind])[0, :] relT = np.log(relT) relC = np.array(C[ind])[0, :] return relT.dot(relC)