def get_top_mas(self, list_of_mas, top_percentage):
    """
    Return the masked arrays with the largest standard deviations.

    05-09-05
        start
    06-07-05
        if the computed top_number is less than 200, use 200.

    list_of_mas: sequence of masked arrays; each must provide
        compressed(), which is used to drop the NA entries before the
        standard deviation is taken.
    top_percentage: multiplier applied to len(list_of_mas) to decide how
        many arrays to keep (the product is truncated with int()).

    Returns a list holding the selected members of list_of_mas, ordered
    from the largest standard deviation to the smallest. Asking for more
    arrays than exist simply returns all of them.
    """
    sys.stderr.write("Getting the top %s std edges..."%top_percentage)

    # One standard deviation per array, computed on the non-NA values only.
    list_of_stds = [MLab.std(ma.compressed()) for ma in list_of_mas]

    # How many we want; 200 is the bottom line (06-07-05).
    top_number = max(int(len(list_of_stds)*top_percentage), 200)

    # argsort is ascending, so reverse the index list to get descending order.
    arg_list = argsort(list_of_stds).tolist()
    arg_list.reverse()
    # Slicing past the end is harmless when top_number > len(arg_list).
    top_arg_list = arg_list[:top_number]

    if self.debug:
        print("list_of_stds is %s"%repr(list_of_stds))
        print("top_number is %s"%top_number)
        print("arg_list is %s"%repr(arg_list))
        print("top_arg_list is %s"%repr(top_arg_list))

    list_of_top_mas = [list_of_mas[index] for index in top_arg_list]
    sys.stderr.write("Done.\n")
    return list_of_top_mas
def toConsensus(self, cutoff=None, fully_degenerate=False,\
    include_all=False):
    """Returns the consensus sequence from a profile.

    cutoff: cutoff value, determines how much should be covered in a
    position (row) of the profile. Example: pos 0 [.2,.1,.3,.4]
    (CharOrder: TCAG). To cover .65 (=cutoff) we need two characters:
    A and G, which results in the degenerate character R.

    fully_degenerate: determines whether the fully degenerate character
    is returned at a position. For the example above an 'N' would
    be returned.

    include_all: all possibilities are included in the degenerate
    character. Example: row = UCAG = [.1,.3,.3,.3] cutoff = .4,
    consensus = 'V' (even though only 2 chars would be enough to
    reach the cutoff value).

    The Alphabet of the Profile should implement degenerateFromSequence.

    Note that cutoff has priority over fully_degenerate. In other words,
    if you specify a cutoff value and set fully_degenerate to true,
    the calculation will be done with the cutoff value.
    If nothing gets passed in, the maximum argument is chosen. In the
    first example above G will be returned.

    Returns a string built by joining the str() of the per-position
    results.
    """
    #set up some local variables
    co = array(self.CharOrder)
    alpha = self.Alphabet
    data = self.Data

    #determine the action. Cutoff takes priority over fully_degenerate
    # NOTE: the test is on truthiness, so cutoff=0 behaves like cutoff=None.
    if cutoff:
        result = []
        # presumably the number of characters needed per row to reach the
        # cutoff -- it is used below as a per-row count (num_to_keep).
        degen = self.rowDegeneracy(cutoff)
        # Per-row index order; the code below treats the tail of each row
        # as the largest entries (ascending sort). Shadows the builtin
        # sorted() inside this method.
        sorted = argsort(data)
        # NOTE(review): if num_to_keep is ever 0, row[-0:] is the whole row
        # and row[-0] is row[0] -- confirm rowDegeneracy never returns 0.
        if include_all:
            #if include_all include all possibilities in the degen char
            for row_idx, (num_to_keep, row) in enumerate(zip(degen,sorted)):
                # First list: the last num_to_keep indices of the sorted row,
                # restricted to non-zero entries. Second list: every non-zero
                # index whose value equals that of the first kept index
                # (i.e. ties). Indices may appear in both lists.
                to_take = [item for item in row[-num_to_keep:]\
                    if item in nonzero(data[row_idx])] +\
                    [item for item in nonzero(data[row_idx] ==\
                        data[row_idx,row[-num_to_keep]]) if item in\
                        nonzero(data[row_idx])]
                result.append(alpha.degenerateFromSequence(\
                    map(str,take(co, to_take))))
        else:
            for row_idx, (num_to_keep, row) in enumerate(zip(degen,sorted)):
                # Only the last num_to_keep sorted indices, zeros excluded.
                result.append(alpha.degenerateFromSequence(\
                    map(str,take(co, [item for item in row[-num_to_keep:]\
                    if item in nonzero(data[row_idx])]))))
    elif not fully_degenerate:
        # No cutoff, not fully degenerate: take the character at the
        # argmax position(s) of the profile data.
        result = take(co, argmax(self.Data))
    else:
        # Fully degenerate: every character with a non-zero entry in the
        # row contributes to the degenerate symbol.
        result = []
        for row in self.Data:
            result.append(alpha.degenerateFromSequence(\
                map(str,take(co, nonzero(row)))))
    return ''.join(map(str,result))
def median_filter(data, N=5):
    """
    Median filter sequence data with window of width N.

    For every index i with N <= i < len(data), results[i] is the median
    of the N samples that precede it, data[i - N:i]. The first N entries
    of the result are left at zero because no full window precedes them.

    data: indexable, sliceable sequence of numbers (list or array).
    N: window width, must be >= 1. For an even N the upper of the two
        middle values is used.

    Returns an array of len(data) entries as created by zeros().
    NOTE(review): the element type of the result is whatever zeros()
    defaults to in the array package in use -- confirm it suits the data.
    """
    # The original sized the buffer with len(data - N): a misplaced
    # parenthesis that only worked for arrays (and then equalled len(data)).
    results = zeros(len(data))
    # Position of the median inside a sorted window of N items. The
    # original used (N / 2) + 1, which is one past the median and is out
    # of range for N == 1.
    mid = N // 2
    for i in range(N, len(data)):
        x = data[i - N:i]
        s = argsort(x)
        results[i] = x[s[mid]]
    return results
def k_nearest(self,key,k):
    """
    Return the k stored entries whose keys are closest to key.

    self.db is read as a sequence of (key_vector, value) pairs. The
    result is a pair of lists: the selected db entries and their
    distances to key, both ordered from nearest to farthest. An empty
    database yields ([], []).
    """
    # TODO: These distance computations can be further optimized
    # if the keys are stored as a matrix instead of as separate vectors.
    # However that would require changes in the VectorTree class, too.
    if not self.db:
        return [],[]

    # Stack all stored key vectors so the distances come out of one call.
    stored_keys = array([x for x,_ in self.db])
    dists = matrixnorm(key-stored_keys)

    # Indices of the k smallest distances, nearest first.
    nearest = argsort(dists)[:k]
    entries = [self.db[i] for i in nearest]
    entry_dists = [dists[i] for i in nearest]
    return (entries, entry_dists)
def evsort(eval, evec):
    """Sort eigenvalues and their eigenvectors together.

    Since NumPy returns the eigenvectors/eigenvalues unsorted, the
    eigenvalues are put in argsort() order and the rows of evec are
    reordered with the same permutation.

    eval: 1-d array of eigenvalues.
    evec: 2-d array with one row per eigenvalue.

    Returns (newval, newvec) as fresh copies; the inputs are not
    modified. If the number of eigenvalues differs from the number of
    rows of evec, a message is printed and the interpreter exits.
    """
    n = len(eval)
    rows, cols = evec.shape
    if n != rows:
        print("Help! eval and evec are different sizes!")
        sys.exit()

    newval = copy.copy(eval)
    newvec = copy.copy(evec)

    # index[dst] is the position in the input of the dst-th smallest value.
    index = argsort(eval)
    for dst, src in enumerate(index):
        newval[dst] = eval[src]
        newvec[dst, :] = evec[src, :]
    return newval, newvec
def pca(M):
    """Perform PCA on M, return eigenvalues and eigenvectors, sorted.

    M is a 2-d array of shape (T, N). When T < N the decomposition is
    done on the smaller T x T matrix M * M^T (snapshot method) and
    mapped back; otherwise on the N x N matrix (1/T) * M^T * M.

    Returns (evals, evecs): the eigenvalues in descending order and the
    transposed, identically reordered eigenvector array.
    NOTE(review): presumably M is expected to be mean-centred already;
    nothing here subtracts the mean.
    """
    n_rows, n_cols = shape(M)
    M_t = t(M)

    if n_rows < n_cols:
        # fewer rows than columns: snapshot method
        gram = dot(M, M_t)
        evals, gram_evecs = eigenvectors(gram)
        # HACK: make sure evals are all positive
        # NOTE(review): a value clamped to 0 is divided by on the next
        # line -- confirm this cannot occur or is acceptable.
        evals = where(evals < 0, 0, evals)
        evecs = 1. / sqrt(evals) * dot(M_t, t(gram_evecs))
    else:
        # calculate covariance matrix
        cov = 1. / n_rows * dot(M_t, M)
        evals, evecs = eigenvectors(cov)

    # sort the eigenvalues and eigenvectors, descending order
    descending = argsort(evals)[::-1]
    sorted_evecs = take(evecs, descending, 1)
    sorted_evals = take(evals, descending)
    return sorted_evals, t(sorted_evecs)
def pca(M):
    """Perform PCA on M, return eigenvectors and eigenvalues, sorted.

    M is a 2-d array of shape (T, N). When T < N the decomposition is
    done on the smaller T x T matrix M * M^T (snapshot method) and
    mapped back; otherwise on the N x N matrix (1/T) * M^T * M.

    Returns the tuple (evals, evecs) -- eigenvalues first -- with the
    eigenvalues in descending order and the transposed, identically
    reordered eigenvector array.
    NOTE(review): presumably M is expected to be mean-centred already;
    nothing here subtracts the mean.
    """
    # The docstring must be the first statement of the function; it used
    # to follow these imports, which left pca.__doc__ empty.
    from Numeric import take, dot, shape, argsort, where, sqrt, transpose as t
    from LinearAlgebra import eigenvectors

    T, N = shape(M)

    # if there are fewer rows T than columns N, use
    # snapshot method
    if T < N:
        C = dot(M, t(M))
        evals, evecsC = eigenvectors(C)
        # HACK: make sure evals are all positive
        # NOTE(review): a value clamped to 0 is divided by on the next
        # line -- confirm this cannot occur or is acceptable.
        evals = where(evals < 0, 0, evals)
        evecs = 1./sqrt(evals) * dot(t(M), t(evecsC))
    else:
        # calculate covariance matrix
        K = 1./T * dot(t(M), M)
        evals, evecs = eigenvectors(K)

    # sort the eigenvalues and eigenvectors, descending order
    order = (argsort(evals)[::-1])
    evecs = take(evecs, order, 1)
    evals = take(evals, order)
    return evals, t(evecs)
def get_top_edges_and_output(self, graph_dict, top_number, outfname, header_row):
    """
    Write the top_number highest-valued edges of graph_dict to outfname.

    06-21-05
        If the number of edges is less than the top_number, take it directly.

    graph_dict: maps an edge tuple to its value (called "cor" below).
    top_number: how many edges to write; asking for more than exist
        writes all of them.
    outfname: path of the tab-delimited output file (overwritten).
    header_row: sequence written as the first row.

    Each following row is 'e', the members of the edge tuple, then the
    value, ordered from the largest value to the smallest. Nothing is
    returned.
    """
    # The original formatted an undefined name (top_percentage) here,
    # which raised NameError before any work was done.
    sys.stderr.write("Getting the top %s edges..."%top_number)

    # Materialise both as lists: they are indexed by position below, and
    # keys()/values() line up as long as the dict is not modified.
    edge_tuple_list = list(graph_dict.keys())
    cor_list = list(graph_dict.values())

    arg_cor_list = argsort(cor_list).tolist()   # ascending indices as a list
    arg_cor_list.reverse()                      # descending order
    # 06-07-05 if top_number>len(arg_cor_list), it's ok.
    top_arg_list = arg_cor_list[:top_number]

    if self.debug:
        print("cor_list is %s"%repr(cor_list))
        print("top_number is %s"%top_number)
        print("arg_cor_list is %s"%repr(arg_cor_list))
        print("top_arg_list is %s"%repr(top_arg_list))

    # Close the file explicitly instead of relying on 'del writer' and
    # reference counting to flush it.
    outf = open(outfname, 'w')
    try:
        writer = csv.writer(outf, delimiter='\t')
        writer.writerow(header_row)
        for index in top_arg_list:
            writer.writerow(['e'] + list(edge_tuple_list[index]) + [cor_list[index]])
    finally:
        outf.close()
    sys.stderr.write("Done.\n")
def normalizeConfiguration(self, repr=None):
    """Applies a linear transformation such that the coordinate
    origin becomes the center of mass of the object and its
    principal axes of inertia are parallel to the three coordinate
    axes.

    A specific representation can be chosen by setting |repr| to
      Ir    : x y z <--> b c a
      IIr   : x y z <--> c a b
      IIIr  : x y z <--> a b c
      Il    : x y z <--> c b a
      IIl   : x y z <--> a c b
      IIIl  : x y z <--> b a c

    With the default |repr| of None the axes are left in the order
    returned by the diagonalization. Any other unrecognized value
    prints 'unknown representation' and is then treated like IIIr.
    The object is modified in place; nothing is returned.
    """
    from LinearAlgebra import determinant
    cm, inertia = self.centerAndMomentOfInertia()
    # Move the center of mass to the origin.
    self.translateBy(-cm)
    ev, diag = inertia.diagonalization()
    # Flip the first row when the determinant is negative so that the
    # matrix handed to Rotation() has a positive determinant.
    if determinant(diag.array) < 0:
        diag.array[0] = -diag.array[0]
    if repr != None:
        from Numeric import argsort, array
        # seq starts as the ascending order of the values in ev; each
        # branch below permutes it into the requested axis assignment.
        seq = argsort(ev)
        if repr == 'Ir':
            seq = array([seq[1], seq[2], seq[0]])
        elif repr == 'IIr':
            seq = array([seq[2], seq[0], seq[1]])
        elif repr == 'Il':
            # reversed order
            seq = seq[2::-1]
        elif repr == 'IIl':
            # swap the last two entries in place
            seq[1:3] = array([seq[2], seq[1]])
        elif repr == 'IIIl':
            # swap the first two entries in place
            seq[0:2] = array([seq[1], seq[0]])
        elif repr != 'IIIr':
            # IIIr keeps the plain ascending order; anything else is
            # reported but processing continues with that same order.
            print 'unknown representation'
        # NOTE(review): this uses the name Numeric, which the local
        # "from Numeric import ..." above does not bind -- it must be
        # imported at module level; confirm.
        diag.array = Numeric.take(diag.array, seq)
    self.applyTransformation(Transformation.Rotation(diag))
def median(data):
    """
    Return the median element of data.

    data: non-empty indexable sequence of comparable numbers.

    The element is picked through argsort(), so the returned value is
    always a member of data. For an even number of items the upper of
    the two middle values is returned (no averaging).

    Raises IndexError if data is empty.
    """
    N = len(data)
    if N == 0:
        # Kept as IndexError: that is what indexing an empty sequence
        # raised before, only now with a readable message.
        raise IndexError("median() of an empty sequence")
    inds = argsort(data)
    # Position N // 2 of the sorted order is the median. The original
    # used N / 2 + 1, which is one element too far and out of range for
    # one- and two-element inputs.
    return data[inds[N // 2]]
def winners(self,N=1):
    """
    Return a tuple with the indices of the N smallest entries of
    self.dists, smallest first. N is capped at len(self.dists).
    """
    ranked = argsort(self.dists)
    limit = min(N,len(self.dists))
    return tuple(ranked[:limit])
def winners(self, N=1):
    """
    Indices of the N best (smallest) values in self.dists.

    The indices come back as a tuple ordered from the smallest distance
    upwards; at most len(self.dists) of them are returned.
    """
    how_many = min(N, len(self.dists))
    by_distance = argsort(self.dists)
    best = by_distance[:how_many]
    return tuple(best)