예제 #1
0
	def get_top_mas(self, list_of_mas, top_percentage):
		"""
		Select the entries of list_of_mas with the largest standard deviation.

		list_of_mas: sequence of objects offering compressed(); the original
			author's note says compressed() is used to disregard the NAs.
		top_percentage: fraction of the input to keep; it is multiplied by
			len(list_of_mas) and truncated to an integer count.

		The count is never allowed to drop below 200. Asking for more entries
		than exist is harmless, the slice simply returns everything.
		Returns the selected entries, largest standard deviation first.

		History:
		05-09-05	start
		06-07-05	enforce the floor of 200 on the number of entries kept
		"""
		sys.stderr.write("Getting the top %s std edges..."%top_percentage)
		list_of_stds = [MLab.std(ma.compressed()) for ma in list_of_mas]
		# how many we want, with 200 as the bottom line
		top_number = max(int(len(list_of_stds)*top_percentage), 200)
		# argsort is ascending; flip the index list to get descending order
		arg_list = argsort(list_of_stds).tolist()[::-1]
		top_arg_list = arg_list[:top_number]
		if self.debug:
			print("list_of_stds is %s"%repr(list_of_stds))
			print("top_number is %s"%top_number)
			print("arg_list is %s"%repr(arg_list))
			print("top_arg_list is %s"%repr(top_arg_list))
		list_of_top_mas = [list_of_mas[index] for index in top_arg_list]
		sys.stderr.write("Done.\n")
		return list_of_top_mas
예제 #2
0
    def toConsensus(self, cutoff=None, fully_degenerate=False,\
        include_all=False):
        """Returns the consensus sequence from a profile.

        cutoff: cutoff value, determines how much should be covered in a
        position (row) of the profile. Example: pos 0 [.2,.1,.3,.4]
        (CharOrder: TCAG). To cover .65 (=cutoff) we need two characters:
        A and G, which results in the degenerate character R.

        fully_degenerate: determines whether the fully degenerate character
        is returned at a position. For the example above an 'N' would
        be returned.

        include_all: all possibilities are included in the degenerate
        character. Example: row = UCAG = [.1,.3,.3,.3] cutoff = .4,
        consensus = 'V' (even though only 2 chars would be enough to
        reach the cutoff value). Only consulted when a cutoff is given.

        The Alphabet of the Profile should implement degenerateFromSequence.

        Note that cutoff has priority over fully_degenerate. In other words,
        if you specify a cutoff value and set fully_degenerate to true,
        the calculation will be done with the cutoff value. If nothing
        gets passed in, the maximum argument is chosen. In the first example
        above G will be returned.

        Returns the per-position results joined into a single string.
        """
        #set up some local variables
        co = array(self.CharOrder)
        alpha = self.Alphabet
        data = self.Data

        #determine the action. Cutoff takes priority over fully_degenerate
        # NOTE(review): this is a truthiness test, so cutoff=0 is treated
        # like "no cutoff" and falls through to the branches below.
        if cutoff:
            result = []
            # one entry per row; each entry is used below as the number of
            # trailing (sorted) column indices to keep for that row
            degen = self.rowDegeneracy(cutoff)
            # NOTE(review): shadows the builtin sorted() inside this method.
            # Presumably argsort orders each row ascending, so the last
            # num_to_keep indices are the largest values -- confirm against
            # the array library in use.
            sorted = argsort(data)
            if include_all:
                #if include_all include all possibilities in the degen char
                # to_take = the kept indices with a nonzero value, plus every
                # nonzero index whose value equals the smallest kept value
                # (i.e. ties with the last character that made the cut)
                for row_idx, (num_to_keep, row) in enumerate(zip(degen,sorted)):
                    to_take = [item for item in row[-num_to_keep:]\
                    if item in nonzero(data[row_idx])] +\
                    [item for item in nonzero(data[row_idx] ==\
                        data[row_idx,row[-num_to_keep]]) if item in\
                        nonzero(data[row_idx])]
                    result.append(alpha.degenerateFromSequence(\
                    map(str,take(co, to_take))))
            else:
                # keep only the top num_to_keep indices, dropping any whose
                # value in the row is zero
                for row_idx, (num_to_keep, row) in enumerate(zip(degen,sorted)):
                    result.append(alpha.degenerateFromSequence(\
                        map(str,take(co, [item for item in row[-num_to_keep:]\
                        if item in nonzero(data[row_idx])]))))

        elif not fully_degenerate:
            # no cutoff, not fully degenerate: pick the argmax character(s)
            result = take(co, argmax(self.Data))
        else:
            # fully degenerate: every character with a nonzero entry in the
            # row contributes to the degenerate symbol
            result = []
            for row in self.Data:
                result.append(alpha.degenerateFromSequence(\
                map(str,take(co, nonzero(row)))))
        return ''.join(map(str,result))
def median_filter(data, N=5):
    """
    Median filter sequence data with a trailing window of width N.

    data: 1-D sequence (list or array) of comparable values.
    N: window width, must be at least 1.

    Returns an array (built with zeros()) of the same length as data.
    Entry i, for i >= N, is the median of the N samples that precede
    position i (data[i-N:i]); the first N entries are left at zero because
    no full window exists for them. For even N the upper of the two middle
    values is used.

    Raises ValueError if N is smaller than 1.
    """
    if N < 1:
        raise ValueError("window width N must be at least 1")
    # len(data), not len(data - N): the latter only worked by accident for
    # arrays (same length) and raised TypeError for plain lists.
    results = zeros(len(data))
    middle = N // 2  # index of the median inside a sorted window of size N
    for i in range(N, len(data)):
        window = data[i - N:i]
        results[i] = sorted(window)[middle]
    return results
    def k_nearest(self,key,k):
        """Return the k stored entries whose keys are closest to key.

        The result is a pair (entries, distances): the first list holds the
        items of self.db ordered by increasing distance, the second holds the
        matching distances as computed by matrixnorm. An empty database
        yields two empty lists.
        """
        # TODO: These distance computations can be further optimized
        # if the keys are stored as a matrix instead of as separate vectors.
        # However that would require changes in the VectorTree class, too.
        if not self.db:
            return [],[]
        # each db entry is unpacked as a (key, value) pair; only keys matter
        stored_keys = array([entry_key for entry_key,_value in self.db])
        dists = matrixnorm(key-stored_keys)
        nearest = argsort(dists)[:k]

        entries = [self.db[i] for i in nearest]
        distances = [dists[i] for i in nearest]
        return (entries, distances)
def evsort(eval, evec):
    """Sort eigenvalues and their eigenvectors together, ascending.

    eval: 1-D sequence of eigenvalues.
    evec: 2-D array with one row per eigenvalue (row i belongs to eval[i]).

    Returns (newval, newvec): copies of the inputs reordered so that newval
    is ascending and newvec[i] is the row that belonged to newval[i]. The
    inputs are not modified.

    If the number of eigenvalues differs from the number of rows in evec the
    process is terminated via sys.exit with an error message, which gives a
    non-zero exit status (the previous bare sys.exit() reported success).
    """
    n = len(eval)
    rows, _cols = evec.shape  # unpacking also enforces that evec is 2-D
    if n != rows:
        # validate before doing any copying work
        sys.exit("Help! eval and evec are different sizes!")

    newval = copy.copy(eval)
    newvec = copy.copy(evec)

    # order[dest] is the original position of the dest-th smallest eigenvalue
    order = argsort(eval)
    for dest, src in enumerate(order):
        newval[dest] = eval[src]
        newvec[dest, :] = evec[src, :]

    return newval, newvec
def pca(M):
    """Perform PCA on M, return eigenvalues and eigenvectors, sorted.

    M is a 2-D array with T rows and N columns. The result is the pair
    (evals, evecs): evals in descending order and evecs transposed so that
    each row corresponds to the eigenvalue at the same position.
    """
    n_rows, n_cols = shape(M)
    M_t = t(M)
    if n_rows < n_cols:
        # fewer rows than columns: snapshot method on the smaller
        # (rows x rows) matrix M * M^T
        # NOTE(review): unlike the covariance branch this matrix is not
        # scaled by 1/T -- confirm that the eigenvalue scale is intended.
        evals, small_evecs = eigenvectors(dot(M, M_t))
        # HACK: make sure evals are all positive
        evals = where(evals < 0, 0, evals)
        # NOTE(review): an eigenvalue clamped to 0 makes 1/sqrt(evals)
        # divide by zero.
        evecs = 1. / sqrt(evals) * dot(M_t, t(small_evecs))
    else:
        # enough rows: eigen-decompose the covariance matrix directly
        covariance = 1. / n_rows * dot(M_t, M)
        evals, evecs = eigenvectors(covariance)
    # reorder eigenvalues and the columns of evecs, largest eigenvalue first
    descending = argsort(evals)[::-1]
    evecs = take(evecs, descending, 1)
    evals = take(evals, descending)
    return evals, t(evecs)
예제 #7
0
def pca(M):
    """Perform PCA on M, return eigenvalues and eigenvectors, sorted.

    M is a 2-D array of T rows by N columns. Returns (evals, evecs) with
    evals in descending order and evecs transposed, so that row i goes with
    evals[i].
    """
    from Numeric import take, dot, shape, argsort, where, sqrt, transpose as t
    from LinearAlgebra import eigenvectors
    T, N = shape(M)
    transposed = t(M)
    if T >= N:
        # calculate covariance matrix and decompose it directly
        K = 1./T * dot(transposed, M)
        evals, evecs = eigenvectors(K)
    else:
        # fewer rows T than columns N: use the snapshot method, i.e. work
        # on the smaller T x T matrix instead
        # NOTE(review): this matrix is not scaled by 1/T the way K is --
        # confirm the eigenvalue scale is meant to differ between branches.
        C = dot(M, transposed)
        evals, evecsC = eigenvectors(C)
        # HACK: make sure evals are all positive
        evals = where(evals < 0, 0, evals)
        # NOTE(review): eigenvalues clamped to 0 lead to a division by zero
        evecs = 1./sqrt(evals) * dot(transposed, t(evecsC))
    # sort the eigenvalues and eigenvector columns, descending order
    order = argsort(evals)[::-1]
    evals_sorted = take(evals, order)
    evecs_sorted = take(evecs, order, 1)
    return evals_sorted, t(evecs_sorted)
예제 #8
0
	def get_top_edges_and_output(self, graph_dict, top_number, outfname, header_row):
		"""
		Write the top_number highest-scoring edges of graph_dict to outfname.

		graph_dict: maps an edge tuple to its correlation value.
		top_number: how many edges to write; if the graph has fewer edges,
			all of them are written.
		outfname: path of the tab-delimited output file (overwritten).
		header_row: list written as the first row of the file.

		Each following row is 'e', the members of the edge tuple, and the
		correlation, ordered from the highest correlation downwards.

		History:
		06-21-05	If the number of edges is less than the top_number,
			take it directly.
		"""
		# the message used to reference an undefined name (top_percentage),
		# which raised NameError before any work was done
		sys.stderr.write("Getting the top %s edges..."%top_number)
		# materialize as lists so they can be indexed; keys() and values()
		# of an unmodified dict are in corresponding order
		edge_tuple_list = list(graph_dict.keys())
		cor_list = list(graph_dict.values())
		arg_cor_list = argsort(cor_list).tolist()	#ascending, as a plain list
		arg_cor_list.reverse()	#descending order
		top_arg_list = arg_cor_list[:top_number]	#slicing past the end is fine
		if self.debug:
			print("cor_list is %s"%repr(cor_list))
			print("top_number is %s"%top_number)
			print("arg_cor_list is %s"%repr(arg_cor_list))
			print("top_arg_list is %s"%repr(top_arg_list))
		outf = open(outfname, 'w')
		try:
			writer = csv.writer(outf, delimiter='\t')
			writer.writerow(header_row)
			for index in top_arg_list:
				writer.writerow(['e']+ list(edge_tuple_list[index])+ [cor_list[index]])
		finally:
			# close explicitly so the rows are flushed even if a write fails
			outf.close()
		sys.stderr.write("Done.\n")
예제 #9
0
파일: Collection.py 프로젝트: fxia22/ASM_xf
    def normalizeConfiguration(self, repr=None):
        """Applies a linear transformation such that the coordinate
        origin becomes the center of mass of the object and its
        principal axes of inertia are parallel to the three coordinate
        axes.

        A specific representation can be chosen by setting |repr| to
          Ir    : x y z <--> b c a
          IIr   : x y z <--> c a b
          IIIr  : x y z <--> a b c
          Il    : x y z <--> c b a
          IIl   : x y z <--> a c b
          IIIl  : x y z <--> b a c

        Any other non-None value prints 'unknown representation' and is
        then handled like 'IIIr'.
        """
        from LinearAlgebra import determinant
        cm, inertia = self.centerAndMomentOfInertia()
        self.translateBy(-cm)
        ev, diag = inertia.diagonalization()
        # negate the first row when the determinant is negative
        if determinant(diag.array) < 0:
            diag.array[0] = -diag.array[0]
        if repr is not None:
            from Numeric import argsort, array
            # For each representation: which positions of the ascending
            # eigenvalue order end up in slots 0, 1 and 2. 'IIIr' keeps the
            # ascending order as it is and therefore needs no entry.
            reorderings = {
                'Ir': (1, 2, 0),
                'IIr': (2, 0, 1),
                'Il': (2, 1, 0),
                'IIl': (0, 2, 1),
                'IIIl': (1, 0, 2),
            }
            seq = argsort(ev)
            picks = reorderings.get(repr)
            if picks is not None:
                seq = array([seq[i] for i in picks])
            elif repr != 'IIIr':
                print('unknown representation')
            diag.array = Numeric.take(diag.array, seq)
        self.applyTransformation(Transformation.Rotation(diag))
def median(data):
    """Return the median element of data.

    data: non-empty 1-D sequence of comparable values.

    The value returned is always an element of data: the middle one of the
    sorted order for odd lengths, the upper of the two middle ones for even
    lengths. An empty sequence raises IndexError.
    """
    N = len(data)
    inds = argsort(data)
    # N // 2 is the middle position; the former N / 2 + 1 pointed one past
    # it (the maximum for N == 3, out of range for N == 1 or 2)
    return data[inds[N // 2]]
예제 #11
0
 def winners(self,N=1):
     """Return the indices of the N smallest entries of self.dists.

     The indices come back as a tuple, smallest distance first. N is
     capped at the number of distances available.
     """
     count = min(N,len(self.dists))
     return tuple(argsort(self.dists)[:count])
 def winners(self, N=1):
     """Indices of the best (smallest-distance) N units, as a tuple.

     Asking for more winners than there are distances simply returns
     all indices, ordered by increasing distance.
     """
     ranking = argsort(self.dists)
     how_many = min(N, len(self.dists))
     return tuple(ranking[:how_many])