def SliceArray(array, slicelist):
    """Method for slicing an array

    This method can be used for slicing an array by specifying the array
    and a slicelist. The slicelist should have the same length as the
    number of axes in the array.

    **An example:** To slice an array having three axes with
    (12:17,:,[13,26]): 'SliceArray(array,[range(12,17),None,(13,26)])'
    Note that 'None' implies that all the values along the given axis are
    retained. The output array will have the same number of axes as the
    input array.
    """
    from Numeric import take
    for i in range(len(slicelist)):
        if slicelist[i] is None:
            pass
        elif type(slicelist[i]) == type(1):
            array = take(array, [slicelist[i]], i)
        else:
            array = take(array, slicelist[i], i)
    return array
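# Usage sketch (not from the original source): slice a 3-axis array with the
# equivalent of a[1:3, :, [0, 2]]. Assumes the legacy Numeric package is
# available; modern numpy.take behaves the same way for these inputs.
from Numeric import arange, reshape

a = reshape(arange(4 * 3 * 4), (4, 3, 4))
sub = SliceArray(a, [range(1, 3), None, (0, 2)])
assert sub.shape == (2, 3, 2)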
def toConsensus(self, cutoff=None, fully_degenerate=False,\
    include_all=False):
    """Returns the consensus sequence from a profile.

    cutoff: cutoff value, determines how much should be covered in a
    position (row) of the profile. Example: pos 0 [.2,.1,.3,.4]
    (CharOrder: TCAG). To cover .65 (=cutoff) we need two characters:
    A and G, which results in the degenerate character R.

    fully_degenerate: determines whether the fully degenerate character
    is returned at a position. For the example above an 'N' would
    be returned.

    include_all: all possibilities are included in the degenerate
    character. Example: row = UCAG = [.1,.3,.3,.3] cutoff = .4,
    consensus = 'V' (even though only 2 chars would be enough to
    reach the cutoff value).

    The Alphabet of the Profile should implement degenerateFromSequence.

    Note that cutoff has priority over fully_degenerate. In other words,
    if you specify a cutoff value and set fully_degenerate to true,
    the calculation will be done with the cutoff value. If nothing
    gets passed in, the maximum argument is chosen. In the first example
    above G will be returned.
    """
    #set up some local variables
    co = array(self.CharOrder)
    alpha = self.Alphabet
    data = self.Data

    #determine the action. Cutoff takes priority over fully_degenerate
    if cutoff:
        result = []
        degen = self.rowDegeneracy(cutoff)
        sorted = argsort(data)
        if include_all:
            #if include_all, include all possibilities in the degen char
            for row_idx, (num_to_keep, row) in enumerate(zip(degen, sorted)):
                to_take = [item for item in row[-num_to_keep:]\
                    if item in nonzero(data[row_idx])] +\
                    [item for item in nonzero(data[row_idx] ==\
                    data[row_idx, row[-num_to_keep]]) if item in\
                    nonzero(data[row_idx])]
                result.append(alpha.degenerateFromSequence(\
                    map(str, take(co, to_take))))
        else:
            for row_idx, (num_to_keep, row) in enumerate(zip(degen, sorted)):
                result.append(alpha.degenerateFromSequence(\
                    map(str, take(co, [item for item in row[-num_to_keep:]\
                    if item in nonzero(data[row_idx])]))))
    elif not fully_degenerate:
        result = take(co, argmax(self.Data))
    else:
        result = []
        for row in self.Data:
            result.append(alpha.degenerateFromSequence(\
                map(str, take(co, nonzero(row)))))
    return ''.join(map(str, result))
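# Usage sketch, restating the docstring's cases for a single profile row
# [.2, .1, .3, .4] with CharOrder TCAG: toConsensus() returns 'G' (the
# argmax), toConsensus(cutoff=.65) returns 'R' (A + G cover .7 >= .65), and
# toConsensus(fully_degenerate=True) returns 'N'.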
def test_center_of_mass_two_array(self):
    """center_of_mass_two_array should behave correctly"""
    com2 = center_of_mass_two_array
    coor = take(self.square_odd, (0, 1), 1)
    weights = take(self.square_odd, (2,), 1)
    self.assertEqual(com2(coor, weights), array([2, 2]))
    weights = weights.flat
    self.assertEqual(com2(coor, weights), array([2, 2]))
def center_of_mass_one_array(data, weight_idx=-1):
    """Calculates the center of mass for a dataset.

    data should be an array of x1,...,xn,r coordinates, where r is the
    weight of the point.
    """
    data = array(data)
    coord_idx = range(data.shape[1])
    del coord_idx[weight_idx]
    coordinates = take(data, (coord_idx), 1)
    weights = take(data, (weight_idx,), 1)
    return sum(coordinates * weights) / sum(weights)
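# Usage sketch for center_of_mass_one_array, which expects Numeric's
# array/take/sum in its module namespace. One coordinate column plus a
# weight column: (0*1 + 2*3) / (1 + 3) = 1.5.
com = center_of_mass_one_array([[0.0, 1.0], [2.0, 3.0]])
assert abs(com[0] - 1.5) < 1e-9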
def Translate(array, translation):
    """Method for translating an array used by Simulations.Dacapo.Grid"""
    from Numeric import concatenate, take
    import copy
    newarray = array
    size = array.shape
    for dim in range(len(translation)):
        axis = dim - len(translation)
        newarray = concatenate(
            (take(newarray, range(translation[dim], size[axis]), axis),
             take(newarray, range(translation[dim]), axis)), axis)
    # the array is copied to make it contiguous
    return copy.copy(newarray)
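# Usage sketch: Translate cyclically shifts each axis, moving the first
# translation[dim] entries to the back. Assumes the legacy Numeric package.
from Numeric import array

a = array([0, 1, 2, 3])
assert list(Translate(a, [1])) == [1, 2, 3, 0]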
def test_center_of_mass(self):
    """center_of_mass should make the right choice between functional methods
    """
    com = center_of_mass
    com1 = center_of_mass_one_array
    com2 = center_of_mass_two_array
    self.assertEqual(com(self.simple), com1(self.simple))
    self.assertFloatEqual(com(self.more_weight), com1(self.more_weight))
    self.assertEqual(com(self.sec_weight, 1), com1(self.sec_weight, 1))
    coor = take(self.square_odd, (0, 1), 1)
    weights = take(self.square_odd, (2,), 1)
    self.assertEqual(com(coor, weights), com2(coor, weights))
    weights = weights.flat
    self.assertEqual(com(coor, weights), com2(coor, weights))
def output_in_copath_format(self, outfname, node_rank):
    """
    04-20-05
        output go_no2cluster_group
    04-25-05
        cluster_id redefined
    """
    (conn, curs) = db_connect(self.hostname, self.dbname, self.schema)
    outf = open(outfname, 'a')
    writer = csv.writer(outf, delimiter='\t')
    for go_no, cluster_group in self.go_no2cluster_group.iteritems():
        counter = 0
        for bicluster in cluster_group.bicluster_list:
            seed_edge_id_list = list(take(cluster_group.edge_id_array, bicluster.row_index_list))
            edge_id_list = seed_edge_id_list + bicluster.added_edge_id_list
            vertex_list, edge_list = get_vertex_edge_list_by_edge_id(curs, edge_id_list)
            no_of_nodes = len(vertex_list)
            connectivity = len(edge_list) * 2.0 / (no_of_nodes * (no_of_nodes - 1))
            vertex_string = '{' + ';'.join(vertex_list) + ';}'
            edge_string = self.edge_string_from_edge_list(edge_list)
            cluster_id = "%s.%s" % (go_no, counter)
            writer.writerow([cluster_id, connectivity, vertex_string, edge_string])
            counter += 1
    del writer
    outf.close()
def seed_grow(self, node_rank, cor_cut_off, euc_dist_cut_off):
    """
    04-20-05
        add candidate edge based on its correlation with consensus_list, (>=0.8)
    """
    sys.stderr.write("Node %s, seed_growing...\n" % node_rank)
    for i in range(self.candidate_edge_array.shape[0]):
        candidate_edge_vector = self.candidate_edge_array[i, :]
        edge_id = int(candidate_edge_vector[0])    #first grab the edge_id
        candidate_edge_vector = candidate_edge_vector[1:]    #then grab its correlation vector
        for go_no, cluster_group in self.go_no2cluster_group.iteritems():
            if edge_id in cluster_group.edge_id_set:
                continue    #this edge is already in the function group
            for j in range(len(cluster_group.bicluster_list)):
                bicluster = cluster_group.bicluster_list[j]
                selected_candidate_edge_vector = list(take(candidate_edge_vector, bicluster.column_index_list))
                edge_data = graph_modeling.ind_cor(selected_candidate_edge_vector, \
                    bicluster.consensus_list, -1)    #leave_one_out = -1 means no leave_one_out
                euc_edge_data = graph_modeling.euc_dist(selected_candidate_edge_vector, \
                    bicluster.consensus_list)
                if edge_data.value >= cor_cut_off and euc_edge_data.value / (euc_edge_data.degree + 2) <= euc_dist_cut_off:
                    #average euclidean distance
                    bicluster.added_edge_id_list.append(edge_id)
                    bicluster.added_edge_matrix.append(selected_candidate_edge_vector)
                    cluster_group.bicluster_list[j] = bicluster    #write it back to update the list entry
    sys.stderr.write("Node %s, Done.\n" % (node_rank))
def __getitem__(self, key):
    """Override built-in. Called when the instance is indexed to retrieve
    a position, e.g.: li = matrix['A']

    Returns a list (a single column vector if only one position is
    specified), or a list of lists (a set of column vectors if several
    positions are specified) of tuples for that position."""
    if type(key) == types.TupleType:
        row, colName = key
        if colName in self.colList:
            col = self.extraCount + self.colList.index(colName)
        else:
            raise KeyError("can't find %s column" % colName)
        return self.array[(row, col)]
    elif type(key) == types.StringType:
        colNames = string.split(key, ":")
        li = []
        for col in colNames:
            # check first in list of alleles
            if col in self.colList:
                # get relative location in list
                relativeLoc = self.colList.index(col)
                # calculate real locations in array
                col1 = relativeLoc * 2 + self.extraCount
                col2 = col1 + 1
                li.append(col1)
                li.append(col2)
            # now check in non-allele metadata
            elif col in self.extraList:
                li.append(self.extraList.index(col))
            else:
                raise KeyError("can't find %s column" % col)
        if len(colNames) == 1:
            # return simply the pair of columns at that location as a list
            return take(self.array, tuple(li[0:2]), 1).tolist()
        else:
            # return the matrix consisting of the column vectors
            # of the designated keys
            return take(self.array, tuple(li), 1).tolist()
    else:
        raise KeyError("keys must be a string or tuple")
def setIds(self, id_fun=lambda x: x.Data.split("_")[-1]):
    """Sets "LeafLabel", "LeafCts", and "ContainsAll" attributes.

    id_fun: function that takes a node and generates a unique id (label)
    for it. By default it creates a label from the string to the right
    of the last underscore in the data attribute. E.g. if a node has a
    data label of 1234_HSA, the function will return the label "HSA";
    the idea being that if your tree has multiple human (HSA) sequences,
    the function will produce multiple nodes with the same label.

    The LeafLabel attribute is the result of the id_fun function.

    The LeafCts attribute is an array with counts of the leaves with the
    same label.

    The ContainsAll attribute is True when the node contains every
    instance of the LeafLabels of its terminal descendants, i.e. the set
    of LeafLabels of its terminal descendants occurs nowhere else in the
    tree. This is used by the uniqueIds function to remove duplicate
    species from the tree, but can be used for any label you choose.
    """
    labels = [id_fun(x) for x in self.TerminalDescendants]
    u_labels = list(set(labels))
    len_u_labels = len(u_labels)
    labels_dict = dict(zip(u_labels, range(len_u_labels)))
    all_cts = zeros(len(u_labels))
    for label in labels:
        all_cts[labels_dict[label]] += 1
    for n in self.traverse(self_before=False, self_after=True):
        if not n.Children:
            setattr(n, "LeafLabel", id_fun(n))
            setattr(n, "LeafCts", zeros(len_u_labels))
            n.LeafCts[labels_dict[n.LeafLabel]] = 1
        else:
            n.LeafCts = zeros(len_u_labels)
            for c in n.Children:
                n.LeafCts += c.LeafCts
        nzero = nonzero(n.LeafCts)
        total = sum(take(all_cts, nzero) - take(n.LeafCts, nzero))
        setattr(n, "ContainsAll", (total == 0))
def safe_sum_p_log_p(a, base=None):
    """Calculates p * log(p) safely for an array that may contain zeros."""
    flat = ravel(a)
    nz = take(flat, nonzero(flat))
    logs = log(nz)
    if base:
        logs /= log(base)
    return sum(nz * logs)
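# Usage sketch: the Shannon entropy of [.5, .5, 0] is 1 bit; the zero entry
# is skipped rather than producing log(0). Assumes Numeric-style
# ravel/take/nonzero/log/sum in scope, as used by the function above.
from Numeric import array

probs = array([0.5, 0.5, 0.0])
assert abs(safe_sum_p_log_p(probs, 2) - (-1.0)) < 1e-9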
def gaussian_activation(self):
    # Gaussian neighborhood activation: each unit's radius is half the
    # average distance to its connected neighbors.
    x = self.dists
    radii = zeros(self.dists.shape) * 0.0
    for u, conn_dict in enumerate(self.connections):
        neighbors = take(self.weights, conn_dict.keys())
        radii[u] = average(matrixnorm(neighbors - self.weights[u]))
    self.__activation = gaussian(x, radii / 2)
def pca(M):
    "Perform PCA on M, return eigenvectors and eigenvalues, sorted."
    T, N = shape(M)
    # if there are fewer rows T than columns N, use snapshot method
    if T < N:
        C = dot(M, t(M))
        evals, evecsC = eigenvectors(C)
        # HACK: make sure evals are all positive
        evals = where(evals < 0, 0, evals)
        evecs = 1. / sqrt(evals) * dot(t(M), t(evecsC))
    else:
        # calculate covariance matrix
        K = 1. / T * dot(t(M), M)
        evals, evecs = eigenvectors(K)
    # sort the eigenvalues and eigenvectors, descending order
    order = (argsort(evals)[::-1])
    evecs = take(evecs, order, 1)
    evals = take(evals, order)
    return evals, t(evecs)
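# Usage sketch: two perfectly correlated columns collapse onto a single
# principal component, so the second eigenvalue is (numerically) zero.
# Assumes the Numeric/LinearAlgebra names used by pca are in scope, as in
# the variant of pca below that imports them explicitly.
from Numeric import array

M = array([[1.0, 2.0], [2.0, 4.0], [3.0, 6.0]])
evals, evecs = pca(M)
assert abs(evals[1]) < 1e-6 * abs(evals[0])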
def randomSequence(self, force_accumulate=False, random_f=random):
    """Returns a random sequence matching the current probability matrix.

    Stores a cumulative sum (sort of) of the probability matrix in
    self._accumulated; use force_accumulate to reset it if you change
    the matrix in place (which you shouldn't do anyway).
    """
    co = self.CharOrder
    random_indices = self.randomIndices(force_accumulate, random_f)
    return ''.join(map(str, take(co, random_indices)))
def uniform_gaussian_activation(self):
    # Gaussian activation with a single radius shared by all units: half
    # the average distance between connected units across the whole map.
    x = self.dists
    total = 0.0
    count = 0
    for u, conn_dict in enumerate(self.connections):
        neighbors = take(self.weights, conn_dict.keys())
        total += sum(matrixnorm(neighbors - self.weights[u]))
        count += len(conn_dict)
    self.__activation = gaussian(x, (total / count) / 2)
def pca(M):
    "Perform PCA on M, return eigenvectors and eigenvalues, sorted."
    from Numeric import take, dot, shape, argsort, where, sqrt, transpose as t
    from LinearAlgebra import eigenvectors
    T, N = shape(M)
    # if there are fewer rows T than columns N, use snapshot method
    if T < N:
        C = dot(M, t(M))
        evals, evecsC = eigenvectors(C)
        # HACK: make sure evals are all positive
        evals = where(evals < 0, 0, evals)
        evecs = 1. / sqrt(evals) * dot(t(M), t(evecsC))
    else:
        # calculate covariance matrix
        K = 1. / T * dot(t(M), M)
        evals, evecs = eigenvectors(K)
    # sort the eigenvalues and eigenvectors, descending order
    order = (argsort(evals)[::-1])
    evecs = take(evecs, order, 1)
    evals = take(evals, order)
    return evals, t(evecs)
def output(self, outfname, node_rank):
    """
    04-26-05
        output the information about the bicluster, easy to check
    04-25-05
        cluster_id redefined
    """
    outf = open(outfname, 'a')
    writer = csv.writer(outf, delimiter='\t')
    for go_no, cluster_group in self.go_no2cluster_group.iteritems():
        counter = 0
        for bicluster in cluster_group.bicluster_list:
            cluster_id = "%s.%s" % (go_no, counter)
            seed_edge_id_list = list(take(cluster_group.edge_id_array, bicluster.row_index_list))
            edge_id_list = seed_edge_id_list + bicluster.added_edge_id_list
            writer.writerow([cluster_id, bicluster.score, repr(edge_id_list), repr(bicluster.column_index_list)])
            counter += 1
    outf.close()    #close the appended-to file so rows are flushed
def safe_log(a):
    """Returns the log (base 2) of each nonzero item in a.

    a: Numeric array

    WARNING: log2 is only defined on positive numbers, so make sure
    there are no negative numbers in the array.

    Always returns an array of floats to avoid unexpected results when
    applying it to an array of integers.
    """
    c = array(a.copy(), Float64)
    flat = ravel(c)
    nz_i = nonzero(flat)
    nz_e = take(flat, nz_i)
    log_nz = log2(nz_e)
    put(flat, nz_i, log_nz)
    return c
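# Usage sketch: zeros pass through unchanged while positive entries are
# replaced by their base-2 logs. Assumes Numeric-style array/Float64/ravel/
# nonzero/take/put plus a log2 helper in scope, as used by safe_log above.
from Numeric import array

out = safe_log(array([4.0, 0.0, 1.0]))
assert abs(out[0] - 2.0) < 1e-9 and out[1] == 0.0 and out[2] == 0.0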
def pairs_to_array(pairs, num_items=None, transform=None):
    """Returns an array with the same data as pairs (list of tuples).

    pairs can contain (first, second, weight) or (first, second) tuples.
    If a tuple has 2 items, the weight is assumed to be 1.

    num_items should contain the number of items that the pairs are
    chosen from. If None, it is calculated from the largest item in the
    actual list.

    transform contains an array that maps indices in the pairs
    coordinates to other indices, i.e. transform[old_index] = new_index.
    It is anticipated that transform will be the result of calling
    ungapped_to_gapped on the original, gapped sequence before the
    sequence is passed into something that strips out the gaps (e.g. for
    motif finding or RNA folding).

    WARNING: all tuples must be the same length! (I.e. if weight is
    supplied for any, it must be supplied for all.)

    WARNING: if num_items is actually smaller than the biggest index in
    the list (+ 1, because the indices start with 0), you'll get an
    exception when trying to place the object. Don't do it.
    """
    #handle easy case
    if not pairs:
        return array([])
    data = array(pairs)
    #figure out if we're mapping the indices to gapped coordinates
    if transform:
        #pairs of indices
        idx_pairs = take(transform, data[:, 0:2].astype(Int32))
    else:
        idx_pairs = data[:, 0:2].astype(Int32)
    #figure out biggest item if not supplied
    if num_items is None:
        num_items = int(max(ravel(idx_pairs))) + 1
    #make result array
    result = zeros((num_items, num_items), Float64)
    if len(data[0]) == 2:
        values = 1
    else:
        values = data[:, 2]
    put(ravel(result), idx_pairs[:, 0] * num_items + idx_pairs[:, 1], values)
    return result
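# Usage sketch: two weighted pairs scattered into a 3x3 matrix; num_items is
# inferred from the largest index. Assumes Numeric-style array/zeros/put/
# ravel/take and the Int32/Float64 typecodes in scope, as used above.
pairs = [(0, 1, 0.5), (2, 0, 2.0)]
m = pairs_to_array(pairs)
assert m[0][1] == 0.5 and m[2][0] == 2.0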
def condense_matrix(matrix, smallest_index, large_value):
    """Condenses the rows and columns indicated by smallest_index.

    smallest_index is returned from find_smallest_index. For both the
    rows and the columns, the values for the two indices are averaged.
    The resulting vector replaces the first index in the array, and the
    second index is replaced by an array with large numbers so that it
    is never chosen again by find_smallest_index.
    """
    first_index, second_index = smallest_index
    #get the rows and make a new vector that has their average
    rows = take(matrix, smallest_index)
    new_vector = average(rows)
    #replace the info in the row and column for the first index with new_vector
    matrix[first_index] = new_vector
    matrix[:, first_index] = new_vector
    #replace the info in the row and column for the second index with
    #high numbers so that it is ignored
    matrix[second_index] = large_value
    matrix[:, second_index] = large_value
    return matrix
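# Usage sketch: merging indices 0 and 1 averages their rows/columns into
# index 0 and masks index 1 with the large value. Assumes Numeric-style
# take and an axis-0 average in scope, as used by condense_matrix above.
from Numeric import array

m = array([[0.0, 1.0, 4.0],
           [1.0, 0.0, 6.0],
           [4.0, 6.0, 0.0]])
out = condense_matrix(m, (0, 1), 1e6)
assert out[0][2] == 5.0 and out[1][0] == 1e6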
def splint(xa, ya, y2a, x, derivs=False):
    """Returns the interpolated value from the spline.

    x can either be a scalar or a listable item, in which case a Numeric
    Float array will be returned and the multiple interpolations will be
    done somewhat more efficiently. If derivs is not False, return
    y, y', y'' instead of just y."""
    if type(x) is types.IntType or type(x) is types.FloatType:
        if (x < xa[0] or x > xa[-1]):
            # RangeError is assumed to be defined elsewhere in this module
            raise RangeError, "%f not in range (%f, %f) in splint()" % (x, xa[0], xa[-1])
        khi = max(searchsorted(xa, x), 1)
        klo = khi - 1
        h = float(xa[khi] - xa[klo])
        a = (xa[khi] - x) / h
        b = 1.0 - a
        ylo = ya[klo]
        yhi = ya[khi]
        y2lo = y2a[klo]
        y2hi = y2a[khi]
    else:
        #if we got here, we are processing a list, and should do so more efficiently
        if (min(x) < xa[0] or max(x) > xa[-1]):
            raise RangeError, "(%f, %f) not in range (%f, %f) in splint()" % (min(x), max(x), xa[0], xa[-1])
        npoints = len(x)
        khi = clip(searchsorted(xa, x), 1, len(xa))
        klo = khi - 1
        xhi = take(xa, khi)
        xlo = take(xa, klo)
        yhi = take(ya, khi)
        ylo = take(ya, klo)
        y2hi = take(y2a, khi)
        y2lo = take(y2a, klo)
        h = (xhi - xlo).astype(Float)
        a = (xhi - x) / h
        b = 1.0 - a
    y = a * ylo + b * yhi + ((a * a * a - a) * y2lo + (b * b * b - b) * y2hi) * (h * h) / 6.0
    if derivs:
        return y, (yhi - ylo) / h + ((3 * b * b - 1) * y2hi - (3 * a * a - 1) * y2lo) * h / 6.0, b * y2hi + a * y2lo
    else:
        return y
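# Usage sketch: with all second derivatives zero, splint reduces to plain
# linear interpolation between the knots. Assumes Numeric-style searchsorted/
# clip/take and the Float typecode in scope, as used by splint above.
from Numeric import array, zeros, Float

xa = array([0.0, 1.0, 2.0])
ya = array([0.0, 2.0, 4.0])
y2a = zeros(3, Float)
assert abs(splint(xa, ya, y2a, 0.5) - 1.0) < 1e-9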
def AlnToProfile(aln, alphabet=None, char_order=None, split_degenerates=False,\
    weights=None):
    """Generates a Profile object from an Alignment.

    aln: Alignment object
    alphabet (optional): an Alphabet object (or list of chars, but if
    you want to split degenerate symbols, the alphabet must have a
    Degenerates property). Default is the alphabet of the first seq in
    the alignment.
    char_order (optional): order of the characters in the profile.
    Default is list(alphabet).
    split_degenerates (optional): whether you want the counts for the
    degenerate symbols to be divided over the non-degenerate symbols
    they code for.
    weights (optional): dictionary of seq_id: weight. If not entered,
    all seqs are weighted equally.

    A Profile is a position x character matrix describing which
    characters occur at each position of an alignment. The Profile is
    always normalized, so it gives the probabilities of each character
    at each position.

    Ignoring chars: you can ignore characters in the alignment by not
    putting the char in the CharOrder. If you ignore all characters at a
    particular position, an error will be raised, because the profile
    can't be normalized.

    Splitting degenerates: you can split degenerate characters over the
    non-degenerate characters they code for. For example: R = A or G.
    So, an R at a position counts for 0.5 A and 0.5 G.

    Example:
    seq1    TCAG    weight: 0.5
    seq2    TAR-    weight: 0.25
    seq3    YAG-    weight: 0.25

    AlnToProfile(aln, alphabet=DnaAlphabet, char_order="TACG", weights=w,
        split_degenerates=True)
    Profile:
       T      A      C      G
    [[ 0.875  0.     0.125  0.   ]
     [ 0.     0.5    0.5    0.   ]
     [ 0.     0.625  0.     0.375]
     [ 0.     0.     0.     1.   ]]
    """
    if alphabet is None:
        alphabet = aln.values()[0].Alphabet
    if char_order is None:
        char_order = list(alphabet)
    if weights is None:
        # float division, so equal weights don't truncate to zero in Python 2
        weights = dict.fromkeys(aln.keys(), 1.0 / len(aln))

    char_meaning = CharMeaningProfile(alphabet, char_order,\
        split_degenerates)
    profiles = []
    for k, v in aln.items():
        profiles.append(take(char_meaning.Data, asarray(v.upper(), UInt8))\
            * weights[k])
    s = reduce(add, profiles)
    result = Profile(s, alphabet, char_order)
    try:
        result.normalizePositions()
    except:
        raise ValueError,\
            "Probably one of the rows in your profile adds up to zero,\n " +\
            "because you are ignoring all of the characters in the " +\
            "corresponding\n column in the alignment"
    return result
def SeqToProfile(seq, alphabet=None, char_order=None,\
    split_degenerates=False):
    """Generates a Profile object from a Sequence object.

    seq: Sequence object
    alphabet (optional): Alphabet object (if you want to split
    degenerate symbols, the alphabet object should have a Degenerates
    property). Default is the Alphabet associated with the Sequence
    object.
    char_order (optional): the order in which the characters occur in
    the Profile. Default is list(alphabet).
    split_degenerates (optional): whether you want the counts for the
    degenerate symbols to be divided over the non-degenerate symbols
    they code for.

    A Profile is a position x character matrix describing which
    characters occur at each position. In a sequence (as opposed to an
    alignment) only one character occurs at each position. In general, a
    sequence profile will only contain ones and zeros. However, you have
    the possibility of splitting degenerate characters. For example, if
    a position is R, it means that you have a 50/50% chance of A and G.
    It's also possible to ignore characters, which in a sequence profile
    will lead to positions (rows) containing only zeros.

    Example:
    Sequence = ACGU
    SeqToProfile(seq, char_order="UCAG"):
    U C A G
    0 0 1 0   first pos
    0 1 0 0   second pos
    0 0 0 1   third pos
    1 0 0 0   fourth pos

    Sequence = GURY
    SeqToProfile(seq, char_order="UCAG", split_degenerates=True)
    U  C  A  G
    0  0  0  1    first pos
    1  0  0  0    second pos
    0  0  .5 .5   third pos
    .5 .5 0  0    fourth pos

    Characters can also be ignored
    Sequence = ACN-
    SeqToProfile(seq, char_order="UCAG", split_degenerates=True)
    U   C   A   G
    0   0   1   0    first pos
    0   1   0   0    second pos
    .25 .25 .25 .25  third pos
    0   0   0   0    fourth pos <-- contains only zeros
    """
    if alphabet is None:
        alphabet = seq.Alphabet
    if char_order is None:
        char_order = list(alphabet)
    #Determine the meaning of each character based on the alphabet, the
    #character order, and the option to split degenerates
    char_meaning = CharMeaningProfile(alphabet, char_order,\
        split_degenerates)
    #construct profile data
    result_data = take(char_meaning.Data, asarray(seq.upper(), UInt8))
    return Profile(result_data, alphabet, char_order)
def redraw(screen, buf, palette, frames):
    # bitwise-AND interference pattern: advance it one frame and map each
    # cell through the palette before blitting to the screen
    x, y = indices(screensize)
    # this 256 is not ncolors; it's a timing/pacing thing
    buf += ((x + frames) & (y + frames)) >> (frames % 256) >> 3
    buf %= ncolors
    pygame.surfarray.blit_array(screen, take(palette, buf))
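# Driver sketch (hypothetical names): the event loop this redraw belongs to.
# screensize, ncolors, palette and buf are module globals in the original
# demo; only the call pattern is shown here.
#
#     while running:
#         redraw(screen, buf, palette, frames)
#         pygame.display.flip()
#         frames += 1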