コード例 #1
0
ファイル: array.py プロジェクト: pombredanne/old-cogent
def column_degeneracy(a,cutoff=.5):
    """Returns the number of characters that's needed to cover >= cutoff

    For every column the values are sorted in descending order, and the
    result is how many of the largest values have to be added up before
    their cumulative sum reaches cutoff.

    a: two-dimensional array (anything array() can convert is accepted)
    cutoff: number that should be covered in each column

    Returns an array holding one count per column. Input without any
    elements (None, an empty sequence, an array with a zero-length
    dimension) returns an empty list.

    Raises ValueError if a is not two dimensional.

    Example:
    [   [.1 .8  .3],
        [.3 .2  .3],
        [.6 0   .4]]
    if cutoff = .75: column_degeneracy -> [2,1,3]
    if cutoff = .45: column_degeneracy -> [1,1,2]

    WARNING: watch out with floating point numbers. 
    if the cutoff= 0.9 and in the array is also 0.9, it might not be found
    >>> searchsorted(cumsum(array([.6,.3,.1])),.9)
    2
    >>> searchsorted(cumsum(array([.5,.4,.1])),.9)
    1

    If the cutoff value is not found, the result is clipped to the
    number of rows in the array. This also holds for columns that only
    contain zeros: an all-zero array is not treated as empty.
    """
    if a is None:
        return []
    data = array(a)
    # Emptiness is read off the shape. Truth-testing the array itself is
    # ambiguous: depending on the array package it raises for arrays with
    # several elements or reports an all-zero array as "empty".
    if 0 in data.shape:
        return []
    if len(data.shape) != 2:
        raise ValueError("Array has to be two dimensional")
    n_rows, n_cols = data.shape
    # Largest value first within each column, then accumulate down the
    # rows. The axis is passed explicitly because array packages disagree
    # on the default (axis 0 vs. flattening the array first).
    running = cumsum(sort(data, 0)[::-1], 0)
    # hit_rows contains the (0-based) row at which the cutoff was hit
    hit_rows = [searchsorted(running[:, col], cutoff)
                for col in range(n_cols)]
    # to change an index into a number of characters, add 1; the clip
    # handles columns in which the cutoff was never reached
    return clip(array(hit_rows) + 1, 0, n_rows)
コード例 #2
0
ファイル: array.py プロジェクト: pombredanne/old-cogent
def row_degeneracy(a,cutoff=.5):
    """Returns the number of characters that's needed to cover >= cutoff

    For every row the values are sorted in descending order, and the
    result is how many of the largest values have to be added up before
    their cumulative sum reaches cutoff.

    a: two-dimensional array (anything array() can convert is accepted)
    cutoff: number that should be covered in each row

    Returns an array holding one count per row. Input without any
    elements (None, an empty sequence, an array with a zero-length
    dimension) returns an empty list.

    Raises ValueError if a is not two dimensional.

    Example:
    [   [.1 .3  .4  .2],
        [.5 .3  0   .2],
        [.8 0   .1  .1]]
    if cutoff = .75: row_degeneracy -> [3,2,1]
    if cutoff = .95: row_degeneracy -> [4,3,3]

    WARNING: watch out with floating point numbers. 
    if the cutoff= 0.9 and in the array is also 0.9, it might not be found
    >>> searchsorted(cumsum(array([.6,.3,.1])),.9)
    2
    >>> searchsorted(cumsum(array([.5,.4,.1])),.9)
    1

    If the cutoff value is not found, the result is clipped to the
    number of columns in the array. This also holds for rows that only
    contain zeros: an all-zero array is not treated as empty.
    """
    if a is None:
        return []
    data = array(a)
    # Emptiness is read off the shape. Truth-testing the array itself is
    # ambiguous: depending on the array package it raises for arrays with
    # several elements or reports an all-zero array as "empty".
    if 0 in data.shape:
        return []
    if len(data.shape) != 2:
        raise ValueError("Array has to be two dimensional")
    n_cols = data.shape[1]
    # Sort each row ascending, reverse it so the largest value comes
    # first, then accumulate along the row.
    running = cumsum(sort(data, 1)[:, ::-1], 1)
    # hit_cols contains the (0-based) position at which the cutoff was hit
    hit_cols = [searchsorted(row_sums, cutoff) for row_sums in running]
    # to change an index into a number of characters, add 1; the clip
    # handles rows in which the cutoff was never reached
    return clip(array(hit_cols) + 1, 0, n_cols)
コード例 #3
0
ファイル: hmm.py プロジェクト: pruan/TestDepot
 def simulate(self,length,show_hidden=0):
     """Generates a random sequence of observations of the given length.

     The initial state is drawn from self.pi. At every step a symbol is
     drawn from column `state` of self.B and looked up in self.omega_O,
     then the next state is drawn from column `state` of self.A.
     (Presumably pi and those columns are probability distributions
     summing to 1 -- verify against the class definition.)

     length: number of observations to generate
     show_hidden: if true, returns a list of (state, observation) pairs,
         with the state looked up in self.omega_X; otherwise returns a
         list of observations only
     """
     import random
     # Accumulate down the columns (axis 0) so that column k holds the
     # cumulative distribution sampled with searchsorted below. The axis
     # is passed explicitly: array packages disagree on the default, and a
     # flattened result could not be indexed as [:, state].
     cumA = cumsum(self.A, 0)
     cumB = cumsum(self.B, 0)
     state = searchsorted(cumsum(self.pi, 0), random.random())
     seq = []

     for _ in range(length):
         # One draw for the emitted symbol, then one for the transition;
         # the order of the draws is kept so seeded runs stay reproducible.
         symbol = self.omega_O[searchsorted(cumB[:, state],
                                            random.random())]
         if show_hidden:
             seq.append((self.omega_X[state], symbol))
         else:
             seq.append(symbol)
         state = searchsorted(cumA[:, state], random.random())
     return seq
コード例 #4
0
ファイル: profile.py プロジェクト: pombredanne/old-cogent
    def randomIndices(self, force_accumulate=False, random_f = random):
        """Returns random indices matching current probability matrix.

        The cumulative sum of self.Data along axis 1 is cached in
        self._accumulated. It is computed on the first call and reused
        afterwards; pass force_accumulate=True to recompute it if the
        matrix was changed in place (which you shouldn't do anyway).

        random_f is called once with the number of rows in self.Data and
        has to return one random value per row. Each value is located in
        the cumulative sums of its row, which gives the returned index.

        The returned indices correspond to the characters in the
        CharOrder of the Profile.
        """
        cache_usable = not force_accumulate and hasattr(self, '_accumulated')
        if not cache_usable:
            self._accumulated = cumsum(self.Data, 1)
        draws = random_f(len(self.Data))
        picked = []
        for cumulative_row, draw in zip(self._accumulated, draws):
            picked.append(searchsorted(cumulative_row, draw))
        return array(picked)
コード例 #5
0
ファイル: array.py プロジェクト: pombredanne/old-cogent
def masked_to_unmasked(mask, remove_mask=False):
    """Returns array mapping indices in orig to indices in ungapped.

    mask: array that is true (non-zero) at the masked positions.
    remove_mask: if True, masked positions are reported as -1.

    For every position of the original sequence the result holds the index,
    within the unmasked sequence, of the last non-masked character at or
    before that position. A masked position therefore gets the index of the
    previous non-masked position, since masked positions do not occur in
    the unmasked sequence. Positions that precede the first non-masked
    character come out as -1.

    With remove_mask=True (the default is False) every masked position is
    set to -1 instead, for easy detection.
    """
    # Running count of non-masked positions seen so far; the index of the
    # most recent one is that count minus one.
    seen_unmasked = cumsum(logical_not(mask))
    positions = seen_unmasked - 1
    if not remove_mask:
        return positions
    return where(mask, -1, positions)