# Example #1
def column_degeneracy(a,cutoff=.5):
    """Returns the number of characters needed per column to cover >= cutoff

    a: two-dimensional numeric array (nested sequences are converted)
    cutoff: cumulative total that has to be reached within each column

    Every column is sorted in descending order and accumulated; the result
    for a column is the number of leading (largest) values needed for the
    running total to reach cutoff.

    [   [.1 .8  .3],
        [.3 .2  .3],
        [.6 0   .4]]
    if cutoff = .75: column_degeneracy -> [2,1,3]
    if cutoff = .45: column_degeneracy -> [1,1,2]

    WARNING: watch out with floating point numbers.
    If cutoff = 0.9 and a running total is nominally 0.9 as well, rounding
    in the accumulated sum decides whether it counts as reached. Compare:
        searchsorted(cumsum(array([.6,.3,.1])),.9)
        searchsorted(cumsum(array([.5,.4,.1])),.9)

    If the cutoff value is never reached, the result is clipped to the
    number of rows in the array.

    Returns [] when the array is empty or holds only zeros.
    Raises ValueError when the array is not two-dimensional.
    """
    data = array(a)
    # The truth value of a multi-element array is ambiguous, so test the
    # contents explicitly: nothing to cover in an empty or all-zero array.
    if not data.any():
        return []
    # Largest values first within each column, then running totals down the
    # rows. The axis is given explicitly so that the accumulation is
    # per column and never over the flattened array.
    totals = cumsum(sort(data, 0)[::-1], 0)
    try:
        degen = [searchsorted(totals[:, idx], cutoff)
                 for idx in range(len(totals[0]))]
    except (TypeError, IndexError):
        # A one-dimensional input has scalar rows (no len) and cannot be
        # indexed by column.
        raise ValueError("Array has to be two dimensional")
    #degen contains now the indices at which the cutoff was hit
    #to change to the number of characters, add 1
    return clip(array(degen) + 1, 0, data.shape[0])
# Example #2
def row_degeneracy(a,cutoff=.5):
    """Returns the number of characters needed per row to cover >= cutoff

    a: two-dimensional numeric array (nested sequences are converted)
    cutoff: cumulative total that has to be reached within each row

    Every row is sorted in descending order and accumulated; the result for
    a row is the number of leading (largest) values needed for the running
    total to reach cutoff.

    [   [.1 .3  .4  .2],
        [.5 .3  0   .2],
        [.8 0   .1  .1]]
    if cutoff = .75: row_degeneracy -> [3,2,1]
    if cutoff = .95: row_degeneracy -> [4,3,3]

    WARNING: watch out with floating point numbers.
    If cutoff = 0.9 and a running total is nominally 0.9 as well, rounding
    in the accumulated sum decides whether it counts as reached. Compare:
        searchsorted(cumsum(array([.6,.3,.1])),.9)
        searchsorted(cumsum(array([.5,.4,.1])),.9)

    If the cutoff value is never reached, the result is clipped to the
    number of columns in the array.

    Returns [] when the array is empty or holds only zeros.
    Raises ValueError when the array is not two-dimensional.
    """
    data = array(a)
    # The truth value of a multi-element array is ambiguous, so test the
    # contents explicitly: nothing to cover in an empty or all-zero array.
    if not data.any():
        return []
    try:
        # Sort each row ascending, reverse it so the largest values come
        # first, then accumulate along the row.
        totals = cumsum(sort(data)[:, ::-1], 1)
    except IndexError:
        # A one-dimensional input cannot be sliced along a second axis.
        raise ValueError("Array has to be two dimensional")
    degen = [searchsorted(row_totals, cutoff) for row_totals in totals]
    #degen contains now the indices at which the cutoff was hit
    #to change to the number of characters, add 1
    return clip(array(degen) + 1, 0, data.shape[1])
# Example #3
 def simulate(self,length,show_hidden=0):
     """Generates a random sequence of observations of the given length.

     length: number of steps to simulate
     show_hidden: if true, the docstring contract is a list of
         (state, observation) pairs instead of bare observations

     NOTE(review): this copy of the method is truncated (see the comments
     in the loop), so the statements that fill the returned list are not
     visible here.
     """
     import random
     # Running totals of the matrices self.A and self.B; searchsorted on a
     # column of these with a uniform random number picks an index.
     # NOTE(review): no axis is passed to cumsum - confirm that the array
     # library in use accumulates along the intended axis by default.
     cumA = cumsum(self.A)
     cumB = cumsum(self.B)
     # Draw the initial state index from self.pi.
     state = searchsorted(cumsum(self.pi),random.random())
     seq = []
     for i in range(length):
         # Look up an entry of self.omega_O using column `state` of cumB.
         # NOTE(review): the statement below is cut off - the second
         # argument of searchsorted and the closing brackets are missing,
         # and the `if show_hidden:` that follows has no body. Restore both
         # from the original source before using this method.
         symbol = self.omega_O[searchsorted(cumB[:,state],
         if show_hidden:
         # Draw the next state index from column `state` of cumA.
         state = searchsorted(cumA[:,state],random.random())
     return seq
# Example #4
    def randomIndices(self, force_accumulate=False, random_f = random):
        """Returns random indices matching current probability matrix.

        Stores the row-wise cumulative sum of the probability matrix
        (self.Data) in self._accumulated; use force_accumulate to reset it
        if you change the matrix in place (which you shouldn't do anyway).

        force_accumulate: if true, recompute self._accumulated even when a
            cached copy exists.
        random_f: callable that takes the number of rows and returns one
            random number per row (presumably uniform in [0, 1) - confirm
            against the module-level default).

        The result has one index per row of self.Data: the first position
        at which that row's cumulative sum reaches the row's random number.
        The returned indices correspond to the characters in the
        CharOrder of the Profile.
        """
        if force_accumulate or not hasattr(self, '_accumulated'):
            # Cache the cumulative sums; they only depend on self.Data.
            self._accumulated = cumsum(self.Data, 1)
        choices = random_f(len(self.Data))
        return array([searchsorted(row_totals, choice)
                      for row_totals, choice in zip(self._accumulated, choices)])
# Example #5
def masked_to_unmasked(mask, remove_mask=False):
    """Returns array mapping indices in the original to indices in the unmasked sequence.

    mask: sequence of truth values, true where a position is masked.

    Specifically, for each position in the original, returns the index of the
    position in the unmasked sequence of the last non-masked character at or
    before that index (i.e. if the index corresponds to a masked position, will
    return the index of the previous non-masked position since the masked
    positions aren't in the unmasked sequence by definition).

    Positions before the first non-masked character map to -1, because no
    unmasked position precedes them.

    If remove_mask is True (the default is False), sets the masked positions
    to -1 for easy detection.
    """
    # Counting the unmasked positions seen so far gives a 1-based rank in the
    # unmasked sequence; subtract 1 to turn it into an index.
    result = cumsum(logical_not(mask)) - 1
    if remove_mask:
        result = where(mask, -1, result)
    return result