Ejemplo n.º 1
0
Archivo: filter.py Proyecto: ctuna/ring
  def go(self, xn):
    """Push one input sample through the filter and return the new output.

    The output is computed as ``dot(self.b, x) - dot(self.a, y)``, where
    ``x`` holds the most recent inputs (newest first, as many as there are
    ``b`` coefficients) and ``y`` the most recent outputs (newest first, as
    many as there are ``a`` coefficients).

    On the first call (``self.x`` is still None) both histories are seeded
    as if the filter had been fed the constant ``xn`` forever: the inputs
    are all ``xn`` and the outputs are ``xn * sum(b) / (1 + sum(a))``, the
    fixed point of the recurrence for a constant input.

    xn: the new input sample.
    """
    # Identity test: once self.x is an array, ``== None`` would be an
    # element-wise comparison rather than a check for "not initialised".
    if self.x is None:
      self.x = ones(len(self.b)) * xn * 1.0
      self.y = ones(len(self.a)) * xn * 1.0 * sum(self.b) / (1+sum(self.a))

    # shift the new input in at the front, dropping the oldest one
    self.x = concatenate([[xn], self.x[:-1]])
    yn = dot(self.b, self.x) - dot(self.a, self.y)
    # same shift for the output history
    self.y = concatenate([[yn], self.y[:-1]])
    return yn
Ejemplo n.º 2
0
 def _beta(self,obsIndices,scale_factors=None):
     """Compute the backward values for a sequence of observation indices.

     obsIndices: sequence of indices into self.B, one per time step.
     scale_factors: optional per-time-step scaling factors; when omitted
     (or empty) a factor of one is used for every step.

     Returns a list with one vector of length self.N per time step, in
     forward time order. The numbers (24) and (25) are the equation
     numbers carried over from the original comments.
     """
     emissions = self.B
     transitions = self.A
     if not scale_factors:
         scale_factors = list(ones(len(obsIndices),Float))

     # (24): the last time step is just the last scale factor
     latest = ones(self.N,Float)*scale_factors[-1]
     backward = [latest]

     # (25): walk from the second-to-last time step down to the first
     for t in reversed(range(len(obsIndices)-1)):
         weighted = (1./scale_factors[t])*emissions[obsIndices[t+1]]*latest
         latest = matrixmultiply(transitions,weighted)
         backward.append(latest)

     # values were collected last-to-first; put them in time order
     backward.reverse()
     return backward
Ejemplo n.º 3
0
def eigenvector_for_largest_eigenvalue(matrix, max_iter=1000, tol=1e-9):
    """Returns eigenvector corresponding to largest eigenvalue of matrix.

    matrix: a square matrix (array).
    max_iter: maximum number of iterations (default 1000).
    tol: convergence threshold on the summed absolute change between two
    successive normalized vectors (default 1e-9).

    Implements a numerical method for finding an eigenvector by repeated
    application of the matrix to a starting vector. For a matrix A the
    process w(k) <-- A*w(k-1) converges to eigenvector w with the largest
    eigenvalue. Because distance matrix D has all entries >= 0, a theorem
    due to Perron and Frobenius on nonnegative matrices guarantees good
    behavior of this method, excepting degenerate cases where the
    iteration oscillates. For distance matrices a remedy is to add the
    identity matrix to A, permitting the iteration to converge to the
    eigenvector. (From Sander and Schneider (1991), and Vingron and
    Sibbald (1993))

    The returned vector is normalized so that its entries sum to one. If
    the iteration has not converged after max_iter steps, the last iterate
    is returned.

    Note: Only works on square matrices.
    """
    size = len(matrix)

    #always add the identity matrix to avoid oscillating behavior
    matrix = matrix + identity(size)

    #v starts as the normalized vector of ones (a fixed, not random, start).
    #Divide by a float: with integer ones and classic division the quotient
    #would be floored to an all-zero vector.
    v = ones(size)/float(size)

    #iterate until convergence
    new_v = v
    for _ in range(max_iter):
        new_v = matrixmultiply(matrix,v)
        new_v = new_v/sum(new_v) #normalize
        if sum(map(abs,new_v-v)) <= tol:
            break #converged
        v = new_v #not converged yet

    return new_v
Ejemplo n.º 4
0
    def test_vanishing_moments(self):
        """Check the vanishing moments condition on the lp coefficients.

        For every available filter length D a matrix A and right hand
        side b are built so that the lp coefficients returned by
        daubfilt(D) must satisfy A*lp == b (compared with allclose).
        """

        from daubfilt import daubfilt, number_of_filters

        for index in range(number_of_filters):
            moments = index + 1     # P = D/2, number of vanishing moments
            length = 2*moments      # D, number of filter coefficients
            rows = moments + 1      # R = P+1 equations (rank of A)

            lp, hp = daubfilt(length)

            # Condition number of A grows with P, so only the first 6
            # moment equations are filled in; the remaining rows of A and
            # b stay zero.
            A = zeros((rows,length), Float)  # D unknowns, R equations
            b = zeros((rows,1), Float)       # Right hand side

            # First equation: coefficients must sum to sqrt(2)
            A[0,:] = ones(length, Float)
            b[0] = sqrt(2)

            # Row p+1 holds the p'th vanishing moment equation
            for p in range(min(moments,6)):
                row = p + 1
                for k in range(length):
                    sign = (-1)**(length - k)
                    A[row,k] = sign * k**p

            assert allclose(b, mvmul(A,lp))
    def learn_batch(self, data):
        """Fit self.w to a batch of (x, y) pairs by linear least squares.

        data: a re-iterable collection of (x, y) pairs; it is traversed
        twice, once for the inputs and once for the targets.

        When self.use_bias is true, a column of ones is appended to the
        inputs (presumably `join` concatenates along the given axis --
        confirm against the file's imports). The first value returned by
        linear_least_squares is stored in self.w.
        """
        inputs = []
        for features, _ in data:
            inputs.append(features)
        targets = []
        for _, target in data:
            targets.append(target)

        X = array(inputs)
        Y = array(targets)

        if self.use_bias:
            bias_column = ones((len(X), 1))
            X = join((X, bias_column), axis=1)

        # only the solution is kept; the other three values are unused
        W, residuals, rank, s = linear_least_squares(X, Y)
        self.w = W
Ejemplo n.º 6
0
    def learn_batch(self,data):
        """Solve for the weights that best map the batch inputs to targets.

        data: a re-iterable collection of (x, y) pairs (it is walked once
        for the x values and once more for the y values).

        If self.use_bias is set, a column of ones is joined onto the input
        matrix first (presumably `join` is a concatenate -- confirm against
        the file's imports). The least-squares solution is stored in
        self.w; nothing is returned.
        """
        xs = []
        for x_item,_unused in data:
            xs.append(x_item)
        ys = []
        for _unused,y_item in data:
            ys.append(y_item)

        X = array(xs)
        Y = array(ys)

        if self.use_bias:
            n_rows = len(X)
            X = join((X,ones((n_rows,1))),axis=1)

        # residuals, rank and singular values are returned but not used
        W,residuals,rank,s = linear_least_squares(X,Y)
        self.w = W
Ejemplo n.º 7
0
    def evaluate(self):
        table = self.dataset.get_data() # TODO: type checking

        # sample expression
        #eval('col(0) = col(0) + 10')
        def col(nr):
            return table[nr].copy()

        buffer = self.textview.get_buffer()
        start, end = buffer.get_bounds()
        expression = buffer.get_text(start, end)        
    
        result = eval(expression,
                      {'__builtins__': {},
                       'sin': sin},
                      {'col' : col,
                       'cc' : self.colnr
                       }
                      )

        # If the result is not an array, then it is probably a scalar.
        # We create an array from this by multiplying it with an array
        # that consists only of ones.
        if not isinstance(result, ArrayType):
            o = ones( (table.nrows,), table[self.colnr].typecode() )
            result = o * result
            print "-- result is not an array --"
            print "==> converted to array"
            
        ul = UndoList().describe("Calculate column")
        try:
            def set_column_data(column, data, undolist=[]):
                old_data = column.data
                column.data = data
                undolist.append(UndoInfo(set_column_data, column, old_data))

            set_column_data(table.column(self.colnr), result, undolist=ul)

        except TypeError:
            print "-- incompatible result --", result
            return
        self.dataset.notify_change(undolist=ul)
        self.project.journal.append(ul)
Ejemplo n.º 8
0
    def toOddsMatrix(self, symbol_freqs=None):
        """Returns the OddsMatrix of a profile as a new Profile.

        symbol_freqs: per character array of background frequencies
        e.g. [.25,.25,.25,.25] for equal frequencies for each of the
        four bases.

        If no symbol frequencies are provided, all symbols will get equal
        freqs. The length of symbol freqs should match the number of
        columns in the profile! If symbol freqs contains a zero entry,
        a ProfileError is raised. This is done to prevent either a
        ZeroDivisionError (raised when zero is an int) or 'inf' in the
        resulting matrix (which happens when zero is a float).

        The result is self.Data divided by symbol_freqs, wrapped in a
        Profile that reuses this profile's Alphabet and CharOrder.
        """
        pl = self.Data.shape[1] #profile length
        #if symbol_freqs is None, create an array with equal frequencies
        if symbol_freqs is None:
            #divide by a float: with integer ones and classic division the
            #quotient would be floored to zero and trip the check below
            symbol_freqs = ones(pl)/float(pl)
        else:
            symbol_freqs = array(symbol_freqs)

        #raise error when symbol_freqs has wrong length
        if len(symbol_freqs) != pl:
            raise ProfileError(
                "Length of symbol freqs should be %s, but is %s"
                %(pl,len(symbol_freqs)))

        #raise error when symbol freqs contains zero (to prevent
        #ZeroDivisionError or 'inf' in the resulting matrix)
        if sum(symbol_freqs != 0) != len(symbol_freqs):
            #one-element tuple so the array is always formatted as a whole
            raise ProfileError(
                "Symbol frequency is not allowed to be zero: %s"
                %(symbol_freqs,))

        #calculate the OddsMatrix (plain odds, no logarithm is taken)
        odds = self.Data/symbol_freqs
        return Profile(odds, self.Alphabet, self.CharOrder)
"""
Various utility functions.

$Id: utils.py,v 1.19 2006/04/07 23:24:31 jp Exp $
"""

import Numeric
from Numeric import ones, exp, sqrt, zeros, argsort
import sys, operator

import plastk.rand as rand

# Module-level constant meant to hold floating-point infinity. It is built by
# dividing a one-element array by 0.0 and taking that element, because a plain
# Python float division by zero raises ZeroDivisionError.
# NOTE(review): relies on the array library returning inf instead of raising
# on division by zero -- confirm on the target platform.
inf = (ones(1) / 0.0)[0]


def enumerate(seq):
    """
    (Deprecated) Generate (index, item) pairs for seq, counting from zero.

    Note that this definition shadows the builtin of the same name inside
    this module; it is also available as enum.
    """
    position = 0
    for item in iter(seq):
        yield position, item
        position = position + 1


# alias for the generator above
enum = enumerate


def smooth(data, N=10, step=1):
    """
    Smooth sequence data using a moving window of width N.  Returns a
Ejemplo n.º 10
0
def ACL(tree):
    """Returns a normalized dictionary of sequence weights {seq_id: weight}

    tree: a PhyloNode object

    The ACL method is named after Altschul, Carroll and Lipman, who published a
    paper on sequence weighting in 1989.

    The ACL method is based on an idea of Felsenstein (1973). Imagine
    electrical current flows from the root of the tree down the edges and
    out the leaves. If the edge lengths are proportional to their electrical
    resistances, current flowing out each leaf equals the leaf weight.

    The first step in the calculation of the weight vector is calculating a
    variance-covariance matrix. The variance of a leaf is proportional to the
    distance from the root to that leaf. The covariance of two leaves is
    proportional to the distance from the root to the last common ancestor
    of the two leaves.

    The second step in the calculation results in a vector of weights. Suppose
    there are n leaves on the tree. Let i be the vector of size n, all of whose
    elements are 1.0. The weight vector is calculated as:
    w = (inverse(M)*i)/(transpose(i)*inverse(M)*i)
    See Altschul 1989

    Note: the branch lengths of tree are clipped in place before the
    calculation (see _clip_branch_lengths).
    """
    #clip branch lengths to avoid error due to negative or zero branch lengths
    _clip_branch_lengths(tree)

    #fetch the leaves once, in the order that the tree is traversed
    leaves = list(tree.TerminalDescendants)

    #get a list of sequence IDs (same order as the leaves)
    seqs = [leaf.Data for leaf in leaves]

    #map each sequence ID to its position in seqs, so the double loop below
    #does not have to search the list for every pair of leaves. setdefault
    #keeps the first position of a repeated ID, exactly like list.index.
    index_of = {}
    for position, seq_id in enumerate(seqs):
        index_of.setdefault(seq_id, position)

    #initialize the variance-covariance matrix
    m = zeros([len(seqs),len(seqs)],Float64)

    #calculate (co)variances
    #variance of a node is defined as the distance from the root to the leaf
    #covariance of two nodes is defined as the distance from the root to the
    #last common ancestor of the two leaves.
    for x in leaves:
        idx_x = index_of[x.Data]
        for y in leaves:
            idx_y = index_of[y.Data]
            if idx_x == idx_y:
                m[idx_x,idx_y] = x.distance(tree)
            else:
                lca = x.lastCommonAncestor(y)
                dist_lca_root = lca.distance(tree)
                m[idx_x,idx_y] = dist_lca_root
                m[idx_y,idx_x] = dist_lca_root

    #get the inverse of the variance-covariance matrix
    inv = inverse(m)
    #build vector i (vector of ones, length = # of leaves in the tree)
    i = ones(len(seqs),Float64)

    numerator = matrixmultiply(inv, i)
    denominator = matrixmultiply(matrixmultiply(transpose(i),inv),i)
    weight_vector = numerator/denominator

    #return a Weights object (is dict {seq_id: weight})
    return Weights(dict(zip(seqs,weight_vector)))
Ejemplo n.º 11
0
 def hasValidData(self, err=1e-9):
     """Returns True if all rows in self.Data add up to one

     err: absolute tolerance allowed between a row sum and one, to absorb
     floating point rounding (default 1e-9; pass 0 for an exact test).

     Every row is checked individually. Comparing the array of row sums
     to an array of ones inside an `if` does not test "all rows": the
     comparison is element-wise and its truth value is not an "all" test.
     """
     #sum along axis 1, i.e. one total per row
     row_totals = sum(self.Data,1)
     for total in row_totals:
         #written as "not <=" so that a NaN row sum counts as invalid
         if not abs(total - 1) <= err:
             return False
     return True
Ejemplo n.º 12
0
    print "Error: No files found"

# get line freqs & stat weights from molecular data file
# First find the name of that file: scan the first input file for the line
# that starts with "molfile".
file = open(flist[0], 'r')
try:
    line = ''
    while (line[0:7] != 'molfile'):
        line = file.readline()
        if not line:
            # readline() returns '' at end of file; without this check a
            # file lacking a "molfile" line would loop forever.
            raise ValueError("no 'molfile' entry found in %s" % flist[0])
finally:
    # close the file even when the entry is missing
    file.close()
# The name starts at column 8 (after "molfile" and one separator character).
# Strip only the line terminator, so that a final line without a trailing
# newline does not lose its last character.
molfile = line[8:].rstrip('\n')

# Read the level and line tables from the molecular data file.
# NOTE(review): the layout described below is only what the reads imply --
# confirm against the data file format.
file = open(molfile, 'r')
# skip the first five lines
for idum in range(5):
    file.readline()
# the next line holds the number of levels
nlev = int(file.readline())
# skip one line before the level table
file.readline()
# one double per level; every entry is overwritten in the loop below
gstat = ones(nlev, 'd')

# one line per level: the third whitespace-separated field is stored in
# gstat (presumably the statistical weight of the level)
for ilev in range(nlev):
    line = file.readline()
    words = line.split()
    gstat[ilev] = float(words[2])

# skip one line, read the number of lines (transitions), skip one more line
file.readline()
nlin = int(file.readline())
file.readline()
# per-line arrays: three of doubles ('d') and two of integers ('i'),
# initialised to one; presumably filled in by the code that follows
frq = ones(nlin, 'd')
gup = ones(nlin, 'd')
glo = ones(nlin, 'd')
jup = ones(nlin, 'i')
jlo = ones(nlin, 'i')