def go(self, xn):
    """Feed one input sample through the filter and return the output sample.

    On the first call (``self.x`` is None) both histories are pre-filled
    from ``xn``: the input history with ``xn`` itself and the output
    history with ``xn * sum(b) / (1 + sum(a))`` (presumably the steady-state
    output for a constant input, so that the filter starts without a
    transient).

    xn: the new input sample.

    Returns ``dot(b, x) - dot(a, y)``, evaluated on the updated input
    history ``x`` and the previous output history ``y``.
    """
    # Identity test: once self.x holds an array, ``== None`` compares
    # element-wise instead of answering "has the state been initialised?".
    if self.x is None:
        self.x = ones(len(self.b)) * xn * 1.0
        self.y = ones(len(self.a)) * xn * 1.0 * sum(self.b) / (1 + sum(self.a))

    # Push the newest input in at the front and drop the oldest one.
    self.x = concatenate([[xn], self.x[:-1]])
    yn = dot(self.b, self.x) - dot(self.a, self.y)
    # Same shift for the output history.
    self.y = concatenate([[yn], self.y[:-1]])
    return yn
def _beta(self, obsIndices, scale_factors=None):
    """Compute the backward values for a sequence of observations.

    obsIndices: sequence of observation indices, one per time step; each
        one is used to index ``self.B``.
    scale_factors: optional per-time-step scale factors (list or array),
        indexed like obsIndices.  When omitted (None or empty) every
        factor is 1.0.

    Returns a list holding one array of length ``self.N`` per time step,
    ordered from the first time step to the last.
    """
    B = self.B
    A = self.A
    # Explicit None/empty test: ``scale_factors or ...`` takes the truth
    # value of the argument, which is not well defined for an array.
    if scale_factors is None or len(scale_factors) == 0:
        scale_factors = list(ones(len(obsIndices), Float))

    # Initialisation at the last time step -- (24)
    beta = [ones(self.N, Float) * scale_factors[-1]]
    # Induction, walking backwards through time -- (25)
    for t in range(len(obsIndices) - 2, -1, -1):
        weighted = (1. / scale_factors[t]) * B[obsIndices[t + 1]] * beta[-1]
        beta.append(matrixmultiply(A, weighted))

    # Values were collected last-to-first; return them first-to-last.
    beta.reverse()
    return beta
def eigenvector_for_largest_eigenvalue(matrix, max_iterations=1000,
                                       tolerance=1e-9):
    """Returns eigenvector corresponding to largest eigenvalue of matrix.

    Implements a numerical method for finding an eigenvector by repeated
    application of the matrix to a starting vector. For a matrix A the
    process w(k) <-- A*w(k-1) converges to eigenvector w with the largest
    eigenvalue. Because distance matrix D has all entries >= 0, a theorem
    due to Perron and Frobenius on nonnegative matrices guarantees good
    behavior of this method, excepting degenerate cases where the
    iteration oscillates. For distance matrices a remedy is to add the
    identity matrix to A, permitting the iteration to converge to the
    eigenvector. (From Sander and Schneider (1991), and Vingron and
    Sibbald (1993))

    matrix: square matrix.
    max_iterations: upper bound on the number of multiplication steps
        (default 1000).
    tolerance: the iteration stops as soon as the summed absolute change
        between two successive normalized vectors is not larger than this
        value (default 1e-9).

    The returned vector is normalized so that its entries sum to one. If
    the iteration limit is reached first, the last iterate is returned.

    Note: Only works on square matrices.
    """
    size = len(matrix)
    #always add the identity matrix to avoid oscillating behavior
    matrix = matrix + identity(size)

    #start from the normalized vector of ones; float() keeps this a true
    #division even when ones() yields an integer array
    v = ones(size) / float(size)
    new_v = v

    #iterate until convergence
    for i in range(max_iterations):
        new_v = matrixmultiply(matrix, v)
        new_v = new_v / sum(new_v) #normalize
        if sum(map(abs, new_v - v)) > tolerance:
            v = new_v #not converged yet
        else:
            break #converged
    return new_v
def test_vanishing_moments(self):
    """Test that coefficients in lp satisfy the vanishing moments condition

    For every available filter length D a linear system A*lp = b is built:
    row 0 requires the coefficients to sum to sqrt(2), row p+1 encodes the
    p'th vanishing moment condition.  The test passes when mvmul(A, lp)
    matches b for each filter.
    """

    from daubfilt import daubfilt, number_of_filters

    for i in range(number_of_filters):
        D = 2*(i+1)
        # Floor division keeps P an integer whatever the division
        # semantics in force; it is used as an array dimension and as a
        # range() bound below.
        P = D//2  # Number of vanishing moments
        # The nullspace of A has dimension N = P-1, hence:
        R = P+1   # Rank of A, R = D-N = P+1 equations

        lp, hp = daubfilt(D)

        # Condition number of A grows with P, so we test only
        # the first 6 (and eps is slightly larger than machine precision)

        A = zeros((R, D), Float)  # D unknowns, D-N equations
        b = zeros((R, 1), Float)  # Right hand side
        b[0] = sqrt(2)

        A[0, :] = ones(D, Float)  # Coefficients must sum to sqrt(2)
        for p in range(min(P, 6)):  # the p'th vanishing moment (Cond Ap)
            for k in range(D):
                m = D-k
                A[p+1, k] = (-1)**m * k**p

        assert allclose(b, mvmul(A, lp))
def learn_batch(self, data):
    """Fit the weights to a whole batch of (input, target) pairs.

    data: collection of (x, y) pairs; it is traversed twice, once for the
        inputs and once for the targets.

    When ``self.use_bias`` is set, a column of ones is appended to the
    inputs before solving.  The least-squares solution is stored in
    ``self.w``; nothing is returned.
    """
    inputs = array([sample for sample, _ in data])
    targets = array([target for _, target in data])

    if self.use_bias:
        bias_column = ones((len(inputs), 1))
        inputs = join((inputs, bias_column), axis=1)

    # Only the solution is kept; the diagnostics are discarded.
    weights, _residuals, _rank, _singular = linear_least_squares(inputs, targets)
    self.w = weights
def learn_batch(self,data):
    """Solve for the weight vector over the complete data set at once.

    data: collection of (x, y) pairs (read in two passes).

    The x values form the rows of the design matrix, extended by a
    trailing column of ones when ``self.use_bias`` is true; the y values
    form the right-hand side.  The least-squares solution ends up in
    ``self.w``.
    """
    design = array([row for row,_ in data])
    rhs = array([value for _,value in data])

    if not self.use_bias:
        fit = linear_least_squares(design,rhs)
    else:
        with_bias = join((design,ones((len(design),1))),axis=1)
        fit = linear_least_squares(with_bias,rhs)

    # fit is (solution, residuals, rank, singular values)
    solution,_res,_rk,_sv = fit
    self.w = solution
def evaluate(self):
    """Evaluate the expression in the text view and store it in a column.

    The text of ``self.textview`` is evaluated as a Python expression in
    which ``col(n)`` yields a copy of table column ``n``, ``cc`` is the
    current column number (``self.colnr``) and ``sin`` is available.  A
    result that is not an array is expanded to a full column by
    multiplying it with an array of ones.

    The new data is written to column ``self.colnr``; the change is
    recorded in an UndoList that is handed to the dataset's change
    notification and appended to the project journal.  If assigning the
    result raises TypeError, a message is printed and nothing is recorded.
    """
    table = self.dataset.get_data() # TODO: type checking

    # sample expression: col(0) + 10
    def col(nr):
        return table[nr].copy()

    # (named text_buffer so the ``buffer`` builtin is not shadowed)
    text_buffer = self.textview.get_buffer()
    start, end = text_buffer.get_bounds()
    expression = text_buffer.get_text(start, end)

    # NOTE(review): this eval()s user-entered text.  Emptying __builtins__
    # narrows the namespace but is not a real sandbox -- only use this
    # with expressions typed by a trusted user.
    result = eval(expression,
                  {'__builtins__': {}, 'sin': sin},
                  {'col': col, 'cc': self.colnr})

    # If the result is not an array, then it is probably a scalar.
    # We create an array from this by multiplying it with an array
    # that consists only of ones.
    if not isinstance(result, ArrayType):
        o = ones((table.nrows,), table[self.colnr].typecode())
        result = o * result
        print("-- result is not an array --")
        print("==> converted to array")

    ul = UndoList().describe("Calculate column")

    def set_column_data(column, data, undolist=None):
        # A fresh list per call: a mutable default ([]) would be shared by
        # every call made without an explicit undolist and grow forever.
        if undolist is None:
            undolist = []
        old_data = column.data
        column.data = data
        # The undo action is this same function called with the old data.
        undolist.append(UndoInfo(set_column_data, column, old_data))

    try:
        set_column_data(table.column(self.colnr), result, undolist=ul)
    except TypeError:
        print("%s %s" % ("-- incompatible result --", result))
        return

    self.dataset.notify_change(undolist=ul)
    self.project.journal.append(ul)
def toOddsMatrix(self, symbol_freqs=None):
    """Returns the OddsMatrix of a profile as a new Profile.

    symbol_freqs: per character array of background frequencies
    e.g. [.25,.25,.25,.25] for equal frequencies for each of the
    four bases.

    If no symbol frequencies are provided, all symbols will get equal
    freqs. The length of symbol freqs should match the number of columns
    in the profile! If symbol freqs contains a zero entry, a ProfileError
    is raised. This is done to prevent either a ZeroDivisionError (raised
    when zero is an int) or 'inf' in the resulting matrix (which happens
    when zero is a float).

    Raises ProfileError when symbol_freqs has the wrong length or contains
    a zero.  The result is self.Data divided by symbol_freqs, wrapped in a
    Profile with the same Alphabet and CharOrder.
    """
    pl = self.Data.shape[1] #profile length

    #if symbol_freqs is None, create an array with equal frequencies
    if symbol_freqs is None:
        #float() forces a true division: with an integer array of ones and
        #an integer length, '/' may truncate every frequency to zero
        symbol_freqs = ones(pl) / float(pl)
    else:
        symbol_freqs = array(symbol_freqs)

    #raise error when symbol_freqs has wrong length
    if len(symbol_freqs) != pl:
        raise ProfileError(
            "Length of symbol freqs should be %s, but is %s"
            % (pl, len(symbol_freqs)))

    #raise error when symbol freqs contains zero (to prevent
    #ZeroDivisionError or 'inf' in the resulting matrix)
    if sum(symbol_freqs != 0) != len(symbol_freqs):
        #one-element tuple, so the array is formatted as a single value
        raise ProfileError(
            "Symbol frequency is not allowed to be zero: %s"
            % (symbol_freqs,))

    #calculate the OddsMatrix
    odds = self.Data / symbol_freqs
    return Profile(odds, self.Alphabet, self.CharOrder)
""" Various utility functions. $Id: utils.py,v 1.19 2006/04/07 23:24:31 jp Exp $ """ import Numeric from Numeric import ones, exp, sqrt, zeros, argsort import sys, operator import plastk.rand as rand inf = (ones(1) / 0.0)[0] def enumerate(seq): """ (Deprecated) duplicates enum generator. """ i = 0 for x in seq: yield i, x i += 1 enum = enumerate def smooth(data, N=10, step=1): """ Smooth sequence data using a moving window of width N. Returns a
def ACL(tree):
    """Returns a normalized dictionary of sequence weights {seq_id: weight}

    tree: a PhyloNode object

    The ACL method is named after Altschul, Carroll and Lipman, who published
    a paper on sequence weighting in 1989.

    The ACL method is based on an idea of Felsenstein (1973). Imagine
    electrical current flows from the root of the tree down the edges and
    out the leaves. If the edge lengths are proportional to their electrical
    resistances, current flowing out each leaf equals the leaf weight.

    The first step in the calculation of the weight vector is calculating a
    variance-covariance matrix. The variance of a leaf is proportional to the
    distance from the root to that leaf. The covariance of two leaves is
    proportional to the distance from the root to the last common ancestor
    of the two leaves.

    The second step in the calculation results in a vector of weights. Suppose
    there are n leaves on the tree. Let i be the vector of size n, all of
    whose elements are 1.0. The weight vector is calculated as:
    w = (inverse(M)*i)/(transpose(i)*inverse(M)*i)
    See Altschul 1989
    """
    #clip branch lengths to avoid error due to negative or zero branch lengths
    _clip_branch_lengths(tree)

    #get the leaves once (in the order that the tree is traversed) together
    #with their sequence IDs; the position of a leaf in this list is its
    #row/column in the matrix
    leaves = list(tree.TerminalDescendants)
    seqs = [leaf.Data for leaf in leaves]
    n_leaves = len(seqs)

    #initialize the variance-covariance matrix
    m = zeros([n_leaves, n_leaves], Float64)

    #calculate (co)variances
    #variance of a node is defined as the distance from the root to the leaf
    #covariance of two nodes is defined as the distance from the root to the
    #last common ancestor of the two leaves.
    #Indices come from enumerate() instead of a list search per pair, and
    #each pair is visited once because the matrix is symmetric.
    for idx_x, x in enumerate(leaves):
        m[idx_x, idx_x] = x.distance(tree)
        for idx_y in range(idx_x):
            lca = x.lastCommonAncestor(leaves[idx_y])
            dist_lca_root = lca.distance(tree)
            m[idx_x, idx_y] = dist_lca_root
            m[idx_y, idx_x] = dist_lca_root

    #get the inverse of the variance-covariance matrix
    inv = inverse(m)
    #build vector i (vector of ones, length = # of leaves in the tree)
    i = ones(n_leaves, Float64)

    numerator = matrixmultiply(inv, i)
    denominator = matrixmultiply(matrixmultiply(transpose(i), inv), i)
    weight_vector = numerator / denominator

    #return a Weights object (is dict {seq_id: weight})
    return Weights(dict(zip(seqs, weight_vector)))
def hasValidData(self, err=0.0):
    """Returns True if all rows in self.Data add up to one

    err: largest absolute deviation from one that is still accepted for a
        row sum. The default of 0.0 demands exact equality.

    Every row is checked individually. Testing the truth value of an
    element-wise array comparison does not express "all rows" (depending
    on the array package it means "any row" or is an error).
    Data without any rows is reported as not valid.
    """
    row_sums = sum(self.Data, 1)
    if len(row_sums) == 0:
        return False
    for row_sum in row_sums:
        # Written as "not <=" so that a NaN row sum counts as invalid.
        if not abs(row_sum - 1) <= err:
            return False
    return True
print "Error: No files found" # get line freqs & stat weights from molecular data file file = open(flist[0], 'r') line = '' while (line[0:7] != 'molfile'): line = file.readline() file.close() molfile = line[8:-1] file = open(molfile, 'r') for idum in range(5): file.readline() nlev = int(file.readline()) file.readline() gstat = ones(nlev, 'd') for ilev in range(nlev): line = file.readline() words = line.split() gstat[ilev] = float(words[2]) file.readline() nlin = int(file.readline()) file.readline() frq = ones(nlin, 'd') gup = ones(nlin, 'd') glo = ones(nlin, 'd') jup = ones(nlin, 'i') jlo = ones(nlin, 'i')