def pacbporflist2similarityarray(pacbps, queryorsbjct, length):
    """Project the match strings of a list of pacbporfs onto one sequence.

    For every pacbporf the alignment match string is laid out along the
    coordinates of either the query or the sbjct sequence.  A position of
    the returned array is 1 when at least one pacbporf has a non-space
    match symbol there, and 0 otherwise.

    Arguments:
        pacbps        iterable of pacbporf objects; each must provide
                      _get_original_alignment_pos_start(),
                      _get_original_alignment_pos_end() and
                      get_unextended_aligned_protein_sequences()
        queryorsbjct  'query' selects query coordinates; any other value
                      selects sbjct coordinates
        length        length of the returned array

    Returns:
        array of `length` elements (as created by zeros()) holding only
        the values 0 and 1.
    """
    use_query = queryorsbjct == 'query'
    bea = zeros(length)
    for pacbporf in pacbps:
        spos = pacbporf._get_original_alignment_pos_start()
        epos = pacbporf._get_original_alignment_pos_end()
        q, m, s = pacbporf.get_unextended_aligned_protein_sequences()
        if use_query:
            start = spos.query_pos
            end = epos.query_pos + 1
            seqa = q
        else:
            start = spos.sbjct_pos
            end = epos.sbjct_pos + 1
            seqa = s

        # Alignment columns in which the selected sequence carries a gap do
        # not correspond to any coordinate on that sequence; drop them so
        # the match symbols line up one-to-one with positions start..end-1.
        ma = [symbol for residue, symbol in zip(seqa, m) if residue != '-']

        # prepare replacement of match string into match score list
        matcharray = zeros(end - start)
        for pos, symbol in enumerate(ma):
            if symbol != ' ':
                matcharray[pos] = 1

        # update (binary) array; the slice must be expressed in the
        # coordinates of the selected sequence (start/end), not always in
        # query coordinates
        bea[start:end] += matcharray

    # correct bea for values > 1 (positions covered by several pacbporfs)
    bea = where(greater_equal(bea, 2), 1, bea)
    return bea
def pacbporflist2similarityarray(pacbps, queryorsbjct, length):
    """Project the match strings of a list of pacbporfs onto one sequence.

    For every pacbporf the alignment match string is laid out along the
    coordinates of either the query or the sbjct sequence.  A position of
    the returned array is 1 when at least one pacbporf has a non-space
    match symbol there, and 0 otherwise.

    Arguments:
        pacbps        iterable of pacbporf objects; each must provide
                      _get_original_alignment_pos_start(),
                      _get_original_alignment_pos_end() and
                      get_unextended_aligned_protein_sequences()
        queryorsbjct  'query' selects query coordinates; any other value
                      selects sbjct coordinates
        length        length of the returned array

    Returns:
        array of `length` elements (as created by zeros()) holding only
        the values 0 and 1.
    """
    use_query = queryorsbjct == 'query'
    bea = zeros(length)
    for pacbporf in pacbps:
        spos = pacbporf._get_original_alignment_pos_start()
        epos = pacbporf._get_original_alignment_pos_end()
        q, m, s = pacbporf.get_unextended_aligned_protein_sequences()
        if use_query:
            start = spos.query_pos
            end = epos.query_pos + 1
            seqa = q
        else:
            start = spos.sbjct_pos
            end = epos.sbjct_pos + 1
            seqa = s

        # Alignment columns in which the selected sequence carries a gap do
        # not correspond to any coordinate on that sequence; drop them so
        # the match symbols line up one-to-one with positions start..end-1.
        ma = [symbol for residue, symbol in zip(seqa, m) if residue != '-']

        # prepare replacement of match string into match score list
        matcharray = zeros(end - start)
        for pos, symbol in enumerate(ma):
            if symbol != ' ':
                matcharray[pos] = 1

        # update (binary) array; the slice must be expressed in the
        # coordinates of the selected sequence (start/end), not always in
        # query coordinates
        bea[start:end] += matcharray

    # correct bea for values > 1 (positions covered by several pacbporfs)
    bea = where(greater_equal(bea, 2), 1, bea)
    return bea
def masked_to_unmasked(mask, remove_mask=False):
    """Map every index of the original sequence to an index in the unmasked one.

    For each position i the result holds the index, within the sequence
    obtained by dropping all masked positions, of the last non-masked
    element found at or before i.  A masked position therefore inherits
    the index of the closest preceding non-masked position (masked
    elements have no index of their own in the unmasked sequence); when
    nothing non-masked precedes it the value is -1.

    With remove_mask set to True (default False) every masked position is
    overwritten with -1 so it can be recognised directly.
    """
    # Running count of non-masked elements, shifted to be zero-based.
    unmasked_index = cumsum(logical_not(mask)) - 1
    if not remove_mask:
        return unmasked_index
    return where(mask, -1, unmasked_index)
def pca(M):
    """Perform PCA on M; return (eigenvalues, eigenvectors), sorted.

    M is a two-dimensional array with T rows and N columns.  No centering
    is performed here.

    Returns a tuple (evals, evecs): evals holds the eigenvalues in
    descending order and evecs[i] is the eigenvector belonging to
    evals[i] (one eigenvector per row).

    When T < N the "snapshot" method is used: the small T x T matrix
    M M^T is decomposed and its eigenvectors are mapped back through M^T.
    Directions whose eigenvalue is not strictly positive cannot be
    normalised and are returned as zero vectors.

    NOTE(review): the snapshot branch decomposes M M^T without the 1/T
    factor applied in the covariance branch, so the eigenvalues returned
    by the two branches differ by a factor T -- confirm whether callers
    rely on this.
    """
    T, N = shape(M)
    if T < N:
        # fewer rows than columns: snapshot method
        C = dot(M, t(M))
        evals, evecsC = eigenvectors(C)
        # HACK: make sure evals are all non-negative
        evals = where(evals < 0, 0, evals)
        # Guard the normalisation: 1/sqrt(0) would divide by zero, so
        # directions with a zero eigenvalue get a scale factor of 0.
        positive = evals > 0
        scale = where(positive, 1. / sqrt(where(positive, evals, 1.)), 0.)
        # eigenvectors() yields one eigenvector per row; transpose so that
        # each column of evecs is one (rescaled) eigenvector.
        evecs = scale * dot(t(M), t(evecsC))
    else:
        # calculate covariance matrix
        K = 1. / T * dot(t(M), M)
        evals, evecs = eigenvectors(K)
        # Bring the row-wise eigenvectors into the same column layout as
        # the snapshot branch so the column selection below reorders whole
        # eigenvectors rather than their components.
        evecs = t(evecs)
    # sort the eigenvalues and eigenvectors, descending order
    order = argsort(evals)[::-1]
    evecs = take(evecs, order, 1)
    evals = take(evals, order)
    return evals, t(evecs)
def pca(M):
    """Perform PCA on M; return (eigenvalues, eigenvectors), sorted.

    M is a two-dimensional array with T rows and N columns.  No centering
    is performed here.

    Returns a tuple (evals, evecs): evals holds the eigenvalues in
    descending order and evecs[i] is the eigenvector belonging to
    evals[i] (one eigenvector per row).

    When T < N the "snapshot" method is used: the small T x T matrix
    M M^T is decomposed and its eigenvectors are mapped back through M^T.
    Directions whose eigenvalue is not strictly positive cannot be
    normalised and are returned as zero vectors.

    NOTE(review): the snapshot branch decomposes M M^T without the 1/T
    factor applied in the covariance branch, so the eigenvalues returned
    by the two branches differ by a factor T -- confirm whether callers
    rely on this.
    """
    from Numeric import take, dot, shape, argsort, where, sqrt, transpose as t
    from LinearAlgebra import eigenvectors

    T, N = shape(M)
    # if there are fewer rows T than columns N, use
    # snapshot method
    if T < N:
        C = dot(M, t(M))
        evals, evecsC = eigenvectors(C)
        # HACK: make sure evals are all non-negative
        evals = where(evals < 0, 0, evals)
        # Guard the normalisation: 1/sqrt(0) would divide by zero, so
        # directions with a zero eigenvalue get a scale factor of 0.
        positive = evals > 0
        scale = where(positive, 1. / sqrt(where(positive, evals, 1.)), 0.)
        # LinearAlgebra.eigenvectors yields one eigenvector per row;
        # transpose so that each column of evecs is one eigenvector.
        evecs = scale * dot(t(M), t(evecsC))
    else:
        # calculate covariance matrix
        K = 1. / T * dot(t(M), M)
        evals, evecs = eigenvectors(K)
        # Bring the row-wise eigenvectors into the same column layout as
        # the snapshot branch so the column selection below reorders whole
        # eigenvectors rather than their components.
        evecs = t(evecs)
    # sort the eigenvalues and eigenvectors, descending order
    order = argsort(evals)[::-1]
    evecs = take(evecs, order, 1)
    evals = take(evals, order)
    return evals, t(evecs)
# JS. binary_denotations = { ',': lambda a, b: b, '|': operator.or_, '^': operator.xor, '&': operator.and_, '<<': operator.lshift, '>>': operator.rshift, '+': operator.add, '-': operator.sub, # This isn't correct, but it avoids Numeric's ArithmeticError: # Integer overflow in multiply. '*': lambda a, b: (a & (2**15-1)) * (b & (2**15-1)), # These two have to worry about SIGFPE from division by zero. '/': lambda a, b: a / where(b == 0, 1, b), '%': lambda a, b: a % where(b == 0, 1, b), '&&': lambda a, b: where(a, where(b, 1, 0), 0), '||': lambda a, b: where(a, 1, where(b, 1, 0)), '==': lambda a, b: where(a == b, 1, 0), '!=': lambda a, b: where(a != b, 1, 0), '<': lambda a, b: where(a < b, 1, 0), '>': lambda a, b: where(a > b, 1, 0), '<=': lambda a, b: where(a <= b, 1, 0), '>=': lambda a, b: where(a >= b, 1, 0), } unary_denotations = { '~': operator.inv, '-': operator.neg,
def clamp(a, b, c):
    """Limit b to the range bounded below by a and above by c.

    Wherever a < b does not hold the value of a is used in place of b;
    wherever the result is not below c the value of c is used.
    Arguments may be scalars or arrays accepted by where().
    """
    # Apply the lower limit first ...
    raised = where(a < b, b, a)
    # ... then cap whatever is left at the upper limit.
    capped = where(raised < c, raised, c)
    return capped
# JS. binary_denotations = { ',': lambda a, b: b, '|': operator.or_, '^': operator.xor, '&': operator.and_, '<<': operator.lshift, '>>': operator.rshift, '+': operator.add, '-': operator.sub, # This isn't correct, but it avoids Numeric's ArithmeticError: # Integer overflow in multiply. '*': lambda a, b: (a & (2**15 - 1)) * (b & (2**15 - 1)), # These two have to worry about SIGFPE from division by zero. '/': lambda a, b: a / where(b == 0, 1, b), '%': lambda a, b: a % where(b == 0, 1, b), '&&': lambda a, b: where(a, where(b, 1, 0), 0), '||': lambda a, b: where(a, 1, where(b, 1, 0)), '==': lambda a, b: where(a == b, 1, 0), '!=': lambda a, b: where(a != b, 1, 0), '<': lambda a, b: where(a < b, 1, 0), '>': lambda a, b: where(a > b, 1, 0), '<=': lambda a, b: where(a <= b, 1, 0), '>=': lambda a, b: where(a >= b, 1, 0), } unary_denotations = { '~': operator.inv, '-': operator.neg, '!': lambda x: where(x, 0, 1),