def __init__(self):
    """Build the three-state machine and load the gap-length distribution.

    Registers a match state and two gap states (GAPX and GAPY) on a new
    SparseAlign.StateMachine, wires the seven transitions between them and
    initialises the start/end state vectors.

    Side effects:
      * Rebinds the module globals ``m`` and ``gapEmissions`` to the
        natural logs of their elements.
      * Reads the file ``gapDist`` (relative path), one float per line,
        and stores the log of each value divided by their sum in
        ``self.gapDist``.

    Raises whatever ``open`` raises if ``gapDist`` cannot be read,
    ValueError if a line is not a float or a value is not positive, and
    ZeroDivisionError if the values sum to zero.
    """
    global m, gapEmissions

    MATCH_STATE = "MATCH_STATE"
    GAP_STATEX = "GAP_STATEX"
    GAP_STATEY = "GAP_STATEY"

    self.xSeq = None
    self.ySeq = None
    self.xStart = None
    self.yStart = None

    self.stateMachine = SparseAlign.StateMachine()
    self.stateMachine.addState(MATCH_STATE, self.match_de, SparseAlign.StateMachine.MATCH)
    self.stateMachine.addState(GAP_STATEX, self.gap_deX, SparseAlign.StateMachine.GAPX)
    self.stateMachine.addState(GAP_STATEY, self.gap_deY, SparseAlign.StateMachine.GAPY)

    self.stateMachine.addTransition(MATCH_STATE, MATCH_STATE, self.transitionFn(MATCH))
    self.stateMachine.addTransition(MATCH_STATE, GAP_STATEX, self.transitionFn(GAP_OPEN))
    self.stateMachine.addTransition(GAP_STATEX, MATCH_STATE, self.transitionFn(GAP_CLOSE))
    self.stateMachine.addTransition(GAP_STATEX, GAP_STATEY, self.transitionFn(GAP_SWITCH))
    self.stateMachine.addTransition(MATCH_STATE, GAP_STATEY, self.transitionFn(GAP_OPEN))
    self.stateMachine.addTransition(GAP_STATEY, MATCH_STATE, self.transitionFn(GAP_CLOSE))
    self.stateMachine.addTransition(GAP_STATEY, GAP_STATEX, self.transitionFn(GAP_SWITCH))

    self.startStates = [0, 0, 0] #Maths.NEG_INFINITY, Maths.NEG_INFINITY]
    self.endStates = self.startStates[:]
    self.diagTraceBackLimit = 1

    # NOTE(review): this rewrites the module globals on every construction,
    # so a second instance would take the log of values that are already
    # logs -- confirm that only one instance is ever built.
    m = [math.log(value) for value in m]
    gapEmissions = [math.log(value) for value in gapEmissions]

    # Close the file explicitly instead of relying on garbage collection;
    # try/finally keeps this working on interpreters without `with`.
    gapFile = open("gapDist", 'r')
    try:
        weights = [float(line) for line in gapFile.readlines()]
    finally:
        gapFile.close()
    weightSum = sum(weights)
    self.gapDist = [math.log(weight / weightSum) for weight in weights]
def __init__(self):
    """Create the match/GAPX/GAPY state machine and default alignment settings.

    Both gap states share the ``self.gap_de`` emission function. The
    match-to-match transition is built from the module constant MATCH,
    every transition between the match state and a gap state from 0.0,
    and the two gap-to-gap transitions from GAP_SWITCH. ``COMBINED`` and
    ``COMBINED2`` cache the sums GAP_EMISSIONL + GAP_EXTEND and
    GAP_EMISSIONL + JUNK_EXTEND.
    """
    matchName, gapXName, gapYName = "MATCH_STATE", "GAP_STATEX", "GAP_STATEY"

    self.xSeq = self.ySeq = None
    self.xStart = self.yStart = None

    machine = SparseAlign.StateMachine()
    self.stateMachine = machine
    machine.addState(matchName, self.match_de, SparseAlign.StateMachine.MATCH)
    machine.addState(gapXName, self.gap_de, SparseAlign.StateMachine.GAPX)
    machine.addState(gapYName, self.gap_de, SparseAlign.StateMachine.GAPY)

    # (from state, to state, value handed to transitionFn), in registration order.
    transitionTable = (
        (matchName, matchName, MATCH),
        (matchName, gapXName, 0.0),
        (gapXName, matchName, 0.0),
        (gapXName, gapYName, GAP_SWITCH),
        (matchName, gapYName, 0.0),
        (gapYName, matchName, 0.0),
        (gapYName, gapXName, GAP_SWITCH),
    )
    for fromState, toState, weight in transitionTable:
        machine.addTransition(fromState, toState, self.transitionFn(weight))

    self.startStates = [0, 0, 0]
    self.endStates = list(self.startStates)
    self.diagTraceBackLimit = 1

    self.COMBINED = GAP_EMISSIONL + GAP_EXTEND
    self.COMBINED2 = GAP_EMISSIONL + JUNK_EXTEND
# Transition parameters followed by construction of a two-state
# (MATCH_STATE / GAP_STATE) SparseAlign.StateMachine.
MATCH = 0.9703833696510062
# The weight left over after MATCH is halved to give GAP_OPEN.
GAP_OPEN = (1.0 - MATCH) / 2
# NOTE(review): GAP_SWITCH is read here but only assigned on the next line,
# and GAP_EXTEND is not assigned anywhere in this span -- both must already
# be bound by earlier code, otherwise this raises NameError. Confirm.
GAP_CLOSE = 1.0 - GAP_EXTEND - GAP_SWITCH
GAP_SWITCH = 0.0007315179552849
# Each parameter is passed through transitionFn (defined outside this span).
match_match_t = transitionFn(MATCH)
match_gap_t = transitionFn(GAP_OPEN)
gap_match_t = transitionFn(GAP_CLOSE)
gap_gap_t = transitionFn(GAP_SWITCH)
""" end space for parameters """
stateMachine = SparseAlign.StateMachine()
# Both states are registered with the same emission callable `de`.
stateMachine.addState(MATCH_STATE, de, SparseAlign.StateMachine.MATCH)
stateMachine.addState(GAP_STATE, de, SparseAlign.StateMachine.GAP)
# NOTE(review): all four transitions are registered with `t`, which is not
# defined in this span; the *_t values computed above are not used here --
# confirm that is intended.
stateMachine.addTransition(MATCH_STATE, MATCH_STATE, t)
stateMachine.addTransition(MATCH_STATE, GAP_STATE, t)
stateMachine.addTransition(GAP_STATE, MATCH_STATE, t)
stateMachine.addTransition(GAP_STATE, GAP_STATE, t)
# One entry per state, in registration order: 0 for the match state and
# Maths.NEG_INFINITY for the gap state. endStates is an independent copy.
startStates = [0, Maths.NEG_INFINITY]
endStates = startStates[:]
def diagBuilder(i, j):
    # Placeholder callback: always raises to signal it must not be invoked.
    raise NotImplementedError("Shouldn't be called")
def sparseAlignSimple(self, stateMachine, points, size): p = points["points"][:] gp = points["gapPoints"][:] blp = points["blGaps"][:] trp = points["trGaps"][:] print "points ", p print "gapPoints ", gp print "blPoints ", blp print "trPoints ", trp pPA = {} for i in p: pPA[i] = Maths.NEG_INFINITY def makeMatrix(values): matrix = [ [False]*size for i in xrange(0, size) ] for i in values: matrix[i[0]][i[1]] = True return matrix fMatrix = [ [ [Maths.NEG_INFINITY]*stateMachine.stateNo() for j in xrange(0, size) ] for i in xrange(0, size) ] bMatrix = [ [ [Maths.NEG_INFINITY]*stateMachine.stateNo() for j in xrange(0, size) ] for i in xrange(0, size) ] def fn3(i): if i == MATCH: return 0 return Maths.NEG_INFINITY startStates = [ fn3(i) for i in stateMachine.getStateTypes() ] fMatrix[0][0] = startStates[:] #[0]*stateMachine.stateNo() bMatrix[size-1][size-1] = startStates[:]#[0]*stateMachine.stateNo() #fMatrix[0][0] = [0]*stateMachine.stateNo() #bMatrix[size-1][size-1] = [0]*stateMachine.stateNo() def bTransition(s, sI, s2, sI2, i, j, t, de, *args): s2[i] = Maths.logAdd(s2[i], s[j] + t + de) print "doing backwards " fn = stateMachine.getFns(bTransition) computeMatrixR(bMatrix, makeMatrix(gp), makeMatrix(p), makeMatrix(blp), makeMatrix(trp), fn[(MATCH, GAP)], fn[(GAP, GAP)], fn[(GAP, MATCH)], fn[(MATCH, MATCH)]) stateNo = stateMachine.stateNo() total = SparseAlign.logSum([ bMatrix[0][0][i] + startStates[i] for i in range(0, stateNo)]) #total = bMatrix[0][0][0]#SparseAlign.logSum(bMatrix[0][0]) stateNo = stateMachine.stateNo() print "doing forwards " def fTransition(s, sI, s2, sI2, i, j, t, de, x2, y2, x1, y1): l = s[i] + t + de s2[j] = Maths.logAdd(s2[j], l) l += bMatrix[x1][y1][j] - total #print x1, x2, "boo" if x2 >= x1: raise IndexError() for k in xrange(0, x1-x2): pPA[(x1-k, y1-k)] = Maths.logAdd(pPA[(x1-k, y1-k)], l) fn = stateMachine.getFns(fTransition) def fTransition2(s, sI, s2, sI2, i, j, t, de, x2, y2, x1, y1): l = s[i] + t + de s2[j] = Maths.logAdd(s2[j], l) fn2 = 
stateMachine.getFns(fTransition2) print "gp", gp computeMatrix(fMatrix, makeMatrix(gp), makeMatrix(p), makeMatrix(blp), makeMatrix(trp), fn2[(MATCH, GAP)], fn2[(GAP, GAP)], fn[(GAP, MATCH)], fn[(MATCH, MATCH)]) print "fmatrix ", fMatrix print "bmatrix ", bMatrix print fMatrix[0][0], "cro", SparseAlign.logSum(bMatrix[0][0]) return (SparseAlign.logSum([ fMatrix[size-1][size-1][i] + startStates[i] for i in range(0, stateNo)]), SparseAlign.logSum([ bMatrix[0][0][i] + startStates[i] for i in range(0, stateNo)]) , pPA)
def sparseAlignReal(self, stateMachine, points):
    """Run SparseAlign.sparseAlign on ``points`` and collect posteriors.

    ``points`` is a dict with the keys "points", "gapPoints", "blGaps" and
    "trGaps". The state machine's emissions are mapped through a
    SparseAlign.Rescale, and the backward pass is driven by a
    SparseAlign.BTransitionAndTotalReCalculator whose interval argument is
    drawn at random.

    Returns a tuple (total, total, pPA): the total reported through the
    totalGetter callback (twice) and the dict handed to
    SparseAlign.PosteriorProbs, keyed by the entries of points["points"].

    Raises IndexError if points["points"] is empty.
    """
    p = points["points"][:]
    # Not used below; the lookup is kept so a dict without this key still
    # fails in the same place.
    gp = points["gapPoints"][:]
    blp = points["blGaps"][:]
    trp = points["trGaps"][:]

    pPA = {}
    for point in p:
        pPA[point] = 0

    def join(one, two):
        # Combined callback: calls `two` first, then `one`.
        def f(*args):
            two(*args)
            one(*args)
        return f

    total = [0]  # one-element list so the nested callback can rebind it

    def totalGetter(t):
        total[0] = t
        pP.total(t)  # pP is bound further down, before this can be called

    rescale = SparseAlign.Rescale(1, stateMachine.stateNo())
    stateMachine.mapEmissions(rescale.rescaleFn)

    lastPoint = sorted(p)[-1]
    lastDiagonal = lastPoint[0] + lastPoint[1]
    # randrange needs an integral stop: 2 + 1.5 * d has a fractional part
    # whenever d is odd, which random.randrange rejects, so truncate first.
    # NOTE(review): `r` is presumably the random module -- confirm.
    interval = r.randrange(1, int(2 + 1.5 * lastDiagonal))
    #totalFn, startDiagonal, endDiagonal, interval
    bTransition = SparseAlign.BTransitionAndTotalReCalculator(
        totalGetter, 0, lastDiagonal, interval, stateMachine.stateNo())
    pP = SparseAlign.PosteriorProbs(pPA, stateMachine.stateNo())

    fn = stateMachine.getFns(join(pP.bTransition, bTransition.bTransition))
    fn_2 = stateMachine.getFns(bTransition.bTransition)
    fn2 = stateMachine.getFns(SparseAlign.fTransition(stateMachine.stateNo()))

    # Coordinates of the current diagonal: (x1, y1) is where it started,
    # (x2, y2) the most recent cell reported to diagBuilder.
    x1 = [0]
    y1 = [0]
    x2 = [0]
    y2 = [0]

    def diagBuilder(i, j):
        x2[0] = i
        y2[0] = j

    def diagStart(i, j):
        x1[0] = i
        y1[0] = j
        x2[0] = i
        y2[0] = j

    def diagEnd():
        pass

    def dFW(diagFn):
        # Wrap a diagonal transition so it also receives the tracked
        # coordinates (x2 - 1, y2 - 1, x1, y1).
        def f(s1, sI, s2, sI2):
            return diagFn(s1, sI, s2, sI2, x2[0]-1, y2[0]-1, x1[0], y1[0])
        return f

    def fn3(stateType):
        if stateType == MATCH:
            return 0
        return Maths.NEG_INFINITY

    startStates = [fn3(stateType) for stateType in stateMachine.getStateTypes()]

    SparseAlign.sparseAlign(startStates[:],
                            p, blp, trp, startStates[:],
                            stateMachine.stateNo(),
                            fn2[(MATCH, GAP)], fn2[(GAP, GAP)],
                            diagStart, diagBuilder, diagEnd,
                            dFW(fn2[(GAP, MATCH)]), dFW(fn2[(MATCH, MATCH)]),
                            fn_2[(MATCH, GAP)], fn_2[(GAP, GAP)],
                            join(diagStart, pP.diagStart),
                            join(diagBuilder, pP.diagBuilder),
                            join(diagEnd, pP.diagEnd),
                            dFW(fn[(GAP, MATCH)]), dFW(fn[(MATCH, MATCH)]),
                            totalGetter, rescale, 1000000)
    return (total[0], total[0], pPA)
def alignScript(stateMachine, startStates, endStates,
                diagStart, diagBuilder, diagEnd,
                xStart, yStart,
                pointsFile, bottomLeftPointsFile, topRightPointsFile,
                outputFile, seqXFile, seqYFile,
                diagTraceBackLimit=100000, retotallingInterval=50):
    """Read point files, run SparseAlign.sparseAlign and write posteriors.

    Each of ``pointsFile``, ``bottomLeftPointsFile`` and
    ``topRightPointsFile`` holds one point per line as two
    whitespace-separated integers; ``xStart`` / ``yStart`` are subtracted
    on reading and added back when writing. One line
    "x y value " per entry of ``pointsFile`` is written to ``outputFile``.

    Raises IndexError if ``pointsFile`` contains no points, ValueError if
    a coordinate is not an integer, and whatever ``open`` raises for
    unreadable files.
    """
    MATCH = SparseAlign.StateMachine.MATCH
    GAP = SparseAlign.StateMachine.GAP

    # Defined before first use; the original referenced it ahead of its
    # local definition.
    def join(one, two):
        # Combined callback: calls `two` first, then `one`.
        def f(*args):
            two(*args)
            one(*args)
        return f

    def readPoints(path):
        # Parse the file that was actually asked for and convert the fields
        # to int before offsetting; blank lines are skipped.
        handle = open(path, 'r')
        try:
            rows = [line.split() for line in handle.readlines()]
        finally:
            handle.close()
        return [(int(row[0]) - xStart, int(row[1]) - yStart)
                for row in rows if row]

    def readSequence(path):
        # Concatenate the lines of `path` without their newline characters.
        handle = open(path, 'r')
        try:
            return "".join([line.rstrip("\n") for line in handle.readlines()])
        finally:
            handle.close()

    points = readPoints(pointsFile)
    points.sort()
    bottomLeftPoints = readPoints(bottomLeftPointsFile)
    topRightPoints = readPoints(topRightPointsFile)

    # NOTE(review): the sequences are read but not consumed below --
    # confirm whether they should be passed on somewhere.
    seqX = readSequence(seqXFile)
    seqY = readSequence(seqYFile)

    pPA = {}
    for point in points:
        pPA[point] = 0
    posteriorProbs = SparseAlign.PosteriorProbs(pPA, stateMachine.stateNo())

    rescale = SparseAlign.Rescale(sum(points[0][0:2]) + 1, stateMachine.stateNo())
    stateMachine.mapEmissions(rescale.rescaleFn)

    bTransition = SparseAlign.BTransitionAndTotalReCalculator(
        posteriorProbs.total,
        sum(points[0][0:2]),
        sum(points[-1][0:2]),
        retotallingInterval,
        stateMachine.stateNo())

    backMatchFn = stateMachine.getFns(join(posteriorProbs.bTransition,
                                           bTransition.bTransition))
    backGapFn = stateMachine.getFns(bTransition.bTransition)
    forwardFn = stateMachine.getFns(SparseAlign.fTransition(stateMachine.stateNo()))

    # The backward diagonal slots take backMatchFn so the posterior
    # accumulator is driven; the original passed the forward functions
    # there and left backMatchFn unused.
    # NOTE(review): confirm against SparseAlign.sparseAlign's signature.
    SparseAlign.sparseAlign(startStates, points,
                            bottomLeftPoints, topRightPoints,
                            endStates, stateMachine.stateNo(),
                            forwardFn[(MATCH, GAP)], forwardFn[(GAP, GAP)],
                            diagStart, diagBuilder, diagEnd,
                            forwardFn[(GAP, MATCH)], forwardFn[(MATCH, MATCH)],
                            backGapFn[(MATCH, GAP)], backGapFn[(GAP, GAP)],
                            join(diagStart, posteriorProbs.diagStart),
                            join(diagBuilder, posteriorProbs.diagBuilder),
                            join(diagEnd, posteriorProbs.diagEnd),
                            backMatchFn[(GAP, MATCH)], backMatchFn[(MATCH, MATCH)],
                            posteriorProbs.total, rescale, diagTraceBackLimit)

    out = open(outputFile, 'w')
    try:
        for point in pPA:
            out.write("%s %s %s \n" % (point[0] + xStart, point[1] + yStart, pPA[point]))
    finally:
        out.close()