def pairwiseScoreSingleAnnotation(S1, S2, T1, T2, deltaMass, eps):
    """Match delta-mass-shifted (t1, t2) pairs against S1 and S2.

    A pair (t1, t2), with t1 taken from T1 and t2 from T2, is a candidate
    when t2 is not <= t1 and fuzzyEqual(t2 - t1, deltaMass, eps) holds.
    A candidate is reported when some s1 in S1 satisfies
    fuzzyEqual(t1, s1, eps) and some s2 in S2 satisfies
    fuzzyEqual(t2, s2, eps).  Only the first matching s1 and the first
    matching s2 are used, so each candidate pair is reported at most once.

    Returns:
        A list of (s1, s2, t1, t2) tuples; take len() of it for a count.
    """
    noMatch = object()  # sentinel, so a falsy s1/s2 still counts as a match

    # Group every admissible t2 under its t1.
    shiftedPartners = defaultdict(list)
    for low in T1:
        for high in T2:
            if high <= low:
                continue
            if fuzzyEqual(high - low, deltaMass, eps):
                shiftedPartners[low].append(high)

    hits = []
    for low, highs in shiftedPartners.items():
        # t1 must itself be matched in S1 before its partners are checked.
        anchor = next((s1 for s1 in S1 if fuzzyEqual(low, s1, eps)), noMatch)
        if anchor is noMatch:
            continue
        for high in highs:
            partner = next(
                (s2 for s2 in S2 if fuzzyEqual(high, s2, eps)), noMatch)
            if partner is not noMatch:
                hits.append((anchor, partner, low, high))
    return hits
def generatePossiblePaths(startingMass, targetMass, maximumLDist, massEpsilon,
                          alphabet=DEFAULT_ALPHABET):
    """Enumerate transition sequences that move startingMass onto targetMass.

    The candidate transitions are generated once, from a stand-in
    composition built out of alphabet.sortedAminos, and every sequence of
    at most maximumLDist of them is explored depth-first.  Whenever the
    running mass satisfies fuzzyEqual(mass, targetMass, massEpsilon) the
    sequence so far is recorded; exploration continues past a hit, so a
    recorded path may be a prefix of another recorded path.

    Args:
        startingMass: mass the search starts from.
        targetMass: mass a path has to reach.
        maximumLDist: maximum number of transitions in a path.  Values
            <= 0 allow no transitions at all.
        massEpsilon: tolerance handed to fuzzyEqual.
        alphabet: alphabet the candidate transitions are generated for.

    Returns:
        A list of paths, each a list of transitions.  The empty path is
        included when startingMass already matches targetMass.
    """
    outputPaths = []
    if fuzzyEqual(startingMass, targetMass, massEpsilon):
        outputPaths.append([])

    fakeComp = Composition.fromAminoArr(alphabet.sortedAminos)
    allPossibleTransitions = Transition.generateValidTransitions(
        fakeComp.aminoCounts, alphabet)

    def genPathsHelper(curMass, transitionList, remainingLDist):
        # `<= 0` rather than `== 0`: a negative budget must terminate the
        # recursion instead of counting down forever.
        if remainingLDist <= 0:
            return
        for transition in allPossibleTransitions:
            newMass = curMass + transition.deltaMass()
            newTransitionList = transitionList + [transition]
            if fuzzyEqual(newMass, targetMass, massEpsilon):
                outputPaths.append(newTransitionList)
            genPathsHelper(newMass, newTransitionList, remainingLDist - 1)

    genPathsHelper(startingMass, [], maximumLDist)
    return outputPaths
def filterPaths(self, possiblePaths, targetMass, massEpsilon):
    """Keep only the paths this composition can actually follow.

    For every path the net per-amino requirement is tallied from its
    transitions and the mass is accumulated from self.calcMass().  A path
    is kept when self.aminoCounts covers every requirement and the final
    mass satisfies fuzzyEqual(mass, targetMass, massEpsilon).

    Args:
        possiblePaths: iterable of paths, each an iterable of transitions
            exposing deltaMass(), action, amino1 and (for action 2) amino2.
        targetMass: mass a kept path must end on.
        massEpsilon: tolerance handed to fuzzyEqual.

    Returns:
        A list with the accepted paths, in their original order.
    """
    baseMass = self.calcMass()
    keptPaths = []
    for path in possiblePaths:
        needed = defaultdict(int)
        endMass = baseMass
        for step in path:
            endMass += step.deltaMass()
            # NOTE(review): action codes look like 0 = add amino1,
            # 1 = remove amino1, 2 = swap amino1 for amino2 -- confirm
            # against Transition.
            action = step.action
            if action == 0:
                needed[step.amino1] -= 1
            elif action == 1:
                needed[step.amino1] += 1
            elif action == 2:
                needed[step.amino1] += 1
                needed[step.amino2] -= 1

        # An amino whose net change is zero was still passed through, so it
        # has to be present at least once in the initial composition.
        for amino, net in needed.items():
            if net == 0:
                needed[amino] = 1

        # Every amino is looked up and fuzzyEqual is always evaluated
        # (no short-circuiting), mirroring the lookup pattern on aminoCounts.
        shortfalls = [amino for amino in needed
                      if self.aminoCounts[amino] < needed[amino]]
        massMatches = fuzzyEqual(endMass, targetMass, massEpsilon)
        if massMatches and not shortfalls:
            keptPaths.append(path)
    return keptPaths
def score(S, T, eps):
    """Return every (s, t) pair whose members are fuzzily equal.

    A pair is reported for each s in S and each t in T for which
    fuzzyEqual(s, t, eps) holds, so one s may appear in several pairs.

    Returns:
        A list of (s, t) tuples, ordered by S first and T second; take
        len() of it for a count.
    """
    return [(s, t) for s in S for t in T if fuzzyEqual(s, t, eps)]
def scoreSingleAnnotation(S, T, eps):
    """Pair each element of S with its first fuzzy match in T.

    For every s in S the elements of T are scanned in order and the first
    t with fuzzyEqual(s, t, eps) is taken; an s without any match is
    skipped.  Each s therefore contributes at most one pair.

    Returns:
        A list of (s, t) tuples in the order of S; take len() of it for a
        count.
    """
    noMatch = object()  # sentinel, so a falsy t still counts as a match
    annotated = []
    for s in S:
        firstHit = next((t for t in T if fuzzyEqual(s, t, eps)), noMatch)
        if firstHit is not noMatch:
            annotated.append((s, firstHit))
    return annotated
def genPathsHelper(validTransitions, curMass, transitionList, targetMass,
                   remainingLDist, massEpsilon, alphabet, outputPaths):
    """Depth-first search for transition sequences that reach targetMass.

    Every transition in validTransitions is applied to curMass in turn.
    When the resulting mass satisfies
    fuzzyEqual(mass, targetMass, massEpsilon) the extended path is appended
    to outputPaths.  The search then recurses with one step less of budget,
    whether or not the mass matched.

    Args:
        validTransitions: transitions to try at every level; each must
            expose deltaMass().
        curMass: mass reached by transitionList.
        transitionList: transitions taken so far (never mutated).
        targetMass: mass a path has to reach.
        remainingLDist: number of further transitions allowed.  Values
            <= 0 end the search immediately.
        massEpsilon: tolerance handed to fuzzyEqual.
        alphabet: unused here; only forwarded to the recursive call.
        outputPaths: list that receives the matching paths (mutated).
    """
    # `<= 0` rather than `== 0`: a negative budget must terminate the
    # recursion instead of counting down forever.
    if remainingLDist <= 0:
        return
    for transition in validTransitions:
        newMass = curMass + transition.deltaMass()
        newTransitionList = transitionList + [transition]
        if fuzzyEqual(newMass, targetMass, massEpsilon):
            outputPaths.append(newTransitionList)
        genPathsHelper(validTransitions, newMass, newTransitionList,
                       targetMass, remainingLDist - 1, massEpsilon,
                       alphabet, outputPaths)
def pairwiseScoreMultiAnnotation(S1, S2, T1, T2, deltaMass, eps):
    """Return all (s1, s2, t1, t2) quadruples that line up under a mass shift.

    A quadruple is reported when
      * s1 is in S1, s2 is in S2, s2 is not <= s1 and
        fuzzyEqual(s2 - s1, deltaMass, eps),
      * t1 is in T1, t2 is in T2, t2 is not <= t1 and
        fuzzyEqual(t2 - t1, deltaMass, eps),
      * fuzzyEqual(t1, s1, eps) and fuzzyEqual(t2, s2, eps).

    Returns:
        A list of (s1, s2, t1, t2) tuples; take len() of it for a count.

    Raises:
        Exception: if deltaMass <= 0.
    """
    if deltaMass <= 0:
        raise Exception("Delta Mass Must Be > 0")

    def shiftedPartners(lows, highs):
        # Map each low value to the high values sitting deltaMass above it.
        grouped = {}
        for low in lows:
            for high in highs:
                if high <= low:
                    continue
                if fuzzyEqual(high - low, deltaMass, eps):
                    grouped.setdefault(low, []).append(high)
        return grouped

    sPairs = shiftedPartners(S1, S2)
    tPairs = shiftedPartners(T1, T2)

    return [
        (s1, s2, t1, t2)
        for s1, s1Partners in sPairs.items()
        for s2 in s1Partners
        for t1, t1Partners in tPairs.items()
        for t2 in t1Partners
        if fuzzyEqual(t1, s1, eps) and fuzzyEqual(t2, s2, eps)
    ]
def generatePaths(startingComposition, targetMass, maximumLDist, massEpsilon,
                  alphabet=DEFAULT_ALPHABET):
    """Find transition paths from startingComposition to targetMass.

    Starting from the amino counts of startingComposition, every sequence
    of at most maximumLDist valid transitions is explored depth-first.
    Unlike a pure mass search, the valid transitions are regenerated from
    the current counts at every step.  Whenever the mass of the current
    counts satisfies fuzzyEqual(mass, targetMass, massEpsilon), the path
    and the resulting composition are recorded.

    Args:
        startingComposition: composition the search starts from; must
            expose calcMass(), aminoCounts and filterPaths().
        targetMass: mass a path has to reach.
        maximumLDist: maximum number of transitions in a path.  Values
            <= 0 allow no transitions at all.
        massEpsilon: tolerance handed to fuzzyEqual.
        alphabet: alphabet used to generate transitions and to
            de-duplicate the resulting compositions.

    Returns:
        A tuple (outputPaths, outputComps).  The paths are passed through
        startingComposition.filterPaths and the compositions through
        alphabet.removeStrDups independently, so do not assume the two
        lists are index-aligned.
    """
    startingMass = startingComposition.calcMass()
    outputComps = []
    outputPaths = []
    if fuzzyEqual(startingMass, targetMass, massEpsilon):
        outputPaths.append([])
        outputComps.append(startingComposition)

    # TODO FIXME HACK: Would this be noticeably faster as a dynamic
    # programming implementation with some backtracing step, or does it all
    # work out basically the same? It depends on how many duplicate ways
    # there are to get to the same composition. Probably doesn't matter much
    # until we increase L dist above 2.
    def genPathsHelper(currentAminoCounts, transitionList, remainingLDist):
        # `<= 0` rather than `== 0`: a negative budget must terminate the
        # recursion instead of counting down forever.
        if remainingLDist <= 0:
            return
        validTransitions = Transition.generateValidTransitions(
            currentAminoCounts, alphabet)
        for transition in validTransitions:
            # Work on a private copy so sibling branches are unaffected.
            newComp = defaultdict(int)
            for key in currentAminoCounts:
                newComp[key] = currentAminoCounts[key]
            Composition.followTransitionInPlace(newComp, transition)
            newTransitionList = transitionList + [transition]
            if fuzzyEqual(Composition.calcMassOnAminoCounts(newComp),
                          targetMass, massEpsilon):
                outputPaths.append(newTransitionList)
                outputComps.append(Composition.fromCountsDict(newComp))
            genPathsHelper(newComp, newTransitionList, remainingLDist - 1)

    startingCounts = defaultdict(int)
    for key in startingComposition.aminoCounts:
        startingCounts[key] = startingComposition.aminoCounts[key]
    genPathsHelper(startingCounts, [], maximumLDist)

    outputComps = alphabet.removeStrDups(outputComps)
    # Annoying, but without this we can transition through amino acids that
    # aren't in the initial composition: [SWP(Ile, Gly), SWP(Gly, Ala)] is a
    # dumb path if Gly isn't in the initial sequence.
    outputPaths = startingComposition.filterPaths(
        outputPaths, targetMass, massEpsilon)
    return (outputPaths, outputComps)
def genPathsHelper(currentAminoCounts, transitionList, targetMass,
                   remainingLDist, massEpsilon, alphabet, outputPaths,
                   outputComps):
    """Depth-first search over compositions reachable by valid transitions.

    The valid transitions are generated from currentAminoCounts, and each
    one is applied to a private copy of the counts.  When the mass of the
    new counts satisfies fuzzyEqual(mass, targetMass, massEpsilon), the
    extended path is appended to outputPaths and the matching composition
    to outputComps.  The search then recurses with one step less of budget,
    whether or not the mass matched.

    Args:
        currentAminoCounts: mapping of amino -> count for the current
            composition (never mutated).
        transitionList: transitions taken so far (never mutated).
        targetMass: mass a path has to reach.
        remainingLDist: number of further transitions allowed.  Values
            <= 0 end the search immediately.
        massEpsilon: tolerance handed to fuzzyEqual.
        alphabet: alphabet handed to Transition.generateValidTransitions.
        outputPaths: list that receives the matching paths (mutated).
        outputComps: list that receives the matching compositions
            (mutated).
    """
    # `<= 0` rather than `== 0`: a negative budget must terminate the
    # recursion instead of counting down forever.
    if remainingLDist <= 0:
        return
    validTransitions = Transition.generateValidTransitions(
        currentAminoCounts, alphabet)
    for transition in validTransitions:
        # Work on a private copy so sibling branches are unaffected.
        newComp = defaultdict(int)
        for key in currentAminoCounts:
            newComp[key] = currentAminoCounts[key]
        Composition.followTransitionInPlace(newComp, transition)
        newTransitionList = transitionList + [transition]
        if fuzzyEqual(Composition.calcMassOnAminoCounts(newComp),
                      targetMass, massEpsilon):
            outputPaths.append(newTransitionList)
            outputComps.append(Composition.fromCountsDict(newComp))
        genPathsHelper(newComp, newTransitionList, targetMass,
                       remainingLDist - 1, massEpsilon, alphabet,
                       outputPaths, outputComps)