Exemple #1
0
def pairwiseScoreSingleAnnotation(S1, S2, T1, T2, deltaMass, eps):
    """Find delta-mass-linked (t1, t2) pairs whose members both match S1/S2.

    fuzzyEqual is defined elsewhere in the project; the original notes
    describe fuzzyEqual(a, b) as abs(b - a) < eps.

    A pair (t1, t2) with t1 from T1 and t2 from T2 is a candidate when t2 is
    not <= t1 and fuzzyEqual(t2 - t1, deltaMass, eps) holds.  A candidate is
    reported when some s1 in S1 satisfies fuzzyEqual(t1, s1, eps) and some
    s2 in S2 satisfies fuzzyEqual(t2, s2, eps).

    Returns:
        A list (not a count; use len() for that) of tuples
        (s1, s2, t1, t2), where s1 and s2 are the FIRST matching elements
        of S1 and S2.  Each candidate (t1, t2) contributes at most one tuple.
    """
    # Group every acceptable t2 under the t1 it is linked to.
    linkedByT1 = defaultdict(list)
    for lowT in T1:
        for highT in T2:
            if not (highT <= lowT) and fuzzyEqual(highT - lowT, deltaMass,
                                                  eps):
                linkedByT1[lowT].append(highT)

    matches = []
    for lowT, highTs in linkedByT1.items():
        # Only the first s1 that matches t1 is used for this t1.
        for s1 in S1:
            if fuzzyEqual(lowT, s1, eps):
                break
        else:
            # No s1 annotates this t1, so none of its pairs can count.
            continue

        for highT in highTs:
            # Likewise, stop at the first s2 that matches t2.
            for s2 in S2:
                if fuzzyEqual(highT, s2, eps):
                    matches.append((s1, s2, lowT, highT))
                    break

    return matches
Exemple #2
0
    def generatePossiblePaths(startingMass,
                              targetMass,
                              maximumLDist,
                              massEpsilon,
                              alphabet=DEFAULT_ALPHABET):
        """Enumerate transition sequences whose mass deltas reach targetMass.

        The search works on masses only: it starts at startingMass and tries
        every sequence of at most maximumLDist transitions, recording each
        sequence (including intermediate prefixes) whose accumulated mass
        satisfies fuzzyEqual(mass, targetMass, massEpsilon).  The search keeps
        extending a sequence even after it has hit the target.

        No self/cls argument is taken.
        NOTE(review): presumably declared as a static method; the decorator
        is outside the visible source - confirm.

        Args:
            startingMass: mass the search starts from.
            targetMass: mass a recorded path must end on (within massEpsilon).
            maximumLDist: maximum number of transitions in a path.
            massEpsilon: tolerance handed to fuzzyEqual.
            alphabet: supplies sortedAminos and is forwarded to
                Transition.generateValidTransitions.

        Returns:
            A list of paths, each a list of transitions.  The empty path is
            included (first) when startingMass already matches targetMass.
        """
        foundPaths = []
        if fuzzyEqual(startingMass, targetMass, massEpsilon):
            # Already on target: the empty path is a valid answer.
            foundPaths.append([])

        # Build a composition from every amino in the alphabet and ask for
        # its valid transitions; the same transition set is reused at every
        # depth of the search.
        everyAmino = Composition.fromAminoArr(alphabet.sortedAminos)
        candidateSteps = Transition.generateValidTransitions(
            everyAmino.aminoCounts, alphabet)

        def explore(massSoFar, stepsSoFar, stepsLeft):
            # Depth-first enumeration; appends matching paths to foundPaths.
            if stepsLeft == 0:
                return
            for step in candidateSteps:
                reachedMass = massSoFar + step.deltaMass()
                extended = stepsSoFar + [step]
                if fuzzyEqual(reachedMass, targetMass, massEpsilon):
                    foundPaths.append(extended)
                explore(reachedMass, extended, stepsLeft - 1)

        explore(startingMass, [], maximumLDist)
        return foundPaths
Exemple #3
0
    def filterPaths(self, possiblePaths, targetMass, massEpsilon):
        """Keep only the paths this composition can actually follow.

        For each path the method accumulates the mass (starting from
        self.calcMass()) and a per-amino requirement derived from each
        transition's action code:

            action 0: requirement for amino1 decreases by one
            action 1: requirement for amino1 increases by one
            action 2: requirement for amino1 increases by one and the
                      requirement for amino2 decreases by one

        NOTE(review): the codes presumably mean insert / delete / swap;
        confirm against the Transition definition.

        A path is kept when self.aminoCounts covers every requirement and
        the final mass satisfies fuzzyEqual(mass, targetMass, massEpsilon).

        Returns:
            A new list with the accepted paths, in their original order.
        """
        baseMass = self.calcMass()
        kept = []

        for candidate in possiblePaths:
            needed = defaultdict(int)
            endMass = baseMass
            for step in candidate:
                endMass += step.deltaMass()
                code = step.action
                if code == 0:
                    needed[step.amino1] -= 1
                elif code == 1:
                    needed[step.amino1] += 1
                elif code == 2:
                    needed[step.amino1] += 1
                    needed[step.amino2] -= 1

            # An amino that is touched but nets out to zero was only passed
            # through; passing through it is pointless unless the initial
            # sequence really contains it, so demand at least one.
            for amino, net in needed.items():
                if net == 0:
                    needed[amino] = 1

            # Every touched amino is looked up (no short-circuit), and the
            # mass check always runs, exactly as in a plain flag-based loop.
            shortfalls = [
                amino for amino, net in needed.items()
                if self.aminoCounts[amino] < net
            ]
            massMatches = fuzzyEqual(endMass, targetMass, massEpsilon)

            if massMatches and not shortfalls:
                kept.append(candidate)
        return kept
Exemple #4
0
def score(S, T, eps):
    """Return every (s, t) pair, s from S and t from T, that fuzzy-matches.

    fuzzyEqual is defined elsewhere in the project; the original notes
    describe fuzzyEqual(a, b) as abs(b - a) < eps.

    Returns:
        A list of (s, t) tuples (not a count; use len() for that), ordered
        by s first and then by t.  One s may appear with several t values.
    """
    return [(s, t) for s in S for t in T if fuzzyEqual(s, t, eps)]
Exemple #5
0
def scoreSingleAnnotation(S, T, eps):
    """Pair each s in S with the first t in T that fuzzy-matches it.

    fuzzyEqual is defined elsewhere in the project; the original notes
    describe fuzzyEqual(a, b) as abs(b - a) < eps.

    Returns:
        A list of (s, t) tuples (not a count; use len() for that).  Each s
        contributes at most one tuple; an s with no matching t is omitted.
    """
    noMatch = object()  # sentinel: distinguishes "nothing found" from any t
    annotated = []
    for s in S:
        firstT = next((t for t in T if fuzzyEqual(s, t, eps)), noMatch)
        if firstT is not noMatch:
            annotated.append((s, firstT))
    return annotated
Exemple #6
0
 def genPathsHelper(validTransitions, curMass, transitionList,
                    targetMass, remainingLDist, massEpsilon, alphabet,
                    outputPaths):
     """Depth-first search over transition sequences, tracking mass only.

     Every transition in validTransitions is tried at every depth.  Each
     extended path whose accumulated mass satisfies
     fuzzyEqual(mass, targetMass, massEpsilon) is appended to outputPaths,
     and the search continues below it until remainingLDist reaches 0.

     Args:
         validTransitions: transitions to try at each step.
         curMass: mass accumulated along transitionList so far.
         transitionList: path taken so far (not mutated; a new list is
             built for each extension).
         targetMass: mass a recorded path must reach.
         remainingLDist: how many more transitions may be added.
         massEpsilon: tolerance handed to fuzzyEqual.
         alphabet: only forwarded to the recursive call; unused here.
         outputPaths: list that receives matching paths (mutated in place).

     Returns:
         None; results are delivered through outputPaths.
     """
     if remainingLDist != 0:
         for step in validTransitions:
             massAfter = curMass + step.deltaMass()
             pathAfter = transitionList + [step]
             if fuzzyEqual(massAfter, targetMass, massEpsilon):
                 outputPaths.append(pathAfter)
             # Keep extending even after a hit: longer paths may match too.
             genPathsHelper(validTransitions, massAfter, pathAfter,
                            targetMass, remainingLDist - 1, massEpsilon,
                            alphabet, outputPaths)
Exemple #7
0
def pairwiseScoreMultiAnnotation(S1, S2, T1, T2, deltaMass, eps):
    """Find all (s1, s2, t1, t2) where both pairs are delta-mass linked.

    fuzzyEqual is defined elsewhere in the project; the original notes
    describe fuzzyEqual(a, b) as abs(b - a) < eps.

    A tuple (s1, s2, t1, t2) is reported when all of the following hold:
        s1 is in S1, s2 is in S2, t1 is in T1, t2 is in T2,
        s2 > s1 and fuzzyEqual(s2 - s1, deltaMass, eps),
        t2 > t1 and fuzzyEqual(t2 - t1, deltaMass, eps),
        fuzzyEqual(t1, s1, eps) and fuzzyEqual(t2, s2, eps).

    Unlike the single-annotation variant, every matching combination is
    reported, not just the first match per element.

    Args:
        S1, S2: candidate values for s1 and s2.
        T1, T2: candidate values for t1 and t2.
        deltaMass: required difference within each pair; must be > 0.
        eps: tolerance handed to fuzzyEqual.

    Returns:
        A list of (s1, s2, t1, t2) tuples (not a count; use len() for that).

    Raises:
        ValueError: if deltaMass <= 0.  ValueError derives from Exception,
            so callers catching Exception keep working.
    """
    if deltaMass <= 0:
        raise ValueError("Delta Mass Must Be > 0")

    def linkedPairs(lows, highs):
        # Map each low value to the high values lying deltaMass above it.
        linked = defaultdict(list)
        for low in lows:
            for high in highs:
                if high <= low:
                    continue
                if fuzzyEqual(high - low, deltaMass, eps):
                    linked[low].append(high)
        return linked

    sPairs = linkedPairs(S1, S2)
    tPairs = linkedPairs(T1, T2)

    results = []
    for s1, s2Partners in sPairs.items():
        # Whether t1 matches s1 does not depend on s2 or t2, so resolve the
        # matching t1 values once per s1 instead of once per (s2, t2).
        t1Matches = [t1 for t1 in tPairs if fuzzyEqual(t1, s1, eps)]
        for s2 in s2Partners:
            for t1 in t1Matches:
                for t2 in tPairs[t1]:
                    if fuzzyEqual(t2, s2, eps):
                        results.append((s1, s2, t1, t2))
    return results
Exemple #8
0
    def generatePaths(startingComposition,
                      targetMass,
                      maximumLDist,
                      massEpsilon,
                      alphabet=DEFAULT_ALPHABET):
        """Search for transition paths and compositions that hit targetMass.

        Starting from startingComposition's amino counts, every sequence of
        at most maximumLDist transitions is followed.  At each step the
        valid transitions are recomputed for the current counts.  Whenever
        the resulting counts have a mass satisfying
        fuzzyEqual(mass, targetMass, massEpsilon), the path and the
        composition are recorded, and the search continues below that point.

        No self/cls argument is taken.
        NOTE(review): presumably declared as a static method; the decorator
        is outside the visible source - confirm.

        Args:
            startingComposition: provides calcMass(), aminoCounts and
                filterPaths().
            targetMass: mass a recorded result must have (within tolerance).
            maximumLDist: maximum number of transitions in a path.
            massEpsilon: tolerance handed to fuzzyEqual.
            alphabet: forwarded to Transition.generateValidTransitions and
                used for removeStrDups.

        Returns:
            A tuple (paths, compositions).  The paths are passed through
            startingComposition.filterPaths and the compositions through
            alphabet.removeStrDups independently, so the two lists are not
            guaranteed to line up index by index.
        """
        reachedPaths = []
        reachedComps = []

        if fuzzyEqual(startingComposition.calcMass(), targetMass,
                      massEpsilon):
            # Already on target: the empty path and the start both qualify.
            reachedPaths.append([])
            reachedComps.append(startingComposition)

        #TODO FIXME HACK: Would this be noticeably faster as a dynamic programming implementation with some backtracing step or does it all work out basically the same?  I suppose it depends on how many duplicate ways there are to get to the same composition.  Probably doesn't matter much until we increase L dist above 2.
        def walk(counts, stepsSoFar, stepsLeft):
            # Depth-first enumeration; records hits in the enclosing lists.
            if stepsLeft == 0:
                return
            for step in Transition.generateValidTransitions(counts, alphabet):
                # Work on a private copy so sibling branches are unaffected.
                nextCounts = defaultdict(int, counts)
                Composition.followTransitionInPlace(nextCounts, step)
                extended = stepsSoFar + [step]
                if fuzzyEqual(Composition.calcMassOnAminoCounts(nextCounts),
                              targetMass, massEpsilon):
                    reachedPaths.append(extended)
                    reachedComps.append(Composition.fromCountsDict(nextCounts))
                walk(nextCounts, extended, stepsLeft - 1)

        walk(defaultdict(int, startingComposition.aminoCounts), [],
             maximumLDist)

        uniqueComps = alphabet.removeStrDups(reachedComps)
        validPaths = startingComposition.filterPaths(
            reachedPaths, targetMass, massEpsilon
        )  #Annoying, but without this we can transition through amino acids that aren't in the initial composition: [SWP(Ile, Gly), SWP(Gly, Ala)] is a dumb path if Gly isn't in the initial sequence.
        return (validPaths, uniqueComps)
Exemple #9
0
 def genPathsHelper(currentAminoCounts, transitionList, targetMass,
                    remainingLDist, massEpsilon, alphabet, outputPaths,
                    outputComps):
     """Depth-first search over transitions, tracking amino counts.

     At each level the valid transitions are recomputed for the current
     counts.  Each transition is applied to a fresh copy of the counts; if
     the resulting mass satisfies fuzzyEqual(mass, targetMass, massEpsilon),
     the extended path is appended to outputPaths and the matching
     composition to outputComps.  The search then continues from the new
     counts until remainingLDist reaches 0.

     Args:
         currentAminoCounts: mapping of amino -> count at this node (not
             mutated; it is copied before each transition is applied).
         transitionList: path taken so far (not mutated).
         targetMass: mass a recorded result must reach.
         remainingLDist: how many more transitions may be added.
         massEpsilon: tolerance handed to fuzzyEqual.
         alphabet: forwarded to Transition.generateValidTransitions.
         outputPaths: list that receives matching paths (mutated in place).
         outputComps: list that receives matching compositions (mutated in
             place, in step with outputPaths).

     Returns:
         None; results are delivered through outputPaths and outputComps.
     """
     if remainingLDist != 0:
         for step in Transition.generateValidTransitions(
                 currentAminoCounts, alphabet):
             # Private copy so sibling branches see the unmodified counts.
             countsAfter = defaultdict(int, currentAminoCounts)
             Composition.followTransitionInPlace(countsAfter, step)
             pathAfter = transitionList + [step]
             massAfter = Composition.calcMassOnAminoCounts(countsAfter)
             if fuzzyEqual(massAfter, targetMass, massEpsilon):
                 outputPaths.append(pathAfter)
                 outputComps.append(Composition.fromCountsDict(countsAfter))
             genPathsHelper(countsAfter, pathAfter, targetMass,
                            remainingLDist - 1, massEpsilon, alphabet,
                            outputPaths, outputComps)