Beispiel #1
0
        rootGenome = ', '.join(fragment.originalSequence
                               for fragment in ancestralFragments)
if globals.printToConsole:
    print('This is the root ancestral genome!')
    print('\nRoot: %s\n' % (rootGenome))

#Need to traverse the tree to output the appropriate content to file
newickTree.clade.name = ''  #Make sure that the output for the root is not output
traverseNewickTreeAndOutputToFile(newickTree.clade)

#Output the totals for the computation to console and file
outputTotalsToFile(outputFileName, totalTime)

if globals.printToConsole:
    #Output Bar graphs of each event
    createBarGraph(globals.deletionSizeCounter, 'Distribution of Deletions')
    createBarGraph(globals.duplicationSizeCounter,
                   'Distribution of Duplications')
    createBarGraph(globals.inversionSizeDistributionCounter,
                   'Distribution of Inversions')
    createBarGraph(globals.transpositionSizeDistributionCounter,
                   'Distribution of Transpositions')
    createBarGraph(globals.invertedTranspositionSizeDistributionCounter,
                   'Distribution of Inverted Transpositions')

#TODO compute lineage
#target = 'NC_014019'
#print('Computing lineage cost for: %s' % (target))
#lineageCost = computeLineageCost(newickTree.clade, target, None)
#if lineageCost != None:
#print('Successfully found and computed the lineage for: %s' % (target))
Beispiel #2
0
def _splitDetailEntries(details, label):
    """Return the non-empty ';'-separated entries of ``details``.

    ``label`` (e.g. 'Codon Mismatch:') is removed from the string before it
    is split. A real list is built instead of calling ``filter`` so that the
    caller can index it and take its length on Python 3 as well, where
    ``filter`` returns a lazy iterator.
    """
    stripped = details.replace(label, '').strip()
    return [entry for entry in stripped.split(';') if entry]


def _replaceTaggedGene(ancestor, gene, idNumber, replacement):
    """Replace a tagged gene in the ancestor's fragments with ``replacement``.

    Only the first fragment whose ``originalSequence`` contains ``idNumber``
    is modified: ``gene-idNumber`` is replaced in that string, and the first
    element of ``fragment.sequence`` containing ``idNumber`` is overwritten.
    """
    for fragment in ancestor.genomeFragments:
        if idNumber in fragment.originalSequence:
            fragment.originalSequence = fragment.originalSequence.replace(
                gene + '-' + idNumber, replacement)
            for index, entry in enumerate(fragment.sequence):
                if idNumber in entry:
                    fragment.sequence[index] = replacement
                    break
            break


def _resolveTaggedDifferences(strain1, strain2, strain1Copy, neighborCopy,
                              ancestor, attributeName, label, marker):
    """Decide, per recorded difference, which sibling's gene the ancestor keeps.

    ``attributeName`` names the details attribute to process
    ('codonMismatchDetails' or 'substitutionDetails'), ``label`` is the
    prefix those strings carry and ``marker`` is the character whose presence
    in strain1's details triggers the processing. Nothing happens when the
    marker is absent.

    For every entry of strain1 (paired by position with strain2's entry):
      * if the neighbor comparison (stored on ``strain1Copy``) recorded the
        same gene and position, strain 2's gene is put in the ancestor and
        the difference is attributed to strain 1;
      * otherwise (no neighbor, or no matching entry) strain 1's gene is put
        in the ancestor and the difference is attributed to strain 2.

    The details attribute of both strains is then overwritten with the
    rebuilt strings. An IndexError is raised, as before, if strain2 has fewer
    entries than strain1.
    """
    if marker not in getattr(strain1, attributeName):
        return

    newDetails1 = label
    newDetails2 = label
    subsList1 = _splitDetailEntries(getattr(strain1, attributeName), label)
    subsList2 = _splitDetailEntries(getattr(strain2, attributeName), label)

    #The neighbor's entries do not change while we loop, so split them once
    neighborEntries = []
    if neighborCopy:
        neighborDetails = getattr(strain1Copy, attributeName)
        if marker in neighborDetails:
            neighborEntries = _splitDetailEntries(neighborDetails, label)

    for w, entry1 in enumerate(subsList1):
        gene1, idNumber1, position1 = parseDetails(entry1)
        gene2, idNumber2, position2 = parseDetails(subsList2[w])
        processed = False  #Tracks whether the neighbor confirmed this difference

        for neighborEntry in neighborEntries:
            gene3, idNumber3, position3 = parseDetails(neighborEntry)
            if gene1 == gene3 and position1 == position3:
                #Same difference seen against the neighbor, so keep strain 2's version of the gene
                processed = True
                _replaceTaggedGene(ancestor, gene1, idNumber1, gene2)

        if processed:
            #Strain 2's gene was kept, therefore strain 1 carries the difference
            newDetails1 += gene1 + ' ' + position1 + ';'
        else:
            #Not confirmed by a neighbor, so assume strain 2 carries the difference
            newDetails2 += gene2 + ' ' + position2 + ';'
            _replaceTaggedGene(ancestor, gene1, idNumber1, gene1)

    setattr(strain1, attributeName, newDetails1)
    setattr(strain2, attributeName, newDetails2)


def createAncestor(strain1, strain2, neighborStrain):
    """Build the ancestor of two sibling strains, optionally using a neighbor.

    The siblings are aligned with ``constructEvents``, their regions are
    classified with ``determineRegions`` and, when ``neighborStrain`` is not
    None, a deep copy of strain1 is aligned against a deep copy of the
    neighbor to help arrange the ancestral fragments. Global event counters
    are updated, and codon mismatches / substitutions are resolved into the
    ancestor's fragments.

    Args:
        strain1: first sibling strain; its detail attributes are rewritten.
        strain2: second sibling strain; its detail attributes are rewritten.
        neighborStrain: neighboring strain, or None when there is none.

    Returns:
        A new ``BacterialStrain`` named 'Ancestor <n>' holding the ancestral
        fragments.
    """
    globals.ancestralCounter += 1
    ancestralName = 'Ancestor ' + str(globals.ancestralCounter)

    #Deep copies so the neighbor comparison is not stored in the real strains
    strain1Copy = copy.deepcopy(strain1)
    neighborCopy = copy.deepcopy(neighborStrain)

    if globals.printToConsole:
        print(
            'Performing a series of alignments for the following strains: %s, %s'
            % (strain1.name, strain2.name))

    #Only do the backtrace between these two strains! The flags are restored
    #even if the alignment fails so later alignments are not affected.
    globals.enableDeletionReversions = True
    globals.enableSelfAlignmentDetails = True
    try:
        events, duplicatesStrain1, duplicatesStrain2 = constructEvents(
            strain1, strain2)
    finally:
        globals.enableSelfAlignmentDetails = False
        globals.enableDeletionReversions = False

    if globals.printToConsole:
        print('Constructing dot plot for the following strains: %s, %s' %
              (strain1.name, strain2.name))
    points, lostPoints = normalizeIndexesForDotPlot(events, duplicatesStrain1,
                                                    duplicatesStrain2, strain1,
                                                    strain2)
    if globals.printToConsole:
        createDotPlot(points, strain1, strain2, testFileName)
        createBarGraph(strain1.duplicationCounts,
                       'Distribution of Duplications for %s' % (strain1.name))
        createBarGraph(strain2.duplicationCounts,
                       'Distribution of Duplications for %s' % (strain2.name))
        #Remember! Deletions refer to the other strain!
        createBarGraph(strain1.deletionCounts,
                       'Distribution of Deletions for %s' % (strain1.name))
        createBarGraph(strain2.deletionCounts,
                       'Distribution of Deletions for %s' % (strain2.name))

    #Compute the forward conserved, transposed, inverted, and inverted transposed regions
    FCR, TR, IR, ITR = determineRegions(points)

    #Compare one of the siblings to the neighbor if one exists
    if neighborCopy is not None:
        if globals.printToConsole:
            print(
                'Now performing a series of alignments between the nighboring strains: %s, %s'
                % (strain1Copy.name, neighborCopy.name))
        neighborEvents, duplicatesStrain1Copy, duplicatesStrainNeighbor = (
            constructEvents(strain1Copy, neighborCopy))
        if globals.printToConsole:
            print('Constructing dot plot for the neighboring strains: %s, %s' %
                  (strain1Copy.name, neighborCopy.name))
        neighborPoints, neighborLostPoints = normalizeIndexesForDotPlot(
            neighborEvents, duplicatesStrain1Copy, duplicatesStrainNeighbor,
            strain1Copy, neighborCopy)

        #Compute the various regions for the neighbor
        NFCR, NTR, NIR, NITR = determineRegions(neighborPoints)
        ancestralFragments, strain1, strain2 = (
            determineAncestralFragmentArrangementUsingNeighbor(
                FCR, TR, IR, ITR, lostPoints, NFCR, NTR, NIR, NITR,
                neighborLostPoints, strain1, strain2))
    else:
        #This branch is only reached when there is no neighbor
        if globals.printToConsole:
            print('No neighbor found!')
        ancestralFragments, strain2 = (
            determineAncestralFragmentArrangementWithoutNeighbor(
                FCR, TR, IR, ITR, lostPoints, strain2))

    #Computes the total number of inversions, transpositions, inverted transpositions
    globals.inversionCounter += len(IR)
    globals.transposedCounter += len(TR)
    globals.invertedTransposedCounter += len(ITR)

    #Increments the counters for the size distributions for each event type
    updateGlobalDeletionCounter(strain1)
    updateGlobalDeletionCounter(strain2)
    updateGlobalDuplicationCounter(strain1)
    updateGlobalDuplicationCounter(strain2)
    updateGlobalInversionSizeDistributionCounter(strain1)
    updateGlobalInversionSizeDistributionCounter(strain2)
    updateGlobalTranspositionSizeDistributionCounter(strain1)
    updateGlobalTranspositionSizeDistributionCounter(strain2)
    updateGlobalInvertedTranspositionSizeDistributionCounter(strain1)
    updateGlobalInvertedTranspositionSizeDistributionCounter(strain2)

    #Codon mismatches and substitutions are counted for strain2 only;
    #counting both strains leads to double counts (ie x2 number of events)
    updateGlobalCodonMismatchCounter(strain2)
    updateGlobalSubstitutionCounter(strain2)

    ancestor = BacterialStrain(ancestralName, ancestralFragments)

    if globals.printToConsole:
        print(strain1.name)
        for frag in strain1.genomeFragments:
            print(frag.originalSequence)
        print(strain2.name)
        for frag in strain2.genomeFragments:
            print(frag.originalSequence)

    #Handle the codon mismatches first, then the substitutions (same order as before)
    _resolveTaggedDifferences(strain1, strain2, strain1Copy, neighborCopy,
                              ancestor, 'codonMismatchDetails',
                              'Codon Mismatch:', '#')
    _resolveTaggedDifferences(strain1, strain2, strain1Copy, neighborCopy,
                              ancestor, 'substitutionDetails',
                              'Substitution:', '@')

    #Add the details from the self global alignment; they were stored in
    #separate variables so they do not interfere with the handlers above
    strain1.codonMismatchDetails += strain1.tempCodonDetails
    strain2.codonMismatchDetails += strain2.tempCodonDetails
    strain1.substitutionDetails += strain1.tempSubstitutionDetails
    strain2.substitutionDetails += strain2.tempSubstitutionDetails

    return ancestor
Beispiel #3
0
def _joinGenomeForName(cladeName, strainList):
    """Return the comma-joined fragment sequences of the strain named ``cladeName``.

    An empty string is returned when ``cladeName`` is None or empty, or when
    no strain in ``strainList`` has that name, so the result can always be
    written to a text file.
    """
    if not cladeName:
        return ''
    foundStrain = next((s for s in strainList if s.name == cladeName), None)
    if foundStrain is None:
        return ''
    return ', '.join(fragment.originalSequence
                     for fragment in foundStrain.genomeFragments)


def main():
    """Run the ancestral reconstruction for the Newick tree given on the command line.

    Expects exactly two command-line arguments: the Newick tree file and the
    test folder name. The root genome is written to ``appRoot.txt`` (or
    ``appNeighbourRoot.txt`` when the tree file is named
    "tree2LeafNeighbour.dnd") inside the test folder, followed by the
    per-strain output and the totals.
    """
    globals.initialize()  #Initialize the globals file

    global newickFileName
    global outputFileName
    global testFileName

    if len(sys.argv) != 3:
        #Function-call form works on both Python 2 and Python 3
        print("WARNING: Must provide a Newick tree and test folder name. Exiting...")
        #NOTE(review): exit status 0 kept as-is; a non-zero status may be more appropriate
        sys.exit(0)

    newickFileName = sys.argv[1]
    isNeighbourTree = newickFileName == "tree2LeafNeighbour.dnd"
    if isNeighbourTree:
        outputFileName = sys.argv[2] + "/ApplicationNeighbourOutput.txt"
    else:
        outputFileName = sys.argv[2] + "/ApplicationOutput.txt"
    testFileName = sys.argv[2] + '/'

    print('Starting application...')
    startTime = time.time()

    if globals.printToConsole:
        print('Reading newick tree from file: %s...' % (newickFileName))
    newickTree = Phylo.read(newickFileName, 'newick')
    if globals.printToConsole:
        Phylo.draw(newickTree)

    globals.strains = strains  #Assign pointer to the global strains array so we can access it anywhere
    createFile(outputFileName,
               newickTree)  #Creates file where data will be output

    #Traverses the newick tree recursively reconstructing ancestral genomes
    if globals.printToConsole:
        print('Traversing newick tree...')
    traverseNewickTree(newickTree.clade, None)

    totalTime = time.time() - startTime

    #Output ancestral genome to console
    if globals.printToConsole:
        print('This is the root ancestral genome!')

    root = newickTree.clade
    if isNeighbourTree:
        if len(root.clades) == 2:
            #The child with two sub-clades is treated as the ancestor; the other one is the neighbour
            child = root.clades[0]
            if len(child.clades) != 2:
                child = root.clades[1]
                neighbour = root.clades[0]
            else:
                neighbour = root.clades[1]

            #Always a string (possibly empty) so the write below cannot fail on a list
            rootGenome = _joinGenomeForName(child.name, strains)
            with open(testFileName + "appNeighbourRoot.txt", "w+") as f:
                f.write(rootGenome)
            neighbour.name = ''
            child.name = ''
    else:
        rootGenome = _joinGenomeForName(root.name, strains)
        with open(testFileName + "appRoot.txt", "w+") as f:
            f.write(rootGenome)

    if globals.printToConsole:
        #Output newick tree after the ancestors have been added to it
        Phylo.draw(newickTree)

    #Need to traverse tree to output appropriate content to file
    newickTree.clade.name = ''  #Make sure that the output for the root is not output
    traverseNewickTreeAndOutputToFile(newickTree.clade)

    #Output the totals for the computation to console and file
    outputTotalsToFile(outputFileName, totalTime)

    #TODO compute lineage
    #target = 'NC_014019'
    #print('Computing lineage cost for: %s' % (target))
    #lineageCost = computeLineageCost(newickTree.clade, target, None)
    #if lineageCost != None:
    #print('Successfully found and computed the lineage for: %s' % (target))

    #Output Bar graphs of each event
    if globals.printToConsole:
        createBarGraph(globals.deletionSizeCounter,
                       'Distribution of Deletions')
        createBarGraph(globals.duplicationSizeCounter,
                       'Distribution of Duplications')
        createBarGraph(globals.inversionSizeDistributionCounter,
                       'Distribution of Inversions')
        createBarGraph(globals.transpositionSizeDistributionCounter,
                       'Distribution of Transpositions')
        createBarGraph(globals.invertedTranspositionSizeDistributionCounter,
                       'Distribution of Inverted Transpositions')

    print('Total time (in seconds): %s' % (totalTime))
    print('Ending application...')
Beispiel #4
0
def createAncestor(strain1, strain2, neighborStrain):
    """Build the ancestor of two sibling strains, optionally using a neighbor.

    The siblings are aligned with ``constructEvents`` and their regions are
    classified with ``determineRegions``. When ``neighborStrain`` is not
    None, a deep copy of strain1 is aligned against a deep copy of the
    neighbor and both comparisons are used to arrange the ancestral
    fragments; otherwise only the sibling comparison is used. Global event
    counters are updated and the details of both siblings are appended to the
    output file.

    Args:
        strain1: first sibling strain.
        strain2: second sibling strain.
        neighborStrain: neighboring strain, or None when there is none.

    Returns:
        A new ``BacterialStrain`` named 'Ancestor <n>' holding the ancestral
        fragments.
    """
    globals.ancestralCounter += 1
    ancestralName = 'Ancestor ' + str(globals.ancestralCounter)

    #Deep copies so the neighbor comparison is not stored in the real strains
    strain1Copy = copy.deepcopy(strain1)
    neighborCopy = copy.deepcopy(neighborStrain)

    print(
        'Performing a series of alignments for the following strains: %s, %s' %
        (strain1.name, strain2.name))
    events, duplicatesStrain1, duplicatesStrain2 = constructEvents(
        strain1, strain2)

    print('Constructing dot plot for the following strains: %s, %s' %
          (strain1.name, strain2.name))
    points, lostPoints = normalizeIndexesForDotPlot(events, duplicatesStrain1,
                                                    duplicatesStrain2, strain1,
                                                    strain2)
    createDotPlot(points, strain1, strain2)

    createBarGraph(strain1.duplicationCounts,
                   'Distribution of Duplications for %s' % (strain1.name))
    createBarGraph(strain2.duplicationCounts,
                   'Distribution of Duplications for %s' % (strain2.name))
    #Remember! Deletions refer to the other strain!
    createBarGraph(strain1.deletionCounts,
                   'Distribution of Deletions for %s' % (strain1.name))
    createBarGraph(strain2.deletionCounts,
                   'Distribution of Deletions for %s' % (strain2.name))

    #Compute the forward conserved, transposed, inverted, and inverted transposed regions
    FCR, TR, IR, ITR = determineRegions(points)

    #Compare one of the siblings to the neighbor if one exists
    if neighborCopy is not None:
        print(
            'Now performing a series of alignments between the nighboring strains: %s, %s'
            % (strain1Copy.name, neighborCopy.name))
        neighborEvents, duplicatesStrain1Copy, duplicatesStrainNeighbor = (
            constructEvents(strain1Copy, neighborCopy))

        print('Constructing dot plot for the neighboring strains: %s, %s' %
              (strain1Copy.name, neighborCopy.name))
        neighborPoints, neighborLostPoints = normalizeIndexesForDotPlot(
            neighborEvents, duplicatesStrain1Copy, duplicatesStrainNeighbor,
            strain1Copy, neighborCopy)

        #Compute the various regions for the neighbor
        NFCR, NTR, NIR, NITR = determineRegions(neighborPoints)
        ancestralFragments, strain1, strain2 = (
            determineAncestralFragmentArrangementUsingNeighbor(
                FCR, TR, IR, ITR, lostPoints, NFCR, NTR, NIR, NITR,
                neighborLostPoints, strain1, strain2))
    else:
        #This branch is only reached when there is no neighbor
        print('No neighbor found!')
        ancestralFragments, strain2 = (
            determineAncestralFragmentArrangementWithoutNeighbor(
                FCR, TR, IR, ITR, lostPoints, strain2))

    #Computes the total number of inversions, transpositions, inverted transpositions
    globals.inversionCounter += len(IR)
    globals.transposedCounter += len(TR)
    globals.invertedTransposedCounter += len(ITR)

    #Increments the counters for the size distributions for each event type
    updateGlobalDeletionCounter(strain1)
    updateGlobalDeletionCounter(strain2)
    updateGlobalDuplicationCounter(strain1)
    updateGlobalDuplicationCounter(strain2)
    updateGlobalInversionSizeDistributionCounter(strain1)
    updateGlobalInversionSizeDistributionCounter(strain2)
    updateGlobalTranspositionSizeDistributionCounter(strain1)
    updateGlobalTranspositionSizeDistributionCounter(strain2)
    updateGlobalInvertedTranspositionSizeDistributionCounter(strain1)
    updateGlobalInvertedTranspositionSizeDistributionCounter(strain2)

    #Append all details to file here
    outputStrainDetailsToFile(outputFileName, strain1)
    outputStrainDetailsToFile(outputFileName, strain2)

    return BacterialStrain(ancestralName, ancestralFragments)