rootGenome = ', '.join(fragment.originalSequence for fragment in ancestralFragments) if globals.printToConsole: print('This is the root ancestral genome!') print('\nRoot: %s\n' % (rootGenome)) #Need to traverse tree to ouput appropriate content to file newickTree.clade.name = '' #Make sure that the output for the root is not output traverseNewickTreeAndOutputToFile(newickTree.clade) #Output the totals for the computation to console and file outputTotalsToFile(outputFileName, totalTime) if globals.printToConsole: #Output Bar graphs of each event createBarGraph(globals.deletionSizeCounter, 'Distribution of Deletions') createBarGraph(globals.duplicationSizeCounter, 'Distribution of Duplications') createBarGraph(globals.inversionSizeDistributionCounter, 'Distribution of Inversions') createBarGraph(globals.transpositionSizeDistributionCounter, 'Distribution of Transpositions') createBarGraph(globals.invertedTranspositionSizeDistributionCounter, 'Distribution of Inverted Transpositions') #TODO compute lineage #target = 'NC_014019' #print('Computing lineage cost for: %s' % (target)) #lineageCost = computeLineageCost(newickTree.clade, target, None) #if lineageCost != None: #print('Successfully found and computed the lineage for: %s' % (target))
def _splitDetails(details, label):
    """Return the non-empty ';'-separated entries of details with label removed.

    A real list is built (not a lazy filter object) because callers index the
    result and iterate over it more than once.
    """
    line = details.replace(label, '').strip()
    return [entry for entry in line.split(';') if entry]


def _replaceGeneInAncestor(ancestor, markedGene, idNumber, replacementGene):
    """Swap the marked gene 'markedGene-idNumber' for replacementGene in ancestor.

    Only the first fragment whose originalSequence contains idNumber is
    touched, and within its sequence list only the first element containing
    idNumber is overwritten.

    NOTE(review): the nesting of the second 'break' could not be read from the
    collapsed source; this stops at the first matching fragment - confirm.
    """
    for fragment in ancestor.genomeFragments:
        if idNumber in fragment.originalSequence:
            fragment.originalSequence = fragment.originalSequence.replace(
                markedGene + '-' + idNumber, replacementGene)
            for index, gene in enumerate(fragment.sequence):
                if idNumber in gene:
                    fragment.sequence[index] = replacementGene
                    break
            break


def _assignMismatches(details1, details2, neighborDetails, marker, label, ancestor):
    """Decide, entry by entry, which sibling owns each recorded mismatch.

    details1, details2 -- detail strings of strain 1 and strain 2 (parallel entries)
    neighborDetails    -- detail string from the strain 1 vs. neighbor comparison,
                          or None when there is no neighbor
    marker             -- character whose presence means neighborDetails has entries
    label              -- prefix of the detail strings (e.g. 'Substitution:')
    ancestor           -- ancestral strain whose fragments are patched in place

    Returns the rebuilt (details1, details2) strings.
    """
    newDetails1 = label
    newDetails2 = label
    entries1 = _splitDetails(details1, label)
    entries2 = _splitDetails(details2, label)

    neighborEntries = []
    if neighborDetails is not None and marker in neighborDetails:
        neighborEntries = _splitDetails(neighborDetails, label)

    for w, entry1 in enumerate(entries1):
        gene1, idNumber1, position1 = parseDetails(entry1)
        #Indexing (not zip) on purpose: a length mismatch between the siblings still raises
        gene2, idNumber2, position2 = parseDetails(entries2[w])
        processed = False  #Tracks whether the neighbor showed the same mismatch

        for neighborEntry in neighborEntries:
            gene3, idNumber3, position3 = parseDetails(neighborEntry)
            if gene1 == gene3 and position1 == position3:
                #Same mismatch against the neighbor, so keep strain 2's version of the gene
                processed = True
                _replaceGeneInAncestor(ancestor, gene1, idNumber1, gene2)

        if processed:
            #Strain 2's gene went into the ancestor, so strain 1 carries the mismatch
            newDetails1 += gene1 + ' ' + position1 + ';'
        else:
            #No neighbor, or the neighbor did not show it: assume strain 2 carries the mismatch
            newDetails2 += gene2 + ' ' + position2 + ';'
            _replaceGeneInAncestor(ancestor, gene1, idNumber1, gene1)

    return newDetails1, newDetails2


def createAncestor(strain1, strain2, neighborStrain):
    """Construct the ancestor of two sibling strains, optionally using a neighbor.

    strain1, strain2 -- the sibling strains to compare
    neighborStrain   -- a neighboring strain, or None if there is none

    Returns a BacterialStrain named 'Ancestor <n>' built from the computed
    ancestral fragments.

    Side effects: increments the counters on the globals module, rewrites the
    codonMismatchDetails / substitutionDetails of both strains, and (when
    globals.printToConsole is set) prints progress and draws plots.

    NOTE(review): another createAncestor definition appears later in this
    source; if both live in the same module the later one wins - confirm.
    """
    globals.ancestralCounter += 1
    ancestralName = 'Ancestor ' + str(globals.ancestralCounter)

    #Deep copies so that the comparison with the neighbor is not stored in the real strains
    strain1Copy = copy.deepcopy(strain1)
    neighborCopy = copy.deepcopy(neighborStrain)

    if globals.printToConsole:
        print('Performing a series of alignments for the following strains: %s, %s' % (strain1.name, strain2.name))

    #Only do the backtrace between these two strains! Reset the flags even on failure
    globals.enableDeletionReversions = True
    globals.enableSelfAlignmentDetails = True
    try:
        events, duplicatesStrain1, duplicatesStrain2 = constructEvents(strain1, strain2)
    finally:
        globals.enableSelfAlignmentDetails = False
        globals.enableDeletionReversions = False

    if globals.printToConsole:
        print('Constructing dot plot for the following strains: %s, %s' % (strain1.name, strain2.name))
    points, lostPoints = normalizeIndexesForDotPlot(events, duplicatesStrain1, duplicatesStrain2, strain1, strain2)

    if globals.printToConsole:
        createDotPlot(points, strain1, strain2, testFileName)
        createBarGraph(strain1.duplicationCounts, 'Distribution of Duplications for %s' % (strain1.name))
        createBarGraph(strain2.duplicationCounts, 'Distribution of Duplications for %s' % (strain2.name))
        #Remember! Deletions refer to the other strain!
        createBarGraph(strain1.deletionCounts, 'Distribution of Deletions for %s' % (strain1.name))
        createBarGraph(strain2.deletionCounts, 'Distribution of Deletions for %s' % (strain2.name))

    #Compute the forward conserved, transposed, inverted, and inverted transposed regions
    FCR, TR, IR, ITR = determineRegions(points)

    #Compare one of the siblings to the neighbor if one exists
    if neighborCopy is not None:
        if globals.printToConsole:
            print('Now performing a series of alignments between the nighboring strains: %s, %s' % (strain1Copy.name, neighborCopy.name))
        neighborEvents, duplicatesStrain1Copy, duplicatesStrainNeighbor = constructEvents(strain1Copy, neighborCopy)

        if globals.printToConsole:
            print('Constructing dot plot for the neighboring strains: %s, %s' % (strain1Copy.name, neighborCopy.name))
        neighborPoints, neighborLostPoints = normalizeIndexesForDotPlot(
            neighborEvents, duplicatesStrain1Copy, duplicatesStrainNeighbor, strain1Copy, neighborCopy)

        #Compute the various regions for the neighbor
        NFCR, NTR, NIR, NITR = determineRegions(neighborPoints)
        ancestralFragments, strain1, strain2 = determineAncestralFragmentArrangementUsingNeighbor(
            FCR, TR, IR, ITR, lostPoints, NFCR, NTR, NIR, NITR, neighborLostPoints, strain1, strain2)
    else:
        #The former 'No inverted or transposed regions detected!!' branch was an
        #elif behind 'neighborCopy == None' and could never run here, so it is gone
        if globals.printToConsole:
            print('No neighbor found!')
        ancestralFragments, strain2 = determineAncestralFragmentArrangementWithoutNeighbor(
            FCR, TR, IR, ITR, lostPoints, strain2)

    #Computes the total number of inversions, transpositions, inverted transpositions
    globals.inversionCounter += len(IR)
    globals.transposedCounter += len(TR)
    globals.invertedTransposedCounter += len(ITR)

    #Increments the counters for the size distributions for each event type
    updateGlobalDeletionCounter(strain1)
    updateGlobalDeletionCounter(strain2)
    updateGlobalDuplicationCounter(strain1)
    updateGlobalDuplicationCounter(strain2)
    updateGlobalInversionSizeDistributionCounter(strain1)
    updateGlobalInversionSizeDistributionCounter(strain2)
    updateGlobalTranspositionSizeDistributionCounter(strain1)
    updateGlobalTranspositionSizeDistributionCounter(strain2)
    updateGlobalInvertedTranspositionSizeDistributionCounter(strain1)
    updateGlobalInvertedTranspositionSizeDistributionCounter(strain2)

    #Count these for one sibling only, otherwise every event is counted twice
    updateGlobalCodonMismatchCounter(strain2)
    updateGlobalSubstitutionCounter(strain2)

    ancestor = BacterialStrain(ancestralName, ancestralFragments)

    if globals.printToConsole:
        print(strain1.name)
        for frag in strain1.genomeFragments:
            print(frag.originalSequence)
        print(strain2.name)
        for frag in strain2.genomeFragments:
            print(frag.originalSequence)

    #Handle the codon mismatches ('#' marks an entry that still has to be resolved)
    if '#' in strain1.codonMismatchDetails:
        neighborDetails = strain1Copy.codonMismatchDetails if neighborCopy else None
        strain1.codonMismatchDetails, strain2.codonMismatchDetails = _assignMismatches(
            strain1.codonMismatchDetails, strain2.codonMismatchDetails,
            neighborDetails, '#', 'Codon Mismatch:', ancestor)

    #Handle the substitutions ('@' marks an entry that still has to be resolved)
    if '@' in strain1.substitutionDetails:
        neighborDetails = strain1Copy.substitutionDetails if neighborCopy else None
        strain1.substitutionDetails, strain2.substitutionDetails = _assignMismatches(
            strain1.substitutionDetails, strain2.substitutionDetails,
            neighborDetails, '@', 'Substitution:', ancestor)

    #Details from the self global alignment were kept in separate variables so
    #they would not interfere with the two handlers above; append them now
    strain1.codonMismatchDetails += strain1.tempCodonDetails
    strain2.codonMismatchDetails += strain2.tempCodonDetails
    strain1.substitutionDetails += strain1.tempSubstitutionDetails
    strain2.substitutionDetails += strain2.tempSubstitutionDetails

    return ancestor
def _findGenomeByName(strainName):
    """Return the genome of the strain called strainName as one string.

    The string is the ', '-joined originalSequence of every genome fragment of
    the first entry in the module-level strains list whose name matches.
    Returns None when strainName is empty/None or no strain has that name.
    """
    if not strainName:
        return None
    foundStrain = next((strain for strain in strains if strain.name == strainName), None)
    if foundStrain is None:
        return None
    return ', '.join(fragment.originalSequence for fragment in foundStrain.genomeFragments)


def main():
    """Run the reconstruction for the Newick tree given on the command line.

    Expects exactly two arguments: the Newick tree file and the test folder
    name. Reads the tree, reconstructs the ancestors by traversing it, writes
    the root genome and the per-strain output into the test folder and prints
    the total run time.
    """
    globals.initialize()  #Initialize the globals file

    global newickFileName
    global outputFileName
    global testFileName

    if len(sys.argv) != 3:
        #Function-call form prints the same text on Python 2 and also parses on Python 3
        print("WARNING: Must provide a Newick tree and test folder name. Exiting...")
        #NOTE(review): exits with status 0 on a usage error; kept for compatibility with callers
        sys.exit(0)

    newickFileName = sys.argv[1]
    isNeighbourTree = newickFileName == "tree2LeafNeighbour.dnd"
    if isNeighbourTree:
        outputFileName = sys.argv[2] + "/ApplicationNeighbourOutput.txt"
    else:
        outputFileName = sys.argv[2] + "/ApplicationOutput.txt"
    testFileName = sys.argv[2] + '/'

    print('Starting application...')
    startTime = time.time()

    if globals.printToConsole:
        print('Reading newick tree from file: %s...' % (newickFileName))
    newickTree = Phylo.read(newickFileName, 'newick')
    if globals.printToConsole:
        Phylo.draw(newickTree)

    globals.strains = strains  #Assign pointer to the global strains array so we can access it anywhere
    createFile(outputFileName, newickTree)  #Creates file where data will be output

    #Traverses the newick tree recursively reconstructing ancestral genomes
    if globals.printToConsole:
        print('Traversing newick tree...')
    traverseNewickTree(newickTree.clade, None)

    totalTime = time.time() - startTime

    #Output ancestral genome to console
    if globals.printToConsole:
        print('This is the root ancestral genome!')

    root = newickTree.clade
    if isNeighbourTree:
        if len(root.clades) == 2:
            #The child with two clades of its own is the reconstructed ancestor, the other is the neighbour
            child = root.clades[0]
            if len(child.clades) != 2:
                child = root.clades[1]
                neighbour = root.clades[0]
            else:
                neighbour = root.clades[1]

            #Only write when a genome string exists; the old fallback value was a list,
            #which file.write() rejects
            #NOTE(review): nesting of this write was not recoverable from the collapsed source - confirm
            rootGenome = _findGenomeByName(child.name)
            if rootGenome is not None:
                with open(testFileName + "appNeighbourRoot.txt", "w+") as f:
                    f.write(rootGenome)

            #Blank the names so these nodes are not output by the traversal below
            neighbour.name = ''
            child.name = ''
    else:
        rootGenome = _findGenomeByName(root.name)
        if rootGenome is not None:
            with open(testFileName + "appRoot.txt", "w+") as f:
                f.write(rootGenome)

    if globals.printToConsole:
        #Output newick tree after the ancestors have been added to it
        Phylo.draw(newickTree)

    #Need to traverse tree to output appropriate content to file
    newickTree.clade.name = ''  #Make sure that the output for the root is not output
    traverseNewickTreeAndOutputToFile(newickTree.clade)

    #Output the totals for the computation to console and file
    outputTotalsToFile(outputFileName, totalTime)

    #TODO compute lineage

    #Output Bar graphs of each event
    if globals.printToConsole:
        createBarGraph(globals.deletionSizeCounter, 'Distribution of Deletions')
        createBarGraph(globals.duplicationSizeCounter, 'Distribution of Duplications')
        createBarGraph(globals.inversionSizeDistributionCounter, 'Distribution of Inversions')
        createBarGraph(globals.transpositionSizeDistributionCounter, 'Distribution of Transpositions')
        createBarGraph(globals.invertedTranspositionSizeDistributionCounter, 'Distribution of Inverted Transpositions')

    print('Total time (in seconds): %s' % (totalTime))
    print('Ending application...')
def createAncestor(strain1, strain2, neighborStrain):
    """Construct the ancestor of two sibling strains, optionally using a neighbor.

    strain1, strain2 -- the sibling strains to compare
    neighborStrain   -- a neighboring strain, or None if there is none

    Returns a BacterialStrain named 'Ancestor <n>' built from the computed
    ancestral fragments.

    Side effects: increments the counters on the globals module, prints
    progress, draws the dot plot and bar graphs, and appends the details of
    both strains to outputFileName.
    """
    globals.ancestralCounter += 1
    ancestralName = 'Ancestor ' + str(globals.ancestralCounter)

    #Deep copies so that the comparison with the neighbor is not stored in the real strains
    strain1Copy = copy.deepcopy(strain1)
    neighborCopy = copy.deepcopy(neighborStrain)

    print('Performing a series of alignments for the following strains: %s, %s' % (strain1.name, strain2.name))
    events, duplicatesStrain1, duplicatesStrain2 = constructEvents(strain1, strain2)

    print('Constructing dot plot for the following strains: %s, %s' % (strain1.name, strain2.name))
    points, lostPoints = normalizeIndexesForDotPlot(events, duplicatesStrain1, duplicatesStrain2, strain1, strain2)
    createDotPlot(points, strain1, strain2)

    createBarGraph(strain1.duplicationCounts, 'Distribution of Duplications for %s' % (strain1.name))
    createBarGraph(strain2.duplicationCounts, 'Distribution of Duplications for %s' % (strain2.name))
    #Remember! Deletions refer to the other strain!
    createBarGraph(strain1.deletionCounts, 'Distribution of Deletions for %s' % (strain1.name))
    createBarGraph(strain2.deletionCounts, 'Distribution of Deletions for %s' % (strain2.name))

    #Compute the forward conserved, transposed, inverted, and inverted transposed regions
    FCR, TR, IR, ITR = determineRegions(points)

    #Compare one of the siblings to the neighbor if one exists
    if neighborCopy is not None:
        print('Now performing a series of alignments between the nighboring strains: %s, %s' % (strain1Copy.name, neighborCopy.name))
        neighborEvents, duplicatesStrain1Copy, duplicatesStrainNeighbor = constructEvents(strain1Copy, neighborCopy)

        print('Constructing dot plot for the neighboring strains: %s, %s' % (strain1Copy.name, neighborCopy.name))
        neighborPoints, neighborLostPoints = normalizeIndexesForDotPlot(
            neighborEvents, duplicatesStrain1Copy, duplicatesStrainNeighbor, strain1Copy, neighborCopy)

        #Compute the various regions for the neighbor
        NFCR, NTR, NIR, NITR = determineRegions(neighborPoints)
        ancestralFragments, strain1, strain2 = determineAncestralFragmentArrangementUsingNeighbor(
            FCR, TR, IR, ITR, lostPoints, NFCR, NTR, NIR, NITR, neighborLostPoints, strain1, strain2)
    else:
        #The former 'No inverted or transposed regions detected!!' branch was an
        #elif behind 'neighborCopy == None' and could never run here, so it is gone
        print('No neighbor found!')
        ancestralFragments, strain2 = determineAncestralFragmentArrangementWithoutNeighbor(
            FCR, TR, IR, ITR, lostPoints, strain2)

    #Computes the total number of inversions, transpositions, inverted transpositions
    globals.inversionCounter += len(IR)
    globals.transposedCounter += len(TR)
    globals.invertedTransposedCounter += len(ITR)

    #Increments the counters for the size distributions for each event type
    updateGlobalDeletionCounter(strain1)
    updateGlobalDeletionCounter(strain2)
    updateGlobalDuplicationCounter(strain1)
    updateGlobalDuplicationCounter(strain2)
    updateGlobalInversionSizeDistributionCounter(strain1)
    updateGlobalInversionSizeDistributionCounter(strain2)
    updateGlobalTranspositionSizeDistributionCounter(strain1)
    updateGlobalTranspositionSizeDistributionCounter(strain2)
    updateGlobalInvertedTranspositionSizeDistributionCounter(strain1)
    updateGlobalInvertedTranspositionSizeDistributionCounter(strain2)

    #Append all details to file here
    outputStrainDetailsToFile(outputFileName, strain1)
    outputStrainDetailsToFile(outputFileName, strain2)

    return BacterialStrain(ancestralName, ancestralFragments)