コード例 #1
0
def _splitDetailEntries(details, label):
    """Return the non-empty ';'-separated entries of details with label removed.

    A real list is returned (not a lazy ``filter`` object) so that callers can
    take its length and index into it on Python 3 as well as Python 2.
    """
    stripped = details.replace(label, '').strip()
    return [entry for entry in stripped.split(';') if entry]


def _putGeneInAncestor(ancestor, gene, idNumber, replacement):
    """Replace the 'gene-idNumber' marker with replacement in the ancestor.

    Only the first fragment whose originalSequence contains idNumber is
    touched; within it, the first element of fragment.sequence containing
    idNumber is overwritten with replacement.
    """
    for fragment in ancestor.genomeFragments:
        if idNumber in fragment.originalSequence:
            fragment.originalSequence = fragment.originalSequence.replace(
                gene + '-' + idNumber, replacement)
            for m, entry in enumerate(fragment.sequence):
                if idNumber in entry:
                    fragment.sequence[m] = replacement
                    break
            break


def _reconcileMismatchDetails(ancestor, details1, details2, neighborDetails,
                              label, marker):
    """Decide, entry by entry, which sibling carries a point difference.

    Args:
        ancestor: strain whose genomeFragments are patched in place.
        details1: detail string recorded on strain 1 (prefixed with label).
        details2: detail string recorded on strain 2; it must hold at least as
            many entries as details1 (an IndexError is raised otherwise).
        neighborDetails: detail string recorded when strain 1 was compared to
            the neighbor, or None when there is no neighbor.
        label: prefix of the detail strings, e.g. 'Substitution:'.
        marker: character whose presence in neighborDetails signals that the
            neighbor comparison recorded entries of this kind.

    Returns:
        (newDetails1, newDetails2): the rebuilt detail strings for strain 1
        and strain 2.
    """
    entries1 = _splitDetailEntries(details1, label)
    entries2 = _splitDetailEntries(details2, label)

    #The neighbor entries do not depend on the sibling entry being examined, so
    #parse them once up front (assumes parseDetails is a pure parser - TODO confirm)
    neighborKeys = []
    if neighborDetails is not None and marker in neighborDetails:
        for entry in _splitDetailEntries(neighborDetails, label):
            gene3, _idNumber3, position3 = parseDetails(entry)
            neighborKeys.append((gene3, position3))

    newDetails1 = label
    newDetails2 = label
    for w, entry1 in enumerate(entries1):
        gene1, idNumber1, position1 = parseDetails(entry1)
        gene2, _idNumber2, position2 = parseDetails(entries2[w])
        processed = False  #Tracks whether the neighbor showed the same difference

        for gene3, position3 in neighborKeys:
            if gene1 == gene3 and position1 == position3:
                #Strain 1 differs from the neighbor in the same place, so keep strain 2's version of the gene
                processed = True
                _putGeneInAncestor(ancestor, gene1, idNumber1, gene2)

        if processed:
            #Strain 2's gene went into the ancestor, so the difference is attributed to strain 1
            newDetails1 += gene1 + ' ' + position1 + ';'
        else:
            #No neighbor, or the neighbor did not show it: assume strain 2 carries the difference
            newDetails2 += gene2 + ' ' + position2 + ';'
            _putGeneInAncestor(ancestor, gene1, idNumber1, gene1)

    return newDetails1, newDetails2


def createAncestor(strain1, strain2, neighborStrain):
    """Build the ancestral strain of two siblings, optionally using a neighbor.

    The siblings are aligned with constructEvents, their rearranged regions are
    derived from the dot-plot points, and the ancestral fragment arrangement is
    determined either with or without the neighbor. Codon mismatches and
    substitutions recorded on the siblings are then reconciled against the
    neighbor comparison and written into the ancestor's fragments.

    Args:
        strain1: first sibling strain.
        strain2: second sibling strain.
        neighborStrain: neighboring strain used to break ties, or None.

    Returns:
        A BacterialStrain named 'Ancestor <n>' built from the ancestral
        fragments, where <n> is the incremented globals.ancestralCounter.

    Side effects:
        Increments several counters on the ``globals`` module, temporarily
        enables globals.enableDeletionReversions / enableSelfAlignmentDetails,
        and rewrites codonMismatchDetails / substitutionDetails on the sibling
        strain objects.
    """
    globals.ancestralCounter += 1
    ancestralName = 'Ancestor ' + str(globals.ancestralCounter)

    #Deep copies are compared to the neighbor so those comparisons are not stored in the real strains
    strain1Copy = copy.deepcopy(strain1)
    neighborCopy = copy.deepcopy(neighborStrain)
    hasNeighbor = neighborCopy is not None

    if globals.printToConsole:
        print(
            'Performing a series of alignments for the following strains: %s, %s'
            % (strain1.name, strain2.name))

    #Only do the backtrace between these two strains! The flags are reset in a
    #finally block so a failed alignment cannot leave them switched on.
    globals.enableDeletionReversions = True
    globals.enableSelfAlignmentDetails = True
    try:
        events, duplicatesStrain1, duplicatesStrain2 = constructEvents(
            strain1, strain2)
    finally:
        globals.enableSelfAlignmentDetails = False
        globals.enableDeletionReversions = False

    if globals.printToConsole:
        print('Constructing dot plot for the following strains: %s, %s' %
              (strain1.name, strain2.name))
    points, lostPoints = normalizeIndexesForDotPlot(events, duplicatesStrain1,
                                                    duplicatesStrain2, strain1,
                                                    strain2)
    if globals.printToConsole:
        createDotPlot(points, strain1, strain2)

    #Compute the forward conserved, transposed, inverted, and inverted transposed regions
    FCR, TR, IR, ITR = determineRegions(points)

    #Compare one of the siblings to the neighbor if one exists
    if hasNeighbor:
        if globals.printToConsole:
            print(
                'Now performing a series of alignments between the nighboring strains: %s, %s'
                % (strain1Copy.name, neighborCopy.name))
        neighborEvents, duplicatesStrain1Copy, duplicatesStrainNeighbor = constructEvents(
            strain1Copy, neighborCopy)
        if globals.printToConsole:
            print('Constructing dot plot for the neighboring strains: %s, %s' %
                  (strain1Copy.name, neighborCopy.name))
        neighborPoints, neighborLostPoints = normalizeIndexesForDotPlot(
            neighborEvents, duplicatesStrain1Copy, duplicatesStrainNeighbor,
            strain1Copy, neighborCopy)

        #Compute the various regions for the neighbor
        NFCR, NTR, NIR, NITR = determineRegions(neighborPoints)
        ancestralFragments, strain1, strain2 = determineAncestralFragmentArrangementUsingNeighbor(
            FCR, TR, IR, ITR, lostPoints, NFCR, NTR, NIR, NITR,
            neighborLostPoints, strain1, strain2)
    else:
        if globals.printToConsole:
            print('No neighbor found!')
        ancestralFragments, strain2 = determineAncestralFragmentArrangementWithoutNeighbor(
            FCR, TR, IR, ITR, lostPoints, strain2)

    #Computes the total number of inversions, transpositions, inverted transpositions
    globals.inversionCounter += len(IR)
    globals.transposedCounter += len(TR)
    globals.invertedTransposedCounter += len(ITR)

    #Increments the counters for the size distributions for each event type
    updateGlobalDeletionCounter(strain1)
    updateGlobalDeletionCounter(strain2)
    updateGlobalDuplicationCounter(strain1)
    updateGlobalDuplicationCounter(strain2)
    updateGlobalInversionSizeDistributionCounter(strain1)
    updateGlobalInversionSizeDistributionCounter(strain2)
    updateGlobalTranspositionSizeDistributionCounter(strain1)
    updateGlobalTranspositionSizeDistributionCounter(strain2)
    updateGlobalInvertedTranspositionSizeDistributionCounter(strain1)
    updateGlobalInvertedTranspositionSizeDistributionCounter(strain2)

    #Count these for one strain only, otherwise every event is counted twice
    updateGlobalCodonMismatchCounter(strain2)
    updateGlobalSubstitutionCounter(strain2)

    ancestor = BacterialStrain(ancestralName, ancestralFragments)

    if globals.printToConsole:
        for strain in (strain1, strain2):
            print(strain.name)
            for frag in strain.genomeFragments:
                print(frag.originalSequence)

    #Handle the codon mismatches ('#' marks recorded entries)
    if '#' in strain1.codonMismatchDetails:
        neighborDetails = strain1Copy.codonMismatchDetails if hasNeighbor else None
        strain1.codonMismatchDetails, strain2.codonMismatchDetails = _reconcileMismatchDetails(
            ancestor, strain1.codonMismatchDetails,
            strain2.codonMismatchDetails, neighborDetails, 'Codon Mismatch:',
            '#')

    #Handle the substitutions ('@' marks recorded entries)
    if '@' in strain1.substitutionDetails:
        neighborDetails = strain1Copy.substitutionDetails if hasNeighbor else None
        strain1.substitutionDetails, strain2.substitutionDetails = _reconcileMismatchDetails(
            ancestor, strain1.substitutionDetails, strain2.substitutionDetails,
            neighborDetails, 'Substitution:', '@')

    #Details from the self global alignment were kept in separate variables so they
    #would not interfere with the two handlers above; append them now
    strain1.codonMismatchDetails += strain1.tempCodonDetails
    strain2.codonMismatchDetails += strain2.tempCodonDetails
    strain1.substitutionDetails += strain1.tempSubstitutionDetails
    strain2.substitutionDetails += strain2.tempSubstitutionDetails

    return ancestor
コード例 #2
0
ファイル: main.py プロジェクト: umpawlis/MastersProject
def createAncestor(strain1, strain2, neighborStrain):
    """Build the ancestral strain of two siblings, optionally using a neighbor.

    The siblings are aligned with constructEvents, a dot plot and bar graphs of
    their duplication/deletion counts are produced, and the ancestral fragment
    arrangement is determined either with or without the neighbor.

    Args:
        strain1: first sibling strain.
        strain2: second sibling strain.
        neighborStrain: neighboring strain used to break ties, or None.

    Returns:
        A BacterialStrain named 'Ancestor <n>' built from the ancestral
        fragments, where <n> is the incremented globals.ancestralCounter.

    Side effects:
        Increments several counters on the ``globals`` module, prints progress
        messages, and appends both strains' details to outputFileName.
    """
    globals.ancestralCounter += 1
    ancestralName = 'Ancestor ' + str(globals.ancestralCounter)

    #Deep copies are compared to the neighbor so those comparisons are not stored in the real strains
    strain1Copy = copy.deepcopy(strain1)
    neighborCopy = copy.deepcopy(neighborStrain)

    print(
        'Performing a series of alignments for the following strains: %s, %s' %
        (strain1.name, strain2.name))
    events, duplicatesStrain1, duplicatesStrain2 = constructEvents(
        strain1, strain2)

    print('Constructing dot plot for the following strains: %s, %s' %
          (strain1.name, strain2.name))
    points, lostPoints = normalizeIndexesForDotPlot(events, duplicatesStrain1,
                                                    duplicatesStrain2, strain1,
                                                    strain2)
    createDotPlot(points, strain1, strain2)

    createBarGraph(strain1.duplicationCounts,
                   'Distribution of Duplications for %s' % (strain1.name))
    createBarGraph(strain2.duplicationCounts,
                   'Distribution of Duplications for %s' % (strain2.name))
    #Remember! Deletions refer to the other strain!
    createBarGraph(strain1.deletionCounts,
                   'Distribution of Deletions for %s' % (strain1.name))
    createBarGraph(strain2.deletionCounts,
                   'Distribution of Deletions for %s' % (strain2.name))

    #Compute the forward conserved, transposed, inverted, and inverted transposed regions
    FCR, TR, IR, ITR = determineRegions(points)

    #Compare one of the siblings to the neighbor if one exists
    if neighborCopy is not None:
        print(
            'Now performing a series of alignments between the nighboring strains: %s, %s'
            % (strain1Copy.name, neighborCopy.name))
        neighborEvents, duplicatesStrain1Copy, duplicatesStrainNeighbor = constructEvents(
            strain1Copy, neighborCopy)

        print('Constructing dot plot for the neighboring strains: %s, %s' %
              (strain1Copy.name, neighborCopy.name))
        neighborPoints, neighborLostPoints = normalizeIndexesForDotPlot(
            neighborEvents, duplicatesStrain1Copy, duplicatesStrainNeighbor,
            strain1Copy, neighborCopy)

        #Compute the various regions for the neighbor
        NFCR, NTR, NIR, NITR = determineRegions(neighborPoints)
        ancestralFragments, strain1, strain2 = determineAncestralFragmentArrangementUsingNeighbor(
            FCR, TR, IR, ITR, lostPoints, NFCR, NTR, NIR, NITR,
            neighborLostPoints, strain1, strain2)
    else:
        #This branch is only reached without a neighbor
        print('No neighbor found!')
        ancestralFragments, strain2 = determineAncestralFragmentArrangementWithoutNeighbor(
            FCR, TR, IR, ITR, lostPoints, strain2)

    #Computes the total number of inversions, transpositions, inverted transpositions
    globals.inversionCounter += len(IR)
    globals.transposedCounter += len(TR)
    globals.invertedTransposedCounter += len(ITR)

    #Increments the counters for the size distributions for each event type
    updateGlobalDeletionCounter(strain1)
    updateGlobalDeletionCounter(strain2)
    updateGlobalDuplicationCounter(strain1)
    updateGlobalDuplicationCounter(strain2)
    updateGlobalInversionSizeDistributionCounter(strain1)
    updateGlobalInversionSizeDistributionCounter(strain2)
    updateGlobalTranspositionSizeDistributionCounter(strain1)
    updateGlobalTranspositionSizeDistributionCounter(strain2)
    updateGlobalInvertedTranspositionSizeDistributionCounter(strain1)
    updateGlobalInvertedTranspositionSizeDistributionCounter(strain2)

    #Append all details to file here
    outputStrainDetailsToFile(outputFileName, strain1)
    outputStrainDetailsToFile(outputFileName, strain2)

    return BacterialStrain(ancestralName, ancestralFragments)