def main():
    """Print transition and transversion percentages for the test FASTA file.

    Loads 'transversionVsTransitionTest.fasta' through readfasta, hands the
    result to calcTransitionTransversionRates, and prints element 0 of the
    returned value as the transition percentage and element 1 as the
    transversion percentage.
    """
    transitionLine = 'The percentage of transitions is {}%'
    transversionLine = 'The percentage of transversions is {}%'

    fastaRecords = readfasta('transversionVsTransitionTest.fasta')
    ratePair = calcTransitionTransversionRates(fastaRecords)

    print(transitionLine.format(ratePair[0]))
    print(transversionLine.format(ratePair[1]))
# Example #2
# 0
def main(geneFileName, outputFileName):
    """Write a transmembrane-domain report for every gene in a FASTA file.

    For each record returned by readfasta, element 2 is translated with
    translateGeneDNAToAASequence, the first and last characters of the
    translation are dropped, and two region lists are computed
    (calcTransmemDomainsHH and calcTransmemDomainsKD). The record's element 1,
    the trimmed amino-acid sequence and both region lists are then written to
    the output file.

    Args:
        geneFileName: path handed to readfasta.
        outputFileName: path of the report file; it is created or truncated.
    """
    geneList = readfasta(geneFileName)

    # The context manager guarantees the report is flushed and closed, even
    # when translation or domain detection raises part-way through the loop.
    with open(outputFileName, 'w') as outputFile:
        for gene in geneList:
            AASequence = translateGeneDNAToAASequence(gene[2])
            # Strip the first and last character of the translation
            # (presumably the start residue and the stop marker -- confirm
            # against translateGeneDNAToAASequence).
            AASequence = AASequence[1:(len(AASequence) - 1)]
            transmemDomainsHH = calcTransmemDomainsHH(AASequence)
            transmemDomainsKD = calcTransmemDomainsKD(AASequence)

            outputFile.write(gene[1] + '\n')
            outputFile.write(AASequence + '\n\n')

            outputFile.write(
                'According to hydrophobicity and helicity there are ' +
                str(len(transmemDomainsHH)) +
                ' that identify as transmembrane domains. They are:\n')
            # Each region is used as a [start, end) slice of the sequence.
            outputFile.write(''.join(
                AASequence[TDregion[0]:TDregion[1]] + ', '
                for TDregion in transmemDomainsHH))

            outputFile.write(
                '\n\nAccording to the Kyte Dolittle scale there are ' +
                str(len(transmemDomainsKD)) +
                ' that identify as transmembrane domains. They are:\n')
            outputFile.write(''.join(
                AASequence[TDregion[0]:TDregion[1]] + ', '
                for TDregion in transmemDomainsKD))

            outputFile.write('\n\n')
def main(DNAFileName):
    """Merge all sequences pairwise, then align each input to the result.

    The records read from DNAFileName are repeatedly reduced: on every pass
    bestCombination selects two records from the working pool, both are
    removed, and a new record built by backtraceMatchOnly is appended in
    their place. After len(records) - 1 passes, element 2 of the first
    remaining record is treated as the final alignment. Every original
    record is then scored against it and the score plus the first element
    of the backtrace result are printed.

    Args:
        DNAFileName: path handed to readfasta.
    """
    # The alignment helpers are assumed to be recursive, hence the raised limit.
    sys.setrecursionlimit(100000)
    # Scoring parameters forwarded untouched to every helper
    # (presumably match / mismatch / gap -- confirm against the helpers).
    scoring = [5, -4, -11]

    originals = readfasta(DNAFileName)
    pool = list(originals)

    for step in range(len(originals) - 1):
        chosen = bestCombination(pool, scoring)
        pool.remove(chosen[0])
        pool.remove(chosen[1])
        # The merged record reuses the pass number as both of its labels.
        label = str(step)
        merged = backtraceMatchOnly(chosen[2], chosen[0][2], chosen[1][2],
                                    scoring)
        pool.append([label, label, merged])

    finalAlignment = pool[0][2]

    for record in originals:
        sequence = record[2]
        table = buildBlankMemo(len(sequence), len(finalAlignment), scoring)
        score = calcMemo(len(sequence), len(finalAlignment), sequence,
                         finalAlignment, table, scoring)
        print('Alignment Score for ' + record[0] + ': ' + str(score))
        traced = backtrace(table, sequence, finalAlignment, scoring)
        print(str(traced[0]) + '\n')
def main():
    """Run a bootstrap analysis over 'FinalProblemCS4.txt' and print the result.

    The user is asked for the number of bootstrap replicates. For each
    replicate, as many sites as the first sequence is long are drawn at
    random (with replacement) from the sequences; every informative resampled
    site adds its three getParsimonyScores values to the running totals of
    the 'Alpha', 'Beta' and 'Gamma' topologies. The topology with the largest
    total is recorded for that replicate. Finally the first few replicates
    are printed in detail, followed by the most frequently selected topology
    and the percentage of replicates that selected it.

    Raises:
        ValueError: if the entered replicate count is not an integer or is
            not positive.
    """
    # Number of replicates printed in full in the output section.
    detailedSampleLimit = 3

    sequences = readfasta('FinalProblemCS4.txt')
    consensusTrees = []
    resampleMatrices = []
    bootstrapReplicates = int(
        input('How many bootstrap replicates should be made?: '))
    if bootstrapReplicates <= 0:
        # Without at least one replicate there is nothing to summarise; fail
        # with a clear message instead of an opaque max() error further down.
        raise ValueError('The number of bootstrap replicates must be positive')

    siteCount = len(sequences[0][2])
    for _ in range(bootstrapReplicates):
        treeSubstitutions = {'Alpha': 0, 'Beta': 0, 'Gamma': 0}
        resampleMatrix = []
        for _site in range(siteCount):
            # Sample one column with replacement and read it in every sequence.
            randomSite = random.randrange(0, siteCount)
            siteValues = [seq[2][randomSite] for seq in sequences]
            resampleMatrix.append(siteValues)

            if isInformative(siteValues):
                currentScores = getParsimonyScores(siteValues)
                treeSubstitutions['Alpha'] += currentScores[0]
                treeSubstitutions['Beta'] += currentScores[1]
                treeSubstitutions['Gamma'] += currentScores[2]

        # NOTE(review): the topology with the LARGEST total is selected; if
        # getParsimonyScores returns substitution counts, the most
        # parsimonious tree would be the smallest -- confirm with the helper.
        consensusTrees.append(max(treeSubstitutions, key=treeSubstitutions.get))
        resampleMatrices.append(resampleMatrix)

    consensusTreeCount = Counter(consensusTrees)
    consensusTree = max(consensusTreeCount, key=consensusTreeCount.get)

    # Output: never index past the replicates that were actually generated.
    for sample in range(min(detailedSampleLimit, bootstrapReplicates)):
        replicates = makeBootstrapReplicatesPrintable(resampleMatrices[sample])
        print('Bootstrap number {}'.format(sample))
        for rep in range(len(replicates)):
            print('Replicate number {}'.format(rep))
            print(replicates[rep] + '\n')
        print('Supported Topology: ')
        printConsensusTree(consensusTrees[sample], sequences)
        print('\n')

    print('The Final Consensus Tree Topology:')
    printConsensusTree(consensusTree, sequences)
    print('Bootstrap Value: {}%'.format(
        consensusTreeCount[consensusTree]/bootstrapReplicates * 100))
def main():
    """Run the influenza B simulation and dump yearly samples to a file.

    Reads 'originalSequences.fasta', passes its first two records together
    with the constants below to influenzaBSimulation, and then, for every
    year in the returned data, writes five randomly chosen entries to
    'dataForTree.fasta'. Each entry is written as element 1, element 2 and a
    blank line.
    """
    sequences = readfasta('originalSequences.fasta')
    # Constants forwarded unchanged to influenzaBSimulation.
    mutationRates = [0.00241, 0.00139]
    transiTransvRates = [0.77, 0.23]
    yamaAntigenicSites = [[116, 137], [141, 150], [162, 167], [195, 203]]
    victAntigenicSites = [[116, 137], [141, 150], [162, 167], [197, 205]]
    totalPopulation = 100
    simulationYears = 10
    data = influenzaBSimulation(totalPopulation, simulationYears, sequences[0],
                                sequences[1], mutationRates, transiTransvRates,
                                yamaAntigenicSites, victAntigenicSites)

    # Generates the output file for use in a phylogenetic tree. The context
    # manager ensures the file is flushed and closed even if sampling fails.
    yearlySampleSize = 5
    with open('dataForTree.fasta', 'w') as outputFile:
        for year in data:
            for _ in range(yearlySampleSize):
                # Sampling is with replacement: the same entry may be
                # written more than once for a given year.
                randomSeq = random.choice(year)
                outputFile.write(str(randomSeq[1]) + '\n')
                outputFile.write(str(randomSeq[2]) + '\n')
                outputFile.write('\n')
# Example #6
# 0
def main(DNAFileName):
    """Assemble the sequences of a FASTA file into one contig and print it.

    On every pass bestLocalCombination picks two records from the working
    list; they are locally aligned (calcHighScoreCord + localBacktrace) and
    merged by assembleContigs. The two records are replaced by one combined
    record whose labels are the concatenated labels of the pair, and the
    combined record is printed. After len(records) - 1 passes, the coverage
    of the remaining sequence is computed with calcCoverage and both are
    printed.

    Args:
        DNAFileName: path handed to readfasta.

    Raises:
        NameError: if assembleContigs returns the marker value 'flag' for
            the selected pair.
    """
    # The alignment helpers are assumed to be recursive, hence the raised limit.
    sys.setrecursionlimit(100000)

    bases = 'ACGT'
    # Rows and columns of both tables follow the order A, C, G, T.
    hoxD55Rows = [
        [91, -90, -25, -100],
        [-90, 100, -100, -25],
        [-25, -100, 100, -90],
        [-100, -25, -90, 91],
    ]
    hoxD70Rows = [
        [91, -114, -31, -123],
        [-114, 100, -125, -31],
        [-31, -125, 100, -114],
        [-123, -31, -114, 91],
    ]
    # hoxD55Scoring is built but not used below; only hoxD70Scoring is
    # passed to the helpers.
    hoxD55Scoring = {
        rowBase: dict(zip(bases, row))
        for rowBase, row in zip(bases, hoxD55Rows)
    }
    hoxD70Scoring = {
        rowBase: dict(zip(bases, row))
        for rowBase, row in zip(bases, hoxD70Rows)
    }
    gapPenalties = [-120, -140]
    # Only the second penalty is used by this function.
    gapPenalty = gapPenalties[1]

    originals = readfasta(DNAFileName)
    working = originals[:]

    for _ in range(len(originals) - 1):
        combo = bestLocalCombination(working, hoxD70Scoring, gapPenalty)
        memo = combo[2]
        left = combo[0]
        right = combo[1]

        peak = calcHighScoreCord(memo)
        aligned = localBacktrace(memo, left[2], right[2], gapPenalty, peak)
        contig = assembleContigs(left[2], right[2], aligned[0], aligned[1])
        if contig == 'flag':
            raise NameError(left[0] + ' and ' + right[0] +
                            ' do not fit into defined assemblies')

        working.remove(left)
        working.remove(right)
        combined = [left[0] + right[0], left[1] + right[1], contig]
        working.append(combined)
        print(combined[0])
        print(combined[2] + '\n')

    assembled = working[0][2]
    averageCoverage = calcCoverage(assembled, originals, hoxD70Scoring,
                                   gapPenalty)

    print('The fully assembled sequence:\n' + assembled)
    print('Coverage: ' + str(averageCoverage))
    ''' 
def main():
    sequence = readfasta()

    while not alphaHelixFound and count < len(sequence[0][2]):