def main():
    """Print the transition and transversion percentages for the test file.

    Loads 'transversionVsTransitionTest.fasta' through readfasta, hands the
    result to calcTransitionTransversionRates, and prints the first two
    entries of the returned rates, one message per line.
    """
    fastaRecords = readfasta('transversionVsTransitionTest.fasta')
    rates = calcTransitionTransversionRates(fastaRecords)

    # Index 0 is reported as transitions, index 1 as transversions.
    messages = (
        'The percentage of transitions is {}%',
        'The percentage of transversions is {}%',
    )
    for index, template in enumerate(messages):
        print(template.format(rates[index]))
def main(geneFileName, outputFileName):
    """Translate every gene in a FASTA file and report its transmembrane domains.

    For each record returned by readfasta(geneFileName), the DNA in field 2
    is translated with translateGeneDNAToAASequence, the first and last
    characters of the translation are dropped, and the trimmed sequence is
    passed to calcTransmemDomainsHH and calcTransmemDomainsKD. Field 1 of the
    record, the trimmed sequence, and the regions found by each predictor are
    then written to the report.

    Args:
        geneFileName: path of the FASTA file to read.
        outputFileName: path of the report file; it is opened in 'w' mode, so
            any existing content is replaced.
    """
    geneList = readfasta(geneFileName)

    # The context manager guarantees the report is flushed and closed, even
    # if translation or prediction raises part-way through the gene list.
    with open(outputFileName, 'w') as outputFile:
        for gene in geneList:
            AASequence = translateGeneDNAToAASequence(gene[2])
            # Drop the first and last characters of the translation.
            AASequence = AASequence[1:-1]

            # Each predictor yields regions indexable as (start, end) slices
            # of the trimmed sequence.
            transmemDomainsHH = calcTransmemDomainsHH(AASequence)
            transmemDomainsKD = calcTransmemDomainsKD(AASequence)

            outputFile.write(gene[1] + '\n')
            outputFile.write(AASequence + '\n\n')

            outputFile.write(
                'According to hydrophobicity and helicity there are ' +
                str(len(transmemDomainsHH)) +
                ' that identify as transmembrane domains. They are:\n')
            for TDregion in transmemDomainsHH:
                outputFile.write(AASequence[TDregion[0]:TDregion[1]] + ', ')

            outputFile.write(
                '\n\nAccording to the Kyte Dolittle scale there are ' +
                str(len(transmemDomainsKD)) +
                ' that identify as transmembrane domains. They are:\n')
            for TDregion in transmemDomainsKD:
                outputFile.write(AASequence[TDregion[0]:TDregion[1]] + ', ')

            outputFile.write('\n\n')
def main(DNAFileName):
    """Merge all sequences of a FASTA file pairwise, then score each input.

    Starting from a copy of the records read from DNAFileName, the pair
    chosen by bestCombination is removed and replaced by a single record
    built with backtraceMatchOnly; this is repeated once per record minus
    one. The sequence of the first remaining record is then aligned against
    every original record, and the score plus the first element of the
    backtrace result are printed for each.

    Args:
        DNAFileName: path of the FASTA file to read.
    """
    sys.setrecursionlimit(100000)

    # Scoring parameters, passed unchanged to every alignment helper.
    params = [5, -4, -11]

    DNAList = readfasta(DNAFileName)
    workingSet = list(DNAList)
    mergeCount = len(DNAList) - 1

    for mergeIndex in range(mergeCount):
        chosen = bestCombination(workingSet, params)
        workingSet.remove(chosen[0])
        workingSet.remove(chosen[1])

        # chosen[2] is whatever bestCombination produced for the pair
        # (presumably its alignment memo -- confirm against bestCombination).
        mergedSequence = backtraceMatchOnly(
            chosen[2], chosen[0][2], chosen[1][2], params)
        mergeLabel = str(mergeIndex)
        workingSet.append([mergeLabel, str(mergeIndex), mergedSequence])

    bestAlignment = workingSet[0][2]
    targetLength = len(bestAlignment)

    for record in DNAList:
        sequence = record[2]
        memo = buildBlankMemo(len(sequence), targetLength, params)
        score = calcMemo(len(sequence), targetLength, sequence,
                         bestAlignment, memo, params)
        print('Alignment Score for ' + record[0] + ': ' + str(score))
        traced = backtrace(memo, sequence, bestAlignment, params)
        print(str(traced[0]) + '\n')
def main():
    """Bootstrap the alignment in 'FinalProblemCS4.txt' and print the consensus tree.

    The user is asked for the number of bootstrap replicates. For each one,
    as many columns as the first sequence is long are drawn at random (with
    replacement) from the alignment; informative columns contribute their
    getParsimonyScores values to the 'Alpha', 'Beta' and 'Gamma' totals, and
    the topology with the highest total is recorded for that replicate.

    The resampled columns and supported topology of up to the first three
    replicates are printed, followed by the most frequently supported
    topology and the percentage of replicates that supported it.

    Raises:
        ValueError: if the entered replicate count is not an integer or is
            smaller than 1.
    """
    sequences = readfasta('FinalProblemCS4.txt')
    consensusTrees = []
    resampleMatrices = []
    bootstrapReplicates = int(input('How many bootstrap replicates should be made?: '))

    # Without at least one replicate there is nothing to take a consensus of
    # and the bootstrap percentage would divide by zero.
    if bootstrapReplicates < 1:
        raise ValueError('At least one bootstrap replicate is required')

    # Alignment length is taken from the first sequence and does not change.
    siteCount = len(sequences[0][2])

    for _ in range(bootstrapReplicates):
        treeSubstitutions = {'Alpha': 0, 'Beta': 0, 'Gamma': 0}
        resampleMatrix = []
        for _site in range(siteCount):
            randomSite = random.randrange(0, siteCount)
            # One resampled alignment column: the same site from every sequence.
            siteValues = [seq[2][randomSite] for seq in sequences]
            resampleMatrix.append(siteValues)
            if isInformative(siteValues):
                currentScores = getParsimonyScores(siteValues)
                treeSubstitutions['Alpha'] += currentScores[0]
                treeSubstitutions['Beta'] += currentScores[1]
                treeSubstitutions['Gamma'] += currentScores[2]
        # NOTE(review): the topology with the HIGHEST total is selected;
        # confirm getParsimonyScores returns support rather than a cost.
        currentConsensusTree = max(treeSubstitutions, key=treeSubstitutions.get)
        consensusTrees.append(currentConsensusTree)
        resampleMatrices.append(resampleMatrix)

    consensusTreeCount = Counter(consensusTrees)
    consensusTree = max(consensusTreeCount, key=consensusTreeCount.get)

    #Output
    # Show at most three replicates; fewer if fewer were generated.
    samplesToShow = min(3, len(resampleMatrices))
    for sample in range(samplesToShow):
        replicates = makeBootstrapReplicatesPrintable(resampleMatrices[sample])
        print('Bootstrap number {}'.format(sample))
        for repNumber, replicate in enumerate(replicates):
            print('Replicate number {}'.format(repNumber))
            print(replicate + '\n')
        print('Supported Topology: ')
        printConsensusTree(consensusTrees[sample], sequences)
        print('\n')

    print('The Final Consensus Tree Topology:')
    printConsensusTree(consensusTree, sequences)
    print('Bootstrap Value: {}%'.format(
        consensusTreeCount[consensusTree]/bootstrapReplicates * 100))
def main():
    """Run the influenza B simulation and write yearly samples to 'dataForTree.fasta'.

    The first two records of 'originalSequences.fasta' are passed to
    influenzaBSimulation together with the fixed settings defined below.
    The result is iterated year by year; for each year, five records are
    drawn at random (with replacement) and fields 1 and 2 of each are written
    on separate lines, followed by one blank line per year.
    """
    sequences = readfasta('originalSequences.fasta')

    # Fixed simulation settings, forwarded unchanged to influenzaBSimulation.
    mutationRates = [0.00241, 0.00139]
    transiTransvRates = [0.77, 0.23]
    yamaAntigenicSites = [[116, 137], [141, 150], [162, 167], [195, 203]]
    victAntigenicSites = [[116, 137], [141, 150], [162, 167], [197, 205]]
    totalPopulation = 100
    simulationYears = 10

    data = influenzaBSimulation(totalPopulation, simulationYears,
                                sequences[0], sequences[1], mutationRates,
                                transiTransvRates, yamaAntigenicSites,
                                victAntigenicSites)

    #generates output file for use in a phylogenetic tree
    yearlySampleSize = 5
    # The context manager closes (and flushes) the file even if sampling
    # raises, e.g. random.choice on an empty year.
    with open('dataForTree.fasta', 'w') as outputFile:
        for year in data:
            for _ in range(yearlySampleSize):
                randomSeq = random.choice(year)
                outputFile.write(str(randomSeq[1]) + '\n')
                outputFile.write(str(randomSeq[2]) + '\n')
            # Blank line separating one year's samples from the next.
            outputFile.write('\n')
def main(DNAFileName): sys.setrecursionlimit(100000) #sys.tracebacklimit = 0 hoxD55Scoring = { 'A': { 'A': 91, 'C': -90, 'G': -25, 'T': -100 }, 'C': { 'A': -90, 'C': 100, 'G': -100, 'T': -25 }, 'G': { 'A': -25, 'C': -100, 'G': 100, 'T': -90 }, 'T': { 'A': -100, 'C': -25, 'G': -90, 'T': 91 } } hoxD70Scoring = { 'A': { 'A': 91, 'C': -114, 'G': -31, 'T': -123 }, 'C': { 'A': -114, 'C': 100, 'G': -125, 'T': -31 }, 'G': { 'A': -31, 'C': -125, 'G': 100, 'T': -114 }, 'T': { 'A': -123, 'C': -31, 'G': -114, 'T': 91 } } gapPenalties = [-120, -140] hiScoreCords = [] DNAList = readfasta(DNAFileName) tempDNAList = DNAList[:] for i in range(len(DNAList) - 1): bestCombo = bestLocalCombination(tempDNAList, hoxD70Scoring, gapPenalties[1]) hiScoreCords = calcHighScoreCord(bestCombo[2]) alignedSeqs = localBacktrace(bestCombo[2], bestCombo[0][2], bestCombo[1][2], gapPenalties[1], hiScoreCords) assembledSeq = assembleContigs(bestCombo[0][2], bestCombo[1][2], alignedSeqs[0], alignedSeqs[1]) if assembledSeq == 'flag': raise NameError(bestCombo[0][0] + ' and ' + bestCombo[1][0] + ' do not fit into defined assemblies') tempDNAList.remove(bestCombo[0]) tempDNAList.remove(bestCombo[1]) tempDNAList.append([ bestCombo[0][0] + bestCombo[1][0], bestCombo[0][1] + bestCombo[1][1], assembledSeq ]) print(tempDNAList[-1][0]) print(tempDNAList[-1][2] + '\n') averageCoverage = calcCoverage(tempDNAList[0][2], DNAList, hoxD70Scoring, gapPenalties[1]) print('The fully assembled sequence:\n' + tempDNAList[0][2]) print('Coverage: ' + str(averageCoverage)) '''
def main(): sequence = readfasta() while not alphaHelixFound and count < len(sequence[0][2]):