/
BioDNA.py
409 lines (333 loc) · 12.9 KB
/
BioDNA.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
import collections
import re
from PIL import Image
import time
from ansicol import color
import sys
def inputExplan(explain, lineLength):
    """Read explanation paragraphs and wrap each one into display lines.

    Every line of ``explain`` other than a bare newline is one explanation.
    Each explanation is cut into pieces of ``lineLength`` characters; then,
    for every piece after the first, the characters before its first space
    are appended to the previous piece and that space is dropped, so a word
    cut by the fixed-width split is rejoined on the earlier line (which may
    therefore be longer than ``lineLength``).  A piece that contains no
    space is moved whole, leaving an empty string in its place.  The line's
    trailing newline, if any, stays part of the text.

    :param explain: explanations, one per line
    :type explain: iterable of strings (e.g. a text file opened for reading)
    :param lineLength: length of each piece before words are rejoined
    :type lineLength: integer
    :return explainDict: wrapped lines keyed "explain0", "explain1", ...
        in input order
    :rtype explainDict: collections.OrderedDict
    :raises ValueError: if lineLength is smaller than 1
    """
    if lineLength < 1:
        raise ValueError("lineLength must be a positive integer")

    explainDict = collections.OrderedDict()
    paragraphs = [line for line in explain if line != "\n"]
    # enumerate (not list.index) so identical paragraphs get distinct keys
    for number, paragraph in enumerate(paragraphs):
        # plain slicing keeps the tail even when the length is an exact
        # multiple of lineLength
        pieces = [paragraph[start:start + lineLength]
                  for start in range(0, len(paragraph), lineLength)]
        for position in range(1, len(pieces)):
            # partition never fails: with no space present the whole piece
            # is the head and the remainder is empty
            head, _, rest = pieces[position].partition(" ")
            pieces[position - 1] += head
            pieces[position] = rest
        explainDict["explain{}".format(number)] = pieces
    return explainDict
def printExplan(explainDict, num):
    """Print one explanation, framed by blank lines.

    :param explainDict: all explanations, keyed "explain<number>"
    :type explainDict: dictionary
    :param num: number of the explanation to print
    :type num: integer
    """
    key = "explain{}".format(num)
    print("\n")
    wrappedLines = explainDict[key]
    for wrappedLine in wrappedLines:
        print(wrappedLine)
    print("\n")
def printImage(img):
    """Show an image file, then wait five seconds.

    :param img: name of image file
    :type img: string
    """
    # the context manager releases the file handle that Image.open holds
    with Image.open(img) as image:
        image.show()
    time.sleep(5)
    # TODO: close image window
def inputDNA():
    """Prompt until the user enters a gene made only of A, T, C and G.

    Input is upper-cased before it is checked, so lower-case bases are
    accepted.  An empty entry contains no invalid letter and is returned
    as an empty string.

    :return DNA: user input gene of interest, upper-cased
    :rtype DNA: string
    """
    validBases = set("ATCG")
    DNA = input("Input gene of interest: ").upper()
    # subset test replaces the per-letter identity (`is`) comparisons
    while not set(DNA) <= validBases:
        DNA = input("Input invalid. Input gene of interest: ").upper()
    return DNA
def makeAminoAcidTable():
    """Build the codon table used for translation.

    Each value has the form "<1-letter symbol>|<3-letter symbol>"; stop
    codons are written "*|***".

    :return AAtable: RNA triplets and corresponding amino acid abbreviations
    :rtype AAtable: dictionary
    """
    bases = "UCAG"
    # one-letter symbols for all 64 codons, listed with the first base
    # varying slowest and the third base fastest over U, C, A, G
    oneLetter = ("FFLLSSSSYY**CC*W"
                 "LLLLPPPPHHQQRRRR"
                 "IIIMTTTTNNKKSSRR"
                 "VVVVAAAADDEEGGGG")
    threeLetter = {
        "F": "Phe", "L": "Leu", "S": "Ser", "Y": "Tyr", "*": "***",
        "C": "Cys", "W": "Trp", "P": "Pro", "H": "His", "Q": "Gln",
        "R": "Arg", "I": "Ile", "M": "Met", "T": "Thr", "N": "Asn",
        "K": "Lys", "V": "Val", "A": "Ala", "D": "Asp", "E": "Glu",
        "G": "Gly",
    }
    codons = [first + second + third
              for first in bases for second in bases for third in bases]
    AAtable = {}
    for codon, letter in zip(codons, oneLetter):
        AAtable[codon] = letter + "|" + threeLetter[letter]
    return AAtable
def complementDNA_to_DNA(DNA):
    """Return the complementary DNA strand (A<->T, C<->G).

    Characters other than A, T, C and G are left out of the result.

    :param DNA: user input (gene of interest)
    :type DNA: string
    :return compStrandDNA: DNA template strand (matching gene of interest)
    :rtype compStrandDNA: string
    """
    pairing = {"A": "T", "T": "A", "C": "G", "G": "C"}
    return "".join(pairing[base] for base in DNA if base in pairing)
def complementDNA_to_RNA(DNA):
    """Return the sequence with every T replaced by U.

    All other characters are copied unchanged.

    :param DNA: user input (gene of interest)
    :type DNA: string
    :return compStrandRNA: mRNA strand (matching gene of interest)
    :rtype compStrandRNA: string
    """
    # single pass instead of a repeated index/remove/insert scan per "T"
    return "".join("U" if base == "T" else base for base in DNA)
def transcribe(DNA):
    """Produce both strands derived from the gene of interest.

    :param DNA: user input (gene of interest)
    :type DNA: string
    :return compStrandDNA: result of complementDNA_to_DNA for the gene
    :rtype compStrandDNA: string
    :return compStrandmRNA: result of complementDNA_to_RNA for the gene
    :rtype compStrandmRNA: string
    """
    return complementDNA_to_DNA(DNA), complementDNA_to_RNA(DNA)
def askSymbol():
    """Ask whether amino acids should be shown with 1- or 3-letter symbols.

    Re-prompts until the answer parses as an integer equal to 1 or 3.

    :return symbol: user input of desired symbol length
    :rtype symbol: integer
    """
    symbol = input("Do you want 3- or 1-letter symbols for the amino acids? ")
    while True:
        try:
            symbol = int(symbol)
        except ValueError:
            symbol = input("Must enter number. Do you want 3- or 1-letter symbols? ")
            continue
        # value comparison; identity (`is`) on ints is implementation-dependent
        if symbol not in (1, 3):
            symbol = input("Must enter 1 or 3. Do you want 3- or 1-letter symbols? ")
            continue
        return symbol
def translate(mRNA, AAtable, symbol):
    """Translate mRNA codon by codon and summarise start/stop positions.

    Codons are read from the first base in steps of three; one or two
    leftover bases at the end are ignored.  With ``symbol == 1`` each amino
    acid is rendered as " X " (padded to the width of its codon); with
    ``symbol == 3`` the 3-letter abbreviation is used.  Any other value
    renders nothing and reports every flag as False.

    The flags are derived from the codons themselves, so they are the same
    for 1- and 3-letter output.

    :param mRNA: mRNA strand to translate
    :type mRNA: string
    :param AAtable: RNA triplets mapped to "<1-letter>|<3-letter>" entries
    :type AAtable: dictionary
    :param symbol: 1- or 3-letter symbol
    :type symbol: integer
    :return protein: amino acids of gene of interest
    :rtype protein: string
    :return metStart: Met at the start of gene
    :rtype metStart: boolean
    :return metIn: Met in gene, other than at the start
    :rtype metIn: boolean
    :return stopEnd: stop codon at the end of gene
    :rtype stopEnd: boolean
    :return stopIn: stop codon in gene, other than at the end
    :rtype stopIn: boolean
    :raises KeyError: if a codon is not a key of AAtable
    """
    # table entry for every complete codon
    aminoAcids = [AAtable[mRNA[start:start + 3]]
                  for start in range(0, len(mRNA) - 2, 3)]

    if symbol == 1:
        protein = "".join(" " + entry[0] + " " for entry in aminoAcids)
    elif symbol == 3:
        protein = "".join(entry[2:] for entry in aminoAcids)
    else:
        # unsupported symbol length: nothing rendered, nothing summarised
        protein = ""
        aminoAcids = []

    # work on the 1-letter codes so the padding of the rendered string
    # cannot hide or fake a Met / stop
    codes = [entry[0] for entry in aminoAcids]
    metStart = bool(codes) and codes[0] == "M"
    metIn = "M" in codes[1:]
    stopEnd = bool(codes) and codes[-1] == "*"
    stopIn = "*" in codes[:-1]
    return protein, metStart, metIn, stopEnd, stopIn
def printReadingFrames(num, compStrandDNA, mRNA, protein, metStart, metIn, stopEnd, stopIn):
    """Build the text report for one reading frame.

    The report holds the frame number, the strand length, the mRNA with
    each base wrapped in colour codes from ``color``, a row of codon
    markers, the protein, and one sentence for each of the four flags.
    Nothing is printed; the caller receives the text.

    :param num: reading frame number
    :type num: integer
    :param compStrandDNA: DNA template strand (matching gene of interest);
        only its length is used
    :type compStrandDNA: string
    :param mRNA: mRNA strand (matching gene of interest)
    :type mRNA: string
    :param protein: amino acids of gene of interest
    :type protein: string
    :param metStart: Met at the start of gene
    :type metStart: boolean
    :param metIn: Met in gene
    :type metIn: boolean
    :param stopEnd: stop codon at the end of gene
    :type stopEnd: boolean
    :param stopIn: stop codon in gene
    :type stopIn: boolean
    :return read: the formatted report
    :rtype read: string
    """
    baseColors = {"A": "cyan", "U": "blue", "C": "bold red", "G": "bold magenta"}
    # colour each base in one pass; characters without a colour are kept as is
    coloredRNA = "".join(
        color(baseColors[base]) + base + color("reset") if base in baseColors else base
        for base in mRNA
    )
    # one marker per complete codon
    codonMarks = "| " * (len(compStrandDNA) // 3)

    parts = [
        "Reading Frame #{}\n".format(num),
        "RNA length: %i\n" % len(compStrandDNA),
        "\tRNA:\t\t%s\n" % coloredRNA,
        "\t\t\t\t %s\n" % codonMarks,
        "\tProtein:\t%s\n\n" % protein,
        "There is a Met at the start site.\n" if metStart
        else "There is no Met at the start site.\n",
        "There is a stop at the end.\n" if stopEnd
        else "There is no stop at the end.\n",
        "There is at least one Met somewhere other than at the start.\n" if metIn
        else "There are no Mets other than at the start.\n",
        "There is at least one stop interrupting the gene." if stopIn
        else "There are no interrupting stops.",
    ]
    return "".join(parts)
def chooseFrame(read1, read2, read3):
    """Ask which reading frame is best and save that report to a text file.

    The user picks frame 1, 2 or 3, a file name (".txt" is appended) and
    whether to overwrite ("w") or append ("a"); every answer is re-asked
    until it is valid.  Colour codes are removed before writing, and when
    appending the report is preceded by a blank line.

    :param read1: first reading frame
    :type read1: string
    :param read2: second reading frame
    :type read2: string
    :param read3: third reading frame
    :type read3: string
    """
    print("\n")
    choose = input("Which is the best reading frame? ")
    while True:
        try:
            choose = int(choose)
        except ValueError:
            choose = input("Must enter number. Which is the best reading frame? ")
            continue
        if choose not in (1, 2, 3):
            choose = input("Must enter 1, 2 or 3. Which is the best reading frame? ")
            # validate the new answer too instead of leaving the loop with it
            continue
        break

    fileName = input("Enter the desired file name: ") + ".txt"
    # keep the lower-cased answer so "W" / "A" are accepted as well
    fileInsert = input("Do you want to write over (\"w\") or append (\"a\") to the file? ").lower()
    while fileInsert not in ("w", "a"):
        fileInsert = input("Invalid input. Do you want to write over (\"w\") or append (\"a\") to the file? ").lower()

    chosen = (read1, read2, read3)[choose - 1]
    # strip the colour codes "[0m", "[34m", "[36m", "[1;35m", "[1;31m";
    # NOTE(review): color() presumably prefixes them with ESC (\x1b), which
    # is removed as well when present -- confirm against ansicol
    chosen = re.sub(r"\x1b?\[(?:0|34|36|1;35|1;31)m", "", chosen)
    if fileInsert == "a":
        chosen = "\n\n" + chosen
    # `with` closes the file even if the write fails
    with open(fileName, fileInsert) as fileOpen:
        fileOpen.write(chosen)
def programCont():
    """Ask whether to continue or quit, and exit the program on quit.

    Accepts "c" or "continue" to go on and "q" or "quit" to stop, ignoring
    case and surrounding whitespace; anything else is asked again.

    :raises SystemExit: when the user chooses to quit
    """
    answer = input("Do you want to continue or quit? ")
    while True:
        answer = answer.strip().lower()
        # the prompt spells the words out, so accept them as well as the
        # single-letter shortcuts
        if answer in ("c", "continue"):
            print("\n\n")
            return
        if answer in ("q", "quit"):
            sys.exit()
        answer = input("Invalid input. Do you want to continue or quit? ")