# BioPython.py -- Biopython sequence (Seq) cheat sheet
# Forked from SembeiNorimaki/Bioinformatics
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC
# A sequence can be created without an alphabet ...
my_seq = Seq("AGTACACTGGT")
# ... or with an explicit one (here: unambiguous DNA).
# NOTE: alphabets need an older Biopython; Bio.Alphabet was removed in 1.78.
my_seq = Seq("AGTACACTGGT", IUPAC.unambiguous_dna)

# get the sequence alphabet
my_seq.alphabet

# print the position and letter of every element, then the sequence length
for index, letter in enumerate(my_seq):
    # single-argument print(...) behaves the same on Python 2 and Python 3
    print("%i %s" % (index, letter))
# length of the whole sequence (len(letter) would always be 1)
print(len(my_seq))
# access elements of the sequence (indices are 0-based)
# single-argument print(...) behaves the same on Python 2 and Python 3
print(my_seq[0])   # first element
print(my_seq[2])   # third element
print(my_seq[-1])  # last element

# count occurrences of a letter
my_seq.count('G')

# GC percentage (#G + #C) / #Total
from Bio.SeqUtils import GC
GC(my_seq)

# slicing
my_seq[4:12]  # from index 4 up to, but not including, index 12

# reversing:
my_seq[::-1]

# convert to string
# str() replaces the obsolete Seq.tostring() method
str(my_seq)
# concatenate sequences
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC, generic_alphabet

# Example operands so that this snippet runs on its own.
seq1 = Seq("ACGT", IUPAC.unambiguous_dna)
seq2 = Seq("GGTT", IUPAC.unambiguous_dna)

seq1 + seq2  # ONLY if alphabets are compatible
# (e.g. adding a protein Seq to a DNA Seq raises TypeError)

# otherwise, convert both seqs to generic alphabets
seq1.alphabet = generic_alphabet
seq2.alphabet = generic_alphabet
seq1 + seq2
# sequence complement (only if alphabet allows complement)
my_seq.complement()
# reverse complement (only if alphabet allows complement)
my_seq.reverse_complement()

# transcribe RNA (DNA -> mRNA)
# The actual biological transcription process works from the template strand,
# doing a reverse complement (TCAG -> CUGA) to give the mRNA. However, in
# Biopython and bioinformatics in general, we typically work directly with the
# coding strand because this means we can get the mRNA sequence just by
# switching T -> U.
from Bio.Seq import Seq, transcribe, back_transcribe
from Bio.Alphabet import IUPAC

# Example strands so that this snippet runs on its own.
coding_dna = Seq("ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG", IUPAC.unambiguous_dna)
template_dna = coding_dna.reverse_complement()

# just changes T with U from the coding strand (5' -> 3')
messenger_rna = transcribe(coding_dna)
# if we want to transcribe from the template strand (3' -> 5'):
transcribe(template_dna.reverse_complement())

# transcribing back to DNA:
back_transcribe(messenger_rna)  # just changes U -> T and gives the coding strand
# 3.8 Translation (mRNA -> Protein)
from Bio.Seq import Seq, translate
from Bio.Alphabet import IUPAC

# Translating an mRNA; the standard genetic code is used here.
messenger_rna = Seq(
    "AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG",
    IUPAC.unambiguous_rna,
)
translate(messenger_rna)

# Direct translation (DNA -> Protein): the coding strand can be passed as-is.
coding_dna = Seq(
    "ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG",
    IUPAC.unambiguous_dna,
)
translate(coding_dna)

# Another translation table can be selected by its name ...
translate(coding_dna, table="Vertebrate Mitochondrial")
# ... or by its NCBI number.
translate(coding_dna, table=2)
# 3.9 Transcription and Translation
# 3.10 Mutable Seqs
from Bio.Seq import MutableSeq
from Bio.Alphabet import IUPAC

# An existing sequence can be converted to a mutable one ...
mutable_seq = my_seq.tomutable()
# ... or a mutable sequence can be created directly.
mutable_seq = MutableSeq(
    "GCCATTGTAATGGGCCGCTGAAAGGGTGCCCGA",
    IUPAC.unambiguous_dna,
)

# Item assignment is now possible.
mutable_seq[5] = "T"

# Convert it back to an immutable Seq.
new_seq = mutable_seq.toseq()