-
Notifications
You must be signed in to change notification settings - Fork 0
/
check_fasta_file_cds.py
executable file
·63 lines (50 loc) · 1.43 KB
/
check_fasta_file_cds.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/env python
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
from Bio.SeqFeature import SeqFeature, FeatureLocation
import random
import os
import subprocess
import re
import getopt
import sys
import signal
from math import ceil
# Parse the command line: the FASTA file to check is given with -f / --fasta.
# The long option needs a trailing "=" so that getopt knows it takes a value;
# without it "--fasta FILE" is parsed as a flag with an empty argument.
try:
    opts, arguments = getopt.getopt(sys.argv[1:], "f:", ["fasta="])
except getopt.GetoptError as err:
    sys.exit(str(err))

fasta_file = None
for option, argument in opts:
    if option in ("-f", "--fasta"):
        fasta_file = argument

# Fail early with a usage hint instead of crashing later on an undefined name.
if fasta_file is None:
    sys.exit("usage: check_fasta_file_cds.py -f <fasta_file>")

# Index of the FASTA records, keyed by record ID; read by find_cds() and the
# driver loop at the bottom of the file.
record_dict = SeqIO.index(fasta_file, "fasta")

# Output files are written relative to the directory the script is run from.
pwd = os.getcwd()
def find_cds(key=None):
    """Check that a record's annotated CDS translates to a protein with a stop.

    The record description is split on "|" and the first field that starts
    with "CDS" is parsed as ``CDS:<start>-<end>``. The start is decremented by
    one and the end is used as-is to build the extraction location, i.e. the
    header coordinates are treated as 1-based and inclusive. The extracted
    sequence is translated and searched for a "*" character.

    Args:
        key: ID of the record to look up in the module-level ``record_dict``.
            When omitted, the module-level ``keys`` variable (set by the
            driver loop) is used, which is how the function was originally
            called.

    Returns:
        1 if a CDS field was found and its translation contains "*";
        0 if the translation contains no "*" or the description has no
        CDS field at all.

    Raises:
        ValueError: if the CDS field does not split into exactly three parts
            on ":" / "-" or the coordinates are not integers.
    """
    if key is None:
        key = keys
    record = record_dict[key]

    for field in str(record.description).split("|"):
        if not re.match("CDS", field):
            # Keep scanning: the CDS field is usually not the first one.
            continue
        feature, cds_start, cds_end = re.split(":|-", field)
        location = FeatureLocation(int(cds_start) - 1, int(cds_end))
        protein_sequence = location.extract(record.seq).translate()
        if "*" not in str(protein_sequence):
            return 0
        return 1

    # No CDS annotation found in the description.
    return 0
def write_file(object_name, file_name, mode, base_dir=None):
    """Write a string to a file located under a base directory.

    Args:
        object_name: The text to write.
        file_name: Path fragment appended verbatim to the base directory; it
            is expected to carry its own leading separator (e.g. "/out.txt").
        mode: Mode string passed to ``open`` (e.g. "a" to append, "w" to
            overwrite).
        base_dir: Directory prefix. Defaults to the module-level ``pwd``.

    The file is always closed, even if the write raises.
    """
    if base_dir is None:
        base_dir = pwd
    # Plain concatenation is kept on purpose: callers pass file_name with a
    # leading "/", which os.path.join would treat as an absolute path.
    file_path = base_dir + file_name
    with open(file_path, mode) as handle:
        handle.write(object_name)
# Driver: check every record in the index and log the IDs of records whose
# CDS check fails (find_cds() returns 0) to error_sequences.txt.
# NOTE: the loop variable must stay named ``keys`` because find_cds() reads
# it as a module-level global.
check = 0
for i, keys in enumerate(record_dict, 1):
    # Progress counter; the call form works on both Python 2 and Python 3.
    print(i)
    check = find_cds()
    if check == 0:
        # One ID per line so the output file can be read back record by record.
        write_file(record_dict[keys].id + "\n", "/error_sequences.txt", "a")