def invert_sequence(chrSeq, start, stop):
    """Return ``chrSeq`` with one span replaced by its reverse complement.

    ``start`` and ``stop`` are converted with ``int()`` and treated as a
    1-based, inclusive interval: the slice ``[start - 1:stop]`` is reverse
    complemented and spliced back between the untouched flanks.

    The function relies on ``MutableSeq.reverse_complement`` modifying the
    sliced copy in place, because its return value is not used.
    """
    left = int(start) - 1
    right = int(stop)

    segment = chrSeq[left:right]
    MutableSeq.reverse_complement(segment)

    # Re-assemble: prefix + inverted segment + suffix.
    prefix = MutableSeq.__add__(chrSeq[:left], segment)
    return MutableSeq.__add__(prefix, chrSeq[right:])
def create_genome_seq(aligned):
    """Rebuild the reference bases underneath an aligned read.

    The read sequence (decoded from UTF-8 when it is not already a ``str``)
    is copied into a ``MutableSeq``. Every ``(matched_bases, curr_err)``
    pair that ``MD_REGEX`` extracts from the read's ``MD`` tag then advances
    the cursor by ``matched_bases`` positions and overwrites the base at
    the cursor with ``curr_err``. Reads flagged ``is_reverse`` are reverse
    complemented before being returned.

    Asserts that no pair contains ``'^'`` and that each substituted base
    differs from the base it replaces.
    """
    read_bases = aligned.seq
    if type(read_bases) != str:
        read_bases = read_bases.decode('UTF-8')
    genome_seq = MutableSeq(read_bases)

    # see samtools documentation for MD string
    cursor = 0  # step through sequence
    for run_length, ref_base in re.findall(MD_REGEX, aligned.opt("MD")):
        cursor += int(run_length)
        assert '^' not in ref_base
        assert ref_base != genome_seq[cursor]
        genome_seq[cursor] = ref_base
        cursor += 1

    if aligned.is_reverse:
        genome_seq.reverse_complement()
    return genome_seq
def add_to_pileup_dict(sams, aligned_read_set, pileup_dict):
    """Add one read's alignments to ``pileup_dict``, position by position.

    ``aligned_read_set`` holds the alignments of a single read (same
    ``qname``), one per entry of ``sams``. Nothing is recorded when any
    alignment is unmapped, or when any alignment has a CIGAR operation code
    in the range 1..6.

    For every read position ``i`` the key is a tuple with one
    ``(chrom, position, genome_base)`` triple per alignment, and the counter
    ``pileup_dict[key][read_base][quality_byte]`` is incremented.

    NOTE(review): the source was whitespace-collapsed; the nesting below is
    the most plausible reconstruction - confirm against the original file.
    NOTE(review): ``pileup_dict`` is presumably a nested defaultdict ending
    in integer counters - verify against the caller.
    """
    # sanity check that all the qnames (RNA read IDs) are the same
    for read in aligned_read_set:
        assert read.qname == aligned_read_set[0].qname

    if not True in [read.is_unmapped for read in aligned_read_set]:
        # all alignments mapped
        for read in aligned_read_set:
            for op, op_len in read.cigar:
                if op > 0 and op < 7:
                    # do not sample reads where there are insertions or deletions
                    return
            assert len(read.seq) == len(aligned_read_set[0].seq)

        # if aligned reads are reversed, we reverse them and hold on to that info.
        # One {read position: reference position} mapping per alignment.
        pos_dicts = [dict(read.aligned_pairs) for read in aligned_read_set]
        genome_seqs = [create_genome_seq(read) for read in aligned_read_set]
        # Read bases and qualities are taken from the first alignment only.
        qual = bytearray(aligned_read_set[0].qual, 'utf-8')
        seq = MutableSeq(aligned_read_set[0].seq if type(aligned_read_set[0].seq) == str else aligned_read_set[0].seq.decode('UTF-8'))
        if aligned_read_set[0].is_reverse:
            # Put bases and qualities back into the read's own orientation.
            seq.reverse_complement()
            qual = qual[::-1]

        for genome_seq in genome_seqs:
            assert len(genome_seq) == len(seq)

        for i in range(0, len(seq)):
            # need (chrom, pos, genome_seq[i]) tuples for each aligned_read
            chroms = [sam.getrname(a.tid) for sam, a in izip(sams, aligned_read_set)]
            # Reverse-strand alignments are indexed from the other end of the read.
            positions = [d[i] if not a.is_reverse else d[len(seq) - i - 1] for d, a in zip(pos_dicts, aligned_read_set)]
            genome_seq_i = [g[i] for g in genome_seqs]
            genomic_locs = tuple(zip(chroms, positions, genome_seq_i))
            pileup_dict[genomic_locs][seq[i]][qual[i]] += 1
def seq_batch_query():
    """Export the sequences of every organism named in a list file.

    Prompts for a text file with one organism name per line, loads the
    names into a scratch ``tasklist`` table of ``./data/DB`` and selects
    every row of ``main`` whose ``Organism`` is in that table, ordered by
    ``Head``. Each row is appended as a FASTA record to
    ``./out/<Name>.fasta``; rows whose ``Strand`` equals ``'-1'`` are
    reverse complemented first. Finally the records of
    ``./out/rps12.fasta`` shorter than 4000 bases are written to
    ``./out/rps12short.fasta``.
    """
    con = sqlite3.connect('./data/DB')
    try:
        cur = con.cursor()
        list_file = input('list file name:\n')
        with open(list_file, 'r') as list_handle:
            organism_list = list_handle.read().split(sep='\n')
        cur.execute('CREATE TABLE IF NOT EXISTS tasklist (Name TEXT);')
        # Blank lines (e.g. the one left by a trailing newline) are not names.
        cur.executemany(
            'INSERT INTO tasklist (Name) VALUES (?);',
            [(organism,) for organism in organism_list if organism])
        # This statement has no placeholders, so no parameters are passed.
        # Passing the last organism name (a bare string) made sqlite3 reject
        # the call unless that name happened to be empty.
        cur.execute(
            'SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main WHERE Organism IN (SELECT Name FROM tasklist) ORDER BY Head')
        result = cur.fetchall()
        cur.execute('DROP TABLE tasklist;')
        cur.close()
    finally:
        # Release the database even if the prompt, the file or a query fails.
        con.close()

    for row in result:
        title = '|'.join([str(row[0]), row[1], row[2], row[3]])
        filename = row[2]
        sequence = MutableSeq(row[5])
        if row[4] == '-1':
            # Works whether reverse_complement() mutates in place (returning
            # None) or returns a new object, which depends on the Biopython
            # release in use.
            flipped = sequence.reverse_complement()
            if flipped is not None:
                sequence = flipped
        with open(''.join(['./out/', filename, '.fasta']), 'a') as fasta_out:
            fasta_out.write('>%s\n%s\n' % (title, sequence))

    # rps12 may have larger than 50k fragments, here to filter it
    # (only records shorter than 4000 bases are kept).
    rps12short = [
        item for item in SeqIO.parse('./out/rps12.fasta', 'fasta')
        if len(item.seq) < 4000
    ]
    SeqIO.write(rps12short, './out/rps12short.fasta', 'fasta')
    print('Done.\n')
def seq_batch_query():
    """Export the sequences of every organism named in a list file.

    Prompts for a text file with one organism name per line, loads the
    names into a scratch ``tasklist`` table of ``./data/DB`` and selects
    every row of ``main`` whose ``Organism`` is in that table, ordered by
    ``Head``. Each row is appended as a FASTA record to
    ``./out/<Name>.fasta``; rows whose ``Strand`` equals ``'-1'`` are
    reverse complemented first. Finally the records of
    ``./out/rps12.fasta`` shorter than 4000 bases are written to
    ``./out/rps12short.fasta``.
    """
    con = sqlite3.connect('./data/DB')
    try:
        cur = con.cursor()
        list_file = input('list file name:\n')
        with open(list_file, 'r') as list_handle:
            organism_list = list_handle.read().split(sep='\n')
        cur.execute('CREATE TABLE IF NOT EXISTS tasklist (Name TEXT);')
        # Blank lines (e.g. the one left by a trailing newline) are not names.
        cur.executemany(
            'INSERT INTO tasklist (Name) VALUES (?);',
            [(organism,) for organism in organism_list if organism])
        # This statement has no placeholders, so no parameters are passed.
        # Passing the last organism name (a bare string) made sqlite3 reject
        # the call unless that name happened to be empty.
        cur.execute(
            'SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main WHERE Organism IN (SELECT Name FROM tasklist) ORDER BY Head')
        result = cur.fetchall()
        cur.execute('DROP TABLE tasklist;')
        cur.close()
    finally:
        # Release the database even if the prompt, the file or a query fails.
        con.close()

    for row in result:
        title = '|'.join([str(row[0]), row[1], row[2], row[3]])
        filename = row[2]
        sequence = MutableSeq(row[5])
        if row[4] == '-1':
            # Works whether reverse_complement() mutates in place (returning
            # None) or returns a new object, which depends on the Biopython
            # release in use.
            flipped = sequence.reverse_complement()
            if flipped is not None:
                sequence = flipped
        with open(''.join(['./out/', filename, '.fasta']), 'a') as fasta_out:
            fasta_out.write('>%s\n%s\n' % (title, sequence))

    # rps12 may have larger than 50k fragments, here to filter it
    # (only records shorter than 4000 bases are kept).
    rps12short = [
        item for item in SeqIO.parse('./out/rps12.fasta', 'fasta')
        if len(item.seq) < 4000
    ]
    SeqIO.write(rps12short, './out/rps12short.fasta', 'fasta')
    print('Done.\n')
class TestMutableSeq(unittest.TestCase):
    """Unit tests for ``MutableSeq``: creation, comparison, editing, complement."""

    def setUp(self):
        """Create a fresh immutable and mutable copy of the same DNA string."""
        self.s = Seq.Seq("TCAAAAGGATGCATCATG")
        self.mutable_s = MutableSeq("TCAAAAGGATGCATCATG")

    def test_mutableseq_creation(self):
        """Test creating MutableSeqs in multiple ways."""
        mutable_s = MutableSeq("TCAAAAGGATGCATCATG")
        self.assertIsInstance(mutable_s, MutableSeq, "Creating MutableSeq")

        mutable_s = self.s.tomutable()
        self.assertIsInstance(mutable_s, MutableSeq, "Converting Seq to mutable")

        array_seq = MutableSeq(array.array("u", "TCAAAAGGATGCATCATG"))
        self.assertIsInstance(array_seq, MutableSeq, "Creating MutableSeq using array")

    def test_repr(self):
        """Test repr() of a short sequence."""
        self.assertEqual("MutableSeq('TCAAAAGGATGCATCATG')", repr(self.mutable_s))

    def test_truncated_repr(self):
        """Test that repr() of a long sequence is abbreviated with '...'."""
        seq = "TCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGA"
        expected = (
            "MutableSeq('TCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGATGCATCATG...GGA')"
        )
        self.assertEqual(expected, repr(MutableSeq(seq)))

    def test_equal_comparison(self):
        """Test __eq__ comparison method."""
        self.assertEqual(self.mutable_s, "TCAAAAGGATGCATCATG")

    def test_not_equal_comparison(self):
        """Test __ne__ comparison method."""
        self.assertNotEqual(self.mutable_s, "other thing")

    def test_less_than_comparison(self):
        """Test __lt__ comparison method."""
        self.assertLess(self.mutable_s[:-1], self.mutable_s)

    def test_less_than_comparison_of_incompatible_types(self):
        """Test that '<' against an int raises TypeError."""
        with self.assertRaises(TypeError):
            self.mutable_s < 1

    def test_less_than_comparison_without_alphabet(self):
        """Test __lt__ against a plain string."""
        # Strict comparison: assertLessEqual here would also pass for an
        # implementation whose '<' is broken but whose '<=' works.
        self.assertLess(self.mutable_s[:-1], "TCAAAAGGATGCATCATG")

    def test_less_than_or_equal_comparison(self):
        """Test __le__ comparison method."""
        self.assertLessEqual(self.mutable_s[:-1], self.mutable_s)

    def test_less_than_or_equal_comparison_of_incompatible_types(self):
        """Test that '<=' against an int raises TypeError."""
        with self.assertRaises(TypeError):
            self.mutable_s <= 1

    def test_less_than_or_equal_comparison_without_alphabet(self):
        """Test __le__ against a plain string."""
        self.assertLessEqual(self.mutable_s[:-1], "TCAAAAGGATGCATCATG")

    def test_greater_than_comparison(self):
        """Test __gt__ comparison method."""
        self.assertGreater(self.mutable_s, self.mutable_s[:-1])

    def test_greater_than_comparison_of_incompatible_types(self):
        """Test that '>' against an int raises TypeError."""
        with self.assertRaises(TypeError):
            self.mutable_s > 1

    def test_greater_than_comparison_without_alphabet(self):
        """Test __gt__ against a plain string."""
        self.assertGreater(self.mutable_s, "TCAAAAGGATGCATCAT")

    def test_greater_than_or_equal_comparison(self):
        """Test __ge__ comparison method."""
        self.assertGreaterEqual(self.mutable_s, self.mutable_s)

    def test_greater_than_or_equal_comparison_of_incompatible_types(self):
        """Test that '>=' against an int raises TypeError."""
        with self.assertRaises(TypeError):
            self.mutable_s >= 1

    def test_greater_than_or_equal_comparison_without_alphabet(self):
        """Test __ge__ against a plain string."""
        self.assertGreaterEqual(self.mutable_s, "TCAAAAGGATGCATCATG")

    def test_add_method(self):
        """Test adding wrong type to MutableSeq."""
        with self.assertRaises(TypeError):
            self.mutable_s + 1234

    def test_radd_method(self):
        """Test __radd__ with another MutableSeq."""
        self.assertEqual(
            "TCAAAAGGATGCATCATGTCAAAAGGATGCATCATG",
            self.mutable_s.__radd__(self.mutable_s),
        )

    def test_radd_method_incompatible_alphabets(self):
        """Test __radd__ with an RNA-looking MutableSeq on the left."""
        self.assertEqual(
            "UCAAAAGGATCAAAAGGATGCATCATG",
            self.mutable_s.__radd__(MutableSeq("UCAAAAGGA")),
        )

    def test_radd_method_using_seq_object(self):
        """Test __radd__ with an immutable Seq."""
        self.assertEqual(
            "TCAAAAGGATGCATCATGTCAAAAGGATGCATCATG",
            self.mutable_s.__radd__(self.s),
        )

    def test_radd_method_wrong_type(self):
        """Test that __radd__ with an int raises TypeError."""
        with self.assertRaises(TypeError):
            self.mutable_s.__radd__(1234)

    def test_as_string(self):
        """Test str() conversion."""
        self.assertEqual("TCAAAAGGATGCATCATG", str(self.mutable_s))

    def test_length(self):
        """Test len()."""
        self.assertEqual(18, len(self.mutable_s))

    def test_converting_to_immutable(self):
        """Test that toseq() returns a Seq."""
        self.assertIsInstance(self.mutable_s.toseq(), Seq.Seq)

    def test_first_nucleotide(self):
        """Test indexing the first letter."""
        self.assertEqual("T", self.mutable_s[0])

    def test_setting_slices(self):
        """Test slice reads and slice assignment from str, MutableSeq and array."""
        self.assertEqual(
            MutableSeq("CAAA"),
            self.mutable_s[1:5],
            "Slice mutable seq",
        )

        self.mutable_s[1:3] = "GAT"
        self.assertEqual(
            MutableSeq("TGATAAAGGATGCATCATG"),
            self.mutable_s,
            "Set slice with string and adding extra nucleotide",
        )

        self.mutable_s[1:3] = self.mutable_s[5:7]
        self.assertEqual(
            MutableSeq("TAATAAAGGATGCATCATG"),
            self.mutable_s,
            "Set slice with MutableSeq",
        )

        self.mutable_s[1:3] = array.array("u", "GAT")
        self.assertEqual(
            MutableSeq("TGATTAAAGGATGCATCATG"),
            self.mutable_s,
            "Set slice with array",
        )

    def test_setting_item(self):
        """Test assigning a single letter by index."""
        self.mutable_s[3] = "G"
        self.assertEqual(MutableSeq("TCAGAAGGATGCATCATG"), self.mutable_s)

    def test_deleting_slice(self):
        """Test deleting a one-letter slice."""
        del self.mutable_s[4:5]
        self.assertEqual(MutableSeq("TCAAAGGATGCATCATG"), self.mutable_s)

    def test_deleting_item(self):
        """Test deleting a single letter by index."""
        del self.mutable_s[3]
        self.assertEqual(MutableSeq("TCAAAGGATGCATCATG"), self.mutable_s)

    def test_appending(self):
        """Test append()."""
        self.mutable_s.append("C")
        self.assertEqual(MutableSeq("TCAAAAGGATGCATCATGC"), self.mutable_s)

    def test_inserting(self):
        """Test insert()."""
        self.mutable_s.insert(4, "G")
        self.assertEqual(MutableSeq("TCAAGAAGGATGCATCATG"), self.mutable_s)

    def test_popping_last_item(self):
        """Test pop() returns the last letter."""
        self.assertEqual("G", self.mutable_s.pop())

    def test_remove_items(self):
        """Test remove() on a present and on an absent letter."""
        self.mutable_s.remove("G")
        self.assertEqual(MutableSeq("TCAAAAGATGCATCATG"), self.mutable_s, "Remove first G")
        self.assertRaises(ValueError, self.mutable_s.remove, "Z")

    def test_count(self):
        """Test count() with one- and two-letter arguments."""
        self.assertEqual(7, self.mutable_s.count("A"))
        self.assertEqual(2, self.mutable_s.count("AA"))

    def test_index(self):
        """Test index() on a present letter and on a missing substring."""
        self.assertEqual(2, self.mutable_s.index("A"))
        self.assertRaises(ValueError, self.mutable_s.index, "8888")

    def test_reverse(self):
        """Test using reverse method."""
        self.mutable_s.reverse()
        self.assertEqual(MutableSeq("GTACTACGTAGGAAAACT"), self.mutable_s)

    def test_reverse_with_stride(self):
        """Test reverse using -1 stride."""
        self.assertEqual(MutableSeq("GTACTACGTAGGAAAACT"), self.mutable_s[::-1])

    def test_complement(self):
        """Test complement() on the DNA fixture."""
        self.mutable_s.complement()
        self.assertEqual("AGTTTTCCTACGTAGTAC", str(self.mutable_s))

    def test_complement_rna(self):
        """Test complement() on a mixed-case RNA sequence."""
        seq = Seq.MutableSeq("AUGaaaCUG")
        seq.complement()
        self.assertEqual("UACuuuGAC", str(seq))

    def test_complement_mixed_aphabets(self):
        """Test that complement() rejects a sequence holding both U and T."""
        seq = Seq.MutableSeq("AUGaaaCTG")
        with self.assertRaises(ValueError):
            seq.complement()

    def test_complement_rna_string(self):
        """Test complement() on an RNA sequence built from a plain string."""
        seq = Seq.MutableSeq("AUGaaaCUG")
        seq.complement()
        self.assertEqual("UACuuuGAC", str(seq))

    def test_complement_dna_string(self):
        """Test complement() on a DNA sequence built from a plain string."""
        seq = Seq.MutableSeq("ATGaaaCTG")
        seq.complement()
        self.assertEqual("TACtttGAC", str(seq))

    def test_reverse_complement(self):
        """Test reverse_complement() on the DNA fixture."""
        self.mutable_s.reverse_complement()
        self.assertEqual("CATGATGCATCCTTTTGA", str(self.mutable_s))

    def test_extend_method(self):
        """Test extend() with a string."""
        self.mutable_s.extend("GAT")
        self.assertEqual(MutableSeq("TCAAAAGGATGCATCATGGAT"), self.mutable_s)

    def test_extend_with_mutable_seq(self):
        """Test extend() with another MutableSeq."""
        self.mutable_s.extend(MutableSeq("TTT"))
        self.assertEqual(MutableSeq("TCAAAAGGATGCATCATGTTT"), self.mutable_s)

    def test_delete_stride_slice(self):
        """Test deleting the slice 4:(6 - 1)."""
        del self.mutable_s[4:6 - 1]
        self.assertEqual(MutableSeq("TCAAAGGATGCATCATG"), self.mutable_s)

    def test_extract_third_nucleotide(self):
        """Test extracting every third nucleotide (slicing with stride 3)."""
        self.assertEqual(MutableSeq("TAGTAA"), self.mutable_s[0::3])
        self.assertEqual(MutableSeq("CAGGTT"), self.mutable_s[1::3])
        self.assertEqual(MutableSeq("AAACCG"), self.mutable_s[2::3])

    def test_set_wobble_codon_to_n(self):
        """Test setting wobble codon to N (set slice with stride 3)."""
        self.mutable_s[2::3] = "N" * len(self.mutable_s[2::3])
        self.assertEqual(MutableSeq("TCNAANGGNTGNATNATN"), self.mutable_s)
def seq_query():
    """Interactively query ``./data/DB`` and write the hits as FASTA.

    Prompts for one of five query types (specific fragment, specific
    organism, specific gene, everything, all cds), runs the matching SELECT
    on the ``main`` table and writes one FASTA record per row. Rows whose
    ``Strand`` equals ``'-1'`` are reverse complemented first.

    When the answer to the "organize" prompt is ``'y'``, each record is
    appended to ``output/<Name>.fasta`` (slashes removed from the name);
    otherwise all records go to a single ``<filename>.fasta`` asked for
    interactively.

    Raises:
        ValueError: if the chosen query type is not one of '1'..'5'.
    """
    query_type = input('1.Specific fragment\n'
                       '2.Specific Organism\n'
                       '3.Specific gene\n'
                       '4.All\n'
                       '5.All cds\n')
    organize = input('Organize output?(y/n)\n')
    if query_type not in ['1', '2', '3', '4', '5']:
        raise ValueError('wrong input!\n')

    con = sqlite3.connect('./data/DB')
    try:
        cur = con.cursor()
        if query_type == '1':
            organism = input('Organism:\n')
            gene = input('Gene:\n')
            frag_type = input(
                'Fragment type(gene, cds, rRNA, tRNA, exon, intron, spacer):\n')
            cur.execute(
                'SELECT Taxon, Organism, Name, Type, Strand, Sequence FROM main WHERE Name LIKE ? AND Type = ? AND Organism=?',
                ('%' + gene + '%', frag_type, organism))
        elif query_type == '2':
            organism = input('Organism:\n')
            frag_type = input(
                'Fragment type(gene, cds, rRNA, tRNA, exon, intron, spacer, whole, fragments):\n'
            )
            if frag_type == 'fragments':
                cur.execute(
                    'SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main WHERE Organism = ? ORDER BY Head',
                    (organism, ))
            else:
                cur.execute(
                    'SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main WHERE Organism LIKE ? AND Type = ? ORDER BY Head',
                    ('%' + organism + '%', frag_type))
        elif query_type == '3':
            gene = input('Gene:\n')
            frag_type = input(
                'Fragment type(gene, cds, rRNA, tRNA, exon, intron, spacer):\n')
            cur.execute(
                'SELECT Taxon, Organism, Name, Type, Strand, Sequence FROM main WHERE Name LIKE ? AND Type = ? ORDER BY Taxon',
                ('%' + gene + '%', frag_type))
        elif query_type == '4':
            cur.execute(
                'SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main ORDER BY Taxon'
            )
        else:  # query_type == '5'
            # SQL string literals take single quotes; a double-quoted token
            # is an identifier and only works as a string through a legacy
            # SQLite fallback that can be compiled out.
            cur.execute(
                "SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main WHERE type = 'cds' ORDER BY Taxon"
            )
        result = cur.fetchall()
        cur.close()
    finally:
        # Release the database even if a prompt or a query fails.
        con.close()

    query_result = []
    for row in result:
        title = '|'.join([str(row[0]), row[1], row[2], row[3]])
        sequence = MutableSeq(row[5])
        gene = row[2]
        if row[4] == '-1':
            # Works whether reverse_complement() mutates in place (returning
            # None) or returns a new object, which depends on the Biopython
            # release in use.
            flipped = sequence.reverse_complement()
            if flipped is not None:
                sequence = flipped
        query_result.append([title, gene, sequence])

    if organize == 'y':
        if not exists('output'):
            makedirs('output')
        for title, gene, sequence in query_result:
            file_name = ''.join(
                ['output', '/', gene.replace('/', ''), '.fasta'])
            with open(file_name, 'a') as output_file:
                output_file.write('>%s\n%s\n' % (title, sequence))
    else:
        output = input('Enter output filename:\n')
        with open('.'.join([output, 'fasta']), 'w') as output_file:
            for title, gene, sequence in query_result:
                output_file.write('>%s\n%s\n' % (title, sequence))
    print('Done.\n')
class TestMutableSeq(unittest.TestCase):
    """Unit tests for ``MutableSeq`` objects that carry an IUPAC alphabet."""

    def setUp(self):
        """Create an immutable and a mutable copy of the same DNA string."""
        self.s = Seq.Seq("TCAAAAGGATGCATCATG", IUPAC.unambiguous_dna)
        self.mutable_s = MutableSeq("TCAAAAGGATGCATCATG", IUPAC.ambiguous_dna)

    def test_mutableseq_creation(self):
        """Test creating MutableSeqs in multiple ways"""
        mutable_s = MutableSeq("TCAAAAGGATGCATCATG", IUPAC.ambiguous_dna)
        self.assertIsInstance(mutable_s, MutableSeq, "Creating MutableSeq")

        mutable_s = self.s.tomutable()
        self.assertIsInstance(mutable_s, MutableSeq, "Converting Seq to mutable")

        # array_indicator is defined elsewhere in the file.
        array_seq = MutableSeq(
            array.array(array_indicator, "TCAAAAGGATGCATCATG"),
            IUPAC.ambiguous_dna)
        self.assertIsInstance(array_seq, MutableSeq, "Creating MutableSeq using array")

    def test_repr(self):
        """Test repr() of a short sequence"""
        self.assertEqual(
            "MutableSeq('TCAAAAGGATGCATCATG', IUPACAmbiguousDNA())",
            repr(self.mutable_s))

    def test_truncated_repr(self):
        """Test that repr() of a long sequence is abbreviated with '...'"""
        seq = "TCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGA"
        expected = "MutableSeq('TCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGATGCATCATG...GGA', IUPACAmbiguousDNA())"
        self.assertEqual(expected, repr(MutableSeq(seq, IUPAC.ambiguous_dna)))

    def test_equal_comparison(self):
        """Test __eq__ comparison method"""
        self.assertEqual(self.mutable_s, "TCAAAAGGATGCATCATG")

    def test_equal_comparison_of_incompatible_alphabets(self):
        """Run '==' across DNA/RNA alphabets with warnings captured"""
        # No assertion: the test only checks that the comparison runs.
        with warnings.catch_warnings(record=True):
            self.mutable_s == MutableSeq('UCAAAAGGA', IUPAC.ambiguous_rna)

    def test_not_equal_comparison(self):
        """Test __ne__ comparison method"""
        self.assertNotEqual(self.mutable_s, "other thing")

    def test_less_than_comparison(self):
        """Test __lt__ comparison method"""
        self.assertTrue(self.mutable_s[:-1] < self.mutable_s)

    def test_less_than_comparison_of_incompatible_alphabets(self):
        """Run '<' across DNA/RNA alphabets with warnings captured"""
        # No assertion: the test only checks that the comparison runs.
        with warnings.catch_warnings(record=True):
            self.mutable_s[:-1] < MutableSeq("UCAAAAGGAUGCAUCAUG", IUPAC.ambiguous_rna)

    def test_less_than_comparison_without_alphabet(self):
        """Test __lt__ against a plain string"""
        self.assertTrue(self.mutable_s[:-1] < "TCAAAAGGATGCATCATG")

    def test_less_than_or_equal_comparison(self):
        """Test __le__ comparison method"""
        self.assertTrue(self.mutable_s[:-1] <= self.mutable_s)

    def test_less_than_or_equal_comparison_of_incompatible_alphabets(self):
        """Run '<=' across DNA/RNA alphabets with warnings captured"""
        # No assertion: the test only checks that the comparison runs.
        with warnings.catch_warnings(record=True):
            self.mutable_s[:-1] <= MutableSeq("UCAAAAGGAUGCAUCAUG", IUPAC.ambiguous_rna)

    def test_less_than_or_equal_comparison_without_alphabet(self):
        """Test __le__ against a plain string"""
        self.assertTrue(self.mutable_s[:-1] <= "TCAAAAGGATGCATCATG")

    def test_add_method(self):
        """Test adding wrong type to MutableSeq"""
        with self.assertRaises(TypeError):
            self.mutable_s + 1234

    def test_radd_method(self):
        """Test __radd__ with another MutableSeq"""
        self.assertEqual("TCAAAAGGATGCATCATGTCAAAAGGATGCATCATG",
                         self.mutable_s.__radd__(self.mutable_s))

    def test_radd_method_incompatible_alphabets(self):
        """Test that __radd__ with an RNA MutableSeq raises TypeError"""
        with self.assertRaises(TypeError):
            self.mutable_s.__radd__(
                MutableSeq("UCAAAAGGA", IUPAC.ambiguous_rna))

    def test_radd_method_using_seq_object(self):
        """Test __radd__ with an immutable Seq"""
        self.assertEqual("TCAAAAGGATGCATCATGTCAAAAGGATGCATCATG",
                         self.mutable_s.__radd__(self.s))

    def test_radd_method_wrong_type(self):
        """Test that __radd__ with an int raises TypeError"""
        with self.assertRaises(TypeError):
            self.mutable_s.__radd__(1234)

    def test_as_string(self):
        """Test str() conversion"""
        self.assertEqual("TCAAAAGGATGCATCATG", str(self.mutable_s))

    def test_length(self):
        """Test len()"""
        self.assertEqual(18, len(self.mutable_s))

    def test_converting_to_immutable(self):
        """Test that toseq() returns a Seq"""
        self.assertIsInstance(self.mutable_s.toseq(), Seq.Seq)

    def test_first_nucleotide(self):
        """Test indexing the first letter"""
        self.assertEqual('T', self.mutable_s[0])

    def test_setting_slices(self):
        """Test slice reads and slice assignment from str, MutableSeq and array"""
        self.assertEqual(MutableSeq('CAAA', IUPAC.ambiguous_dna),
                         self.mutable_s[1:5], "Slice mutable seq")

        self.mutable_s[1:3] = "GAT"
        self.assertEqual(
            MutableSeq("TGATAAAGGATGCATCATG", IUPAC.ambiguous_dna),
            self.mutable_s,
            "Set slice with string and adding extra nucleotide")

        self.mutable_s[1:3] = self.mutable_s[5:7]
        self.assertEqual(
            MutableSeq("TAATAAAGGATGCATCATG", IUPAC.ambiguous_dna),
            self.mutable_s, "Set slice with MutableSeq")

        self.mutable_s[1:3] = array.array(array_indicator, "GAT")
        self.assertEqual(
            MutableSeq("TGATTAAAGGATGCATCATG", IUPAC.ambiguous_dna),
            self.mutable_s, "Set slice with array")

    def test_setting_item(self):
        """Test assigning a single letter by index"""
        self.mutable_s[3] = "G"
        self.assertEqual(MutableSeq("TCAGAAGGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_deleting_slice(self):
        """Test deleting a one-letter slice"""
        del self.mutable_s[4:5]
        self.assertEqual(MutableSeq("TCAAAGGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_deleting_item(self):
        """Test deleting a single letter by index"""
        del self.mutable_s[3]
        self.assertEqual(MutableSeq("TCAAAGGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_appending(self):
        """Test append()"""
        self.mutable_s.append("C")
        self.assertEqual(
            MutableSeq("TCAAAAGGATGCATCATGC", IUPAC.ambiguous_dna),
            self.mutable_s)

    def test_inserting(self):
        """Test insert()"""
        self.mutable_s.insert(4, "G")
        self.assertEqual(
            MutableSeq("TCAAGAAGGATGCATCATG", IUPAC.ambiguous_dna),
            self.mutable_s)

    def test_popping_last_item(self):
        """Test pop() returns the last letter"""
        self.assertEqual("G", self.mutable_s.pop())

    def test_remove_items(self):
        """Test remove() on a present and on an absent letter"""
        self.mutable_s.remove("G")
        self.assertEqual(MutableSeq("TCAAAAGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s, "Remove first G")

        self.assertRaises(ValueError, self.mutable_s.remove, 'Z')

    def test_count(self):
        """Test count() with one- and two-letter arguments"""
        self.assertEqual(7, self.mutable_s.count("A"))
        self.assertEqual(2, self.mutable_s.count("AA"))

    def test_index(self):
        """Test index() on a present letter and on a missing substring"""
        self.assertEqual(2, self.mutable_s.index("A"))
        self.assertRaises(ValueError, self.mutable_s.index, "8888")

    def test_reverse(self):
        """Test using reverse method"""
        self.mutable_s.reverse()
        self.assertEqual(MutableSeq("GTACTACGTAGGAAAACT", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_reverse_with_stride(self):
        """Test reverse using -1 stride"""
        self.assertEqual(MutableSeq("GTACTACGTAGGAAAACT", IUPAC.ambiguous_dna),
                         self.mutable_s[::-1])

    def test_complement(self):
        """Test complement() on the DNA fixture"""
        self.mutable_s.complement()
        self.assertEqual(str("AGTTTTCCTACGTAGTAC"), str(self.mutable_s))

    def test_complement_rna(self):
        """Test complement() on a sequence with an RNA alphabet"""
        seq = Seq.MutableSeq("AUGaaaCUG", IUPAC.unambiguous_rna)
        seq.complement()
        self.assertEqual(str("UACuuuGAC"), str(seq))

    def test_complement_mixed_aphabets(self):
        """Test that complement() rejects a sequence holding both U and T"""
        seq = Seq.MutableSeq("AUGaaaCTG")
        with self.assertRaises(ValueError):
            seq.complement()

    def test_complement_rna_string(self):
        """Test complement() on RNA letters given without an alphabet"""
        seq = Seq.MutableSeq("AUGaaaCUG")
        seq.complement()
        self.assertEqual('UACuuuGAC', str(seq))

    def test_complement_dna_string(self):
        """Test complement() on DNA letters given without an alphabet"""
        seq = Seq.MutableSeq("ATGaaaCTG")
        seq.complement()
        self.assertEqual('TACtttGAC', str(seq))

    def test_reverse_complement(self):
        """Test reverse_complement() on the DNA fixture"""
        self.mutable_s.reverse_complement()
        self.assertEqual("CATGATGCATCCTTTTGA", str(self.mutable_s))

    def test_reverse_complement_of_protein(self):
        """Test that reverse_complement() of a protein raises ValueError"""
        seq = Seq.MutableSeq("ACTGTCGTCT", Alphabet.generic_protein)
        with self.assertRaises(ValueError):
            seq.reverse_complement()

    def test_to_string_method(self):
        """This method is currently deprecated, probably will need to remove this test soon"""
        # No assertion: the call only has to run with warnings captured.
        with warnings.catch_warnings(record=True):
            self.mutable_s.tostring()

    def test_extend_method(self):
        """Test extend() with a string"""
        self.mutable_s.extend("GAT")
        self.assertEqual(
            MutableSeq("TCAAAAGGATGCATCATGGAT", IUPAC.ambiguous_dna),
            self.mutable_s)

    def test_extend_with_mutable_seq(self):
        """Test extend() with another MutableSeq"""
        self.mutable_s.extend(MutableSeq("TTT", IUPAC.ambiguous_dna))
        self.assertEqual(
            MutableSeq("TCAAAAGGATGCATCATGTTT", IUPAC.ambiguous_dna),
            self.mutable_s)

    def test_delete_stride_slice(self):
        """Test deleting the slice 4:(6 - 1)"""
        del self.mutable_s[4:6 - 1]
        self.assertEqual(MutableSeq("TCAAAGGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_extract_third_nucleotide(self):
        """Test extracting every third nucleotide (slicing with stride 3)"""
        self.assertEqual(MutableSeq("TAGTAA", IUPAC.ambiguous_dna),
                         self.mutable_s[0::3])
        self.assertEqual(MutableSeq("CAGGTT", IUPAC.ambiguous_dna),
                         self.mutable_s[1::3])
        self.assertEqual(MutableSeq("AAACCG", IUPAC.ambiguous_dna),
                         self.mutable_s[2::3])

    def test_set_wobble_codon_to_n(self):
        """Test setting wobble codon to N (set slice with stride 3)"""
        self.mutable_s[2::3] = "N" * len(self.mutable_s[2::3])
        self.assertEqual(MutableSeq("TCNAANGGNTGNATNATN", IUPAC.ambiguous_dna),
                         self.mutable_s)
class TestMutableSeq(unittest.TestCase):
    """Unit tests for ``MutableSeq`` objects that carry an IUPAC alphabet."""

    def setUp(self):
        """Create an immutable and a mutable copy of the same DNA string."""
        self.s = Seq.Seq("TCAAAAGGATGCATCATG", IUPAC.unambiguous_dna)
        self.mutable_s = MutableSeq("TCAAAAGGATGCATCATG", IUPAC.ambiguous_dna)

    def test_mutableseq_creation(self):
        """Test creating MutableSeqs in multiple ways"""
        mutable_s = MutableSeq("TCAAAAGGATGCATCATG", IUPAC.ambiguous_dna)
        self.assertIsInstance(mutable_s, MutableSeq, "Creating MutableSeq")

        mutable_s = self.s.tomutable()
        self.assertIsInstance(mutable_s, MutableSeq, "Converting Seq to mutable")

        # array_indicator is defined elsewhere in the file.
        array_seq = MutableSeq(array.array(array_indicator, "TCAAAAGGATGCATCATG"), IUPAC.ambiguous_dna)
        self.assertIsInstance(array_seq, MutableSeq, "Creating MutableSeq using array")

    def test_repr(self):
        """Test repr() of a short sequence"""
        self.assertEqual("MutableSeq('TCAAAAGGATGCATCATG', IUPACAmbiguousDNA())", repr(self.mutable_s))

    def test_truncated_repr(self):
        """Test that repr() of a long sequence is abbreviated with '...'"""
        seq = "TCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGA"
        expected = "MutableSeq('TCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGATGCATCATG...GGA', IUPACAmbiguousDNA())"
        self.assertEqual(expected, repr(MutableSeq(seq, IUPAC.ambiguous_dna)))

    def test_equal_comparison(self):
        """Test __eq__ comparison method"""
        self.assertEqual(self.mutable_s, "TCAAAAGGATGCATCATG")

    def test_equal_comparison_of_incompatible_alphabets(self):
        """Run '==' across DNA/RNA alphabets with warnings captured"""
        # No assertion: the test only checks that the comparison runs.
        with warnings.catch_warnings(record=True):
            self.mutable_s == MutableSeq('UCAAAAGGA', IUPAC.ambiguous_rna)

    def test_not_equal_comparison(self):
        """Test __ne__ comparison method"""
        self.assertNotEqual(self.mutable_s, "other thing")

    def test_less_than_comparison(self):
        """Test __lt__ comparison method"""
        self.assertTrue(self.mutable_s[:-1] < self.mutable_s)

    def test_less_than_comparison_of_incompatible_alphabets(self):
        """Run '<' across DNA/RNA alphabets with warnings captured"""
        # No assertion: the test only checks that the comparison runs.
        with warnings.catch_warnings(record=True):
            self.mutable_s[:-1] < MutableSeq("UCAAAAGGAUGCAUCAUG", IUPAC.ambiguous_rna)

    def test_less_than_comparison_without_alphabet(self):
        """Test __lt__ against a plain string"""
        self.assertTrue(self.mutable_s[:-1] < "TCAAAAGGATGCATCATG")

    def test_less_than_or_equal_comparison(self):
        """Test __le__ comparison method"""
        self.assertTrue(self.mutable_s[:-1] <= self.mutable_s)

    def test_less_than_or_equal_comparison_of_incompatible_alphabets(self):
        """Run '<=' across DNA/RNA alphabets with warnings captured"""
        # No assertion: the test only checks that the comparison runs.
        with warnings.catch_warnings(record=True):
            self.mutable_s[:-1] <= MutableSeq("UCAAAAGGAUGCAUCAUG", IUPAC.ambiguous_rna)

    def test_less_than_or_equal_comparison_without_alphabet(self):
        """Test __le__ against a plain string"""
        self.assertTrue(self.mutable_s[:-1] <= "TCAAAAGGATGCATCATG")

    def test_add_method(self):
        """Test adding wrong type to MutableSeq"""
        with self.assertRaises(TypeError):
            self.mutable_s + 1234

    def test_radd_method(self):
        """Test __radd__ with another MutableSeq"""
        self.assertEqual("TCAAAAGGATGCATCATGTCAAAAGGATGCATCATG", self.mutable_s.__radd__(self.mutable_s))

    def test_radd_method_incompatible_alphabets(self):
        """Test that __radd__ with an RNA MutableSeq raises TypeError"""
        with self.assertRaises(TypeError):
            self.mutable_s.__radd__(MutableSeq("UCAAAAGGA", IUPAC.ambiguous_rna))

    def test_radd_method_using_seq_object(self):
        """Test __radd__ with an immutable Seq"""
        self.assertEqual("TCAAAAGGATGCATCATGTCAAAAGGATGCATCATG", self.mutable_s.__radd__(self.s))

    def test_radd_method_wrong_type(self):
        """Test that __radd__ with an int raises TypeError"""
        with self.assertRaises(TypeError):
            self.mutable_s.__radd__(1234)

    def test_as_string(self):
        """Test str() conversion"""
        self.assertEqual("TCAAAAGGATGCATCATG", str(self.mutable_s))

    def test_length(self):
        """Test len()"""
        self.assertEqual(18, len(self.mutable_s))

    def test_converting_to_immutable(self):
        """Test that toseq() returns a Seq"""
        self.assertIsInstance(self.mutable_s.toseq(), Seq.Seq)

    def test_first_nucleotide(self):
        """Test indexing the first letter"""
        self.assertEqual('T', self.mutable_s[0])

    def test_setting_slices(self):
        """Test slice reads and slice assignment from str, MutableSeq and array"""
        self.assertEqual(MutableSeq('CAAA', IUPAC.ambiguous_dna), self.mutable_s[1:5], "Slice mutable seq")

        self.mutable_s[1:3] = "GAT"
        self.assertEqual(MutableSeq("TGATAAAGGATGCATCATG", IUPAC.ambiguous_dna), self.mutable_s, "Set slice with string and adding extra nucleotide")

        self.mutable_s[1:3] = self.mutable_s[5:7]
        self.assertEqual(MutableSeq("TAATAAAGGATGCATCATG", IUPAC.ambiguous_dna), self.mutable_s, "Set slice with MutableSeq")

        self.mutable_s[1:3] = array.array(array_indicator, "GAT")
        self.assertEqual(MutableSeq("TGATTAAAGGATGCATCATG", IUPAC.ambiguous_dna), self.mutable_s, "Set slice with array")

    def test_setting_item(self):
        """Test assigning a single letter by index"""
        self.mutable_s[3] = "G"
        self.assertEqual(MutableSeq("TCAGAAGGATGCATCATG", IUPAC.ambiguous_dna), self.mutable_s)

    def test_deleting_slice(self):
        """Test deleting a one-letter slice"""
        del self.mutable_s[4:5]
        self.assertEqual(MutableSeq("TCAAAGGATGCATCATG", IUPAC.ambiguous_dna), self.mutable_s)

    def test_deleting_item(self):
        """Test deleting a single letter by index"""
        del self.mutable_s[3]
        self.assertEqual(MutableSeq("TCAAAGGATGCATCATG", IUPAC.ambiguous_dna), self.mutable_s)

    def test_appending(self):
        """Test append()"""
        self.mutable_s.append("C")
        self.assertEqual(MutableSeq("TCAAAAGGATGCATCATGC", IUPAC.ambiguous_dna), self.mutable_s)

    def test_inserting(self):
        """Test insert()"""
        self.mutable_s.insert(4, "G")
        self.assertEqual(MutableSeq("TCAAGAAGGATGCATCATG", IUPAC.ambiguous_dna), self.mutable_s)

    def test_popping_last_item(self):
        """Test pop() returns the last letter"""
        self.assertEqual("G", self.mutable_s.pop())

    def test_remove_items(self):
        """Test remove() on a present and on an absent letter"""
        self.mutable_s.remove("G")
        self.assertEqual(MutableSeq("TCAAAAGATGCATCATG", IUPAC.ambiguous_dna), self.mutable_s, "Remove first G")

        self.assertRaises(ValueError, self.mutable_s.remove, 'Z')

    def test_count(self):
        """Test count() with one- and two-letter arguments"""
        self.assertEqual(7, self.mutable_s.count("A"))
        self.assertEqual(2, self.mutable_s.count("AA"))

    def test_index(self):
        """Test index() on a present letter and on a missing substring"""
        self.assertEqual(2, self.mutable_s.index("A"))
        self.assertRaises(ValueError, self.mutable_s.index, "8888")

    def test_reverse(self):
        """Test using reverse method"""
        self.mutable_s.reverse()
        self.assertEqual(MutableSeq("GTACTACGTAGGAAAACT", IUPAC.ambiguous_dna), self.mutable_s)

    def test_reverse_with_stride(self):
        """Test reverse using -1 stride"""
        self.assertEqual(MutableSeq("GTACTACGTAGGAAAACT", IUPAC.ambiguous_dna), self.mutable_s[::-1])

    def test_complement(self):
        """Test complement() on the DNA fixture"""
        self.mutable_s.complement()
        self.assertEqual(str("AGTTTTCCTACGTAGTAC"), str(self.mutable_s))

    def test_complement_rna(self):
        """Test complement() on a sequence with an RNA alphabet"""
        seq = Seq.MutableSeq("AUGaaaCUG", IUPAC.unambiguous_rna)
        seq.complement()
        self.assertEqual(str("UACuuuGAC"), str(seq))

    def test_complement_mixed_aphabets(self):
        """Test that complement() rejects a sequence holding both U and T"""
        seq = Seq.MutableSeq("AUGaaaCTG")
        with self.assertRaises(ValueError):
            seq.complement()

    def test_complement_rna_string(self):
        """Test complement() on RNA letters given without an alphabet"""
        seq = Seq.MutableSeq("AUGaaaCUG")
        seq.complement()
        self.assertEqual('UACuuuGAC', str(seq))

    def test_complement_dna_string(self):
        """Test complement() on DNA letters given without an alphabet"""
        seq = Seq.MutableSeq("ATGaaaCTG")
        seq.complement()
        self.assertEqual('TACtttGAC', str(seq))

    def test_reverse_complement(self):
        """Test reverse_complement() on the DNA fixture"""
        self.mutable_s.reverse_complement()
        self.assertEqual("CATGATGCATCCTTTTGA", str(self.mutable_s))

    def test_reverse_complement_of_protein(self):
        """Test that reverse_complement() of a protein raises ValueError"""
        seq = Seq.MutableSeq("ACTGTCGTCT", Alphabet.generic_protein)
        with self.assertRaises(ValueError):
            seq.reverse_complement()

    def test_to_string_method(self):
        """This method is currently deprecated, probably will need to remove this test soon"""
        # No assertion: the call only has to run with warnings captured.
        with warnings.catch_warnings(record=True):
            self.mutable_s.tostring()

    def test_extend_method(self):
        """Test extend() with a string"""
        self.mutable_s.extend("GAT")
        self.assertEqual(MutableSeq("TCAAAAGGATGCATCATGGAT", IUPAC.ambiguous_dna), self.mutable_s)

    def test_extend_with_mutable_seq(self):
        """Test extend() with another MutableSeq"""
        self.mutable_s.extend(MutableSeq("TTT", IUPAC.ambiguous_dna))
        self.assertEqual(MutableSeq("TCAAAAGGATGCATCATGTTT", IUPAC.ambiguous_dna), self.mutable_s)

    def test_delete_stride_slice(self):
        """Test deleting the slice 4:(6 - 1)"""
        del self.mutable_s[4:6 - 1]
        self.assertEqual(MutableSeq("TCAAAGGATGCATCATG", IUPAC.ambiguous_dna), self.mutable_s)

    def test_extract_third_nucleotide(self):
        """Test extracting every third nucleotide (slicing with stride 3)"""
        self.assertEqual(MutableSeq("TAGTAA", IUPAC.ambiguous_dna), self.mutable_s[0::3])
        self.assertEqual(MutableSeq("CAGGTT", IUPAC.ambiguous_dna), self.mutable_s[1::3])
        self.assertEqual(MutableSeq("AAACCG", IUPAC.ambiguous_dna), self.mutable_s[2::3])

    def test_set_wobble_codon_to_n(self):
        """Test setting wobble codon to N (set slice with stride 3)"""
        self.mutable_s[2::3] = "N" * len(self.mutable_s[2::3])
        self.assertEqual(MutableSeq("TCNAANGGNTGNATNATN", IUPAC.ambiguous_dna), self.mutable_s)
# Write an 800-base window (at most) around the breakpoint of every read that
# belongs to the current breakpoint, then align the windows with Clustal Omega.
# NOTE(review): tempfn, break_pos, break_nb_ord, cl_ii, pb_reads and
# read_blocks are defined outside the visible chunk; the nesting below is
# reconstructed from a whitespace-collapsed line - confirm against the file.
rcl_file = tempfn + '-rcl.fa'
f = open(rcl_file, 'w')
nbr = 0  # number of reads written for this breakpoint
for rii in xrange(len(break_pos[1])):
    if (break_pos[0][rii] == break_nb_ord[cl_ii]):
        nbr += 1
        pbr = break_pos[1][rii]
        pbseq = pb_reads[pbr]
        # Pick the read coordinate of the breakpoint from the first block.
        # NOTE(review): this compares against break_nb_ord[2] while the
        # filter above uses break_nb_ord[cl_ii] - confirm this is intended.
        if (read_blocks[pbr][0]['refE'] == break_nb_ord[2]):
            readbp = read_blocks[pbr][0]['readE']
        else:
            readbp = read_blocks[pbr][0]['readS']
        # Keep up to 400 bases on each side, clipped to the read ends.
        pbseq = pbseq[max(0, readbp - 400):min(len(pbseq), readbp + 400)]
        if (read_blocks[pbr][0]['refStd'] == '-'):
            # Reverse complement when the block's 'refStd' is '-'; relies on
            # reverse_complement() modifying pbseq in place.
            pbseq = MutableSeq(pbseq, generic_dna)
            pbseq.reverse_complement()
            pbseq = str(pbseq)
        f.write('>' + pbr + '\n')
        f.write(pbseq)
        f.write("\n")
f.close()
# Run Clustal
clo_outfile = tempfn + '-clo-out.fa'
clo_cmd = ['clustalo', '-i', rcl_file, '-o', clo_outfile, '--force']
clo_out = subprocess.check_output(clo_cmd)
# Get consensus
msa_out = []
for record in SeqIO.parse(clo_outfile, "fasta"):
    msa_out.append(str(record.seq))
clo_cons = ""
# Column-by-column pass over the alignment; the loop body lies beyond the
# visible part of the source.
for cons_i in xrange(len(msa_out[0])):
# Extend one hit (fields of bline) to cover the whole region, cut the matching
# stretch out of the assembly and plot it against the region.
# NOTE(review): fields 6-9 look like the query/subject start/end columns of a
# tabular BLAST line - confirm. bline, reglen, assfa, regseq, args and
# blast_con are defined outside the visible chunk, as is the enclosing loop
# (if any); indentation is reconstructed from a whitespace-collapsed line.
qstart = int(bline[6])
qend = int(bline[7])
sstart = int(bline[8])
send = int(bline[9])
revcomp = False
if(sstart < send):
    # Forward hit: pad the subject interval by the unaligned query flanks.
    sstart = sstart - qstart
    send = send + reglen - qend
else:
    # Reverse hit: swap the ends, pad them, and remember to flip the sequence.
    sswitch = sstart
    sstart = send - reglen + qend
    send = sswitch + qstart
    revcomp = True
if(sstart < 0):
    sstart = 0
graph_title = bline[0] + ' ' + bline[1] + '_' + str(sstart) + '_'
if(send > len(assfa[bline[1]])):
    # NOTE(review): send is an exclusive slice bound below, so len - 1 drops
    # the last base of the contig - confirm this is intended.
    send = len(assfa[bline[1]]) - 1
    graph_title = graph_title + str(send) + ' (end)'
else:
    graph_title = graph_title + str(send)
print str(sstart) + '-' + str(send)
assseq = assfa[bline[1]][sstart:send]
if(revcomp):
    # Relies on reverse_complement() modifying assseq in place.
    assseq = MutableSeq(str(assseq), generic_dna)
    assseq.reverse_complement()
    assseq = str(assseq)
plotMummer(regseq, assseq, args.outdir + '/' + bline[0] + '-' + bline[1])
blast_con.close()
st=str(seq) #toString print st #tipo de dato secuencia editable from Bio.Seq import MutableSeq mut_seq=seq.tomutable() #convertirlo a tipo seq mutable print mut_seq mut_seq[0]='C' print mut_seq mut_seq=MutableSeq('ATGCCG',IUPAC.IUPACUnambiguousDNA()) #has methods as a list: append(), insert(), pop(), remove() mut_seq[1:3]='TTT' mut_seq.reverse() mut_seq.complement() print mut_seq mut_seq.reverse_complement() print mut_seq #tipo de dato metadatos de secuencia from Bio.SeqRecord import SeqRecord seqrec=SeqRecord(seq,id='001', name='My Secuencia') #2 main attributes: # id: string identifier, optional, recommended # seq: Seq object, required #additional attributes # name, description: name and more info of sequence # dbxrefs: list of strings, each string an id of a DB # features: list of SeqFeature objects, those found in Genbank records # annotations: dictionary with further info, can't be set on initialization seqrec=SeqRecord(Seq('mdstnvrsgmksrkkkpkttvidddddcmtcsacqsklvkisditkvsldyintmrgntlacaacgsslkllndfas',Bio.Alphabet.generic_protein), id='P20994.1', name='P20994', description='Protein A19', dbxrefs=['Pfam:PF05077', 'InterPro:IPR007769', 'DIP:2186N']) seqrec.annotations['note']='A simple note'
def seq_query():
    """Interactively query ``./data/DB`` and write the hits as FASTA.

    Prompts for one of five query types (specific fragment, specific
    organism, specific gene, everything, all cds), runs the matching SELECT
    on the ``main`` table and writes one FASTA record per row. Rows whose
    ``Strand`` equals ``'-1'`` are reverse complemented first.

    When the answer to the "organize" prompt is ``'y'``, each record is
    appended to ``output/<Name>.fasta`` (slashes removed from the name);
    otherwise all records go to a single ``<filename>.fasta`` asked for
    interactively.

    Raises:
        ValueError: if the chosen query type is not one of '1'..'5'.
    """
    query_type = input(
        '1.Specific fragment\n'
        '2.Specific Organism\n'
        '3.Specific gene\n'
        '4.All\n'
        '5.All cds\n'
    )
    organize = input('Organize output?(y/n)\n')
    if query_type not in ['1', '2', '3', '4', '5']:
        raise ValueError('wrong input!\n')

    con = sqlite3.connect('./data/DB')
    try:
        cur = con.cursor()
        if query_type == '1':
            organism = input('Organism:\n')
            gene = input('Gene:\n')
            frag_type = input('Fragment type(gene, cds, rRNA, tRNA, exon, intron, spacer):\n')
            cur.execute(
                'SELECT Taxon, Organism, Name, Type, Strand, Sequence FROM main WHERE Name LIKE ? AND Type = ? AND Organism=?',
                ('%' + gene + '%', frag_type, organism))
        elif query_type == '2':
            organism = input('Organism:\n')
            frag_type = input('Fragment type(gene, cds, rRNA, tRNA, exon, intron, spacer, whole, fragments):\n')
            if frag_type == 'fragments':
                cur.execute(
                    'SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main WHERE Organism = ? ORDER BY Head',
                    (organism,))
            else:
                cur.execute(
                    'SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main WHERE Organism LIKE ? AND Type = ? ORDER BY Head',
                    ('%' + organism + '%', frag_type))
        elif query_type == '3':
            gene = input('Gene:\n')
            frag_type = input('Fragment type(gene, cds, rRNA, tRNA, exon, intron, spacer):\n')
            cur.execute(
                'SELECT Taxon, Organism, Name, Type, Strand, Sequence FROM main WHERE Name LIKE ? AND Type = ? ORDER BY Taxon',
                ('%' + gene + '%', frag_type))
        elif query_type == '4':
            cur.execute('SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main ORDER BY Taxon')
        else:  # query_type == '5'
            # SQL string literals take single quotes; a double-quoted token
            # is an identifier and only works as a string through a legacy
            # SQLite fallback that can be compiled out.
            cur.execute(
                "SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main WHERE type = 'cds' ORDER BY Taxon")
        result = cur.fetchall()
        cur.close()
    finally:
        # Release the database even if a prompt or a query fails.
        con.close()

    query_result = []
    for row in result:
        title = '|'.join([str(row[0]), row[1], row[2], row[3]])
        sequence = MutableSeq(row[5])
        gene = row[2]
        if row[4] == '-1':
            # Works whether reverse_complement() mutates in place (returning
            # None) or returns a new object, which depends on the Biopython
            # release in use.
            flipped = sequence.reverse_complement()
            if flipped is not None:
                sequence = flipped
        query_result.append([title, gene, sequence])

    if organize == 'y':
        if not exists('output'):
            makedirs('output')
        for title, gene, sequence in query_result:
            file_name = ''.join([
                'output',
                '/',
                gene.replace('/', ''),
                '.fasta'
            ])
            with open(file_name, 'a') as output_file:
                output_file.write('>%s\n%s\n' % (title, sequence))
    else:
        output = input('Enter output filename:\n')
        with open('.'.join([output, 'fasta']), 'w') as output_file:
            for title, gene, sequence in query_result:
                output_file.write('>%s\n%s\n' % (title, sequence))
    print('Done.\n')