def generate_rolls(num_rolls):
    """Simulate a run of dice rolls following the casino probabilities.

    The simulation starts in the fair state ('F'). On every step the
    current state is recorded, a roll is drawn for that state, and then a
    second random draw decides whether to switch state: 'F' -> 'L' when
    the draw is <= .05, and 'L' -> 'F' when the draw is <= .1.

    Arguments:
     - num_rolls -- The number of rolls to generate.

    Returns:
     - The generated roll sequence.
     - The state sequence that generated the rolls.
    """
    rolls = MutableSeq('', DiceRollAlphabet())
    dice_states = MutableSeq('', DiceTypeAlphabet())

    # begin with the fair die
    die = 'F'
    for _ in range(num_rolls):
        dice_states.append(die)

        # first random draw of the step: picks the roll for the current die
        rolls.append(_loaded_dice_roll(random.random(), die))

        # second random draw of the step: maybe swap to the other die
        switch_draw = random.random()
        if die == 'F' and switch_draw <= .05:
            die = 'L'
        elif die == 'L' and switch_draw <= .1:
            die = 'F'

    return rolls.toseq(), dice_states.toseq()
def random_generator(num):
    """Build a random state path and a random DNA sequence of length num.

    All state letters (drawn from '123') are generated first, then all
    nucleotide letters (drawn from 'ACTG').

    Arguments:
     - num -- The number of letters to draw for each sequence.

    Returns:
     - The random state sequence.
     - The random nucleotide sequence.
    """
    def _fill(target, letters):
        # append num uniformly chosen letters to target, one at a time
        for _ in range(num):
            target.append(random.choice(letters))
        return target

    state_path = _fill(MutableSeq('', state()), '123')
    bases = _fill(MutableSeq('', DNA()), 'ACTG')
    return state_path.toseq(), bases.toseq()
def get_optimal_alignment(self):
    """Follow the traceback pointers and return the two aligned sequences.

    The walk starts at the matrix cell keyed by (len(seq1), len(seq2)),
    whose items are always emitted, and then follows get_parent() links.
    A cell is only emitted while it still has a parent, so the final cell
    of the chain (the one without a parent) contributes nothing.

    Returns a tuple with the aligned version of seq1 and of seq2.
    """
    first_aligned = MutableSeq(array.array("c"),
                               Alphabet.Gapped(IUPAC.protein, GAP_CHAR))
    second_aligned = MutableSeq(array.array("c"),
                                Alphabet.Gapped(IUPAC.protein, GAP_CHAR))

    # the bottom corner of the matrix is handled outside of the loop
    corner = self.dpmatrix[(len(self.seq1), len(self.seq2))]
    first_aligned.append(corner.seq1item)
    second_aligned.append(corner.seq2item)

    cell = corner.get_parent()
    parent = cell.get_parent()
    while parent:
        # how far the parent sits from this cell, as (columns, rows)
        step = (cell.col_pos - parent.col_pos,
                cell.row_pos - parent.row_pos)

        if step == (1, 1):
            # diagonal move: both sequences gain a letter
            first_aligned.append(cell.seq1item)
            second_aligned.append(cell.seq2item)
        elif step == (0, 1):
            # same column, previous row: gap in seq1, letter from seq2
            first_aligned.append(GAP_CHAR)
            second_aligned.append(cell.seq2item)
        elif step == (1, 0):
            # previous column, same row: letter from seq1, gap in seq2
            first_aligned.append(cell.seq1item)
            second_aligned.append(GAP_CHAR)

        cell = parent
        parent = cell.get_parent()

    # the letters were collected end-to-start, so flip them around
    first_aligned.reverse()
    second_aligned.reverse()
    return first_aligned.toseq(), second_aligned.toseq()
def point_mutation(seq, cdna_variant, original_protein, verbose=False):
    """Apply a single-base cDNA substitution and describe the protein change.

    Arguments:
     - seq -- the coding DNA; it is indexed by position and handed to
       MutableSeq.
     - cdna_variant -- text whose start must match
       ``<position><from>><to>`` (for example "5A>G"), where the position
       is 1-based and both bases are one of A, C, T, G.
     - original_protein -- the reference protein, indexed by codon number;
       its letters are looked up in three_letter_code.
     - verbose -- when true, print the variant, any reference-base
       mismatch, and the resulting protein effect.

    Returns ("", "") when cdna_variant is empty or None. Otherwise returns
    (cdna_variant, protein_effect), where protein_effect is the original
    residue name, the 1-based codon number and the new residue name, with
    "splice" appended when the mutated base is a G whose 1-based position
    is listed in abca4_donor_splice.

    A variant that cannot be parsed is reported on stdout and the
    interpreter is asked to exit via exit().
    """
    if verbose:
        print("cdna_variant", cdna_variant)
    if not cdna_variant:
        return "", ""

    parsed = re.match(r'(\d+)([ACTG])>([ACTG])', cdna_variant)
    if not parsed:
        print("not parseable:", cdna_variant)
        exit()

    position_text, nt_from, nt_to = parsed.groups()
    # variants count from 1, sequence indexing counts from 0
    pos = int(position_text) - 1

    # sanity check only: a mismatch is reported (if verbose) but not fatal
    if seq[pos] != nt_from and verbose:
        print(f"seq mismatch in from: {nt_from} should be {seq[pos]}")

    codon_index = floor(pos / 3)

    mutated = MutableSeq(seq)
    mutated[pos] = nt_to
    new_protein = str(mutated.toseq().translate())

    residue_before = three_letter_code[original_protein[codon_index]]
    residue_after = three_letter_code[new_protein[codon_index]]
    protein_effect = f"{residue_before}{codon_index + 1}{residue_after}"

    # special warning: mutating the last G before a splice site might
    # affect splicing
    if pos + 1 in abca4_donor_splice and nt_from == "G":
        protein_effect += "splice"

    if verbose:
        print("protein_effect", protein_effect)
    return cdna_variant, protein_effect
def Gthg01471():
    """Print the translation of a gene before and after six substitutions.

    The same DNA text is loaded as a read-only Seq and as a MutableSeq.
    For each listed 1-based position the current base of the mutable copy
    is printed next to a paired letter and then overwritten. Finally the
    translations of the untouched and the edited sequences are printed.
    """
    gene = "ATGAGCATAAGTTTATCGGTTCCAAAATGGTTATTAACAGTTTTATCAATTTTATCTTTAGTCGTAGCATTTATTTTCGGTACCGTTTCCAATGCATCAGCAACAATTAACTATGGGGAGGAAGTCGCGGCAGTAGCAAATGACTATGTAGGAAGCCCATATAAATATGGAGGTACAACGCCAAAAGGATTTGATGCGAGTGGCTTTACTCAGTATGTGTATAAAAATGCTGCAACCAAATTGGCTATTCCGCGAACGAGTGCCGCACAGTATAAAGTCGGTAAATTTGTTAAACAAAGTGCGTTACAAAGAGGCGATTTAGTGTTTTATGCAACAGGAGCAAAAGGAAAGGTATCCTTTGTGGGAATTTATAATGGAAATGGTACGTTTATTGGTGCCACATCAAAAGGGGTAAAAGTGGTTAAAATGAGTGATAAATATTGGAAAGACCGGTATATAGGGGCTAAGCGAGTCATTAAGTAA"
    untouched = Seq(gene, IUPAC.unambiguous_dna)
    edited = MutableSeq(gene, IUPAC.unambiguous_dna)

    positions = (259, 277, 282, 295, 299, 306)
    # paired_letter is presumably the base expected at that position
    # before the edit -- TODO confirm
    for position, paired_letter, new_base in zip(positions, "AGTCGA", "GACTAG"):
        index = position - 1
        print(edited[index] + paired_letter)
        edited[index] = new_base

    print(untouched.translate())
    print(edited.toseq().translate())
def MutableGeneSeq():
    """Show two ways of getting an editable sequence, printing each result.

    First a read-only Seq is converted with tomutable() and its second
    letter is replaced; then a MutableSeq is built directly and its first
    letter is replaced. The last step converts back with toseq().
    """
    letters = "GCCATTGTAATGGGCCGCTGAAAGGGTGCCCGA"
    label = 'mutable_seq = '

    # a Seq cannot be edited in place (item assignment raises an error),
    # so go through a mutable copy
    read_only = Seq(letters, IUPAC.unambiguous_dna)
    converted = read_only.tomutable()
    converted[1] = 'T'
    print(label, converted)

    built_directly = MutableSeq(letters, IUPAC.unambiguous_dna)
    built_directly[0] = 'T'
    print(label, built_directly)

    # convert back to a read-only seq; the result is not used further
    built_directly.toseq()
def Gthg01471():
    """Print the translation of a gene before and after six substitutions.

    The same DNA text is loaded as a read-only Seq and as a MutableSeq.
    For each listed 1-based position the current base of the mutable copy
    is printed next to a paired letter and then overwritten. Finally the
    translations of the untouched and the edited sequences are printed.
    """
    gene = "ATGAGCATAAGTTTATCGGTTCCAAAATGGTTATTAACAGTTTTATCAATTTTATCTTTAGTCGTAGCATTTATTTTCGGTACCGTTTCCAATGCATCAGCAACAATTAACTATGGGGAGGAAGTCGCGGCAGTAGCAAATGACTATGTAGGAAGCCCATATAAATATGGAGGTACAACGCCAAAAGGATTTGATGCGAGTGGCTTTACTCAGTATGTGTATAAAAATGCTGCAACCAAATTGGCTATTCCGCGAACGAGTGCCGCACAGTATAAAGTCGGTAAATTTGTTAAACAAAGTGCGTTACAAAGAGGCGATTTAGTGTTTTATGCAACAGGAGCAAAAGGAAAGGTATCCTTTGTGGGAATTTATAATGGAAATGGTACGTTTATTGGTGCCACATCAAAAGGGGTAAAAGTGGTTAAAATGAGTGATAAATATTGGAAAGACCGGTATATAGGGGCTAAGCGAGTCATTAAGTAA"
    untouched = Seq(gene, IUPAC.unambiguous_dna)
    edited = MutableSeq(gene, IUPAC.unambiguous_dna)

    positions = (259, 277, 282, 295, 299, 306)
    # paired_letter is presumably the base expected at that position
    # before the edit -- TODO confirm
    for position, paired_letter, new_base in zip(positions, "AGTCGA", "GACTAG"):
        index = position - 1
        print(edited[index] + paired_letter)
        edited[index] = new_base

    print(untouched.translate())
    print(edited.toseq().translate())
# Small demonstration of in-place editing with MutableSeq.
from Bio.Seq import Seq
from Bio.Seq import MutableSeq
from Bio.Alphabet import IUPAC

# Build an editable DNA sequence directly from a string.
my_seq = MutableSeq("GCCATTGTAATGGGCCGCTGAAAGGGTGCCCGA", IUPAC.unambiguous_dna)
# my_seq[6] = "C"
# Edit the sequence in place: remove a "T", then reverse the letters.
my_seq.remove("T")
my_seq.reverse()
print(repr(my_seq))
# Convert the edited sequence back with toseq().
non_mutable_seq = my_seq.toseq()
coding_dna.transcribe()#T→U coding_dna.reverse_complement().transcribe()#true_transcribe coding_dna.translate(to_stop=True,cds=True)#RNA和DNA都可以直接翻译,table参数可以选择密码子表 from Bio.Data import CodonTable standard_table=CodonTable.unambiguous_dna_by_id[1] mito_table = CodonTable.unambiguous_dna_by_name["Vertebrate Mitochondrial"] standard_table = CodonTable.unambiguous_dna_by_name["Standard"] mito_table = CodonTable.unambiguous_dna_by_id[2] from Bio.Seq import MutableSeq mutable_seq = MutableSeq("GCCATTGTAATGGGCCGCTGAAAGGGTGCCCGA", IUPAC.unambiguous_dna) ##或者 mutable_seq = my_seq.tomutable() mutable_seq[5] = "C" mutable_seq.remove("T") mutable_seq.reverse() new_seq=mutable_seq.toseq() from Bio.Seq import UnknownSeq unk_dna=UnknownSeq(20,alphabet=IUPAC.ambiguous_dna) from Bio.Seq import reverse_complement,transcribe,back_transcribe,translate my_string = "GCTGTTATGGGTCGTTGGAAGGGTGGTCGTGCTGCTGGTTAG" reverse_complement(my_string) transcribe(my_string) back_transcribe(my_string) translate(my_string)
# NOTE(review): Seq and IUPAC are not imported in this fragment;
# presumably they are imported earlier in the script -- confirm.
from Bio.Alphabet import generic_dna
gene = Seq("GTGAAAAAGATGCAATCTATCGTACTCGCACTTTCCCTGGTTCTGGTCGCTCCCATGGCA" +\
           "GCACAGGCTGCGGAAATTACGTTAGTCCCGTCAGTAAAATTACAGATAGGCGATCGTGAT" +\
           "AATCGTGGCTATTACTGGGATGGAGGTCACTGGCGCGACCACGGCTGGTGGAAACAACAT" +\
           "TATGAATGGCGAGGCAATCGCTGGCACCTACACGGACCGCCGCCACCGCCGCGCCACCAT" +\
           "AAGAAAGCTCCTCATGATCATCACGGCGGTCATGGTCCAGGCAAACATCACCGCTAA", generic_dna)
print(gene.translate(table="Bacterial", cds=True))
# In the bacterial code, GTG means something different as a start codon than
# as a normal codon; cds=True declares that this sequence is a complete CDS
# Translation tables
# For translation-table names see ftp://ftp.ncbi.nlm.nih.gov/entrez/misc/data/gc.prt or https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
from Bio.Data import CodonTable
standard_table = CodonTable.unambiguous_dna_by_name["Standard"]  # load the standard translation table
mito_table = CodonTable.unambiguous_dna_by_name["Vertebrate Mitochondrial"]  # load the mitochondrial translation table
# standard_table = CodonTable.unambiguous_dna_by_id[1]
# mito_table = CodonTable.unambiguous_dna_by_id[2]
print(standard_table)
print(mito_table)
print(mito_table.stop_codons)
print(mito_table.start_codons)
# Editable seq
my_seq = Seq("GCCATTGTAATGGGCCGCTGAAAGGGTGCCCGA", IUPAC.unambiguous_dna)
mutable_seq = my_seq.tomutable()
print(mutable_seq)
# MutableSeq object
from Bio.Seq import MutableSeq
mutable_seq = MutableSeq("GCCATTGTAATGGGCCGCTGAAAGGGTGCCCGA", IUPAC.unambiguous_dna)
mutable_seq[5] = "C"
mutable_seq.reverse()
new_seq = mutable_seq.toseq()  # back to a read-only object
#Join the sequences outside of the mutated area #The parts in parenthesis are omitted # # --------|------------------>|(--->) # | | # (<---)|<------------------|------------- # cand = SeqRecord(str(fw[:227] + rev[:67].reverse_complement()), id = read1.id, name = read1.name, description=read1.description) #Correcting the quirk of the dataset dummy = MutableSeq(cand.seq) dummy[-9] = 'G' cand.seq = dummy.toseq() #Checking for the correct length if len(cand.seq) == 294: SeqIO.write(cand,myFile,'fasta') succes += 1 #Printing the progress of the code if count % 25000 == 0: print(count) print(succes) #Finalizing the script and saving the merged fasta files print('The total counts is: ' + str(count)) print('The total succes is: ' + str(succes))
def viterbi(self, sequence, state_alphabet):
    """Calculate the most probable state path using the Viterbi algorithm.

    This implements the Viterbi algorithm (see pgs 55-57 in Durbin et
    al for a full explanation -- this is where I took my implementation
    ideas from), to allow decoding of the state path, given a sequence
    of emissions.

    Arguments:
     - sequence -- A Seq object with the emission sequence that we
       want to decode.
     - state_alphabet -- The alphabet of the possible state sequences
       that can be generated.

    Returns a tuple of the decoded state path (converted with toseq())
    and the score of that path, which is a sum of the log-transformed
    initial, transition and emission values along it.
    """
    # calculate logarithms of the initial, transition, and emission probs
    log_initial = self._log_transform(self.initial_prob)
    log_trans = self._log_transform(self.transition_prob)
    log_emission = self._log_transform(self.emission_prob)

    viterbi_probs = {}
    pred_state_seq = {}
    state_letters = state_alphabet.letters

    # --- recursion
    # loop over the training sequence (i = 1 .. L)
    # NOTE: My index numbers are one less than what is given in Durbin
    # et al, since we are indexing the sequence going from 0 to
    # (Length - 1) not 1 to Length, like in Durbin et al.
    for i in range(0, len(sequence)):
        # loop over all of the possible i-th states in the state path
        for cur_state in state_letters:
            # e_{l}(x_{i})
            emission_part = log_emission[(cur_state, sequence[i])]

            if i == 0:
                # for the first state, use the initial probability rather
                # than looking back to previous states
                max_prob = log_initial[cur_state]
            else:
                # loop over all possible (i-1)-th previous states
                possible_state_probs = {}
                for prev_state in self.transitions_to(cur_state):
                    # a_{kl}
                    trans_part = log_trans[(prev_state, cur_state)]

                    # v_{k}(i - 1)
                    viterbi_part = viterbi_probs[(prev_state, i - 1)]
                    cur_prob = viterbi_part + trans_part

                    possible_state_probs[prev_state] = cur_prob

                # calculate the viterbi probability using the max
                max_prob = max(possible_state_probs.values())

            # v_{k}(i)
            viterbi_probs[(cur_state, i)] = (emission_part + max_prob)

            if i > 0:
                # get the most likely prev_state leading to cur_state
                for state in possible_state_probs:
                    if possible_state_probs[state] == max_prob:
                        pred_state_seq[(i - 1, cur_state)] = state
                        break

    # --- termination
    # calculate the probability of the state path
    # loop over all states
    all_probs = {}
    for state in state_letters:
        # v_{k}(L)
        all_probs[state] = viterbi_probs[(state, len(sequence) - 1)]

    state_path_prob = max(all_probs.values())

    # find the last pointer we need to trace back from
    # (no break: on a tie, the last matching state in iteration order wins)
    last_state = ''
    for state in all_probs:
        if all_probs[state] == state_path_prob:
            last_state = state

    assert last_state != '', "Didn't find the last state to trace from!"

    # --- traceback
    traceback_seq = MutableSeq('', state_alphabet)

    # last_state is the last state in the most probable state sequence.
    # Compute that sequence by walking backwards in time. From the i-th
    # state in the sequence, find the (i-1)-th state as the most
    # probable state preceding the i-th state.
    state = last_state
    traceback_seq.append(state)
    # Count down from len(sequence) - 1 to 1 with a descending range;
    # calling .reverse() on a range object fails on Python 3.
    for i in range(len(sequence) - 1, 0, -1):
        state = pred_state_seq[(i - 1, state)]
        traceback_seq.append(state)

    # put the traceback sequence in the proper orientation
    traceback_seq.reverse()

    return traceback_seq.toseq(), state_path_prob
for jcod in xrange(3): for ai in xrange(4): cod_anc[:] = conss_gene[3 * j: 3 * (j+1)] # Ancestral allele, skip (we only look at propagation of MINOR alleles) if alpha[ai] == cod_anc[jcod]: continue cod_new[:] = conss_gene[3 * j: 3 * (j+1)] cod_new[jcod] = alpha[ai] aftmp = aft_der_gene[:, ai, j + jcod] aftmp = aftmp[(aftmp >= bins[0]) & (aftmp <= bins[-1])] if not len(aftmp): continue if str(cod_new.toseq().translate()) != str(cod_anc.toseq().translate()): nu_syn.extend(aftmp) else: nu_nonsyn.extend(aftmp) if len(nu_syn): hist_syn += np.histogram(nu_syn, bins=bins)[0] if len(nu_nonsyn): hist_nonsyn += np.histogram(nu_nonsyn, bins=bins)[0] # Normalize hist_norm = hist.copy() hist_norm /= hist_norm.sum() hist_norm /= bins[1:] - bins[:-1]
class TestMutableSeq(unittest.TestCase):
    """Tests for MutableSeq: creation, comparison, editing and conversion."""

    def setUp(self):
        """Create a read-only Seq and a MutableSeq holding the same letters."""
        self.s = Seq.Seq("TCAAAAGGATGCATCATG", IUPAC.unambiguous_dna)
        self.mutable_s = MutableSeq("TCAAAAGGATGCATCATG", IUPAC.ambiguous_dna)

    def test_mutableseq_creation(self):
        """Test creating MutableSeqs in multiple ways"""
        mutable_s = MutableSeq("TCAAAAGGATGCATCATG", IUPAC.ambiguous_dna)
        self.assertIsInstance(mutable_s, MutableSeq, "Creating MutableSeq")

        mutable_s = self.s.tomutable()
        self.assertIsInstance(mutable_s, MutableSeq,
                              "Converting Seq to mutable")

        array_seq = MutableSeq(array.array(array_indicator,
                                           "TCAAAAGGATGCATCATG"),
                               IUPAC.ambiguous_dna)
        self.assertIsInstance(array_seq, MutableSeq,
                              "Creating MutableSeq using array")

    def test_repr(self):
        """Test repr() of a short MutableSeq"""
        self.assertEqual("MutableSeq('TCAAAAGGATGCATCATG', IUPACAmbiguousDNA())",
                         repr(self.mutable_s))

    def test_truncated_repr(self):
        """Test that repr() of a long MutableSeq is shortened with '...'"""
        seq = "TCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGA"
        expected = "MutableSeq('TCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGATGCATCATG...GGA', IUPACAmbiguousDNA())"
        self.assertEqual(expected, repr(MutableSeq(seq, IUPAC.ambiguous_dna)))

    def test_equal_comparison(self):
        """Test __eq__ comparison method"""
        self.assertEqual(self.mutable_s, "TCAAAAGGATGCATCATG")

    def test_equal_comparison_of_incompatible_alphabets(self):
        """Run == against an RNA MutableSeq while recording warnings"""
        # only checks that the comparison runs; the result is not asserted
        with warnings.catch_warnings(record=True):
            self.mutable_s == MutableSeq('UCAAAAGGA', IUPAC.ambiguous_rna)

    def test_not_equal_comparison(self):
        """Test __ne__ comparison method"""
        self.assertNotEqual(self.mutable_s, "other thing")

    def test_less_than_comparison(self):
        """Test __lt__ comparison method"""
        self.assertTrue(self.mutable_s[:-1] < self.mutable_s)

    def test_less_than_comparison_of_incompatible_alphabets(self):
        """Run < against an RNA MutableSeq while recording warnings"""
        # only checks that the comparison runs; the result is not asserted
        with warnings.catch_warnings(record=True):
            self.mutable_s[:-1] < MutableSeq("UCAAAAGGAUGCAUCAUG",
                                             IUPAC.ambiguous_rna)

    def test_less_than_comparison_without_alphabet(self):
        """Test < against a plain string"""
        self.assertTrue(self.mutable_s[:-1] < "TCAAAAGGATGCATCATG")

    def test_less_than_or_equal_comparison(self):
        """Test __le__ comparison method"""
        self.assertTrue(self.mutable_s[:-1] <= self.mutable_s)

    def test_less_than_or_equal_comparison_of_incompatible_alphabets(self):
        """Run <= against an RNA MutableSeq while recording warnings"""
        # only checks that the comparison runs; the result is not asserted
        with warnings.catch_warnings(record=True):
            self.mutable_s[:-1] <= MutableSeq("UCAAAAGGAUGCAUCAUG",
                                              IUPAC.ambiguous_rna)

    def test_less_than_or_equal_comparison_without_alphabet(self):
        """Test <= against a plain string"""
        self.assertTrue(self.mutable_s[:-1] <= "TCAAAAGGATGCATCATG")

    def test_add_method(self):
        """Test adding wrong type to MutableSeq"""
        with self.assertRaises(TypeError):
            self.mutable_s + 1234

    def test_radd_method(self):
        """Test __radd__ with another MutableSeq"""
        self.assertEqual("TCAAAAGGATGCATCATGTCAAAAGGATGCATCATG",
                         self.mutable_s.__radd__(self.mutable_s))

    def test_radd_method_incompatible_alphabets(self):
        """Test that __radd__ with an RNA MutableSeq raises TypeError"""
        with self.assertRaises(TypeError):
            self.mutable_s.__radd__(MutableSeq("UCAAAAGGA",
                                               IUPAC.ambiguous_rna))

    def test_radd_method_using_seq_object(self):
        """Test __radd__ with a read-only Seq"""
        self.assertEqual("TCAAAAGGATGCATCATGTCAAAAGGATGCATCATG",
                         self.mutable_s.__radd__(self.s))

    def test_radd_method_wrong_type(self):
        """Test that __radd__ with an int raises TypeError"""
        with self.assertRaises(TypeError):
            self.mutable_s.__radd__(1234)

    def test_as_string(self):
        """Test str() of a MutableSeq"""
        self.assertEqual("TCAAAAGGATGCATCATG", str(self.mutable_s))

    def test_length(self):
        """Test len() of a MutableSeq"""
        self.assertEqual(18, len(self.mutable_s))

    def test_converting_to_immutable(self):
        """Test that toseq() returns a Seq"""
        self.assertIsInstance(self.mutable_s.toseq(), Seq.Seq)

    def test_first_nucleotide(self):
        """Test reading the item at index 0"""
        self.assertEqual('T', self.mutable_s[0])

    def test_setting_slices(self):
        """Test assigning a string, a MutableSeq and an array to a slice"""
        self.assertEqual(MutableSeq('CAAA', IUPAC.ambiguous_dna),
                         self.mutable_s[1:5], "Slice mutable seq")

        # each assignment below edits the sequence left by the previous one
        self.mutable_s[1:3] = "GAT"
        self.assertEqual(MutableSeq("TGATAAAGGATGCATCATG",
                                    IUPAC.ambiguous_dna),
                         self.mutable_s,
                         "Set slice with string and adding extra nucleotide")

        self.mutable_s[1:3] = self.mutable_s[5:7]
        self.assertEqual(MutableSeq("TAATAAAGGATGCATCATG",
                                    IUPAC.ambiguous_dna),
                         self.mutable_s, "Set slice with MutableSeq")

        self.mutable_s[1:3] = array.array(array_indicator, "GAT")
        self.assertEqual(MutableSeq("TGATTAAAGGATGCATCATG",
                                    IUPAC.ambiguous_dna),
                         self.mutable_s, "Set slice with array")

    def test_setting_item(self):
        """Test assigning a single item by index"""
        self.mutable_s[3] = "G"
        self.assertEqual(MutableSeq("TCAGAAGGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_deleting_slice(self):
        """Test deleting a slice"""
        del self.mutable_s[4:5]
        self.assertEqual(MutableSeq("TCAAAGGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_deleting_item(self):
        """Test deleting a single item by index"""
        del self.mutable_s[3]
        self.assertEqual(MutableSeq("TCAAAGGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_appending(self):
        """Test append()"""
        self.mutable_s.append("C")
        self.assertEqual(MutableSeq("TCAAAAGGATGCATCATGC",
                                    IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_inserting(self):
        """Test insert() at an index"""
        self.mutable_s.insert(4, "G")
        self.assertEqual(MutableSeq("TCAAGAAGGATGCATCATG",
                                    IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_popping_last_item(self):
        """Test that pop() returns the last item"""
        self.assertEqual("G", self.mutable_s.pop())

    def test_remove_items(self):
        """Test remove() on a present letter and on a missing one"""
        self.mutable_s.remove("G")
        self.assertEqual(MutableSeq("TCAAAAGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s, "Remove first G")

        self.assertRaises(ValueError, self.mutable_s.remove, 'Z')

    def test_count(self):
        """Test count() with a single letter and with a two-letter string"""
        self.assertEqual(7, self.mutable_s.count("A"))
        self.assertEqual(2, self.mutable_s.count("AA"))

    def test_index(self):
        """Test index() on a present letter and on a missing string"""
        self.assertEqual(2, self.mutable_s.index("A"))
        self.assertRaises(ValueError, self.mutable_s.index, "8888")

    def test_reverse(self):
        """Test using reverse method"""
        self.mutable_s.reverse()
        self.assertEqual(MutableSeq("GTACTACGTAGGAAAACT", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_reverse_with_stride(self):
        """Test reverse using -1 stride"""
        self.assertEqual(MutableSeq("GTACTACGTAGGAAAACT", IUPAC.ambiguous_dna),
                         self.mutable_s[::-1])

    def test_complement(self):
        """Test in-place complement() of the DNA fixture"""
        self.mutable_s.complement()
        self.assertEqual(str("AGTTTTCCTACGTAGTAC"), str(self.mutable_s))

    def test_complement_rna(self):
        """Test in-place complement() of mixed-case RNA"""
        seq = Seq.MutableSeq("AUGaaaCUG", IUPAC.unambiguous_rna)
        seq.complement()
        self.assertEqual(str("UACuuuGAC"), str(seq))

    def test_complement_mixed_aphabets(self):
        """Test that complement() of a string with both U and T raises"""
        seq = Seq.MutableSeq("AUGaaaCTG")
        with self.assertRaises(ValueError):
            seq.complement()

    def test_complement_rna_string(self):
        """Test complement() of RNA letters given without an alphabet"""
        seq = Seq.MutableSeq("AUGaaaCUG")
        seq.complement()
        self.assertEqual('UACuuuGAC', str(seq))

    def test_complement_dna_string(self):
        """Test complement() of DNA letters given without an alphabet"""
        seq = Seq.MutableSeq("ATGaaaCTG")
        seq.complement()
        self.assertEqual('TACtttGAC', str(seq))

    def test_reverse_complement(self):
        """Test in-place reverse_complement()"""
        self.mutable_s.reverse_complement()
        self.assertEqual("CATGATGCATCCTTTTGA", str(self.mutable_s))

    def test_reverse_complement_of_protein(self):
        """Test that reverse_complement() of a protein raises ValueError"""
        seq = Seq.MutableSeq("ACTGTCGTCT", Alphabet.generic_protein)
        with self.assertRaises(ValueError):
            seq.reverse_complement()

    def test_to_string_method(self):
        """This method is currently deprecated, probably will need to remove this test soon"""
        with warnings.catch_warnings(record=True):
            self.mutable_s.tostring()

    def test_extend_method(self):
        """Test extend() with a string"""
        self.mutable_s.extend("GAT")
        self.assertEqual(MutableSeq("TCAAAAGGATGCATCATGGAT",
                                    IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_extend_with_mutable_seq(self):
        """Test extend() with another MutableSeq"""
        self.mutable_s.extend(MutableSeq("TTT", IUPAC.ambiguous_dna))
        self.assertEqual(MutableSeq("TCAAAAGGATGCATCATGTTT",
                                    IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_delete_stride_slice(self):
        """Test deleting the slice 4:6 - 1"""
        # NOTE(review): `4:6 - 1` evaluates to the plain slice 4:5, so no
        # stride is actually exercised here despite the test name -- confirm.
        del self.mutable_s[4:6 - 1]
        self.assertEqual(MutableSeq("TCAAAGGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_extract_third_nucleotide(self):
        """Test extracting every third nucleotide (slicing with stride 3)"""
        self.assertEqual(MutableSeq("TAGTAA", IUPAC.ambiguous_dna),
                         self.mutable_s[0::3])
        self.assertEqual(MutableSeq("CAGGTT", IUPAC.ambiguous_dna),
                         self.mutable_s[1::3])
        self.assertEqual(MutableSeq("AAACCG", IUPAC.ambiguous_dna),
                         self.mutable_s[2::3])

    def test_set_wobble_codon_to_n(self):
        """Test setting wobble codon to N (set slice with stride 3)"""
        self.mutable_s[2::3] = "N" * len(self.mutable_s[2::3])
        self.assertEqual(MutableSeq("TCNAANGGNTGNATNATN", IUPAC.ambiguous_dna),
                         self.mutable_s)
def viterbi(self, sequence, state_alphabet):
    """Calculate the most probable state path using the Viterbi algorithm.

    This implements the Viterbi algorithm (see pgs 55-57 in Durbin et
    al for a full explanation -- this is where I took my implementation
    ideas from), to allow decoding of the state path, given a sequence
    of emissions.

    Arguments:
     - sequence -- A Seq object with the emission sequence that we
       want to decode.
     - state_alphabet -- The alphabet of the possible state sequences
       that can be generated.

    Returns a tuple of the decoded state path (converted with toseq())
    and the score of that path. The score is the best final Viterbi
    value plus the log-transformed transition into the first letter of
    state_alphabet.
    """
    # calculate logarithms of the transition and emission probs
    log_trans = self._log_transform(self.transition_prob)
    log_emission = self._log_transform(self.emission_prob)

    viterbi_probs = {}
    pred_state_seq = {}
    state_letters = state_alphabet.letters

    # --- initialization
    #
    # NOTE: My index numbers are one less than what is given in Durbin
    # et al, since we are indexing the sequence going from 0 to
    # (Length - 1) not 1 to Length, like in Durbin et al.
    #
    # NOTE(review): 1 and 0 below look like probability-space values,
    # while every later entry of viterbi_probs is a sum of log values
    # (the log-space equivalents would be 0 and -inf). Left unchanged
    # because the intended start-state handling cannot be confirmed
    # from this method alone -- verify before changing.
    #
    # v_{0}(0) = 1
    viterbi_probs[(state_letters[0], -1)] = 1
    # v_{k}(0) = 0 for k > 0
    for state_letter in state_letters[1:]:
        viterbi_probs[(state_letter, -1)] = 0

    # --- recursion
    # loop over the training sequence (i = 1 .. L)
    for i in range(0, len(sequence)):
        # now loop over all of the letters in the state path
        for main_state in state_letters:
            # e_{l}(x_{i})
            emission_part = log_emission[(main_state, sequence[i])]

            # loop over all possible states
            possible_state_probs = {}
            for cur_state in self.transitions_from(main_state):
                # a_{kl}
                trans_part = log_trans[(cur_state, main_state)]

                # v_{k}(i - 1)
                viterbi_part = viterbi_probs[(cur_state, i - 1)]
                cur_prob = viterbi_part + trans_part

                possible_state_probs[cur_state] = cur_prob

            # finally calculate the viterbi probability using the max
            max_prob = max(possible_state_probs.values())
            viterbi_probs[(main_state, i)] = (emission_part + max_prob)

            # now get the most likely state
            for state in possible_state_probs:
                if possible_state_probs[state] == max_prob:
                    pred_state_seq[(i - 1, main_state)] = state
                    break

    # --- termination
    # calculate the probability of the state path
    # loop over all letters
    all_probs = {}
    for state in state_letters:
        # v_{k}(L)
        viterbi_part = viterbi_probs[(state, len(sequence) - 1)]
        # a_{k0}
        transition_part = log_trans[(state, state_letters[0])]

        # Both parts are logarithms, so the product of the underlying
        # probabilities is their sum; multiplying two logs would turn
        # two negative values into a positive score and make max()
        # favour the least likely ending.
        all_probs[state] = viterbi_part + transition_part

    state_path_prob = max(all_probs.values())

    # find the last pointer we need to trace back from
    # (no break: on a tie, the last matching state in iteration order wins)
    last_state = ''
    for state in all_probs:
        if all_probs[state] == state_path_prob:
            last_state = state

    assert last_state != '', "Didn't find the last state to trace from!"

    # --- traceback
    traceback_seq = MutableSeq('', state_alphabet)

    cur_state = last_state
    # Count down from len(sequence) - 1 to 0 with a descending range;
    # calling .reverse() on a range object fails on Python 3.
    for i in range(len(sequence) - 1, -1, -1):
        traceback_seq.append(cur_state)
        cur_state = pred_state_seq[(i - 1, cur_state)]

    # put the traceback sequence in the proper orientation
    traceback_seq.reverse()

    return traceback_seq.toseq(), state_path_prob
"AATCGTGGCTATTACTGGGATGGAGGTCACTGGCGCGACCACGGCTGGTGGAAACAACAT" + "TATGAATGGCGAGGCAATCGCTGGCACCTACACGGACCGCCGCCACCGCCGCGCCACCAT" + "AAGAAAGCTCCTCATGATCATCACGGCGGTCATGGTCCAGGCAAACATCACCGCTAA", generic_dna) print(gene.translate(table="Bacterial")) print(gene.translate(table="Bacterial", cds=True)) ##查看密码子表 from Bio.Data import CodonTable standard_table = CodonTable.unambiguous_dna_by_name["Standard"] mito_table = CodonTable.unambiguous_dna_by_id[2] print(standard_table) print(mito_table.start_codons) print(mito_table.stop_codons) print(mito_table.forward_table["ACG"]) ##可变对象 from Bio.Seq import MutableSeq mutable_seq = MutableSeq("GCCATTGTAATGGGCCGCTGAAAGGGTGCCCGA", IUPAC.unambiguous_dna) print(mutable_seq) mutable_seq[5] = "C" print(mutable_seq) mutable_seq.remove("T") print(mutable_seq) mutable_seq.reverse() print(mutable_seq) new_seq = mutable_seq.toseq() print(new_seq)
class TestMutableSeq(unittest.TestCase):
    """Tests for MutableSeq: creation, comparison, editing and conversion."""

    def setUp(self):
        """Create a read-only Seq and a MutableSeq holding the same letters."""
        self.s = Seq.Seq("TCAAAAGGATGCATCATG", IUPAC.unambiguous_dna)
        self.mutable_s = MutableSeq("TCAAAAGGATGCATCATG", IUPAC.ambiguous_dna)

    def test_mutableseq_creation(self):
        """Test creating MutableSeqs in multiple ways"""
        mutable_s = MutableSeq("TCAAAAGGATGCATCATG", IUPAC.ambiguous_dna)
        self.assertIsInstance(mutable_s, MutableSeq, "Creating MutableSeq")

        mutable_s = self.s.tomutable()
        self.assertIsInstance(mutable_s, MutableSeq,
                              "Converting Seq to mutable")

        array_seq = MutableSeq(
            array.array(array_indicator, "TCAAAAGGATGCATCATG"),
            IUPAC.ambiguous_dna)
        self.assertIsInstance(array_seq, MutableSeq,
                              "Creating MutableSeq using array")

    def test_repr(self):
        """Test repr() of a short MutableSeq"""
        self.assertEqual(
            "MutableSeq('TCAAAAGGATGCATCATG', IUPACAmbiguousDNA())",
            repr(self.mutable_s))

    def test_truncated_repr(self):
        """Test that repr() of a long MutableSeq is shortened with '...'"""
        seq = "TCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGA"
        expected = "MutableSeq('TCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGATGCATCATG...GGA', IUPACAmbiguousDNA())"
        self.assertEqual(expected, repr(MutableSeq(seq, IUPAC.ambiguous_dna)))

    def test_equal_comparison(self):
        """Test __eq__ comparison method"""
        self.assertEqual(self.mutable_s, "TCAAAAGGATGCATCATG")

    def test_equal_comparison_of_incompatible_alphabets(self):
        """Run == against an RNA MutableSeq while recording warnings"""
        # only checks that the comparison runs; the result is not asserted
        with warnings.catch_warnings(record=True):
            self.mutable_s == MutableSeq('UCAAAAGGA', IUPAC.ambiguous_rna)

    def test_not_equal_comparison(self):
        """Test __ne__ comparison method"""
        self.assertNotEqual(self.mutable_s, "other thing")

    def test_less_than_comparison(self):
        """Test __lt__ comparison method"""
        self.assertTrue(self.mutable_s[:-1] < self.mutable_s)

    def test_less_than_comparison_of_incompatible_alphabets(self):
        """Run < against an RNA MutableSeq while recording warnings"""
        # only checks that the comparison runs; the result is not asserted
        with warnings.catch_warnings(record=True):
            self.mutable_s[:-1] < MutableSeq("UCAAAAGGAUGCAUCAUG",
                                             IUPAC.ambiguous_rna)

    def test_less_than_comparison_without_alphabet(self):
        """Test < against a plain string"""
        self.assertTrue(self.mutable_s[:-1] < "TCAAAAGGATGCATCATG")

    def test_less_than_or_equal_comparison(self):
        """Test __le__ comparison method"""
        self.assertTrue(self.mutable_s[:-1] <= self.mutable_s)

    def test_less_than_or_equal_comparison_of_incompatible_alphabets(self):
        """Run <= against an RNA MutableSeq while recording warnings"""
        # only checks that the comparison runs; the result is not asserted
        with warnings.catch_warnings(record=True):
            self.mutable_s[:-1] <= MutableSeq("UCAAAAGGAUGCAUCAUG",
                                              IUPAC.ambiguous_rna)

    def test_less_than_or_equal_comparison_without_alphabet(self):
        """Test <= against a plain string"""
        self.assertTrue(self.mutable_s[:-1] <= "TCAAAAGGATGCATCATG")

    def test_add_method(self):
        """Test adding wrong type to MutableSeq"""
        with self.assertRaises(TypeError):
            self.mutable_s + 1234

    def test_radd_method(self):
        """Test __radd__ with another MutableSeq"""
        self.assertEqual("TCAAAAGGATGCATCATGTCAAAAGGATGCATCATG",
                         self.mutable_s.__radd__(self.mutable_s))

    def test_radd_method_incompatible_alphabets(self):
        """Test that __radd__ with an RNA MutableSeq raises TypeError"""
        with self.assertRaises(TypeError):
            self.mutable_s.__radd__(
                MutableSeq("UCAAAAGGA", IUPAC.ambiguous_rna))

    def test_radd_method_using_seq_object(self):
        """Test __radd__ with a read-only Seq"""
        self.assertEqual("TCAAAAGGATGCATCATGTCAAAAGGATGCATCATG",
                         self.mutable_s.__radd__(self.s))

    def test_radd_method_wrong_type(self):
        """Test that __radd__ with an int raises TypeError"""
        with self.assertRaises(TypeError):
            self.mutable_s.__radd__(1234)

    def test_as_string(self):
        """Test str() of a MutableSeq"""
        self.assertEqual("TCAAAAGGATGCATCATG", str(self.mutable_s))

    def test_length(self):
        """Test len() of a MutableSeq"""
        self.assertEqual(18, len(self.mutable_s))

    def test_converting_to_immutable(self):
        """Test that toseq() returns a Seq"""
        self.assertIsInstance(self.mutable_s.toseq(), Seq.Seq)

    def test_first_nucleotide(self):
        """Test reading the item at index 0"""
        self.assertEqual('T', self.mutable_s[0])

    def test_setting_slices(self):
        """Test assigning a string, a MutableSeq and an array to a slice"""
        self.assertEqual(MutableSeq('CAAA', IUPAC.ambiguous_dna),
                         self.mutable_s[1:5], "Slice mutable seq")

        # each assignment below edits the sequence left by the previous one
        self.mutable_s[1:3] = "GAT"
        self.assertEqual(
            MutableSeq("TGATAAAGGATGCATCATG", IUPAC.ambiguous_dna),
            self.mutable_s,
            "Set slice with string and adding extra nucleotide")

        self.mutable_s[1:3] = self.mutable_s[5:7]
        self.assertEqual(
            MutableSeq("TAATAAAGGATGCATCATG", IUPAC.ambiguous_dna),
            self.mutable_s, "Set slice with MutableSeq")

        self.mutable_s[1:3] = array.array(array_indicator, "GAT")
        self.assertEqual(
            MutableSeq("TGATTAAAGGATGCATCATG", IUPAC.ambiguous_dna),
            self.mutable_s, "Set slice with array")

    def test_setting_item(self):
        """Test assigning a single item by index"""
        self.mutable_s[3] = "G"
        self.assertEqual(MutableSeq("TCAGAAGGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_deleting_slice(self):
        """Test deleting a slice"""
        del self.mutable_s[4:5]
        self.assertEqual(MutableSeq("TCAAAGGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_deleting_item(self):
        """Test deleting a single item by index"""
        del self.mutable_s[3]
        self.assertEqual(MutableSeq("TCAAAGGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_appending(self):
        """Test append()"""
        self.mutable_s.append("C")
        self.assertEqual(
            MutableSeq("TCAAAAGGATGCATCATGC", IUPAC.ambiguous_dna),
            self.mutable_s)

    def test_inserting(self):
        """Test insert() at an index"""
        self.mutable_s.insert(4, "G")
        self.assertEqual(
            MutableSeq("TCAAGAAGGATGCATCATG", IUPAC.ambiguous_dna),
            self.mutable_s)

    def test_popping_last_item(self):
        """Test that pop() returns the last item"""
        self.assertEqual("G", self.mutable_s.pop())

    def test_remove_items(self):
        """Test remove() on a present letter and on a missing one"""
        self.mutable_s.remove("G")
        self.assertEqual(MutableSeq("TCAAAAGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s, "Remove first G")

        self.assertRaises(ValueError, self.mutable_s.remove, 'Z')

    def test_count(self):
        """Test count() with a single letter and with a two-letter string"""
        self.assertEqual(7, self.mutable_s.count("A"))
        self.assertEqual(2, self.mutable_s.count("AA"))

    def test_index(self):
        """Test index() on a present letter and on a missing string"""
        self.assertEqual(2, self.mutable_s.index("A"))
        self.assertRaises(ValueError, self.mutable_s.index, "8888")

    def test_reverse(self):
        """Test using reverse method"""
        self.mutable_s.reverse()
        self.assertEqual(MutableSeq("GTACTACGTAGGAAAACT", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_reverse_with_stride(self):
        """Test reverse using -1 stride"""
        self.assertEqual(MutableSeq("GTACTACGTAGGAAAACT", IUPAC.ambiguous_dna),
                         self.mutable_s[::-1])

    def test_complement(self):
        """Test in-place complement() of the DNA fixture"""
        self.mutable_s.complement()
        self.assertEqual(str("AGTTTTCCTACGTAGTAC"), str(self.mutable_s))

    def test_complement_rna(self):
        """Test in-place complement() of mixed-case RNA"""
        seq = Seq.MutableSeq("AUGaaaCUG", IUPAC.unambiguous_rna)
        seq.complement()
        self.assertEqual(str("UACuuuGAC"), str(seq))

    def test_complement_mixed_aphabets(self):
        """Test that complement() of a string with both U and T raises"""
        seq = Seq.MutableSeq("AUGaaaCTG")
        with self.assertRaises(ValueError):
            seq.complement()

    def test_complement_rna_string(self):
        """Test complement() of RNA letters given without an alphabet"""
        seq = Seq.MutableSeq("AUGaaaCUG")
        seq.complement()
        self.assertEqual('UACuuuGAC', str(seq))

    def test_complement_dna_string(self):
        """Test complement() of DNA letters given without an alphabet"""
        seq = Seq.MutableSeq("ATGaaaCTG")
        seq.complement()
        self.assertEqual('TACtttGAC', str(seq))

    def test_reverse_complement(self):
        """Test in-place reverse_complement()"""
        self.mutable_s.reverse_complement()
        self.assertEqual("CATGATGCATCCTTTTGA", str(self.mutable_s))

    def test_reverse_complement_of_protein(self):
        """Test that reverse_complement() of a protein raises ValueError"""
        seq = Seq.MutableSeq("ACTGTCGTCT", Alphabet.generic_protein)
        with self.assertRaises(ValueError):
            seq.reverse_complement()

    def test_to_string_method(self):
        """This method is currently deprecated, probably will need to remove this test soon"""
        with warnings.catch_warnings(record=True):
            self.mutable_s.tostring()

    def test_extend_method(self):
        """Test extend() with a string"""
        self.mutable_s.extend("GAT")
        self.assertEqual(
            MutableSeq("TCAAAAGGATGCATCATGGAT", IUPAC.ambiguous_dna),
            self.mutable_s)

    def test_extend_with_mutable_seq(self):
        """Test extend() with another MutableSeq"""
        self.mutable_s.extend(MutableSeq("TTT", IUPAC.ambiguous_dna))
        self.assertEqual(
            MutableSeq("TCAAAAGGATGCATCATGTTT", IUPAC.ambiguous_dna),
            self.mutable_s)

    def test_delete_stride_slice(self):
        """Test deleting the slice 4:6 - 1"""
        # NOTE(review): `4:6 - 1` evaluates to the plain slice 4:5, so no
        # stride is actually exercised here despite the test name -- confirm.
        del self.mutable_s[4:6 - 1]
        self.assertEqual(MutableSeq("TCAAAGGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_extract_third_nucleotide(self):
        """Test extracting every third nucleotide (slicing with stride 3)"""
        self.assertEqual(MutableSeq("TAGTAA", IUPAC.ambiguous_dna),
                         self.mutable_s[0::3])
        self.assertEqual(MutableSeq("CAGGTT", IUPAC.ambiguous_dna),
                         self.mutable_s[1::3])
        self.assertEqual(MutableSeq("AAACCG", IUPAC.ambiguous_dna),
                         self.mutable_s[2::3])

    def test_set_wobble_codon_to_n(self):
        """Test setting wobble codon to N (set slice with stride 3)"""
        self.mutable_s[2::3] = "N" * len(self.mutable_s[2::3])
        self.assertEqual(MutableSeq("TCNAANGGNTGNATNATN", IUPAC.ambiguous_dna),
                         self.mutable_s)
def viterbi(self, sequence, state_alphabet):
    """Decode the most probable hidden-state path for an emission sequence.

    Log-space Viterbi decoding following Durbin et al. (pp. 55-57).
    Positions are numbered 0 .. L-1 here rather than 1 .. L as in the
    book.

    Arguments:
     - sequence -- the emission sequence to decode.
     - state_alphabet -- alphabet whose ``letters`` are the candidate states.

    Returns a tuple of the decoded state path (``MutableSeq.toseq()``) and
    the best final score, computed from the log-transformed tables.
    """
    log_initial = self._log_transform(self.initial_prob)
    log_trans = self._log_transform(self.transition_prob)
    log_emission = self._log_transform(self.emission_prob)

    scores = {}          # (state, position) -> best score ending there
    back_pointers = {}   # (position - 1, state) -> best predecessor state
    states = state_alphabet.letters
    seq_len = len(sequence)

    # --- recursion
    for pos in range(seq_len):
        for state in states:
            # e_{l}(x_{i})
            emit = log_emission[(state, sequence[pos])]

            if pos == 0:
                # The first position has no predecessor: seed it from the
                # initial distribution instead of a transition.
                scores[(state, pos)] = emit + log_initial[state]
                continue

            # Score of arriving in `state` from every allowed predecessor.
            candidates = {}
            for prev in self.transitions_to(state):
                step = log_trans[(prev, state)]                   # a_{kl}
                candidates[prev] = scores[(prev, pos - 1)] + step  # v_{k}(i-1)

            best = max(candidates.values())
            scores[(state, pos)] = emit + best

            # Remember the first predecessor that achieves the maximum.
            for prev in candidates:
                if candidates[prev] == best:
                    back_pointers[(pos - 1, state)] = prev
                    break

    # --- termination
    # v_{k}(L) for every state; the best of these is the path score.
    final_scores = {state: scores[(state, seq_len - 1)] for state in states}
    state_path_prob = max(final_scores.values())

    # On ties the last matching state wins (the scan does not stop early).
    last_state = ''
    for state, score in final_scores.items():
        if score == state_path_prob:
            last_state = state
    assert last_state != '', "Didn't find the last state to trace from!"

    # --- traceback
    # Walk backwards from the final state, following the stored
    # predecessors, then flip the result into forward order.
    traceback_seq = MutableSeq('', state_alphabet)
    state = last_state
    traceback_seq.append(state)
    for pos in range(seq_len - 1, 0, -1):
        state = back_pointers[(pos - 1, state)]
        traceback_seq.append(state)
    traceback_seq.reverse()

    return traceback_seq.toseq(), state_path_prob
def viterbi(self, sequence, state_alphabet):
    """Calculate the most probable state path using the Viterbi algorithm.

    This implements the Viterbi algorithm (see pgs 55-57 in Durbin et al
    for a full explanation), to allow decoding of the state path, given a
    sequence of emissions. All scores are built from the log-transformed
    transition and emission tables, so they are combined by addition.

    Arguments:
     - sequence -- the emission sequence that we want to decode.
     - state_alphabet -- the alphabet of the possible state sequences
       that can be generated; its ``letters`` are the candidate states
       and ``letters[0]`` is used as the begin/end state.

    Returns a tuple of the decoded state path (``MutableSeq.toseq()``) and
    the score of that path in log space.
    """
    # calculate logarithms of the transition and emission probs
    log_trans = self._log_transform(self.transition_prob)
    log_emission = self._log_transform(self.emission_prob)

    viterbi_probs = {}   # (state, position) -> best score ending there
    pred_state_seq = {}  # (position - 1, state) -> best predecessor state
    state_letters = state_alphabet.letters

    # --- initialization
    #
    # NOTE: Index numbers are one less than what is given in Durbin
    # et al, since the sequence is indexed from 0 to (Length - 1),
    # not 1 to Length. The virtual start position is therefore -1.
    #
    # NOTE(review): the seeds below are linear-space probabilities (1 and
    # 0) stored in a table that otherwise holds log-space scores; the
    # log-space equivalents would be 0 and -inf. They are left unchanged
    # because switching would change which start states are reachable --
    # confirm the intended start-state semantics before altering them.
    # v_{0}(0) = 1
    viterbi_probs[(state_letters[0], -1)] = 1
    # v_{k}(0) = 0 for k > 0
    for state_letter in state_letters[1:]:
        viterbi_probs[(state_letter, -1)] = 0

    # --- recursion
    # loop over the training sequence (i = 1 .. L)
    for i in range(len(sequence)):
        # now loop over all of the letters in the state path
        for main_state in state_letters:
            # e_{l}(x_{i})
            emission_part = log_emission[(main_state, sequence[i])]

            # Score of arriving in main_state from each candidate state.
            # NOTE(review): the candidates come from transitions_from()
            # but are used as predecessors (k -> main_state); verify that
            # this helper really returns the states leading INTO
            # main_state.
            possible_state_probs = {}
            for cur_state in self.transitions_from(main_state):
                # a_{kl}
                trans_part = log_trans[(cur_state, main_state)]
                # v_{k}(i - 1)
                viterbi_part = viterbi_probs[(cur_state, i - 1)]
                possible_state_probs[cur_state] = viterbi_part + trans_part

            # finally calculate the viterbi probability using the max
            max_prob = max(possible_state_probs.values())
            viterbi_probs[(main_state, i)] = emission_part + max_prob

            # record the first predecessor that achieves the maximum
            for state in possible_state_probs:
                if possible_state_probs[state] == max_prob:
                    pred_state_seq[(i - 1, main_state)] = state
                    break

    # --- termination
    # calculate the probability of the state path
    all_probs = {}
    for state in state_letters:
        # v_{k}(L)
        viterbi_part = viterbi_probs[(state, len(sequence) - 1)]
        # a_{k0}
        transition_part = log_trans[(state, state_letters[0])]
        # Both terms are logarithms, so the product v_{k}(L) * a_{k0}
        # becomes a sum. Multiplying two log values gives a meaningless
        # score and can select the wrong final state.
        all_probs[state] = viterbi_part + transition_part

    state_path_prob = max(all_probs.values())

    # find the last pointer we need to trace back from
    # (on ties the last matching state wins; the scan does not stop early)
    last_state = ''
    for state in all_probs:
        if all_probs[state] == state_path_prob:
            last_state = state

    assert last_state != '', "Didn't find the last state to trace from!"

    # --- traceback
    traceback_seq = MutableSeq('', state_alphabet)

    # reversed(range(...)) works on both Python 2 and 3; on Python 3 a
    # range object has no reverse() method.
    cur_state = last_state
    for i in reversed(range(len(sequence))):
        traceback_seq.append(cur_state)
        cur_state = pred_state_seq[(i - 1, cur_state)]

    # put the traceback sequence in the proper orientation
    traceback_seq.reverse()

    return traceback_seq.toseq(), state_path_prob
class TestMutableSeq(unittest.TestCase):
    """Unit tests for MutableSeq built from plain strings (no alphabet)."""

    def setUp(self):
        """Create a fresh immutable and mutable copy of the same DNA."""
        self.s = Seq.Seq("TCAAAAGGATGCATCATG")
        self.mutable_s = MutableSeq("TCAAAAGGATGCATCATG")

    def test_mutableseq_creation(self):
        """Test creating MutableSeqs in multiple ways."""
        mutable_s = MutableSeq("TCAAAAGGATGCATCATG")
        self.assertIsInstance(mutable_s, MutableSeq, "Creating MutableSeq")

        mutable_s = self.s.tomutable()
        self.assertIsInstance(mutable_s, MutableSeq, "Converting Seq to mutable")

        # NOTE(review): the "u" typecode is documented as deprecated in the
        # standard-library array module -- confirm it is still available on
        # the Python versions this suite targets.
        array_seq = MutableSeq(array.array("u", "TCAAAAGGATGCATCATG"))
        self.assertIsInstance(array_seq, MutableSeq, "Creating MutableSeq using array")

    def test_repr(self):
        """Test repr() of a short sequence."""
        self.assertEqual("MutableSeq('TCAAAAGGATGCATCATG')", repr(self.mutable_s))

    def test_truncated_repr(self):
        """Test that repr() of a long sequence is elided with '...'."""
        seq = "TCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGA"
        expected = (
            "MutableSeq('TCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGATGCATCATG...GGA')"
        )
        self.assertEqual(expected, repr(MutableSeq(seq)))

    def test_equal_comparison(self):
        """Test __eq__ comparison method."""
        self.assertEqual(self.mutable_s, "TCAAAAGGATGCATCATG")

    def test_not_equal_comparison(self):
        """Test __ne__ comparison method."""
        self.assertNotEqual(self.mutable_s, "other thing")

    def test_less_than_comparison(self):
        """Test __lt__ comparison method."""
        self.assertLess(self.mutable_s[:-1], self.mutable_s)

    def test_less_than_comparison_of_incompatible_types(self):
        """Test that comparing with an int using < raises TypeError."""
        with self.assertRaises(TypeError):
            self.mutable_s < 1

    def test_less_than_comparison_without_alphabet(self):
        """Test __lt__ against a plain string."""
        # A proper prefix is strictly smaller, so assert the strict
        # relation; assertLessEqual would not exercise the "<" path.
        self.assertLess(self.mutable_s[:-1], "TCAAAAGGATGCATCATG")

    def test_less_than_or_equal_comparison(self):
        """Test __le__ comparison method."""
        self.assertLessEqual(self.mutable_s[:-1], self.mutable_s)

    def test_less_than_or_equal_comparison_of_incompatible_types(self):
        """Test that comparing with an int using <= raises TypeError."""
        with self.assertRaises(TypeError):
            self.mutable_s <= 1

    def test_less_than_or_equal_comparison_without_alphabet(self):
        """Test __le__ against a plain string."""
        self.assertLessEqual(self.mutable_s[:-1], "TCAAAAGGATGCATCATG")

    def test_greater_than_comparison(self):
        """Test __gt__ comparison method."""
        self.assertGreater(self.mutable_s, self.mutable_s[:-1])

    def test_greater_than_comparison_of_incompatible_types(self):
        """Test that comparing with an int using > raises TypeError."""
        with self.assertRaises(TypeError):
            self.mutable_s > 1

    def test_greater_than_comparison_without_alphabet(self):
        """Test __gt__ against a plain string."""
        self.assertGreater(self.mutable_s, "TCAAAAGGATGCATCAT")

    def test_greater_than_or_equal_comparison(self):
        """Test __ge__ comparison method."""
        self.assertGreaterEqual(self.mutable_s, self.mutable_s)

    def test_greater_than_or_equal_comparison_of_incompatible_types(self):
        """Test that comparing with an int using >= raises TypeError."""
        with self.assertRaises(TypeError):
            self.mutable_s >= 1

    def test_greater_than_or_equal_comparison_without_alphabet(self):
        """Test __ge__ against a plain string."""
        self.assertGreaterEqual(self.mutable_s, "TCAAAAGGATGCATCATG")

    def test_add_method(self):
        """Test adding wrong type to MutableSeq."""
        with self.assertRaises(TypeError):
            self.mutable_s + 1234

    def test_radd_method(self):
        """Test __radd__ with another MutableSeq."""
        self.assertEqual(
            "TCAAAAGGATGCATCATGTCAAAAGGATGCATCATG",
            self.mutable_s.__radd__(self.mutable_s),
        )

    def test_radd_method_incompatible_alphabets(self):
        """Test __radd__ with an RNA-looking MutableSeq on the left."""
        self.assertEqual(
            "UCAAAAGGATCAAAAGGATGCATCATG",
            self.mutable_s.__radd__(MutableSeq("UCAAAAGGA")),
        )

    def test_radd_method_using_seq_object(self):
        """Test __radd__ with an immutable Seq on the left."""
        self.assertEqual(
            "TCAAAAGGATGCATCATGTCAAAAGGATGCATCATG",
            self.mutable_s.__radd__(self.s),
        )

    def test_radd_method_wrong_type(self):
        """Test that __radd__ with an int raises TypeError."""
        with self.assertRaises(TypeError):
            self.mutable_s.__radd__(1234)

    def test_as_string(self):
        """Test str() conversion."""
        self.assertEqual("TCAAAAGGATGCATCATG", str(self.mutable_s))

    def test_length(self):
        """Test len()."""
        self.assertEqual(18, len(self.mutable_s))

    def test_converting_to_immutable(self):
        """Test that toseq() returns a Seq."""
        self.assertIsInstance(self.mutable_s.toseq(), Seq.Seq)

    def test_first_nucleotide(self):
        """Test indexing the first letter."""
        self.assertEqual("T", self.mutable_s[0])

    def test_setting_slices(self):
        """Test slice reads and slice assignment from several source types."""
        self.assertEqual(
            MutableSeq("CAAA"),
            self.mutable_s[1:5],
            "Slice mutable seq",
        )

        # Each assignment below builds on the result of the previous one.
        self.mutable_s[1:3] = "GAT"
        self.assertEqual(
            MutableSeq("TGATAAAGGATGCATCATG"),
            self.mutable_s,
            "Set slice with string and adding extra nucleotide",
        )

        self.mutable_s[1:3] = self.mutable_s[5:7]
        self.assertEqual(
            MutableSeq("TAATAAAGGATGCATCATG"),
            self.mutable_s,
            "Set slice with MutableSeq",
        )

        self.mutable_s[1:3] = array.array("u", "GAT")
        self.assertEqual(
            MutableSeq("TGATTAAAGGATGCATCATG"),
            self.mutable_s,
            "Set slice with array",
        )

    def test_setting_item(self):
        """Test assigning to a single index."""
        self.mutable_s[3] = "G"
        self.assertEqual(MutableSeq("TCAGAAGGATGCATCATG"), self.mutable_s)

    def test_deleting_slice(self):
        """Test deleting a one-letter slice."""
        del self.mutable_s[4:5]
        self.assertEqual(MutableSeq("TCAAAGGATGCATCATG"), self.mutable_s)

    def test_deleting_item(self):
        """Test deleting a single index."""
        del self.mutable_s[3]
        self.assertEqual(MutableSeq("TCAAAGGATGCATCATG"), self.mutable_s)

    def test_appending(self):
        """Test append()."""
        self.mutable_s.append("C")
        self.assertEqual(MutableSeq("TCAAAAGGATGCATCATGC"), self.mutable_s)

    def test_inserting(self):
        """Test insert()."""
        self.mutable_s.insert(4, "G")
        self.assertEqual(MutableSeq("TCAAGAAGGATGCATCATG"), self.mutable_s)

    def test_popping_last_item(self):
        """Test pop() with no argument."""
        self.assertEqual("G", self.mutable_s.pop())

    def test_remove_items(self):
        """Test remove() for a present and an absent letter."""
        self.mutable_s.remove("G")
        self.assertEqual(
            MutableSeq("TCAAAAGATGCATCATG"), self.mutable_s, "Remove first G"
        )

        self.assertRaises(ValueError, self.mutable_s.remove, "Z")

    def test_count(self):
        """Test count() for a single letter and a two-letter substring."""
        self.assertEqual(7, self.mutable_s.count("A"))
        self.assertEqual(2, self.mutable_s.count("AA"))

    def test_index(self):
        """Test index() for a present and an absent value."""
        self.assertEqual(2, self.mutable_s.index("A"))
        self.assertRaises(ValueError, self.mutable_s.index, "8888")

    def test_reverse(self):
        """Test using reverse method."""
        self.mutable_s.reverse()
        self.assertEqual(MutableSeq("GTACTACGTAGGAAAACT"), self.mutable_s)

    def test_reverse_with_stride(self):
        """Test reverse using -1 stride."""
        self.assertEqual(MutableSeq("GTACTACGTAGGAAAACT"), self.mutable_s[::-1])

    def test_complement(self):
        """Test in-place complement() of the DNA fixture."""
        self.mutable_s.complement()
        self.assertEqual("AGTTTTCCTACGTAGTAC", str(self.mutable_s))

    def test_complement_rna(self):
        """Test complement() of a mixed-case RNA string."""
        seq = Seq.MutableSeq("AUGaaaCUG")
        seq.complement()
        self.assertEqual("UACuuuGAC", str(seq))

    def test_complement_mixed_aphabets(self):
        """Test that complement() raises ValueError with both U and T."""
        seq = Seq.MutableSeq("AUGaaaCTG")
        with self.assertRaises(ValueError):
            seq.complement()

    def test_complement_rna_string(self):
        """Test complement() of an RNA string (same input as the RNA test)."""
        seq = Seq.MutableSeq("AUGaaaCUG")
        seq.complement()
        self.assertEqual("UACuuuGAC", str(seq))

    def test_complement_dna_string(self):
        """Test complement() of a mixed-case DNA string."""
        seq = Seq.MutableSeq("ATGaaaCTG")
        seq.complement()
        self.assertEqual("TACtttGAC", str(seq))

    def test_reverse_complement(self):
        """Test in-place reverse_complement() of the DNA fixture."""
        self.mutable_s.reverse_complement()
        self.assertEqual("CATGATGCATCCTTTTGA", str(self.mutable_s))

    def test_extend_method(self):
        """Test extend() with a plain string."""
        self.mutable_s.extend("GAT")
        self.assertEqual(MutableSeq("TCAAAAGGATGCATCATGGAT"), self.mutable_s)

    def test_extend_with_mutable_seq(self):
        """Test extend() with another MutableSeq."""
        self.mutable_s.extend(MutableSeq("TTT"))
        self.assertEqual(MutableSeq("TCAAAAGGATGCATCATGTTT"), self.mutable_s)

    def test_delete_stride_slice(self):
        """Test deleting the slice ``4:6 - 1``."""
        # ``6 - 1`` is evaluated first, so this removes the plain slice
        # [4:5]; no stride is involved despite the test name.
        del self.mutable_s[4:6 - 1]
        self.assertEqual(MutableSeq("TCAAAGGATGCATCATG"), self.mutable_s)

    def test_extract_third_nucleotide(self):
        """Test extracting every third nucleotide (slicing with stride 3)."""
        self.assertEqual(MutableSeq("TAGTAA"), self.mutable_s[0::3])
        self.assertEqual(MutableSeq("CAGGTT"), self.mutable_s[1::3])
        self.assertEqual(MutableSeq("AAACCG"), self.mutable_s[2::3])

    def test_set_wobble_codon_to_n(self):
        """Test setting wobble codon to N (set slice with stride 3)."""
        self.mutable_s[2::3] = "N" * len(self.mutable_s[2::3])
        self.assertEqual(MutableSeq("TCNAANGGNTGNATNATN"), self.mutable_s)
# For every position in codon j, substitute each nucleotide of ``alpha`` in
# turn and collect the frequencies of that derived allele, split by whether
# the substitution changes the translated amino acid.
# range() replaces xrange(): it behaves the same for these small loops on
# Python 2 and also runs on Python 3.
for jcod in range(3):
    for ai in range(4):
        cod_anc[:] = conss_gene[3 * j:3 * (j + 1)]

        # Ancestral allele, skip (we only look at propagation of MINOR alleles)
        if alpha[ai] == cod_anc[jcod]:
            continue

        # Derived codon: the ancestral codon with one position replaced.
        cod_new[:] = conss_gene[3 * j:3 * (j + 1)]
        cod_new[jcod] = alpha[ai]

        # NOTE(review): the codon is sliced at 3 * j but the frequencies are
        # read at site j + jcod; if the last axis of aft_der_gene is indexed
        # by nucleotide this should probably be 3 * j + jcod -- confirm.
        aftmp = aft_der_gene[:, ai, j + jcod]
        # Keep only the frequencies that fall inside the histogram range.
        aftmp = aftmp[(aftmp >= bins[0]) & (aftmp <= bins[-1])]
        if not len(aftmp):
            continue

        # Same translation => synonymous; different => non-synonymous.
        # (The two branches were previously swapped.)
        if str(cod_new.toseq().translate()) == str(
                cod_anc.toseq().translate()):
            nu_syn.extend(aftmp)
        else:
            nu_nonsyn.extend(aftmp)

# Fold the collected frequencies into the running histograms.
if len(nu_syn):
    hist_syn += np.histogram(nu_syn, bins=bins)[0]
if len(nu_nonsyn):
    hist_nonsyn += np.histogram(nu_nonsyn, bins=bins)[0]

# Normalize: first to unit total, then by bin width.
hist_norm = hist.copy()
hist_norm /= hist_norm.sum()
hist_norm /= bins[1:] - bins[:-1]