def test_omit_bad_seqs(self):
    """omit_bad_seqs removes the expected sequences from an alignment"""
    data = {
        "s1": "---ACC---TT-",
        "s2": "---ACC---TT-",
        "s3": "---ACC---TT-",
        "s4": "--AACCG-GTT-",
        "s5": "--AACCGGGTTT",
        "s6": "AGAACCGGGTT-",
        "s7": "------------",
    }
    aln = make_aligned_seqs(data=data, moltype=DNA)

    # with default arguments only s7, which consists solely of gaps,
    # is expected to be removed
    app = sample.omit_bad_seqs()
    result = app(aln)
    expected = {name: seq for name, seq in data.items() if name != "s7"}
    self.assertEqual(result.to_dict(), expected)

    # gap_fraction=0.5 is expected to also remove s1, s2 and s3
    app = sample.omit_bad_seqs(gap_fraction=0.5)
    result = app(aln)
    excluded = ("s1", "s2", "s3", "s7")
    expected = {name: seq for name, seq in data.items() if name not in excluded}
    self.assertEqual(result.to_dict(), expected)

    # specifying a quantile is expected to remove s6 as well as s7
    app = sample.omit_bad_seqs(quantile=6 / 7)
    result = app(aln)
    excluded = ("s6", "s7")
    expected = {name: seq for name, seq in data.items() if name not in excluded}
    self.assertEqual(result.to_dict(), expected)
def _get_all_composables(tmp_dir_name):
    """Return a list of app instances from align, evo, sample, io,
    translate and tree.

    Parameters
    ----------
    tmp_dir_name
        passed as the first argument to io.write_db, io.write_json and
        io.write_seqs, each of which is constructed with create=True

    Returns
    -------
    list of the constructed app instances, in a fixed order
    """
    # models and hypothesis used only to construct the bootstrap app
    hky85 = evo.model("HKY85")
    general_nuc = evo.model("GN")
    hyp = evo.hypothesis(hky85, general_nuc)
    num_reps = 100

    # the hypothesis and "GN" model entries below are fresh instances,
    # distinct from the ones handed to bootstrap
    return [
        align.align_to_ref(),
        align.progressive_align(model="GY94"),
        evo.ancestral_states(),
        evo.bootstrap(hyp=hyp, num_reps=num_reps),
        evo.hypothesis(hky85, general_nuc),
        evo.model("GN"),
        evo.tabulate_stats(),
        sample.fixed_length(100),
        sample.min_length(100),
        io.write_db(tmp_dir_name, create=True),
        io.write_json(tmp_dir_name, create=True),
        io.write_seqs(tmp_dir_name, create=True),
        sample.omit_bad_seqs(),
        sample.omit_degenerates(),
        sample.omit_duplicated(),
        sample.take_codon_positions(1),
        sample.take_named_seqs(),
        sample.trim_stop_codons(gc=1),
        translate.select_translatable(),
        tree.quick_tree(),
        tree.scale_branches(),
        tree.uniformize_tree(),
    ]
def _get_all_composable_apps():
    """Return a list of app instances from align, sample and io.

    The io.write_seqs app is constructed with the current working
    directory as its first argument.
    """
    return [
        align.align_to_ref(),
        align.progressive_align(model="GY94"),
        sample.fixed_length(100),
        sample.min_length(100),
        io.write_seqs(os.getcwd()),
        sample.omit_bad_seqs(),
        sample.omit_degenerates(),
        sample.take_codon_positions(1),
        sample.take_named_seqs(),
        sample.trim_stop_codons(gc=1),
    ]