def _get_all_composables(tmp_dir_name):
    """Build one instance of each composable app exercised by the tests.

    The three writer apps are pointed at ``tmp_dir_name`` and constructed
    with ``create=True``. The result is returned as a list.
    """
    # models and hypothesis needed as arguments for evo.bootstrap
    hky85 = evo.model("HKY85")
    general_nuc = evo.model("GN")
    bootstrap_hyp = evo.hypothesis(hky85, general_nuc)
    bootstrap_reps = 100
    return [
        align.align_to_ref(),
        align.progressive_align(model="GY94"),
        evo.ancestral_states(),
        evo.bootstrap(hyp=bootstrap_hyp, num_reps=bootstrap_reps),
        evo.hypothesis(hky85, general_nuc),
        evo.model("GN"),
        evo.tabulate_stats(),
        sample.fixed_length(100),
        sample.min_length(100),
        io.write_db(tmp_dir_name, create=True),
        io.write_json(tmp_dir_name, create=True),
        io.write_seqs(tmp_dir_name, create=True),
        sample.omit_bad_seqs(),
        sample.omit_degenerates(),
        sample.omit_duplicated(),
        sample.take_codon_positions(1),
        sample.take_named_seqs(),
        sample.trim_stop_codons(gc=1),
        translate.select_translatable(),
        tree.quick_tree(),
        tree.scale_branches(),
        tree.uniformize_tree(),
    ]
def _get_all_composable_apps():
    """Build a list of composable app instances.

    The single writer app in the list is pointed at the current working
    directory.
    """
    return [
        align.align_to_ref(),
        align.progressive_align(model="GY94"),
        sample.fixed_length(100),
        sample.min_length(100),
        io.write_seqs(os.getcwd()),
        sample.omit_bad_seqs(),
        sample.omit_degenerates(),
        sample.take_codon_positions(1),
        sample.take_named_seqs(),
        sample.trim_stop_codons(gc=1),
    ]
def test_filter_degen(self):
    """omit_degenerates keeps only the fully resolved alignment columns

    Checks the filtered sequences, the NotCompleted result when every
    column has a gap, and that the returned alignment class matches the
    class of the input.
    """
    array_aln = make_aligned_seqs(data=[("a", "ACGA-GACG"), ("b", "GATGATGYT")])
    degen = sample.omit_degenerates(moltype="dna")
    filtered = degen(array_aln)
    expected = {"a": "ACGAGAG", "b": "GATGTGT"}
    self.assertEqual(filtered.to_dict(), expected)
    self.assertIsInstance(filtered, alignment.ArrayAlignment)

    # every column contains a gap, so nothing survives the filter
    all_gapped = make_aligned_seqs(data=[("a", "-C-A-G-C-"), ("b", "G-T-A-G-T")])
    result = degen(all_gapped)
    self.assertIsInstance(result, composable.NotCompleted)

    # the output alignment class should match the input alignment class
    plain_aln = make_aligned_seqs(
        data=[("a", "ACGA-GACG"), ("b", "GATGATGYT")],
        array_align=False,
    )
    result = degen(plain_aln)
    self.assertIsInstance(result, alignment.Alignment)
def test_checkpointable(self):
    """chained funcs should be able to apply a checkpoint

    Writes an alignment once with a standalone writer, then runs
    ``reader + writer`` and ``reader + writer + omit_degens`` on the same
    input path and checks what each composed function returns.
    """
    path = os.path.join("data", "brca1.fasta")
    reader = io_app.load_aligned(moltype="dna")
    omit_degens = sample_app.omit_degenerates(moltype="dna")
    # everything below runs inside the temporary directory's lifetime so
    # the previously written output is still present for the later calls
    with TemporaryDirectory(dir=".") as dirname:
        writer = io_app.write_seqs(dirname)
        aln = reader(path)
        outpath = writer(aln)

        read_write = reader + writer
        # should skip reading and return path
        got = read_write(path)
        self.assertEqual(got, outpath)
        # allows us to reuse bits
        read_write.disconnect()
        read_write_degen = reader + writer + omit_degens
        # should return an alignment instance
        got = read_write_degen(path)
        self.assertIsInstance(got, ArrayAlignment)
        # assertGreater reports the actual length if the check fails
        self.assertGreater(len(got), 1000)
def test_composite_pickleable(self):
    """individual apps and a composed pipeline can be pickled

    Each app is serialised immediately after construction; the final
    call serialises the composed function built with ``+``.
    """
    import pickle

    from cogent3.app import io, sample, evo, tree, translate, align

    loader = io.load_aligned(moltype="dna")
    pickle.dumps(loader)

    translatable = translate.select_translatable()
    pickle.dumps(translatable)

    aligner = align.progressive_align("nucleotide")
    pickle.dumps(aligner)

    no_degens = sample.omit_degenerates(moltype="dna")
    pickle.dumps(no_degens)

    truncate = sample.fixed_length(1000, random=True)
    pickle.dumps(truncate)

    model = evo.model("HKY85")
    pickle.dumps(model)

    # pickled on its own; not part of the composed pipeline below
    quick = tree.quick_tree()
    pickle.dumps(quick)

    pipeline = loader + translatable + aligner + no_degens + truncate + model
    pickle.dumps(pipeline)
def test_str(self):
    """str() of an app shows its name and constructor parameter values"""
    # default construction
    app = select_translatable()
    expected = (
        "select_translatable(type='sequences', "
        "moltype='dna', gc='Standard Nuclear', "
        "allow_rc=False, trim_terminal_stop=True)"
    )
    self.assertEqual(str(app), expected)

    # a non-default argument shows up in the representation
    app = select_translatable(allow_rc=True)
    expected = (
        "select_translatable(type='sequences', "
        "moltype='dna', gc='Standard Nuclear', "
        "allow_rc=True, trim_terminal_stop=True)"
    )
    self.assertEqual(str(app), expected)

    app = omit_degenerates()
    expected = (
        "omit_degenerates(type='aligned', moltype=None, "
        "gap_is_degen=True, motif_length=1)"
    )
    self.assertEqual(str(app), expected)

    app = min_length(100)
    expected = (
        "min_length(type='sequences', length=100, "
        "motif_length=1, subtract_degen=True, "
        "moltype=None)"
    )
    self.assertEqual(str(app), expected)

    app = quick_tree()
    self.assertEqual(str(app), "quick_tree(type='tree', drop_invalid=False)")