def _get_all_composables(tmp_dir_name):
    """Return a list holding one instance of each composable app.

    Parameters
    ----------
    tmp_dir_name
        path handed to the writer apps (write_db, write_json, write_seqs),
        each of which is constructed with create=True
    """
    hky85 = evo.model("HKY85")
    general_nucleotide = evo.model("GN")
    # this hypothesis is only consumed by the bootstrap app; the list gets
    # its own, separately constructed, hypothesis instance
    bootstrap_hyp = evo.hypothesis(hky85, general_nucleotide)
    return [
        align.align_to_ref(),
        align.progressive_align(model="GY94"),
        evo.ancestral_states(),
        evo.bootstrap(hyp=bootstrap_hyp, num_reps=100),
        evo.hypothesis(hky85, general_nucleotide),
        evo.model("GN"),
        evo.tabulate_stats(),
        sample.fixed_length(100),
        sample.min_length(100),
        io.write_db(tmp_dir_name, create=True),
        io.write_json(tmp_dir_name, create=True),
        io.write_seqs(tmp_dir_name, create=True),
        sample.omit_bad_seqs(),
        sample.omit_degenerates(),
        sample.omit_duplicated(),
        sample.take_codon_positions(1),
        sample.take_named_seqs(),
        sample.trim_stop_codons(gc=1),
        translate.select_translatable(),
        tree.quick_tree(),
        tree.scale_branches(),
        tree.uniformize_tree(),
    ]
def test_apply_to(self):
    """apply_to processes every member of the input it is given"""
    from cogent3.core.alignment import SequenceCollection

    dstore = io_app.get_data_store("data", suffix="fasta", limit=3)
    loader = io_app.load_unaligned(format="fasta", moltype="dna")
    length_filter = sample_app.min_length(10)

    # a single app applied to a data store
    loaded = loader.apply_to(dstore, show_progress=False)
    self.assertEqual(len(got := loaded), len(dstore))

    # the output of one apply_to can be fed to another composable app
    filtered = length_filter.apply_to(got, show_progress=False, logger=True)
    self.assertEqual(len(filtered), len(dstore))

    # a composed (chained) app behaves the same way
    pipeline = loader + length_filter
    from_store = pipeline.apply_to(dstore, show_progress=False)
    self.assertEqual(len(from_store), len(dstore))

    # members given as a list of plain strings
    as_strings = [str(member) for member in dstore]
    from_strings = pipeline.apply_to(as_strings, show_progress=False)
    self.assertEqual(len(from_strings), len(dstore))

    # a lone string is treated as a single member
    from_single = pipeline.apply_to(str(dstore[0]), show_progress=False)
    self.assertEqual(len(from_single), 1)
    self.assertIsInstance(from_single[0], SequenceCollection)

    # an empty list is rejected with ValueError
    with self.assertRaises(ValueError):
        pipeline.apply_to([])

    # so is a list containing only empty strings
    with self.assertRaises(ValueError):
        pipeline.apply_to(["", ""])
def test_apply_to_not_completed(self):
    """correctly creates notcompleted

    Runs a load -> min_length -> write_db process over three fasta members
    with a minimum length of 3000 and checks that all three are recorded as
    incomplete in the writer's data store.
    """
    dstore = io_app.get_data_store("data", suffix="fasta", limit=3)
    with TemporaryDirectory(dir=".") as dirname:
        reader = io_app.load_aligned(format="fasta", moltype="dna")
        # trigger creation of notcompleted
        min_length = sample_app.min_length(3000)
        outpath = os.path.join(os.getcwd(), dirname, "delme.tinydb")
        writer = io_app.write_db(outpath)
        process = reader + min_length + writer
        try:
            process.apply_to(dstore, show_progress=False)
            self.assertEqual(len(process.data_store.incomplete), 3)
        finally:
            # always release the data store, even when the assertion fails,
            # so the temporary directory can be removed cleanly
            process.data_store.close()
def test_apply_to_strings(self):
    """apply_to handles strings as paths

    Converts the members of a data store to plain strings, runs a
    load -> min_length -> write_db process over them and checks that the
    writer's data store holds exactly one log.
    """
    dstore = io_app.get_data_store("data", suffix="fasta", limit=3)
    # create paths as strings
    paths = [str(m) for m in dstore]
    with TemporaryDirectory(dir=".") as dirname:
        reader = io_app.load_aligned(format="fasta", moltype="dna")
        min_length = sample_app.min_length(10)
        outpath = os.path.join(os.getcwd(), dirname, "delme.tinydb")
        writer = io_app.write_db(outpath)
        process = reader + min_length + writer
        try:
            process.apply_to(paths, show_progress=False)
            self.assertEqual(len(process.data_store.logs), 1)
        finally:
            # always release the data store, even when the assertion fails,
            # so the temporary directory can be removed cleanly
            process.data_store.close()
def _get_all_composable_apps():
    """Return a list of composable app instances.

    The write_seqs app is pointed at the current working directory.
    """
    return [
        align.align_to_ref(),
        align.progressive_align(model="GY94"),
        sample.fixed_length(100),
        sample.min_length(100),
        io.write_seqs(os.getcwd()),
        sample.omit_bad_seqs(),
        sample.omit_degenerates(),
        sample.take_codon_positions(1),
        sample.take_named_seqs(),
        sample.trim_stop_codons(gc=1),
    ]
def test_minlength(self):
    """correctly identifies data with minimal length"""
    aln = make_aligned_seqs(data=[("a", "GCAAGCGTTTAT"), ("b", "GCTTTTGTCAAT")])

    # if using subtract_degen, fails if incorrect moltype
    ml = sample.min_length(9, subtract_degen=True)
    got = ml(aln)
    self.assertIsInstance(got, NotCompleted)
    self.assertEqual(got.type, "ERROR")

    # but works if subtract_degen is False
    ml = sample.min_length(9, subtract_degen=False)
    aln = ml(aln)
    self.assertEqual(len(aln), 12)

    # or if moltype provided
    ml = sample.min_length(9, subtract_degen=True, moltype="dna")
    aln = ml(aln)
    self.assertEqual(len(aln), 12)

    # aligned input: only the first alignment is expected to pass a
    # minimum length of 9
    alns = [
        make_aligned_seqs(
            data=[("a", "GCAAGCGTTTAT"), ("b", "GCTTTTGTCAAT")], moltype=DNA
        ),
        make_aligned_seqs(data=[("a", "GGAAGCGT"), ("b", "GCTTT-GT")], moltype=DNA),
    ]
    ml = sample.min_length(9)
    # falsy results are dropped by the `if result` filter
    got = [result.to_dict() for result in map(ml, alns) if result]
    expected = [{"a": "GCAAGCGTTTAT", "b": "GCTTTTGTCAAT"}]
    self.assertEqual(got, expected)

    # returns NotCompleted if nothing satisfies
    got = ml(alns[1])
    self.assertIsInstance(got, sample.NotCompleted)

    # unaligned input
    alns = [
        make_unaligned_seqs(data=[("a", "GGAAGCGT"), ("b", "GCTTNGT")], moltype=DNA)
    ]
    ml = sample.min_length(6)
    got = [result.to_dict() for result in map(ml, alns) if result]
    expected = [{"a": "GGAAGCGT", "b": "GCTTNGT"}]
    self.assertEqual(got, expected)

    ml = sample.min_length(7)
    got = [result.to_dict() for result in map(ml, alns) if result]
    self.assertEqual(got, [])
def test_str(self):
    """str() of an app shows its name and the parameter values in effect"""
    # default construction
    expected = (
        "select_translatable(type='sequences', "
        "moltype='dna', gc='Standard Nuclear', "
        "allow_rc=False, trim_terminal_stop=True)"
    )
    self.assertEqual(str(select_translatable()), expected)

    # a non-default argument shows up in the representation
    expected = (
        "select_translatable(type='sequences', "
        "moltype='dna', gc='Standard Nuclear', "
        "allow_rc=True, trim_terminal_stop=True)"
    )
    self.assertEqual(str(select_translatable(allow_rc=True)), expected)

    expected = (
        "omit_degenerates(type='aligned', moltype=None, "
        "gap_is_degen=True, motif_length=1)"
    )
    self.assertEqual(str(omit_degenerates()), expected)

    expected = (
        "min_length(type='sequences', length=100, "
        "motif_length=1, subtract_degen=True, "
        "moltype=None)"
    )
    self.assertEqual(str(min_length(100)), expected)

    tree_app = quick_tree()
    self.assertEqual(str(tree_app), "quick_tree(type='tree', drop_invalid=False)")