Exemple #1
0
def _get_all_composables(tmp_dir_name):
    """Return a list holding one newly constructed instance of each app below.

    Parameters
    ----------
    tmp_dir_name
        passed as the first argument to the ``write_db``, ``write_json`` and
        ``write_seqs`` apps, each of which is built with ``create=True``

    Returns
    -------
    list
        the app instances, in the order they appear in the literal
    """
    hky85 = evo.model("HKY85")
    general_nuc = evo.model("GN")
    # the bootstrap app receives this hypothesis plus a fixed replicate count
    bootstrap_hyp = evo.hypothesis(hky85, general_nuc)
    bootstrap_reps = 100

    return [
        align.align_to_ref(),
        align.progressive_align(model="GY94"),
        evo.ancestral_states(),
        evo.bootstrap(hyp=bootstrap_hyp, num_reps=bootstrap_reps),
        evo.hypothesis(hky85, general_nuc),
        evo.model("GN"),
        evo.tabulate_stats(),
        sample.fixed_length(100),
        sample.min_length(100),
        io.write_db(tmp_dir_name, create=True),
        io.write_json(tmp_dir_name, create=True),
        io.write_seqs(tmp_dir_name, create=True),
        sample.omit_bad_seqs(),
        sample.omit_degenerates(),
        sample.omit_duplicated(),
        sample.take_codon_positions(1),
        sample.take_named_seqs(),
        sample.trim_stop_codons(gc=1),
        translate.select_translatable(),
        tree.quick_tree(),
        tree.scale_branches(),
        tree.uniformize_tree(),
    ]
Exemple #2
0
    def test_write_db(self):
        """db written with if_exists="overwrite" under MPI lists what was written"""

        def not_brca1(path):
            # keep members whose "/"-separated path has no "brca1.fasta" part
            return "brca1.fasta" not in path.split("/")

        fasta_store = io_app.get_data_store("data", suffix="fasta")
        selected = fasta_store.filtered(callback=not_brca1)

        load = io_app.load_unaligned()
        to_ref = align_app.align_to_ref()
        to_db = write_db("delme.tinydb", create=True, if_exists="overwrite")
        pipeline = load + to_ref + to_db

        # the return value is not inspected; only the written store is checked
        _ = pipeline.apply_to(
            selected,
            logger=False,
            show_progress=False,
            parallel=True,
            par_kw={"use_mpi": True},
        )

        written = list(map(str, pipeline.data_store))
        pipeline.data_store.close()

        # reopen the db from its path and compare its members to those written
        reopened = io_app.get_data_store("delme.tinydb")
        found = list(map(str, reopened))

        assert found == written
Exemple #3
0
 def test_align_to_ref_generic_moltype(self):
     """each listed moltype is kept on the app and gets a generic scoring dict"""
     for label in ("text", "rna", "protein", "protein_with_stop", "bytes", "ab"):
         app = align_app.align_to_ref(moltype=label)
         self.assertEqual(app._moltype.label, label)

         # the "S" keyword stored on the app should equal the dict built by
         # make_generic_scoring_dict with 10 and the corresponding moltype
         observed = app._kwargs["S"]
         expected = make_generic_scoring_dict(10, get_moltype(label))
         self.assertEqual(observed, expected)
Exemple #4
0
 def test_align_to_ref(self):
     """aligning self.seqs with ref_seq="Human" gives the known gapped seqs"""
     expected = dict(
         Bandicoot="---NACTCATTAATGCTTGAAACCAGCAGTTTATTGTCCAAC",
         FlyingFox="GCCAGCTCTTTACAGCATGAGAACAG---TTTATTATACACT",
         Human="GCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTCACT",
         Rhesus="GCCAGCTCATTACAGCATGAGAAC---AGTTTGTTACTCACT",
     )
     to_human = align_app.align_to_ref(ref_seq="Human")
     self.assertEqual(to_human(self.seqs).to_dict(), expected)
Exemple #5
0
def _get_all_composable_apps():
    """Return a list holding one newly constructed instance of each app below.

    The ``write_seqs`` app is given the current working directory
    (``os.getcwd()``) as its first argument.
    """
    return [
        align.align_to_ref(),
        align.progressive_align(model="GY94"),
        sample.fixed_length(100),
        sample.min_length(100),
        io.write_seqs(os.getcwd()),
        sample.omit_bad_seqs(),
        sample.omit_degenerates(),
        sample.take_codon_positions(1),
        sample.take_named_seqs(),
        sample.trim_stop_codons(gc=1),
    ]
Exemple #6
0
 def test_aln_to_ref_known(self):
     """degapping a known alignment then aligning to "Ref" reproduces it"""
     known = {
         "Ref": "CAG---GAGAACAGAAACCCAT--TACTCACT",
         "Qu1": "CAG---GAGAACAG---CCCGTGTTACTCACT",
         "Qu2": "CAGCATGAGAACAGAAACCCGT--TA---ACT",
         "Qu3": "CAGCATGAGAACAGAAACCCGT----CTCACT",
         "Qu4": "CAGCATGAGAACAGAAACCCGTGTTACTCACT",
         "Qu5": "CAG---GAGAACAG---CCCAT--TACTCACT",
         "Qu6": "CAG---GA-AACAG---CCCAT--TACTCACT",
         "Qu7": "CAG---GA--ACAGA--CCCGT--TA---ACT",
     }
     source_aln = make_aligned_seqs(known, moltype="dna")
     # the expectation is the dict form of the alignment as it was constructed
     expected = source_aln.to_dict()

     to_ref = align_app.align_to_ref(ref_seq="Ref")
     realigned = to_ref(source_aln.degap())
     self.assertEqual(realigned.to_dict(), expected)
Exemple #7
0
 def test_align_to_ref_result_has_moltype(self):
     """result of an app built with moltype="dna" has moltype label "dna\""""
     aligned = align_app.align_to_ref(moltype="dna")(self.seqs)
     self.assertEqual(aligned.moltype.label, "dna")
Exemple #8
0
    def test_est_dist_pair_slow(self):
        """slow-calculator distances between seq pairs of align_to_ref output

        ``self.seqs3`` and ``self.seqs4`` are aligned with the default
        reference and with each named reference. For every alignment the
        pairwise distances are estimated with the "GTR" and "TN93" slow
        calculators and checked for symmetry and/or non-negativity. Lastly a
        progressive alignment of ``self.seqs5`` with a guide tree is run.
        """
        slow_calcs = ("GTR", "TN93")

        def slow_dists(aln, calc):
            # distances for aln as a dict indexed by (name, name) tuples
            return dist_app.fast_slow_dist(slow_calc=calc)(aln).to_dict()

        # seqs3, default reference: symmetric and non-negative for both calcs
        aln = align.align_to_ref()(self.seqs3)
        for calc in slow_calcs:
            got = slow_dists(aln, calc)
            assert_allclose(got[("Human", "Mouse")], got[("Mouse", "Human")])
            self.assertGreaterEqual(got[("Mouse", "Human")], 0)

        # seqs3, Human as reference: symmetric for both calcs
        aln = align.align_to_ref(ref_seq="Human")(self.seqs3)
        for calc in slow_calcs:
            got = slow_dists(aln, calc)
            assert_allclose(got[("Human", "Mouse")], got[("Mouse", "Human")])
        # got now holds the last ("TN93") result, the only one sign-checked here
        self.assertGreaterEqual(got[("Mouse", "Human")], 0)

        # seqs3, Mouse as reference: non-negative for both calcs
        aln = align.align_to_ref(ref_seq="Mouse")(self.seqs3)
        for calc in slow_calcs:
            self.assertGreaterEqual(slow_dists(aln, calc)[("Mouse", "Human")], 0)

        # seqs4 with the default reference, then with each named reference;
        # an empty dict means align_to_ref() is called without arguments
        for ref_kwargs in ({}, {"ref_seq": "Human"}, {"ref_seq": "Opossum"}):
            aln = align.align_to_ref(**ref_kwargs)(self.seqs4)
            for calc in slow_calcs:
                self.assertGreaterEqual(
                    slow_dists(aln, calc)[("Human", "Opossum")], 0
                )

        # the progressive alignment result is discarded; nothing is asserted on it
        treestring = "(Human:0.2,Bandicoot:0.2)"
        aligner = align.progressive_align(model="WG01", guide_tree=treestring)
        _ = aligner(self.seqs5)
Exemple #9
0
    def test_est_dist_pair_slow(self):
        """slow-calculator distances between seq pairs of align_to_ref output

        ``self.seqs3`` and ``self.seqs4`` are aligned with the default
        reference and with each named reference. For every alignment the
        pairwise distances are estimated with the "GTR" and "TN93" slow
        calculators and checked for symmetry and/or non-negativity. The
        aligner and a hamming-distance app are then composed and applied to
        ``self.seqs1``, and lastly a progressive alignment of ``self.seqs5``
        with a guide tree is run.
        """
        slow_calcs = ("GTR", "TN93")

        def slow_dists(aln, calc):
            # distances for aln as a dict indexed by (name, name) tuples
            return dist_app.fast_slow_dist(slow_calc=calc)(aln).to_dict()

        # seqs3, default reference: symmetric and non-negative for both calcs
        aln = align.align_to_ref()(self.seqs3)
        for calc in slow_calcs:
            got = slow_dists(aln, calc)
            assert_allclose(got[("Human", "Mouse")], got[("Mouse", "Human")])
            self.assertGreaterEqual(got[("Mouse", "Human")], 0)

        # seqs3, Human as reference: symmetric for both calcs
        aln = align.align_to_ref(ref_seq="Human")(self.seqs3)
        for calc in slow_calcs:
            got = slow_dists(aln, calc)
            assert_allclose(got[("Human", "Mouse")], got[("Mouse", "Human")])
        # got now holds the last ("TN93") result, the only one sign-checked here
        self.assertGreaterEqual(got[("Mouse", "Human")], 0)

        # seqs3, Mouse as reference: non-negative for both calcs
        aln = align.align_to_ref(ref_seq="Mouse")(self.seqs3)
        for calc in slow_calcs:
            self.assertGreaterEqual(slow_dists(aln, calc)[("Mouse", "Human")], 0)

        # seqs4 with the default reference, then with each named reference;
        # an empty dict means align_to_ref() is called without arguments
        for ref_kwargs in ({}, {"ref_seq": "Human"}, {"ref_seq": "Opossum"}):
            aln = align.align_to_ref(**ref_kwargs)(self.seqs4)
            for calc in slow_calcs:
                self.assertGreaterEqual(
                    slow_dists(aln, calc)[("Human", "Opossum")], 0
                )

        # now as a process
        proc = align.align_to_ref() + dist_app.fast_slow_dist(
            fast_calc="hamming", moltype="dna"
        )
        got = proc(self.seqs1)
        self.assertEqual(got[("Human", "Rhesus")], 1)

        # the progressive alignment result is discarded; nothing is asserted on it
        treestring = "(Human:0.2,Bandicoot:0.2)"
        aligner = align.progressive_align(model="WG01", guide_tree=treestring)
        _ = aligner(self.seqs5)