Ejemplo n.º 1
0
    def test_composable_apps(self):
        """checks the ability of these two apps(fast_slow_dist and quick_tree) to communicate"""
        path = os.path.join(data_path, "brca1_5.paml")
        aln1 = load_aligned_seqs(path, moltype=DNA)
        fast_slow_dist = dist.fast_slow_dist(fast_calc="hamming", moltype="dna")
        quick = tree_app.quick_tree(drop_invalid=False)
        proc = fast_slow_dist + quick
        self.assertEqual(
            str(proc),
            "fast_slow_dist(type='distance', distance=None, moltype='dna',\n"
            "fast_calc='hamming', slow_calc=None) + quick_tree(type='tree',\n"
            "drop_invalid=False)",
        )
        self.assertIsInstance(proc, tree_app.quick_tree)
        self.assertEqual(proc._type, "tree")
        self.assertIsInstance(proc.input, dist.fast_slow_dist)
        self.assertIs(proc.output, None)
        self.assertIsInstance(proc._input_types, frozenset)
        self.assertIsInstance(proc._output_types, frozenset)
        self.assertIsInstance(proc._in, dist.fast_slow_dist)
        self.assertIs(proc._out, None)

        tree1 = proc(aln1)
        self.assertIsInstance(tree1, PhyloNode)
        self.assertIsNotNone(tree1.children)
        self.assertEqual(set(tree1.get_tip_names()), set(aln1.names))

        # tests when distances contain None
        data = dict(
            seq1="AGGGGGGGGGGCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGCGGTTTTTTTTTTTTTTTTTT",
            seq2="TAAAAAAAAAAGGGGGGGGGGGGGGGGGGTTTTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCC",
        )
        aln2 = make_aligned_seqs(data=data, moltype=DNA)
        tree2 = proc(aln2)
        self.assertIsInstance(tree2, NotCompleted)
Ejemplo n.º 2
0
def _get_all_composables(tmp_dir_name):
    test_model1 = evo.model("HKY85")
    test_model2 = evo.model("GN")
    test_hyp = evo.hypothesis(test_model1, test_model2)
    test_num_reps = 100

    applications = [
        align.align_to_ref(),
        align.progressive_align(model="GY94"),
        dist.fast_slow_dist(moltype="dna", fast_calc="hamming"),
        evo.ancestral_states(),
        evo.bootstrap(hyp=test_hyp, num_reps=test_num_reps),
        evo.hypothesis(test_model1, test_model2),
        evo.model("GN"),
        evo.tabulate_stats(),
        sample.fixed_length(100),
        sample.min_length(100),
        io.write_db(tmp_dir_name, create=True),
        io.write_json(tmp_dir_name, create=True),
        io.write_seqs(tmp_dir_name, create=True),
        sample.omit_bad_seqs(),
        sample.omit_degenerates(),
        sample.omit_duplicated(),
        sample.take_codon_positions(1),
        sample.take_named_seqs(),
        sample.take_n_seqs(2),
        sample.trim_stop_codons(gc=1),
        translate.select_translatable(),
        tree.quick_tree(),
        tree.scale_branches(),
        tree.uniformize_tree(),
    ]
    return applications
Ejemplo n.º 3
0
 def test_composes_with_write_tabular(self):
     """correctly links to tabular"""
     with TemporaryDirectory(dir=".") as dirname:
         writer = io.write_tabular(dirname)
         dist_calc = dist_app.fast_slow_dist(distance="hamming",
                                             moltype="protein")
         _ = dist_calc + writer
Ejemplo n.º 4
0
    def test_init(self):
        """tests if fast_slow_dist can be initialised correctly"""

        fast_slow_dist = dist_app.fast_slow_dist(fast_calc="hamming",
                                                 moltype="dna")
        self.assertIsInstance(fast_slow_dist.fast_calc, HammingPair)
        self.assertIsNone(fast_slow_dist._sm)

        fast_slow_dist = dist_app.fast_slow_dist(distance="TN93")
        self.assertIsInstance(fast_slow_dist.fast_calc, TN93Pair)
        self.assertEqual(fast_slow_dist._sm.name, "TN93")
        fast_slow_dist = dist_app.fast_slow_dist(distance="GTR")
        self.assertEqual(fast_slow_dist._sm.name, "GTR")

        fast_slow_dist = dist_app.fast_slow_dist(slow_calc="TN93")
        self.assertEqual(fast_slow_dist._sm.name, "TN93")
        self.assertIsNone(fast_slow_dist.fast_calc)

        with self.assertRaises(ValueError):
            fast_slow_dist = dist_app.fast_slow_dist(distance="TN93",
                                                     fast_calc="TN93",
                                                     slow_calc="TN93")

        with self.assertRaises(ValueError):
            fast_slow_dist = dist_app.fast_slow_dist(fast_calc="GTR")

        with self.assertRaises(ValueError):
            fast_slow_dist = dist_app.fast_slow_dist(slow_calc="hamming")
Ejemplo n.º 5
0
 def test_quick_tree(self):
     """correctly calc a nj tree"""
     path = os.path.join(data_path, "brca1_5.paml")
     aln = load_aligned_seqs(path, moltype=DNA)
     fast_slow_dist = dist.fast_slow_dist(fast_calc="hamming", moltype="dna")
     dist_matrix = fast_slow_dist(aln)
     quick1 = tree_app.quick_tree()
     tree1 = quick1.quick_tree(dist_matrix)
     self.assertEqual(set(tree1.get_tip_names()), set(aln.names))
Ejemplo n.º 6
0
    def test_functions_as_composable(self):
        """works as a composable app"""
        from pathlib import Path

        loader = io.load_aligned(moltype="dna", format="paml")
        dist = dist_app.fast_slow_dist("hamming", moltype="dna")
        with TemporaryDirectory(dir=".") as dirname:
            dirname = Path(dirname)
            writer = io.write_tabular(dirname)
            proc = loader + dist + writer
            _ = proc("data/brca1_5.250.paml")
            output = dirname / "brca1_5.250.tsv"
            self.assertTrue(output.exists())
Ejemplo n.º 7
0
 def test_composable_apps(self):
     """tests two composable apps"""
     composable_apps = _get_all_composable_apps()
     fast_slow_dist = dist_app.fast_slow_dist(fast_calc="hamming",
                                              moltype="dna")
     for app in composable_apps:
         # Compose two composable applications, there should not be exceptions.
         got = app + fast_slow_dist
         self.assertIsInstance(got, dist_app.fast_slow_dist)
         self.assertEqual(got._type, "distance")
         self.assertIs(got.input, app)
         self.assertIs(got.output, None)
         self.assertIsInstance(got._input_types, frozenset)
         self.assertIsInstance(got._output_types, frozenset)
         self.assertIs(got._in, app)
         self.assertIs(got._out, None)
         app.disconnect()
         fast_slow_dist.disconnect()
Ejemplo n.º 8
0
    def test_compatible_parameters(self):
        """tests if the input parameters are compatible with fast_slow_dist initialisation"""
        fast_slow_dist = dist_app.fast_slow_dist(fast_calc="hamming",
                                                 moltype="dna")
        fast_slow_dist = dist_app.fast_slow_dist(fast_calc="TN93")
        fast_slow_dist = dist_app.fast_slow_dist(slow_calc="GTR")
        fast_slow_dist = dist_app.fast_slow_dist(fast_calc="TN93")

        # fails for paralinear or hamming if no moltype
        with self.assertRaises(ValueError):
            fast_slow_dist = dist_app.fast_slow_dist(slow_calc="hamming")
        with self.assertRaises(ValueError):
            fast_slow_dist = dist_app.fast_slow_dist(slow_calc="paralinear")

        # fails for hamming as slow_calc
        with self.assertRaises(ValueError):
            fast_slow_dist = dist_app.fast_slow_dist(slow_calc="hamming",
                                                     moltype="dna")
        with self.assertRaises(ValueError):
            fast_slow_dist = dist_app.fast_slow_dist(fast_calc="GTR")
Ejemplo n.º 9
0
    def __init__(
        self,
        model,
        gc=None,
        param_vals=None,
        guide_tree=None,
        unique_guides=False,
        indel_length=1e-1,
        indel_rate=1e-10,
        distance="percent",
    ):
        """
        Parameters
        ----------
        model
            substitution model instance or name. If 'codon'
            (uses MG94HKY), 'nucleotide' (uses HKY85), 'protein'
            (uses WG01). These choices provide also provide default
            settings for param_vals.
        gc : int or string
            the genetic code for a codon alignment, defaults to the standard
            genetic code
        param_vals : dict
            param name, values for parameters in model. Overrides
            default choices.
        guide_tree
            newick string, tree instance (must have branch lengths), or a
            callable that will build a tree from unaligned collection. If not
            provided, estimated ONCE via constructing a crude alignment. In the
            case of callable, or not provided, the computed guide tree is stored
            in the returned alignment.info['guide_tree'].
        unique_guides : bool
            whether each alignment requires a new guide tree
        indel_rate : float
            probability of gap insertion
        indel_length : float
            probability of gap extension
        distance : string
            the distance measure for building a guide tree. Default is 'percent',
            the proportion of differences. This is applicable for any moltype,
            and sequences with very high percent identity. For more diverged
            sequences we recommend 'paralinear'.
        """
        super(progressive_align, self).__init__(
            input_types=self._input_types,
            output_types=self._output_types,
            data_types=self._data_types,
        )

        self._param_vals = {
            "codon": dict(omega=0.4, kappa=3),
            "nucleotide": dict(kappa=3),
        }.get(model, param_vals)
        sm = {"codon": "MG94HKY", "nucleotide": "HKY85", "protein": "JTT92"}.get(
            model, model
        )
        self._formatted_params()
        kwargs = {} if gc is None else dict(gc=gc)
        sm = get_model(sm, **kwargs)
        moltype = sm.alphabet.moltype
        self._model = sm
        self._scalar = sm.word_length
        self._indel_length = indel_length
        self._indel_rate = indel_rate
        self._moltype = moltype
        self._unique_guides = unique_guides
        self._distance = distance
        if callable(guide_tree):
            self._make_tree = guide_tree
            guide_tree = None  # callback takes precedence
        else:
            al_to_ref = align_to_ref(moltype=self._moltype)
            dist_calc = dist.fast_slow_dist(
                distance=self._distance, moltype=self._moltype
            )
            est_tree = quick_tree()
            self._make_tree = al_to_ref + dist_calc + est_tree

        if guide_tree is not None:
            if type(guide_tree) == str:
                guide_tree = make_tree(treestring=guide_tree, underscore_unmunge=True)
                if guide_tree.children[0].length is None:
                    raise ValueError("Guide tree must have branch lengths")
            # make sure no zero lengths
            guide_tree = scale_branches()(guide_tree)

        self._guide_tree = guide_tree
        self._kwargs = dict(
            indel_length=self._indel_length,
            indel_rate=self._indel_rate,
            tree=self._guide_tree,
            param_vals=self._param_vals,
            show_progress=False,
        )

        self.func = self.multiple_align
Ejemplo n.º 10
0
    def test_est_dist_pair_slow(self):
        """tests the distance between seq pairs in aln"""

        aligner = align.align_to_ref()
        aln3 = aligner(self.seqs3)
        fast_slow_dist = dist_app.fast_slow_dist(slow_calc="GTR")
        got = fast_slow_dist(aln3).to_dict()
        assert_allclose(got[("Human", "Mouse")], got[("Mouse", "Human")])
        self.assertTrue(0 <= got[("Mouse", "Human")])
        fast_slow_dist = dist_app.fast_slow_dist(slow_calc="TN93")
        got = fast_slow_dist(aln3).to_dict()
        assert_allclose(got[("Human", "Mouse")], got[("Mouse", "Human")])
        self.assertTrue(0 <= got[("Mouse", "Human")])

        aligner = align.align_to_ref(ref_seq="Human")
        aln3 = aligner(self.seqs3)
        fast_slow_dist = dist_app.fast_slow_dist(slow_calc="GTR")
        got = fast_slow_dist(aln3).to_dict()
        assert_allclose(got[("Human", "Mouse")], got[("Mouse", "Human")])
        fast_slow_dist = dist_app.fast_slow_dist(slow_calc="TN93")
        got = fast_slow_dist(aln3).to_dict()
        assert_allclose(got[("Human", "Mouse")], got[("Mouse", "Human")])
        self.assertTrue(0 <= got[("Mouse", "Human")])

        aligner = align.align_to_ref(ref_seq="Mouse")
        aln3 = aligner(self.seqs3)
        fast_slow_dist = dist_app.fast_slow_dist(slow_calc="GTR")
        got = fast_slow_dist(aln3).to_dict()
        self.assertTrue(0 <= got[("Mouse", "Human")])
        fast_slow_dist = dist_app.fast_slow_dist(slow_calc="TN93")
        got = fast_slow_dist(aln3).to_dict()
        self.assertTrue(0 <= got[("Mouse", "Human")])

        aligner = align.align_to_ref()
        aln3 = aligner(self.seqs4)
        fast_slow_dist = dist_app.fast_slow_dist(slow_calc="GTR")
        got = fast_slow_dist(aln3).to_dict()
        self.assertTrue(0 <= got[("Human", "Opossum")])
        fast_slow_dist = dist_app.fast_slow_dist(slow_calc="TN93")
        got = fast_slow_dist(aln3).to_dict()
        self.assertTrue(0 <= got[("Human", "Opossum")])

        aligner = align.align_to_ref(ref_seq="Human")
        aln3 = aligner(self.seqs4)
        fast_slow_dist = dist_app.fast_slow_dist(slow_calc="GTR")
        got = fast_slow_dist(aln3).to_dict()
        self.assertTrue(0 <= got[("Human", "Opossum")])
        fast_slow_dist = dist_app.fast_slow_dist(slow_calc="TN93")
        got = fast_slow_dist(aln3).to_dict()
        self.assertTrue(0 <= got[("Human", "Opossum")])

        aligner = align.align_to_ref(ref_seq="Opossum")
        aln3 = aligner(self.seqs4)
        fast_slow_dist = dist_app.fast_slow_dist(slow_calc="GTR")
        got = fast_slow_dist(aln3).to_dict()
        self.assertTrue(0 <= got[("Human", "Opossum")])
        fast_slow_dist = dist_app.fast_slow_dist(slow_calc="TN93")
        got = fast_slow_dist(aln3).to_dict()
        self.assertTrue(0 <= got[("Human", "Opossum")])

        treestring = "(Human:0.2,Bandicoot:0.2)"
        aligner = align.progressive_align(model="WG01", guide_tree=treestring)
        _ = aligner(self.seqs5)
Ejemplo n.º 11
0
    def test_est_dist_pair_slow(self):
        """tests the distance between seq pairs in aln"""

        aligner = align.align_to_ref()
        aln3 = aligner(self.seqs3)
        fast_slow_dist = dist_app.fast_slow_dist(slow_calc="GTR")
        got = fast_slow_dist(aln3).to_dict()
        assert_allclose(got[("Human", "Mouse")], got[("Mouse", "Human")])
        self.assertTrue(got[("Mouse", "Human")] >= 0)
        fast_slow_dist = dist_app.fast_slow_dist(slow_calc="TN93")
        got = fast_slow_dist(aln3).to_dict()
        assert_allclose(got[("Human", "Mouse")], got[("Mouse", "Human")])
        self.assertTrue(got[("Mouse", "Human")] >= 0)

        aligner = align.align_to_ref(ref_seq="Human")
        aln3 = aligner(self.seqs3)
        fast_slow_dist = dist_app.fast_slow_dist(slow_calc="GTR")
        got = fast_slow_dist(aln3).to_dict()
        assert_allclose(got[("Human", "Mouse")], got[("Mouse", "Human")])
        fast_slow_dist = dist_app.fast_slow_dist(slow_calc="TN93")
        got = fast_slow_dist(aln3).to_dict()
        assert_allclose(got[("Human", "Mouse")], got[("Mouse", "Human")])
        self.assertTrue(got[("Mouse", "Human")] >= 0)

        aligner = align.align_to_ref(ref_seq="Mouse")
        aln3 = aligner(self.seqs3)
        fast_slow_dist = dist_app.fast_slow_dist(slow_calc="GTR")
        got = fast_slow_dist(aln3).to_dict()
        self.assertTrue(got[("Mouse", "Human")] >= 0)
        fast_slow_dist = dist_app.fast_slow_dist(slow_calc="TN93")
        got = fast_slow_dist(aln3).to_dict()
        self.assertTrue(got[("Mouse", "Human")] >= 0)

        aligner = align.align_to_ref()
        aln3 = aligner(self.seqs4)
        fast_slow_dist = dist_app.fast_slow_dist(slow_calc="GTR")
        got = fast_slow_dist(aln3).to_dict()
        self.assertTrue(got[("Human", "Opossum")] >= 0)
        fast_slow_dist = dist_app.fast_slow_dist(slow_calc="TN93")
        got = fast_slow_dist(aln3).to_dict()
        self.assertTrue(got[("Human", "Opossum")] >= 0)

        aligner = align.align_to_ref(ref_seq="Human")
        aln3 = aligner(self.seqs4)
        fast_slow_dist = dist_app.fast_slow_dist(slow_calc="GTR")
        got = fast_slow_dist(aln3).to_dict()
        self.assertTrue(got[("Human", "Opossum")] >= 0)
        fast_slow_dist = dist_app.fast_slow_dist(slow_calc="TN93")
        got = fast_slow_dist(aln3).to_dict()
        self.assertTrue(got[("Human", "Opossum")] >= 0)

        aligner = align.align_to_ref(ref_seq="Opossum")
        aln3 = aligner(self.seqs4)
        fast_slow_dist = dist_app.fast_slow_dist(slow_calc="GTR")
        got = fast_slow_dist(aln3).to_dict()
        self.assertTrue(got[("Human", "Opossum")] >= 0)
        fast_slow_dist = dist_app.fast_slow_dist(slow_calc="TN93")
        got = fast_slow_dist(aln3).to_dict()
        self.assertTrue(got[("Human", "Opossum")] >= 0)

        # now as a process
        proc = align.align_to_ref() + dist_app.fast_slow_dist(
            fast_calc="hamming", moltype="dna")
        got = proc(self.seqs1)
        self.assertEqual(got[("Human", "Rhesus")], 1)

        treestring = "(Human:0.2,Bandicoot:0.2)"
        aligner = align.progressive_align(model="WG01", guide_tree=treestring)
        _ = aligner(self.seqs5)