Beispiel #1
0
def deserialise_object(data):
    """
    deserialises from json

    Parameters
    ----------
    data
        path to json file, json string or a dict

    Returns
    -------
    If the dict from json.loads does not contain a "type" key, the object will
    be returned as is. Otherwise, it will be deserialised to a cogent3 object.

    Raises
    ------
    NotImplementedError
        if the "type" value does not match any of the supported object types
    """
    if path_exists(data):
        with open_(data) as infile:
            data = json.load(infile)

    # isinstance so that str subclasses are parsed too, instead of falling
    # through to the .get() call below
    if isinstance(data, str):
        data = json.loads(data)

    type_ = data.get("type", None)
    if type_ is None:
        return data

    # the order of these checks is significant, first match wins
    if "core.sequence" in type_:
        func = deserialise_seq
    elif "core.alignment" in type_:
        func = deserialise_seq_collections
    elif "core.tree" in type_:
        func = deserialise_tree
    elif (
        "evolve.substitution_model" in type_ or "evolve.ns_substitution_model" in type_
    ):
        func = deserialise_substitution_model
    elif "evolve.parameter_controller" in type_:
        func = deserialise_likelihood_function
    elif "core.moltype" in type_:
        func = deserialise_moltype
    elif "core.alphabet" in type_:
        func = deserialise_alphabet
    elif "app.result" in type_:
        func = deserialise_result
    else:
        # remaining types are matched case-insensitively
        lowered = type_.lower()
        if "notcompleted" in lowered:
            func = deserialise_not_completed
        elif (
            lowered.endswith("table")
            or "dictarray" in lowered
            or "distancematrix" in lowered
        ):
            func = deserialise_tabular
        else:
            raise NotImplementedError(f"deserialising '{type_}' from json")
    return func(data)
Beispiel #2
0
 def test_path_exists(self):
     """path_exists is false for non-paths and true for an existing file"""
     # a dict, an arbitrary string and newick strings are not existing paths
     non_paths = (
         {},
         "not an existing path",
         "(a,b,(c,d))",
         "(a:0.1,b:0.1,(c:0.1,d:0.1):0.1)",
     )
     for candidate in non_paths:
         self.assertFalse(path_exists(candidate))
     # this file exists, given as a Path instance or as a plain string
     for existing in (pathlib.Path(__file__), __file__):
         self.assertTrue(path_exists(existing))
Beispiel #3
0
    def __init__(
        self,
        sm,
        tree=None,
        sm_args=None,
        gc=1,
        optimise_motif_probs=False,
        tip1=None,
        tip2=None,
        outgroup=None,
        stem=False,
        clade=True,
        is_independent=False,
        lf_args=None,
        upper_omega=20,
        opt_args=None,
        show_progress=False,
        verbose=False,
    ):
        """
        Builds a null model and an alternate model whose omega parameter rule
        is scoped to the selected edges, and sets self.func to the hypothesis
        formed from the two.

        Parameters
        ----------
        sm : str or instance
            substitution model, if string must be available via get_model()
            (see cogent3.available_models).
        tree
            if None, assumes a star phylogeny (only valid for 3 taxa). Can be a
            newick formatted tree, a path to a file containing one, or a Tree
            instance.
        sm_args
            arguments to be passed to the substitution model constructor, e.g.
            dict(optimise_motif_probs=True). The provided dict is not modified.
        gc
            genetic code, either name or number (see cogent3.available_codes)
        optimise_motif_probs : bool
            If True, motif probabilities are free parameters. If False (default)
            they are estimated from the alignment.
        tip1 : str
            name of tip 1
        tip2 : str
            name of tip 2
        outgroup : str
            name of tip outside clade of interest
        stem : bool
            include name of stem to clade defined by tip1, tip2, outgroup
        clade : bool
            include names of edges within clade defined by tip1, tip2, outgroup
        is_independent : bool
            if True, all edges specified by the scoping info get their own
            value of omega, if False, only a single omega
        lf_args
            arguments to be passed to the likelihood function constructor
        upper_omega : float
            upper bound for omega
        opt_args
            arguments for the numerical optimiser, e.g.
            dict(max_restarts=5, tolerance=1e-6, max_evaluations=1000,
            limit_action='ignore')
        show_progress : bool
            show progress bars during numerical optimisation
        verbose : bool
            prints intermediate states to screen during fitting

        Raises
        ------
        ValueError
            if sm is not a codon model, or neither tip1 nor tip2 is provided
        TypeError
            if tree is provided but does not resolve to a TreeNode
        """
        super(natsel_timehet, self).__init__(
            input_types=("aligned", "serialisable"),
            output_types=("result", "hypothesis_result", "serialisable"),
            data_types=("ArrayAlignment", "Alignment"),
        )
        self._formatted_params()
        if not is_codon_model(sm):
            raise ValueError(f"{sm} is not a codon model")

        if not any([tip1, tip2]):
            raise ValueError("must provide at least a single tip name")

        if misc.path_exists(tree):
            tree = load_tree(filename=tree, underscore_unmunge=True)
        elif isinstance(tree, str):
            tree = make_tree(treestring=tree, underscore_unmunge=True)

        if tree and not isinstance(tree, TreeNode):
            raise TypeError(f"invalid tree type {type(tree)}")

        # the edges whose omega is allowed to differ in the alternate model
        if all([tip1, tip2]) and tree:
            edges = tree.get_edge_names(
                tip1, tip2, stem=stem, clade=clade, outgroup_name=outgroup
            )
        elif all([tip1, tip2]):
            edges = [tip1, tip2]
        elif tip1:
            edges = [tip1]
        elif tip2:
            edges = [tip2]

        assert edges, "No edges"

        # instantiate model, ensuring genetic code setting passed on
        # work on a copy so the caller's dict is not modified, otherwise a
        # reused sm_args would carry a stale "gc" into later constructions
        sm_args = dict(sm_args or {})
        sm_args.setdefault("gc", gc)
        sm_args["optimise_motif_probs"] = optimise_motif_probs
        if isinstance(sm, str):
            sm = get_model(sm, **sm_args)

        model_name = sm.name
        # defining the null model
        lf_args = lf_args or {}
        null_lf_args = lf_args.copy()
        null = model(
            sm,
            tree,
            name=f"{model_name}-null",
            sm_args=sm_args,
            lf_args=null_lf_args,
            opt_args=opt_args,
            show_progress=show_progress,
            verbose=verbose,
        )

        # defining the alternate model
        param_rules = [
            dict(
                par_name="omega",
                edges=edges,
                upper=upper_omega,
                is_independent=is_independent,
            )
        ]
        alt = model(
            sm,
            tree,
            name=f"{model_name}-alt",
            sm_args=sm_args,
            opt_args=opt_args,
            show_progress=show_progress,
            param_rules=param_rules,
            lf_args=lf_args,
            verbose=verbose,
        )
        hyp = hypothesis(null, alt)

        self.func = hyp
Beispiel #4
0
    def __init__(
        self,
        sm,
        tree=None,
        sm_args=None,
        gc=1,
        optimise_motif_probs=False,
        upper_omega=20.0,
        lf_args=None,
        opt_args=None,
        show_progress=False,
        verbose=False,
    ):
        """
        Builds the null model (omega bins "-ve" and "neutral") and stores the
        arguments for the alternate model (bins "-ve", "neutral", "+ve"),
        whose parameter rules are completed on each call. self.func is set to
        self.test_hypothesis.

        Parameters
        ----------
        sm : str or instance
            substitution model, if string must be available via get_model()
            (see cogent3.available_models).
        tree
            if None, assumes a star phylogeny (only valid for 3 taxa). Can be a
            newick formatted tree, a path to a file containing one, or a Tree
            instance.
        sm_args
            arguments to be passed to the substitution model constructor, e.g.
            dict(optimise_motif_probs=True). The provided dict is not modified.
        gc
            genetic code, either name or number (see cogent3.available_codes)
        optimise_motif_probs : bool
            If True, motif probabilities are free parameters. If False (default)
            they are estimated from the alignment.
        upper_omega : float
            upper bound for positive selection omega
        lf_args
            arguments to be passed to the likelihood function constructor
        opt_args
            arguments for the numerical optimiser, e.g.
            dict(max_restarts=5, tolerance=1e-6, max_evaluations=1000,
            limit_action='ignore')
        show_progress : bool
            show progress bars during numerical optimisation
        verbose : bool
            prints intermediate states to screen during fitting

        Raises
        ------
        ValueError
            if sm is not a codon model
        TypeError
            if tree is provided but does not resolve to a TreeNode
        """
        super(natsel_sitehet, self).__init__(
            input_types=("aligned", "serialisable"),
            output_types=("result", "hypothesis_result", "serialisable"),
            data_types=("ArrayAlignment", "Alignment"),
        )
        self._formatted_params()
        if not is_codon_model(sm):
            raise ValueError(f"{sm} is not a codon model")

        if misc.path_exists(tree):
            tree = load_tree(filename=tree, underscore_unmunge=True)
        elif isinstance(tree, str):
            tree = make_tree(treestring=tree, underscore_unmunge=True)

        if tree and not isinstance(tree, TreeNode):
            raise TypeError(f"invalid tree type {type(tree)}")

        # instantiate model, ensuring genetic code setting passed on
        # work on a copy so the caller's dict is not modified, otherwise a
        # reused sm_args would carry a stale "gc" into later constructions
        sm_args = dict(sm_args or {})
        sm_args.setdefault("gc", gc)
        sm_args["optimise_motif_probs"] = optimise_motif_probs
        if isinstance(sm, str):
            sm = get_model(sm, **sm_args)

        model_name = sm.name
        # defining the null model
        # omega in the "-ve" bin is bounded just below 1, "neutral" is fixed at 1
        epsilon = 1e-6
        null_param_rules = [
            dict(par_name="omega", bins="-ve", upper=1 - epsilon, init=1 - epsilon),
            dict(par_name="omega", bins="neutral", is_constant=True, value=1.0),
        ]
        lf_args = lf_args or {}
        null_lf_args = lf_args.copy()
        null_lf_args.update(dict(bins=("-ve", "neutral")))
        self.null = model(
            sm,
            tree,
            name=f"{model_name}-null",
            sm_args=sm_args,
            param_rules=null_param_rules,
            lf_args=null_lf_args,
            opt_args=opt_args,
            show_progress=show_progress,
            verbose=verbose,
        )

        # defining the alternate model, param rules to be completed each call
        alt_lf_args = lf_args.copy()
        alt_lf_args.update(dict(bins=("-ve", "neutral", "+ve")))
        self.alt_args = dict(
            sm=sm,
            tree=tree,
            name=f"{model_name}-alt",
            sm_args=sm_args,
            lf_args=alt_lf_args,
            opt_args=opt_args,
            show_progress=show_progress,
            verbose=verbose,
            upper_omega=upper_omega,
        )

        self.func = self.test_hypothesis
Beispiel #5
0
    def __init__(
        self,
        sm,
        tree=None,
        name=None,
        sm_args=None,
        lf_args=None,
        time_het=None,
        param_rules=None,
        opt_args=None,
        split_codons=False,
        show_progress=False,
        verbose=False,
    ):
        """
        Parameters
        ----------
        sm : str or instance
            substitution model if string must be available via get_model()
        tree
            if None, assumes a star phylogeny (only valid for 3 taxa). Can be a
            newick formatted tree, a path to a file containing one, or a Tree
            instance.
        name
            name of the model, defaults to the substitution model name or
            "unnamed model"
        sm_args
            arguments to be passed to the substitution model constructor, e.g.
            dict(optimise_motif_probs=True)
        lf_args
            arguments to be passed to the likelihood function constructor
        time_het
            'max' or a list of dicts corresponding to edge_sets, e.g.
            [dict(edges=['Human', 'Chimp'], is_independent=False, upper=10)].
            Passed to the likelihood function .set_time_heterogeneity()
            method.
        param_rules
            other parameter rules, passed to the likelihood function
            set_param_rule() method. Rules that are not constant and have no
            "upper" value get an upper bound of 50. The provided dicts are
            not modified.
        opt_args
            arguments for the numerical optimiser, e.g.
            dict(max_restarts=5, tolerance=1e-6, max_evaluations=1000,
            limit_action='ignore'). If not provided, max_restarts=5 is used.
            The provided dict is not modified.
        split_codons : bool
            if True, incoming alignments are split into the 3 frames and each
            frame is fit separately. Ignored if the substitution model motifs
            are longer than 1.
        show_progress : bool
            show progress bars during numerical optimisation, used unless
            opt_args already defines "show_progress"
        verbose : bool
            prints intermediate states to screen during fitting

        Returns
        -------
        Calling an instance with an alignment returns a model_result instance
        with the optimised likelihood function. In the case of split_codons,
        the result object has a separate entry for each.

        Raises
        ------
        TypeError
            if tree is provided but does not resolve to a TreeNode
        """
        super(model, self).__init__(
            input_types=("aligned", "serialisable"),
            output_types=("result", "model_result", "serialisable"),
            data_types=("ArrayAlignment", "Alignment"),
        )
        self._verbose = verbose
        self._formatted_params()
        sm_args = sm_args or {}
        if isinstance(sm, str):
            sm = get_model(sm, **sm_args)
        self._sm = sm
        # splitting into frames is disabled when motifs are longer than 1
        if len(sm.get_motifs()[0]) > 1:
            split_codons = False

        if misc.path_exists(tree):
            tree = load_tree(filename=tree, underscore_unmunge=True)
        elif isinstance(tree, str):
            tree = make_tree(treestring=tree, underscore_unmunge=True)

        if tree and not isinstance(tree, TreeNode):
            raise TypeError(f"invalid tree type {type(tree)}")

        self._tree = tree
        self._lf_args = lf_args or {}
        if not name:
            name = sm.name or "unnamed model"
        self.name = name

        # copy so that "show_progress" is not written into the caller's dict,
        # which may be shared between several model instances
        if opt_args:
            self._opt_args = dict(opt_args)
        else:
            self._opt_args = dict(max_restarts=5, show_progress=show_progress)
        self._opt_args.setdefault("show_progress", show_progress)

        if param_rules:
            # copy each rule so the default bound is not added to caller's dicts
            param_rules = [dict(rule) for rule in param_rules]
            for rule in param_rules:
                if not rule.get("is_constant"):
                    rule.setdefault("upper", 50)  # default upper bound
        else:
            # the stored value for "no rules" is kept as an empty dict
            param_rules = {}
        self._param_rules = param_rules
        self._time_het = time_het
        self._split_codons = split_codons
        self.func = self.fit
Beispiel #6
0
    def __init__(
        self,
        sm,
        tree=None,
        sm_args=None,
        gc=1,
        optimise_motif_probs=False,
        tip1=None,
        tip2=None,
        outgroup=None,
        stem=False,
        clade=True,
        lf_args=None,
        upper_omega=20,
        opt_args=None,
        show_progress=False,
        verbose=False,
    ):
        """
        Builds the null model (omega bins "0" and "1") and stores the
        arguments for the alternate model (bins "0", "1", "2a", "2b"), whose
        parameter rules are completed on each call. self.func is set to
        self.test_hypothesis.

        Parameters
        ----------
        sm : str or instance
            substitution model, if string must be available via get_model()
            (see cogent3.available_models).
        tree
            if None, assumes a star phylogeny (only valid for 3 taxa). Can be a
            newick formatted tree, a path to a file containing one, or a Tree
            instance.
        sm_args
            arguments to be passed to the substitution model constructor, e.g.
            dict(optimise_motif_probs=True). The provided dict is not modified.
        gc
            genetic code, either name or number (see cogent3.available_codes)
        optimise_motif_probs : bool
            If True, motif probabilities are free parameters. If False (default)
            they are estimated from the alignment.
        tip1 : str
            name of tip 1
        tip2 : str
            name of tip 2
        outgroup : str
            name of tip outside clade of interest
        stem : bool
            include name of stem to clade defined by tip1, tip2, outgroup
        clade : bool
            include names of edges within clade defined by tip1, tip2, outgroup
        lf_args
            arguments to be passed to the likelihood function constructor
        upper_omega : float
            upper bound for positive selection omega
        opt_args
            arguments for the numerical optimiser, e.g.
            dict(max_restarts=5, tolerance=1e-6, max_evaluations=1000,
            limit_action='ignore')
        show_progress : bool
            show progress bars during numerical optimisation
        verbose : bool
            prints intermediate states to screen during fitting

        Raises
        ------
        ValueError
            if sm is not a codon model, or neither tip1 nor tip2 is provided
        TypeError
            if tree is provided but does not resolve to a TreeNode

        Notes
        -----
        The scoping parameters (tip1, tip2, outgroup, stem, clade) define the
        foreground edges.
        """
        super(natsel_zhang, self).__init__(
            input_types=(ALIGNED_TYPE, SERIALISABLE_TYPE),
            output_types=(RESULT_TYPE, HYPOTHESIS_RESULT_TYPE, SERIALISABLE_TYPE),
            data_types=("ArrayAlignment", "Alignment"),
        )
        self._formatted_params()
        if not is_codon_model(sm):
            raise ValueError(f"{sm} is not a codon model")

        if not any([tip1, tip2]):
            raise ValueError("must provide at least a single tip name")

        if misc.path_exists(tree):
            tree = load_tree(filename=tree, underscore_unmunge=True)
        elif isinstance(tree, str):
            tree = make_tree(treestring=tree, underscore_unmunge=True)

        if tree and not isinstance(tree, TreeNode):
            raise TypeError(f"invalid tree type {type(tree)}")

        # the foreground edges passed on to the alternate model
        if all([tip1, tip2]) and tree:
            edges = tree.get_edge_names(
                tip1, tip2, stem=stem, clade=clade, outgroup_name=outgroup
            )
        elif all([tip1, tip2]):
            edges = [tip1, tip2]
        elif tip1:
            edges = [tip1]
        elif tip2:
            edges = [tip2]

        assert edges, "No edges"

        # instantiate model, ensuring genetic code setting passed on
        # work on a copy so the caller's dict is not modified, otherwise a
        # reused sm_args would carry a stale "gc" into later constructions
        sm_args = dict(sm_args or {})
        sm_args.setdefault("gc", gc)
        sm_args["optimise_motif_probs"] = optimise_motif_probs
        if isinstance(sm, str):
            sm = get_model(sm, **sm_args)

        model_name = sm.name
        # defining the null model
        # omega in bin "0" is bounded just below 1, bin "1" is fixed at 1
        epsilon = 1e-6
        null_param_rules = [
            dict(par_name="omega", bins="0", upper=1 - epsilon, init=1 - epsilon),
            dict(par_name="omega", bins="1", is_constant=True, value=1.0),
        ]
        lf_args = lf_args or {}
        null_lf_args = lf_args.copy()
        null_lf_args.update(dict(bins=("0", "1")))
        self.null = model(
            sm,
            tree,
            name=f"{model_name}-null",
            sm_args=sm_args,
            param_rules=null_param_rules,
            lf_args=null_lf_args,
            opt_args=opt_args,
            show_progress=show_progress,
            verbose=verbose,
        )

        # defining the alternate model, param rules to be completed each call
        alt_lf_args = lf_args.copy()
        alt_lf_args.update(dict(bins=("0", "1", "2a", "2b")))
        self.alt_args = dict(
            sm=sm,
            tree=tree,
            name=f"{model_name}-alt",
            sm_args=sm_args,
            edges=edges,
            lf_args=alt_lf_args,
            opt_args=opt_args,
            show_progress=show_progress,
            verbose=verbose,
            upper_omega=upper_omega,
        )

        self.func = self.test_hypothesis
Beispiel #7
0
    def __init__(
        self,
        sm,
        tree=None,
        sm_args=None,
        gc=1,
        optimise_motif_probs=False,
        lf_args=None,
        opt_args=None,
        show_progress=False,
        verbose=False,
    ):
        """
        Builds a null model with omega held constant at 1.0 and an alternate
        model without that constraint, and sets self.func to the hypothesis
        formed from the two.

        Parameters
        ----------
        sm : str or instance
            substitution model, if string must be available via get_model()
            (see cogent3.available_models).
        tree
            if None, assumes a star phylogeny (only valid for 3 taxa). Can be a
            newick formatted tree, a path to a file containing one, or a Tree
            instance.
        sm_args
            arguments to be passed to the substitution model constructor, e.g.
            dict(optimise_motif_probs=True). The provided dict is not modified.
        gc
            genetic code, either name or number (see cogent3.available_codes)
        optimise_motif_probs : bool
            If True, motif probabilities are free parameters. If False (default)
            they are estimated from the alignment.
        lf_args
            arguments to be passed to the likelihood function constructor
        opt_args
            arguments for the numerical optimiser, e.g.
            dict(max_restarts=5, tolerance=1e-6, max_evaluations=1000,
            limit_action='ignore')
        show_progress : bool
            show progress bars during numerical optimisation
        verbose : bool
            prints intermediate states to screen during fitting

        Raises
        ------
        ValueError
            if sm is not a codon model
        TypeError
            if tree is provided but does not resolve to a TreeNode
        """
        super(natsel_neutral, self).__init__(
            input_types=(ALIGNED_TYPE, SERIALISABLE_TYPE),
            output_types=(RESULT_TYPE, HYPOTHESIS_RESULT_TYPE, SERIALISABLE_TYPE),
            data_types=("ArrayAlignment", "Alignment"),
        )
        self._formatted_params()
        if not is_codon_model(sm):
            raise ValueError(f"{sm} is not a codon model")

        if misc.path_exists(tree):
            tree = load_tree(filename=tree, underscore_unmunge=True)
        elif isinstance(tree, str):
            tree = make_tree(treestring=tree, underscore_unmunge=True)

        if tree and not isinstance(tree, TreeNode):
            raise TypeError(f"invalid tree type {type(tree)}")

        # instantiate model, ensuring genetic code setting passed on
        # work on a copy so the caller's dict is not modified, otherwise a
        # reused sm_args would carry a stale "gc" into later constructions
        sm_args = dict(sm_args or {})
        sm_args.setdefault("gc", gc)
        sm_args["optimise_motif_probs"] = optimise_motif_probs
        if isinstance(sm, str):
            sm = get_model(sm, **sm_args)

        model_name = sm.name
        # defining the null model
        lf_args = lf_args or {}
        null = model(
            sm,
            tree,
            name=f"{model_name}-null",
            sm_args=sm_args,
            opt_args=opt_args,
            show_progress=show_progress,
            param_rules=[dict(par_name="omega", is_constant=True, value=1.0)],
            lf_args=lf_args,
            verbose=verbose,
        )

        # defining the alternate model
        alt = model(
            sm,
            tree,
            name=f"{model_name}-alt",
            sm_args=sm_args,
            opt_args=opt_args,
            show_progress=show_progress,
            lf_args=lf_args,
            verbose=verbose,
        )
        hyp = hypothesis(null, alt)

        self.func = hyp