def deserialise_object(data):
    """
    deserialises from json

    Parameters
    ----------
    data
        path to json file, json string or a dict

    Returns
    -------
    If the dict from json.loads does not contain a "type" key, the object will
    be returned as is. Otherwise, the result of the deserialise function
    selected by the "type" value is returned.

    Raises
    ------
    NotImplementedError
        if the "type" value matches none of the handled categories
    """
    if path_exists(data):
        with open_(data) as infile:
            data = json.load(infile)

    # isinstance (rather than an exact type comparison) so that str
    # subclasses are also parsed as json text
    if isinstance(data, str):
        data = json.loads(data)

    type_ = data.get("type", None)
    if type_ is None:
        return data

    # the order of these checks is significant, the first match wins
    if "core.sequence" in type_:
        func = deserialise_seq
    elif "core.alignment" in type_:
        func = deserialise_seq_collections
    elif "core.tree" in type_:
        func = deserialise_tree
    elif (
        "evolve.substitution_model" in type_
        or "evolve.ns_substitution_model" in type_
    ):
        func = deserialise_substitution_model
    elif "evolve.parameter_controller" in type_:
        func = deserialise_likelihood_function
    elif "core.moltype" in type_:
        func = deserialise_moltype
    elif "core.alphabet" in type_:
        func = deserialise_alphabet
    elif "app.result" in type_:
        func = deserialise_result
    else:
        # the remaining categories are matched case-insensitively
        lowered = type_.lower()
        if "notcompleted" in lowered:
            func = deserialise_not_completed
        elif (
            lowered.endswith("table")
            or "dictarray" in lowered
            or "distancematrix" in lowered
        ):
            func = deserialise_tabular
        else:
            raise NotImplementedError(f"deserialising '{type_}' from json")

    return func(data)
def test_path_exists(self):
    """path_exists is False for non-paths and True for an existing file"""
    # a dict, free text and newick strings must not be reported as paths
    not_paths = (
        {},
        "not an existing path",
        "(a,b,(c,d))",
        "(a:0.1,b:0.1,(c:0.1,d:0.1):0.1)",
    )
    for candidate in not_paths:
        self.assertFalse(path_exists(candidate))

    # this file exists, given either as a Path instance or as a string
    as_path = pathlib.Path(__file__)
    for candidate in (as_path, __file__):
        self.assertTrue(path_exists(candidate))
def __init__(
    self,
    sm,
    tree=None,
    sm_args=None,
    gc=1,
    optimise_motif_probs=False,
    tip1=None,
    tip2=None,
    outgroup=None,
    stem=False,
    clade=True,
    is_independent=False,
    lf_args=None,
    upper_omega=20,
    opt_args=None,
    show_progress=False,
    verbose=False,
):
    """
    Parameters
    ----------
    sm : str or instance
        substitution model, if string must be available via get_model()
        (see cogent3.available_models).
    tree
        if None, assumes a star phylogeny (only valid for 3 taxa). Can be a
        newick formatted tree, a path to a file containing one, or a Tree
        instance.
    sm_args
        arguments to be passed to the substitution model constructor, e.g.
        dict(optimise_motif_probs=True)
    gc
        genetic code, either name or number (see cogent3.available_codes)
    optimise_motif_probs : bool
        If True, motif probabilities are free parameters. If False (default)
        they are estimated from the alignment.
    tip1 : str
        name of tip 1
    tip2 : str
        name of tip 2
    outgroup : str
        name of tip outside clade of interest
    stem : bool
        include name of stem to clade defined by tip1, tip2, outgroup
    clade : bool
        include names of edges within clade defined by tip1, tip2, outgroup
    is_independent : bool
        if True, all edges specified by the scoping info get their own
        value of omega, if False, only a single omega
    lf_args
        arguments to be passed to the likelihood function constructor
    upper_omega : float
        upper bound for omega
    opt_args
        arguments for the numerical optimiser, e.g.
        dict(max_restarts=5, tolerance=1e-6, max_evaluations=1000,
        limit_action='ignore')
    show_progress : bool
        show progress bars during numerical optimisation
    verbose : bool
        prints intermediate states to screen during fitting

    Raises
    ------
    ValueError
        if sm is not a codon model, or neither tip1 nor tip2 is provided
    TypeError
        if tree is provided but does not resolve to a TreeNode
    """
    super(natsel_timehet, self).__init__(
        input_types=("aligned", "serialisable"),
        output_types=("result", "hypothesis_result", "serialisable"),
        data_types=("ArrayAlignment", "Alignment"),
    )
    # NOTE(review): presumably records the constructor arguments, so it is
    # kept ahead of any rebinding of them -- confirm against the base class
    self._formatted_params()
    if not is_codon_model(sm):
        raise ValueError(f"{sm} is not a codon model")

    if not any([tip1, tip2]):
        raise ValueError("must provide at least a single tip name")

    if misc.path_exists(tree):
        tree = load_tree(filename=tree, underscore_unmunge=True)
    elif isinstance(tree, str):
        tree = make_tree(treestring=tree, underscore_unmunge=True)

    if tree and not isinstance(tree, TreeNode):
        raise TypeError(f"invalid tree type {type(tree)}")

    if tip1 and tip2 and tree:
        edges = tree.get_edge_names(
            tip1, tip2, stem=stem, clade=clade, outgroup_name=outgroup
        )
    else:
        # without a tree (or with a single tip) the named tips are the edges
        edges = [tip for tip in (tip1, tip2) if tip]

    assert edges, "No edges"

    # instantiate model, ensuring genetic code setting passed on
    # work on a copy so the dict supplied by the caller is not modified
    sm_args = dict(sm_args or {})
    sm_args.setdefault("gc", gc)
    sm_args["optimise_motif_probs"] = optimise_motif_probs
    if isinstance(sm, str):
        sm = get_model(sm, **sm_args)

    model_name = sm.name
    # defining the null model
    lf_args = lf_args or {}
    null_lf_args = lf_args.copy()
    null = model(
        sm,
        tree,
        name=f"{model_name}-null",
        sm_args=sm_args,
        lf_args=null_lf_args,
        opt_args=opt_args,
        show_progress=show_progress,
        verbose=verbose,
    )

    # defining the alternate model
    param_rules = [
        dict(
            par_name="omega",
            edges=edges,
            upper=upper_omega,
            is_independent=is_independent,
        )
    ]
    alt = model(
        sm,
        tree,
        name=f"{model_name}-alt",
        sm_args=sm_args,
        opt_args=opt_args,
        show_progress=show_progress,
        param_rules=param_rules,
        lf_args=lf_args,
        verbose=verbose,
    )
    self.func = hypothesis(null, alt)
def __init__(
    self,
    sm,
    tree=None,
    sm_args=None,
    gc=1,
    optimise_motif_probs=False,
    upper_omega=20.0,
    lf_args=None,
    opt_args=None,
    show_progress=False,
    verbose=False,
):
    """
    Parameters
    ----------
    sm : str or instance
        substitution model, if string must be available via get_model()
        (see cogent3.available_models).
    tree
        if None, assumes a star phylogeny (only valid for 3 taxa). Can be a
        newick formatted tree, a path to a file containing one, or a Tree
        instance.
    sm_args
        arguments to be passed to the substitution model constructor, e.g.
        dict(optimise_motif_probs=True)
    gc
        genetic code, either name or number (see cogent3.available_codes)
    optimise_motif_probs : bool
        If True, motif probabilities are free parameters. If False (default)
        they are estimated from the alignment.
    upper_omega : float
        upper bound for positive selection omega
    lf_args
        arguments to be passed to the likelihood function constructor
    opt_args
        arguments for the numerical optimiser, e.g.
        dict(max_restarts=5, tolerance=1e-6, max_evaluations=1000,
        limit_action='ignore')
    show_progress : bool
        show progress bars during numerical optimisation
    verbose : bool
        prints intermediate states to screen during fitting

    Raises
    ------
    ValueError
        if sm is not a codon model
    TypeError
        if tree is provided but does not resolve to a TreeNode
    """
    super(natsel_sitehet, self).__init__(
        input_types=("aligned", "serialisable"),
        output_types=("result", "hypothesis_result", "serialisable"),
        data_types=("ArrayAlignment", "Alignment"),
    )
    # NOTE(review): presumably records the constructor arguments, so it is
    # kept ahead of any rebinding of them -- confirm against the base class
    self._formatted_params()
    if not is_codon_model(sm):
        raise ValueError(f"{sm} is not a codon model")

    if misc.path_exists(tree):
        tree = load_tree(filename=tree, underscore_unmunge=True)
    elif isinstance(tree, str):
        tree = make_tree(treestring=tree, underscore_unmunge=True)

    if tree and not isinstance(tree, TreeNode):
        raise TypeError(f"invalid tree type {type(tree)}")

    # instantiate model, ensuring genetic code setting passed on
    # work on a copy so the dict supplied by the caller is not modified
    sm_args = dict(sm_args or {})
    sm_args.setdefault("gc", gc)
    sm_args["optimise_motif_probs"] = optimise_motif_probs
    if isinstance(sm, str):
        sm = get_model(sm, **sm_args)

    model_name = sm.name

    # defining the null model
    # omega of the "-ve" bin is bounded just below 1, "neutral" is fixed at 1
    epsilon = 1e-6
    null_param_rules = [
        dict(par_name="omega", bins="-ve", upper=1 - epsilon, init=1 - epsilon),
        dict(par_name="omega", bins="neutral", is_constant=True, value=1.0),
    ]
    lf_args = lf_args or {}
    null_lf_args = lf_args.copy()
    null_lf_args.update(dict(bins=("-ve", "neutral")))
    self.null = model(
        sm,
        tree,
        name=f"{model_name}-null",
        sm_args=sm_args,
        param_rules=null_param_rules,
        lf_args=null_lf_args,
        opt_args=opt_args,
        show_progress=show_progress,
        verbose=verbose,
    )

    # defining the alternate model, param rules to be completed each call
    alt_lf_args = lf_args.copy()
    alt_lf_args.update(dict(bins=("-ve", "neutral", "+ve")))
    self.alt_args = dict(
        sm=sm,
        tree=tree,
        name=f"{model_name}-alt",
        sm_args=sm_args,
        lf_args=alt_lf_args,
        opt_args=opt_args,
        show_progress=show_progress,
        verbose=verbose,
        upper_omega=upper_omega,
    )
    self.func = self.test_hypothesis
def __init__(
    self,
    sm,
    tree=None,
    name=None,
    sm_args=None,
    lf_args=None,
    time_het=None,
    param_rules=None,
    opt_args=None,
    split_codons=False,
    show_progress=False,
    verbose=False,
):
    """
    Parameters
    ----------
    sm : str or instance
        substitution model if string must be available via get_model()
    tree
        if None, assumes a star phylogeny (only valid for 3 taxa). Can be a
        newick formatted tree, a path to a file containing one, or a Tree
        instance.
    name
        name of the model
    sm_args
        arguments to be passed to the substitution model constructor, e.g.
        dict(optimise_motif_probs=True)
    lf_args
        arguments to be passed to the likelihood function constructor
    time_het
        'max' or a list of dicts corresponding to edge_sets, e.g.
        [dict(edges=['Human', 'Chimp'], is_independent=False, upper=10)].
        Passed to the likelihood function .set_time_heterogeneity()
        method.
    param_rules
        other parameter rules, passed to the likelihood function
        set_param_rule() method. Rules that are not constant and lack an
        "upper" entry get an upper bound of 50. The supplied dicts are
        copied, not modified.
    opt_args
        arguments for the numerical optimiser, e.g.
        dict(max_restarts=5, tolerance=1e-6, max_evaluations=1000,
        limit_action='ignore'). The supplied dict is copied, not modified.
    split_codons : bool
        if True, incoming alignments are split into the 3 frames and each
        frame is fit separately
    show_progress : bool
        show progress bars during numerical optimisation
    verbose : bool
        prints intermediate states to screen during fitting

    Returns
    -------
    Calling an instance with an alignment returns a model_result instance
    with the optimised likelihood function. In the case of split_codons,
    the result object has a separate entry for each.

    Raises
    ------
    TypeError
        if tree is provided but does not resolve to a TreeNode
    """
    super(model, self).__init__(
        input_types=("aligned", "serialisable"),
        output_types=("result", "model_result", "serialisable"),
        data_types=("ArrayAlignment", "Alignment"),
    )
    self._verbose = verbose
    # NOTE(review): presumably records the constructor arguments, so it is
    # kept ahead of any rebinding of them -- confirm against the base class
    self._formatted_params()
    sm_args = sm_args or {}
    if isinstance(sm, str):
        sm = get_model(sm, **sm_args)
    self._sm = sm

    # splitting is disabled when the model's motifs are longer than 1
    if len(sm.get_motifs()[0]) > 1:
        split_codons = False

    if misc.path_exists(tree):
        tree = load_tree(filename=tree, underscore_unmunge=True)
    elif isinstance(tree, str):
        tree = make_tree(treestring=tree, underscore_unmunge=True)

    if tree and not isinstance(tree, TreeNode):
        raise TypeError(f"invalid tree type {type(tree)}")

    self._tree = tree
    self._lf_args = lf_args or {}
    if not name:
        name = sm.name or "unnamed model"
    self.name = name

    # copy so that adding show_progress does not alter the caller's dict
    opt_args = dict(opt_args) if opt_args else dict(max_restarts=5)
    opt_args.setdefault("show_progress", show_progress)
    self._opt_args = opt_args

    if param_rules:
        # copy each rule so the default bound is not written into the
        # dicts supplied by the caller
        param_rules = [dict(rule) for rule in param_rules]
        for rule in param_rules:
            if rule.get("is_constant"):
                continue
            rule.setdefault("upper", 50)  # default upper bound
    else:
        # empty dict retained as the "no rules" value for compatibility
        param_rules = {}

    self._param_rules = param_rules
    self._time_het = time_het
    self._split_codons = split_codons
    self.func = self.fit
def __init__(
    self,
    sm,
    tree=None,
    sm_args=None,
    gc=1,
    optimise_motif_probs=False,
    tip1=None,
    tip2=None,
    outgroup=None,
    stem=False,
    clade=True,
    lf_args=None,
    upper_omega=20,
    opt_args=None,
    show_progress=False,
    verbose=False,
):
    """
    Parameters
    ----------
    sm : str or instance
        substitution model, if string must be available via get_model()
        (see cogent3.available_models).
    tree
        if None, assumes a star phylogeny (only valid for 3 taxa). Can be a
        newick formatted tree, a path to a file containing one, or a Tree
        instance.
    sm_args
        arguments to be passed to the substitution model constructor, e.g.
        dict(optimise_motif_probs=True)
    gc
        genetic code, either name or number (see cogent3.available_codes)
    optimise_motif_probs : bool
        If True, motif probabilities are free parameters. If False (default)
        they are estimated from the alignment.
    tip1 : str
        name of tip 1
    tip2 : str
        name of tip 2
    outgroup : str
        name of tip outside clade of interest
    stem : bool
        include name of stem to clade defined by tip1, tip2, outgroup
    clade : bool
        include names of edges within clade defined by tip1, tip2, outgroup
    lf_args
        arguments to be passed to the likelihood function constructor
    upper_omega : float
        upper bound for positive selection omega
    opt_args
        arguments for the numerical optimiser, e.g.
        dict(max_restarts=5, tolerance=1e-6, max_evaluations=1000,
        limit_action='ignore')
    show_progress : bool
        show progress bars during numerical optimisation
    verbose : bool
        prints intermediate states to screen during fitting

    Raises
    ------
    ValueError
        if sm is not a codon model, or neither tip1 nor tip2 is provided
    TypeError
        if tree is provided but does not resolve to a TreeNode

    Notes
    -----
    The scoping parameters (tip1, tip2, outgroup, stem, clade) define the
    foreground edges.
    """
    super(natsel_zhang, self).__init__(
        input_types=(ALIGNED_TYPE, SERIALISABLE_TYPE),
        output_types=(RESULT_TYPE, HYPOTHESIS_RESULT_TYPE, SERIALISABLE_TYPE),
        data_types=("ArrayAlignment", "Alignment"),
    )
    # NOTE(review): presumably records the constructor arguments, so it is
    # kept ahead of any rebinding of them -- confirm against the base class
    self._formatted_params()
    if not is_codon_model(sm):
        raise ValueError(f"{sm} is not a codon model")

    if not any([tip1, tip2]):
        raise ValueError("must provide at least a single tip name")

    if misc.path_exists(tree):
        tree = load_tree(filename=tree, underscore_unmunge=True)
    elif isinstance(tree, str):
        tree = make_tree(treestring=tree, underscore_unmunge=True)

    if tree and not isinstance(tree, TreeNode):
        raise TypeError(f"invalid tree type {type(tree)}")

    if tip1 and tip2 and tree:
        edges = tree.get_edge_names(
            tip1, tip2, stem=stem, clade=clade, outgroup_name=outgroup
        )
    else:
        # without a tree (or with a single tip) the named tips are the edges
        edges = [tip for tip in (tip1, tip2) if tip]

    assert edges, "No edges"

    # instantiate model, ensuring genetic code setting passed on
    # work on a copy so the dict supplied by the caller is not modified
    sm_args = dict(sm_args or {})
    sm_args.setdefault("gc", gc)
    sm_args["optimise_motif_probs"] = optimise_motif_probs
    if isinstance(sm, str):
        sm = get_model(sm, **sm_args)

    model_name = sm.name

    # defining the null model
    # omega of bin "0" is bounded just below 1, bin "1" is fixed at 1
    epsilon = 1e-6
    null_param_rules = [
        dict(par_name="omega", bins="0", upper=1 - epsilon, init=1 - epsilon),
        dict(par_name="omega", bins="1", is_constant=True, value=1.0),
    ]
    lf_args = lf_args or {}
    null_lf_args = lf_args.copy()
    null_lf_args.update(dict(bins=("0", "1")))
    self.null = model(
        sm,
        tree,
        name=f"{model_name}-null",
        sm_args=sm_args,
        param_rules=null_param_rules,
        lf_args=null_lf_args,
        opt_args=opt_args,
        show_progress=show_progress,
        verbose=verbose,
    )

    # defining the alternate model, param rules to be completed each call
    alt_lf_args = lf_args.copy()
    alt_lf_args.update(dict(bins=("0", "1", "2a", "2b")))
    self.alt_args = dict(
        sm=sm,
        tree=tree,
        name=f"{model_name}-alt",
        sm_args=sm_args,
        edges=edges,
        lf_args=alt_lf_args,
        opt_args=opt_args,
        show_progress=show_progress,
        verbose=verbose,
        upper_omega=upper_omega,
    )
    self.func = self.test_hypothesis
def __init__(
    self,
    sm,
    tree=None,
    sm_args=None,
    gc=1,
    optimise_motif_probs=False,
    lf_args=None,
    opt_args=None,
    show_progress=False,
    verbose=False,
):
    """
    Parameters
    ----------
    sm : str or instance
        substitution model, if string must be available via get_model()
        (see cogent3.available_models).
    tree
        if None, assumes a star phylogeny (only valid for 3 taxa). Can be a
        newick formatted tree, a path to a file containing one, or a Tree
        instance.
    sm_args
        arguments to be passed to the substitution model constructor, e.g.
        dict(optimise_motif_probs=True)
    gc
        genetic code, either name or number (see cogent3.available_codes)
    optimise_motif_probs : bool
        If True, motif probabilities are free parameters. If False (default)
        they are estimated from the alignment.
    lf_args
        arguments to be passed to the likelihood function constructor
    opt_args
        arguments for the numerical optimiser, e.g.
        dict(max_restarts=5, tolerance=1e-6, max_evaluations=1000,
        limit_action='ignore')
    show_progress : bool
        show progress bars during numerical optimisation
    verbose : bool
        prints intermediate states to screen during fitting

    Raises
    ------
    ValueError
        if sm is not a codon model
    TypeError
        if tree is provided but does not resolve to a TreeNode
    """
    super(natsel_neutral, self).__init__(
        input_types=(ALIGNED_TYPE, SERIALISABLE_TYPE),
        output_types=(RESULT_TYPE, HYPOTHESIS_RESULT_TYPE, SERIALISABLE_TYPE),
        data_types=("ArrayAlignment", "Alignment"),
    )
    # NOTE(review): presumably records the constructor arguments, so it is
    # kept ahead of any rebinding of them -- confirm against the base class
    self._formatted_params()
    if not is_codon_model(sm):
        raise ValueError(f"{sm} is not a codon model")

    if misc.path_exists(tree):
        tree = load_tree(filename=tree, underscore_unmunge=True)
    elif isinstance(tree, str):
        tree = make_tree(treestring=tree, underscore_unmunge=True)

    if tree and not isinstance(tree, TreeNode):
        raise TypeError(f"invalid tree type {type(tree)}")

    # instantiate model, ensuring genetic code setting passed on
    # work on a copy so the dict supplied by the caller is not modified
    sm_args = dict(sm_args or {})
    sm_args.setdefault("gc", gc)
    sm_args["optimise_motif_probs"] = optimise_motif_probs
    if isinstance(sm, str):
        sm = get_model(sm, **sm_args)

    model_name = sm.name
    # defining the null model, omega is held constant at 1
    lf_args = lf_args or {}
    null = model(
        sm,
        tree,
        name=f"{model_name}-null",
        sm_args=sm_args,
        opt_args=opt_args,
        show_progress=show_progress,
        param_rules=[dict(par_name="omega", is_constant=True, value=1.0)],
        lf_args=lf_args,
        verbose=verbose,
    )

    # defining the alternate model
    alt = model(
        sm,
        tree,
        name=f"{model_name}-alt",
        sm_args=sm_args,
        opt_args=opt_args,
        show_progress=show_progress,
        lf_args=lf_args,
        verbose=verbose,
    )
    self.func = hypothesis(null, alt)