def add_likelihood(self, likelihood):
    """
    Add one likelihood distribution per feature of this model.

    Each feature gets its own ``TreeLikelihood`` element below
    *likelihood*.  Pruned models get a dedicated pruned tree and pruned
    branch rate model; all others refer to the shared tree and to the
    clock's branch rate model.  Features listed in ``self.reconstruct``
    have their spec swapped for an ancestral state reconstruction one.
    """
    for index, feature in enumerate(self.features):
        fname = "%s:%s" % (self.name, xml.valid_id(feature))
        likelihood_id = "featureLikelihood:%s" % fname
        attribs = {
            "id": likelihood_id,
            "spec": "TreeLikelihood",
            "useAmbiguities": "true",
        }

        if not self.pruned:
            # Unpruned: point straight at the shared tree and clock.
            attribs["branchRateModel"] = (
                "@%s" % self.clock.branchrate_model_id)
            attribs["tree"] = "@Tree.t:beastlingTree"
            distribution = xml.distribution(likelihood, attrib=attribs)
        else:
            distribution = xml.distribution(likelihood, attrib=attribs)
            # Pruned tree wrapping the shared tree and the pruned data
            pruned_tree_id = "Tree.t:prunedBeastlingTree.%s" % fname
            pruned_tree = xml.tree(
                distribution,
                id=pruned_tree_id,
                spec="beast.evolution.tree.PrunedTree",
                quickshortcut="true",
                attrib={'assert': "false"})
            xml.tree(pruned_tree, idref="Tree.t:beastlingTree")
            xml.alignment(pruned_tree, idref="pruned_data_%s" % fname)
            # Matching pruned branch rate model
            self.clock.add_pruned_branchrate_model(
                distribution, fname, pruned_tree_id)

        if feature in self.reconstruct:
            # The spec depends on whether states are reconstructed over
            # the whole tree or only at selected nodes.
            if self.treewide_reconstruction:
                distribution.attrib["spec"] = "AncestralStateTreeLikelihood"
                self.treedata.append(likelihood_id)
                distribution.attrib["tag"] = feature
            else:
                distribution.attrib["spec"] = (
                    "lucl.beast.statereconstruction.AncestralStatesLogger")
                distribution.attrib["value"] = " ".join(
                    self.pattern_names(feature))
                for label in self.reconstruct_at:
                    self.beastxml.add_taxon_set(
                        distribution, label,
                        self.config.language_group(label))
                self.metadata.append(likelihood_id)
            distribution.attrib["useAmbiguities"] = "false"

        # Site model, then data
        self.add_sitemodel(distribution, feature, fname)
        self.add_feature_data(distribution, index, feature, fname)
def add_monophyly_constraints(self):
    """
    Add the monophyly constraint distribution to the prior.

    Does nothing unless ``self.config.languages.monophyly`` is truthy.
    """
    languages = self.config.languages
    if not languages.monophyly:
        return
    tree_ref = "@{}".format(self.config.treeprior.tree_id)
    xml.distribution(
        self.prior,
        id="constraints",
        spec="beast.math.distributions.MultiMonophyleticConstraint",
        tree=tree_ref,
        newick=languages.monophyly_newick,
    )
def add_calibrations(self):
    """
    Add timing calibrations to prior distribution.

    Walks the clade calibrations and the tip calibrations from the
    configuration in order of name and adds one ``MRCAPrior`` element
    to ``self.prior`` for each, skipping point calibrations.
    """
    # This itertools.chain is a bit ugly, I wonder if we can get away
    # with sticking them all in one list...
    all_calibrations = itertools.chain(
        self.config.calibrations.items(),
        self.config.tip_calibrations.items())
    # Sort on the name only.  Sorting the bare (name, calibration) tuples
    # would fall back to comparing the calibration objects whenever a
    # name occurs in both dicts, which can raise TypeError.
    for clade, cal in sorted(all_calibrations, key=lambda item: item[0]):
        # Don't add an MRCA cal for point calibrations, those only exist
        # to cause the initial tip height to be set
        if cal.dist == "point":
            continue
        # BEAST's logcombiner chokes on spaces...
        clade = clade.replace(" ", "_")

        # Create MRCAPrior node
        attribs = {
            "id": clade + "MRCA",
            "monophyletic": "true",
            "spec": "beast.math.distributions.MRCAPrior",
            "tree": "@{:}".format(self.config.treeprior.tree_id),
        }
        if cal.originate:
            attribs["useOriginate"] = "true"
        elif len(cal.langs) == 1:
            # If there's only 1 lang and it's not an originate cal, it
            # must be a tip cal
            attribs["tipsonly"] = "true"
        cal_prior = xml.distribution(self.prior, attrib=attribs)

        # Create "taxonset" param for MRCAPrior; the taxon set is named
        # after the clade without any "_originate" suffix.
        suffix = "_originate"
        taxon_set_name = (
            clade[:-len(suffix)] if clade.endswith(suffix) else clade)
        self.add_taxon_set(cal_prior, taxon_set_name, cal.langs)
        cal.generate_xml_element(cal_prior)
def add_likelihood(self):
    """
    Create the compound likelihood and let every model fill it in.

    The compound distribution is stored as ``self.likelihood`` and is
    attached below ``self.posterior``.
    """
    compound = xml.distribution(
        self.posterior,
        id="likelihood",
        spec="util.CompoundDistribution",
    )
    self.likelihood = compound
    for model in self.config.all_models:
        model.add_likelihood(self.likelihood)
def add_distributions(self):
    """
    Build the posterior under the <run> element.

    The posterior is a compound distribution stored as
    ``self.posterior``; the prior is added to it first, then the
    likelihood.
    """
    posterior_attribs = {
        "id": "posterior",
        "spec": "util.CompoundDistribution",
    }
    self.posterior = xml.distribution(self.run, **posterior_attribs)
    self.add_prior()
    self.add_likelihood()
def add_prior(self, beastxml):
    """
    Add a (calibrated) birth-death tree prior.

    Adds the birth-death model itself to ``beastxml.prior``, followed by
    a uniform prior on each of its three parameters (birth rate,
    relative death rate and sampling probability).
    """
    # Tree prior
    # TODO: Someone dropped the "restricted" type here, which does not
    # exist.
    tree_prior_attribs = {
        "id": "BirthDeathModel.t:beastlingTree",
        "tree": "@Tree.t:beastlingTree",
        "spec": "beast.evolution.speciation.BirthDeathGernhard08Model",
        "birthDiffRate": "@birthRate.t:beastlingTree",
        "relativeDeathRate": "@deathRate.t:beastlingTree",
        "sampleProbability": "@sampling.t:beastlingTree",
        "type": "unscaled",
    }
    xml.distribution(beastxml.prior, attrib=tree_prior_attribs)

    # (prior id, parameter reference, id of the Uniform, its bounds)
    parameter_priors = (
        # Birth rate prior
        ("BirthRatePrior.t:beastlingTree", "@birthRate.t:beastlingTree",
         "Uniform.0", {"upper": "Infinity"}),
        # Relative death rate prior
        ("relativeDeathRatePrior.t:beastlingTree",
         "@deathRate.t:beastlingTree",
         "Uniform.1", {"upper": "Infinity"}),
        # Sample probability prior
        ("samplingPrior.t:beastlingTree", "@sampling.t:beastlingTree",
         "Uniform.3", {"lower": "0", "upper": "1"}),
    )
    for prior_id, parameter_ref, uniform_id, bounds in parameter_priors:
        prior_attribs = {
            "id": prior_id,
            "name": "distribution",
            "x": parameter_ref,
        }
        sub_prior = xml.prior(beastxml.prior, attrib=prior_attribs)
        xml.Uniform(sub_prior, id=uniform_id, name="distr", **bounds)
def add_prior(self, beastxml):
    """
    Add Yule birth-process tree prior.

    The calibrated Yule model is used when there is exactly one
    calibration, or exactly two whose language sets are nested; the
    standard Yule model is used otherwise.  A uniform prior on the birth
    rate is added in either case.
    """
    calibrations = beastxml.config.calibrations

    # Decide whether to use the standard Yule or the fancy calibrated one
    count = len(calibrations)
    if count == 1:
        calibrated = True
    elif count == 2:
        # Two calibrations can be handled by the calibrated Yule if they
        # are nested, i.e. the overlap is as large as one of the two.
        first, second = (cal.langs for cal in calibrations.values())
        overlap = len(set(first) & set(second))
        calibrated = overlap in (len(first), len(second))
    else:
        calibrated = False

    # Tree prior
    attribs = {
        "id": "YuleModel.t:beastlingTree",
        "tree": "@Tree.t:beastlingTree",
    }
    if calibrated:
        attribs["spec"] = "beast.evolution.speciation.CalibratedYuleModel"
        attribs["birthRate"] = "@birthRate.t:beastlingTree"
    else:
        attribs["spec"] = "beast.evolution.speciation.YuleModel"
        attribs["birthDiffRate"] = "@birthRate.t:beastlingTree"
        if "root" in calibrations:
            attribs["conditionalOnRoot"] = "true"
    xml.distribution(beastxml.prior, attrib=attribs)

    # Birth rate prior
    birth_rate_prior = xml.prior(
        beastxml.prior,
        id="YuleBirthRatePrior.t:beastlingTree",
        name="distribution",
        x="@birthRate.t:beastlingTree")
    xml.Uniform(
        birth_rate_prior, id="Uniform.0", name="distr", upper="Infinity")
def add_prior(self, beastxml):
    """
    Add a coalescent tree prior with a constant population model.

    The coalescent distribution is attached to ``beastxml.prior`` and
    gets a population model (referring to the population size
    parameter) and the tree intervals of the shared tree.
    """
    coalescent_attribs = {
        "id": "Coalescent.t:beastlingTree",
        "spec": "Coalescent",
    }
    coalescent = xml.distribution(beastxml.prior, **coalescent_attribs)

    # Constant population model, parameterised by the population size
    population_model = xml.populationModel(
        coalescent,
        id="ConstantPopulation:beastlingTree",
        spec="ConstantPopulation",
    )
    xml.parameter(
        population_model,
        idref="popSize.t:beastlingTree",
        name="popSize",
    )

    # Intervals of the shared tree
    xml.treeIntervals(
        coalescent,
        id="TreeIntervals",
        spec="TreeIntervals",
        tree="@Tree.t:beastlingTree",
    )
def add_prior(self):
    """
    Create the compound prior and populate it.

    The compound distribution is stored as ``self.prior`` below
    ``self.posterior``.  It is filled, in this order, with monophyly
    constraints, calibrations, the tree prior, the clock priors and the
    model priors.
    """
    self.prior = xml.distribution(
        self.posterior,
        id="prior",
        spec="util.CompoundDistribution",
    )

    # Constraints on the tree come first, then the tree prior itself
    self.add_monophyly_constraints()
    self.add_calibrations()
    config = self.config
    config.treeprior.add_prior(self)

    # Clocks and models add their priors to the compound directly
    for clock in config.clocks:
        clock.add_prior(self.prior)
    for model in config.all_models:
        model.add_prior(self.prior)
def add_single_sitemodel_likelihood(self, likelihood):
    """
    Add the single likelihood distribution shared by all features.

    One ``TreeLikelihood`` covering the whole model is added below
    *likelihood*, with the model's site model and a filtered alignment
    of the model's data.  If ancestral state reconstruction is requested
    it must cover every feature, since there is only one likelihood.

    Raises:
        NotImplementedError: if ``self.reconstruct`` is non-empty but
            does not include all of ``self.features``.
    """
    attribs = {
        "id": "DataLikelihood:%s" % self.name,
        "spec": "TreeLikelihood",
        "useAmbiguities": "true",
        "branchRateModel": "@%s" % self.clock.branchrate_model_id,
        "tree": "@Tree.t:beastlingTree",
    }
    distribution = xml.distribution(likelihood, attrib=attribs)

    if self.reconstruct:
        if not set(self.reconstruct) >= set(self.features):
            raise NotImplementedError(
                "The model {:} is a binarised model with a single site "
                "model, so it uses a global likelihood. Reconstructing "
                "only a subset of features is not supported.".format(
                    self.name))
        # Use a different likelihood spec (also depending on whether
        # the whole tree is reconstructed, or only some nodes).  The spec
        # strings use the same capitalisation as the per-feature
        # likelihood; the original lower-case forms look unintended.
        if self.treewide_reconstruction:
            distribution.attrib["spec"] = "AncestralStateTreeLikelihood"
            self.treedata.append(attribs["id"])
            # The original referenced an undefined per-feature name
            # here.  NOTE(review): the model name is used as the tag for
            # the shared likelihood -- confirm this is the desired label.
            distribution.attrib["tag"] = self.name
        else:
            distribution.attrib["spec"] = (
                "lucl.beast.statereconstruction.AncestralStatesLogger")
            # The shared likelihood covers every feature, so log the
            # patterns of all of them, in feature order.
            # NOTE(review): assumes the shared alignment's columns follow
            # self.features order -- confirm.
            distribution.attrib["value"] = " ".join(
                pattern
                for feature in self.features
                for pattern in self.pattern_names(feature))
            for label in self.reconstruct_at:
                langs = self.config.language_group(label)
                self.beastxml.add_taxon_set(distribution, label, langs)
            self.metadata.append(attribs["id"])
        distribution.attrib["useAmbiguities"] = "false"

    self.add_sitemodel(distribution, None, None)

    data = xml.data(
        distribution,
        id="filtered_data_%s" % self.name,
        spec="FilteredAlignment",
        data="@data_%s" % self.name,
        filter="-")
    if self.recoded:
        data.set("ascertained", "true")
        data.set("excludefrom", "0")
        data.set("excludeto", "2" if self.ascertained else "1")
    data.append(self.get_userdatatype(None, None))
def add_single_sitemodel_likelihood(self, likelihood):
    """
    Add the single likelihood distribution shared by all features.

    One ``TreeLikelihood`` for the whole model is added below
    *likelihood*, followed by the model's site model and a filtered
    alignment of the model's data.
    """
    model_name = self.name
    distribution = xml.distribution(likelihood, attrib={
        "id": "DataLikelihood:%s" % model_name,
        "spec": "TreeLikelihood",
        "useAmbiguities": "true",
        "branchRateModel": "@%s" % self.clock.branchrate_model_id,
        "tree": "@Tree.t:beastlingTree",
    })

    # Sitemodel
    self.add_sitemodel(distribution, None, None)

    # Data
    data = xml.data(
        distribution,
        id="filtered_data_%s" % self.name,
        spec="FilteredAlignment",
        data="@data_%s" % self.name,
        filter="-",
    )
    if self.recoded:
        data.set("ascertained", "true")
        data.set("excludefrom", "0")
        data.set("excludeto", "2" if self.ascertained else "1")
    data.append(self.get_userdatatype(None, None))
def add_likelihood(self, likelihood):
    """
    Add the spherical geography likelihood distribution.

    The distribution is attached below *likelihood* and filled with the
    location data, one geo prior per clade in ``self.sampling_points``,
    the site model and a reference to the clock's branch rate model.
    """
    attribs = {
        "id": "sphericalGeographyLikelihood",
        "tree": "@Tree.t:beastlingTree",
        "logAverage": "true",
        "scale": "false",
        "location": "@location.geo",
    }
    # The likelihood implementation depends on whether there are
    # sampled locations.
    attribs["spec"] = (
        "sphericalGeo.ApproxMultivariateTraitLikelihoodF2"
        if self.sampling_points
        else "sphericalGeo.ApproxMultivariateTraitLikelihood")
    distribution = xml.distribution(likelihood, attrib=attribs)

    # Data goes in first, as this may trigger the creation of additional
    # sampling points (for langs with missing locations data); that is
    # why self.sampling_points is looked up again below.
    self.add_data(distribution)

    # Now add geopriors
    if self.sampling_points:
        multi = xml.multiGeoprior(
            distribution,
            id="multiGeoPrior",
            spec="sphericalGeo.MultiGeoPrior",
            tree="@Tree.t:beastlingTree",
            newick="")
        for clade in self.sampling_points:
            # Get languages in clade
            langs = (
                self.config.languages.languages
                if clade == "root"
                else self.config.language_group(clade))
            if not langs:
                continue

            # Add the geo prior, which will trigger sampling
            geoprior = xml.geoprior(
                multi,
                id="%s.geoPrior" % clade,
                spec="sphericalGeo.GeoPrior",
                location="@location.geo",
                tree="@Tree.t:beastlingTree")
            if len(langs) <= 1:
                xml.taxon(geoprior, idref=list(langs)[0])
                # Drop back to F, not F2, so singletons can be sampled
                distribution.set(
                    "spec",
                    "sphericalGeo.ApproxMultivariateTraitLikelihoodF")
            else:
                self.beastxml.add_taxon_set(
                    geoprior, "%s.geo" % clade, langs)

            # Also add the KML file if we have an actual constraint
            if clade in self.geo_priors:
                xml.region(
                    geoprior,
                    spec="sphericalGeo.region.KMLRegion",
                    kml=self.geo_priors[clade])

    self.add_sitemodel(distribution)
    xml.branchRateModel(distribution, idref=self.clock.branchrate_model_id)