def add_master_data(self, beast):
    """
    Add the master <data> element, with one <sequence> per language.

    As a side effect, ``self.filters`` is reset and then populated with
    one FilteredAlignment filter string per feature: either a single
    1-based site index ("3") or an inclusive range ("3-5"), depending on
    how many sites the formatted datapoint of that feature occupies.
    The filters are derived from the first language whose processing
    leaves ``self.filters`` non-empty.
    """
    self.filters = {}
    master_id = "data_%s" % self.name
    master = xml.data(beast, id=master_id, name=master_id, dataType="integer")

    for lang in self.languages:
        columns = [
            self.format_datapoint(feature, self.data[lang].get(feature, ["?"]))
            for feature in self.features
        ]
        separator = self.data_separator
        sequence_value = separator.join(columns)

        if not self.filters:
            # Walk over the features in order, tracking the 1-based
            # index of the first site each one occupies.
            first_site = 1
            for feature, column in zip(self.features, columns):
                # With a separator, sites are the separated items;
                # without one, every character is a site.
                width = len(column.split(separator)) if separator else len(column)
                if width == 1:
                    site_filter = str(first_site)
                else:
                    site_filter = "%d-%d" % (first_site, first_site + width - 1)
                self.filters[feature] = site_filter
                first_site += width

        xml.sequence(
            master,
            id="language_data_%s:%s" % (self.name, lang),
            taxon=lang,
            value=sequence_value)
def add_single_sitemodel_likelihood(self, likelihood):
    """
    Add a single, model-wide likelihood distribution with its site model
    and filtered alignment.

    If ``self.reconstruct`` is non-empty and covers every feature, the
    distribution's ``spec`` is switched to an ancestral state
    reconstruction class: tree-wide when ``self.treewide_reconstruction``
    is set, otherwise restricted to the taxon sets named in
    ``self.reconstruct_at``. The distribution id is recorded in
    ``self.treedata`` or ``self.metadata`` respectively.

    Raises:
        NotImplementedError: if ``self.reconstruct`` is non-empty but
            does not cover all of ``self.features``.
    """
    attribs = {
        "id": "DataLikelihood:%s" % self.name,
        "spec": "TreeLikelihood",
        "useAmbiguities": "true",
        "branchRateModel": "@%s" % self.clock.branchrate_model_id,
        "tree": "@Tree.t:beastlingTree",
    }
    distribution = xml.distribution(likelihood, attrib=attribs)

    if not self.reconstruct:
        pass
    elif set(self.reconstruct) >= set(self.features):
        # Use a different likelihood spec (also depending on whether
        # the whole tree is reconstructed, or only some nodes).
        #
        # This likelihood spans all features at once, so there is no
        # per-feature variable in scope here. The tag and the site names
        # are therefore derived from the model as a whole.
        # NOTE(review): confirm that a model-wide tag and the
        # concatenated per-feature pattern names are what the BEAST
        # ancestral state classes expect.
        if self.treewide_reconstruction:
            distribution.attrib["spec"] = "ancestralstatetreelikelihood"
            self.treedata.append(attribs["id"])
            distribution.attrib["tag"] = self.name
        else:
            distribution.attrib[
                "spec"] = "lucl.beast.statereconstruction.ancestralstateslogger"
            distribution.attrib["value"] = " ".join(
                pattern
                for feature in self.features
                for pattern in self.pattern_names(feature))
            for label in self.reconstruct_at:
                langs = self.config.language_group(label)
                self.beastxml.add_taxon_set(distribution, label, langs)
            self.metadata.append(attribs["id"])
        distribution.attrib["useAmbiguities"] = "false"
    else:
        raise NotImplementedError(
            "The model {:} is a binarised model with a single site "
            "model, so it uses a global likelihood. Reconstructing "
            "only a subset of features is not supported.".format(
                self.name))

    self.add_sitemodel(distribution, None, None)
    data = xml.data(
        distribution,
        id="filtered_data_%s" % self.name,
        spec="FilteredAlignment",
        data="@data_%s" % self.name,
        filter="-")
    if self.recoded:
        data.set("ascertained", "true")
        data.set("excludefrom", "0")
        if self.ascertained:
            data.set("excludeto", "2")
        else:
            data.set("excludeto", "1")
    data.append(self.get_userdatatype(None, None))
def test_validate_ids(config_factory):
    """
    validate_ids() must raise ValueError both when an id is used twice
    (message matching 'Duplicate') and when an element carries an idref
    that this test never declares as an id (message matching 'missing').
    """
    basic_config = config_factory('basic')

    # Case 1: two elements sharing the same id.
    with_duplicate = BeastXml(basic_config)
    for _ in range(2):
        xml.data(with_duplicate.beast, id='theid')
    with pytest.raises(ValueError, match='Duplicate'):
        with_duplicate.validate_ids()

    # Case 2: a reference to an id that was never added.
    with_dangling_ref = BeastXml(basic_config)
    xml.data(with_dangling_ref.beast, idref='theid')
    with pytest.raises(ValueError, match='missing'):
        with_dangling_ref.validate_ids()
def add_data(self, distribution):
    """
    Add the location <data> element (a sphericalGeo trait-map alignment)
    under the given likelihood distribution.

    Languages with a known location contribute a "lang=lat lon" entry
    (two decimal places) to the trait map text. Languages whose latitude
    or longitude is "?" (or that have no entry in
    ``self.config.locations``) are instead appended to
    ``self.sampling_points``, unless already listed there, and an info
    message is logged.
    """
    location_data = xml.data(
        distribution,
        id="locationData",
        spec="sphericalGeo.AlignmentFromTraitMap")
    traitmap = xml.traitMap(
        location_data,
        id="geographyTraitmap",
        spec="sphericalGeo.TreeTraitMap",
        initByMean="true",
        randomizelower="-90 -180",
        randomizeupper="90 180",
        traitName="location",
        tree="@Tree.t:beastlingTree")

    languages = self.config.languages.languages
    xml.parameter(
        traitmap,
        text="0.0 0.0",
        id="locationParameter",
        spec="sphericalGeo.LocationParameter",
        dimension=2 * (2 * len(languages) - 1),
        minordimension="2")

    known_locations = []
    for lang in languages:
        lat, lon = self.config.locations.get(lang, ("?", "?"))
        if "?" not in (lat, lon):
            known_locations.append("%s=%.2f %.2f" % (lang, lat, lon))
            continue
        if lang in self.sampling_points:
            # Already registered for sampling; nothing more to do.
            continue
        self.sampling_points.append(lang)
        log.info(
            "Location of language %s will be sampled. You may wish to add a prior." % lang,
            model=self)

    traitmap.text = ",\n".join(known_locations)
    xml.userDataType(
        location_data,
        id="LocationDataType",
        spec="sphericalGeo.LocationDataType")
def add_single_sitemodel_likelihood(self, likelihood):
    """
    Add a single, model-wide TreeLikelihood distribution, then attach
    its site model and a FilteredAlignment over the model's master data
    (filter "-").

    When ``self.recoded`` is set, the alignment is marked as ascertained
    with ``excludefrom`` "0" and ``excludeto`` "2" if ``self.ascertained``
    is set, "1" otherwise.
    """
    model_name = self.name
    distribution = xml.distribution(
        likelihood,
        attrib={
            "id": "DataLikelihood:%s" % model_name,
            "spec": "TreeLikelihood",
            "useAmbiguities": "true",
            "branchRateModel": "@%s" % self.clock.branchrate_model_id,
            "tree": "@Tree.t:beastlingTree",
        })
    self.add_sitemodel(distribution, None, None)

    alignment = xml.data(
        distribution,
        id="filtered_data_%s" % model_name,
        spec="FilteredAlignment",
        data="@data_%s" % model_name,
        filter="-")
    if self.recoded:
        ascertainment = (
            ("ascertained", "true"),
            ("excludefrom", "0"),
            ("excludeto", "2" if self.ascertained else "1"),
        )
        for attribute, value in ascertainment:
            alignment.set(attribute, value)
    alignment.append(self.get_userdatatype(None, None))
def add_feature_data(self, distribution, index, feature, fname):
    """
    Add the FilteredAlignment element for one feature beneath the given
    likelihood distribution and return it.

    When ``self.pruned`` is set, a PrunedAlignment <data> element is
    created under the distribution first and the filtered alignment is
    built with ``xml.source`` beneath it; otherwise it is built with
    ``xml.data`` directly under the distribution. When
    ``self.ascertained`` is set, ascertainment attributes are added,
    with ``excludeto`` taken from ``self.valuecounts[feature]``.

    The ``index`` argument is not used by this implementation.
    """
    if self.pruned:
        container = xml.data(
            distribution,
            id="pruned_data_%s" % fname,
            spec="PrunedAlignment")
        make_element = xml.source
    else:
        container = distribution
        make_element = xml.data

    filtered = make_element(
        container,
        id="feature_data_%s" % fname,
        spec="FilteredAlignment",
        data="@data_%s" % self.name,
        filter=self.filters[feature])

    if self.ascertained:
        for attribute, value in (
                ("ascertained", "true"),
                ("excludefrom", "0"),
                ("excludeto", str(self.valuecounts[feature]))):
            filtered.set(attribute, value)

    filtered.append(self.get_userdatatype(feature, fname))
    return filtered