def from_lff(cls, path, name_and_codes, level, dry_run=False):
    """
    Build an instance from one entry of an "lff"-style listing.

    :param path: Ancestor chain written as ``Name [id], Name [id], ...``; may be empty.
    :param name_and_codes: Entry written as ``Name [glottocode][isocode]``.
    :param level: `Level` member assigned to the new object.
    :param dry_run: Forwarded to `Glottocode.from_name` when the entry has no glottocode.
    :return: The new instance, with `level` (and `iso`, when given) assigned.
    """
    assert isinstance(level, Level)

    raw_name, bracketed = name_and_codes.split('[', 1)
    lname = raw_name.strip()
    # `bracketed` has the shape "glottocode][isocode]": drop the last character, then split.
    glottocode, isocode = bracketed[:-1].split('][')
    if not glottocode:
        glottocode = Glottocode.from_name(lname, dry_run=dry_run)

    lineage = []
    if path:
        for index, component in enumerate(path.split('], ')):
            # Strip a closing bracket left over by the split, if there is one.
            component = component[:-1] if component.endswith(']') else component
            name, id_ = component.split(' [', 1)
            if level == Level.dialect:
                # For dialects the first ancestor is tagged as language, the rest as dialect.
                ancestor_level = Level.language if index == 0 else Level.dialect
            else:
                ancestor_level = Level.family
            lineage.append((name, id_, ancestor_level))

    cfg = INI(interpolation=None)
    cfg.read_dict({'core': {'name': lname, 'glottocode': glottocode}})
    res = cls(cfg, lineage)
    res.level = level
    if isocode:
        res.iso = isocode
    return res
def from_name_id_level(cls, tree, name, id, level, **kw):
    """
    Create an instance from a name, an id and a level.

    :param tree: Passed to the constructor as `tree`.
    :param name: Stored as ``core.name`` in the instance's config.
    :param id: Wrapped in `Glottocode` and passed to the constructor as `id_`.
    :param level: Converted with `Level.get` and assigned to `level`.
    :param kw: ``lineage`` (optional) is passed to the constructor; every other \
    item is set as an attribute on the result.
    """
    cfg = INI(interpolation=None)
    cfg.read_dict({'core': {'name': name}})
    # Take 'lineage' out of `kw` so it is not also set as a plain attribute below.
    lineage = kw.pop('lineage', [])
    res = cls(cfg, lineage, id_=Glottocode(id), tree=tree)
    res.level = Level.get(level)
    for attr, value in kw.items():
        setattr(res, attr, value)
    return res
def from_name_id_level(cls, name, id, level, **kw):
    """
    Create an instance with an empty lineage from a name, an id and a level.

    :param name: Stored as ``core.name`` in the instance's config.
    :param id: Stored as ``core.glottocode`` in the instance's config.
    :param level: Converted with `Level(level)` and assigned to `level`.
    :param kw: Each item is set as an attribute on the result.
    """
    cfg = INI(interpolation=None)
    cfg.read_dict({'core': {'name': name, 'glottocode': id}})
    res = cls(cfg, [])
    res.level = Level(level)
    for attr, value in kw.items():
        setattr(res, attr, value)
    return res
def from_name_id_level(cls, tree, name, id, level, **kw):
    """
    Create an instance from a name, an id and a level.

    :param tree: Passed to the constructor as `tree`.
    :param name: Stored as ``core.name`` in the instance's config.
    :param id: Wrapped in `Glottocode` and passed to the constructor as `id_`.
    :param level: Assigned to `level` as the very last step.
    :param kw: ``lineage`` (optional) is passed to the constructor; every other \
    item is set as an attribute on the result.
    """
    cfg = INI(interpolation=None)
    cfg.read_dict({'core': {'name': name}})
    lineage = kw.pop('lineage', [])
    res = cls(cfg, lineage, id_=Glottocode(id), tree=tree)
    for attr, value in kw.items():
        setattr(res, attr, value)
    # The level is assigned last on purpose: setting it behaves differently when `_api`
    # is available, so all other attributes have to be in place first.
    res.level = level
    return res
def make(glottolog, concepticon):
    """
    Write a ``config.ini`` with a ``paths`` section into `tmpdir`.

    :param glottolog: Value stored under ``paths.glottolog``.
    :param concepticon: Value stored under ``paths.concepticon``.
    :return: The path of the written file as `str`.
    """
    paths = {'concepticon': concepticon, 'glottolog': glottolog}
    ini = INI()
    ini.read_dict({'paths': paths})
    target = str(tmpdir.join('config.ini'))
    ini.write(target)
    return target
def from_name_id_level(cls, tree, name, id, level, **kw):
    """
    Instantiate a `Languoid` for a new node encountered in an "lff"-format tree
    (this is how `pyglottolog.lff` uses the method).

    :param tree: Passed to the constructor as `tree`.
    :param name: Stored as ``core.name`` in the instance's config.
    :param id: Wrapped in `Glottocode` and passed to the constructor as `id_`.
    :param level: Assigned to `level` as the very last step.
    :param kw: ``lineage`` (optional) is passed to the constructor; every other \
    item is set as an attribute on the result.
    """
    cfg = INI(interpolation=None)
    cfg.read_dict({'core': {'name': name}})
    lineage = kw.pop('lineage', [])
    res = cls(cfg, lineage, id_=Glottocode(id), tree=tree)
    for attr, value in kw.items():
        setattr(res, attr, value)
    # The level is assigned last on purpose: setting it behaves differently when `_api`
    # is available, so all other attributes have to be in place first.
    res.level = level
    return res
def test_extractor(self):
    """
    Exercise the extractor on three inputs: an XML written from config files,
    an XML written from a dict config, and a hand-written XML file.
    """
    # Case 1: config assembled from files; `_extract` must return something truthy.
    config_files = [
        config_path(stem).as_posix() for stem in ("admin", "mk", "embed_data")]
    writer = beastling.beastxml.BeastXml(self.make_cfg(config_files))
    xml_path = self.tmp.joinpath("beastling.xml")
    writer.write_file(xml_path.as_posix())
    self.assertTrue(bool(self._extract(xml_path)))

    # Case 2: dict config; extraction must leave 'abcdefg.conf' in the working directory.
    dict_config = self.make_cfg({
        'admin': {'basename': 'abcdefg'},
        'model': {'model': 'mk', 'data': data_path('basic.csv').as_posix()},
    })
    writer = beastling.beastxml.BeastXml(dict_config)
    xml_path = self.tmp.joinpath("beastling.xml")
    writer.write_file(xml_path.as_posix())
    beastling.extractor.extract(xml_path)
    conf_path = Path('abcdefg.conf')
    self.assertTrue(conf_path.exists())
    parsed = INI(interpolation=None)
    parsed.read(conf_path.as_posix())
    # Clean up before asserting on the content, so a failure does not leave the file behind.
    remove(conf_path)
    self.assertEqual(parsed['admin']['basename'], 'abcdefg')
    self.assertEqual(parsed['model']['model'], 'mk')

    # Case 3: hand-written XML that names a data file which does not exist yet.
    handmade = self.tmp.joinpath('test.xml')
    datafile = self.tmp.joinpath('test.csv')
    self.assertFalse(datafile.exists())
    with handmade.open('w', encoding='utf8') as fp:
        # NOTE(review): the literal is reproduced exactly as found (on a single line);
        # confirm whether the original file had line breaks inside it.
        fp.write("""<?xml version="1.0" encoding="UTF-8"?> <r> <!--%s %s [admin] [model] --> <!--%s:%s--> </r> """ % (
            beastling.extractor._generated_str,
            beastling.extractor._config_file_str,
            beastling.extractor._data_file_str,
            datafile.as_posix()))
    messages = self._extract(handmade)
    self.assertIn(datafile.name, ''.join(messages))
def test_extractor(config_factory, tmppath, data_dir):
    """
    Exercise the extractor on three inputs: an XML written from named config
    fragments, an XML written from a dict config, and a hand-written XML file.
    """
    # Case 1: config from named fragments; `_extract` must return something truthy.
    writer = beastling.beastxml.BeastXml(config_factory("admin", "mk", "embed_data"))
    xml_path = str(tmppath / "beastling.xml")
    writer.write_file(xml_path)
    assert bool(_extract(xml_path))

    # Case 2: dict config; extraction must leave 'abcdefg.conf' in the working directory.
    dict_config = config_factory({
        'admin': {'basename': 'abcdefg'},
        'model model': {
            'model': 'mk',
            'data': str(data_dir / 'basic.csv'),
        },
    })
    writer = beastling.beastxml.BeastXml(dict_config)
    xml_path = str(tmppath / "beastling.xml")
    writer.write_file(xml_path)
    beastling.extractor.extract(xml_path)
    conf_path = Path('abcdefg.conf')
    assert conf_path.exists()
    parsed = INI(interpolation=None)
    parsed.read(conf_path.as_posix())
    # Clean up before asserting on the content, so a failure does not leave the file behind.
    remove(conf_path)
    assert parsed['admin']['basename'] == 'abcdefg'
    assert parsed['model model']['model'] == 'mk'

    # Case 3: hand-written XML that names a data file which does not exist yet.
    handmade = tmppath / 'test.xml'
    datafile = tmppath / 'test.csv'
    assert not datafile.exists()
    with handmade.open('w', encoding='utf8') as fp:
        # NOTE(review): the literal is reproduced exactly as found (on a single line);
        # confirm whether the original file had line breaks inside it.
        fp.write("""<?xml version="1.0" encoding="UTF-8"?> <r> <!--%s %s [admin] [model model] --> <!--%s:%s--> </r> """ % (
            beastling.extractor._generated_str,
            beastling.extractor._config_file_str,
            beastling.extractor._data_file_str,
            datafile.as_posix()))
    messages = _extract(handmade)
    assert datafile.name in ''.join(messages)
def write_config(comment_text, overwrite):
    """
    Recover the configuration embedded in an XML comment and save it as a
    ``.conf`` file.

    The file is named after the ``basename`` option of the ``admin`` section,
    falling back to ``beastling`` when that option is absent.

    :param comment_text: Text of the comment. Its second non-empty line must be \
    either `_config_file_str` or `_proggen_str`; the configuration starts on the \
    third non-empty line.
    :param overwrite: Whether an already existing target file may be replaced.
    :return: A human-readable status message.
    """
    lines = [line for line in comment_text.split("\n") if line]
    assert lines[1] in (_config_file_str, _proggen_str)
    if lines[1] == _proggen_str:
        return "Original configuration was generated programmatically, no configuration to extract."

    # Discard everything from the first line carrying the "do not edit" marker onwards.
    for index, line in enumerate(lines):
        if _do_not_edit_str in line:
            lines = lines[:index]
            break
    config_text = "\n".join(lines[2:])

    # Interpolation is disabled so that a literal '%' in a value (e.g. in the
    # basename) is read back verbatim instead of raising on `get`.
    p = INI(interpolation=None)
    p.read_string(config_text)
    if p.has_option("admin", "basename"):
        basename = p.get("admin", "basename")
    else:
        basename = 'beastling'
    filename = Path(basename + '.conf')

    if filename.exists() and not overwrite:
        return "BEASTling configuration file %s already exists! Run beastling with the --overwrite option if you wish to overwrite it.\n" % filename
    if not filename.parent.exists():
        # The basename may contain several directory levels; create all of them.
        filename.parent.mkdir(parents=True)

    p.write(filename)
    return "Wrote BEASTling configuration file %s.\n" % filename
def test_INI(tmppath):
    """
    Check `INI` setters/getters (tuple, int, None, multi-line text) and that a
    written file reads back to the same serialisation.
    """
    ini = INI()
    ini.set('äüü', 'äöü', ('ä', 'ö', 'ü'))
    ini.set('a', 'b', 5)
    assert ini['a'].getint('b') == 5

    # Setting an option to None must not create it.
    ini.set('a', 'c', None)
    assert 'c' not in ini['a']

    assert 'ä\n' in ini.write_string()
    assert len(ini.getlist('äüü', 'äöü')) == 3

    multiline = '- a\n - aa\n - ab\n- b'
    ini.settext('text', 'multi', multiline)

    target = tmppath / 'test'
    ini.write(target.as_posix())
    with target.open(encoding='utf8') as fp:
        written = fp.read()
    assert 'coding: utf-8' in written

    reloaded = INI.from_file(target)
    assert reloaded.gettext('text', 'multi') == multiline
    assert reloaded.write_string() == ini.write_string()
def from_ini(cls, ini, nodes=None):
    """
    Create an instance from an INI file, deriving its lineage from the names
    of the enclosing directories.

    :param ini: Path of the INI file (anything accepted by `Path`).
    :param nodes: Optional `dict` used as a cache, mapping an id to a \
    ``(name, id, level)`` triple. It is filled in place, so a dict passed by the \
    caller (even an empty one) accumulates entries across calls.
    :return: The new instance; its own triple is added to `nodes` as well.
    """
    # Test for None explicitly: `nodes or {}` would swap a caller's *empty*
    # cache for a throw-away dict, and the caller's cache would never be filled.
    if nodes is None:
        nodes = {}

    ini = Path(ini)
    directory = ini.parent
    cfg = INI(interpolation=None)
    cfg.read(ini.as_posix(), encoding='utf8')

    # Walk upwards from the nearest ancestor directory, collecting one triple per level.
    lineage = []
    for parent in directory.parents:
        id_ = parent.name
        assert id_ != directory.name
        if not Glottocode.pattern.match(id_):
            # we ignore leading non-languoid-dir path components.
            break

        if id_ not in nodes:
            ancestor = Languoid.from_dir(parent, nodes=nodes)
            nodes[id_] = (ancestor.name, ancestor.id, ancestor.level)
        lineage.append(nodes[id_])

    # `lineage` was collected nearest-first; the constructor receives it reversed.
    res = cls(cfg, list(reversed(lineage)), directory=directory)
    nodes[res.id] = (res.name, res.id, res.level)
    return res
def from_lff(cls, path, name_and_codes, level):
    """
    Build an instance from one entry of an "lff"-style listing.

    :param path: Ancestor chain written as ``Name [id], Name [id], ...``. \
    Components whose id is ``-isolate-`` are left out of the lineage.
    :param name_and_codes: Entry written as ``Name [glottocode][isocode]``.
    :param level: Level name stored as ``core.level``; ``'dialect'`` changes how \
    the ancestors are tagged.
    :return: The new instance, with `iso` assigned when an isocode is given.
    """
    raw_name, bracketed = name_and_codes.split('[', 1)
    lname = raw_name.strip()
    # `bracketed` has the shape "glottocode][isocode]": drop the last character, then split.
    glottocode, isocode = bracketed[:-1].split('][')

    lineage = []
    for index, component in enumerate(path.split('], ')):
        # Strip a closing bracket left over by the split, if there is one.
        component = component[:-1] if component.endswith(']') else component
        name, id_ = component.split(' [', 1)
        if id_ == '-isolate-':
            continue
        if level == 'dialect':
            # For dialects the first ancestor is tagged as language, the rest as dialect.
            ancestor_level = 'language' if index == 0 else 'dialect'
        else:
            ancestor_level = 'family'
        lineage.append((name, id_, ancestor_level))

    cfg = INI()
    cfg.read_dict({'core': {'name': lname, 'glottocode': glottocode, 'level': level}})
    res = cls(cfg, lineage)
    if isocode:
        res.iso = isocode
    return res
def from_ini(cls, ini, nodes=None):
    """
    Create an instance from an INI file, deriving its lineage from the names
    of the enclosing directories.

    The id of a directory is the part of its name after the last ``.``.

    :param ini: Path of the INI file (`Path` or anything accepted by `Path`).
    :param nodes: Optional `dict` used as a cache, mapping an id to a \
    ``(name, id, level)`` triple; it is filled in place. When omitted, a fresh \
    dict is used for this call.
    :return: The new instance; its own triple is added to `nodes` as well.
    """
    # A `{}` default would be created once and shared by every call that omits
    # `nodes`, silently carrying entries over between unrelated calls.
    if nodes is None:
        nodes = {}

    if not isinstance(ini, Path):
        ini = Path(ini)
    directory = ini.parent
    cfg = INI()
    cfg.read(ini.as_posix(), encoding='utf8')

    own_id = directory.name.split('.')[-1]
    # Walk upwards from the nearest ancestor directory, collecting one triple per level.
    lineage = []
    for parent in directory.parents:
        id_ = parent.name.split('.')[-1]
        assert id_ != own_id
        if not cls.id_pattern.match(id_):
            # we ignore leading non-languoid-dir path components.
            break

        if id_ not in nodes:
            ancestor = Languoid.from_dir(parent, nodes=nodes)
            nodes[id_] = (ancestor.name, ancestor.id, ancestor.level)
        lineage.append(nodes[id_])

    # `lineage` was collected nearest-first; the constructor receives it reversed.
    res = cls(cfg, list(reversed(lineage)))
    nodes[res.id] = (res.name, res.id, res.level)
    return res
def test_existing_config(self):
    """
    Check that `Config('test', dir_=...)` returns an option that was written
    to ``test.ini`` in that directory beforehand.
    """
    seed = INI()
    seed.read_dict({'section': {'option': '12'}})
    seed.write(self.tmp_path('test.ini'))

    loaded = Config('test', dir_=self.tmp_path())
    self.assertEqual(loaded.get('section', 'option'), '12')
def read_from_file(self, configfile):
    """
    Read one or several INI-style configuration files and overwrite
    default option settings accordingly.

    :param configfile: Either a `dict` (loaded with `read_dict`), a single file \
    name, or an iterable of file names which are read in order.
    :raises ValueError: If `overlap` is neither "union" nor "intersection", if \
    there is a ``geo_priors`` section without a ``geography`` section, or if the \
    configuration has neither a model section nor a geography configuration.
    """
    self.configfile = INI(interpolation=None)
    # Use option names verbatim (configparser would lower-case them by default).
    self.configfile.optionxform = str
    if isinstance(configfile, dict):
        self.configfile.read_dict(configfile)
    else:
        if isinstance(configfile, six.string_types):
            configfile = (configfile, )
        for conf in configfile:
            self.configfile.read(conf)
    p = self.configfile

    # Options which map 1:1 onto attributes of the same name; the value is the
    # parser method used to read (and convert) the option.
    simple_options = {
        'admin': {
            'basename': p.get,
            'embed_data': p.getboolean,
            'screenlog': p.getboolean,
            'log_all': p.getboolean,
            'log_dp': p.getint,
            'log_every': p.getint,
            'log_probabilities': p.getboolean,
            'log_fine_probs': p.getboolean,
            'log_params': p.getboolean,
            'log_trees': p.getboolean,
            'log_pure_tree': p.getboolean,
            'glottolog_release': p.get,
        },
        'MCMC': {
            'chainlength': p.getint,
            'sample_from_prior': p.getboolean,
        },
        'languages': {
            'exclusions': p.get,
            'languages': p.get,
            'families': p.get,
            'macroareas': p.get,
            'location_data': p.get,
            'overlap': p.get,
            'starting_tree': p.get,
            'sample_branch_lengths': p.getboolean,
            'sample_topology': p.getboolean,
            'monophyly_start_depth': p.getint,
            'monophyly_end_depth': p.getint,
            'monophyly_levels': p.getint,
            'monophyly_direction': lambda s, o: p.get(s, o).lower(),
        },
    }
    for sec, opts in simple_options.items():
        for opt, getter in opts.items():
            if p.has_option(sec, opt):
                setattr(self, opt, getter(sec, opt))

    ## MCMC
    self.sample_from_prior |= self.prior
    if self.prior and not self.basename.endswith("_prior"):
        self.basename += "_prior"

    ## Languages
    sec = "languages"
    if self.overlap.lower() not in ("union", "intersection"):  # pragma: no cover
        raise ValueError(
            "Value for overlap needs to be either 'union', or 'intersection'."
        )
    # "monophyletic" takes precedence over "monophyly" when both are present.
    if p.has_option(sec, "monophyletic"):
        self.monophyly = p.getboolean(sec, "monophyletic")
    elif p.has_option(sec, "monophyly"):
        self.monophyly = p.getboolean(sec, "monophyly")
    if p.has_option(sec, "monophyly_newick"):
        value = p.get(sec, "monophyly_newick")
        # The option is either the name of an existing file or the value itself.
        if os.path.exists(value):
            with io.open(value, encoding="UTF-8") as fp:
                self.monophyly_newick = fp.read()
        else:
            self.monophyly_newick = value
    if p.has_option(sec, 'minimum_data'):
        self.minimum_data = p.getfloat(sec, "minimum_data")

    ## Calibration
    if p.has_section("calibration"):
        for clade, calibration in p.items("calibration"):
            self.calibration_configs[clade] = calibration

    ## Clocks
    clock_sections = [
        s for s in p.sections() if s.lower().startswith("clock")
    ]
    for section in clock_sections:
        self.clock_configs.append(self.get_clock_config(p, section))

    ## Models
    model_sections = [
        s for s in p.sections() if s.lower().startswith("model")
    ]
    for section in model_sections:
        self.model_configs.append(self.get_model_config(p, section))

    # Geography
    if p.has_section("geography"):
        self.geo_config = self.get_geo_config(p, "geography")
    else:
        self.geo_config = {}
    if p.has_section("geo_priors"):
        if not p.has_section("geography"):
            raise ValueError(
                "Config file contains geo_priors section but no geography section."
            )
        self.geo_config["geo_priors"] = {}
        for clades, klm in p.items("geo_priors"):
            # One option name may list several comma-separated clades sharing a prior.
            for clade in clades.split(','):
                clade = clade.strip()
                if clade not in self.geo_config["sampling_points"]:
                    self.geo_config["sampling_points"].append(clade)
                self.geo_config["geo_priors"][clade] = klm
    sampled_points = self.geo_config.get("sampling_points", [])
    # The loop variable must not be called `p`: under Python 2 a list
    # comprehension leaks its variable and would rebind the parser above.
    samples_non_root = any(
        point.lower() != "root" for point in sampled_points)
    if samples_non_root and self.sample_topology and not self.monophyly:
        self.messages.append(
            "[WARNING] Geographic sampling and/or prior specified for clades other than root, but tree topology is being sampled without monophyly constraints. BEAST may crash."
        )

    # Make sure analysis is non-empty
    if not model_sections and not self.geo_config:
        raise ValueError(
            "Config file contains no model sections and no geography section."
        )