예제 #1
0
    def from_lff(cls, path, name_and_codes, level, dry_run=False):
        """
        Build an instance from one entry of an "lff"-format tree.

        :param path: Ancestor chain formatted as ``"Name [id], Name [id]"``; may be empty.
        :param name_and_codes: ``"Name [glottocode][isocode]"``; either code may be empty.
        :param level: `Level` member assigned to the new instance.
        :param dry_run: Passed on to `Glottocode.from_name`, which is called when no \
        glottocode is given.
        :return: The new instance, with `level` set and `iso` set if an isocode was given.
        """
        assert isinstance(level, Level)
        raw_name, raw_codes = name_and_codes.split('[', 1)
        lname = raw_name.strip()
        # Drop the closing bracket, then separate the two bracketed codes.
        glottocode, isocode = raw_codes[:-1].split('][')
        if not glottocode:
            glottocode = Glottocode.from_name(lname, dry_run=dry_run)

        lineage = []
        for pos, part in enumerate(path.split('], ') if path else ()):
            # Only the last component still carries its closing bracket after the split.
            part = part[:-1] if part.endswith(']') else part
            ancestor_name, ancestor_id = part.split(' [', 1)
            if level == Level.dialect:
                # For a dialect, the first component is the language, the rest are dialects.
                ancestor_level = Level.language if pos == 0 else Level.dialect
            else:
                ancestor_level = Level.family
            lineage.append((ancestor_name, ancestor_id, ancestor_level))

        cfg = INI(interpolation=None)
        cfg.read_dict({'core': {'name': lname, 'glottocode': glottocode}})
        res = cls(cfg, lineage)
        res.level = level
        if isocode:
            res.iso = isocode
        return res
예제 #2
0
 def from_name_id_level(cls, tree, name, id, level, **kw):
     """
     Create an instance from a name, an id and a level.

     A ``lineage`` keyword argument is passed to the constructor (default: empty list);
     all remaining keyword arguments are set as attributes on the new instance.
     """
     ini = INI(interpolation=None)
     ini.read_dict({'core': {'name': name}})
     lineage = kw.pop('lineage', [])
     languoid = cls(ini, lineage, id_=Glottocode(id), tree=tree)
     languoid.level = Level.get(level)
     for attr, value in kw.items():
         setattr(languoid, attr, value)
     return languoid
예제 #3
0
 def from_name_id_level(cls, name, id, level, **kw):
     """
     Create an instance with an empty lineage from a name, a glottocode and a level.

     Keyword arguments are set as attributes on the new instance.
     """
     ini = INI(interpolation=None)
     ini.read_dict({'core': {'name': name, 'glottocode': id}})
     languoid = cls(ini, [])
     languoid.level = Level(level)
     for attr, value in kw.items():
         setattr(languoid, attr, value)
     return languoid
예제 #4
0
 def from_name_id_level(cls, tree, name, id, level, **kw):
     """
     Create an instance from a name, an id and a level.

     A ``lineage`` keyword argument is passed to the constructor (default: empty list);
     all remaining keyword arguments are set as attributes before the level is assigned.
     """
     ini = INI(interpolation=None)
     ini.read_dict({'core': {'name': name}})
     lineage = kw.pop('lineage', [])
     languoid = cls(ini, lineage, id_=Glottocode(id), tree=tree)
     for attr, value in kw.items():
         setattr(languoid, attr, value)
     # The level is assigned last on purpose: setting it behaves differently when `_api`
     # is available, so all other attributes have to be initialized first.
     languoid.level = level
     return languoid
예제 #5
0
 def make(glottolog, concepticon):
     """
     Write a ``config.ini`` with a ``paths`` section into `tmpdir` and return its path.

     `tmpdir` is taken from the enclosing scope.
     """
     paths = {'concepticon': concepticon, 'glottolog': glottolog}
     cfg = INI()
     cfg.read_dict({'paths': paths})
     target = str(tmpdir.join('config.ini'))
     cfg.write(target)
     return target
예제 #6
0
 def from_name_id_level(cls, tree, name, id, level, **kw):
     """
     Instantiate a `Languoid` for a new node encountered in an "lff"-format tree.

     Used in `pyglottolog.lff`. A ``lineage`` keyword argument is passed to the
     constructor (default: empty list); all remaining keyword arguments are set as
     attributes before the level is assigned.
     """
     ini = INI(interpolation=None)
     ini.read_dict({'core': {'name': name}})
     lineage = kw.pop('lineage', [])
     languoid = cls(ini, lineage, id_=Glottocode(id), tree=tree)
     for attr, value in kw.items():
         setattr(languoid, attr, value)
     # The level is assigned last on purpose: setting it behaves differently when `_api`
     # is available, so all other attributes have to be initialized first.
     languoid.level = level
     return languoid
예제 #7
0
    def test_extractor(self):
        config = self.make_cfg(
            [config_path(f).as_posix() for f in ("admin", "mk", "embed_data")])
        xml = beastling.beastxml.BeastXml(config)
        xmlfile = self.tmp.joinpath("beastling.xml")
        xml.write_file(xmlfile.as_posix())
        self.assertTrue(bool(self._extract(xmlfile)))

        config = self.make_cfg({
            'admin': {
                'basename': 'abcdefg'
            },
            'model': {
                'model': 'mk',
                'data': data_path('basic.csv').as_posix()
            }
        })
        xml = beastling.beastxml.BeastXml(config)
        xmlfile = self.tmp.joinpath("beastling.xml")
        xml.write_file(xmlfile.as_posix())
        beastling.extractor.extract(xmlfile)
        p = Path('abcdefg.conf')
        self.assertTrue(p.exists())
        cfg = INI(interpolation=None)
        cfg.read(p.as_posix())
        remove(p)
        self.assertEqual(cfg['admin']['basename'], 'abcdefg')
        self.assertEqual(cfg['model']['model'], 'mk')

        fname = self.tmp.joinpath('test.xml')
        datafile = self.tmp.joinpath(('test.csv'))
        self.assertFalse(datafile.exists())
        with fname.open('w', encoding='utf8') as fp:
            fp.write("""<?xml version="1.0" encoding="UTF-8"?>
<r>
  <!--%s
%s
[admin]
[model]
-->
  <!--%s:%s-->
</r>
""" % (beastling.extractor._generated_str,
            beastling.extractor._config_file_str,
            beastling.extractor._data_file_str, datafile.as_posix()))
        res = self._extract(fname)
        self.assertIn(datafile.name, ''.join(res))
예제 #8
0
def test_extractor(config_factory, tmppath, data_dir):
    """Check extraction from generated BEAST XML and from a hand-written XML file."""
    # 1. Configuration assembled from named config files.
    config = config_factory("admin", "mk", "embed_data")
    beast_xml = beastling.beastxml.BeastXml(config)
    xml_path = str(tmppath / "beastling.xml")
    beast_xml.write_file(xml_path)
    assert bool(_extract(xml_path))

    # 2. Configuration passed as dict; extraction must write `abcdefg.conf`.
    settings = {
        'admin': {'basename': 'abcdefg'},
        'model model': {'model': 'mk', 'data': str(data_dir / 'basic.csv')},
    }
    config = config_factory(settings)
    beast_xml = beastling.beastxml.BeastXml(config)
    xml_path = str(tmppath / "beastling.xml")
    beast_xml.write_file(xml_path)
    beastling.extractor.extract(xml_path)
    conf_path = Path('abcdefg.conf')
    assert conf_path.exists()
    extracted = INI(interpolation=None)
    extracted.read(conf_path.as_posix())
    # The file has been read, so it is removed before the content is checked.
    remove(conf_path)
    assert extracted['admin']['basename'] == 'abcdefg'
    assert extracted['model model']['model'] == 'mk'

    # 3. Hand-written XML whose comments contain the extractor's marker strings and
    #    the path of a data file that does not exist yet.
    xml_file = tmppath / 'test.xml'
    datafile = tmppath / 'test.csv'
    assert not datafile.exists()
    template = """<?xml version="1.0" encoding="UTF-8"?>
<r>
  <!--%s
%s
[admin]
[model model]
-->
  <!--%s:%s-->
</r>
"""
    with xml_file.open('w', encoding='utf8') as fp:
        values = (
            beastling.extractor._generated_str,
            beastling.extractor._config_file_str,
            beastling.extractor._data_file_str,
            datafile.as_posix(),
        )
        fp.write(template % values)
    messages = _extract(xml_file)
    assert datafile.name in ''.join(messages)
예제 #9
0
def write_config(comment_text, overwrite):
    """
    Write the configuration contained in a comment text to ``<basename>.conf``.

    The second non-empty line of `comment_text` must be one of the marker strings
    `_config_file_str` or `_proggen_str`; the lines after it, up to a line containing
    `_do_not_edit_str` (if any), are parsed as INI-style configuration. The file name is
    taken from option ``basename`` in section ``admin``, defaulting to ``beastling``.

    :param comment_text: Text of the comment holding the configuration.
    :param overwrite: Whether an already existing configuration file may be replaced.
    :return: A message describing what was done.
    """
    lines = [line for line in comment_text.split("\n") if line]
    assert lines[1] in (_config_file_str, _proggen_str)
    if lines[1] == _proggen_str:
        return "Original configuration was generated programmatically, no configuration to extract."
    # Everything from the first "do not edit" line onwards is not part of the configuration.
    for index, line in enumerate(lines):
        if _do_not_edit_str in line:
            lines = lines[:index]
            break
    config_text = "\n".join(lines[2:])
    # Interpolation is disabled so that a literal "%" in an option value (e.g. in the
    # basename) is read back verbatim instead of raising an interpolation error.
    p = INI(interpolation=None)
    p.read_string(config_text)
    if p.has_option("admin", "basename"):
        basename = p.get("admin", "basename")
    else:
        basename = 'beastling'
    filename = Path(basename + '.conf')
    if filename.exists() and not overwrite:
        return "BEASTling configuration file %s already exists!  Run beastling with the --overwrite option if you wish to overwrite it.\n" % filename
    if not filename.parent.exists():
        # The basename may contain several directory levels which do not exist yet.
        filename.parent.mkdir(parents=True)

    p.write(filename)
    return "Wrote BEASTling configuration file %s.\n" % filename
예제 #10
0
def test_INI(tmppath):
    """Exercise `INI`: typed values, list values, multi-line text and file round-trip."""
    ini = INI()
    section, option = 'äüü', 'äöü'
    ini.set(section, option, ('ä', 'ö', 'ü'))
    ini.set('a', 'b', 5)
    assert ini['a'].getint('b') == 5
    # Setting an option to `None` must not create it.
    ini.set('a', 'c', None)
    assert 'c' not in ini['a']
    assert 'ä\n' in ini.write_string()
    assert len(ini.getlist(section, option)) == 3

    multiline = '- a\n  - aa\n  - ab\n- b'
    ini.settext('text', 'multi', multiline)

    target = tmppath / 'test'
    ini.write(target.as_posix())
    with target.open(encoding='utf8') as fp:
        content = fp.read()
    assert 'coding: utf-8' in content

    # Reading the file back must reproduce both the text value and the serialization.
    reloaded = INI.from_file(target)
    assert reloaded.gettext('text', 'multi') == multiline
    assert reloaded.write_string() == ini.write_string()
예제 #11
0
    def from_ini(cls, ini, nodes=None):
        """
        Instantiate from an INI file, deriving the lineage from the parent directories.

        :param ini: Path of the INI file; its parent directory is passed to the constructor \
        as `directory`.
        :param nodes: Optional `dict` mapping ids to `(name, id, level)` triples of already \
        loaded nodes. It is consulted before loading an ancestor and updated in place.
        :return: The new instance.
        """
        # Only create a new dict when none was passed: `nodes or {}` would also replace an
        # empty dict supplied by the caller, which then would never be populated.
        if nodes is None:
            nodes = {}
        ini = Path(ini)
        directory = ini.parent
        cfg = INI(interpolation=None)
        cfg.read(ini.as_posix(), encoding='utf8')

        lineage = []
        for parent in directory.parents:
            id_ = parent.name
            assert id_ != directory.name
            if not Glottocode.pattern.match(id_):
                # we ignore leading non-languoid-dir path components.
                break

            if id_ not in nodes:
                ancestor = Languoid.from_dir(parent, nodes=nodes)
                nodes[id_] = (ancestor.name, ancestor.id, ancestor.level)
            lineage.append(nodes[id_])

        # `directory.parents` starts with the closest ancestor; the lineage is passed in
        # the opposite order.
        res = cls(cfg, list(reversed(lineage)), directory=directory)
        nodes[res.id] = (res.name, res.id, res.level)
        return res
예제 #12
0
    def from_lff(cls, path, name_and_codes, level):
        """
        Build an instance from one entry of an "lff"-format tree.

        :param path: Ancestor chain formatted as ``"Name [id], Name [id]"``. Components with \
        id ``-isolate-`` are skipped. An empty path (or `None`) yields an empty lineage.
        :param name_and_codes: ``"Name [glottocode][isocode]"``; the isocode may be empty.
        :param level: Level name stored in the configuration, e.g. ``'dialect'``.
        :return: The new instance, with `iso` set if an isocode was given.
        """
        lname, codes = name_and_codes.split('[', 1)
        lname = lname.strip()
        # Drop the closing bracket, then separate the two bracketed codes.
        glottocode, isocode = codes[:-1].split('][')

        lineage = []
        # Splitting an empty path yields one empty component, which cannot be unpacked
        # into name and id below - so an empty path is treated as "no ancestors".
        if path:
            for i, comp in enumerate(path.split('], ')):
                # Only the last component still carries its closing bracket.
                if comp.endswith(']'):
                    comp = comp[:-1]
                name, id_ = comp.split(' [', 1)
                if id_ != '-isolate-':
                    _level = 'family'
                    if level == 'dialect':
                        # For a dialect, the first component is the language.
                        _level = 'language' if i == 0 else 'dialect'
                    lineage.append((name, id_, _level))

        cfg = INI()
        cfg.read_dict(
            dict(core=dict(name=lname, glottocode=glottocode, level=level)))
        res = cls(cfg, lineage)
        if isocode:
            res.iso = isocode
        return res
예제 #13
0
    def from_ini(cls, ini, nodes=None):
        """
        Instantiate from an INI file, deriving the lineage from the parent directories.

        :param ini: Path of the INI file, as `Path` or anything `Path` accepts.
        :param nodes: Optional `dict` mapping ids to `(name, id, level)` triples of already \
        loaded nodes. It is consulted before loading an ancestor and updated in place. \
        When omitted, a new dict is used for this call.
        :return: The new instance.
        """
        # A `{}` default would be created once and shared by all calls, so entries from
        # earlier calls would leak into later ones.
        if nodes is None:
            nodes = {}
        if not isinstance(ini, Path):
            ini = Path(ini)

        directory = ini.parent
        cfg = INI()
        cfg.read(ini.as_posix(), encoding='utf8')

        lineage = []
        for parent in directory.parents:
            # The id is the part of the directory name after the last dot.
            id_ = parent.name.split('.')[-1]
            assert id_ != directory.name.split('.')[-1]
            if not cls.id_pattern.match(id_):
                # we ignore leading non-languoid-dir path components.
                break

            if id_ not in nodes:
                ancestor = Languoid.from_dir(parent, nodes=nodes)
                nodes[id_] = (ancestor.name, ancestor.id, ancestor.level)
            lineage.append(nodes[id_])

        # `directory.parents` starts with the closest ancestor; the lineage is passed in
        # the opposite order.
        res = cls(cfg, list(reversed(lineage)))
        nodes[res.id] = (res.name, res.id, res.level)
        return res
예제 #14
0
 def test_existing_config(self):
     """Options written to ``test.ini`` beforehand are readable through `Config`."""
     ini = INI()
     ini.read_dict({'section': {'option': '12'}})
     ini_path = self.tmp_path('test.ini')
     ini.write(ini_path)
     config = Config('test', dir_=self.tmp_path())
     self.assertEqual(config.get('section', 'option'), '12')
예제 #15
0
    def read_from_file(self, configfile):
        """
        Read one or several INI-style configuration files and overwrite
        default option settings accordingly.

        :param configfile: Either a `dict` (passed to `read_dict`), a single file name,
            or an iterable of file names which are read one after the other.
        :raises ValueError: If `overlap` is neither "union" nor "intersection", if a
            "geo_priors" section is given without a "geography" section, or if there
            are neither model sections nor a geography configuration.
        """
        self.configfile = INI(interpolation=None)
        # Identity transform for option names, i.e. they are kept as written
        # (configparser would lowercase them by default).
        self.configfile.optionxform = str
        if isinstance(configfile, dict):
            self.configfile.read_dict(configfile)
        else:
            # A single file name is wrapped so that the loop below handles both cases.
            if isinstance(configfile, six.string_types):
                configfile = (configfile, )
            for conf in configfile:
                self.configfile.read(conf)
        p = self.configfile

        # Simple options: section -> {option name: getter}. Each option present in the
        # configuration is stored as attribute of the same name on `self`.
        for sec, opts in {
                'admin': {
                    'basename': p.get,
                    'embed_data': p.getboolean,
                    'screenlog': p.getboolean,
                    'log_all': p.getboolean,
                    'log_dp': p.getint,
                    'log_every': p.getint,
                    'log_probabilities': p.getboolean,
                    'log_fine_probs': p.getboolean,
                    'log_params': p.getboolean,
                    'log_trees': p.getboolean,
                    'log_pure_tree': p.getboolean,
                    'glottolog_release': p.get,
                },
                'MCMC': {
                    'chainlength': p.getint,
                    'sample_from_prior': p.getboolean,
                },
                'languages': {
                    'exclusions': p.get,
                    'languages': p.get,
                    'families': p.get,
                    'macroareas': p.get,
                    'location_data': p.get,
                    'overlap': p.get,
                    'starting_tree': p.get,
                    'sample_branch_lengths': p.getboolean,
                    'sample_topology': p.getboolean,
                    'monophyly_start_depth': p.getint,
                    'monophyly_end_depth': p.getint,
                    'monophyly_levels': p.getint,
                    'monophyly_direction': lambda s, o: p.get(s, o).lower(),
                },
        }.items():
            for opt, getter in opts.items():
                if p.has_option(sec, opt):
                    setattr(self, opt, getter(sec, opt))

        ## MCMC
        # `self.prior` forces sampling from the prior and marks the basename accordingly.
        self.sample_from_prior |= self.prior
        if self.prior and not self.basename.endswith("_prior"):
            self.basename += "_prior"

        ## Languages
        sec = "languages"
        if self.overlap.lower() not in ("union",
                                        "intersection"):  # pragma: no cover
            raise ValueError(
                "Value for overlap needs to be either 'union', or 'intersection'."
            )
        # Two spellings of the option are accepted; "monophyletic" takes precedence.
        if p.has_option(sec, "monophyletic"):
            self.monophyly = p.getboolean(sec, "monophyletic")
        elif p.has_option(sec, "monophyly"):
            self.monophyly = p.getboolean(sec, "monophyly")
        if p.has_option(sec, "monophyly_newick"):
            value = p.get(sec, "monophyly_newick")
            # The value is either the path of an existing file, whose content is used,
            # or it is used as is.
            if os.path.exists(value):
                with io.open(value, encoding="UTF-8") as fp:
                    self.monophyly_newick = fp.read()
            else:
                self.monophyly_newick = value
        if p.has_option(sec, 'minimum_data'):
            self.minimum_data = p.getfloat(sec, "minimum_data")

        ## Calibration
        if p.has_section("calibration"):
            for clade, calibration in p.items("calibration"):
                self.calibration_configs[clade] = calibration

        ## Clocks
        # Every section whose name starts with "clock" (case-insensitively) is a clock.
        clock_sections = [
            s for s in p.sections() if s.lower().startswith("clock")
        ]
        for section in clock_sections:
            self.clock_configs.append(self.get_clock_config(p, section))

        ## Models
        # Every section whose name starts with "model" (case-insensitively) is a model.
        model_sections = [
            s for s in p.sections() if s.lower().startswith("model")
        ]
        for section in model_sections:
            self.model_configs.append(self.get_model_config(p, section))
        # Geography
        if p.has_section("geography"):
            self.geo_config = self.get_geo_config(p, "geography")
        else:
            self.geo_config = {}
        if p.has_section("geo_priors"):
            if not p.has_section("geography"):
                raise ValueError(
                    "Config file contains geo_priors section but no geography section."
                )
            self.geo_config["geo_priors"] = {}
            # An option name may list several comma-separated clades sharing one value;
            # each clade is also added to the sampling points if not yet present.
            # NOTE(review): assumes `get_geo_config` returned a "sampling_points" list -
            # confirm.
            for clades, klm in p.items("geo_priors"):
                for clade in clades.split(','):
                    clade = clade.strip()
                    if clade not in self.geo_config["sampling_points"]:
                        self.geo_config["sampling_points"].append(clade)
                    self.geo_config["geo_priors"][clade] = klm
        sampled_points = self.geo_config.get("sampling_points", [])
        # NOTE(review): the comprehension variable below reuses the name `p` of the
        # parser; on Python 2 it leaks into the enclosing scope and rebinds `p`.
        # Consider renaming it.
        if [p for p in sampled_points if p.lower() != "root"
            ] and self.sample_topology and not self.monophyly:
            self.messages.append(
                "[WARNING] Geographic sampling and/or prior specified for clades other than root, but tree topology is being sampled without monophyly constraints.  BEAST may crash."
            )

        # Make sure analysis is non-empty
        if not model_sections and not self.geo_config:
            raise ValueError(
                "Config file contains no model sections and no geography section."
            )