Example #1
    def cmd_download(self, **kw):
        # download source
        self.raw.write('sources.bib', getEvoBibAsBibtex(SOURCE, **kw))

        # download data
        all_records = []
        for i in pb(list(range(1, 20 * self.pages + 1, 20))):
            with self.raw.temp_download(self._url(i),
                                        'file-{0}'.format(i),
                                        log=self.log) as fname:
                soup = BeautifulSoup(
                    fname.open(encoding='utf8').read(), 'html.parser')
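                # each results_record div holds the concept number, the concept gloss,
                # and one row per language (name, Glottolog code, form, cognate ID)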
                for record in soup.findAll(name='div',
                                           attrs={"class": "results_record"}):
                    if isinstance(record, bs4.element.Tag):
                        children = list(record.children)
                        number = children[0].findAll('span')[1].text.strip()
                        concept = children[1].findAll('span')[1].text
                        for child in children[2:]:
                            if isinstance(child, bs4.element.Tag):
                                dpoints = child.findAll('span')
                                if len(dpoints) >= 3:
                                    lname = dpoints[1].text
                                    glottolog = re.findall(
                                        'Glottolog: (........)',
                                        str(dpoints[1]))[0]
                                    entry = dpoints[2].text
                                    cogid = list(
                                        child.children)[4].text.strip()
                                    all_records.append(
                                        (number, concept, lname, glottolog,
                                         entry, cogid))
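        # write all scraped rows to a single CSV in the raw data directory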
        with UnicodeWriter(self.raw.posix('output.csv')) as f:
            f.writerows(all_records)
Example #2
    def cmd_download(self, args):
        # fetch the EDICTOR triples dump for Tukano and re-save it as comma-separated CSV
        with self.raw_dir.temp_download(
                "http://edictor.digling.org/triples/get_data.py?file=tukano",
                "tukano.tsv") as data:
            self.raw_dir.write_csv("tukano.csv",
                                   self.raw_dir.read_csv(data, delimiter="\t"))
        self.raw_dir.write("sources.bib", getEvoBibAsBibtex("Chacon2014"))
Example #3
    def cmd_download(self, args):
        d = Path('SequenceComparison-SupplementaryMaterial-cc4bf85/benchmark/cognates/')
        # download the supplementary archive and unpack only the benchmark cognate files
        self.raw_dir.download_and_unpack(
            self.metadata.url,
            *[d.joinpath(dset) for dset in self.DSETS],
            log=args.log)
        self.raw_dir.write('sources.bib',
                           getEvoBibAsBibtex(*set(v[0] for v in self.DSETS.values())))
Example #4
    def cmd_download(self, args):
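        # fetch the Zenodo archive and unpack just the two data files listed below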
        self.raw_dir.download_and_unpack(
            "https://zenodo.org/record/16760/files/"
            "Network-perspectives-on-Chinese-dialect-history-1.zip",
            self.raw_dir.joinpath("chinese.tsv"),
            self.raw_dir.joinpath("old_chinese.csv"),
            log=args.log,
        )

        self.raw_dir.write("sources.bib",
                           getEvoBibAsBibtex("Hamed2006", "List2015d"))
Example #5
from pylexibank import util


def test_getEvoBibAsBibtex(mocker):
    bib = '<pre>@book{key,\ntitle={The Title}\n}\n</pre>'
    # patch the HTTP helper so no network request is made; the mock returns a
    # canned EvoBib page wrapped in <pre> tags
    mocker.patch('pylexibank.util.get_url',
                 mocker.Mock(return_value=mocker.Mock(text=bib)))
    assert '@book' in util.getEvoBibAsBibtex('')