def cmd_download(self, **kw):
    """Fetch the bibliography and scrape the result pages into ``output.csv``.

    Writes ``sources.bib`` from ``getEvoBibAsBibtex(SOURCE, **kw)``, then
    downloads one HTML page per offset ``1, 21, 41, ...`` (``self.pages``
    offsets in total), extracts one row per data point and writes all rows
    to ``output.csv`` in the raw directory.

    Each row is the tuple
    ``(number, concept, lname, glottolog, entry, cogid)``.

    :param kw: keyword arguments forwarded unchanged to ``getEvoBibAsBibtex``.
    :raises IndexError: if a record does not have the expected layout, e.g. \
    a data point without a ``Glottolog: ...`` marker or a missing ``span``.
    """
    # download source
    self.raw.write('sources.bib', getEvoBibAsBibtex(SOURCE, **kw))

    # download data
    all_records = []
    # The range is materialised as a list -- presumably so the progress bar
    # can determine the total number of steps (TODO confirm against ``pb``).
    for i in pb(list(range(1, 20 * self.pages + 1, 20))):
        with self.raw.temp_download(
                self._url(i), 'file-{0}'.format(i), log=self.log) as fname:
            # Read through a context manager so the file handle is closed
            # deterministically instead of being left to the garbage
            # collector.
            with fname.open(encoding='utf8') as fp:
                soup = BeautifulSoup(fp.read(), 'html.parser')

            for record in soup.findAll(
                    name='div', attrs={"class": "results_record"}):
                if not isinstance(record, bs4.element.Tag):
                    continue

                children = list(record.children)
                # The first two children hold the record number and the
                # concept; in both, the second <span> carries the value.
                number = children[0].findAll('span')[1].text.strip()
                concept = children[1].findAll('span')[1].text

                # The remaining children are the individual data points.
                for child in children[2:]:
                    if not isinstance(child, bs4.element.Tag):
                        continue
                    dpoints = child.findAll('span')
                    if len(dpoints) < 3:
                        # Fewer than three spans: not a data point row.
                        continue

                    lname = dpoints[1].text
                    # The code is taken from the markup of the language
                    # span: the eight characters after "Glottolog: ".
                    glottolog = re.findall(
                        'Glottolog: (........)', str(dpoints[1]))[0]
                    entry = dpoints[2].text
                    cogid = list(child.children)[4].text.strip()
                    all_records.append(
                        (number, concept, lname, glottolog, entry, cogid))

    with UnicodeWriter(self.raw.posix('output.csv')) as f:
        f.writerows(all_records)
def cmd_download(self, args):
    """Download the tukano data as CSV and fetch the bibliography.

    The tab-separated export is downloaded to a temporary ``tukano.tsv``,
    re-read with a tab delimiter and written to ``tukano.csv`` in the raw
    directory. Afterwards ``sources.bib`` is written from the
    ``Chacon2014`` entry.

    :param args: command arguments; not used by this method.
    """
    raw = self.raw_dir
    url = "http://edictor.digling.org/triples/get_data.py?file=tukano"

    with raw.temp_download(url, "tukano.tsv") as tsv_path:
        rows = raw.read_csv(tsv_path, delimiter="\t")
        raw.write_csv("tukano.csv", rows)

    bibtex = getEvoBibAsBibtex("Chacon2014")
    raw.write("sources.bib", bibtex)
def cmd_download(self, args):
    """Download and unpack the benchmark cognate sets and fetch the bibliography.

    Every key of ``self.DSETS`` is resolved relative to the
    ``benchmark/cognates`` directory of the archive at ``self.metadata.url``
    and passed to ``download_and_unpack``. ``sources.bib`` is then written
    from the first element of every ``self.DSETS`` value.

    :param args: command arguments; ``args.log`` is passed on as the logger.
    """
    d = Path('SequenceComparison-SupplementaryMaterial-cc4bf85/benchmark/cognates/')
    members = [d.joinpath(dset) for dset in self.DSETS]
    self.raw_dir.download_and_unpack(self.metadata.url, *members, log=args.log)

    # De-duplicate the keys, then sort them: iteration order of a set of
    # strings is arbitrary and can change between interpreter runs, which
    # would make the order of the requested entries non-reproducible.
    # Assumes the keys are mutually comparable (presumably strings).
    bib_keys = sorted({v[0] for v in self.DSETS.values()})
    self.raw_dir.write('sources.bib', getEvoBibAsBibtex(*bib_keys))
def cmd_download(self, args):
    """Download and unpack the Zenodo archive and fetch the bibliography.

    ``chinese.tsv`` and ``old_chinese.csv`` (as paths below the raw
    directory) are passed to ``download_and_unpack`` together with the
    archive URL. ``sources.bib`` is then written from the ``Hamed2006``
    and ``List2015d`` entries.

    :param args: command arguments; ``args.log`` is passed on as the logger.
    """
    raw = self.raw_dir
    archive_url = (
        "https://zenodo.org/record/16760/files/"
        "Network-perspectives-on-Chinese-dialect-history-1.zip"
    )
    targets = [raw.joinpath(name) for name in ("chinese.tsv", "old_chinese.csv")]
    raw.download_and_unpack(archive_url, *targets, log=args.log)

    bibtex = getEvoBibAsBibtex("Hamed2006", "List2015d")
    raw.write("sources.bib", bibtex)
def test_getEvoBibAsBibtex(mocker):
    """The BibTeX record in the (mocked) response appears in the result."""
    # Stand-in response object whose ``text`` wraps a BibTeX record in <pre>.
    response = mocker.Mock(text='<pre>@book{key,\ntitle={The Title}\n}\n</pre>')
    fake_get_url = mocker.Mock(return_value=response)
    # Replace the URL fetcher so no network access happens.
    mocker.patch('pylexibank.util.get_url', fake_get_url)

    result = util.getEvoBibAsBibtex('')

    assert '@book' in result