def crawl(crawler):
    """Run the crawl steps configured for language tag 'mi'.

    A single output handle is requested from ``crawler`` and passed, in
    order, to ``crawl_udhr`` (file 'udhr_mri.txt'),
    ``_scrape_maoritelevision`` and ``_scrape_paiperatapu``.

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded to every helper.
    """
    sink = crawler.get_output(language='mi')
    crawl_udhr(crawler, sink, filename='udhr_mri.txt')
    _scrape_maoritelevision(crawler, sink)
    _scrape_paiperatapu(crawler, sink)
def crawl(crawler):
    """Run the crawl steps configured for language tag 'sk'.

    The output obtained from ``crawler`` is shared by ``crawl_udhr``
    (file 'udhr_slk.txt') and ``_crawl_pravda_sk``, called in that order.

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded to every helper.
    """
    language_tag = 'sk'
    output = crawler.get_output(language=language_tag)
    crawl_udhr(crawler, output, filename='udhr_slk.txt')
    _crawl_pravda_sk(crawler, output)
def crawl(crawler):
    """Run the crawl steps configured for language tag 'ar'.

    One output handle is shared by four helpers, invoked in this order:
    ``crawl_udhr``, ``crawl_deutsche_welle``, ``crawl_sputnik_news`` and
    ``crawl_bbc_news``.

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded to every helper.
    """
    dest = crawler.get_output(language='ar')
    crawl_udhr(crawler, dest, filename='udhr_arb.txt')
    crawl_deutsche_welle(crawler, dest, prefix='/ar/')
    crawl_sputnik_news(crawler, dest, host='arabic.sputniknews.com')
    crawl_bbc_news(crawler, dest, urlprefix='/arabic/')
def crawl(crawler):
    """Run the crawl steps configured for language tag 'yo'.

    The shared output is passed to ``crawl_udhr`` (file 'udhr_yor.txt'),
    then ``crawl_voice_of_nigeria`` (URL prefix '/yoruba/'), then
    ``crawl_bibleis`` (bible 'YORUBS').

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded to every helper.
    """
    target = crawler.get_output(language='yo')
    crawl_udhr(crawler, target, filename='udhr_yor.txt')
    crawl_voice_of_nigeria(crawler, target, urlprefix='/yoruba/')
    crawl_bibleis(crawler, target, bible='YORUBS')
def crawl(crawler):
    """Run the crawl steps configured for language tag 'fuv'.

    The shared output is passed to ``crawl_udhr`` (file 'udhr_fuv.txt'),
    then ``crawl_voice_of_nigeria`` (URL prefix '/fulfulde/'), then
    ``crawl_bibleis`` (bible 'FUVTBL').

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded to every helper.
    """
    language_tag = 'fuv'
    sink = crawler.get_output(language=language_tag)
    crawl_udhr(crawler, sink, filename='udhr_fuv.txt')
    crawl_voice_of_nigeria(crawler, sink, urlprefix='/fulfulde/')
    crawl_bibleis(crawler, sink, bible='FUVTBL')
def crawl(crawler):
    """Run the crawl steps configured for language tag 'cs'.

    Calls ``crawl_udhr`` (file 'udhr_ces.txt') followed by
    ``crawl_sputnik_news`` (host 'cz.sputniknews.com') on one output.

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded to every helper.
    """
    output = crawler.get_output(language='cs')
    crawl_udhr(crawler, output, filename='udhr_ces.txt')
    crawl_sputnik_news(crawler, output, host='cz.sputniknews.com')
def crawl(crawler):
    """Run the crawl steps configured for language tag 'es'.

    On one shared output, calls ``crawl_udhr`` (file 'udhr_spa.txt'),
    ``crawl_bbc_news`` (URL prefix '/mundo/') and
    ``crawl_deutsche_welle`` (prefix '/es/'), in that order.

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded to every helper.
    """
    dest = crawler.get_output(language='es')
    crawl_udhr(crawler, dest, filename='udhr_spa.txt')
    crawl_bbc_news(crawler, dest, urlprefix='/mundo/')
    crawl_deutsche_welle(crawler, dest, prefix='/es/')
def crawl(crawler):
    """Run the crawl steps configured for language tag 'el'.

    Calls ``crawl_udhr`` (file 'udhr_ell_monotonic.txt') followed by
    ``crawl_deutsche_welle`` (prefix '/el/') on one output.

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded to every helper.
    """
    udhr_file = 'udhr_ell_monotonic.txt'
    target = crawler.get_output(language='el')
    crawl_udhr(crawler, target, filename=udhr_file)
    crawl_deutsche_welle(crawler, target, prefix='/el/')
def crawl(crawler):
    """Run the crawl steps configured for language tag 'it'.

    On one shared output, calls ``crawl_udhr`` (file 'udhr_ita.txt'),
    ``crawl_sputnik_news`` (host 'it.sputniknews.com') and
    ``_crawl_iltirreno_gelocal_it``, in that order.

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded to every helper.
    """
    sink = crawler.get_output(language='it')
    crawl_udhr(crawler, sink, filename='udhr_ita.txt')
    crawl_sputnik_news(crawler, sink, host='it.sputniknews.com')
    _crawl_iltirreno_gelocal_it(crawler, sink)
def crawl(crawler):
    """Run the crawl steps configured for language tag 'ig'.

    Calls ``crawl_udhr`` (file 'udhr_ibo.txt') followed by
    ``crawl_voice_of_nigeria`` (URL prefix '/igbo/') on one output.

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded to every helper.
    """
    language_tag = 'ig'
    output = crawler.get_output(language=language_tag)
    crawl_udhr(crawler, output, filename='udhr_ibo.txt')
    crawl_voice_of_nigeria(crawler, output, urlprefix='/igbo/')
def crawl(crawler):
    """Run the crawl steps configured for language tag 'vi'.

    On one shared output, calls ``crawl_udhr`` (file 'udhr_vie.txt'),
    ``crawl_bbc_news`` (URL prefix '/vietnamese/') and finally the
    crawler's own ``crawl_voice_of_america`` method (host
    'voatiengviet.com').

    Args:
        crawler: Object exposing ``get_output(language=...)`` and
            ``crawl_voice_of_america(out, host=...)``.
    """
    dest = crawler.get_output(language='vi')
    crawl_udhr(crawler, dest, filename='udhr_vie.txt')
    crawl_bbc_news(crawler, dest, urlprefix='/vietnamese/')
    crawler.crawl_voice_of_america(dest, host='voatiengviet.com')
def crawl(crawler):
    """Run the crawl steps configured for language tag 'mnw'.

    Calls ``crawl_udhr`` (file 'udhr_mnw.txt') followed by
    ``crawl_mon_news`` on one output.

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded to every helper.
    """
    output = crawler.get_output(language='mnw')
    crawl_udhr(crawler, output, filename='udhr_mnw.txt')
    crawl_mon_news(crawler, output)
def crawl(crawler):
    """Run the crawl steps configured for language tag 'ku'.

    Calls ``crawl_udhr`` (file 'udhr_kmr.txt') followed by
    ``crawl_sputnik_news`` (host 'krd.sputniknews.com') on one output.

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded to every helper.
    """
    sputnik_host = 'krd.sputniknews.com'
    target = crawler.get_output(language='ku')
    crawl_udhr(crawler, target, filename='udhr_kmr.txt')
    crawl_sputnik_news(crawler, target, host=sputnik_host)
def crawl(crawler):
    """Run the crawl steps configured for language tag 'os'.

    Calls ``crawl_udhr`` (file 'udhr_oss.txt') followed by
    ``_crawl_raestdzinad_ru`` on one output.

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded to every helper.
    """
    sink = crawler.get_output(language='os')
    crawl_udhr(crawler, sink, filename='udhr_oss.txt')
    _crawl_raestdzinad_ru(crawler, sink)
def crawl(crawler):
    """Run the crawl steps configured for language tag 'mt'.

    Calls ``crawl_udhr`` (file 'udhr_mlt.txt') followed by
    ``crawl_newsbook_mt`` on one output.

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded to every helper.
    """
    language_tag = 'mt'
    dest = crawler.get_output(language=language_tag)
    crawl_udhr(crawler, dest, filename='udhr_mlt.txt')
    crawl_newsbook_mt(crawler, dest)
def crawl(crawler):
    """Run the crawl steps configured for language tag 'bm'.

    Calls ``crawl_udhr`` (file 'udhr_bam.txt') and then the crawler's own
    ``crawl_voice_of_america`` method (host 'voabambara.com') on one
    output.

    Args:
        crawler: Object exposing ``get_output(language=...)`` and
            ``crawl_voice_of_america(out, host=...)``.
    """
    output = crawler.get_output(language='bm')
    crawl_udhr(crawler, output, filename='udhr_bam.txt')
    crawler.crawl_voice_of_america(output, host='voabambara.com')
def crawl(crawler):
    """Run the crawl steps configured for language tag 'lt'.

    Calls ``crawl_udhr`` (file 'udhr_lit.txt') followed by
    ``_crawl_kauno_diena_lt`` on one output.

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded to every helper.
    """
    target = crawler.get_output(language='lt')
    crawl_udhr(crawler, target, filename='udhr_lit.txt')
    _crawl_kauno_diena_lt(crawler, target)
def crawl(crawler):
    """Run the crawl steps configured for language tag 'sw'.

    On one shared output, calls ``crawl_udhr`` (file 'udhr_swh.txt'),
    ``crawl_bbc_news`` (URL prefix '/swahili/') and
    ``crawl_deutsche_welle`` (prefix '/sw/'), in that order.

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded to every helper.
    """
    sink = crawler.get_output(language='sw')
    crawl_udhr(crawler, sink, filename='udhr_swh.txt')
    crawl_bbc_news(crawler, sink, urlprefix='/swahili/')
    crawl_deutsche_welle(crawler, sink, prefix='/sw/')
def crawl(crawler):
    """Run the crawl steps configured for language tag 'nan'.

    The only step is ``crawl_udhr`` with file 'udhr_nan.txt'.

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded to ``crawl_udhr``.
    """
    language_tag = 'nan'
    udhr_file = 'udhr_nan.txt'
    output = crawler.get_output(language=language_tag)
    crawl_udhr(crawler, output, filename=udhr_file)
def crawl(crawler):
    """Run the crawl steps configured for language tag 'vec'.

    ``crawl_udhr`` (file 'udhr_vec.txt') receives the 'vec' output.  The
    three helpers that follow are called with ``crawler`` only and are
    not given that output.

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded to every helper.
    """
    dest = crawler.get_output(language='vec')
    crawl_udhr(crawler, dest, filename='udhr_vec.txt')
    # These helpers take no output argument here; only the crawler is passed.
    crawl_larenadomila_it(crawler)
    crawl_quatrociacoe_it(crawler)
    crawl_wikisource_trieste_vernacola(crawler)
def crawl(crawler):
    """Run the crawl steps configured for language tag 'chr'.

    Calls ``crawl_udhr`` (file 'udhr_chr_uppercase.txt') and then the
    crawler's own ``crawl_churchio`` method (bible id 'chr-cherokee') on
    one output.

    Args:
        crawler: Object exposing ``get_output(language=...)`` and
            ``crawl_churchio(out, bible_id=...)``.
    """
    target = crawler.get_output(language='chr')
    crawl_udhr(crawler, target, filename='udhr_chr_uppercase.txt')
    crawler.crawl_churchio(target, bible_id='chr-cherokee')
def crawl(crawler):
    """Run the crawl steps configured for language tag 'ru'.

    Calls ``crawl_udhr`` (file 'udhr_rus.txt') followed by
    ``crawl_deutsche_welle`` (prefix '/ru/') on one output.

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded to every helper.
    """
    sink = crawler.get_output(language='ru')
    crawl_udhr(crawler, sink, filename='udhr_rus.txt')
    crawl_deutsche_welle(crawler, sink, prefix='/ru/')
def crawl(crawler):
    """Run the crawl steps configured for language tag 'mk'.

    Calls ``crawl_udhr`` (file 'udhr_mkd.txt') followed by
    ``crawl_deutsche_welle`` (prefix '/mk/') on one output.

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded to every helper.
    """
    dw_prefix = '/mk/'
    output = crawler.get_output(language='mk')
    crawl_udhr(crawler, output, filename='udhr_mkd.txt')
    crawl_deutsche_welle(crawler, output, prefix=dw_prefix)
def crawl(crawler):
    """Run the crawl steps configured for language tag 'pt-PT'.

    On one shared output, calls ``crawl_udhr`` (file 'udhr_por_PT.txt'),
    ``_crawl_observador_pt`` and ``_crawl_visao_sapo_pt``, in that order.

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded to every helper.
    """
    dest = crawler.get_output(language='pt-PT')
    crawl_udhr(crawler, dest, filename='udhr_por_PT.txt')
    _crawl_observador_pt(crawler, dest)
    _crawl_visao_sapo_pt(crawler, dest)
def crawl(crawler):
    """Run the crawl steps configured for language tag 'fi'.

    Calls ``crawl_udhr`` (file 'udhr_fin.txt') and then the crawler's own
    ``crawl_sverigesradio`` method with ``program_id=185`` on one output.

    Args:
        crawler: Object exposing ``get_output(language=...)`` and
            ``crawl_sverigesradio(out, program_id=...)``.
    """
    target = crawler.get_output(language='fi')
    crawl_udhr(crawler, target, filename='udhr_fin.txt')
    crawler.crawl_sverigesradio(target, program_id=185)
def crawl(crawler):
    """Run the crawl steps configured for language tag 'dz'.

    Calls ``crawl_udhr`` (file 'udhr_dzo.txt') followed by
    ``crawl_kuensel`` on one output.

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded to every helper.
    """
    language_tag = 'dz'
    sink = crawler.get_output(language=language_tag)
    crawl_udhr(crawler, sink, filename='udhr_dzo.txt')
    crawl_kuensel(crawler, sink)
def crawl(crawler):
    """Run the crawl steps configured for language tag 'ps'.

    Calls ``crawl_udhr`` (file 'udhr_pbu.txt') followed by
    ``crawl_deutsche_welle`` with prefix '/ps/' and
    ``need_percent_in_url=True`` on one output.

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded to every helper.
    """
    output = crawler.get_output(language='ps')
    crawl_udhr(crawler, output, filename='udhr_pbu.txt')
    crawl_deutsche_welle(
        crawler,
        output,
        prefix='/ps/',
        need_percent_in_url=True,
    )
def crawl(crawler):
    """Run the crawl steps configured for language tag 'yue'.

    On one shared output, in order: ``crawl_udhr`` (file 'udhr_yue.txt'),
    the crawler's ``crawl_voice_of_america`` method (host
    'www.voacantonese.com'), ``crawl_radio_free_asia`` (edition
    'cantonese', ``start_year=1998``) and ``crawl_bibleis`` (bible
    'YUHUNV').

    Args:
        crawler: Object exposing ``get_output(language=...)`` and
            ``crawl_voice_of_america(out, host=...)``; it is also
            forwarded to the module-level helpers.
    """
    dest = crawler.get_output(language='yue')
    crawl_udhr(crawler, dest, filename='udhr_yue.txt')
    crawler.crawl_voice_of_america(dest, host='www.voacantonese.com')
    crawl_radio_free_asia(
        crawler,
        dest,
        edition='cantonese',
        start_year=1998,
    )
    crawl_bibleis(crawler, dest, bible='YUHUNV')
def crawl(crawler):
    """Run the crawl steps configured for language tag 'bg'.

    On one shared output, calls ``crawl_udhr`` (file 'udhr_bul.txt'),
    ``crawl_deutsche_welle`` (prefix '/bg/') and ``crawl_bibleis`` with
    'BULB40' as its third positional argument.

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded to every helper.
    """
    target = crawler.get_output(language='bg')
    crawl_udhr(crawler, target, filename='udhr_bul.txt')
    crawl_deutsche_welle(crawler, target, prefix='/bg/')
    # Original author's note: there is audio for one part of Matthew under
    # BLGAMB that matches this text.
    crawl_bibleis(crawler, target, 'BULB40')
def crawl(crawler):
    """Run the crawl steps configured for language tag 'my'.

    Calls ``crawl_udhr`` (file 'udhr_mya.txt') followed by
    ``crawl_bbc_news`` (URL prefix '/burmese/') on one output.

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded to every helper.
    """
    bbc_prefix = '/burmese/'
    sink = crawler.get_output(language='my')
    crawl_udhr(crawler, sink, filename='udhr_mya.txt')
    crawl_bbc_news(crawler, sink, urlprefix=bbc_prefix)