def crawl(crawler):
    """Feed every configured source into the crawler's 'id' output.

    Sources are visited in a fixed order: ABC (program 'indonesian'),
    Voice of America (voaindonesia.com), BBC News ('/indonesia/'),
    Deutsche Welle ('/id/'), the UDHR text file and three bible.is
    editions.

    Args:
        crawler: Crawler object providing ``get_output`` and the
            ``crawl_abc_net_au`` / ``crawl_voice_of_america`` methods.
    """
    sink = crawler.get_output(language='id')

    # Sources implemented as methods on the crawler itself.
    crawler.crawl_abc_net_au(sink, program_id='indonesian')
    crawler.crawl_voice_of_america(sink, host='voaindonesia.com')

    # Sources implemented as module-level helpers.
    crawl_bbc_news(crawler, sink, urlprefix='/indonesia/')
    crawl_deutsche_welle(crawler, sink, prefix='/id/')
    crawl_udhr(crawler, sink, filename='udhr_ind.txt')

    # Same edition order as before: INDASV, INDWBT, INDSHV.
    for edition in ('INDASV', 'INDWBT', 'INDSHV'):
        crawl_bibleis(crawler, sink, bible=edition)
def crawl(crawler):
    """Feed the UDHR text and Deutsche Welle into the 'uk' output.

    Args:
        crawler: Crawler object providing ``get_output``.
    """
    sink = crawler.get_output(language='uk')
    crawl_udhr(crawler, sink, filename='udhr_ukr.txt')
    crawl_deutsche_welle(crawler, sink, prefix='/uk/')
def crawl(crawler):
    """Feed the UDHR text and Deutsche Welle into the 'hr' output.

    Args:
        crawler: Crawler object providing ``get_output``.
    """
    sink = crawler.get_output(language='hr')
    crawl_udhr(crawler, sink, filename='udhr_hrv.txt')
    crawl_deutsche_welle(crawler, sink, prefix='/hr/')
def crawl(crawler):
    """Feed UDHR, Deutsche Welle and Sputnik News into the 'pt' output.

    The UDHR file (``udhr_por_BR.txt``), the Deutsche Welle prefix
    (``/pt-br/``) and the Sputnik host (``br.sputniknews.com``) are
    visited in that order.

    Args:
        crawler: Crawler object providing ``get_output``.
    """
    sink = crawler.get_output(language='pt')
    crawl_udhr(crawler, sink, filename='udhr_por_BR.txt')
    crawl_deutsche_welle(crawler, sink, prefix='/pt-br/')
    crawl_sputnik_news(crawler, sink, host='br.sputniknews.com')
def crawl(crawler):
    """Feed the UDHR text and Deutsche Welle into the 'fa' output.

    Uses the ``udhr_pes_1.txt`` UDHR file and the ``/fa-ir/`` section of
    Deutsche Welle.

    Args:
        crawler: Crawler object providing ``get_output``.
    """
    sink = crawler.get_output(language='fa')
    crawl_udhr(crawler, sink, filename='udhr_pes_1.txt')
    crawl_deutsche_welle(crawler, sink, prefix='/fa-ir/')
def crawl_dari(crawler):
    """Feed the UDHR text and Deutsche Welle into the 'fa-AF' output.

    Uses the ``udhr_pes_2.txt`` UDHR file and the ``/fa-af/`` section of
    Deutsche Welle.

    Args:
        crawler: Crawler object providing ``get_output``.
    """
    sink = crawler.get_output(language='fa-AF')
    crawl_udhr(crawler, sink, filename='udhr_pes_2.txt')
    crawl_deutsche_welle(crawler, sink, prefix='/fa-af/')
def crawl(crawler):
    """Feed the UDHR text and Deutsche Welle into the 'ru' output.

    Args:
        crawler: Crawler object providing ``get_output``.
    """
    sink = crawler.get_output(language='ru')
    crawl_udhr(crawler, sink, filename='udhr_rus.txt')
    crawl_deutsche_welle(crawler, sink, prefix='/ru/')
def crawl_modern_standard_arabic(crawler):
    """Feed every configured source into the crawler's 'ar' output.

    Sources, in order: the UDHR text (``udhr_arb.txt``), Deutsche Welle
    (``/ar/``), Sputnik News (``arabic.sputniknews.com``) and BBC News
    (``/arabic/``).

    Args:
        crawler: Crawler object providing ``get_output``.
    """
    sink = crawler.get_output(language='ar')
    crawl_udhr(crawler, sink, filename='udhr_arb.txt')
    crawl_deutsche_welle(crawler, sink, prefix='/ar/')
    crawl_sputnik_news(crawler, sink, host='arabic.sputniknews.com')
    crawl_bbc_news(crawler, sink, urlprefix='/arabic/')
def crawl(crawler):
    """Feed the UDHR text and Deutsche Welle into the 'el' output.

    Uses the ``udhr_ell_monotonic.txt`` UDHR file.

    Args:
        crawler: Crawler object providing ``get_output``.
    """
    sink = crawler.get_output(language='el')
    crawl_udhr(crawler, sink, filename='udhr_ell_monotonic.txt')
    crawl_deutsche_welle(crawler, sink, prefix='/el/')
def crawl(crawler):
    """Feed UDHR, BBC News and Deutsche Welle into the 'sw' output.

    Sources, in order: the UDHR text (``udhr_swh.txt``), BBC News
    (``/swahili/``) and Deutsche Welle (``/sw/``).

    Args:
        crawler: Crawler object providing ``get_output``.
    """
    sink = crawler.get_output(language='sw')
    crawl_udhr(crawler, sink, filename='udhr_swh.txt')
    crawl_bbc_news(crawler, sink, urlprefix='/swahili/')
    crawl_deutsche_welle(crawler, sink, prefix='/sw/')
def crawl(crawler):
    """Feed the UDHR text and Deutsche Welle into the 'sr-Latn' output.

    Uses the ``udhr_srp_latn.txt`` UDHR file and the ``/sr/`` section of
    Deutsche Welle.

    Args:
        crawler: Crawler object providing ``get_output``.
    """
    sink = crawler.get_output(language='sr-Latn')
    crawl_udhr(crawler, sink, filename='udhr_srp_latn.txt')
    crawl_deutsche_welle(crawler, sink, prefix='/sr/')
def crawl(crawler):
    """Feed the UDHR text and Deutsche Welle into the 'ro' output.

    Uses the ``udhr_ron_2006.txt`` UDHR file.

    Args:
        crawler: Crawler object providing ``get_output``.
    """
    sink = crawler.get_output(language='ro')
    crawl_udhr(crawler, sink, filename='udhr_ron_2006.txt')
    crawl_deutsche_welle(crawler, sink, prefix='/ro/')
def crawl(crawler):
    """Feed the UDHR text and Deutsche Welle into the 'bs' output.

    Uses the ``udhr_bos_latn.txt`` UDHR file.

    Args:
        crawler: Crawler object providing ``get_output``.
    """
    sink = crawler.get_output(language='bs')
    crawl_udhr(crawler, sink, filename='udhr_bos_latn.txt')
    crawl_deutsche_welle(crawler, sink, prefix='/bs/')
def crawl(crawler):
    """Feed UDHR, Deutsche Welle and bible.is into the 'am' output.

    Sources, in order: the UDHR text (``udhr_amh.txt``), Deutsche Welle
    (``/am/``) and the ``AMHEVG`` bible.is edition.

    Args:
        crawler: Crawler object providing ``get_output``.
    """
    sink = crawler.get_output(language='am')
    crawl_udhr(crawler, sink, filename='udhr_amh.txt')
    crawl_deutsche_welle(crawler, sink, prefix='/am/')
    crawl_bibleis(crawler, sink, 'AMHEVG')
def crawl(crawler):
    """Feed UDHR, BBC News and Deutsche Welle into the 'es' output.

    Sources, in order: the UDHR text (``udhr_spa.txt``), BBC News
    (``/mundo/``) and Deutsche Welle (``/es/``).

    Args:
        crawler: Crawler object providing ``get_output``.
    """
    sink = crawler.get_output(language='es')
    crawl_udhr(crawler, sink, filename='udhr_spa.txt')
    crawl_bbc_news(crawler, sink, urlprefix='/mundo/')
    crawl_deutsche_welle(crawler, sink, prefix='/es/')
def crawl(crawler):
    """Feed the UDHR text and Deutsche Welle into the 'pl' output.

    Args:
        crawler: Crawler object providing ``get_output``.
    """
    sink = crawler.get_output(language='pl')
    crawl_udhr(crawler, sink, filename='udhr_pol.txt')
    crawl_deutsche_welle(crawler, sink, prefix='/pl/')
def crawl(crawler):
    """Feed the UDHR text and Deutsche Welle into the 'ps' output.

    Uses the ``udhr_pbu.txt`` UDHR file and the ``/ps/`` section of
    Deutsche Welle.

    Args:
        crawler: Crawler object providing ``get_output``.
    """
    sink = crawler.get_output(language='ps')
    crawl_udhr(crawler, sink, filename='udhr_pbu.txt')
    # NOTE(review): need_percent_in_url presumably restricts the crawl to
    # percent-encoded URLs -- confirm against crawl_deutsche_welle.
    crawl_deutsche_welle(
        crawler,
        sink,
        prefix='/ps/',
        need_percent_in_url=True,
    )
def crawl(crawler):
    """Feed the UDHR text and Deutsche Welle into the 'de' output.

    Uses the ``udhr_deu_1996.txt`` UDHR file.

    Args:
        crawler: Crawler object providing ``get_output``.
    """
    sink = crawler.get_output(language='de')
    crawl_udhr(crawler, sink, filename='udhr_deu_1996.txt')
    crawl_deutsche_welle(crawler, sink, prefix='/de/')
def crawl(crawler):
    """Feed UDHR, Deutsche Welle and bible.is into the 'bg' output.

    Sources, in order: the UDHR text (``udhr_bul.txt``), Deutsche Welle
    (``/bg/``) and the ``BULB40`` bible.is edition.

    Args:
        crawler: Crawler object providing ``get_output``.
    """
    out = crawler.get_output(language='bg')
    crawl_udhr(crawler, out, filename='udhr_bul.txt')
    crawl_deutsche_welle(crawler, out, prefix='/bg/')
    # There's audio for one part of Matthew under BLGAMB that matches this
    # text. The comment sits on its own line so the call below is a real
    # statement rather than part of the comment.
    crawl_bibleis(crawler, out, 'BULB40')
def crawl(crawler):
    """Feed the UDHR text and Deutsche Welle into the 'id' output.

    Args:
        crawler: Crawler object providing ``get_output``.
    """
    sink = crawler.get_output(language='id')
    crawl_udhr(crawler, sink, filename='udhr_ind.txt')
    crawl_deutsche_welle(crawler, sink, prefix='/id/')
def crawl(crawler):
    """Feed UDHR, Deutsche Welle and Sputnik News into the 'fr' output.

    Sources, in order: the UDHR text (``udhr_fra.txt``), Deutsche Welle
    (``/fr/``) and Sputnik News (``fr.sputniknews.com``).

    Args:
        crawler: Crawler object providing ``get_output``.
    """
    sink = crawler.get_output(language='fr')
    crawl_udhr(crawler, sink, filename='udhr_fra.txt')
    crawl_deutsche_welle(crawler, sink, prefix='/fr/')
    crawl_sputnik_news(crawler, sink, host='fr.sputniknews.com')