def crawl_modern_standard_arabic(crawler):
    """Run the Modern Standard Arabic crawl into the 'ar' output.

    Requests the output for language 'ar' from ``crawler`` and hands it,
    in order, to the UDHR, Deutsche Welle, Sputnik News and BBC News
    crawl helpers.
    """
    arabic_output = crawler.get_output(language='ar')
    crawl_udhr(crawler, arabic_output, filename='udhr_arb.txt')
    crawl_deutsche_welle(crawler, arabic_output, prefix='/ar/')
    crawl_sputnik_news(
        crawler, arabic_output, host='arabic.sputniknews.com')
    crawl_bbc_news(crawler, arabic_output, urlprefix='/arabic/')
def crawl(crawler):
    """Run the crawl for language 'lv'.

    Requests the 'lv' output from ``crawler`` and passes it first to the
    UDHR helper (file 'udhr_lav.txt'), then to the Sputnik News helper
    for host 'sputniknewslv.com'.
    """
    latvian_output = crawler.get_output(language='lv')
    crawl_udhr(crawler, latvian_output, filename='udhr_lav.txt')
    crawl_sputnik_news(crawler, latvian_output, host='sputniknewslv.com')
def crawl(crawler):
    """Run the crawl for language 'kk'.

    Requests the 'kk' output from ``crawler`` and passes it first to the
    UDHR helper (file 'udhr_kaz.txt'), then to the Sputnik News helper
    for host 'sputniknews.kz'.
    """
    kazakh_output = crawler.get_output(language='kk')
    crawl_udhr(crawler, kazakh_output, filename='udhr_kaz.txt')
    crawl_sputnik_news(crawler, kazakh_output, host='sputniknews.kz')
def crawl(crawler):
    """Run the crawl for language 'ro-MD'.

    Requests the 'ro-MD' output from ``crawler`` and passes it to the
    Sputnik News helper for host 'sputnik.md'. No other source is crawled
    by this function.
    """
    moldovan_output = crawler.get_output(language='ro-MD')
    crawl_sputnik_news(crawler, moldovan_output, host='sputnik.md')
def crawl(crawler):
    """Run the crawl for language 'az'.

    Requests the 'az' output from ``crawler`` and passes it first to the
    UDHR helper (file 'udhr_azj_latn.txt'), then to the Sputnik News
    helper for host 'sputnik.az'.
    """
    azerbaijani_output = crawler.get_output(language='az')
    crawl_udhr(crawler, azerbaijani_output, filename='udhr_azj_latn.txt')
    crawl_sputnik_news(crawler, azerbaijani_output, host='sputnik.az')
def crawl(crawler):
    """Run the crawl for language 'be'.

    Requests the 'be' output from ``crawler`` and passes it first to the
    UDHR helper (file 'udhr_bel.txt'), then to the Sputnik News helper
    for host 'bel.sputnik.by'.
    """
    belarusian_output = crawler.get_output(language='be')
    crawl_udhr(crawler, belarusian_output, filename='udhr_bel.txt')
    crawl_sputnik_news(crawler, belarusian_output, host='bel.sputnik.by')
def crawl(crawler):
    """Run the crawl for language 'cs'.

    Requests the 'cs' output from ``crawler`` and passes it first to the
    UDHR helper (file 'udhr_ces.txt'), then to the Sputnik News helper
    for host 'cz.sputniknews.com'.
    """
    czech_output = crawler.get_output(language='cs')
    crawl_udhr(crawler, czech_output, filename='udhr_ces.txt')
    crawl_sputnik_news(crawler, czech_output, host='cz.sputniknews.com')
def crawl(crawler):
    """Run the crawl for language 'pt'.

    Requests the 'pt' output from ``crawler`` and passes it, in order, to
    the UDHR helper (file 'udhr_por_BR.txt'), the Deutsche Welle helper
    (prefix '/pt-br/') and the Sputnik News helper for host
    'br.sputniknews.com'.
    """
    portuguese_output = crawler.get_output(language='pt')
    crawl_udhr(crawler, portuguese_output, filename='udhr_por_BR.txt')
    crawl_deutsche_welle(crawler, portuguese_output, prefix='/pt-br/')
    crawl_sputnik_news(
        crawler, portuguese_output, host='br.sputniknews.com')
def crawl(crawler):
    """Run the crawl for language 'it'.

    Requests the 'it' output from ``crawler`` and passes it, in order, to
    the UDHR helper (file 'udhr_ita.txt'), the Sputnik News helper for
    host 'it.sputniknews.com', and the ``_crawl_iltirreno_gelocal_it``
    helper.
    """
    italian_output = crawler.get_output(language='it')
    crawl_udhr(crawler, italian_output, filename='udhr_ita.txt')
    crawl_sputnik_news(crawler, italian_output, host='it.sputniknews.com')
    _crawl_iltirreno_gelocal_it(crawler, italian_output)
def crawl(crawler):
    """Run the crawl for language 'ku'.

    Requests the 'ku' output from ``crawler`` and passes it first to the
    UDHR helper (file 'udhr_kmr.txt'), then to the Sputnik News helper
    for host 'krd.sputniknews.com'.
    """
    kurdish_output = crawler.get_output(language='ku')
    crawl_udhr(crawler, kurdish_output, filename='udhr_kmr.txt')
    crawl_sputnik_news(crawler, kurdish_output, host='krd.sputniknews.com')
def crawl(crawler):
    """Run the crawl for language 'et'.

    Requests the 'et' output from ``crawler`` and passes it, in order, to
    the UDHR helper (file 'udhr_est.txt'), the Sputnik News helper for
    host 'sputnik-news.ee', and the ``_crawl_eestikirik_ee`` helper.
    """
    estonian_output = crawler.get_output(language='et')
    crawl_udhr(crawler, estonian_output, filename='udhr_est.txt')
    crawl_sputnik_news(crawler, estonian_output, host='sputnik-news.ee')
    _crawl_eestikirik_ee(crawler, estonian_output)
def crawl(crawler):
    """Run the crawl for language 'sr'.

    Requests the 'sr' output from ``crawler`` and passes it first to the
    UDHR helper (file 'udhr_srp_cyrl.txt'), then to the Sputnik News
    helper for host 'rs.sputniknews.com'.
    """
    serbian_output = crawler.get_output(language='sr')
    crawl_udhr(crawler, serbian_output, filename='udhr_srp_cyrl.txt')
    crawl_sputnik_news(crawler, serbian_output, host='rs.sputniknews.com')
def crawl(crawler):
    """Run the crawl for language 'fr'.

    Requests the 'fr' output from ``crawler`` and passes it, in order, to
    the UDHR helper (file 'udhr_fra.txt'), the Deutsche Welle helper
    (prefix '/fr/') and the Sputnik News helper for host
    'fr.sputniknews.com'.
    """
    french_output = crawler.get_output(language='fr')
    crawl_udhr(crawler, french_output, filename='udhr_fra.txt')
    crawl_deutsche_welle(crawler, french_output, prefix='/fr/')
    crawl_sputnik_news(crawler, french_output, host='fr.sputniknews.com')