예제 #1
0
def crawl_modern_standard_arabic(crawler):
    """Run every crawl step that feeds the 'ar' language output.

    The output object is requested from ``crawler`` with language code
    'ar' and then handed, in this order, to ``crawl_udhr``,
    ``crawl_deutsche_welle``, ``crawl_sputnik_news`` and
    ``crawl_bbc_news``.

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded unchanged to each helper.

    Returns:
        None.
    """
    sink = crawler.get_output(language='ar')
    # Each helper receives the same crawler/output pair; only the
    # source-specific keyword differs.
    crawl_udhr(crawler, sink, filename='udhr_arb.txt')
    crawl_deutsche_welle(crawler, sink, prefix='/ar/')
    crawl_sputnik_news(crawler, sink, host='arabic.sputniknews.com')
    crawl_bbc_news(crawler, sink, urlprefix='/arabic/')
예제 #2
0
def crawl(crawler):
    """Run every crawl step that feeds the 'lv' language output.

    The output object is requested from ``crawler`` with language code
    'lv' and then handed, in this order, to ``crawl_udhr`` (file
    'udhr_lav.txt') and ``crawl_sputnik_news`` (host
    'sputniknewslv.com').

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded unchanged to each helper.

    Returns:
        None.
    """
    sink = crawler.get_output(language='lv')
    crawl_udhr(crawler, sink, filename='udhr_lav.txt')
    crawl_sputnik_news(crawler, sink, host='sputniknewslv.com')
예제 #3
0
def crawl(crawler):
    """Run every crawl step that feeds the 'kk' language output.

    The output object is requested from ``crawler`` with language code
    'kk' and then handed, in this order, to ``crawl_udhr`` (file
    'udhr_kaz.txt') and ``crawl_sputnik_news`` (host 'sputniknews.kz').

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded unchanged to each helper.

    Returns:
        None.
    """
    sink = crawler.get_output(language='kk')
    crawl_udhr(crawler, sink, filename='udhr_kaz.txt')
    crawl_sputnik_news(crawler, sink, host='sputniknews.kz')
예제 #4
0
def crawl(crawler):
    """Run the single crawl step that feeds the 'ro-MD' language output.

    The output object is requested from ``crawler`` with language code
    'ro-MD' and handed to ``crawl_sputnik_news`` (host 'sputnik.md').

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded unchanged to the helper.

    Returns:
        None.
    """
    sink = crawler.get_output(language='ro-MD')
    crawl_sputnik_news(crawler, sink, host='sputnik.md')
예제 #5
0
def crawl(crawler):
    """Run every crawl step that feeds the 'az' language output.

    The output object is requested from ``crawler`` with language code
    'az' and then handed, in this order, to ``crawl_udhr`` (file
    'udhr_azj_latn.txt') and ``crawl_sputnik_news`` (host 'sputnik.az').

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded unchanged to each helper.

    Returns:
        None.
    """
    sink = crawler.get_output(language='az')
    crawl_udhr(crawler, sink, filename='udhr_azj_latn.txt')
    crawl_sputnik_news(crawler, sink, host='sputnik.az')
예제 #6
0
def crawl(crawler):
    """Run every crawl step that feeds the 'be' language output.

    The output object is requested from ``crawler`` with language code
    'be' and then handed, in this order, to ``crawl_udhr`` (file
    'udhr_bel.txt') and ``crawl_sputnik_news`` (host 'bel.sputnik.by').

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded unchanged to each helper.

    Returns:
        None.
    """
    sink = crawler.get_output(language='be')
    crawl_udhr(crawler, sink, filename='udhr_bel.txt')
    crawl_sputnik_news(crawler, sink, host='bel.sputnik.by')
예제 #7
0
def crawl(crawler):
    """Run every crawl step that feeds the 'cs' language output.

    The output object is requested from ``crawler`` with language code
    'cs' and then handed, in this order, to ``crawl_udhr`` (file
    'udhr_ces.txt') and ``crawl_sputnik_news`` (host
    'cz.sputniknews.com').

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded unchanged to each helper.

    Returns:
        None.
    """
    sink = crawler.get_output(language='cs')
    crawl_udhr(crawler, sink, filename='udhr_ces.txt')
    crawl_sputnik_news(crawler, sink, host='cz.sputniknews.com')
예제 #8
0
def crawl(crawler):
    """Run every crawl step that feeds the 'pt' language output.

    The output object is requested from ``crawler`` with language code
    'pt' and then handed, in this order, to ``crawl_udhr`` (file
    'udhr_por_BR.txt'), ``crawl_deutsche_welle`` (prefix '/pt-br/') and
    ``crawl_sputnik_news`` (host 'br.sputniknews.com').

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded unchanged to each helper.

    Returns:
        None.
    """
    sink = crawler.get_output(language='pt')
    crawl_udhr(crawler, sink, filename='udhr_por_BR.txt')
    crawl_deutsche_welle(crawler, sink, prefix='/pt-br/')
    crawl_sputnik_news(crawler, sink, host='br.sputniknews.com')
예제 #9
0
def crawl(crawler):
    """Run every crawl step that feeds the 'it' language output.

    The output object is requested from ``crawler`` with language code
    'it' and then handed, in this order, to ``crawl_udhr`` (file
    'udhr_ita.txt'), ``crawl_sputnik_news`` (host 'it.sputniknews.com')
    and ``_crawl_iltirreno_gelocal_it``.

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded unchanged to each helper.

    Returns:
        None.
    """
    sink = crawler.get_output(language='it')
    crawl_udhr(crawler, sink, filename='udhr_ita.txt')
    crawl_sputnik_news(crawler, sink, host='it.sputniknews.com')
    # Site-specific helper; it takes no extra keyword arguments here.
    _crawl_iltirreno_gelocal_it(crawler, sink)
예제 #10
0
def crawl(crawler):
    """Run every crawl step that feeds the 'ku' language output.

    The output object is requested from ``crawler`` with language code
    'ku' and then handed, in this order, to ``crawl_udhr`` (file
    'udhr_kmr.txt') and ``crawl_sputnik_news`` (host
    'krd.sputniknews.com').

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded unchanged to each helper.

    Returns:
        None.
    """
    sink = crawler.get_output(language='ku')
    crawl_udhr(crawler, sink, filename='udhr_kmr.txt')
    crawl_sputnik_news(crawler, sink, host='krd.sputniknews.com')
예제 #11
0
def crawl(crawler):
    """Run every crawl step that feeds the 'et' language output.

    The output object is requested from ``crawler`` with language code
    'et' and then handed, in this order, to ``crawl_udhr`` (file
    'udhr_est.txt'), ``crawl_sputnik_news`` (host 'sputnik-news.ee') and
    ``_crawl_eestikirik_ee``.

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded unchanged to each helper.

    Returns:
        None.
    """
    sink = crawler.get_output(language='et')
    crawl_udhr(crawler, sink, filename='udhr_est.txt')
    crawl_sputnik_news(crawler, sink, host='sputnik-news.ee')
    # Site-specific helper; it takes no extra keyword arguments here.
    _crawl_eestikirik_ee(crawler, sink)
예제 #12
0
def crawl(crawler):
    """Run every crawl step that feeds the 'sr' language output.

    The output object is requested from ``crawler`` with language code
    'sr' and then handed, in this order, to ``crawl_udhr`` (file
    'udhr_srp_cyrl.txt') and ``crawl_sputnik_news`` (host
    'rs.sputniknews.com').

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded unchanged to each helper.

    Returns:
        None.
    """
    sink = crawler.get_output(language='sr')
    crawl_udhr(crawler, sink, filename='udhr_srp_cyrl.txt')
    crawl_sputnik_news(crawler, sink, host='rs.sputniknews.com')
예제 #13
0
def crawl(crawler):
    """Run every crawl step that feeds the 'fr' language output.

    The output object is requested from ``crawler`` with language code
    'fr' and then handed, in this order, to ``crawl_udhr`` (file
    'udhr_fra.txt'), ``crawl_deutsche_welle`` (prefix '/fr/') and
    ``crawl_sputnik_news`` (host 'fr.sputniknews.com').

    Args:
        crawler: Object exposing ``get_output(language=...)``; it is also
            forwarded unchanged to each helper.

    Returns:
        None.
    """
    sink = crawler.get_output(language='fr')
    crawl_udhr(crawler, sink, filename='udhr_fra.txt')
    crawl_deutsche_welle(crawler, sink, prefix='/fr/')
    crawl_sputnik_news(crawler, sink, host='fr.sputniknews.com')