Exemple #1
0
def crawl(crawler):
    """Run every crawl source listed here against the 'id' output.

    The output object is requested once from ``crawler`` and then handed
    to each source in turn: the crawler's own ``crawl_abc_net_au`` and
    ``crawl_voice_of_america`` methods first, followed by the module-level
    BBC News, Deutsche Welle, UDHR and ``crawl_bibleis`` helpers.
    """
    sink = crawler.get_output(language='id')
    crawler.crawl_abc_net_au(sink, program_id='indonesian')
    crawler.crawl_voice_of_america(sink, host='voaindonesia.com')
    crawl_bbc_news(crawler, sink, urlprefix='/indonesia/')
    crawl_deutsche_welle(crawler, sink, prefix='/id/')
    crawl_udhr(crawler, sink, filename='udhr_ind.txt')
    # Three bible identifiers are crawled, in this fixed order.
    for bible_id in ('INDASV', 'INDWBT', 'INDSHV'):
        crawl_bibleis(crawler, sink, bible=bible_id)
Exemple #2
0
def crawl(crawler):
    """Feed the 'uk' output from the UDHR file and Deutsche Welle.

    The output object comes from ``crawler.get_output`` and is shared by
    both helper calls.
    """
    output = crawler.get_output(language='uk')
    crawl_udhr(crawler, output, filename='udhr_ukr.txt')
    crawl_deutsche_welle(crawler, output, prefix='/uk/')
Exemple #3
0
def crawl(crawler):
    """Feed the 'hr' output from the UDHR file and Deutsche Welle.

    A single output object is obtained from ``crawler`` and passed to
    ``crawl_udhr`` and then to ``crawl_deutsche_welle``.
    """
    dest = crawler.get_output(language='hr')
    crawl_udhr(crawler, dest, filename='udhr_hrv.txt')
    crawl_deutsche_welle(crawler, dest, prefix='/hr/')
Exemple #4
0
def crawl(crawler):
    """Feed the 'pt' output from UDHR, Deutsche Welle and Sputnik News.

    All three sources are configured with Brazilian variants: the
    ``udhr_por_BR.txt`` file, the ``/pt-br/`` Deutsche Welle prefix and
    the ``br.sputniknews.com`` host.
    """
    target = crawler.get_output(language='pt')
    crawl_udhr(crawler, target, filename='udhr_por_BR.txt')
    crawl_deutsche_welle(crawler, target, prefix='/pt-br/')
    crawl_sputnik_news(crawler, target, host='br.sputniknews.com')
Exemple #5
0
def crawl(crawler):
    """Feed the 'fa' output from the UDHR file and Deutsche Welle.

    Uses the ``udhr_pes_1.txt`` file and the ``/fa-ir/`` Deutsche Welle
    prefix, both written to the same output object.
    """
    sink = crawler.get_output(language='fa')
    crawl_udhr(crawler, sink, filename='udhr_pes_1.txt')
    crawl_deutsche_welle(crawler, sink, prefix='/fa-ir/')
Exemple #6
0
def crawl_dari(crawler):
    """Feed the 'fa-AF' output from the UDHR file and Deutsche Welle.

    Uses the ``udhr_pes_2.txt`` file and the ``/fa-af/`` Deutsche Welle
    prefix, both written to the same output object.
    """
    output = crawler.get_output(language='fa-AF')
    crawl_udhr(crawler, output, filename='udhr_pes_2.txt')
    crawl_deutsche_welle(crawler, output, prefix='/fa-af/')
Exemple #7
0
def crawl(crawler):
    """Feed the 'ru' output from the UDHR file and Deutsche Welle.

    The output object is fetched once from ``crawler`` and reused for
    both helper calls.
    """
    dest = crawler.get_output(language='ru')
    crawl_udhr(crawler, dest, filename='udhr_rus.txt')
    crawl_deutsche_welle(crawler, dest, prefix='/ru/')
Exemple #8
0
def crawl_modern_standard_arabic(crawler):
    """Feed the 'ar' output from four sources, in a fixed order.

    The sources are the UDHR file, Deutsche Welle, Sputnik News and BBC
    News; every helper receives the same output object obtained from
    ``crawler``.
    """
    target = crawler.get_output(language='ar')
    crawl_udhr(crawler, target, filename='udhr_arb.txt')
    crawl_deutsche_welle(crawler, target, prefix='/ar/')
    crawl_sputnik_news(crawler, target, host='arabic.sputniknews.com')
    crawl_bbc_news(crawler, target, urlprefix='/arabic/')
Exemple #9
0
def crawl(crawler):
    """Feed the 'el' output from the UDHR file and Deutsche Welle.

    The UDHR source is the ``udhr_ell_monotonic.txt`` file; Deutsche
    Welle is crawled under the ``/el/`` prefix.
    """
    sink = crawler.get_output(language='el')
    crawl_udhr(crawler, sink, filename='udhr_ell_monotonic.txt')
    crawl_deutsche_welle(crawler, sink, prefix='/el/')
Exemple #10
0
def crawl(crawler):
    """Feed the 'sw' output from UDHR, BBC News and Deutsche Welle.

    One output object is obtained from ``crawler`` and passed to the
    three helpers in that order.
    """
    output = crawler.get_output(language='sw')
    crawl_udhr(crawler, output, filename='udhr_swh.txt')
    crawl_bbc_news(crawler, output, urlprefix='/swahili/')
    crawl_deutsche_welle(crawler, output, prefix='/sw/')
Exemple #11
0
def crawl(crawler):
    """Feed the 'sr-Latn' output from the UDHR file and Deutsche Welle.

    Uses the ``udhr_srp_latn.txt`` file and the ``/sr/`` Deutsche Welle
    prefix.
    """
    dest = crawler.get_output(language='sr-Latn')
    crawl_udhr(crawler, dest, filename='udhr_srp_latn.txt')
    crawl_deutsche_welle(crawler, dest, prefix='/sr/')
Exemple #12
0
def crawl(crawler):
    """Feed the 'ro' output from the UDHR file and Deutsche Welle.

    Uses the ``udhr_ron_2006.txt`` file and the ``/ro/`` Deutsche Welle
    prefix.
    """
    target = crawler.get_output(language='ro')
    crawl_udhr(crawler, target, filename='udhr_ron_2006.txt')
    crawl_deutsche_welle(crawler, target, prefix='/ro/')
Exemple #13
0
def crawl(crawler):
    """Feed the 'bs' output from the UDHR file and Deutsche Welle.

    Uses the ``udhr_bos_latn.txt`` file and the ``/bs/`` Deutsche Welle
    prefix.
    """
    sink = crawler.get_output(language='bs')
    crawl_udhr(crawler, sink, filename='udhr_bos_latn.txt')
    crawl_deutsche_welle(crawler, sink, prefix='/bs/')
Exemple #14
0
def crawl(crawler):
    """Feed the 'am' output from UDHR, Deutsche Welle and crawl_bibleis.

    The same output object from ``crawler`` is passed to all three
    helpers; the bible identifier ``AMHEVG`` is given positionally.
    """
    output = crawler.get_output(language='am')
    crawl_udhr(crawler, output, filename='udhr_amh.txt')
    crawl_deutsche_welle(crawler, output, prefix='/am/')
    crawl_bibleis(crawler, output, 'AMHEVG')
Exemple #15
0
def crawl(crawler):
    """Feed the 'es' output from UDHR, BBC News and Deutsche Welle.

    BBC News is crawled under the ``/mundo/`` URL prefix and Deutsche
    Welle under ``/es/``.
    """
    dest = crawler.get_output(language='es')
    crawl_udhr(crawler, dest, filename='udhr_spa.txt')
    crawl_bbc_news(crawler, dest, urlprefix='/mundo/')
    crawl_deutsche_welle(crawler, dest, prefix='/es/')
Exemple #16
0
def crawl(crawler):
    """Feed the 'pl' output from the UDHR file and Deutsche Welle.

    The output object is fetched once from ``crawler`` and reused for
    both helper calls.
    """
    target = crawler.get_output(language='pl')
    crawl_udhr(crawler, target, filename='udhr_pol.txt')
    crawl_deutsche_welle(crawler, target, prefix='/pl/')
Exemple #17
0
def crawl(crawler):
    """Feed the 'ps' output from the UDHR file and Deutsche Welle.

    The Deutsche Welle helper is called with ``need_percent_in_url=True``
    in addition to the ``/ps/`` prefix.
    """
    sink = crawler.get_output(language='ps')
    crawl_udhr(crawler, sink, filename='udhr_pbu.txt')
    crawl_deutsche_welle(
        crawler,
        sink,
        prefix='/ps/',
        need_percent_in_url=True,
    )
Exemple #18
0
def crawl(crawler):
    """Feed the 'de' output from the UDHR file and Deutsche Welle.

    Uses the ``udhr_deu_1996.txt`` file and the ``/de/`` Deutsche Welle
    prefix.
    """
    output = crawler.get_output(language='de')
    crawl_udhr(crawler, output, filename='udhr_deu_1996.txt')
    crawl_deutsche_welle(crawler, output, prefix='/de/')
Exemple #19
0
def crawl(crawler):
    """Feed the 'bg' output from UDHR, Deutsche Welle and crawl_bibleis.

    The same output object from ``crawler`` is passed to all three
    helpers; the bible identifier ``BULB40`` is given positionally.
    """
    dest = crawler.get_output(language='bg')
    crawl_udhr(crawler, dest, filename='udhr_bul.txt')
    crawl_deutsche_welle(crawler, dest, prefix='/bg/')
    # There's audio for one part of Matthew under BLGAMB that matches
    # this text.
    crawl_bibleis(crawler, dest, 'BULB40')
Exemple #20
0
def crawl(crawler):
    """Feed the 'id' output from the UDHR file and Deutsche Welle.

    Uses the ``udhr_ind.txt`` file and the ``/id/`` Deutsche Welle
    prefix.
    """
    target = crawler.get_output(language='id')
    crawl_udhr(crawler, target, filename='udhr_ind.txt')
    crawl_deutsche_welle(crawler, target, prefix='/id/')
Exemple #21
0
def crawl(crawler):
    """Feed the 'fr' output from UDHR, Deutsche Welle and Sputnik News.

    One output object is obtained from ``crawler`` and passed to the
    three helpers in that order.
    """
    sink = crawler.get_output(language='fr')
    crawl_udhr(crawler, sink, filename='udhr_fra.txt')
    crawl_deutsche_welle(crawler, sink, prefix='/fr/')
    crawl_sputnik_news(crawler, sink, host='fr.sputniknews.com')