Example #1
0
def crawl(crawler):
    """Run every crawl that feeds the 'my' and 'my-t-d0-zawgyi' outputs.

    Two separate outputs are requested from ``crawler``: the UDHR file and
    the BBC News crawl write to the 'my' output, while the Than Lwin Times
    crawl writes to the 'my-t-d0-zawgyi' output.
    """
    main_output = crawler.get_output(language='my')
    crawl_udhr(crawler, main_output, filename='udhr_mya.txt')
    crawl_bbc_news(crawler, main_output, urlprefix='/burmese/')

    # NOTE(review): the tag suggests this source is Zawgyi-encoded and must
    # be kept apart from the 'my' output -- confirm against the crawler docs.
    zawgyi_output = crawler.get_output(language='my-t-d0-zawgyi')
    crawl_than_lwin_times(crawler, zawgyi_output)
Example #2
0
def crawl(crawler):
    """Run every crawl that feeds the output for language code 'id'.

    The sources are visited in a fixed order: ABC (program 'indonesian'),
    Voice of America (voaindonesia.com), BBC News ('/indonesia/'),
    Deutsche Welle ('/id/'), the UDHR file, and finally three bibleis
    editions.
    """
    output = crawler.get_output(language='id')

    # These two crawls are methods on the crawler object itself.
    crawler.crawl_abc_net_au(output, program_id='indonesian')
    crawler.crawl_voice_of_america(output, host='voaindonesia.com')

    # The remaining crawls are module-level helpers.
    crawl_bbc_news(crawler, output, urlprefix='/indonesia/')
    crawl_deutsche_welle(crawler, output, prefix='/id/')
    crawl_udhr(crawler, output, filename='udhr_ind.txt')

    # Same call for each edition; the tuple keeps the original order.
    for bible_id in ('INDASV', 'INDWBT', 'INDSHV'):
        crawl_bibleis(crawler, output, bible=bible_id)
Example #3
0
def crawl(crawler):
    """Run every crawl that feeds the output for language code 'cy'.

    Sources, in order: the UDHR file 'udhr_cym.txt', then BBC News under
    the '/cymrufyw/' URL prefix.
    """
    output = crawler.get_output(language='cy')
    crawl_udhr(crawler, output, filename='udhr_cym.txt')
    crawl_bbc_news(crawler, output, urlprefix='/cymrufyw/')
Example #4
0
def crawl(crawler):
    """Run every crawl that feeds the output for language code 'ja'.

    Sources, in order: the UDHR file 'udhr_jpn.txt', then BBC News under
    the '/japanese/' URL prefix.
    """
    output = crawler.get_output(language='ja')
    crawl_udhr(crawler, output, filename='udhr_jpn.txt')
    crawl_bbc_news(crawler, output, urlprefix='/japanese/')
Example #5
0
def crawl(crawler):
    """Run every crawl that feeds the output for language code 'si'.

    Sources, in order: the UDHR file 'udhr_sin.txt', then BBC News under
    the '/sinhala/' URL prefix.
    """
    output = crawler.get_output(language='si')
    crawl_udhr(crawler, output, filename='udhr_sin.txt')
    crawl_bbc_news(crawler, output, urlprefix='/sinhala/')
Example #6
0
def crawl(crawler):
    """Run every crawl that feeds the output for language code 'ky'.

    Sources, in order: the UDHR file 'udhr_kir.txt', BBC News under the
    '/kyrgyz/' URL prefix, and the azattyk.org crawl.
    """
    output = crawler.get_output(language='ky')
    crawl_udhr(crawler, output, filename='udhr_kir.txt')
    crawl_bbc_news(crawler, output, urlprefix='/kyrgyz/')
    crawl_azattyk_org(crawler, output)
Example #7
0
def crawl(crawler):
    """Run every crawl that feeds the output for language code 'gd'.

    Sources, in order: the UDHR file 'udhr_gla.txt', the module-private
    DASG crawl, and BBC News under the '/naidheachdan/' URL prefix.
    """
    output = crawler.get_output(language='gd')
    crawl_udhr(crawler, output, filename='udhr_gla.txt')
    _crawl_dasg(crawler, output)
    crawl_bbc_news(crawler, output, urlprefix='/naidheachdan/')
Example #8
0
def crawl(crawler):
    """Run every crawl that feeds the output for language code 'pcm'.

    Sources, in order: the UDHR file 'udhr_pcm.txt', then BBC News under
    the '/pidgin/' URL prefix.
    """
    output = crawler.get_output(language='pcm')
    crawl_udhr(crawler, output, filename='udhr_pcm.txt')
    crawl_bbc_news(crawler, output, urlprefix='/pidgin/')
Example #9
0
def crawl(crawler):
    """Run every crawl that feeds the output for language code 'sw'.

    Sources, in order: the UDHR file 'udhr_swh.txt', BBC News under the
    '/swahili/' URL prefix, and Deutsche Welle under the '/sw/' prefix.
    """
    output = crawler.get_output(language='sw')
    crawl_udhr(crawler, output, filename='udhr_swh.txt')
    crawl_bbc_news(crawler, output, urlprefix='/swahili/')
    crawl_deutsche_welle(crawler, output, prefix='/sw/')
Example #10
0
def crawl(crawler):
    """Run every crawl that feeds the output for language code 'es'.

    Sources, in order: the UDHR file 'udhr_spa.txt', BBC News under the
    '/mundo/' URL prefix, and Deutsche Welle under the '/es/' prefix.
    """
    output = crawler.get_output(language='es')
    crawl_udhr(crawler, output, filename='udhr_spa.txt')
    crawl_bbc_news(crawler, output, urlprefix='/mundo/')
    crawl_deutsche_welle(crawler, output, prefix='/es/')
Example #11
0
def crawl(crawler):
    """Run every crawl that feeds the output for language code 'vi'.

    Sources, in order: the UDHR file 'udhr_vie.txt', BBC News under the
    '/vietnamese/' URL prefix, and Voice of America on voatiengviet.com.
    """
    output = crawler.get_output(language='vi')
    crawl_udhr(crawler, output, filename='udhr_vie.txt')
    crawl_bbc_news(crawler, output, urlprefix='/vietnamese/')
    # Unlike the helpers above, this crawl is a method on the crawler.
    crawler.crawl_voice_of_america(output, host='voatiengviet.com')
Example #12
0
def crawl(crawler):
    """Run every crawl that feeds the output for language code 'my'.

    Sources, in order: the UDHR file 'udhr_mya.txt', then BBC News under
    the '/burmese/' URL prefix.
    """
    output = crawler.get_output(language='my')
    crawl_udhr(crawler, output, filename='udhr_mya.txt')
    crawl_bbc_news(crawler, output, urlprefix='/burmese/')
Example #13
0
def crawl(crawler):
    """Run every crawl that feeds the output for language code 'ur'.

    Sources, in order: the UDHR file 'udhr_urd.txt', then BBC News under
    the '/urdu/' URL prefix.
    """
    output = crawler.get_output(language='ur')
    crawl_udhr(crawler, output, filename='udhr_urd.txt')
    crawl_bbc_news(crawler, output, urlprefix='/urdu/')
Example #14
0
def crawl(crawler):
    """Run every crawl that feeds the output for language code 'ta'.

    Sources, in order: the UDHR file 'udhr_tam.txt', then BBC News under
    the '/tamil/' URL prefix.
    """
    output = crawler.get_output(language='ta')
    crawl_udhr(crawler, output, filename='udhr_tam.txt')
    crawl_bbc_news(crawler, output, urlprefix='/tamil/')
Example #15
0
def crawl(crawler):
    """Run every crawl that feeds the output for language code 'rw'.

    Sources, in order: the UDHR file 'udhr_kin.txt', then BBC News under
    the '/gahuza/' URL prefix.
    """
    output = crawler.get_output(language='rw')
    crawl_udhr(crawler, output, filename='udhr_kin.txt')
    crawl_bbc_news(crawler, output, urlprefix='/gahuza/')
Example #16
0
def crawl_modern_standard_arabic(crawler):
    """Run every crawl that feeds the output for language code 'ar'.

    Sources, in order: the UDHR file 'udhr_arb.txt', Deutsche Welle under
    the '/ar/' prefix, Sputnik News on arabic.sputniknews.com, and BBC News
    under the '/arabic/' URL prefix.
    """
    output = crawler.get_output(language='ar')
    crawl_udhr(crawler, output, filename='udhr_arb.txt')
    crawl_deutsche_welle(crawler, output, prefix='/ar/')
    crawl_sputnik_news(crawler, output, host='arabic.sputniknews.com')
    crawl_bbc_news(crawler, output, urlprefix='/arabic/')
Example #17
0
def crawl(crawler):
    """Run every crawl that feeds the output for language code 'so'.

    Sources, in order: the UDHR file 'udhr_som.txt', then BBC News under
    the '/somali/' URL prefix.
    """
    output = crawler.get_output(language='so')
    crawl_udhr(crawler, output, filename='udhr_som.txt')
    crawl_bbc_news(crawler, output, urlprefix='/somali/')