Example #1
0
def download_wmt_monolingual(lang, maxp=5):
    """Download the WMT monolingual files for ``lang``.

    The URL list is read from ``wmt_data.monolingual[lang]`` and handed to
    ``parallelized_download`` while the process working directory is
    ``wmt-data/mono/<lang>/`` (created if it does not exist).  The caller's
    working directory is restored afterwards, even if the download raises.

    Args:
        lang: Key into ``wmt_data.monolingual``; also used as the name of
            the target subdirectory.
        maxp: Forwarded to ``parallelized_download`` as ``max_processes``.
    """
    # Resolve the URLs first so that a failed lookup neither creates an
    # empty directory nor leaves the process in a different directory.
    urls = wmt_data.monolingual[lang]

    langdir = 'wmt-data/mono/' + lang + '/'
    os.makedirs(langdir, exist_ok=True)

    # Remember the starting directory rather than assuming '../../..' leads
    # back to it; that assumption breaks if `lang` contains a path separator
    # or the path crosses a symlink.
    start_dir = os.getcwd()
    os.chdir(langdir)
    try:
        # NOTE(review): presumably fetches each URL with wget into the
        # current directory -- confirm against parallelized_download.
        parallelized_download('wget', urls, max_processes=maxp)
    finally:
        # Always undo the chdir so a failed download does not leave the
        # rest of the program running from inside the data directory.
        os.chdir(start_dir)
Example #2
0
def download_wmt_parallel(corpus_name):
    """Download and unpack the WMT parallel corpus ``corpus_name``.

    The single URL is read from ``wmt_data.parallel[corpus_name]`` and handed
    to ``parallelized_download`` while the process working directory is
    ``wmt-data/parallel/<corpus_name>/`` (created if it does not exist).
    Two ``tar`` commands are then issued there, one for ``*.tgz`` and one
    for ``*.tar``.  The caller's working directory is restored afterwards,
    even if the download or extraction raises.

    Args:
        corpus_name: Key into ``wmt_data.parallel``; also used as the name
            of the target subdirectory.
    """
    # Resolve the URL first so that a failed lookup neither creates an
    # empty directory nor leaves the process in a different directory.
    url = wmt_data.parallel[corpus_name]

    corpusdir = 'wmt-data/parallel/' + corpus_name + '/'
    os.makedirs(corpusdir, exist_ok=True)

    # Remember the starting directory rather than assuming '../../..' leads
    # back to it; that assumption breaks if `corpus_name` contains a path
    # separator or the path crosses a symlink.
    start_dir = os.getcwd()
    os.chdir(corpusdir)
    try:
        parallelized_download('wget', [url])
        # NOTE(review): the globs need shell expansion -- confirm that
        # run_command executes its argument through a shell.
        run_command('tar zxvf *.tgz')
        run_command('tar -xvf *.tar')
        # Archives named *.tar.gz are not extracted here.
    finally:
        # Always undo the chdir so a failed download or extraction does not
        # leave the rest of the program running from inside the data
        # directory.
        os.chdir(start_dir)