def download_wmt_monolingual(lang, maxp=5):
    """Download the monolingual WMT files for ``lang`` into ``wmt-data/mono/<lang>/``.

    The target directory is created if needed, and the process temporarily
    changes into it while the download runs. The caller's working directory
    is restored afterwards, even when the lookup or the download raises.

    Args:
        lang: Key into ``wmt_data.monolingual``; also used as the directory
            name under ``wmt-data/mono/``.
        maxp: Forwarded to ``parallelized_download`` as ``max_processes``.
    """
    langdir = 'wmt-data/mono/' + lang + '/'
    os.makedirs(langdir, exist_ok=True)

    # Remember where we started so we can return there by absolute path.
    # A relative '../../..' hop is wrong if ``lang`` contains a separator or
    # the path crosses a symlink, and is skipped entirely on an exception.
    start_dir = os.getcwd()
    os.chdir(langdir)
    try:
        _urls = wmt_data.monolingual[lang]
        # NOTE(review): presumably 'wget' writes into the current working
        # directory, which is why we chdir first -- confirm against
        # parallelized_download.
        parallelized_download('wget', _urls, max_processes=maxp)
    finally:
        os.chdir(start_dir)
def download_wmt_parallel(corpus_name):
    """Download and unpack a parallel WMT corpus into ``wmt-data/parallel/<corpus_name>/``.

    The target directory is created if needed, and the process temporarily
    changes into it while the archive is fetched and extracted. The caller's
    working directory is restored afterwards, even when a step raises.

    Args:
        corpus_name: Key into ``wmt_data.parallel``; also used as the
            directory name under ``wmt-data/parallel/``.
    """
    corpusdir = 'wmt-data/parallel/' + corpus_name + '/'
    os.makedirs(corpusdir, exist_ok=True)

    # Remember where we started so we can return there by absolute path.
    # A relative '../../..' hop is wrong if ``corpus_name`` contains a
    # separator or the path crosses a symlink, and is skipped on an exception.
    start_dir = os.getcwd()
    os.chdir(corpusdir)
    try:
        url = wmt_data.parallel[corpus_name]
        parallelized_download('wget', [url])
        # Both extraction commands are always issued; whichever glob has no
        # match is expected to be a no-op or a failure inside run_command.
        # NOTE(review): the globs rely on run_command going through a shell
        # -- confirm. Archives named *.tar.gz are not extracted here.
        run_command('tar zxvf *.tgz')
        run_command('tar -xvf *.tar')
    finally:
        os.chdir(start_dir)