def download_cateogries(src_language, dst_language, src_c, dst_c):
    """Fetch, clean, translate and load one pair of parallel categories.

    The steps run strictly in this order:

    1. Fetch the source category, then the destination category, to
       files through ``WikiFetcher.fetch_to_files()``.
    2. Percent-encode both category names with ``urllib.quote``; every
       later step receives the quoted names.
    3. Call ``clean_english_articles_with_spanish_parallels`` on the
       quoted pair.
    4. Translate each category towards the other language with
       ``CategoryTranslator.do_translation()``.
    5. Call ``load_category`` for the source and then the destination.

    :param src_language: key into ``Language.path_to_lang`` for the
        source side; also passed as-is to ``WikiFetcher`` and
        ``load_category``.
    :param dst_language: same as ``src_language``, for the destination.
    :param src_c: unquoted category name on the source side.
    :param dst_c: unquoted category name on the destination side.
    """
    # NOTE(review): 1000 / 400 are presumably per-category article
    # limits; the meaning of ``None`` and ``25`` is not visible from
    # here -- confirm against WikiFetcher's signature.
    source_fetcher = WikiFetcher(src_language, src_c, 1000, None, 25)
    source_fetcher.fetch_to_files()
    target_fetcher = WikiFetcher(dst_language, dst_c, 400, None, 25)
    target_fetcher.fetch_to_files()

    # The fetchers above get the raw names; everything below gets the
    # percent-encoded ones.
    quoted_src = urllib.quote(src_c)
    quoted_dst = urllib.quote(dst_c)

    clean_english_articles_with_spanish_parallels(quoted_src, quoted_dst)

    source_lang = Language(Language.path_to_lang[src_language])
    target_lang = Language(Language.path_to_lang[dst_language])

    # Translate in both directions, source category first.
    forward = CategoryTranslator(source_lang, [target_lang], quoted_src)
    forward.do_translation()
    backward = CategoryTranslator(target_lang, [source_lang], quoted_dst)
    backward.do_translation()

    load_category(src_language, quoted_src)
    load_category(dst_language, quoted_dst)
__author__ = 'Niv & Ori'


def download_cateogries(language, categories, max_articles_num):
    """Placeholder that currently does nothing and returns ``None``.

    The arguments are accepted but unused. The previous body, which is
    disabled, created a ``WikiFetcher(language, category,
    max_articles_num)`` per category and called ``fetch_to_files()`` on
    it, followed by a Spanish-to-English ``CategoryTranslator`` pass.
    """


# Category sets to load. An earlier pair of sets, kept for reference:
#   en: ['Asian_art', 'Latin_American_art']
#   es: ['Arte_de_Asia', 'Arte_latinoamericano']
en_cs = ['Black_holes', 'Dark_matter']
es_cs = ['Agujeros_negros', 'Materia_oscura']

# Load every English category first, then every Spanish one. A
# per-category clean_untranslated_articels(<lang>, c) call used to run
# before each load and is currently disabled.
for _lang_code, _category_names in (('en', en_cs), ('es', es_cs)):
    for c in _category_names:
        load_category(_lang_code, c)