# Example no. 1
# 0
def download_cateogries(src_language, dst_language, src_c, dst_c):
    """Fetch, clean, translate and load one pair of parallel categories.

    The steps run strictly in this order:

    1. Fetch each category to files with ``WikiFetcher`` (using the raw,
       unquoted category names).
    2. URL-quote both category names; every later step receives the
       quoted form.
    3. Run ``clean_english_articles_with_spanish_parallels`` on the pair.
    4. Translate each category into the other language with
       ``CategoryTranslator``.
    5. Call ``load_category`` for each side.

    Args:
        src_language: Source language identifier. Passed to ``WikiFetcher``
            and ``load_category`` and used as a key into
            ``Language.path_to_lang``.
        dst_language: Destination language identifier, used the same way.
        src_c: Category name in the source language (unquoted).
        dst_c: Category name in the destination language (unquoted).

    Returns:
        None.

    Raises:
        KeyError: presumably, if a language identifier is missing from
            ``Language.path_to_lang`` (assuming it is a plain mapping --
            TODO confirm).
    """
    # `urllib.quote` only exists on Python 2; on Python 3 the function
    # lives in `urllib.parse`. Resolve it up front so the fetch step is not
    # wasted on an interpreter where the quoting call would fail.
    try:
        from urllib.parse import quote  # Python 3
    except ImportError:
        from urllib import quote  # Python 2

    # NOTE(review): the numeric arguments (1000 / 400, None, 25) are passed
    # straight through to WikiFetcher; their meaning is not visible here --
    # confirm against the WikiFetcher constructor.
    WikiFetcher(src_language, src_c, 1000, None, 25).fetch_to_files()
    WikiFetcher(dst_language, dst_c, 400, None, 25).fetch_to_files()

    src_c = quote(src_c)
    dst_c = quote(dst_c)
    # NOTE(review): the helper's name hard-codes English/Spanish although
    # this function accepts arbitrary languages -- confirm it is correct
    # for other language pairs.
    clean_english_articles_with_spanish_parallels(src_c, dst_c)

    src_lang = Language(Language.path_to_lang[src_language])
    dst_lang = Language(Language.path_to_lang[dst_language])
    CategoryTranslator(src_lang, [dst_lang], src_c).do_translation()
    CategoryTranslator(dst_lang, [src_lang], dst_c).do_translation()

    load_category(src_language, src_c)
    load_category(dst_language, dst_c)
# Example no. 2
# 0

__author__ = 'Niv & Ori'


def download_cateogries(language, categories, max_articles_num):
    """Do nothing: every step of this helper is currently disabled.

    The three parameters are accepted but unused, and the function
    returns ``None``. The commented-out code below records the steps
    that used to run here.
    """
    # Disabled step 1 -- fetch every category to files:
    #     for category in categories:
    #         wf = WikiFetcher(language, category, max_articles_num)
    #         wf.fetch_to_files()
    #
    # Disabled step 2 -- translate categories from Spanish to English.
    # NOTE(review): `es_categories` is not a parameter of this function;
    # it would have to come from module scope -- confirm before re-enabling.
    #     for c in es_categories:
    #         tr = CategoryTranslator(Language(Language.Spanish), [Language(Language.English)], c)
    #         tr.do_translation()
    return None



# Previously used category pairs (disabled):
# en_cs = ['Asian_art', 'Latin_American_art']
# es_cs = ['Arte_de_Asia', 'Arte_latinoamericano']

# English category names and the Spanish category names loaded alongside them.
en_cs = ['Black_holes', 'Dark_matter']
es_cs = ['Agujeros_negros', 'Materia_oscura']

# Load every English category first, then every Spanish one.
# The clean_untranslated_articels(<lang>, c) call that used to precede each
# load is currently disabled.
for lang_code, names in (('en', en_cs), ('es', es_cs)):
    for c in names:
        load_category(lang_code, c)