Exemple #1
0
    def test_generate_output(self):
        """Smoke-test the pipeline from XML parsing through output generation.

        Every task is run on the result of the previous one. The test makes
        no explicit assertions: it passes when no step raises and each result
        unpacks into the expected number of values.
        """
        raw_entries = XMLParserTask().run()
        without_redirects = FixRedirectsTask().run(raw_entries)
        clean_snippets = FixSnippetsTask().run(without_redirects)

        entries, def_category, other_categories = GetCategoriesTask().run(
            clean_snippets
        )
        curated = CurateTitlesTask().run(entries)
        definitions, remaining = GetDefinitionsTask().run(curated, def_category)
        theorems, lemmas, corollaries = GetOthersTask().run(
            remaining, other_categories
        )

        # Note the unpacking order here is (lemmas, corollaries, theorems),
        # which differs from the GetOthersTask unpacking just above.
        lemmas, corollaries, theorems = CheckPremisesTask().run(
            definitions, lemmas, corollaries, theorems
        )

        # Unpacking into four names checks that exactly four values come back.
        (
            final_definitions,
            final_lemmas,
            final_theorems,
            final_corollaries,
        ) = GenerateOutputTask().run(definitions, lemmas, corollaries, theorems)
    def test_gen(self):
        """Run the pipeline on an empty dict and feed it to GenerateDatasetTask.

        In this variant the intermediate task results are indexed by string
        key (e.g. ``"out_entries"``, ``"others"``) rather than unpacked as
        tuples. The sizes of the theorem, lemma and corollary collections are
        logged before the dataset is generated.
        """
        empty_input = {}
        without_redirects = FixRedirectsTask().run(empty_input)
        clean_snippets = FixSnippetsTask().run(without_redirects)

        categories_out = GetCategoriesTask().run(clean_snippets)
        curated = CurateTitlesTask().run(categories_out["out_entries"])
        definitions_out = GetDefinitionsTask().run(
            curated, categories_out["definitions_category"]
        )
        others_out = GetOthersTask().run(
            definitions_out["others"], categories_out["others_categories"]
        )

        # Log a label line followed by the matching collection size.
        for label, key in (
            ("Number of Theorems:", "theorems"),
            ("Number of Lemmas:", "lemmas"),
            ("Number of Corollaries:", "corollaries"),
        ):
            logger.info(label)
            logger.info(len(others_out[key]))

        GenerateDatasetTask().run(
            definitions_out["definitions"],
            others_out["lemmas"],
            others_out["corollaries"],
            others_out["theorems"],
        )
Exemple #3
0
    def test_check_pemises(self):
        """Run the pipeline through CheckPremisesTask and log collection sizes.

        After the premises check, the number of definitions, theorems, lemmas
        and corollaries is logged, and one random (title, content) item is
        drawn from each collection.
        """
        raw_entries = XMLParserTask().run()
        without_redirects = FixRedirectsTask().run(raw_entries)
        clean_snippets = FixSnippetsTask().run(without_redirects)

        entries, def_category, other_categories = GetCategoriesTask().run(
            clean_snippets
        )
        curated = CurateTitlesTask().run(entries)
        definitions, remaining = GetDefinitionsTask().run(curated, def_category)
        theorems, lemmas, corollaries = GetOthersTask().run(
            remaining, other_categories
        )

        # Unpacked as (lemmas, corollaries, theorems), unlike GetOthersTask.
        lemmas, corollaries, theorems = CheckPremisesTask().run(
            definitions, lemmas, corollaries, theorems
        )

        for label, collection in (
            ("Number of Definitions:", definitions),
            ("Number of Theorems:", theorems),
            ("Number of Lemmas:", lemmas),
            ("Number of Corollaries:", corollaries),
        ):
            logger.info(label)
            logger.info(len(collection))

        # Draw one item per collection; random.choice raises IndexError on an
        # empty sequence, so an empty collection makes this test error out.
        for collection in (theorems, lemmas, corollaries, definitions):
            title, content = random.choice(list(collection.items()))
Exemple #4
0
 def test_fix_snippets(self):
     """Run parsing, redirect fixing and snippet fixing, then log one entry.

     A random (title, content) item of the FixSnippetsTask result is logged;
     no assertions are made.
     """
     raw_entries = XMLParserTask().run()
     without_redirects = FixRedirectsTask().run(raw_entries)
     clean_snippets = FixSnippetsTask().run(without_redirects)

     sample_title, sample_content = random.choice(list(clean_snippets.items()))
     logger.info(sample_title)
     logger.info(sample_content)
Exemple #5
0
 def test_categories(self):
     """Run the pipeline up to GetCategoriesTask.

     The only check is implicit: the task result must unpack into exactly
     three values.
     """
     raw_entries = XMLParserTask().run()
     without_redirects = FixRedirectsTask().run(raw_entries)
     clean_snippets = FixSnippetsTask().run(without_redirects)

     entries, def_category, top_level = GetCategoriesTask().run(clean_snippets)
Exemple #6
0
 def test_get_definitions(self):
     """Run the pipeline up to GetDefinitionsTask.

     The only checks are implicit: GetCategoriesTask must return three
     values and GetDefinitionsTask must return two.
     """
     raw_entries = XMLParserTask().run()
     without_redirects = FixRedirectsTask().run(raw_entries)
     clean_snippets = FixSnippetsTask().run(without_redirects)

     entries, def_category, top_level = GetCategoriesTask().run(clean_snippets)
     curated = CurateTitlesTask().run(entries)
     definitions, remaining = GetDefinitionsTask().run(curated, def_category)
Exemple #7
0
    def test_fix_redirect(self):
        """Check that FixRedirectsTask leaves no redirect markers behind.

        Parses the XML dump, runs FixRedirectsTask on it, and asserts that no
        entry content in the result contains "#redirect" (case-insensitive).
        A random entry and the entry count are logged before and after the
        fix.
        """
        parser = XMLParserTask()
        xml_file = parser.run()
        logger.info(random.choice(list(xml_file.values())))
        fix_redirect = FixRedirectsTask()
        fixed_result = fix_redirect.run(xml_file)

        # Only the contents matter here, so iterate the values directly.
        # assertNotIn reports the offending content on failure, whereas
        # assertFalse(... in ...) would only say "True is not false".
        for content in fixed_result.values():
            self.assertNotIn("#redirect", content.lower())

        logger.info(random.choice(list(fixed_result.values())))
        logger.info("Total of entries before removing redirects")
        logger.info(len(xml_file))

        logger.info("Total of entries after removing redirects")
        logger.info(len(fixed_result))
Exemple #8
0
    def test_curate_titles(self):
        """Log how many entries exist before and after CurateTitlesTask runs.

        The pipeline is run up to GetCategoriesTask, whose result is indexed
        with the "out_entries" key; that collection is then curated.
        """
        raw_entries = XMLParserTask().run()
        without_redirects = FixRedirectsTask().run(raw_entries)
        clean_snippets = FixSnippetsTask().run(without_redirects)
        categories_out = GetCategoriesTask().run(clean_snippets)

        # The task is built before any logging, matching the original order.
        curator = CurateTitlesTask()
        uncurated = categories_out["out_entries"]

        logger.info("Before curation")
        logger.info(len(uncurated))
        logger.info("After curation")
        curated = curator.run(categories_out["out_entries"])
        logger.info(len(curated))
Exemple #9
0
    FixSnippetsTask,
    GetCategoriesTask,
    CurateTitlesTask,
    GetDefinitionsTask,
    GetOthersTask,
    GenerateOutputTask,
)

# Keyword arguments shared by every cached task below.
# NOTE(review): "{task_name}" is left as a plain string on purpose; it is
# presumably a target template filled in by the task framework at runtime,
# so it must not become an f-string. Confirm against the framework docs.
cache_args = dict(
    target="{task_name}.pkl",
    checkpoint=True,
    # Plain literal: the original f-string prefix had no placeholders.
    result=LocalResult(dir="./cache/"),
)

parser_task = XMLParserTask(**cache_args)
fix_redirect_task = FixRedirectsTask(**cache_args)
fix_snippets_task = FixSnippetsTask(**cache_args)
get_categories_task = GetCategoriesTask(**cache_args)
curate_titles_task = CurateTitlesTask(**cache_args)
get_definitions_task = GetDefinitionsTask(**cache_args)
get_others_task = GetOthersTask(**cache_args)
# The output task is the only one built without the shared cache arguments.
generate_output_task = GenerateOutputTask()

with Flow("Run extraction flow") as flow:
    xml_file = parser_task()
    fixed_result = fix_redirect_task(xml_file)
    fixed_snippets = fix_snippets_task(fixed_result)
    out_categories = get_categories_task(fixed_snippets)
    curate_results = curate_titles_task(out_categories["out_entries"])