コード例 #1
0
ファイル: generate_dataset_test.py プロジェクト: debymf/nl-ps
    def test_gen(self):
        """Drive the task chain by hand and hand the result to GenerateDatasetTask.

        The chain starts from an empty dict instead of parsed XML. Every task
        is invoked through its ``run`` method, the theorem/lemma/corollary
        counts are logged, and nothing is asserted: the test only fails if
        one of the calls raises.
        """
        redirects_fixed = FixRedirectsTask().run({})
        snippets_fixed = FixSnippetsTask().run(redirects_fixed)
        categories = GetCategoriesTask().run(snippets_fixed)
        curated = CurateTitlesTask().run(categories["out_entries"])
        definition_split = GetDefinitionsTask().run(
            curated, categories["definitions_category"]
        )
        statements = GetOthersTask().run(
            definition_split["others"], categories["others_categories"]
        )

        # Each count is emitted as a label line followed by a bare number line.
        for label, key in (
            ("Number of Theorems:", "theorems"),
            ("Number of Lemmas:", "lemmas"),
            ("Number of Corollaries:", "corollaries"),
        ):
            logger.info(label)
            logger.info(len(statements[key]))

        GenerateDatasetTask().run(
            definition_split["definitions"],
            statements["lemmas"],
            statements["corollaries"],
            statements["theorems"],
        )
コード例 #2
0
ファイル: generate_output_test.py プロジェクト: debymf/nl-ps
    def test_generate_output(self):
        """Run every task from XML parsing through GenerateOutputTask.

        Each task's ``run`` result is unpacked and fed to the next task. There
        are no assertions, so the test only fails when a task raises or
        returns a different number of values than the unpacking expects.
        """
        raw_xml = XMLParserTask().run()
        redirects_fixed = FixRedirectsTask().run(raw_xml)
        snippets_fixed = FixSnippetsTask().run(redirects_fixed)

        entries, definition_cats, other_cats = GetCategoriesTask().run(
            snippets_fixed
        )
        curated = CurateTitlesTask().run(entries)
        definitions, remaining = GetDefinitionsTask().run(curated, definition_cats)
        theorems, lemmas, corollaries = GetOthersTask().run(remaining, other_cats)

        # Note the order: results are unpacked lemmas-first here, whereas
        # GetOthersTask above is unpacked theorems-first.
        lemmas, corollaries, theorems = CheckPremisesTask().run(
            definitions, lemmas, corollaries, theorems
        )

        produced = GenerateOutputTask().run(
            definitions, lemmas, corollaries, theorems
        )
        # The unpacking doubles as a check that exactly four values come back.
        out_definitions, out_lemmas, out_theorems, out_corollaries = produced
コード例 #3
0
    def test_check_pemises(self):
        """Run the task chain through CheckPremisesTask and sample the results.

        The sizes of the four resulting collections are logged, then one
        random ``(title, content)`` item is drawn from each. Nothing is
        asserted; the draws raise if a collection is empty.
        """
        raw_xml = XMLParserTask().run()
        redirects_fixed = FixRedirectsTask().run(raw_xml)
        snippets_fixed = FixSnippetsTask().run(redirects_fixed)

        entries, definition_cats, other_cats = GetCategoriesTask().run(
            snippets_fixed
        )
        curated = CurateTitlesTask().run(entries)
        definitions, remaining = GetDefinitionsTask().run(curated, definition_cats)
        theorems, lemmas, corollaries = GetOthersTask().run(remaining, other_cats)
        lemmas, corollaries, theorems = CheckPremisesTask().run(
            definitions, lemmas, corollaries, theorems
        )

        # Each count is emitted as a label line followed by a bare number line.
        for label, collection in (
            ("Number of Definitions:", definitions),
            ("Number of Theorems:", theorems),
            ("Number of Lemmas:", lemmas),
            ("Number of Corollaries:", corollaries),
        ):
            logger.info(label)
            logger.info(len(collection))

        # One draw per collection, in the same order as before so the random
        # stream is consumed identically; the sampled pair itself is unused.
        for collection in (theorems, lemmas, corollaries, definitions):
            _title, _content = random.choice(list(collection.items()))
コード例 #4
0
ファイル: get_categories_test.py プロジェクト: debymf/nl-ps
 def test_categories(self):
     """Run the chain from XML parsing up to GetCategoriesTask.

     The only check is implicit: the result must unpack into exactly
     three values.
     """
     raw_xml = XMLParserTask().run()
     redirects_fixed = FixRedirectsTask().run(raw_xml)
     snippets_fixed = FixSnippetsTask().run(redirects_fixed)
     categorised = GetCategoriesTask().run(snippets_fixed)
     out_entries, definitions_category, top_categories = categorised
コード例 #5
0
ファイル: get_definitions_test.py プロジェクト: debymf/nl-ps
 def test_get_definitions(self):
     """Run the chain from XML parsing up to GetDefinitionsTask.

     No assertions are made; the test fails only if a task raises or a
     result does not unpack into the expected number of values.
     """
     raw_xml = XMLParserTask().run()
     redirects_fixed = FixRedirectsTask().run(raw_xml)
     snippets_fixed = FixSnippetsTask().run(redirects_fixed)

     entries, definition_cats, top_categories = GetCategoriesTask().run(
         snippets_fixed
     )
     curated = CurateTitlesTask().run(entries)
     split = GetDefinitionsTask().run(curated, definition_cats)
     definitions, others = split
コード例 #6
0
ファイル: curate_titles_test.py プロジェクト: debymf/nl-ps
    def test_curate_titles(self):
        """Log how many entries exist before and after CurateTitlesTask runs.

        The chain is run from XML parsing through GetCategoriesTask, whose
        result is indexed with the ``"out_entries"`` key. Nothing is
        asserted; the two sizes are only logged.
        """
        raw_xml = XMLParserTask().run()
        redirects_fixed = FixRedirectsTask().run(raw_xml)
        snippets_fixed = FixSnippetsTask().run(redirects_fixed)
        categories = GetCategoriesTask().run(snippets_fixed)
        curator = CurateTitlesTask()

        logger.info("Before curation")
        logger.info(len(categories["out_entries"]))

        # The "After curation" label is logged before the curation call
        # itself, so the log sequence matches the previous implementation.
        logger.info("After curation")
        curated = curator.run(categories["out_entries"])
        logger.info(len(curated))
コード例 #7
0
ファイル: extract_proofwiki.py プロジェクト: debymf/nl-ps
    CurateTitlesTask,
    GetDefinitionsTask,
    GetOthersTask,
    GenerateOutputTask,
)

# Keyword arguments shared by every task that should persist its output.
# NOTE(review): `Flow` and `LocalResult` suggest these are Prefect task
# options (target file template, checkpointing switch, result location) --
# confirm against the Prefect version pinned by the project.
cache_args = dict(
    target="{task_name}.pkl",
    checkpoint=True,
    # Plain string literal: there is nothing to interpolate in this path.
    result=LocalResult(dir="./cache/"),
)

# One instance per pipeline step, all configured with the shared cache settings.
parser_task = XMLParserTask(**cache_args)
fix_redirect_task = FixRedirectsTask(**cache_args)
fix_snippets_task = FixSnippetsTask(**cache_args)
get_categories_task = GetCategoriesTask(**cache_args)
curate_titles_task = CurateTitlesTask(**cache_args)
get_definitions_task = GetDefinitionsTask(**cache_args)
get_others_task = GetOthersTask(**cache_args)
# The output task is the only one built without the cache settings.
generate_output_task = GenerateOutputTask()

# Wire the task instances together inside a flow named "Run extraction flow".
# NOTE(review): presumably a Prefect `Flow` context, in which calling a task
# registers it in the flow rather than executing it immediately -- confirm.
with Flow("Run extraction flow") as flow:
    # Each call receives the output of the previous step.
    xml_file = parser_task()
    fixed_result = fix_redirect_task(xml_file)
    fixed_snippets = fix_snippets_task(fixed_result)
    out_categories = get_categories_task(fixed_snippets)
    # The categories result is indexed by key: "out_entries" feeds title
    # curation, "definitions_category" feeds definition extraction.
    curate_results = curate_titles_task(out_categories["out_entries"])
    out_definitions_task = get_definitions_task(
        curate_results, out_categories["definitions_category"])