def test_gen(self):
    """Smoke-test the pipeline through dataset generation.

    No XML is parsed here: an empty ``dict`` stands in for the parser output
    and is pushed through every stage. Stage results are read by key
    (``"out_entries"``, ``"definitions_category"``, ``"others"``, ...), the
    sizes of the theorem, lemma and corollary collections are logged, and the
    collections are handed to ``GenerateDatasetTask``.

    There are no assertions; the test passes as long as no stage raises.
    """
    empty_input = {}
    redirects_fixed = FixRedirectsTask().run(empty_input)
    snippets_fixed = FixSnippetsTask().run(redirects_fixed)
    categories = GetCategoriesTask().run(snippets_fixed)
    curated = CurateTitlesTask().run(categories["out_entries"])
    split = GetDefinitionsTask().run(curated, categories["definitions_category"])
    others = GetOthersTask().run(split["others"], categories["others_categories"])

    # Each count is emitted as two records: the label first, then the number.
    for label, key in (
        ("Number of Theorems:", "theorems"),
        ("Number of Lemmas:", "lemmas"),
        ("Number of Corollaries:", "corollaries"),
    ):
        logger.info(label)
        logger.info(len(others[key]))

    # Argument order is definitions, lemmas, corollaries, theorems.
    GenerateDatasetTask().run(
        split["definitions"],
        others["lemmas"],
        others["corollaries"],
        others["theorems"],
    )
def test_generate_output(self):
    """Smoke-test the whole pipeline, from XML parsing to output generation.

    Every stage is constructed with default arguments and fed the previous
    stage's result; multi-valued results are consumed by tuple unpacking.

    There are no assertions. The only implicit check is the final four-way
    unpacking, which fails if ``GenerateOutputTask.run`` does not return
    exactly four values.
    """
    raw_xml = XMLParserTask().run()
    redirects_fixed = FixRedirectsTask().run(raw_xml)
    snippets_fixed = FixSnippetsTask().run(redirects_fixed)

    entries, definition_cats, other_cats = GetCategoriesTask().run(snippets_fixed)
    curated = CurateTitlesTask().run(entries)

    definitions, remaining = GetDefinitionsTask().run(curated, definition_cats)
    theorems, lemmas, corollaries = GetOthersTask().run(remaining, other_cats)

    # CheckPremisesTask returns its results in a different order
    # (lemmas, corollaries, theorems) than GetOthersTask does.
    lemmas, corollaries, theorems = CheckPremisesTask().run(
        definitions, lemmas, corollaries, theorems
    )

    # The outputs are not inspected; unpacking only checks the result's arity.
    _out_defs, _out_lemmas, _out_theorems, _out_corollaries = GenerateOutputTask().run(
        definitions, lemmas, corollaries, theorems
    )
def test_check_pemises(self):
    """Smoke-test the pipeline up to and including the premise check.

    After ``CheckPremisesTask`` has run, the size of each collection
    (definitions, theorems, lemmas, corollaries) is logged and one random
    ``(title, content)`` item is drawn from each of them.

    There are no explicit assertions. The random draws act as an implicit
    non-emptiness check, because ``random.choice`` raises ``IndexError`` on
    an empty sequence.

    The method name keeps its original spelling so that selecting the test
    by name is unaffected.
    """
    raw_xml = XMLParserTask().run()
    redirects_fixed = FixRedirectsTask().run(raw_xml)
    snippets_fixed = FixSnippetsTask().run(redirects_fixed)

    entries, definition_cats, other_cats = GetCategoriesTask().run(snippets_fixed)
    curated = CurateTitlesTask().run(entries)

    definitions, remaining = GetDefinitionsTask().run(curated, definition_cats)
    theorems, lemmas, corollaries = GetOthersTask().run(remaining, other_cats)
    lemmas, corollaries, theorems = CheckPremisesTask().run(
        definitions, lemmas, corollaries, theorems
    )

    # Each count is emitted as two records: the label first, then the number.
    for label, collection in (
        ("Number of Definitions:", definitions),
        ("Number of Theorems:", theorems),
        ("Number of Lemmas:", lemmas),
        ("Number of Corollaries:", corollaries),
    ):
        logger.info(label)
        logger.info(len(collection))

    # Draw one item per collection; the sampled values themselves are unused.
    for collection in (theorems, lemmas, corollaries, definitions):
        _title, _content = random.choice(list(collection.items()))
def test_categories(self):
    """Smoke-test the pipeline up to and including category extraction.

    There are no assertions. The only implicit check is the three-way
    unpacking, which fails if ``GetCategoriesTask.run`` does not return
    exactly three values.
    """
    raw_xml = XMLParserTask().run()
    redirects_fixed = FixRedirectsTask().run(raw_xml)
    snippets_fixed = FixSnippetsTask().run(redirects_fixed)

    # The results are not inspected; unpacking only checks the result's arity.
    _entries, _definition_cats, _top_cats = GetCategoriesTask().run(snippets_fixed)
def test_get_definitions(self):
    """Smoke-test the pipeline up to and including definition extraction.

    There are no assertions. The test passes when every stage runs without
    raising and ``GetDefinitionsTask.run`` returns exactly two values.
    """
    raw_xml = XMLParserTask().run()
    redirects_fixed = FixRedirectsTask().run(raw_xml)
    snippets_fixed = FixSnippetsTask().run(redirects_fixed)

    # The third value returned by the category stage is not used here.
    entries, definition_cats, _top_cats = GetCategoriesTask().run(snippets_fixed)
    curated = CurateTitlesTask().run(entries)

    # The results are not inspected; unpacking only checks the result's arity.
    _definitions, _remaining = GetDefinitionsTask().run(curated, definition_cats)
def test_curate_titles(self):
    """Smoke-test title curation and log the entry count before and after.

    The category stage's result is read by key (``"out_entries"``). The
    number of entries is logged before curation, and the length of the
    curated result is logged afterwards.

    There are no assertions; the test passes as long as no stage raises.
    """
    raw_xml = XMLParserTask().run()
    redirects_fixed = FixRedirectsTask().run(raw_xml)
    snippets_fixed = FixSnippetsTask().run(redirects_fixed)
    categories = GetCategoriesTask().run(snippets_fixed)

    # The curation task is constructed before anything is logged.
    curator = CurateTitlesTask()

    logger.info("Before curation")
    logger.info(len(categories["out_entries"]))

    # The "After" label is logged before curation actually runs.
    logger.info("After curation")
    curated = curator.run(categories["out_entries"])
    logger.info(len(curated))
CurateTitlesTask, GetDefinitionsTask, GetOthersTask, GenerateOutputTask, ) cache_args = dict( target="{task_name}.pkl", checkpoint=True, result=LocalResult(dir=f"./cache/"), ) parser_task = XMLParserTask(**cache_args) fix_redirect_task = FixRedirectsTask(**cache_args) fix_snippets_task = FixSnippetsTask(**cache_args) get_categories_task = GetCategoriesTask(**cache_args) curate_titles_task = CurateTitlesTask(**cache_args) get_definitions_task = GetDefinitionsTask(**cache_args) get_others_task = GetOthersTask(**cache_args) generate_output_task = GenerateOutputTask() # generate_output_task = GenerateOutputTask() with Flow("Run extraction flow") as flow: xml_file = parser_task() fixed_result = fix_redirect_task(xml_file) fixed_snippets = fix_snippets_task(fixed_result) out_categories = get_categories_task(fixed_snippets) curate_results = curate_titles_task(out_categories["out_entries"]) out_definitions_task = get_definitions_task( curate_results, out_categories["definitions_category"])