def setUp(self):
    """Set up output paths and expected-result fixtures before each test.

    Stores on ``self`` the temporary dump / wdb / parsed output paths
    obtained from ``getOutputFile``, the list of expected article titles,
    and the paths of the expected XML and WDB files located under
    ``expected_results`` in this module's directory.
    """
    # Scratch files produced by the tests.
    self.tmp_dump_file = getOutputFile("wiki_knowledge_output.xml")
    self.tmp_wdb_file = getOutputFile("wiki_knowledge_output.wdb")
    self.tmp_parse_file = getOutputFile("wiki_knowledge_output.parsed.xml")

    self.expected_articles = ['Knowledge', 'Love', 'War']

    # Reference files are resolved relative to this test module.
    module_dir = os.path.dirname(__file__)
    self.expected_xml_path = os.path.join(
        module_dir, "expected_results/expected_xml_Knowledge_Love_War.xml")
    self.expected_wdb_path = os.path.join(
        module_dir, "expected_results/expected_Knowledge_Love_War.wdb")
def setUp(self):
    """Set up output paths and expected-result fixtures before each test.

    Stores on ``self`` the temporary dump / wdb / parsed output paths
    obtained from ``getOutputFile``, the list of expected article titles,
    and the paths of the expected XML and WDB files located under
    ``expected_results`` in this module's directory.
    """
    # Scratch files produced by the tests.
    self.tmp_dump_file = getOutputFile("wiki_knowledge_output.xml")
    self.tmp_wdb_file = getOutputFile("wiki_knowledge_output.wdb")
    self.tmp_parse_file = getOutputFile("wiki_knowledge_output.parsed.xml")

    self.expected_articles = ['Knowledge', 'Love', 'War']

    # Reference files are resolved relative to this test module.
    module_dir = os.path.dirname(__file__)
    self.expected_xml_path = os.path.join(
        module_dir, "expected_results/expected_xml_Knowledge_Love_War.xml")
    self.expected_wdb_path = os.path.join(
        module_dir, "expected_results/expected_Knowledge_Love_War.wdb")
def test__same_text_correlation(self):
    """Comparing a text with itself must give a correlation of 1."""
    _log.info('-' * 80)

    # arrange
    sample_text = "love is rain as long story short"
    same_text = sample_text
    dump_path = getInputFile("swiki_knowledge_output.xml")
    parsed_path = getOutputFile("swiki_knowledge_output.parsed.xml")
    article_titles = ['Rain', 'Love', 'Tree']

    # act: dump the articles, parse the dump, build the database wrapper
    # and compare the text against itself.
    wn.make_dump(dump_path, article_titles, compress=False)
    wn.parse_dump(dump_path, parsed_path)
    comparer = SemanticComparer(
        wn.build_database_wrapper(parsed_path, StopWordsStemmer([])))
    correlation = comparer.compare(sample_text, same_text)
    _log.info(
        test_utils.get_texts_correlation_message(
            sample_text, same_text, correlation))

    # assert
    self.assertAlmostEqual(
        correlation, 1.0, msg="for same text correlation should be 1")
def test__same_text_correlation(self):
    """Comparing a text with itself must give a correlation of 1."""
    _log.info('-' * 80)

    # arrange
    sample_text = "love is rain as long story short"
    same_text = sample_text
    dump_path = getInputFile("swiki_knowledge_output.xml")
    parsed_path = getOutputFile("swiki_knowledge_output.parsed.xml")
    article_titles = ['Rain', 'Love', 'Tree']

    # act: dump the articles, parse the dump, build the database wrapper
    # and compare the text against itself.
    wn.make_dump(dump_path, article_titles, compress=False)
    wn.parse_dump(dump_path, parsed_path)
    comparer = SemanticComparer(
        wn.build_database_wrapper(parsed_path, StopWordsStemmer([])))
    correlation = comparer.compare(sample_text, same_text)
    _log.info(
        test_utils.get_texts_correlation_message(
            sample_text, same_text, correlation))

    # assert
    self.assertAlmostEqual(
        correlation, 1.0, msg="for same text correlation should be 1")
def test__many_articles(self):
    """Smoke-test building a database wrapper from a many-article dump.

    Parses the ``many_articles_dump.xml`` input, builds a database wrapper
    with a ``PorterStemmer`` and prints the readable centroid obtained for
    ``ibm_licence_text``. No assertion is made on the result, so the test
    only fails if one of these steps raises.
    """
    wiki_dump_path = getInputFile("many_articles_dump.xml")
    parsed_xml_path = getOutputFile("many_articles_dump.parsed.xml")

    wiki_knowledge.parse_dump(wiki_dump_path, parsed_xml_path)
    db_wrapper = wiki_knowledge.build_database_wrapper(
        parsed_xml_path, PorterStemmer())
    centroid = db_wrapper.get_readable_centroid(ibm_licence_text)

    # Parenthesised single-argument form: same output as the Python 2
    # print statement, and valid syntax on Python 3 as well.
    print(centroid)
def test__many_articles(self):
    """Smoke-test building a database wrapper from a many-article dump.

    Parses the ``many_articles_dump.xml`` input, builds a database wrapper
    with a ``PorterStemmer`` and prints the readable centroid obtained for
    ``ibm_licence_text``. No assertion is made on the result, so the test
    only fails if one of these steps raises.
    """
    wiki_dump_path = getInputFile("many_articles_dump.xml")
    parsed_xml_path = getOutputFile("many_articles_dump.parsed.xml")

    wiki_knowledge.parse_dump(wiki_dump_path, parsed_xml_path)
    db_wrapper = wiki_knowledge.build_database_wrapper(
        parsed_xml_path, PorterStemmer())
    centroid = db_wrapper.get_readable_centroid(ibm_licence_text)

    # Parenthesised single-argument form: same output as the Python 2
    # print statement, and valid syntax on Python 3 as well.
    print(centroid)
def test_number_of_concepts(self):
    """The db builder reads the parsed xml properly.

    Parses the Knowledge/Love/War dump, builds a database wrapper from the
    parsed file and checks that both ``title_index`` and ``concepts_index``
    contain exactly three entries.
    """
    _log.info('-' * 80)

    # arrange
    dump_file = getInputFile("wikidump_Knowledge_Love_War.xml")
    parsed_file = getOutputFile("wikidump_Knowledge_Love_War.parsed.xml")

    # act
    wn.parse_dump(dump_file, parsed_file)
    db_wrapper = wn.build_database_wrapper(parsed_file, StopWordsStemmer([]))
    titles_count = len(db_wrapper.title_index)
    concepts_count = len(db_wrapper.concepts_index)

    # assert
    # Each failure message names the quantity actually being checked, so a
    # failing concepts check is not reported as a titles problem.
    self.assertEqual(
        titles_count, 3,
        "number of titles should be 3, got {0}".format(titles_count))
    self.assertEqual(
        concepts_count, 3,
        "number of concepts should be 3, got {0}".format(concepts_count))
def test_number_of_concepts(self):
    """The db builder reads the parsed xml properly.

    Parses the Knowledge/Love/War dump, builds a database wrapper from the
    parsed file and checks that both ``title_index`` and ``concepts_index``
    contain exactly three entries.
    """
    _log.info('-' * 80)

    # arrange
    dump_file = getInputFile("wikidump_Knowledge_Love_War.xml")
    parsed_file = getOutputFile("wikidump_Knowledge_Love_War.parsed.xml")

    # act
    wn.parse_dump(dump_file, parsed_file)
    db_wrapper = wn.build_database_wrapper(parsed_file, StopWordsStemmer([]))
    titles_count = len(db_wrapper.title_index)
    concepts_count = len(db_wrapper.concepts_index)

    # assert
    # Each failure message names the quantity actually being checked, so a
    # failing concepts check is not reported as a titles problem.
    self.assertEqual(
        titles_count, 3,
        "number of titles should be 3, got {0}".format(titles_count))
    self.assertEqual(
        concepts_count, 3,
        "number of concepts should be 3, got {0}".format(concepts_count))
def test__parse_dump(self):
    """Run ``wn.parse_dump`` on the ``test__parse_tools`` fixture.

    The input and output paths are both looked up through ``io_tu`` using
    the same ``FilesList`` entry. No assertion is made, so the test only
    fails if parsing raises.
    """
    fixture_key = io_tu.FilesList.test__parse_tools
    source_path = io_tu.getInputFile(fixture_key)
    target_path = io_tu.getOutputFile(fixture_key)
    wn.parse_dump(source_path, target_path)
def test__parse_dump(self):
    """Run ``wn.parse_dump`` on the ``test__parse_tools`` fixture.

    The input and output paths are both looked up through ``io_tu`` using
    the same ``FilesList`` entry. No assertion is made, so the test only
    fails if parsing raises.
    """
    fixture_key = io_tu.FilesList.test__parse_tools
    source_path = io_tu.getInputFile(fixture_key)
    target_path = io_tu.getOutputFile(fixture_key)
    wn.parse_dump(source_path, target_path)