Esempio n. 1
0
    def test__same_text_correlation(self):
        """Comparing a text against itself must give a correlation of 1."""

        _log.info('-'*80)

        # arrange: both sides of the comparison are the very same sentence
        sentence = "love is rain as long story short"
        same_sentence = sentence

        raw_dump = getInputFile("swiki_knowledge_output.xml")
        parsed_dump = getOutputFile("swiki_knowledge_output.parsed.xml")

        titles = ['Rain', 'Love', 'Tree']

        # act: make the dump, parse it and build the database wrapper
        wn.make_dump(raw_dump, titles, compress=False)
        wn.parse_dump(raw_dump, parsed_dump)
        wrapper = wn.build_database_wrapper(parsed_dump, StopWordsStemmer([]))

        score = SemanticComparer(wrapper).compare(sentence, same_sentence)
        report = test_utils.get_texts_correlation_message(sentence, same_sentence, score)
        _log.info(report)

        # assert
        self.assertAlmostEqual(score, 1.0, msg="for same text correlation should be 1")
Esempio n. 2
0
    def test__same_text_correlation(self):
        """Check that identical texts correlate at 1.0."""

        _log.info('-' * 80)

        # arrange
        text1 = text2 = "love is rain as long story short"
        articles = ['Rain', 'Love', 'Tree']
        dump_file = getInputFile("swiki_knowledge_output.xml")
        parsed_file = getOutputFile("swiki_knowledge_output.parsed.xml")

        # act
        wn.make_dump(dump_file, articles, compress=False)
        wn.parse_dump(dump_file, parsed_file)
        comparer = SemanticComparer(
            wn.build_database_wrapper(parsed_file, StopWordsStemmer([])))
        correlation = comparer.compare(text1, text2)

        message = test_utils.get_texts_correlation_message(
            text1, text2, correlation)
        _log.info(message)

        # assert
        self.assertAlmostEqual(
            correlation, 1.0, msg="for same text correlation should be 1")
Esempio n. 3
0
    def test__save_and_load(self):
        """Save a database wrapper to a .wdb file, load it back and compare.

        The word count and the title index of the loaded wrapper must equal
        those of the wrapper that was saved.
        """
        parsed_path = self.tmp_parse_file
        wiki_knowledge.parse_dump(self.expected_xml_path, parsed_path)

        stemmer = StopWordsStemmer([])
        saved = wiki_knowledge.build_database_wrapper(parsed_path, stemmer)

        wdb_path = self.tmp_wdb_file
        wiki_knowledge.save_db_wrapper_to_wdb(saved, wdb_path)
        loaded = wiki_knowledge.load_db_wrapper_from_wdb(wdb_path)

        self.assertEqual(saved.words_num, loaded.words_num, "Mismatch WikiKnowledges number of words")
        self.assertEqual(saved.title_index, loaded.title_index, "Mismatch WikiKnowledges titles")
Esempio n. 4
0
    def test__many_articles(self):
        """Build a database from the many-articles dump and print a centroid.

        The dump is parsed, a database wrapper is built with a PorterStemmer
        and the readable centroid of ``ibm_licence_text`` is printed.  Nothing
        is asserted; the test only fails if one of the steps raises.
        """
        wiki_dump_path = getInputFile("many_articles_dump.xml")
        parsed_xml_path = getOutputFile("many_articles_dump.parsed.xml")

        wiki_knowledge.parse_dump(wiki_dump_path, parsed_xml_path)

        db_wrapper = wiki_knowledge.build_database_wrapper(parsed_xml_path, PorterStemmer())

        c = db_wrapper.get_readable_centroid(ibm_licence_text)
        # Parenthesised single-argument form prints the same under Python 2
        # and is valid syntax under Python 3 (the bare statement is not).
        print(c)
Esempio n. 5
0
    def test__save_and_load(self):
        """Verify that a wrapper survives a save/load cycle through a .wdb file."""
        wiki_knowledge.parse_dump(self.expected_xml_path, self.tmp_parse_file)

        # Build the reference wrapper and persist it.
        original = wiki_knowledge.build_database_wrapper(
            self.tmp_parse_file,
            StopWordsStemmer([]),
        )
        wiki_knowledge.save_db_wrapper_to_wdb(original, self.tmp_wdb_file)

        # Read it back and compare the fields that must be preserved.
        restored = wiki_knowledge.load_db_wrapper_from_wdb(self.tmp_wdb_file)
        self.assertEqual(
            original.words_num,
            restored.words_num,
            "Mismatch WikiKnowledges number of words",
        )
        self.assertEqual(
            original.title_index,
            restored.title_index,
            "Mismatch WikiKnowledges titles",
        )
Esempio n. 6
0
    def test__many_articles(self):
        """Parse the many-articles dump and print a readable centroid.

        A database wrapper is built from the parsed dump with a
        PorterStemmer, then the readable centroid of ``ibm_licence_text`` is
        printed.  There is no assertion; only an exception fails the test.
        """
        wiki_dump_path = getInputFile("many_articles_dump.xml")
        parsed_xml_path = getOutputFile("many_articles_dump.parsed.xml")

        wiki_knowledge.parse_dump(wiki_dump_path, parsed_xml_path)

        db_wrapper = wiki_knowledge.build_database_wrapper(
            parsed_xml_path, PorterStemmer())

        c = db_wrapper.get_readable_centroid(ibm_licence_text)
        # ``print(c)`` is valid on both Python 2 and 3, unlike ``print c``.
        print(c)
Esempio n. 7
0
    def test__execution(self):
        """Run the whole pipeline once; this is a smoke run, not a real test.

        A dump is made for ``self.expected_articles``, parsed, turned into a
        database wrapper and used to compare two short texts.  The resulting
        correlation is only logged; nothing is asserted about its value.
        """
        text1 = "i love to learn"
        text2 = "the world we know"

        dump_file = self.tmp_dump_file

        wiki_knowledge.make_dump(dump_file, self.expected_articles, compress=False)
        # Register the removal right after the dump has been made, so the
        # file is cleaned up even when a later step raises.
        self.addCleanup(os.remove, self.tmp_dump_file)

        wiki_knowledge.parse_dump(dump_file, self.tmp_parse_file)
        db_wrapper = wiki_knowledge.build_database_wrapper(self.tmp_parse_file, StopWordsStemmer([]))

        comparer = semantic_interpreter.SemanticComparer(db_wrapper)
        correlation = comparer.compare(text1, text2)
        _log.info(test_utils.get_texts_correlation_message(text1, text2, correlation))
Esempio n. 8
0
 def test_number_of_concepts(self):
     """The database builder reads the parsed xml properly.

     After parsing the dump and building the wrapper, both the title index
     and the concepts index are expected to hold exactly 3 entries.
     """

     _log.info('-'*80)

     # arrange
     dump_file = getInputFile("wikidump_Knowledge_Love_War.xml")
     parsed_file = getOutputFile("wikidump_Knowledge_Love_War.parsed.xml")

     # act
     wn.parse_dump(dump_file, parsed_file)
     db_wrapper = wn.build_database_wrapper(parsed_file, StopWordsStemmer([]))

     titles_count = len(db_wrapper.title_index)
     concepts_count = len(db_wrapper.concepts_index)

     # assert
     # Each message names the index it checks, so a failure points at the
     # right one.
     self.assertEqual(titles_count, 3, "number of titles should be 3, got {0}".format(titles_count))
     self.assertEqual(concepts_count, 3, "number of concepts should be 3, got {0}".format(concepts_count))
Esempio n. 9
0
    def test__execution(self):
        """Exercise the full pipeline; a program execution rather than a test.

        Makes a dump for ``self.expected_articles``, parses it, builds a
        database wrapper and compares two short texts.  The correlation is
        logged and not asserted.
        """
        text1 = "i love to learn"
        text2 = "the world we know"

        dump_file = self.tmp_dump_file

        wiki_knowledge.make_dump(dump_file,
                                 self.expected_articles,
                                 compress=False)
        # Schedule the dump file removal before the steps that may fail;
        # registering it at the end would leave the file behind on error.
        self.addCleanup(os.remove, self.tmp_dump_file)

        wiki_knowledge.parse_dump(dump_file, self.tmp_parse_file)
        db_wrapper = wiki_knowledge.build_database_wrapper(
            self.tmp_parse_file, StopWordsStemmer([]))

        comparer = semantic_interpreter.SemanticComparer(db_wrapper)
        correlation = comparer.compare(text1, text2)
        _log.info(
            test_utils.get_texts_correlation_message(text1, text2,
                                                     correlation))
Esempio n. 10
0
    def test_number_of_concepts(self):
        """The database builder reads the parsed xml properly.

        Parses the dump, builds the wrapper and expects exactly 3 entries in
        both the title index and the concepts index.
        """

        _log.info('-' * 80)

        # arrange
        dump_file = getInputFile("wikidump_Knowledge_Love_War.xml")
        parsed_file = getOutputFile("wikidump_Knowledge_Love_War.parsed.xml")

        # act
        wn.parse_dump(dump_file, parsed_file)
        db_wrapper = wn.build_database_wrapper(parsed_file,
                                               StopWordsStemmer([]))

        titles_count = len(db_wrapper.title_index)
        concepts_count = len(db_wrapper.concepts_index)

        # assert
        # The failure messages name the index actually being checked.
        self.assertEqual(
            titles_count, 3,
            "number of titles should be 3, got {0}".format(titles_count))
        self.assertEqual(
            concepts_count, 3,
            "number of concepts should be 3, got {0}".format(concepts_count))