Beispiel #1
0
 def get_value(args):
     """Log the text centroid of ``args.text`` using the database at ``args.dbpath``.

     ``args.dbpath`` may be a single value or a list; when it is a list it is
     replaced in place by its first element before the database is loaded.
     Nothing is returned: the centroid's ``data`` is written to the log at
     INFO level.
     """
     # NOTE(review): the message says "run build" although this is get_value --
     # confirm whether that wording is intentional.
     _log.debug("run build with args={}".format(args))

     # Collapse a list-valued dbpath to its first entry.
     if isinstance(args.dbpath, list):
         first_path = args.dbpath[0]
         args.dbpath = first_path

     database = wiki_knowledge.load_db_wrapper_from_wdb(args.dbpath)
     centroid = database.get_text_centroid(args.text)

     _log.info("vector = {0}".format(centroid.data))
    def test__save_and_load(self):
        """Save a freshly built database wrapper to a .wdb file and reload it.

        The reloaded wrapper must report the same ``words_num`` and
        ``title_index`` as the wrapper that was saved.
        """
        # Build the reference wrapper from the parsed dump.
        wiki_knowledge.parse_dump(self.expected_xml_path, self.tmp_parse_file)
        stemmer = StopWordsStemmer([])
        built = wiki_knowledge.build_database_wrapper(self.tmp_parse_file, stemmer)

        # Round-trip it through the temporary .wdb file.
        wiki_knowledge.save_db_wrapper_to_wdb(built, self.tmp_wdb_file)
        reloaded = wiki_knowledge.load_db_wrapper_from_wdb(self.tmp_wdb_file)

        self.assertEqual(
            built.words_num,
            reloaded.words_num,
            "Mismatch WikiKnowledges number of words",
        )
        self.assertEqual(
            built.title_index,
            reloaded.title_index,
            "Mismatch WikiKnowledges titles",
        )
Beispiel #3
0
    def test__many_articles(self):
        """Print the five largest-valued centroid entries for two licence texts.

        Loads the wrapper stored in ``many_articles_dump.wdb`` and, for
        ``ibm_licence_text_full`` and then ``ibm_licence_text``, sorts the
        items of the readable centroid by their second element in descending
        order and prints the first five.

        NOTE(review): this test only prints and asserts nothing, so it can
        fail only if one of the calls raises.
        """
        wdb_path = getInputFile("many_articles_dump.wdb")
        db_wrapper = wiki_knowledge.load_db_wrapper_from_wdb(wdb_path)

        for text in (ibm_licence_text_full, ibm_licence_text):
            d = db_wrapper.get_readable_centroid(text)
            s = sorted(d.items(), key=lambda x: x[1], reverse=True)[:5]
            # Parenthesised single-argument form: valid as both the Python 2
            # print statement and the Python 3 print function.
            print(s)
    def test__save_and_load(self):
        """Check that a database wrapper survives a save/load cycle.

        A wrapper is built from the parsed dump, written to ``tmp_wdb_file``
        and read back; ``words_num`` and ``title_index`` are then compared
        between the two wrappers, in that order.
        """
        wiki_knowledge.parse_dump(self.expected_xml_path, self.tmp_parse_file)
        original = wiki_knowledge.build_database_wrapper(
            self.tmp_parse_file, StopWordsStemmer([]))
        wiki_knowledge.save_db_wrapper_to_wdb(original, self.tmp_wdb_file)
        restored = wiki_knowledge.load_db_wrapper_from_wdb(self.tmp_wdb_file)

        # (attribute to compare, failure message)
        checks = (
            ("words_num", "Mismatch WikiKnowledges number of words"),
            ("title_index", "Mismatch WikiKnowledges titles"),
        )
        for attribute, message in checks:
            self.assertEqual(getattr(original, attribute),
                             getattr(restored, attribute),
                             message)
Beispiel #5
0
    def test__many_articles(self):
        """Print the top five readable-centroid entries for two licence texts.

        The wrapper is loaded from ``many_articles_dump.wdb``. For each text
        the centroid's items are ordered by their second element, largest
        first, and the leading five are printed.

        NOTE(review): nothing is asserted here; the test passes whenever the
        calls complete without raising.
        """
        wdb_path = getInputFile("many_articles_dump.wdb")
        db_wrapper = wiki_knowledge.load_db_wrapper_from_wdb(wdb_path)

        # print(...) with a single argument works under Python 2 and Python 3,
        # unlike the bare ``print s`` statement.
        d = db_wrapper.get_readable_centroid(ibm_licence_text_full)
        s = sorted(d.items(), key=lambda x: x[1], reverse=True)[:5]
        print(s)

        d = db_wrapper.get_readable_centroid(ibm_licence_text)
        s = sorted(d.items(), key=lambda x: x[1], reverse=True)[:5]
        print(s)
Beispiel #6
0
    def test__many_articles(self):
        """Check that "Computer" is among the top five entries for both licence texts.

        For ``ibm_licence_text_full`` and then ``ibm_licence_text`` the
        readable centroid is computed, reduced with ``get_top(..., 5)`` and
        converted to a dict, which must contain the key "Computer".
        """
        wdb_path = getInputFile("many_articles_dump.wdb")
        database = wiki_knowledge.load_db_wrapper_from_wdb(wdb_path)

        for licence_text in (ibm_licence_text_full, ibm_licence_text):
            centroid = database.get_readable_centroid(licence_text)
            best = dict(get_top(centroid, 5))
            self.assertIn("Computer", best)
Beispiel #7
0
    def test__many_articles(self):
        """Assert that "Computer" is a top-five centroid entry for each licence text.

        The database wrapper comes from ``many_articles_dump.wdb``; the full
        licence text is checked first, then the shorter one.
        """
        wdb_path = getInputFile("many_articles_dump.wdb")
        db_wrapper = wiki_knowledge.load_db_wrapper_from_wdb(wdb_path)

        def top_entries(text):
            # Readable centroid -> get_top(..., 5) -> dict for membership tests.
            return dict(get_top(db_wrapper.get_readable_centroid(text), 5))

        self.assertIn("Computer", top_entries(ibm_licence_text_full))
        self.assertIn("Computer", top_entries(ibm_licence_text))