Exemple #1
0
'''
Created on Apr 24, 2013

@author: inesmeya
'''
import unittest
from wiki_knows import wiki_knowledge
from io_test_utils import getOutputFile, getInputFile
from model.stemmers import PorterStemmer
from model.semantic_interpreter import SemanticComparer

from model.logger import getTestLogger
_log = getTestLogger("wiki_exec")

"""

Articles
--------

    Politics, Technology, Computer, Nature, Art, 
    Busyness, Internet, Biology, Physics, Law, Economics, History,
    Education, War, Love, Emotion, Medicine, Research, Sociology,
    Wealth, Science


Code to download articles
-------------------------

    articles_titles = '''Politics, Technology, Computer, Nature, Art, 
                    Busyness, Internet, Biology, Physics, Law, Economics, History,
                    Education, War, Love, Emotion, Medicine, Research, Sociology,
import unittest
import wiki_knows.wiki_knowledge as wn
import io_test_utils as io_tu
from model.stemmers import StopWordsStemmer
from model.semantic_interpreter import SemanticComparer

import test.test_utils as test_utils
from io_test_utils import getOutputFile, getInputFile

import os

from model.logger import getTestLogger
_log = getTestLogger("Test")


class Test(unittest.TestCase):
    def setUp(self):
        """Nothing to prepare before each test."""

    def tearDown(self):
        """Nothing to clean up after each test."""

    def test__parse_dump(self):
        # Smoke test: hand wn.parse_dump the input and output paths resolved
        # for the test__parse_tools fixture entry. Nothing is asserted; the
        # test passes as long as the call returns without raising.
        fixture = io_tu.FilesList.test__parse_tools
        dump_in = io_tu.getInputFile(fixture)
        parsed_out = io_tu.getOutputFile(fixture)
        wn.parse_dump(dump_in, parsed_out)

    def test_number_of_concepts(self):
        """ db builder reads parsed xml properly"""
Exemple #3
0
'''
Created on Apr 24, 2013

@author: inesmeya
'''
import unittest
from wiki_knows import wiki_knowledge
from io_test_utils import getOutputFile, getInputFile
from model.stemmers import PorterStemmer
from model.semantic_interpreter import SemanticComparer

from model.logger import getTestLogger
_log = getTestLogger("wiki_exec")
"""

Articles
--------

    Politics, Technology, Computer, Nature, Art, 
    Busyness, Internet, Biology, Physics, Law, Economics, History,
    Education, War, Love, Emotion, Medicine, Research, Sociology,
    Wealth, Science


Code to download articles
-------------------------

    articles_titles = '''Politics, Technology, Computer, Nature, Art, 
                    Busyness, Internet, Biology, Physics, Law, Economics, History,
                    Education, War, Love, Emotion, Medicine, Research, Sociology,
                    Wealth, Science'''
Exemple #4
0
import xml.etree.ElementTree as etree
import gzip

import WikiExtractor 
from parsers.wikitext_processor import  WikiTextProcessor

from  model.wiki_doc import WikiDocument
from  model.wiki_doc import doc_from_xml, WdNames


from model.logger import getTestLogger
_log = getTestLogger(__name__)

#==================================================================================================

# XML namespace URI that make_wikipedia_tag() wraps in braces and prepends to
# tag names. NOTE(review): the URI pins export schema version 0.8; presumably
# it has to match the dumps being parsed -- confirm.
WIKIPEDIA_NAMESPACE = 'http://www.mediawiki.org/xml/export-0.8/'

def make_wikipedia_tag(tag):
    '''Build the namespace-qualified name of a wikipedia xml tag.
    @param tag: string. Local tag name from the dump file, such as 'page'
    @return: string of the form {WIKIPEDIA_NAMESPACE}tag, i.e. the namespace
             URI wrapped in curly braces followed by the tag
    '''
    # Literal braces around the namespace, then the tag itself.
    template = "{%s}%s"
    return template % (WIKIPEDIA_NAMESPACE, tag)

# Namespace-qualified name of the wikipedia 'page' tag, as produced by
# make_wikipedia_tag(): "{<WIKIPEDIA_NAMESPACE>}page".
PAGE_TAG = make_wikipedia_tag('page')

# Tag name for a parsed doc; a plain string with no namespace prefix.
DOC_TAG = 'doc'

import unittest
import wiki_knows.wiki_knowledge as wn
import io_test_utils as io_tu  
from model.stemmers import StopWordsStemmer
from model.semantic_interpreter import SemanticComparer

import test.test_utils as test_utils
from io_test_utils import getOutputFile, getInputFile 

import os

from model.logger import getTestLogger
_log = getTestLogger("Test")

class Test(unittest.TestCase):
    """Test case that runs wn.parse_dump on the test__parse_tools fixture."""

    def setUp(self):
        """Nothing to prepare before each test."""

    def tearDown(self):
        """Nothing to clean up after each test."""

    def test__parse_dump(self):
        # Smoke test: hand wn.parse_dump the input and output paths resolved
        # for the test__parse_tools fixture entry. Nothing is asserted; the
        # test passes as long as the call returns without raising.
        fixture = io_tu.FilesList.test__parse_tools
        source_path = io_tu.getInputFile(fixture)
        target_path = io_tu.getOutputFile(fixture)
        wn.parse_dump(source_path, target_path)
Exemple #6
0
import xml.etree.ElementTree as etree
import gzip

import WikiExtractor
from parsers.wikitext_processor import WikiTextProcessor

from model.wiki_doc import WikiDocument
from model.wiki_doc import doc_from_xml, WdNames

from model.logger import getTestLogger
_log = getTestLogger(__name__)

#==================================================================================================

# XML namespace URI that make_wikipedia_tag() wraps in braces and prepends to
# tag names. NOTE(review): the URI pins export schema version 0.8; presumably
# it has to match the dumps being parsed -- confirm.
WIKIPEDIA_NAMESPACE = 'http://www.mediawiki.org/xml/export-0.8/'


def make_wikipedia_tag(tag):
    '''Build the namespace-qualified name of a wikipedia xml tag.
    @param tag: string. Local tag name from the dump file, such as 'page'
    @return: string of the form {WIKIPEDIA_NAMESPACE}tag, i.e. the namespace
             URI wrapped in curly braces followed by the tag
    '''
    # Literal braces around the namespace, then the tag itself.
    template = "{%s}%s"
    return template % (WIKIPEDIA_NAMESPACE, tag)


# Namespace-qualified name of the wikipedia 'page' tag, as produced by
# make_wikipedia_tag(): "{<WIKIPEDIA_NAMESPACE>}page".
PAGE_TAG = make_wikipedia_tag('page')

# Tag name for a parsed doc; a plain string with no namespace prefix.
DOC_TAG = 'doc'