def setUp(self):
    """Attach a session and a WordCountRepository to ``self``.

    The engine URL points at an in-memory SQLite database, so nothing is
    written to disk.
    """
    # NOTE(review): this module-level function repeats the body of
    # TestWordCountRepository.setUp below -- confirm it is still needed.
    memory_engine = create_engine('sqlite:///:memory:', echo=False)
    # Presumably creates the project's tables on the new engine -- the
    # helper is defined outside this file.
    db.create_tables(memory_engine)

    # Build the session factory and instantiate it in one step.
    self.session = sessionmaker(bind=memory_engine)()
    self.word_count_repository = WordCountRepository(self.session)
class TestWordCountRepository(unittest.TestCase):
    """Tests for WordCountRepository.

    Every test runs against its own in-memory SQLite database, so tests
    do not share state.
    """

    def setUp(self):
        """Create a fresh engine, session and repository for one test."""
        # The engine is kept on the instance so tearDown can release it.
        self.engine = create_engine('sqlite:///:memory:', echo=False)
        db.create_tables(self.engine)

        Session = sessionmaker(bind=self.engine)
        self.session = Session()
        self.word_count_repository = WordCountRepository(self.session)

    def tearDown(self):
        """Close the session and release the engine created in setUp."""
        self.session.close()
        self.engine.dispose()

    def test_create_word_count_with_one_word(self):
        """A stored word count can be read back by its website id."""
        repository = self.word_count_repository
        repository.create_word_count('test', 2, 1)

        result_word_count = repository.get_word_counts_by_website_id(1)

        # Only columns 1 and 2 of the first returned row are checked;
        # presumably they hold the word and its count.
        # assertEqual (unlike a bare assert) survives ``python -O`` and
        # reports both values on failure.
        first_row = result_word_count[0]
        self.assertEqual(first_row[1], 'test')
        self.assertEqual(first_row[2], 2)

    def test_create_word_count_with_three_words(self):
        """Word counts stored for distinct website ids stay separate."""
        repository = self.word_count_repository
        # Each entry is (word, count, website_id), matching the argument
        # order of create_word_count.
        test_words = [
            ('test', 2, 100),
            ('pero', 10, 21),
            ('gusto', 2, 23000),
        ]

        for word, count, website_id in test_words:
            repository.create_word_count(word, count, website_id)

        for word, count, website_id in test_words:
            result_word_count = repository.get_word_counts_by_website_id(
                website_id)
            first_row = result_word_count[0]
            self.assertEqual(first_row[1], word)
            self.assertEqual(first_row[2], count)
# Example #3
# 0
from dataAccessLayer.helper import db

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

if __name__ == "__main__":
    # Script entry point: fetch the text of every stored website, count
    # its words and store the counts through WordCountRepository.

    # Set database up
    # NOTE: the SQLite path is relative, so it is resolved against the
    # current working directory of the process.
    engine = create_engine('sqlite:///../../database/hegemone_dev.sqlite3',
                           echo=False)
    db.setup_db(engine)
    Session = sessionmaker(bind=engine)
    session = Session()

    try:
        # Set repositories up
        website_repository = WebsiteRepository(session)
        word_count_repository = WordCountRepository(session)

        # Get websites
        websites = website_repository.get_all_websites()

        # Crawl websites
        for website_tuple in websites:
            # Index 0 is used as the website id below; index 1 is
            # presumably the address to fetch. The meaning of the second
            # argument (0) is defined by converter.get_text -- TODO confirm.
            raw_text = converter.get_text(website_tuple[1], 0)
            word_counts = WordFrequency.count_words(raw_text)

            # Store word counts in database
            website_id = website_tuple[0]
            for word, count in word_counts.items():
                word_count_repository.create_word_count(word,
                                                        count,
                                                        website_id)
    finally:
        # Release database resources even when a crawl step raises.
        session.close()
        engine.dispose()