Beispiel #1
0
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

if __name__ == "__main__":
    # Script entry point: read every stored website, count the words in its
    # text and store one word-count row per distinct word.

    # Set database up (development SQLite file, path relative to the CWD).
    engine = create_engine('sqlite:///../../database/hegemone_dev.sqlite3',
                           echo=False)
    db.setup_db(engine)
    Session = sessionmaker(bind=engine)
    session = Session()

    try:
        # Set repositories up; both share the same session/transaction.
        website_repository = WebsiteRepository(session)
        word_count_repository = WordCountRepository(session)

        # Crawl websites. Each row is indexed positionally: column 0 is used
        # as the website id and column 1 is handed to the converter
        # (presumably the URL -- confirm against WebsiteRepository).
        for website_tuple in website_repository.get_all_websites():
            website_id = website_tuple[0]
            # NOTE(review): the meaning of the second argument (0) is not
            # visible from here -- check converter.get_text.
            raw_text = converter.get_text(website_tuple[1], 0)
            word_counts = WordFrequency.count_words(raw_text)

            # Store word counts in database. Iterating items() avoids a
            # second lookup per word.
            for word, count in word_counts.items():
                word_count_repository.create_word_count(word,
                                                        count,
                                                        website_id)
            # Commit once per website so earlier sites stay persisted even
            # if a later one fails.
            session.commit()
    finally:
        # Always release the connection, including when crawling raises.
        session.close()
class TestWebsiteRepository(unittest.TestCase):
    """Tests for WebsiteRepository run against an in-memory SQLite database.

    Every test gets its own engine and session from ``setUp``, so ids start
    at 1 in each test. Result rows are checked positionally: column 0 is the
    id, column 1 the first ``create_website`` argument and column 2 the
    second one.

    The checks use ``TestCase`` assertion methods instead of bare ``assert``
    so that they still run under ``python -O`` and report both values on
    failure.
    """

    def setUp(self):
        """Create a fresh in-memory database, a session and the repositories."""
        engine = create_engine('sqlite:///:memory:', echo=False)
        db.setup_db(engine)

        Session = sessionmaker(bind=engine)
        self.session = Session()
        self.category_repository = CategoryRepository(self.session)
        self.website_repository = WebsiteRepository(self.session)

    def tearDown(self):
        """Close the per-test session."""
        self.session.close()

    def test_create_website(self):
        """A single created website is returned by get_all_websites."""
        self.website_repository.create_website('www.wikipedia.com', 'english', 'test')

        all_result_websites = self.website_repository.get_all_websites()

        self.assertEqual(len(all_result_websites), 1)
        self.assertEqual(all_result_websites[0][0], 1)
        self.assertEqual(all_result_websites[0][1], 'www.wikipedia.com')
        self.assertEqual(all_result_websites[0][2], 'english')

    def test_create_three_websites(self):
        """Several websites come back in insertion order with ids 1..n."""
        test_websites = [('www.wikipedia.com', 'english', 'english test'),
            ('www.wikipedia.it', 'italian', 'italiano'),
            ('www.simple.wikipedia.com', 'english', 'simple english')]

        for test_website in test_websites:
            self.website_repository.create_website(*test_website)

        all_result_websites = self.website_repository.get_all_websites()

        self.assertEqual(len(all_result_websites), len(test_websites))
        for index, expected in enumerate(test_websites):
            row = all_result_websites[index]
            # Ids are expected to be assigned sequentially starting at 1.
            self.assertEqual(row[0], index + 1)
            self.assertEqual(row[1], expected[0])
            self.assertEqual(row[2], expected[1])

    def test_get_website_by_category(self):
        """A website added to a category is found through that category."""
        website_id = self.website_repository.create_website('www.wikipedia.it', 'italian', 'italiano')
        category_id = self.category_repository.create_category('wiki')
        self.category_repository.add_website(category_id, website_id)

        all_result_websites = self.website_repository.get_websites_by_category(category_id)

        self.assertEqual(all_result_websites[0][0], 1)
        self.assertEqual(all_result_websites[0][1], 'www.wikipedia.it')
        self.assertEqual(all_result_websites[0][2], 'italian')

    def test_get_website_id_by_search_term(self):
        """Searching with 'lingua' finds the website created with 'lingua italiana'."""
        website_id = self.website_repository.create_website(
            'https://it.wikipedia.org/wiki/Lingua_italiana',
            'italian',
            'lingua italiana')
        # Committed before searching, as in the original test.
        self.session.commit()

        website_ids = self.website_repository.get_website_ids_by_title(
            'lingua')

        self.assertEqual(len(website_ids), 1)
        self.assertEqual(website_ids[0], website_id)
Beispiel #3
0
from dataAccessLayer.repositories.websiteRepository import WebsiteRepository
from dataAccessLayer.helper import db
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

# Seed rows; each tuple is unpacked positionally into
# WebsiteRepository.create_website (apparently url, language, title --
# confirm against the repository signature).
new_entries = [
    ('https://de.wikipedia.org/wiki/Deutsche_Sprache', 'German', 'Deutsche Sprache'),
    ('https://it.wikipedia.org/wiki/Lingua_italiana', 'Italian', 'Lingua italiana'),
]

if __name__ == "__main__":
    # Script entry point: insert the seed websites into the development
    # database and print everything stored afterwards.

    # Set database up (development SQLite file, path relative to the CWD).
    engine = create_engine('sqlite:///../database/hegemone_dev.sqlite3',
                           echo=False)
    db.setup_db(engine)
    Session = sessionmaker(bind=engine)
    session = Session()

    try:
        # Set website repository up
        website_repository = WebsiteRepository(session)

        # Create websites. Single-argument print(...) prints the same text
        # on Python 2 and Python 3.
        print('Creating website entries...')
        for new_entry in new_entries:
            website_repository.create_website(*new_entry)
        # One commit for the whole batch.
        session.commit()

        print(website_repository.get_all_websites())
    finally:
        # Always release the connection, including when seeding raises.
        session.close()