Exemple #1
0
def get_reviews(db_location):
    '''
    The controlling function for the process that scrapes reviews from steam.
    Accessed from run_app.py

    Calls database_manager.create_steam_reviews for db_location, resumes
    from the app number stored in the last saved review (or from a fixed
    starting number when no review is stored), and then loops forever:
    wait, advance the app number, fetch that app page and store every
    review found on it.

    Note that the app number is advanced *before* each request, so the
    first page requested is scraper_increment past the resume point.

    db_location: passed unchanged to every database_manager call.

    Does not return normally; the loop only ends when the process is
    interrupted or an exception propagates out of it.
    '''

    sleep_time_between_requests = 1  # Seconds to wait before each request, so the scraper is not a burden on Steam's servers.
    scraper_increment = 5  # app_num increases this much every scraper request
    start_scraping_app_num = 300000  # If the database contains no reviews, start with this app_num
    base_url = 'http://store.steampowered.com/app/'  # Invariant, so defined once outside the loop.

    database_manager.create_steam_reviews(db_location)

    last_record = database_manager.retrieve_last_steam_review(db_location)

    if last_record is None:
        last_app_num = start_scraping_app_num
    else:
        # Element 2 of the last stored record is used as the app number to resume from.
        last_app_num = last_record[2]

    # This process of scraping Steam continues until it is disrupted.
    while True:
        time.sleep(sleep_time_between_requests)

        last_app_num += scraper_increment

        content_from_steam = scrape_app_page(base_url, last_app_num)
        date_scraped = datetime.datetime.now()

        if not page_has_reviews(content_from_steam):
            print('No review element found for number %s' % (last_app_num))
            continue

        reviews_on_page = get_reviews_on_page(content_from_steam)
        print('Found %s reviews for app number %s' %
              (len(reviews_on_page), last_app_num))

        # Every review on this page shares the same URL and initial flag,
        # so both are computed once per page rather than once per review.
        url = '%s%s/' % (base_url, last_app_num)
        classified = 0  # presumably "not yet classified" - confirm against database_manager

        for review in reviews_on_page:
            database_manager.insert_data_steam_reviews(
                db_location, url, last_app_num, date_scraped, classified,
                review['user_recommendation'], review['user_review_text'],
                review['user_name'])
Exemple #2
0
    def test(self):
        '''
        Insert one review stored with classified = 1, then check that
        retrieve_steam_reviews called with ('Recommended', 0, 1) returns
        an empty result.
        '''
        test_db = 'database_test.db'

        # Positional order expected by insert_data_steam_reviews after the
        # database location: url, app_num, date_scraped, classified,
        # user_recommendation, user_review_text, user_name.
        stored_row = ('url', 300000, 'today', 1, 'Recommended', 'great', 'Bob')
        database_manager.insert_data_steam_reviews(test_db, *stored_row)

        matches = database_manager.retrieve_steam_reviews(test_db, 'Recommended', 0, 1)
        assert len(matches) == 0
Exemple #3
0
    def test(self):
        '''
        Insert one review through database_manager and check, by reading
        the steam_reviews table directly with sqlite3, that the first row
        holds exactly the inserted values preceded by the value 1.
        '''
        # Local import: only this test needs it.
        import contextlib

        db_location = 'database_test.db'

        url = 'url'
        app_num = 300000
        date_scraped = 'today'
        user_recommendation = 'great'
        user_review_text = 'great'
        user_name = 'Bob'
        classified = 0
        database_manager.insert_data_steam_reviews(db_location, url, app_num, date_scraped, classified, user_recommendation, user_review_text, user_name)

        # A sqlite3 connection used directly as a context manager only
        # commits or rolls back; it does not close the connection.
        # closing() guarantees the handle on the test database is released.
        with contextlib.closing(sqlite3.connect(db_location, timeout=20)) as db:
            cur = db.cursor()
            response = cur.execute("SELECT * FROM steam_reviews;")
            response_one_data = response.fetchone()

        assert response_one_data == (1, 'url', 300000, 'today', 0, 'great', 'great', 'Bob')
 def setUp(self):
     '''
     Call database_manager.create_steam_reviews on the test database and
     insert ten reviews: the same five (app_num, text, user) samples first
     as 'Not Recommended' and then as 'Recommended', with URLs numbered
     url_1 through url_10 in insertion order.
     '''
     test_db = 'database_test.db'
     database_manager.create_steam_reviews(test_db)

     samples = (
         (300000, 'It was great', 'Destroyer'),
         (300020, 'It was bad', 'Dismantler'),
         (300025, 'OMG', 'Makiavelli'),
         (300040, 'I want to cry myself to sleep', 'GiveMeSugar'),
         (300040, 'When I get out of this padded cell I will bake a cake', 'Sluggish666'),
     )

     row_number = 0
     for verdict in ('Not Recommended', 'Recommended'):
         for app_num, review_text, user_name in samples:
             row_number += 1
             database_manager.insert_data_steam_reviews(
                 test_db, 'url_%s' % row_number, app_num, '2011-01-01', 0,
                 verdict, review_text, user_name)
Exemple #5
0
 def setUp(self):
     '''
     Call database_manager.create_steam_reviews on the test database and
     insert the same 'Recommended' review three times.
     '''
     test_db = 'database_test.db'
     database_manager.create_steam_reviews(test_db)

     duplicate_row = ('url_1', 300000, '2011-01-01', 0, 'Recommended', 'It was great', 'Destroyer')
     for _ in range(3):
         database_manager.insert_data_steam_reviews(test_db, *duplicate_row)
Exemple #6
0
 def setUp(self):
     '''
     Call database_manager.create_steam_reviews on the test database and
     insert three distinct reviews: two 'Recommended' and one
     'Not Recommended'.
     '''
     test_db = 'database_test.db'
     database_manager.create_steam_reviews(test_db)

     fixture_rows = (
         ('url_1', 300000, '2011-01-01', 0, 'Recommended', 'It was great', 'Destroyer'),
         ('url_2', 300020, '2011-01-01', 0, 'Not Recommended', 'It was bad', 'Dismantler'),
         ('url_3', 300025, '2011-01-01', 0, 'Recommended', 'OMG', 'Makiavelli'),
     )
     for fixture_row in fixture_rows:
         database_manager.insert_data_steam_reviews(test_db, *fixture_row)
Exemple #7
0
 def setUp(self):
     '''
     Call database_manager.create_steam_reviews on the test database and
     insert sixteen reviews: eight copies of one 'Not Recommended' review
     followed by eight copies of one 'Recommended' review.
     '''
     test_db = 'database_test.db'
     database_manager.create_steam_reviews(test_db)

     copies_per_review = 8
     repeated_rows = (
         ('url_2', 300020, '2011-01-01', 0, 'Not Recommended', 'It was bad', 'Dismantler'),
         ('url_9', 300040, '2011-01-01', 0, 'Recommended', 'It was great', 'GiveMeSugar'),
     )
     for repeated_row in repeated_rows:
         for _ in range(copies_per_review):
             database_manager.insert_data_steam_reviews(test_db, *repeated_row)