Beispiel #1
0
 def setUp(self):
     '''
     Create the steam reviews table in the test database and insert eight
     reviews: four 'Not Recommended' followed by four 'Recommended'.
     '''
     test_db = 'database_test.db'
     database_manager.create_steam_reviews(test_db)

     # (url, app_num, recommendation, review text, user name)
     fixture_rows = (
         ('url_1', 300000, 'Not Recommended', 'It was Awful', 'Destroyer'),
         ('url_2', 300020, 'Not Recommended', 'It was bad', 'Dismantler'),
         ('url_4', 300040, 'Not Recommended', 'I want to cry myself to sleep', 'GiveMeSugar'),
         ('url_5', 300040, 'Not Recommended', 'When I get out of this padded cell I will bake a cake', 'Sluggish666'),
         ('url_6', 300000, 'Recommended', 'It was great', 'Creater'),
         ('url_8', 300025, 'Recommended', 'OMG', 'Makiavelli'),
         ('url_9', 300040, 'Recommended', 'More, More, More', 'GiveMeSalt'),
         ('url_10', 300040, 'Recommended', 'Loved it. Would play again', 'Speedy99'),
     )

     # Every row shares the same date string and a 0 in the fifth position.
     for url, app_num, recommendation, review_text, user_name in fixture_rows:
         database_manager.insert_data_steam_reviews(
             test_db, url, app_num, '2011-01-01', 0,
             recommendation, review_text, user_name)
Beispiel #2
0
def get_reviews(db_location):
    '''
    The controlling function for the process that scrapes reviews from steam.
    Accessed from run_app.py

    Ensures the reviews table exists, works out which app number to scrape
    next, and then loops forever: wait, fetch one app page, and insert every
    review found on it into the database.

    When the table already holds reviews, scraping resumes one increment
    after the app number stored in the most recent record. When the table is
    empty, scraping begins at the starting app number itself.

    This function never returns normally; it runs until it is interrupted or
    until one of the calls it makes raises.

    db_location: passed unchanged as the first argument of every
        database_manager call.
    '''

    sleep_time_between_requests = 1  # Seconds to wait before each request, so the scraper is not a burden on Steam.
    scraper_increment = 5  # app_num increases this much every scraper request
    start_scraping_app_num = 300000  # If the database contains no reviews, start with this app_num
    base_url = 'http://store.steampowered.com/app/'
    classified = 0  # Every freshly scraped review is inserted with this value.

    database_manager.create_steam_reviews(db_location)

    last_record = database_manager.retrieve_last_steam_review(db_location)

    if last_record is None:
        # Nothing scraped yet: the first request goes to the starting app
        # number itself rather than skipping past it.
        app_num = start_scraping_app_num
    else:
        # NOTE(review): index 2 is presumably the app_num column of the
        # stored record - confirm against database_manager.
        app_num = last_record[2] + scraper_increment

    # This process of scraping Steam continues until it is disrupted.
    while True:
        time.sleep(sleep_time_between_requests)

        content_from_steam = scrape_app_page(base_url, app_num)
        date_scraped = datetime.datetime.now()

        if page_has_reviews(content_from_steam):
            reviews_on_page = get_reviews_on_page(content_from_steam)
            number_of_reviews = len(reviews_on_page)
            print('Found %s reviews for app number %s' %
                  (number_of_reviews, app_num))
        else:
            reviews_on_page = []
            print('No review element found for number %s' % app_num)

        # The URL is the same for every review on this page, so build it once.
        url = '%s%s/' % (base_url, app_num)

        for review in reviews_on_page:
            database_manager.insert_data_steam_reviews(
                db_location, url, app_num, date_scraped, classified,
                review['user_recommendation'], review['user_review_text'],
                review['user_name'])

        app_num += scraper_increment
Beispiel #3
0
 def setUp(self):
     '''
     Create the steam reviews table in the test database and insert sixteen
     reviews: eight copies of one 'Not Recommended' review followed by eight
     copies of one 'Recommended' review.
     '''
     test_db = 'database_test.db'
     database_manager.create_steam_reviews(test_db)

     copies_per_review = 8
     negative_review = ('url_2', 300020, '2011-01-01', 0, 'Not Recommended', 'It was bad', 'Dismantler')
     positive_review = ('url_9', 300040, '2011-01-01', 0, 'Recommended', 'It was great', 'GiveMeSugar')

     # All negative copies are inserted before any positive copy.
     for review_fields in (negative_review, positive_review):
         for _ in range(copies_per_review):
             database_manager.insert_data_steam_reviews(test_db, *review_fields)
Beispiel #4
0
 def setUp(self):
     '''
     Create the steam reviews table in the test database; no rows are
     inserted.
     '''
     database_manager.create_steam_reviews('database_test.db')
Beispiel #5
0
 def setUp(self):
     '''
     Create the steam reviews table in the test database and insert three
     identical 'Recommended' reviews.
     '''
     test_db = 'database_test.db'
     database_manager.create_steam_reviews(test_db)

     repeated_review = ('url_1', 300000, '2011-01-01', 0, 'Recommended', 'It was great', 'Destroyer')
     for _ in range(3):
         database_manager.insert_data_steam_reviews(test_db, *repeated_review)
Beispiel #6
0
 def setUp(self):
     '''
     Create the steam reviews table in the test database and insert three
     distinct reviews: 'Recommended', 'Not Recommended', 'Recommended'.
     '''
     test_db = 'database_test.db'
     database_manager.create_steam_reviews(test_db)

     # (url, app_num, recommendation, review text, user name)
     fixture_rows = (
         ('url_1', 300000, 'Recommended', 'It was great', 'Destroyer'),
         ('url_2', 300020, 'Not Recommended', 'It was bad', 'Dismantler'),
         ('url_3', 300025, 'Recommended', 'OMG', 'Makiavelli'),
     )

     # Every row shares the same date string and a 0 in the fifth position.
     for url, app_num, recommendation, review_text, user_name in fixture_rows:
         database_manager.insert_data_steam_reviews(
             test_db, url, app_num, '2011-01-01', 0,
             recommendation, review_text, user_name)