def get_reviews(db_location):
    """Scrape Steam store pages for reviews and store them, indefinitely.

    The controlling function for the process that scrapes reviews from
    Steam. Accessed from run_app.py.

    The app number to resume from is taken from the last stored review
    (or ``start_scraping_app_num`` when no review is stored). Each loop
    pass sleeps, advances the app number by ``scraper_increment``, fetches
    that app's page and inserts every review found on it. Because the
    increment happens before the request, the starting app number itself
    is never requested. Pages without reviews insert nothing.

    Args:
        db_location: Database location handed unchanged to every
            ``database_manager`` call.

    This function never returns; it runs until the process is interrupted
    or one of the calls it makes raises.
    """
    sleep_time_between_requests = 1  # So Steam can't complain this is a burden on their servers.
    scraper_increment = 5  # app_num increases this much every scraper request
    start_scraping_app_num = 300000  # If the database contains no reviews, start with this app_num
    base_url = 'http://store.steampowered.com/app/'

    database_manager.create_steam_reviews(db_location)
    last_record = database_manager.retrieve_last_steam_review(db_location)
    if last_record is None:
        last_app_num = start_scraping_app_num
    else:
        # NOTE(review): index 2 is presumably the app_num column of the
        # stored row -- confirm against database_manager.
        last_app_num = last_record[2]

    # This process of scraping Steam continues until it is disrupted.
    while True:
        time.sleep(sleep_time_between_requests)
        last_app_num += scraper_increment
        content_from_steam = scrape_app_page(base_url, last_app_num)
        date_scraped = datetime.datetime.now()

        if page_has_reviews(content_from_steam):
            reviews_on_page = get_reviews_on_page(content_from_steam)
            number_of_reviews = len(reviews_on_page)
            print('Found %s reviews for app number %s'
                  % (number_of_reviews, last_app_num))
        else:
            reviews_on_page = []
            print('No review element found for number %s' % (last_app_num))

        # These values are the same for every review on the page, so they
        # are computed once per page rather than once per review.
        url = '%s%s/' % (base_url, last_app_num)
        classified = 0
        for review in reviews_on_page:
            database_manager.insert_data_steam_reviews(
                db_location,
                url,
                last_app_num,
                date_scraped,
                classified,
                review['user_recommendation'],
                review['user_review_text'],
                review['user_name'],
            )
def test(self):
    """A review inserted with classified=1 is not returned by the query.

    Inserts a single 'Recommended' review whose classified flag is 1, then
    calls ``retrieve_steam_reviews(db, 'Recommended', 0, 1)`` and expects
    an empty result.
    NOTE(review): the trailing ``0, 1`` arguments are presumably the
    classified filter and a row limit -- confirm against database_manager.
    """
    db_path = 'database_test.db'
    # Field order: url, app_num, date_scraped, classified,
    # user_recommendation, user_review_text, user_name.
    classified_review = (
        'url', 300000, 'today', 1, 'Recommended', 'great', 'Bob',
    )
    database_manager.insert_data_steam_reviews(db_path, *classified_review)

    matches = database_manager.retrieve_steam_reviews(
        db_path, 'Recommended', 0, 1)
    assert len(matches) == 0
def test(self):
    """An inserted review can be read back verbatim from steam_reviews.

    Inserts one review through ``database_manager`` and then reads the
    first row of the ``steam_reviews`` table directly with ``sqlite3``.
    The expected leading ``1`` is the first column of that row.
    NOTE(review): presumably an auto-generated id, which requires the
    table to be empty before this test runs -- confirm against the fixture.
    """
    db_location = 'database_test.db'
    url = 'url'
    app_num = 300000
    date_scraped = 'today'
    user_recommendation = 'great'
    user_review_text = 'great'
    user_name = 'Bob'
    classified = 0
    database_manager.insert_data_steam_reviews(
        db_location, url, app_num, date_scraped, classified,
        user_recommendation, user_review_text, user_name)

    # Using a sqlite3 connection as a context manager only commits or
    # rolls back; it never closes the connection. Close it explicitly so
    # the test does not leak an open handle on the database file.
    db = sqlite3.connect(db_location, timeout=20)
    try:
        cur = db.cursor()
        response = cur.execute("SELECT * FROM steam_reviews;")
        response_one_data = response.fetchone()
    finally:
        db.close()

    assert response_one_data == (
        1, 'url', 300000, 'today', 0, 'great', 'great', 'Bob')
def setUp(self):
    """Prepare the test database with ten reviews.

    Calls ``create_steam_reviews`` and then inserts five
    'Not Recommended' reviews (url_1..url_5) followed by five
    'Recommended' reviews (url_6..url_10). Every review is dated
    '2011-01-01' with a classified flag of 0.
    """
    db_path = 'database_test.db'
    scraped_on = '2011-01-01'
    classified = 0
    database_manager.create_steam_reviews(db_path)

    # (url, app_num, user_recommendation, user_review_text, user_name)
    seed_rows = (
        ('url_1', 300000, 'Not Recommended', 'It was great', 'Destroyer'),
        ('url_2', 300020, 'Not Recommended', 'It was bad', 'Dismantler'),
        ('url_3', 300025, 'Not Recommended', 'OMG', 'Makiavelli'),
        ('url_4', 300040, 'Not Recommended',
         'I want to cry myself to sleep', 'GiveMeSugar'),
        ('url_5', 300040, 'Not Recommended',
         'When I get out of this padded cell I will bake a cake',
         'Sluggish666'),
        ('url_6', 300000, 'Recommended', 'It was great', 'Destroyer'),
        ('url_7', 300020, 'Recommended', 'It was bad', 'Dismantler'),
        ('url_8', 300025, 'Recommended', 'OMG', 'Makiavelli'),
        ('url_9', 300040, 'Recommended',
         'I want to cry myself to sleep', 'GiveMeSugar'),
        ('url_10', 300040, 'Recommended',
         'When I get out of this padded cell I will bake a cake',
         'Sluggish666'),
    )
    for url, app_num, recommendation, review_text, author in seed_rows:
        database_manager.insert_data_steam_reviews(
            db_path, url, app_num, scraped_on, classified,
            recommendation, review_text, author)
def setUp(self):
    """Prepare the test database with three identical reviews.

    Calls ``create_steam_reviews`` and then inserts the same
    'Recommended' review for app 300000 three times.
    """
    db_path = 'database_test.db'
    database_manager.create_steam_reviews(db_path)

    # url, app_num, date_scraped, classified, user_recommendation,
    # user_review_text, user_name
    duplicated_review = (
        'url_1', 300000, '2011-01-01', 0,
        'Recommended', 'It was great', 'Destroyer',
    )
    for _ in range(3):
        database_manager.insert_data_steam_reviews(
            db_path, *duplicated_review)
def setUp(self):
    """Prepare the test database with three distinct reviews.

    Calls ``create_steam_reviews`` and then inserts, in order, a
    'Recommended', a 'Not Recommended' and another 'Recommended' review.
    Every review is dated '2011-01-01' with a classified flag of 0.
    """
    db_path = 'database_test.db'
    scraped_on = '2011-01-01'
    classified = 0
    database_manager.create_steam_reviews(db_path)

    # (url, app_num, user_recommendation, user_review_text, user_name)
    seed_rows = (
        ('url_1', 300000, 'Recommended', 'It was great', 'Destroyer'),
        ('url_2', 300020, 'Not Recommended', 'It was bad', 'Dismantler'),
        ('url_3', 300025, 'Recommended', 'OMG', 'Makiavelli'),
    )
    for url, app_num, recommendation, review_text, author in seed_rows:
        database_manager.insert_data_steam_reviews(
            db_path, url, app_num, scraped_on, classified,
            recommendation, review_text, author)
def setUp(self):
    """Prepare the test database with sixteen reviews in two groups.

    Calls ``create_steam_reviews`` and then inserts eight identical
    'Not Recommended' reviews for app 300020, followed by eight identical
    'Recommended' reviews for app 300040.
    """
    db_path = 'database_test.db'
    copies_per_review = 8
    database_manager.create_steam_reviews(db_path)

    # url, app_num, date_scraped, classified, user_recommendation,
    # user_review_text, user_name
    negative_review = (
        'url_2', 300020, '2011-01-01', 0,
        'Not Recommended', 'It was bad', 'Dismantler',
    )
    positive_review = (
        'url_9', 300040, '2011-01-01', 0,
        'Recommended', 'It was great', 'GiveMeSugar',
    )
    # All negative copies are inserted before any positive copy.
    for review in (negative_review, positive_review):
        for _ in range(copies_per_review):
            database_manager.insert_data_steam_reviews(db_path, *review)