def setUp(self):
    '''
    Seed the test database with eight distinct reviews.

    Calls database_manager.create_steam_reviews on 'database_test.db' and
    then inserts four 'Not Recommended' rows followed by four 'Recommended'
    rows, all dated '2011-01-01' with the classified flag set to 0.
    '''
    test_db = 'database_test.db'
    scraped_on = '2011-01-01'
    unclassified = 0

    # (url, app number, recommendation, review text, user name)
    seed_reviews = (
        ('url_1', 300000, 'Not Recommended', 'It was Awful', 'Destroyer'),
        ('url_2', 300020, 'Not Recommended', 'It was bad', 'Dismantler'),
        ('url_4', 300040, 'Not Recommended', 'I want to cry myself to sleep', 'GiveMeSugar'),
        ('url_5', 300040, 'Not Recommended', 'When I get out of this padded cell I will bake a cake', 'Sluggish666'),
        ('url_6', 300000, 'Recommended', 'It was great', 'Creater'),
        ('url_8', 300025, 'Recommended', 'OMG', 'Makiavelli'),
        ('url_9', 300040, 'Recommended', 'More, More, More', 'GiveMeSalt'),
        ('url_10', 300040, 'Recommended', 'Loved it. Would play again', 'Speedy99'),
    )

    database_manager.create_steam_reviews(test_db)
    for page_url, app_num, verdict, review_text, reviewer in seed_reviews:
        database_manager.insert_data_steam_reviews(
            test_db, page_url, app_num, scraped_on, unclassified,
            verdict, review_text, reviewer)
def get_reviews(db_location, sleep_time_between_requests=1,
                scraper_increment=5, start_scraping_app_num=300000):
    '''
    The controlling function for the process that scrapes reviews from Steam.
    Accessed from run_app.py

    Resumes from the app number stored in the last review record (element 2
    of the row returned by database_manager.retrieve_last_steam_review), or
    from start_scraping_app_num when no record exists. The app number is
    incremented *before* each request, so the first page requested is the
    starting number plus scraper_increment.

    Parameters:
        db_location: database location handed to every database_manager call.
        sleep_time_between_requests: seconds to sleep before each request,
            so the scraper is not a burden on Steam's servers.
        scraper_increment: how much the app number grows per request.
        start_scraping_app_num: app number to start from when the database
            contains no reviews.

    This function never returns: the loop has no exit condition and runs
    until it is interrupted or one of the calls it makes raises.
    '''
    base_url = 'http://store.steampowered.com/app/'
    classified = 0  # every freshly scraped review is stored with classified = 0

    database_manager.create_steam_reviews(db_location)
    last_record = database_manager.retrieve_last_steam_review(db_location)
    if last_record is None:
        last_app_num = start_scraping_app_num
    else:
        last_app_num = last_record[2]

    # This process of scraping Steam continues until it is disrupted.
    while True:
        time.sleep(sleep_time_between_requests)
        last_app_num += scraper_increment
        content_from_steam = scrape_app_page(base_url, last_app_num)
        date_scraped = datetime.datetime.now()

        if page_has_reviews(content_from_steam):
            reviews_on_page = get_reviews_on_page(content_from_steam)
            number_of_reviews = len(reviews_on_page)
            print('Found %s reviews for app number %s' % (number_of_reviews, last_app_num))
        else:
            reviews_on_page = []
            print('No review element found for number %s' % (last_app_num))

        # The page URL is the same for every review found on this page.
        url = '%s%s/' % (base_url, last_app_num)
        for review in reviews_on_page:
            database_manager.insert_data_steam_reviews(
                db_location,
                url,
                last_app_num,
                date_scraped,
                classified,
                review['user_recommendation'],
                review['user_review_text'],
                review['user_name'])
def setUp(self):
    '''
    Seed the test database with two reviews, each repeated eight times.

    Calls database_manager.create_steam_reviews on 'database_test.db', then
    inserts eight identical 'Not Recommended' rows followed by eight
    identical 'Recommended' rows.
    '''
    test_db = 'database_test.db'
    copies_of_each = 8

    # (url, app number, date scraped, classified, recommendation, text, user)
    negative_row = ('url_2', 300020, '2011-01-01', 0,
                    'Not Recommended', 'It was bad', 'Dismantler')
    positive_row = ('url_9', 300040, '2011-01-01', 0,
                    'Recommended', 'It was great', 'GiveMeSugar')

    database_manager.create_steam_reviews(test_db)
    # All negative copies go in first, then all positive copies.
    for row in (negative_row, positive_row):
        for _ in range(copies_of_each):
            database_manager.insert_data_steam_reviews(test_db, *row)
def setUp(self):
    '''
    Prepare the test database without inserting any reviews.

    Only calls database_manager.create_steam_reviews on 'database_test.db'.
    '''
    test_db = 'database_test.db'
    database_manager.create_steam_reviews(test_db)
def setUp(self):
    '''
    Seed the test database with three copies of the same review.

    Calls database_manager.create_steam_reviews on 'database_test.db', then
    inserts one 'Recommended' row for app 300000 three times over.
    '''
    test_db = 'database_test.db'
    # (url, app number, date scraped, classified, recommendation, text, user)
    repeated_row = ('url_1', 300000, '2011-01-01', 0,
                    'Recommended', 'It was great', 'Destroyer')

    database_manager.create_steam_reviews(test_db)
    for _ in range(3):
        database_manager.insert_data_steam_reviews(test_db, *repeated_row)
def setUp(self):
    '''
    Seed the test database with three distinct reviews.

    Calls database_manager.create_steam_reviews on 'database_test.db', then
    inserts one row each for app numbers 300000, 300020 and 300025, in that
    order, all dated '2011-01-01' with the classified flag set to 0.
    '''
    test_db = 'database_test.db'
    scraped_on = '2011-01-01'
    unclassified = 0

    # (url, app number, recommendation, review text, user name)
    seed_reviews = (
        ('url_1', 300000, 'Recommended', 'It was great', 'Destroyer'),
        ('url_2', 300020, 'Not Recommended', 'It was bad', 'Dismantler'),
        ('url_3', 300025, 'Recommended', 'OMG', 'Makiavelli'),
    )

    database_manager.create_steam_reviews(test_db)
    for page_url, app_num, verdict, review_text, reviewer in seed_reviews:
        database_manager.insert_data_steam_reviews(
            test_db, page_url, app_num, scraped_on, unclassified,
            verdict, review_text, reviewer)