def all_items_in_game(game):
    """Return a JSON string of every "Item" row whose name matches *game*.

    Args:
        game: name to match against the Item table's ``name`` column.

    Returns:
        JSON-encoded list of matching rows.
    """
    database = SteamDatabase(False)
    # Double up single quotes so a name containing ' cannot break out of the
    # SQL literal (the previous raw interpolation was injectable); a
    # parameterized query would be preferable if SteamDatabase supports one
    safe_game = game.replace("'", "''")
    results = database.query_database(
        f"SELECT * FROM \"Item\" WHERE name='{safe_game}'")
    database.shutdown()
    return dumps(results)
def all_prices_for_item_in_game(app_id, item, type):
    """Return JSON price history for *item* from the selected price table.

    Args:
        app_id: accepted for interface compatibility; not used in the query.
        item: market_hash_name of the item (sanitised via the database helper).
        type: price table to read, either "PriceDaily" or "PriceHourly".
              (Name shadows the builtin but is kept for caller compatibility.)

    Returns:
        JSON-encoded list of (market_hash_name, time, median_price, volume).

    Raises:
        ValueError: if *type* is not a recognised price table.
    """
    # Table names cannot be bound as SQL parameters, so allowlist them to
    # block injection through the interpolated identifier
    if type not in ("PriceDaily", "PriceHourly"):
        raise ValueError(f"Unknown price table: {type!r}")
    database = SteamDatabase(False)
    results = database.query_database(
        f"SELECT market_hash_name, time, median_price, volume FROM \"{type}\" "
        f"WHERE market_hash_name='{database.clean_market_hash_name(item)}'")
    database.shutdown()
    return dumps(results, default=str)
def __init__(self):
    """Open the database connection, announce the worker, and fetch a cookie."""
    # Connection to database
    self.database = SteamDatabase()
    self.database.ping_database("Started scraper")
    # Cookie for scraping - failure is non-fatal so the scraper can retry later
    try:
        self.last_cookie_check = datetime.utcnow()
        self.cookie = None
        self.get_cookie()
    except Exception:
        # Narrowed from a bare except so SystemExit/KeyboardInterrupt propagate
        print_issue("Could not get cookie")
def transaction_amount():
    """Return the Steam Market's total daily transaction volume as JSON.

    Transaction volume per day = sum of median_price * volume over all items.
    """
    db = SteamDatabase(False)
    query = """
        SELECT time::date as time, sum(median_price * volume) as transaction_volume
        FROM "PriceDaily"
        GROUP BY time
    """
    rows = db.query_database(query)
    db.shutdown()
    return dumps(rows, default=str)
def market_seasonality():
    """Return per-month average percentage moves of the whole market as JSON.

    Seasonality of the entire market calculated using the earliest date of a
    month and the latest date of a month:
    (end_price - start_price) / start_price

    Returns:
        JSON-encoded list of (month, seasonal_percentage_move).
    """
    database = SteamDatabase(False)
    results = database.query_database("""
        SELECT start_month_prices.month,
               AVG(((end_month_prices.median_price - start_month_prices.median_price)
                    / start_month_prices.median_price) * 100) as seasonal_percentage_move
        FROM
          (SELECT daily_price_action.month, daily_price_action.year,
                  daily_price_action.median_price, daily_price_action.market_hash_name
           FROM (SELECT max(day) as day, month, year, market_hash_name
                 FROM (SELECT date_part('day', time) as day,
                              date_part('month', time) as month,
                              date_part('year', time) as year,
                              market_hash_name
                       FROM "PriceDaily") as split_data
                 GROUP BY month, year, market_hash_name) as end_month_data
           JOIN (SELECT date_part('day', time) as day,
                        date_part('month', time) as month,
                        date_part('year', time) as year,
                        market_hash_name, median_price
                 FROM "PriceDaily") as daily_price_action
             ON daily_price_action.market_hash_name = end_month_data.market_hash_name
                AND daily_price_action.day = end_month_data.day
                AND daily_price_action.month = end_month_data.month
                AND daily_price_action.year = end_month_data.year
           GROUP BY daily_price_action.month, daily_price_action.year,
                    daily_price_action.median_price,
                    daily_price_action.market_hash_name) as end_month_prices
        JOIN
          (SELECT daily_price_action.month, daily_price_action.year,
                  daily_price_action.median_price, daily_price_action.market_hash_name
           FROM (SELECT min(day) as day, month, year, market_hash_name
                 FROM (SELECT date_part('day', time) as day,
                              date_part('month', time) as month,
                              date_part('year', time) as year,
                              market_hash_name
                       FROM "PriceDaily") as split_data
                 GROUP BY month, year, market_hash_name) as start_month_data
           JOIN (SELECT date_part('day', time) as day,
                        date_part('month', time) as month,
                        date_part('year', time) as year,
                        market_hash_name, median_price
                 FROM "PriceDaily") as daily_price_action
             ON daily_price_action.market_hash_name = start_month_data.market_hash_name
                AND daily_price_action.day = start_month_data.day
                AND daily_price_action.month = start_month_data.month
                AND daily_price_action.year = start_month_data.year
           GROUP BY daily_price_action.month, daily_price_action.year,
                    daily_price_action.median_price,
                    daily_price_action.market_hash_name) as start_month_prices
          ON start_month_prices.month = end_month_prices.month
             AND start_month_prices.year = end_month_prices.year
             AND start_month_prices.market_hash_name = end_month_prices.market_hash_name
        GROUP BY start_month_prices.month
    """)
    # BUG FIX: every sibling endpoint closes its session; this one leaked it
    database.shutdown()
    return dumps(results, default=str)
def market_overview():
    """Return daily price and volume of the whole Steam Market as JSON.

    Daily close/volume come from "PriceDaily"; intraday high/low come from
    the summed hourly prices in "PriceHourly".
    """
    db = SteamDatabase(False)
    overview = db.query_database("""
        SELECT daily_price_action.time as time,
               daily_price_action.volume as volume,
               daily_hour_price_action.high as high,
               daily_hour_price_action.low as low,
               daily_price_action.median_price as close
        FROM (SELECT time::date as time, sum(volume) as volume,
                     sum(median_price) as median_price
              FROM "PriceDaily"
              GROUP BY time::date
              ORDER BY time) as daily_price_action
        LEFT JOIN (SELECT time::date as time, max(median_price) as high,
                          min(median_price) as low
                   FROM (SELECT time as time, sum(median_price) as median_price
                         FROM "PriceHourly"
                         GROUP BY time
                         ORDER BY time) as hourly_price_action
                   GROUP BY time::date
                   ORDER BY time::date) as daily_hour_price_action
        ON daily_price_action.time=daily_hour_price_action.time
    """)
    db.shutdown()
    return dumps(overview, default=str)
def priority_of_items():
    """Mark items that have never been price-scanned as urgent.

    Finds items with no row in either price table and backdates the due_date
    of their 'Official Price' task so the scraper picks them up first.
    """
    # Open a database session
    database = SteamDatabase()
    # Items missing from both price tables have never had a price scan
    unscanned = database.query_database("""
        SELECT distinct market_hash_name, app_id
        FROM "Item"
        where market_hash_name not in
            (SELECT distinct market_hash_name FROM "PriceDaily"
             INTERSECT
             select distinct market_hash_name from "PriceHourly")
    """)
    # Backdate each item's official-price task far into the past
    for market_hash_name, app_id in unscanned:
        overdue = datetime.utcnow() - relativedelta.relativedelta(days=999)
        database.queue_database(
            f"UPDATE task SET due_date='{overdue}'::timestamp "
            f"WHERE item='{database.clean_market_hash_name(market_hash_name)}' "
            f"AND app_id={app_id} AND action='Official Price'")
    # Close the session
    database.shutdown()
def check_for_tasks():
    """Ensure every game and item has its scheduled tasks, creating missing ones."""
    # Open a database session
    database = SteamDatabase()
    existing_tasks = set(
        database.query_database("SELECT item, app_id, action FROM task"))
    items = database.query_database(
        'SELECT market_hash_name, app_id FROM public."Item"')
    games = database.query_database('SELECT app_id FROM public."Game"')
    # Checking games
    for game in games:
        # Operation Phoenix Weapon Case is a placeholder to pass foreign key -
        # mostly because I don't want another table of items or to split it
        # NOTE(review): `game` looks like a row from query_database compared
        # against scalar app_ids in tasks - confirm row shape matches
        if ("Operation Phoenix Weapon Case", game, "New Items") not in existing_tasks:
            # Add actions associated with game
            database.add_task_game(game)
    # Checking items
    for market_hash_name, app_id in items:
        if (market_hash_name, app_id, "Official Price") not in existing_tasks:
            # Add actions associated with item
            database.add_task_item(market_hash_name, app_id, live_price=False)
        if (market_hash_name, app_id, "Live Price") not in existing_tasks:
            # Add actions associated with item
            database.add_task_item(market_hash_name, app_id, official_price=False)
    # Finalising database
    database.shutdown()
from steam_database import SteamDatabase

if __name__ == "__main__":
    # Connecting to database
    database = SteamDatabase()
    # Checking if workers have recently pinged (within 0.1 day = ~2.4 hours)
    pings = database.query_database("""
        SELECT distinct name
        FROM workers
        WHERE last_ping >= (timezone('utc', now()) - INTERVAL '0.1 DAY')::timestamp
    """)
    # Printing information of workers
    for ping in pings:
        print(ping)
    # BUG FIX: close the session like the sibling scripts do; it was leaked
    database.shutdown()
""" Changes the cookie of the database """ import sys from steam_database import SteamDatabase if __name__ == "__main__": # Creating database database = SteamDatabase() # Obtaining all items which have a timeout work = database.update_database([ f""" UPDATE information SET value='{sys.argv[1]}' where name='cookie' """ ]) # Closing session database.shutdown()
class SteamScraper:
    """ Scrapes the Steam Market for prices, volumes and spread along with item information """

    def __init__(self):
        """Open the database connection, announce the worker, and fetch a cookie."""
        # Connection to database
        self.database = SteamDatabase()
        self.database.ping_database("Started scraper")
        # Cookie for scraping - failure is non-fatal, the scraper retries later
        try:
            self.last_cookie_check = datetime.utcnow()
            self.cookie = None
            self.get_cookie()
        except Exception:
            # Narrowed from a bare except so SystemExit/KeyboardInterrupt propagate
            print_issue("Could not get cookie")

    def get_cookie(self):
        """ Obtains a cookie for better access to pages

        Raises:
            Exception: when the database cannot supply a cookie
        """
        try:
            # Attempting to get cookie
            self.cookie = self.database.get_cookie()
            self.last_cookie_check = datetime.utcnow()
        except Exception as e:
            # Informing user
            log_issue("steam_scraper_stack",
                      f"get_cookie\tdatabase\t\tCould not get cookie {e}")
            raise Exception("Unable to get cookie")

    def get_page(self, url):
        """ Obtains data about a game and waits until unblocked if blocked
        Steam blocks requests after more than 20 requests in a given minute

        Args:
            url: page to fetch

        Returns:
            The successful requests response.

        Raises:
            Exception: for non-retryable status codes (anything other than
                429/500/502)
        """
        # Logging internet activity
        log_issue("steam_scraper_activity",
                  f"{url}\t",
                  date=False,
                  new_line=False)
        tries = 0
        while True:
            # Delay by three seconds to stay under Steam's rate limit
            sleep(3)
            # Refresh the cookie roughly every six hours (0.25 days)
            if self.last_cookie_check + relativedelta.relativedelta(
                    days=0.25) < datetime.utcnow():
                try:
                    # BUG FIX: get_cookie() stores the cookie on self and
                    # returns None; `self.cookie = self.get_cookie()` erased
                    # the freshly fetched cookie
                    self.get_cookie()
                except Exception as e:
                    log_issue(
                        "steam_scraper_stack",
                        f"get_page\tdatabase\t\tCould not get cookie {e}")
            # Obtaining page - providing a cookie appears to show you are more
            # trustworthy to Steam and hence, more leeway but it expires in a few days
            page = get(url, cookies={"steamLoginSecure": self.cookie})
            # Returning page if successful
            if page.status_code == 200:
                # Placing a new line and dating the request
                log_issue("steam_scraper_activity", "")
                # Page successfully obtained
                return page
            # An issue occurred, handling issue
            # BUG FIX: the original read page.status (no such attribute on a
            # requests Response), raising AttributeError for the 500/502 checks
            if page.status_code not in (429, 500, 502):
                # Handler for invalid URLs and other issues
                log_issue(
                    "steam_scraper_stack",
                    f"get_page\turl_issue\turl={url}\tGot status_code {page.status_code}"
                )
                # Adding a new line to scraper activity page
                log_issue("steam_scraper_activity", "")
                raise Exception("URL is invalid")
            # Notifying user of re-request
            log_issue("steam_scraper_activity", "!", date=False, new_line=False)
            # Counting number of tries
            tries += 1
            # Notifying database that the process has been stuck - occurs every ~10 minutes of trying
            if tries % 200 == 0:
                self.database.ping_database("Blocked by Steam")
            # Committing all work if the process has been temporarily blocked by Steam (~10 minutes of trying)
            if tries == 200:
                self.database.commit()

    def solve_tasks(self):
        """ Works through tasks indefinitely, dispatching each to its handler """
        while True:
            # Obtaining tasks
            try:
                tasks = self.database.obtain_tasks()
            except Exception as e:
                log_issue(
                    "steam_scraper_stack",
                    f"solve_tasks\tdatabase\t\tCould not obtain tasks {e}")
                continue
            # Solving the tasks
            for task in tasks:
                # Notifying user of progress
                print(task)
                try:
                    # Solving task - dispatch on the task's action
                    if task[2] == "New Items":
                        self.scan_for_new_items_all(task[1])
                    elif task[2] == "Official Price":
                        self.scan_for_new_official_prices(task[0], task[1])
                    elif task[2] == "Live Price":
                        self.scan_for_live_prices(task[0], task[1])
                    else:
                        log_issue(
                            "steam_scraper_stack",
                            f"solve_tasks\tunknown\ttask={task}\tUnknown item")
                    # Marking the task as completed
                    try:
                        self.database.update_task(task[0], task[1], task[2])
                    except Exception as e:
                        log_issue(
                            "steam_scraper_stack",
                            f"solve_tasks\tdatabase\ttask={task}\tFailed to add completed task {e}"
                        )
                    # Checking if a commit is required
                    try:
                        self.database.commit_checker()
                    except Exception as e:
                        # BUG FIX: the original call omitted the log channel,
                        # passing the message where the channel name belongs
                        log_issue(
                            "steam_scraper_stack",
                            f"solve_tasks\tdatabase\t\tFailed to check for commits {e}"
                        )
                except Exception as e:
                    # Logging issue
                    log_issue(
                        "steam_scraper_stack",
                        f"solve_tasks\tfunctions\ttask={task}\tFailed to complete task {e}"
                    )
                    # Trying to free item
                    try:
                        self.database.update_task(task[0], task[1], task[2],
                                                  True)
                    except Exception as e:
                        log_issue(
                            "steam_scraper_stack",
                            f"solve_tasks\tdatabase\ttask={task}\tFailed to let go of task {e}"
                        )
            # Updating database that this worker is still working
            self.database.ping_database("Completed a round")

    def scan_for_new_items_all(self, app_id):
        """ Scans for new items for a given game which are not yet in the database

        Args:
            app_id: game whose market listings are paged through (100 at a time)
        """
        # Tracker for progress
        start = 0
        count_of_items = float("inf")
        # Obtaining already known items and games
        try:
            saved_items_and_games = set(
                self.database.query_database(
                    'SELECT market_hash_name FROM Public."Item"'))
            saved_items_and_games.update(
                self.database.query_database(
                    'SELECT app_id FROM Public."Game"'))
        except Exception as e:
            log_issue(
                "steam_scraper_stack",
                f"scan_for_new_items_all\tdatabase\t\tCould not obtain saved items and games\t{e}"
            )
            raise Exception("Could not find obtain saved_items_and_games")
        # While there are still more items to obtain
        while start < count_of_items:
            # Obtaining page
            try:
                # Obtaining page information
                page = self.get_page(
                    f"https://steamcommunity.com/market/search/render/?count=100&norender=1&appid={app_id}&start={start}"
                )
                page = loads(page.content)
            except Exception as e:
                # NOTE(review): this retries the same offset forever if the
                # page is permanently broken - original behaviour preserved
                log_issue(
                    "steam_scraper_stack",  # fixed: was an f-string with no placeholders
                    f"scan_for_new_items_all\turl\turl=https://steamcommunity.com/market/search/render/?count=100&norender=1&appid={app_id}&start={start}\tCould not get page\t{e}"
                )
                continue
            # Adding new items
            try:
                self.scan_for_new_items_page(page, saved_items_and_games)
            except Exception as e:
                log_issue(
                    "steam_scraper_stack",
                    f"scan_for_new_items_all\tpage\turl=https://steamcommunity.com/market/search/render/?count=100&norender=1&appid={app_id}&start={start}\tCould not access json\t{e}"
                )
            # Iterating to next count
            count_of_items = page["total_count"]
            start += 100

    def scan_for_new_items_page(self, page, saved_items_and_games):
        """ Scans for item details given a parsed search-render page

        Args:
            page: parsed JSON of one market search render page
            saved_items_and_games: set of known market_hash_names and app_ids,
                updated in place as new entries are stored
        """
        # For all items on the page
        for item in page["results"]:
            try:
                # Obtaining potential new game
                if item["asset_description"][
                        "appid"] not in saved_items_and_games:
                    # New game found
                    try:
                        # Testing if the image is possible to obtain
                        self.get_page(
                            f"https://steamcdn-a.akamaihd.net/steam/apps/{item['asset_description']['appid']}/header.jpg"
                        )
                        # Adding new game
                        self.database.add_game(
                            item["asset_description"]["appid"],
                            item["app_name"],
                            f"https://steamcdn-a.akamaihd.net/steam/apps/{item['asset_description']['appid']}/header.jpg"
                        )
                        # Confirming that game has been added
                        saved_items_and_games.add(
                            item["asset_description"]["appid"])
                    except Exception as e:
                        log_issue(
                            "steam_scraper_stack",
                            f"scan_for_new_items_page\ticon\tapp_id={item['asset_description']['appid']},name={item['app_name']}\tCould not get icon\t{e}"
                        )
                # Obtaining potential new item
                if item["asset_description"][
                        "market_hash_name"] not in saved_items_and_games:
                    # New item found - choosing large icon if possible
                    try:
                        # Obtaining icon
                        if item['asset_description']['icon_url_large'] == "":
                            icon = f"https://steamcommunity-a.akamaihd.net/economy/image/{item['asset_description']['icon_url']}"
                        else:
                            icon = f"https://steamcommunity-a.akamaihd.net/economy/image/{item['asset_description']['icon_url_large']}"
                        # Checking if page is real
                        self.get_page(icon)
                    except Exception as e:
                        log_issue(
                            "steam_scraper_stack",
                            f"scan_for_new_items_page\tapp_id={item['asset_description']['appid']},name={item['app_name']}\tCould not get icon\t{e}"
                        )
                        raise Exception("Could not find page for icon")
                    # Obtaining market page as a test; renamed from `page` so
                    # the loop's source parameter is no longer clobbered
                    try:
                        listing_page = self.get_page(
                            f"https://steamcommunity.com/market/listings/{item['asset_description']['appid']}/{item['asset_description']['market_hash_name']}"
                        )
                    except Exception as e:
                        # BUG FIX: the log previously put appid in the
                        # market_hash_name field and app_name in app_id
                        log_issue(
                            "steam_scraper_stack",
                            f"add_item\tpage\tmarket_hash_name={item['asset_description']['market_hash_name']},app_id={item['asset_description']['appid']}\tCould not get page\t{e}"
                        )
                        raise Exception("Could not find market listing")
                    # Parsing for item_name_id
                    try:
                        # Obtaining item_name_id
                        soup = listing_page.text
                        soup = findall(r"Market_LoadOrderSpread\( [0-9]* \)",
                                       soup)
                        soup = soup[0].split(" ")
                        item_name_id = soup[1]
                        # Verifying item_name_id
                        try:
                            # BUG FIX: "&curren" had been HTML-entity-decoded
                            # into the currency sign; restored "&currency=1"
                            self.get_page(
                                f"https://steamcommunity.com/market/itemordershistogram?country=US&language=english&currency=1&item_nameid={item_name_id}"
                            )
                        except Exception as e:
                            log_issue(
                                "steam_scraper_stack",
                                f"item_entry\turl\tmarket_hash_name={item['asset_description']['market_hash_name']},app_id={item['asset_description']['appid']},item_name_id={item_name_id}\tCould not get page\t{e}"
                            )
                    except Exception as e:
                        # BUG FIX: message typo ("ould") and missing exception detail
                        log_issue(
                            "steam_scraper_stack",
                            f"item_entry\tpage\tmarket_hash_name={item['asset_description']['market_hash_name']},app_id={item['asset_description']['appid']}\tCould not get item_name_id\t{e}"
                        )
                        raise Exception(
                            f"Could not find item_name_id for {item['asset_description']['market_hash_name']}, {item['asset_description']['appid']}"
                        )
                    # Adding newly obtained item
                    self.database.add_item(
                        item["asset_description"]["market_hash_name"],
                        item["name"], item["asset_description"]["appid"], icon,
                        item_name_id)
                    # Confirming that item has been added
                    saved_items_and_games.add(
                        item["asset_description"]["market_hash_name"])
            except Exception as e:
                log_issue(
                    "steam_scraper_stack",
                    f"scan_for_new_items\tpage\titem={item}\tCould not access json\t{e}"
                )

    def scan_for_new_official_prices(self, market_hash_name, app_id):
        """ Scans for new prices of items which are not yet in the database and adds them

        Args:
            market_hash_name: item identifier on the Steam Market
            app_id: game the item belongs to

        Raises:
            Exception: when the page or existing price points cannot be obtained
        """
        # Obtaining page if not given - not using API as that requires a cookie which expires within a couple days
        try:
            page = self.get_page(
                f"https://steamcommunity.com/market/listings/{app_id}/{market_hash_name}"
            )
        except Exception as e:
            log_issue(
                "steam_scraper_stack",
                f"scan_for_new_official_prices\turl\turl=https://steamcommunity.com/market/listings/{app_id}/{market_hash_name}\tCould not get page\t{e}"
            )
            raise Exception("Could not get page")
        # Filtering page to useful data
        try:
            # Checking if there are prices
            soup = findall(
                "There is no price history available for this item yet.",
                page.text)
            if soup == [
                    "There is no price history available for this item yet."
            ]:
                # There is no price history
                history = []
            else:
                # There are prices - the page embeds them as a JS literal
                soup = findall(r"var line1=.*", page.text)
                history = literal_eval(soup[0][10:-2])
        except Exception as e:
            # Issue occurred, logging
            log_issue(
                "steam_scraper_stack",
                f"scan_for_new_official_prices\tpage\tpage={page}\tCould not access elements for historical prices\t{e}"
            )
            raise Exception("Could not get price history")
        # Obtaining price points already added - hourly
        # (comments were previously swapped between the two queries)
        try:
            hourly_prices = set(
                self.database.query_database(
                    f"SELECT time from public.\"PriceHourly\" where market_hash_name='{self.database.clean_market_hash_name(market_hash_name)}'"
                ))
        except Exception as e:
            log_issue(
                "steam_scraper_stack",
                f"scan_for_new_official_prices\tdatabase\t\tCould not get hourly prices\t{e}"
            )
            raise Exception("Could not get hourly prices")
        # Obtaining price points already added - daily
        try:
            daily_prices = set(
                self.database.query_database(
                    f"SELECT time from public.\"PriceDaily\" where market_hash_name='{self.database.clean_market_hash_name(market_hash_name)}'"
                ))
        except Exception as e:
            log_issue(
                "steam_scraper_stack",
                f"scan_for_new_official_prices\tdatabase\t\tCould not get daily prices\t{e}"
            )
            raise Exception("Could not get daily prices")
        # Getting last month's date
        last_month = datetime.utcnow() - relativedelta.relativedelta(months=1)
        # For all prices
        for point in history:
            try:
                point_date = datetime.strptime(point[0][:14], "%b %d %Y %H")
            except Exception as e:
                log_issue(
                    "steam_scraper_stack",
                    f"scan_for_new_official_prices\tdatabase\tpoint={point}\tCould not access elements for time\t{e}"
                )
                continue
            # Points within the last month are stored hourly, older ones daily
            if last_month <= point_date:
                # Hourly point
                if point_date not in hourly_prices:
                    # New point
                    self.database.add_price_hourly(market_hash_name,
                                                   point_date, point[1],
                                                   point[2])
            else:
                # Daily point
                if point_date not in daily_prices:
                    # New point
                    self.database.add_price_daily(market_hash_name, point_date,
                                                  point[1], point[2])

    def scan_for_live_prices(self, market_hash_name, app_id):
        """ Obtains unofficial but detailed live order-book information about an item

        Args:
            market_hash_name: item identifier on the Steam Market
            app_id: game the item belongs to

        Raises:
            Exception: when any page or JSON field cannot be obtained, or the
                database update fails
        """
        # Obtaining item_name_id
        try:
            item_name_id = self.database.query_database(
                f"SELECT item_name_id FROM public.\"Item\" WHERE market_hash_name='{self.database.clean_market_hash_name(market_hash_name)}' AND app_id={app_id}"
            )
            # First row of the result
            # NOTE(review): assumes query_database unwraps single-column rows - confirm
            item_name_id = item_name_id[0]
        except Exception as e:
            log_issue(
                "steam_scraper_stack",
                f"scan_for_live_prices\tdatabase\t\tCould not get item_name_id\t{e}"
            )
            raise Exception("Could not get item name id")
        # Obtaining approximate time of scrape initiation
        scrape_time = datetime.utcnow()
        # Obtaining buy order page
        try:
            # BUG FIX: "&curren" had been HTML-entity-decoded into the currency
            # sign in this URL; restored "&currency=1"
            page = self.get_page(
                f"https://steamcommunity.com/market/itemordershistogram?country=US&language=english&currency=1&item_nameid={item_name_id}"
            )
            page = loads(page.content)
        except Exception as e:
            log_issue(
                "steam_scraper_stack",
                f"scan_for_live_prices\tpage\turl=https://steamcommunity.com/market/itemordershistogram?country=US&language=english&currency=1&item_nameid={item_name_id}\tCould not get page\t{e}"
            )
            raise Exception("Could not get order page")
        # Obtaining current bid and ask
        try:
            if page["sell_order_summary"] == "There are no active listings for this item.":
                # There is currently no one selling
                buy_price = "NULL"
            else:
                # There is currently an ask price (Steam reports cents)
                buy_price = float(page["lowest_sell_order"]) / 100
        except Exception as e:
            log_issue(
                "steam_scraper_stack",
                f"scan_for_live_prices\tjson\tpage={page}\tCould not access json - lowest_sell_order\t{e}"
            )
            raise Exception("Could not get sell orders")
        try:
            if page["buy_order_summary"] == "There are no active buy orders for this item.":
                # There is currently no one buying
                sell_price = "NULL"
            else:
                # There is currently a bid price
                sell_price = float(page["highest_buy_order"]) / 100
        except Exception as e:
            log_issue(
                "steam_scraper_stack",
                f"scan_for_live_prices\tjson\tpage={page}\tCould not access json - highest_buy_order\t{e}"
            )
            raise Exception("Could not get buy orders")
        # Obtaining reasonably priced sell quantity
        try:
            sell_quantity = 0
            # Going through all orders
            for sell_order in page["buy_order_graph"]:
                # Adding if the price meets condition, otherwise stop
                if sell_order[0] >= sell_price * 0.9:
                    sell_quantity += sell_order[1]
                else:
                    break
        except Exception as e:
            log_issue(
                "steam_scraper_stack",
                f"scan_for_live_prices\tjson\tpage={page}\tCould not access json - buy_order_graph\t{e}"
            )
            raise Exception("Could not get buy orders reasonable")
        # Obtaining reasonably priced buy quantity
        try:
            buy_quantity = 0
            # Going through all orders
            for buy_order in page["sell_order_graph"]:
                # Adding quantity if the price meets condition, otherwise stop
                if buy_order[0] <= buy_price / 0.9:
                    buy_quantity += buy_order[1]
                else:
                    break
        except Exception as e:
            log_issue(
                "steam_scraper_stack",
                f"scan_for_live_prices\tjson\tpage={page}\tCould not access json - sell_order_graph\t{e}"
            )
            raise Exception("Could not get sell orders reasonable")
        # Obtaining total items being sold and desired to be bought
        try:
            if page["sell_order_summary"] == "There are no active listings for this item.":
                # No buying demand
                total_buy_quantity = 0
            else:
                # There are buyers - the count is embedded in an HTML span
                total_buy_quantity = findall(
                    ">[0-9]*<", page["sell_order_summary"])[0][1:-1]
        except Exception as e:
            log_issue(
                "steam_scraper_stack",
                f"scan_for_live_prices\tjson\tpage={page}\tCould not access json array - sell_order_summary\t{e}"
            )
            raise Exception("Could not get total items sold")
        try:
            if page["buy_order_summary"] == "There are no active buy orders for this item.":
                # No sell demand
                total_sell_quantity = 0
            else:
                # There are people selling
                total_sell_quantity = findall(
                    ">[0-9]*<", page["buy_order_summary"])[0][1:-1]
        except Exception as e:
            log_issue(
                "steam_scraper_stack",
                f"scan_for_live_prices\tjson\tpage={page}\tCould not access json array - buy_order_graph\t{e}"
            )
            raise Exception("Could not get total items bought")
        # Obtaining overview page
        try:
            page = self.get_page(
                f"https://steamcommunity.com/market/priceoverview/?appid={app_id}&market_hash_name={market_hash_name}"
            )
            page = loads(page.content)
        except Exception as e:
            log_issue(
                "steam_scraper_stack",
                f"scan_for_live_prices\turl\turl=https://steamcommunity.com/market/priceoverview/?appid={app_id}&market_hash_name={market_hash_name}\tCould not get page\t{e}"
            )
            raise Exception("Could not get overview page")
        # Obtaining median price - may be absent when there is no volume
        try:
            # Strip the leading currency symbol and thousands separators
            median_price = page["median_price"][1:].replace(",", "")
        except Exception:
            median_price = "NULL"
        # Obtaining volume - may be absent when there is no volume
        try:
            # Obtaining volume and turning comma separated number into a number
            volume = page["volume"].replace(",", "")
        except Exception:
            volume = "NULL"
        # Storing the snapshot
        try:
            self.database.add_price_live(market_hash_name, scrape_time,
                                         sell_price, buy_price, median_price,
                                         volume, sell_quantity, buy_quantity,
                                         total_sell_quantity,
                                         total_buy_quantity)
        except Exception as e:
            raise Exception(f"Could not update database {e}")
def all_game():
    """Return every game's name and icon as a JSON string."""
    db = SteamDatabase(False)
    games = db.query_database('SELECT name, icon FROM "Game"')
    db.shutdown()
    return dumps(games)
""" Resets the timeout for items which may be stuck in limbo for an unnecessarily long time """ from steam_database import SteamDatabase if __name__ == "__main__": # Creating database database = SteamDatabase() # Obtaining all items which have a timeout work = database.query_database(""" SELECT item, app_id, action FROM task WHERE timeout_time IS NOT NULL """) # Removing timeouts for item in work: database.update_task(item[0], item[1], item[2], True) # Closing session database.shutdown()