def test_load(self):
    """Check that ``steamspypi.load()`` returns non-empty data twice in a row.

    The first pass is expected to download the data, the second one to read
    it back from the cache. When the response cannot be decoded as JSON, a
    one-entry placeholder built from ``self.get_api_error_message()`` is used
    instead, so the length assertion still passes.
    """
    for _stage in ("download", "load from cache"):
        try:
            data = steamspypi.load()
        except json.decoder.JSONDecodeError:
            data = {"name": self.get_api_error_message()}

        self.assertGreater(len(data), 0)
def download_steam_spy_data(json_filename="steamspy.json", genre=None):
    """Return SteamSpy data, reading it from a local JSON cache when present.

    The cache lives in the ``data/`` folder, which is created if needed.
    On a cache miss, the data is fetched through ``steamspypi`` (the whole
    database when ``genre`` is None, otherwise a 'genre' request) and then
    written to the cache file with ``steamspypi.print_data``.

    :param json_filename: name of the cache file inside ``data/``.
    :param genre: optional genre used for the SteamSpy 'genre' request.
    :return: the loaded or downloaded data.
    """
    data_path = "data/"
    # Create the data folder if missing, cf. https://stackoverflow.com/a/14364249
    pathlib.Path(data_path).mkdir(parents=True, exist_ok=True)

    data_filename = data_path + json_filename

    try:
        with open(data_filename, 'r', encoding="utf8") as cache_file:
            return json.load(cache_file)
    except FileNotFoundError:
        print("Downloading and caching data from SteamSpy")

    if genre is None:
        data = steamspypi.load()
    else:
        data = steamspypi.download({'request': 'genre', 'genre': genre})

    # Cache the freshly downloaded data for the next call
    steamspypi.print_data(data, data_filename)

    return data
def test_find_most_similar_game_names_with_diff_lib(self):
    """Look up names close to 'Crash Bandicoot' without the Levenshtein distance.

    Prints up to ten matches with their textual distance, then checks that
    at least one appID was returned.
    """
    steamspy_database = steamspypi.load()

    input_text = 'Crash Bandicoot'
    num_games_to_print = 10

    sorted_app_ids, text_distances = steampi.find_most_similar_game_names(
        input_text,
        steamspy_database,
        use_levenshtein_distance=False,
        n=num_games_to_print,
        cutoff=0.5,
        verbose=True,
    )

    print('Using the longest contiguous matching subsequence for input {}:'.format(input_text))

    # The slice copes with result lists shorter than num_games_to_print
    for rank, app_id in enumerate(sorted_app_ids[:num_games_to_print], start=1):
        similar_game = steamspy_database[app_id]
        textual_distance = text_distances[app_id]
        similar_game_name = similar_game['name']

        print('{}) distance = {} ; {}'.format(rank, textual_distance, similar_game_name))

    self.assertGreater(len(sorted_app_ids), 0)
def fix_matched_meta_data_dict(matched_meta_data_dict, is_verbose=False):
    """Manually fix, or drop, the entries flagged as incorrect matches.

    For every game name flagged by ``check_if_incorrect_match``, the appID
    given by ``fix_incorrect_match`` replaces the match; when no fix is
    available (None), the entry is deleted. The input dictionary is modified
    in place and also returned.

    :param matched_meta_data_dict: dictionary indexed by game name.
    :param is_verbose: whether to print the entries being deleted.
    :return: the same dictionary, after the fixes.
    """
    steamspy_database = steamspypi.load()

    # Iterate over a snapshot of the keys, because entries may be deleted
    for game_name in list(matched_meta_data_dict):
        if not check_if_incorrect_match(game_name, matched_meta_data_dict):
            continue

        fixed_app_id = fix_incorrect_match(game_name)

        if fixed_app_id is None:
            # Delete incorrect match
            if is_verbose:
                print('\nDeleting entry for ' + game_name)
                print(matched_meta_data_dict[game_name])
            del matched_meta_data_dict[game_name]
            continue

        # Fix incorrect match
        fixed_matched_name = steamspy_database[fixed_app_id]['name']
        fixed_distance = lv.distance(game_name.lower(),
                                     fixed_matched_name.lower())

        entry = matched_meta_data_dict[game_name]
        entry['matched-name'] = [fixed_matched_name]
        entry['appID'] = [fixed_app_id]
        entry['Levenshtein-distance'] = [fixed_distance]

    return matched_meta_data_dict
def download_free_apps(method='price', verbose=True):
    """Return the list of appIDs (as integers) of free apps, according to SteamSpy.

    :param method: 'price' keeps the apps whose initial price is zero in the
        whole database; 'genre' requests the 'Free to Play' genre; any other
        value requests the 'Free to Play' tag.
    :param verbose: whether to print the number of free apps found.
    :return: list of integer appIDs.
    """
    if method == 'price':
        free_apps = []
        for game in steamspypi.load().values():
            initial_price = game['initialprice']
            # I don't know what to do in the rare case that price is None: skip it.
            if initial_price is None or int(initial_price) != 0:
                continue
            free_apps.append(int(game['appid']))
    else:
        request_kind = 'genre' if method == 'genre' else 'tag'
        data_request = {'request': request_kind, request_kind: 'Free to Play'}

        data = steamspypi.download(data_request)
        free_apps = [int(app_id) for app_id in data]

    if verbose:
        print('Free apps (based on {}): {}'.format(method, len(free_apps)))

    return free_apps
def find_app_ids_missing_from_steam_card_exchange(force_download=False, verbose=False):
    """List the appIDs found in the Steam Points Shop data but not in SteamCardExchange.

    The number of missing games is always printed. With ``verbose``, each
    missing appID is also printed with its SteamSpy name (None when SteamSpy
    does not know the appID) and its Markdown-formatted URLs.

    :param force_download: forwarded to both data loaders.
    :param verbose: whether to print one line per missing appID.
    :return: missing appIDs, sorted by integer value.
    """
    steamspy_dico = steamspypi.load()

    steam_card_exchange_app_ids = set(
        load_data_from_steam_card_exchange(force_download=force_download).keys())
    steam_points_shop_app_ids = set(
        load_data_from_steam_points_shop(force_download=force_download).keys())

    missing_app_ids = sorted(
        steam_points_shop_app_ids - steam_card_exchange_app_ids,
        key=int,
    )

    print("# {} games missing from SteamCardExchange.".format(
        len(missing_app_ids)))

    if not verbose:
        return missing_app_ids

    for app_id in missing_app_ids:
        # Fall back on a nameless placeholder for appIDs unknown to SteamSpy
        app_info = steamspy_dico.get(app_id, {"name": None})
        print("- {} (appID = {}): {}".format(
            app_info["name"],
            app_id,
            get_urls_for_markdown_display(app_id),
        ))

    return missing_app_ids
def load_game_names_from_steamspy():
    """Return a dictionary mapping each appID of the SteamSpy database to its game name."""
    return {
        app_id: app_info['name']
        for app_id, app_info in steamspypi.load().items()
    }
def main():
    """Build the local dictionary of games from SteamSpy's data.

    The dictionary is stored in the text file
    ``dict_top_rated_games_on_steam.txt``.

    :return: True.
    """
    from appids import appid_hidden_gems_reference_set

    create_local_dictionary(
        steamspypi.load(),  # SteamSpy's data in JSON format
        "dict_top_rated_games_on_steam.txt",
        appid_hidden_gems_reference_set,
    )

    return True
def main(num_query_app_ids=100, num_items_displayed=10, similarity_threshold=0.2):
    """Apply the workflow to the first appIDs of the SteamSpy database.

    :param num_query_app_ids: number of appIDs, taken from the start of the
        database, to use as queries.
    :param num_items_displayed: forwarded to ``apply_workflow``.
    :param similarity_threshold: forwarded to ``apply_workflow``.
    :return: None.
    """
    # Data is already sorted by decreasing number of owners.
    steamspy_data = steamspypi.load()

    app_ids_by_num_owners = [int(app_id) for app_id in steamspy_data]

    apply_workflow(
        app_ids_by_num_owners[:num_query_app_ids],
        num_items_displayed=num_items_displayed,
        similarity_threshold=similarity_threshold,
    )

    return None
def compute_all_game_name_distances_with_levenshtein(input_game_name, steamspy_database=None):
    """Compute the Levenshtein distance between a name and every game name in the database.

    :param input_game_name: the name to compare against the database.
    :param steamspy_database: dictionary indexed by appID, with a 'name'
        field per entry; loaded with ``steamspypi.load()`` when None.
    :return: dictionary mapping each appID to its distance to the input name.
    """
    if steamspy_database is None:
        steamspy_database = steamspypi.load()

    # Compare names in lower cases, to avoid mismatches for Tekken vs. TEKKEN, or Warhammer vs. WARHAMMER
    lower_case_input = input_game_name.lower()

    return {
        app_id: lv.distance(lower_case_input, app_info['name'].lower())
        for app_id, app_info in steamspy_database.items()
    }
def build_lower_case_game_name_dictionary(steamspy_database=None):
    """Map **lower-case** game names found in the SteamSpy database to their Steam appID.

    When several entries share the same lower-case name, the last one
    encountered wins.

    :param steamspy_database: dictionary indexed by appID, with a 'name'
        field per entry; loaded with ``steamspypi.load()`` when None.
    :return: dictionary mapping lower-case names to appIDs.
    """
    if steamspy_database is None:
        steamspy_database = steamspypi.load()

    return {
        app_info['name'].lower(): app_id
        for app_id, app_info in steamspy_database.items()
    }
def print_ranking(ranking, data, criterion, max_ranking_length=100):
    """Print a ranking as a numbered list of Markdown hyperlinks.

    Game names come from the SteamSpy database. An appID missing from it is
    shown as 'redacted' and linked to SteamDB instead of the Steam store.

    :param ranking: iterable of appIDs, in ranking order.
    :param data: dictionary indexed by appID, holding the criterion values.
    :param criterion: one of 'playtime_forever', 'num_players_forever',
        'playtime_2weeks' or 'num_players_2weeks'.
    :param max_ranking_length: maximal number of entries to print.
    :raises AssertionError: if the criterion is not one of the four above.
    """
    steamspy_data = steamspypi.load()

    # Number of digits needed to right-align the rank numbers
    width = 1 + math.floor(math.log10(max_ranking_length))

    titles = {
        'playtime_forever':
            'The most played games ever',
        'num_players_forever':
            'The games with the highest number of owners',
        'playtime_2weeks':
            'The most played games during the first two weeks of July',
        'num_players_2weeks':
            'The games which were started by the highest number of people during the first two weeks of July',
    }
    if criterion not in titles:
        raise AssertionError()
    title = titles[criterion]

    print(f'\n{title}\n')

    for counter, app_id in enumerate(ranking, start=1):
        if counter > max_ranking_length:
            break

        app_id_str = str(app_id)

        try:
            game_name = steamspy_data[app_id_str]['name']
        except KeyError:
            game_name = 'redacted'
            store_url = 'https://steamdb.info/app/'
        else:
            store_url = 'https://store.steampowered.com/app/'

        hyperlink = ''.join(('[', game_name, '](', store_url, app_id_str, ')'))
        criterion_value = data[app_id][criterion]

        print(
            f'{counter: >{width}}. {hyperlink} ({criterion}={criterion_value})'
        )

    return
def print_ranking_according_to_keyword(hype_dict, keyword='hype'):
    """Print the appIDs of ``hype_dict`` ranked by decreasing value of a keyword.

    :param hype_dict: dictionary indexed by appID; each value is a dictionary
        holding a numeric entry for ``keyword``.
    :param keyword: name of the field used for the ranking.
    :return: None.
    """
    # Download latest SteamSpy data to have access to the matching between appID and game name
    steam_spy_data = steamspypi.load()

    hype_ranking = sorted(hype_dict,
                          key=lambda app_id: hype_dict[app_id][keyword],
                          reverse=True)

    formatted_keyword = keyword.capitalize().replace('_', ' ')
    print('\n' + formatted_keyword + ' output_ranking:')

    for rank, appID in enumerate(hype_ranking, start=1):
        try:
            app_name = steam_spy_data[appID]['name']
        except KeyError:
            app_name = 'unknown'

        keyword_value = hype_dict[appID][keyword]

        # The values are interpolated directly rather than spliced into a
        # str.format() template: a game name containing '{' or '}' would
        # otherwise be parsed as a replacement field and crash the printing.
        print(f'{rank:3}. AppID: {appID}\t{formatted_keyword}: '
              f'{keyword_value:.3f}\t({app_name})')

    return
def get_x_y():
    """Return the numbers of owners and of reviews for every app in the SteamSpy database.

    When the 'owners' field cannot be converted to a float, it is handed to
    ``get_mid_of_interval`` instead. The number of reviews is the sum of the
    'positive' and 'negative' fields.

    :return: tuple (num_owners_list, num_reviews_list), with one element per
        app, in database order.
    """
    num_owners_list = []
    num_reviews_list = []

    for app_info in steamspypi.load().values():
        raw_num_owners = app_info['owners']
        try:
            num_owners = float(raw_num_owners)
        except ValueError:
            num_owners = get_mid_of_interval(raw_num_owners)

        num_reviews = sum(app_info[keyword]
                          for keyword in ('positive', 'negative'))

        num_owners_list.append(num_owners)
        num_reviews_list.append(num_reviews)

    return num_owners_list, num_reviews_list
def run_regional_workflow(quality_measure_str='wilson_score',
                          popularity_measure_str='num_reviews',
                          perform_optimization_at_runtime=True,
                          num_top_games_to_print=250,
                          verbose=False,
                          keywords_to_include=None,
                          keywords_to_exclude=None,
                          load_from_cache=True,
                          compute_prior_on_whole_steam_catalog=True,
                          compute_language_specific_prior=False):
    """Compute and save one ranking per language.

    Reviews are downloaded first unless ``load_from_cache`` is set. Each
    ranking is saved to the file given by ``get_regional_ranking_filename``.

    :param keywords_to_include: keywords forwarded to ``compute_ranking``
        (e.g. ["Rogue-Like"]); defaults to an empty list.
    :param keywords_to_exclude: keywords forwarded to ``compute_ranking``
        (e.g. ["Visual Novel", "Anime"]); defaults to an empty list.
    :param load_from_cache: skip the download of reviews, and forwarded to
        ``get_input_data``.
    :return: True.

    The remaining parameters are forwarded as-is to the helpers.
    """
    keywords_to_include = [] if keywords_to_include is None else keywords_to_include
    keywords_to_exclude = [] if keywords_to_exclude is None else keywords_to_exclude

    if not load_from_cache:
        download_steam_reviews()

    game_feature_dict, all_languages = get_input_data(load_from_cache)

    steamspy_data = steamspypi.load()
    ranking_input = prepare_dictionary_for_ranking_of_hidden_gems(
        steamspy_data,
        game_feature_dict,
        all_languages,
        compute_prior_on_whole_steam_catalog,
        compute_language_specific_prior,
        verbose=verbose,
    )

    for language in all_languages:
        ranking = compute_ranking(
            ranking_input,
            num_top_games_to_print,
            keywords_to_include,
            keywords_to_exclude,
            language,
            perform_optimization_at_runtime,
            popularity_measure_str,
            quality_measure_str,
        )

        output_filename = get_regional_ranking_filename(language)
        save_ranking_to_file(output_filename,
                             ranking,
                             only_show_appid=False,
                             verbose=verbose)

    return True
def match_all_game_names_with_app_id(game_names, num_closest_neighbors=1):
    """Match every game name with the closest entries of the SteamSpy database.

    :param game_names: iterable of game names to match.
    :param num_closest_neighbors: forwarded to ``match_game_name_with_app_id``.
    :return: dictionary indexed by game name; each value stores the original
        name, the matched name(s), the appID(s) and the Levenshtein
        distance(s) returned by ``match_game_name_with_app_id``.
    """
    steamspy_database = steamspypi.load()

    matched_meta_data_dict = {}

    for game_name in game_names:
        closest_app_id, closest_distance, closest_name = match_game_name_with_app_id(
            game_name,
            steamspy_database,
            num_closest_neighbors,
        )

        matched_meta_data_dict[game_name] = {
            'original-name': game_name,
            'matched-name': closest_name,
            'appID': closest_app_id,
            'Levenshtein-distance': closest_distance,
        }

    return matched_meta_data_dict
def test_find_most_similar_game_names_with_levenshtein(self):
    """Look up names close to 'Crash Bandicoot' with the Levenshtein distance.

    Prints up to ten matches with their textual distance, then checks that
    at least one appID was returned.
    """
    steamspy_database = steamspypi.load()

    input_text = 'Crash Bandicoot'
    sorted_app_ids, text_distances = steampi.find_most_similar_game_names(
        input_text,
        steamspy_database,
        use_levenshtein_distance=True,
    )

    num_games_to_print = 10

    print('Using the Levenshtein distance for input {}:'.format(input_text))

    # Iterate over a slice rather than over fixed indices, so that a result
    # shorter than num_games_to_print reaches the assertion below instead of
    # erroring out with an IndexError (as the diff_lib test already allows).
    for rank, app_id in enumerate(sorted_app_ids[:num_games_to_print], start=1):
        similar_game = steamspy_database[app_id]
        textual_distance = text_distances[app_id]
        similar_game_name = similar_game['name']

        print('{}) distance = {} ; {}'.format(rank, textual_distance, similar_game_name))

    self.assertGreater(len(sorted_app_ids), 0)
for keyword in keyword_list: current_app_ids = get_appid_by_keyword(keyword) if len(current_app_ids) == 0: print("The keyword " + keyword + " does not return any appID.") if is_first_iteration: app_ids = current_app_ids is_first_iteration = False else: # Intersection of appIDs so that the result are appIDs which correspond to every keyword app_ids = app_ids.intersection(current_app_ids) return app_ids def get_appid_by_keyword_list_to_exclude(keyword_list): app_ids = set() # This is the true initialization of this variable. for keyword in keyword_list: current_app_ids = get_appid_by_keyword(keyword) if len(current_app_ids) == 0: print("The keyword " + keyword + " does not return any appID.") # Union of appIDs so that the result are appIDs which correspond to at least one keyword app_ids = app_ids.union(current_app_ids) return app_ids if __name__ == "__main__": steamspypi.load()
def scrape_steam_data(import_my_own_steam_catalog=True,
                      try_again_faulty_app_ids=False,
                      allow_to_overwrite_existing_app_details=False,
                      focus_on_probable_games=False):
    """Fetch the app details of every appID not seen before, respecting a rate limit.

    Each processed appID is appended to one of two log files: the file given
    by ``get_previously_seen_app_ids_of_games()`` by default, or the file
    given by ``get_previously_seen_app_ids_of_non_games()`` when a query was
    issued and was not a success.

    :param import_my_own_steam_catalog: if True, the catalog comes from
        ``load_steam_catalog()``; otherwise from ``steamspypi.load()``.
    :param try_again_faulty_app_ids: if False, faulty appIDs are counted among
        the previously seen appIDs, hence are not processed again.
    :param allow_to_overwrite_existing_app_details: if True, app details are
        downloaded with ``steampi.api.download_app_details`` and saved as JSON
        on success; otherwise ``steampi.api.load_app_details`` is used.
    :param focus_on_probable_games: if True (and the home-made catalog is
        used), only appIDs ending with '0' are kept.
    :raises AssertionError: if the home-made catalog could not be loaded, or
        if a failed query ends up with an unexpected status code.
    """
    # NB: this configures logging for the whole process at DEBUG level.
    logging.basicConfig(level=logging.DEBUG)
    logging.getLogger('requests').setLevel(logging.DEBUG)
    log = logging.getLogger(__name__)

    query_rate_limit = 200  # Number of queries which can be successfully issued during a 4-minute time window
    wait_time = (4 * 60) + 10  # 4 minutes plus a cushion
    successful_status_code = 200  # Status code for a successful HTTP response

    # Number of queries issued since the last pause
    query_count = 0

    if import_my_own_steam_catalog:
        (steam_catalog, is_success, query_status_code) = load_steam_catalog()
        if not is_success:
            raise AssertionError()
        # A status code which is not None is counted as one issued query.
        if query_status_code is not None:
            query_count += 1
    else:
        steam_catalog = steamspypi.load()

    all_app_ids = list(steam_catalog.keys())

    if import_my_own_steam_catalog and focus_on_probable_games:
        # Caveat: this is not foolproof!
        # The following is merely a way to focus on appIDs which are very likely linked to a game (and not a DLC, etc.).
        #
        # Most of Steam games have an appID which ends with a '0'.
        # For instance, 99.8% (27421/27468) of games in the official SteamSpy database have an appID ending with a '0'.
        #
        # In comparison, in my home-made Steam catalog, 71.8% (52741/73453) of appIDs end with a '0'.
        # Before we download the app details, we do not know whether they are linked to games, DLC, videos, etc.
        all_app_ids = [
            app_id for app_id in all_app_ids if app_id.endswith('0')
        ]

    # Faulty appIDs are considered as already seen, unless we want to try them again.
    include_faulty_app_ids = not try_again_faulty_app_ids
    previously_seen_app_ids = load_previously_seen_app_ids(
        include_faulty_app_ids=include_faulty_app_ids)

    # Process unseen appIDs in increasing numeric order.
    unseen_app_ids = set(all_app_ids).difference(previously_seen_app_ids)
    unseen_app_ids = sorted(unseen_app_ids, key=int)

    success_filename = get_previously_seen_app_ids_of_games()
    error_filename = get_previously_seen_app_ids_of_non_games()

    for appID in unseen_app_ids:

        # Pause once the quota of queries for the time window is used up.
        if query_count >= query_rate_limit:
            log.info("query count is %d ; limit %d reached. Wait for %d sec", query_count, query_rate_limit, wait_time)
            time.sleep(wait_time)
            query_count = 0

        if allow_to_overwrite_existing_app_details:
            (loaded_app_details, is_success, query_status_code) = steampi.api.download_app_details(appID)
            if is_success:
                json_filename = steampi.api.get_appdetails_filename(appID)
                steampi.json_utils.save_json_data(json_filename, loaded_app_details)
        else:
            (_, is_success, query_status_code) = steampi.api.load_app_details(appID)
        if query_status_code is not None:
            query_count += 1

        # Retry, after a pause, for as long as a query is answered with a status code other than 200.
        while (query_status_code is not None) and (query_status_code != successful_status_code):
            log.info("query count is %d ; HTTP response %d. Wait for %d sec", query_count, query_status_code, wait_time)
            time.sleep(wait_time)
            query_count = 0

            (_, is_success, query_status_code) = steampi.api.load_app_details(appID)
            if query_status_code is not None:
                query_count += 1

        appid_log_file_name = success_filename
        if (query_status_code is not None) and not is_success:
            # Sanity check: the loop above only exits with a status code equal to None or to 200.
            if not (query_status_code == successful_status_code):
                raise AssertionError()
            appid_log_file_name = error_filename

        # Keep track of the appID, so that it is not processed again during the next run.
        with open(appid_log_file_name, "a") as f:
            f.write(appID + '\n')
def get_steamspy_catalog():
    """Return the set of appIDs, as integers, found in the SteamSpy database."""
    return {int(app_id) for app_id in steamspypi.load()}
def get_steam_data(games_list, progress, percent):
    """Match game names with SteamSpy entries and export their details to Excel.

    For each name (after ``remove_nan``), the closest SteamSpy entry is looked
    up, its details are requested from the SteamSpy API, and one row is added
    to a table which is finally written to 'subcatalogdata.xlsx'.

    Nothing is written if the SteamSpy API returns no data for the 'all'
    request. With an empty list of games, the table only has the
    'input_names' and 'levenshtein_distance' columns.

    :param games_list: game names to look up.
    :param progress: object whose 'value' item is set to 100 at the end
        (presumably a progress-bar widget; confirm against the caller).
    :param percent: object whose 'text' item is set to "100%" at the end
        (presumably a label widget; confirm against the caller).
    :return: None.
    """
    cleaned_games_list = remove_nan(games_list)
    steamspy_database = steamspypi.load()

    all_games_data = requests.get(
        'http://steamspy.com/api.php?request=all').json()
    if not all_games_data:
        print(
            "Steamspy API is down. For example, http://steamspy.com/api.php?request=all is returning empty dictionary"
        )
        # TODO: Implement an alternate method
        return

    # The table is created up-front, so that an empty list of games leads to
    # an empty spreadsheet rather than to a NameError.
    keydata = {'input_names': [], 'levenshtein_distance': []}
    first = True

    # Construct table
    for game_name in cleaned_games_list:
        sorted_app_ids, text_distances = steampi.text_distances.find_most_similar_game_names(
            game_name, steamspy_database
        )  # Crysis 2 is being matched with Crysis, instead of Crysis 2 - Maximum Edition. preprocess separate games
        app_id = sorted_app_ids[0]

        data = requests.get(
            'https://steamspy.com/api.php?request=appdetails&appid=' +
            str(app_id)).json()

        # The columns are defined by the fields of the first response
        if first:
            first = False
            for key in data.keys():
                keydata[key] = []

        keydata['input_names'].append(game_name)
        keydata['levenshtein_distance'].append(text_distances[app_id])
        for key in data.keys():
            keydata[key].append(data[key])

    # Done constructing table, Update progress to 100%
    progress['value'] = 100
    percent['text'] = "{}%".format(int(100))

    df1 = pd.DataFrame.from_dict(keydata)

    # https://xlsxwriter.readthedocs.io/working_with_pandas.html
    # The context manager closes the writer and outputs the Excel file.
    # It replaces writer.save(), which was removed in pandas 2.0.
    with pd.ExcelWriter('subcatalogdata.xlsx', engine='xlsxwriter') as writer:
        # Convert the dataframe to an XlsxWriter Excel object.
        df1.to_excel(writer, sheet_name='Sheet1')
def display_all_data(time_series_bundle_release_date,
                     time_series_bundle_content_release_dates,
                     time_series_bundle_content_app_ids,
                     output_folder=None):
    """Plot time series of features describing the content of each bundle.

    The bundle release dates are used as x-ticks for every plot.

    :param time_series_bundle_release_date: release date of each bundle.
    :param time_series_bundle_content_release_dates: for each bundle, the
        release dates of the games it contains.
    :param time_series_bundle_content_app_ids: for each bundle, the appIDs of
        the games it contains.
    :param output_folder: forwarded to ``plot_time_series``.
    :return: None.
    """
    steamspy_database = steamspypi.load()

    def plot(values, feature_name):
        # Every plot shares the same x-ticks and output folder
        plot_time_series(values, feature_name,
                         time_series_bundle_release_date, output_folder)

    # Number of Steam games per monthly bundle
    plot([len(app_ids) for app_ids in time_series_bundle_content_app_ids],
         'Number of Steam games')

    # Number of reviews
    num_reviews = [
        [steamspy_database[app_id]['positive'] + steamspy_database[app_id]['negative']
         for app_id in app_ids]
        for app_ids in time_series_bundle_content_app_ids
    ]
    plot(num_reviews, 'Number of reviews')

    # Time between game release dates and bundle release date
    time_to_bundle = []
    for bundle_date, content_dates in zip(time_series_bundle_release_date,
                                          time_series_bundle_content_release_dates):
        time_to_bundle.append([(bundle_date - game_date).days / 365.25
                               for game_date in content_dates])
    plot(time_to_bundle, 'Time to bundle (in years)')

    # Additional displays
    for feature_str in ('score_rank', 'userscore', 'positive', 'negative',
                        'owners', 'players_forever', 'average_forever',
                        'median_forever', 'price'):
        try:
            feature_values = []
            for app_ids in time_series_bundle_content_app_ids:
                bundle_values = []
                for app_id in app_ids:
                    raw_value = steamspy_database[app_id][feature_str]
                    # Ignore empty features. NB: It only happened once for appID=438790 ('Random Access Murder')
                    # for which SteamSpy shows an empty string as 'score_rank' due to 'userscore' being '0',
                    # which is likely a bug.
                    if raw_value != '':
                        bundle_values.append(int(raw_value))
                feature_values.append(bundle_values)
        except ValueError:
            # Catch problem due to SteamSpy providing a range of owners instead of a point-estimate.
            print('Impossible conversion to int for feature = ' + feature_str)
        except KeyError:
            print('Impossible to find feature = ' + feature_str)
        else:
            plot(feature_values, feature_str)

    return