예제 #1
0
    def test_load(self):
        """Check that steamspypi.load() yields non-empty data twice in a row.

        The first call is meant to download the data, and the second one to
        load it from the cache. Whenever the response cannot be decoded as
        JSON, a placeholder entry holding the API error message is used.
        """
        # First pass: download. Second pass: load from cache.
        for _ in range(2):
            try:
                loaded = steamspypi.load()
            except json.decoder.JSONDecodeError:
                loaded = {"name": self.get_api_error_message()}
            self.assertGreater(len(loaded), 0)
예제 #2
0
def download_steam_spy_data(json_filename="steamspy.json", genre=None):
    """Return SteamSpy data, using a JSON file in the "data/" folder as a cache.

    Args:
        json_filename: name of the cache file inside the "data/" folder.
        genre: if None, the data comes from steamspypi.load(); otherwise a
            'genre' request is issued for this genre.

    Returns:
        The data read from the cache file if it exists, otherwise the data
        obtained from steamspypi (which is then handed to
        steamspypi.print_data along with the cache file name).
    """
    cache_folder = "data/"
    # Create the folder if it is missing, cf. https://stackoverflow.com/a/14364249
    pathlib.Path(cache_folder).mkdir(parents=True, exist_ok=True)

    cache_filename = cache_folder + json_filename

    try:
        with open(cache_filename, 'r', encoding="utf8") as cache_file:
            data = json.load(cache_file)
    except FileNotFoundError:
        print("Downloading and caching data from SteamSpy")

        request = None if genre is None else {'request': 'genre', 'genre': genre}
        data = steamspypi.load() if request is None else steamspypi.download(request)

        steamspypi.print_data(data, cache_filename)

    return data
예제 #3
0
    def test_find_most_similar_game_names_with_diff_lib(self):
        """Look up names similar to 'Crash Bandicoot' without the Levenshtein distance.

        Prints at most ten matches, then checks that at least one appID was found.
        """
        steamspy_database = steamspypi.load()

        query = 'Crash Bandicoot'
        max_results = 10

        sorted_app_ids, text_distances = steampi.find_most_similar_game_names(
            query,
            steamspy_database,
            use_levenshtein_distance=False,
            n=max_results,
            cutoff=0.5,
            verbose=True,
        )

        print('Using the longest contiguous matching subsequence for input {}:'.format(query))
        # Slicing copes with the case where fewer than max_results matches are returned.
        for rank, app_id in enumerate(sorted_app_ids[:max_results], start=1):
            matched_game = steamspy_database[app_id]
            distance = text_distances[app_id]
            matched_name = matched_game['name']

            print('{}) distance = {} ; {}'.format(rank, distance, matched_name))

        self.assertGreater(len(sorted_app_ids), 0)
예제 #4
0
def fix_matched_meta_data_dict(matched_meta_data_dict, is_verbose=False):
    """Manually fix, or delete, the entries flagged as incorrect matches.

    For every game name reported by check_if_incorrect_match, the entry is
    either overwritten with the appID given by fix_incorrect_match (along with
    the matching SteamSpy name and its Levenshtein distance to the game name,
    each wrapped in a one-element list), or deleted if no fix is available.

    Args:
        matched_meta_data_dict: dictionary keyed by game name; modified in place.
        is_verbose: whether to print the entries which get deleted.

    Returns:
        The same dictionary object, after modification.
    """
    steamspy_database = steamspypi.load()

    # Iterate over a snapshot of the keys, because entries may be deleted.
    for game_name in list(matched_meta_data_dict.keys()):
        if not check_if_incorrect_match(game_name, matched_meta_data_dict):
            continue

        fixed_app_id = fix_incorrect_match(game_name)

        if fixed_app_id is None:
            # No known fix: delete the incorrect match
            if is_verbose:
                print('\nDeleting entry for ' + game_name)
                print(matched_meta_data_dict[game_name])

            del matched_meta_data_dict[game_name]
            continue

        # Known fix: overwrite the incorrect match
        corrected_name = steamspy_database[fixed_app_id]['name']
        corrected_distance = lv.distance(game_name.lower(),
                                         corrected_name.lower())

        entry = matched_meta_data_dict[game_name]
        entry['matched-name'] = [corrected_name]
        entry['appID'] = [fixed_app_id]
        entry['Levenshtein-distance'] = [corrected_distance]

    return matched_meta_data_dict
def download_free_apps(method='price', verbose=True):
    """Return the list of appIDs (as integers) of free apps.

    Args:
        method: 'price' keeps the apps from steamspypi.load() whose
            'initialprice' is zero; 'genre' issues a genre request for
            'Free to Play'; any other value issues a tag request for
            'Free to Play'.
        verbose: whether to print the number of free apps found.

    Returns:
        A list of integer appIDs.
    """
    if method == 'price':
        free_apps = []
        for game in steamspypi.load().values():
            initial_price = game['initialprice']
            # Apps whose price is None are skipped: it is unclear what to do in that rare case.
            if initial_price is not None and int(initial_price) == 0:
                free_apps.append(int(game['appid']))
    else:
        request_kind = 'genre' if method == 'genre' else 'tag'
        data = steamspypi.download({
            'request': request_kind,
            request_kind: 'Free to Play',
        })

        free_apps = [int(app_id) for app_id in data.keys()]

    if verbose:
        print('Free apps (based on {}): {}'.format(method, len(free_apps)))

    return free_apps
예제 #6
0
def find_app_ids_missing_from_steam_card_exchange(force_download=False,
                                                  verbose=False):
    """List the appIDs found in the Steam Points Shop data but not in the SteamCardExchange data.

    Args:
        force_download: forwarded to both data loaders.
        verbose: whether to print one line per missing appID, with the name
            found in SteamSpy data (None if the appID is unknown to SteamSpy).

    Returns:
        The missing appIDs, sorted by increasing integer value.
    """
    steamspy_dico = steamspypi.load()

    card_exchange_app_ids = set(
        load_data_from_steam_card_exchange(force_download=force_download).keys())
    points_shop_app_ids = set(
        load_data_from_steam_points_shop(force_download=force_download).keys())

    missing_app_ids = sorted(
        points_shop_app_ids.difference(card_exchange_app_ids), key=int)

    print("# {} games missing from SteamCardExchange.".format(
        len(missing_app_ids)))

    if not verbose:
        return missing_app_ids

    for app_id in missing_app_ids:
        # Fall back to a nameless placeholder for appIDs absent from SteamSpy data
        app_info = steamspy_dico.get(app_id, {"name": None})

        print("-   {} (appID = {}): {}".format(
            app_info["name"],
            app_id,
            get_urls_for_markdown_display(app_id),
        ))

    return missing_app_ids
def load_game_names_from_steamspy():
    """Return a dictionary mapping each appID of SteamSpy data to the 'name' of its entry."""
    return {
        app_id: details['name']
        for app_id, details in steamspypi.load().items()
    }
예제 #8
0
def main():
    """Create the local dictionary file from SteamSpy data.

    Returns:
        True, once create_local_dictionary has returned.
    """
    from appids import appid_hidden_gems_reference_set

    create_local_dictionary(
        steamspypi.load(),  # SteamSpy's data in JSON format
        "dict_top_rated_games_on_steam.txt",  # text file in which a dictionary will be stored
        appid_hidden_gems_reference_set,
    )

    return True
def main(num_query_app_ids=100,
         num_items_displayed=10,
         similarity_threshold=0.2):
    """Apply the workflow to the first appIDs of SteamSpy data.

    Args:
        num_query_app_ids: number of leading appIDs used as queries.
        num_items_displayed: forwarded to apply_workflow.
        similarity_threshold: forwarded to apply_workflow.
    """
    # SteamSpy data is already sorted by decreasing number of owners.
    app_ids_by_num_owners = [int(app_id) for app_id in steamspypi.load().keys()]

    apply_workflow(app_ids_by_num_owners[:num_query_app_ids],
                   num_items_displayed=num_items_displayed,
                   similarity_threshold=similarity_threshold)
예제 #10
0
def compute_all_game_name_distances_with_levenshtein(input_game_name,
                                                     steamspy_database=None):
    """Compute the Levenshtein distance between a name and every game name of the database.

    Args:
        input_game_name: the name to compare against.
        steamspy_database: mapping from appID to an entry with a 'name';
            loaded with steamspypi.load() if None.

    Returns:
        A dictionary mapping each appID to the distance between the
        lower-cased input and the lower-cased name of the app.
    """
    if steamspy_database is None:
        steamspy_database = steamspypi.load()

    query = input_game_name.lower()

    # Names are compared in lower case, to avoid mismatches for Tekken vs. TEKKEN, or Warhammer vs. WARHAMMER
    return {
        app_id: lv.distance(query, steamspy_database[app_id]['name'].lower())
        for app_id in steamspy_database
    }
예제 #11
0
def build_lower_case_game_name_dictionary(steamspy_database=None):
    """Map the **lower-case** game names of the database to their Steam appID.

    Args:
        steamspy_database: mapping from appID to an entry with a 'name';
            loaded with steamspypi.load() if None.

    Returns:
        A dictionary from lower-cased name to appID. If several apps share the
        same lower-cased name, the one iterated last wins.
    """
    if steamspy_database is None:
        steamspy_database = steamspypi.load()

    return {
        steamspy_database[app_id]['name'].lower(): app_id
        for app_id in steamspy_database
    }
예제 #12
0
def print_ranking(ranking, data, criterion, max_ranking_length=100):
    """Print a titled, numbered list of Markdown hyperlinks for the top of a ranking.

    Args:
        ranking: iterable of appIDs, in the order in which they are printed.
        data: mapping from appID to a mapping holding the value of `criterion`.
        criterion: one of 'playtime_forever', 'num_players_forever',
            'playtime_2weeks' or 'num_players_2weeks'.
        max_ranking_length: maximal number of entries to print.

    Raises:
        AssertionError: if `criterion` is none of the values listed above.
    """
    steamspy_data = steamspypi.load()

    # Number of characters reserved for the rank, so that ranks are right-aligned
    width = 1 + math.floor(math.log10(max_ranking_length))

    titles = {
        'playtime_forever': 'The most played games ever',
        'num_players_forever': 'The games with the highest number of owners',
        'playtime_2weeks': 'The most played games during the first two weeks of July',
        'num_players_2weeks': 'The games which were started by the highest number of people during the first two weeks of July',
    }
    if criterion not in titles:
        raise AssertionError()
    title = titles[criterion]

    print(f'\n{title}\n')

    for counter, app_id in enumerate(ranking, start=1):
        if counter > max_ranking_length:
            break

        app_id_str = str(app_id)

        try:
            game_name = steamspy_data[app_id_str]['name']
        except KeyError:
            # Apps missing from SteamSpy data are linked to SteamDB instead of the store
            game_name = 'redacted'
            store_url = 'https://steamdb.info/app/'
        else:
            store_url = 'https://store.steampowered.com/app/'

        hyperlink = '[' + game_name + '](' + store_url + app_id_str + ')'

        criterion_value = data[app_id][criterion]

        print(
            f'{counter: >{width}}. {hyperlink} ({criterion}={criterion_value})'
        )
예제 #13
0
def print_ranking_according_to_keyword(hype_dict, keyword='hype'):
    """Print the appIDs of a dictionary, ranked by decreasing value of a keyword.

    Args:
        hype_dict: mapping from appID to a mapping holding a numeric value
            for `keyword`.
        keyword: the key used both for sorting and for display.

    Each printed line shows the rank, the appID, the value (three decimals)
    and the game name found in SteamSpy data ('unknown' if the appID is absent
    from SteamSpy data).
    """
    # Download latest SteamSpy data to have access to the matching between appID and game name
    steam_spy_data = steamspypi.load()

    hype_ranking = sorted(hype_dict.keys(), key=lambda x: hype_dict[x][keyword], reverse=True)

    formatted_keyword = keyword.capitalize().replace('_', ' ')

    print('\n' + formatted_keyword + ' output_ranking:')
    for (rank, appID) in enumerate(hype_ranking, start=1):
        try:
            app_name = steam_spy_data[appID]['name']
        except KeyError:
            app_name = 'unknown'

        # The appID, keyword and game name are passed as format arguments instead of
        # being concatenated into the template: a name containing '{' or '}' would
        # otherwise be interpreted as a replacement field by str.format().
        print('{0:3}. AppID: {1}\t{2}: {3:.3f}\t({4})'.format(rank,
                                                              appID,
                                                              formatted_keyword,
                                                              hype_dict[appID][keyword],
                                                              app_name))

    return
예제 #14
0
def get_x_y():
    """Collect the number of owners and the number of reviews of every app in SteamSpy data.

    The 'owners' field is converted to float; if the conversion fails with a
    ValueError, get_mid_of_interval is applied to the raw value instead. The
    number of reviews is the sum of the 'positive' and 'negative' fields.

    Returns:
        A pair of lists (owners, reviews), aligned app by app.
    """
    owners_per_app = []
    reviews_per_app = []

    for details in steamspypi.load().values():
        raw_owners = details['owners']
        try:
            owners = float(raw_owners)
        except ValueError:
            owners = get_mid_of_interval(raw_owners)

        reviews = sum(details[review_kind]
                      for review_kind in ('positive', 'negative'))

        owners_per_app.append(owners)
        reviews_per_app.append(reviews)

    return owners_per_app, reviews_per_app
예제 #15
0
def run_regional_workflow(quality_measure_str='wilson_score',
                          popularity_measure_str='num_reviews',
                          perform_optimization_at_runtime=True,
                          num_top_games_to_print=250,
                          verbose=False,
                          keywords_to_include=None,
                          keywords_to_exclude=None,
                          load_from_cache=True,
                          compute_prior_on_whole_steam_catalog=True,
                          compute_language_specific_prior=False):
    """Compute one ranking per language and save each of them to its own file.

    Args:
        quality_measure_str: forwarded to compute_ranking.
        popularity_measure_str: forwarded to compute_ranking.
        perform_optimization_at_runtime: forwarded to compute_ranking.
        num_top_games_to_print: forwarded to compute_ranking.
        verbose: forwarded to the dictionary preparation and to the file export.
        keywords_to_include: list forwarded to compute_ranking; None means an
            empty list, e.g. ["Rogue-Like"] otherwise.
        keywords_to_exclude: list forwarded to compute_ranking; None means an
            empty list, e.g. ["Visual Novel", "Anime"] otherwise.
        load_from_cache: if False, download_steam_reviews() is called first;
            the flag is also forwarded to get_input_data.
        compute_prior_on_whole_steam_catalog: forwarded to the dictionary preparation.
        compute_language_specific_prior: forwarded to the dictionary preparation.

    Returns:
        True.
    """
    keywords_to_include = [] if keywords_to_include is None else keywords_to_include
    keywords_to_exclude = [] if keywords_to_exclude is None else keywords_to_exclude

    if not load_from_cache:
        download_steam_reviews()

    game_feature_dict, all_languages = get_input_data(load_from_cache)

    ranking_input = prepare_dictionary_for_ranking_of_hidden_gems(
        steamspypi.load(),
        game_feature_dict,
        all_languages,
        compute_prior_on_whole_steam_catalog,
        compute_language_specific_prior,
        verbose=verbose)

    for language in all_languages:
        regional_ranking = compute_ranking(ranking_input,
                                           num_top_games_to_print,
                                           keywords_to_include,
                                           keywords_to_exclude,
                                           language,
                                           perform_optimization_at_runtime,
                                           popularity_measure_str,
                                           quality_measure_str)

        output_filename = get_regional_ranking_filename(language)
        save_ranking_to_file(output_filename,
                             regional_ranking,
                             only_show_appid=False,
                             verbose=verbose)

    return True
예제 #16
0
def match_all_game_names_with_app_id(game_names, num_closest_neighbors=1):
    """Match every game name with SteamSpy data.

    Args:
        game_names: iterable of game names to match.
        num_closest_neighbors: forwarded to match_game_name_with_app_id.

    Returns:
        A dictionary keyed by game name. Each value holds the keys
        'original-name', 'matched-name', 'appID' and 'Levenshtein-distance',
        filled with the game name and the three results returned by
        match_game_name_with_app_id.
    """
    steamspy_database = steamspypi.load()

    matches = {}

    for game_name in game_names:
        app_id, distance, matched_name = match_game_name_with_app_id(
            game_name, steamspy_database, num_closest_neighbors)

        matches[game_name] = {
            'original-name': game_name,
            'matched-name': matched_name,
            'appID': app_id,
            'Levenshtein-distance': distance,
        }

    return matches
예제 #17
0
    def test_find_most_similar_game_names_with_levenshtein(self):
        """Look up names similar to 'Crash Bandicoot' with the Levenshtein distance.

        Prints at most ten matches, then checks that at least one appID was found.
        """
        steamspy_database = steamspypi.load()

        input_text = 'Crash Bandicoot'
        sorted_app_ids, text_distances = steampi.find_most_similar_game_names(input_text,
                                                                              steamspy_database,
                                                                              use_levenshtein_distance=True,
                                                                              )

        num_games_to_print = 10

        print('Using the Levenshtein distance for input {}:'.format(input_text))
        # Slice rather than index up to num_games_to_print: with fewer matches than that,
        # indexing would raise an IndexError before the actual assertion is reached.
        for rank, app_id in enumerate(sorted_app_ids[:num_games_to_print], start=1):
            similar_game = steamspy_database[app_id]
            textual_distance = text_distances[app_id]

            similar_game_name = similar_game['name']

            print('{}) distance = {} ; {}'.format(rank,
                                                  textual_distance,
                                                  similar_game_name))

        self.assertGreater(len(sorted_app_ids), 0)
예제 #18
0
    for keyword in keyword_list:
        current_app_ids = get_appid_by_keyword(keyword)
        if len(current_app_ids) == 0:
            print("The keyword " + keyword + " does not return any appID.")
        if is_first_iteration:
            app_ids = current_app_ids
            is_first_iteration = False
        else:
            # Intersection of appIDs so that the result are appIDs which correspond to every keyword
            app_ids = app_ids.intersection(current_app_ids)

    return app_ids


def get_appid_by_keyword_list_to_exclude(keyword_list):
    """Return the set of appIDs which correspond to at least one of the keywords.

    A message is printed for every keyword which does not return any appID.
    An empty keyword list yields an empty set.
    """
    excluded_app_ids = set()

    for keyword in keyword_list:
        matching_app_ids = get_appid_by_keyword(keyword)
        if not len(matching_app_ids):
            print("The keyword " + keyword + " does not return any appID.")
        # Accumulate the union of the appIDs over all the keywords
        excluded_app_ids.update(matching_app_ids)

    return excluded_app_ids


if __name__ == "__main__":
    # When run as a script, only call steamspypi.load(); the returned data is discarded.
    steamspypi.load()
예제 #19
0
def scrape_steam_data(import_my_own_steam_catalog=True,
                      try_again_faulty_app_ids=False,
                      allow_to_overwrite_existing_app_details=False,
                      focus_on_probable_games=False):
    """Fetch the app details of every appID not seen before, with rate limiting.

    Args:
        import_my_own_steam_catalog: if True, the catalog comes from
            load_steam_catalog(); otherwise from steamspypi.load().
        try_again_faulty_app_ids: if False, faulty appIDs are included in the
            previously seen appIDs, hence they are not processed again.
        allow_to_overwrite_existing_app_details: if True, app details are
            obtained with steampi.api.download_app_details and saved to their
            JSON file on success; otherwise steampi.api.load_app_details is used.
        focus_on_probable_games: if True (and the home-made catalog is used),
            only the appIDs ending with '0' are kept.

    Raises:
        AssertionError: if load_steam_catalog() does not report success, or if
            an unsuccessful result comes with an unexpected HTTP status code.

    Side effects: configures logging at DEBUG level, sleeps whenever the query
    limit is reached or a query returns an unexpected status code, and appends
    every processed appID to one of two log files.
    """
    logging.basicConfig(level=logging.DEBUG)
    logging.getLogger('requests').setLevel(logging.DEBUG)
    log = logging.getLogger(__name__)

    query_rate_limit = 200  # Number of queries which can be successfully issued during a 4-minute time window
    wait_time = (4 * 60) + 10  # 4 minutes plus a cushion
    successful_status_code = 200  # Status code for a successful HTTP response

    # Number of queries counted since the last pause
    query_count = 0

    if import_my_own_steam_catalog:
        (steam_catalog, is_success, query_status_code) = load_steam_catalog()

        if not is_success:
            raise AssertionError()
        # A status code of None is not counted as a query
        # (presumably no HTTP request was issued in that case -- TODO confirm).
        if query_status_code is not None:
            query_count += 1
    else:
        steam_catalog = steamspypi.load()

    all_app_ids = list(steam_catalog.keys())

    if import_my_own_steam_catalog and focus_on_probable_games:
        # Caveat: this is not foolproof!
        # The following is merely a way to focus on appIDs which are very likely linked to a game (and not a DLC, etc.).
        #
        # Most of Steam games have an appID which ends with a '0'.
        # For instance, 99.8% (27421/27468) of games in the official SteamSpy database have an appID ending with a '0'.
        #
        # In comparison, in my home-made Steam catalog, 71.8% (52741/73453) of appIDs end with a '0'.
        # Before we download the app details, we do not know whether they are linked to games, DLC, videos, etc.
        all_app_ids = [
            app_id for app_id in all_app_ids if app_id.endswith('0')
        ]

    # Unless faulty appIDs should be tried again, treat them as already seen.
    include_faulty_app_ids = not try_again_faulty_app_ids
    previously_seen_app_ids = load_previously_seen_app_ids(
        include_faulty_app_ids=include_faulty_app_ids)

    unseen_app_ids = set(all_app_ids).difference(previously_seen_app_ids)

    # Process the appIDs by increasing numerical value
    unseen_app_ids = sorted(unseen_app_ids, key=int)

    # Names of the files to which processed appIDs are appended (games vs. non-games)
    success_filename = get_previously_seen_app_ids_of_games()
    error_filename = get_previously_seen_app_ids_of_non_games()

    for appID in unseen_app_ids:

        # Pause once the query limit is reached, then start counting again.
        if query_count >= query_rate_limit:
            log.info("query count is %d ; limit %d reached. Wait for %d sec",
                     query_count, query_rate_limit, wait_time)
            time.sleep(wait_time)
            query_count = 0

        if allow_to_overwrite_existing_app_details:
            (loaded_app_details, is_success,
             query_status_code) = steampi.api.download_app_details(appID)
            if is_success:
                json_filename = steampi.api.get_appdetails_filename(appID)
                steampi.json_utils.save_json_data(json_filename,
                                                  loaded_app_details)
        else:
            (_, is_success,
             query_status_code) = steampi.api.load_app_details(appID)

        if query_status_code is not None:
            query_count += 1

        # As long as the status code is set and differs from the successful one,
        # wait and try again with load_app_details.
        while (query_status_code
               is not None) and (query_status_code != successful_status_code):
            log.info("query count is %d ; HTTP response %d. Wait for %d sec",
                     query_count, query_status_code, wait_time)
            time.sleep(wait_time)
            query_count = 0

            (_, is_success,
             query_status_code) = steampi.api.load_app_details(appID)
            if query_status_code is not None:
                query_count += 1

        # The appID is logged to the error file only if a query was issued and did not succeed.
        appid_log_file_name = success_filename
        if (query_status_code is not None) and not is_success:
            if not (query_status_code == successful_status_code):
                raise AssertionError()
            appid_log_file_name = error_filename

        with open(appid_log_file_name, "a") as f:
            f.write(appID + '\n')
예제 #20
0
def get_steamspy_catalog():
    """Return the set of appIDs found in SteamSpy data, converted to integers."""
    return {int(app_id) for app_id in steamspypi.load().keys()}
예제 #21
0
def get_steam_data(games_list, progress, percent):
    """Match game names with Steam apps and export their SteamSpy details to Excel.

    For every name left after remove_nan(), the most similar game name of the
    SteamSpy database is looked up, the details of the matching app are
    requested from the SteamSpy API, and one row is collected. The resulting
    table is written to 'subcatalogdata.xlsx' (sheet 'Sheet1').

    Args:
        games_list: game names, cleaned with remove_nan() before use.
        progress: object whose 'value' item is set to 100 when the table is built.
        percent: object whose 'text' item is set to "100%" when the table is built.

    Returns:
        None. If the SteamSpy API returns an empty dictionary, a message is
        printed and nothing is written.
    """
    cleaned_games_list = remove_nan(games_list)
    steamspy_database = steamspypi.load()
    all_games_data = requests.get(
        'http://steamspy.com/api.php?request=all').json()
    if len(all_games_data) == 0:
        print(
            "Steamspy API is down. For example, http://steamspy.com/api.php?request=all is returning empty dictionary"
        )
        # TODO: Implement an alternate method
        # There is nothing to export: stop here rather than fail on a table
        # which was never built.
        return

    # Construct table, one row (dictionary) per input game name
    rows = []
    for game_name in cleaned_games_list:
        # Known limitation: Crysis 2 is being matched with Crysis, instead of
        # Crysis 2 - Maximum Edition. Such games would need to be preprocessed separately.
        sorted_app_ids, text_distances = steampi.text_distances.find_most_similar_game_names(
            game_name, steamspy_database)
        app_id = sorted_app_ids[0]

        data = requests.get(
            'https://steamspy.com/api.php?request=appdetails&appid=' +
            str(app_id)).json()

        row = {
            'input_names': game_name,
            'levenshtein_distance': text_distances[app_id],
        }
        row.update(data)
        rows.append(row)

    # Done constructing table, Update progress to 100%
    progress['value'] = 100
    percent['text'] = "{}%".format(int(100))

    # Building the frame from rows also copes with an empty list of games, and
    # with responses whose keys differ from one app to another.
    df1 = pd.DataFrame(rows)

    # https://xlsxwriter.readthedocs.io/working_with_pandas.html
    # The context manager closes the writer, which outputs the Excel file.
    with pd.ExcelWriter('subcatalogdata.xlsx', engine='xlsxwriter') as writer:
        df1.to_excel(writer, sheet_name='Sheet1')
예제 #22
0
def display_all_data(time_series_bundle_release_date,
                     time_series_bundle_content_release_dates,
                     time_series_bundle_content_app_ids,
                     output_folder=None):
    """Display the prepared data as a succession of time-series plots.

    Args:
        time_series_bundle_release_date: one release date per bundle; also
            handed to plot_time_series with every plot.
        time_series_bundle_content_release_dates: for every bundle, the
            release dates of the games it contains.
        time_series_bundle_content_app_ids: for every bundle, the appIDs of
            the games it contains.
        output_folder: forwarded to plot_time_series.
    """
    steamspy_database = steamspypi.load()

    bundle_dates = time_series_bundle_release_date
    bundles = time_series_bundle_content_app_ids

    def show(values, label):
        # Every plot shares the same dates and the same output folder.
        plot_time_series(values, label, bundle_dates, output_folder)

    # Number of Steam games per monthly bundle
    show([len(app_ids) for app_ids in bundles], 'Number of Steam games')

    # Number of reviews (positive plus negative) of every game of every bundle
    show([[(steamspy_database[app_id]['positive'] +
            steamspy_database[app_id]['negative'])
           for app_id in app_ids]
          for app_ids in bundles],
         'Number of reviews')

    # Time between game release dates and bundle release date
    show([[(bundle_date - game_date).days / 365.25
           for game_date in game_dates]
          for (bundle_date, game_dates) in zip(
              time_series_bundle_release_date,
              time_series_bundle_content_release_dates)],
         'Time to bundle (in years)')

    # Additional displays, one per SteamSpy feature
    for feature_str in ('score_rank', 'userscore', 'positive', 'negative',
                        'owners', 'players_forever', 'average_forever',
                        'median_forever', 'price'):
        try:
            # Empty features are ignored. NB: It only happened once for appID=438790 ('Random Access Murder'),
            # for which SteamSpy shows an empty string as 'score_rank' due to 'userscore' being '0', which is
            # likely a bug.
            feature_values = [[
                int(steamspy_database[app_id][feature_str])
                for app_id in app_ids
                if steamspy_database[app_id][feature_str] != ''
            ] for app_ids in bundles]
        except ValueError:
            # Happens when SteamSpy provides a range of owners instead of a point-estimate.
            print('Impossible conversion to int for feature = ' + feature_str)
            continue
        except KeyError:
            print('Impossible to find feature = ' + feature_str)
            continue

        show(feature_values, feature_str)