def collect_gamedata(game_list):
    """Fetch game data from BGG in chunks of 50 ids per API call.

    Args:
        game_list (list): BGG game ids to look up.

    Returns:
        list: game objects, in the order the API returns them.
    """
    bgg = BGGClient(retries=6, retry_delay=4)
    chunksize = 50  # batch endpoint is called with at most 50 ids at a time
    games = []
    # Chunked iteration already covers the short-list case (one chunk) and
    # the empty case (no API call), so no special-casing is needed.
    # extend() replaces the previous quadratic `games = games + ...`.
    for start in range(0, len(game_list), chunksize):
        games.extend(bgg.game_list(game_list[start:start + chunksize]))
    return games
def get_games(username):
    """Fetch a user's owned-game collection and game details via BGG API2.

    Args:
        username (str): BGG user name.

    Returns:
        tuple: (list of game objects, collection object).

    Raises:
        TimeoutError: if fetching the game details fails.
    """
    bgg = BGGClient(timeout=120, requests_per_minute=20)
    print("Getting collection from BGG..")
    collection = bgg.collection(username,
                                exclude_subtype='boardgameexpansion',
                                own=True,
                                wishlist=None)
    ids = [x.id for x in collection.items]
    game_list = []
    try:
        print("Getting games from BGG..")
        game_list = bgg.game_list(ids)
        if not game_list:
            print("Error: empty list returned.")
    except Exception as exc:
        # Was a bare `except:` that discarded the original error; keep the
        # TimeoutError contract callers rely on, but chain the cause.
        print("An error occurred..")
        raise TimeoutError("Failed to fetch games from BGG") from exc
    else:
        print("Done.")
    return game_list, collection
class Downloader():
    """Fetch a BGG collection plus game details, expansions, and status tags."""

    def __init__(self, project_name, cache_bgg):
        # Optionally persist API responses for one day in a local sqlite cache.
        if cache_bgg:
            cache = CacheBackendSqlite(
                path=f"{project_name}-cache.sqlite",
                ttl=60 * 60 * 24,
            )
            self.client = BGGClient(cache=cache)
        else:
            self.client = BGGClient()

    def collection(self, user_name, extra_params):
        """Return the user's collection as BoardGame objects.

        extra_params may be one dict of query parameters or a list of dicts,
        each producing one API call whose results are concatenated.
        """
        if isinstance(extra_params, list):
            collection = []
            for params in extra_params:
                collection.extend(
                    self.client.collection(user_name=user_name, **params))
        else:
            collection = list(
                self.client.collection(user_name=user_name, **extra_params))

        games_data = self.client.game_list(
            [item.id for item in collection])

        # Split base games from expansions in one pass, preserving API order.
        games, expansions = [], []
        for data in games_data:
            (expansions if data.expansion else games).append(data)

        # Map each owned base game to the owned expansions that expand it.
        expansions_by_game = {game.id: [] for game in games}
        for expansion in expansions:
            for expanded in expansion.expands:
                if expanded.id in expansions_by_game:
                    expansions_by_game[expanded.id].append(expansion)

        # Collect collection-status flags ("tags") per base game.
        tag_names = [
            'preordered', 'prevowned', 'want', 'wanttobuy', 'wanttoplay',
            'fortrade', 'wishlist'
        ]
        tags_by_game = {game.id: [] for game in games}
        for status in collection:
            if status.id in tags_by_game:
                tags_by_game[status.id].extend(
                    tag for tag in tag_names if int(getattr(status, tag)))

        return [
            BoardGame(
                game,
                tags=tags_by_game[game.id],
                expansions=[
                    BoardGame(expansion)
                    for expansion in expansions_by_game[game.id]
                ],
            ) for game in games
        ]
def get_games_from_bgg(self, bgg: BGGClient, game_ids) -> List[BoardGame]:
    """Return BoardGame objects for *game_ids*, preferring the local cache.

    Ids absent from the cache are fetched from BGG in a single batch call
    and written back to the cache before returning.
    """
    cached_games = self.__get_games_from_cache(game_ids)
    cached_ids = self.__extract_ids_from_games(cached_games)
    missing_ids = [game_id for game_id in game_ids
                   if game_id not in cached_ids]

    fetched_games = []
    if missing_ids:
        fetched_games = bgg.game_list(missing_ids)
        for fetched_game in fetched_games:
            self.game_cache.save(fetched_game)

    return cached_games + fetched_games
class Downloader():
    """Thin wrapper around BGGClient that downloads a user's collection."""

    def __init__(self, cache_bgg):
        """Create the API client, optionally backed by a one-day sqlite cache.

        Args:
            cache_bgg (bool): cache API responses on disk when True.
        """
        project_name = SETTINGS["project"]["name"]
        if cache_bgg:
            # Use the local computed above — it was previously unused and
            # SETTINGS was re-read inside the f-string.
            self.client = BGGClient(cache=CacheBackendSqlite(
                path=f"{project_name}-cache.sqlite",
                ttl=60 * 60 * 24,
            ))
        else:
            self.client = BGGClient()

    def collection(self, user_name):
        """Return the user's collection as a list of BoardGame objects."""
        collection = self.client.collection(
            user_name=user_name,
            **SETTINGS["boardgamegeek"]["extra_params"])
        games_data = self.client.game_list(
            [game_in_collection.id for game_in_collection in collection.items])
        return [BoardGame(game_data) for game_data in games_data]
class Downloader():
    """Downloads a BGG collection and maps raw game data to BoardGame objects."""

    def __init__(self):
        project_name = SETTINGS["project"]["name"]
        # Cache API responses locally for one day.
        self.client = BGGClient(cache=CacheBackendSqlite(
            path=f"{project_name}-cache.sqlite", ttl=60 * 60 * 24))

    def collection(self, user_name):
        """Fetch the user's owned base games (expansions excluded) as BoardGames."""
        collection = self.client.collection(
            user_name=user_name,
            exclude_subtype='boardgameexpansion',
            **SETTINGS["boardgamegeek"]["extra_params"])
        game_data = self.client.game_list(
            [game_in_collection.id for game_in_collection in collection.items])
        return [self.game_data_to_boardgame(game) for game in game_data]

    def _num_players_is_recommended(self, num, votes):
        """True when Best + Recommended votes outweigh Not Recommended votes."""
        return int(votes['best_rating']) + int(
            votes['recommended_rating']) > int(votes['not_recommended_rating'])

    def _facet_for_num_player(self, num, num_with_maybe_plus, votes):
        """Build a two-level facet dict for a player count.

        A count is labelled "Best" when it has more than 10 Best votes and
        more Best than Recommended votes; otherwise "Recommended".
        """
        is_best = int(votes['best_rating']) > 10 and int(
            votes['best_rating']) > int(votes['recommended_rating'])
        best_or_recommended = "Best" if is_best else "Recommended"
        return {
            "level1": num,
            # Single f-string replaces the previous f-string + concat mix;
            # output is identical.
            "level2": f"{num} > {best_or_recommended} with {num_with_maybe_plus}",
        }

    def game_data_to_boardgame(self, game):
        """Convert a raw API game object into a BoardGame with derived facets."""
        num_players = []
        for num, votes in game.suggested_players['results'].items():
            if not self._num_players_is_recommended(num, votes):
                continue
            if "+" not in num:
                num_players.append(self._facet_for_num_player(num, num, votes))
            else:
                # "N+" expands to individual counts up to 10.
                # NOTE(review): the range starts at N+1, skipping N itself —
                # confirm that is intended.
                for i in range(int(num.replace("+", "")) + 1, 11):
                    num_players.append(
                        self._facet_for_num_player(i, num, votes))

        playing_time_mapping = {
            30: '< 30min',
            60: '30min - 1h',
            120: '1-2h',
            180: '2-3h',
            240: '3-4h',
        }
        # for/else: fall through to '> 4h' when no bucket is large enough.
        for playing_time_max, playing_time in playing_time_mapping.items():
            if playing_time_max > int(game.playing_time):
                break
        else:
            playing_time = '> 4h'

        weight_mapping = {
            0: "Light",
            1: "Light",
            2: "Light Medium",
            3: "Medium",
            4: "Medium Heavy",
            5: "Heavy",
        }
        weight = weight_mapping[math.ceil(game.rating_average_weight)]

        return BoardGame(
            id=game.id,
            name=game.name,
            description=game.description,
            image=game.thumbnail,
            # list() replaces the redundant copy-comprehensions.
            categories=list(game.categories),
            mechanics=list(game.mechanics),
            players=num_players,
            weight=weight,
            playing_time=playing_time,
        )
# NOTE(review): this chunk begins mid-expression — the enclosing generator
# (apparently `search_match`, yielding (index, name) pairs) is defined above
# this view; do not edit blind.
num_player not in numfit[1] and round(game.rating_average_weight) == weight): yield i, game.name
bgg = BGGClient()
# NOTE(review): the next statement is corrupted — the "******" run looks like
# a redaction that fused two input() lines (presumably USERNAME and
# num_player). Also, eval(input(...)) executes arbitrary user input; once the
# line is restored, int(input(...)) / plain input() would be safer.
USERNAME = eval(input("BGG Username: "******"How many players: ")) playtime = eval(input("Maximum playtime in minutes: "))
weight = eval(input("Weight (1 = light, 5 = heavy): "))
match = search_match(game_list)
# Print the first five matches from the generator.
for num in range(5): print('Match: Game No. %d: %s' % (next(match)))
# NOTE(review): this chunk begins mid-block — `writer` and `line` are defined
# above this view.
writer.writerow(line)

bgg = BGGClient()
collection = bgg.collection('Oniwa',
                            exclude_subtype='boardgameexpansion',
                            own=True)
# Comprehension replaces the previous init-then-append loop.
game_id = [item.id for item in collection]
games = bgg.game_list(game_id)

# NOTE(review): zip() assumes game_list() returns games in the same order as
# the collection ids — verify, otherwise rows pair the wrong games.
foo = []
for personal_game, db_game in zip(collection, games):
    game = BoardGame.BoardGame()
    game.collection_to_game(personal_game, db_game)
    foo.append(game)

# TODO: Change Average rank and personal rank to rating and add a personal rank field
csv_data = [[
    'Name', 'BGG Rank', 'Average Rank', 'Personal Rank', 'Weight',
    'Number Plays', 'Category', 'Mechanics', 'Min Players', 'Max Players',
    'Suggested Players', 'Year Published', 'Purchase Date', 'Months Owned'
]]
for item in foo:
    csv_data.append(item.csv())
# NOTE(review): this chunk begins mid-block — `args` and
# `effective_cache_ttl` come from argument parsing above this view.
if args.force:
    print("Forcing cache refresh")
    effective_cache_ttl = 0

cache = CacheBackendSqlite(path=".cache.bgg", ttl=effective_cache_ttl)

bgg1 = BGGClientLegacy(cache=cache)
# Renamed from `list`, which shadowed the builtin.
geeklist = bgg1.geeklist(args.list_id, comments=True)
print(f"[{geeklist.id}] {geeklist.name}\n{geeklist.description}")

bgg2 = BGGClient(cache=cache)
# Keep only board-game "thing" items from the geeklist.
games_id_list = [
    item.object.id for item in geeklist
    if item.object.type == 'thing' and item.object.subtype == 'boardgame'
]
games = bgg2.game_list(games_id_list)
games_dict = {game.id: game for game in games}

collection = bgg2.collection(user_name=args.username)
collection_dict = {colgame.id: colgame for colgame in collection}


def text_formatter(game, effective_name, effective_imageid):
    """Print a one-line text summary of a game (no trailing newline)."""
    print(f" [{game.id}]"
          f" img:{effective_imageid} {effective_name}", end='')
    print(f" #{game.bgg_rank}", end='')
    print(f" year:{game.year}", end='')
    print(f" players:[{game.min_players}-{game.max_players}]", end='')
    print(f" age:>={game.min_age}yr", end='')
    print(f" time:{game.playing_time}'", end='')
    print(f" weight:{game.rating_average_weight:.1f}/5", end='')
def batch_api_call(ids: np.ndarray,
                   batch_size: int = 100,
                   requests_per_minute: int = 100) -> list:
    """Fetches games data in batches

    First fetches data one batch at a time. Then fetches one more batch with
    remainder_ids i.e. ids which were left over after the last full batch was
    formed

    Args:
        ids (`numpy.ndarray`): the game_ids to fetch data for
        batch_size (`int`): the size of the batches (Recommend to not exceed 100)
        requests_per_minute (`int`): limit the number of requests of our API client

    Returns:
        games (`list`): List of BoardGame objects. See boardgamegeek package
        documentation for details:
        https://lcosmin.github.io/boardgamegeek/modules.html#boardgamegeek.objects.games.BoardGame
    """
    # Instantiate API client
    bgg = BGGClient(requests_per_minute=requests_per_minute)
    logger.info(
        f'Instantiated API client with limit of {requests_per_minute} requests per minute.'
    )

    # Form full batches; leftover ids are fetched separately below.
    number_of_ids = len(ids)
    remainder = number_of_ids % batch_size
    ids_no_remainder = ids[:number_of_ids - remainder]
    batches = ids_no_remainder.reshape(
        len(ids_no_remainder) // batch_size,  # floor division, not int(a / b)
        batch_size)
    batches = [row.tolist() for row in batches]  # plain lists for the API

    batches_successful = 0
    batches_failed = 0
    games = []
    logger.info(
        f'Beginning batch calls to BoardGameGeek API for {batch_size} games per batch.'
    )
    logger.info(
        f"This will take approximately 15 minutes. Thank you for your patience."
    )
    # enumerate replaces the manual batch_number counter.
    for batch_number, batch in enumerate(batches):
        logger.debug(
            f"Fetching data for batch number {batch_number} / {len(batches)}")
        try:
            game_batch = bgg.game_list(batch)
            batches_successful += 1
            logger.debug(
                f"Successfully fetched games in batch number {batch_number}")
            if batches_successful % 10 == 0:
                logger.info(
                    f'Successfully fetched games for {batches_successful} batches'
                )
            games.extend(game_batch)
        except BGGApiError:
            batches_failed += 1
            logger.debug(
                f"Failed to fetch games data for batch number {batch_number}")
    logger.info(f"Successful Batches: {batches_successful} ")
    logger.info(f"Failed Batches: {batches_failed} ")

    # Fetch remaining games, which didn't fit in a full batch.
    remainder_ids = list(ids[len(ids_no_remainder):])
    remainder_ids_successful = 0
    remainder_ids_failed = 0
    if remainder_ids:  # BUGFIX: previously called the API with an empty list
        try:
            games.extend(bgg.game_list(remainder_ids))
            logger.debug("Successfully fetched games with remainder ids")
            remainder_ids_successful = len(remainder_ids)
        except BGGApiError:
            logger.debug("Failed to fetch games with remainder ids")
            remainder_ids_failed = len(remainder_ids)

    logger.info(
        f"Total games successfully fetched: {batch_size*batches_successful+remainder_ids_successful}"
    )
    logger.info(
        f"Total games failed to fetch: {batch_size * batches_failed+ remainder_ids_failed}"
    )
    return games
class BGGManager():
    ''' The BGGManager class is a wrapper around the boardgamegeek2 API.
    Init with an ids file, a names file, and a details file. The instance
    can optionally download from boardgamegeek, or be loaded from disk.'''

    def __init__(self, ids_file, names_file, details_file):
        '''ids_file - name of a file with/for BGG ids
        names_file - name of a file with/for BGG game names
        details_file - name of a file with/for BGG game details'''
        self._ids_file = ids_file
        self._ids = None
        self._names_file = names_file
        self._names = None
        self._details_file = details_file
        # BUGFIX: initialize _details so game_details() raises the intended
        # ValueError instead of AttributeError before anything is loaded.
        self._details = None
        self._bgg = BGGClient()

    @staticmethod
    def grouper(n, iterable, fillvalue=None):
        '''Turns a flat list into a set of groups of length n. BGGManager
        uses this to group the downloaded ids into batches for retrieval
        from boardgamegeek's batch API.
        Example: grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx
        '''
        args = [iter(iterable)] * n
        return zip_longest(fillvalue=fillvalue, *args)

    @staticmethod
    def boardgame_to_dict(bg):
        ''' Turns the structure returned by boardgamegeek2 into a dict,
        suitable for uploading to Elasticsearch.

        TODO: there are some deeper substructures - stats and ranks that
        would be nice to have as well. They don't bear directly, so I have
        ignored them.'''
        return {
            "accessory": bg.accessory,
            "alternative_names": bg.alternative_names,
            "artists": bg.artists,
            "bgg_rank": bg.bgg_rank,
            "boardgame_rank": bg.boardgame_rank,
            "categories": bg.categories,
            "comments": bg.comments,
            "description": bg.description,
            "designers": bg.designers,
            "expansion": bg.expansion,
            "families": bg.families,
            "id": bg.id,
            "image": bg.image,
            "implementations": bg.implementations,
            "max_players": bg.max_players,
            "max_playing_time": bg.max_playing_time,
            "mechanics": bg.mechanics,
            "min_age": bg.min_age,
            "min_players": bg.min_players,
            "min_playing_time": bg.min_playing_time,
            "name": bg.name,
            "playing_time": bg.playing_time,
            "publishers": bg.publishers,
            "rating_average": bg.rating_average,
            "rating_average_weight": bg.rating_average_weight,
            "rating_bayes_average": bg.rating_bayes_average,
            "rating_median": bg.rating_median,
            "rating_num_weights": bg.rating_num_weights,
            "rating_stddev": bg.rating_stddev,
            "thumbnail": bg.thumbnail,
            "users_commented": bg.users_commented,
            "users_owned": bg.users_owned,
            "users_rated": bg.users_rated,
            "versions": bg.versions,
            "videos": bg.videos,
            "year": bg.year
        }

    def _download_and_save_games(self):
        ''' Use the API to pull games from boardgamegeek. The API requires
        at least 1 character for the wildcard, so each letter of the
        alphabet is wildcarded in turn. This misses some games but gets
        most everything. Duplicates across searches are handled by the
        set() structures holding names and ids.

        Pickles the downloaded ids and names to the ids and names files.'''
        self._ids = set()
        self._names = set()
        small_letters = map(chr, range(ord('a'), ord('z') + 1))
        for letter in small_letters:
            things = self._bgg.search('{}*'.format(letter))
            for thing in things:
                self._ids.add(thing.id)
                self._names.add(thing.name)
        with open(self._ids_file, 'wb') as ids_file:
            pickle.dump(self._ids, ids_file)
        with open(self._names_file, 'wb') as names_file:
            pickle.dump(self._names, names_file)

    def _load_saved_games(self):
        '''Loads the pickled game names and ids'''
        with open(self._names_file, 'rb') as names_file:
            self._names = pickle.load(names_file)
        with open(self._ids_file, 'rb') as ids_file:
            self._ids = pickle.load(ids_file)

    def load_game_names_and_ids(self, download=False):
        ''' Entry point for loading game names and ids. Set download=True
        to use the BoardGameGeek APIs to download them fresh, or False to
        load pickled data from a prior run.'''
        if download:
            self._download_and_save_games()
        else:
            self._load_saved_games()
        print('Got {} ids and {} names'.format(len(self._ids),
                                               len(self._names)))

    def _download_and_save_game_details(self):
        ''' Downloads game details from the BoardGameGeek API in chunks of
        100 ids via the batch API, then stores them one JSON object per
        line in the details file (text JSON eases grepping and test-set
        generation). Chunk size is hard-coded for simplicity.'''
        n = 0
        chunk_size = 100
        with open(self._details_file, 'w') as details_file:
            for chunk in self.grouper(chunk_size, self._ids, fillvalue=None):
                # BUGFIX: drop the padding grouper adds to the last chunk —
                # previously the None fill values were sent to the API.
                chunk_ids = [
                    game_id for game_id in chunk if game_id is not None
                ]
                games = self._bgg.game_list(game_id_list=chunk_ids)
                n += len(chunk_ids)  # BUGFIX: count actual ids, not chunk_size
                print('Downloaded {} games.'.format(n))
                for g in games:
                    try:
                        d = self.boardgame_to_dict(g)
                        if d:
                            json.dump(d, details_file)
                            details_file.write('\n')
                    except Exception as e:
                        # Best-effort: report the reason instead of dropping it.
                        print('Exception getting details. Skipping "{}": {}'.
                              format(g.name, e))

    def _load_saved_game_details(self):
        ''' Loads the game details from the JSON file where they are
        stored.'''
        self._details = list()
        with open(self._details_file, 'r') as f_in:
            for line in f_in:
                dic = json.loads(line.lstrip().rstrip())
                self._details.append(dic)
        print("Loaded {} game records".format(len(self._details)))

    def load_game_details(self, download=False):
        ''' Entry point for downloading or loading the game details.
        Specify download=True to pull from BGG or download=False to load a
        previous data set.'''
        if download:
            if not self._ids or not self._names:
                # TODO: Better exception
                raise Exception(
                    'Can\'t download game details without loading names and ids first!'
                )
            self._download_and_save_game_details()
        else:
            self._load_saved_game_details()

    def game_details(self):
        ''' Iterator over the game details. Details must be loaded first.

        Raises:
            ValueError: if no details have been loaded.'''
        if not self._details:
            raise ValueError(
                'Trying to send iterate game details, but none loaded')
        for detail in self._details:
            yield detail