def _runTurn(self):
    """Run a single turn: every player, in order, is refilled and then acts.

    For each player index the "sp" entry of that player's external state is
    reset to its "max_sp" value before the player's actions are run. As soon
    as ``self.winning_player`` is set the method returns immediately; in that
    case the "turns" stat is not recorded and the turn counter is not
    advanced.
    """
    for p in range(len(self.players)):
        # refill player sp before running actions
        external_state = self.state["player_{}_external".format(p)]
        external_state["sp"] = external_state["max_sp"]

        self._runActionsForP(p)

        # identity test: a falsy-but-valid winner (e.g. index 0) must still
        # end the turn
        if self.winning_player is not None:
            return

    Stats.recordStat("turns")
    self.state["g"]["turn"] += 1
def _selectAction(self, recommended_a_id=None):
    """Pick the action to take in the current state ``self.s``.

    A random valid action is chosen when any of the following holds:
      * no action was recommended,
      * a random roll falls below ``self.random_action_rate``,
      * the recommended action is not among the valid actions.
    Otherwise the recommended action is looked up and used, and a
    "chosen_action=..." stat is recorded for it (random picks record no
    stat).

    Args:
        recommended_a_id: id of the action suggested by the caller, or None.

    Returns:
        A ``(action_id, action)`` tuple.

    TODO it might be a good idea to have state similarity here to pick a
    closest observed action.
    """
    possible_actions = getValidActionsInState(self.s)
    possible_action_ids = [
        Database.upsertAction(action) for action in possible_actions
    ]

    # Order matters: the random roll is only drawn when a recommendation was
    # actually supplied, so the RNG stream is untouched otherwise.
    # NOTE(review): `not recommended_a_id` also treats an id of 0 as "no
    # recommendation" - confirm action ids can never be 0.
    pick_randomly = (
        not recommended_a_id
        or np.random.random() < self.random_action_rate
        or recommended_a_id not in possible_action_ids
    )

    if pick_randomly:
        random_index = np.random.randint(len(possible_action_ids))
        random_action = possible_actions[random_index]
        self._printIfVerbose("agent randomly chose", random_action)
        return possible_action_ids[random_index], random_action

    action = Database.getAction(recommended_a_id)
    self._printIfVerbose("agent chose", action)

    # stat name is "chosen_action=<action>" plus "_id=<card_id>" when the
    # action carries a card id
    stat_name = "chosen_action={}".format(action["action"])
    if "card_id" in action and action["card_id"] is not None:
        stat_name = "{}{}".format(stat_name,
                                  "_id={}".format(action["card_id"]))
    Stats.recordStat(stat_name)

    return recommended_a_id, action
def main():
    """Play the configured number of games, then print and graph the stats.

    Sets up the database and the card/character definitions, builds the
    parameter dicts handed to every ``Game``, runs ``num_games`` games
    (decaying the agent learning rate after each one), tears the database
    down, and finally emits the collected statistics.
    """
    # TODO implement command-line args

    # initialize database
    Database.initialize()
    q_table = Database.getQTable()

    card_defs = loadCardDefinitions()
    character_defs = loadCharacterDefinitions()

    # initialize card definitions for querying
    CardDefinitions.setDefinitions(
        card_defs["main"],
        card_defs["treasures"],
        card_defs["answers"],
    )

    # how many games to play per run
    total_games = run_constants["num_games"]

    # every nth game will be verbose
    verbose_every = run_constants["verbose_mod"]

    # game params as defined in game/game.py
    # (available but currently unset: "num_agents", "num_humans", "max_turns")
    game_params = {}

    # agent params as defined in player/agent.py
    # (available but currently unset: "discount_factor",
    # "endgame_discount_factor", "random_action_rate", "dyna_steps")
    agent_params = {}
    agent_params["learning_rate"] = agent_constants["learning_rate"]

    # deck params as defined in game/game.py
    deck_params = {
        "main_cards": CardDefinitions.cards["main"],
        "treasure_cards": CardDefinitions.cards["treasures"],
        "answer_cards": CardDefinitions.cards["answers"],
    }

    # character params as defined in game/game.py
    character_params = {"characters": character_defs}

    for game_index in range(total_games):
        # the last game of every `verbose_every`-sized group is verbose
        is_verbose = game_index % verbose_every == verbose_every - 1
        for params in (game_params, agent_params):
            params["verbose"] = is_verbose

        print("Running game {}".format(game_index + 1))
        Game(q_table, game_params, agent_params, deck_params,
             character_params).run()
        Stats.recordStat("games")

        # shrink the learning rate a little after every game
        agent_params["learning_rate"] = agent_params["learning_rate"] * (
            1 - agent_constants["learning_rate_decay"])

        # persist this game's database changes before the next one starts
        Database.commit()

    # deinitialize database
    Database.destroy()

    Stats.printStats()
    Stats.printQStats(q_table)
    Stats.graphChosenActionUsage()
    Stats.graphTurnCountPerGame()