def turn(agent: Agent, agent_policy_step: str, foe: Foe, rand: str = "random") -> tuple[str, str]:
    # Not currently very extensible. Oh well.
    agent_action = agent.act(agent_policy_step)
    new_states = (agent_action.resolve_action(foe)
                  if rand == "random"
                  else agent_action.action_expectation(foe))
    if agent_action.target_id == "self":
        agent.update_states(new_states["target"])
    else:
        foe.update_states(new_states["target"])

    foe_action = foe.act(rand)
    new_states = (foe_action.resolve_action(agent)
                  if rand == "random"
                  else foe_action.action_expectation(agent))
    if foe_action.target_id == "self":
        foe.update_states(new_states["target"])
    else:
        agent.update_states(new_states["target"])

    foe.decrement_cooldowns()
    foe_reaction = foe.react()
    return agent_action, foe_reaction
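# Hypothetical stubs sketching the interface turn() assumes (the real
# Agent/Foe classes live in agent.py and foe.py and carry more state):
# act() returns an action object whose resolve_action(defender) reports
# its effects as a dict keyed by "target", and target_id routes those
# effects either back to the actor ("self") or to the opponent.
class StubAction:
    target_id = "foe"

    def resolve_action(self, defender):
        return {"target": {"hp": defender.hp - 3}}


class StubCombatant:
    def __init__(self):
        self.hp = 10

    def act(self, _policy_or_rand):
        return StubAction()

    def update_states(self, new_states):
        self.hp = new_states["hp"]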
def __init__(self, screen_dim):
    self.screen_dim = screen_dim
    self.background = [
        StarsLayer(screen_dim, (100, 100, 105), 1, 3),
        StarsLayer(screen_dim, (80, 80, 120), .5, 2),
        StarsLayer(screen_dim, (55, 55, 55), .25, 1),
    ]
    self.foreground = [
        StarsLayer(screen_dim, (255, 255, 255), 1.5, 4, 22),
        StarsLayer(screen_dim, (255, 255, 255), 6, 8, 7),
    ]
    self.enemies = [Foe(screen_dim), Foe(screen_dim)]
    self.exploding = []
    self.bubbles = [bubble.BubbleFactory().create_random(screen_dim)]
    self.food = []
def update_foe_belief(faux_foe: Foe, foe_reaction: str) -> Foe:
    if "RAW" in foe_reaction:
        print(foe_reaction)
    # Estimate the foe's hp from the length of its reaction string:
    # 140 hp for a bare 3-character reaction, minus 10 per extra character.
    faux_foe.hp = (14 - (len(foe_reaction) - 3)) * 10
    return faux_foe
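# A quick worked check of the belief update above, using hypothetical
# reaction strings (the real reaction format is defined by Foe.react()):
for reaction in ["RAW", "RAW!", "RAW!!!!!"]:
    hp_estimate = (14 - (len(reaction) - 3)) * 10
    print(reaction, "->", hp_estimate)  # RAW -> 140, RAW! -> 130, RAW!!!!! -> 90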
def spawn_foe(self):
    randy = random.random()
    if randy > SPAWN_RATE:
        return
    foe = Foe(self)
    foe.set_color(0, 255, 0)
    randy = random.uniform(0, 1.25)
    foe.set_radius(randy * self.player.radius)
    foe.choose_location_foe(self.screen_size, self.player)
    self.foes_list.append(foe)
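# Sanity check of the spawn probability: a foe spawns only when
# random.random() <= SPAWN_RATE, i.e. on a SPAWN_RATE fraction of calls.
# (SPAWN_RATE = 0.02 below is a hypothetical value for illustration.)
import random

SPAWN_RATE = 0.02
trials = 100_000
spawns = sum(random.random() <= SPAWN_RATE for _ in range(trials))
print(spawns / trials)  # ~0.02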
def __init__(
        self,
        agent: Agent = None,
        foe: Foe = None,
        reward_for_kill: float = 1000,
        penalty_for_dying: float = -1000,
        agent_hp_bonus: float = 0,
        foe_hp_bonus: float = -0.5,
):
    # Default to fresh instances rather than sharing one mutable
    # default Agent()/Foe() across every call.
    agent = agent if agent is not None else Agent()
    foe = foe if foe is not None else Foe()
    # See numpy.interp: these arrays are the coordinate/value pairs
    # for per-hp reward lookups.
    self.agent_xp = np.arange(start=1, stop=agent.max_hp + 1)
    self.reward_per_agent_hp = agent_hp_bonus * self.agent_xp
    self.penalty_for_dying = penalty_for_dying
    self.foe_xp = np.arange(start=1, stop=foe.max_hp + 1)
    self.reward_per_foe_hp = foe_hp_bonus * self.foe_xp
    self.reward_for_kill = reward_for_kill
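# A minimal sketch of how those arrays could feed numpy.interp, assuming a
# get_reward(agent, foe) method shaped like the rest of this class (the
# real method is not shown in this snippet):
import numpy as np

def get_reward_sketch(reward, agent, foe) -> float:
    r = 0.0
    if agent.hp <= 0:
        r += reward.penalty_for_dying
    else:
        # Interpolate the per-hp bonus at the agent's current hp.
        r += np.interp(agent.hp, reward.agent_xp, reward.reward_per_agent_hp)
    if foe.hp <= 0:
        r += reward.reward_for_kill
    else:
        r += np.interp(foe.hp, reward.foe_xp, reward.reward_per_foe_hp)
    return r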
def encounter(
        agent: Agent,
        foe: Foe,
        max_turns: int,
        forward_search_and_reward_kwargs: dict = None,
) -> tuple[Agent, Foe, pd.DataFrame]:
    """
    Run a single agent-vs-foe encounter for up to max_turns turns,
    planning the agent's action each turn with forward search against
    a belief-state copy of the foe.

    :param agent: the planning Agent
    :param foe: the opposing Foe
    :param max_turns: maximum number of turns before the encounter ends
    :param forward_search_and_reward_kwargs:
        - forward_search_kwargs:
            - depth: int = 3
        - reward_kwargs:
            - reward_for_kill: float = 1000
            - penalty_for_dying: float = -1000
            - agent_hp_bonus: float = 2
            - foe_hp_bonus: float = -1
    :return: the (mutated) agent, the (mutated) foe, and a per-turn
        DataFrame of encounter statistics
    """
    # Handle kwargs
    if forward_search_and_reward_kwargs is None:
        forward_search_and_reward_kwargs = {}
    forward_search_kwargs, reward_kwargs = __get_kwargs(
        forward_search_and_reward_kwargs)

    reward = Reward(agent, foe, **reward_kwargs)
    utility = reward.get_reward(agent, foe)
    faux_foe = Foe()  # The belief state of our foe

    # Arrays to hold encounter_stats
    agent_policies = []
    agent_spell_slots = []
    agent_shields = []
    agent_healths = []
    foe_healths = []
    foe_reactions = []
    faux_foe_healths = []
    forward_search_utilities = []
    rewards = []

    for _ in range(max_turns):
        agent_policy, forward_search_utility = forward_search(
            agent=copy.deepcopy(agent),
            foe=copy.deepcopy(faux_foe),
            reward=reward,
            utility=utility,
            **forward_search_kwargs)
        agent_action, foe_reaction = turn(agent, agent_policy, foe)
        faux_foe = update_foe_belief(faux_foe, foe_reaction)
        utility += reward.get_reward(agent, foe)

        # Collect turn data into encounter_stats
        agent_policies.append(agent_policy)
        agent_spell_slots.append(agent.states["spell slots"])
        agent_shields.append(agent.states["shield"])
        agent_healths.append(agent.hp)
        foe_healths.append(foe.hp)
        foe_reactions.append(foe_reaction)
        faux_foe_healths.append(faux_foe.hp)
        forward_search_utilities.append(forward_search_utility)
        rewards.append(utility)

        if agent.hp <= 0 or foe.hp <= 0:  # end encounter if either dies
            break

    encounter_stats = pd.DataFrame({
        "agent actions": agent_policies,
        "agent spell slots": agent_spell_slots,
        "agent shield": agent_shields,
        "agent health": agent_healths,
        "foe health": foe_healths,
        "foe reactions": foe_reactions,
        "faux foe health": faux_foe_healths,
        "forward search utility": forward_search_utilities,
        "utility": rewards,
    })
    return agent, foe, encounter_stats
from agent import Agent
from encounter import encounter
from foe import Foe

bad_guy = Foe()
good_guy = Agent()

# good_guy, bad_guy, encounter_stats = encounter(good_guy, bad_guy, 100)
good_guy, bad_guy, encounter_stats = encounter(
    good_guy,
    bad_guy,
    100,
    forward_search_and_reward_kwargs={
        "forward_search": {"depth": 25},
        "reward": {"penalty_for_dying": -5000},
    })

# Note: Valerie moved the plotting from here to test.ipynb.
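# A minimal sketch of the kind of plot that moved to test.ipynb (assumed;
# the notebook itself is not shown here). Column names match the
# encounter_stats DataFrame built in encounter():
import matplotlib.pyplot as plt

encounter_stats[["agent health", "foe health", "faux foe health"]].plot()
plt.xlabel("turn")
plt.ylabel("hp")
plt.show()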
def inc_enemies(self):
    self.enemies.append(Foe(self.screen_dim))