def train_test_agent():
    M = 10
    env = GraphSamplingEnv(max_samples=M)
    num_train_graphs = 10
    agent = BaseAgent(env=env)
    agent.learn(num_train_graphs)
    agent.test()
def run(mdp_domain):
    # Solve the original MDP with value iteration and collect samples.
    domain = mdp_domain()
    solver = ValueIterationSolver(domain, discount=GAMMA, threshold=TAU, verbose=True)
    agent = BaseAgent(domain, solver, epochs=STEPS)
    state_values = agent.train()
    rewards, samples = agent.run(external_policy='randomized')

    # Bin the sampled states by value and build the aggregated MDP.
    states = extract_states(samples)
    bucket_count = select_bin_counts(samples=states)
    mdp_aggregate, aggregation_mapping = aggregate_mdp(
        values=state_values, bin_count=bucket_count, domain=domain)

    # Solve the aggregated MDP with the same solver settings.
    domain_aggregate = mdp_domain(mdp_aggregate)
    solver_aggregate = ValueIterationSolver(
        domain=domain_aggregate, discount=GAMMA, threshold=TAU, verbose=True)
    agent_aggregate = BaseAgent(
        domain=domain_aggregate, solver=solver_aggregate, epochs=STEPS)
    state_values_aggregate = agent_aggregate.train()
    rewards_aggregate, samples_aggregate = agent_aggregate.run()

    # Map the aggregate policy back onto the original domain and evaluate it there.
    policy_aggregate = solver_aggregate.policy
    adapted_policy_aggregate = map_aggregate_policy(
        aggregate_policy=policy_aggregate,
        state_mapping=aggregation_mapping,
        original_domain=domain)
    domain.reset()
    rewards_aggregate_adapted, samples_aggregate_adapted = agent.run(
        external_policy=adapted_policy_aggregate)

    print('original return:', rewards.sum())
    print('aggregate return:', rewards_aggregate.sum())
    print('adapted return:', rewards_aggregate_adapted.sum())
    print('bin count:', bucket_count)
    return rewards, rewards_aggregate, rewards_aggregate_adapted
def run(args):
    M = 5
    env = GraphSamplingEnv(max_samples=M)
    agent = BaseAgent(env=env)

    # Write logs to a timestamped results directory.
    now = datetime.now()
    logger.configure(
        dir=f"./results/fixed_env/{now.strftime(TIMESTAMP_FORMAT)}")

    agent.learn()
    agent.test()
def run(args):
    M = 3
    env = GraphSamplingEnv(max_samples=M)
    agent = BaseAgent(
        env=env,
        gamma=args["gamma"],
        learning_rate=args["learning_rate"],
        replay_buffer_size=args["replay_buffer_size"],
        exploration_schedule_steps=args["exploration_schedule_steps"],
        exploration_initial_prob=args["exploration_initial_prob"],
        exploration_final_prob=args["exploration_final_prob"],
        random_walk_sampling_args=SAMPLING_ARGS)

    # Write logs to a timestamped directory under LOGDIR.
    now = datetime.now()
    logger.configure(dir=LOGDIR + f"{now.strftime(TIMESTAMP_FORMAT)}")

    agent.learn()
    agent.test()
def play():
    first_move = random.randint(1, 100)
    env = TicTacToeEnv(False)
    human = HumanAgent("X")
    machine = BaseAgent("O")
    agents = [human, machine]

    # Randomly decide which mark starts the first game.
    start_mark = "O" if first_move % 2 == 0 else "X"

    while True:
        env.set_start_mark(start_mark)
        state = env.reset()
        board, mark = state
        done = False
        env.render()

        while not done:
            agent = agent_by_mark(agents, mark)
            # Use a separate flag so the HumanAgent instance is not shadowed.
            is_human = isinstance(agent, HumanAgent)
            env.show_turn(True, mark)
            available_actions = env.available_actions()

            if is_human:
                action = agent.act(available_actions)
                if action is None:
                    sys.exit()
            else:
                action = agent.act(board, state, available_actions)

            state, reward, done, info = env.step(action)
            env.render(mode="human")

            if done:
                env.show_result(True, mark, reward)
                break
            else:
                board, mark = state

        # Alternate the starting mark between games.
        start_mark = next_mark(start_mark)
def __init__(self, parameters: Parameters):
    super().__init__()
    self.normals = 0
    self.schedule = BaseScheduler(self)
    self.ready_to_mate = []
    self.net_grow = 0
    self.average_age = 0
    self.average_fitness = 0
    self.nonAltruist_fitness = 0
    self.altruist_fitness = 0
    self.birthrate = 0
    self.altruists = 0
    self.nonAltruists = 0
    self.parameters = parameters
    self.population = 0
    self.altruistic_acts_altruists = 0
    self.altruistic_acts_base_agent = 0
    self.average_fitness_cost_round = []
    self.average_fitness_cost = []
    self.died = []
    self.died_this_round = []
    self.died_of_fitness_loss = 0
    self.died_of_age = 0
    self.died_of_chance = 0
    self.age_at_death = 0
    self.fitness_at_death = 0
    self.reset_randomizer(seed=self.parameters.SEED)  # random seed
    self.grid = MultiGrid(100, 100, True)

    # Create the initial agents.
    self.initial_agents = []
    i = 0
    while len(self.initial_agents) < self.parameters.NUMBER_OF_AGENTS:
        # With an x% chance, a special agent type spawns.
        rand = self.random.randint(0, 100)
        appended = False
        if rand < self.parameters.SPAWN_NONALTRUIST and len(self.initial_agents) < self.parameters.NUMBER_OF_AGENTS:
            a = NonAltruist(i, self)
            self.initial_agents.append(a)
            i += 1
            appended = True
        if rand < self.parameters.SPAWN_ALTRUIST and len(self.initial_agents) < self.parameters.NUMBER_OF_AGENTS:
            b = Altruist(i, self)
            self.initial_agents.append(b)
            i += 1
            appended = True
        if not appended and len(self.initial_agents) < self.parameters.NUMBER_OF_AGENTS:
            c = BaseAgent(i, self)
            self.initial_agents.append(c)
            i += 1

    # Add every agent to the schedule and place it on a random grid cell.
    for agent in self.initial_agents:
        self.schedule.add(agent)
        x = self.random.randrange(self.grid.width)
        y = self.random.randrange(self.grid.height)
        self.grid.place_agent(agent, (x, y))
from envs import GraphSamplingEnv
from agents import BaseAgent


# def train_test_agent():
print("here")
M = 10
env = GraphSamplingEnv(max_samples=M)
num_train_graphs = 10
agent = BaseAgent(env=env)
agent.learn()  # num_train_graphs)
agent.test()

# if __name__ == "__main__":
#     train_test_agent()