Code example #1
def train_test_agent():
    M = 10
    env = GraphSamplingEnv(max_samples=M)

    num_train_graphs = 10

    agent = BaseAgent(env=env)
    agent.learn(num_train_graphs)
    agent.test()
Code example #2
def run(mdp_domain):
    domain = mdp_domain()
    # Solve the original MDP with value iteration and roll out a
    # randomized policy to collect sample trajectories
    solver = ValueIterationSolver(domain,
                                  discount=GAMMA,
                                  threshold=TAU,
                                  verbose=True)
    agent = BaseAgent(domain, solver, epochs=STEPS)
    state_values = agent.train()
    rewards, samples = agent.run(external_policy='randomized')

    states = extract_states(samples)

    # Aggregate the MDP into a coarser state space based on the learned values
    bucket_count = select_bin_counts(samples=states)
    mdp_aggregate, aggregation_mapping = aggregate_mdp(values=state_values,
                                                       bin_count=bucket_count,
                                                       domain=domain)

    # Solve the aggregated MDP and roll it out
    domain_aggregate = mdp_domain(mdp_aggregate)
    solver_aggregate = ValueIterationSolver(domain=domain_aggregate,
                                            discount=GAMMA,
                                            threshold=TAU,
                                            verbose=True)
    agent_aggregate = BaseAgent(domain=domain_aggregate,
                                solver=solver_aggregate,
                                epochs=STEPS)
    state_values_aggregate = agent_aggregate.train()
    rewards_aggregate, samples_aggregate = agent_aggregate.run()
    policy_aggregate = solver_aggregate.policy

    # Map the aggregate policy back onto the original domain and evaluate it there
    adapted_policy_aggregate = map_aggregate_policy(
        aggregate_policy=policy_aggregate,
        state_mapping=aggregation_mapping,
        original_domain=domain)
    domain.reset()
    rewards_aggregate_adapted, samples_aggregate_adapted = agent.run(
        external_policy=adapted_policy_aggregate)

    print('original return:', rewards.sum())
    print('aggregate return:', rewards_aggregate.sum())
    print('adapted return:', rewards_aggregate_adapted.sum())
    print('bin count:', bucket_count)

    return rewards, rewards_aggregate, rewards_aggregate_adapted
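
The call to map_aggregate_policy above expands the policy learned on the aggregated MDP back onto the original state space. The helper itself is not shown here; the following is a minimal sketch of what it could look like, assuming aggregation_mapping maps each original state to its aggregate state, the aggregate policy is a dict keyed by aggregate state, and the domain exposes a states collection (all of these are assumptions for illustration, not the project's actual API).

def map_aggregate_policy(aggregate_policy, state_mapping, original_domain):
    # Hypothetical sketch: give every original state the action chosen
    # for its aggregate state. Assumes original_domain.states exists.
    adapted_policy = {}
    for state in original_domain.states:
        aggregate_state = state_mapping[state]
        adapted_policy[state] = aggregate_policy[aggregate_state]
    return adapted_policy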
Code example #3
def run(args):
    M = 5
    env = GraphSamplingEnv(max_samples=M)

    agent = BaseAgent(env=env)
    now = datetime.now()
    logger.configure(
        dir=f"./results/fixed_env/{now.strftime(TIMESTAMP_FORMAT)}")
    agent.learn()
    agent.test()
Code example #4
def run(args):
    M = 3
    env = GraphSamplingEnv(max_samples=M)

    agent = BaseAgent(
        env=env,
        gamma=args["gamma"],
        learning_rate=args["learning_rate"],
        replay_buffer_size=args["replay_buffer_size"],
        exploration_schedule_steps=args["exploration_schedule_steps"],
        exploration_initial_prob=args["exploration_initial_prob"],
        exploration_final_prob=args["exploration_final_prob"],
        random_walk_sampling_args=SAMPLING_ARGS)
    now = datetime.now()
    logger.configure(dir=LOGDIR + f"{now.strftime(TIMESTAMP_FORMAT)}")
    agent.learn()
    agent.test()
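
For reference, the args dictionary consumed above could be populated as follows; the keys come from the snippet, while the values are illustrative placeholders rather than the project's tuned settings.

args = {
    "gamma": 0.99,
    "learning_rate": 1e-3,
    "replay_buffer_size": 50000,
    "exploration_schedule_steps": 10000,
    "exploration_initial_prob": 1.0,
    "exploration_final_prob": 0.05,
}
run(args)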
Code example #5
def play():
    first_move = random.randint(1, 100)  # its parity decides which mark starts

    env = TicTacToeEnv(False)
    human = HumanAgent("X")
    machine = BaseAgent("O")
    agents = [human, machine]
    start_mark = "O" if first_move % 2 == 0 else "X"

    while True:
        env.set_start_mark(start_mark)
        state = env.reset()
        board, mark = state
        done = False
        env.render()

        while not done:
            agent = agent_by_mark(agents, mark)
            is_human = isinstance(agent, HumanAgent)
            env.show_turn(True, mark)
            available_actions = env.available_actions()
            if is_human:
                action = agent.act(available_actions)
                if action is None:  # the human chose to quit
                    sys.exit()
            else:
                action = agent.act(board, state, available_actions)

            state, reward, done, info = env.step(action)

            env.render(mode="human")
            if done:
                env.show_result(True, mark, reward)
                break
            else:
                board, mark = state

        start_mark = next_mark(start_mark)
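
The game loop relies on the helpers agent_by_mark and next_mark, which are imported from elsewhere. A minimal sketch of their typical behavior in gym-tictactoe-style code (an assumption, not the original implementation):

def agent_by_mark(agents, mark):
    # Return the agent whose mark matches the side to move.
    # Assumes each agent stores its mark as agent.mark.
    for agent in agents:
        if agent.mark == mark:
            return agent


def next_mark(mark):
    # Switch between the two marks.
    return "O" if mark == "X" else "X"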
Code example #6
    def __init__(self, parameters: Parameters):
        super().__init__()
        self.normals = 0
        self.schedule = BaseScheduler(self)
        self.ready_to_mate = []

        self.net_grow = 0
        self.average_age = 0
        self.average_fitness = 0
        self.nonAltruist_fitness = 0
        self.altruist_fitness = 0
        self.birthrate = 0
        self.altruists = 0
        self.nonAltruists = 0
        self.parameters = parameters
        self.population = 0
        self.altruistic_acts_altruists = 0
        self.altruistic_acts_base_agent = 0
        self.average_fitness_cost_round = []
        self.average_fitness_cost = []
        self.died = []
        self.died_this_round = []
        self.died_of_fitness_loss = 0
        self.died_of_age = 0
        self.died_of_chance = 0
        self.age_at_death = 0
        self.fitness_at_death = 0

        self.reset_randomizer(seed=self.parameters.SEED)  # random seed

        self.grid = MultiGrid(100, 100, True)  # 100x100 toroidal grid

        # Create the initial agents
        self.initial_agents = []
        i = 0
        while len(self.initial_agents) < self.parameters.NUMBER_OF_AGENTS:
            # With an x% chance, a special character spawns
            rand = self.random.randint(0, 100)
            appended = False

            if rand < self.parameters.SPAWN_NONALTRUIST and len(self.initial_agents) < self.parameters.NUMBER_OF_AGENTS:
                a = NonAltruist(i, self)
                self.initial_agents.append(a)
                i += 1
                appended = True

            if rand < self.parameters.SPAWN_ALTRUIST and len(self.initial_agents) < self.parameters.NUMBER_OF_AGENTS:
                b = Altruist(i, self)
                self.initial_agents.append(b)
                i += 1

                appended = True

            if not appended and len(self.initial_agents) < self.parameters.NUMBER_OF_AGENTS:
                c = BaseAgent(i, self)
                self.initial_agents.append(c)
                i += 1

        for agent in self.initial_agents:
            self.schedule.add(agent)
            x = self.random.randrange(self.grid.width)
            y = self.random.randrange(self.grid.height)
            self.grid.place_agent(agent, (x, y))
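
The Parameters object passed to this constructor is defined elsewhere. A minimal sketch covering only the fields that this __init__ reads (the field names are taken from the snippet; the default values are placeholders):

from dataclasses import dataclass


@dataclass
class Parameters:
    SEED: int = 42                  # placeholder random seed
    NUMBER_OF_AGENTS: int = 200     # placeholder population size
    SPAWN_NONALTRUIST: int = 10     # percent chance per draw (placeholder)
    SPAWN_ALTRUIST: int = 20        # percent chance per draw (placeholder)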
Code example #7
from envs import GraphSamplingEnv
from agents import BaseAgent

def train_test_agent():
    M = 10
    env = GraphSamplingEnv(max_samples=M)

    agent = BaseAgent(env=env)
    agent.learn()
    agent.test()


if __name__ == "__main__":
    train_test_agent()