Example #1
    def __init__(self,
                 state_size,
                 action_size,
                 force_continue=False,
                 load_weights=False):
        BaseAgent.__init__(self)

        self._state_size = state_size
        self._action_size = action_size

        self._gamma = 0.95
        self._epsilon = 1.0 if not force_continue else 0.01
        self._epsilon_decay = 0.995
        self._epsilon_min = 0.01

        self._learning_rate = 0.001
        self._model = None

        self._build_model()

        # Load up known weights
        max_file_number = 0
        if load_weights or force_continue:
            for file in os.listdir('models'):
                file_without_ending = file.split('.')[0]
                file_number = file_without_ending.split('_')[1]
                if int(file_number) > int(max_file_number):
                    max_file_number = int(file_number)
            file_loaded = 'weights_' + '0' + str(max_file_number) + '.hdf5'
            file_loaded = os.path.join('models', file_loaded)
            print("Loading file " + str(file_loaded) +
                  " to continue training.")
            self.load(file_loaded)

        self.restart_file_number_offset = max_file_number
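
The constructor above scans the models directory for the highest-numbered weights file before resuming training. A minimal standalone sketch of that lookup, assuming files named weights_<number>.hdf5 inside a local models/ directory; the helper name and regex are illustrative and not part of the original agent:

import os
import re

def latest_checkpoint(model_dir='models'):
    # Return the path of the highest-numbered 'weights_<number>.hdf5' file, or None.
    best_number, best_path = -1, None
    for name in os.listdir(model_dir):
        match = re.fullmatch(r'weights_(\d+)\.hdf5', name)
        if match and int(match.group(1)) > best_number:
            best_number = int(match.group(1))
            best_path = os.path.join(model_dir, name)
    return best_path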
Example #2
    def __init__(self, state_size, action_size, load_weights=False):
        BaseAgent.__init__(self)

        self._state_size = state_size
        self._action_size = action_size

        self._gamma = 0.95
        self._epsilon = 1.0
        self._epsilon_decay = 0.995
        self._epsilon_min = 0.01

        self._learning_rate = 0.001
        self._model = None

        self._build_model()

        # Load up known weights
        max_file_number = '0'
        if load_weights:
            for file in os.listdir('models'):
                file_without_ending = file.split('.')[0]
                file_number = file_without_ending.split('_')[1]
                if int(file_number) > int(max_file_number):
                    max_file_number = file_number
            self.load(os.path.join('models', 'weights_' + max_file_number + '.hdf5'))
Example #3
    def __init__(self):
        BaseAgent.__init__(self)

        self._first_shot = True
        self._last_shot_pwr = 0
        self._last_shot_ang = 0

        self._target_pwr = 0
        self._target_ang = 0

        self._distance_to_target = 1024
        self._last_impact = [0, 0]

        self._ready_to_fire = True

        self._exploration_value = 10
Example #4
def run(mdp_domain):
    domain = mdp_domain()
    solver = ValueIterationSolver(domain,
                                  discount=GAMMA,
                                  threshold=TAU,
                                  verbose=True)
    agent = BaseAgent(domain, solver, epochs=STEPS)
    state_values = agent.train()
    rewards, samples = agent.run(external_policy='randomized')

    states = extract_states(samples)

    bucket_count = select_bin_counts(samples=states)
    mdp_aggregate, aggregation_mapping = aggregate_mdp(values=state_values,
                                                       bin_count=bucket_count,
                                                       domain=domain)

    domain_aggregate = mdp_domain(mdp_aggregate)
    solver_aggregate = ValueIterationSolver(domain=domain_aggregate,
                                            discount=GAMMA,
                                            threshold=TAU,
                                            verbose=True)
    agent_aggregate = BaseAgent(domain=domain_aggregate,
                                solver=solver_aggregate,
                                epochs=STEPS)
    state_values_aggregate = agent_aggregate.train()
    rewards_aggregate, samples_aggregate = agent_aggregate.run()
    policy_aggregate = solver_aggregate.policy

    adapted_policy_aggregate = map_aggregate_policy(
        aggregate_policy=policy_aggregate,
        state_mapping=aggregation_mapping,
        original_domain=domain)
    domain.reset()
    rewards_aggregate_adapted, samples_aggregate_adapted = agent.run(
        external_policy=adapted_policy_aggregate)

    print('original return:', rewards.sum())
    print('aggregate return:', rewards_aggregate.sum())
    print('adapted return:', rewards_aggregate_adapted.sum())
    print('bin count:', bucket_count)

    return rewards, rewards_aggregate, rewards_aggregate_adapted
Example #5
def train_test_agent():
    M = 10
    env = GraphSamplingEnv(max_samples=M)

    num_train_graphs = 10

    agent = BaseAgent(env=env)
    agent.learn(num_train_graphs)
    agent.test()
Example #6
def test_stakeOCEAN(alice_agent: BaseAgent, alice_pool):
    OCEAN_before: float = alice_agent.OCEAN()
    BPT_before: float = alice_agent.BPT(alice_pool)

    alice_agent.stakeOCEAN(OCEAN_stake=20.0, pool=alice_pool)

    OCEAN_after: float = alice_agent.OCEAN()
    BPT_after: float = alice_agent.BPT(alice_pool)
    assert OCEAN_after == (OCEAN_before - 20.0)
    assert BPT_after > BPT_before
Example #7
def run(args):
    M = 5
    env = GraphSamplingEnv(max_samples=M)

    agent = BaseAgent(env=env)
    now = datetime.now()
    logger.configure(
        dir=f"./results/fixed_env/{now.strftime(TIMESTAMP_FORMAT)}")
    agent.learn()
    agent.test()
Example #8
def run(args):
    M = 3
    env = GraphSamplingEnv(max_samples=M)

    agent = BaseAgent(
        env=env,
        gamma=args["gamma"],
        learning_rate=args["learning_rate"],
        replay_buffer_size=args["replay_buffer_size"],
        exploration_schedule_steps=args["exploration_schedule_steps"],
        exploration_initial_prob=args["exploration_initial_prob"],
        exploration_final_prob=args["exploration_final_prob"],
        random_walk_sampling_args=SAMPLING_ARGS)
    now = datetime.now()
    logger.configure(dir=LOGDIR + f"{now.strftime(TIMESTAMP_FORMAT)}")
    agent.learn()
    agent.test()
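
Example #8 pulls its hyperparameters from an args mapping. An illustrative args dictionary carrying the keys the code reads; the key names come from the snippet above, but every value is an assumption rather than one taken from the repository:

args = {
    "gamma": 0.99,
    "learning_rate": 1e-3,
    "replay_buffer_size": 10000,
    "exploration_schedule_steps": 5000,
    "exploration_initial_prob": 1.0,
    "exploration_final_prob": 0.05,
}
run(args)  # hypothetical invocation of the run() defined above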
Example #9
def play():
    first_move = random.randint(1, 100)

    env = TicTacToeEnv(False)
    human = HumanAgent("X")
    machine = BaseAgent("O")
    agents = [human, machine]
    start_mark = "O" if first_move % 2 == 0 else "X"

    while True:
        env.set_start_mark(start_mark)
        state = env.reset()
        board, mark = state
        done = False
        env.render()

        while not done:
            agent = agent_by_mark(agents, mark)
            human = isinstance(agent, HumanAgent)
            env.show_turn(True, mark)
            available_actions = env.available_actions()
            if human:
                action = agent.act(available_actions)
                if action is None:
                    sys.exit()
            else:
                action = agent.act(board, state, available_actions)

            state, reward, done, info = env.step(action)

            env.render(mode="human")
            if done:
                env.show_result(True, mark, reward)
                break
            else:
                board, mark = state

        start_mark = next_mark(start_mark)
Example #10
def evaluate_agent(agent: BaseAgent, env: gym.Env, n_episodes: int = 100) -> dict:
    agent.eval()
    episode_rewards = []
    for i_episode in range(n_episodes):
        state = torch.from_numpy(env.reset()).float()
        t = 0
        done = False
        rewards = []
        while not done:
            action = agent.get_action(state)
            next_state, reward, done, info = env.step(action)
            next_state = torch.from_numpy(next_state).float()
            rewards.append(reward)
            state = next_state
            t += 1

        episode_reward = np.sum(rewards)
        episode_rewards.append(episode_reward)
        _ = agent.end_episode()

    agent.train()
    return {
        'eval_mean_reward': np.mean(episode_rewards)
    }
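
A hypothetical way to exercise evaluate_agent with a random-action stand-in, assuming the classic Gym reset/step API used above and that gym, torch, and numpy are importable as in Example #10; RandomAgent is illustrative only and not part of the original code:

import gym

class RandomAgent:
    def __init__(self, action_space):
        self.action_space = action_space
    def eval(self):
        pass
    def train(self):
        pass
    def get_action(self, state):
        # Ignore the state and sample a random action from the environment.
        return self.action_space.sample()
    def end_episode(self):
        return None

env = gym.make('CartPole-v1')
print(evaluate_agent(RandomAgent(env.action_space), env, n_episodes=5))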
Example #11
    def __init__(self, parameters: Parameters):
        super().__init__()
        self.normals = 0
        self.schedule = BaseScheduler(self)
        self.ready_to_mate = []

        self.net_grow = 0
        self.average_age = 0
        self.average_fitness = 0
        self.nonAltruist_fitness = 0
        self.altruist_fitness = 0
        self.birthrate = 0
        self.altruists = 0
        self.nonAltruists = 0
        self.parameters = parameters
        self.population = 0
        self.altruistic_acts_altruists = 0
        self.altruistic_acts_base_agent = 0
        self.average_fitness_cost_round = []
        self.average_fitness_cost = []
        self.died = []
        self.died_this_round = []
        self.died_of_fitness_loss = 0
        self.died_of_age = 0
        self.died_of_chance = 0
        self.age_at_death = 0
        self.fitness_at_death = 0

        self.reset_randomizer(seed=self.parameters.SEED)  # random seed

        self.grid = MultiGrid(100, 100, True)

        # Create the initial agents
        self.initial_agents = []
        i = 0
        while len(self.initial_agents) < self.parameters.NUMBER_OF_AGENTS:
            # With an x% chance, a special character is spawned
            rand = self.random.randint(0, 100)
            appended = False

            if rand < self.parameters.SPAWN_NONALTRUIST and len(self.initial_agents) < self.parameters.NUMBER_OF_AGENTS:
                a = NonAltruist(i, self)
                self.initial_agents.append(a)
                i += 1
                appended = True

            if rand < self.parameters.SPAWN_ALTRUIST and len(self.initial_agents) < self.parameters.NUMBER_OF_AGENTS:
                b = Altruist(i, self)
                self.initial_agents.append(b)
                i += 1

                appended = True

            if not appended and len(self.initial_agents) < self.parameters.NUMBER_OF_AGENTS:
                c = BaseAgent(i, self)
                self.initial_agents.append(c)
                i += 1

        for agent in self.initial_agents:
            self.schedule.add(agent)
            x = self.random.randrange(self.grid.width)
            y = self.random.randrange(self.grid.height)
            self.grid.place_agent(agent, (x, y))
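
The spawn loop above draws one integer in [0, 100] and reuses it for both special-agent checks, so a single roll can satisfy both. A standalone sketch of that roll with stand-in class names and assumed percentage thresholds (illustrative only, not the original model code):

import random

def spawn_kinds(rand, spawn_nonaltruist=10, spawn_altruist=10):
    # Mirror the loop above: one roll may yield both special kinds,
    # and a BaseAgent is created only when neither check fires.
    kinds = []
    if rand < spawn_nonaltruist:
        kinds.append('NonAltruist')
    if rand < spawn_altruist:
        kinds.append('Altruist')
    if not kinds:
        kinds.append('BaseAgent')
    return kinds

print(spawn_kinds(random.randint(0, 100)))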
Example #12
def test_BPT(alice_agent: BaseAgent, alice_pool: bpool.BPool):
    assert alice_agent.BPT(alice_pool) == 100.0
Example #13
from envs import GraphSamplingEnv
from agents import BaseAgent


def train_test_agent():
    print("here")
    M = 10
    env = GraphSamplingEnv(max_samples=M)
    num_train_graphs = 10  # currently unused; agent.learn() is called without it
    agent = BaseAgent(env=env)
    agent.learn()
    agent.test()


if __name__ == "__main__":
    train_test_agent()
Example #14
from agents import BaseAgent
from util import LoadConfig, PlotOptimalSharpeRatio, PlotTraining, PlotWeight, PlotSMA
    
if __name__ == "__main__":
    dataconfig = LoadConfig('config/DataConfig.yaml')
    rrltraderconfig = LoadConfig('config/rrltraderconfig.yaml')

    InitialAgent = BaseAgent.BaseRRLTrader(
        trading_periods = rrltraderconfig["trading_periods"], 
        start_period = rrltraderconfig["start_period"],
        learning_rate = rrltraderconfig["learning_rate"],
        n_epochs = rrltraderconfig["n_epochs"],
        transaction_costs = rrltraderconfig["transaction_costs"],
        input_size = rrltraderconfig["input_size"],
        added_features = rrltraderconfig["features"],
        SMA = rrltraderconfig["SMA"],
        epsilon_greedy = rrltraderconfig["epsilon_greedy"])

    InitialAgent.upload_data(
        ticker = dataconfig['ticker'],
        start_date = dataconfig['start_date'],
        end_date = dataconfig['end_date'],
        csv_path="sourcefiles/^GSPC.csv")
    
    
    print(InitialAgent.time)
    #InitialAgent.load_weight(epoch_path=rrltraderconfig['weight_path'])
    #InitialAgent.fit()

    #PlotOptimalSharpeRatio(InitialAgent)
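
The keys read from config/rrltraderconfig.yaml above imply a configuration dictionary shaped roughly as follows; the key names come from the code, but every value is an assumption for illustration only:

rrltraderconfig = {
    'trading_periods': 1000,
    'start_period': 0,
    'learning_rate': 0.01,
    'n_epochs': 100,
    'transaction_costs': 0.0025,
    'input_size': 8,
    'features': [],
    'SMA': 20,
    'epsilon_greedy': False,
}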