def __init__(self, state_size, action_size, force_continue=False, load_weights=False):
    """Initialise hyper-parameters, build the model and optionally restore weights.

    Args:
        state_size: Stored unchanged as ``self._state_size``.
        action_size: Stored unchanged as ``self._action_size``.
        force_continue: When true, epsilon starts at 0.01 instead of 1.0 and
            the highest-numbered weights file in ``models`` is loaded.
        load_weights: When true, the highest-numbered weights file in
            ``models`` is loaded (epsilon still starts at 1.0).

    Raises:
        FileNotFoundError: If weights were requested but ``models`` holds no
            file named ``weights_<number>.hdf5``.
    """
    BaseAgent.__init__(self)
    self._state_size = state_size
    self._action_size = action_size
    self._gamma = 0.95
    # A continued run starts at the epsilon floor rather than at 1.0.
    self._epsilon = 0.01 if force_continue else 1.0
    self._epsilon_decay = 0.995
    self._epsilon_min = 0.01
    self._learning_rate = 0.001
    self._model = None
    self._build_model()

    # Load up known weights
    max_file_number = 0
    if load_weights or force_continue:
        newest_file = None
        for file_name in os.listdir('models'):
            # Ignore anything that is not a weights file (e.g. ".gitkeep");
            # such entries used to crash the number parsing below.
            if not (file_name.startswith('weights_') and file_name.endswith('.hdf5')):
                continue
            try:
                file_number = int(file_name.split('.')[0].split('_')[1])
            except (IndexError, ValueError):
                continue
            if newest_file is None or file_number > max_file_number:
                max_file_number = file_number
                newest_file = file_name
        if newest_file is None:
            raise FileNotFoundError(
                "No 'weights_<number>.hdf5' file found in 'models'.")
        # Use the name that is actually on disk instead of rebuilding it
        # with a hard-coded zero prefix.
        file_loaded = os.path.join('models', newest_file)
        print("Loading file " + str(file_loaded) + " to continue training.")
        self.load(file_loaded)
    # Always defined; stays 0 when no weights were loaded.
    self.restart_file_number_offset = max_file_number
def __init__(self, state_size, action_size, load_weights=False):
    """Initialise hyper-parameters, build the model and optionally restore weights.

    Args:
        state_size: Stored unchanged as ``self._state_size``.
        action_size: Stored unchanged as ``self._action_size``.
        load_weights: When true, the highest-numbered
            ``weights_<number>.hdf5`` file in ``models`` is loaded.

    Raises:
        FileNotFoundError: If ``load_weights`` is true but ``models`` holds
            no file named ``weights_<number>.hdf5``.
    """
    BaseAgent.__init__(self)
    self._state_size = state_size
    self._action_size = action_size
    self._gamma = 0.95
    self._epsilon = 1.0
    self._epsilon_decay = 0.995
    self._epsilon_min = 0.01
    self._learning_rate = 0.001
    self._model = None
    self._build_model()

    # Load up known weights
    if load_weights:
        # Scan the same relative directory the weights are loaded from
        # (the scan previously looked at the absolute path '/models').
        weights_dir = 'models'
        newest_file = None
        newest_number = None  # None instead of '' so int('') is never evaluated
        for file_name in os.listdir(weights_dir):
            # Skip entries that are not weights files (e.g. ".gitkeep").
            if not (file_name.startswith('weights_') and file_name.endswith('.hdf5')):
                continue
            try:
                file_number = int(file_name.split('.')[0].split('_')[1])
            except (IndexError, ValueError):
                continue
            if newest_number is None or file_number > newest_number:
                newest_number = file_number
                newest_file = file_name
        if newest_file is None:
            raise FileNotFoundError(
                "No 'weights_<number>.hdf5' file found in 'models'.")
        self.load(os.path.join(weights_dir, newest_file))
def __init__(self):
    """Create the agent with its shot-tracking state reset to defaults."""
    BaseAgent.__init__(self)

    # No shot has been fired yet.
    self._first_shot = True

    # Power/angle of the previous shot and of the current target, all zeroed.
    self._last_shot_pwr = 0
    self._last_shot_ang = 0
    self._target_pwr = 0
    self._target_ang = 0

    # Initial distance and last impact point (units not visible here).
    self._distance_to_target = 1024
    self._last_impact = [0, 0]

    self._ready_to_fire = True
    self._exploration_value = 10
def run(mdp_domain):
    """Solve a domain, solve an aggregated version of it, and compare returns.

    Steps, in order:
      1. Build ``mdp_domain()``, train a value-iteration agent on it and roll
         it out with the ``'randomized'`` external policy.
      2. Choose a bin count from the visited states and aggregate the MDP
         using the trained state values.
      3. Train and roll out a second agent on the aggregated domain.
      4. Map the aggregate policy back onto the original domain, reset that
         domain, and roll out the first agent with the mapped policy.

    Args:
        mdp_domain: Callable that builds a domain; called once with no
            arguments and once with the aggregated MDP.

    Returns:
        Tuple ``(rewards, rewards_aggregate, rewards_aggregate_adapted)``.
    """
    # --- original domain -------------------------------------------------
    base_domain = mdp_domain()
    base_solver = ValueIterationSolver(
        base_domain, discount=GAMMA, threshold=TAU, verbose=True)
    base_agent = BaseAgent(base_domain, base_solver, epochs=STEPS)
    base_values = base_agent.train()
    base_rewards, base_samples = base_agent.run(external_policy='randomized')

    # --- aggregation -----------------------------------------------------
    visited_states = extract_states(base_samples)
    n_bins = select_bin_counts(samples=visited_states)
    coarse_mdp, state_mapping = aggregate_mdp(
        values=base_values, bin_count=n_bins, domain=base_domain)

    # --- aggregated domain -----------------------------------------------
    coarse_domain = mdp_domain(coarse_mdp)
    coarse_solver = ValueIterationSolver(
        domain=coarse_domain, discount=GAMMA, threshold=TAU, verbose=True)
    coarse_agent = BaseAgent(
        domain=coarse_domain, solver=coarse_solver, epochs=STEPS)
    coarse_agent.train()  # state values are not needed afterwards
    coarse_rewards, _ = coarse_agent.run()

    # --- aggregate policy replayed on the original domain ------------------
    mapped_policy = map_aggregate_policy(
        aggregate_policy=coarse_solver.policy,
        state_mapping=state_mapping,
        original_domain=base_domain)
    base_domain.reset()
    mapped_rewards, _ = base_agent.run(external_policy=mapped_policy)

    print('original return:', base_rewards.sum())
    print('aggregate return:', coarse_rewards.sum())
    print('adapted return:', mapped_rewards.sum())
    print('bin count:', n_bins)

    return base_rewards, coarse_rewards, mapped_rewards
def train_test_agent():
    """Train a BaseAgent on a GraphSamplingEnv, then run its test routine."""
    max_samples = 10
    num_train_graphs = 10

    environment = GraphSamplingEnv(max_samples=max_samples)
    agent = BaseAgent(env=environment)

    agent.learn(num_train_graphs)
    agent.test()
def test_stakeOCEAN(alice_agent: BaseAgent, alice_pool):
    """Staking 20 OCEAN lowers the OCEAN balance by 20 and raises the BPT balance."""
    stake = 20.0

    # Balances before staking.
    ocean_start: float = alice_agent.OCEAN()
    bpt_start: float = alice_agent.BPT(alice_pool)

    alice_agent.stakeOCEAN(OCEAN_stake=stake, pool=alice_pool)

    # Balances after staking.
    ocean_end: float = alice_agent.OCEAN()
    bpt_end: float = alice_agent.BPT(alice_pool)

    assert ocean_end == (ocean_start - stake)
    assert bpt_end > bpt_start
def run(args):
    """Train and test a BaseAgent on a fixed GraphSamplingEnv.

    Results are logged to a timestamped directory under
    ``./results/fixed_env/``.

    Args:
        args: Accepted for interface compatibility; not read here.
    """
    max_samples = 5
    agent = BaseAgent(env=GraphSamplingEnv(max_samples=max_samples))

    # Timestamp is taken after the agent exists, just before logging starts.
    started_at = datetime.now()
    logger.configure(
        dir=f"./results/fixed_env/{started_at.strftime(TIMESTAMP_FORMAT)}")

    agent.learn()
    agent.test()
def run(args):
    """Train and test a BaseAgent configured from ``args``.

    Args:
        args: Mapping that must provide ``gamma``, ``learning_rate``,
            ``replay_buffer_size``, ``exploration_schedule_steps``,
            ``exploration_initial_prob`` and ``exploration_final_prob``.

    Raises:
        KeyError: If any of the keys above is missing from ``args``.
    """
    max_samples = 3
    env = GraphSamplingEnv(max_samples=max_samples)

    # Hyper-parameters forwarded verbatim from ``args`` to the agent.
    forwarded = (
        "gamma",
        "learning_rate",
        "replay_buffer_size",
        "exploration_schedule_steps",
        "exploration_initial_prob",
        "exploration_final_prob",
    )
    hyper_params = {}
    for key in forwarded:
        hyper_params[key] = args[key]

    agent = BaseAgent(
        env=env,
        random_walk_sampling_args=SAMPLING_ARGS,
        **hyper_params)

    started_at = datetime.now()
    logger.configure(dir=LOGDIR + f"{started_at.strftime(TIMESTAMP_FORMAT)}")

    agent.learn()
    agent.test()
def play():
    """Play tic-tac-toe games between a human ("X") and a BaseAgent ("O") forever.

    The starting mark of the first game is chosen at random and then
    advanced with ``next_mark`` after every game. The function only ends
    when the human's ``act`` returns ``None``, which exits the process.
    """
    coin = random.randint(1, 100)
    env = TicTacToeEnv(False)
    agents = [HumanAgent("X"), BaseAgent("O")]

    # Even draw -> "O" starts, odd draw -> "X" starts.
    start_mark = "X" if coin % 2 else "O"

    while True:
        env.set_start_mark(start_mark)
        state = env.reset()
        board, mark = state
        env.render()

        done = False
        while not done:
            agent = agent_by_mark(agents, mark)
            is_human = isinstance(agent, HumanAgent)
            env.show_turn(True, mark)
            available_actions = env.available_actions()

            if is_human:
                action = agent.act(available_actions)
                if action is None:
                    # The human agent signals "quit" by returning None.
                    sys.exit()
            else:
                action = agent.act(board, state, available_actions)

            state, reward, done, info = env.step(action)
            env.render(mode="human")

            if done:
                env.show_result(True, mark, reward)
                break
            board, mark = state

        start_mark = next_mark(start_mark)
def evaluate_agent(agent: BaseAgent, env: gym.Env, n_episodes: int = 100) -> dict:
    """Roll out ``agent`` in ``env`` and report the mean episode reward.

    The agent is switched to evaluation mode for the roll-outs and is always
    switched back to training mode afterwards, even if the environment or
    the agent raises part-way through.

    Args:
        agent: Object providing ``eval()``, ``train()``, ``get_action(state)``
            and ``end_episode()``.
        env: Environment whose ``reset()`` returns a NumPy array and whose
            ``step(action)`` returns ``(next_state, reward, done, info)``.
        n_episodes: Number of episodes to run.

    Returns:
        ``{'eval_mean_reward': <mean of the per-episode reward sums>}``.
    """
    agent.eval()
    episode_rewards = []
    try:
        for _ in range(n_episodes):
            state = torch.from_numpy(env.reset()).float()
            done = False
            rewards = []
            while not done:
                action = agent.get_action(state)
                next_state, reward, done, _info = env.step(action)
                rewards.append(reward)
                state = torch.from_numpy(next_state).float()
            episode_rewards.append(np.sum(rewards))
            # Return value is intentionally ignored during evaluation.
            agent.end_episode()
    finally:
        # Never leave the caller's agent stuck in eval mode.
        agent.train()
    return {
        'eval_mean_reward': np.mean(episode_rewards)
    }
def __init__(self, parameters: Parameters):
    """Set up the model: counters, scheduler, RNG seed, grid and initial agents.

    Args:
        parameters: Configuration object; this method reads ``SEED``,
            ``NUMBER_OF_AGENTS``, ``SPAWN_NONALTRUIST`` and ``SPAWN_ALTRUIST``.
    """
    super().__init__()

    # --- statistics and bookkeeping, all starting empty/zero ---------------
    self.normals = 0
    self.schedule = BaseScheduler(self)
    self.ready_to_mate = []
    self.net_grow = 0
    self.average_age = 0
    self.average_fitness = 0
    self.nonAltruist_fitness = 0
    self.altruist_fitness = 0
    self.birthrate = 0
    self.altruists = 0
    self.nonAltruists = 0
    self.parameters = parameters
    self.population = 0
    self.altruistic_acts_altruists = 0
    self.altruistic_acts_base_agent = 0
    self.average_fitness_cost_round = []
    self.average_fitness_cost = []
    self.died = []
    self.died_this_round = []
    self.died_of_fitness_loss = 0
    self.died_of_age = 0
    self.died_of_chance = 0
    self.age_at_death = 0
    self.fitness_at_death = 0

    self.reset_randomizer(seed=self.parameters.SEED)  # random seed

    # NOTE(review): third argument is presumably the grid's wrap-around
    # ("torus") flag; confirm against the MultiGrid signature.
    self.grid = MultiGrid(100, 100, True)

    # Create the initial agents.
    self.initial_agents = []
    next_id = 0

    def room_left():
        # True while the initial population is still below its target size.
        return len(self.initial_agents) < self.parameters.NUMBER_OF_AGENTS

    while room_left():
        # One roll per round decides whether special agent types spawn.
        # Both special types can spawn from the same roll.
        roll = self.random.randint(0, 100)
        spawned_special = False

        if roll < self.parameters.SPAWN_NONALTRUIST and room_left():
            self.initial_agents.append(NonAltruist(next_id, self))
            next_id += 1
            spawned_special = True

        if roll < self.parameters.SPAWN_ALTRUIST and room_left():
            self.initial_agents.append(Altruist(next_id, self))
            next_id += 1
            spawned_special = True

        # Otherwise fall back to a plain BaseAgent.
        if not spawned_special and room_left():
            self.initial_agents.append(BaseAgent(next_id, self))
            next_id += 1

    # Register every agent with the scheduler and drop it on a random cell.
    for agent in self.initial_agents:
        self.schedule.add(agent)
        position = (
            self.random.randrange(self.grid.width),
            self.random.randrange(self.grid.height),
        )
        self.grid.place_agent(agent, position)
def test_BPT(alice_agent: BaseAgent, alice_pool: bpool.BPool):
    """Alice's BPT balance for her pool is reported as exactly 100.0."""
    bpt_balance = alice_agent.BPT(alice_pool)
    assert bpt_balance == 100.0
from envs import GraphSamplingEnv
from agents import BaseAgent

# NOTE: the train_test_agent() wrapper and its __main__ guard are commented
# out, so the statements below run at module level (on import as well).
# def train_test_agent():
print("here")

M = 10
num_train_graphs = 10  # currently not passed to learn(), see below

env = GraphSamplingEnv(max_samples=M)
agent = BaseAgent(env=env)
agent.learn()  # num_train_graphs)
agent.test()

# if __name__ == "__main__":
#     train_test_agent()
from agents import BaseAgent
from util import LoadConfig, PlotOptimalSharpeRatio, PlotTraining, PlotWeight, PlotSMA

if __name__ == "__main__":
    # Load the data and trader configuration files.
    data_cfg = LoadConfig('config/DataConfig.yaml')
    trader_cfg = LoadConfig('config/rrltraderconfig.yaml')

    # Settings whose config key equals the constructor keyword.
    same_name_keys = (
        "trading_periods",
        "start_period",
        "learning_rate",
        "n_epochs",
        "transaction_costs",
        "input_size",
        "SMA",
        "epsilon_greedy",
    )
    trader_kwargs = {key: trader_cfg[key] for key in same_name_keys}
    # The config calls this "features"; the constructor calls it "added_features".
    trader_kwargs["added_features"] = trader_cfg["features"]

    initial_agent = BaseAgent.BaseRRLTrader(**trader_kwargs)

    initial_agent.upload_data(
        ticker=data_cfg['ticker'],
        start_date=data_cfg['start_date'],
        end_date=data_cfg['end_date'],
        csv_path="sourcefiles/^GSPC.csv")

    print(initial_agent.time)

    # Disabled steps, kept for reference:
    # initial_agent.load_weight(epoch_path=trader_cfg['weight_path'])
    # initial_agent.fit()
    # PlotOptimalSharpeRatio(initial_agent)