def main(unused_argv):
    agent = MoveToBeacon8Actions(state_size=_SCREEN, dqn="prio", learning=False)
    evaluater = Evaluation(50)
    try:
        with sc2_env.SC2Env(
                map_name="MoveToBeacon",
                players=[sc2_env.Agent(sc2_env.Race.terran)],
                agent_interface_format=features.AgentInterfaceFormat(
                    feature_dimensions=features.Dimensions(screen=_SCREEN,
                                                           minimap=_MINIMAP),
                    use_feature_units=True),
                step_mul=16,
                game_steps_per_episode=0,
                visualize=_VISUALIZE) as env:
            # run_loop.run_loop([agent], env, max_episodes=1)
            episodes = 0
            while episodes <= _EPISODES:  # one training episode per iteration
                episodes += 1
                agent.setup(env.observation_spec(), env.action_spec())
                timesteps = env.reset()
                agent.reset()
                while True:
                    step_actions = [agent.step(timesteps[0])]
                    if timesteps[0].last():
                        # agent.dqn.decrease_epsilon_factor()  # this is C
                        if episodes % 5 == 0:
                            agent.dqn.update_target()  # sync target network every 5 episodes
                        # agent.dqn.replay(32)
                        evaluater.moving_avg(
                            timesteps[0].observation.score_cumulative[0],
                            agent.dqn.epsilon)
                        break
                    timesteps = env.step(step_actions)
            agent.dqn.save_weights()
            # agent.ql.save_q_table()
            # agent.ql.print_q()
    except KeyboardInterrupt:
        print("Exception")
        pass
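# The loop above references module-level constants (_SCREEN, _MINIMAP, _EPISODES,
# _VISUALIZE) and an entry point that are not part of this listing. A minimal
# sketch of how they might be wired up, assuming plain constants rather than
# command-line flags; the concrete values are illustrative, not taken from the source.
from absl import app
from pysc2.env import sc2_env
from pysc2.lib import features

_SCREEN = 32        # side length of the screen feature layers (assumed)
_MINIMAP = 32       # side length of the minimap feature layers (assumed)
_EPISODES = 500     # number of training episodes (assumed)
_VISUALIZE = False  # whether to open the PySC2 renderer (assumed)

if __name__ == "__main__":
    app.run(main)   # absl parses argv and passes it on to main(unused_argv)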
def __init__(self, state_size, action_size,
             path="Learning/Weights/weights.h5",
             new_weights=True,
             memory_size=100000,
             replay_start_size=6000,
             epsilon=1,
             epsilon_min=.05,
             max_step_for_epsilon_decay=125000*3,
             prioritized_replay=False,
             alpha=0.6,
             beta=0.4,
             beta_inc=0.0000005):
    self.state_size = state_size
    self.action_size = action_size
    self.path = path
    self.use_prio_buffer = prioritized_replay
    if not prioritized_replay:
        # plain FIFO replay memory
        self.memory = deque(maxlen=memory_size)
    else:
        # proportional prioritized replay; beta anneals importance-sampling weights
        self.prio_memory = PrioritizedReplayBuffer(memory_size, alpha)
        self.beta = beta
        self.beta_inc = beta_inc
        # self.beta_schedule = LinearSchedule(max_step_for_epsilon_decay, 1, 0.4)
    self.gamma = 0.95  # discount rate
    self.epsilon = epsilon  # exploration rate
    self.epsilon_min = epsilon_min
    self.epsilon_decay = 0.995
    self.max_step_for_lin_epsilon_decay = max_step_for_epsilon_decay
    self.epsilon_decay_linear = self.epsilon / self.max_step_for_lin_epsilon_decay
    self.learning_rate = 0.00025
    self.replay_start_size = replay_start_size
    self.model = self._build_model()
    self.target_model = clone_model(self.model)  # self._build_model()
    self.target_model.compile(optimizer='sgd', loss='mse')
    self.step = 0
    if not new_weights:
        self.model.load_weights(path)
    self.update_target()
    self.callback = Evaluation.create_tensorboard()
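# The constructor above relies on imports and helpers that are not part of this
# listing: deque (collections), clone_model (Keras), a PrioritizedReplayBuffer
# whose (size, alpha) signature matches the one in OpenAI Baselines, plus the
# methods _build_model() and update_target(). Below is a minimal sketch of those
# two methods, assuming a small fully connected Q-network built with Keras; the
# layer sizes, input shape, optimizer and loss are assumptions, only
# learning_rate comes from __init__ above.
from tensorflow import keras

def _build_model(self):
    # One Q-value per action for a flattened screen-sized state (illustrative shape).
    model = keras.Sequential([
        keras.layers.Flatten(input_shape=(self.state_size, self.state_size)),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dense(self.action_size, activation='linear'),
    ])
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=self.learning_rate),
                  loss='mse')
    return model

def update_target(self):
    # Copy the online network's weights into the frozen target network.
    self.target_model.set_weights(self.model.get_weights())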