Example #1
from pysc2.env import sc2_env
from pysc2.lib import features

# MoveToBeacon8Actions, Evaluation and the _SCREEN, _MINIMAP, _VISUALIZE and
# _EPISODES constants are defined elsewhere in the project.
def main(unused_argv):

    agent = MoveToBeacon8Actions(state_size=_SCREEN,
                                 dqn="prio",
                                 learning=False)

    evaluator = Evaluation(50)

    try:

        with sc2_env.SC2Env(
                map_name="MoveToBeacon",
                players=[sc2_env.Agent(sc2_env.Race.terran)],
                agent_interface_format=features.AgentInterfaceFormat(
                    feature_dimensions=features.Dimensions(screen=_SCREEN,
                                                           minimap=_MINIMAP),
                    use_feature_units=True),
                step_mul=16,
                game_steps_per_episode=0,
                visualize=_VISUALIZE) as env:

            # run_loop.run_loop([agent], env, max_episodes=1)

            episodes = 0

            while episodes <= _EPISODES:

                episodes += 1

                agent.setup(env.observation_spec(), env.action_spec())

                timesteps = env.reset()
                agent.reset()

                while True:
                    step_actions = [agent.step(timesteps[0])]
                    if timesteps[0].last():
                        # agent.dqn.decrease_epsilon_factor()
                        # Sync the target network with the online network every 5 episodes.
                        if episodes % 5 == 0:
                            agent.dqn.update_target()
                        # agent.dqn.replay(32)
                        evaluator.moving_avg(
                            timesteps[0].observation.score_cumulative[0],
                            agent.dqn.epsilon)
                        break
                    timesteps = env.step(step_actions)

            agent.dqn.save_weights()
            # agent.ql.save_q_table()
            # agent.ql.print_q()

    except KeyboardInterrupt:
        print("Exception")
        pass
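The loop above relies on two project-specific helpers that are not shown here: Evaluation.moving_avg, which tracks a running score average, and agent.dqn.update_target. As a rough sketch of the evaluation helper (assumed behaviour with illustrative names, not the project's actual implementation):

from collections import deque

class Evaluation:
    """Hypothetical sketch of the score tracker constructed as Evaluation(50) above."""

    def __init__(self, window):
        self.scores = deque(maxlen=window)

    def moving_avg(self, score, epsilon):
        # Record the episode score and report the mean over the last `window`
        # episodes together with the current exploration rate.
        self.scores.append(score)
        avg = sum(self.scores) / len(self.scores)
        print("avg score ({} eps): {:.2f}, epsilon: {:.3f}".format(
            len(self.scores), avg, epsilon))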
Example #2
    def __init__(self,
                 state_size,
                 action_size,
                 path="Learning/Weights/weights.h5",
                 new_weights=True,
                 memory_size=100000,
                 replay_start_size=6000,
                 epsilon=1,
                 epsilon_min=.05,
                 max_step_for_epsilon_decay=125000*3,
                 prioritized_replay=False,
                 alpha=0.6,
                 beta=0.4,
                 beta_inc=0.0000005):

        self.state_size = state_size
        self.action_size = action_size
        self.path = path

        self.use_prio_buffer = prioritized_replay
        # Uniform replay stores transitions in a bounded deque; prioritized
        # replay uses a priority buffer parameterised by alpha instead.
        if not prioritized_replay:
            self.memory = deque(maxlen=memory_size)
        else:
            self.prio_memory = PrioritizedReplayBuffer(memory_size, alpha)
            self.beta = beta
            self.beta_inc = beta_inc
            # self.beta_schedule = LinearSchedule(max_step_for_epsilon_decay,
            #                                     1,
            #                                     0.4)

        self.gamma = 0.95    # discount rate
        self.epsilon = epsilon  # exploration rate
        self.epsilon_min = epsilon_min
        self.epsilon_decay = 0.995
        self.max_step_for_lin_epsilon_decay = max_step_for_epsilon_decay

        # Per-step decrement for linearly annealing epsilon over
        # max_step_for_lin_epsilon_decay steps.
        self.epsilon_decay_linear = (self.epsilon
                                     / self.max_step_for_lin_epsilon_decay)

        self.learning_rate = 0.00025
        self.replay_start_size = replay_start_size
        # Online network plus a cloned target network for stable Q-targets.
        self.model = self._build_model()
        self.target_model = clone_model(self.model)  # alternative: self._build_model()
        self.target_model.compile(optimizer='sgd', loss='mse')

        self.step = 0

        if not new_weights:
            self.model.load_weights(path)

        self.update_target()

        self.callback = Evaluation.create_tensorboard()
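The constructor above prepares state for two operations that Example #1 uses but that are not shown here: update_target, which copies the online network's weights into the target network, and a per-step epsilon decay based on epsilon_decay_linear. A minimal sketch under the assumption that the models are standard Keras models (method names are illustrative; the project's real code may differ):

    def update_target(self):
        # Hard update: overwrite the target network with the online weights.
        self.target_model.set_weights(self.model.get_weights())

    def decay_epsilon(self):
        # Linear annealing: reduce epsilon by a fixed amount per step until
        # epsilon_min is reached.
        self.step += 1
        if self.epsilon > self.epsilon_min:
            self.epsilon = max(self.epsilon_min,
                               self.epsilon - self.epsilon_decay_linear)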