Example #1
class DDQNGameModel:
    def __init__(self, mode_name, input_shape, action_space, logger_path,
                 model_path):
        self.action_space = action_space
        self.input_shape = input_shape
        self.logger = Logger("Breakout " + mode_name, logger_path)
        self.model_path = model_path
        self.ddqn = ConvolutionalNeuralNetwork(self.input_shape,
                                               action_space).model
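        # Resume from previously saved weights if a checkpoint file already exists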
        if os.path.isfile(self.model_path):
            self.ddqn.load_weights(self.model_path)

    def save_model(self):
        self.ddqn.save_weights(self.model_path)
        print('Model saved')

    def save_run(self, score, step, run):
        self.logger.add_score(score)
        self.logger.add_step(step)
        self.logger.add_run(run)

    def get_date(self):
        return str(datetime.datetime.now().strftime('%Y-%m-%d_%H-%M'))

    def remember(self, state, action, reward, next_state, done):
        pass

    def step_update(self, total_step):
        pass
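The load/save pattern above is plain Keras weight checkpointing. A minimal standalone sketch of the same idea (the tiny Sequential model and the ./tmp_weights.h5 path are illustrative assumptions, not part of the project):

import os
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

model_path = "./tmp_weights.h5"
model = Sequential([Dense(8, activation="relu", input_shape=(4,)), Dense(2)])

# Mirror of DDQNGameModel.__init__: restore weights only if a checkpoint exists
if os.path.isfile(model_path):
    model.load_weights(model_path)

# Mirror of save_model()
model.save_weights(model_path)
print('Model saved')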
Example #2
    def __init__(self, game_name, mode_name, input_shape, action_space,
                 logger_path, model_path):
        BaseGameModel.__init__(self, game_name, mode_name, logger_path,
                               input_shape, action_space)
        self.model_path = model_path
        self.model = ConvolutionalNeuralNetwork(input_shape,
                                                action_space).model
Example #3
    def __init__(self, game_name, mode_name, input_shape, action_space,
                 logger_path, model_path):
        BaseGameModel.__init__(self, game_name, mode_name, logger_path,
                               input_shape, action_space)
        self.model_path = model_path
        self.ddqn = ConvolutionalNeuralNetwork(self.input_shape,
                                               action_space).model
        if os.path.isfile(self.model_path):
            self.ddqn.load_weights(self.model_path)
Example #4
    def __init__(self, mode_name, input_shape, action_space, logger_path,
                 model_path):
        self.action_space = action_space
        self.input_shape = input_shape
        self.logger = Logger("Breakout " + mode_name, logger_path)
        self.model_path = model_path
        self.ddqn = ConvolutionalNeuralNetwork(self.input_shape,
                                               action_space).model
        if os.path.isfile(self.model_path):
            self.ddqn.load_weights(self.model_path)
Example #5
    def __init__(self, input_shape, action_space):
        DDQNGameModel.__init__(
            self, "DDQN training", input_shape, action_space,
            "./output/logs/training/" + self.get_date() + "/",
            "./output/neural_nets/" + self.get_date() + "/model.h5")

        if os.path.exists(os.path.dirname(self.model_path)):
            shutil.rmtree(os.path.dirname(self.model_path), ignore_errors=True)
        os.makedirs(os.path.dirname(self.model_path))

        self.ddqn_target = ConvolutionalNeuralNetwork(self.input_shape,
                                                      action_space).model
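        # Initialize the target network with a copy of the online network's weights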
        self.reset_target_network()
        self.epsilon = EXPLORATION_MAX
        self.memory = []
Example #6
    def __init__(self, game_name, mode_name, input_shape, action_space,
                 logger_path, model_path):
        BaseGameModel.__init__(self, game_name, mode_name, logger_path,
                               input_shape, action_space)
        self.model_path = model_path
        self.ddqn = ConvolutionalNeuralNetwork(self.input_shape,
                                               action_space).model
        if os.path.isfile(self.model_path):
            self.ddqn = load_model(self.model_path)
            print("Loaded model from: " + self.model_path)
        print("model path: " + model_path)
        print("logger path: " + logger_path)
Example #7
class GEGameModel(BaseGameModel):

    model = None

    def __init__(self, game_name, mode_name, input_shape, action_space, logger_path, model_path):
        BaseGameModel.__init__(self,
                               game_name,
                               mode_name,
                               logger_path,
                               input_shape,
                               action_space)
        self.model_path = model_path
        self.model = ConvolutionalNeuralNetwork(input_shape, action_space).model

    def _predict(self, state):
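        # Keep a small fixed exploration rate (2%) even when only predicting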
        if np.random.rand() < 0.02:
            return random.randrange(self.action_space)
        q_values = self.model.predict(np.expand_dims(np.asarray(state).astype(np.float64), axis=0), batch_size=1)
        return np.argmax(q_values[0])
Example #8
class DDQNTrainer(DDQNGameModel):
    def __init__(self, game_name, input_shape, action_space):
        DDQNGameModel.__init__(
            self, game_name, "DDQN training", input_shape, action_space,
            "./output/logs/" + game_name + "/ddqn/training/" +
            self._get_date() + "/", "./output/neural_nets/" + game_name +
            "/ddqn/" + self._get_date() + "/model.h5")

        if os.path.exists(os.path.dirname(self.model_path)):
            shutil.rmtree(os.path.dirname(self.model_path), ignore_errors=True)
        os.makedirs(os.path.dirname(self.model_path))

        self.ddqn_target = ConvolutionalNeuralNetwork(self.input_shape,
                                                      action_space).model
        self._reset_target_network()
        self.epsilon = EXPLORATION_MAX
        self.memory = []

    def move(self, state):
        if np.random.rand() < self.epsilon or len(
                self.memory) < REPLAY_START_SIZE:
            return random.randrange(self.action_space)
        q_values = self.ddqn.predict(np.expand_dims(np.asarray(state).astype(
            np.float64),
                                                    axis=0),
                                     batch_size=1)
        return np.argmax(q_values[0])

    def remember(self, current_state, action, reward, next_state, terminal):
        self.memory.append({
            "current_state": current_state,
            "action": action,
            "reward": reward,
            "next_state": next_state,
            "terminal": terminal
        })
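        # Drop the oldest transition once the replay buffer exceeds MEMORY_SIZE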
        if len(self.memory) > MEMORY_SIZE:
            self.memory.pop(0)

    def step_update(self, total_step):
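        # Train, decay epsilon, checkpoint, and refresh the target network on their respective schedules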
        if len(self.memory) < REPLAY_START_SIZE:
            return

        if total_step % TRAINING_FREQUENCY == 0:
            loss, accuracy, average_max_q = self._train()
            self.logger.add_loss(loss)
            self.logger.add_accuracy(accuracy)
            self.logger.add_q(average_max_q)

        self._update_epsilon()

        if total_step % MODEL_PERSISTENCE_UPDATE_FREQUENCY == 0:
            self._save_model()

        if total_step % TARGET_NETWORK_UPDATE_FREQUENCY == 0:
            self._reset_target_network()
            print('{{"metric": "epsilon", "value": {}}}'.format(self.epsilon))
            print('{{"metric": "total_step", "value": {}}}'.format(total_step))

    def _train(self):
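        # Sample a random minibatch and regress the online network toward targets
        # bootstrapped from the target network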
        if len(self.memory) < BATCH_SIZE:
            return
        batch = random.sample(self.memory, BATCH_SIZE)

        current_states = []
        q_values = []
        max_q_values = []

        for entry in batch:
            current_state = np.expand_dims(np.asarray(
                entry["current_state"]).astype(np.float64),
                                           axis=0)
            current_states.append(current_state)
            next_state = np.expand_dims(np.asarray(entry["next_state"]).astype(
                np.float64),
                                        axis=0)
            next_state_prediction = self.ddqn_target.predict(
                next_state).ravel()
            next_q_value = np.max(next_state_prediction)
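            # Start from the online network's current estimates and overwrite only the action actually taken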
            q = list(self.ddqn.predict(current_state)[0])
            if entry["terminal"]:
                q[entry["action"]] = entry["reward"]
            else:
                q[entry["action"]] = entry["reward"] + GAMMA * next_q_value
            q_values.append(q)
            max_q_values.append(np.max(q))

        fit = self.ddqn.fit(np.asarray(current_states).squeeze(),
                            np.asarray(q_values).squeeze(),
                            batch_size=BATCH_SIZE,
                            verbose=0)
        loss = fit.history["loss"][0]
        accuracy = fit.history["accuracy"][0]
        return loss, accuracy, mean(max_q_values)

    def _update_epsilon(self):
        self.epsilon -= EXPLORATION_DECAY
        self.epsilon = max(EXPLORATION_MIN, self.epsilon)

    def _reset_target_network(self):
        self.ddqn_target.set_weights(self.ddqn.get_weights())
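For reference, _update_epsilon above is a simple linear decay clipped at a floor. A self-contained sketch of that schedule follows; the three constants are not defined in the snippet, so the values below are illustrative assumptions only:

# Hypothetical values; the real EXPLORATION_* constants are defined elsewhere in the project
EXPLORATION_MAX = 1.0
EXPLORATION_MIN = 0.1
EXPLORATION_DECAY = 0.000002

epsilon = EXPLORATION_MAX
for step in range(1000000):
    # same update as _update_epsilon: decrement, then clip at the minimum
    epsilon = max(EXPLORATION_MIN, epsilon - EXPLORATION_DECAY)
print(epsilon)  # prints 0.1: the decay reaches the floor well before the loop ends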
Example #9
class DDQNTrainer(DDQNGameModel):

    def __init__(self, game_name, input_shape, action_space):
        DDQNGameModel.__init__(self,
                               game_name,
                               "DDQN training",
                               input_shape,
                               action_space,
                               "./output/logs/" + game_name + "/ddqn/training/" + self._get_date() + "/",
                               "./output/neural_nets/" + game_name + "/ddqn/" + self._get_date() + "/model.h5")

        if os.path.exists(os.path.dirname(self.model_path)):
            shutil.rmtree(os.path.dirname(self.model_path), ignore_errors=True)
        os.makedirs(os.path.dirname(self.model_path))
        print('PG explore decay:', EXPLORATION_DECAY)
        self.ddqn_target = ConvolutionalNeuralNetwork(self.input_shape, action_space).model
        self._reset_target_network()
        self.epsilon = EXPLORATION_MAX
        self.memory = []

    def move(self, state):
        # want to make sure the first action is build, second action is make ship
        # need separate actions for the ships and yards, right?
        # lets try just manually performing the first two actions
        # in the main loop and having no actions for the yards...

        # also need to preprocess state from board...

        if np.random.rand() < self.epsilon or len(self.memory) < REPLAY_START_SIZE:
            return random.randrange(self.action_space)
        state = [np.expand_dims(np.asarray(state[0]).astype(np.float64), axis=0), np.expand_dims(np.asarray(state[1]).astype(np.float64), axis=0)]
        q_values = self.ddqn.predict(state, batch_size=1)
        return np.argmax(q_values[0])

    def remember(self, current_state, action, reward, next_state, terminal):
        self.memory.append({"current_state": current_state,
                            "action": action,
                            "reward": reward,
                            "next_state": next_state,
                            "terminal": terminal})
        if len(self.memory) > MEMORY_SIZE:
            self.memory.pop(0)

    def step_update(self, total_step):
        if len(self.memory) < REPLAY_START_SIZE:
            return

        if total_step % TRAINING_FREQUENCY == 0:
            loss, accuracy, average_max_q = self._train()
            self.logger.add_loss(loss)
            self.logger.add_accuracy(accuracy)
            self.logger.add_q(average_max_q)

        self._update_epsilon()

        if total_step % MODEL_PERSISTENCE_UPDATE_FREQUENCY == 0:
            self._save_model()

        if total_step % TARGET_NETWORK_UPDATE_FREQUENCY == 0:
            self._reset_target_network()
            print('{{"metric": "epsilon", "value": {}}}'.format(self.epsilon))
            print('{{"metric": "total_step", "value": {}}}'.format(total_step))

    def _train(self):
        if len(self.memory) < BATCH_SIZE:
            return
        batch = random.sample(self.memory, BATCH_SIZE)

        '''
        current_states = []
        q_values = []
        max_q_values = []

        for entry in batch:
            current_state = np.expand_dims(np.asarray(entry["current_state"]).astype(np.float64), axis=0)
            current_states.append(current_state)
            next_state = np.expand_dims(np.asarray(entry["next_state"]).astype(np.float64), axis=0)
            next_state_prediction = self.ddqn_target.predict(next_state).ravel()
            next_q_value = np.max(next_state_prediction)
            q = list(self.ddqn.predict(current_state)[0])
            #print("PG Q:", q)
            #print(entry["action"])
            if entry["terminal"]:
                q[entry["action"]] = entry["reward"]
            else:
                q[entry["action"]] = entry["reward"] + GAMMA * next_q_value
            q_values.append(q)
            max_q_values.append(np.max(q))
        '''
        current_boards = []
        current_scalars = []
        q_values = []
        max_q_values = []
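        # Unlike the single-input variant above, each state here is a [board, scalar] pair fed to a two-input network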

        for entry in batch:

            current_board = np.expand_dims(np.asarray(entry["current_state"][0]).astype(np.float64), axis=0)
            current_scalar = np.expand_dims(np.asarray(entry["current_state"][1]).astype(np.float64), axis=0)

            current_boards.append(current_board)
            current_scalars.append(current_scalar)


            next_board = np.expand_dims(np.asarray(entry["next_state"][0]).astype(np.float64), axis=0)
            next_scalar = np.expand_dims(np.asarray(entry["next_state"][1]).astype(np.float64), axis=0)


            next_state_prediction = self.ddqn_target.predict([next_board, next_scalar]).ravel()

            next_q_value = np.max(next_state_prediction)
            q = list(self.ddqn.predict([current_board, current_scalar])[0])
            #print("PG Q:", q)
            #print(entry["action"])
            if entry["terminal"]:
                q[entry["action"]] = entry["reward"]
            else:
                q[entry["action"]] = entry["reward"] + GAMMA * next_q_value
            q_values.append(q)
            max_q_values.append(np.max(q))

        fit = self.ddqn.fit([np.asarray(current_boards).squeeze(), np.asarray(current_scalars).squeeze()],
                            np.asarray(q_values).squeeze(),
                            batch_size=BATCH_SIZE,
                            verbose=0)
        #print(fit.history)
        loss = fit.history["loss"][0]
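        # Older standalone Keras logs the metric as 'acc'; tf.keras 2.x uses 'accuracy'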
        if LOCAL:
            accuracy = fit.history['accuracy'][0]
        else:
            accuracy = fit.history['acc'][0]
        return loss, accuracy, mean(max_q_values)

    def _update_epsilon(self):
        self.epsilon -= EXPLORATION_DECAY
        self.epsilon = max(EXPLORATION_MIN, self.epsilon)
        #print(self.epsilon)

    def _reset_target_network(self):
        self.ddqn_target.set_weights(self.ddqn.get_weights())
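The core of both _train methods is the same target construction: copy the network's current Q-estimates for a state and replace only the entry for the action taken with reward + GAMMA * max Q(next_state) from the target network (or just the reward on terminal transitions). A standalone NumPy sketch of that update, using made-up numbers rather than real network predictions:

import numpy as np

GAMMA = 0.99  # assumed discount factor; the snippet's GAMMA is defined elsewhere

# Pretend predictions for a 4-action game (stand-ins for self.ddqn / self.ddqn_target output)
q_current = np.array([0.2, 0.5, 0.1, 0.3])       # online net, current state
q_next_target = np.array([0.4, 0.9, 0.6, 0.2])   # target net, next state

action, reward, terminal = 1, 1.0, False

target = q_current.copy()
if terminal:
    target[action] = reward
else:
    target[action] = reward + GAMMA * np.max(q_next_target)

print(target)  # [0.2, 1.891, 0.1, 0.3]; this vector is what ddqn.fit() regresses toward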