예제 #1
0
def next_state(state, action):
    """Return the encoded board obtained by applying ``action`` to ``state``.

    Both arguments are decoded with ``convert_string_to_list``. The decoded
    action is read as three ``[row, col]`` pairs: the cells addressed by the
    first and second pair are set to 0 and the cell addressed by the third
    pair is set to 1 (presumably the jumping peg's origin, the jumped-over
    peg and the landing cell -- confirm against the action encoder). The
    modified grid is re-encoded with ``convert_list_to_string``.
    """
    board = convert_string_to_list(state)
    cells = convert_string_to_list(action)

    # (index into the decoded action, value written to that board cell)
    for position, value in ((0, 0), (1, 0), (2, 1)):
        row, col = cells[position][0], cells[position][1]
        board[row][col] = value

    return convert_list_to_string(board)
예제 #2
0
    def update_model_and_eligibilities(self, state, target, td_error):
        """Fit the model on ``state``, then decay every stored eligibility.

        Commas are removed from ``state`` before it is decoded with
        ``convert_string_to_list``; the first element of the decoded result is
        handed to ``self.fit`` together with ``target`` and ``td_error``.
        Afterwards each entry of ``self.eligibilities`` is replaced by itself
        multiplied by ``self.eligibility_decay_rate``.
        """
        flat_state = state.replace(',', '')
        model_input = convert_string_to_list(flat_state)[0]
        self.fit(model_input, target, td_error)  # train model

        # decay eligibilities, writing each result back into the same slot
        for index, trace in enumerate(self.eligibilities):
            self.eligibilities[index] = trace * self.eligibility_decay_rate
예제 #3
0
 def find_value(self, state):
     """Return the model's prediction for ``state`` as a single value.

     Commas are removed from ``state`` before decoding it with
     ``convert_string_to_list``; the first element of the decoded result is
     wrapped in a one-item batch, passed to ``self.model``, and element
     ``[0][0]`` of the prediction (after ``.numpy()``) is returned.
     """
     features = convert_string_to_list(state.replace(',', ''))[0]
     batch = np.array([features])  # model is called with a batch of one
     return self.model(batch).numpy()[0][0]
    def run(self):
        """Play the configured number of episodes, learning after every move.

        Each episode creates a fresh PegBoard and PegPlayer, asks the actor for
        actions until the player reports the game is over, and after every
        executed action feeds the critic's TD error into both the critic and
        the actor. The board's ``total_pegs_left`` is appended to
        ``self.total_pegs_left_per_episode`` at the end of each episode and is
        printed for the final episode. If the actor returns no action for an
        initial board, the whole run stops at that point.
        """
        # start from an empty history of pegs left
        self.total_pegs_left_per_episode = []

        for episode in range(self.config['number_of_episodes']):
            print('Episode ', episode)

            # fresh SimWorld for this episode: PegBoard and PegPlayer
            peg_board = PegBoard(self.config['size'], self.config['is_diamond'], self.config['empty_nodes'])
            peg_player = PegPlayer(peg_board, self.config['reward_win'], self.config['reward_lose'])

            is_last_episode = episode == self.config['number_of_episodes'] - 1

            # display every episode ("all") or only the final one ("last")
            if self.config['display_games'] == "all":
                show_board = True
            else:
                show_board = self.config['display_games'] == "last" and is_last_episode

            # optionally set epsilon to zero for the final episode
            if self.config['epsilon_zero_on_last_episode'] and is_last_episode:
                self.actor.set_epsilon_to_zero()

            # initial state and the actor's first action
            state = convert_list_to_string(peg_board.grid)
            action = self.actor.choose_action(state)

            # a falsy action means there is no legal action for this board configuration
            if not action:
                print('No legal actions!')
                break

            if show_board:
                visualize_board(convert_string_to_list(state))

            # reset eligibilities (original note: the actor's reset also decreases epsilon)
            self.actor.reset_episode_parameters()
            self.critic.reset_episode_parameters()

            while not peg_player.game_over():
                # mark the current state (and action, for the actor) as eligible;
                # the critic only does this when it is table based
                self.actor.set_eligibility(state, action)
                if self.config['critic_table']:
                    self.critic.set_eligibility(state)

                # execute the action; PegPlayer returns the resulting state and the reward
                new_state, reward = peg_player.execute_action(action)

                # Actor: pick the follow-up action only while the game is still running
                next_action = None if peg_player.game_over() else self.actor.choose_action(new_state)

                # Critic: compute the TD error and update its values/model and eligibilities
                if self.config['critic_table']:
                    td_error = self.critic.get_TD_error(state, new_state, reward)
                    self.critic.update_values_and_eligibilities(td_error)
                else:
                    target, td_error = self.critic.get_target_and_TD_error(state, new_state, reward)
                    self.critic.update_model_and_eligibilities(state, target, td_error)

                # Actor: use the TD error to update SAP values and eligibilities
                self.actor.update_values_and_eligibilities(td_error)

                state, action = new_state, next_action

                # visualize the game if the display flag is set
                if show_board:
                    sleep(self.config['display_delay'])
                    visualize_board(convert_string_to_list(state))

            # save result for plotting
            self.total_pegs_left_per_episode.append(peg_board.total_pegs_left)

            # report the result of the final episode
            if is_last_episode:
                print('Total pegs left last episode - ', peg_board.total_pegs_left)
예제 #5
0
def get_possible_actions(state):
    """List every jump available on the board encoded by ``state``.

    The board is decoded with ``convert_string_to_list``. A first row holding
    more than one cell is treated as a diamond board (every row is scanned
    over ``size`` columns); otherwise the board is treated as a triangle
    (row ``i`` is scanned over ``i + 1`` columns).

    A jump starts on a cell holding 1, passes over an adjacent cell holding 1
    and lands two steps away on a cell holding 0. Six directions are tried
    for each start cell, in this order: up, right, down & right, down, left,
    up & left.

    Returns a list of strings ``"<start>,<jumped>,<landing>"`` where each
    cell is its row index immediately followed by its column index. Because
    no separator is placed between row and column, the encoding is only
    unambiguous while every index is a single digit.
    """
    board = convert_string_to_list(state)
    side = len(board)
    # Guarded so that an empty board never indexes row 0.
    is_diamond = side > 0 and len(board[0]) > 1
    jumps = []

    def record(row, col, d_row, d_col):
        # Append the encoded jump from (row, col) in direction (d_row, d_col).
        jumps.append("{}{},{}{},{}{}".format(
            row, col,
            row + d_row, col + d_col,
            row + 2 * d_row, col + 2 * d_col))

    for row in range(side):
        cells_in_row = side if is_diamond else row + 1
        for col in range(cells_in_row):
            if board[row][col] != 1:
                continue  # only occupied cells can start a jump

            if is_diamond:
                # Direction: up
                if (row > 1 and board[row - 1][col] == 1
                        and board[row - 2][col] == 0):
                    record(row, col, -1, 0)

                # Direction: right
                if (col < side - 2 and board[row][col + 1] == 1
                        and board[row][col + 2] == 0):
                    record(row, col, 0, 1)
            else:
                # Triangle rows shrink towards the top, so both "up" and
                # "right" need the column to stay two short of this row's end.
                if row > 1 and col < len(board[row]) - 2:
                    # Direction: up
                    if board[row - 1][col] == 1 and board[row - 2][col] == 0:
                        record(row, col, -1, 0)

                    # Direction: right
                    if board[row][col + 1] == 1 and board[row][col + 2] == 0:
                        record(row, col, 0, 1)

            room_below = row < side - 2

            # Direction: down & right
            if (room_below and col < side - 2
                    and board[row + 1][col + 1] == 1
                    and board[row + 2][col + 2] == 0):
                record(row, col, 1, 1)

            # Direction: down
            if (room_below and board[row + 1][col] == 1
                    and board[row + 2][col] == 0):
                record(row, col, 1, 0)

            # Direction: left
            if (col > 1 and board[row][col - 1] == 1
                    and board[row][col - 2] == 0):
                record(row, col, 0, -1)

            # Direction: up & left
            if (row > 1 and col > 1
                    and board[row - 1][col - 1] == 1
                    and board[row - 2][col - 2] == 0):
                record(row, col, -1, -1)

    return jumps