Example #1
class AIPlayer(Player):
    score: int
    invalid: bool
    network: Network

    def __init__(self, boardsize: int):
        super().__init__(boardsize)
        self.score = 0
        self.invalid = False
        self.network = Network(boardsize)
        self.network.network.eval()
        self.otherPlayer = None

    def get_random_valid_move(self, state: np.ndarray) -> int:
        self.invalid = False
        validMoves = np.flatnonzero(state == 0)
        return np.random.choice(validMoves)

    def get_move(self, state: np.ndarray) -> int:
        if not self.invalid:
            return self.network.get_action(state)
        else:
            # alternatively, raise an error here?
            return self.get_random_valid_move(state)

    def scored(self, newPoints: int):
        self.score += newPoints

    def invalidMove(self):
        self.invalid = True

    def __str__(self):
        return "AI player [id: " + str(self.id) + "]"
Example #2
class AISkier(Skier):

    def __init__(self, filename: str):

        super().__init__()
        self.net = Network(4)
        self.net.load_weights(filename)
        self.net.network.eval()

    def get_action(self, state: PhysicalProperties) -> int:
        state = self.get_state(state)
        action = self.net.get_action(state)
        return self.convert_action(action)

    def get_state(self, state: PhysicalProperties):
        return np.array([state.position.x, state.position.y, state.v[0], state.v[1]])

    def convert_action(self, action):
        if action == 0:
            return -1
        if action == 1:
            return 0
        if action == 2:
            return 1
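
A sketch of driving the inference-only skier above. The weight file name is a placeholder, and SimpleNamespace merely stands in for the project's PhysicalProperties (only .position.x, .position.y and .v are read).

from types import SimpleNamespace

skier = AISkier("skier_weights.pt")   # placeholder file name; loads weights, eval mode
props = SimpleNamespace(position=SimpleNamespace(x=0.0, y=12.5), v=[3.0, -1.0])
steer = skier.get_action(props)       # one of -1, 0 or 1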
Example #3
class AITrainer(Skier):
    def __init__(self,
                 reward: float,
                 reward_out: float,
                 sample_size: int,
                 capacity: int,
                 gamma: float,
                 eps_min: float,
                 eps_decay: float,
                 double_q_interval: int = 20):

        super().__init__()
        self.rewardOut = reward_out
        self.reward = reward
        self.model_network = Network(4)
        self.target_network = Network(4)
        self.state = None
        self.final_state = -np.ones(4)
        self.action = None
        self.replayMemory = ReplayMemory(sample_size, capacity)
        self.gamma = gamma
        self.eps_greedy_value = 1.
        self.eps_min = eps_min
        self.eps_decay = eps_decay
        self.double_q_interval = double_q_interval
        self.double_q_counter = 0

    def get_random_action(self) -> int:
        self.action = np.random.choice([0, 1, 2])
        return self.action

    def get_action(self, state: PhysicalProperties) -> int:
        self.state = self.get_state(state)
        if np.random.rand() > self.eps_greedy_value:
            self.action = self.model_network.get_action(self.state)
        else:
            self.action = self.get_random_action()
        return self.convert_action(self.action)

    def update_eps(self, iteration: int):
        self.eps_greedy_value = self.eps_min + (1 - self.eps_min) * np.exp(
            -self.eps_decay * iteration)

    def train_model_network(self):
        if self.replayMemory.size < self.replayMemory.sampleSize:
            return
        for i in range(2):
            self.model_network.update_weights(self.replayMemory.get_sample(),
                                              self.gamma, self.target_network)
        self.double_q_counter += 1

        if self.double_q_interval == 0:
            return
        if self.double_q_counter % self.double_q_interval == 0:
            self.update_target_network()

    def update_target_network(self):
        self.target_network.take_weights(self.model_network)

    def end(self):
        self.replayMemory.add_record(self.state,
                                     self.action,
                                     self.final_state,
                                     self.reward,
                                     done=True)
        self.train_model_network()

    def out(self):
        self.replayMemory.add_record(self.state,
                                     self.action,
                                     self.final_state,
                                     self.rewardOut,
                                     done=True)
        self.train_model_network()

    def gate_done(self, next_state: PhysicalProperties):
        next_state = self.get_state(next_state)
        self.replayMemory.add_record(self.state,
                                     self.action,
                                     next_state,
                                     self.reward,
                                     done=False)

    def add_record(self, next_state: PhysicalProperties, done: bool):
        next_state = self.get_state(next_state)
        self.replayMemory.add_record(self.state,
                                     self.action,
                                     next_state,
                                     reward=0,
                                     done=done)

    def get_state(self, state: PhysicalProperties):
        return np.array(
            [state.position.x, state.position.y, state.v[0], state.v[1]])

    def convert_action(self, action):
        if action == 0:
            return -1
        if action == 1:
            return 0
        if action == 2:
            return 1
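
One way the trainer above could be wired into an episode loop. The env object and its reset/step signature are invented for illustration; only the trainer-side calls come from the class.

trainer = AITrainer(reward=1.0, reward_out=-10.0, sample_size=64, capacity=10_000,
                    gamma=0.99, eps_min=0.05, eps_decay=1e-3, double_q_interval=20)

for episode in range(1_000):
    props = env.reset()                           # hypothetical environment API
    done = out = False
    while not (done or out):
        steer = trainer.get_action(props)         # epsilon-greedy over the three actions
        props, passed_gate, out, done = env.step(steer)
        if out:
            trainer.out()                         # terminal, penalised with reward_out; also trains
        elif done:
            trainer.end()                         # terminal with the normal reward; also trains
        elif passed_gate:
            trainer.gate_done(props)              # rewarded non-terminal transition
            trainer.train_model_network()
        else:
            trainer.add_record(props, done=False)
            trainer.train_model_network()
    trainer.update_eps(episode)                   # exponential decay towards eps_min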
Example #4
class AITrainer(Player):
    rewardInvalidMove: float
    rewardWinning: float
    rewardLosing: float
    state: np.ndarray
    action: int
    invalid: bool
    model_network: Network
    target_network: Network
    replayMemory: ReplayMemory
    gamma: float
    fixed_batch: bool
    eps_greedy_value: float
    eps_min: float
    eps_decay: float
    double_q_interval: int
    double_q_counter: int

    def __init__(self, board_size: int, rewardInvalidMove: float,
                 rewardWinning: float, rewardLosing: float, sample_size: int, capacity: int,
                 gamma: float, eps_min: float, eps_decay: float, fixed_batch: bool = False,
                 double_q_interval: int = 0):

        super().__init__(board_size)
        self.rewardNoScore = 0
        self.rewardInvalidMove = rewardInvalidMove
        self.rewardWinning = rewardWinning
        self.rewardLosing = rewardLosing
        self.model_network = Network(board_size)
        self.target_network = Network(board_size)
        self.state = None
        self.final_state = np.ones(board_size ** 2) * 5
        self.action = None
        self.invalid = False
        self.replayMemory = ReplayMemory(sample_size, capacity)
        self.gamma = gamma
        self.fixed_batch = fixed_batch
        self.eps_greedy_value = 1.
        self.eps_min = eps_min
        self.eps_decay = eps_decay
        self.double_q_interval = double_q_interval
        self.double_q_counter = 0
        self.winner = False

    def get_random_valid_move(self, state: np.ndarray) -> int:
        self.invalid = False
        validMoves = np.flatnonzero(state == 0)
        self.action = np.random.choice(validMoves)
        return self.action

    def get_move(self, state: np.ndarray) -> int:
        self.state = state.copy()
        self.action = self.model_network.get_action(self.state)
        return self.action
        # if np.random.rand() > self.eps_greedy_value:
        #     if not self.invalid:
        #         self.action = self.model_network.get_action(self.state)
        #         return self.action
        #     else:
        #         return self.get_random_valid_move(state)
        # else:
        #     return self.get_random_valid_move(state)

    def update_eps(self, iteration: int):
        self.eps_greedy_value = self.eps_min + (1 - self.eps_min) * np.exp(- self.eps_decay * iteration)

    def invalidMove(self):
        self.replayMemory.add_record(self.state, self.action, self.final_state,
                                     self.rewardInvalidMove, done=True)
        self.train_model_network()

    def train_model_network(self):
        if self.replayMemory.size < self.replayMemory.sampleSize:
            return
        for i in range(2):
            self.model_network.update_weights(self.replayMemory.get_sample(), self.gamma, self.target_network)
        self.double_q_counter += 1

        if self.double_q_interval == 0:
            return
        if self.double_q_counter % self.double_q_interval == 0:
            self.update_target_network()

    def update_target_network(self):
        self.target_network.take_weights(self.model_network)

    # def get_trained_player(self, id_number: int) -> AIPlayer:
    #     trained_network = Network(self.boardsize, self.model_network.hidden,
    #                               self.model_network.only_valid_actions, self.model_network.softmax)
    #     trained_network.take_weights(self.model_network)
    #     return AIPlayer(id_number, self.boardsize, trained_network)

    def win(self):
        self.replayMemory.add_record(self.state, self.action, self.final_state,
                                     self.rewardWinning, done=True)
        self.train_model_network()
        self.winner = True

    def lose(self):
        self.replayMemory.add_record(self.state, self.action, self.final_state,
                                     self.rewardLosing, done=True)
        self.train_model_network()
        self.winner = False

    def add_record(self, next_game_state: np.ndarray, done: bool):
        self.replayMemory.add_record(self.state, self.action, next_game_state, reward=0, done=done)
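
A comparable sketch for the board-game trainer above. The game object is hypothetical; the constructor arguments and trainer calls follow the class, and the target network is synced every double_q_interval training steps.

trainer = AITrainer(board_size=3, rewardInvalidMove=-5.0, rewardWinning=1.0,
                    rewardLosing=-1.0, sample_size=64, capacity=10_000,
                    gamma=0.95, eps_min=0.05, eps_decay=1e-3, double_q_interval=50)

for episode in range(10_000):
    board = game.reset()                          # hypothetical game API
    while True:
        move = trainer.get_move(board)
        board, valid, won, lost, finished = game.step(move)
        if not valid:
            trainer.invalidMove()                 # terminal penalty; also trains
            break
        if won:
            trainer.win()                         # terminal reward; also trains
            break
        if lost:
            trainer.lose()
            break
        trainer.add_record(board, done=finished)
        trainer.train_model_network()
        if finished:
            break
    trainer.update_eps(episode)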