def run(self, board: Board):
    """Train the agent over ``self.episodes`` episodes of the game.

    The model performs random actions until there is sufficient data in
    replay memory; then the model also trains off replay memory after
    every action.

    Args:
        board: The game board to play on; reset at the start of every
            episode.
    """
    sub = 0        # episodes that finished below 100 points
    max_score = 0
    total = 0      # sum of all episode scores, used for the average
    for e in range(self.episodes):
        print('Running episode', e)
        board.reset_board()
        game_over = False
        total_score = 0
        print('Memories', len(self.memory.memories))
        print('Epsilon', self.epsilon)
        while not game_over:
            current_state = Board.get_game_state(board._blocks)
            new_board, new_state = self.predict(board)
            reward = 0
            if new_board == -1:
                # No move was produced: the episode ends, zero reward.
                game_over = True
            else:
                reward, game_over = board.do_move(new_board)
                total_score += reward
            # Store the transition for experience replay.
            self.memory.remember(
                (current_state, new_state, reward, game_over))
        print('Score', total_score)
        self.train()
        # Anneal the exploration rate after each episode.
        self.epsilon -= self.decay
        total += total_score
        max_score = max(max_score, total_score)  # idiomatic; was a ternary
        if total_score < 100:
            sub += 1
    # generate summary
    print('Max score', max_score)
    print('Avg score', total / self.episodes)
    print('Sub 100', sub)
    self.save()
def run(self, board: Board):
    """Run the training loop for ``self.episodes`` episodes.

    Each episode plays the board until game over, recording every
    transition in replay memory, then trains the model and decays the
    exploration rate. A summary is printed and the model is saved at
    the end of the run.
    """
    below_hundred = 0   # count of episodes scoring under 100 points
    best_score = 0
    score_sum = 0       # accumulated over episodes for the average
    for episode in range(self.episodes):
        print('Iteration: ', episode)
        board.reset_board()
        finished = False
        episode_score = 0
        while not finished:
            state_before = Board.get_game_state(board._blocks)
            chosen_board, state_after = self.predict(board)
            step_reward = 0
            if chosen_board == -1:
                finished = True
            else:
                step_reward, finished = board.do_move(chosen_board)
                episode_score += step_reward
            # Record the transition for later replay training.
            self.memory.remember(
                (state_before, state_after, step_reward, finished))
        print('Score', episode_score)
        self.train()
        self.epsilon -= self.decay
        score_sum += episode_score
        best_score = max(best_score, episode_score)
        if episode_score < 100:
            below_hundred += 1
    # Summary of the whole run.
    print('Max score', best_score)
    print('Avg score', score_sum / self.episodes)
    print('Sub 100', below_hundred)
    self.save()
def play(self, board: Board):
    """Play one greedy move chosen by the model's Q-value estimates.

    Scores every board reachable in one move with the model and
    performs the highest-scoring one. If no move is possible the board
    is reset. ``self.score`` accumulates rewards and is zeroed when
    the game ends.

    Args:
        board: The game board to act on.
    """
    possible_boards = board.get_next()
    # Guard first: the original only checked for an empty move list
    # AFTER building all the states, doing needless work.
    if not possible_boards:
        board.reset_board()
        return
    possible_states = [Board.get_game_state(b) for b in possible_boards]
    q_values = []
    for s in possible_states:
        s = np.reshape(s, (1, -1))  # model expects a batch dimension
        q_values.append(self.model.predict(s)[0])
    m = np.argmax(q_values)
    best_board = possible_boards[m]
    reward, game_over = board.do_move(best_board)
    if game_over:
        self.score = 0
    else:
        self.score += reward