from game_of_life import GameOfLife, FocusArea
import numpy as np
import matplotlib.pyplot as plt
from neural_network import neural_net, decode, random_board
from gui import GameOfLifeBoard, Dim

# Board dimensions used for the focus area and for the seeded pattern below.
width, height = 16, 16
focus_area = FocusArea(max_col=width, max_row=height)

# Start from whatever random_board() yields, then overlay a fixed pattern.
# NOTE(review): this FocusArea is built positionally while `focus_area` above
# uses keywords; whether the two are equivalent depends on FocusArea's
# parameter order, which is not visible here.
brd = GameOfLife(FocusArea(width, height), random_board())
for i in range(height):
    # For each index: one cell on each diagonal, then one cell on each of the
    # two adjacent central lines along both coordinate axes. The order of the
    # additions is kept exactly as written originally. Which tuple position is
    # the column and which the row is not visible from this file.
    for _cell in (
        (i, i),
        (height - i - 1, i),
        (i, height // 2),
        (i, height // 2 - 1),
        (width // 2, i),
        (width // 2 - 1, i),
    ):
        # add() returns a board, so rebind rather than rely on mutation.
        brd = brd.add(_cell)

# Display settings stored on the GUI class itself (shared by all instances).
GameOfLifeBoard.pixels_per_box = 10
# Presumably milliseconds between frames -- TODO confirm against gui module.
GameOfLifeBoard.inter_frame_time = 250


class Mutator:
    """Callable that lets `neural_net` pick an action and applies it to a board."""

    def __call__(self, brd):
        """Return the result of adding the network's decoded action to `brd`.

        The board is converted with `to_numpy_array()`, wrapped in a NumPy
        array and passed to `neural_net.predict`; the first element of the
        prediction is decoded with `decode` and handed to `brd.add`.
        """
        network_input = np.array(brd.to_numpy_array())
        prediction = neural_net.predict(network_input)
        action = prediction[0]
        return brd.add(decode(action))
nnet.load('q_nnet.be') if __name__ == '__main__': for i in range(number_of_epochs): exp_rate = exploration_rate board = GameOfLife(focus_area) #random_board()) board = MultiInputGol(board, 3) for j in range(game_iterations): # if len(board) == 0: # continue bad_board, next_board, action, r = board.next(), None, None, None if np.random.rand() < exp_rate: action = list(random_board(1))[0] is_random = True next_board = board.add(action).next() r = reward(board, next_board, bad_board) nnet.remember(board.to_numpy_array(), r, encode(action), next_board.to_numpy_array()) else: brd, actions, n_brd = [board], [], [] for _ in range(board.max_cnt): _, a = monte_carlo(brd[-1], 8) actions.append(a) action = decode(a) n_brd.append(brd[-1].add(action).next(print_out=True)) brd.append(n_brd[-1]) next_board = n_brd[-1] r = reward(board, next_board, bad_board) rewards = np.geomspace(learning_rate ** -(board.max_cnt - 1), 1, board.max_cnt) * r