Example #1
0
from game_of_life import GameOfLife, FocusArea
import numpy as np
import matplotlib.pyplot as plt

from neural_network import neural_net, decode, random_board

from gui import GameOfLifeBoard, Dim

width, height = 16, 16
focus_area = FocusArea(max_col=width, max_row=height)

brd = GameOfLife(FocusArea(width, height), random_board())

# Seed the board with both diagonals plus a two-cell-wide horizontal and
# vertical band through the middle. The per-iteration tuple preserves the
# exact order in which cells were originally added.
for i in range(height):
    seeds = (
        (i, i),
        (height - i - 1, i),
        (i, height // 2),
        (i, height // 2 - 1),
        (width // 2, i),
        (width // 2 - 1, i),
    )
    for cell in seeds:
        brd = brd.add(cell)

# Rendering configuration for the GUI board widget.
GameOfLifeBoard.pixels_per_box = 10
GameOfLifeBoard.inter_frame_time = 250



class Mutator:
    """Callable that applies one network-chosen action to a board.

    Feeds the board's numpy representation to ``neural_net`` and adds the
    decoded top prediction as a new cell, returning the resulting board.
    """

    def __call__(self, brd):
        board_state = np.array(brd.to_numpy_array())
        # predict() returns a batch; this uses the first (only) prediction.
        raw_action = neural_net.predict(board_state)[0]
        return brd.add(decode(raw_action))
Example #2
0

# Load previously saved network weights from disk before training resumes.
# NOTE(review): `nnet` is defined outside this fragment — presumably the
# Q-learning network; confirm against the enclosing module.
nnet.load('q_nnet.be')
if __name__ == '__main__':
    for i in range(number_of_epochs):
        exp_rate = exploration_rate
        board = GameOfLife(focus_area) #random_board())
        board = MultiInputGol(board, 3)
        for j in range(game_iterations):
            # if len(board) == 0:
            #     continue
            bad_board, next_board, action, r = board.next(), None, None, None
            if np.random.rand() < exp_rate:
                action = list(random_board(1))[0]
                is_random = True
                next_board = board.add(action).next()
                r = reward(board, next_board, bad_board)
                nnet.remember(board.to_numpy_array(), r, encode(action), next_board.to_numpy_array())
            else:
                brd, actions, n_brd = [board], [], []
                for _ in range(board.max_cnt):
                    _, a = monte_carlo(brd[-1], 8)
                    actions.append(a)
                    action = decode(a)
                    n_brd.append(brd[-1].add(action).next(print_out=True))
                    brd.append(n_brd[-1])

                next_board = n_brd[-1]
                r = reward(board, next_board, bad_board)
                rewards = np.geomspace(learning_rate ** -(board.max_cnt - 1), 1, board.max_cnt) * r