Example #1
def run_game():
    game = Game()
    RL = BrainDQN(actions=8)
    for episode in range(500):
        # initial observation
        observation = game.reset_map()
        RL.setInitState(observation)
        while True:
            # RL chooses an action based on the current observation
            action = RL.getAction()
            # RL takes the action and gets the next observation and reward
            observation_, reward, done = game.step(action)
            RL.setPerception(observation_, action, reward, done)

            # break the while loop at the end of this episode; the next
            # iteration of the outer loop resets the map
            if done:
                break
Example #2
            # break the while loop at the end of this episode
            if is_done:
                print("moves:", moves, "score:", score)
                scores.append(score)
                break
            step += 1

    # plot the per-episode scores (assumes numpy is imported as np)
    import matplotlib.pyplot as plt
    plt.plot(np.arange(len(scores)), scores)
    plt.ylabel('Score')
    plt.xlabel('Training episodes')
    plt.show()


if __name__ == "__main__":
    game = Game()
    RL = DeepQNetwork(n_actions=9,
                      map_w=12,
                      map_h=12,
                      learning_rate=0.01,
                      reward_decay=0.9,
                      e_greedy=0.9,
                      replace_target_iter=300,
                      memory_size=2000,
                      e_greedy_increment=0.2,
                      output_graph=False)
    train_game()
    RL.plot_cost()
Example #3
def main():
    game = Game(800, 450, "Week 6 Solution Template")
    game.run()
# setup the game and players
p1 = DeepQLearningAgent(board_size=board_size,
                        buffer_size=buffer_size,
                        gamma=gamma,
                        n_actions=n_actions,
                        use_target_net=use_target_net,
                        epsilon=epsilon,
                        version=version,
                        name='dqn1')
p2 = DeepQLearningAgent(board_size=board_size, buffer_size=buffer_size,
                        gamma=gamma, n_actions=n_actions,
                        use_target_net=use_target_net, epsilon=epsilon,
                        version=version, name='dqn2')
p_random = RandomPlayer(board_size=board_size)
g = Game(player1=p1, player2=p2, board_size=board_size)
g2 = Game(player1=p1, player2=p_random, board_size=board_size)

# check the model architecture
print("Model architecture")
p1._model.summary()

# initialize the DQN training parameters
reward_type = 'current'
sample_actions = False
decay = 0.85
epsilon_end = 0.1
n_games_buffer = 300
n_games_train = 10
episodes = 1 * (10**5)
log_frequency = 500
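
The parameter block above is enough to sketch how a self-play training schedule could consume it. The loop below is a minimal, hypothetical sketch: play_game(), train_agent(), update_target_net() and the agent's epsilon attribute are stand-ins modeled on common DQN trainers, not APIs confirmed by this listing.

# Hedged sketch only: play_game(), train_agent(), update_target_net()
# and the agent's epsilon attribute are hypothetical stand-ins.
for episode in range(1, episodes + 1):
    # self-play between the two agents to fill the replay buffer
    play_game(g, n_games=n_games_train, epsilon=p1.epsilon)

    # one training pass over a sampled minibatch
    loss = p1.train_agent(batch_size=64)

    # decay exploration, but never below epsilon_end
    p1.epsilon = max(epsilon_end, p1.epsilon * decay)

    if episode % log_frequency == 0:
        p1.update_target_net()  # periodically sync the target network
        print("episode", episode, "loss", loss, "epsilon", p1.epsilon)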
Example #5
def main():
    window_size = (1280, 800)
    game_title = "Week 06: Paddles"
    g = Game(window_size, game_title)
    g.run()
Example #6
def main():
    window_size = (1280, 800)
    game_title = "Week 05: Pong"
    g = Game(window_size, game_title)
    g.run()
Example #7
from game_env import (StateEnv, Game, StateEnvBitBoard, StateConverter,
                      StateEnvBitBoardC)
from players import RandomPlayer
import numpy as np
import time
from tqdm import tqdm

board_size = 8

# initialize classes
p1 = RandomPlayer(board_size=board_size)
p2 = RandomPlayer(board_size=board_size)
g = Game(player1=p1, player2=p2, board_size=board_size)


def convert_boards(s, m=None):
    """Convert the board state from bitboard to ndarray

    Parameters
    ----------
    s : list
        contains black, white bitboards and current player
    m : int (64 bit), default None
        bitboard for legal moves
    
    Returns
    -------
    s : list
        contains black, white board arrays and current player
    m : ndarray
        legal moves array
    """
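    # NOTE: the example is truncated here; what follows is a hedged sketch
    # of the conversion the docstring describes, assuming bit i of each
    # 64-bit bitboard maps to cell (i // 8, i % 8) of an 8x8 array.
    def bit_to_array(b):
        # unpack the 64 bits into a flat 0/1 vector, then reshape to 8x8
        return np.array([(b >> i) & 1 for i in range(64)],
                        dtype=np.uint8).reshape(8, 8)

    s = [bit_to_array(s[0]), bit_to_array(s[1]), s[2]]
    if m is not None:
        m = bit_to_array(m)
    return s, m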
Example #8
def main():
    print("entered main()")
    g = Game((1024, 768), "Week 03")
    g.run()
    print("exited main()")
Example #9
    def bin_pool(self, results):

        results_by_bin = {}

        # assign each result to a feature bin, keeping the best rms per bin
        for oneresult in results:
            rms, state, allA, Anumber = oneresult
            game = Game(self.voc, state)
            if self.calculus_mode == 'scalar':
                L, function_number, mytargetnumber, firstder_number, depth, varnumber = game.get_features()
            else:
                L, function_number, mytargetnumber, firstder_number, depth, varnumber, dotnumber, normnumber, crossnumber = game.get_features()

            if Anumber >= self.maxa:
                bin_a = self.maxa
            else:
                bins_for_a = np.linspace(0, self.maxa, num=self.maxa+1)
                for i in range(len(bins_for_a) -1):
                    if Anumber >= bins_for_a[i] and Anumber < bins_for_a[i + 1]:
                        bin_a = i

            if L >= self.maxl:
                bin_l = self.maxl
            else:
                bins_for_l = np.linspace(0, self.maxl, num=self.maxl+1)
                for i in range(len(bins_for_l) - 1):
                    if L >= bins_for_l[i] and L < bins_for_l[i + 1]:
                        bin_l = i

            if function_number >= self.maxf:
                bin_f = self.maxf
            else:
                bins_for_f = np.linspace(0, self.maxf, num=self.maxf + 1)
                for i in range(len(bins_for_f) - 1):
                    if function_number >= bins_for_f[i] and function_number < bins_for_f[i + 1]:
                        bin_f = i

            if function_number == 0:  # presence or absence of a function
                bin_fzero = 0
            else:
                bin_fzero = 1

            if varnumber == 0:  # presence or absence of the variable
                bin_var = 0
            else:
                bin_var = 1

            if firstder_number == 0:  # and of the first derivative
                bin_fone = 0
            else:
                bin_fone = 1
            if config.smallgrid:
                bin_fzero = 0
                bin_var = 0
                bin_fone = 0


            bin_d = 0
            bin_for_d = np.linspace(0, config.MAX_DEPTH, num=config.MAX_DEPTH + 2)
            for i in range(len(bin_for_d) - 1):
                if depth >= bin_for_d[i] and depth < bin_for_d[i + 1]:
                    bin_d = i

            if self.calculus_mode == 'vectorial':
                if dotnumber >= self.maxdot:
                    bin_dot = self.maxdot
                else:
                    bins_for_dot = np.linspace(0, self.maxdot, num=self.maxdot + 1)
                    for i in range(len(bins_for_dot) - 1):
                        if dotnumber >= bins_for_dot[i] and dotnumber < bins_for_dot[i + 1]:
                            bin_dot = i

                if normnumber >= self.maxnorm:
                    bin_norm = self.maxnorm
                else:
                    bins_for_norm = np.linspace(0, self.maxnorm, num=self.maxnorm + 1)
                    for i in range(len(bins_for_norm) - 1):
                        if normnumber >= bins_for_norm[i] and normnumber < bins_for_norm[i + 1]:
                            bin_norm = i

                if crossnumber >= self.maxcross:
                    bin_cross = self.maxcross
                else:
                    bins_for_cross = np.linspace(0, self.maxcross, num=self.maxcross + 1)
                    for i in range(len(bins_for_cross) - 1):
                        if crossnumber >= bins_for_cross[i] and crossnumber < bins_for_cross[i + 1]:
                            bin_cross = i

            if self.calculus_mode == 'scalar':
                key = str([bin_a, bin_l, bin_f, bin_fzero, bin_fone, bin_d, bin_var])
            else:
                key = str([bin_a, bin_l, bin_f, bin_fzero, bin_fone, bin_d, bin_var,
                           bin_dot, bin_norm, bin_cross])

            # keep only the best (lowest-rms) result seen in each bin
            if key not in results_by_bin:
                if rms < config.minrms:
                    results_by_bin[key] = [rms, state, allA]
            elif rms < results_by_bin[key][0]:
                results_by_bin[key] = [rms, state, allA]

        return results_by_bin
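
Each clamp-and-scan block above implements the same bucketing rule; for non-negative counts it reduces to clamping the floored value. A minimal, self-contained sketch of that equivalence (to_bin is an illustrative helper, not part of the original):

import math

def to_bin(count, max_count):
    # values in [i, i+1) land in bin i, and anything >= max_count is
    # clamped into the top bin, matching the linspace scans above
    return min(math.floor(count), max_count)

assert to_bin(3, 10) == 3     # falls in bin [3, 4)
assert to_bin(12, 10) == 10   # clamped to the top bin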
Example #10
from game_env import Game
import numpy as np

game = Game()
game.init_map()
print(game.map)
game.move(0)
game.move(0)
print(game.map)

new_map = game.transform_to_3dim(game.map)
print(new_map)
new_map = np.reshape(new_map, [6, 6, 4])
print(new_map)
Example #11
def main():
    game = Game(800, 450, "Week 6 Starter Template")
    game.run()
Example #12
def main():
    g = Game((1280, 800), "Week 03")
    g.run()
Example #13
    buffer_size = 10000
    gamma = 0.99
    n_actions = 64
    use_target_net = True
    epsilon = 0.9
    version = 'v1'
    batch_size = 512
    supervised = False
    agent_type = 'DeepQLearningAgent'

    # setup the game and players
    p1 = DeepQLearningAgent(board_size=board_size, buffer_size=buffer_size,
                            gamma=gamma, n_actions=n_actions, use_target_net=use_target_net,
                            epsilon=epsilon, version=version)
    p2 = RandomPlayer(board_size=board_size)
    g = Game(player1=p1, player2=p2, board_size=board_size)

    # check the model architecture
    print("Model architecture")
    p1._model.summary()

    # initialize the DQN training parameters
    reward_type = 'current'
    sample_actions = False
    decay = 0.85
    epsilon_end = 0.1
    n_games_buffer = 300
    n_games_train = 30
    episodes = 30 * (10**4)
    log_frequency = 30 * (500)
    win_list_random = []
Example #14
    def vectorial_delete_one_subtree(self, state):

        # delete one randomly chosen binary subtree, promoting one of its children

        prev_state = copy.deepcopy(state)

        game = Game(self.voc, prev_state)

        ast = game.convert_to_ast()
        rpn = prev_state.reversepolish
        # throw away the trailing '1' (== halt) if present:
        if rpn[-1] == 1:
            array = np.asarray(rpn[:-1])
        else:
            array = np.asarray(rpn)

        start = 2

        # get all topnodes of possible subtrees
        positions = np.where(array >= start)[0]

        if positions.size > 0:
            maxretries = 10
            gotone = False
            count = 0
            while not gotone and count < maxretries:
                which = np.random.choice(positions)
                getnonleafnode = which + 1
                # get the node
                operatornode = ast.from_ast_get_node(ast.topnode,
                                                     getnonleafnode)[0]
                before_swap_rpn = ast.from_ast_to_rpn(operatornode)
                bfstate = State(self.voc, before_swap_rpn, self.calculus_mode)
                bef_game = Game(self.voc, bfstate)
                _, vec_number, _ = bef_game.from_rpn_to_critical_info()
                grandparent = operatornode.parent
                count += 1
                if before_swap_rpn[
                        -1] in self.voc.arity2symbols and grandparent is not None:
                    gotone = True
                    # reuse count as a child index to locate operatornode
                    # among its grandparent's children
                    count = 0
                    for child in grandparent.children:
                        if child == operatornode:
                            index = count
                        count += 1

            if not gotone:
                return False, prev_state

            else:
                # collect the vector/scalar type of each child subtree
                vecs = []
                for child in operatornode.children:
                    rpnchild = ast.from_ast_to_rpn(child)
                    statechild = State(self.voc, rpnchild, self.calculus_mode)
                    gamechild = Game(self.voc, statechild)
                    _, vec_number, _ = gamechild.from_rpn_to_critical_info()
                    vecs.append(vec_number)

                # both children are scalars: keep either one at random
                if vecs == [0, 0]:
                    if random.random() < 0.5:
                        newnode = operatornode.children[0]
                    else:
                        newnode = operatornode.children[1]

                # when the types differ, keep the vector child
                elif vecs == [0, 1]:
                    newnode = operatornode.children[1]

                elif vecs == [1, 0]:
                    newnode = operatornode.children[0]

                # both children are vectors: keep either one, unless the
                # node is a dot product (its result is a scalar, so
                # promoting a vector child would change the type)
                elif vecs == [1, 1] and before_swap_rpn[-1] != self.voc.dot_number:
                    if random.random() < 0.5:
                        newnode = operatornode.children[0]
                    else:
                        newnode = operatornode.children[1]

                else:  # the dot product case
                    return False, prev_state

                grandparent.children[index] = newnode
                # get the new reversepolish:
                newrpn = ast.from_ast_to_rpn(ast.topnode)

        # no candidate subtree: cannot delete
        else:
            return False, prev_state

        # return the new state
        state = State(self.voc, newrpn, self.calculus_mode)

        return True, state
Example #15
    def vectorial_crossover(self, state1, state2):

        # swap one subtree between the two equations; only subtrees of the
        # same type (vector with vector, scalar with scalar) are exchanged

        prev_state1 = copy.deepcopy(state1)
        prev_state2 = copy.deepcopy(state2)

        game1 = Game(self.voc, prev_state1)
        game2 = Game(self.voc, prev_state2)

        ast1 = game1.convert_to_ast()
        ast2 = game2.convert_to_ast()

        rpn1 = prev_state1.reversepolish
        rpn2 = prev_state2.reversepolish

        # throw away the trailing '1' (== halt) if present:
        if rpn1[-1] == 1:
            array1 = np.asarray(rpn1[:-1])
        else:
            array1 = np.asarray(rpn1)

        if rpn2[-1] == 1:
            array2 = np.asarray(rpn2[:-1])
        else:
            array2 = np.asarray(rpn2)

        # the topnode carries the maximal label, and we want proper
        # subtrees only, hence the [:-1] below; subtrees can also be
        # scalar leaves, hence >= 2
        start = 2  # + len(self.voc.arity0symbols)

        # get all topnodes of possible subtrees
        positions1 = np.where(array1 >= start)[0][:-1]
        positions2 = np.where(array2 >= start)[0][:-1]

        if positions1.size > 0 and positions2.size > 0:
            # choose two
            which1 = np.random.choice(positions1)
            which2 = np.random.choice(positions2)

            getnonleafnode1 = which1 + 1
            getnonleafnode2 = which2 + 1

            # get the nodes
            node1 = ast1.from_ast_get_node(ast1.topnode, getnonleafnode1)[0]
            node2 = ast2.from_ast_get_node(ast2.topnode, getnonleafnode2)[0]

            before_swap_rpn1 = ast1.from_ast_to_rpn(node1)
            before_swap_rpn2 = ast2.from_ast_to_rpn(node2)

            bfstate1 = State(self.voc, before_swap_rpn1, self.calculus_mode)
            bfstate2 = State(self.voc, before_swap_rpn2, self.calculus_mode)

            bef_game1 = Game(self.voc, bfstate1)
            bef_game2 = Game(self.voc, bfstate2)

            _, vec_number1, _ = bef_game1.from_rpn_to_critical_info()
            _, vec_number2, _ = bef_game2.from_rpn_to_critical_info()

            # swap only when both subtrees have the same type (vector/scalar)
            if vec_number1 == vec_number2:
                # swap parents and children == swap the subtrees
                prev1 = node1.parent
                c = 0
                for child in prev1.children:
                    if child == node1:
                        prev1.children[c] = node2
                    c += 1

                c = 0
                prev2 = node2.parent
                for child in prev2.children:
                    if child == node2:
                        prev2.children[c] = node1
                    c += 1

                # get the new reversepolish:
                rpn1 = ast1.from_ast_to_rpn(ast1.topnode)
                rpn2 = ast2.from_ast_to_rpn(ast2.topnode)

                # but don't crossover at all if either result is longer
                # than maximal_size (see GP_QD):
                if len(rpn1) > self.maximal_size or len(rpn2) > self.maximal_size:
                    return False, prev_state1, prev_state2
            else:  # cannot crossover a vector with a scalar
                return False, prev_state1, prev_state2

        # no candidate subtrees: cannot crossover
        else:
            return False, prev_state1, prev_state2

        # returns the new states
        state1 = State(self.voc, rpn1, self.calculus_mode)
        state2 = State(self.voc, rpn2, self.calculus_mode)

        if self.usesimplif:
            state1 = game_env.simplif_eq(self.voc, state1)
            state2 = game_env.simplif_eq(self.voc, state2)

        game1 = Game(self.voc, state1)
        game2 = Game(self.voc, state2)

        toreturn = []

        # crossover can lead to a true zero division, so revert in that case
        if self.voc.infinite_number in state1.reversepolish:
            toreturn.append(prev_state1)

        # also revert if the result nests too many functions (parsimony)
        elif game1.getnumberoffunctions() > config.MAX_DEPTH:
            toreturn.append(prev_state1)
        else:
            toreturn.append(state1)

        if self.voc.infinite_number in state2.reversepolish:
            toreturn.append(prev_state2)

        elif game2.getnumberoffunctions() > config.MAX_DEPTH:
            toreturn.append(prev_state2)

        else:
            toreturn.append(state2)

        return True, toreturn[0], toreturn[1]