Example No. 1
import numpy as np

# Importing the DQN object from our AI in ai.py
from ai import DQN
from kivy.config import Config

# Adding this line if we don't want the right click to put a red point
Config.set('input', 'mouse', 'mouse,multitouch_on_demand')

# Introducing last_signal1, last_signal2 and last_signal3 to let the brain know whether it is getting closer to or further from the obstacle.

last_signal1 = 0
last_signal2 = 0
last_signal3 = 0
last_x = 0
last_y = 0
GOAL = 'airport'
BOUNDARY = 20

# Getting our AI, which we call "dqn", and which contains our neural network that represents our Q-function

dqn = DQN(8, 3, 0.9)                    # 8 signals, 3 actions, gamma = 0.9
action2rotation = [0, 10, -10]          # action = 0: no rotation, action = 1: rotate 10 degrees, action = 2: rotate -10 degrees
last_reward = 0                         # initializing the last reward
scores = []                             # initializing the mean score curve (sliding window of the rewards) w.r.t time

# Initializing the map
first_update = True                     # using this trick to initialize the map only once
def init():
    global sand                         # sand is an array with as many cells as the graphic interface has pixels; each cell holds 1 if there is sand, 0 otherwise
    global goal_x                       # x-coordinate of the goal (where the car has to go: the upper-left or the bottom-right corner)
    global goal_y                       # y-coordinate of the goal (where the car has to go: the upper-left or the bottom-right corner)
    global first_update
    sand = np.zeros((RIGHT, TOP))       # initializing the sand array with only zeros
    goal_x = 30                         # the goal to reach is at the upper left of the map (the x-coordinate is 30 and not 0 because the car gets a bad reward if it touches the wall)
    goal_y = TOP - 30                   # the goal to reach is at the upper left of the map (y-coordinate)
    first_update = False                # trick to initialize the map only once
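
These pieces typically meet in the Kivy update loop: the eight signals are fed to the brain, which returns an action that is mapped through action2rotation. Below is a minimal sketch of that step, assuming (as in the classic self-driving-car tutorial this code follows) that DQN exposes an update(reward, signal) method returning the chosen action and a score() method returning the sliding-window mean of rewards; the car object and its sensor attributes are hypothetical stand-ins.

def update_step(car):
    global last_reward, last_signal1, last_signal2, last_signal3
    orientation = car.orientation                      # angle between the car's axis and the direction to the goal (assumed attribute)
    signal = [car.signal1, car.signal2, car.signal3,   # current sensor readings
              car.signal1 - last_signal1,              # deltas: closer to or further from the obstacle
              car.signal2 - last_signal2,
              car.signal3 - last_signal3,
              orientation, -orientation]               # 8 inputs, matching DQN(8, 3, 0.9)
    action = dqn.update(last_reward, signal)           # assumed API: learn from the last reward, pick the next action
    scores.append(dqn.score())                         # assumed API: sliding-window mean of rewards
    car.angle += action2rotation[action]               # map the action index to a rotation
    last_signal1, last_signal2, last_signal3 = car.signal1, car.signal2, car.signal3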
Example No. 2
import numpy as np

from kivy.clock import Clock
from kivy.config import Config

# Importing the DQN object from our AI in ai.py
from ai import DQN

# Adding this line if we don't want the right click to put a red point
Config.set('input', 'mouse', 'mouse,multitouch_on_demand')

# Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map
last_x = 0
last_y = 0
n_points = 0
length = 0

# Getting our AI, which we call "brain", and which contains our neural network that represents our Q-function
brain = DQN(5, 3, 0.9)
action2rotation = [0, 20, -20]
last_reward = 0
scores = []

# Initializing the map
first_update = True


def init():
    global sand
    global goal_x
    global goal_y
    global first_update
    sand = np.zeros((longueur, largeur))
    goal_x = 20
    goal_y = largeur - 20
    first_update = False
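
last_x and last_y come into play in the sand-painting tool: Kivy touch handlers mark sand cells and remember the previous touch point. Below is a minimal sketch of such a handler, assuming widget coordinates map directly onto sand indices; the MyPaintWidget name is illustrative, not from the code above.

from kivy.uix.widget import Widget

class MyPaintWidget(Widget):
    def on_touch_down(self, touch):
        global last_x, last_y, n_points, length
        sand[int(touch.x), int(touch.y)] = 1           # mark sand where the user clicks
        last_x, last_y = int(touch.x), int(touch.y)
        n_points = 0
        length = 0

    def on_touch_move(self, touch):
        global last_x, last_y, n_points, length
        if touch.button == 'left':
            # accumulate the length of the stroke since the last touch point
            length += np.sqrt((touch.x - last_x) ** 2 + (touch.y - last_y) ** 2)
            n_points += 1
            sand[int(touch.x), int(touch.y)] = 1
            last_x, last_y = int(touch.x), int(touch.y)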
Example No. 3
# Importing the DQN object from our AI in ai.py
# Basically this is the brain of the AI in the car --> from the ai.py file
# DQN stands for Deep Q-Network
import numpy as np

from ai import DQN
from kivy.config import Config

# Adding this line if we don't want the right click to put a red point
Config.set('input', 'mouse', 'mouse,multitouch_on_demand')

# Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map
last_x = 0
last_y = 0
n_points = 0  # the total number of points in the last drawing
length = 0  # the length of the last drawing

# Getting our AI, which we call "brain", and which contains our neural network that represents our Q-function
brain = DQN(5, 3, 0.95)  # 5 sensors, 3 actions, gamma = 0.95
action2rotation = [0, 20, -20]  # action = 0 => no rotation, action = 1 => rotate 20 degrees, action = 2 => rotate -20 degrees
last_reward = 0  # initializing the last reward
scores = []  # initializing the mean score curve (sliding window of the rewards) with respect to time

# Initializing the map
first_update = True  # using this trick to initialize the map only once


def init():
    global sand  # sand is an array with as many cells as the graphic interface has pixels; each cell holds 1 if there is sand, 0 otherwise
    global goal_x  # x-coordinate of the goal (where the car has to go, that is the airport or the downtown)
    global goal_y  # y-coordinate of the goal (where the car has to go, that is the airport or the downtown)
    global first_update
    sand = np.zeros((longueur, largeur))
    goal_x = 20
    goal_y = largeur - 20
    first_update = False  # trick to initialize the map only once
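
The comments above mention that the goal alternates between the airport and downtown. Below is a minimal sketch of that swap, assuming the update loop checks the car's distance to the current goal and mirrors the goal to the opposite corner once it is reached; the 100-pixel threshold and the function name are assumptions.

def maybe_swap_goal(car_x, car_y):
    global goal_x, goal_y
    distance = np.sqrt((car_x - goal_x) ** 2 + (car_y - goal_y) ** 2)
    if distance < 100:                 # close enough: the goal becomes the opposite corner
        goal_x = longueur - goal_x     # airport <-> downtown, mirrored across the map
        goal_y = largeur - goal_y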
Example No. 4
    input_size = GAME.board_state.shape
    action_space_size = 32 * 32
    conv_layer_sizes = [(128, 2, 1), (128, 2, 1), (128, 2, 1)]
    hidden_layer_sizes = [256]

    gamma = 0.99                        # discount factor
    batch_sz = 32                       # minibatch size for training updates
    num_episodes = 10000
    total_t = 0                         # total steps taken across all episodes

    experience_replay_buffer = ReplayMemory(input_size)
    episode_rewards = np.zeros(num_episodes)

    epsilon = 0.001                     # exploration rate for epsilon-greedy play

    model1 = DQN(input_size, action_space_size, conv_layer_sizes,
                 hidden_layer_sizes, 'model1')
    model2 = DQN(input_size, action_space_size, conv_layer_sizes,
                 hidden_layer_sizes, 'model2')
    with tf.Session() as session:
        model1.set_session(session)
        model2.set_session(session)
        session.run(tf.global_variables_initializer())
        GAME.reset()

        wins = 0
        # Fill the experience replay buffer with random play before training starts
        for i in range(MIN_EXPERIENCES):
            GAME.available_moves()
            if GAME.win != 0:
                GAME.reset()
            move = random_play(GAME)
            action = encoding_move(move)
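
The loop above seeds the replay buffer with random play; during training proper, moves typically come from an epsilon-greedy policy over the online network instead. Below is a minimal, self-contained sketch of that selection rule; model.predict and the legal_actions list are assumptions, not part of the code shown.

import numpy as np

def sample_action(model, state, legal_actions, epsilon):
    if np.random.random() < epsilon:
        return np.random.choice(legal_actions)             # explore: random legal move
    q_values = model.predict(state)                        # assumed: one Q-value per encoded action
    return max(legal_actions, key=lambda a: q_values[a])   # exploit: best Q-value among legal moves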