import numpy as np
from kivy.config import Config

# Importing the DQN object from our AI in ai.py
from ai import DQN

# Adding this line if we don't want the right click to put a red point
Config.set('input', 'mouse', 'mouse,multitouch_on_demand')

# Introducing last_signal1, last_signal2 and last_signal3 to let the brain know if it is getting closer to or further from the obstacle
last_signal1 = 0
last_signal2 = 0
last_signal3 = 0
last_x = 0
last_y = 0
GOAL = 'airport' # name of the current goal
BOUNDARY = 20

# Getting our AI, which we call "dqn", and that contains our neural network that represents our Q-function
dqn = DQN(8, 3, 0.9) # 8 signals, 3 actions, gamma = 0.9
action2rotation = [0, 10, -10] # action = 0: no rotation, action = 1: rotate 10 degrees, action = 2: rotate -10 degrees
last_reward = 0 # initializing the last reward
scores = [] # initializing the mean score curve (sliding window of the rewards) with respect to time

# Initializing the map
first_update = True # using this trick to initialize the map only once

def init():
    global sand # sand is an array that has as many cells as our graphic interface has pixels; each cell is 1 if there is sand, 0 otherwise
    global goal_x # x-coordinate of the goal (where the car has to go, that is the upper-left corner or the bottom-right corner)
    global goal_y # y-coordinate of the goal (where the car has to go, that is the upper-left corner or the bottom-right corner)
    global first_update
    sand = np.zeros((RIGHT, TOP)) # initializing the sand array with only zeros; RIGHT and TOP (the window dimensions) are defined elsewhere in the file
    goal_x = 30 # the goal to reach is at the upper left of the map (the x-coordinate is 30 and not 0 because the car gets a bad reward if it touches the wall)
    goal_y = TOP - 30 # the goal to reach is at the upper left of the map (y-coordinate)
    first_update = False # trick to initialize the map only once
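# A minimal sketch, not part of the original file: one plausible way to assemble the
# 8-dimensional input assumed by DQN(8, 3, 0.9), using the three current sensor
# signals, the change in each signal since the last step (which is what
# last_signal1/2/3 are introduced for), and the orientation towards the goal in both
# signs. The names signal1, signal2, signal3 and orientation are assumptions here.
def build_state(signal1, signal2, signal3, orientation):
    global last_signal1, last_signal2, last_signal3
    state = [signal1, signal2, signal3,
             signal1 - last_signal1, signal2 - last_signal2, signal3 - last_signal3,
             orientation, -orientation]
    # remember the current signals so the next state can encode whether we got closer to the sand
    last_signal1, last_signal2, last_signal3 = signal1, signal2, signal3
    return state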
import numpy as np
from kivy.config import Config
from kivy.clock import Clock

# Importing the DQN object from our AI in ai.py
from ai import DQN

# Adding this line if we don't want the right click to put a red point
Config.set('input', 'mouse', 'mouse,multitouch_on_demand')

# Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map
last_x = 0
last_y = 0
n_points = 0 # the total number of points in the last drawing
length = 0 # the length of the last drawing

# Getting our AI, which we call "brain", and that contains our neural network that represents our Q-function
brain = DQN(5, 3, 0.9) # 5 signals, 3 actions, gamma = 0.9
action2rotation = [0, 20, -20] # action = 0: no rotation, action = 1: rotate 20 degrees, action = 2: rotate -20 degrees
last_reward = 0 # initializing the last reward
scores = [] # initializing the mean score curve (sliding window of the rewards) with respect to time

# Initializing the map
first_update = True # using this trick to initialize the map only once

def init():
    global sand # the array marking where sand has been drawn on the map
    global goal_x # x-coordinate of the goal
    global goal_y # y-coordinate of the goal
    global first_update
    sand = np.zeros((longueur, largeur)) # longueur and largeur (the map width and height in pixels) are defined elsewhere in the file
    goal_x = 20 # the goal to reach is near the upper-left corner of the map
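# A minimal sketch, not part of the original file, of the kind of painting widget that
# last_x, last_y, n_points and length support: a Kivy widget that marks sand wherever
# the user drags the mouse. The class name and drawing details are assumptions; it
# omits the bounds checks and graphics calls a full version would need.
from kivy.uix.widget import Widget

class SandPainter(Widget):
    def on_touch_down(self, touch):
        global last_x, last_y, n_points, length
        last_x = int(touch.x)
        last_y = int(touch.y)
        n_points = 0
        length = 0
        sand[last_x, last_y] = 1 # mark the touched pixel as sand

    def on_touch_move(self, touch):
        global last_x, last_y, n_points, length
        x, y = int(touch.x), int(touch.y)
        length += np.sqrt((x - last_x) ** 2 + (y - last_y) ** 2) # distance dragged since the last point
        n_points += 1
        sand[x, y] = 1
        last_x, last_y = x, y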
from kivy.config import Config

# Importing the DQN object from our AI in ai.py
# Basically this is the brain of the AI in the car, coming from the ai.py file
# DQN stands for Deep Q-Network
from ai import DQN

# Adding this line if we don't want the right click to put a red point
Config.set('input', 'mouse', 'mouse,multitouch_on_demand')

# Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map
last_x = 0
last_y = 0
n_points = 0 # the total number of points in the last drawing
length = 0 # the length of the last drawing

# Getting our AI, which we call "brain", and that contains our neural network that represents our Q-function
brain = DQN(5, 3, 0.95) # 5 sensors, 3 actions, gamma = 0.95
action2rotation = [0, 20, -20] # action = 0 => no rotation, action = 1 => rotate 20 degrees, action = 2 => rotate -20 degrees
last_reward = 0 # initializing the last reward
scores = [] # initializing the mean score curve (sliding window of the rewards) with respect to time

# Initializing the map
first_update = True # using this trick to initialize the map only once

def init():
    global sand # sand is an array that has as many cells as our graphic interface has pixels; each cell is 1 if there is sand, 0 otherwise
    global goal_x # x-coordinate of the goal (where the car has to go, that is the airport or the downtown)
    global goal_y # y-coordinate of the goal (where the car has to go, that is the airport or the downtown)
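# A minimal sketch, not part of the original files, of the interface that ai.py is
# assumed to expose for brain = DQN(5, 3, 0.95). The class name, method names and
# bodies below are illustrative stand-ins, not the actual implementation.
import random

class DQNSketch:
    def __init__(self, input_size, nb_action, gamma):
        self.input_size = input_size # number of values in the state vector (5 here)
        self.nb_action = nb_action   # number of possible actions (3 here)
        self.gamma = gamma           # discount factor for future rewards
        self.reward_window = []      # sliding window of recent rewards

    def update(self, reward, signal):
        # record the reward, learn from the transition, and return the next action (0, 1 or 2)
        self.reward_window.append(reward)
        if len(self.reward_window) > 1000:
            del self.reward_window[0]
        return random.randint(0, self.nb_action - 1) # placeholder: a real agent picks from its Q-values

    def score(self):
        # mean of the rewards in the sliding window, used to build the scores curve
        return sum(self.reward_window) / (len(self.reward_window) + 1.0)

# Assumed usage in the game loop: the chosen action index is mapped to a rotation,
# e.g. rotation = action2rotation[brain.update(last_reward, last_signal)],
# and scores.append(brain.score()) tracks the sliding-window mean reward.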
import numpy as np
import tensorflow as tf

# GAME, DQN, ReplayMemory, MIN_EXPERIENCES, random_play and encoding_move are defined elsewhere in the project

# Network and training hyperparameters
input_size = GAME.board_state.shape # shape of the board observation
action_space_size = 32 * 32 # 1024 possible actions
conv_layer_sizes = [(128, 2, 1), (128, 2, 1), (128, 2, 1)] # configuration of the three convolutional layers passed to DQN
hidden_layer_sizes = [256] # fully connected layer sizes after the convolutional stack
gamma = 0.99 # discount factor
batch_sz = 32 # minibatch size sampled from the replay memory
num_episodes = 10000 # number of training episodes
total_t = 0 # global step counter
experience_replay_buffer = ReplayMemory(input_size)
episode_rewards = np.zeros(num_episodes)
epsilon = 0.001 # exploration rate

# Two DQN instances sharing the same TensorFlow session
model1 = DQN(input_size, action_space_size, conv_layer_sizes, hidden_layer_sizes, 'model1')
model2 = DQN(input_size, action_space_size, conv_layer_sizes, hidden_layer_sizes, 'model2')

with tf.Session() as session:
    model1.set_session(session)
    model2.set_session(session)
    session.run(tf.global_variables_initializer())

    GAME.reset()
    wins = 0

    # Fill the replay memory with experiences gathered from random play before training
    for i in range(MIN_EXPERIENCES):
        GAME.available_moves() # compute the legal moves for the current position
        if GAME.win != 0: # the previous game ended, so start a new one
            GAME.reset()
        move = random_play(GAME)
        action = encoding_move(move)
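# A minimal sketch, not part of the original project, of the interface the warm-up loop
# above is assumed to need from ReplayMemory: storing one transition per move and
# sampling minibatches later for training. The class, method and field names here are
# assumptions, not the project's actual API.
import numpy as np

class ReplayMemorySketch:
    def __init__(self, state_shape, max_size=10000):
        self.state_shape = state_shape # shape of a single board observation
        self.max_size = max_size
        self.states = []      # board states before the move
        self.actions = []     # encoded actions (integers in [0, 32 * 32))
        self.rewards = []     # rewards observed after the move
        self.next_states = [] # board states after the move
        self.dones = []       # whether the game ended on this move

    def add_experience(self, state, action, reward, next_state, done):
        # drop the oldest transition once the buffer is full
        if len(self.states) >= self.max_size:
            for buf in (self.states, self.actions, self.rewards, self.next_states, self.dones):
                del buf[0]
        self.states.append(state)
        self.actions.append(action)
        self.rewards.append(reward)
        self.next_states.append(next_state)
        self.dones.append(done)

    def sample_batch(self, batch_size):
        # uniform random minibatch, as used by standard DQN training
        idx = np.random.choice(len(self.states), size=batch_size, replace=False)
        return ([self.states[i] for i in idx],
                [self.actions[i] for i in idx],
                [self.rewards[i] for i in idx],
                [self.next_states[i] for i in idx],
                [self.dones[i] for i in idx])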